moved 2 files to the-one

This commit is contained in:
krishnanraman
2013-10-21 11:32:42 -07:00
parent b909a80184
commit 0f3bb8bb5b
3 changed files with 8 additions and 3 deletions

View File

@@ -1,42 +0,0 @@
/**
Attempt to speed up execution time: Avg 4.4 seconds
1. Use a compiled regex
2. accumulate tokens using a catamorphism
$ time scala tf04fold ../pride-and-prejudice.txt
(Mr,786)
(Elizabeth,635)
(very,473)
(Darcy,417)
(such,378)
(Mrs,343)
(much,325)
(more,325)
(Bennet,322)
(Bingley,305)
(Jane,295)
(Miss,281)
(one,261)
(know,239)
(herself,227)
(before,225)
(sister,218)
(never,214)
(soon,214)
(though,212)
(think,210)
(time,203)
(now,197)
(Wickham,194)
(well,188)
real 0m4.392s
*/
/**
 * Prints the 25 most frequent non-stop-words of the text file named by the
 * first command-line argument, one `(word,count)` pair per line.
 *
 * Stop words are read from `../stop_words.txt` (comma separated) and extended
 * with the single letters `a` to `z`. Tokens are maximal runs of ASCII letters;
 * stop-word matching is case-insensitive, counting is case-sensitive.
 */
object tf04fold extends App {
  /** Applies `f` to the lines of the file at `path`, always closing the file afterwards. */
  private def withLines[A](path: String)(f: Iterator[String] => A): A = {
    val src = io.Source.fromFile(path)
    try f(src.getLines()) finally src.close()
  }

  /** Returns the content of file `f` with its lines joined by commas. */
  def l(f: String) = withLines(f)(_.mkString(","))

  // Stop words from the file plus every single lowercase letter.
  val s = l("../stop_words.txt").split(",") ++ ('a' to 'z').map(_.toString)
  // Set view of `s` so the per-token membership test is not a linear array scan.
  private val stopSet = s.toSet

  // Compiled once; splits a line on every run of non-letter characters.
  val p = java.util.regex.Pattern.compile("[^a-zA-Z]+")
  val a: List[Array[String]] = Nil

  // Each line's kept tokens are prepended, so `w` lists lines in reverse file
  // order. The fold is strict, so the file is fully consumed before it is closed.
  val w = withLines(args(0)) { lines =>
    lines.foldLeft(a) { (acc, line) =>
      p.split(line).filter(t => t.nonEmpty && !stopSet(t.toLowerCase)) :: acc
    }.flatten
  }

  // One pass to count every token instead of rescanning `w` per distinct word.
  private val counts = w.groupBy(identity).map { case (word, hits) => word -> hits.size }

  // `distinct` keeps first-occurrence order and `sortBy` is stable, so words
  // with equal counts are printed in the same relative order as before.
  w.distinct.map(u => (u, counts(u))).sortBy(-_._2).take(25).foreach(println)
}

View File

@@ -1,53 +0,0 @@
/**
Attempt to speed up execution time: Avg 0.9 seconds
1. Use a compiled regex
2. accumulate tokens using a catamorphism
3. count tokens using a 2nd catamorphism
$ time scala tf04map ../pride-and-prejudice.txt
(Mr,786)
(Elizabeth,635)
(very,473)
(Darcy,417)
(such,378)
(Mrs,343)
(much,325)
(more,325)
(Bennet,322)
(Bingley,305)
(Jane,295)
(Miss,281)
(one,261)
(know,239)
(herself,227)
(before,225)
(sister,218)
(soon,214)
(never,214)
(though,212)
(think,210)
(time,203)
(now,197)
(Wickham,194)
(well,188)
real 0m0.882s
*/
/**
 * Prints the 25 most frequent non-stop-words of the text file named by the
 * first command-line argument, one `(word,count)` pair per line.
 *
 * Stop words are read from `../stop_words.txt` (comma separated) and extended
 * with the single letters `a` to `z`. Tokens are maximal runs of ASCII letters;
 * stop-word matching is case-insensitive, counting is case-sensitive.
 */
object tf04map extends App {
  /**
   * Returns an iterator over the lines of file `f`.
   *
   * The file is read completely and closed before the iterator is returned,
   * so no file handle outlives the call.
   */
  def l(f: String): Iterator[String] = {
    val src = io.Source.fromFile(f)
    try src.getLines().toList.iterator finally src.close()
  }

  // Stop words from the file plus every single lowercase letter.
  val s = l("../stop_words.txt").mkString(",").split(",") ++ ('a' to 'z').map(_.toString)
  // Set view of `s` so the per-token membership test is not a linear array scan.
  private val stopSet = s.toSet

  // Compiled once; splits a line on every run of non-letter characters.
  val p = java.util.regex.Pattern.compile("[^a-zA-Z]+")

  // Outer fold walks the lines, inner fold bumps the running count of each kept token.
  l(args(0)).foldLeft(Map[String, Int]()) { (counts, line) =>
    p.split(line)
      .filter(t => t.nonEmpty && !stopSet(t.toLowerCase))
      .foldLeft(counts)((m, t) => m.updated(t, m.getOrElse(t, 0) + 1))
  }.toSeq
    .sortBy(-_._2)
    .take(25)
    .foreach(println)
}

View File

@@ -30,9 +30,14 @@
real 0m5.237s
*/
object tf04a extends App {
object tf04 extends App {
def l(f:String) = io.Source.fromFile(f).getLines.mkString(",")
val s = l("../stop_words.txt").split(",") ++ (1 to 26).map(i=>String.valueOf(Character.toChars(96+i)))
val w = l(args(0)).split("[^a-zA-Z]+").filter(x => !s.contains(x.toLowerCase))
w.distinct.map(u=> (u,w.count(_==u))).sortBy(-_._2).take(25).foreach(println)
l(args(0)).split("[^a-zA-Z]+").filter(x => !s.contains(x.toLowerCase))
.distinct
.map(u=> (u,l(args(0)).split("[^a-zA-Z]+").filter(x => !s.contains(x.toLowerCase)).count(_==u)))
.sortBy(-_._2)
.take(25)
.foreach(println)
}