Merge pull request #8 from krishnanraman/master
3 Scala versions for codegolf
This commit is contained in:
43
04-code-golf/tf04.scala
Normal file
43
04-code-golf/tf04.scala
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
/**
|
||||||
|
Faithful conversion of tf-04.py to scala, avg execution time 5.2 seconds
|
||||||
|
$ time scala tf04a ../pride-and-prejudice.txt
|
||||||
|
(Mr,786)
|
||||||
|
(Elizabeth,635)
|
||||||
|
(very,473)
|
||||||
|
(Darcy,417)
|
||||||
|
(such,378)
|
||||||
|
(Mrs,343)
|
||||||
|
(much,325)
|
||||||
|
(more,325)
|
||||||
|
(Bennet,322)
|
||||||
|
(Bingley,305)
|
||||||
|
(Jane,295)
|
||||||
|
(Miss,281)
|
||||||
|
(one,261)
|
||||||
|
(know,239)
|
||||||
|
(herself,227)
|
||||||
|
(before,225)
|
||||||
|
(sister,218)
|
||||||
|
(soon,214)
|
||||||
|
(never,214)
|
||||||
|
(though,212)
|
||||||
|
(think,210)
|
||||||
|
(time,203)
|
||||||
|
(now,197)
|
||||||
|
(Wickham,194)
|
||||||
|
(well,188)
|
||||||
|
|
||||||
|
real 0m5.237s
|
||||||
|
|
||||||
|
*/
|
||||||
|
object tf04 extends App {
|
||||||
|
def l(f:String) = io.Source.fromFile(f).getLines.mkString(",")
|
||||||
|
val s = l("../stop_words.txt").split(",") ++ (1 to 26).map(i=>String.valueOf(Character.toChars(96+i)))
|
||||||
|
|
||||||
|
l(args(0)).split("[^a-zA-Z]+").filter(x => !s.contains(x.toLowerCase))
|
||||||
|
.distinct
|
||||||
|
.map(u=> (u,l(args(0)).split("[^a-zA-Z]+").filter(x => !s.contains(x.toLowerCase)).count(_==u)))
|
||||||
|
.sortBy(-_._2)
|
||||||
|
.take(25)
|
||||||
|
.foreach(println)
|
||||||
|
}
|
||||||
42
09-the-one/tf-04-fold.scala
Normal file
42
09-the-one/tf-04-fold.scala
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
/**
|
||||||
|
Attempt to speed up execution time: Avg 4.4 seconds
|
||||||
|
1. Use a compiled regex
|
||||||
|
2. accumulate tokens using a catamorphism
|
||||||
|
|
||||||
|
$ time scala tf04fold ../pride-and-prejudice.txt
|
||||||
|
(Mr,786)
|
||||||
|
(Elizabeth,635)
|
||||||
|
(very,473)
|
||||||
|
(Darcy,417)
|
||||||
|
(such,378)
|
||||||
|
(Mrs,343)
|
||||||
|
(much,325)
|
||||||
|
(more,325)
|
||||||
|
(Bennet,322)
|
||||||
|
(Bingley,305)
|
||||||
|
(Jane,295)
|
||||||
|
(Miss,281)
|
||||||
|
(one,261)
|
||||||
|
(know,239)
|
||||||
|
(herself,227)
|
||||||
|
(before,225)
|
||||||
|
(sister,218)
|
||||||
|
(never,214)
|
||||||
|
(soon,214)
|
||||||
|
(though,212)
|
||||||
|
(think,210)
|
||||||
|
(time,203)
|
||||||
|
(now,197)
|
||||||
|
(Wickham,194)
|
||||||
|
(well,188)
|
||||||
|
|
||||||
|
real 0m4.392s
|
||||||
|
*/
|
||||||
|
object tf04fold extends App {
|
||||||
|
def l(f:String) = io.Source.fromFile(f).getLines.mkString(",")
|
||||||
|
val s = l("../stop_words.txt").split(",") ++ (1 to 26).map(i=>String.valueOf(Character.toChars(96+i)))
|
||||||
|
val p = java.util.regex.Pattern.compile("[^a-zA-Z]+")
|
||||||
|
val a:List[Array[String]] = Nil
|
||||||
|
val w = io.Source.fromFile(args(0)).getLines.foldLeft(a)((b,c)=> p.split(c).filter(x => (x.length > 0) && !s.contains(x.toLowerCase)) :: b).flatten
|
||||||
|
w.distinct.map(u=> (u,w.count(_==u))).sortBy(-_._2).take(25).foreach(println)
|
||||||
|
}
|
||||||
53
09-the-one/tf-04-map.scala
Normal file
53
09-the-one/tf-04-map.scala
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
/**
|
||||||
|
Attempt to speed up execution time: Avg 0.9 seconds
|
||||||
|
1. Use a compiled regex
|
||||||
|
2. accumulate tokens using a catamorphism
|
||||||
|
3. count tokens using a 2nd catamorphism
|
||||||
|
|
||||||
|
$ time scala tf04map ../pride-and-prejudice.txt
|
||||||
|
(Mr,786)
|
||||||
|
(Elizabeth,635)
|
||||||
|
(very,473)
|
||||||
|
(Darcy,417)
|
||||||
|
(such,378)
|
||||||
|
(Mrs,343)
|
||||||
|
(much,325)
|
||||||
|
(more,325)
|
||||||
|
(Bennet,322)
|
||||||
|
(Bingley,305)
|
||||||
|
(Jane,295)
|
||||||
|
(Miss,281)
|
||||||
|
(one,261)
|
||||||
|
(know,239)
|
||||||
|
(herself,227)
|
||||||
|
(before,225)
|
||||||
|
(sister,218)
|
||||||
|
(soon,214)
|
||||||
|
(never,214)
|
||||||
|
(though,212)
|
||||||
|
(think,210)
|
||||||
|
(time,203)
|
||||||
|
(now,197)
|
||||||
|
(Wickham,194)
|
||||||
|
(well,188)
|
||||||
|
|
||||||
|
real 0m0.882s
|
||||||
|
*/
|
||||||
|
object tf04map extends App {
|
||||||
|
def l(f:String) = io.Source.fromFile(f).getLines
|
||||||
|
val s = l("../stop_words.txt").mkString(",").split(",") ++ (1 to 26).map(i=>String.valueOf(Character.toChars(96+i)))
|
||||||
|
val p = java.util.regex.Pattern.compile("[^a-zA-Z]+")
|
||||||
|
l(args(0)).foldLeft(Map[String,Int]()){
|
||||||
|
(b,c) =>
|
||||||
|
p
|
||||||
|
.split(c)
|
||||||
|
.filter(x => (x.length > 0) && !s.contains(x.toLowerCase))
|
||||||
|
.foldLeft(b){
|
||||||
|
(d,e) =>
|
||||||
|
d ++ Map(e -> (d.getOrElse(e,0)+1))
|
||||||
|
}
|
||||||
|
}.toSeq
|
||||||
|
.sortBy(- _._2)
|
||||||
|
.take(25)
|
||||||
|
.foreach(println)
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user