Merge pull request #8 from krishnanraman/master

3 Scala versions for codegolf
2013-11-30 10:16:31 -08:00
parent 3f382c9492 0f3bb8bb5b
commit 4bb057128d
3 changed files with 138 additions and 0 deletions
--- a/04-code-golf/tf04.scala
+++ b/04-code-golf/tf04.scala
@@ -0,0 +1,43 @@
+/**
+ Faithful conversion of tf-04.py to Scala, avg execution time 5.2 seconds
+ $ time scala tf04 ../pride-and-prejudice.txt
+(Mr,786)
+(Elizabeth,635)
+(very,473)
+(Darcy,417)
+(such,378)
+(Mrs,343)
+(much,325)
+(more,325)
+(Bennet,322)
+(Bingley,305)
+(Jane,295)
+(Miss,281)
+(one,261)
+(know,239)
+(herself,227)
+(before,225)
+(sister,218)
+(soon,214)
+(never,214)
+(though,212)
+(think,210)
+(time,203)
+(now,197)
+(Wickham,194)
+(well,188)
+
+real  0m5.237s
+
+*/
+object tf04 extends App {
+  // Returns the lines of file f joined with ","; the source is closed even when reading fails.
+  def l(f:String) = { val r = io.Source.fromFile(f); try r.getLines().mkString(",") finally r.close() }
+  // Stop words from the list file plus the single letters a..z (code points 97..122).
+  val s = l("../stop_words.txt").split(",") ++ (1 to 26).map(i=>String.valueOf(Character.toChars(96+i)))
+  // Tokenize the input once, so the counting below no longer re-reads and re-splits the file for
+  // every distinct word. Empty tokens (text starting with a non-letter) and stop words are dropped.
+  val w = l(args(0)).split("[^a-zA-Z]+").filter(x => x.nonEmpty && !s.contains(x.toLowerCase))
+  // Print the 25 most frequent words; the timing quoted in the header comment predates the hoist.
+  w.distinct.map(u => (u, w.count(_ == u))).sortBy(-_._2).take(25).foreach(println)
+}
--- a/09-the-one/tf-04-fold.scala
+++ b/09-the-one/tf-04-fold.scala
@@ -0,0 +1,42 @@
+/**
+Attempt to speed up execution time: avg 4.4 seconds
+1. Use a compiled regex
+2. Accumulate tokens using a catamorphism (foldLeft over the lines)
+
+$ time scala tf04fold ../pride-and-prejudice.txt
+(Mr,786)
+(Elizabeth,635)
+(very,473)
+(Darcy,417)
+(such,378)
+(Mrs,343)
+(much,325)
+(more,325)
+(Bennet,322)
+(Bingley,305)
+(Jane,295)
+(Miss,281)
+(one,261)
+(know,239)
+(herself,227)
+(before,225)
+(sister,218)
+(never,214)
+(soon,214)
+(though,212)
+(think,210)
+(time,203)
+(now,197)
+(Wickham,194)
+(well,188)
+
+real  0m4.392s
+*/
+object tf04fold extends App { // prints the 25 most frequent non-stop words of the file named by args(0)
+  def l(f:String) = { val r = io.Source.fromFile(f); try r.getLines().mkString(",") finally r.close() } // lines joined by ","; source always closed
+  val s = l("../stop_words.txt").split(",") ++ (1 to 26).map(i=>String.valueOf(Character.toChars(96+i))) // stop list plus the letters a..z
+  val p = java.util.regex.Pattern.compile("[^a-zA-Z]+") // compiled once, reused for every line
+  val a:List[Array[String]] = Nil // seed for the fold below
+  val w = { val r = io.Source.fromFile(args(0)); try r.getLines().foldLeft(a)((b,c)=> p.split(c).filter(x => (x.length > 0) && !s.contains(x.toLowerCase)) :: b).flatten finally r.close() } // kept tokens; source closed after the fold
+  w.distinct.map(u=> (u,w.count(_==u))).sortBy(-_._2).take(25).foreach(println) // sortBy is stable, so ties keep the order of w
+}
--- a/09-the-one/tf-04-map.scala
+++ b/09-the-one/tf-04-map.scala
@@ -0,0 +1,53 @@
+/**
+Attempt to speed up execution time: avg 0.9 seconds
+1. Use a compiled regex
+2. Accumulate tokens using a catamorphism (foldLeft over the lines)
+3. Count tokens using a 2nd catamorphism (foldLeft into a word -> count Map)
+
+$ time scala tf04map ../pride-and-prejudice.txt
+(Mr,786)
+(Elizabeth,635)
+(very,473)
+(Darcy,417)
+(such,378)
+(Mrs,343)
+(much,325)
+(more,325)
+(Bennet,322)
+(Bingley,305)
+(Jane,295)
+(Miss,281)
+(one,261)
+(know,239)
+(herself,227)
+(before,225)
+(sister,218)
+(soon,214)
+(never,214)
+(though,212)
+(think,210)
+(time,203)
+(now,197)
+(Wickham,194)
+(well,188)
+
+real  0m0.882s
+*/
+object tf04map extends App {
+  // Returns the lines of file f; they are read eagerly so the source can be closed before returning.
+  def l(f:String): Iterator[String] = { val r = io.Source.fromFile(f); try r.getLines().toList.iterator finally r.close() }
+  // Stop words from the list file plus the single letters a..z (code points 97..122).
+  val s = l("../stop_words.txt").mkString(",").split(",") ++ (1 to 26).map(i=>String.valueOf(Character.toChars(96+i)))
+  // Compiled once and reused for every input line.
+  val p = java.util.regex.Pattern.compile("[^a-zA-Z]+")
+  // Outer fold: thread a single immutable word -> count map through every line of args(0).
+  l(args(0)).foldLeft(Map[String,Int]()){ (b,c) =>
+    // Inner fold: add one to the count of each non-empty token on this line that is not a stop word.
+    p.split(c)
+      .filter(x => (x.length > 0) && !s.contains(x.toLowerCase))
+      .foldLeft(b){ (d,e) => d.updated(e, d.getOrElse(e,0)+1) } // updated: no throwaway one-entry Map
+  }.toSeq
+    .sortBy(- _._2)
+    .take(25)
+    .foreach(println)
+}