Massive renaming!
This commit is contained in:
27
10-the-one/README.md
Normal file
27
10-the-one/README.md
Normal file
@@ -0,0 +1,27 @@
Style #9
==============================

Constraints:

- Existence of an abstraction to which values can be
converted.

- This abstraction provides operations to (1) wrap
around values, so that they become the abstraction; (2) bind
itself to functions, so to establish sequences of functions;
and (3) unwrap the value, so to examine the final result.

- Larger problem is solved as a pipeline of functions bound
together, with unwrapping happening at the end.

- Particularly for The One style, the bind operation simply
calls the given function, giving it the value that it holds, and holds
on to the returned value.


Possible names:

- The One
- Monadic Identity
- The wrapper of all things
- Imperative functional style
42
10-the-one/tf-04-fold.scala
Normal file
42
10-the-one/tf-04-fold.scala
Normal file
@@ -0,0 +1,42 @@
|
||||
/**
Attempt to speed up execution time: Avg 4.4 seconds
1. Use a compiled regex
2. accumulate tokens using a catamorphism

$ time scala tf04fold ../pride-and-prejudice.txt
(Mr,786)
(Elizabeth,635)
(very,473)
(Darcy,417)
(such,378)
(Mrs,343)
(much,325)
(more,325)
(Bennet,322)
(Bingley,305)
(Jane,295)
(Miss,281)
(one,261)
(know,239)
(herself,227)
(before,225)
(sister,218)
(never,214)
(soon,214)
(though,212)
(think,210)
(time,203)
(now,197)
(Wickham,194)
(well,188)

real 0m4.392s
*/
object tf04fold extends App {
  // Slurp a whole file into a single comma-joined string.
  def slurp(path: String) = io.Source.fromFile(path).getLines.mkString(",")

  // Stop words from file, plus every single-letter word a..z.
  val stopWords = slurp("../stop_words.txt").split(",") ++ ('a' to 'z').map(_.toString)

  // Compiled once: any run of non-letters separates tokens.
  val nonLetters = java.util.regex.Pattern.compile("[^a-zA-Z]+")

  // Fold the input lines into a list of per-line token arrays
  // (the catamorphism), then flatten into one word list.
  val seed: List[Array[String]] = Nil
  val words = io.Source.fromFile(args(0)).getLines
    .foldLeft(seed) { (acc, line) =>
      nonLetters.split(line).filter(t => t.nonEmpty && !stopWords.contains(t.toLowerCase)) :: acc
    }
    .flatten

  // Count each distinct word and print the 25 most frequent.
  words.distinct
    .map(w => (w, words.count(_ == w)))
    .sortBy(-_._2)
    .take(25)
    .foreach(println)
}
|
||||
53
10-the-one/tf-04-map.scala
Normal file
53
10-the-one/tf-04-map.scala
Normal file
@@ -0,0 +1,53 @@
|
||||
/**
Attempt to speed up execution time: Avg 0.9 seconds
1. Use a compiled regex
2. accumulate tokens using a catamorphism
3. count tokens using a 2nd catamorphism

$ time scala tf04map ../pride-and-prejudice.txt
(Mr,786)
(Elizabeth,635)
(very,473)
(Darcy,417)
(such,378)
(Mrs,343)
(much,325)
(more,325)
(Bennet,322)
(Bingley,305)
(Jane,295)
(Miss,281)
(one,261)
(know,239)
(herself,227)
(before,225)
(sister,218)
(soon,214)
(never,214)
(though,212)
(think,210)
(time,203)
(now,197)
(Wickham,194)
(well,188)

real 0m0.882s
*/
object tf04map extends App {
  // Read a file lazily, line by line.
  def l(f:String) = io.Source.fromFile(f).getLines
  // Stop words from file, plus the 26 single-letter words a..z
  // (96+i is the ASCII code point for 'a'-1).
  val s = l("../stop_words.txt").mkString(",").split(",") ++ (1 to 26).map(i=>String.valueOf(Character.toChars(96+i)))
  // Compiled once: any run of non-letters separates tokens.
  val p = java.util.regex.Pattern.compile("[^a-zA-Z]+")
  // Outer fold (1st catamorphism): stream the lines, threading a
  // word-count Map through as the accumulator.
  l(args(0)).foldLeft(Map[String,Int]()){
    (b,c) =>
      // Tokenize the line, drop empties and stop words...
      p
      .split(c)
      .filter(x => (x.length > 0) && !s.contains(x.toLowerCase))
      // ...then inner fold (2nd catamorphism): bump each surviving
      // token's count in the accumulated Map.
      .foldLeft(b){
        (d,e) =>
          d ++ Map(e -> (d.getOrElse(e,0)+1))
      }
  }.toSeq
  // Sort by descending count and print the top 25.
  .sortBy(- _._2)
  .take(25)
  .foreach(println)
}
|
||||
74
10-the-one/tf-10.py
Executable file
74
10-the-one/tf-10.py
Executable file
@@ -0,0 +1,74 @@
|
||||
#!/usr/bin/env python
|
||||
import sys, re, operator, string
|
||||
|
||||
#
# The One class for this example
#
class TFTheOne:
    """Identity-monad-style wrapper: holds a single value and threads it
    through a chain of functions via bind()."""

    def __init__(self, v):
        # Wrap the initial value.
        self._value = v

    def bind(self, func):
        """Apply func to the held value, keep the result, and return self
        so that calls can be chained."""
        result = func(self._value)
        self._value = result
        return self

    def printme(self):
        """Unwrap: print the final held value."""
        print(self._value)
|
||||
|
||||
#
|
||||
# The functions
|
||||
#
|
||||
def read_file(path_to_file):
    """Return the entire contents of the file at path_to_file."""
    with open(path_to_file) as source:
        return source.read()
|
||||
|
||||
def filter_chars(str_data):
    """Replace every run of non-word characters (and underscores) in
    str_data with a single space; returns the cleaned string."""
    # Raw string: '[\W_]+' in a plain literal is an invalid escape
    # sequence (SyntaxWarning on Python 3.12+, slated to become an error).
    pattern = re.compile(r'[\W_]+')
    return pattern.sub(' ', str_data)
|
||||
|
||||
def normalize(str_data):
    """Lower-case the whole string so counting is case-insensitive."""
    lowered = str_data.lower()
    return lowered
|
||||
|
||||
def scan(str_data):
    """Tokenize str_data on runs of whitespace; returns a list of words."""
    tokens = str_data.split()
    return tokens
|
||||
|
||||
def remove_stop_words(word_list):
    """Return word_list without stop words or single-letter words.

    Reads the comma-separated stop-word file at ../stop_words.txt
    (path relative to the working directory).
    """
    with open('../stop_words.txt') as f:
        # A set makes the per-word membership test below O(1) instead of
        # the original list's O(n) scan.
        stop_words = set(f.read().split(','))
    # add single-letter words
    stop_words.update(string.ascii_lowercase)
    return [w for w in word_list if w not in stop_words]
|
||||
|
||||
def frequencies(word_list):
    """Count occurrences of each word; returns a dict {word: count}."""
    word_freqs = {}
    for w in word_list:
        # dict.get with a default replaces the if/else double lookup.
        word_freqs[w] = word_freqs.get(w, 0) + 1
    return word_freqs
|
||||
|
||||
def sort(word_freq):
    """Return (word, count) pairs ordered from most to least frequent."""
    return sorted(word_freq.items(), key=lambda pair: pair[1], reverse=True)
|
||||
|
||||
def top25_freqs(word_freqs):
    """Format the first 25 (word, count) pairs as one 'word - count'
    line each; returns the combined report string."""
    # str.join builds the result in one pass instead of the quadratic
    # repeated '+=' concatenation.
    return ''.join(str(w) + ' - ' + str(c) + '\n' for w, c in word_freqs[:25])
|
||||
|
||||
#
# The main function
#
# Wrap the input path in The One, then thread it through the whole
# pipeline step by step; the final bind leaves the formatted top-25
# report as the held value, which printme() unwraps and prints.
steps = [read_file, filter_chars, normalize, scan,
         remove_stop_words, frequencies, sort, top25_freqs]
wrapped = TFTheOne(sys.argv[1])
for step in steps:
    wrapped = wrapped.bind(step)
wrapped.printme()
|
||||
|
||||
Reference in New Issue
Block a user