Massive renaming!

This commit is contained in:
Crista Lopes
2019-08-12 14:38:16 -07:00
parent e6c1238a56
commit 61d5f74ad9
90 changed files with 0 additions and 0 deletions

27
10-the-one/README.md Normal file
View File

@@ -0,0 +1,27 @@
Style #10
==============================
Constraints:
- Existence of an abstraction to which values can be
converted.
- This abstraction provides operations to (1) wrap
around values, so that they become the abstraction; (2) bind
itself to functions, so as to establish sequences of functions;
and (3) unwrap the value, so as to examine the final result.
- The larger problem is solved as a pipeline of functions bound
together, with unwrapping happening at the end.
- Particularly for The One style, the bind operation simply
calls the given function, giving it the value that it holds, and holds
on to the returned value.
Possible names:
- The One
- Monadic Identity
- The wrapper of all things
- Imperative functional style

View File

@@ -0,0 +1,42 @@
/**
Attempt to speed up execution time: Avg 4.4 seconds
1. Use a compiled regex
2. accumulate tokens using a catamorphism
$ time scala tf04fold ../pride-and-prejudice.txt
(Mr,786)
(Elizabeth,635)
(very,473)
(Darcy,417)
(such,378)
(Mrs,343)
(much,325)
(more,325)
(Bennet,322)
(Bingley,305)
(Jane,295)
(Miss,281)
(one,261)
(know,239)
(herself,227)
(before,225)
(sister,218)
(never,214)
(soon,214)
(though,212)
(think,210)
(time,203)
(now,197)
(Wickham,194)
(well,188)
real 0m4.392s
*/
object tf04fold extends App {
  // Prints the 25 most frequent non-stop-words of the text file named by the
  // first command-line argument, one `(word,count)` tuple per line.

  if (args.isEmpty) {
    Console.err.println("usage: tf04fold <text-file>")
    sys.exit(1)
  }

  // Runs `use` on the lines of `path` and always closes the file afterwards.
  // `use` must consume the iterator eagerly: it is invalid once the file is closed.
  private def withLines[A](path: String)(use: Iterator[String] => A): A = {
    val source = scala.io.Source.fromFile(path)
    try use(source.getLines())
    finally source.close()
  }

  // Stop words come from a comma-separated file; the single letters a..z are
  // added on top. Held in a Set so the per-token lookup is a hash probe rather
  // than a linear scan.
  private val stopWords: Set[String] = {
    val listed = withLines("../stop_words.txt")(_.mkString(",").split(","))
    (listed ++ ('a' to 'z').map(_.toString)).toSet
  }

  // Compiled once; any run of non-letters separates two tokens.
  private val nonLetters = java.util.regex.Pattern.compile("[^a-zA-Z]+")

  // Single fold over the lines. Each line's surviving tokens are prepended as
  // one chunk, so chunks end up in reverse line order while tokens inside a
  // chunk keep their order. Tokens keep their original case; only the
  // stop-word test is case-insensitive.
  private val tokens: List[String] =
    withLines(args(0)) { lines =>
      lines.foldLeft(List.empty[Array[String]]) { (chunks, line) =>
        nonLetters
          .split(line)
          .filter(token => token.nonEmpty && !stopWords.contains(token.toLowerCase)) :: chunks
      }.flatten
    }

  // Counting rescans the whole token list once per distinct word. `sortBy` is
  // stable, so equal counts stay in `distinct` order.
  tokens.distinct
    .map(word => (word, tokens.count(_ == word)))
    .sortBy(-_._2)
    .take(25)
    .foreach(println)
}

View File

@@ -0,0 +1,53 @@
/**
Attempt to speed up execution time: Avg 0.9 seconds
1. Use a compiled regex
2. accumulate tokens using a catamorphism
3. count tokens using a 2nd catamorphism
$ time scala tf04map ../pride-and-prejudice.txt
(Mr,786)
(Elizabeth,635)
(very,473)
(Darcy,417)
(such,378)
(Mrs,343)
(much,325)
(more,325)
(Bennet,322)
(Bingley,305)
(Jane,295)
(Miss,281)
(one,261)
(know,239)
(herself,227)
(before,225)
(sister,218)
(soon,214)
(never,214)
(though,212)
(think,210)
(time,203)
(now,197)
(Wickham,194)
(well,188)
real 0m0.882s
*/
object tf04map extends App {
  // Prints the 25 most frequent non-stop-words of the text file named by the
  // first command-line argument, one `(word,count)` tuple per line.

  if (args.isEmpty) {
    Console.err.println("usage: tf04map <text-file>")
    sys.exit(1)
  }

  // Runs `use` on the lines of `path` and always closes the file afterwards.
  // `use` must consume the iterator eagerly: it is invalid once the file is closed.
  private def withLines[A](path: String)(use: Iterator[String] => A): A = {
    val source = scala.io.Source.fromFile(path)
    try use(source.getLines())
    finally source.close()
  }

  // Stop words come from a comma-separated file; the single letters a..z are
  // added on top. Held in a Set so the per-token lookup is a hash probe rather
  // than a linear scan.
  private val stopWords: Set[String] = {
    val listed = withLines("../stop_words.txt")(_.mkString(",").split(","))
    (listed ++ ('a' to 'z').map(_.toString)).toSet
  }

  // Compiled once; any run of non-letters separates two tokens.
  private val nonLetters = java.util.regex.Pattern.compile("[^a-zA-Z]+")

  // Outer fold walks the lines, inner fold walks one line's tokens; both thread
  // the same immutable word -> count map. Tokens keep their original case; only
  // the stop-word test is case-insensitive.
  private val counts: Map[String, Int] =
    withLines(args(0)) { lines =>
      lines.foldLeft(Map.empty[String, Int]) { (totals, line) =>
        nonLetters
          .split(line)
          .filter(token => token.nonEmpty && !stopWords.contains(token.toLowerCase))
          .foldLeft(totals) { (acc, word) =>
            // `updated` adds or replaces one entry without building a temporary Map.
            acc.updated(word, acc.getOrElse(word, 0) + 1)
          }
      }
    }

  counts.toSeq
    .sortBy(-_._2)
    .take(25)
    .foreach(println)
}

74
10-the-one/tf-10.py Executable file
View File

@@ -0,0 +1,74 @@
#!/usr/bin/env python
import sys, re, operator, string
#
# The One class for this example
#
class TFTheOne:
    """Wrapper that holds a single value and threads it through functions.

    ``bind`` applies a function to the held value and keeps the result,
    returning the wrapper itself so calls can be chained; ``printme``
    prints whatever value is currently held.
    """

    def __init__(self, v):
        # The wrapped value; replaced on every bind().
        self._value = v

    def bind(self, func):
        """Replace the held value with ``func(held value)`` and return self."""
        result = func(self._value)
        self._value = result
        return self

    def printme(self):
        """Print the currently held value."""
        print(self._value)
#
# The functions
#
def read_file(path_to_file):
    """Return the full contents of the file at ``path_to_file`` as one string."""
    with open(path_to_file) as source:
        return source.read()
def filter_chars(str_data):
    """Return ``str_data`` with every run of non-alphanumeric characters
    (underscore included) replaced by a single space.
    """
    # Raw string: '\W' in a plain literal is an invalid escape sequence and
    # triggers a warning on modern Python. The pattern itself is unchanged.
    pattern = re.compile(r'[\W_]+')
    return pattern.sub(' ', str_data)
def normalize(str_data):
    """Return a lower-cased copy of ``str_data``."""
    lowered = str_data.lower()
    return lowered
def scan(str_data):
    """Split ``str_data`` on whitespace and return the list of words."""
    words = str_data.split()
    return words
def remove_stop_words(word_list):
    """Return the words of ``word_list`` that are not stop words.

    Stop words are read from the comma-separated file ``../stop_words.txt``
    (relative to the current working directory); every single lowercase
    ASCII letter is also treated as a stop word. Input order is preserved.
    """
    with open('../stop_words.txt') as f:
        # A set makes each membership test a hash lookup, not a list scan.
        stop_words = set(f.read().split(','))
    # add single-letter words
    stop_words.update(string.ascii_lowercase)
    return [w for w in word_list if w not in stop_words]
def frequencies(word_list):
    """Return a dict mapping each word in ``word_list`` to how often it occurs."""
    counts = {}
    for word in word_list:
        # Missing words start from zero.
        counts[word] = counts.get(word, 0) + 1
    return counts
def sort(word_freq):
    """Return the ``(word, count)`` pairs of ``word_freq`` ordered by count,
    highest first.
    """
    pairs = word_freq.items()
    return sorted(pairs, key=lambda pair: pair[1], reverse=True)
def top25_freqs(word_freqs):
    """Format the first 25 ``(word, count)`` pairs of ``word_freqs`` as text,
    one ``word - count`` entry per line, each line newline-terminated.
    """
    lines = [str(pair[0]) + ' - ' + str(pair[1]) + '\n' for pair in word_freqs[:25]]
    return "".join(lines)
#
# The main function
#
# Wrap the input path, push it through each stage in order, then print
# the formatted result held by the wrapper.
(
    TFTheOne(sys.argv[1])
    .bind(read_file)
    .bind(filter_chars)
    .bind(normalize)
    .bind(scan)
    .bind(remove_stop_words)
    .bind(frequencies)
    .bind(sort)
    .bind(top25_freqs)
    .printme()
)