Compare commits
120 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
01160ccb7f | ||
|
|
c80a0216a5 | ||
|
|
f923e1cc7e | ||
|
|
dfd2e61c50 | ||
|
|
180f942397 | ||
|
|
5405edaf93 | ||
|
|
67fdd1e97a | ||
|
|
185f174b6c | ||
|
|
9c8a7b7745 | ||
|
|
275bb3171b | ||
|
|
9a7f21e3c0 | ||
|
|
d1b7eca7d8 | ||
|
|
e3e95f6b76 | ||
|
|
2b8e26b54b | ||
|
|
18951848b3 | ||
|
|
93730988b8 | ||
|
|
b19ee3cd69 | ||
|
|
9f4dbbe9e3 | ||
|
|
e33e5bc3ed | ||
|
|
c49206ae61 | ||
|
|
d341e63254 | ||
|
|
f810ef8723 | ||
|
|
ba2286cde6 | ||
|
|
44e1c052fe | ||
|
|
3a4715588b | ||
|
|
066563dd0b | ||
|
|
7041115a5d | ||
|
|
322be5928d | ||
|
|
2b977aa8b9 | ||
|
|
4e1fceb6c1 | ||
|
|
b9a93f1c6c | ||
|
|
d025aa0b6f | ||
|
|
2e126939ac | ||
|
|
290ec65593 | ||
|
|
4f105a6236 | ||
|
|
816dcd5923 | ||
|
|
69f5a995e9 | ||
|
|
9dabaa4b55 | ||
|
|
4664602675 | ||
|
|
f53a4c8523 | ||
|
|
180572de7e | ||
|
|
b55681f2a4 | ||
|
|
c6a54ff405 | ||
|
|
766778a9a1 | ||
|
|
8a2066014f | ||
|
|
454e4956e3 | ||
|
|
8aa9f80fc8 | ||
|
|
83377bf0df | ||
|
|
f33d353a69 | ||
|
|
a0e47a7d70 | ||
|
|
1a7dad48a2 | ||
|
|
0990084653 | ||
|
|
a81889f2bb | ||
|
|
e8b48906e2 | ||
|
|
ac3d348e45 | ||
|
|
e0d99d546c | ||
|
|
df289f0d2c | ||
|
|
a6d5ee0d13 | ||
|
|
69e9934ae9 | ||
|
|
a6fc28a123 | ||
|
|
a8379345c7 | ||
|
|
9c51b2fc1c | ||
|
|
18c7135d59 | ||
|
|
f4ce13c429 | ||
|
|
78df7731b3 | ||
|
|
40c292a4f7 | ||
|
|
3a579e61e4 | ||
|
|
8709c07dca | ||
|
|
9f76b84cff | ||
|
|
d5c5e00adb | ||
|
|
beed9d10cd | ||
|
|
b855384437 | ||
|
|
997c2827b1 | ||
|
|
239c19732a | ||
|
|
5a2b5975c7 | ||
|
|
3412e6fe95 | ||
|
|
7861836070 | ||
|
|
9549e4473e | ||
|
|
451c5d9937 | ||
|
|
e0d8b1adc2 | ||
|
|
7f7c4e111a | ||
|
|
0520c182be | ||
|
|
6e1d72bd21 | ||
|
|
ebeba3d34c | ||
|
|
9acd61f160 | ||
|
|
c7cb993a56 | ||
|
|
a18c4c6980 | ||
|
|
e2c531fc5c | ||
|
|
de77c8a224 | ||
|
|
75e89c1acf | ||
|
|
a0b28a985a | ||
|
|
659e43e92f | ||
|
|
3250ac1188 | ||
|
|
88bb3ee2df | ||
|
|
d202fea352 | ||
|
|
cc86a2cebc | ||
|
|
eb85af9982 | ||
|
|
61d5f74ad9 | ||
|
|
e6c1238a56 | ||
|
|
053bec0fc1 | ||
|
|
ae79757199 | ||
|
|
38095fd076 | ||
|
|
0f826ee6bc | ||
|
|
c23828777f | ||
|
|
ed8c3cabaa | ||
|
|
4449f0b7ba | ||
|
|
32af06377d | ||
|
|
1e3ec4932a | ||
|
|
f196134fc1 | ||
|
|
037ef4e621 | ||
|
|
aceaf60ce1 | ||
|
|
18d25aa2f0 | ||
|
|
685450ef92 | ||
|
|
f6a7e671c5 | ||
|
|
b6d0b86e56 | ||
|
|
f8233e34ff | ||
|
|
53a60ea121 | ||
|
|
1fa9eb3c2b | ||
|
|
4224ba4ba9 | ||
|
|
c93bc2fbfd |
@@ -1,5 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import sys, os, string
|
||||
|
||||
# Utility for handling the intermediate 'secondary memory'
|
||||
@@ -46,7 +45,7 @@ data.append(0) # data[7] is frequency
|
||||
# Open the secondary memory
|
||||
word_freqs = touchopen('word_freqs', 'rb+')
|
||||
# Open the input file
|
||||
f = open(sys.argv[1])
|
||||
f = open(sys.argv[1], 'r')
|
||||
# Loop over input file's lines
|
||||
while True:
|
||||
data[1] = [f.readline()]
|
||||
@@ -71,7 +70,7 @@ while True:
|
||||
if len(data[5]) >= 2 and data[5] not in data[0]:
|
||||
# Let's see if it already exists
|
||||
while True:
|
||||
data[6] = word_freqs.readline().strip()
|
||||
data[6] = str(word_freqs.readline().strip(), 'utf-8')
|
||||
if data[6] == '':
|
||||
break;
|
||||
data[7] = int(data[6].split(',')[1])
|
||||
@@ -83,10 +82,10 @@ while True:
|
||||
break
|
||||
if not data[4]:
|
||||
word_freqs.seek(0, 1) # Needed in Windows
|
||||
word_freqs.writelines("%20s,%04d\n" % (data[5], 1))
|
||||
word_freqs.write(bytes("%20s,%04d\n" % (data[5], 1), 'utf-8'))
|
||||
else:
|
||||
word_freqs.seek(-26, 1)
|
||||
word_freqs.writelines("%20s,%04d\n" % (data[5], data[7]))
|
||||
word_freqs.write(bytes("%20s,%04d\n" % (data[5], data[7]), 'utf-8'))
|
||||
word_freqs.seek(0,0)
|
||||
# Let's reset
|
||||
data[2] = None
|
||||
@@ -96,7 +95,7 @@ f.close()
|
||||
word_freqs.flush()
|
||||
|
||||
# PART 2
|
||||
# Now we need to find the 25 most frequently occuring words.
|
||||
# Now we need to find the 25 most frequently occurring words.
|
||||
# We don't need anything from the previous values in memory
|
||||
del data[:]
|
||||
|
||||
@@ -107,7 +106,7 @@ data.append(0) # data[26] is freq
|
||||
|
||||
# Loop over secondary memory file
|
||||
while True:
|
||||
data[25] = word_freqs.readline().strip()
|
||||
data[25] = str(word_freqs.readline().strip(), 'utf-8')
|
||||
if data[25] == '': # EOF
|
||||
break
|
||||
data[26] = int(data[25].split(',')[1]) # Read it as integer
|
||||
@@ -121,6 +120,6 @@ while True:
|
||||
|
||||
for tf in data[0:25]: # elimination of symbol tf is exercise
|
||||
if len(tf) == 2:
|
||||
print tf[0], ' - ', tf[1]
|
||||
print(tf[0], '-', tf[1])
|
||||
# We're done
|
||||
word_freqs.close()
|
||||
|
||||
@@ -4,7 +4,12 @@
|
||||
# Author: Chris Meyers @
|
||||
# http://openbookproject.net/py4fun/forth/forth.html
|
||||
#
|
||||
import sys, re
|
||||
import re
|
||||
|
||||
try:
|
||||
raw_input # Python 2
|
||||
except NameError:
|
||||
raw_input = input # Python 3
|
||||
|
||||
ds = [] # The data stack
|
||||
cStack = [] # The control struct stack
|
||||
@@ -15,7 +20,7 @@ words = [] # The input stream of tokens
|
||||
def main() :
|
||||
while 1 :
|
||||
pcode = compile() # compile/run from user
|
||||
if pcode == None : print; return
|
||||
if pcode == None : print(); return
|
||||
execute(pcode)
|
||||
|
||||
#============================== Lexical Parsing
|
||||
@@ -56,8 +61,8 @@ def rSwap(cod,p) : a=ds.pop(); b=ds.pop(); ds.append(a); ds.append(b)
|
||||
def rDup (cod,p) : ds.append(ds[-1])
|
||||
def rDrop(cod,p) : ds.pop()
|
||||
def rOver(cod,p) : ds.append(ds[-2])
|
||||
def rDump(cod,p) : print "ds = ", ds
|
||||
def rDot (cod,p) : print ds.pop()
|
||||
def rDump(cod,p) : print("ds = ", ds)
|
||||
def rDot (cod,p) : print(ds.pop())
|
||||
def rJmp (cod,p) : return cod[p]
|
||||
def rJnz (cod,p) : return (cod[p],p+1)[ds.pop()]
|
||||
def rJz (cod,p) : return (p+1,cod[p])[ds.pop()==0]
|
||||
|
||||
@@ -91,7 +91,7 @@ def frequencies():
|
||||
|
||||
def sort():
|
||||
# Not in style, left as exercise
|
||||
stack.extend(sorted(stack.pop().iteritems(), key=operator.itemgetter(1)))
|
||||
stack.extend(sorted(stack.pop().items(), key=operator.itemgetter(1)))
|
||||
|
||||
# The main function
|
||||
#
|
||||
@@ -104,7 +104,7 @@ stack.append(0)
|
||||
# the last word there will be one item left
|
||||
while stack[-1] < 25 and len(stack) > 1:
|
||||
heap['i'] = stack.pop()
|
||||
(w, f) = stack.pop(); print w, ' - ', f
|
||||
(w, f) = stack.pop(); print(w, '-', f)
|
||||
stack.append(heap['i']); stack.append(1)
|
||||
stack.append(stack.pop() + stack.pop())
|
||||
|
||||
|
||||
50
03-arrays/tf-03.py
Normal file
50
03-arrays/tf-03.py
Normal file
@@ -0,0 +1,50 @@
|
||||
import sys, string
|
||||
import numpy as np
|
||||
|
||||
# Example input: "Hello World!"
|
||||
characters = np.array([' ']+list(open(sys.argv[1]).read())+[' '])
|
||||
# Result: array([' ', 'H', 'e', 'l', 'l', 'o', ' ', ' ',
|
||||
# 'W', 'o', 'r', 'l', 'd', '!', ' '], dtype='<U1')
|
||||
|
||||
# Normalize
|
||||
characters[~np.char.isalpha(characters)] = ' '
|
||||
characters = np.char.lower(characters)
|
||||
# Result: array([' ', 'h', 'e', 'l', 'l', 'o', ' ', ' ',
|
||||
# 'w', 'o', 'r', 'l', 'd', ' ', ' '], dtype='<U1')
|
||||
|
||||
### Split the words by finding the indices of spaces
|
||||
sp = np.where(characters == ' ')
|
||||
# Result: (array([ 0, 6, 7, 13, 14], dtype=int64),)
|
||||
# A little trick: let's double each index, and then take pairs
|
||||
sp2 = np.repeat(sp, 2)
|
||||
# Result: array([ 0, 0, 6, 6, 7, 7, 13, 13, 14, 14], dtype=int64)
|
||||
# Get the pairs as a 2D matrix, skip the first and the last
|
||||
w_ranges = np.reshape(sp2[1:-1], (-1, 2))
|
||||
# Result: array([[ 0, 6],
|
||||
# [ 6, 7],
|
||||
# [ 7, 13],
|
||||
# [13, 14]], dtype=int64)
|
||||
# Remove the indexing to the spaces themselves
|
||||
w_ranges = w_ranges[np.where(w_ranges[:, 1] - w_ranges[:, 0] > 2)]
|
||||
# Result: array([[ 0, 6],
|
||||
# [ 7, 13]], dtype=int64)
|
||||
|
||||
# Voila! Words are in between spaces, given as pairs of indices
|
||||
words = list(map(lambda r: characters[r[0]:r[1]], w_ranges))
|
||||
# Result: [array([' ', 'h', 'e', 'l', 'l', 'o'], dtype='<U1'),
|
||||
# array([' ', 'w', 'o', 'r', 'l', 'd'], dtype='<U1')]
|
||||
# Let's recode the characters as strings
|
||||
swords = np.array(list(map(lambda w: ''.join(w).strip(), words)))
|
||||
# Result: array(['hello', 'world'], dtype='<U5')
|
||||
|
||||
# Next, let's remove stop words
|
||||
stop_words = np.array(list(set(open('../stop_words.txt').read().split(','))))
|
||||
ns_words = swords[~np.isin(swords, stop_words)]
|
||||
|
||||
### Finally, count the word occurrences
|
||||
uniq, counts = np.unique(ns_words, axis=0, return_counts=True)
|
||||
wf_sorted = sorted(zip(uniq, counts), key=lambda t: t[1], reverse=True)
|
||||
|
||||
for w, c in wf_sorted[:25]:
|
||||
print(w, '-', c)
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
Style #3
|
||||
Style #4
|
||||
==============================
|
||||
|
||||
Constraints:
|
||||
@@ -1,6 +1,6 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import sys, string
|
||||
|
||||
# the global list of [word, frequency] pairs
|
||||
word_freqs = []
|
||||
# the list of stop words
|
||||
@@ -30,7 +30,6 @@ for line in open(sys.argv[1]):
|
||||
if word == pair[0]:
|
||||
pair[1] += 1
|
||||
found = True
|
||||
found_at = pair_index
|
||||
break
|
||||
pair_index += 1
|
||||
if not found:
|
||||
@@ -47,5 +46,5 @@ for line in open(sys.argv[1]):
|
||||
i += 1
|
||||
|
||||
for tf in word_freqs[0:25]:
|
||||
print tf[0], ' - ', tf[1]
|
||||
print(tf[0], '-', tf[1])
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
Style #4
|
||||
Style #5
|
||||
==============================
|
||||
|
||||
Constraints:
|
||||
@@ -70,7 +70,7 @@ def sort():
|
||||
Sorts word_freqs by frequency
|
||||
"""
|
||||
global word_freqs
|
||||
word_freqs.sort(lambda x, y: cmp(y[1], x[1]))
|
||||
word_freqs.sort(key=lambda x: x[1], reverse=True)
|
||||
|
||||
|
||||
#
|
||||
@@ -84,5 +84,4 @@ frequencies()
|
||||
sort()
|
||||
|
||||
for tf in word_freqs[0:25]:
|
||||
print tf[0], ' - ', tf[1]
|
||||
|
||||
print(tf[0], '-', tf[1])
|
||||
@@ -1,4 +1,4 @@
|
||||
Style #5
|
||||
Style #6
|
||||
==============================
|
||||
|
||||
Constraints:
|
||||
@@ -58,15 +58,15 @@ def sort(word_freq):
|
||||
and returns a list of pairs where the entries are
|
||||
sorted by frequency
|
||||
"""
|
||||
return sorted(word_freq.iteritems(), key=operator.itemgetter(1), reverse=True)
|
||||
return sorted(word_freq.items(), key=operator.itemgetter(1), reverse=True)
|
||||
|
||||
def print_all(word_freqs):
|
||||
"""
|
||||
Takes a list of pairs where the entries are sorted by frequency and print them recursively.
|
||||
"""
|
||||
if(len(word_freqs) > 0):
|
||||
print word_freqs[0][0], ' - ', word_freqs[0][1]
|
||||
print_all(word_freqs[1:]);
|
||||
print(word_freqs[0][0], '-', word_freqs[0][1])
|
||||
print_all(word_freqs[1:])
|
||||
|
||||
#
|
||||
# The main function
|
||||
26
06-pipeline/tf-06.rexx
Normal file
26
06-pipeline/tf-06.rexx
Normal file
@@ -0,0 +1,26 @@
|
||||
/* WORDFREQ REXX Exercises in Programming - CMS Pipelines style */
|
||||
/* Author: Rob van der Heij, 26 Apr 2019 */
|
||||
|
||||
/* Use: */
|
||||
/* PIPE (end \) < pridenp txt | w: wordfreq | cons */
|
||||
/* \ < stop_wor txt | w: */
|
||||
|
||||
|
||||
alpha = 'A-I a-i J-R j-r S-Z s-z a-i a-i j-r j-r s-z s-z'
|
||||
|
||||
'callpipe (end \ name WORDFREQ.REXX:6)',
|
||||
'\ *: ',
|
||||
'| xlate *-* 00-FF blank' alpha , /* Keep words in lower case */
|
||||
'| split ', /* .. on separate records */
|
||||
'| locate 2', /* At least 2 letters long */
|
||||
'| l: not lookup', /* Drop all stop words */
|
||||
'| sort count', /* Count the remaining words */
|
||||
'| sort 1.10 d ', /* Sort on number of hits */
|
||||
'| take 25', /* Take top-25 */
|
||||
'| spec 11-* 1 , - , nw 1.10 strip nw ', /* Make pretty layout */
|
||||
'| *:',
|
||||
'\ *.input.1: ', /* Read stop words */
|
||||
'| split ,', /* .. as one word per line */
|
||||
'| l:' /* into lookup table */
|
||||
|
||||
return rc * ( rc <> 12 )
|
||||
@@ -1,4 +1,4 @@
|
||||
Style #6
|
||||
Style #7
|
||||
==============================
|
||||
|
||||
Constraints:
|
||||
@@ -4,5 +4,5 @@ import re, string, sys
|
||||
stops = set(open("../stop_words.txt").read().split(",") + list(string.ascii_lowercase))
|
||||
words = [x.lower() for x in re.split("[^a-zA-Z]+", open(sys.argv[1]).read()) if len(x) > 0 and x.lower() not in stops]
|
||||
unique_words = list(set(words))
|
||||
unique_words.sort(lambda x, y: cmp(words.count(y), words.count(x)))
|
||||
print "\n".join(["%s - %s" % (x, words.count(x)) for x in unique_words[:25]])
|
||||
unique_words.sort(key=lambda x: words.count(x), reverse=True)
|
||||
print("\n".join(["%s - %s" % (x, words.count(x)) for x in unique_words[:25]]))
|
||||
@@ -1 +1,2 @@
|
||||
print (reduce(lambda string, tup: string + tup[0] + ' - ' + str(tup[1]) + '\n', sorted( filter(lambda tup: tup[0] not in open(__import__('os').path.join(__import__('os').path.dirname(__file__), '..', 'stop_words.txt')).read().lower().split(','), reduce(lambda word_dict, word: word_dict if (word_dict.__setitem__(word, word_dict.get(word, 0) + 1) if True else None) else word_dict, filter(lambda word: len(word) > 1, (''.join(map(lambda letter: ' ' if ord(letter) not in set(range(ord('a'), ord('z') + 1)) else letter, open(__import__('sys').argv[1]).read().lower()))).split()), {}).iteritems()), key=lambda tup: tup[1], reverse=True)[0:25], '')) # hole in one?
|
||||
from functools import reduce
|
||||
print (reduce(lambda string, tup: string + tup[0] + ' - ' + str(tup[1]) + '\n', sorted( filter(lambda tup: tup[0] not in open(__import__('os').path.join(__import__('os').path.dirname(__file__), '..', 'stop_words.txt')).read().lower().split(','), reduce(lambda word_dict, word: word_dict if (word_dict.__setitem__(word, word_dict.get(word, 0) + 1) if True else None) else word_dict, filter(lambda word: len(word) > 1, (''.join(map(lambda letter: ' ' if ord(letter) not in set(range(ord('a'), ord('z') + 1)) else letter, open(__import__('sys').argv[1]).read().lower()))).split()), {}).items()), key=lambda tup: tup[1], reverse=True)[0:25], '')) # hole in one?
|
||||
@@ -1,11 +1,10 @@
|
||||
#!/usr/bin/env python
|
||||
# My golf score is slightly lower!
|
||||
# Best wishes, Peter Norvig
|
||||
|
||||
import re, sys, collections
|
||||
|
||||
stopwords = set(open('../stop_words.txt').read().split(','))
|
||||
words = re.findall('[a-z]{2,}', open(sys.argv[1]).read().lower())
|
||||
counts = collections.Counter(w for w in words if w not in stopwords)
|
||||
for (w, c) in counts.most_common(25):
|
||||
print w, '-', c
|
||||
print(w, '-', c)
|
||||
@@ -3,4 +3,4 @@ import heapq, re, sys
|
||||
|
||||
words = re.findall("[a-z]{2,}", open(sys.argv[1]).read().lower())
|
||||
for w in heapq.nlargest(25, set(words) - set(open("../stop_words.txt").read().split(",")), words.count):
|
||||
print w, "-", words.count(w)
|
||||
print(w, '-', words.count(w))
|
||||
@@ -1,4 +1,4 @@
|
||||
Style #7
|
||||
Style #8
|
||||
==============================
|
||||
|
||||
Constraints:
|
||||
@@ -2,7 +2,7 @@
|
||||
import re, sys, operator
|
||||
|
||||
# Mileage may vary. If this crashes, make it lower
|
||||
RECURSION_LIMIT = 9500
|
||||
RECURSION_LIMIT = 5000
|
||||
# We add a few more, because, contrary to the name,
|
||||
# this doesn't just rule recursion: it rules the
|
||||
# depth of the call stack
|
||||
@@ -29,16 +29,16 @@ def wf_print(wordfreq):
|
||||
return
|
||||
else:
|
||||
(w, c) = wordfreq[0]
|
||||
print w, '-', c
|
||||
print(w, '-', c)
|
||||
wf_print(wordfreq[1:])
|
||||
|
||||
stop_words = set(open('../stop_words.txt').read().split(','))
|
||||
words = re.findall('[a-z]{2,}', open(sys.argv[1]).read().lower())
|
||||
word_freqs = {}
|
||||
# Theoretically, we would just call count(words, word_freqs)
|
||||
# Theoretically, we would just call count(words, stop_words, word_freqs)
|
||||
# Try doing that and see what happens.
|
||||
for i in range(0, len(words), RECURSION_LIMIT):
|
||||
count(words[i:i+RECURSION_LIMIT], stop_words, word_freqs)
|
||||
|
||||
wf_print(sorted(word_freqs.iteritems(), key=operator.itemgetter(1), reverse=True)[:25])
|
||||
wf_print(sorted(word_freqs.items(), key=operator.itemgetter(1), reverse=True)[:25])
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
Style #8
|
||||
Style #9
|
||||
==============================
|
||||
|
||||
Variation of the candy factory style, with the following additional constraints:
|
||||
@@ -21,7 +21,7 @@ def scan(str_data, func):
|
||||
|
||||
def remove_stop_words(word_list, func):
|
||||
with open('../stop_words.txt') as f:
|
||||
stop_words = f.read().split(',')
|
||||
stop_words = f.read().strip('\n').split(',')
|
||||
# add single-letter words
|
||||
stop_words.extend(list(string.ascii_lowercase))
|
||||
func([w for w in word_list if not w in stop_words], sort)
|
||||
@@ -36,11 +36,11 @@ def frequencies(word_list, func):
|
||||
func(wf, print_text)
|
||||
|
||||
def sort(wf, func):
|
||||
func(sorted(wf.iteritems(), key=operator.itemgetter(1), reverse=True), no_op)
|
||||
func(sorted(wf.items(), key=operator.itemgetter(1), reverse=True), no_op)
|
||||
|
||||
def print_text(word_freqs, func):
|
||||
for (w, c) in word_freqs[0:25]:
|
||||
print w, "-", c
|
||||
print(w, '-', c)
|
||||
func(None)
|
||||
|
||||
def no_op(func):
|
||||
@@ -1,4 +1,4 @@
|
||||
Style #9
|
||||
Style #10
|
||||
==============================
|
||||
|
||||
Constraints:
|
||||
@@ -13,7 +13,7 @@ class TFTheOne:
|
||||
return self
|
||||
|
||||
def printme(self):
|
||||
print self._value
|
||||
print(self._value)
|
||||
|
||||
#
|
||||
# The functions
|
||||
@@ -35,7 +35,7 @@ def scan(str_data):
|
||||
|
||||
def remove_stop_words(word_list):
|
||||
with open('../stop_words.txt') as f:
|
||||
stop_words = f.read().split(',')
|
||||
stop_words = f.read().strip('\n').split(',')
|
||||
# add single-letter words
|
||||
stop_words.extend(list(string.ascii_lowercase))
|
||||
return [w for w in word_list if not w in stop_words]
|
||||
@@ -50,7 +50,7 @@ def frequencies(word_list):
|
||||
return word_freqs
|
||||
|
||||
def sort(word_freq):
|
||||
return sorted(word_freq.iteritems(), key=operator.itemgetter(1), reverse=True)
|
||||
return sorted(word_freq.items(), key=operator.itemgetter(1), reverse=True)
|
||||
|
||||
def top25_freqs(word_freqs):
|
||||
top25 = ""
|
||||
@@ -1,20 +0,0 @@
|
||||
Style #14
|
||||
==============================
|
||||
|
||||
Constraints:
|
||||
|
||||
- The larger problem is decomposed into 'things' that make sense for
|
||||
the problem domain
|
||||
|
||||
- Each 'thing' is a capsule of data that exposes one single procedure,
|
||||
namely the ability to receive and dispatch messages that are sent to
|
||||
it
|
||||
|
||||
- Message dispatch can result in sending the message to another capsule
|
||||
|
||||
Possible names:
|
||||
|
||||
- Letterbox
|
||||
- Messaging style
|
||||
- Objects
|
||||
- Actors
|
||||
@@ -1,4 +1,4 @@
|
||||
Style #10
|
||||
Style #11
|
||||
==============================
|
||||
|
||||
Constraints:
|
||||
@@ -55,7 +55,7 @@ class WordFrequencyManager(TFExercise):
|
||||
self._word_freqs[word] = 1
|
||||
|
||||
def sorted(self):
|
||||
return sorted(self._word_freqs.iteritems(), key=operator.itemgetter(1), reverse=True)
|
||||
return sorted(self._word_freqs.items(), key=operator.itemgetter(1), reverse=True)
|
||||
|
||||
def info(self):
|
||||
return super(WordFrequencyManager, self).info() + ": My major data structure is a " + self._word_freqs.__class__.__name__
|
||||
@@ -73,7 +73,7 @@ class WordFrequencyController(TFExercise):
|
||||
|
||||
word_freqs = self._word_freq_manager.sorted()
|
||||
for (w, c) in word_freqs[0:25]:
|
||||
print w, ' - ', c
|
||||
print(w, '-', c)
|
||||
|
||||
#
|
||||
# The main function
|
||||
36
12-letterbox/README.md
Normal file
36
12-letterbox/README.md
Normal file
@@ -0,0 +1,36 @@
|
||||
Style #12
|
||||
==============================
|
||||
|
||||
Constraints:
|
||||
|
||||
- The larger problem is decomposed into 'things' that make sense for
|
||||
the problem domain
|
||||
|
||||
- Each 'thing' is a capsule of data that exposes one single procedure,
|
||||
namely the ability to receive and dispatch messages that are sent to
|
||||
it
|
||||
|
||||
- Message dispatch can result in sending the message to another capsule
|
||||
|
||||
Possible names:
|
||||
|
||||
- Letterbox
|
||||
- Messaging style
|
||||
- Objects
|
||||
- Actors
|
||||
|
||||
## Style #12.2
|
||||
|
||||
Constraints:
|
||||
|
||||
- For each 'thing' class, let's bring the `info` methods from the Things style (#11)
|
||||
|
||||
- In the Things style, we used inheritance via a Python call to `super` to call a shared function from a base class
|
||||
|
||||
- We should apply this concept of code-reuse, but make sure to do so without using inheritance
|
||||
|
||||
- Just like in style 11, we will not be calling `info` in the main routine
|
||||
|
||||
Possible names:
|
||||
|
||||
- Delegation
|
||||
@@ -63,7 +63,7 @@ class WordFrequencyManager():
|
||||
self._word_freqs[word] = 1
|
||||
|
||||
def _sorted(self):
|
||||
return sorted(self._word_freqs.iteritems(), key=operator.itemgetter(1), reverse=True)
|
||||
return sorted(self._word_freqs.items(), key=operator.itemgetter(1), reverse=True)
|
||||
|
||||
class WordFrequencyController():
|
||||
|
||||
@@ -89,7 +89,7 @@ class WordFrequencyController():
|
||||
|
||||
word_freqs = self._word_freq_manager.dispatch(['sorted'])
|
||||
for (w, c) in word_freqs[0:25]:
|
||||
print w, ' - ', c
|
||||
print(w, '-', c)
|
||||
|
||||
#
|
||||
# The main function
|
||||
@@ -1,4 +1,4 @@
|
||||
Style #12
|
||||
Style #13
|
||||
==============================
|
||||
|
||||
Constraints:
|
||||
@@ -34,7 +34,7 @@ stop_words_obj = {
|
||||
word_freqs_obj = {
|
||||
'freqs' : {},
|
||||
'increment_count' : lambda w : increment_count(word_freqs_obj, w),
|
||||
'sorted' : lambda : sorted(word_freqs_obj['freqs'].iteritems(), key=operator.itemgetter(1), reverse=True)
|
||||
'sorted' : lambda : sorted(word_freqs_obj['freqs'].items(), key=operator.itemgetter(1), reverse=True)
|
||||
}
|
||||
|
||||
data_storage_obj['init'](sys.argv[1])
|
||||
@@ -46,4 +46,4 @@ for w in data_storage_obj['words']():
|
||||
|
||||
word_freqs = word_freqs_obj['sorted']()
|
||||
for (w, c) in word_freqs[0:25]:
|
||||
print w, ' - ', c
|
||||
print(w, '-', c)
|
||||
@@ -1,4 +1,4 @@
|
||||
Style #13
|
||||
Style #14
|
||||
==============================
|
||||
|
||||
Constraints:
|
||||
@@ -4,27 +4,24 @@ import abc, sys, re, operator, string
|
||||
#
|
||||
# The abstract things
|
||||
#
|
||||
class IDataStorage (object):
|
||||
class IDataStorage (metaclass=abc.ABCMeta):
|
||||
""" Models the contents of the file """
|
||||
__metaclass__ = abc.ABCMeta
|
||||
|
||||
@abc.abstractmethod
|
||||
def words(self):
|
||||
""" Returns the words in storage """
|
||||
pass
|
||||
|
||||
class IStopWordFilter (object):
|
||||
class IStopWordFilter (metaclass=abc.ABCMeta):
|
||||
""" Models the stop word filter """
|
||||
__metaclass__ = abc.ABCMeta
|
||||
|
||||
@abc.abstractmethod
|
||||
def is_stop_word(self, word):
|
||||
""" Checks whether the given word is a stop word """
|
||||
pass
|
||||
|
||||
class IWordFrequencyCounter(object):
|
||||
class IWordFrequencyCounter(metaclass=abc.ABCMeta):
|
||||
""" Keeps the word frequency data """
|
||||
__metaclass__ = abc.ABCMeta
|
||||
|
||||
@abc.abstractmethod
|
||||
def increment_count(self, word):
|
||||
@@ -71,15 +68,15 @@ class WordFrequencyManager:
|
||||
self._word_freqs[word] = 1
|
||||
|
||||
def sorted(self):
|
||||
return sorted(self._word_freqs.iteritems(), key=operator.itemgetter(1), reverse=True)
|
||||
return sorted(self._word_freqs.items(), key=operator.itemgetter(1), reverse=True)
|
||||
|
||||
|
||||
#
|
||||
# The wiring between abstract things and concrete things
|
||||
#
|
||||
IDataStorage.register(DataStorageManager)
|
||||
IStopWordFilter.register(StopWordManager)
|
||||
IWordFrequencyCounter.register(WordFrequencyManager)
|
||||
IDataStorage.register(subclass=DataStorageManager)
|
||||
IStopWordFilter.register(subclass=StopWordManager)
|
||||
IWordFrequencyCounter.register(subclass=WordFrequencyManager)
|
||||
|
||||
#
|
||||
# The application object
|
||||
@@ -97,7 +94,7 @@ class WordFrequencyController:
|
||||
|
||||
word_freqs = self._word_freq_counter.sorted()
|
||||
for (w, c) in word_freqs[0:25]:
|
||||
print w, ' - ', c
|
||||
print(w, '-', c)
|
||||
|
||||
#
|
||||
# The main function
|
||||
@@ -1,4 +1,4 @@
|
||||
Style #14
|
||||
Style #15
|
||||
==============================
|
||||
|
||||
Constraints:
|
||||
@@ -87,9 +87,9 @@ class WordFrequencyCounter:
|
||||
self._word_freqs[word] = 1
|
||||
|
||||
def __print_freqs(self):
|
||||
word_freqs = sorted(self._word_freqs.iteritems(), key=operator.itemgetter(1), reverse=True)
|
||||
word_freqs = sorted(self._word_freqs.items(), key=operator.itemgetter(1), reverse=True)
|
||||
for (w, c) in word_freqs[0:25]:
|
||||
print w, ' - ', c
|
||||
print(w, '-', c)
|
||||
|
||||
#
|
||||
# The main function
|
||||
@@ -1,4 +1,4 @@
|
||||
Style #15
|
||||
Style #16
|
||||
==============================
|
||||
|
||||
Constraints:
|
||||
@@ -77,9 +77,9 @@ class WordFrequencyCounter:
|
||||
self._word_freqs[word] = 1
|
||||
|
||||
def print_freqs(self, event):
|
||||
word_freqs = sorted(self._word_freqs.iteritems(), key=operator.itemgetter(1), reverse=True)
|
||||
word_freqs = sorted(self._word_freqs.items(), key=operator.itemgetter(1), reverse=True)
|
||||
for (w, c) in word_freqs[0:25]:
|
||||
print w, ' - ', c
|
||||
print(w, '-', c)
|
||||
|
||||
class WordFrequencyApplication:
|
||||
def __init__(self, event_manager):
|
||||
@@ -1,4 +1,4 @@
|
||||
Style #16
|
||||
Style #17
|
||||
==============================
|
||||
|
||||
Constraints:
|
||||
@@ -34,12 +34,12 @@ def frequencies(word_list):
|
||||
|
||||
def sort(word_freq):
|
||||
# Meta-level data: locals()
|
||||
return sorted(locals()['word_freq'].iteritems(), key=operator.itemgetter(1), reverse=True)
|
||||
return sorted(locals()['word_freq'].items(), key=operator.itemgetter(1), reverse=True)
|
||||
|
||||
def main():
|
||||
word_freqs = sort(frequencies(extract_words(sys.argv[1])))
|
||||
for (w, c) in word_freqs[0:25]:
|
||||
print w, ' - ', c
|
||||
print(w, '-', c)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,4 +1,4 @@
|
||||
Style #17
|
||||
Style #18
|
||||
==============================
|
||||
|
||||
Constraints:
|
||||
@@ -21,7 +21,7 @@ def frequencies_imp(word_list):
|
||||
if len(sys.argv) > 1:
|
||||
extract_words_func = "lambda name : [x.lower() for x in re.split('[^a-zA-Z]+', open(name).read()) if len(x) > 0 and x.lower() not in stops]"
|
||||
frequencies_func = "lambda wl : frequencies_imp(wl)"
|
||||
sort_func = "lambda word_freq: sorted(word_freq.iteritems(), key=operator.itemgetter(1), reverse=True)"
|
||||
sort_func = "lambda word_freq: sorted(word_freq.items(), key=operator.itemgetter(1), reverse=True)"
|
||||
filename = sys.argv[1]
|
||||
else:
|
||||
extract_words_func = "lambda x: []"
|
||||
@@ -44,5 +44,5 @@ exec('sort = ' + sort_func)
|
||||
word_freqs = locals()['sort'](locals()['frequencies'](locals()['extract_words'](filename)))
|
||||
|
||||
for (w, c) in word_freqs[0:25]:
|
||||
print w, ' - ', c
|
||||
print(w, '-', c)
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
Style #18
|
||||
Style #19
|
||||
==============================
|
||||
|
||||
Constraints:
|
||||
@@ -24,7 +24,7 @@ def frequencies(word_list):
|
||||
return word_freqs
|
||||
|
||||
def sort(word_freq):
|
||||
return sorted(word_freq.iteritems(), key=operator.itemgetter(1), reverse=True)
|
||||
return sorted(word_freq.items(), key=operator.itemgetter(1), reverse=True)
|
||||
|
||||
# The side functionality
|
||||
def profile(f):
|
||||
@@ -32,7 +32,7 @@ def profile(f):
|
||||
start_time = time.time()
|
||||
ret_value = f(*arg, **kw)
|
||||
elapsed = time.time() - start_time
|
||||
print "%s(...) took %s secs" % (f.__name__, elapsed)
|
||||
print("%s(...) took %s secs" % (f.__name__, elapsed))
|
||||
return ret_value
|
||||
return profilewrapper
|
||||
|
||||
@@ -40,10 +40,10 @@ def profile(f):
|
||||
tracked_functions = [extract_words, frequencies, sort]
|
||||
# weaver
|
||||
for func in tracked_functions:
|
||||
globals()[func.func_name]=profile(func)
|
||||
globals()[func.__name__]=profile(func)
|
||||
|
||||
word_freqs = sort(frequencies(extract_words(sys.argv[1])))
|
||||
|
||||
for (w, c) in word_freqs[0:25]:
|
||||
print w, ' - ', c
|
||||
print(w, '-', c)
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1,4 +1,4 @@
|
||||
Style #19
|
||||
Style #20
|
||||
==============================
|
||||
|
||||
Constraints:
|
||||
@@ -7,5 +7,5 @@ def top25(word_list):
|
||||
word_freqs[w] += 1
|
||||
else:
|
||||
word_freqs[w] = 1
|
||||
return sorted(word_freqs.iteritems(), key=operator.itemgetter(1), reverse=True)[:25]
|
||||
return sorted(word_freqs.items(), key=operator.itemgetter(1), reverse=True)[:25]
|
||||
|
||||
BIN
20-plugins/plugins/frequencies1.pyc
Normal file
BIN
20-plugins/plugins/frequencies1.pyc
Normal file
Binary file not shown.
BIN
20-plugins/plugins/frequencies2.pyc
Normal file
BIN
20-plugins/plugins/frequencies2.pyc
Normal file
Binary file not shown.
BIN
20-plugins/plugins/words1.pyc
Normal file
BIN
20-plugins/plugins/words1.pyc
Normal file
Binary file not shown.
BIN
20-plugins/plugins/words2.pyc
Normal file
BIN
20-plugins/plugins/words2.pyc
Normal file
Binary file not shown.
@@ -1,18 +1,18 @@
|
||||
#!/usr/bin/env python
|
||||
import sys, ConfigParser, imp
|
||||
import sys, configparser, importlib.machinery
|
||||
|
||||
def load_plugins():
|
||||
config = ConfigParser.ConfigParser()
|
||||
config = configparser.ConfigParser()
|
||||
config.read("config.ini")
|
||||
words_plugin = config.get("Plugins", "words")
|
||||
frequencies_plugin = config.get("Plugins", "frequencies")
|
||||
global tfwords, tffreqs
|
||||
tfwords = imp.load_compiled('tfwords', words_plugin)
|
||||
tffreqs = imp.load_compiled('tffreqs', frequencies_plugin)
|
||||
tfwords = importlib.machinery.SourcelessFileLoader('tfwords', words_plugin).load_module()
|
||||
tffreqs = importlib.machinery.SourcelessFileLoader('tffreqs', frequencies_plugin).load_module()
|
||||
|
||||
load_plugins()
|
||||
word_freqs = tffreqs.top25(tfwords.extract_words(sys.argv[1]))
|
||||
|
||||
for (w, c) in word_freqs:
|
||||
print w, ' - ', c
|
||||
print(w, '-', c)
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
Style #20
|
||||
Style #21
|
||||
==============================
|
||||
|
||||
Constraints:
|
||||
@@ -12,7 +12,7 @@ def extract_words(path_to_file):
|
||||
with open(path_to_file) as f:
|
||||
str_data = f.read()
|
||||
except IOError as e:
|
||||
print "I/O error({0}) when opening {1}: {2}".format(e.errno, path_to_file, e.strerror)
|
||||
print("I/O error({0}) when opening {1}: {2}".format(e.errno, path_to_file, e.strerror))
|
||||
return []
|
||||
|
||||
pattern = re.compile('[\W_]+')
|
||||
@@ -27,7 +27,7 @@ def remove_stop_words(word_list):
|
||||
with open('../stop_words.txt') as f:
|
||||
stop_words = f.read().split(',')
|
||||
except IOError as e:
|
||||
print "I/O error({0}) when opening ../stops_words.txt: {1}".format(e.errno, e.strerror)
|
||||
print("I/O error({0}) when opening ../stops_words.txt: {1}".format(e.errno, e.strerror))
|
||||
return word_list
|
||||
|
||||
stop_words.extend(list(string.ascii_lowercase))
|
||||
@@ -49,7 +49,7 @@ def sort(word_freq):
|
||||
if type(word_freq) is not dict or word_freq == {}:
|
||||
return []
|
||||
|
||||
return sorted(word_freq.iteritems(), key=operator.itemgetter(1), reverse=True)
|
||||
return sorted(word_freq.items(), key=operator.itemgetter(1), reverse=True)
|
||||
|
||||
#
|
||||
# The main function
|
||||
@@ -58,5 +58,5 @@ filename = sys.argv[1] if len(sys.argv) > 1 else "../input.txt"
|
||||
word_freqs = sort(frequencies(remove_stop_words(extract_words(filename))))
|
||||
|
||||
for tf in word_freqs[0:25]:
|
||||
print tf[0], ' - ', tf[1]
|
||||
print(tf[0], '-', tf[1])
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
Style #21
|
||||
Style #22
|
||||
==============================
|
||||
|
||||
Constraints:
|
||||
@@ -13,7 +13,7 @@ def extract_words(path_to_file):
|
||||
with open(path_to_file) as f:
|
||||
str_data = f.read()
|
||||
except IOError as e:
|
||||
print "I/O error({0}) when opening {1}: {2}! I quit!".format(e.errno, path_to_file, e.strerror)
|
||||
print("I/O error({0}) when opening {1}: {2}! I quit!".format(e.errno, path_to_file, e.strerror))
|
||||
raise e
|
||||
|
||||
pattern = re.compile('[\W_]+')
|
||||
@@ -27,7 +27,7 @@ def remove_stop_words(word_list):
|
||||
with open('../stop_words.txt') as f:
|
||||
stop_words = f.read().split(',')
|
||||
except IOError as e:
|
||||
print "I/O error({0}) when opening ../stops_words.txt: {1}! I quit!".format(e.errno, e.strerror)
|
||||
print("I/O error({0}) when opening ../stops_words.txt: {1}! I quit!".format(e.errno, e.strerror))
|
||||
raise e
|
||||
|
||||
stop_words.extend(list(string.ascii_lowercase))
|
||||
@@ -35,7 +35,7 @@ def remove_stop_words(word_list):
|
||||
|
||||
def frequencies(word_list):
|
||||
assert(type(word_list) is list), "I need a list!"
|
||||
assert(word_list <> []), "I need a non-empty list!"
|
||||
assert(word_list != []), "I need a non-empty list!"
|
||||
|
||||
word_freqs = {}
|
||||
for w in word_list:
|
||||
@@ -47,12 +47,12 @@ def frequencies(word_list):
|
||||
|
||||
def sort(word_freq):
|
||||
assert(type(word_freq) is dict), "I need a dictionary!"
|
||||
assert(word_freq <> {}), "I need a non-empty dictionary!"
|
||||
assert(word_freq != {}), "I need a non-empty dictionary!"
|
||||
|
||||
try:
|
||||
return sorted(word_freq.iteritems(), key=operator.itemgetter(1), reverse=True)
|
||||
return sorted(word_freq.items(), key=operator.itemgetter(1), reverse=True)
|
||||
except Exception as e:
|
||||
print "Sorted threw {0}: {1}".format(e)
|
||||
print("Sorted threw {0}".format(e))
|
||||
raise e
|
||||
|
||||
#
|
||||
@@ -65,8 +65,8 @@ try:
|
||||
assert(type(word_freqs) is list), "OMG! This is not a list!"
|
||||
assert(len(word_freqs) > 25), "SRSLY? Less than 25 words!"
|
||||
for (w, c) in word_freqs[0:25]:
|
||||
print w, ' - ', c
|
||||
print(w, '-', c)
|
||||
except Exception as e:
|
||||
print "Something wrong: {0}".format(e)
|
||||
print("Something wrong: {0}".format(e))
|
||||
traceback.print_exc()
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
Style #22
|
||||
Style #23
|
||||
==============================
|
||||
|
||||
Constraints:
|
||||
@@ -21,9 +21,9 @@ class TFPassiveAggressive:
|
||||
|
||||
def printme(self):
|
||||
if self._e == None:
|
||||
print self._value
|
||||
print(self._value)
|
||||
else:
|
||||
print self._e, " in ", self._offending_func.__name__
|
||||
print(self._e, " in ", self._offending_func.__name__)
|
||||
|
||||
#
|
||||
# The functions
|
||||
@@ -53,7 +53,7 @@ def remove_stop_words(word_list):
|
||||
|
||||
def frequencies(word_list):
|
||||
assert(type(word_list) is list), "I need a list! I quit!"
|
||||
assert(word_list <> []), "I need a non-empty list! I quit!"
|
||||
assert(word_list != []), "I need a non-empty list! I quit!"
|
||||
|
||||
word_freqs = {}
|
||||
for w in word_list:
|
||||
@@ -65,13 +65,13 @@ def frequencies(word_list):
|
||||
|
||||
def sort(word_freqs):
|
||||
assert(type(word_freqs) is dict), "I need a dictionary! I quit!"
|
||||
assert(word_freqs <> {}), "I need a non-empty dictionary! I quit!"
|
||||
assert(word_freqs != {}), "I need a non-empty dictionary! I quit!"
|
||||
|
||||
return sorted(word_freqs.iteritems(), key=operator.itemgetter(1), reverse=True)
|
||||
return sorted(word_freqs.items(), key=operator.itemgetter(1), reverse=True)
|
||||
|
||||
def top25_freqs(word_freqs):
|
||||
assert(type(word_freqs) is list), "I need a list! I quit!"
|
||||
assert(word_freqs <> {}), "I need a non-empty dictionary! I quit!"
|
||||
assert(word_freqs != {}), "I need a non-empty dictionary! I quit!"
|
||||
|
||||
top25 = ""
|
||||
for tf in word_freqs[0:25]:
|
||||
@@ -25,7 +25,7 @@ def remove_stop_words(word_list):
|
||||
|
||||
def frequencies(word_list):
|
||||
assert(type(word_list) is list), "I need a list! I quit!"
|
||||
assert(word_list <> []), "I need a non-empty list! I quit!"
|
||||
assert(word_list != []), "I need a non-empty list! I quit!"
|
||||
|
||||
word_freqs = {}
|
||||
for w in word_list:
|
||||
@@ -37,9 +37,9 @@ def frequencies(word_list):
|
||||
|
||||
def sort(word_freqs):
|
||||
assert(type(word_freqs) is dict), "I need a dictionary! I quit!"
|
||||
assert(word_freqs <> {}), "I need a non-empty dictionary! I quit!"
|
||||
assert(word_freqs != {}), "I need a non-empty dictionary! I quit!"
|
||||
|
||||
return sorted(word_freqs.iteritems(), key=operator.itemgetter(1), reverse=True)
|
||||
return sorted(word_freqs.items(), key=operator.itemgetter(1), reverse=True)
|
||||
|
||||
#
|
||||
# The main function
|
||||
@@ -50,8 +50,8 @@ try:
|
||||
|
||||
assert(len(word_freqs) > 25), "OMG! Less than 25 words! I QUIT!"
|
||||
for tf in word_freqs[0:25]:
|
||||
print tf[0], ' - ', tf[1]
|
||||
print(tf[0], '-', tf[1])
|
||||
except Exception as e:
|
||||
print "Something wrong: {0}".format(e)
|
||||
print("Something wrong: {0}".format(e))
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
Style #23
|
||||
Style #24
|
||||
==============================
|
||||
|
||||
Constraints:
|
||||
@@ -11,7 +11,7 @@ class AcceptTypes():
|
||||
def __call__(self, f):
|
||||
def wrapped_f(*args):
|
||||
for i in range(len(self._args)):
|
||||
if type(args[i]) <> self._args[i]:
|
||||
if type(args[i]) != self._args[i]:
|
||||
raise TypeError("Expecting %s got %s" % (str(self._args[i]), str(type(args[i]))))
|
||||
return f(*args)
|
||||
return wrapped_f
|
||||
@@ -41,9 +41,9 @@ def frequencies(word_list):
|
||||
|
||||
@AcceptTypes(dict)
|
||||
def sort(word_freq):
|
||||
return sorted(word_freq.iteritems(), key=operator.itemgetter(1), reverse=True)
|
||||
return sorted(word_freq.items(), key=operator.itemgetter(1), reverse=True)
|
||||
|
||||
word_freqs = sort(frequencies(extract_words(sys.argv[1])))
|
||||
for (w, c) in word_freqs[0:25]:
|
||||
print w, ' - ', c
|
||||
print(w, '-', c)
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
Style #24
|
||||
Style #25
|
||||
==============================
|
||||
|
||||
This style is a variation of style #09, The One, with the following additional constraints:
|
||||
@@ -19,7 +19,7 @@ class TFQuarantine:
|
||||
value = lambda : None
|
||||
for func in self._funcs:
|
||||
value = func(guard_callable(value))
|
||||
print guard_callable(value)
|
||||
print(guard_callable(value))
|
||||
|
||||
#
|
||||
# The functions
|
||||
@@ -57,7 +57,7 @@ def frequencies(word_list):
|
||||
return word_freqs
|
||||
|
||||
def sort(word_freq):
|
||||
return sorted(word_freq.iteritems(), key=operator.itemgetter(1), reverse=True)
|
||||
return sorted(word_freq.items(), key=operator.itemgetter(1), reverse=True)
|
||||
|
||||
def top25_freqs(word_freqs):
|
||||
top25 = ""
|
||||
@@ -1,4 +1,4 @@
|
||||
Style #25
|
||||
Style #26
|
||||
==============================
|
||||
|
||||
Constraints:
|
||||
@@ -66,4 +66,4 @@ with sqlite3.connect('tf.db') as connection:
|
||||
for i in range(25):
|
||||
row = c.fetchone()
|
||||
if row != None:
|
||||
print row[0] + ' - ' + str(row[1])
|
||||
print(row[0], '-', str(row[1]))
|
||||
@@ -1,4 +1,4 @@
|
||||
Style #26
|
||||
Style #27
|
||||
==============================
|
||||
|
||||
Constraints:
|
||||
@@ -8,18 +8,18 @@ import sys, re, itertools, operator
|
||||
all_words = [(), None]
|
||||
stop_words = [(), None]
|
||||
non_stop_words = [(), lambda : \
|
||||
map(lambda w : \
|
||||
list(map(lambda w : \
|
||||
w if w not in stop_words[0] else '',\
|
||||
all_words[0])]
|
||||
all_words[0]))]
|
||||
unique_words = [(),lambda :
|
||||
set([w for w in non_stop_words[0] if w!=''])]
|
||||
counts = [(), lambda :
|
||||
map(lambda w, word_list : word_list.count(w), \
|
||||
list(map(lambda w, word_list : word_list.count(w), \
|
||||
unique_words[0], \
|
||||
itertools.repeat(non_stop_words[0], \
|
||||
len(unique_words[0])))]
|
||||
len(unique_words[0]))))]
|
||||
sorted_data = [(), lambda : sorted(zip(list(unique_words[0]), \
|
||||
counts[0]), \
|
||||
list(counts[0])), \
|
||||
key=operator.itemgetter(1),
|
||||
reverse=True)]
|
||||
|
||||
@@ -46,4 +46,4 @@ stop_words[0] = set(open('../stop_words.txt').read().split(','))
|
||||
update()
|
||||
|
||||
for (w, c) in sorted_data[0][:25]:
|
||||
print w, '-', c
|
||||
print(w, '-', c)
|
||||
@@ -1,4 +1,4 @@
|
||||
Style #27
|
||||
Style #28
|
||||
==============================
|
||||
|
||||
Constraints:
|
||||
@@ -1,11 +1,15 @@
|
||||
#!/usr/bin/env python
|
||||
import sys, operator, string
|
||||
import sys
|
||||
import operator
|
||||
import string
|
||||
|
||||
|
||||
def characters(filename):
|
||||
for line in open(filename):
|
||||
for c in line:
|
||||
yield c
|
||||
|
||||
|
||||
def all_words(filename):
|
||||
start_char = True
|
||||
for c in characters(filename):
|
||||
@@ -15,7 +19,8 @@ def all_words(filename):
|
||||
# We found the start of a word
|
||||
word = c.lower()
|
||||
start_char = False
|
||||
else: pass
|
||||
else:
|
||||
pass
|
||||
else:
|
||||
if c.isalnum():
|
||||
word += c.lower()
|
||||
@@ -24,25 +29,29 @@ def all_words(filename):
|
||||
start_char = True
|
||||
yield word
|
||||
|
||||
|
||||
def non_stop_words(filename):
|
||||
stopwords = set(open('../stop_words.txt').read().split(',') + list(string.ascii_lowercase))
|
||||
stopwords = set(open(
|
||||
'../stop_words.txt').read().strip('\n').split(',') + list(string.ascii_lowercase))
|
||||
for w in all_words(filename):
|
||||
if not w in stopwords:
|
||||
yield w
|
||||
|
||||
|
||||
def count_and_sort(filename):
|
||||
freqs, i = {}, 1
|
||||
for w in non_stop_words(filename):
|
||||
freqs[w] = 1 if w not in freqs else freqs[w]+1
|
||||
if i % 5000 == 0:
|
||||
yield sorted(freqs.iteritems(), key=operator.itemgetter(1), reverse=True)
|
||||
yield sorted(freqs.items(), key=operator.itemgetter(1), reverse=True)
|
||||
i = i+1
|
||||
yield sorted(freqs.iteritems(), key=operator.itemgetter(1), reverse=True)
|
||||
yield sorted(freqs.items(), key=operator.itemgetter(1), reverse=True)
|
||||
|
||||
|
||||
#
|
||||
# The main function
|
||||
#
|
||||
for word_freqs in count_and_sort(sys.argv[1]):
|
||||
print "-----------------------------"
|
||||
print("-----------------------------")
|
||||
for (w, c) in word_freqs[0:25]:
|
||||
print w, ' - ', c
|
||||
|
||||
print(w, '-', c)
|
||||
@@ -1,4 +1,4 @@
|
||||
Style #28
|
||||
Style #29
|
||||
==============================
|
||||
|
||||
Similar to the letterbox style, but where the 'things' have
|
||||
@@ -2,22 +2,22 @@
|
||||
|
||||
import sys, re, operator, string
|
||||
from threading import Thread
|
||||
from Queue import Queue
|
||||
from queue import Queue
|
||||
|
||||
class ActiveWFObject(Thread):
|
||||
def __init__(self):
|
||||
Thread.__init__(self)
|
||||
self.name = str(type(self))
|
||||
self.queue = Queue()
|
||||
self._stop = False
|
||||
self._stopMe = False
|
||||
self.start()
|
||||
|
||||
def run(self):
|
||||
while not self._stop:
|
||||
while not self._stopMe:
|
||||
message = self.queue.get()
|
||||
self._dispatch(message)
|
||||
if message[0] == 'die':
|
||||
self._stop = True
|
||||
self._stopMe = True
|
||||
|
||||
def send(receiver, message):
|
||||
receiver.queue.put(message)
|
||||
@@ -94,7 +94,7 @@ class WordFrequencyManager(ActiveWFObject):
|
||||
|
||||
def _top25(self, message):
|
||||
recipient = message[0]
|
||||
freqs_sorted = sorted(self._word_freqs.iteritems(), key=operator.itemgetter(1), reverse=True)
|
||||
freqs_sorted = sorted(self._word_freqs.items(), key=operator.itemgetter(1), reverse=True)
|
||||
send(recipient, ['top25', freqs_sorted])
|
||||
|
||||
class WordFrequencyController(ActiveWFObject):
|
||||
@@ -114,9 +114,9 @@ class WordFrequencyController(ActiveWFObject):
|
||||
def _display(self, message):
|
||||
word_freqs = message[0]
|
||||
for (w, f) in word_freqs[0:25]:
|
||||
print w, ' - ', f
|
||||
print(w, '-', f)
|
||||
send(self._storage_manager, ['die'])
|
||||
self._stop = True
|
||||
self._stopMe = True
|
||||
|
||||
#
|
||||
# The main function
|
||||
@@ -1,4 +1,4 @@
|
||||
Style #29
|
||||
Style #30
|
||||
==============================
|
||||
|
||||
Constraints:
|
||||
@@ -1,9 +1,9 @@
|
||||
#!/usr/bin/env python
|
||||
import re, sys, operator, Queue, threading
|
||||
import re, sys, operator, queue, threading
|
||||
|
||||
# Two data spaces
|
||||
word_space = Queue.Queue()
|
||||
freq_space = Queue.Queue()
|
||||
word_space = queue.Queue()
|
||||
freq_space = queue.Queue()
|
||||
|
||||
stopwords = set(open('../stop_words.txt').read().split(','))
|
||||
|
||||
@@ -14,7 +14,7 @@ def process_words():
|
||||
while True:
|
||||
try:
|
||||
word = word_space.get(timeout=1)
|
||||
except Queue.Empty:
|
||||
except queue.Empty:
|
||||
break
|
||||
if not word in stopwords:
|
||||
if word in word_freqs:
|
||||
@@ -41,12 +41,12 @@ for i in range(5):
|
||||
word_freqs = {}
|
||||
while not freq_space.empty():
|
||||
freqs = freq_space.get()
|
||||
for (k, v) in freqs.iteritems():
|
||||
for (k, v) in freqs.items():
|
||||
if k in word_freqs:
|
||||
count = sum(item[k] for item in [freqs, word_freqs])
|
||||
else:
|
||||
count = freqs[k]
|
||||
word_freqs[k] = count
|
||||
|
||||
for (w, c) in sorted(word_freqs.iteritems(), key=operator.itemgetter(1), reverse=True)[:25]:
|
||||
print w, '-', c
|
||||
for (w, c) in sorted(word_freqs.items(), key=operator.itemgetter(1), reverse=True)[:25]:
|
||||
print(w, '-', c)
|
||||
@@ -1,4 +1,4 @@
|
||||
Style #30
|
||||
Style #31
|
||||
==============================
|
||||
|
||||
Constraints:
|
||||
@@ -1,6 +1,6 @@
|
||||
#!/usr/bin/env python
|
||||
import sys, re, operator, string
|
||||
|
||||
from functools import reduce
|
||||
#
|
||||
# Functions for map reduce
|
||||
#
|
||||
@@ -10,7 +10,7 @@ def partition(data_str, nlines):
|
||||
into chunks of nlines.
|
||||
"""
|
||||
lines = data_str.split('\n')
|
||||
for i in xrange(0, len(lines), nlines):
|
||||
for i in range(0, len(lines), nlines):
|
||||
yield '\n'.join(lines[i:i+nlines])
|
||||
|
||||
def split_words(data_str):
|
||||
@@ -43,12 +43,13 @@ def count_words(pairs_list_1, pairs_list_2):
|
||||
and returns a list of pairs [(w1, frequency), ...],
|
||||
where frequency is the sum of all the reported occurrences
|
||||
"""
|
||||
mapping = dict((k, v) for k, v in pairs_list_1)
|
||||
for p in pairs_list_2:
|
||||
mapping = {}
|
||||
for pl in [pairs_list_1, pairs_list_2]:
|
||||
for p in pl:
|
||||
if p[0] in mapping:
|
||||
mapping[p[0]] += p[1]
|
||||
else:
|
||||
mapping[p[0]] = 1
|
||||
mapping[p[0]] = p[1]
|
||||
return mapping.items()
|
||||
|
||||
#
|
||||
@@ -66,9 +67,7 @@ def sort(word_freq):
|
||||
# The main function
|
||||
#
|
||||
splits = map(split_words, partition(read_file(sys.argv[1]), 200))
|
||||
splits.insert(0, []) # Normalize input to reduce
|
||||
word_freqs = sort(reduce(count_words, splits))
|
||||
|
||||
for (w, c) in word_freqs[0:25]:
|
||||
print w, ' - ', c
|
||||
|
||||
print(w, '-', c)
|
||||
@@ -1,4 +1,4 @@
|
||||
Style #31
|
||||
Style #32
|
||||
==============================
|
||||
|
||||
Very similar to style #30, but with an additional twist
|
||||
@@ -1,6 +1,6 @@
|
||||
#!/usr/bin/env python
|
||||
import sys, re, operator, string
|
||||
|
||||
from functools import reduce
|
||||
#
|
||||
# Functions for map reduce
|
||||
#
|
||||
@@ -10,7 +10,7 @@ def partition(data_str, nlines):
|
||||
into chunks of nlines.
|
||||
"""
|
||||
lines = data_str.split('\n')
|
||||
for i in xrange(0, len(lines), nlines):
|
||||
for i in range(0, len(lines), nlines):
|
||||
yield '\n'.join(lines[i:i+nlines])
|
||||
|
||||
def split_words(data_str):
|
||||
@@ -87,5 +87,4 @@ splits_per_word = regroup(splits)
|
||||
word_freqs = sort(map(count_words, splits_per_word.items()))
|
||||
|
||||
for (w, c) in word_freqs[0:25]:
|
||||
print w, ' - ', c
|
||||
|
||||
print(w, '-', c)
|
||||
@@ -1,4 +1,4 @@
|
||||
Style #32
|
||||
Style #33
|
||||
==============================
|
||||
|
||||
Constraints:
|
||||
@@ -14,7 +14,7 @@ class FreqObserver(threading.Thread):
|
||||
self.daemon,self._end = True, False
|
||||
# freqs is the part of the model to be observed
|
||||
self._freqs = freqs
|
||||
self._freqs_0 = sorted(self._freqs.iteritems(), key=operator.itemgetter(1), reverse=True)[:25]
|
||||
self._freqs_0 = sorted(self._freqs.items(), key=operator.itemgetter(1), reverse=True)[:25]
|
||||
self.start()
|
||||
|
||||
def run(self):
|
||||
@@ -28,7 +28,7 @@ class FreqObserver(threading.Thread):
|
||||
|
||||
def _update_view(self):
|
||||
lock.acquire()
|
||||
freqs_1 = sorted(self._freqs.iteritems(), key=operator.itemgetter(1), reverse=True)[:25]
|
||||
freqs_1 = sorted(self._freqs.items(), key=operator.itemgetter(1), reverse=True)[:25]
|
||||
lock.release()
|
||||
if (freqs_1 != self._freqs_0):
|
||||
self._update_display(freqs_1)
|
||||
@@ -38,7 +38,7 @@ class FreqObserver(threading.Thread):
|
||||
def refresh_screen(data):
|
||||
# clear screen
|
||||
cls()
|
||||
print data
|
||||
print(data)
|
||||
sys.stdout.flush()
|
||||
|
||||
data_str = ""
|
||||
@@ -57,7 +57,7 @@ class WordsCounter:
|
||||
for line in f:
|
||||
yield [w for w in re.findall('[a-z]{2,}', line.lower()) if w not in stopwords]
|
||||
|
||||
words = non_stop_words().next()
|
||||
words = next(non_stop_words())
|
||||
lock.acquire()
|
||||
for w in words:
|
||||
self.freqs[w] = 1 if w not in self.freqs else self.freqs[w]+1
|
||||
@@ -66,8 +66,8 @@ class WordsCounter:
|
||||
#
|
||||
# The controller
|
||||
#
|
||||
print "Press space bar to fetch words from the file one by one"
|
||||
print "Press ESC to switch to automatic mode"
|
||||
print("Press space bar to fetch words from the file one by one")
|
||||
print("Press ESC to switch to automatic mode")
|
||||
model = WordsCounter()
|
||||
view = FreqObserver(model.freqs)
|
||||
with open(sys.argv[1]) as f:
|
||||
@@ -19,7 +19,7 @@ class WordFrequenciesModel:
|
||||
for obs in self._observers:
|
||||
obs.render()
|
||||
except IOError:
|
||||
print "File not found"
|
||||
print("File not found")
|
||||
self.freqs = {}
|
||||
|
||||
class WordFrequenciesView:
|
||||
@@ -28,9 +28,9 @@ class WordFrequenciesView:
|
||||
model.register(self)
|
||||
|
||||
def render(self):
|
||||
sorted_freqs = sorted(self._model.freqs.iteritems(), key=operator.itemgetter(1), reverse=True)
|
||||
sorted_freqs = sorted(self._model.freqs.items(), key=operator.itemgetter(1), reverse=True)
|
||||
for (w, c) in sorted_freqs[:25]:
|
||||
print w, '-', c
|
||||
print(w, '-', c)
|
||||
|
||||
class WordFrequencyController:
|
||||
def __init__(self, model, view):
|
||||
@@ -39,7 +39,7 @@ class WordFrequencyController:
|
||||
def run(self):
|
||||
self._model.update(sys.argv[1])
|
||||
while True:
|
||||
print "Next file: "
|
||||
print("Next file: ")
|
||||
sys.stdout.flush()
|
||||
filename = sys.stdin.readline().strip()
|
||||
self._model.update(filename)
|
||||
@@ -14,7 +14,7 @@ class WordFrequenciesModel:
|
||||
words = re.findall('[a-z]{2,}', open(path_to_file).read().lower())
|
||||
self.freqs = collections.Counter(w for w in words if w not in self.stopwords)
|
||||
except IOError:
|
||||
print "File not found"
|
||||
print("File not found")
|
||||
self.freqs = {}
|
||||
|
||||
class WordFrequenciesView:
|
||||
@@ -22,9 +22,9 @@ class WordFrequenciesView:
|
||||
self._model = model
|
||||
|
||||
def render(self):
|
||||
sorted_freqs = sorted(self._model.freqs.iteritems(), key=operator.itemgetter(1), reverse=True)
|
||||
sorted_freqs = sorted(self._model.freqs.items(), key=operator.itemgetter(1), reverse=True)
|
||||
for (w, c) in sorted_freqs[0:25]:
|
||||
print w, '-', c
|
||||
print(w, '-', c)
|
||||
|
||||
class WordFrequencyController:
|
||||
def __init__(self, model, view):
|
||||
@@ -33,7 +33,7 @@ class WordFrequencyController:
|
||||
|
||||
def run(self):
|
||||
while True:
|
||||
print "Next file: "
|
||||
print("Next file: ")
|
||||
sys.stdout.flush()
|
||||
filename = sys.stdin.readline().strip()
|
||||
self._model.update(filename)
|
||||
@@ -1,4 +1,4 @@
|
||||
import sys, os
|
||||
import os
|
||||
|
||||
#
|
||||
# getch in a platform-independent way
|
||||
@@ -72,4 +72,3 @@ def get_input():
|
||||
elif key == 27: # ESC
|
||||
interactive = False
|
||||
return True
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
Style #33
|
||||
Style #34
|
||||
==============================
|
||||
|
||||
REST = REpresentational State Transfer (http://www.ics.uci.edu/~fielding/pubs/dissertation/top.htm)
|
||||
@@ -24,16 +24,15 @@ def upload_get_handler(args):
|
||||
return "Name of file to upload?", ["post", "file"]
|
||||
|
||||
def upload_post_handler(args):
|
||||
def create_data(filename):
|
||||
if filename in data:
|
||||
def create_data(fn):
|
||||
if fn in data:
|
||||
return
|
||||
word_freqs = {}
|
||||
with open(filename) as f:
|
||||
with open(fn) as f:
|
||||
for w in [x.lower() for x in re.split("[^a-zA-Z]+", f.read()) if len(x) > 0 and x.lower() not in stops]:
|
||||
word_freqs[w] = word_freqs.get(w, 0) + 1
|
||||
word_freqsl = word_freqs.items()
|
||||
word_freqsl.sort(lambda x, y: cmp(y[1], x[1]))
|
||||
data[filename] = word_freqsl
|
||||
wf = list(word_freqs.items())
|
||||
data[fn] = sorted(wf,key=lambda x: x[1],reverse=True)
|
||||
|
||||
if args == None:
|
||||
return error_state()
|
||||
@@ -41,6 +40,7 @@ def upload_post_handler(args):
|
||||
try:
|
||||
create_data(filename)
|
||||
except:
|
||||
print("Unexpected error: %s" % sys.exc_info()[0])
|
||||
return error_state()
|
||||
return word_get_handler([filename, 0])
|
||||
|
||||
@@ -81,7 +81,7 @@ def handle_request(verb, uri, args):
|
||||
|
||||
# A very simple client "browser"
|
||||
def render_and_get_input(state_representation, links):
|
||||
print state_representation
|
||||
print(state_representation)
|
||||
sys.stdout.flush()
|
||||
if type(links) is dict: # many possible next states
|
||||
input = sys.stdin.readline().strip()
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user