120 Commits
v1.0 ... master

Author SHA1 Message Date
crista
01160ccb7f Update README.md 2023-04-30 14:24:17 -07:00
crista
c80a0216a5 Merge pull request #55 from Windows81/patch-1
Clarify Exercise 12.2
2023-04-30 14:22:58 -07:00
VisualPlugin
f923e1cc7e Update README.md 2023-04-30 06:01:25 +00:00
VisualPlugin
dfd2e61c50 Update README.md 2023-04-30 04:44:18 +00:00
Crista Lopes
180f942397 Renumber these files 2021-03-29 11:27:56 -07:00
Crista Lopes
5405edaf93 Move this file to the right folder 2021-03-29 11:25:51 -07:00
crista
67fdd1e97a Merge pull request #47 from rvdheij/master
Solution with REXX filter in CMS Pipelines
2021-03-29 11:19:56 -07:00
crista
185f174b6c Remove semi-colon 2021-03-29 11:18:06 -07:00
crista
9c8a7b7745 Merge pull request #51 from kdabiria/master
fixed the end of line bug
2021-03-29 11:14:43 -07:00
Kamyar Dabiri
275bb3171b fixed reading stop word yours 2021-02-16 16:51:42 -08:00
Kamyar Dabiri
9a7f21e3c0 fixed the end of line bug 2021-02-02 22:54:16 -08:00
Crista Lopes
d1b7eca7d8 Merge branch 'master' of https://github.com/crista/exercises-in-programming-style 2020-12-03 10:20:50 -08:00
Crista Lopes
e3e95f6b76 Typo 2020-12-03 10:19:46 -08:00
Crista Lopes
2b8e26b54b Added this other version for bow-tie 2020-12-03 10:19:24 -08:00
crista
18951848b3 Update README.md 2020-05-05 09:06:21 -07:00
crista
93730988b8 Update README.md 2020-05-05 09:06:08 -07:00
crista
b19ee3cd69 Update README.md 2020-05-05 09:05:54 -07:00
crista
9f4dbbe9e3 Update README.md 2020-05-05 09:05:39 -07:00
crista
e33e5bc3ed Update README.md 2020-05-05 09:05:19 -07:00
crista
c49206ae61 Update README.md 2020-05-05 09:05:06 -07:00
crista
d341e63254 Update README.md 2020-05-05 09:04:52 -07:00
crista
f810ef8723 Update README.md 2020-05-05 09:04:39 -07:00
crista
ba2286cde6 Update README.md 2020-05-05 09:04:25 -07:00
crista
44e1c052fe Update README.md 2020-05-05 09:04:12 -07:00
crista
3a4715588b Update README.md 2020-05-05 09:03:57 -07:00
crista
066563dd0b Update README.md 2020-05-05 09:03:44 -07:00
crista
7041115a5d Update README.md 2020-05-05 09:03:30 -07:00
crista
322be5928d Update README.md 2020-05-05 09:03:16 -07:00
crista
2b977aa8b9 Update README.md 2020-05-05 09:03:04 -07:00
crista
4e1fceb6c1 Update README.md 2020-05-05 09:02:51 -07:00
crista
b9a93f1c6c Update README.md 2020-05-05 09:02:38 -07:00
crista
d025aa0b6f Update README.md 2020-05-05 09:02:25 -07:00
crista
2e126939ac Update README.md 2020-05-05 09:01:57 -07:00
crista
290ec65593 Update README.md 2020-05-05 09:01:46 -07:00
crista
4f105a6236 Update README.md 2020-05-05 09:01:33 -07:00
crista
816dcd5923 Update README.md 2020-05-05 09:01:18 -07:00
crista
69f5a995e9 Update README.md 2020-05-05 09:01:05 -07:00
crista
9dabaa4b55 Update README.md 2020-05-05 09:00:48 -07:00
crista
4664602675 Update README.md 2020-05-05 09:00:35 -07:00
crista
f53a4c8523 Update README.md 2020-05-05 09:00:13 -07:00
crista
180572de7e Update README.md 2020-05-05 08:59:54 -07:00
crista
b55681f2a4 Update README.md 2020-05-04 18:05:28 -07:00
crista
c6a54ff405 Changed number 2020-04-24 07:30:41 -07:00
crista
766778a9a1 Changed number 2020-04-24 07:30:08 -07:00
crista
8a2066014f Changed number 2020-04-24 07:29:27 -07:00
Crista Lopes
454e4956e3 Shuffle things around 2020-03-08 17:33:15 -07:00
Crista Lopes
8aa9f80fc8 Adding the counter example, no learning 2020-02-27 17:10:36 -08:00
Crista Lopes
83377bf0df We don't need this extra white space 2020-01-06 17:46:01 -08:00
Crista Lopes
f33d353a69 Removed unused function 2020-01-02 17:26:43 -08:00
Crista Lopes
a0e47a7d70 Renaming to make space for bow tie 2020-01-02 14:09:19 -08:00
Crista Lopes
1a7dad48a2 Bow tie 2020-01-02 14:05:51 -08:00
Crista Lopes
0990084653 Rename these two in preparation for deletion 2020-01-02 09:59:52 -08:00
Crista Lopes
a81889f2bb Minor clean up 2020-01-02 09:57:02 -08:00
Crista Lopes
e8b48906e2 Added the learning counterpart of 37 2020-01-02 09:56:35 -08:00
Crista Lopes
ac3d348e45 Style name change 2020-01-01 15:46:01 -08:00
Crista Lopes
e0d99d546c Minor clean up 2020-01-01 15:45:18 -08:00
Crista Lopes
df289f0d2c oh look, one more style. Couldn't resist 2020-01-01 12:38:17 -08:00
Crista Lopes
a6d5ee0d13 SimpleRNN does the job equally well. 2020-01-01 12:03:23 -08:00
Crista Lopes
69e9934ae9 Minor aesthetic improvements 2019-12-31 16:52:39 -08:00
Crista Lopes
a6fc28a123 Rename this style 2019-12-31 10:18:06 -08:00
Crista Lopes
a8379345c7 Clean up 2019-12-31 10:17:12 -08:00
Crista Lopes
9c51b2fc1c This one is perfect 2019-12-31 10:13:41 -08:00
Crista Lopes
18c7135d59 This works almost perfectly, except for the first letter of the lines. 2019-12-31 10:09:48 -08:00
Crista Lopes
f4ce13c429 38 rnn 2019-12-30 19:02:08 -08:00
Crista Lopes
78df7731b3 dense, shallow, monolithic 2019-12-30 10:36:16 -08:00
Crista Lopes
40c292a4f7 Make 35 even more like 36 2019-12-28 18:24:52 -08:00
Crista Lopes
3a579e61e4 Added 36 2019-12-28 18:24:25 -08:00
Crista Lopes
8709c07dca Rename this style 2019-12-27 09:58:44 -08:00
Crista Lopes
9f76b84cff Flip two lines 2019-12-27 09:54:06 -08:00
Crista Lopes
d5c5e00adb Simplest possible thing that works for 35 2019-12-26 18:28:23 -08:00
Crista Lopes
beed9d10cd Change the name of this style to dense, because it's focusing on the dense layer 2019-12-26 16:44:21 -08:00
Crista Lopes
b855384437 Add a version of this that works on single characters, and batches on the line. It's much simpler to explain. 2019-12-26 16:42:33 -08:00
Crista Lopes
997c2827b1 Make space for more variations 2019-12-26 16:41:01 -08:00
Crista Lopes
239c19732a Renaming 2019-12-26 10:41:49 -08:00
Crista Lopes
5a2b5975c7 Some renaming 2019-12-26 10:40:03 -08:00
Crista Lopes
3412e6fe95 Minor 2019-12-26 10:36:45 -08:00
Crista Lopes
7861836070 Rename to tf-35.py 2019-12-26 10:35:28 -08:00
Crista Lopes
9549e4473e Delete this file 2019-12-26 10:34:49 -08:00
Crista Lopes
451c5d9937 Make it simpler 2019-12-26 10:34:07 -08:00
Crista Lopes
e0d8b1adc2 Added this attempt 2019-12-25 14:57:12 -08:00
Crista Lopes
7f7c4e111a Line styling 2019-12-24 07:02:45 -08:00
Crista Lopes
0520c182be Finish the last commit 2019-12-24 06:47:54 -08:00
Crista Lopes
6e1d72bd21 Tidy up, make line numbers consistent with text 2019-12-22 17:43:07 -08:00
crista
ebeba3d34c Merge pull request #50 from amcheng/master
replace list comprehension with map function
2019-12-16 15:54:38 -08:00
Crista Lopes
9acd61f160 Add this one more train/validation data generator 2019-11-26 00:56:33 -08:00
Crista Lopes
c7cb993a56 Very rough learning to count words 2019-11-26 00:38:29 -08:00
Crista Lopes
a18c4c6980 Learn to normalize characters given a line, and using the model of the no-learning version 2019-11-26 00:21:25 -08:00
Crista Lopes
e2c531fc5c Learn how to normalize one character at a time 2019-11-26 00:02:24 -08:00
Crista Lopes
de77c8a224 Rename 2019-11-26 00:02:03 -08:00
Crista Lopes
75e89c1acf Added same as no-learning but with learning. Doesn't learn. 2019-11-25 22:53:41 -08:00
Crista Lopes
a0b28a985a Simple program to normalize characters 2019-11-25 20:04:25 -08:00
Crista Lopes
659e43e92f First simple word counter. Doesn't work on pride-prejudice -- OOM. It works on half of it. 2019-11-24 19:04:13 -08:00
Alex Cheng
3250ac1188 replace list comprehension with map function 2019-11-19 19:32:40 -08:00
Crista Lopes
88bb3ee2df Better 2019-08-13 09:17:37 -07:00
Crista Lopes
d202fea352 Format to fit in 1 page 2019-08-12 20:15:33 -07:00
Crista Lopes
cc86a2cebc Rename these too 2019-08-12 19:44:32 -07:00
Crista Lopes
eb85af9982 Place the APL example in the right folder 2019-08-12 14:39:29 -07:00
Crista Lopes
61d5f74ad9 Massive renaming! 2019-08-12 14:38:16 -07:00
Crista Lopes
e6c1238a56 Better comment 2019-08-12 14:19:05 -07:00
Crista Lopes
053bec0fc1 Clean up APL code and sprinkle an example in comments 2019-08-12 14:15:10 -07:00
Crista Lopes
ae79757199 APL, baby! 2019-08-11 19:07:16 -07:00
Crista Lopes
38095fd076 Move experimental code to zothers folder 2019-08-11 08:54:20 -07:00
rvdheij
0f826ee6bc Merge pull request #1 from rvdheij/rvdheij-cms
Solution with REXX filter in CMS Pipelines
2019-04-26 16:55:53 +02:00
rvdheij
c23828777f Solution with REXX filter in CMS Pipelines
CMS Pipelines implements piping in z/VM. https://en.wikipedia.org/wiki/CMS_Pipelines
This code implements a pipeline stage that takes two input streams: the document to count, and the stop words to exclude from counting.
2019-04-26 16:54:50 +02:00
Crista Lopes
ed8c3cabaa Finally rewrote the code for Python3! 2018-12-13 19:57:17 -08:00
crista
4449f0b7ba Merge pull request #37 from cclauss/modernize-python2-code
Modernize Python 2 code to get ready for Python 3
2018-12-13 12:04:20 -08:00
crista
32af06377d Merge pull request #36 from imarudina/master
Stop words contained '\n\n' due to an empty line in stop_words.txt
2018-12-13 11:47:25 -08:00
crista
1e3ec4932a Merge pull request #39 from tingstad/patch-1
tf-03.py: unused variable
2018-12-13 11:27:48 -08:00
crista
f196134fc1 Merge pull request #40 from gqqnbig/stop
Fix TypeError: 'bool' object is not callable
2018-12-13 11:23:37 -08:00
crista
037ef4e621 Merge pull request #41 from j10sanders/patch-1
Fix function arguments in comment
2018-12-07 11:06:02 -08:00
crista
aceaf60ce1 Merge pull request #42 from ericandrewlewis/patch-1
Wrap code in a preformatted block
2018-12-07 11:05:02 -08:00
crista
18d25aa2f0 Merge branch 'master' into modernize-python2-code 2018-12-06 19:51:27 -08:00
Crista Lopes
685450ef92 Started to port the code to Python 3. Ugh! 2018-12-06 19:33:56 -08:00
Eric Lewis
f6a7e671c5 Wrap code in a preformatted block 2018-10-08 10:55:38 -04:00
Jonathan Sanders
b6d0b86e56 Fix function arguments in comment 2018-07-13 12:24:50 -04:00
Qiqi Gu
f8233e34ff Fix TypeError: 'bool' object is not callable
Thread has a method called stop, ActiveWFObject._stop will override the parent one. Therefore I have to rename _stop.
2018-07-09 09:06:50 -07:00
tingstad
53a60ea121 tf03.py: unused variable
Removed unused variable "found_at"
2018-06-17 18:07:14 +02:00
cclauss
1fa9eb3c2b Modernize Python 2 code to get ready for Python 3 2018-03-21 21:41:35 +01:00
Irina Marudina
4224ba4ba9 monolith: remove unused variable 2018-03-21 18:37:24 +02:00
Irina Marudina
c93bc2fbfd Stop words contained '\n\n' due to an empty line in stop_words.txt 2018-03-21 15:46:37 +02:00
123 changed files with 2242 additions and 260 deletions

View File

@@ -1,5 +1,4 @@
#!/usr/bin/env python
import sys, os, string
# Utility for handling the intermediate 'secondary memory'
@@ -46,7 +45,7 @@ data.append(0) # data[7] is frequency
# Open the secondary memory
word_freqs = touchopen('word_freqs', 'rb+')
# Open the input file
f = open(sys.argv[1])
f = open(sys.argv[1], 'r')
# Loop over input file's lines
while True:
data[1] = [f.readline()]
@@ -71,7 +70,7 @@ while True:
if len(data[5]) >= 2 and data[5] not in data[0]:
# Let's see if it already exists
while True:
data[6] = word_freqs.readline().strip()
data[6] = str(word_freqs.readline().strip(), 'utf-8')
if data[6] == '':
break;
data[7] = int(data[6].split(',')[1])
@@ -83,10 +82,10 @@ while True:
break
if not data[4]:
word_freqs.seek(0, 1) # Needed in Windows
word_freqs.writelines("%20s,%04d\n" % (data[5], 1))
word_freqs.write(bytes("%20s,%04d\n" % (data[5], 1), 'utf-8'))
else:
word_freqs.seek(-26, 1)
word_freqs.writelines("%20s,%04d\n" % (data[5], data[7]))
word_freqs.write(bytes("%20s,%04d\n" % (data[5], data[7]), 'utf-8'))
word_freqs.seek(0,0)
# Let's reset
data[2] = None
@@ -96,7 +95,7 @@ f.close()
word_freqs.flush()
# PART 2
# Now we need to find the 25 most frequently occuring words.
# Now we need to find the 25 most frequently occurring words.
# We don't need anything from the previous values in memory
del data[:]
@@ -107,7 +106,7 @@ data.append(0) # data[26] is freq
# Loop over secondary memory file
while True:
data[25] = word_freqs.readline().strip()
data[25] = str(word_freqs.readline().strip(), 'utf-8')
if data[25] == '': # EOF
break
data[26] = int(data[25].split(',')[1]) # Read it as integer
@@ -121,6 +120,6 @@ while True:
for tf in data[0:25]: # elimination of symbol tf is exercise
if len(tf) == 2:
print tf[0], ' - ', tf[1]
print(tf[0], '-', tf[1])
# We're done
word_freqs.close()

View File

@@ -4,7 +4,12 @@
# Author: Chris Meyers @
# http://openbookproject.net/py4fun/forth/forth.html
#
import sys, re
import re
try:
raw_input # Python 2
except NameError:
raw_input = input # Python 3
ds = [] # The data stack
cStack = [] # The control struct stack
@@ -15,7 +20,7 @@ words = [] # The input stream of tokens
def main() :
while 1 :
pcode = compile() # compile/run from user
if pcode == None : print; return
if pcode == None : print(); return
execute(pcode)
#============================== Lexical Parsing
@@ -56,8 +61,8 @@ def rSwap(cod,p) : a=ds.pop(); b=ds.pop(); ds.append(a); ds.append(b)
def rDup (cod,p) : ds.append(ds[-1])
def rDrop(cod,p) : ds.pop()
def rOver(cod,p) : ds.append(ds[-2])
def rDump(cod,p) : print "ds = ", ds
def rDot (cod,p) : print ds.pop()
def rDump(cod,p) : print("ds = ", ds)
def rDot (cod,p) : print(ds.pop())
def rJmp (cod,p) : return cod[p]
def rJnz (cod,p) : return (cod[p],p+1)[ds.pop()]
def rJz (cod,p) : return (p+1,cod[p])[ds.pop()==0]

View File

@@ -91,7 +91,7 @@ def frequencies():
def sort():
# Not in style, left as exercise
stack.extend(sorted(stack.pop().iteritems(), key=operator.itemgetter(1)))
stack.extend(sorted(stack.pop().items(), key=operator.itemgetter(1)))
# The main function
#
@@ -104,7 +104,7 @@ stack.append(0)
# the last word there will be one item left
while stack[-1] < 25 and len(stack) > 1:
heap['i'] = stack.pop()
(w, f) = stack.pop(); print w, ' - ', f
(w, f) = stack.pop(); print(w, '-', f)
stack.append(heap['i']); stack.append(1)
stack.append(stack.pop() + stack.pop())

50
03-arrays/tf-03.py Normal file
View File

@@ -0,0 +1,50 @@
import sys, string
import numpy as np
# Term frequency in the array-programming style: the whole text is loaded
# into one NumPy array of characters and transformed with whole-array
# operations rather than explicit per-character loops.
# Usage: tf-03.py <input-file>
# Stop words are read from ../stop_words.txt (comma-separated).

# Example input: "Hello  World!" (two spaces between the words)
# A space is padded on both ends so every word is enclosed by two spaces.
# Read inside a `with` block so the file handle is closed right away.
with open(sys.argv[1]) as f:
    characters = np.array([' '] + list(f.read()) + [' '])
# Result: array([' ', 'H', 'e', 'l', 'l', 'o', ' ', ' ',
#           'W', 'o', 'r', 'l', 'd', '!', ' '], dtype='<U1')

# Normalize: every non-letter becomes a space, every letter lower case
characters[~np.char.isalpha(characters)] = ' '
characters = np.char.lower(characters)
# Result: array([' ', 'h', 'e', 'l', 'l', 'o', ' ', ' ',
#           'w', 'o', 'r', 'l', 'd', ' ', ' '], dtype='<U1')

### Split the words by finding the indices of spaces
sp = np.where(characters == ' ')
# Result: (array([ 0, 6, 7, 13, 14], dtype=int64),)

# A little trick: let's double each index, and then take pairs
sp2 = np.repeat(sp, 2)
# Result: array([ 0, 0, 6, 6, 7, 7, 13, 13, 14, 14], dtype=int64)

# Get the pairs as a 2D matrix, skip the first and the last
w_ranges = np.reshape(sp2[1:-1], (-1, 2))
# Result: array([[ 0,  6],
#                [ 6,  7],
#                [ 7, 13],
#                [13, 14]], dtype=int64)

# Remove the indexing to the spaces themselves. A range runs from one space
# to the next, so it holds (end - start - 1) letters; requiring a difference
# greater than 2 keeps only words of at least two letters.
w_ranges = w_ranges[np.where(w_ranges[:, 1] - w_ranges[:, 0] > 2)]
# Result: array([[ 0,  6],
#                [ 7, 13]], dtype=int64)

# Voila! Words are in between spaces, given as pairs of indices.
# Each slice still starts with the leading space; it is stripped below.
words = list(map(lambda r: characters[r[0]:r[1]], w_ranges))
# Result: [array([' ', 'h', 'e', 'l', 'l', 'o'], dtype='<U1'),
#          array([' ', 'w', 'o', 'r', 'l', 'd'], dtype='<U1')]

# Let's recode the characters as strings
swords = np.array(list(map(lambda w: ''.join(w).strip(), words)))
# Result: array(['hello', 'world'], dtype='<U5')

# Next, let's remove stop words
with open('../stop_words.txt') as f:
    stop_words = np.array(list(set(f.read().split(','))))
ns_words = swords[~np.isin(swords, stop_words)]

### Finally, count the word occurrences
uniq, counts = np.unique(ns_words, axis=0, return_counts=True)
# Most frequent first; only the top 25 are printed
wf_sorted = sorted(zip(uniq, counts), key=lambda t: t[1], reverse=True)
for w, c in wf_sorted[:25]:
    print(w, '-', c)

View File

@@ -1,4 +1,4 @@
Style #3
Style #4
==============================
Constraints:

View File

@@ -1,6 +1,6 @@
#!/usr/bin/env python
import sys, string
# the global list of [word, frequency] pairs
word_freqs = []
# the list of stop words
@@ -30,7 +30,6 @@ for line in open(sys.argv[1]):
if word == pair[0]:
pair[1] += 1
found = True
found_at = pair_index
break
pair_index += 1
if not found:
@@ -47,5 +46,5 @@ for line in open(sys.argv[1]):
i += 1
for tf in word_freqs[0:25]:
print tf[0], ' - ', tf[1]
print(tf[0], '-', tf[1])

View File

@@ -1,4 +1,4 @@
Style #4
Style #5
==============================
Constraints:

View File

@@ -70,7 +70,7 @@ def sort():
Sorts word_freqs by frequency
"""
global word_freqs
word_freqs.sort(lambda x, y: cmp(y[1], x[1]))
word_freqs.sort(key=lambda x: x[1], reverse=True)
#
@@ -84,5 +84,4 @@ frequencies()
sort()
for tf in word_freqs[0:25]:
print tf[0], ' - ', tf[1]
print(tf[0], '-', tf[1])

View File

@@ -1,4 +1,4 @@
Style #5
Style #6
==============================
Constraints:

View File

@@ -58,15 +58,15 @@ def sort(word_freq):
and returns a list of pairs where the entries are
sorted by frequency
"""
return sorted(word_freq.iteritems(), key=operator.itemgetter(1), reverse=True)
return sorted(word_freq.items(), key=operator.itemgetter(1), reverse=True)
def print_all(word_freqs):
"""
Takes a list of pairs where the entries are sorted by frequency and print them recursively.
"""
if(len(word_freqs) > 0):
print word_freqs[0][0], ' - ', word_freqs[0][1]
print_all(word_freqs[1:]);
print(word_freqs[0][0], '-', word_freqs[0][1])
print_all(word_freqs[1:])
#
# The main function

26
06-pipeline/tf-06.rexx Normal file
View File

@@ -0,0 +1,26 @@
/* WORDFREQ REXX Exercises in Programming - CMS Pipelines style */
/* Author: Rob van der Heij, 26 Apr 2019 */
/* Use: */
/* PIPE (end \) < pridenp txt | w: wordfreq | cons */
/* \ < stop_wor txt | w: */
alpha = 'A-I a-i J-R j-r S-Z s-z a-i a-i j-r j-r s-z s-z' /* xlate pairs: upper -> lower, lower -> itself; NOTE(review): A-I/J-R/S-Z split presumably follows EBCDIC letter blocks - confirm */
'callpipe (end \ name WORDFREQ.REXX:6)',
'\ *: ',
'| xlate *-* 00-FF blank' alpha , /* Keep words in lower case */
'| split ', /* .. on separate records */
'| locate 2', /* At least 2 letters long */
'| l: not lookup', /* Drop all stop words */
'| sort count', /* Count the remaining words */
'| sort 1.10 d ', /* Sort on number of hits */
'| take 25', /* Take top-25 */
'| spec 11-* 1 , - , nw 1.10 strip nw ', /* Make pretty layout */
'| *:',
'\ *.input.1: ', /* Read stop words */
'| split ,', /* .. as one word per line */
'| l:' /* into lookup table */
/* (rc <> 12) evaluates to 0 or 1, so rc 12 is returned as 0 and any   */
/* other rc is passed through unchanged.                               */
/* NOTE(review): rc 12 presumably signals normal end-of-file - confirm */
return rc * ( rc <> 12 )

View File

@@ -1,4 +1,4 @@
Style #6
Style #7
==============================
Constraints:

View File

@@ -4,5 +4,5 @@ import re, string, sys
stops = set(open("../stop_words.txt").read().split(",") + list(string.ascii_lowercase))
words = [x.lower() for x in re.split("[^a-zA-Z]+", open(sys.argv[1]).read()) if len(x) > 0 and x.lower() not in stops]
unique_words = list(set(words))
unique_words.sort(lambda x, y: cmp(words.count(y), words.count(x)))
print "\n".join(["%s - %s" % (x, words.count(x)) for x in unique_words[:25]])
unique_words.sort(key=lambda x: words.count(x), reverse=True)
print("\n".join(["%s - %s" % (x, words.count(x)) for x in unique_words[:25]]))

View File

@@ -1 +1,2 @@
print (reduce(lambda string, tup: string + tup[0] + ' - ' + str(tup[1]) + '\n', sorted( filter(lambda tup: tup[0] not in open(__import__('os').path.join(__import__('os').path.dirname(__file__), '..', 'stop_words.txt')).read().lower().split(','), reduce(lambda word_dict, word: word_dict if (word_dict.__setitem__(word, word_dict.get(word, 0) + 1) if True else None) else word_dict, filter(lambda word: len(word) > 1, (''.join(map(lambda letter: ' ' if ord(letter) not in set(range(ord('a'), ord('z') + 1)) else letter, open(__import__('sys').argv[1]).read().lower()))).split()), {}).iteritems()), key=lambda tup: tup[1], reverse=True)[0:25], '')) # hole in one?
from functools import reduce
print (reduce(lambda string, tup: string + tup[0] + ' - ' + str(tup[1]) + '\n', sorted( filter(lambda tup: tup[0] not in open(__import__('os').path.join(__import__('os').path.dirname(__file__), '..', 'stop_words.txt')).read().lower().split(','), reduce(lambda word_dict, word: word_dict if (word_dict.__setitem__(word, word_dict.get(word, 0) + 1) if True else None) else word_dict, filter(lambda word: len(word) > 1, (''.join(map(lambda letter: ' ' if ord(letter) not in set(range(ord('a'), ord('z') + 1)) else letter, open(__import__('sys').argv[1]).read().lower()))).split()), {}).items()), key=lambda tup: tup[1], reverse=True)[0:25], '')) # hole in one?

View File

@@ -1,11 +1,10 @@
#!/usr/bin/env python
# My golf score is slightly lower!
# Best wishes, Peter Norvig
import re, sys, collections
stopwords = set(open('../stop_words.txt').read().split(','))
words = re.findall('[a-z]{2,}', open(sys.argv[1]).read().lower())
counts = collections.Counter(w for w in words if w not in stopwords)
for (w, c) in counts.most_common(25):
print w, '-', c
print(w, '-', c)

View File

@@ -3,4 +3,4 @@ import heapq, re, sys
words = re.findall("[a-z]{2,}", open(sys.argv[1]).read().lower())
for w in heapq.nlargest(25, set(words) - set(open("../stop_words.txt").read().split(",")), words.count):
print w, "-", words.count(w)
print(w, '-', words.count(w))

View File

@@ -1,4 +1,4 @@
Style #7
Style #8
==============================
Constraints:

View File

@@ -2,7 +2,7 @@
import re, sys, operator
# Mileage may vary. If this crashes, make it lower
RECURSION_LIMIT = 9500
RECURSION_LIMIT = 5000
# We add a few more, because, contrary to the name,
# this doesn't just rule recursion: it rules the
# depth of the call stack
@@ -29,16 +29,16 @@ def wf_print(wordfreq):
return
else:
(w, c) = wordfreq[0]
print w, '-', c
print(w, '-', c)
wf_print(wordfreq[1:])
stop_words = set(open('../stop_words.txt').read().split(','))
words = re.findall('[a-z]{2,}', open(sys.argv[1]).read().lower())
word_freqs = {}
# Theoretically, we would just call count(words, word_freqs)
# Theoretically, we would just call count(words, stop_words, word_freqs)
# Try doing that and see what happens.
for i in range(0, len(words), RECURSION_LIMIT):
count(words[i:i+RECURSION_LIMIT], stop_words, word_freqs)
wf_print(sorted(word_freqs.iteritems(), key=operator.itemgetter(1), reverse=True)[:25])
wf_print(sorted(word_freqs.items(), key=operator.itemgetter(1), reverse=True)[:25])

View File

@@ -1,4 +1,4 @@
Style #8
Style #9
==============================
Variation of the candy factory style, with the following additional constraints:

View File

@@ -21,7 +21,7 @@ def scan(str_data, func):
def remove_stop_words(word_list, func):
with open('../stop_words.txt') as f:
stop_words = f.read().split(',')
stop_words = f.read().strip('\n').split(',')
# add single-letter words
stop_words.extend(list(string.ascii_lowercase))
func([w for w in word_list if not w in stop_words], sort)
@@ -36,11 +36,11 @@ def frequencies(word_list, func):
func(wf, print_text)
def sort(wf, func):
func(sorted(wf.iteritems(), key=operator.itemgetter(1), reverse=True), no_op)
func(sorted(wf.items(), key=operator.itemgetter(1), reverse=True), no_op)
def print_text(word_freqs, func):
for (w, c) in word_freqs[0:25]:
print w, "-", c
print(w, '-', c)
func(None)
def no_op(func):

View File

@@ -1,4 +1,4 @@
Style #9
Style #10
==============================
Constraints:

View File

@@ -13,7 +13,7 @@ class TFTheOne:
return self
def printme(self):
print self._value
print(self._value)
#
# The functions
@@ -35,7 +35,7 @@ def scan(str_data):
def remove_stop_words(word_list):
with open('../stop_words.txt') as f:
stop_words = f.read().split(',')
stop_words = f.read().strip('\n').split(',')
# add single-letter words
stop_words.extend(list(string.ascii_lowercase))
return [w for w in word_list if not w in stop_words]
@@ -50,7 +50,7 @@ def frequencies(word_list):
return word_freqs
def sort(word_freq):
return sorted(word_freq.iteritems(), key=operator.itemgetter(1), reverse=True)
return sorted(word_freq.items(), key=operator.itemgetter(1), reverse=True)
def top25_freqs(word_freqs):
top25 = ""

View File

@@ -1,20 +0,0 @@
Style #14
==============================
Constraints:
- The larger problem is decomposed into 'things' that make sense for
the problem domain
- Each 'thing' is a capsule of data that exposes one single procedure,
namely the ability to receive and dispatch messages that are sent to
it
- Message dispatch can result in sending the message to another capsule
Possible names:
- Letterbox
- Messaging style
- Objects
- Actors

View File

@@ -1,4 +1,4 @@
Style #10
Style #11
==============================
Constraints:

View File

@@ -55,7 +55,7 @@ class WordFrequencyManager(TFExercise):
self._word_freqs[word] = 1
def sorted(self):
return sorted(self._word_freqs.iteritems(), key=operator.itemgetter(1), reverse=True)
return sorted(self._word_freqs.items(), key=operator.itemgetter(1), reverse=True)
def info(self):
return super(WordFrequencyManager, self).info() + ": My major data structure is a " + self._word_freqs.__class__.__name__
@@ -73,7 +73,7 @@ class WordFrequencyController(TFExercise):
word_freqs = self._word_freq_manager.sorted()
for (w, c) in word_freqs[0:25]:
print w, ' - ', c
print(w, '-', c)
#
# The main function

36
12-letterbox/README.md Normal file
View File

@@ -0,0 +1,36 @@
Style #12
==============================
Constraints:
- The larger problem is decomposed into 'things' that make sense for
the problem domain
- Each 'thing' is a capsule of data that exposes one single procedure,
namely the ability to receive and dispatch messages that are sent to
it
- Message dispatch can result in sending the message to another capsule
Possible names:
- Letterbox
- Messaging style
- Objects
- Actors
## Style #12.2
Constraints:
- For each 'thing' class, let's bring the `info` methods from the Things style (#11)
- In the Things style, we used inheritance via a Python call to `super` to call a shared function from a base class
- We should apply this concept of code-reuse, but make sure to do so without using inheritance
- Just like in style 11, we will not be calling `info` in the main routine
Possible names:
- Delegation

View File

@@ -63,7 +63,7 @@ class WordFrequencyManager():
self._word_freqs[word] = 1
def _sorted(self):
return sorted(self._word_freqs.iteritems(), key=operator.itemgetter(1), reverse=True)
return sorted(self._word_freqs.items(), key=operator.itemgetter(1), reverse=True)
class WordFrequencyController():
@@ -89,7 +89,7 @@ class WordFrequencyController():
word_freqs = self._word_freq_manager.dispatch(['sorted'])
for (w, c) in word_freqs[0:25]:
print w, ' - ', c
print(w, '-', c)
#
# The main function

View File

@@ -1,4 +1,4 @@
Style #12
Style #13
==============================
Constraints:

View File

@@ -34,7 +34,7 @@ stop_words_obj = {
word_freqs_obj = {
'freqs' : {},
'increment_count' : lambda w : increment_count(word_freqs_obj, w),
'sorted' : lambda : sorted(word_freqs_obj['freqs'].iteritems(), key=operator.itemgetter(1), reverse=True)
'sorted' : lambda : sorted(word_freqs_obj['freqs'].items(), key=operator.itemgetter(1), reverse=True)
}
data_storage_obj['init'](sys.argv[1])
@@ -46,4 +46,4 @@ for w in data_storage_obj['words']():
word_freqs = word_freqs_obj['sorted']()
for (w, c) in word_freqs[0:25]:
print w, ' - ', c
print(w, '-', c)

View File

@@ -1,4 +1,4 @@
Style #13
Style #14
==============================
Constraints:

View File

@@ -4,27 +4,24 @@ import abc, sys, re, operator, string
#
# The abstract things
#
class IDataStorage (object):
class IDataStorage (metaclass=abc.ABCMeta):
""" Models the contents of the file """
__metaclass__ = abc.ABCMeta
@abc.abstractmethod
def words(self):
""" Returns the words in storage """
pass
class IStopWordFilter (object):
class IStopWordFilter (metaclass=abc.ABCMeta):
""" Models the stop word filter """
__metaclass__ = abc.ABCMeta
@abc.abstractmethod
def is_stop_word(self, word):
""" Checks whether the given word is a stop word """
pass
class IWordFrequencyCounter(object):
class IWordFrequencyCounter(metaclass=abc.ABCMeta):
""" Keeps the word frequency data """
__metaclass__ = abc.ABCMeta
@abc.abstractmethod
def increment_count(self, word):
@@ -71,15 +68,15 @@ class WordFrequencyManager:
self._word_freqs[word] = 1
def sorted(self):
return sorted(self._word_freqs.iteritems(), key=operator.itemgetter(1), reverse=True)
return sorted(self._word_freqs.items(), key=operator.itemgetter(1), reverse=True)
#
# The wiring between abstract things and concrete things
#
IDataStorage.register(DataStorageManager)
IStopWordFilter.register(StopWordManager)
IWordFrequencyCounter.register(WordFrequencyManager)
IDataStorage.register(subclass=DataStorageManager)
IStopWordFilter.register(subclass=StopWordManager)
IWordFrequencyCounter.register(subclass=WordFrequencyManager)
#
# The application object
@@ -97,7 +94,7 @@ class WordFrequencyController:
word_freqs = self._word_freq_counter.sorted()
for (w, c) in word_freqs[0:25]:
print w, ' - ', c
print(w, '-', c)
#
# The main function

View File

@@ -1,4 +1,4 @@
Style #14
Style #15
==============================
Constraints:

View File

@@ -87,9 +87,9 @@ class WordFrequencyCounter:
self._word_freqs[word] = 1
def __print_freqs(self):
word_freqs = sorted(self._word_freqs.iteritems(), key=operator.itemgetter(1), reverse=True)
word_freqs = sorted(self._word_freqs.items(), key=operator.itemgetter(1), reverse=True)
for (w, c) in word_freqs[0:25]:
print w, ' - ', c
print(w, '-', c)
#
# The main function

View File

@@ -1,4 +1,4 @@
Style #15
Style #16
==============================
Constraints:

View File

@@ -77,9 +77,9 @@ class WordFrequencyCounter:
self._word_freqs[word] = 1
def print_freqs(self, event):
word_freqs = sorted(self._word_freqs.iteritems(), key=operator.itemgetter(1), reverse=True)
word_freqs = sorted(self._word_freqs.items(), key=operator.itemgetter(1), reverse=True)
for (w, c) in word_freqs[0:25]:
print w, ' - ', c
print(w, '-', c)
class WordFrequencyApplication:
def __init__(self, event_manager):

View File

@@ -1,4 +1,4 @@
Style #16
Style #17
==============================
Constraints:

View File

@@ -34,12 +34,12 @@ def frequencies(word_list):
def sort(word_freq):
# Meta-level data: locals()
return sorted(locals()['word_freq'].iteritems(), key=operator.itemgetter(1), reverse=True)
return sorted(locals()['word_freq'].items(), key=operator.itemgetter(1), reverse=True)
def main():
word_freqs = sort(frequencies(extract_words(sys.argv[1])))
for (w, c) in word_freqs[0:25]:
print w, ' - ', c
print(w, '-', c)
if __name__ == "__main__":
main()

View File

@@ -1,4 +1,4 @@
Style #17
Style #18
==============================
Constraints:

View File

@@ -21,7 +21,7 @@ def frequencies_imp(word_list):
if len(sys.argv) > 1:
extract_words_func = "lambda name : [x.lower() for x in re.split('[^a-zA-Z]+', open(name).read()) if len(x) > 0 and x.lower() not in stops]"
frequencies_func = "lambda wl : frequencies_imp(wl)"
sort_func = "lambda word_freq: sorted(word_freq.iteritems(), key=operator.itemgetter(1), reverse=True)"
sort_func = "lambda word_freq: sorted(word_freq.items(), key=operator.itemgetter(1), reverse=True)"
filename = sys.argv[1]
else:
extract_words_func = "lambda x: []"
@@ -44,5 +44,5 @@ exec('sort = ' + sort_func)
word_freqs = locals()['sort'](locals()['frequencies'](locals()['extract_words'](filename)))
for (w, c) in word_freqs[0:25]:
print w, ' - ', c
print(w, '-', c)

View File

@@ -1,4 +1,4 @@
Style #18
Style #19
==============================
Constraints:

View File

@@ -24,7 +24,7 @@ def frequencies(word_list):
return word_freqs
def sort(word_freq):
return sorted(word_freq.iteritems(), key=operator.itemgetter(1), reverse=True)
return sorted(word_freq.items(), key=operator.itemgetter(1), reverse=True)
# The side functionality
def profile(f):
@@ -32,7 +32,7 @@ def profile(f):
start_time = time.time()
ret_value = f(*arg, **kw)
elapsed = time.time() - start_time
print "%s(...) took %s secs" % (f.__name__, elapsed)
print("%s(...) took %s secs" % (f.__name__, elapsed))
return ret_value
return profilewrapper
@@ -40,10 +40,10 @@ def profile(f):
tracked_functions = [extract_words, frequencies, sort]
# weaver
for func in tracked_functions:
globals()[func.func_name]=profile(func)
globals()[func.__name__]=profile(func)
word_freqs = sort(frequencies(extract_words(sys.argv[1])))
for (w, c) in word_freqs[0:25]:
print w, ' - ', c
print(w, '-', c)

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -1,4 +1,4 @@
Style #19
Style #20
==============================
Constraints:

View File

@@ -7,5 +7,5 @@ def top25(word_list):
word_freqs[w] += 1
else:
word_freqs[w] = 1
return sorted(word_freqs.iteritems(), key=operator.itemgetter(1), reverse=True)[:25]
return sorted(word_freqs.items(), key=operator.itemgetter(1), reverse=True)[:25]

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -1,18 +1,18 @@
#!/usr/bin/env python
import sys, ConfigParser, imp
import sys, configparser, importlib.machinery
def load_plugins():
config = ConfigParser.ConfigParser()
config = configparser.ConfigParser()
config.read("config.ini")
words_plugin = config.get("Plugins", "words")
frequencies_plugin = config.get("Plugins", "frequencies")
global tfwords, tffreqs
tfwords = imp.load_compiled('tfwords', words_plugin)
tffreqs = imp.load_compiled('tffreqs', frequencies_plugin)
tfwords = importlib.machinery.SourcelessFileLoader('tfwords', words_plugin).load_module()
tffreqs = importlib.machinery.SourcelessFileLoader('tffreqs', frequencies_plugin).load_module()
load_plugins()
word_freqs = tffreqs.top25(tfwords.extract_words(sys.argv[1]))
for (w, c) in word_freqs:
print w, ' - ', c
print(w, '-', c)

View File

@@ -1,4 +1,4 @@
Style #20
Style #21
==============================
Constraints:

View File

@@ -12,7 +12,7 @@ def extract_words(path_to_file):
with open(path_to_file) as f:
str_data = f.read()
except IOError as e:
print "I/O error({0}) when opening {1}: {2}".format(e.errno, path_to_file, e.strerror)
print("I/O error({0}) when opening {1}: {2}".format(e.errno, path_to_file, e.strerror))
return []
pattern = re.compile('[\W_]+')
@@ -27,7 +27,7 @@ def remove_stop_words(word_list):
with open('../stop_words.txt') as f:
stop_words = f.read().split(',')
except IOError as e:
print "I/O error({0}) when opening ../stops_words.txt: {1}".format(e.errno, e.strerror)
print("I/O error({0}) when opening ../stops_words.txt: {1}".format(e.errno, e.strerror))
return word_list
stop_words.extend(list(string.ascii_lowercase))
@@ -49,7 +49,7 @@ def sort(word_freq):
if type(word_freq) is not dict or word_freq == {}:
return []
return sorted(word_freq.iteritems(), key=operator.itemgetter(1), reverse=True)
return sorted(word_freq.items(), key=operator.itemgetter(1), reverse=True)
#
# The main function
@@ -58,5 +58,5 @@ filename = sys.argv[1] if len(sys.argv) > 1 else "../input.txt"
word_freqs = sort(frequencies(remove_stop_words(extract_words(filename))))
for tf in word_freqs[0:25]:
print tf[0], ' - ', tf[1]
print(tf[0], '-', tf[1])

View File

@@ -1,4 +1,4 @@
Style #21
Style #22
==============================
Constraints:

View File

@@ -13,7 +13,7 @@ def extract_words(path_to_file):
with open(path_to_file) as f:
str_data = f.read()
except IOError as e:
print "I/O error({0}) when opening {1}: {2}! I quit!".format(e.errno, path_to_file, e.strerror)
print("I/O error({0}) when opening {1}: {2}! I quit!".format(e.errno, path_to_file, e.strerror))
raise e
pattern = re.compile('[\W_]+')
@@ -27,7 +27,7 @@ def remove_stop_words(word_list):
with open('../stop_words.txt') as f:
stop_words = f.read().split(',')
except IOError as e:
print "I/O error({0}) when opening ../stops_words.txt: {1}! I quit!".format(e.errno, e.strerror)
print("I/O error({0}) when opening ../stops_words.txt: {1}! I quit!".format(e.errno, e.strerror))
raise e
stop_words.extend(list(string.ascii_lowercase))
@@ -35,7 +35,7 @@ def remove_stop_words(word_list):
def frequencies(word_list):
assert(type(word_list) is list), "I need a list!"
assert(word_list <> []), "I need a non-empty list!"
assert(word_list != []), "I need a non-empty list!"
word_freqs = {}
for w in word_list:
@@ -47,12 +47,12 @@ def frequencies(word_list):
def sort(word_freq):
assert(type(word_freq) is dict), "I need a dictionary!"
assert(word_freq <> {}), "I need a non-empty dictionary!"
assert(word_freq != {}), "I need a non-empty dictionary!"
try:
return sorted(word_freq.iteritems(), key=operator.itemgetter(1), reverse=True)
return sorted(word_freq.items(), key=operator.itemgetter(1), reverse=True)
except Exception as e:
print "Sorted threw {0}: {1}".format(e)
print("Sorted threw {0}".format(e))
raise e
#
@@ -65,8 +65,8 @@ try:
assert(type(word_freqs) is list), "OMG! This is not a list!"
assert(len(word_freqs) > 25), "SRSLY? Less than 25 words!"
for (w, c) in word_freqs[0:25]:
print w, ' - ', c
print(w, '-', c)
except Exception as e:
print "Something wrong: {0}".format(e)
print("Something wrong: {0}".format(e))
traceback.print_exc()

View File

@@ -1,4 +1,4 @@
Style #22
Style #23
==============================
Constraints:

View File

@@ -21,9 +21,9 @@ class TFPassiveAggressive:
def printme(self):
if self._e == None:
print self._value
print(self._value)
else:
print self._e, " in ", self._offending_func.__name__
print(self._e, " in ", self._offending_func.__name__)
#
# The functions
@@ -53,7 +53,7 @@ def remove_stop_words(word_list):
def frequencies(word_list):
assert(type(word_list) is list), "I need a list! I quit!"
assert(word_list <> []), "I need a non-empty list! I quit!"
assert(word_list != []), "I need a non-empty list! I quit!"
word_freqs = {}
for w in word_list:
@@ -65,13 +65,13 @@ def frequencies(word_list):
def sort(word_freqs):
assert(type(word_freqs) is dict), "I need a dictionary! I quit!"
assert(word_freqs <> {}), "I need a non-empty dictionary! I quit!"
assert(word_freqs != {}), "I need a non-empty dictionary! I quit!"
return sorted(word_freqs.iteritems(), key=operator.itemgetter(1), reverse=True)
return sorted(word_freqs.items(), key=operator.itemgetter(1), reverse=True)
def top25_freqs(word_freqs):
assert(type(word_freqs) is list), "I need a list! I quit!"
assert(word_freqs <> {}), "I need a non-empty dictionary! I quit!"
assert(word_freqs != {}), "I need a non-empty dictionary! I quit!"
top25 = ""
for tf in word_freqs[0:25]:

View File

@@ -25,7 +25,7 @@ def remove_stop_words(word_list):
def frequencies(word_list):
assert(type(word_list) is list), "I need a list! I quit!"
assert(word_list <> []), "I need a non-empty list! I quit!"
assert(word_list != []), "I need a non-empty list! I quit!"
word_freqs = {}
for w in word_list:
@@ -37,9 +37,9 @@ def frequencies(word_list):
def sort(word_freqs):
assert(type(word_freqs) is dict), "I need a dictionary! I quit!"
assert(word_freqs <> {}), "I need a non-empty dictionary! I quit!"
assert(word_freqs != {}), "I need a non-empty dictionary! I quit!"
return sorted(word_freqs.iteritems(), key=operator.itemgetter(1), reverse=True)
return sorted(word_freqs.items(), key=operator.itemgetter(1), reverse=True)
#
# The main function
@@ -50,8 +50,8 @@ try:
assert(len(word_freqs) > 25), "OMG! Less than 25 words! I QUIT!"
for tf in word_freqs[0:25]:
print tf[0], ' - ', tf[1]
print(tf[0], '-', tf[1])
except Exception as e:
print "Something wrong: {0}".format(e)
print("Something wrong: {0}".format(e))

View File

@@ -1,4 +1,4 @@
Style #23
Style #24
==============================
Constraints:

View File

@@ -11,7 +11,7 @@ class AcceptTypes():
def __call__(self, f):
def wrapped_f(*args):
for i in range(len(self._args)):
if type(args[i]) <> self._args[i]:
if type(args[i]) != self._args[i]:
raise TypeError("Expecting %s got %s" % (str(self._args[i]), str(type(args[i]))))
return f(*args)
return wrapped_f
@@ -41,9 +41,9 @@ def frequencies(word_list):
@AcceptTypes(dict)
def sort(word_freq):
return sorted(word_freq.iteritems(), key=operator.itemgetter(1), reverse=True)
return sorted(word_freq.items(), key=operator.itemgetter(1), reverse=True)
word_freqs = sort(frequencies(extract_words(sys.argv[1])))
for (w, c) in word_freqs[0:25]:
print w, ' - ', c
print(w, '-', c)

View File

@@ -1,4 +1,4 @@
Style #24
Style #25
==============================
This style is a variation of style #09, The One, with the following additional constraints:

View File

@@ -19,7 +19,7 @@ class TFQuarantine:
value = lambda : None
for func in self._funcs:
value = func(guard_callable(value))
print guard_callable(value)
print(guard_callable(value))
#
# The functions
@@ -57,7 +57,7 @@ def frequencies(word_list):
return word_freqs
def sort(word_freq):
return sorted(word_freq.iteritems(), key=operator.itemgetter(1), reverse=True)
return sorted(word_freq.items(), key=operator.itemgetter(1), reverse=True)
def top25_freqs(word_freqs):
top25 = ""

View File

@@ -1,4 +1,4 @@
Style #25
Style #26
==============================
Constraints:

View File

@@ -66,4 +66,4 @@ with sqlite3.connect('tf.db') as connection:
for i in range(25):
row = c.fetchone()
if row != None:
print row[0] + ' - ' + str(row[1])
print(row[0], '-', str(row[1]))

View File

@@ -1,4 +1,4 @@
Style #26
Style #27
==============================
Constraints:

View File

@@ -8,18 +8,18 @@ import sys, re, itertools, operator
all_words = [(), None]
stop_words = [(), None]
non_stop_words = [(), lambda : \
map(lambda w : \
list(map(lambda w : \
w if w not in stop_words[0] else '',\
all_words[0])]
all_words[0]))]
unique_words = [(),lambda :
set([w for w in non_stop_words[0] if w!=''])]
counts = [(), lambda :
map(lambda w, word_list : word_list.count(w), \
list(map(lambda w, word_list : word_list.count(w), \
unique_words[0], \
itertools.repeat(non_stop_words[0], \
len(unique_words[0])))]
len(unique_words[0]))))]
sorted_data = [(), lambda : sorted(zip(list(unique_words[0]), \
counts[0]), \
list(counts[0])), \
key=operator.itemgetter(1),
reverse=True)]
@@ -46,4 +46,4 @@ stop_words[0] = set(open('../stop_words.txt').read().split(','))
update()
for (w, c) in sorted_data[0][:25]:
print w, '-', c
print(w, '-', c)

View File

@@ -1,4 +1,4 @@
Style #27
Style #28
==============================
Constraints:

View File

@@ -1,11 +1,15 @@
#!/usr/bin/env python
import sys, operator, string
import sys
import operator
import string
def characters(filename):
for line in open(filename):
for c in line:
yield c
def all_words(filename):
start_char = True
for c in characters(filename):
@@ -15,7 +19,8 @@ def all_words(filename):
# We found the start of a word
word = c.lower()
start_char = False
else: pass
else:
pass
else:
if c.isalnum():
word += c.lower()
@@ -24,25 +29,29 @@ def all_words(filename):
start_char = True
yield word
def non_stop_words(filename):
stopwords = set(open('../stop_words.txt').read().split(',') + list(string.ascii_lowercase))
stopwords = set(open(
'../stop_words.txt').read().strip('\n').split(',') + list(string.ascii_lowercase))
for w in all_words(filename):
if not w in stopwords:
yield w
def count_and_sort(filename):
freqs, i = {}, 1
for w in non_stop_words(filename):
freqs[w] = 1 if w not in freqs else freqs[w]+1
if i % 5000 == 0:
yield sorted(freqs.iteritems(), key=operator.itemgetter(1), reverse=True)
yield sorted(freqs.items(), key=operator.itemgetter(1), reverse=True)
i = i+1
yield sorted(freqs.iteritems(), key=operator.itemgetter(1), reverse=True)
yield sorted(freqs.items(), key=operator.itemgetter(1), reverse=True)
#
# The main function
#
for word_freqs in count_and_sort(sys.argv[1]):
print "-----------------------------"
print("-----------------------------")
for (w, c) in word_freqs[0:25]:
print w, ' - ', c
print(w, '-', c)

View File

@@ -1,4 +1,4 @@
Style #28
Style #29
==============================
Similar to the letterbox style, but where the 'things' have

View File

@@ -2,22 +2,22 @@
import sys, re, operator, string
from threading import Thread
from Queue import Queue
from queue import Queue
class ActiveWFObject(Thread):
def __init__(self):
Thread.__init__(self)
self.name = str(type(self))
self.queue = Queue()
self._stop = False
self._stopMe = False
self.start()
def run(self):
while not self._stop:
while not self._stopMe:
message = self.queue.get()
self._dispatch(message)
if message[0] == 'die':
self._stop = True
self._stopMe = True
def send(receiver, message):
receiver.queue.put(message)
@@ -94,7 +94,7 @@ class WordFrequencyManager(ActiveWFObject):
def _top25(self, message):
recipient = message[0]
freqs_sorted = sorted(self._word_freqs.iteritems(), key=operator.itemgetter(1), reverse=True)
freqs_sorted = sorted(self._word_freqs.items(), key=operator.itemgetter(1), reverse=True)
send(recipient, ['top25', freqs_sorted])
class WordFrequencyController(ActiveWFObject):
@@ -114,9 +114,9 @@ class WordFrequencyController(ActiveWFObject):
def _display(self, message):
word_freqs = message[0]
for (w, f) in word_freqs[0:25]:
print w, ' - ', f
print(w, '-', f)
send(self._storage_manager, ['die'])
self._stop = True
self._stopMe = True
#
# The main function

View File

@@ -1,4 +1,4 @@
Style #29
Style #30
==============================
Constraints:

View File

@@ -1,9 +1,9 @@
#!/usr/bin/env python
import re, sys, operator, Queue, threading
import re, sys, operator, queue, threading
# Two data spaces
word_space = Queue.Queue()
freq_space = Queue.Queue()
word_space = queue.Queue()
freq_space = queue.Queue()
stopwords = set(open('../stop_words.txt').read().split(','))
@@ -14,7 +14,7 @@ def process_words():
while True:
try:
word = word_space.get(timeout=1)
except Queue.Empty:
except queue.Empty:
break
if not word in stopwords:
if word in word_freqs:
@@ -41,12 +41,12 @@ for i in range(5):
word_freqs = {}
while not freq_space.empty():
freqs = freq_space.get()
for (k, v) in freqs.iteritems():
for (k, v) in freqs.items():
if k in word_freqs:
count = sum(item[k] for item in [freqs, word_freqs])
else:
count = freqs[k]
word_freqs[k] = count
for (w, c) in sorted(word_freqs.iteritems(), key=operator.itemgetter(1), reverse=True)[:25]:
print w, '-', c
for (w, c) in sorted(word_freqs.items(), key=operator.itemgetter(1), reverse=True)[:25]:
print(w, '-', c)

View File

@@ -1,4 +1,4 @@
Style #30
Style #31
==============================
Constraints:

View File

@@ -1,6 +1,6 @@
#!/usr/bin/env python
import sys, re, operator, string
from functools import reduce
#
# Functions for map reduce
#
@@ -10,7 +10,7 @@ def partition(data_str, nlines):
into chunks of nlines.
"""
lines = data_str.split('\n')
for i in xrange(0, len(lines), nlines):
for i in range(0, len(lines), nlines):
yield '\n'.join(lines[i:i+nlines])
def split_words(data_str):
@@ -43,12 +43,13 @@ def count_words(pairs_list_1, pairs_list_2):
and returns a list of pairs [(w1, frequency), ...],
where frequency is the sum of all the reported occurrences
"""
mapping = dict((k, v) for k, v in pairs_list_1)
for p in pairs_list_2:
mapping = {}
for pl in [pairs_list_1, pairs_list_2]:
for p in pl:
if p[0] in mapping:
mapping[p[0]] += p[1]
else:
mapping[p[0]] = 1
mapping[p[0]] = p[1]
return mapping.items()
#
@@ -66,9 +67,7 @@ def sort(word_freq):
# The main function
#
splits = map(split_words, partition(read_file(sys.argv[1]), 200))
splits.insert(0, []) # Normalize input to reduce
word_freqs = sort(reduce(count_words, splits))
for (w, c) in word_freqs[0:25]:
print w, ' - ', c
print(w, '-', c)

View File

@@ -1,4 +1,4 @@
Style #31
Style #32
==============================
Very similar to style #30, but with an additional twist

View File

@@ -1,6 +1,6 @@
#!/usr/bin/env python
import sys, re, operator, string
from functools import reduce
#
# Functions for map reduce
#
@@ -10,7 +10,7 @@ def partition(data_str, nlines):
into chunks of nlines.
"""
lines = data_str.split('\n')
for i in xrange(0, len(lines), nlines):
for i in range(0, len(lines), nlines):
yield '\n'.join(lines[i:i+nlines])
def split_words(data_str):
@@ -87,5 +87,4 @@ splits_per_word = regroup(splits)
word_freqs = sort(map(count_words, splits_per_word.items()))
for (w, c) in word_freqs[0:25]:
print w, ' - ', c
print(w, '-', c)

View File

@@ -1,4 +1,4 @@
Style #32
Style #33
==============================
Constraints:

View File

@@ -14,7 +14,7 @@ class FreqObserver(threading.Thread):
self.daemon,self._end = True, False
# freqs is the part of the model to be observed
self._freqs = freqs
self._freqs_0 = sorted(self._freqs.iteritems(), key=operator.itemgetter(1), reverse=True)[:25]
self._freqs_0 = sorted(self._freqs.items(), key=operator.itemgetter(1), reverse=True)[:25]
self.start()
def run(self):
@@ -28,7 +28,7 @@ class FreqObserver(threading.Thread):
def _update_view(self):
lock.acquire()
freqs_1 = sorted(self._freqs.iteritems(), key=operator.itemgetter(1), reverse=True)[:25]
freqs_1 = sorted(self._freqs.items(), key=operator.itemgetter(1), reverse=True)[:25]
lock.release()
if (freqs_1 != self._freqs_0):
self._update_display(freqs_1)
@@ -38,7 +38,7 @@ class FreqObserver(threading.Thread):
def refresh_screen(data):
# clear screen
cls()
print data
print(data)
sys.stdout.flush()
data_str = ""
@@ -57,7 +57,7 @@ class WordsCounter:
for line in f:
yield [w for w in re.findall('[a-z]{2,}', line.lower()) if w not in stopwords]
words = non_stop_words().next()
words = next(non_stop_words())
lock.acquire()
for w in words:
self.freqs[w] = 1 if w not in self.freqs else self.freqs[w]+1
@@ -66,8 +66,8 @@ class WordsCounter:
#
# The controller
#
print "Press space bar to fetch words from the file one by one"
print "Press ESC to switch to automatic mode"
print("Press space bar to fetch words from the file one by one")
print("Press ESC to switch to automatic mode")
model = WordsCounter()
view = FreqObserver(model.freqs)
with open(sys.argv[1]) as f:

View File

@@ -19,7 +19,7 @@ class WordFrequenciesModel:
for obs in self._observers:
obs.render()
except IOError:
print "File not found"
print("File not found")
self.freqs = {}
class WordFrequenciesView:
@@ -28,9 +28,9 @@ class WordFrequenciesView:
model.register(self)
def render(self):
sorted_freqs = sorted(self._model.freqs.iteritems(), key=operator.itemgetter(1), reverse=True)
sorted_freqs = sorted(self._model.freqs.items(), key=operator.itemgetter(1), reverse=True)
for (w, c) in sorted_freqs[:25]:
print w, '-', c
print(w, '-', c)
class WordFrequencyController:
def __init__(self, model, view):
@@ -39,7 +39,7 @@ class WordFrequencyController:
def run(self):
self._model.update(sys.argv[1])
while True:
print "Next file: "
print("Next file: ")
sys.stdout.flush()
filename = sys.stdin.readline().strip()
self._model.update(filename)

View File

@@ -14,7 +14,7 @@ class WordFrequenciesModel:
words = re.findall('[a-z]{2,}', open(path_to_file).read().lower())
self.freqs = collections.Counter(w for w in words if w not in self.stopwords)
except IOError:
print "File not found"
print("File not found")
self.freqs = {}
class WordFrequenciesView:
@@ -22,9 +22,9 @@ class WordFrequenciesView:
self._model = model
def render(self):
sorted_freqs = sorted(self._model.freqs.iteritems(), key=operator.itemgetter(1), reverse=True)
sorted_freqs = sorted(self._model.freqs.items(), key=operator.itemgetter(1), reverse=True)
for (w, c) in sorted_freqs[0:25]:
print w, '-', c
print(w, '-', c)
class WordFrequencyController:
def __init__(self, model, view):
@@ -33,7 +33,7 @@ class WordFrequencyController:
def run(self):
while True:
print "Next file: "
print("Next file: ")
sys.stdout.flush()
filename = sys.stdin.readline().strip()
self._model.update(filename)

View File

@@ -1,4 +1,4 @@
import sys, os
import os
#
# getch in a platform-independent way
@@ -72,4 +72,3 @@ def get_input():
elif key == 27: # ESC
interactive = False
return True

View File

@@ -1,4 +1,4 @@
Style #33
Style #34
==============================
REST = REpresentational State Transfer (http://www.ics.uci.edu/~fielding/pubs/dissertation/top.htm)

View File

@@ -24,16 +24,15 @@ def upload_get_handler(args):
return "Name of file to upload?", ["post", "file"]
def upload_post_handler(args):
def create_data(filename):
if filename in data:
def create_data(fn):
if fn in data:
return
word_freqs = {}
with open(filename) as f:
with open(fn) as f:
for w in [x.lower() for x in re.split("[^a-zA-Z]+", f.read()) if len(x) > 0 and x.lower() not in stops]:
word_freqs[w] = word_freqs.get(w, 0) + 1
word_freqsl = word_freqs.items()
word_freqsl.sort(lambda x, y: cmp(y[1], x[1]))
data[filename] = word_freqsl
wf = list(word_freqs.items())
data[fn] = sorted(wf,key=lambda x: x[1],reverse=True)
if args == None:
return error_state()
@@ -41,6 +40,7 @@ def upload_post_handler(args):
try:
create_data(filename)
except:
print("Unexpected error: %s" % sys.exc_info()[0])
return error_state()
return word_get_handler([filename, 0])
@@ -81,7 +81,7 @@ def handle_request(verb, uri, args):
# A very simple client "browser"
def render_and_get_input(state_representation, links):
print state_representation
print(state_representation)
sys.stdout.flush()
if type(links) is dict: # many possible next states
input = sys.stdin.readline().strip()

Some files were not shown because too many files have changed in this diff Show More