Finally rewrote the code for Python 3!

This commit is contained in:
Crista Lopes
2018-12-13 19:57:17 -08:00
parent 4449f0b7ba
commit ed8c3cabaa
47 changed files with 70 additions and 115 deletions

View File

@@ -1,6 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python
from __future__ import print_function
import sys, os, string import sys, os, string
# Utility for handling the intermediate 'secondary memory' # Utility for handling the intermediate 'secondary memory'

View File

@@ -4,7 +4,6 @@
# Author: Chris Meyers @ # Author: Chris Meyers @
# http://openbookproject.net/py4fun/forth/forth.html # http://openbookproject.net/py4fun/forth/forth.html
# #
from __future__ import print_function
import re import re
try: try:

View File

@@ -1,5 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python
from __future__ import print_function
import sys, re, operator, string import sys, re, operator, string
# #
@@ -92,7 +91,7 @@ def frequencies():
def sort(): def sort():
# Not in style, left as exercise # Not in style, left as exercise
stack.extend(sorted(stack.pop().iteritems(), key=operator.itemgetter(1))) stack.extend(sorted(stack.pop().items(), key=operator.itemgetter(1)))
# The main function # The main function
# #

View File

@@ -1,6 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python
from __future__ import print_function
import sys, string import sys, string
# the global list of [word, frequency] pairs # the global list of [word, frequency] pairs
word_freqs = [] word_freqs = []

View File

@@ -1,5 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python
from __future__ import print_function
import sys, string import sys, string
# The shared mutable data # The shared mutable data

View File

@@ -1,5 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python
from __future__ import print_function
import sys, re, operator, string import sys, re, operator, string
# #
@@ -59,7 +58,7 @@ def sort(word_freq):
and returns a list of pairs where the entries are and returns a list of pairs where the entries are
sorted by frequency sorted by frequency
""" """
return sorted(word_freq.iteritems(), key=operator.itemgetter(1), reverse=True) return sorted(word_freq.items(), key=operator.itemgetter(1), reverse=True)
def print_all(word_freqs): def print_all(word_freqs):
""" """

View File

@@ -1,5 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python
from __future__ import print_function
import re, string, sys import re, string, sys
stops = set(open("../stop_words.txt").read().split(",") + list(string.ascii_lowercase)) stops = set(open("../stop_words.txt").read().split(",") + list(string.ascii_lowercase))

View File

@@ -1,3 +1,2 @@
from __future__ import print_function
from functools import reduce from functools import reduce
print (reduce(lambda string, tup: string + tup[0] + ' - ' + str(tup[1]) + '\n', sorted( filter(lambda tup: tup[0] not in open(__import__('os').path.join(__import__('os').path.dirname(__file__), '..', 'stop_words.txt')).read().lower().split(','), reduce(lambda word_dict, word: word_dict if (word_dict.__setitem__(word, word_dict.get(word, 0) + 1) if True else None) else word_dict, filter(lambda word: len(word) > 1, (''.join(map(lambda letter: ' ' if ord(letter) not in set(range(ord('a'), ord('z') + 1)) else letter, open(__import__('sys').argv[1]).read().lower()))).split()), {}).iteritems()), key=lambda tup: tup[1], reverse=True)[0:25], '')) # hole in one? print (reduce(lambda string, tup: string + tup[0] + ' - ' + str(tup[1]) + '\n', sorted( filter(lambda tup: tup[0] not in open(__import__('os').path.join(__import__('os').path.dirname(__file__), '..', 'stop_words.txt')).read().lower().split(','), reduce(lambda word_dict, word: word_dict if (word_dict.__setitem__(word, word_dict.get(word, 0) + 1) if True else None) else word_dict, filter(lambda word: len(word) > 1, (''.join(map(lambda letter: ' ' if ord(letter) not in set(range(ord('a'), ord('z') + 1)) else letter, open(__import__('sys').argv[1]).read().lower()))).split()), {}).items()), key=lambda tup: tup[1], reverse=True)[0:25], '')) # hole in one?

View File

@@ -1,8 +1,6 @@
#!/usr/bin/env python #!/usr/bin/env python
# My golf score is slightly lower! # My golf score is slightly lower!
# Best wishes, Peter Norvig # Best wishes, Peter Norvig
from __future__ import print_function
import re, sys, collections import re, sys, collections
stopwords = set(open('../stop_words.txt').read().split(',')) stopwords = set(open('../stop_words.txt').read().split(','))

View File

@@ -1,5 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python
from __future__ import print_function
import heapq, re, sys import heapq, re, sys
words = re.findall("[a-z]{2,}", open(sys.argv[1]).read().lower()) words = re.findall("[a-z]{2,}", open(sys.argv[1]).read().lower())

View File

@@ -1,9 +1,8 @@
#!/usr/bin/env python #!/usr/bin/env python
from __future__ import print_function
import re, sys, operator import re, sys, operator
# Mileage may vary. If this crashes, make it lower # Mileage may vary. If this crashes, make it lower
RECURSION_LIMIT = 9500 RECURSION_LIMIT = 5000
# We add a few more, because, contrary to the name, # We add a few more, because, contrary to the name,
# this doesn't just rule recursion: it rules the # this doesn't just rule recursion: it rules the
# depth of the call stack # depth of the call stack
@@ -35,11 +34,12 @@ def wf_print(wordfreq):
stop_words = set(open('../stop_words.txt').read().split(',')) stop_words = set(open('../stop_words.txt').read().split(','))
words = re.findall('[a-z]{2,}', open(sys.argv[1]).read().lower()) words = re.findall('[a-z]{2,}', open(sys.argv[1]).read().lower())
word_freqs = {} word_freqs = {}
# Theoretically, we would just call count(words, stop_words, word_freqs) # Theoretically, we would just call count(words, stop_words, word_freqs)
# Try doing that and see what happens. # Try doing that and see what happens.
for i in range(0, len(words), RECURSION_LIMIT): for i in range(0, len(words), RECURSION_LIMIT):
count(words[i:i+RECURSION_LIMIT], stop_words, word_freqs) count(words[i:i+RECURSION_LIMIT], stop_words, word_freqs)
wf_print(sorted(word_freqs.iteritems(), key=operator.itemgetter(1), reverse=True)[:25]) wf_print(sorted(word_freqs.items(), key=operator.itemgetter(1), reverse=True)[:25])

View File

@@ -1,5 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python
from __future__ import print_function
import sys, re, operator, string import sys, re, operator, string
# #
@@ -37,7 +36,7 @@ def frequencies(word_list, func):
func(wf, print_text) func(wf, print_text)
def sort(wf, func): def sort(wf, func):
func(sorted(wf.iteritems(), key=operator.itemgetter(1), reverse=True), no_op) func(sorted(wf.items(), key=operator.itemgetter(1), reverse=True), no_op)
def print_text(word_freqs, func): def print_text(word_freqs, func):
for (w, c) in word_freqs[0:25]: for (w, c) in word_freqs[0:25]:

View File

@@ -1,5 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python
from __future__ import print_function
import sys, re, operator, string import sys, re, operator, string
# #
@@ -51,7 +50,7 @@ def frequencies(word_list):
return word_freqs return word_freqs
def sort(word_freq): def sort(word_freq):
return sorted(word_freq.iteritems(), key=operator.itemgetter(1), reverse=True) return sorted(word_freq.items(), key=operator.itemgetter(1), reverse=True)
def top25_freqs(word_freqs): def top25_freqs(word_freqs):
top25 = "" top25 = ""

View File

@@ -1,5 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python
from __future__ import print_function
import sys, re, operator, string import sys, re, operator, string
from abc import ABCMeta from abc import ABCMeta
@@ -56,7 +55,7 @@ class WordFrequencyManager(TFExercise):
self._word_freqs[word] = 1 self._word_freqs[word] = 1
def sorted(self): def sorted(self):
return sorted(self._word_freqs.iteritems(), key=operator.itemgetter(1), reverse=True) return sorted(self._word_freqs.items(), key=operator.itemgetter(1), reverse=True)
def info(self): def info(self):
return super(WordFrequencyManager, self).info() + ": My major data structure is a " + self._word_freqs.__class__.__name__ return super(WordFrequencyManager, self).info() + ": My major data structure is a " + self._word_freqs.__class__.__name__

View File

@@ -1,5 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python
from __future__ import print_function
import sys, re, operator, string import sys, re, operator, string
class DataStorageManager(): class DataStorageManager():
@@ -64,7 +63,7 @@ class WordFrequencyManager():
self._word_freqs[word] = 1 self._word_freqs[word] = 1
def _sorted(self): def _sorted(self):
return sorted(self._word_freqs.iteritems(), key=operator.itemgetter(1), reverse=True) return sorted(self._word_freqs.items(), key=operator.itemgetter(1), reverse=True)
class WordFrequencyController(): class WordFrequencyController():

View File

@@ -1,5 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python
from __future__ import print_function
import sys, re, operator, string import sys, re, operator, string
# Auxiliary functions that can't be lambdas # Auxiliary functions that can't be lambdas
@@ -35,7 +34,7 @@ stop_words_obj = {
word_freqs_obj = { word_freqs_obj = {
'freqs' : {}, 'freqs' : {},
'increment_count' : lambda w : increment_count(word_freqs_obj, w), 'increment_count' : lambda w : increment_count(word_freqs_obj, w),
'sorted' : lambda : sorted(word_freqs_obj['freqs'].iteritems(), key=operator.itemgetter(1), reverse=True) 'sorted' : lambda : sorted(word_freqs_obj['freqs'].items(), key=operator.itemgetter(1), reverse=True)
} }
data_storage_obj['init'](sys.argv[1]) data_storage_obj['init'](sys.argv[1])

View File

@@ -1,31 +1,27 @@
#!/usr/bin/env python #!/usr/bin/env python
from __future__ import print_function
import abc, sys, re, operator, string import abc, sys, re, operator, string
# #
# The abstract things # The abstract things
# #
class IDataStorage (object): class IDataStorage (metaclass=abc.ABCMeta):
""" Models the contents of the file """ """ Models the contents of the file """
__metaclass__ = abc.ABCMeta
@abc.abstractmethod @abc.abstractmethod
def words(self): def words(self):
""" Returns the words in storage """ """ Returns the words in storage """
pass pass
class IStopWordFilter (object): class IStopWordFilter (metaclass=abc.ABCMeta):
""" Models the stop word filter """ """ Models the stop word filter """
__metaclass__ = abc.ABCMeta
@abc.abstractmethod @abc.abstractmethod
def is_stop_word(self, word): def is_stop_word(self, word):
""" Checks whether the given word is a stop word """ """ Checks whether the given word is a stop word """
pass pass
class IWordFrequencyCounter(object): class IWordFrequencyCounter(metaclass=abc.ABCMeta):
""" Keeps the word frequency data """ """ Keeps the word frequency data """
__metaclass__ = abc.ABCMeta
@abc.abstractmethod @abc.abstractmethod
def increment_count(self, word): def increment_count(self, word):
@@ -72,15 +68,15 @@ class WordFrequencyManager:
self._word_freqs[word] = 1 self._word_freqs[word] = 1
def sorted(self): def sorted(self):
return sorted(self._word_freqs.iteritems(), key=operator.itemgetter(1), reverse=True) return sorted(self._word_freqs.items(), key=operator.itemgetter(1), reverse=True)
# #
# The wiring between abstract things and concrete things # The wiring between abstract things and concrete things
# #
IDataStorage.register(DataStorageManager) IDataStorage.register(subclass=DataStorageManager)
IStopWordFilter.register(StopWordManager) IStopWordFilter.register(subclass=StopWordManager)
IWordFrequencyCounter.register(WordFrequencyManager) IWordFrequencyCounter.register(subclass=WordFrequencyManager)
# #
# The application object # The application object

View File

@@ -1,5 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python
from __future__ import print_function
import sys, re, operator, string import sys, re, operator, string
# #
@@ -88,7 +87,7 @@ class WordFrequencyCounter:
self._word_freqs[word] = 1 self._word_freqs[word] = 1
def __print_freqs(self): def __print_freqs(self):
word_freqs = sorted(self._word_freqs.iteritems(), key=operator.itemgetter(1), reverse=True) word_freqs = sorted(self._word_freqs.items(), key=operator.itemgetter(1), reverse=True)
for (w, c) in word_freqs[0:25]: for (w, c) in word_freqs[0:25]:
print(w, ' - ', c) print(w, ' - ', c)

View File

@@ -1,5 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python
from __future__ import print_function
import sys, re, operator, string import sys, re, operator, string
# #
@@ -78,7 +77,7 @@ class WordFrequencyCounter:
self._word_freqs[word] = 1 self._word_freqs[word] = 1
def print_freqs(self, event): def print_freqs(self, event):
word_freqs = sorted(self._word_freqs.iteritems(), key=operator.itemgetter(1), reverse=True) word_freqs = sorted(self._word_freqs.items(), key=operator.itemgetter(1), reverse=True)
for (w, c) in word_freqs[0:25]: for (w, c) in word_freqs[0:25]:
print(w, ' - ', c) print(w, ' - ', c)

View File

@@ -1,5 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python
from __future__ import print_function
import sys, re, operator, string, inspect import sys, re, operator, string, inspect
def read_stop_words(): def read_stop_words():
@@ -35,7 +34,7 @@ def frequencies(word_list):
def sort(word_freq): def sort(word_freq):
# Meta-level data: locals() # Meta-level data: locals()
return sorted(locals()['word_freq'].iteritems(), key=operator.itemgetter(1), reverse=True) return sorted(locals()['word_freq'].items(), key=operator.itemgetter(1), reverse=True)
def main(): def main():
word_freqs = sort(frequencies(extract_words(sys.argv[1]))) word_freqs = sort(frequencies(extract_words(sys.argv[1])))

View File

@@ -1,5 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python
from __future__ import print_function
import sys, re, operator, string, os import sys, re, operator, string, os
# #
@@ -22,7 +21,7 @@ def frequencies_imp(word_list):
if len(sys.argv) > 1: if len(sys.argv) > 1:
extract_words_func = "lambda name : [x.lower() for x in re.split('[^a-zA-Z]+', open(name).read()) if len(x) > 0 and x.lower() not in stops]" extract_words_func = "lambda name : [x.lower() for x in re.split('[^a-zA-Z]+', open(name).read()) if len(x) > 0 and x.lower() not in stops]"
frequencies_func = "lambda wl : frequencies_imp(wl)" frequencies_func = "lambda wl : frequencies_imp(wl)"
sort_func = "lambda word_freq: sorted(word_freq.iteritems(), key=operator.itemgetter(1), reverse=True)" sort_func = "lambda word_freq: sorted(word_freq.items(), key=operator.itemgetter(1), reverse=True)"
filename = sys.argv[1] filename = sys.argv[1]
else: else:
extract_words_func = "lambda x: []" extract_words_func = "lambda x: []"

View File

@@ -1,5 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python
from __future__ import print_function
import sys, re, operator, string, time import sys, re, operator, string, time
# #
@@ -25,7 +24,7 @@ def frequencies(word_list):
return word_freqs return word_freqs
def sort(word_freq): def sort(word_freq):
return sorted(word_freq.iteritems(), key=operator.itemgetter(1), reverse=True) return sorted(word_freq.items(), key=operator.itemgetter(1), reverse=True)
# The side functionality # The side functionality
def profile(f): def profile(f):

View File

@@ -7,5 +7,5 @@ def top25(word_list):
word_freqs[w] += 1 word_freqs[w] += 1
else: else:
word_freqs[w] = 1 word_freqs[w] = 1
return sorted(word_freqs.iteritems(), key=operator.itemgetter(1), reverse=True)[:25] return sorted(word_freqs.items(), key=operator.itemgetter(1), reverse=True)[:25]

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -1,15 +1,16 @@
#!/usr/bin/env python #!/usr/bin/env python
from __future__ import print_function import sys, configparser, importlib.machinery
import sys, ConfigParser, imp
def load_plugins(): def load_plugins():
config = ConfigParser.ConfigParser() config = configparser.ConfigParser()
config.read("config.ini") config.read("config.ini")
words_plugin = config.get("Plugins", "words") words_plugin = config.get("Plugins", "words")
frequencies_plugin = config.get("Plugins", "frequencies") frequencies_plugin = config.get("Plugins", "frequencies")
global tfwords, tffreqs global tfwords, tffreqs
tfwords = imp.load_compiled('tfwords', words_plugin) # tfwords = importlib.load_compiled('tfwords', words_plugin)
tffreqs = imp.load_compiled('tffreqs', frequencies_plugin) # tffreqs = importlib.load_compiled('tffreqs', frequencies_plugin)
tfwords = importlib.machinery.SourcelessFileLoader('tfwords', words_plugin).load_module()
tffreqs = importlib.machinery.SourcelessFileLoader('tffreqs', frequencies_plugin).load_module()
load_plugins() load_plugins()
word_freqs = tffreqs.top25(tfwords.extract_words(sys.argv[1])) word_freqs = tffreqs.top25(tfwords.extract_words(sys.argv[1]))

View File

@@ -1,5 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python
from __future__ import print_function
import sys, re, operator, string, inspect import sys, re, operator, string, inspect
# #
@@ -50,7 +49,7 @@ def sort(word_freq):
if type(word_freq) is not dict or word_freq == {}: if type(word_freq) is not dict or word_freq == {}:
return [] return []
return sorted(word_freq.iteritems(), key=operator.itemgetter(1), reverse=True) return sorted(word_freq.items(), key=operator.itemgetter(1), reverse=True)
# #
# The main function # The main function

View File

@@ -1,6 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python
from __future__ import print_function
import sys, re, operator, string, traceback import sys, re, operator, string, traceback
# #
@@ -51,9 +49,9 @@ def sort(word_freq):
assert(word_freq != {}), "I need a non-empty dictionary!" assert(word_freq != {}), "I need a non-empty dictionary!"
try: try:
return sorted(word_freq.iteritems(), key=operator.itemgetter(1), reverse=True) return sorted(word_freq.items(), key=operator.itemgetter(1), reverse=True)
except Exception as e: except Exception as e:
print("Sorted threw {0}: {1}".format(e)) print("Sorted threw {0}".format(e))
raise e raise e
# #

View File

@@ -1,5 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python
from __future__ import print_function
import sys, re, operator, string import sys, re, operator, string
# #
@@ -40,7 +39,7 @@ def sort(word_freqs):
assert(type(word_freqs) is dict), "I need a dictionary! I quit!" assert(type(word_freqs) is dict), "I need a dictionary! I quit!"
assert(word_freqs != {}), "I need a non-empty dictionary! I quit!" assert(word_freqs != {}), "I need a non-empty dictionary! I quit!"
return sorted(word_freqs.iteritems(), key=operator.itemgetter(1), reverse=True) return sorted(word_freqs.items(), key=operator.itemgetter(1), reverse=True)
# #
# The main function # The main function

View File

@@ -1,5 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python
from __future__ import print_function
import sys, re, operator, string import sys, re, operator, string
# #
@@ -68,7 +67,7 @@ def sort(word_freqs):
assert(type(word_freqs) is dict), "I need a dictionary! I quit!" assert(type(word_freqs) is dict), "I need a dictionary! I quit!"
assert(word_freqs != {}), "I need a non-empty dictionary! I quit!" assert(word_freqs != {}), "I need a non-empty dictionary! I quit!"
return sorted(word_freqs.iteritems(), key=operator.itemgetter(1), reverse=True) return sorted(word_freqs.items(), key=operator.itemgetter(1), reverse=True)
def top25_freqs(word_freqs): def top25_freqs(word_freqs):
assert(type(word_freqs) is list), "I need a list! I quit!" assert(type(word_freqs) is list), "I need a list! I quit!"

View File

@@ -1,5 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python
from __future__ import print_function
import sys, re, operator, string, inspect import sys, re, operator, string, inspect
# #
@@ -42,7 +41,7 @@ def frequencies(word_list):
@AcceptTypes(dict) @AcceptTypes(dict)
def sort(word_freq): def sort(word_freq):
return sorted(word_freq.iteritems(), key=operator.itemgetter(1), reverse=True) return sorted(word_freq.items(), key=operator.itemgetter(1), reverse=True)
word_freqs = sort(frequencies(extract_words(sys.argv[1]))) word_freqs = sort(frequencies(extract_words(sys.argv[1])))
for (w, c) in word_freqs[0:25]: for (w, c) in word_freqs[0:25]:

View File

@@ -1,5 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python
from __future__ import print_function
import sys, re, operator, string import sys, re, operator, string
# #
@@ -58,7 +57,7 @@ def frequencies(word_list):
return word_freqs return word_freqs
def sort(word_freq): def sort(word_freq):
return sorted(word_freq.iteritems(), key=operator.itemgetter(1), reverse=True) return sorted(word_freq.items(), key=operator.itemgetter(1), reverse=True)
def top25_freqs(word_freqs): def top25_freqs(word_freqs):
top25 = "" top25 = ""

View File

@@ -1,5 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python
from __future__ import print_function
import sys, re, string, sqlite3, os.path import sys, re, string, sqlite3, os.path
# #

View File

@@ -1,5 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python
from __future__ import print_function
import sys, re, itertools, operator import sys, re, itertools, operator
# #
@@ -9,18 +8,18 @@ import sys, re, itertools, operator
all_words = [(), None] all_words = [(), None]
stop_words = [(), None] stop_words = [(), None]
non_stop_words = [(), lambda : \ non_stop_words = [(), lambda : \
map(lambda w : \ list(map(lambda w : \
w if w not in stop_words[0] else '',\ w if w not in stop_words[0] else '',\
all_words[0])] all_words[0]))]
unique_words = [(),lambda : unique_words = [(),lambda :
set([w for w in non_stop_words[0] if w!=''])] set([w for w in non_stop_words[0] if w!=''])]
counts = [(), lambda : counts = [(), lambda :
map(lambda w, word_list : word_list.count(w), \ list(map(lambda w, word_list : word_list.count(w), \
unique_words[0], \ unique_words[0], \
itertools.repeat(non_stop_words[0], \ itertools.repeat(non_stop_words[0], \
len(unique_words[0])))] len(unique_words[0]))))]
sorted_data = [(), lambda : sorted(zip(list(unique_words[0]), \ sorted_data = [(), lambda : sorted(zip(list(unique_words[0]), \
counts[0]), \ list(counts[0])), \
key=operator.itemgetter(1), key=operator.itemgetter(1),
reverse=True)] reverse=True)]
@@ -39,7 +38,6 @@ def update():
if c[1] != None: if c[1] != None:
c[0] = c[1]() c[0] = c[1]()
# Load the fixed data into the first 2 columns # Load the fixed data into the first 2 columns
all_words[0] = re.findall('[a-z]{2,}', open(sys.argv[1]).read().lower()) all_words[0] = re.findall('[a-z]{2,}', open(sys.argv[1]).read().lower())
stop_words[0] = set(open('../stop_words.txt').read().split(',')) stop_words[0] = set(open('../stop_words.txt').read().split(','))

View File

@@ -1,5 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python
from __future__ import print_function
import sys, operator, string import sys, operator, string
def characters(filename): def characters(filename):
@@ -36,9 +35,9 @@ def count_and_sort(filename):
for w in non_stop_words(filename): for w in non_stop_words(filename):
freqs[w] = 1 if w not in freqs else freqs[w]+1 freqs[w] = 1 if w not in freqs else freqs[w]+1
if i % 5000 == 0: if i % 5000 == 0:
yield sorted(freqs.iteritems(), key=operator.itemgetter(1), reverse=True) yield sorted(freqs.items(), key=operator.itemgetter(1), reverse=True)
i = i+1 i = i+1
yield sorted(freqs.iteritems(), key=operator.itemgetter(1), reverse=True) yield sorted(freqs.items(), key=operator.itemgetter(1), reverse=True)
# #
# The main function # The main function
# #

View File

@@ -1,9 +1,7 @@
#!/usr/bin/env python #!/usr/bin/env python
from __future__ import print_function
import sys, re, operator, string import sys, re, operator, string
from threading import Thread from threading import Thread
from Queue import Queue from queue import Queue
class ActiveWFObject(Thread): class ActiveWFObject(Thread):
def __init__(self): def __init__(self):
@@ -95,7 +93,7 @@ class WordFrequencyManager(ActiveWFObject):
def _top25(self, message): def _top25(self, message):
recipient = message[0] recipient = message[0]
freqs_sorted = sorted(self._word_freqs.iteritems(), key=operator.itemgetter(1), reverse=True) freqs_sorted = sorted(self._word_freqs.items(), key=operator.itemgetter(1), reverse=True)
send(recipient, ['top25', freqs_sorted]) send(recipient, ['top25', freqs_sorted])
class WordFrequencyController(ActiveWFObject): class WordFrequencyController(ActiveWFObject):

View File

@@ -1,10 +1,9 @@
#!/usr/bin/env python #!/usr/bin/env python
from __future__ import print_function import re, sys, operator, queue, threading
import re, sys, operator, Queue, threading
# Two data spaces # Two data spaces
word_space = Queue.Queue() word_space = queue.Queue()
freq_space = Queue.Queue() freq_space = queue.Queue()
stopwords = set(open('../stop_words.txt').read().split(',')) stopwords = set(open('../stop_words.txt').read().split(','))
@@ -15,7 +14,7 @@ def process_words():
while True: while True:
try: try:
word = word_space.get(timeout=1) word = word_space.get(timeout=1)
except Queue.Empty: except queue.Empty:
break break
if not word in stopwords: if not word in stopwords:
if word in word_freqs: if word in word_freqs:
@@ -42,12 +41,12 @@ for i in range(5):
word_freqs = {} word_freqs = {}
while not freq_space.empty(): while not freq_space.empty():
freqs = freq_space.get() freqs = freq_space.get()
for (k, v) in freqs.iteritems(): for (k, v) in freqs.items():
if k in word_freqs: if k in word_freqs:
count = sum(item[k] for item in [freqs, word_freqs]) count = sum(item[k] for item in [freqs, word_freqs])
else: else:
count = freqs[k] count = freqs[k]
word_freqs[k] = count word_freqs[k] = count
for (w, c) in sorted(word_freqs.iteritems(), key=operator.itemgetter(1), reverse=True)[:25]: for (w, c) in sorted(word_freqs.items(), key=operator.itemgetter(1), reverse=True)[:25]:
print(w, '-', c) print(w, '-', c)

View File

@@ -1,5 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python
from __future__ import print_function
import sys, re, operator, string import sys, re, operator, string
from functools import reduce from functools import reduce
@@ -51,12 +50,13 @@ def count_words(pairs_list_1, pairs_list_2):
and returns a list of pairs [(w1, frequency), ...], and returns a list of pairs [(w1, frequency), ...],
where frequency is the sum of all the reported occurrences where frequency is the sum of all the reported occurrences
""" """
mapping = dict((k, v) for k, v in pairs_list_1) mapping = {}
for p in pairs_list_2: for pl in [pairs_list_1, pairs_list_2]:
if p[0] in mapping: for p in pl:
mapping[p[0]] += p[1] if p[0] in mapping:
else: mapping[p[0]] += p[1]
mapping[p[0]] = 1 else:
mapping[p[0]] = p[1]
return mapping.items() return mapping.items()
# #
@@ -74,7 +74,6 @@ def sort(word_freq):
# The main function # The main function
# #
splits = map(split_words, partition(read_file(sys.argv[1]), 200)) splits = map(split_words, partition(read_file(sys.argv[1]), 200))
splits.insert(0, []) # Normalize input to reduce
word_freqs = sort(reduce(count_words, splits)) word_freqs = sort(reduce(count_words, splits))
for (w, c) in word_freqs[0:25]: for (w, c) in word_freqs[0:25]:

View File

@@ -1,5 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python
from __future__ import print_function
import sys, re, operator, string import sys, re, operator, string
from functools import reduce from functools import reduce

View File

@@ -1,5 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python
from __future__ import print_function
import sys, operator, string, os, threading, re import sys, operator, string, os, threading, re
from util import getch, cls, get_input from util import getch, cls, get_input
from time import sleep from time import sleep
@@ -15,7 +14,7 @@ class FreqObserver(threading.Thread):
self.daemon,self._end = True, False self.daemon,self._end = True, False
# freqs is the part of the model to be observed # freqs is the part of the model to be observed
self._freqs = freqs self._freqs = freqs
self._freqs_0 = sorted(self._freqs.iteritems(), key=operator.itemgetter(1), reverse=True)[:25] self._freqs_0 = sorted(self._freqs.items(), key=operator.itemgetter(1), reverse=True)[:25]
self.start() self.start()
def run(self): def run(self):
@@ -29,7 +28,7 @@ class FreqObserver(threading.Thread):
def _update_view(self): def _update_view(self):
lock.acquire() lock.acquire()
freqs_1 = sorted(self._freqs.iteritems(), key=operator.itemgetter(1), reverse=True)[:25] freqs_1 = sorted(self._freqs.items(), key=operator.itemgetter(1), reverse=True)[:25]
lock.release() lock.release()
if (freqs_1 != self._freqs_0): if (freqs_1 != self._freqs_0):
self._update_display(freqs_1) self._update_display(freqs_1)

View File

@@ -1,5 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python
from __future__ import print_function
import sys, re, operator, collections import sys, re, operator, collections
class WordFrequenciesModel: class WordFrequenciesModel:
@@ -29,7 +28,7 @@ class WordFrequenciesView:
model.register(self) model.register(self)
def render(self): def render(self):
sorted_freqs = sorted(self._model.freqs.iteritems(), key=operator.itemgetter(1), reverse=True) sorted_freqs = sorted(self._model.freqs.items(), key=operator.itemgetter(1), reverse=True)
for (w, c) in sorted_freqs[:25]: for (w, c) in sorted_freqs[:25]:
print(w, '-', c) print(w, '-', c)

View File

@@ -1,5 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python
from __future__ import print_function
import sys, re, operator, collections import sys, re, operator, collections
class WordFrequenciesModel: class WordFrequenciesModel:
@@ -23,7 +22,7 @@ class WordFrequenciesView:
self._model = model self._model = model
def render(self): def render(self):
sorted_freqs = sorted(self._model.freqs.iteritems(), key=operator.itemgetter(1), reverse=True) sorted_freqs = sorted(self._model.freqs.items(), key=operator.itemgetter(1), reverse=True)
for (w, c) in sorted_freqs[0:25]: for (w, c) in sorted_freqs[0:25]:
print(w, '-', c) print(w, '-', c)

View File

@@ -1,5 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python
from __future__ import print_function
import re, string, sys import re, string, sys
with open("../stop_words.txt") as f: with open("../stop_words.txt") as f:
@@ -32,9 +31,8 @@ def upload_post_handler(args):
with open(filename) as f: with open(filename) as f:
for w in [x.lower() for x in re.split("[^a-zA-Z]+", f.read()) if len(x) > 0 and x.lower() not in stops]: for w in [x.lower() for x in re.split("[^a-zA-Z]+", f.read()) if len(x) > 0 and x.lower() not in stops]:
word_freqs[w] = word_freqs.get(w, 0) + 1 word_freqs[w] = word_freqs.get(w, 0) + 1
word_freqsl = word_freqs.items() word_freqsl = list(word_freqs.items())
word_freqsl.sort(key=lambda x: x[1], reverse=True) data[filename] = sorted(word_freqsl, key=lambda x: x[1], reverse=True)
data[filename] = word_freqsl
if args == None: if args == None:
return error_state() return error_state()
@@ -42,6 +40,7 @@ def upload_post_handler(args):
try: try:
create_data(filename) create_data(filename)
except: except:
print("Unexpected error: %s" % sys.exc_info()[0])
return error_state() return error_state()
return word_get_handler([filename, 0]) return word_get_handler([filename, 0])

View File

@@ -1,6 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python
from __future__ import print_function
import sys, re, operator, string, inspect import sys, re, operator, string, inspect
# Reusing the defensive style program to illustrate this # Reusing the defensive style program to illustrate this
@@ -63,7 +61,7 @@ def sort(word_freq):
sorted by frequency sorted by frequency
""" """
if type(word_freq) is dict and word_freq != {}: if type(word_freq) is dict and word_freq != {}:
return sorted(word_freq.iteritems(), key=operator.itemgetter(1), reverse=True) return sorted(word_freq.items(), key=operator.itemgetter(1), reverse=True)
else: else:
return [] return []

View File

@@ -9,6 +9,9 @@ with the following command line:
python tf-NN.py ../pride-and-prejudice.txt python tf-NN.py ../pride-and-prejudice.txt
``` ```
NOTE: the code in the master branch is written for Python 3. If you are looking
for a version for Python 2.7, check out the 2.7 branch or the v1.0 release.
Explanations and historical context for all these styles can be found in the Explanations and historical context for all these styles can be found in the
companion book [Exercises in Programming Style](http://www.amazon.com/Exercises-Programming-Style-Cristina-Videira/dp/1482227371/). companion book [Exercises in Programming Style](http://www.amazon.com/Exercises-Programming-Style-Cristina-Videira/dp/1482227371/).