Finally rewrote the code for Python3!
This commit is contained in:
@@ -1,6 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
from __future__ import print_function
|
||||
import sys, os, string
|
||||
|
||||
# Utility for handling the intermediate 'secondary memory'
|
||||
|
||||
@@ -4,7 +4,6 @@
|
||||
# Author: Chris Meyers @
|
||||
# http://openbookproject.net/py4fun/forth/forth.html
|
||||
#
|
||||
from __future__ import print_function
|
||||
import re
|
||||
|
||||
try:
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import print_function
|
||||
import sys, re, operator, string
|
||||
|
||||
#
|
||||
@@ -92,7 +91,7 @@ def frequencies():
|
||||
|
||||
def sort():
|
||||
# Not in style, left as exercise
|
||||
stack.extend(sorted(stack.pop().iteritems(), key=operator.itemgetter(1)))
|
||||
stack.extend(sorted(stack.pop().items(), key=operator.itemgetter(1)))
|
||||
|
||||
# The main function
|
||||
#
|
||||
|
||||
@@ -1,6 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
from __future__ import print_function
|
||||
import sys, string
|
||||
# the global list of [word, frequency] pairs
|
||||
word_freqs = []
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import print_function
|
||||
import sys, string
|
||||
|
||||
# The shared mutable data
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import print_function
|
||||
import sys, re, operator, string
|
||||
|
||||
#
|
||||
@@ -59,7 +58,7 @@ def sort(word_freq):
|
||||
and returns a list of pairs where the entries are
|
||||
sorted by frequency
|
||||
"""
|
||||
return sorted(word_freq.iteritems(), key=operator.itemgetter(1), reverse=True)
|
||||
return sorted(word_freq.items(), key=operator.itemgetter(1), reverse=True)
|
||||
|
||||
def print_all(word_freqs):
|
||||
"""
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import print_function
|
||||
import re, string, sys
|
||||
|
||||
stops = set(open("../stop_words.txt").read().split(",") + list(string.ascii_lowercase))
|
||||
|
||||
@@ -1,3 +1,2 @@
|
||||
from __future__ import print_function
|
||||
from functools import reduce
|
||||
print (reduce(lambda string, tup: string + tup[0] + ' - ' + str(tup[1]) + '\n', sorted( filter(lambda tup: tup[0] not in open(__import__('os').path.join(__import__('os').path.dirname(__file__), '..', 'stop_words.txt')).read().lower().split(','), reduce(lambda word_dict, word: word_dict if (word_dict.__setitem__(word, word_dict.get(word, 0) + 1) if True else None) else word_dict, filter(lambda word: len(word) > 1, (''.join(map(lambda letter: ' ' if ord(letter) not in set(range(ord('a'), ord('z') + 1)) else letter, open(__import__('sys').argv[1]).read().lower()))).split()), {}).iteritems()), key=lambda tup: tup[1], reverse=True)[0:25], '')) # hole in one?
|
||||
print (reduce(lambda string, tup: string + tup[0] + ' - ' + str(tup[1]) + '\n', sorted( filter(lambda tup: tup[0] not in open(__import__('os').path.join(__import__('os').path.dirname(__file__), '..', 'stop_words.txt')).read().lower().split(','), reduce(lambda word_dict, word: word_dict if (word_dict.__setitem__(word, word_dict.get(word, 0) + 1) if True else None) else word_dict, filter(lambda word: len(word) > 1, (''.join(map(lambda letter: ' ' if ord(letter) not in set(range(ord('a'), ord('z') + 1)) else letter, open(__import__('sys').argv[1]).read().lower()))).split()), {}).items()), key=lambda tup: tup[1], reverse=True)[0:25], '')) # hole in one?
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
#!/usr/bin/env python
|
||||
# My golf score is slightly lower!
|
||||
# Best wishes, Peter Norvig
|
||||
|
||||
from __future__ import print_function
|
||||
import re, sys, collections
|
||||
|
||||
stopwords = set(open('../stop_words.txt').read().split(','))
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import print_function
|
||||
import heapq, re, sys
|
||||
|
||||
words = re.findall("[a-z]{2,}", open(sys.argv[1]).read().lower())
|
||||
|
||||
@@ -1,9 +1,8 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import print_function
|
||||
import re, sys, operator
|
||||
|
||||
# Mileage may vary. If this crashes, make it lower
|
||||
RECURSION_LIMIT = 9500
|
||||
RECURSION_LIMIT = 5000
|
||||
# We add a few more, because, contrary to the name,
|
||||
# this doesn't just rule recursion: it rules the
|
||||
# depth of the call stack
|
||||
@@ -35,11 +34,12 @@ def wf_print(wordfreq):
|
||||
|
||||
stop_words = set(open('../stop_words.txt').read().split(','))
|
||||
words = re.findall('[a-z]{2,}', open(sys.argv[1]).read().lower())
|
||||
|
||||
word_freqs = {}
|
||||
# Theoretically, we would just call count(words, stop_words, word_freqs)
|
||||
# Try doing that and see what happens.
|
||||
for i in range(0, len(words), RECURSION_LIMIT):
|
||||
count(words[i:i+RECURSION_LIMIT], stop_words, word_freqs)
|
||||
|
||||
wf_print(sorted(word_freqs.iteritems(), key=operator.itemgetter(1), reverse=True)[:25])
|
||||
wf_print(sorted(word_freqs.items(), key=operator.itemgetter(1), reverse=True)[:25])
|
||||
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import print_function
|
||||
import sys, re, operator, string
|
||||
|
||||
#
|
||||
@@ -37,7 +36,7 @@ def frequencies(word_list, func):
|
||||
func(wf, print_text)
|
||||
|
||||
def sort(wf, func):
|
||||
func(sorted(wf.iteritems(), key=operator.itemgetter(1), reverse=True), no_op)
|
||||
func(sorted(wf.items(), key=operator.itemgetter(1), reverse=True), no_op)
|
||||
|
||||
def print_text(word_freqs, func):
|
||||
for (w, c) in word_freqs[0:25]:
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import print_function
|
||||
import sys, re, operator, string
|
||||
|
||||
#
|
||||
@@ -51,7 +50,7 @@ def frequencies(word_list):
|
||||
return word_freqs
|
||||
|
||||
def sort(word_freq):
|
||||
return sorted(word_freq.iteritems(), key=operator.itemgetter(1), reverse=True)
|
||||
return sorted(word_freq.items(), key=operator.itemgetter(1), reverse=True)
|
||||
|
||||
def top25_freqs(word_freqs):
|
||||
top25 = ""
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import print_function
|
||||
import sys, re, operator, string
|
||||
from abc import ABCMeta
|
||||
|
||||
@@ -56,7 +55,7 @@ class WordFrequencyManager(TFExercise):
|
||||
self._word_freqs[word] = 1
|
||||
|
||||
def sorted(self):
|
||||
return sorted(self._word_freqs.iteritems(), key=operator.itemgetter(1), reverse=True)
|
||||
return sorted(self._word_freqs.items(), key=operator.itemgetter(1), reverse=True)
|
||||
|
||||
def info(self):
|
||||
return super(WordFrequencyManager, self).info() + ": My major data structure is a " + self._word_freqs.__class__.__name__
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import print_function
|
||||
import sys, re, operator, string
|
||||
|
||||
class DataStorageManager():
|
||||
@@ -64,7 +63,7 @@ class WordFrequencyManager():
|
||||
self._word_freqs[word] = 1
|
||||
|
||||
def _sorted(self):
|
||||
return sorted(self._word_freqs.iteritems(), key=operator.itemgetter(1), reverse=True)
|
||||
return sorted(self._word_freqs.items(), key=operator.itemgetter(1), reverse=True)
|
||||
|
||||
class WordFrequencyController():
|
||||
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import print_function
|
||||
import sys, re, operator, string
|
||||
|
||||
# Auxiliary functions that can't be lambdas
|
||||
@@ -35,7 +34,7 @@ stop_words_obj = {
|
||||
word_freqs_obj = {
|
||||
'freqs' : {},
|
||||
'increment_count' : lambda w : increment_count(word_freqs_obj, w),
|
||||
'sorted' : lambda : sorted(word_freqs_obj['freqs'].iteritems(), key=operator.itemgetter(1), reverse=True)
|
||||
'sorted' : lambda : sorted(word_freqs_obj['freqs'].items(), key=operator.itemgetter(1), reverse=True)
|
||||
}
|
||||
|
||||
data_storage_obj['init'](sys.argv[1])
|
||||
|
||||
@@ -1,31 +1,27 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import print_function
|
||||
import abc, sys, re, operator, string
|
||||
|
||||
#
|
||||
# The abstract things
|
||||
#
|
||||
class IDataStorage (object):
|
||||
class IDataStorage (metaclass=abc.ABCMeta):
|
||||
""" Models the contents of the file """
|
||||
__metaclass__ = abc.ABCMeta
|
||||
|
||||
@abc.abstractmethod
|
||||
def words(self):
|
||||
""" Returns the words in storage """
|
||||
pass
|
||||
|
||||
class IStopWordFilter (object):
|
||||
class IStopWordFilter (metaclass=abc.ABCMeta):
|
||||
""" Models the stop word filter """
|
||||
__metaclass__ = abc.ABCMeta
|
||||
|
||||
@abc.abstractmethod
|
||||
def is_stop_word(self, word):
|
||||
""" Checks whether the given word is a stop word """
|
||||
pass
|
||||
|
||||
class IWordFrequencyCounter(object):
|
||||
class IWordFrequencyCounter(metaclass=abc.ABCMeta):
|
||||
""" Keeps the word frequency data """
|
||||
__metaclass__ = abc.ABCMeta
|
||||
|
||||
@abc.abstractmethod
|
||||
def increment_count(self, word):
|
||||
@@ -72,15 +68,15 @@ class WordFrequencyManager:
|
||||
self._word_freqs[word] = 1
|
||||
|
||||
def sorted(self):
|
||||
return sorted(self._word_freqs.iteritems(), key=operator.itemgetter(1), reverse=True)
|
||||
return sorted(self._word_freqs.items(), key=operator.itemgetter(1), reverse=True)
|
||||
|
||||
|
||||
#
|
||||
# The wiring between abstract things and concrete things
|
||||
#
|
||||
IDataStorage.register(DataStorageManager)
|
||||
IStopWordFilter.register(StopWordManager)
|
||||
IWordFrequencyCounter.register(WordFrequencyManager)
|
||||
IDataStorage.register(subclass=DataStorageManager)
|
||||
IStopWordFilter.register(subclass=StopWordManager)
|
||||
IWordFrequencyCounter.register(subclass=WordFrequencyManager)
|
||||
|
||||
#
|
||||
# The application object
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import print_function
|
||||
import sys, re, operator, string
|
||||
|
||||
#
|
||||
@@ -88,7 +87,7 @@ class WordFrequencyCounter:
|
||||
self._word_freqs[word] = 1
|
||||
|
||||
def __print_freqs(self):
|
||||
word_freqs = sorted(self._word_freqs.iteritems(), key=operator.itemgetter(1), reverse=True)
|
||||
word_freqs = sorted(self._word_freqs.items(), key=operator.itemgetter(1), reverse=True)
|
||||
for (w, c) in word_freqs[0:25]:
|
||||
print(w, ' - ', c)
|
||||
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import print_function
|
||||
import sys, re, operator, string
|
||||
|
||||
#
|
||||
@@ -78,7 +77,7 @@ class WordFrequencyCounter:
|
||||
self._word_freqs[word] = 1
|
||||
|
||||
def print_freqs(self, event):
|
||||
word_freqs = sorted(self._word_freqs.iteritems(), key=operator.itemgetter(1), reverse=True)
|
||||
word_freqs = sorted(self._word_freqs.items(), key=operator.itemgetter(1), reverse=True)
|
||||
for (w, c) in word_freqs[0:25]:
|
||||
print(w, ' - ', c)
|
||||
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import print_function
|
||||
import sys, re, operator, string, inspect
|
||||
|
||||
def read_stop_words():
|
||||
@@ -35,7 +34,7 @@ def frequencies(word_list):
|
||||
|
||||
def sort(word_freq):
|
||||
# Meta-level data: locals()
|
||||
return sorted(locals()['word_freq'].iteritems(), key=operator.itemgetter(1), reverse=True)
|
||||
return sorted(locals()['word_freq'].items(), key=operator.itemgetter(1), reverse=True)
|
||||
|
||||
def main():
|
||||
word_freqs = sort(frequencies(extract_words(sys.argv[1])))
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import print_function
|
||||
import sys, re, operator, string, os
|
||||
|
||||
#
|
||||
@@ -22,7 +21,7 @@ def frequencies_imp(word_list):
|
||||
if len(sys.argv) > 1:
|
||||
extract_words_func = "lambda name : [x.lower() for x in re.split('[^a-zA-Z]+', open(name).read()) if len(x) > 0 and x.lower() not in stops]"
|
||||
frequencies_func = "lambda wl : frequencies_imp(wl)"
|
||||
sort_func = "lambda word_freq: sorted(word_freq.iteritems(), key=operator.itemgetter(1), reverse=True)"
|
||||
sort_func = "lambda word_freq: sorted(word_freq.items(), key=operator.itemgetter(1), reverse=True)"
|
||||
filename = sys.argv[1]
|
||||
else:
|
||||
extract_words_func = "lambda x: []"
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import print_function
|
||||
import sys, re, operator, string, time
|
||||
|
||||
#
|
||||
@@ -25,7 +24,7 @@ def frequencies(word_list):
|
||||
return word_freqs
|
||||
|
||||
def sort(word_freq):
|
||||
return sorted(word_freq.iteritems(), key=operator.itemgetter(1), reverse=True)
|
||||
return sorted(word_freq.items(), key=operator.itemgetter(1), reverse=True)
|
||||
|
||||
# The side functionality
|
||||
def profile(f):
|
||||
|
||||
@@ -7,5 +7,5 @@ def top25(word_list):
|
||||
word_freqs[w] += 1
|
||||
else:
|
||||
word_freqs[w] = 1
|
||||
return sorted(word_freqs.iteritems(), key=operator.itemgetter(1), reverse=True)[:25]
|
||||
return sorted(word_freqs.items(), key=operator.itemgetter(1), reverse=True)[:25]
|
||||
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1,15 +1,16 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import print_function
|
||||
import sys, ConfigParser, imp
|
||||
import sys, configparser, importlib.machinery
|
||||
|
||||
def load_plugins():
|
||||
config = ConfigParser.ConfigParser()
|
||||
config = configparser.ConfigParser()
|
||||
config.read("config.ini")
|
||||
words_plugin = config.get("Plugins", "words")
|
||||
frequencies_plugin = config.get("Plugins", "frequencies")
|
||||
global tfwords, tffreqs
|
||||
tfwords = imp.load_compiled('tfwords', words_plugin)
|
||||
tffreqs = imp.load_compiled('tffreqs', frequencies_plugin)
|
||||
# tfwords = importlib.load_compiled('tfwords', words_plugin)
|
||||
# tffreqs = importlib.load_compiled('tffreqs', frequencies_plugin)
|
||||
tfwords = importlib.machinery.SourcelessFileLoader('tfwords', words_plugin).load_module()
|
||||
tffreqs = importlib.machinery.SourcelessFileLoader('tffreqs', frequencies_plugin).load_module()
|
||||
|
||||
load_plugins()
|
||||
word_freqs = tffreqs.top25(tfwords.extract_words(sys.argv[1]))
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import print_function
|
||||
import sys, re, operator, string, inspect
|
||||
|
||||
#
|
||||
@@ -50,7 +49,7 @@ def sort(word_freq):
|
||||
if type(word_freq) is not dict or word_freq == {}:
|
||||
return []
|
||||
|
||||
return sorted(word_freq.iteritems(), key=operator.itemgetter(1), reverse=True)
|
||||
return sorted(word_freq.items(), key=operator.itemgetter(1), reverse=True)
|
||||
|
||||
#
|
||||
# The main function
|
||||
|
||||
@@ -1,6 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
from __future__ import print_function
|
||||
import sys, re, operator, string, traceback
|
||||
|
||||
#
|
||||
@@ -51,9 +49,9 @@ def sort(word_freq):
|
||||
assert(word_freq != {}), "I need a non-empty dictionary!"
|
||||
|
||||
try:
|
||||
return sorted(word_freq.iteritems(), key=operator.itemgetter(1), reverse=True)
|
||||
return sorted(word_freq.items(), key=operator.itemgetter(1), reverse=True)
|
||||
except Exception as e:
|
||||
print("Sorted threw {0}: {1}".format(e))
|
||||
print("Sorted threw {0}".format(e))
|
||||
raise e
|
||||
|
||||
#
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import print_function
|
||||
import sys, re, operator, string
|
||||
|
||||
#
|
||||
@@ -40,7 +39,7 @@ def sort(word_freqs):
|
||||
assert(type(word_freqs) is dict), "I need a dictionary! I quit!"
|
||||
assert(word_freqs != {}), "I need a non-empty dictionary! I quit!"
|
||||
|
||||
return sorted(word_freqs.iteritems(), key=operator.itemgetter(1), reverse=True)
|
||||
return sorted(word_freqs.items(), key=operator.itemgetter(1), reverse=True)
|
||||
|
||||
#
|
||||
# The main function
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import print_function
|
||||
import sys, re, operator, string
|
||||
|
||||
#
|
||||
@@ -68,7 +67,7 @@ def sort(word_freqs):
|
||||
assert(type(word_freqs) is dict), "I need a dictionary! I quit!"
|
||||
assert(word_freqs != {}), "I need a non-empty dictionary! I quit!"
|
||||
|
||||
return sorted(word_freqs.iteritems(), key=operator.itemgetter(1), reverse=True)
|
||||
return sorted(word_freqs.items(), key=operator.itemgetter(1), reverse=True)
|
||||
|
||||
def top25_freqs(word_freqs):
|
||||
assert(type(word_freqs) is list), "I need a list! I quit!"
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import print_function
|
||||
import sys, re, operator, string, inspect
|
||||
|
||||
#
|
||||
@@ -42,7 +41,7 @@ def frequencies(word_list):
|
||||
|
||||
@AcceptTypes(dict)
|
||||
def sort(word_freq):
|
||||
return sorted(word_freq.iteritems(), key=operator.itemgetter(1), reverse=True)
|
||||
return sorted(word_freq.items(), key=operator.itemgetter(1), reverse=True)
|
||||
|
||||
word_freqs = sort(frequencies(extract_words(sys.argv[1])))
|
||||
for (w, c) in word_freqs[0:25]:
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import print_function
|
||||
import sys, re, operator, string
|
||||
|
||||
#
|
||||
@@ -58,7 +57,7 @@ def frequencies(word_list):
|
||||
return word_freqs
|
||||
|
||||
def sort(word_freq):
|
||||
return sorted(word_freq.iteritems(), key=operator.itemgetter(1), reverse=True)
|
||||
return sorted(word_freq.items(), key=operator.itemgetter(1), reverse=True)
|
||||
|
||||
def top25_freqs(word_freqs):
|
||||
top25 = ""
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import print_function
|
||||
import sys, re, string, sqlite3, os.path
|
||||
|
||||
#
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import print_function
|
||||
import sys, re, itertools, operator
|
||||
|
||||
#
|
||||
@@ -9,18 +8,18 @@ import sys, re, itertools, operator
|
||||
all_words = [(), None]
|
||||
stop_words = [(), None]
|
||||
non_stop_words = [(), lambda : \
|
||||
map(lambda w : \
|
||||
list(map(lambda w : \
|
||||
w if w not in stop_words[0] else '',\
|
||||
all_words[0])]
|
||||
all_words[0]))]
|
||||
unique_words = [(),lambda :
|
||||
set([w for w in non_stop_words[0] if w!=''])]
|
||||
counts = [(), lambda :
|
||||
map(lambda w, word_list : word_list.count(w), \
|
||||
list(map(lambda w, word_list : word_list.count(w), \
|
||||
unique_words[0], \
|
||||
itertools.repeat(non_stop_words[0], \
|
||||
len(unique_words[0])))]
|
||||
len(unique_words[0]))))]
|
||||
sorted_data = [(), lambda : sorted(zip(list(unique_words[0]), \
|
||||
counts[0]), \
|
||||
list(counts[0])), \
|
||||
key=operator.itemgetter(1),
|
||||
reverse=True)]
|
||||
|
||||
@@ -39,7 +38,6 @@ def update():
|
||||
if c[1] != None:
|
||||
c[0] = c[1]()
|
||||
|
||||
|
||||
# Load the fixed data into the first 2 columns
|
||||
all_words[0] = re.findall('[a-z]{2,}', open(sys.argv[1]).read().lower())
|
||||
stop_words[0] = set(open('../stop_words.txt').read().split(','))
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import print_function
|
||||
import sys, operator, string
|
||||
|
||||
def characters(filename):
|
||||
@@ -36,9 +35,9 @@ def count_and_sort(filename):
|
||||
for w in non_stop_words(filename):
|
||||
freqs[w] = 1 if w not in freqs else freqs[w]+1
|
||||
if i % 5000 == 0:
|
||||
yield sorted(freqs.iteritems(), key=operator.itemgetter(1), reverse=True)
|
||||
yield sorted(freqs.items(), key=operator.itemgetter(1), reverse=True)
|
||||
i = i+1
|
||||
yield sorted(freqs.iteritems(), key=operator.itemgetter(1), reverse=True)
|
||||
yield sorted(freqs.items(), key=operator.itemgetter(1), reverse=True)
|
||||
#
|
||||
# The main function
|
||||
#
|
||||
|
||||
@@ -1,9 +1,7 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
from __future__ import print_function
|
||||
import sys, re, operator, string
|
||||
from threading import Thread
|
||||
from Queue import Queue
|
||||
from queue import Queue
|
||||
|
||||
class ActiveWFObject(Thread):
|
||||
def __init__(self):
|
||||
@@ -95,7 +93,7 @@ class WordFrequencyManager(ActiveWFObject):
|
||||
|
||||
def _top25(self, message):
|
||||
recipient = message[0]
|
||||
freqs_sorted = sorted(self._word_freqs.iteritems(), key=operator.itemgetter(1), reverse=True)
|
||||
freqs_sorted = sorted(self._word_freqs.items(), key=operator.itemgetter(1), reverse=True)
|
||||
send(recipient, ['top25', freqs_sorted])
|
||||
|
||||
class WordFrequencyController(ActiveWFObject):
|
||||
|
||||
@@ -1,10 +1,9 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import print_function
|
||||
import re, sys, operator, Queue, threading
|
||||
import re, sys, operator, queue, threading
|
||||
|
||||
# Two data spaces
|
||||
word_space = Queue.Queue()
|
||||
freq_space = Queue.Queue()
|
||||
word_space = queue.Queue()
|
||||
freq_space = queue.Queue()
|
||||
|
||||
stopwords = set(open('../stop_words.txt').read().split(','))
|
||||
|
||||
@@ -15,7 +14,7 @@ def process_words():
|
||||
while True:
|
||||
try:
|
||||
word = word_space.get(timeout=1)
|
||||
except Queue.Empty:
|
||||
except queue.Empty:
|
||||
break
|
||||
if not word in stopwords:
|
||||
if word in word_freqs:
|
||||
@@ -42,12 +41,12 @@ for i in range(5):
|
||||
word_freqs = {}
|
||||
while not freq_space.empty():
|
||||
freqs = freq_space.get()
|
||||
for (k, v) in freqs.iteritems():
|
||||
for (k, v) in freqs.items():
|
||||
if k in word_freqs:
|
||||
count = sum(item[k] for item in [freqs, word_freqs])
|
||||
else:
|
||||
count = freqs[k]
|
||||
word_freqs[k] = count
|
||||
|
||||
for (w, c) in sorted(word_freqs.iteritems(), key=operator.itemgetter(1), reverse=True)[:25]:
|
||||
for (w, c) in sorted(word_freqs.items(), key=operator.itemgetter(1), reverse=True)[:25]:
|
||||
print(w, '-', c)
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import print_function
|
||||
import sys, re, operator, string
|
||||
from functools import reduce
|
||||
|
||||
@@ -51,12 +50,13 @@ def count_words(pairs_list_1, pairs_list_2):
|
||||
and returns a list of pairs [(w1, frequency), ...],
|
||||
where frequency is the sum of all the reported occurrences
|
||||
"""
|
||||
mapping = dict((k, v) for k, v in pairs_list_1)
|
||||
for p in pairs_list_2:
|
||||
if p[0] in mapping:
|
||||
mapping[p[0]] += p[1]
|
||||
else:
|
||||
mapping[p[0]] = 1
|
||||
mapping = {}
|
||||
for pl in [pairs_list_1, pairs_list_2]:
|
||||
for p in pl:
|
||||
if p[0] in mapping:
|
||||
mapping[p[0]] += p[1]
|
||||
else:
|
||||
mapping[p[0]] = p[1]
|
||||
return mapping.items()
|
||||
|
||||
#
|
||||
@@ -74,7 +74,6 @@ def sort(word_freq):
|
||||
# The main function
|
||||
#
|
||||
splits = map(split_words, partition(read_file(sys.argv[1]), 200))
|
||||
splits.insert(0, []) # Normalize input to reduce
|
||||
word_freqs = sort(reduce(count_words, splits))
|
||||
|
||||
for (w, c) in word_freqs[0:25]:
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import print_function
|
||||
import sys, re, operator, string
|
||||
from functools import reduce
|
||||
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import print_function
|
||||
import sys, operator, string, os, threading, re
|
||||
from util import getch, cls, get_input
|
||||
from time import sleep
|
||||
@@ -15,7 +14,7 @@ class FreqObserver(threading.Thread):
|
||||
self.daemon,self._end = True, False
|
||||
# freqs is the part of the model to be observed
|
||||
self._freqs = freqs
|
||||
self._freqs_0 = sorted(self._freqs.iteritems(), key=operator.itemgetter(1), reverse=True)[:25]
|
||||
self._freqs_0 = sorted(self._freqs.items(), key=operator.itemgetter(1), reverse=True)[:25]
|
||||
self.start()
|
||||
|
||||
def run(self):
|
||||
@@ -29,7 +28,7 @@ class FreqObserver(threading.Thread):
|
||||
|
||||
def _update_view(self):
|
||||
lock.acquire()
|
||||
freqs_1 = sorted(self._freqs.iteritems(), key=operator.itemgetter(1), reverse=True)[:25]
|
||||
freqs_1 = sorted(self._freqs.items(), key=operator.itemgetter(1), reverse=True)[:25]
|
||||
lock.release()
|
||||
if (freqs_1 != self._freqs_0):
|
||||
self._update_display(freqs_1)
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import print_function
|
||||
import sys, re, operator, collections
|
||||
|
||||
class WordFrequenciesModel:
|
||||
@@ -29,7 +28,7 @@ class WordFrequenciesView:
|
||||
model.register(self)
|
||||
|
||||
def render(self):
|
||||
sorted_freqs = sorted(self._model.freqs.iteritems(), key=operator.itemgetter(1), reverse=True)
|
||||
sorted_freqs = sorted(self._model.freqs.items(), key=operator.itemgetter(1), reverse=True)
|
||||
for (w, c) in sorted_freqs[:25]:
|
||||
print(w, '-', c)
|
||||
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import print_function
|
||||
import sys, re, operator, collections
|
||||
|
||||
class WordFrequenciesModel:
|
||||
@@ -23,7 +22,7 @@ class WordFrequenciesView:
|
||||
self._model = model
|
||||
|
||||
def render(self):
|
||||
sorted_freqs = sorted(self._model.freqs.iteritems(), key=operator.itemgetter(1), reverse=True)
|
||||
sorted_freqs = sorted(self._model.freqs.items(), key=operator.itemgetter(1), reverse=True)
|
||||
for (w, c) in sorted_freqs[0:25]:
|
||||
print(w, '-', c)
|
||||
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import print_function
|
||||
import re, string, sys
|
||||
|
||||
with open("../stop_words.txt") as f:
|
||||
@@ -32,9 +31,8 @@ def upload_post_handler(args):
|
||||
with open(filename) as f:
|
||||
for w in [x.lower() for x in re.split("[^a-zA-Z]+", f.read()) if len(x) > 0 and x.lower() not in stops]:
|
||||
word_freqs[w] = word_freqs.get(w, 0) + 1
|
||||
word_freqsl = word_freqs.items()
|
||||
word_freqsl.sort(key=lambda x: x[1], reverse=True)
|
||||
data[filename] = word_freqsl
|
||||
word_freqsl = list(word_freqs.items())
|
||||
data[filename] = sorted(word_freqsl, key=lambda x: x[1], reverse=True)
|
||||
|
||||
if args == None:
|
||||
return error_state()
|
||||
@@ -42,6 +40,7 @@ def upload_post_handler(args):
|
||||
try:
|
||||
create_data(filename)
|
||||
except:
|
||||
print("Unexpected error: %s" % sys.exc_info()[0])
|
||||
return error_state()
|
||||
return word_get_handler([filename, 0])
|
||||
|
||||
|
||||
@@ -1,6 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
from __future__ import print_function
|
||||
import sys, re, operator, string, inspect
|
||||
|
||||
# Reusing the defensive style program to illustrate this
|
||||
@@ -63,7 +61,7 @@ def sort(word_freq):
|
||||
sorted by frequency
|
||||
"""
|
||||
if type(word_freq) is dict and word_freq != {}:
|
||||
return sorted(word_freq.iteritems(), key=operator.itemgetter(1), reverse=True)
|
||||
return sorted(word_freq.items(), key=operator.itemgetter(1), reverse=True)
|
||||
else:
|
||||
return []
|
||||
|
||||
|
||||
@@ -9,6 +9,9 @@ with the following command line:
|
||||
python tf-NN.py ../pride-and-prejudice.txt
|
||||
```
|
||||
|
||||
NOTE: the code in the master branch is written for Python 3. If you are looking
|
||||
for a version for Python 2.7, check out the 2.7 branch or the v1.0 release.
|
||||
|
||||
Explanations and historical context for all these styles can be found in the
|
||||
companion book [Exercises in Programming Style](http://www.amazon.com/Exercises-Programming-Style-Cristina-Videira/dp/1482227371/).
|
||||
|
||||
|
||||
Reference in New Issue
Block a user