Merge pull request #23 from hukun01/master

Little refinement for tf-32.py
This commit is contained in:
crista
2014-03-08 08:37:03 -08:00

View File

@@ -5,14 +5,14 @@ class WordFrequenciesModel:
     """ Models the data. In this case, we're only interested
     in words and their frequencies as an end result """
     freqs = {}
+    stopwords = set(open('../stop_words.txt').read().split(','))
     def __init__(self, path_to_file):
         self.update(path_to_file)
 
     def update(self, path_to_file):
         try:
-            stopwords = set(open('../stop_words.txt').read().split(','))
             words = re.findall('[a-z]{2,}', open(path_to_file).read().lower())
-            self.freqs = collections.Counter(w for w in words if w not in stopwords)
+            self.freqs = collections.Counter(w for w in words if w not in self.stopwords)
         except IOError:
             print "File not found"
             self.freqs = {}
@@ -23,7 +23,7 @@ class WordFrequenciesView:
 
     def render(self):
         sorted_freqs = sorted(self._model.freqs.iteritems(), key=operator.itemgetter(1), reverse=True)
-        for (w, c) in sorted_freqs[:25]:
+        for (w, c) in sorted_freqs[0:25]:
             print w, '-', c
 
 class WordFrequencyController: