Read stop_words.txt once at initialization, to avoid re-reading it on every update.

This commit is contained in:
hukun01
2014-02-28 19:46:06 -08:00
parent 854f78c80c
commit 5dbb552bc5

View File

@@ -5,14 +5,14 @@ class WordFrequenciesModel:
""" Models the data. In this case, we're only interested
in words and their frequencies as an end result """
# Default (empty) frequency table; update() replaces it with the real counts.
freqs = {}
# The stop-word list is read a single time, when this definition is executed,
# so update() can reuse it instead of re-reading the file on every call.
# The path is relative to the current working directory.
with open('../stop_words.txt') as _stop_words_file:
    stopwords = set(_stop_words_file.read().split(','))
# Drop the temporary handle name so only `freqs` and `stopwords` remain defined.
del _stop_words_file
def __init__(self, path_to_file):
    """Build the model by counting the words found in path_to_file."""
    # Delegates to update(), which assigns self.freqs.
    self.update(path_to_file)
def update(self, path_to_file):
    """Recount word frequencies from the file at path_to_file.

    The text is lowercased and split into words, where a word is a run of
    two or more letters a-z. Words contained in self.stopwords are skipped
    and the remaining ones are counted into self.freqs.

    If the file cannot be read, "File not found" is printed and self.freqs
    is reset to an empty dict.
    """
    try:
        # The context manager closes the handle as soon as the text is read
        # instead of leaving it to the garbage collector.
        with open(path_to_file) as source:
            text = source.read().lower()
    except IOError:
        # Single-argument parenthesised form prints the same text on
        # Python 2 and Python 3.
        print("File not found")
        self.freqs = {}
        return
    words = re.findall('[a-z]{2,}', text)
    # self.stopwords is loaded once up front, so it is reused here rather
    # than re-read from disk on every update.
    self.freqs = collections.Counter(w for w in words if w not in self.stopwords)
@@ -23,7 +23,7 @@ class WordFrequenciesView:
def render(self):
    """Print the 25 most frequent words of the model, one per line.

    Each line has the form "<word> - <count>", highest count first.
    """
    # items() is available on both Python 2 and Python 3, and works for a
    # Counter as well as for a plain dict.
    by_count = sorted(self._model.freqs.items(), key=operator.itemgetter(1), reverse=True)
    for word, count in by_count[:25]:
        # Formatting the line first keeps the output identical whether
        # print is a statement (Python 2) or a function (Python 3).
        print("%s - %s" % (word, count))
class WordFrequencyController: