Read stop_words.txt once at initialization, to avoid re-reading it on every update.

This commit is contained in:
hukun01
2014-02-28 19:46:06 -08:00
parent 854f78c80c
commit 5dbb552bc5

View File

@@ -5,14 +5,14 @@ class WordFrequenciesModel:
""" Models the data. In this case, we're only interested """ Models the data. In this case, we're only interested
in words and their frequencies as an end result """ in words and their frequencies as an end result """
freqs = {} freqs = {}
stopwords = set(open('../stop_words.txt').read().split(','))
def __init__(self, path_to_file): def __init__(self, path_to_file):
self.update(path_to_file) self.update(path_to_file)
def update(self, path_to_file): def update(self, path_to_file):
try: try:
stopwords = set(open('../stop_words.txt').read().split(','))
words = re.findall('[a-z]{2,}', open(path_to_file).read().lower()) words = re.findall('[a-z]{2,}', open(path_to_file).read().lower())
self.freqs = collections.Counter(w for w in words if w not in stopwords) self.freqs = collections.Counter(w for w in words if w not in self.stopwords)
except IOError: except IOError:
print "File not found" print "File not found"
self.freqs = {} self.freqs = {}
@@ -23,7 +23,7 @@ class WordFrequenciesView:
def render(self): def render(self):
sorted_freqs = sorted(self._model.freqs.iteritems(), key=operator.itemgetter(1), reverse=True) sorted_freqs = sorted(self._model.freqs.iteritems(), key=operator.itemgetter(1), reverse=True)
for (w, c) in sorted_freqs[:25]: for (w, c) in sorted_freqs[0:25]:
print w, '-', c print w, '-', c
class WordFrequencyController: class WordFrequencyController: