From 5dbb552bc54904b1e3f4f4ac45d7192fe6107c3c Mon Sep 17 00:00:00 2001 From: hukun01 Date: Fri, 28 Feb 2014 19:46:06 -0800 Subject: [PATCH] Read stop_words.txt once when the class is defined, to avoid re-reading it on every update. --- 32-trinity/tf-32.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/32-trinity/tf-32.py b/32-trinity/tf-32.py index 6152418..383ca56 100755 --- a/32-trinity/tf-32.py +++ b/32-trinity/tf-32.py @@ -5,14 +5,14 @@ class WordFrequenciesModel: """ Models the data. In this case, we're only interested in words and their frequencies as an end result """ freqs = {} + stopwords = set(open('../stop_words.txt').read().split(',')) def __init__(self, path_to_file): self.update(path_to_file) def update(self, path_to_file): try: - stopwords = set(open('../stop_words.txt').read().split(',')) words = re.findall('[a-z]{2,}', open(path_to_file).read().lower()) - self.freqs = collections.Counter(w for w in words if w not in stopwords) + self.freqs = collections.Counter(w for w in words if w not in self.stopwords) except IOError: print "File not found" self.freqs = {} @@ -23,7 +23,7 @@ class WordFrequenciesView: def render(self): sorted_freqs = sorted(self._model.freqs.iteritems(), key=operator.itemgetter(1), reverse=True) - for (w, c) in sorted_freqs[:25]: + for (w, c) in sorted_freqs[0:25]: print w, '-', c class WordFrequencyController: