Read stop_words.txt once at initialization, to avoid re-reading it on every update.
This commit is contained in:
@@ -5,14 +5,14 @@ class WordFrequenciesModel:
|
|||||||
""" Models the data. In this case, we're only interested
|
""" Models the data. In this case, we're only interested
|
||||||
in words and their frequencies as an end result """
|
in words and their frequencies as an end result """
|
||||||
freqs = {}
|
freqs = {}
|
||||||
|
stopwords = set(open('../stop_words.txt').read().split(','))
|
||||||
def __init__(self, path_to_file):
|
def __init__(self, path_to_file):
|
||||||
self.update(path_to_file)
|
self.update(path_to_file)
|
||||||
|
|
||||||
def update(self, path_to_file):
|
def update(self, path_to_file):
|
||||||
try:
|
try:
|
||||||
stopwords = set(open('../stop_words.txt').read().split(','))
|
|
||||||
words = re.findall('[a-z]{2,}', open(path_to_file).read().lower())
|
words = re.findall('[a-z]{2,}', open(path_to_file).read().lower())
|
||||||
self.freqs = collections.Counter(w for w in words if w not in stopwords)
|
self.freqs = collections.Counter(w for w in words if w not in self.stopwords)
|
||||||
except IOError:
|
except IOError:
|
||||||
print "File not found"
|
print "File not found"
|
||||||
self.freqs = {}
|
self.freqs = {}
|
||||||
@@ -23,7 +23,7 @@ class WordFrequenciesView:
|
|||||||
|
|
||||||
def render(self):
|
def render(self):
|
||||||
sorted_freqs = sorted(self._model.freqs.iteritems(), key=operator.itemgetter(1), reverse=True)
|
sorted_freqs = sorted(self._model.freqs.iteritems(), key=operator.itemgetter(1), reverse=True)
|
||||||
for (w, c) in sorted_freqs[:25]:
|
for (w, c) in sorted_freqs[0:25]:
|
||||||
print w, '-', c
|
print w, '-', c
|
||||||
|
|
||||||
class WordFrequencyController:
|
class WordFrequencyController:
|
||||||
|
|||||||
Reference in New Issue
Block a user