45 lines
1.0 KiB
Python
Executable File
45 lines
1.0 KiB
Python
Executable File
#!/usr/bin/env python
|
|
|
|
import sys, re, operator, collections
|
|
|
|
#
|
|
# Model
|
|
#
|
|
class WordFrequenciesModel:
    """Model: word frequencies of a text file, with stop words excluded.

    After construction, ``freqs`` maps every lower-cased run of two or more
    ASCII letters found in the input file (and not listed as a stop word) to
    the number of times it occurs.
    """

    # Class-level fallback kept for backward compatibility. Each instance
    # rebinds ``freqs`` to its own Counter in __init__, so this shared dict
    # is never mutated.
    freqs = {}

    def __init__(self, path_to_file, stop_words_path='../stop_words.txt'):
        """Read the input file and count its non-stop words.

        Args:
            path_to_file: Path of the text file to analyse.
            stop_words_path: Path of a comma-separated stop-word list.
                Defaults to the location this script previously hard-coded.

        Raises:
            IOError / OSError: If either file cannot be opened or read.
        """
        # ``with`` closes each handle deterministically instead of relying
        # on garbage collection.
        with open(stop_words_path) as stop_file:
            # Strip each entry so a trailing newline (or spaces after the
            # commas) does not stop the entry from matching.
            stopwords = set(w.strip() for w in stop_file.read().split(','))

        with open(path_to_file) as text_file:
            text = text_file.read().lower()

        # Only runs of at least two letters count as words, which also
        # discards single-letter tokens.
        words = re.findall(r'[a-z]{2,}', text)
        self.freqs = collections.Counter(w for w in words if w not in stopwords)
|
|
|
|
|
|
#
|
|
# View
|
|
#
|
|
class WordFrequenciesView:
    """View: prints the most frequent words of a model to standard output."""

    def __init__(self, model):
        """Remember the model; nothing is read from it until render().

        Args:
            model: Any object exposing a ``freqs`` mapping of word -> count.
        """
        self._model = model

    def render(self, limit=25):
        """Print the highest-count words, one ``word - count`` line each.

        Args:
            limit: Maximum number of lines to print. Defaults to 25, the
                value this view previously hard-coded.
        """
        # ``items()`` exists on both Python 2 and 3, unlike ``iteritems()``.
        # The sort is stable, so words with equal counts keep the mapping's
        # iteration order.
        ranked = sorted(self._model.freqs.items(),
                        key=operator.itemgetter(1), reverse=True)
        for word, count in ranked[:limit]:
            # A single parenthesised argument prints identically under the
            # Python 2 print statement and the Python 3 print function.
            print('%s - %s' % (word, count))
|
|
|
|
#
|
|
# Controller
|
|
#
|
|
class WordFrequencyController:
    """Controller: binds a model to a view and triggers the first render."""

    def __init__(self, model, view):
        """Store both collaborators, then ask the view to render once.

        Args:
            model: The data model; kept on the instance as ``_model``.
            view: An object with a no-argument ``render()`` method; kept on
                the instance as ``_view``.
        """
        self._model, self._view = model, view
        # Rendering happens as part of construction: building the controller
        # is what produces the output.
        self._view.render()
|
|
|
|
#
|
|
# Main
|
|
#
|
|
# Wire the three parts together. The first command-line argument is the
# path of the text file to analyse.
m = WordFrequenciesModel(sys.argv[1])
v = WordFrequenciesView(m)
# Constructing the controller renders the view, which prints the result.
c = WordFrequencyController(m, v)
|