Files
exercises-in-programming-style/10-hollywood/tf-10.py
Bruce Adams 5749c2c50f Enhance testing: run all executables
Add a #! line at the beginning of each of the existing Python programs
and change these files to be executable. This sets the stage for having
the test script blindly run anything that is executable, adding support
for testing many programming languages.
2013-09-24 22:09:30 -04:00

106 lines
3.1 KiB
Python
Executable File

#!/usr/bin/env python
import sys, re, operator, string
#
# The "I'll call you back" Word Frequency Framework
#
class WordFrequencyFramework:
    """Event dispatcher that drives the word-frequency application.

    Components register callbacks for three phases. ``run`` then fires the
    phases in a fixed order: load, dowork, end. Within a phase, handlers
    are called in the order in which they were registered.
    """

    def __init__(self):
        # The handler lists are created per instance. As class attributes
        # they would be shared by every framework object, so each instance
        # would also fire the handlers registered on all the others.
        self._load_event_handlers = []
        self._dowork_event_handlers = []
        self._end_event_handlers = []

    def register_for_load_event(self, handler):
        """Register ``handler(path_to_file)`` for the load phase."""
        self._load_event_handlers.append(handler)

    def register_for_dowork_event(self, handler):
        """Register a no-argument ``handler()`` for the dowork phase."""
        self._dowork_event_handlers.append(handler)

    def register_for_end_event(self, handler):
        """Register a no-argument ``handler()`` for the end phase."""
        self._end_event_handlers.append(handler)

    def run(self, path_to_file):
        """Fire the load, dowork and end events, in that order.

        ``path_to_file`` is passed unchanged to every load handler; the
        dowork and end handlers are called without arguments.
        """
        for h in self._load_event_handlers:
            h(path_to_file)
        for h in self._dowork_event_handlers:
            h()
        for h in self._end_event_handlers:
            h()
#
# The entities of the application
#
class DataStorage:
    """ Models the contents of the file.

    On the framework's load event the file is read and normalised; on the
    dowork event every word that the stop-word filter does not reject is
    passed to the registered word handlers.
    """

    # Matches runs of characters that are either non-word characters (\W)
    # or underscores. Written as a raw string so that "\W" is not treated
    # as an (invalid) string escape.
    _separator_pattern = re.compile(r'[\W_]+')

    def __init__(self, wfapp, stop_word_filter):
        """Hook this storage into ``wfapp``.

        ``wfapp`` must provide ``register_for_load_event`` and
        ``register_for_dowork_event``; ``stop_word_filter`` must provide
        ``is_stop_word(word)``.
        """
        # Per-instance state: a class-level list of handlers would be
        # shared by every DataStorage object.
        self._data = ''
        self._stop_word_filter = stop_word_filter
        self._word_event_handlers = []
        wfapp.register_for_load_event(self.__load)
        wfapp.register_for_dowork_event(self.__produce_words)

    def __load(self, path_to_file):
        """Read ``path_to_file`` and store it lower-cased, with every run
        of separator characters replaced by a single space."""
        # The context manager closes the file even if read() raises.
        with open(path_to_file) as f:
            raw_text = f.read()
        self._data = self._separator_pattern.sub(' ', raw_text).lower()

    def __produce_words(self):
        """ Iterates through the list words in storage
        calling back handlers for words """
        for w in self._data.split():
            if not self._stop_word_filter.is_stop_word(w):
                for h in self._word_event_handlers:
                    h(w)

    def register_for_word_event(self, handler):
        """Register ``handler(word)``, called once per non-stop word."""
        self._word_event_handlers.append(handler)
class StopWordFilter:
    """ Models the stop word filter.

    The stop words are read on the framework's load event from a
    comma-separated file; every single lowercase ASCII letter is treated
    as a stop word as well.
    """

    def __init__(self, wfapp, stop_words_path='../stop_words.txt'):
        """Hook this filter into ``wfapp``.

        ``wfapp`` must provide ``register_for_load_event``.
        ``stop_words_path`` is the comma-separated stop-word file; it
        defaults to the location the original script hard-coded, which is
        resolved relative to the current working directory.
        """
        self._stop_words_path = stop_words_path
        # Per-instance set: a class-level container would be shared by all
        # filters, and a set makes each is_stop_word() lookup constant-time
        # instead of a scan over the whole list.
        self._stop_words = set()
        wfapp.register_for_load_event(self.__load)

    def __load(self, ignore):
        """Load the stop words; the event's file-path argument is unused."""
        # The context manager closes the file even if read() raises.
        with open(self._stop_words_path) as f:
            self._stop_words = set(f.read().split(','))
        # add single-letter words
        self._stop_words.update(string.ascii_lowercase)

    def is_stop_word(self, word):
        """Return True if ``word`` is one of the loaded stop words."""
        return word in self._stop_words
class WordFrequencyCounter:
    """ Keeps the word frequency data.

    Counts every word announced by the data storage and, on the
    framework's end event, prints the 25 most frequent words.
    """

    def __init__(self, wfapp, data_storage):
        """Hook this counter into ``data_storage`` and ``wfapp``.

        ``data_storage`` must provide ``register_for_word_event`` and
        ``wfapp`` must provide ``register_for_end_event``.
        """
        # Per-instance dict: a class-level dict would be shared, so counts
        # from separate counters would be merged together.
        self._word_freqs = {}
        data_storage.register_for_word_event(self.__increment_count)
        wfapp.register_for_end_event(self.__print_freqs)

    def __increment_count(self, word):
        """Add one occurrence of ``word``."""
        self._word_freqs[word] = self._word_freqs.get(word, 0) + 1

    def __print_freqs(self):
        """Print up to 25 ``word  -  count`` lines, highest count first."""
        # items() exists on both Python 2 and 3; iteritems() is Python 2
        # only.
        word_freqs = sorted(self._word_freqs.items(),
                            key=operator.itemgetter(1), reverse=True)
        for word, count in word_freqs[:25]:
            # Calling print with one pre-formatted string behaves the same
            # as a statement (Python 2) and as a function (Python 3), and
            # reproduces the spacing of ``print word, ' - ', count``.
            print('%s  -  %s' % (word, count))
#
# The main function
#
# Wire the application together. Each component registers its callbacks
# while it is being constructed, and the framework calls the handlers of a
# phase in registration order, so the order of these statements determines
# the order in which the callbacks run.
wfapp = WordFrequencyFramework()
# Created first: DataStorage needs the filter as an argument, and the
# filter's load handler is therefore registered ahead of DataStorage's.
stop_word_filter = StopWordFilter(wfapp)
data_storage = DataStorage(wfapp, stop_word_filter)
# Subscribes to the words produced by data_storage and to the end event.
word_freq_counter = WordFrequencyCounter(wfapp, data_storage)
# The first command-line argument is handed to every load handler as the
# path of the file to read; run() then fires load, dowork and end in turn.
wfapp.run(sys.argv[1])