Add a #! line at the beginning of each of the existing Python programs and change these files to be executable. This sets the stage for having the test script blindly run anything that is executable, adding support for testing many programming languages.
107 lines
3.1 KiB
Python
Executable File
107 lines
3.1 KiB
Python
Executable File
#!/usr/bin/env python
|
|
|
|
import sys, re, operator, string
|
|
|
|
class DataStorageManager():
    """Models the contents of the file.

    The object is driven only through ``dispatch``: a message is a list
    whose first element names the request and whose remaining elements
    are its arguments.
    """

    # Normalised text of the loaded file; empty until 'init' is handled.
    _data = ''

    def dispatch(self, message):
        """Route *message* to the matching private handler.

        Supported messages:
            ['init', path_to_file] -- load and normalise the file
            ['words']              -- return the stored words as a list

        Raises:
            Exception: if ``message[0]`` is not one of the names above.
        """
        if message[0] == 'init':
            return self._init(message[1])
        elif message[0] == 'words':
            return self._words()
        else:
            raise Exception("Message not understood " + message[0])

    def _init(self, path_to_file):
        """Load *path_to_file* and store its normalised contents.

        Every run of non-word characters and underscores is replaced by a
        single space, and the text is lower-cased.
        """
        # The context manager closes the file even when read() raises.
        with open(path_to_file) as f:
            raw_text = f.read()
        # Raw string so that \W reaches the regex engine untouched instead
        # of being treated as an (invalid) string escape.
        pattern = re.compile(r'[\W_]+')
        self._data = pattern.sub(' ', raw_text).lower()

    def _words(self):
        """Return the list of words in storage (split on whitespace)."""
        return self._data.split()
|
|
|
|
|
|
class StopWordManager():
    """Models the stop word filter.

    The object is driven only through ``dispatch``: a message is a list
    whose first element names the request and whose remaining elements
    are its arguments.
    """

    # Known stop words; empty until 'init' is handled. Immutable default so
    # the class-level value can never be mutated through an instance.
    _stop_words = frozenset()

    def dispatch(self, message):
        """Route *message* to the matching private handler.

        Supported messages:
            ['init']               -- load the stop word list
            ['is_stop_word', word] -- return whether *word* is a stop word

        Raises:
            Exception: if ``message[0]`` is not one of the names above.
        """
        if message[0] == 'init':
            return self._init()
        elif message[0] == 'is_stop_word':
            return self._is_stop_word(message[1])
        else:
            raise Exception("Message not understood " + message[0])

    def _init(self, path_to_file='../stop_words.txt'):
        """Load the comma-separated stop words from *path_to_file*.

        Every single lower-case ASCII letter is added as a stop word too.
        The default path is resolved against the current working directory.
        """
        # The context manager closes the file even when read() raises.
        with open(path_to_file) as f:
            stop_words = set(f.read().split(','))
        stop_words.update(string.ascii_lowercase)
        # A set gives constant-time membership tests; the filter is queried
        # once per word of the input text.
        self._stop_words = stop_words

    def _is_stop_word(self, word):
        """Return True if *word* is in the loaded stop word collection."""
        return word in self._stop_words
|
|
|
|
class WordFrequencyManager():
    """Keeps the word frequency data.

    The object is driven only through ``dispatch``: a message is a list
    whose first element names the request and whose remaining elements
    are its arguments.
    """

    def __init__(self):
        # Per-instance mapping of word -> occurrence count. A class-level
        # dict would be mutated in place and so shared by every instance.
        self._word_freqs = {}

    def dispatch(self, message):
        """Route *message* to the matching private handler.

        Supported messages:
            ['increment_count', word] -- add one occurrence of *word*
            ['sorted']                -- return (word, count) pairs,
                                         highest count first

        Raises:
            Exception: if ``message[0]`` is not one of the names above.
        """
        if message[0] == 'increment_count':
            return self._increment_count(message[1])
        elif message[0] == 'sorted':
            return self._sorted()
        else:
            raise Exception("Message not understood " + message[0])

    def _increment_count(self, word):
        """Record one more occurrence of *word*."""
        self._word_freqs[word] = self._word_freqs.get(word, 0) + 1

    def _sorted(self):
        """Return a list of (word, count) pairs sorted by descending count."""
        # items() exists on both Python 2 and 3; iteritems() is 2-only.
        return sorted(self._word_freqs.items(),
                      key=operator.itemgetter(1), reverse=True)
|
|
|
|
class WordFrequencyController():
    """Coordinates the storage, stop word and frequency managers.

    The object is driven only through ``dispatch``: a message is a list
    whose first element names the request and whose remaining elements
    are its arguments.
    """

    def dispatch(self, message):
        """Route *message* to the matching private handler.

        Supported messages:
            ['init', path_to_file] -- create and initialise the managers
            ['run']                -- count the words and print the result

        Raises:
            Exception: if ``message[0]`` is not one of the names above.
        """
        if message[0] == 'init':
            return self._init(message[1])
        elif message[0] == 'run':
            return self._run()
        else:
            raise Exception("Message not understood " + message[0])

    def _init(self, path_to_file):
        """Create the three managers and initialise the two that load data.

        *path_to_file* is forwarded to the storage manager's 'init'
        message; the stop word manager is initialised without arguments.
        """
        self._storage_manager = DataStorageManager()
        self._stop_word_manager = StopWordManager()
        self._word_freq_manager = WordFrequencyManager()
        self._storage_manager.dispatch(['init', path_to_file])
        self._stop_word_manager.dispatch(['init'])

    def _run(self):
        """Count every non-stop word, then print the first 25 sorted pairs."""
        for w in self._storage_manager.dispatch(['words']):
            if not self._stop_word_manager.dispatch(['is_stop_word', w]):
                self._word_freq_manager.dispatch(['increment_count', w])

        word_freqs = self._word_freq_manager.dispatch(['sorted'])
        for tf in word_freqs[0:25]:
            # A single pre-formatted argument prints identically on Python 2
            # and 3; the two spaces on each side of the dash match what the
            # statement form `print a, ' - ', b` emitted.
            print("%s  -  %s" % (tf[0], tf[1]))
|
|
|
|
#
|
|
# The main function
|
|
#
|
|
# Build the controller, then send it its two messages in order: 'init'
# with the input path taken from the command line, followed by 'run'.
wfcontroller = WordFrequencyController()
for message in (['init', sys.argv[1]], ['run']):
    wfcontroller.dispatch(message)
|
|
|