Add a #! line at the beginning of each of the existing Python programs and change these files to be executable. This sets the stage for having the test script blindly run anything that is executable, adding support for testing many programming languages.
101 lines
2.8 KiB
Python
Executable File
101 lines
2.8 KiB
Python
Executable File
#!/usr/bin/env python
|
|
|
|
import sys, re, operator, string
|
|
from abc import ABCMeta
|
|
|
|
#
|
|
# The classes
|
|
#
|
|
class TFExercise(object):
    """Common base class for the term-frequency exercise objects.

    Provides a default ``info()`` that subclasses override to report
    the data structure they are built around.
    """

    # Python 2 metaclass declaration; kept exactly as in the original.
    __metaclass__ = ABCMeta

    def info(self):
        """Return '<ClassName>: No major data structure'."""
        return self.__class__.__name__ + ": No major data structure"
|
|
|
|
class DataStorageManager(TFExercise):
    """Models the contents of the file.

    The whole file is read into memory as one string. Every run of
    non-alphanumeric characters (underscores included) is replaced by a
    single space, and the text is lower-cased.
    """

    # Class-level default; each instance rebinds it in __init__.
    _data = ''

    def __init__(self, path_to_file):
        """Read the file at ``path_to_file`` and normalize its text.

        Raises whatever ``open``/``read`` raise if the file cannot be read.
        """
        # The context manager closes the file even when read() fails.
        with open(path_to_file) as f:
            self._data = f.read()
        self.__filter_chars()
        self.__normalize()

    def __filter_chars(self):
        """
        Replaces every run of nonalphanumeric chars in the stored text
        with a single space
        """
        # Raw string so that \W reaches the regex engine untouched.
        pattern = re.compile(r'[\W_]+')
        self._data = pattern.sub(' ', self._data)

    def __normalize(self):
        """
        Converts the stored text to lower case
        """
        self._data = self._data.lower()

    def words(self):
        """
        Returns the list of words in storage
        """
        # _data is already a single string, so it can be split directly.
        return self._data.split()

    def info(self):
        """Return the class name and the type name of the stored data."""
        return self.__class__.__name__ + ": My major data structure is a " + self._data.__class__.__name__
|
|
|
|
class StopWordManager(TFExercise):
    """Models the stop word filter."""

    # Class-level default; each instance rebinds it in __init__.
    _stop_words = []

    def __init__(self, path_to_file='../stop_words.txt'):
        """Load the comma-separated stop words from ``path_to_file``.

        The default is the original hard-coded location, which is resolved
        relative to the current working directory. All single lowercase
        ASCII letters are added as stop words too.
        """
        # The context manager closes the file even when read() fails.
        with open(path_to_file) as f:
            self._stop_words = f.read().split(',')
        # add single-letter words
        self._stop_words.extend(list(string.ascii_lowercase))

    def is_stop_word(self, word):
        """Return True if ``word`` is in the stop word list."""
        return word in self._stop_words

    def info(self):
        """Return the class name and the type name of the stop word store."""
        return self.__class__.__name__ + ": My major data structure is a " + self._stop_words.__class__.__name__
|
|
|
|
class WordFrequencyManager(TFExercise):
    """Keeps the word frequency data."""

    def __init__(self):
        """Start with an empty word -> count mapping."""
        # Created per instance: a class-level {} is mutated in place by
        # increment_count and would be shared by every instance.
        self._word_freqs = {}

    def increment_count(self, word):
        """Add one to the count of ``word``, starting from zero if unseen."""
        self._word_freqs[word] = self._word_freqs.get(word, 0) + 1

    def sorted(self):
        """Return (word, count) pairs ordered by count, highest first."""
        # Inside a method the name `sorted` resolves to the builtin, not to
        # this method. items() works on both Python 2 and Python 3.
        return sorted(self._word_freqs.items(), key=operator.itemgetter(1), reverse=True)

    def info(self):
        """Return the class name and the type name of the frequency store."""
        return self.__class__.__name__ + ": My major data structure is a " + self._word_freqs.__class__.__name__
|
|
|
|
|
|
class WordFrequencyController(TFExercise):
    """Wires the three managers together and prints the top words."""

    def __init__(self, path_to_file):
        """Create the storage, stop-word and frequency managers.

        ``path_to_file`` is passed on to DataStorageManager.
        """
        self._storage_manager = DataStorageManager(path_to_file)
        self._stop_word_manager = StopWordManager()
        self._word_freq_manager = WordFrequencyManager()

    def run(self):
        """Count every non-stop word, then print the 25 most frequent."""
        for w in self._storage_manager.words():
            if not self._stop_word_manager.is_stop_word(w):
                self._word_freq_manager.increment_count(w)

        word_freqs = self._word_freq_manager.sorted()
        for tf in word_freqs[0:25]:
            # One pre-formatted argument gives the same "word  -  count"
            # text as the Python 2 statement `print a, ' - ', b`, and is
            # also valid on Python 3.
            print("%s  -  %s" % (tf[0], tf[1]))
|
|
|
|
#
|
|
# The main function
|
|
#
|
|
# Run only when executed as a script, so importing this module does not
# read sys.argv or start the word count.
if __name__ == "__main__":
    WordFrequencyController(sys.argv[1]).run()
|