Add a #! line at the beginning of each of the existing Python programs and change these files to be executable. This sets the stage for having the test script blindly run anything that is executable, adding support for testing many programming languages.
123 lines
3.3 KiB
Python
Executable File
123 lines
3.3 KiB
Python
Executable File
#!/usr/bin/env python
|
|
|
|
import abc, sys, re, operator, string
|
|
|
|
#
|
|
# The abstract data types
|
|
#
|
|
class IDataStorage(object):
    """Abstract data type describing the contents of the input file."""

    # Python 2 metaclass hook: turns this class into an abstract base class
    # so concrete implementations can be attached with ``register``.
    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def words(self):
        """Return the words held in storage."""
        raise NotImplementedError("Abstract Data Type")
|
|
|
|
class IStopWordFilter(object):
    """Abstract data type describing the stop word filter."""

    # Python 2 metaclass hook: turns this class into an abstract base class
    # so concrete implementations can be attached with ``register``.
    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def is_stop_word(self, word):
        """Tell whether *word* is a stop word."""
        raise NotImplementedError("Abstract Data Type")
|
|
|
|
class IWordFrequencyCounter(object):
    """Abstract data type describing the store of word frequency data."""

    # Python 2 metaclass hook: turns this class into an abstract base class
    # so concrete implementations can be attached with ``register``.
    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def increment_count(self, word):
        """Add one to the count recorded for *word*."""
        raise NotImplementedError("Abstract Data Type")

    @abc.abstractmethod
    def sorted(self):
        """Return the words with their frequencies, sorted by frequency."""
        raise NotImplementedError("Abstract Data Type")
|
|
|
|
#
|
|
# The concrete type implementations
|
|
#
|
|
class DataStorage(object):
    """Concrete word storage built from the contents of a text file.

    The file is read once at construction time. Every run of
    non-alphanumeric characters (underscores included) is replaced by a
    single space, the text is lower-cased, and the result is split on
    whitespace into a list of words.
    """

    # Class-level default; replaced per instance in __init__.
    _data = ''

    # Compiled once for the class instead of once per instance. The raw
    # string avoids the invalid "\W" escape of a plain string literal.
    _NON_WORD_PATTERN = re.compile(r'[\W_]+')

    def __init__(self, path_to_file):
        """Read *path_to_file* and tokenise its contents.

        Raises IOError/OSError if the file cannot be opened or read.
        """
        # ``with`` closes the handle even when read() raises.
        with open(path_to_file) as f:
            self._data = f.read()
        self.__filter_chars_normalize()
        self.__scan()

    def __filter_chars_normalize(self):
        """Replace non-alphanumeric runs in the text by a space and lower-case it."""
        self._data = self._NON_WORD_PATTERN.sub(' ', self._data).lower()

    def __scan(self):
        """Split the normalised text into a list of words."""
        self._data = self._data.split()

    def words(self):
        """Return the list of words in storage."""
        return self._data
|
|
|
|
class StopWordFilter(object):
    """Concrete stop word filter backed by a comma-separated word file.

    Every single lower-case ASCII letter is treated as a stop word in
    addition to the entries read from the file.
    """

    # Class-level default; replaced per instance in __init__.
    _stop_words = frozenset()

    def __init__(self, path_to_stop_words='../stop_words.txt'):
        """Load the stop words.

        path_to_stop_words -- file holding the stop words separated by
        commas; defaults to the location the program has always used.

        Raises IOError/OSError if the file cannot be opened or read.
        """
        # ``with`` closes the handle even when read() raises.
        with open(path_to_stop_words) as f:
            # Entries are deliberately not stripped: the text is split on
            # commas only, so results match the original list-based lookup.
            stop_words = set(f.read().split(','))
        # add single-letter words
        stop_words.update(string.ascii_lowercase)
        # A set gives constant-time membership tests; is_stop_word is
        # called once per word of the input text.
        self._stop_words = stop_words

    def is_stop_word(self, word):
        """Return True when *word* is one of the loaded stop words."""
        return word in self._stop_words
|
|
|
|
class WordFrequencyCounter(object):
    """Concrete store of word frequency data."""

    def __init__(self):
        # One dictionary per instance. A class-level dict mutated in place
        # would be shared by every counter ever created.
        self._word_freqs = {}

    def increment_count(self, word):
        """Add one to the count of *word*, starting from zero if unseen."""
        self._word_freqs[word] = self._word_freqs.get(word, 0) + 1

    def sorted(self):
        """Return a list of (word, count) pairs, highest count first."""
        # ``sorted`` here is the builtin: method names defined in the class
        # body are not visible as bare names inside method bodies.
        # items() works on both Python 2 and 3, unlike iteritems().
        return sorted(self._word_freqs.items(),
                      key=operator.itemgetter(1), reverse=True)
|
|
|
|
|
|
#
|
|
# The wiring between ADTs and concrete implementations
|
|
#
|
|
# Declare each concrete class a virtual subclass of its abstract data type.
for _interface, _implementation in (
        (IDataStorage, DataStorage),
        (IStopWordFilter, StopWordFilter),
        (IWordFrequencyCounter, WordFrequencyCounter)):
    _interface.register(_implementation)
|
|
|
|
class WordFrequencyApplication(object):
    """Wires storage, stop word filter and counter into the full program."""

    def __init__(self, path_to_file):
        """Create the three collaborators; *path_to_file* is the text to analyse."""
        self._storage = DataStorage(path_to_file)
        self._stop_word_manager = StopWordFilter()
        self._word_freq_counter = WordFrequencyCounter()

    def run(self):
        """Count every non-stop word and print the 25 most frequent ones.

        Each output line has the form ``<word>  -  <count>``.
        """
        for w in self._storage.words():
            if not self._stop_word_manager.is_stop_word(w):
                self._word_freq_counter.increment_count(w)

        word_freqs = self._word_freq_counter.sorted()
        for word, count in word_freqs[0:25]:
            # A single pre-formatted argument prints identically on Python 2
            # and 3. The doubled spaces reproduce the separators the old
            # ``print a, ' - ', b`` statement inserted between its items.
            print("%s  -  %s" % (word, count))
|
|
|
|
#
|
|
# The main function
|
|
#
|
|
# Run only when executed as a script, so importing the module has no side
# effects beyond the definitions above.
if __name__ == "__main__":
    if len(sys.argv) < 2:
        # Exit with a readable message instead of an IndexError traceback.
        sys.exit("usage: %s <path-to-text-file>" % sys.argv[0])
    WordFrequencyApplication(sys.argv[1]).run()
|