Files
exercises-in-programming-style/09-kinds-of-things/tf-09.py
Bruce Adams 5749c2c50f Enhance testing: run all executables
Add a #! line at the beginning of each of the existing Python programs
and change these files to be executable. This sets the stage for having
the test script blindly run anything that is executable, adding support
for testing many programming languages.
2013-09-24 22:09:30 -04:00

123 lines
3.3 KiB
Python
Executable File

#!/usr/bin/env python
import abc, sys, re, operator, string
#
# The abstract data types
#
class IDataStorage (object):
""" Models the contents of the file """
__metaclass__ = abc.ABCMeta
@abc.abstractmethod
def words(self):
""" Returns the words in storage """
raise NotImplementedError("Abstract Data Type")
class IStopWordFilter (object):
""" Models the stop word filter """
__metaclass__ = abc.ABCMeta
@abc.abstractmethod
def is_stop_word(self, word):
""" Checks whether the given word is a stop word """
raise NotImplementedError("Abstract Data Type")
class IWordFrequencyCounter(object):
""" Keeps the word frequency data """
__metaclass__ = abc.ABCMeta
@abc.abstractmethod
def increment_count(self, word):
""" Increments the count for the given word """
raise NotImplementedError("Abstract Data Type")
@abc.abstractmethod
def sorted(self):
""" Returns the words and their frequencies, sorted by frequency"""
raise NotImplementedError("Abstract Data Type")
#
# The concrete type implementations
#
class DataStorage:
""" Implements the contents of the file """
_data = ''
def __init__(self, path_to_file):
f = open(path_to_file)
self._data = f.read()
f.close()
self.__filter_chars_normalize()
self.__scan()
def __filter_chars_normalize(self):
"""
Takes a string and returns a copy with all nonalphanumeric chars
replaced by white space
"""
pattern = re.compile('[\W_]+')
self._data = pattern.sub(' ', self._data).lower()
def __scan(self):
self._data = ''.join(self._data).split()
def words(self):
"""
Returns the list words in storage
"""
return self._data
class StopWordFilter:
""" Implements the stop word filter """
_stop_words = []
def __init__(self):
f = open('../stop_words.txt')
self._stop_words = f.read().split(',')
f.close()
# add single-letter words
self._stop_words.extend(list(string.ascii_lowercase))
def is_stop_word(self, word):
return word in self._stop_words
class WordFrequencyCounter:
""" Implements the word frequency data """
_word_freqs = {}
def increment_count(self, word):
if word in self._word_freqs:
self._word_freqs[word] += 1
else:
self._word_freqs[word] = 1
def sorted(self):
return sorted(self._word_freqs.iteritems(), key=operator.itemgetter(1), reverse=True)
#
# The wiring between ADTs and concrete implementations
#
IDataStorage.register(DataStorage)
IStopWordFilter.register(StopWordFilter)
IWordFrequencyCounter.register(WordFrequencyCounter)
class WordFrequencyApplication:
def __init__(self, path_to_file):
self._storage = DataStorage(path_to_file)
self._stop_word_manager = StopWordFilter()
self._word_freq_counter = WordFrequencyCounter()
def run(self):
for w in self._storage.words():
if not self._stop_word_manager.is_stop_word(w):
self._word_freq_counter.increment_count(w)
word_freqs = self._word_freq_counter.sorted()
for tf in word_freqs[0:25]:
print tf[0], ' - ', tf[1]
#
# The main function
#
WordFrequencyApplication(sys.argv[1]).run()