Files
exercises-in-programming-style/07-things/tf-07.py
Bruce Adams 5749c2c50f Enhance testing: run all executables
Add a #! line at the beginning of each of the existing Python programs
and change these files to be executable. This sets the stage for having
the test script blindly run anything that is executable, adding support
for testing many programming languages.
2013-09-24 22:09:30 -04:00

101 lines
2.8 KiB
Python
Executable File

#!/usr/bin/env python
import sys, re, operator, string
from abc import ABCMeta
#
# The classes
#
class TFExercise(object):
    """Common base class for the objects of the term-frequency exercise.

    Provides a default ``info`` description that subclasses may override.
    """
    __metaclass__ = ABCMeta

    def info(self):
        """Return a one-line description prefixed with the concrete class name."""
        class_name = self.__class__.__name__
        return class_name + ": No major data structure"
class DataStorageManager(TFExercise):
    """Models the contents of the file.

    The whole file is read at construction time; every run of characters
    matching ``[\\W_]`` is then replaced by a single space and the text is
    lower-cased. The resulting string is kept in ``_data``.
    """
    _data = ''

    def __init__(self, path_to_file):
        """Load the text stored at ``path_to_file`` and normalize it.

        Any error raised by ``open`` or ``read`` propagates to the caller.
        """
        # The context manager closes the file even if read() raises.
        with open(path_to_file) as f:
            self._data = f.read()
        self.__filter_chars()
        self.__normalize()

    def __filter_chars(self):
        """
        Replaces, in place, every run of non-word characters and
        underscores in the stored text with a single space
        """
        # Raw string so the backslash reaches the regex engine untouched
        # instead of relying on an unrecognized string escape.
        pattern = re.compile(r'[\W_]+')
        self._data = pattern.sub(' ', self._data)

    def __normalize(self):
        """
        Converts, in place, the stored text to lower case
        """
        self._data = self._data.lower()

    def words(self):
        """
        Returns the list of words in storage
        """
        # _data is already one string; split() with no argument splits on
        # any run of whitespace and drops empty entries.
        return self._data.split()

    def info(self):
        """Return a description naming the type of the stored data."""
        return self.__class__.__name__ + ": My major data structure is a " + self._data.__class__.__name__
class StopWordManager(TFExercise):
    """Models the stop word filter.

    The stop words are read from a comma-separated text file and extended
    with every single lower-case ASCII letter.
    """
    # Class-level default only; __init__ rebinds a fresh list per instance.
    _stop_words = []

    def __init__(self, path_to_stop_words='../stop_words.txt'):
        """Load the stop words.

        path_to_stop_words -- path of the comma-separated stop word file;
        defaults to '../stop_words.txt', resolved against the current
        working directory.

        Any error raised by ``open`` or ``read`` propagates to the caller.
        """
        # The context manager closes the file even if read() raises.
        with open(path_to_stop_words) as f:
            self._stop_words = f.read().split(',')
        # add single-letter words
        self._stop_words.extend(string.ascii_lowercase)

    def is_stop_word(self, word):
        """Return True if ``word`` is one of the loaded stop words."""
        return word in self._stop_words

    def info(self):
        """Return a description naming the type of the stop word container."""
        return self.__class__.__name__ + ": My major data structure is a " + self._stop_words.__class__.__name__
class WordFrequencyManager(TFExercise):
    """Keeps the word frequency data.

    Counts are stored in a dictionary mapping each word to the number of
    times ``increment_count`` was called with it.
    """

    def __init__(self):
        """Start with an empty frequency table owned by this instance."""
        # A per-instance dict: a class-level dict would be mutated through
        # every instance and make all managers share the same counts.
        self._word_freqs = {}

    def increment_count(self, word):
        """Add one to the count of ``word``, starting from zero if unseen."""
        self._word_freqs[word] = self._word_freqs.get(word, 0) + 1

    def sorted(self):
        """Return the (word, count) pairs as a list, highest count first."""
        # items() is available on both Python 2 and Python 3 dictionaries.
        return sorted(self._word_freqs.items(), key=operator.itemgetter(1), reverse=True)

    def info(self):
        """Return a description naming the type of the frequency table."""
        return self.__class__.__name__ + ": My major data structure is a " + self._word_freqs.__class__.__name__
class WordFrequencyController(TFExercise):
    """Wires the storage, stop word and frequency managers together."""

    def __init__(self, path_to_file):
        """Create the three collaborators; ``path_to_file`` is the text to analyze."""
        self._storage_manager = DataStorageManager(path_to_file)
        self._stop_word_manager = StopWordManager()
        self._word_freq_manager = WordFrequencyManager()

    def run(self, top_n=25):
        """Count the non-stop words and write the most frequent ones to stdout.

        top_n -- how many of the highest-count entries to print (default 25).
        """
        for w in self._storage_manager.words():
            if not self._stop_word_manager.is_stop_word(w):
                self._word_freq_manager.increment_count(w)

        word_freqs = self._word_freq_manager.sorted()
        for word, count in word_freqs[:top_n]:
            # Same bytes as the Python 2 statement `print word, ' - ', count`
            # (items joined by single spaces), but valid on Python 3 as well.
            sys.stdout.write("%s  -  %s\n" % (word, count))
#
# The main function
#
if __name__ == "__main__":
    # Run only when executed as a script, so that importing this module
    # does not start the analysis or fail on a missing command-line argument.
    # The first argument is the path of the text file to analyze.
    WordFrequencyController(sys.argv[1]).run()