Added style #11

This commit is contained in:
Crista Lopes
2013-09-22 12:07:52 -07:00
parent 7b76bde7f7
commit fb1766e169
2 changed files with 136 additions and 0 deletions

View File

@@ -0,0 +1,21 @@
Style #11
==============================
Constraints:
- Larger problem is decomposed into entities using some form of abstraction
(objects, modules or similar)
- The entities are never called on directly for actions
- Existence of an infrastructure for publishing and subscribing to
events, aka the bulletin board
- Entities post event subscriptions (aka 'wanted') to the bulletin
board and publish events (aka 'offered') to the bulletin board. The
bulletin board does all the event management and distribution.
Possible names:
- Bulletin board
- Publish-Subscribe

115
11-bulletin-board/tf-11.py Normal file
View File

@@ -0,0 +1,115 @@
import sys, re, operator, string
#
# The event management substrate
#
class EventManager:
    """
    The bulletin board: keeps track of which handlers want which event
    types and hands every published event to the matching handlers.

    An event is an indexable sequence whose first element is its type.
    """

    def __init__(self):
        # event type -> list of handlers, in the order they subscribed
        self._subscriptions = {}

    def subscribe(self, event_type, handler):
        """ Registers handler to be called for each event_type event """
        self._subscriptions.setdefault(event_type, []).append(handler)

    def publish(self, event):
        """
        Calls every handler subscribed to event[0], passing it the whole
        event. Events of a type with no subscribers are ignored.
        """
        for handler in self._subscriptions.get(event[0], ()):
            handler(event)
#
# The "agents"
#
class DataStorage:
    """ Models the contents of the file """

    def __init__(self, event_manager):
        # Start out empty so that a 'start' event arriving before any
        # 'load' produces no words instead of raising AttributeError
        self._data = ''
        self._event_manager = event_manager
        self._event_manager.subscribe('load', self.load)
        self._event_manager.subscribe('start', self.produce_words)

    def load(self, event):
        """
        Handles 'load': reads the file named by event[1] and stores its
        text, normalized by __filter_chars_normalize
        """
        path_to_file = event[1]
        # 'with' closes the file even when read() raises
        with open(path_to_file) as f:
            self._data = f.read()
        self.__filter_chars_normalize()

    def __filter_chars_normalize(self):
        """
        Replaces, in place, every run of nonalphanumeric chars
        (underscore included) in the stored text with a single space,
        and lower-cases the result
        """
        # Raw string: '\W' is a regex escape, not a string escape
        pattern = re.compile(r'[\W_]+')
        self._data = pattern.sub(' ', self._data).lower()

    def produce_words(self, event):
        """
        Handles 'start': publishes a ('word', w) event for each
        whitespace-separated word in storage, one word at a time,
        followed by a single ('eof', None) event
        """
        for w in self._data.split():
            self._event_manager.publish(('word', w))
        self._event_manager.publish(('eof', None))
class StopWordFilter:
    """ Models the stop word filter """

    def __init__(self, event_manager, stop_words_path='../stop_words.txt'):
        """
        Subscribes to 'load' and 'word' events on event_manager.
        stop_words_path names the comma-separated stop word file that is
        read when a 'load' event arrives
        """
        # A set gives constant-time membership tests; the filter is
        # consulted once per word of the input
        self._stop_words = set()
        self._stop_words_path = stop_words_path
        self._event_manager = event_manager
        self._event_manager.subscribe('load', self.load)
        self._event_manager.subscribe('word', self.is_stop_word)

    def load(self, event):
        """
        Handles 'load': (re)reads the stop word file. The event payload
        is not used
        """
        # 'with' closes the file even when read() raises
        with open(self._stop_words_path) as f:
            # strip() so that an entry followed by a newline (typically
            # the last one in the file) still matches
            stop_words = set(w.strip() for w in f.read().split(','))
        # add single-letter words
        stop_words.update(string.ascii_lowercase)
        self._stop_words = stop_words

    def is_stop_word(self, event):
        """
        Handles 'word': republishes event[1] as a ('valid_word', word)
        event unless it is a stop word
        """
        word = event[1]
        if word not in self._stop_words:
            self._event_manager.publish(('valid_word', word))
class WordFrequencyCounter:
    """ Keeps the word frequency data """

    def __init__(self, event_manager):
        # word -> number of 'valid_word' events seen for it
        self._word_freqs = {}
        self._event_manager = event_manager
        self._event_manager.subscribe('valid_word', self.increment_count)
        self._event_manager.subscribe('print', self.print_freqs)

    def increment_count(self, event):
        """ Handles 'valid_word': adds one to the count of event[1] """
        word = event[1]
        self._word_freqs[word] = self._word_freqs.get(word, 0) + 1

    def print_freqs(self, event):
        """
        Handles 'print': writes the 25 most frequent words to stdout,
        most frequent first, one "word  -  count" line each. The event
        payload is not used
        """
        # items() rather than iteritems(): available on Python 2 and 3
        word_freqs = sorted(self._word_freqs.items(),
                            key=operator.itemgetter(1), reverse=True)
        for word, count in word_freqs[0:25]:
            # A single pre-formatted string prints the same text under
            # both the print statement and the print function
            print('%s  -  %s' % (word, count))
class WordFrequencyApplication:
    """
    Drives the word-frequency run: turns 'run' into 'load' + 'start',
    and 'eof' into 'print'
    """

    def __init__(self, event_manager):
        self._event_manager = event_manager
        for event_type, handler in (('run', self.run), ('eof', self.stop)):
            self._event_manager.subscribe(event_type, handler)

    def run(self, event):
        """
        Handles 'run': publishes ('load', event[1]) and then
        ('start', None)
        """
        publish = self._event_manager.publish
        publish(('load', event[1]))
        publish(('start', None))

    def stop(self, event):
        """ Handles 'eof': publishes ('print', None) """
        self._event_manager.publish(('print', None))
#
# The main function
#
# Create the bulletin board, then the agents in the order their
# subscriptions should be registered. The agents are not bound to names:
# each one subscribes its own bound methods to the event manager while
# it is being constructed.
em = EventManager()
DataStorage(em)
StopWordFilter(em)
WordFrequencyCounter(em)
WordFrequencyApplication(em)
# Start the run; the first command-line argument is the file to analyse
em.publish(('run', sys.argv[1]))