Here's the little sucker that was missing from the collection of styles related to objects

2014-01-02 15:26:16 -08:00
parent 288407820c
commit 1d6cec0d33
2 changed files with 69 additions and 0 deletions
--- a/12-closed-maps/README.md
+++ b/12-closed-maps/README.md
@@ -0,0 +1,19 @@
+Style #12
+==============================
+
+Constraints:
+
+- The larger problem is decomposed into 'things' that make sense for
+  the problem domain 
+
+- Each 'thing' is a map from keys to values. Some values
+are procedures/functions.
+
+- When a key maps to a procedure/function, that
+procedure/function takes the map itself as its first argument.
+
+Possible names:
+
+- Closed Maps
+- Prototypes
+
--- a/12-closed-maps/tf-12.py
+++ b/12-closed-maps/tf-12.py
@@ -0,0 +1,50 @@
+#!/usr/bin/env python
+import sys, re, operator, string
+
+#
+# Auxiliary functions that can't be lambdas (their bodies need statements)
+#
+def extract_words(obj, path_to_file):
+    with open(path_to_file) as f:
+        obj['data'] = f.read()
+    pattern = re.compile('[\W_]+')
+    data_str = ''.join(pattern.sub(' ', obj['data']).lower())
+    obj['data'] = data_str.split()
+
+def load_stop_words(obj):
+    with open('../stop_words.txt') as f:
+        obj['stop_words'] = f.read().split(',')
+    # add single-letter words
+    obj['stop_words'].extend(list(string.ascii_lowercase))
+
+def increment_count(obj, w):
+    obj['freqs'][w] = 1 if w not in obj['freqs'] else obj['freqs'][w]+1
+
+data_storage_obj = {
+    'data' : [],
+    'init' : lambda obj, path_to_file : extract_words(obj, path_to_file),
+    'words' : lambda obj : obj['data']
+}
+
+stop_words_obj = {
+    'stop_words' : [],
+    'init' : lambda obj : load_stop_words(obj),
+    'is_stop_word' : lambda obj, word : word in obj['stop_words']
+}
+
+word_freqs_obj = {
+    'freqs' : {},
+    'increment_count' : lambda obj, w : increment_count(obj, w),
+    'sorted' : lambda obj : sorted(obj['freqs'].iteritems(), key=operator.itemgetter(1), reverse=True)
+}
+
+data_storage_obj['init'](data_storage_obj, sys.argv[1])
+stop_words_obj['init'](stop_words_obj)
+
+for w in data_storage_obj['words'](data_storage_obj):
+    if not stop_words_obj['is_stop_word'](stop_words_obj, w):
+        word_freqs_obj['increment_count'](word_freqs_obj, w)
+
+word_freqs = word_freqs_obj['sorted'](word_freqs_obj)
+for (w, c) in word_freqs[0:25]:
+    print w, ' - ', c