Here's the little sucker that was missing from the collection of styles related to objects
This commit is contained in:
19
12-closed-maps/README.md
Normal file
19
12-closed-maps/README.md
Normal file
@@ -0,0 +1,19 @@
|
||||
Style #12
|
||||
==============================
|
||||
|
||||
Constraints:
|
||||
|
||||
- The larger problem is decomposed into 'things' that make sense for
|
||||
the problem domain
|
||||
|
||||
- Each 'thing' is a map from keys to values. Some values
|
||||
are procedures/functions.
|
||||
|
||||
- When the keys map to procedures/functions, these
|
||||
procedures/functions' first argument is the map itself.
|
||||
|
||||
Possible names:
|
||||
|
||||
- Closed Maps
|
||||
- Prototypes
|
||||
|
||||
50
12-closed-maps/tf-12.py
Normal file
50
12-closed-maps/tf-12.py
Normal file
@@ -0,0 +1,50 @@
|
||||
#!/usr/bin/env python
|
||||
import sys, re, operator, string
|
||||
|
||||
#
|
||||
# Auxiliary functions that can't be lambdas
|
||||
#
|
||||
def extract_words(obj, path_to_file):
|
||||
with open(path_to_file) as f:
|
||||
obj['data'] = f.read()
|
||||
pattern = re.compile('[\W_]+')
|
||||
data_str = ''.join(pattern.sub(' ', obj['data']).lower())
|
||||
obj['data'] = data_str.split()
|
||||
|
||||
def load_stop_words(obj):
|
||||
with open('../stop_words.txt') as f:
|
||||
obj['stop_words'] = f.read().split(',')
|
||||
# add single-letter words
|
||||
obj['stop_words'].extend(list(string.ascii_lowercase))
|
||||
|
||||
def increment_count(obj, w):
|
||||
obj['freqs'][w] = 1 if w not in obj['freqs'] else obj['freqs'][w]+1
|
||||
|
||||
data_storage_obj = {
|
||||
'data' : [],
|
||||
'init' : lambda obj, path_to_file : extract_words(obj, path_to_file),
|
||||
'words' : lambda obj : obj['data']
|
||||
}
|
||||
|
||||
stop_words_obj = {
|
||||
'stop_words' : [],
|
||||
'init' : lambda obj : load_stop_words(obj),
|
||||
'is_stop_word' : lambda obj, word : word in obj['stop_words']
|
||||
}
|
||||
|
||||
word_freqs_obj = {
|
||||
'freqs' : {},
|
||||
'increment_count' : lambda obj, w : increment_count(obj, w),
|
||||
'sorted' : lambda obj : sorted(obj['freqs'].iteritems(), key=operator.itemgetter(1), reverse=True)
|
||||
}
|
||||
|
||||
data_storage_obj['init'](data_storage_obj, sys.argv[1])
|
||||
stop_words_obj['init'](stop_words_obj)
|
||||
|
||||
for w in data_storage_obj['words'](data_storage_obj):
|
||||
if not stop_words_obj['is_stop_word'](stop_words_obj, w):
|
||||
word_freqs_obj['increment_count'](word_freqs_obj, w)
|
||||
|
||||
word_freqs = word_freqs_obj['sorted'](word_freqs_obj)
|
||||
for (w, c) in word_freqs[0:25]:
|
||||
print w, ' - ', c
|
||||
Reference in New Issue
Block a user