From 1d6cec0d3313a341bfe0dea31e7f9f519edb36a8 Mon Sep 17 00:00:00 2001 From: Crista Lopes Date: Thu, 2 Jan 2014 15:26:16 -0800 Subject: [PATCH] Here's the little sucker that was missing from the collection of styles related to objects --- 12-closed-maps/README.md | 19 +++++++++++++++ 12-closed-maps/tf-12.py | 50 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+) create mode 100644 12-closed-maps/README.md create mode 100644 12-closed-maps/tf-12.py diff --git a/12-closed-maps/README.md b/12-closed-maps/README.md new file mode 100644 index 0000000..c569b66 --- /dev/null +++ b/12-closed-maps/README.md @@ -0,0 +1,19 @@ +Style #12 +============================== + +Constraints: + +- The larger problem is decomposed into 'things' that make sense for + the problem domain + +- Each 'thing' is a map from keys to values. Some values +are procedures/functions. + +- When the keys map to procedures/functions, these +procedures/functions' first argument is the map itself. + +Possible names: + +- Closed Maps +- Prototypes + diff --git a/12-closed-maps/tf-12.py b/12-closed-maps/tf-12.py new file mode 100644 index 0000000..26d1e6a --- /dev/null +++ b/12-closed-maps/tf-12.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python +import sys, re, operator, string + +# +# Auxiliary functions that can't be lambdas +# +def extract_words(obj, path_to_file): + with open(path_to_file) as f: + obj['data'] = f.read() + pattern = re.compile('[\W_]+') + data_str = ''.join(pattern.sub(' ', obj['data']).lower()) + obj['data'] = data_str.split() + +def load_stop_words(obj): + with open('../stop_words.txt') as f: + obj['stop_words'] = f.read().split(',') + # add single-letter words + obj['stop_words'].extend(list(string.ascii_lowercase)) + +def increment_count(obj, w): + obj['freqs'][w] = 1 if w not in obj['freqs'] else obj['freqs'][w]+1 + +data_storage_obj = { + 'data' : [], + 'init' : lambda obj, path_to_file : extract_words(obj, path_to_file), + 'words' : lambda obj : obj['data'] +} + 
# Style #12, "Closed Maps": each 'thing' is a map from keys to values; some
# values are procedures whose first argument is the map itself.

# Stop-word 'object': holds the stop-word list and a membership test.
stop_words_obj = {
    'stop_words': [],
    'init': lambda obj: load_stop_words(obj),
    'is_stop_word': lambda obj, word: word in obj['stop_words']
}

# Word-frequency 'object': accumulates per-word counts and exposes a
# descending-by-count sorted view of them.
word_freqs_obj = {
    'freqs': {},
    'increment_count': lambda obj, w: increment_count(obj, w),
    # Fix: dict.iteritems() was removed in Python 3; items() is the
    # equivalent (key, value) view and sorts identically.
    'sorted': lambda obj: sorted(obj['freqs'].items(),
                                 key=operator.itemgetter(1),
                                 reverse=True)
}

# Main driver: load the input file (path from argv) and the stop words,
# count non-stop words, then report the 25 most frequent.
data_storage_obj['init'](data_storage_obj, sys.argv[1])
stop_words_obj['init'](stop_words_obj)

for w in data_storage_obj['words'](data_storage_obj):
    if not stop_words_obj['is_stop_word'](stop_words_obj, w):
        word_freqs_obj['increment_count'](word_freqs_obj, w)

word_freqs = word_freqs_obj['sorted'](word_freqs_obj)
for (w, c) in word_freqs[0:25]:
    # Fix: print is a function in Python 3; comma-separated arguments keep
    # the original "word  -  count" spacing byte-for-byte.
    print(w, ' - ', c)