Added style #3

2013-09-22 08:59:26 -07:00
parent 3468b95a94
commit 91ecd37836
2 changed files with 101 additions and 0 deletions
--- a/03-cookbook/README.md
+++ b/03-cookbook/README.md
@@ -0,0 +1,12 @@
 Style #3
 ==============================
 Constraints:
 - Larger problem decomposed into procedural abstractions
 - Larger problem solved as a sequence of commands, each corresponding to a procedure
 Possible names:
 - Cookbook
 - Procedural
--- a/03-cookbook/tf-03.py
+++ b/03-cookbook/tf-03.py
@@ -0,0 +1,89 @@
 import sys, string
 # The shared mutable data: module-level lists that the procedures below
 # read and update through `global` declarations.
 # data: the characters of the input file, one list item per character.
 data = []
 # words: the whitespace-separated words extracted from data.
 words = []
 # word_freqs: [word, count] pairs, one per distinct word.
 word_freqs = []
 #
 # The functions
 #
 def read_file(path_to_file):
    """
    Reads a whole file and appends its characters to the global
    variable data.

    path_to_file: path of the text file to load.

    The contents are turned into a list of single characters and
    concatenated onto whatever data already holds; data is rebound
    to the resulting new list. Errors raised by open() or read()
    propagate to the caller.
    """
    global data
    # The with-block closes the file even if read() raises.
    with open(path_to_file) as f:
        data = data + list(f.read())
 def filter_chars_and_normalize():
    """
    Normalizes data in place: every alphanumeric character is
    lower-cased and every other character becomes a single space
    """
    global data
    for pos, ch in enumerate(data):
        data[pos] = ch.lower() if ch.isalnum() else ' '
 def scan():
    """
    Splits the characters held in data into whitespace-separated
    words and appends them to the global variable words
    """
    global data, words
    text = ''.join(data)
    found = text.split()
    words = words + found
 def remove_stop_words():
    """
    Removes stop words from the global variable words, in place.

    Stop words are read from the comma-separated file
    '../stop_words.txt' (resolved against the current working
    directory); every single lowercase ASCII letter counts as a
    stop word as well. The surviving words keep their relative
    order. Errors raised by open() or read() propagate to the caller.
    """
    global words
    # The with-block closes the file even if read() raises.
    with open('../stop_words.txt') as f:
        # Strip each entry so that a trailing newline or a space after
        # a comma cannot keep a stop word from matching.
        stop_words = set(entry.strip() for entry in f.read().split(','))
    # add single-letter words
    stop_words.update(string.ascii_lowercase)
    # A set gives constant-time membership tests. Slice assignment
    # updates the existing list object, so any other reference to the
    # words list sees the filtered result too.
    words[:] = [w for w in words if w not in stop_words]
 def frequencies():
    """
    Counts the words in the global variable words, updating the
    global variable word_freqs in place.

    word_freqs is a list of [word, count] pairs. A word that already
    has a pair gets its count incremented; a new word is appended as
    [word, 1], so pairs stay in first-seen order.
    """
    global words
    global word_freqs
    # Map each word to the index of its pair once, instead of
    # rebuilding the key list for every word. setdefault keeps the
    # first index if a word appears in more than one existing pair.
    positions = {}
    for i, pair in enumerate(word_freqs):
        positions.setdefault(pair[0], i)
    for w in words:
        if w in positions:
            word_freqs[positions[w]][1] += 1
        else:
            positions[w] = len(word_freqs)
            word_freqs.append([w, 1])
 def sort():
    """
    Sorts word_freqs in place by decreasing frequency.

    Pairs with equal counts keep their relative order.
    """
    global word_freqs
    # A key function with reverse=True works on both Python 2 and 3;
    # passing a cmp function to list.sort is rejected by Python 3.
    # The sort is stable, so ties stay in their existing order.
    word_freqs.sort(key=lambda pair: pair[1], reverse=True)
 #
 # The main function
 #
 # Run the procedures in order; each one reads and updates the shared
 # module-level lists left behind by the one before it.
 # sys.argv[1] is the path of the text file to analyse.
 read_file(sys.argv[1])
 filter_chars_and_normalize()
 scan()
 remove_stop_words()
 frequencies()
 sort()
 # Report the first 25 entries of word_freqs, one "word  -  count"
 # line each.
 for tf in word_freqs[0:25]:
    # A single pre-formatted string in parentheses prints the same text
    # whether print is a statement (Python 2) or a function (Python 3).
    # The doubled spaces match what `print a, ' - ', b` emits under
    # Python 2.
    print("%s  -  %s" % (tf[0], tf[1]))