Added style #4

2013-09-22 10:02:59 -07:00
parent 91ecd37836
commit e7fef7ea6e
2 changed files with 90 additions and 0 deletions
--- a/04-candy-factory/README.md
+++ b/04-candy-factory/README.md
@@ -0,0 +1,12 @@
 Style #4
 ==============================
 Constraints:
 - Larger problem decomposed into functional abstractions. Functions, in the mathematical sense, are relations from inputs to outputs.
 - Larger problem solved as a pipeline of function applications
 Possible names:
 - Functional
 - Pipeline
--- a/04-candy-factory/tf-04.py
+++ b/04-candy-factory/tf-04.py
@@ -0,0 +1,78 @@
 import sys, re, operator, string
 #
 # The functions
 #
 def read_file(path_to_file):
    """
    Takes a path to a file and returns the entire
    contents of the file as a string.
    Any error raised by open() or read() propagates to the caller.
    """
    # 'with' guarantees the handle is closed even when read() raises
    with open(path_to_file) as f:
        return f.read()
 def filter_chars(str_data):
    """
    Takes a string and returns a copy in which every run of one
    or more non-word characters or underscores (anything matching
    [\\W_]) is replaced by a single space
    """
    # Raw string: '\W' is not a valid escape in an ordinary string literal
    pattern = re.compile(r'[\W_]+')
    return pattern.sub(' ', str_data)
 def normalize(str_data):
    """
    Returns a lower-cased copy of the given string
    """
    lowered = str_data.lower()
    return lowered
 def scan(str_data):
    """
    Breaks a string apart at runs of whitespace and
    returns the resulting words as a list.
    """
    words = str_data.split()
    return words
 def remove_stop_words(word_list):
    """
    Takes a list of words and returns a copy with all stop
    words removed. The stop words are the comma-separated
    entries of ../stop_words.txt (a path relative to the current
    working directory) plus every single lowercase ASCII letter.
    Order and duplicates of the remaining words are preserved.
    """
    # 'with' closes the file even if the read raises
    with open('../stop_words.txt') as f:
        stop_words = set(f.read().split(','))
    # add single-letter words
    stop_words.update(string.ascii_lowercase)
    # set membership keeps the filter linear in the number of words
    return [w for w in word_list if w not in stop_words]
 def frequencies(word_list):
    """
    Counts how many times each word occurs in a list of words
    and returns the counts as a dictionary keyed by word
    """
    counts = {}
    for word in word_list:
        # a word not seen before starts from zero
        counts[word] = counts.get(word, 0) + 1
    return counts
 def sort(word_freq):
    """
    Takes a dictionary of words and their frequencies
    and returns a list of (word, frequency) pairs sorted
    by frequency, highest first
    """
    # items() exists on both Python 2 and 3 dicts; iteritems() is Python 2 only
    return sorted(word_freq.items(), key=operator.itemgetter(1), reverse=True)
 #
 # The main function
 #
 # Run the pipeline: each function's return value is the argument of the
 # next one, starting from the file named by the first command-line argument.
 word_freqs = sort(frequencies(remove_stop_words(scan(normalize(filter_chars(read_file(sys.argv[1])))))))
 # Report the first 25 pairs as "word  -  count". sys.stdout.write emits the
 # same text as the Python 2 print statement but also parses under Python 3.
 for tf in word_freqs[:25]:
    sys.stdout.write('%s  -  %s\n' % (tf[0], tf[1]))