#!/usr/bin/env python
# Recovered from a whitespace-collapsed git patch:
#   commit 40637fdc9bfef0880764c0809d9a2f20c69f9f50
#   Author: Crista Lopes, Wed, 16 Oct 2013 21:25:43 -0700
#   Subject: [PATCH] Added lazy rivers style
#   Files:   28-lazy-rivers/README.md, 28-lazy-rivers/tf-28.py
"""Term-frequency counter written in the "lazy rivers" style.

Style #28
==============================

Constraints:

- Data comes to functions in streams, rather than as a complete whole all
  at once
- Functions are filters / transformers from one kind of data stream to
  another

Possible names:

- Lazy river
- Data streams
- Dataflow
- Data generators
"""

import operator
import string
import sys


def characters(filename):
    """Yield the characters of the file *filename*, one at a time.

    The file is read line by line, so only one line is held in memory.
    The handle is closed when the generator is exhausted or discarded.
    """
    with open(filename) as source:
        for line in source:
            for c in line:
                yield c


def all_words(filename):
    """Yield every word of the file *filename*, lower-cased, in order.

    A word is a maximal run of alphanumeric characters (``str.isalnum``);
    any other character acts as a separator and is discarded.
    """
    letters = []
    for c in characters(filename):
        if c.isalnum():
            letters.append(c.lower())
        elif letters:
            # A separator ends the word being accumulated: emit it.
            yield "".join(letters)
            letters = []
    # The stream may end in the middle of a word (for instance a file
    # without a trailing newline); flush it instead of dropping it.
    if letters:
        yield "".join(letters)


def non_stop_words(filename, stop_words_path='../stop_words.txt'):
    """Yield the words of *filename* that are not stop words.

    Stop words are read from *stop_words_path*, a comma-separated list,
    and extended with every single lower-case ASCII letter.

    Args:
        filename: path of the text file to read words from.
        stop_words_path: path of the comma-separated stop-word file.
    """
    with open(stop_words_path) as stop_file:
        # Strip each entry so that a trailing newline (or stray spaces
        # around the commas) does not keep a stop word from matching.
        stopwords = set(w.strip() for w in stop_file.read().split(','))
    stopwords.update(string.ascii_lowercase)
    for w in all_words(filename):
        if w not in stopwords:
            yield w


def count_and_sort(filename, stop_words_path='../stop_words.txt'):
    """Return ``(word, count)`` pairs for *filename*, most frequent first.

    This is the sink of the pipeline: it drains ``non_stop_words`` and
    therefore has to see the whole stream before it can return.

    Args:
        filename: path of the text file to analyse.
        stop_words_path: path of the comma-separated stop-word file.

    Returns:
        A list of ``(word, count)`` tuples sorted by decreasing count.
    """
    freqs = {}
    for w in non_stop_words(filename, stop_words_path):
        freqs[w] = freqs.get(w, 0) + 1
    # items() rather than iteritems() so this runs on Python 2 and 3.
    return sorted(freqs.items(), key=operator.itemgetter(1), reverse=True)


#
# The main function
#

def main(argv):
    """Print the 25 most frequent words of the file named by ``argv[1]``."""
    if len(argv) < 2:
        sys.exit("usage: %s <input-file>" % argv[0])
    word_freqs = count_and_sort(argv[1])
    for tf in word_freqs[0:25]:
        # A single pre-formatted string prints identically under the
        # Python 2 print statement and the Python 3 print function.
        print("%s  -  %s" % (tf[0], tf[1]))


if __name__ == "__main__":
    main(sys.argv)