Another renumbering, so that all concurrent styles are bundled together

This commit is contained in:
Crista Lopes
2013-12-27 15:41:39 -08:00
parent de06b9b09c
commit 750016ac52
39 changed files with 0 additions and 72 deletions

View File

@@ -1,14 +0,0 @@
Style #28
==============================
Constraints:
- Data comes to functions in streams, rather than as a complete whole all at once
- Functions are filters / transformers from one kind of data stream to another
Possible names:
- Lazy rivers
- Data streams
- Dataflow
- Data generators

View File

@@ -1,48 +0,0 @@
#!/usr/bin/env python
import sys, operator, string
def characters(filename):
    """Stream the contents of a text file one character at a time.

    Args:
        filename: Path of the file to read.

    Yields:
        Every character of the file in order, line terminators included.
    """
    # The with-block closes the file as soon as the stream is exhausted or
    # the generator is closed, rather than leaving the handle open.
    with open(filename) as f:
        for line in f:
            for c in line:
                yield c
def all_words(filename):
    """Stream the lowercased words found in a file.

    A word is a maximal run of characters for which ``str.isalnum()`` is
    true; every other character acts as a separator and is discarded.

    Args:
        filename: Path of the file to read; passed on to ``characters``.

    Yields:
        Each word, lowercased, in the order it appears in the file.
    """
    word = ""  # Characters of the word currently being assembled.
    for c in characters(filename):
        if c.isalnum():
            # Either the start of a new word or the continuation of one.
            word += c.lower()
        elif word:
            # We found the end of a word, emit it
            yield word
            word = ""
    # A file that ends without a trailing separator still has a pending
    # word; emit it instead of silently dropping it.
    if word:
        yield word
def non_stop_words(filename):
    """Stream the words of a file that are not stop words.

    The stop words are the comma-separated entries of
    ``../stop_words.txt`` (resolved against the current working directory)
    plus every single lowercase ASCII letter.

    Args:
        filename: Path of the file to read; passed on to ``all_words``.

    Yields:
        Each word produced by ``all_words`` that is not a stop word.
    """
    # Read the stop-word list inside a with-block so the file handle is
    # closed right away instead of being leaked.
    with open('../stop_words.txt') as f:
        stopwords = set(f.read().split(','))
    stopwords.update(string.ascii_lowercase)
    for w in all_words(filename):
        if w not in stopwords:
            yield w
def count_and_sort(filename):
    """Count the non-stop words of a file and rank them by frequency.

    Args:
        filename: Path of the file to read; passed on to ``non_stop_words``.

    Returns:
        A list of ``(word, count)`` tuples sorted by count, highest first.
    """
    freqs = {}
    for w in non_stop_words(filename):
        freqs[w] = freqs.get(w, 0) + 1
    # items() is available on both Python 2 and 3, whereas iteritems()
    # exists only on Python 2; sorted() yields the same list either way.
    return sorted(freqs.items(), key=operator.itemgetter(1), reverse=True)
#
# The main function
#
# Rank the words of the file named by the first command-line argument and
# print the 25 most frequent as "<word>  -  <count>".
word_freqs = count_and_sort(sys.argv[1])
for tf in word_freqs[0:25]:
    # A single pre-formatted string prints identically under the Python 2
    # print statement and the Python 3 print function.
    print("%s  -  %s" % (tf[0], tf[1]))