One more (hopefully the last!) renumbering
This commit is contained in:
14
28-lazy-rivers/README.md
Normal file
14
28-lazy-rivers/README.md
Normal file
@@ -0,0 +1,14 @@
|
||||
Style #28
|
||||
==============================
|
||||
|
||||
Constraints:
|
||||
|
||||
- Data comes to functions in streams, rather than as a complete whole all at once
|
||||
- Functions are filters / transformers from one kind of data stream to another
|
||||
|
||||
Possible names:
|
||||
|
||||
- Lazy rivers
|
||||
- Data streams
|
||||
- Dataflow
|
||||
- Data generators
|
||||
48
28-lazy-rivers/tf-28.py
Executable file
48
28-lazy-rivers/tf-28.py
Executable file
@@ -0,0 +1,48 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import sys, operator, string
|
||||
|
||||
def characters(filename):
    """Lazily yield the characters of a file, one at a time.

    The file is read line by line, so only one line is held in memory
    at any moment.

    Args:
        filename: path of the text file to read.

    Yields:
        Each character of the file, in order, line terminators included.
    """
    # The context manager closes the file once the generator is exhausted
    # or closed, instead of leaving the handle to the garbage collector.
    with open(filename) as f:
        for line in f:
            for c in line:
                yield c
|
||||
|
||||
def all_words(filename):
    """Lazily yield every word of a file, lowercased.

    A word is a maximal run of alphanumeric characters (as judged by
    ``isalnum``); any other character acts as a separator and is dropped.

    Args:
        filename: path of the text file to read, passed to ``characters``.

    Yields:
        Each word, lowercased, in the order it appears in the file.
    """
    word = ""
    for c in characters(filename):
        if c.isalnum():
            # Either the start of a new word or the continuation of one
            word += c.lower()
        elif word:
            # We found the end of a word, emit it
            yield word
            word = ""
    if word:
        # The stream ended in the middle of a word (e.g. the file has no
        # trailing newline); emit it rather than silently dropping it.
        yield word
|
||||
|
||||
def non_stop_words(filename):
    """Lazily yield the words of a file that are not stop words.

    The stop words are the comma-separated entries of
    ``../stop_words.txt`` (resolved against the current working
    directory) plus every single lowercase ASCII letter.

    Args:
        filename: path of the text file to read, passed to ``all_words``.

    Yields:
        Each word produced by ``all_words`` that is not a stop word.
    """
    # Read the stop words inside a context manager so the handle is closed.
    with open('../stop_words.txt') as f:
        # Strip surrounding whitespace so that a trailing newline in the
        # file does not keep the last stop word from ever matching.
        stopwords = set(w.strip() for w in f.read().split(','))
    stopwords.update(string.ascii_lowercase)
    for w in all_words(filename):
        if w not in stopwords:
            yield w
|
||||
|
||||
def count_and_sort(filename):
    """Count the non-stop words of a file and rank them by frequency.

    Args:
        filename: path of the text file to read, passed to
            ``non_stop_words``.

    Returns:
        A list of ``(word, count)`` pairs sorted by descending count.
    """
    freqs = {}
    for w in non_stop_words(filename):
        freqs[w] = freqs.get(w, 0) + 1
    # items() instead of iteritems(): same result on Python 2, and it also
    # exists on Python 3, where iteritems() was removed.
    return sorted(freqs.items(), key=operator.itemgetter(1), reverse=True)
|
||||
|
||||
#
|
||||
# The main function
|
||||
#
|
||||
|
||||
# Rank the words of the file named by the first command-line argument
word_freqs = count_and_sort(sys.argv[1])

# Report the 25 most frequent words, one "word  -  count" pair per line
for tf in word_freqs[0:25]:
    # A single pre-formatted string prints the same text whether `print`
    # is the Python 2 statement or the Python 3 function.
    print("%s  -  %s" % (tf[0], tf[1]))
|
||||
|
||||
Reference in New Issue
Block a user