Added style #2

This commit is contained in:
Crista Lopes
2013-09-22 08:51:35 -07:00
parent 27a419e6d2
commit 3468b95a94
4 changed files with 21 additions and 2 deletions

12
01-monolith/README.md Normal file
View File

@@ -0,0 +1,12 @@
Style #1
==============================
Constraints:
- No abstractions
Possible names:
- Monolith
- Labyrinth
- Brain dump

49
01-monolith/tf-01.py Normal file
View File

@@ -0,0 +1,49 @@
import sys, string
# Term-frequency counter, deliberately written as one monolithic script
# with no functions, classes or other named abstractions.
#
# Usage: python tf-01.py <input-file>
#
# Reads the stop words from '../stop_words.txt' (comma separated), counts
# every alphanumeric word of the input file that is not a stop word, and
# prints the first 25 entries of the frequency list as "word  -  count".

# the global list of [word, frequency] pairs; entries are moved towards
# the front whenever their count overtakes the entries before them
word_freqs = []
# the list of stop words
with open('../stop_words.txt') as f:
    stop_words = f.read().split(',')
# single lowercase letters are treated as stop words as well
stop_words.extend(list(string.ascii_lowercase))

# iterate through the file one line at a time; the with-block makes sure
# the input file is closed once it has been consumed
with open(sys.argv[1]) as input_file:
    for line in input_file:
        # A word is only processed when the non-alphanumeric character
        # after it is seen. The last line of a file may lack a trailing
        # newline, so add one to avoid silently dropping its final word.
        if not line.endswith('\n'):
            line += '\n'
        # index where the word currently being scanned starts, or None
        # while we are between words
        start_char = None
        for i, c in enumerate(line):
            if start_char is None:
                if c.isalnum():
                    # We found the start of a word
                    start_char = i
            elif not c.isalnum():
                # We found the end of a word. Process it
                found = False
                word = line[start_char:i].lower()
                # Ignore stop words
                if word not in stop_words:
                    pair_index = 0
                    # Let's see if it already exists
                    for pair in word_freqs:
                        if word == pair[0]:
                            pair[1] += 1
                            found = True
                            break
                        pair_index += 1
                    if not found:
                        word_freqs.append([word, 1])
                    elif len(word_freqs) > 1:
                        # We may need to reorder: walk back towards the
                        # front and swap with every entry whose count is
                        # now smaller than the one just incremented
                        for n in reversed(range(pair_index)):
                            if word_freqs[pair_index][1] > word_freqs[n][1]:
                                # swap
                                word_freqs[n], word_freqs[pair_index] = word_freqs[pair_index], word_freqs[n]
                                pair_index = n
                # Let's reset
                start_char = None

# Single-argument print: same output as the Python 2 statement
# `print tf[0], ' - ', tf[1]`, and also valid under Python 3.
for tf in word_freqs[0:25]:
    print('%s  -  %s' % (tf[0], tf[1]))