Files
exercises-in-programming-style/01-monolith/tf-01.py
Bruce Adams 5749c2c50f Enhance testing: run all executables
Add a #! line at the beginning of each of the existing Python programs
and change these files to be executable. This sets the stage for having
the test script blindly run anything that is executable, adding support
for testing many programming languages.
2013-09-24 22:09:30 -04:00

52 lines
1.8 KiB
Python
Executable File

#!/usr/bin/env python
import sys, string
# Term-frequency counter: prints the 25 most frequent non-stop words of the
# text file named on the command line, one "word  -  count" pair per line.


def load_stop_words(path='../stop_words.txt'):
    """Return the set of words that must not be counted.

    The file at *path* holds comma-separated stop words. Every single
    lowercase ASCII letter is added too, so one-letter tokens are ignored.
    """
    with open(path) as f:
        # strip() keeps a trailing newline from sticking to the last entry.
        stop_words = set(f.read().strip().split(','))
    stop_words.update(string.ascii_lowercase)
    return stop_words


def extract_words(line):
    """Return the lowercased words of *line*, in order of appearance.

    A word is a maximal run of alphanumeric characters; every other
    character is a separator. A word that ends exactly at the end of the
    line (no trailing newline or punctuation) is still returned.
    """
    separated = ''.join(c if c.isalnum() else ' ' for c in line)
    return [word.lower() for word in separated.split()]


def count_word_frequencies(lines, stop_words):
    """Count the non-stop words found in *lines*.

    lines      -- iterable of strings (for example an open text file)
    stop_words -- container of lowercase words to skip

    Returns a list of [word, frequency] pairs sorted by decreasing
    frequency. Words with equal frequency are ordered by the position of
    their last occurrence, earliest first; this is the order produced by
    moving a word forward past every strictly smaller count each time it
    is incremented.
    """
    counts = {}
    last_seen = {}
    position = 0
    for line in lines:
        for word in extract_words(line):
            if word in stop_words:
                continue
            counts[word] = counts.get(word, 0) + 1
            # Remember when the word last reached its current count; this
            # is the tie-breaker used when sorting below.
            last_seen[word] = position
            position += 1
    ordered = sorted(counts, key=lambda w: (-counts[w], last_seen[w]))
    return [[word, counts[word]] for word in ordered]


if __name__ == '__main__':
    if len(sys.argv) < 2:
        sys.exit('usage: %s <text-file>' % sys.argv[0])
    # the list of stop words
    stop_words = load_stop_words()
    # the global list of [word, frequency] pairs
    with open(sys.argv[1]) as text_file:
        word_freqs = count_word_frequencies(text_file, stop_words)
    for tf in word_freqs[0:25]:
        # %-formatting prints the same text under Python 2 and Python 3.
        print('%s  -  %s' % (tf[0], tf[1]))