Add a #! line at the beginning of each of the existing Python programs and change these files to be executable. This sets the stage for having the test script blindly run anything that is executable, adding support for testing many programming languages.
52 lines
1.8 KiB
Python
Executable File
52 lines
1.8 KiB
Python
Executable File
#!/usr/bin/env python
"""Print the 25 most frequent non-stop words of a text file.

Usage: pass the path of the text file as the first command-line argument.
The stop words are read from ``../stop_words.txt`` (comma-separated).
Each output line has the form ``<word>  -  <count>``.
"""

import string
import sys


def load_stop_words(path='../stop_words.txt'):
    """Return the set of words that must not be counted.

    The file at *path* is split on commas; every single lowercase ASCII
    letter is added as a stop word as well.
    """
    with open(path) as f:
        stop_words = set(f.read().split(','))
    stop_words.update(string.ascii_lowercase)
    return stop_words


def _record_word(word, word_freqs):
    """Count one occurrence of *word* in *word_freqs*, in place.

    *word_freqs* is a list of ``[word, frequency]`` pairs kept in
    non-increasing frequency order.  A new word is appended with a count
    of 1; an existing word is incremented and then moved towards the
    front past every pair whose count is now strictly smaller.
    """
    for index, pair in enumerate(word_freqs):
        if pair[0] == word:
            pair[1] += 1
            break
    else:
        # First time this word is seen: 1 is the lowest possible count,
        # so appending keeps the list ordered.
        word_freqs.append([word, 1])
        return

    # Restore the ordering by bubbling the incremented pair upwards.
    # Only strictly smaller neighbours are passed, so pairs with equal
    # counts keep their relative order.
    while index > 0 and word_freqs[index][1] > word_freqs[index - 1][1]:
        word_freqs[index - 1], word_freqs[index] = (
            word_freqs[index], word_freqs[index - 1])
        index -= 1


def count_words(lines, stop_words):
    """Return ``[word, frequency]`` pairs for *lines*, most frequent first.

    A word is a maximal run of alphanumeric characters, lowercased.
    Words contained in *stop_words* are ignored.  *lines* may be any
    iterable of strings, e.g. an open text file.
    """
    word_freqs = []
    for line in lines:
        start_char = None
        for i, c in enumerate(line):
            if start_char is None:
                if c.isalnum():
                    # We found the start of a word
                    start_char = i
            elif not c.isalnum():
                # We found the end of a word. Process it
                word = line[start_char:i].lower()
                if word not in stop_words:
                    _record_word(word, word_freqs)
                # Let's reset
                start_char = None
        if start_char is not None:
            # The line ended in the middle of a word (e.g. the last line
            # of a file without a trailing newline); count that word too.
            word = line[start_char:].lower()
            if word not in stop_words:
                _record_word(word, word_freqs)
    return word_freqs


def main(argv=None):
    """Count the words of the file named by ``argv[1]`` and print the top 25."""
    if argv is None:
        argv = sys.argv
    stop_words = load_stop_words()
    # Iterate through the file one line at a time; the context manager
    # guarantees the file is closed afterwards.
    with open(argv[1]) as source:
        word_freqs = count_words(source, stop_words)
    for tf in word_freqs[0:25]:
        # Formatted as a single string so the output is the same under
        # both Python 2 and Python 3.
        print('%s  -  %s' % (tf[0], tf[1]))


if __name__ == '__main__':
    main()