99 lines
2.3 KiB
Python
99 lines
2.3 KiB
Python
#!/usr/bin/env python
|
|
|
|
import sys, operator, string, os
|
|
from util import getch, cls
|
|
from time import sleep
|
|
|
|
def refresh_screen(data):
|
|
# clear screen
|
|
cls()
|
|
print data
|
|
sys.stdout.flush()
|
|
|
|
def update_display(tuples):
|
|
data_str = ""
|
|
for tf in tuples:
|
|
data_str += str(tf[0]) + ' - ' + str(tf[1]) + '\n'
|
|
refresh_screen(data_str)
|
|
|
|
automatic = False
|
|
def get_input():
|
|
global automatic
|
|
if automatic:
|
|
return True
|
|
|
|
while True:
|
|
key = ord(getch())
|
|
if key == 32: # space bar
|
|
return True
|
|
elif key == 27: # ESC
|
|
automatic = True
|
|
return True
|
|
else: pass
|
|
|
|
f = open(sys.argv[1])
|
|
|
|
def characters(filename):
|
|
c = f.read(1)
|
|
if c != "":
|
|
yield c
|
|
else:
|
|
raise StopIteration()
|
|
|
|
def all_words(filename):
|
|
found_word = False
|
|
start_char = True
|
|
while not found_word:
|
|
try:
|
|
c = characters(filename).next()
|
|
except StopIteration:
|
|
raise StopIteration()
|
|
|
|
if start_char == True:
|
|
word = ""
|
|
if c.isalnum():
|
|
# We found the start of a word
|
|
word = c.lower()
|
|
start_char = False
|
|
else:
|
|
if c.isalnum():
|
|
word += c.lower()
|
|
else:
|
|
# We found the end of a word, emit it
|
|
start_char = True
|
|
found_word = True
|
|
yield word
|
|
|
|
def non_stop_words(filename):
|
|
stopwords = set(open('../stop_words.txt').read().split(',') + list(string.ascii_lowercase))
|
|
while True:
|
|
w = all_words(filename).next()
|
|
if not w in stopwords:
|
|
yield w
|
|
|
|
def count_and_sort(filename):
|
|
freqs_0 = ()
|
|
freqs_1 = ()
|
|
freqs = {}
|
|
while get_input():
|
|
try:
|
|
w = non_stop_words(filename).next()
|
|
freqs[w] = 1 if w not in freqs else freqs[w]+1
|
|
freqs_0 = freqs_1
|
|
freqs_1 = sorted(freqs.iteritems(), key=operator.itemgetter(1), reverse=True)
|
|
if (freqs_1[0:25] != freqs_0[0:25]):
|
|
update_display(freqs_1[0:25])
|
|
except StopIteration:
|
|
break
|
|
return freqs_1
|
|
|
|
#
|
|
# The main function
|
|
#
|
|
|
|
print "Press space bar to fetch words from the file one by one"
|
|
print "Press ESC to switch to automatic mode"
|
|
count_and_sort(sys.argv[1])
|
|
|
|
|