Better spreadsheet style, more uniform across columns

This commit is contained in:
Crista Lopes
2013-10-21 19:21:40 -07:00
parent 5cde2691ea
commit 4576b3c5e8

View File

@@ -6,28 +6,34 @@ import sys, re, itertools, operator
# #
all_words = [(), None] all_words = [(), None]
stop_words = [(), None] stop_words = [(), None]
non_stop_words = [(), lambda w : w if w not in stop_words[0] else ''] non_stop_words = [(), lambda : map(lambda w : w if w not in stop_words[0] else '', \
unique_words = [(), lambda word_list: set([w for w in word_list if w != ''])] all_words[0])]
counts = [(), lambda w, word_list : word_list.count(w)] unique_words = [(), lambda : set([w for w in non_stop_words[0] if w != ''])]
sorted_data = [(), lambda z : sorted(z, key=operator.itemgetter(1), reverse=True)] counts = [(), lambda : map(lambda w, word_list : word_list.count(w), \
unique_words[0], \
itertools.repeat(non_stop_words[0], len(unique_words[0])))]
sorted_data = [(), lambda : sorted(zip(list(unique_words[0]), counts[0]),
key=operator.itemgetter(1), reverse=True)]
# The entire spreadsheet
all_columns = [all_words, stop_words, non_stop_words, unique_words, counts, sorted_data]
# #
# The active procedure over the columns of data. # The active procedure over the columns of data.
# Call this every time the input data changes, or periodically. # Call this every time the input data changes, or periodically.
# #
def update(): def update():
global non_stop_words global all_columns
global unique_words # Apply the formula in each column
global counts for c in all_columns:
global sorted_data if c[1] != None:
# Apply the formulas to the 4 last columns c[0] = c[1]()
non_stop_words[0] = map(non_stop_words[1], all_words[0])
unique_words[0] = unique_words[1](non_stop_words[0])
counts[0] = map(counts[1], unique_words[0], itertools.repeat(non_stop_words[0], len(unique_words[0])))
sorted_data[0] = sorted_data[1](zip(list(unique_words[0]), counts[0]))
# #
# The Main program
#
# Load the fixed data into the first 2 columns # Load the fixed data into the first 2 columns
#
all_words[0] = re.findall('[a-z]{2,}', open(sys.argv[1]).read().lower()) all_words[0] = re.findall('[a-z]{2,}', open(sys.argv[1]).read().lower())
stop_words[0] = set(open('../stop_words.txt').read().split(',')) stop_words[0] = set(open('../stop_words.txt').read().split(','))
# Update the columns with formulas # Update the columns with formulas