Eliminate the use of global variables; makes the example cleaner.

This commit is contained in:
Crista Lopes
2014-01-26 08:28:17 -08:00
parent 390126b6f6
commit dcf6e6426a

View File

@@ -7,7 +7,7 @@ RECURSION_LIMIT = 9500
# The interpreter limit does not govern recursion alone: it caps the depth
# of the whole call stack, so every frame already on the stack counts too.
# NOTE(review): the +10 presumably leaves headroom for those outer frames
# on top of the RECURSION_LIMIT words handled per batch -- confirm.
sys.setrecursionlimit(RECURSION_LIMIT+10)
def count(word_list, word_freqs): def count(word_list, stopwords, wordfreqs):
# What to do with an empty list # What to do with an empty list
if word_list == []: if word_list == []:
return return
@@ -16,33 +16,33 @@ def count(word_list, word_freqs):
word = word_list[0] word = word_list[0]
if word not in stopwords: if word not in stopwords:
if word in word_freqs: if word in word_freqs:
word_freqs[word] += 1 wordfreqs[word] += 1
else: else:
word_freqs[word] = 1 wordfreqs[word] = 1
# The inductive case, what to do with a list of words # The inductive case, what to do with a list of words
else: else:
# Process the head word # Process the head word
count([word_list[0]], word_freqs) count([word_list[0]], stopwords, wordfreqs)
# Process the tail # Process the tail
count(word_list[1:], word_freqs) count(word_list[1:], stopwords, wordfreqs)
def wf_print(wordfreq):
    """Recursively print (word, count) pairs as "word - count" lines.

    wordfreq is a list of 2-tuples; pairs are printed in list order, one
    per line. An empty list prints nothing. Each call handles the head of
    the list and recurses on the tail, so the recursion depth grows with
    the length of the list.
    """
    # Base case: nothing left to print
    if wordfreq == []:
        return
    # Base case: exactly one pair, print it
    if len(wordfreq) == 1:
        (w, c) = wordfreq[0]
        # A single pre-formatted argument produces the same text as the
        # statement form `print w, '-', c` and parses on Python 2 and 3.
        print('%s - %s' % (w, c))
    # Inductive case: print the head pair, then the rest of the list
    else:
        wf_print([wordfreq[0]])
        wf_print(wordfreq[1:])
# Load the comma-separated stop words; the with-block closes the file.
with open('../stop_words.txt') as stop_words_file:
    stop_words = set(stop_words_file.read().split(','))

# Lower-case the input text and keep every run of two or more letters a-z.
with open(sys.argv[1]) as input_file:
    words = re.findall('[a-z]{2,}', input_file.read().lower())

word_freqs = {}
# Theoretically, we would just call count(words, stop_words, word_freqs)
# Try doing that and see what happens.
# Instead, feed count() at most RECURSION_LIMIT words at a time.
for i in range(0, len(words), RECURSION_LIMIT):
    count(words[i:i+RECURSION_LIMIT], stop_words, word_freqs)

# Print the 25 entries with the highest counts, largest first.
wf_print(sorted(word_freqs.iteritems(), key=operator.itemgetter(1), reverse=True)[:25])