Eliminate the use of global variables; makes the example cleaner.
This commit is contained in:
@@ -7,7 +7,7 @@ RECURSION_LIMIT = 9500
|
|||||||
# this doesn't just limit recursion: it limits the depth of the whole call stack
|
# this doesn't just limit recursion: it limits the depth of the whole call stack
|
||||||
sys.setrecursionlimit(RECURSION_LIMIT+10)
|
sys.setrecursionlimit(RECURSION_LIMIT+10)
|
||||||
|
|
||||||
def count(word_list, word_freqs):
|
def count(word_list, stopwords, wordfreqs):
|
||||||
# What to do with an empty list
|
# What to do with an empty list
|
||||||
if word_list == []:
|
if word_list == []:
|
||||||
return
|
return
|
||||||
@@ -16,33 +16,33 @@ def count(word_list, word_freqs):
|
|||||||
word = word_list[0]
|
word = word_list[0]
|
||||||
if word not in stopwords:
|
if word not in stopwords:
|
||||||
if word in word_freqs:
|
if word in word_freqs:
|
||||||
word_freqs[word] += 1
|
wordfreqs[word] += 1
|
||||||
else:
|
else:
|
||||||
word_freqs[word] = 1
|
wordfreqs[word] = 1
|
||||||
# The inductive case, what to do with a list of words
|
# The inductive case, what to do with a list of words
|
||||||
else:
|
else:
|
||||||
# Process the head word
|
# Process the head word
|
||||||
count([word_list[0]], word_freqs)
|
count([word_list[0]], stopwords, wordfreqs)
|
||||||
# Process the tail
|
# Process the tail
|
||||||
count(word_list[1:], word_freqs)
|
count(word_list[1:], stopwords, wordfreqs)
|
||||||
|
|
||||||
def wf_print(word_freq):
|
def wf_print(wordfreq):
|
||||||
if word_freq == []:
|
if wordfreq == []:
|
||||||
return
|
return
|
||||||
if len(word_freq) == 1:
|
if len(wordfreq) == 1:
|
||||||
(w, c) = word_freq[0]
|
(w, c) = wordfreq[0]
|
||||||
print w, '-', c
|
print w, '-', c
|
||||||
else:
|
else:
|
||||||
wf_print([word_freq[0]])
|
wf_print([wordfreq[0]])
|
||||||
wf_print(word_freq[1:])
|
wf_print(wordfreq[1:])
|
||||||
|
|
||||||
stopwords = set(open('../stop_words.txt').read().split(','))
|
stop_words = set(open('../stop_words.txt').read().split(','))
|
||||||
words = re.findall('[a-z]{2,}', open(sys.argv[1]).read().lower())
|
words = re.findall('[a-z]{2,}', open(sys.argv[1]).read().lower())
|
||||||
word_freqs = {}
|
word_freqs = {}
|
||||||
# Theoretically, we would just call count(words, word_freqs)
|
# Theoretically, we would just call count(words, stop_words, word_freqs)
|
||||||
# Try doing that and see what happens.
|
# Try doing that and see what happens.
|
||||||
for i in range(0, len(words), RECURSION_LIMIT):
|
for i in range(0, len(words), RECURSION_LIMIT):
|
||||||
count(words[i:i+RECURSION_LIMIT], word_freqs)
|
count(words[i:i+RECURSION_LIMIT], stop_words, word_freqs)
|
||||||
|
|
||||||
wf_print(sorted(word_freqs.iteritems(), key=operator.itemgetter(1), reverse=True)[:25])
|
wf_print(sorted(word_freqs.iteritems(), key=operator.itemgetter(1), reverse=True)[:25])
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user