More forthism

This commit is contained in:
Crista Lopes
2013-11-07 16:31:42 -08:00
parent 6cf223bb07
commit b52c783b32

View File

@@ -8,13 +8,12 @@ import sys, re, operator, string
stack = [] stack = []
# #
# The heap. Let's make it an associative array # The heap. Maps names to data (i.e. variables)
# mapping names to data (i.e. variables)
# #
heap = {} heap = {}
# #
# The new "words" of our program # The new "words" (procedures) of our program
# #
def read_file(): def read_file():
""" """
@@ -42,15 +41,13 @@ def scan():
Takes a string on the stack and scans for words, placing Takes a string on the stack and scans for words, placing
the list of words back on the stack the list of words back on the stack
""" """
# Push the result onto the stack.
# Again, split() is too high-level for this style, but using # Again, split() is too high-level for this style, but using
# it for doing this fast and short. Left as exercise. # it for doing this fast and short. Left as exercise.
stack.extend(stack.pop()[0].split()) stack.extend(stack.pop()[0].split())
def remove_stop_words(): def remove_stop_words():
""" """
Takes a list of words on the stack and removes Takes a list of words on the stack and removes stop words.
all stop words.
""" """
f = open('../stop_words.txt') f = open('../stop_words.txt')
stack.append(f.read().split(',')) stack.append(f.read().split(','))
@@ -65,24 +62,24 @@ def remove_stop_words():
if stack[-1] in heap['stop_words']: if stack[-1] in heap['stop_words']:
stack.pop() # pop it and drop it stack.pop() # pop it and drop it
else: else:
heap['words'].append(stack.pop()) # pop it and store it heap['words'].append(stack.pop()) # pop it, store it
stack.extend(heap['words']) # Load the words onto the stack stack.extend(heap['words']) # Load the words onto the stack
del heap['stop_words'] # We don't need this variable anymore
del heap['words'] # We don't need this variable anymore del heap['words'] # We don't need this variable anymore
def frequencies(): def frequencies():
""" """
Takes a list of words and returns a dictionary associating Takes a list of words and returns a dictionary associating
words with frequencies of occurrence. The words are assumed words with frequencies of occurrence.
to be on the stack.
""" """
heap['word_freqs'] = {} heap['word_freqs'] = {}
# A little flavour of the real Forth style here... # A little flavour of the real Forth style here...
while len(stack) > 0: while len(stack) > 0:
# ... but the following line is not in style, because the naive implementation # ... but the following line is not in style, because the
# would be too slow, or we'd need to implement faster, hash-based search # naive implementation would be too slow
if stack[-1] in heap['word_freqs']: if stack[-1] in heap['word_freqs']:
# Increment the frequency, postfix style: f 1 + # Increment the frequency, postfix style: f 1 +
stack.append(heap['word_freqs'][stack[-1]]) # push the frequency stack.append(heap['word_freqs'][stack[-1]]) # push f
stack.append(1) # push 1 stack.append(1) # push 1
stack.append(stack.pop() + stack.pop()) # add stack.append(stack.pop() + stack.pop()) # add
else: else:
@@ -95,14 +92,8 @@ def frequencies():
del heap['word_freqs'] # We don't need this variable anymore del heap['word_freqs'] # We don't need this variable anymore
def sort(): def sort():
"""
Takes a dictionary of words and their frequencies
and returns a list of pairs where the entries are
sorted by frequency
"""
# Not in style, left as exercise # Not in style, left as exercise
stack.append(sorted(stack.pop().iteritems(), key=operator.itemgetter(1), reverse=True)) stack.extend(sorted(stack.pop().iteritems(), key=operator.itemgetter(1)))
# #
# The main function # The main function
@@ -111,9 +102,7 @@ stack.append(sys.argv[1])
read_file(); filter_chars(); scan(); remove_stop_words() read_file(); filter_chars(); scan(); remove_stop_words()
frequencies(); sort() frequencies(); sort()
word_freqs = stack.pop()
for i in range(0, 25): for i in range(0, 25):
stack.append(word_freqs[i]) (w, f) = stack.pop()
print stack[0][0], ' - ', stack[0][1] print w, ' - ', f
stack.pop()