Improvements to Forth style

This commit is contained in:
Crista Lopes
2013-11-06 10:13:41 -08:00
parent 912864de95
commit c88a0844ad

View File

@@ -7,6 +7,12 @@ import sys, re, operator, string
# #
stack = []

#
# The heap. Let's make it an associative array
# mapping names to data (i.e. variables)
#
heap = {}
# #
# The new "words" of our program # The new "words" of our program
# #
@@ -16,8 +22,7 @@ def read_file():
contents of the file as a string. contents of the file as a string.
Path to file expected to be on the stack Path to file expected to be on the stack
""" """
path_to_file = stack.pop() f = open(stack.pop())
f = open(path_to_file)
# Push the result onto the stack # Push the result onto the stack
stack.append([f.read()]) stack.append([f.read()])
f.close() f.close()
@@ -25,42 +30,39 @@ def read_file():
def filter_chars():
    """
    Takes a string and returns a copy with all nonalphanumeric
    chars replaced by white space. The data is assumed to be
    on the stack.

    The top of the stack must be a one-element list holding the
    string. It is popped, every run of non-word characters and
    underscores is replaced by a single space, the text is
    lower-cased, and the result is pushed back as a one-element list.
    """
    # This is not in style. RE is too high-level, but using it
    # for doing this fast and short. Push the pattern onto stack.
    # Raw string: '\W' is not a valid escape in a plain literal.
    stack.append(re.compile(r'[\W_]+'))
    # Push the result onto the stack. Python evaluates left to right,
    # so the first pop() yields the pattern and the second the data.
    stack.append([stack.pop().sub(' ', stack.pop()[0]).lower()])
def scan():
    """
    Takes a string and scans for words, returning
    a list of words. The data is assumed to be on the stack.

    The top of the stack must be a one-element list holding the
    string. It is popped and the list of whitespace-separated
    words is pushed in its place.
    """
    # Push the result onto the stack.
    # Again, split() is too high-level for this style, but using it
    # for doing this fast and short. Left as exercise.
    stack.append(stack.pop()[0].split())
def remove_stop_words():
    """
    Takes a list of words and returns a copy with all stop
    words removed. The data is assumed to be on the stack.

    The stop words are read from '../stop_words.txt' (comma
    separated), extended with every single lowercase ASCII letter,
    and stored on the heap under 'stop_words'. The word list on top
    of the stack is then popped and the filtered list is pushed.
    """
    # 'with' closes the file even if reading fails
    with open('../stop_words.txt') as f:
        stack.append(f.read().split(','))
    # add single-letter words; stack[-1] is the stop-word list just
    # pushed, whatever else happens to sit below it on the stack
    stack[-1].extend(string.ascii_lowercase)
    heap['stop_words'] = stack.pop()
    # Again, this is too high-level for this style, but using it
    # for doing this fast and short. Left as exercise.
    stack.append([w for w in stack.pop() if w not in heap['stop_words']])
def frequencies():
    """
    Takes a list of words and builds a dictionary mapping each
    word to its number of occurrences. The word list is assumed
    to be on the stack.

    The word list is popped and kept on the heap under 'word_list';
    the dictionary is built on the heap under 'word_freqs' and is
    pushed onto the stack at the end.
    """
    heap['word_list'] = stack.pop()
    heap['word_freqs'] = {}
    # A little flavour of the real Forth style here...
    # All stack accesses are relative to the top (negative indices),
    # so items already sitting below on the stack are left alone.
    for word in heap['word_list']:
        stack.append(word)  # Push the word
        # ... but the following line is not in style, because the naive implementation
        # would be too slow, or we'd need to implement faster, hash-based search
        if stack[-1] in heap['word_freqs']:
            stack.append((stack[-1], heap['word_freqs'][stack[-1]]))  # Push (w, f)
            stack[-1] = (stack[-1][0], stack[-1][1] + 1)  # Swap the tuple on the stack with a new one
            heap['word_freqs'][stack[-1][0]] = stack[-1][1]  # Load the updated freq back onto the heap
        else:
            stack.append((stack[-1], 1))  # Push the tuple (w, 1)
            heap['word_freqs'][stack[-1][0]] = stack[-1][1]  # Load it back to the heap
        stack.pop()  # Pop (w, f)
        stack.pop()  # Pop word
    # Push the result onto the stack
    stack.append(heap['word_freqs'])
def sort():
    """
    Takes a dictionary (assumed to be on top of the stack)
    and returns a list of pairs where the entries are
    sorted by frequency

    The dictionary is popped and the list of (key, value) pairs,
    ordered by value from highest to lowest, is pushed in its place.
    """
    # Not in style, left as exercise.
    # items() rather than iteritems(): it exists on Python 2 and 3.
    stack.append(sorted(stack.pop().items(), key=operator.itemgetter(1), reverse=True))
# #