Improvements to Forth style

This commit is contained in:
Crista Lopes
2013-11-06 10:13:41 -08:00
parent 912864de95
commit c88a0844ad

View File

@@ -7,6 +7,12 @@ import sys, re, operator, string
#
stack = []
#
# The heap. Let's make it an associative array
# mapping names to data (i.e. variables)
#
heap = {}
#
# The new "words" of our program
#
@@ -16,8 +22,7 @@ def read_file():
contents of the file as a string.
Path to file expected to be on the stack
"""
path_to_file = stack.pop()
f = open(path_to_file)
f = open(stack.pop())
# Push the result onto the stack
stack.append([f.read()])
f.close()
@@ -25,42 +30,39 @@ def read_file():
def filter_chars():
"""
Takes a string and returns a copy with all nonalphanumeric
chars replaced by white space. The data is assumed to be on the stack.
chars replaced by white space. The data is assumed to be
on the stack.
"""
str_data = stack.pop()
# This is not in style. RE is too high-level, but using it
# for doing this fast and short.
# for doing this fast and short. Push the pattern onto stack
stack.append(re.compile('[\W_]+'))
pattern = stack.pop()
# Push the result onto the stack
stack.append([pattern.sub(' ', str_data[0]).lower()])
stack.append([stack.pop().sub(' ', stack.pop()[0]).lower()])
def scan():
"""
Takes a string and scans for words, returning
a list of words. The data is assumed to be on the stack.
"""
str_data = stack.pop()
# Push the result onto the stack
# Push the result onto the stack.
# Again, split() is too high-level for this style, but using it
# for doing this fast and short. Left as exercise.
stack.append(str_data[0].split())
stack.append(stack.pop()[0].split())
def remove_stop_words():
"""
Takes a list of words and returns a copy with all stop
words removed. The data is assumed to be on the stack.
"""
word_list = stack.pop()
f = open('../stop_words.txt')
stack.append([f.read().split(',')])
stack.append(f.read().split(','))
f.close()
# add single-letter words
stack[0][0].extend(list(string.ascii_lowercase))
stop_words = stack.pop()[0]
stack[1].extend(list(string.ascii_lowercase))
heap['stop_words'] = stack.pop()
# Again, this is too high-level for this style, but using it
# for doing this fast and short. Left as exercise.
stack.append([w for w in word_list if not w in stop_words])
stack.append([w for w in stack.pop() if not w in heap['stop_words']])
def frequencies():
"""
@@ -68,26 +70,25 @@ def frequencies():
words with frequencies of occurrence. The word list is assumed
to be on the stack.
"""
word_list = stack.pop()
word_freqs = {}
i = len(word_list)
heap['word_list'] = stack.pop()
heap['word_freqs'] = {}
# A little flavour of the real Forth style here...
for wi in range(0, len(word_list)):
stack.append(word_list[wi]) # Push the word, stack[0]
for wi in range(0, len(heap['word_list'])):
stack.append(heap['word_list'][wi]) # Push the word, stack[0]
# ... but the following line is not in style, because the naive implementation
# would be too slow, or we'd need to implement faster, hash-based search
if stack[0] in word_freqs:
stack.append((word_freqs[stack[0]], word_freqs[stack[0]])) # placeholder (f, f) in stack[1]
if stack[0] in heap['word_freqs']:
stack.append((heap['word_freqs'][stack[0]], heap['word_freqs'][stack[0]])) # placeholder (f, f) in stack[1]
stack[1] = (stack[0], stack[1][1] + 1) # Swap the tuple on the stack with a new one
word_freqs[stack[-1][0]] = stack[-1][1] # Load the updated freq back onto the heap
heap['word_freqs'][stack[-1][0]] = stack[-1][1] # Load the updated freq back onto the heap
else:
stack.append((stack[0], 1)) # Push the tuple (w, 1)
word_freqs[stack[-1][0]] = stack[-1][1] # Load it back to the heap
heap['word_freqs'][stack[-1][0]] = stack[-1][1] # Load it back to the heap
stack.pop() # Pop (w, f)
stack.pop() # Pop word
# Push the result onto the stack
stack.append(word_freqs)
stack.append(heap['word_freqs'])
def sort():
"""
@@ -95,9 +96,8 @@ def sort():
and returns a list of pairs where the entries are
sorted by frequency
"""
word_freq = stack.pop()
# Not in style, left as exercise
stack.append(sorted(word_freq.iteritems(), key=operator.itemgetter(1), reverse=True))
stack.append(sorted(stack.pop().iteritems(), key=operator.itemgetter(1), reverse=True))
#