diff --git a/02-go-forth/tf-02.py b/02-go-forth/tf-02.py index c8fd98c..6a254bd 100755 --- a/02-go-forth/tf-02.py +++ b/02-go-forth/tf-02.py @@ -7,6 +7,12 @@ import sys, re, operator, string # stack = [] +# +# The heap. Let's make it an associative array +# mapping names to data (i.e. variables) +# +heap = {} + # # The new "words" of our program # @@ -16,8 +22,7 @@ def read_file(): contents of the file as a string. Path to file expected to be on the stack """ - path_to_file = stack.pop() - f = open(path_to_file) + f = open(stack.pop()) # Push the result onto the stack stack.append([f.read()]) f.close() @@ -25,42 +30,39 @@ def read_file(): def filter_chars(): """ Takes a string and returns a copy with all nonalphanumeric - chars replaced by white space. The data is assumed to be on the stack. + chars replaced by white space. The data is assumed to be + on the stack. """ - str_data = stack.pop() # This is not in style. RE is too high-level, but using it - # for doing this fast and short. + # for doing this fast and short. Push the pattern onto stack stack.append(re.compile('[\W_]+')) - pattern = stack.pop() # Push the result onto the stack - stack.append([pattern.sub(' ', str_data[0]).lower()]) + stack.append([stack.pop().sub(' ', stack.pop()[0]).lower()]) def scan(): """ Takes a string and scans for words, returning a list of words. The data is assumed to be on the stack. """ - str_data = stack.pop() - # Push the result onto the stack + # Push the result onto the stack. # Again, split() is too high-level for this style, but using it # for doing this fast and short. Left as exercise. - stack.append(str_data[0].split()) + stack.append(stack.pop()[0].split()) def remove_stop_words(): """ Takes a list of words and returns a copy with all stop words removed. The data is assumed to be on the stack. """ - word_list = stack.pop() f = open('../stop_words.txt') - stack.append([f.read().split(',')]) + stack.append(f.read().split(',')) f.close() # add single-letter words - stack[0][0].extend(list(string.ascii_lowercase)) - stop_words = stack.pop()[0] + stack[1].extend(list(string.ascii_lowercase)) + heap['stop_words'] = stack.pop() # Again, this is too high-level for this style, but using it # for doing this fast and short. Left as exercise. - stack.append([w for w in word_list if not w in stop_words]) + stack.append([w for w in stack.pop() if not w in heap['stop_words']]) def frequencies(): """ @@ -68,26 +70,25 @@ def frequencies(): words with frequencies of occurrence. The word list is assumed to be on the stack. """ - word_list = stack.pop() - word_freqs = {} - i = len(word_list) + heap['word_list'] = stack.pop() + heap['word_freqs'] = {} # A little flavour of the real Forth style here... - for wi in range(0, len(word_list)): - stack.append(word_list[wi]) # Push the word, stack[0] + for wi in range(0, len(heap['word_list'])): + stack.append(heap['word_list'][wi]) # Push the word, stack[0] # ... but the following line is not in style, because the naive implementation # would be too slow, or we'd need to implement faster, hash-based search - if stack[0] in word_freqs: - stack.append((word_freqs[stack[0]], word_freqs[stack[0]])) # (w, f) in stack[1] + if stack[0] in heap['word_freqs']: + stack.append((heap['word_freqs'][stack[0]], heap['word_freqs'][stack[0]])) # (w, f) in stack[1] stack[1] = (stack[0], stack[1][1] + 1) # Swap the tuple the stack with a new one - word_freqs[stack[-1][0]] = stack[-1][1] # Load the updated freq back onto the heap + heap['word_freqs'][stack[-1][0]] = stack[-1][1] # Load the updated freq back onto the heap else: stack.append((stack[0], 1)) # Push the tuple (w, 1) - word_freqs[stack[-1][0]] = stack[-1][1] # Load it back to the heap + heap['word_freqs'][stack[-1][0]] = stack[-1][1] # Load it back to the heap stack.pop() # Pop (w, f) stack.pop() # Pop word # Push the result onto the stack - stack.append(word_freqs) + stack.append(heap['word_freqs']) def sort(): """ @@ -95,9 +96,8 @@ def sort(): and returns a list of pairs where the entries are sorted by frequency """ - word_freq = stack.pop() # Not in style, left as exercise - stack.append(sorted(word_freq.iteritems(), key=operator.itemgetter(1), reverse=True)) + stack.append(sorted(stack.pop().iteritems(), key=operator.itemgetter(1), reverse=True)) #