Improvements to Forth style

2013-11-06 10:13:41 -08:00
parent 912864de95
commit c88a0844ad
1 changed files with 27 additions and 27 deletions
--- a/02-go-forth/tf-02.py
+++ b/02-go-forth/tf-02.py
@@ -7,6 +7,12 @@ import sys, re, operator, string
 #
 stack = []

+#
+# The heap. Let's make it an associative array
+# mapping names to data (i.e. variables)
+#
+heap = {}
+
 #
 # The new "words" of our program
 #
@@ -16,8 +22,7 @@ def read_file():
    contents of the file as a string.
    Path to file expected to be on the stack
    """
-    path_to_file = stack.pop()
-    f = open(path_to_file)
+    f = open(stack.pop())
    # Push the result onto the stack
    stack.append([f.read()])
    f.close()
@@ -25,42 +30,39 @@ def read_file():
 def filter_chars():
    """
    Takes a string and returns a copy with all nonalphanumeric 
-    chars replaced by white space. The data is assumed to be on the stack.
+    chars replaced by white space. The data is assumed to be 
+    on the stack.
    """
-    str_data = stack.pop()
    # This is not in style. RE is too high-level, but using it
-    # for doing this fast and short. 
+    # for doing this fast and short. Push the pattern onto the stack
    stack.append(re.compile('[\W_]+'))
-    pattern = stack.pop()
    # Push the result onto the stack
-    stack.append([pattern.sub(' ', str_data[0]).lower()])
+    stack.append([stack.pop().sub(' ', stack.pop()[0]).lower()])

 def scan():
    """
    Takes a string and scans for words, returning
    a list of words. The data is assumed to be on the stack.
    """
-    str_data = stack.pop()
-    # Push the result onto the stack
+    # Push the result onto the stack.
    # Again, split() is too high-level for this style, but using it
    # for doing this fast and short. Left as exercise.
-    stack.append(str_data[0].split())
+    stack.append(stack.pop()[0].split())

 def remove_stop_words():
    """ 
    Takes a list of words and returns a copy with all stop 
    words removed. The data is assumed to be on the stack.
    """
-    word_list = stack.pop()
    f = open('../stop_words.txt')
-    stack.append([f.read().split(',')])
+    stack.append(f.read().split(','))
    f.close()
    # add single-letter words
-    stack[0][0].extend(list(string.ascii_lowercase))
-    stop_words = stack.pop()[0]
+    stack[1].extend(list(string.ascii_lowercase))
+    heap['stop_words'] = stack.pop()
    # Again, this is too high-level for this style, but using it
    # for doing this fast and short. Left as exercise.
-    stack.append([w for w in word_list if not w in stop_words])
+    stack.append([w for w in stack.pop() if not w in heap['stop_words']])

 def frequencies():
    """
@@ -68,26 +70,25 @@ def frequencies():
    words with frequencies of occurrence. The word list is assumed
    to be on the stack.
    """
-    word_list = stack.pop()
-    word_freqs = {}
-    i = len(word_list)
+    heap['word_list'] = stack.pop()
+    heap['word_freqs'] = {}
    # A little flavour of the real Forth style here...
-    for wi in range(0, len(word_list)):
-        stack.append(word_list[wi]) # Push the word, stack[0]
+    for wi in range(0, len(heap['word_list'])):
+        stack.append(heap['word_list'][wi]) # Push the word, stack[0]
        # ... but the following line is not in style, because the naive implementation 
        # would be too slow, or we'd need to implement faster, hash-based search
-        if stack[0] in word_freqs:
-            stack.append((word_freqs[stack[0]], word_freqs[stack[0]])) # (w, f) in stack[1]
+        if stack[0] in heap['word_freqs']:
+            stack.append((heap['word_freqs'][stack[0]], heap['word_freqs'][stack[0]])) # Push (f, f) as stack[1]; the next line turns it into (w, f + 1)
            stack[1] = (stack[0], stack[1][1] + 1) # Swap the tuple the stack with a new one
-            word_freqs[stack[-1][0]] = stack[-1][1]  # Load the updated freq back onto the heap
+            heap['word_freqs'][stack[-1][0]] = stack[-1][1]  # Load the updated freq back onto the heap
        else:
            stack.append((stack[0], 1)) # Push the tuple (w, 1)
-            word_freqs[stack[-1][0]] = stack[-1][1] # Load it back to the heap
+            heap['word_freqs'][stack[-1][0]] = stack[-1][1] # Load it back to the heap
        stack.pop() # Pop (w, f)
        stack.pop() # Pop word

    # Push the result onto the stack
-    stack.append(word_freqs)
+    stack.append(heap['word_freqs'])

 def sort():
    """
@@ -95,9 +96,8 @@ def sort():
    and returns a list of pairs where the entries are
    sorted by frequency 
    """
-    word_freq = stack.pop()
    # Not in style, left as exercise
-    stack.append(sorted(word_freq.iteritems(), key=operator.itemgetter(1), reverse=True))
+    stack.append(sorted(stack.pop().iteritems(), key=operator.itemgetter(1), reverse=True))


 #