Improvements to Forth style
This commit is contained in:
@@ -7,6 +7,12 @@ import sys, re, operator, string
|
|||||||
#
|
#
|
||||||
stack = []
|
stack = []
|
||||||
|
|
||||||
|
#
|
||||||
|
# The heap. Let's make it an associative array
|
||||||
|
# mapping names to data (i.e. variables)
|
||||||
|
#
|
||||||
|
heap = {}
|
||||||
|
|
||||||
#
|
#
|
||||||
# The new "words" of our program
|
# The new "words" of our program
|
||||||
#
|
#
|
||||||
@@ -16,8 +22,7 @@ def read_file():
|
|||||||
contents of the file as a string.
|
contents of the file as a string.
|
||||||
Path to file expected to be on the stack
|
Path to file expected to be on the stack
|
||||||
"""
|
"""
|
||||||
path_to_file = stack.pop()
|
f = open(stack.pop())
|
||||||
f = open(path_to_file)
|
|
||||||
# Push the result onto the stack
|
# Push the result onto the stack
|
||||||
stack.append([f.read()])
|
stack.append([f.read()])
|
||||||
f.close()
|
f.close()
|
||||||
@@ -25,42 +30,39 @@ def read_file():
|
|||||||
def filter_chars():
|
def filter_chars():
|
||||||
"""
|
"""
|
||||||
Takes a string and returns a copy with all nonalphanumeric
|
Takes a string and returns a copy with all nonalphanumeric
|
||||||
chars replaced by white space. The data is assumed to be on the stack.
|
chars replaced by white space. The data is assumed to be
|
||||||
|
on the stack.
|
||||||
"""
|
"""
|
||||||
str_data = stack.pop()
|
|
||||||
# This is not in style. RE is too high-level, but using it
|
# This is not in style. RE is too high-level, but using it
|
||||||
# for doing this fast and short.
|
# for doing this fast and short. Push the pattern onto stack
|
||||||
stack.append(re.compile('[\W_]+'))
|
stack.append(re.compile('[\W_]+'))
|
||||||
pattern = stack.pop()
|
|
||||||
# Push the result onto the stack
|
# Push the result onto the stack
|
||||||
stack.append([pattern.sub(' ', str_data[0]).lower()])
|
stack.append([stack.pop().sub(' ', stack.pop()[0]).lower()])
|
||||||
|
|
||||||
def scan():
|
def scan():
|
||||||
"""
|
"""
|
||||||
Takes a string and scans for words, returning
|
Takes a string and scans for words, returning
|
||||||
a list of words. The data is assumed to be on the stack.
|
a list of words. The data is assumed to be on the stack.
|
||||||
"""
|
"""
|
||||||
str_data = stack.pop()
|
# Push the result onto the stack.
|
||||||
# Push the result onto the stack
|
|
||||||
# Again, split() is too high-level for this style, but using it
|
# Again, split() is too high-level for this style, but using it
|
||||||
# for doing this fast and short. Left as exercise.
|
# for doing this fast and short. Left as exercise.
|
||||||
stack.append(str_data[0].split())
|
stack.append(stack.pop()[0].split())
|
||||||
|
|
||||||
def remove_stop_words():
|
def remove_stop_words():
|
||||||
"""
|
"""
|
||||||
Takes a list of words and returns a copy with all stop
|
Takes a list of words and returns a copy with all stop
|
||||||
words removed. The data is assumed to be on the stack.
|
words removed. The data is assumed to be on the stack.
|
||||||
"""
|
"""
|
||||||
word_list = stack.pop()
|
|
||||||
f = open('../stop_words.txt')
|
f = open('../stop_words.txt')
|
||||||
stack.append([f.read().split(',')])
|
stack.append(f.read().split(','))
|
||||||
f.close()
|
f.close()
|
||||||
# add single-letter words
|
# add single-letter words
|
||||||
stack[0][0].extend(list(string.ascii_lowercase))
|
stack[1].extend(list(string.ascii_lowercase))
|
||||||
stop_words = stack.pop()[0]
|
heap['stop_words'] = stack.pop()
|
||||||
# Again, this is too high-level for this style, but using it
|
# Again, this is too high-level for this style, but using it
|
||||||
# for doing this fast and short. Left as exercise.
|
# for doing this fast and short. Left as exercise.
|
||||||
stack.append([w for w in word_list if not w in stop_words])
|
stack.append([w for w in stack.pop() if not w in heap['stop_words']])
|
||||||
|
|
||||||
def frequencies():
|
def frequencies():
|
||||||
"""
|
"""
|
||||||
@@ -68,26 +70,25 @@ def frequencies():
|
|||||||
words with frequencies of occurrence. The word list is assumed
|
words with frequencies of occurrence. The word list is assumed
|
||||||
to be on the stack.
|
to be on the stack.
|
||||||
"""
|
"""
|
||||||
word_list = stack.pop()
|
heap['word_list'] = stack.pop()
|
||||||
word_freqs = {}
|
heap['word_freqs'] = {}
|
||||||
i = len(word_list)
|
|
||||||
# A little flavour of the real Forth style here...
|
# A little flavour of the real Forth style here...
|
||||||
for wi in range(0, len(word_list)):
|
for wi in range(0, len(heap['word_list'])):
|
||||||
stack.append(word_list[wi]) # Push the word, stack[0]
|
stack.append(heap['word_list'][wi]) # Push the word, stack[0]
|
||||||
# ... but the following line is not in style, because the naive implementation
|
# ... but the following line is not in style, because the naive implementation
|
||||||
# would be too slow, or we'd need to implement faster, hash-based search
|
# would be too slow, or we'd need to implement faster, hash-based search
|
||||||
if stack[0] in word_freqs:
|
if stack[0] in heap['word_freqs']:
|
||||||
stack.append((word_freqs[stack[0]], word_freqs[stack[0]])) # (w, f) in stack[1]
|
stack.append((heap['word_freqs'][stack[0]], heap['word_freqs'][stack[0]])) # (w, f) in stack[1]
|
||||||
stack[1] = (stack[0], stack[1][1] + 1) # Swap the tuple on the stack with a new one
|
stack[1] = (stack[0], stack[1][1] + 1) # Swap the tuple on the stack with a new one
|
||||||
word_freqs[stack[-1][0]] = stack[-1][1] # Load the updated freq back onto the heap
|
heap['word_freqs'][stack[-1][0]] = stack[-1][1] # Load the updated freq back onto the heap
|
||||||
else:
|
else:
|
||||||
stack.append((stack[0], 1)) # Push the tuple (w, 1)
|
stack.append((stack[0], 1)) # Push the tuple (w, 1)
|
||||||
word_freqs[stack[-1][0]] = stack[-1][1] # Load it back to the heap
|
heap['word_freqs'][stack[-1][0]] = stack[-1][1] # Load it back to the heap
|
||||||
stack.pop() # Pop (w, f)
|
stack.pop() # Pop (w, f)
|
||||||
stack.pop() # Pop word
|
stack.pop() # Pop word
|
||||||
|
|
||||||
# Push the result onto the stack
|
# Push the result onto the stack
|
||||||
stack.append(word_freqs)
|
stack.append(heap['word_freqs'])
|
||||||
|
|
||||||
def sort():
|
def sort():
|
||||||
"""
|
"""
|
||||||
@@ -95,9 +96,8 @@ def sort():
|
|||||||
and returns a list of pairs where the entries are
|
and returns a list of pairs where the entries are
|
||||||
sorted by frequency
|
sorted by frequency
|
||||||
"""
|
"""
|
||||||
word_freq = stack.pop()
|
|
||||||
# Not in style, left as exercise
|
# Not in style, left as exercise
|
||||||
stack.append(sorted(word_freq.iteritems(), key=operator.itemgetter(1), reverse=True))
|
stack.append(sorted(stack.pop().iteritems(), key=operator.itemgetter(1), reverse=True))
|
||||||
|
|
||||||
|
|
||||||
#
|
#
|
||||||
|
|||||||
Reference in New Issue
Block a user