Changed the name of the 02 folder to reflect the name of the style

This commit is contained in:
Crista Lopes
2013-10-25 16:22:18 -07:00
parent 6de104653d
commit 8b5fbdc77a
2 changed files with 0 additions and 0 deletions

23
02-go-forth/README.md Normal file
View File

@@ -0,0 +1,23 @@
Style #2
==============================
Constraints:
- Existence of an all-important data stack. All operations
(conditionals, arithmetic, etc.) are done over data on the stack
- Existence of a heap for storing data that's needed for later
operations. The heap data can be associated with names
(i.e. variables). As said above, all operations are done over
data on the stack, so any heap data that needs to be operated upon
needs to be moved first to the stack and eventually back to the heap
- Abstraction in the form of user-defined "procedures" (i.e. names
bound to a set of instructions), which may be called something else
entirely
Possible names:
- Go-Forth (as in the Forth programming language)
- Stack machine

116
02-go-forth/tf-02.py Normal file
View File

@@ -0,0 +1,116 @@
#!/usr/bin/env python
import sys, re, operator, string
#
# The all-important data stack
# (a plain list used LIFO: the words below pop() their input from its
# end and, except for sort(), append() their result back onto it)
#
stack = []
#
# The new "words" of our program
#
def read_file():
    """
    Pop a file path off the stack and push the file's contents.

    The path is expected to be on top of the stack. The entire
    contents of the file are pushed back as a one-element list
    holding a single string.

    Raises IOError/OSError if the file cannot be opened or read.
    """
    path_to_file = stack.pop()
    # 'with' guarantees the file is closed even if read() raises
    with open(path_to_file) as f:
        # Push the result onto the stack
        stack.append([f.read()])
def filter_chars():
    """
    Pop a boxed string and push a lower-cased copy in which every run
    of non-alphanumeric characters (underscore included) is replaced
    by a single space.

    The data is expected to be on top of the stack as a one-element
    list holding the string; the result is pushed in the same shape.
    """
    str_data = stack.pop()
    # This is not in style. RE is too high-level, but using it
    # for doing this fast and short.
    # Raw string: the backslash must reach the regex engine untouched,
    # and a non-raw literal here is an invalid escape on modern Python.
    stack.append(re.compile(r'[\W_]+'))
    pattern = stack.pop()
    # Push the result onto the stack
    stack.append([pattern.sub(' ', str_data[0]).lower()])
def scan():
    """
    Pop a boxed string off the stack and push the list of its
    whitespace-separated words.

    The string is expected on top of the stack as the first element
    of a list.
    """
    boxed_text = stack.pop()
    # split() is too high-level for this style, but it keeps the
    # word fast and short. Doing it by hand is left as an exercise.
    words = boxed_text[0].split()
    # Push the result onto the stack
    stack.append(words)
def remove_stop_words():
    """
    Pop a list of words and push a copy with all stop words removed.

    The word list is expected to be on top of the stack. Stop words
    are read from the comma-separated file '../stop_words.txt'
    (resolved against the current working directory); every single
    lowercase ASCII letter is treated as a stop word as well.

    Raises IOError/OSError if the stop-words file cannot be read.
    """
    word_list = stack.pop()
    # 'with' guarantees the file is closed even if read() raises
    with open('../stop_words.txt') as f:
        stack.append([f.read().split(',')])
    # add single-letter words
    # The stop-word list was just pushed, so it is the TOP of the
    # stack (stack[-1]); indexing from the bottom only works when
    # nothing else happens to be on the stack.
    stack[-1][0].extend(list(string.ascii_lowercase))
    # A set gives constant-time membership tests in the filter below
    stop_words = set(stack.pop()[0])
    # Again, this is too high-level for this style, but using it
    # for doing this fast and short. Left as exercise.
    stack.append([w for w in word_list if w not in stop_words])
def frequencies():
    """
    Pop a list of words and push a dictionary associating each word
    with its frequency of occurrence.

    The word list is expected to be on top of the stack. All scratch
    values are addressed relative to the top of the stack, so
    anything sitting below the word list is left untouched.
    """
    word_list = stack.pop()
    # The "heap": word -> count
    word_freqs = {}
    # A little flavour of the real Forth style here...
    for word in word_list:
        stack.append(word)  # Push the word; it is now stack[-1]
        # ... but the following line is not in style, because the naive implementation
        # would be too slow, or we'd need to implement faster, hash-based search
        if stack[-1] in word_freqs:
            # Push (w, f) with the current count from the heap
            stack.append((stack[-1], word_freqs[stack[-1]]))
            # Replace the tuple on top of the stack with the incremented one
            stack[-1] = (stack[-1][0], stack[-1][1] + 1)
        else:
            stack.append((stack[-1], 1))  # Push the tuple (w, 1)
        # Store the (new or updated) frequency back on the heap
        word_freqs[stack[-1][0]] = stack[-1][1]
        stack.pop()  # Pop (w, f)
        stack.pop()  # Pop word
    # Push the result onto the stack
    stack.append(word_freqs)
def sort():
    """
    Pop a dictionary of words and their frequencies and return a
    list of (word, frequency) pairs sorted by decreasing frequency.

    The dictionary is expected to be on top of the stack. Unlike the
    other words, the result is returned to the caller rather than
    pushed back onto the stack.
    """
    word_freq = stack.pop()
    # Not in style, left as exercise
    # items() exists on both Python 2 and 3 (iteritems() is 2-only)
    return sorted(word_freq.items(), key=operator.itemgetter(1), reverse=True)
#
# The main function
#
# Seed the stack with the input path given on the command line, then
# run the words in order; each one consumes what the previous one left
# on the stack.
stack.append(sys.argv[1])
read_file()
filter_chars()
scan()
remove_stop_words()
frequencies()
# sort() pops the frequency dictionary and returns the sorted pairs
word_freqs = sort()
# Report the 25 most frequent words. A single pre-formatted string
# prints the same text under the Python 2 print statement and the
# Python 3 print function.
for tf in word_freqs[0:25]:
    print("%s  -  %s" % (tf[0], tf[1]))