diff --git a/15-introspective/tf-15.py b/15-introspective/tf-15.py index 3ec896e..01f9553 100755 --- a/15-introspective/tf-15.py +++ b/15-introspective/tf-15.py @@ -1,32 +1,33 @@ #!/usr/bin/env python - import sys, re, operator, string, inspect # # The functions # +def read_stop_words(): + # Meta-level data: inspect.stack() and locals() + print "My name is " + inspect.stack()[0][3] + ", my arguments are " + str(locals().keys()) + print " and I'm being called from ", inspect.stack()[1][3] + with open('../stop_words.txt') as f: + stop_words = f.read().split(',') + stop_words.extend(list(string.ascii_lowercase)) + return stop_words + def extract_words(path_to_file): - """ - Takes a path to a file and returns the non-stop - words, after properly removing nonalphanumeric chars - and normalizing for lower case - """ - print "My name is " + inspect.stack()[0][3] + " and my arguments are " + str(locals().keys()) + # Meta-level data: inspect.stack() and locals() + print "My name is " + inspect.stack()[0][3] + ", my arguments are " + str(locals().keys()) + print " and I'm being called from ", inspect.stack()[1][3] with open(locals()['path_to_file']) as f: str_data = f.read() pattern = re.compile('[\W_]+') word_list = pattern.sub(' ', str_data).lower().split() - with open('../stop_words.txt') as f: - stop_words = f.read().split(',') - stop_words.extend(list(string.ascii_lowercase)) + stop_words = read_stop_words() return [w for w in word_list if not w in stop_words] def frequencies(word_list): - """ - Takes a list of words and returns a dictionary associating - words with frequencies of occurrence - """ - print "My name is " + inspect.stack()[0][3] + " and my arguments are " + str(locals().keys()) + # Meta-level data: inspect.stack() and locals() + print "My name is " + inspect.stack()[0][3] + ", my arguments are " + str(locals().keys()) + print " and I'm being called from ", inspect.stack()[1][3] word_freqs = {} for w in locals()['word_list']: if w in word_freqs: @@ -36,19 +37,19 @@ def frequencies(word_list): return word_freqs def sort(word_freq): - """ - Takes a dictionary of words and their frequencies - and returns a list of pairs where the entries are - sorted by frequency - """ + # Meta-level data: inspect.stack() and locals() print "My name is " + inspect.stack()[0][3] + " and my arguments are " + str(locals().keys()) + print " and I'm being called from ", inspect.stack()[1][3] return sorted(locals()['word_freq'].iteritems(), key=operator.itemgetter(1), reverse=True) # # The main function # -word_freqs = sort(frequencies(extract_words(sys.argv[1]))) +def main(): + word_freqs = sort(frequencies(extract_words(sys.argv[1]))) + for (w, c) in word_freqs[0:25]: + print w, ' - ', c -for tf in word_freqs[0:25]: - print tf[0], ' - ', tf[1] +if __name__ == "__main__": + main()