diff --git a/01-monolithic/README.md b/01-monolithic/README.md
new file mode 100644
index 0000000..9625b63
--- /dev/null
+++ b/01-monolithic/README.md
@@ -0,0 +1,11 @@
+exercises-in-programming-style
+==============================
+
+Constraints:
+
+- No abstractions
+
+Alternative names of this style:
+
+- Labyrinth
+- Brain dump
diff --git a/01-monolithic/tf-01.py b/01-monolithic/tf-01.py
new file mode 100644
index 0000000..2ebf15b
--- /dev/null
+++ b/01-monolithic/tf-01.py
@@ -0,0 +1,49 @@
+import sys, string
+# the global list of [word, frequency] pairs
+word_freqs = []
+# the list of stop words
+with open('../stop_words.txt') as f:
+    stop_words = f.read().split(',')
+stop_words.extend(list(string.ascii_lowercase))
+
+# iterate through the file one line at a time
+for line in open(sys.argv[1]):
+    start_char = None
+    i = 0
+    for c in line:
+        if start_char is None:
+            if c.isalnum():
+                # We found the start of a word
+                start_char = i
+        else:
+            if not c.isalnum():
+                # We found the end of a word. Process it
+                found = False
+                word = line[start_char:i].lower()
+                # Ignore stop words
+                if word not in stop_words:
+                    pair_index = 0
+                    # Let's see if it already exists
+                    for pair in word_freqs:
+                        if word == pair[0]:
+                            pair[1] += 1
+                            found = True
+                            found_at = pair_index
+                            break
+                        pair_index += 1
+                    if not found:
+                        word_freqs.append([word, 1])
+                    elif len(word_freqs) > 1:
+                        # We may need to reorder
+                        for n in reversed(range(pair_index)):
+                            if word_freqs[pair_index][1] > word_freqs[n][1]:
+                                # swap
+                                word_freqs[n], word_freqs[pair_index] = word_freqs[pair_index], word_freqs[n]
+                                pair_index = n
+                # Let's reset
+                start_char = None
+        i += 1
+
+for tf in word_freqs[0:25]:
+    print(tf[0], ' - ', tf[1])
+