Added style #2

2013-09-22 08:51:35 -07:00
parent 27a419e6d2
commit 3468b95a94
4 changed files with 21 additions and 2 deletions
--- a/01-monolith/README.md
+++ b/01-monolith/README.md
@@ -0,0 +1,12 @@
+Style #1 
+==============================
+
+Constraints:
+
+- No abstractions
+
+Possible names:
+
+- Monolith
+- Labyrinth
+- Brain dump
--- a/01-monolith/tf-01.py
+++ b/01-monolith/tf-01.py
@@ -0,0 +1,49 @@
+import sys, string
+# the global list of [word, frequency] pairs
+word_freqs = []
+# the list of stop words
+with open('../stop_words.txt') as f:
+    stop_words = f.read().split(',')
+stop_words.extend(list(string.ascii_lowercase))
+
+# iterate through the file one line at a time 
+for line in open(sys.argv[1]):
+    start_char = None
+    i = 0
+    for c in line:
+        if start_char == None:
+            if c.isalnum():
+                # We found the start of a word
+                start_char = i
+        else:
+            if not c.isalnum():
+                # We found the end of a word. Process it
+                found = False
+                word = line[start_char:i].lower()
+                # Ignore stop words
+                if word not in stop_words:
+                    pair_index = 0
+                    # Let's see if it already exists
+                    for pair in word_freqs:
+                        if word == pair[0]:
+                            pair[1] += 1
+                            found = True
+                            found_at = pair_index
+                            break
+                        pair_index += 1
+                    if not found:
+                        word_freqs.append([word, 1])
+                    elif len(word_freqs) > 1:
+                        # We may need to reorder
+                        for n in reversed(range(pair_index)):
+                            if word_freqs[pair_index][1] > word_freqs[n][1]:
+                                # swap
+                                word_freqs[n], word_freqs[pair_index] = word_freqs[pair_index], word_freqs[n]
+                                pair_index = n
+                # Let's reset
+                start_char = None
+        i += 1
+
+for tf in word_freqs[0:25]:
+    print tf[0], ' - ', tf[1]
+