Added dataspaces style

2013-10-15 16:29:32 -07:00
parent c19f6d4263
commit 25b681a275
2 changed files with 67 additions and 0 deletions
--- a/15-dataspaces/README.md
+++ b/15-dataspaces/README.md
@@ -0,0 +1,15 @@
+Style #15
+==============================
+
+Constraints:
+
+- Existence of one or more data spaces where processes store and
+  retrieve data
+
+- Existence of one or more processes that execute concurrently
+
+Possible names:
+
+- Dataspaces
+- Linda
+
--- a/15-dataspaces/tf-15.py
+++ b/15-dataspaces/tf-15.py
@@ -0,0 +1,52 @@
+import re, sys, operator, Queue, threading
+
+# Two data spaces: words waiting to be counted, and per-worker partial counts
+word_space = Queue.Queue()
+freq_space = Queue.Queue()
+with open('../stop_words.txt') as stop_file:  # comma-separated; closed after read
+    stopwords = set(stop_file.read().split(','))
+
+# Worker function: takes words from the word space until get() has
+# waited 1 second without receiving one, counting every word that is
+# not a stop word, then puts its partial counts in the frequency space
+def process_words():
+    tally = {}
+    while True:
+        try:
+            token = word_space.get(timeout=1)
+        except Queue.Empty:
+            break  # nothing arrived within the timeout: stop consuming
+        if token in stopwords:
+            continue
+        tally[token] = tally.get(token, 0) + 1
+    # One partial result per worker
+    freq_space.put(tally)
+
+
+# Let's have this thread populate the word space first. The workers
+# give up after waiting 1 second for a word, so launching them before
+# the space is filled could let them exit while the file is still
+# being read, losing words.
+with open(sys.argv[1]) as f:
+    for word in re.findall('[a-z]{2,}', f.read().lower()):
+        word_space.put(word)
+
+# Let's create the workers and launch them at their jobs
+workers = [threading.Thread(target=process_words) for i in range(5)]
+for t in workers:
+    t.start()
+
+# Let's wait for the workers to finish
+for t in workers:
+    t.join()
+
+# Let's merge the partial frequency results by consuming
+# frequency data from the frequency space
+word_freqs = {}
+while not freq_space.empty():
+    freqs = freq_space.get()
+    for (k, v) in freqs.iteritems():
+        word_freqs[k] = word_freqs.get(k, 0) + v
+
+for (w, c) in sorted(word_freqs.iteritems(), key=operator.itemgetter(1), reverse=True)[:25]:
+    print w, '-', c