diff --git a/15-dataspaces/README.md b/15-dataspaces/README.md
new file mode 100644
index 0000000..f9a82ec
--- /dev/null
+++ b/15-dataspaces/README.md
@@ -0,0 +1,15 @@
+Style #15
+==============================
+
+Constraints:
+
+- Existence of one or more data spaces where processes store and
+  retrieve data
+
+- Existence of one or more processes that execute concurrently
+
+Possible names:
+
+- Dataspaces
+- Linda
+
diff --git a/15-dataspaces/tf-15.py b/15-dataspaces/tf-15.py
new file mode 100644
index 0000000..bbce10e
--- /dev/null
+++ b/15-dataspaces/tf-15.py
@@ -0,0 +1,54 @@
+import re, sys, operator, Queue, threading
+
+# Two data spaces
+word_space = Queue.Queue()
+freq_space = Queue.Queue()
+
+# Read the stop words up front and close the file right away
+with open('../stop_words.txt') as stop_file:
+    stopwords = set(stop_file.read().split(','))
+
+# Worker function that consumes words from the word space
+# and sends partial results to the frequency space
+def process_words():
+    word_freqs = {}
+    while True:
+        try:
+            word = word_space.get(timeout=1)
+        except Queue.Empty:
+            # No word for a full second: consider the word space drained
+            break
+        if word not in stopwords:
+            word_freqs[word] = word_freqs.get(word, 0) + 1
+    # Publish this worker's partial counts (possibly empty)
+    freq_space.put(word_freqs)
+
+
+# Let's have this thread populate the word space first: the workers
+# quit after one second without a word, so starting them before the
+# input has been read and tokenized could make them exit too early
+with open(sys.argv[1]) as input_file:
+    for word in re.findall('[a-z]{2,}', input_file.read().lower()):
+        word_space.put(word)
+
+# Let's create the workers and launch them at their jobs
+workers = []
+for i in range(5):
+    workers.append(threading.Thread(target = process_words))
+for t in workers:
+    t.start()
+
+# Let's wait for the workers to finish
+for t in workers:
+    t.join()
+
+# Let's merge the partial frequency results by consuming
+# frequency data from the frequency space
+word_freqs = {}
+while not freq_space.empty():
+    freqs = freq_space.get()
+    for (k, v) in freqs.iteritems():
+        word_freqs[k] = word_freqs.get(k, 0) + v
+
+for (w, c) in sorted(word_freqs.iteritems(), key=operator.itemgetter(1), reverse=True)[:25]:
+    print w, '-', c