Massive renaming!

This commit is contained in:
Crista Lopes
2019-08-12 14:38:16 -07:00
parent e6c1238a56
commit 61d5f74ad9
90 changed files with 0 additions and 0 deletions

17
30-dataspaces/README.md Normal file
View File

@@ -0,0 +1,17 @@
Style #29
==============================
Constraints:
- Existence of one or more units that execute concurrently
- Existence of one or more data spaces where concurrent units store and
retrieve data
- No direct data exchanges between the concurrent units, other than via the data spaces
Possible names:
- Dataspaces
- Linda

52
30-dataspaces/tf-30.py Executable file
View File

@@ -0,0 +1,52 @@
#!/usr/bin/env python
import re, sys, operator, queue, threading
# Two data spaces
word_space = queue.Queue()
freq_space = queue.Queue()
stopwords = set(open('../stop_words.txt').read().split(','))
# Worker function that consumes words from the word space
# and sends partial results to the frequency space
def process_words():
word_freqs = {}
while True:
try:
word = word_space.get(timeout=1)
except queue.Empty:
break
if not word in stopwords:
if word in word_freqs:
word_freqs[word] += 1
else:
word_freqs[word] = 1
freq_space.put(word_freqs)
# Let's have this thread populate the word space
for word in re.findall('[a-z]{2,}', open(sys.argv[1]).read().lower()):
word_space.put(word)
# Let's create the workers and launch them at their jobs
workers = []
for i in range(5):
workers.append(threading.Thread(target = process_words))
[t.start() for t in workers]
# Let's wait for the workers to finish
[t.join() for t in workers]
# Let's merge the partial frequency results by consuming
# frequency data from the frequency space
word_freqs = {}
while not freq_space.empty():
freqs = freq_space.get()
for (k, v) in freqs.items():
if k in word_freqs:
count = sum(item[k] for item in [freqs, word_freqs])
else:
count = freqs[k]
word_freqs[k] = count
for (w, c) in sorted(word_freqs.items(), key=operator.itemgetter(1), reverse=True)[:25]:
print(w, '-', c)