Tidy up, make line numbers consistent with text
This commit is contained in:
@@ -120,6 +120,6 @@ while True:
|
|||||||
|
|
||||||
for tf in data[0:25]: # elimination of symbol tf is exercise
|
for tf in data[0:25]: # elimination of symbol tf is exercise
|
||||||
if len(tf) == 2:
|
if len(tf) == 2:
|
||||||
print(tf[0], ' - ', tf[1])
|
print(tf[0], '-', tf[1])
|
||||||
# We're done
|
# We're done
|
||||||
word_freqs.close()
|
word_freqs.close()
|
||||||
|
|||||||
@@ -104,7 +104,7 @@ stack.append(0)
|
|||||||
# the last word there will be one item left
|
# the last word there will be one item left
|
||||||
while stack[-1] < 25 and len(stack) > 1:
|
while stack[-1] < 25 and len(stack) > 1:
|
||||||
heap['i'] = stack.pop()
|
heap['i'] = stack.pop()
|
||||||
(w, f) = stack.pop(); print(w, ' - ', f)
|
(w, f) = stack.pop(); print(w, '-', f)
|
||||||
stack.append(heap['i']); stack.append(1)
|
stack.append(heap['i']); stack.append(1)
|
||||||
stack.append(stack.pop() + stack.pop())
|
stack.append(stack.pop() + stack.pop())
|
||||||
|
|
||||||
|
|||||||
@@ -1,10 +1,10 @@
|
|||||||
import sys, string
|
import sys, string
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
# Example input: "Hello World!!"
|
# Example input: "Hello World!"
|
||||||
characters = np.array([' ']+list(open(sys.argv[1]).read())+[' '])
|
characters = np.array([' ']+list(open(sys.argv[1]).read())+[' '])
|
||||||
# Result: array([' ', 'H', 'e', 'l', 'l', 'o', ' ', ' ',
|
# Result: array([' ', 'H', 'e', 'l', 'l', 'o', ' ', ' ',
|
||||||
# 'W', 'o', 'r', 'l', 'd', '!', '!', ' '], dtype='<U1')
|
# 'W', 'o', 'r', 'l', 'd', '!', ' '], dtype='<U1')
|
||||||
|
|
||||||
# Normalize
|
# Normalize
|
||||||
characters[~np.char.isalpha(characters)] = ' '
|
characters[~np.char.isalpha(characters)] = ' '
|
||||||
@@ -25,7 +25,7 @@ w_ranges = np.reshape(sp2[1:-1], (-1, 2))
|
|||||||
# [ 7, 13],
|
# [ 7, 13],
|
||||||
# [13, 14]], dtype=int64)
|
# [13, 14]], dtype=int64)
|
||||||
# Remove the indexing to the spaces themselves
|
# Remove the indexing to the spaces themselves
|
||||||
w_ranges = w_ranges[np.where(w_ranges[:, 1] - w_ranges[:, 0] > 1)]
|
w_ranges = w_ranges[np.where(w_ranges[:, 1] - w_ranges[:, 0] > 2)]
|
||||||
# Result: array([[ 0, 6],
|
# Result: array([[ 0, 6],
|
||||||
# [ 7, 13]], dtype=int64)
|
# [ 7, 13]], dtype=int64)
|
||||||
|
|
||||||
@@ -38,9 +38,7 @@ swords = np.array(list(map(lambda w: ''.join(w).strip(), words)))
|
|||||||
# Result: array(['hello', 'world'], dtype='<U5')
|
# Result: array(['hello', 'world'], dtype='<U5')
|
||||||
|
|
||||||
# Next, let's remove stop words
|
# Next, let's remove stop words
|
||||||
stop_words = open('../stop_words.txt').read().split(',')
|
stop_words = np.array(list(set(open('../stop_words.txt').read().split(','))))
|
||||||
stop_words.extend(list(string.ascii_lowercase))
|
|
||||||
stop_words = np.array(list(set(stop_words)))
|
|
||||||
ns_words = swords[~np.isin(swords, stop_words)]
|
ns_words = swords[~np.isin(swords, stop_words)]
|
||||||
|
|
||||||
### Finally, count the word occurrences
|
### Finally, count the word occurrences
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
import sys, string
|
import sys, string
|
||||||
|
|
||||||
# the global list of [word, frequency] pairs
|
# the global list of [word, frequency] pairs
|
||||||
word_freqs = []
|
word_freqs = []
|
||||||
# the list of stop words
|
# the list of stop words
|
||||||
@@ -45,5 +46,5 @@ for line in open(sys.argv[1]):
|
|||||||
i += 1
|
i += 1
|
||||||
|
|
||||||
for tf in word_freqs[0:25]:
|
for tf in word_freqs[0:25]:
|
||||||
print(tf[0], ' - ', tf[1])
|
print(tf[0], '-', tf[1])
|
||||||
|
|
||||||
|
|||||||
@@ -84,4 +84,4 @@ frequencies()
|
|||||||
sort()
|
sort()
|
||||||
|
|
||||||
for tf in word_freqs[0:25]:
|
for tf in word_freqs[0:25]:
|
||||||
print(tf[0], ' - ', tf[1])
|
print(tf[0], '-', tf[1])
|
||||||
|
|||||||
@@ -65,7 +65,7 @@ def print_all(word_freqs):
|
|||||||
Takes a list of pairs where the entries are sorted by frequency and print them recursively.
|
Takes a list of pairs where the entries are sorted by frequency and print them recursively.
|
||||||
"""
|
"""
|
||||||
if(len(word_freqs) > 0):
|
if(len(word_freqs) > 0):
|
||||||
print(word_freqs[0][0], ' - ', word_freqs[0][1])
|
print(word_freqs[0][0], '-', word_freqs[0][1])
|
||||||
print_all(word_freqs[1:]);
|
print_all(word_freqs[1:]);
|
||||||
|
|
||||||
#
|
#
|
||||||
|
|||||||
@@ -3,4 +3,4 @@ import heapq, re, sys
|
|||||||
|
|
||||||
words = re.findall("[a-z]{2,}", open(sys.argv[1]).read().lower())
|
words = re.findall("[a-z]{2,}", open(sys.argv[1]).read().lower())
|
||||||
for w in heapq.nlargest(25, set(words) - set(open("../stop_words.txt").read().split(",")), words.count):
|
for w in heapq.nlargest(25, set(words) - set(open("../stop_words.txt").read().split(",")), words.count):
|
||||||
print(w, "-", words.count(w))
|
print(w, '-', words.count(w))
|
||||||
|
|||||||
@@ -34,7 +34,6 @@ def wf_print(wordfreq):
|
|||||||
|
|
||||||
stop_words = set(open('../stop_words.txt').read().split(','))
|
stop_words = set(open('../stop_words.txt').read().split(','))
|
||||||
words = re.findall('[a-z]{2,}', open(sys.argv[1]).read().lower())
|
words = re.findall('[a-z]{2,}', open(sys.argv[1]).read().lower())
|
||||||
|
|
||||||
word_freqs = {}
|
word_freqs = {}
|
||||||
# Theoretically, we would just call count(words, stop_words, word_freqs)
|
# Theoretically, we would just call count(words, stop_words, word_freqs)
|
||||||
# Try doing that and see what happens.
|
# Try doing that and see what happens.
|
||||||
|
|||||||
@@ -40,7 +40,7 @@ def sort(wf, func):
|
|||||||
|
|
||||||
def print_text(word_freqs, func):
|
def print_text(word_freqs, func):
|
||||||
for (w, c) in word_freqs[0:25]:
|
for (w, c) in word_freqs[0:25]:
|
||||||
print(w, "-", c)
|
print(w, '-', c)
|
||||||
func(None)
|
func(None)
|
||||||
|
|
||||||
def no_op(func):
|
def no_op(func):
|
||||||
|
|||||||
@@ -73,7 +73,7 @@ class WordFrequencyController(TFExercise):
|
|||||||
|
|
||||||
word_freqs = self._word_freq_manager.sorted()
|
word_freqs = self._word_freq_manager.sorted()
|
||||||
for (w, c) in word_freqs[0:25]:
|
for (w, c) in word_freqs[0:25]:
|
||||||
print(w, ' - ', c)
|
print(w, '-', c)
|
||||||
|
|
||||||
#
|
#
|
||||||
# The main function
|
# The main function
|
||||||
|
|||||||
@@ -89,7 +89,7 @@ class WordFrequencyController():
|
|||||||
|
|
||||||
word_freqs = self._word_freq_manager.dispatch(['sorted'])
|
word_freqs = self._word_freq_manager.dispatch(['sorted'])
|
||||||
for (w, c) in word_freqs[0:25]:
|
for (w, c) in word_freqs[0:25]:
|
||||||
print(w, ' - ', c)
|
print(w, '-', c)
|
||||||
|
|
||||||
#
|
#
|
||||||
# The main function
|
# The main function
|
||||||
|
|||||||
@@ -46,4 +46,4 @@ for w in data_storage_obj['words']():
|
|||||||
|
|
||||||
word_freqs = word_freqs_obj['sorted']()
|
word_freqs = word_freqs_obj['sorted']()
|
||||||
for (w, c) in word_freqs[0:25]:
|
for (w, c) in word_freqs[0:25]:
|
||||||
print(w, ' - ', c)
|
print(w, '-', c)
|
||||||
|
|||||||
@@ -94,7 +94,7 @@ class WordFrequencyController:
|
|||||||
|
|
||||||
word_freqs = self._word_freq_counter.sorted()
|
word_freqs = self._word_freq_counter.sorted()
|
||||||
for (w, c) in word_freqs[0:25]:
|
for (w, c) in word_freqs[0:25]:
|
||||||
print(w, ' - ', c)
|
print(w, '-', c)
|
||||||
|
|
||||||
#
|
#
|
||||||
# The main function
|
# The main function
|
||||||
|
|||||||
@@ -89,7 +89,7 @@ class WordFrequencyCounter:
|
|||||||
def __print_freqs(self):
|
def __print_freqs(self):
|
||||||
word_freqs = sorted(self._word_freqs.items(), key=operator.itemgetter(1), reverse=True)
|
word_freqs = sorted(self._word_freqs.items(), key=operator.itemgetter(1), reverse=True)
|
||||||
for (w, c) in word_freqs[0:25]:
|
for (w, c) in word_freqs[0:25]:
|
||||||
print(w, ' - ', c)
|
print(w, '-', c)
|
||||||
|
|
||||||
#
|
#
|
||||||
# The main function
|
# The main function
|
||||||
|
|||||||
@@ -79,7 +79,7 @@ class WordFrequencyCounter:
|
|||||||
def print_freqs(self, event):
|
def print_freqs(self, event):
|
||||||
word_freqs = sorted(self._word_freqs.items(), key=operator.itemgetter(1), reverse=True)
|
word_freqs = sorted(self._word_freqs.items(), key=operator.itemgetter(1), reverse=True)
|
||||||
for (w, c) in word_freqs[0:25]:
|
for (w, c) in word_freqs[0:25]:
|
||||||
print(w, ' - ', c)
|
print(w, '-', c)
|
||||||
|
|
||||||
class WordFrequencyApplication:
|
class WordFrequencyApplication:
|
||||||
def __init__(self, event_manager):
|
def __init__(self, event_manager):
|
||||||
|
|||||||
@@ -39,7 +39,7 @@ def sort(word_freq):
|
|||||||
def main():
|
def main():
|
||||||
word_freqs = sort(frequencies(extract_words(sys.argv[1])))
|
word_freqs = sort(frequencies(extract_words(sys.argv[1])))
|
||||||
for (w, c) in word_freqs[0:25]:
|
for (w, c) in word_freqs[0:25]:
|
||||||
print(w, ' - ', c)
|
print(w, '-', c)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
|||||||
@@ -44,5 +44,5 @@ exec('sort = ' + sort_func)
|
|||||||
word_freqs = locals()['sort'](locals()['frequencies'](locals()['extract_words'](filename)))
|
word_freqs = locals()['sort'](locals()['frequencies'](locals()['extract_words'](filename)))
|
||||||
|
|
||||||
for (w, c) in word_freqs[0:25]:
|
for (w, c) in word_freqs[0:25]:
|
||||||
print(w, ' - ', c)
|
print(w, '-', c)
|
||||||
|
|
||||||
|
|||||||
@@ -45,5 +45,5 @@ for func in tracked_functions:
|
|||||||
word_freqs = sort(frequencies(extract_words(sys.argv[1])))
|
word_freqs = sort(frequencies(extract_words(sys.argv[1])))
|
||||||
|
|
||||||
for (w, c) in word_freqs[0:25]:
|
for (w, c) in word_freqs[0:25]:
|
||||||
print(w, ' - ', c)
|
print(w, '-', c)
|
||||||
|
|
||||||
|
|||||||
@@ -7,8 +7,6 @@ def load_plugins():
|
|||||||
words_plugin = config.get("Plugins", "words")
|
words_plugin = config.get("Plugins", "words")
|
||||||
frequencies_plugin = config.get("Plugins", "frequencies")
|
frequencies_plugin = config.get("Plugins", "frequencies")
|
||||||
global tfwords, tffreqs
|
global tfwords, tffreqs
|
||||||
# tfwords = importlib.load_compiled('tfwords', words_plugin)
|
|
||||||
# tffreqs = importlib.load_compiled('tffreqs', frequencies_plugin)
|
|
||||||
tfwords = importlib.machinery.SourcelessFileLoader('tfwords', words_plugin).load_module()
|
tfwords = importlib.machinery.SourcelessFileLoader('tfwords', words_plugin).load_module()
|
||||||
tffreqs = importlib.machinery.SourcelessFileLoader('tffreqs', frequencies_plugin).load_module()
|
tffreqs = importlib.machinery.SourcelessFileLoader('tffreqs', frequencies_plugin).load_module()
|
||||||
|
|
||||||
@@ -16,5 +14,5 @@ load_plugins()
|
|||||||
word_freqs = tffreqs.top25(tfwords.extract_words(sys.argv[1]))
|
word_freqs = tffreqs.top25(tfwords.extract_words(sys.argv[1]))
|
||||||
|
|
||||||
for (w, c) in word_freqs:
|
for (w, c) in word_freqs:
|
||||||
print(w, ' - ', c)
|
print(w, '-', c)
|
||||||
|
|
||||||
|
|||||||
@@ -58,5 +58,5 @@ filename = sys.argv[1] if len(sys.argv) > 1 else "../input.txt"
|
|||||||
word_freqs = sort(frequencies(remove_stop_words(extract_words(filename))))
|
word_freqs = sort(frequencies(remove_stop_words(extract_words(filename))))
|
||||||
|
|
||||||
for tf in word_freqs[0:25]:
|
for tf in word_freqs[0:25]:
|
||||||
print(tf[0], ' - ', tf[1])
|
print(tf[0], '-', tf[1])
|
||||||
|
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
import sys, re, operator, string, traceback
|
import sys, re, operator, string, traceback
|
||||||
|
|
||||||
#
|
#
|
||||||
@@ -64,7 +65,7 @@ try:
|
|||||||
assert(type(word_freqs) is list), "OMG! This is not a list!"
|
assert(type(word_freqs) is list), "OMG! This is not a list!"
|
||||||
assert(len(word_freqs) > 25), "SRSLY? Less than 25 words!"
|
assert(len(word_freqs) > 25), "SRSLY? Less than 25 words!"
|
||||||
for (w, c) in word_freqs[0:25]:
|
for (w, c) in word_freqs[0:25]:
|
||||||
print(w, ' - ', c)
|
print(w, '-', c)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print("Something wrong: {0}".format(e))
|
print("Something wrong: {0}".format(e))
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
|
|||||||
@@ -50,7 +50,7 @@ try:
|
|||||||
|
|
||||||
assert(len(word_freqs) > 25), "OMG! Less than 25 words! I QUIT!"
|
assert(len(word_freqs) > 25), "OMG! Less than 25 words! I QUIT!"
|
||||||
for tf in word_freqs[0:25]:
|
for tf in word_freqs[0:25]:
|
||||||
print(tf[0], ' - ', tf[1])
|
print(tf[0], '-', tf[1])
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print("Something wrong: {0}".format(e))
|
print("Something wrong: {0}".format(e))
|
||||||
|
|
||||||
|
|||||||
@@ -45,5 +45,5 @@ def sort(word_freq):
|
|||||||
|
|
||||||
word_freqs = sort(frequencies(extract_words(sys.argv[1])))
|
word_freqs = sort(frequencies(extract_words(sys.argv[1])))
|
||||||
for (w, c) in word_freqs[0:25]:
|
for (w, c) in word_freqs[0:25]:
|
||||||
print(w, ' - ', c)
|
print(w, '-', c)
|
||||||
|
|
||||||
|
|||||||
@@ -66,4 +66,4 @@ with sqlite3.connect('tf.db') as connection:
|
|||||||
for i in range(25):
|
for i in range(25):
|
||||||
row = c.fetchone()
|
row = c.fetchone()
|
||||||
if row != None:
|
if row != None:
|
||||||
print(row[0] + ' - ' + str(row[1]))
|
print(row[0], '-', str(row[1]))
|
||||||
|
|||||||
@@ -38,6 +38,7 @@ def update():
|
|||||||
if c[1] != None:
|
if c[1] != None:
|
||||||
c[0] = c[1]()
|
c[0] = c[1]()
|
||||||
|
|
||||||
|
|
||||||
# Load the fixed data into the first 2 columns
|
# Load the fixed data into the first 2 columns
|
||||||
all_words[0] = re.findall('[a-z]{2,}', open(sys.argv[1]).read().lower())
|
all_words[0] = re.findall('[a-z]{2,}', open(sys.argv[1]).read().lower())
|
||||||
stop_words[0] = set(open('../stop_words.txt').read().split(','))
|
stop_words[0] = set(open('../stop_words.txt').read().split(','))
|
||||||
|
|||||||
@@ -44,5 +44,5 @@ def count_and_sort(filename):
|
|||||||
for word_freqs in count_and_sort(sys.argv[1]):
|
for word_freqs in count_and_sort(sys.argv[1]):
|
||||||
print("-----------------------------")
|
print("-----------------------------")
|
||||||
for (w, c) in word_freqs[0:25]:
|
for (w, c) in word_freqs[0:25]:
|
||||||
print(w, ' - ', c)
|
print(w, '-', c)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user