Tidy up and make line numbers consistent with the text

This commit is contained in:
Crista Lopes
2019-12-22 17:43:07 -08:00
parent ebeba3d34c
commit 6e1d72bd21
26 changed files with 30 additions and 32 deletions

View File

@@ -1,10 +1,10 @@
import sys, string import sys, string
import numpy as np import numpy as np
# Example input: "Hello World!!" # Example input: "Hello World!"
characters = np.array([' ']+list(open(sys.argv[1]).read())+[' ']) characters = np.array([' ']+list(open(sys.argv[1]).read())+[' '])
# Result: array([' ', 'H', 'e', 'l', 'l', 'o', ' ', ' ', # Result: array([' ', 'H', 'e', 'l', 'l', 'o', ' ', ' ',
# 'W', 'o', 'r', 'l', 'd', '!', '!', ' '], dtype='<U1') # 'W', 'o', 'r', 'l', 'd', '!', ' '], dtype='<U1')
# Normalize # Normalize
characters[~np.char.isalpha(characters)] = ' ' characters[~np.char.isalpha(characters)] = ' '
@@ -25,7 +25,7 @@ w_ranges = np.reshape(sp2[1:-1], (-1, 2))
# [ 7, 13], # [ 7, 13],
# [13, 14]], dtype=int64) # [13, 14]], dtype=int64)
# Remove the indexing to the spaces themselves # Remove the indexing to the spaces themselves
w_ranges = w_ranges[np.where(w_ranges[:, 1] - w_ranges[:, 0] > 1)] w_ranges = w_ranges[np.where(w_ranges[:, 1] - w_ranges[:, 0] > 2)]
# Result: array([[ 0, 6], # Result: array([[ 0, 6],
# [ 7, 13]], dtype=int64) # [ 7, 13]], dtype=int64)
@@ -38,9 +38,7 @@ swords = np.array(list(map(lambda w: ''.join(w).strip(), words)))
# Result: array(['hello', 'world'], dtype='<U5') # Result: array(['hello', 'world'], dtype='<U5')
# Next, let's remove stop words # Next, let's remove stop words
stop_words = open('../stop_words.txt').read().split(',') stop_words = np.array(list(set(open('../stop_words.txt').read().split(','))))
stop_words.extend(list(string.ascii_lowercase))
stop_words = np.array(list(set(stop_words)))
ns_words = swords[~np.isin(swords, stop_words)] ns_words = swords[~np.isin(swords, stop_words)]
### Finally, count the word occurrences ### Finally, count the word occurrences

View File

@@ -1,5 +1,6 @@
#!/usr/bin/env python #!/usr/bin/env python
import sys, string import sys, string
# the global list of [word, frequency] pairs # the global list of [word, frequency] pairs
word_freqs = [] word_freqs = []
# the list of stop words # the list of stop words

View File

@@ -3,4 +3,4 @@ import heapq, re, sys
words = re.findall("[a-z]{2,}", open(sys.argv[1]).read().lower()) words = re.findall("[a-z]{2,}", open(sys.argv[1]).read().lower())
for w in heapq.nlargest(25, set(words) - set(open("../stop_words.txt").read().split(",")), words.count): for w in heapq.nlargest(25, set(words) - set(open("../stop_words.txt").read().split(",")), words.count):
print(w, "-", words.count(w)) print(w, '-', words.count(w))

View File

@@ -34,7 +34,6 @@ def wf_print(wordfreq):
stop_words = set(open('../stop_words.txt').read().split(',')) stop_words = set(open('../stop_words.txt').read().split(','))
words = re.findall('[a-z]{2,}', open(sys.argv[1]).read().lower()) words = re.findall('[a-z]{2,}', open(sys.argv[1]).read().lower())
word_freqs = {} word_freqs = {}
# Theoretically, we would just call count(words, stop_words, word_freqs) # Theoretically, we would just call count(words, stop_words, word_freqs)
# Try doing that and see what happens. # Try doing that and see what happens.

View File

@@ -40,7 +40,7 @@ def sort(wf, func):
def print_text(word_freqs, func): def print_text(word_freqs, func):
for (w, c) in word_freqs[0:25]: for (w, c) in word_freqs[0:25]:
print(w, "-", c) print(w, '-', c)
func(None) func(None)
def no_op(func): def no_op(func):

View File

@@ -7,8 +7,6 @@ def load_plugins():
words_plugin = config.get("Plugins", "words") words_plugin = config.get("Plugins", "words")
frequencies_plugin = config.get("Plugins", "frequencies") frequencies_plugin = config.get("Plugins", "frequencies")
global tfwords, tffreqs global tfwords, tffreqs
# tfwords = importlib.load_compiled('tfwords', words_plugin)
# tffreqs = importlib.load_compiled('tffreqs', frequencies_plugin)
tfwords = importlib.machinery.SourcelessFileLoader('tfwords', words_plugin).load_module() tfwords = importlib.machinery.SourcelessFileLoader('tfwords', words_plugin).load_module()
tffreqs = importlib.machinery.SourcelessFileLoader('tffreqs', frequencies_plugin).load_module() tffreqs = importlib.machinery.SourcelessFileLoader('tffreqs', frequencies_plugin).load_module()

View File

@@ -1,4 +1,5 @@
#!/usr/bin/env python #!/usr/bin/env python
import sys, re, operator, string, traceback import sys, re, operator, string, traceback
# #

View File

@@ -66,4 +66,4 @@ with sqlite3.connect('tf.db') as connection:
for i in range(25): for i in range(25):
row = c.fetchone() row = c.fetchone()
if row != None: if row != None:
print(row[0] + ' - ' + str(row[1])) print(row[0], '-', str(row[1]))

View File

@@ -38,6 +38,7 @@ def update():
if c[1] != None: if c[1] != None:
c[0] = c[1]() c[0] = c[1]()
# Load the fixed data into the first 2 columns # Load the fixed data into the first 2 columns
all_words[0] = re.findall('[a-z]{2,}', open(sys.argv[1]).read().lower()) all_words[0] = re.findall('[a-z]{2,}', open(sys.argv[1]).read().lower())
stop_words[0] = set(open('../stop_words.txt').read().split(',')) stop_words[0] = set(open('../stop_words.txt').read().split(','))