Merge pull request #51 from kdabiria/master

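Fixed the end-of-line bug: strip the trailing newline from the contents of stop_words.txt before splitting on commas, so the last stop word no longer carries a trailing '\n'.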
This commit is contained in:
crista
2021-03-29 11:14:43 -07:00
committed by GitHub
3 changed files with 15 additions and 6 deletions

View File

@@ -21,7 +21,7 @@ def scan(str_data, func):
def remove_stop_words(word_list, func): def remove_stop_words(word_list, func):
with open('../stop_words.txt') as f: with open('../stop_words.txt') as f:
stop_words = f.read().split(',') stop_words = f.read().strip('\n').split(',')
# add single-letter words # add single-letter words
stop_words.extend(list(string.ascii_lowercase)) stop_words.extend(list(string.ascii_lowercase))
func([w for w in word_list if not w in stop_words], sort) func([w for w in word_list if not w in stop_words], sort)

View File

@@ -35,7 +35,7 @@ def scan(str_data):
def remove_stop_words(word_list): def remove_stop_words(word_list):
with open('../stop_words.txt') as f: with open('../stop_words.txt') as f:
stop_words = f.read().split(',') stop_words = f.read().strip('\n').split(',')
# add single-letter words # add single-letter words
stop_words.extend(list(string.ascii_lowercase)) stop_words.extend(list(string.ascii_lowercase))
return [w for w in word_list if not w in stop_words] return [w for w in word_list if not w in stop_words]

View File

@@ -1,11 +1,15 @@
#!/usr/bin/env python #!/usr/bin/env python
import sys, operator, string import sys
import operator
import string
def characters(filename): def characters(filename):
for line in open(filename): for line in open(filename):
for c in line: for c in line:
yield c yield c
def all_words(filename): def all_words(filename):
start_char = True start_char = True
for c in characters(filename): for c in characters(filename):
@@ -15,7 +19,8 @@ def all_words(filename):
# We found the start of a word # We found the start of a word
word = c.lower() word = c.lower()
start_char = False start_char = False
else: pass else:
pass
else: else:
if c.isalnum(): if c.isalnum():
word += c.lower() word += c.lower()
@@ -24,12 +29,15 @@ def all_words(filename):
start_char = True start_char = True
yield word yield word
def non_stop_words(filename): def non_stop_words(filename):
stopwords = set(open('../stop_words.txt').read().split(',') + list(string.ascii_lowercase)) stopwords = set(open(
'../stop_words.txt').read().strip('\n').split(',') + list(string.ascii_lowercase))
for w in all_words(filename): for w in all_words(filename):
if not w in stopwords: if not w in stopwords:
yield w yield w
def count_and_sort(filename): def count_and_sort(filename):
freqs, i = {}, 1 freqs, i = {}, 1
for w in non_stop_words(filename): for w in non_stop_words(filename):
@@ -38,6 +46,8 @@ def count_and_sort(filename):
yield sorted(freqs.items(), key=operator.itemgetter(1), reverse=True) yield sorted(freqs.items(), key=operator.itemgetter(1), reverse=True)
i = i+1 i = i+1
yield sorted(freqs.items(), key=operator.itemgetter(1), reverse=True) yield sorted(freqs.items(), key=operator.itemgetter(1), reverse=True)
# #
# The main function # The main function
# #
@@ -45,4 +55,3 @@ for word_freqs in count_and_sort(sys.argv[1]):
print("-----------------------------") print("-----------------------------")
for (w, c) in word_freqs[0:25]: for (w, c) in word_freqs[0:25]:
print(w, '-', c) print(w, '-', c)