Minor
This commit is contained in:
@@ -15,15 +15,11 @@ def partition(data_str, nlines):
|
|||||||
|
|
||||||
def split_words(data_str):
|
def split_words(data_str):
|
||||||
"""
|
"""
|
||||||
Takes a string, filters non alphanumeric characters, normalizes to
|
Takes a string, returns a list of pairs (word, 1),
|
||||||
lower case, scans for words, and filters the stop words.
|
one for each word in the input, so
|
||||||
It returns a list of pairs (word, 1), one for each word in the input, so
|
|
||||||
[(w1, 1), (w2, 1), ..., (wn, 1)]
|
[(w1, 1), (w2, 1), ..., (wn, 1)]
|
||||||
"""
|
"""
|
||||||
def _scan(str_data):
|
def _scan(str_data):
|
||||||
"""
|
|
||||||
Takes a string and returns a list of words
|
|
||||||
"""
|
|
||||||
pattern = re.compile('[\W_]+')
|
pattern = re.compile('[\W_]+')
|
||||||
return pattern.sub(' ', str_data).lower().split()
|
return pattern.sub(' ', str_data).lower().split()
|
||||||
|
|
||||||
@@ -60,7 +56,6 @@ def count_words(pairs_list_1, pairs_list_2):
|
|||||||
#
|
#
|
||||||
# Auxiliary functions
|
# Auxiliary functions
|
||||||
#
|
#
|
||||||
|
|
||||||
def read_file(path_to_file):
|
def read_file(path_to_file):
|
||||||
with open(path_to_file) as f:
|
with open(path_to_file) as f:
|
||||||
data = f.read()
|
data = f.read()
|
||||||
|
|||||||
Reference in New Issue
Block a user