From 0df25aea21fecc09528f6d869a32ce18538b6835 Mon Sep 17 00:00:00 2001
From: Crista Lopes
Date: Tue, 31 Dec 2013 07:32:57 -0800
Subject: [PATCH] Simplification of 24

---
 24-tabular/tf-24.py | 36 +++++++-----------------------------
 1 file changed, 7 insertions(+), 29 deletions(-)

diff --git a/24-tabular/tf-24.py b/24-tabular/tf-24.py
index 26fb439..3f93dd9 100755
--- a/24-tabular/tf-24.py
+++ b/24-tabular/tf-24.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-
 import sys, re, string, sqlite3
 
 #
@@ -16,38 +15,17 @@ def create_db_schema(connection):
 
 def load_file_into_database(path_to_file, connection):
     """ Takes the path to a file and loads the contents into the database """
-    def _read_file(path_to_file):
-        """
-        Takes a path to a file and returns the entire contents of the
-        file as a string
-        """
-        f = open(path_to_file)
-        data = f.read()
-        f.close()
-        return data
-
-    def _filter_chars_and_normalize(str_data):
-        """
-        Takes a string and returns a copy with all nonalphanumeric chars
-        replaced by white space, and all characters lower-cased
-        """
+    def _extract_words(path_to_file):
+        with open(path_to_file) as f:
+            str_data = f.read()
         pattern = re.compile('[\W_]+')
-        return pattern.sub(' ', str_data).lower()
-
-    def _scan(str_data):
-        """ Takes a string and scans for words, returning a list of words. """
-        return str_data.split()
-
-    def _remove_stop_words(word_list):
-        f = open('../stop_words.txt')
-        stop_words = f.read().split(',')
-        f.close()
-        # add single-letter words
+        word_list = pattern.sub(' ', str_data).lower().split()
+        with open('../stop_words.txt') as f:
+            stop_words = f.read().split(',')
         stop_words.extend(list(string.ascii_lowercase))
         return [w for w in word_list if not w in stop_words]
 
-    # The actual work of splitting the input into words
-    words = _remove_stop_words(_scan(_filter_chars_and_normalize(_read_file(path_to_file))))
+    words = _extract_words(path_to_file)
 
     # Now let's add data to the database
     # Add the document itself to the database