diff --git a/utils/gooodreads.py b/utils/gooodreads.py
new file mode 100644
index 0000000..0b6c3f9
--- /dev/null
+++ b/utils/gooodreads.py
@@ -0,0 +1,69 @@
+import time
+import xml.etree.ElementTree as ET
+import urllib.error
+import urllib.parse
+import urllib.request
+
+from config import GOODREADS_PUBLIC_API_KEY
+
+
+def _text_of(node, tag):
+    """Return the text of child *tag* of *node*, or '' if absent/empty."""
+    child = node.find(tag)
+    if child is None or child.text is None:
+        return ''
+    return child.text
+
+
+def get_details(book_object):
+    """Fill *book_object* in place with year, lang, rating and pages.
+
+    The book is looked up on Goodreads by its 'title'. Values missing from
+    the response are stored as '' so later sorting never compares None.
+    Failed lookups are reported on stdout and leave the dict unchanged.
+    """
+    url = "http://www.goodreads.com/book/title.xml?key={}&title={}".format(
+        GOODREADS_PUBLIC_API_KEY,
+        urllib.parse.quote_plus(book_object['title']))
+    # the URL is not printed: it embeds the API key
+    try:
+        # close the HTTP response as soon as the document is parsed
+        with urllib.request.urlopen(url) as response:
+            root = ET.parse(response).getroot()
+    except urllib.error.HTTPError as e:
+        print('Error getting book details from GoodReads: ')
+        print(str(e.getcode()) + ' ' + e.msg)
+        print(book_object)
+        return
+    except (urllib.error.URLError, ET.ParseError) as e:
+        # network failure or malformed XML: report it and carry on
+        print('Error getting book details from GoodReads: ')
+        print(e)
+        print(book_object)
+        return
+
+    book = root.find('book')
+    if book is None:
+        print('No book found on GoodReads for: ')
+        print(book_object)
+        return
+    book_object['year'] = _text_of(book, 'publication_year')
+    book_object['lang'] = _text_of(book, 'language_code')
+    book_object['rating'] = _text_of(book, 'average_rating')
+    book_object['pages'] = _text_of(book, 'num_pages')
+
+
+def get_goodread_info(library):
+    """Fetch Goodreads details for every book that has no rating yet.
+
+    *library* maps chapter titles to lists of book dicts; the dicts are
+    updated in place.
+    """
+    for book_list in library.values():
+        for book in book_list:
+            # do not call the api again if we already have the information
+            if book.get('rating'):
+                continue
+            get_details(book)
+            # need to wait a second between the requests, to not abuse the API
+            time.sleep(1)
diff --git a/utils/housekeep.py b/utils/housekeep.py
index 30849a2..b21b21e 100644
--- a/utils/housekeep.py
+++ b/utils/housekeep.py
@@ -1,116 +1,75 @@
-from config import GOODREADS_PUBLIC_API_KEY
+# we assume that every line after # Books
+# starting with * is a book title if file type is old
+# starting with | (and not with | Name or |--) is a book if the file type is new
 
-file_with_books = './../README.MD'
-
-# we assume that every line after # Books starting with * is a book title
+# ARGUMENT HANDLING
+try:
+    import argparse
+    parser = argparse.ArgumentParser(description='Process file.')
+    parser.add_argument(
+        '--in_file',
+        help='File to process, defaults to ./../README.MD')
+    parser.add_argument(
+        '--out_file',
+        help='File to save to, defaults to ./../README-NEW.md')
+    parser.add_argument(
+        '--file_type',
+        choices=['old', 'new'],
+        help='old if links are displayed in a list, new if in a table')
+    parser.add_argument(
+        '--sort_by',
+        choices=['rating', 'title', 'author', 'year'],
+        help='defaults to rating')
+    flags = parser.parse_args()
+    print(flags)
+except ImportError:
+    # main() falls back to the defaults when flags is None
+    flags = None
 
 
-def read_file_content(file):
-    with open(file) as f:
-        content = f.readlines()
-    # remove whitespaces
-    return [_.strip() for _ in content]
-
-
-def parse_book_string(book_string):
-    book = {}
-    book['title'] = book_string.split('[')[1].split(']')[0]
-    book['url'] = book_string.split('(')[1].split(')')[0]
-    book['author'] = book_string.split(' by ')[-1]
-    return book
-
-
-def load(file):
-    file = read_file_content(file)
-    print(file)
-    # we start one line after tilte # Books
-    line_to_start = file.index('# Books') + 1
-    current_title = ''
-    books_under_current_title = []
-    library = {}
-
-    for i in range(line_to_start, len(file)):
-        line = file[i]
-
-        # we have a title
-        if line.startswith('##'):
-            if len(current_title) == 0:
-                current_title = line
-            else:
-                library[current_title] = books_under_current_title
-                books_under_current_title = []
-                current_title = line
-            continue
-
-        # we have a book
-        if line.startswith('*'):
-            book = parse_book_string(line)
-            books_under_current_title.append(book)
-    return library
-
-
-def sort_by(library, key_to_sort_on):
+def _sort_key(book, key_to_sort_on):
+    """Return a sortable value; rating and year compare as numbers."""
+    value = book.get(key_to_sort_on) or ''
+    if key_to_sort_on in ('rating', 'year'):
+        try:
+            return float(value)
+        except ValueError:
+            # unknown values ('' or '?') sort below every real number
+            return float('-inf')
+    return value
+
+
+def sort(library, key_to_sort_on, reverse=False):
+    """Return a copy of *library* with every chapter's books sorted."""
     new_library = {}
     for key in library:
         books = library[key]
-        new_library[key] = sorted(books, key=lambda k: k[key_to_sort_on])
+        new_library[key] = sorted(
+            books, key=lambda k: _sort_key(k, key_to_sort_on), reverse=reverse)
     return new_library
 
 
-def render_book_line(book_object):
-    return '* [{}]({}) by {}.\n'.format(book_object['title'], book_object['url'], book_object['author'])
+def main():
+    """Load the book list, add Goodreads details, sort it and render it."""
+    from read_file import load
+    from gooodreads import get_goodread_info
+    from write_file import render
+
+    # flags is None when argparse could not be imported: use the defaults
+    in_file = getattr(flags, 'in_file', None) or './../README.MD'
+    out_file = getattr(flags, 'out_file', None) or './../README-NEW.md'
+    file_type = getattr(flags, 'file_type', None) or 'new'
+    sort_by = getattr(flags, 'sort_by', None) or 'rating'
+    # the best rated books come first; every other key sorts ascending
+    reverse = sort_by == 'rating'
+
+    library = load(in_file, file_type)
+    get_goodread_info(library)
+    library = sort(library, sort_by, reverse)
+    render(in_file, out_file, library)
+
+
+if __name__ == '__main__':
+    main()
 
 
-# TODO: refine this logic
-def render(file_name, library):
-    books_not_reached = True
-    with open(file_name, 'w') as out_file:
-        with open(file_with_books) as original_file:
-            for line in original_file:
-
-                if line.strip() in library:
-                    if not books_not_reached: out_file.write('\n')
-                    books_not_reached = False
-                    out_file.write(line)
-                    for book in library[line.strip()]:
-                        out_file.write(render_book_line(book))
-                elif books_not_reached:
-                    out_file.write(line)
-                elif line.startswith('## License'):
-                    out_file.write('\n')
-                    out_file.write('\n')
-                    out_file.write(line)
-                    books_not_reached = True
-
-def get_details(book_object):
-    import xml.etree.ElementTree as ET
-    import urllib.request
-    import urllib.error
-    url = "http://www.goodreads.com/book/title.xml?key={}&title={}".format(GOODREADS_PUBLIC_API_KEY, book_object['title'])
-    url = url.replace(' ', '%20')
-    print(url)
-    try:
-        tree = ET.ElementTree(file=urllib.request.urlopen(url))
-        root = tree.getroot()
-        book = root.find('book')
-        book_object['year'] = book.find('publication_year').text
-        book_object['lang'] = book.find('language_code').text
-        book_object['avg_rt'] = book.find('average_rating').text
-        book_object['pages'] = book.find('num_pages').text
-    except urllib.error.HTTPError as e:
-        print('Error getting book details from GoodReads: ')
-        print(str(e.getcode()) + ' ' + e.msg)
-        print(book_object)
-
-library = load(file_with_books)
-library = sort_by(library, 'title')
-
-render('./../by-title.md', library)
-
-library = load(file_with_books)
-library = sort_by(library, 'author')
-
-render('./../by-author.md', library)
-
-print(library)
-get_details(library['### Miscellaneous'][0])
\ No newline at end of file
diff --git a/utils/read_file.py b/utils/read_file.py
new file mode 100644
index 0000000..0509c10
--- /dev/null
+++ b/utils/read_file.py
@@ -0,0 +1,68 @@
+def read_file_content(file):
+    """Return the lines of *file* with surrounding whitespace stripped."""
+    with open(file) as f:
+        # remove whitespaces
+        return [line.strip() for line in f]
+
+
+# old (list)
+def parse_book_string(book_string):
+    """Parse a '* [title](url) by author' list entry into a book dict."""
+    return {
+        'title': book_string.split('[')[1].split(']')[0],
+        'url': book_string.split('(')[1].split(')')[0],
+        'author': book_string.split(' by ')[-1],
+        'rating': '',
+        'year': '',
+    }
+
+
+# new (table)
+def parse_book_string_new(book_string):
+    """Parse a '| title | author | [rating](url) | year |' table row."""
+    book_split = book_string.split('|')
+    rating_link = book_split[3].strip()
+    return {
+        'title': book_split[1].strip(),
+        'author': book_split[2].strip(),
+        'url': rating_link.split('[')[1].split('(')[1].split(')')[0],
+        'rating': rating_link.split('[')[1].split(']')[0],
+        'year': book_split[4].strip(),
+    }
+
+
+def load(file, file_type):
+    """Read the book list of *file* into a dict of chapter title -> books.
+
+    Parsing starts on the line after '# Books'. Each line starting with
+    '##' opens a chapter; the lines below it are parsed as books, from a
+    list when *file_type* is 'old' and from a table otherwise.
+    """
+    lines = read_file_content(file)
+    # we start one line after title # Books
+    line_to_start = lines.index('# Books') + 1
+    current_title = ''
+    books_under_current_title = []
+    library = {}
+
+    for line in lines[line_to_start:]:
+        # we have a title
+        if line.startswith('##'):
+            if current_title:
+                library[current_title] = books_under_current_title
+                books_under_current_title = []
+            current_title = line
+            continue
+
+        # we have a book
+        if file_type == 'old':
+            if line.startswith('*'):
+                books_under_current_title.append(parse_book_string(line))
+        elif line.startswith('|') and not line.startswith(('| Name', '|---')):
+            books_under_current_title.append(parse_book_string_new(line))
+
+    # no further title closes a chapter that runs to the end of the file,
+    # so store it here; a trailing section without books is left out
+    if current_title and books_under_current_title:
+        library[current_title] = books_under_current_title
+    return library
diff --git a/utils/write_file.py b/utils/write_file.py
new file mode 100644
index 0000000..c7667cd
--- /dev/null
+++ b/utils/write_file.py
@@ -0,0 +1,43 @@
+def render_book_line(book_object):
+    """Return the table row for *book_object* without modifying it."""
+    return '| {} | {} | [{}]({}) | {} | \n'.format(
+        book_object['title'],
+        book_object['author'],
+        book_object.get('rating', '?'),
+        book_object.get('url', ''),
+        book_object.get('year', ''))
+
+
+# TODO: refine this logic
+def render(in_file, out_file, library):
+    """Write *in_file* to *out_file* with the book chapters as tables.
+
+    The input is read completely before the output is opened, so both
+    arguments may name the same file.
+    """
+    with open(in_file) as original_file:
+        original_lines = original_file.readlines()
+
+    books_not_reached = True
+    with open(out_file, 'w') as rendered_file:
+        for line in original_lines:
+            chapter = line.strip()
+            if chapter in library:
+                if not books_not_reached:
+                    rendered_file.write('\n')
+                books_not_reached = False
+
+                # render chapter and start of the table
+                rendered_file.write(line)
+                rendered_file.write('| Name | Author | Goodreads Rating | Year Published | \n')
+                rendered_file.write('|------|--------|------------------|----------------| \n')
+                # render books
+                for book in library[chapter]:
+                    rendered_file.write(render_book_line(book))
+            elif books_not_reached:
+                rendered_file.write(line)
+            elif line.startswith('## License'):
+                rendered_file.write('\n')
+                rendered_file.write('\n')
+                rendered_file.write(line)
+                books_not_reached = True