"""Regenerate sorted book lists from the README.

Reads the book sections found after the ``# Books`` heading of the README,
writes one copy of the README sorted by title and one sorted by author, and
finally looks up the first "Miscellaneous" book on Goodreads.
"""
import urllib.error
import urllib.parse
import urllib.request
import xml.etree.ElementTree as ET

from config import GOODREADS_PUBLIC_API_KEY

file_with_books = './../README.MD'

# we assume that every line after # Books starting with * is a book title


def read_file_content(file):
    """Return the lines of *file* with leading/trailing whitespace removed."""
    with open(file) as f:
        return [line.strip() for line in f]


def parse_book_string(book_string):
    """Parse one ``* [title](url) by author`` line into a dict.

    The title is the text between the first ``[`` and the following ``]``,
    the url the text between the first ``(`` and the following ``)``, and
    the author everything after the last ``' by '`` (trailing punctuation is
    kept; if ``' by '`` is absent the whole line is used).

    Raises:
        IndexError: if the line contains no ``[`` or no ``(``.
    """
    return {
        'title': book_string.split('[')[1].split(']')[0],
        'url': book_string.split('(')[1].split(')')[0],
        'author': book_string.split(' by ')[-1],
    }


def load(file):
    """Build ``{heading line: [book dict, ...]}`` from a README-style file.

    Only lines after the ``# Books`` line are considered.  A line starting
    with ``##`` opens a new section (the stripped heading line itself is the
    key); a line starting with ``*`` is parsed as a book of the current
    section.

    Raises:
        ValueError: if the file has no ``# Books`` line.
        IndexError: if a ``*`` line is not in the expected book format.
    """
    lines = read_file_content(file)
    print(lines)

    # Book sections start one line after the "# Books" title.
    first_line = lines.index('# Books') + 1

    library = {}
    current_title = ''
    books_under_current_title = []

    for line in lines[first_line:]:
        if line.startswith('##'):
            # A new heading closes the previous section (if there was one).
            if current_title:
                library[current_title] = books_under_current_title
                books_under_current_title = []
            current_title = line
            continue

        if line.startswith('*'):
            books_under_current_title.append(parse_book_string(line))

    # Sections are only flushed when the next heading shows up, so the last
    # one would otherwise be lost.  A trailing heading without books (e.g.
    # "## License") is intentionally left out: render() treats every key of
    # the library as a book section.
    if current_title and books_under_current_title:
        library[current_title] = books_under_current_title

    return library


def sort_by(library, key_to_sort_on):
    """Return a new library whose book lists are sorted on *key_to_sort_on*.

    The input library and its lists are left untouched.
    """
    return {
        title: sorted(books, key=lambda book: book[key_to_sort_on])
        for title, books in library.items()
    }


def render_book_line(book_object):
    """Format a book dict as a markdown list line ending in a newline."""
    return '* [{}]({}) by {}.\n'.format(book_object['title'],
                                        book_object['url'],
                                        book_object['author'])


# TODO: refine this logic
def render(file_name, library):
    """Write a copy of the README to *file_name* using the books in *library*.

    The README (``file_with_books``) is copied line by line.  Everything up
    to the first section heading found in *library* is copied verbatim; from
    then on only section headings and the library's own book lines are
    written, until a ``## License`` line switches back to verbatim copying.
    """
    copying_verbatim = True
    with open(file_name, 'w') as out_file, \
            open(file_with_books) as original_file:
        for line in original_file:
            heading = line.strip()
            if heading in library:
                # Separate consecutive sections with one blank line.
                if not copying_verbatim:
                    out_file.write('\n')
                copying_verbatim = False
                out_file.write(line)
                for book in library[heading]:
                    out_file.write(render_book_line(book))
            elif copying_verbatim:
                out_file.write(line)
            elif line.startswith('## License'):
                # End of the book sections: resume copying the original.
                out_file.write('\n')
                out_file.write('\n')
                out_file.write(line)
                copying_verbatim = True


def get_details(book_object):
    """Look the book up on Goodreads by title and add details in place.

    On success the keys ``year``, ``lang``, ``avg_rt`` and ``pages`` are set
    on *book_object*; a value is ``None`` when the response lacks the
    corresponding element.  HTTP errors are reported on stdout and leave
    *book_object* unchanged.  The (possibly enriched) dict is printed at the
    end.
    """
    # urlencode escapes every reserved character in the title (&, #, ?, ...),
    # not just spaces; quote_via=quote keeps spaces encoded as %20.
    query = urllib.parse.urlencode(
        {'key': GOODREADS_PUBLIC_API_KEY, 'title': book_object['title']},
        quote_via=urllib.parse.quote,
    )
    url = 'http://www.goodreads.com/book/title.xml?' + query
    print(url)

    try:
        # The context manager closes the HTTP response once it is parsed.
        with urllib.request.urlopen(url) as response:
            root = ET.parse(response).getroot()
    except urllib.error.HTTPError as e:
        print('Error getting book details from goodread: ')
        print(str(e.getcode()) + ' ' + e.msg)
    else:
        book = root.find('book')
        if book is None:
            print('No book element in goodreads response')
        else:
            # findtext returns None instead of raising when a tag is absent.
            book_object['year'] = book.findtext('publication_year')
            book_object['lang'] = book.findtext('language_code')
            book_object['avg_rt'] = book.findtext('average_rating')
            book_object['pages'] = book.findtext('num_pages')

    print(book_object)


library = load(file_with_books)
library = sort_by(library, 'title')
render('./../by-title.md', library)

library = load(file_with_books)
library = sort_by(library, 'author')
render('./../by-author.md', library)

print(library)
get_details(library['### Miscellaneous'][0])