- fixed a bug where table header was rendered for titles with no book children - now correctly parsing url (some of the record whereat parsed properly)
54 lines
1.8 KiB
Python
54 lines
1.8 KiB
Python
import time
|
|
import xml.etree.ElementTree as ET
|
|
import urllib.request
|
|
import urllib.error
|
|
|
|
from config import GOODREADS_PUBLIC_API_KEY
|
|
|
|
|
|
def get_details(book_object):
|
|
|
|
url = "http://www.goodreads.com/book/title.xml?key={}&title={}".format(GOODREADS_PUBLIC_API_KEY,
|
|
urllib.parse.quote_plus(book_object['title']))
|
|
|
|
try:
|
|
tree = ET.ElementTree(file=urllib.request.urlopen(url))
|
|
root = tree.getroot()
|
|
book = root.find('book')
|
|
book_object['year'] = book.find('publication_year').text or ''
|
|
book_object['lang'] = book.find('language_code').text
|
|
book_object['rating'] = book.find('average_rating').text
|
|
book_object['pages'] = book.find('num_pages').text
|
|
except urllib.error.HTTPError as e:
|
|
print('Error getting book details from GoodReads for book: {}. \nGot error: '.format(book_object['title']))
|
|
print(str(e.getcode()) + ' ' + e.msg)
|
|
|
|
|
|
def get_goodread_info(library):
|
|
import sys
|
|
return
|
|
print('')
|
|
print('Getting GoodReads data...')
|
|
|
|
processed = 0
|
|
total_book_count = 0
|
|
for key in library:
|
|
total_book_count += len(library[key])
|
|
|
|
|
|
for chapter in library:
|
|
book_list = library[chapter]
|
|
for book in book_list:
|
|
# do not call the api again if we already have the infomation
|
|
if 'rating' in book and book['rating']:
|
|
processed += 1
|
|
continue
|
|
get_details(book)
|
|
processed += 1
|
|
|
|
print('{}/{} records processed.'.format(processed, total_book_count), end="\b")
|
|
sys.stdout.write('\r')
|
|
sys.stdout.flush() # <- makes python print it anyway
|
|
|
|
# need to wait a second between the requests, to not abuse the API
|
|
time.sleep(1) |