Files
Mind-Expanding-Books/utils/gooodreads.py
2020-08-23 23:10:26 +05:30

69 lines
2.2 KiB
Python

import time
import urllib.error
import urllib.parse
import urllib.request
import xml.etree.ElementTree as ET

from bs4 import BeautifulSoup

from config import GOODREADS_PUBLIC_API_KEY
def _child_text(parent, tag):
    """Return the text of the first ``tag`` child of ``parent``, or None if absent."""
    node = parent.find(tag)
    return None if node is None else node.text


def _report_lookup_failure(title, detail):
    """Print the two-line failure report for a book whose lookup failed."""
    print(
        "Error getting book details from GoodReads for book: {}. \nGot error: ".format(
            title
        )
    )
    print(detail)


def get_details(book_object):
    """Look up a book on Goodreads by title and copy its metadata into ``book_object``.

    ``book_object`` must provide a ``"title"`` entry. On success the keys
    ``year``, ``lang``, ``rating``, ``pages``, ``image_url`` and ``isbn`` are
    set (to ``None`` when the element is missing or empty, except ``year``,
    which falls back to ``""``); ``description`` is set only when the response
    carries a non-empty description.

    Returns:
        True when the response contained a ``<book>`` element and the fields
        were copied, False when the request failed, the response was not
        well-formed XML, or no ``<book>`` element was present. Failures are
        reported on stdout rather than raised.
    """
    title = book_object["title"]
    # NOTE(review): the request goes over plain http and carries the API key;
    # consider switching to https once confirmed against the service.
    url = "http://www.goodreads.com/book/title.xml?key={}&title={}".format(
        GOODREADS_PUBLIC_API_KEY, urllib.parse.quote_plus(title)
    )

    try:
        # The context manager closes the HTTP response even if parsing fails.
        with urllib.request.urlopen(url) as response:
            root = ET.parse(response).getroot()
    except urllib.error.HTTPError as e:
        # HTTPError is a subclass of URLError, so it must be handled first.
        _report_lookup_failure(title, str(e.getcode()) + " " + e.msg)
        return False
    except urllib.error.URLError as e:
        _report_lookup_failure(title, str(e.reason))
        return False
    except ET.ParseError as e:
        _report_lookup_failure(title, str(e))
        return False

    book = root.find("book")
    if book is None:
        _report_lookup_failure(title, "no <book> element in response")
        return False

    book_object["year"] = _child_text(book, "publication_year") or ""
    book_object["lang"] = _child_text(book, "language_code")
    book_object["rating"] = _child_text(book, "average_rating")
    book_object["pages"] = _child_text(book, "num_pages")
    book_object["image_url"] = _child_text(book, "image_url")
    if (description := _child_text(book, "description")):
        # The description is HTML; keep only its text. Naming the parser
        # explicitly keeps the result independent of which optional parsers
        # happen to be installed.
        book_object["description"] = BeautifulSoup(description, "html.parser").text
    book_object["isbn"] = _child_text(book, "isbn")
    return True
def get_goodread_info(library, force):
    """Fill in Goodreads details for every book in ``library``.

    Args:
        library: mapping of category -> list of book dicts. Each book is
            handed to ``get_details``, which updates it in place.
        force: when true, look up every book; otherwise books that already
            have a truthy ``"rating"`` are skipped without a request.

    Returns:
        None. Progress is written to stdout after each lookup.
    """
    print("")
    print("Getting GoodReads data...")

    total_book_count = sum(len(book_list) for book_list in library.values())
    processed = 0
    requests_made = 0

    for book_list in library.values():
        for book in book_list:
            processed += 1
            # Do not call the API again if we already have the information.
            if not force and book.get("rating"):
                continue

            # Wait a second between requests so as not to abuse the API;
            # pausing before (rather than after) each call avoids a useless
            # delay once the last book has been fetched.
            if requests_made:
                time.sleep(1)
            get_details(book)
            requests_made += 1

            # "\r" moves the cursor back to the start of the line so the next
            # update overwrites this one; flush so it shows up immediately.
            print(
                "{}/{} records processed.".format(processed, total_book_count),
                end="\r",
                flush=True,
            )

    if requests_made:
        # Terminate the progress line so later output does not overwrite it.
        print()