107 lines
3.7 KiB
Python
107 lines
3.7 KiB
Python
import time
|
|
import xml.etree.ElementTree as ET
|
|
import urllib.request
|
|
import urllib.error
|
|
|
|
import requests
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
from config import GOODREADS_PUBLIC_API_KEY, GOOGLE_SEARCH_RAPIDAPI_HOST, GOOGLE_SEARCH_RAPIDAPI_KEY, GOOGLE_BOOK_API_KEY
|
|
from googlesearch import search
|
|
|
|
def get_details(book_object):
    """Populate *book_object* in place with metadata from external services.

    Fetches from GoodReads (year, language, rating, pages, image, ISBN,
    description), then tries to replace the description with the Google Books
    one, then looks up an Amazon link through the RapidAPI Google-search
    endpoint.

    Args:
        book_object: dict with at least "title" and "author" keys; metadata
            keys are written into it in place.

    Returns:
        True on success; False when GoodReads answers with an HTTP error
        other than 429 (the object may be left partially updated).
    """
    url = "http://www.goodreads.com/book/title.xml?key={}&title={}".format(
        GOODREADS_PUBLIC_API_KEY, urllib.parse.quote_plus(book_object["title"])
    )
    print(url)
    try:
        # urlopen() RAISES HTTPError for 429 responses — it never returns
        # them — so the rate-limit retry must live in an exception handler.
        # (The original checked response.getcode() == 429, which was
        # unreachable.)  Exponential back-off between attempts.
        time_to_sleep = 1
        while True:
            try:
                response = urllib.request.urlopen(url)
            except urllib.error.HTTPError as e:
                if e.code != 429:
                    raise
                time_to_sleep = time_to_sleep * 2
                print("Sleeping for {}".format(time_to_sleep))
                time.sleep(time_to_sleep)
            else:
                print(response.getcode())
                break

        tree = ET.ElementTree(file=response)
        root = tree.getroot()
        book = root.find("book")

        book_object["year"] = book.find("publication_year").text or ""
        book_object["lang"] = book.find("language_code").text
        book_object["rating"] = book.find("average_rating").text
        book_object["pages"] = book.find("num_pages").text
        book_object["image_url"] = book.find("image_url").text
        book_object["isbn"] = book.find("isbn").text

        # GoodReads descriptions are HTML; strip the markup.  Name the
        # parser explicitly so the result does not depend on what bs4
        # happens to have installed.
        description = book.find("description").text
        if description:
            book_object["description"] = BeautifulSoup(description, "html.parser").text
        else:
            book_object["description"] = ""

        # Attempt to use Google Book API — prefer its description when
        # one is available.  Escape title/author instead of interpolating
        # them raw into the query string.
        url = "https://www.googleapis.com/books/v1/volumes?q={}+inauthor:{}&key={}".format(
            urllib.parse.quote_plus(book_object["title"]),
            urllib.parse.quote_plus(book_object["author"]),
            GOOGLE_BOOK_API_KEY,
        )
        response = requests.request("GET", url)

        # "items" is absent when the search matches nothing; don't crash.
        for item in response.json().get("items", []):
            if "description" in item["volumeInfo"]:
                book_object["description"] = item["volumeInfo"]["description"]
                break

        print("Fetching amazon link")

        url = "https://google-search3.p.rapidapi.com/api/v1/search/q=site:amazon.com {} {}".format(book_object["title"], book_object["author"])

        headers = {
            'x-rapidapi-host': GOOGLE_SEARCH_RAPIDAPI_HOST,
            'x-rapidapi-key': GOOGLE_SEARCH_RAPIDAPI_KEY,
        }

        response = requests.request("GET", url, headers=headers)

        # Guard against an empty result set — a KeyError/IndexError here
        # would escape the urllib-only except clause below.
        results = response.json().get("results") or []
        if results:
            book_object["amazon_url"] = results[0]["link"]
        return True
    except urllib.error.HTTPError as e:
        print(
            "Error getting book details from GoodReads for book: {}. \nGot error: ".format(
                book_object["title"]
            )
        )
        print(str(e.getcode()) + " " + e.msg)
        return False
|
|
|
|
|
|
def get_goodread_info(library, force):
    """Fetch GoodReads/Google metadata for every book in *library*.

    Args:
        library: dict mapping category name -> list of book dicts; each
            book dict is updated in place by get_details().
        force: when falsy, books that already carry a non-empty "rating"
            are skipped so the APIs are not called again.

    Returns:
        None.  Progress is reported on a single rewritten stdout line.
    """
    import sys

    print("")
    print("Getting GoodReads data...")

    processed = 0
    total_book_count = sum(len(books) for books in library.values())

    for book_list in library.values():
        for book in book_list:
            # Do not call the API again if we already have the information.
            if not force and book.get("rating"):
                processed += 1
                continue

            get_details(book)
            processed += 1

            # "\r" + end="" rewrites the progress counter in place.
            # (The original used end="\b", leaving a stray backspace
            # byte in the stream before the carriage return.)
            print(
                "\r{}/{} records processed.".format(processed, total_book_count),
                end="",
            )
            sys.stdout.flush()  # <- makes python print it anyway

            # need to wait a second between the requests, to not abuse the API
            time.sleep(1)
|