commit c6197e9646
parent 18ef998136
Author: Vishnu KS
Date:   2020-09-27 16:02:12 +05:30

    app: Include amazon links.

13 changed files with 6377 additions and 239 deletions

File diff suppressed because it is too large

utils/books_list.json (new file, 5184 lines)

File diff suppressed because it is too large


@@ -1,2 +1,4 @@
 # save this file as 'config.py' and then fill it with your API keys
 GOODREADS_PUBLIC_API_KEY = "write here your goodreads public API key"
+GOOGLE_SEARCH_RAPIDAPI_HOST = ""
+GOOGLE_SEARCH_RAPIDAPI_KEY = ""
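For reference, a filled-in config.py might look like the sketch below. The host string mirrors the google-search3.p.rapidapi.com domain used by the request added later in this commit; the key values are made-up placeholders, not real credentials.

# Hypothetical example of a completed config.py — placeholder values only
GOODREADS_PUBLIC_API_KEY = "kXz9AbCdEf"                        # issued by Goodreads
GOOGLE_SEARCH_RAPIDAPI_HOST = "google-search3.p.rapidapi.com"  # API host on RapidAPI
GOOGLE_SEARCH_RAPIDAPI_KEY = "0123456789abcdef"                # issued by RapidAPI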


@@ -5,8 +5,8 @@ import urllib.error
 from bs4 import BeautifulSoup
-from config import GOODREADS_PUBLIC_API_KEY
+from config import GOODREADS_PUBLIC_API_KEY, GOOGLE_SEARCH_RAPIDAPI_HOST, GOOGLE_SEARCH_RAPIDAPI_KEY
+from googlesearch import search
 
 def get_details(book_object):
@@ -15,7 +15,17 @@ def get_details(book_object):
     )
     try:
-        tree = ET.ElementTree(file=urllib.request.urlopen(url))
+        time_to_sleep = 1
+        while True:
+            response = urllib.request.urlopen(url)
+            print(response.getcode())
+            if response.getcode() == 429:
+                time_to_sleep = time_to_sleep * 2
+                print("Sleeping for {}".format(time_to_sleep))
+                time.sleep(time_to_sleep)
+            else:
+                break
+        tree = ET.ElementTree(file=response)
         root = tree.getroot()
         book = root.find("book")
         book_object["year"] = book.find("publication_year").text or ""
@@ -23,9 +33,23 @@ def get_details(book_object):
         book_object["rating"] = book.find("average_rating").text
         book_object["pages"] = book.find("num_pages").text
         book_object["image_url"] = book.find("image_url").text
-        if (description := book.find("description").text) :
+        if (description := book.find("description").text):
             book_object["description"] = BeautifulSoup(description).text
         else:
             book_object["description"] = ""
         book_object["isbn"] = book.find("isbn").text
+        print("Fetching amazon link")
+        import requests
+        url = "https://google-search3.p.rapidapi.com/api/v1/search/q=site:amazon.com {} {}".format(book_object["title"], book_object["author"])
+        headers = {
+            'x-rapidapi-host': GOOGLE_SEARCH_RAPIDAPI_HOST,
+            'x-rapidapi-key': GOOGLE_SEARCH_RAPIDAPI_KEY,
+        }
+        response = requests.request("GET", url, headers=headers)
+        book_object["amazon_url"] = response.json()["results"][0]["link"]
+        return True
     except urllib.error.HTTPError as e:
         print(
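Two caveats about the added code, with a hedged sketch of how they could be addressed. First, urllib.request.urlopen() raises urllib.error.HTTPError for 4xx/5xx statuses rather than returning a response, so the while-loop above never actually sees getcode() == 429 — a rate-limited request jumps straight to the except block; catching the exception keeps the doubling backoff intact. Second, the search URL embeds raw spaces; letting requests build the query string percent-encodes them. The /api/v1/search path and "q" parameter are assumptions extrapolated from the endpoint string in the commit, not a documented API.

import time
import urllib.error
import urllib.request

import requests

def open_with_backoff(url, max_tries=6):
    # Retry on HTTP 429 with doubling sleep, mirroring the commit's logic.
    time_to_sleep = 1
    for _ in range(max_tries):
        try:
            return urllib.request.urlopen(url)
        except urllib.error.HTTPError as e:
            if e.code != 429:
                raise
            time_to_sleep = time_to_sleep * 2
            print("Sleeping for {}".format(time_to_sleep))
            time.sleep(time_to_sleep)
    raise RuntimeError("still rate-limited after {} tries: {}".format(max_tries, url))

def fetch_amazon_url(title, author, host, key):
    # params= lets requests percent-encode the spaces that the commit
    # embeds raw in the URL path.
    response = requests.get(
        "https://{}/api/v1/search".format(host),
        params={"q": "site:amazon.com {} {}".format(title, author)},
        headers={"x-rapidapi-host": host, "x-rapidapi-key": key},
    )
    response.raise_for_status()
    results = response.json().get("results") or []
    return results[0]["link"] if results else ""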


@@ -15,6 +15,7 @@ required_fields = [
     "image_url",
     "description",
     "category",
+    "amazon_url",
 ]
@@ -63,22 +64,11 @@ if __name__ == "__main__":
                     indent=4,
                     separators=(",", ": "),
                 )
-            book_list = []
-            for _, book in existing_book_names_to_details.items():
-                book_list.append(book)
-            with open("books_list.json", "w") as f:
-                json.dump(book_list, f, sort_keys=True, indent=4, separators=(",", ": "))
         else:
             print(f"❌ Error while fetching {title}")
         time.sleep(1)
+    book_list = []
+    for _, book in existing_book_names_to_details.items():
+        book_list.append(book)
     with open("books.json", "w") as f:
         json.dump(
             existing_book_names_to_details,
             f,
             sort_keys=True,
             indent=4,
             separators=(",", ": "),
         )
+    with open("books_list.json", "w") as f:
+        json.dump(book_list, f, sort_keys=True, indent=4, separators=(",", ": "))
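Since the list written to books_list.json is just the values of the dict written to books.json, the moved block can be condensed. A minimal sketch of the end-of-run serialization, assuming the same file names and the existing_book_names_to_details dict used above:

import json

def dump_results(existing_book_names_to_details):
    # books.json keeps the mapping keyed by book name; books_list.json is
    # the flat list written alongside it.
    with open("books.json", "w") as f:
        json.dump(existing_book_names_to_details, f, sort_keys=True,
                  indent=4, separators=(",", ": "))
    # list(dict.values()) replaces the manual append loop in the diff.
    book_list = list(existing_book_names_to_details.values())
    with open("books_list.json", "w") as f:
        json.dump(book_list, f, sort_keys=True, indent=4,
                  separators=(",", ": "))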