utils: Store only text as description.

This commit is contained in:
Vishnu KS
2020-08-23 23:10:26 +05:30
parent 52285301d1
commit 9fe8505ea4
7 changed files with 5477 additions and 3145 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -3,6 +3,8 @@ import xml.etree.ElementTree as ET
import urllib.request
import urllib.error
from bs4 import BeautifulSoup
from config import GOODREADS_PUBLIC_API_KEY
@@ -21,7 +23,8 @@ def get_details(book_object):
book_object["rating"] = book.find("average_rating").text
book_object["pages"] = book.find("num_pages").text
book_object["image_url"] = book.find("image_url").text
book_object["description"] = book.find("description").text
if (description := book.find("description").text):
book_object["description"] = BeautifulSoup(description).text
book_object["isbn"] = book.find("isbn").text
return True
except urllib.error.HTTPError as e:

View File

@@ -3,8 +3,9 @@ import time
from read_file import load
from gooodreads import get_details
from bs4 import BeautifulSoup
required_fields = ["title", "author", "url", "rating", "year", "pages", "image_url", "description"]
required_fields = ["title", "author", "url", "rating", "year", "pages", "image_url", "description", "category"]
def book_has_all_fields(book):
for required_field in required_fields:
@@ -13,11 +14,18 @@ def book_has_all_fields(book):
return False
return True
def clean_category(category_raw):
    """Strip a leading Markdown heading marker from a category heading.

    Args:
        category_raw: Heading text, e.g. ``"### Fiction"`` or ``"## Science"``.

    Returns:
        The text with a leading ``"### "`` or ``"## "`` removed. Text that
        does not start with either marker is returned unchanged.
    """
    # Anchor the check to the start of the string: the slice below removes
    # leading characters, so a marker found elsewhere in the text must not
    # trigger it. The longer marker is tested first for clarity.
    for marker in ("### ", "## "):
        if category_raw.startswith(marker):
            return category_raw[len(marker):]
    # Explicit fall-through instead of an implicit None, so callers always
    # receive a usable category name.
    return category_raw
if __name__ == "__main__":
library = load("../README.md", "new")
existing_book_names_to_details = json.load(open("books.json"))
for category in library:
category_name = clean_category(category)
for book in library[category]:
if (title := book["title"]) in existing_book_names_to_details:
existing_book = existing_book_names_to_details[title]
@@ -28,6 +36,7 @@ if __name__ == "__main__":
"title": title,
"author": book["author"],
"url": book["url"],
"category": category_name
}
fetched = get_details(new_book)
if fetched:
@@ -38,3 +47,13 @@ if __name__ == "__main__":
else:
print(f"❌ Error while fetching {title}")
time.sleep(1)
book_list = []
for _, book in existing_book_names_to_details.items():
book_list.append(book)
with open("books.json", "w") as f:
json.dump(existing_book_names_to_details, f, sort_keys=True, indent=4, separators=(',', ': '))
with open("books_list.json", "w") as f:
json.dump(book_list, f, sort_keys=True, indent=4, separators=(',', ': '))