utils: Store only text as description.
This commit is contained in:
946
utils/books.json
946
utils/books.json
File diff suppressed because it is too large
Load Diff
@@ -3,6 +3,8 @@ import xml.etree.ElementTree as ET
|
||||
import urllib.request
|
||||
import urllib.error
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from config import GOODREADS_PUBLIC_API_KEY
|
||||
|
||||
|
||||
@@ -21,7 +23,8 @@ def get_details(book_object):
|
||||
book_object["rating"] = book.find("average_rating").text
|
||||
book_object["pages"] = book.find("num_pages").text
|
||||
book_object["image_url"] = book.find("image_url").text
|
||||
book_object["description"] = book.find("description").text
|
||||
if (description := book.find("description").text):
|
||||
book_object["description"] = BeautifulSoup(description).text
|
||||
book_object["isbn"] = book.find("isbn").text
|
||||
return True
|
||||
except urllib.error.HTTPError as e:
|
||||
|
||||
@@ -3,8 +3,9 @@ import time
|
||||
|
||||
from read_file import load
|
||||
from gooodreads import get_details
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
required_fields = ["title", "author", "url", "rating", "year", "pages", "image_url", "description"]
|
||||
required_fields = ["title", "author", "url", "rating", "year", "pages", "image_url", "description", "category"]
|
||||
|
||||
def book_has_all_fields(book):
|
||||
for required_field in required_fields:
|
||||
@@ -13,11 +14,18 @@ def book_has_all_fields(book):
|
||||
return False
|
||||
return True
|
||||
|
||||
def clean_category(category_raw):
    """Strip a leading Markdown heading marker from a category heading.

    Args:
        category_raw: Heading text, e.g. ``"## Fiction"`` or
            ``"### Science Fiction"``.

    Returns:
        The heading text without its leading ``"### "`` or ``"## "`` marker.
        When the text does not start with either marker it is returned
        unchanged rather than ``None``, so callers always get a usable
        category name.
    """
    # Check the longer marker first. The test must be a *prefix* test:
    # a substring test would also fire on markers appearing later in the
    # text and then slice off the wrong number of leading characters.
    for marker in ("### ", "## "):
        if category_raw.startswith(marker):
            return category_raw[len(marker):]
    return category_raw
|
||||
|
||||
if __name__ == "__main__":
|
||||
library = load("../README.md", "new")
|
||||
existing_book_names_to_details = json.load(open("books.json"))
|
||||
|
||||
for category in library:
|
||||
category_name = clean_category(category)
|
||||
for book in library[category]:
|
||||
if (title := book["title"]) in existing_book_names_to_details:
|
||||
existing_book = existing_book_names_to_details[title]
|
||||
@@ -28,6 +36,7 @@ if __name__ == "__main__":
|
||||
"title": title,
|
||||
"author": book["author"],
|
||||
"url": book["url"],
|
||||
"category": category_name
|
||||
}
|
||||
fetched = get_details(new_book)
|
||||
if fetched:
|
||||
@@ -38,3 +47,13 @@ if __name__ == "__main__":
|
||||
else:
|
||||
print(f"❌ Error while fetching {title}")
|
||||
time.sleep(1)
|
||||
|
||||
book_list = []
|
||||
for _, book in existing_book_names_to_details.items():
|
||||
book_list.append(book)
|
||||
|
||||
with open("books.json", "w") as f:
|
||||
json.dump(existing_book_names_to_details, f, sort_keys=True, indent=4, separators=(',', ': '))
|
||||
|
||||
with open("books_list.json", "w") as f:
|
||||
json.dump(book_list, f, sort_keys=True, indent=4, separators=(',', ': '))
|
||||
|
||||
Reference in New Issue
Block a user