refactored, added command line capabilities
This commit is contained in:
44
utils/gooodreads.py
Normal file
44
utils/gooodreads.py
Normal file
@@ -0,0 +1,44 @@
|
||||
import time
|
||||
import xml.etree.ElementTree as ET
|
||||
import urllib.request
|
||||
import urllib.error
|
||||
|
||||
from config import GOODREADS_PUBLIC_API_KEY
|
||||
|
||||
|
||||
def get_details(book_object):
|
||||
|
||||
url = "http://www.goodreads.com/book/title.xml?key={}&title={}".format(GOODREADS_PUBLIC_API_KEY, urllib.parse.quote_plus(book_object['title']))
|
||||
# url = url.replace(' ', '%20')
|
||||
print(url)
|
||||
try:
|
||||
tree = ET.ElementTree(file=urllib.request.urlopen(url))
|
||||
root = tree.getroot()
|
||||
book = root.find('book')
|
||||
book_object['year'] = book.find('publication_year').text or ''
|
||||
book_object['lang'] = book.find('language_code').text
|
||||
book_object['rating'] = book.find('average_rating').text
|
||||
book_object['pages'] = book.find('num_pages').text
|
||||
except urllib.error.HTTPError as e:
|
||||
print('Error getting book details from GoodReads: ')
|
||||
print(str(e.getcode()) + ' ' + e.msg)
|
||||
print(book_object)
|
||||
|
||||
|
||||
def get_goodread_info(library):
|
||||
i = 0
|
||||
|
||||
for chapter in library:
|
||||
book_list = library[chapter]
|
||||
for book in book_list:
|
||||
|
||||
# do not call the api again if we already have the infomration
|
||||
if 'rating' in book and book['rating']:
|
||||
continue
|
||||
#print(i)
|
||||
#if i == 10:
|
||||
#break
|
||||
get_details(book)
|
||||
#i += 1
|
||||
# need to wait a second between the requests, to not abuse the API
|
||||
time.sleep(1)
|
||||
@@ -1,116 +1,57 @@
|
||||
from config import GOODREADS_PUBLIC_API_KEY
|
||||
# we assume that every line after # Books
|
||||
# starting with * is a book title if file type is old
|
||||
# starting with | (and not with | Name or |--) is a book if the file type is new
|
||||
|
||||
file_with_books = './../README.MD'
|
||||
|
||||
# we assume that every line after # Books starting with * is a book title
|
||||
# ARGUMENT HANDLING
|
||||
try:
|
||||
import argparse
|
||||
parser = argparse.ArgumentParser(description='Process file.')
|
||||
parser.add_argument(
|
||||
'--in_file',
|
||||
help='File to process, defaults to ./../README.MD')
|
||||
parser.add_argument(
|
||||
'--out_file',
|
||||
help='File to save to, defaults to ./../README-NEW.MD')
|
||||
parser.add_argument(
|
||||
'--file_type',
|
||||
choices=['old', 'new'],
|
||||
help='old if links are displayed in a list, new if in a table')
|
||||
parser.add_argument(
|
||||
'--sort_by',
|
||||
choices = ['rating', 'title', 'author', 'year'],
|
||||
help='defaults to rating')
|
||||
flags = parser.parse_args()
|
||||
#argparse.ArgumentParser(parents=[tools.argparser]).parse_args()
|
||||
print(flags)
|
||||
except ImportError:
|
||||
flags = None
|
||||
|
||||
|
||||
def read_file_content(file):
|
||||
with open(file) as f:
|
||||
content = f.readlines()
|
||||
# remove whitespaces
|
||||
return [_.strip() for _ in content]
|
||||
|
||||
|
||||
def parse_book_string(book_string):
|
||||
book = {}
|
||||
book['title'] = book_string.split('[')[1].split(']')[0]
|
||||
book['url'] = book_string.split('(')[1].split(')')[0]
|
||||
book['author'] = book_string.split(' by ')[-1]
|
||||
return book
|
||||
|
||||
|
||||
def load(file):
|
||||
file = read_file_content(file)
|
||||
print(file)
|
||||
# we start one line after tilte # Books
|
||||
line_to_start = file.index('# Books') + 1
|
||||
current_title = ''
|
||||
books_under_current_title = []
|
||||
library = {}
|
||||
|
||||
for i in range(line_to_start, len(file)):
|
||||
line = file[i]
|
||||
|
||||
# we have a title
|
||||
if line.startswith('##'):
|
||||
if len(current_title) == 0:
|
||||
current_title = line
|
||||
else:
|
||||
library[current_title] = books_under_current_title
|
||||
books_under_current_title = []
|
||||
current_title = line
|
||||
continue
|
||||
|
||||
# we have a book
|
||||
if line.startswith('*'):
|
||||
book = parse_book_string(line)
|
||||
books_under_current_title.append(book)
|
||||
return library
|
||||
|
||||
|
||||
def sort_by(library, key_to_sort_on):
|
||||
def sort(library, key_to_sort_on, reverse = False):
|
||||
new_library = {}
|
||||
for key in library:
|
||||
books = library[key]
|
||||
new_library[key] = sorted(books, key=lambda k: k[key_to_sort_on])
|
||||
new_library[key] = sorted(books, key=lambda k: k[key_to_sort_on], reverse=reverse)
|
||||
return new_library
|
||||
|
||||
|
||||
def render_book_line(book_object):
|
||||
return '* [{}]({}) by {}.\n'.format(book_object['title'], book_object['url'], book_object['author'])
|
||||
def main():
|
||||
from read_file import load
|
||||
from gooodreads import get_goodread_info
|
||||
from write_file import render
|
||||
|
||||
in_file = flags.in_file or './../README.MD'
|
||||
out_file = flags.out_file or './../README-NEW.md'
|
||||
file_type = flags.file_type or 'new'
|
||||
sort_by = flags.sort_by or 'rating'
|
||||
reverse = True if sort_by == 'rating' else False
|
||||
|
||||
library = load(in_file, file_type)
|
||||
get_goodread_info(library)
|
||||
library = sort(library, sort_by, reverse)
|
||||
render(in_file, out_file, library)
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
||||
|
||||
# TODO: refine this logic
|
||||
def render(file_name, library):
|
||||
books_not_reached = True
|
||||
with open(file_name, 'w') as out_file:
|
||||
with open(file_with_books) as original_file:
|
||||
for line in original_file:
|
||||
|
||||
if line.strip() in library:
|
||||
if not books_not_reached: out_file.write('\n')
|
||||
books_not_reached = False
|
||||
out_file.write(line)
|
||||
for book in library[line.strip()]:
|
||||
out_file.write(render_book_line(book))
|
||||
elif books_not_reached:
|
||||
out_file.write(line)
|
||||
elif line.startswith('## License'):
|
||||
out_file.write('\n')
|
||||
out_file.write('\n')
|
||||
out_file.write(line)
|
||||
books_not_reached = True
|
||||
|
||||
def get_details(book_object):
|
||||
import xml.etree.ElementTree as ET
|
||||
import urllib.request
|
||||
import urllib.error
|
||||
url = "http://www.goodreads.com/book/title.xml?key={}&title={}".format(GOODREADS_PUBLIC_API_KEY, book_object['title'])
|
||||
url = url.replace(' ', '%20')
|
||||
print(url)
|
||||
try:
|
||||
tree = ET.ElementTree(file=urllib.request.urlopen(url))
|
||||
root = tree.getroot()
|
||||
book = root.find('book')
|
||||
book_object['year'] = book.find('publication_year').text
|
||||
book_object['lang'] = book.find('language_code').text
|
||||
book_object['avg_rt'] = book.find('average_rating').text
|
||||
book_object['pages'] = book.find('num_pages').text
|
||||
except urllib.error.HTTPError as e:
|
||||
print('Error getting book details from GoodReads: ')
|
||||
print(str(e.getcode()) + ' ' + e.msg)
|
||||
print(book_object)
|
||||
|
||||
library = load(file_with_books)
|
||||
library = sort_by(library, 'title')
|
||||
|
||||
render('./../by-title.md', library)
|
||||
|
||||
library = load(file_with_books)
|
||||
library = sort_by(library, 'author')
|
||||
|
||||
render('./../by-author.md', library)
|
||||
|
||||
print(library)
|
||||
get_details(library['### Miscellaneous'][0])
|
||||
64
utils/read_file.py
Normal file
64
utils/read_file.py
Normal file
@@ -0,0 +1,64 @@
|
||||
def read_file_content(file):
|
||||
with open(file) as f:
|
||||
content = f.readlines()
|
||||
# remove whitespaces
|
||||
return [_.strip() for _ in content]
|
||||
|
||||
|
||||
# old (list)
|
||||
def parse_book_string(book_string):
|
||||
book = {}
|
||||
book['title'] = book_string.split('[')[1].split(']')[0]
|
||||
book['url'] = book_string.split('(')[1].split(')')[0]
|
||||
book['author'] = book_string.split(' by ')[-1]
|
||||
book['rating'] = ''
|
||||
book['year'] = ''
|
||||
return book
|
||||
|
||||
|
||||
# new (table)
|
||||
def parse_book_string_new(book_string):
|
||||
book = {}
|
||||
book_split = book_string.split('|')
|
||||
print(book_split)
|
||||
book['title'] = book_split[1].strip()
|
||||
book['author'] = book_split[2].strip()
|
||||
book['url'] = book_split[3].strip().split('[')[1].split('(')[1].split(')')[0]
|
||||
book['rating'] = book_split[3].strip().split('[')[1].split(']')[0]
|
||||
book['year'] = book_split[4].strip()
|
||||
return book
|
||||
|
||||
|
||||
def load(file, file_type):
|
||||
file = read_file_content(file)
|
||||
print(file)
|
||||
# we start one line after tilte # Books
|
||||
line_to_start = file.index('# Books') + 1
|
||||
current_title = ''
|
||||
books_under_current_title = []
|
||||
library = {}
|
||||
|
||||
for i in range(line_to_start, len(file)):
|
||||
line = file[i]
|
||||
|
||||
# we have a title
|
||||
if line.startswith('##'):
|
||||
if len(current_title) == 0:
|
||||
current_title = line
|
||||
else:
|
||||
library[current_title] = books_under_current_title
|
||||
books_under_current_title = []
|
||||
current_title = line
|
||||
continue
|
||||
|
||||
# we have a book
|
||||
if file_type == 'old':
|
||||
if line.startswith('*'):
|
||||
book = parse_book_string(line)
|
||||
books_under_current_title.append(book)
|
||||
else:
|
||||
if line.startswith('|') and not line.startswith('| Name') and not line.startswith('|---'):
|
||||
book = parse_book_string_new(line)
|
||||
books_under_current_title.append(book)
|
||||
|
||||
return library
|
||||
37
utils/write_file.py
Normal file
37
utils/write_file.py
Normal file
@@ -0,0 +1,37 @@
|
||||
def render_book_line(book_object):
|
||||
book = book_object
|
||||
book['rating'] = '?' if not 'rating' in book else book['rating']
|
||||
book['url'] = '' if not 'url' in book else book['url']
|
||||
book['year'] = '' if not 'year' in book else book['year']
|
||||
return '| {} | {} | [{}]({}) | {} | \n'.format(book['title'],
|
||||
book['author'],
|
||||
book['rating'],
|
||||
book['url'],
|
||||
book['year'])
|
||||
|
||||
|
||||
# TODO: refine this logic
|
||||
def render(in_file, out_file, library):
|
||||
books_not_reached = True
|
||||
with open(out_file, 'w') as out_file:
|
||||
with open(in_file) as original_file:
|
||||
for line in original_file:
|
||||
|
||||
if line.strip() in library:
|
||||
if not books_not_reached: out_file.write('\n')
|
||||
books_not_reached = False
|
||||
|
||||
# render chapter and start of the table
|
||||
out_file.write(line)
|
||||
out_file.write('| Name | Author | Goodreads Rating | Year Published | \n')
|
||||
out_file.write('|------|--------|------------------|----------------| \n')
|
||||
# render books
|
||||
for book in library[line.strip()]:
|
||||
out_file.write(render_book_line(book))
|
||||
elif books_not_reached:
|
||||
out_file.write(line)
|
||||
elif line.startswith('## License'):
|
||||
out_file.write('\n')
|
||||
out_file.write('\n')
|
||||
out_file.write(line)
|
||||
books_not_reached = True
|
||||
Reference in New Issue
Block a user