From 97f5fad05625372cb4e50dcf99826f8e2f26f0a2 Mon Sep 17 00:00:00 2001 From: Klaus Thoden Date: Wed, 22 Mar 2017 17:17:38 +0100 Subject: [PATCH] more or less finished --- build_frontmatter.py | 212 ++++++++++++++++++++++++++++++++----------- 1 file changed, 161 insertions(+), 51 deletions(-) diff --git a/build_frontmatter.py b/build_frontmatter.py index b849fd2..10a48d5 100644 --- a/build_frontmatter.py +++ b/build_frontmatter.py @@ -11,7 +11,9 @@ import sys import re import os +from datetime import datetime import psycopg2 +import psycopg2.extras # using https://wiki.postgresql.org/wiki/Psycopg2_Tutorial @@ -64,7 +66,7 @@ def get_publication_id(input_string, eoa_cursor): rows = eoa_cursor.fetchall() if len(rows) > 1: - print_error("There should be only one database entry that matches the input. Found %s") % len(rows) + print_error("There should be only one database entry that matches the input. Found %s" % len(rows)) elif len(rows) == 0: print_error("It seems like there is no such as publication %s %s. Exiting." 
def format_authors_xml(eoa_publication_info):
    """Format the authors of a publication as author entries for the XML.

    The values of the keys "Publicationauthor1" to "Publicationauthor5"
    are read from eoa_publication_info. Fields that are empty, None or
    consist of whitespace only are skipped. For every other field, the
    first and the last whitespace-separated part of the name are
    filled into one entry.

    Return all entries concatenated into one string. The string is
    empty if none of the five fields is filled.
    """

    # NOTE(review): in the text under review only the placeholders and
    # the dummy e-mail address of this template are visible. Neither
    # the markup around them nor the whitespace inside the literal
    # could be recovered, so only the line structure is kept here.
    # Restore the element tags from the original file before use.
    author_template = (
        "\n"
        "\n"
        "%s\n"
        "%s\n"
        "author@example.com\n"
    )

    author_entries = []

    for authornumber in range(1, 6):
        tmp_author = eoa_publication_info["Publicationauthor" + str(authornumber)]
        if not tmp_author:
            continue
        # split() without argument ignores leading, trailing and
        # repeated blanks, which would otherwise yield empty name parts
        nameparts = tmp_author.split()
        if not nameparts:
            continue
        author_entries.append(author_template % (nameparts[0], nameparts[-1]))

    return "".join(author_entries)
# def format_authors_xml ends here

def format_publication_info(eoa_publication_info):
    """Provide strings for the publication info.

    eoa_publication_info is read by column name ("Serie", "Number",
    "Coverbig", "Publicationauthorsuffix", "Publicationlicense",
    "Shoplink", "Title", "Subtitle", "Isbn", "Datepublished" and the
    author columns used by format_authors_xml).

    As a side effect, download_cover_image() is called with the URL of
    the cover image.

    Return a dictionary with the keys "bookauthors", "pubsuffix",
    "booktitle", "booksubtitle", "publisher", "series", "number",
    "isbn", "pubdate" (formatted as YYYY-MM-DD), "url", "licence" and
    "shoplink".
    """

    series = eoa_publication_info["Serie"]
    number = eoa_publication_info["Number"]

    base_url, publisher_string = which_publisher(series)
    cover_url = "%s/media/%s" % (PRODUCTION_URL, eoa_publication_info["Coverbig"])
    download_cover_image(cover_url)

    publication_url = "%s/%s/%s/" % (base_url, series, number)

    # the suffix is separated by a blank, but only if there is one
    # (the field may be empty or None)
    author_suffix = eoa_publication_info["Publicationauthorsuffix"]
    pub_suffix = " " + author_suffix if author_suffix else ""

    licence_string = format_licence(eoa_publication_info["Publicationlicense"])
    shoplink_string = format_shoplink(eoa_publication_info["Shoplink"])
    book_authors_string = format_authors_xml(eoa_publication_info)

    items_to_return = {
        "bookauthors": book_authors_string,
        "pubsuffix": pub_suffix,
        "booktitle": format_title(eoa_publication_info["Title"]),
        "booksubtitle": format_title(eoa_publication_info["Subtitle"],
                                     is_book_subtitle=True,
                                     unformatted=True),
        "publisher": publisher_string,
        "series": series.title(),
        "number": number,
        "isbn": eoa_publication_info["Isbn"],
        "pubdate": eoa_publication_info["Datepublished"].strftime("%Y-%m-%d"),
        "url": publication_url,
        "licence": licence_string,
        "shoplink": shoplink_string,
    }

    return items_to_return
# def format_publication_info ends here

def format_shoplink(input_string, raw=False):
    """Parse the shoplink entry

    input_string is matched from its first character against
    SHOPLINK_PATTERN, which provides the groups 'book_url' and
    'company'. None is treated like an empty string.

    With raw=False (the default), return the LaTeX line for the
    printing company: one line each for "epubli.de" and
    "pro-business.com", an empty string for any other company or if
    the entry could not be parsed.

    With raw=True, return the tuple (shop_url, company) instead. Both
    values are "k.A." if the entry could not be parsed.
    """

    # NOTE(review): the pattern shown in the text under review was
    # '(?P.*?)', which does not compile and defines neither of the two
    # groups read below. It is reconstructed here as a plain HTML link;
    # confirm against the original file and the content of the
    # "Shoplink" column (e.g. further attributes in the link).
    SHOPLINK_PATTERN = re.compile(r'<a href="(?P<book_url>.*?)">(?P<company>.*?)</a>')

    # placeholder for entries that cannot be parsed
    # (presumably German "keine Angabe" - TODO confirm)
    company = shop_url = "k.A."

    shoplink_match = SHOPLINK_PATTERN.match(input_string or "")
    if shoplink_match is not None:
        shop_url = shoplink_match.group('book_url')
        company = shoplink_match.group('company')

    if raw:
        return(shop_url, company)

    if company == "epubli.de":
        shoplink_line = r"Neopubli GmbH, Berlin\par\url{%s}" % shop_url
    elif company == "pro-business.com":
        shoplink_line = r"PRO BUSINESS digital printing Deutschland GmbH, Berlin\par\url{%s}" % shop_url
    else:
        shoplink_line = ""

    return(shoplink_line)
# def format_shoplink ends here
def file_base64(filepath):
    """Base64 encode a file

    https://code.tutsplus.com/tutorials/base64-encoding-and-decoding-using-python--cms-25588

    The file at filepath is read completely in binary mode. Return the
    encoded content as produced by the base64 module, i.e. with a
    newline after every 76 characters of output and at the end. An
    empty file gives an empty result.
    """
    import base64

    # encodestring() was removed in Python 3.9. encodebytes() is its
    # replacement, but does not exist in Python 2.
    encode = getattr(base64, "encodebytes", None) or base64.encodestring

    # the with statement closes the file even if reading fails
    with open(filepath, "rb") as binary_file:
        return encode(binary_file.read())
# def file_base64 ends here
def create_omp_native_xml(eoa_publication):
    """Use the database information for creating input for OMP

    eoa_publication is checked with check_publication() and then
    looked up in the database. The template
    ./data/native_template.xml is filled with the data of the
    publication and the result is written to
    ./generated_files/<eoa_publication>.xml. The content of
    ./data/dummy.pdf is inserted base64 encoded.

    Nothing is returned. Note that format_publication_info() is
    called, which also requests the cover image.
    """
    # validate input
    check_publication(eoa_publication)

    # setting up database; the rows are accessed by column name below
    eoa_db = connect_db()
    eoa_cursor = eoa_db.cursor(cursor_factory=psycopg2.extras.DictCursor)

    eoa_pub_id = get_publication_id(eoa_publication, eoa_cursor)
    eoa_publication_info = get_publication_info(eoa_pub_id, eoa_cursor)[0]

    # which_publisher() is not called here any more: its result was
    # never used and format_publication_info() calls it with the same
    # argument.

    # the template file
    with open("./data/native_template.xml", "r") as tmp_template:
        native_template = string.Template(tmp_template.read())

    item_for_template = format_publication_info(eoa_publication_info)

    supplierurl, suppliercomp = format_shoplink(eoa_publication_info["Shoplink"], raw=True)

    subtitle = eoa_publication_info["Subtitle"]
    if subtitle:
        # NOTE(review): in the text under review both templates consist
        # of the placeholder only; markup around it could not be
        # recovered. Confirm against the original file.
        onix_subtitle = """%s""" % subtitle
        omp_subtitle = """%s""" % subtitle
    else:
        omp_subtitle = onix_subtitle = ""

    # string.Template would insert a bytes object as "b'...'"
    pdf_base64 = file_base64("./data/dummy.pdf")
    if not isinstance(pdf_base64, str):
        pdf_base64 = pdf_base64.decode("ascii")

    # NOTE(review): the text under review shows a line break between
    # the quotes of the string that is removed from the abstract. It
    # may originally have been markup - confirm against the original
    # file.
    abstract = eoa_publication_info["Descriptionlong"].replace("\n", "")

    # fill in the blanks
    native_xml = native_template.substitute(
        INTERNAL_ID=item_for_template["number"],
        FORMAT_AUTHORS=item_for_template["bookauthors"],
        FORMAT_TITLE=item_for_template["booktitle"],
        OMP_SUBTITLE=omp_subtitle,
        ONIX_SUBTITLE=onix_subtitle,
        PUBLISHER_STRING=item_for_template["publisher"],
        EOA_SERIES=item_for_template["series"],
        SERIES_NUMBER=item_for_template["number"],
        ISBN_CODE=item_for_template["isbn"],
        PUB_DATE=item_for_template["pubdate"],
        PUBLICATION_URL=item_for_template["url"],
        ABSTRACT=abstract,
        BASE64_PDF=pdf_base64,
        PRICE=eoa_publication_info["Price"],
        TODAY=datetime.today().strftime("%Y-%m-%d"),
        SUPPLIER_COMP=suppliercomp,
        SUPPLIER_URL=supplierurl,
        PAGES=eoa_publication_info["Pages"],
        SUBMISSION_NAME="%s_%d_submission" % (item_for_template["series"], item_for_template["number"]),
        PUBDATE_00=eoa_publication_info["Datepublished"].strftime("%Y%m%d"),
    )

    # The output file is opened only after everything has been put
    # together, so that an error above does not leave an empty file.
    xml_filename = eoa_publication + ".xml"
    with open("./generated_files/" + xml_filename, "w") as outfile:
        outfile.write(native_xml)
# def create_omp_native_xml ends here