Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
more or less finished
  • Loading branch information
Klaus Thoden committed Mar 22, 2017
1 parent 71f7e03 commit 97f5fad
Showing 1 changed file with 161 additions and 51 deletions.
212 changes: 161 additions & 51 deletions build_frontmatter.py
Expand Up @@ -11,7 +11,9 @@
import sys
import re
import os
from datetime import datetime
import psycopg2
import psycopg2.extras

# using https://wiki.postgresql.org/wiki/Psycopg2_Tutorial

Expand Down Expand Up @@ -64,7 +66,7 @@ def get_publication_id(input_string, eoa_cursor):

rows = eoa_cursor.fetchall()
if len(rows) > 1:
print_error("There should be only one database entry that matches the input. Found %s") % len(rows)
print_error("There should be only one database entry that matches the input. Found %s" % len(rows))
elif len(rows) == 0:
print_error("It seems like there is no such as publication %s %s. Exiting." % (eoa_series.title(), eoa_number))
sys.exit()
Expand All @@ -77,12 +79,7 @@ def get_publication_id(input_string, eoa_cursor):
def get_publication_info(eoa_pub_id, eoa_cursor):
"""Get more information from one publication"""

query_string = """SELECT "Title", "Subtitle",
"Publicationauthor1", "Publicationauthor2", "Publicationauthor3",
"Publicationauthor4", "Publicationauthor5",
"Publicationauthorsuffix", "Serie", "Number", "Isbn",
"Datepublished", "Coverbig" , "Publicationlicense", "Shoplink" FROM publications_publication WHERE
"id" = '%s' """ % eoa_pub_id
query_string = """SELECT * FROM publications_publication WHERE "id" = '%s' """ % eoa_pub_id

eoa_cursor.execute(query_string)

Expand Down Expand Up @@ -142,6 +139,31 @@ def format_authors(result_list, start_range, end_range):
return(authors_as_string, authors)
# def format_authors ends here

def format_authors_xml(eoa_publication_info):
    """Format the authors of a publication as XML author elements.

    The names are read from the keys ``Publicationauthor1`` to
    ``Publicationauthor5`` of eoa_publication_info, which has to
    support access by key (e.g. a database row fetched with a
    DictCursor). Entries that are empty, None or only whitespace are
    skipped.

    The last word of a name becomes the last name, everything before
    it the first name. A name consisting of a single word is used for
    both elements.

    Return the concatenated ``<author>`` elements as one string. The
    string is empty if no author is set.
    """
    # imported locally so that the function does not depend on the
    # import section of the file
    from xml.sax.saxutils import escape

    author_elements = []

    for authornumber in range(1, 6):
        author_key = "Publicationauthor" + str(authornumber)
        # "or" guards against None values, split() without argument
        # also copes with repeated or surrounding whitespace
        nameparts = (eoa_publication_info[author_key] or "").split()
        if not nameparts:
            continue

        lastname = nameparts[-1]
        # keep middle names instead of dropping them silently
        firstname = " ".join(nameparts[:-1]) or lastname

        # NOTE(review): every author is marked as primary contact and
        # gets the same placeholder email address - confirm that the
        # OMP import accepts this.
        # escape() protects the XML against &, < and > in names
        author_string = """
    <author primary_contact="true" user_group_ref="Author">
        <firstname>%s</firstname>
        <lastname>%s</lastname>
        <email>author@example.com</email>
    </author>""" % (escape(firstname), escape(lastname))
        author_elements.append(author_string)

    return("".join(author_elements))
# def format_authors_xml ends here

def format_title(title_string, is_book_subtitle=False, unformatted=False):
"""Convert html tags to their LaTeX counterpart."""

Expand Down Expand Up @@ -217,7 +239,9 @@ def format_chapter_info(chapter_result):
# def format_chapter_info ends here

def which_publisher(series):
"""Make layout dependant on series"""
"""Make layout dependant on series
Return base url as well as publisher string"""

if series == "sources":
base_url = "http://edition-open-sources.org"
Expand Down Expand Up @@ -248,36 +272,36 @@ def choose_geometry(eoa_series):
def format_publication_info(eoa_publication_info):
    """Provide strings for the publication info.

    eoa_publication_info is one publication record whose columns are
    accessed by name (Serie, Number, Coverbig, Title, ...). As a side
    effect, the cover image of the publication is fetched with
    download_cover_image.

    Return a dictionary of items with the keys bookauthors, pubsuffix,
    booktitle, booksubtitle, publisher, series, number, isbn, pubdate,
    url, licence and shoplink.
    """

    series = eoa_publication_info["Serie"]
    number = eoa_publication_info["Number"]

    base_url, publisher_string = which_publisher(series)
    cover_url = "%s/media/%s" % (PRODUCTION_URL, eoa_publication_info["Coverbig"])
    download_cover_image(cover_url)

    publication_url = "%s/%s/%s/" % (base_url, series, number)

    # truth test instead of len(): also works if the column is None
    author_suffix = eoa_publication_info["Publicationauthorsuffix"]
    if author_suffix:
        pub_suffix = " " + author_suffix
    else:
        pub_suffix = ""

    licence_string = format_licence(eoa_publication_info["Publicationlicense"])
    shoplink_string = format_shoplink(eoa_publication_info["Shoplink"])
    book_authors_string = format_authors_xml(eoa_publication_info)

    items_to_return = {
        "bookauthors": book_authors_string,
        "pubsuffix": pub_suffix,
        "booktitle": format_title(eoa_publication_info["Title"]),
        "booksubtitle": format_title(eoa_publication_info["Subtitle"],
                                     is_book_subtitle=True,
                                     unformatted=True),
        "publisher": publisher_string,
        "series": series.title(),
        "number": number,
        "isbn": eoa_publication_info["Isbn"],
        "pubdate": eoa_publication_info["Datepublished"].strftime("%Y-%m-%d"),
        "url": publication_url,
        "licence": licence_string,
        "shoplink": shoplink_string,
    }

    return(items_to_return)
# def format_publication_info ends here
Expand All @@ -293,21 +317,32 @@ def format_licence(publication_licence):
return(licence_string)
# def format_licence ends here

def format_shoplink(input_string, raw=False):
    """Parse the shoplink entry

    input_string is expected to be an HTML link of the form
    <a href="URL">COMPANY</a>. If it is None or does not start with
    such a link, both URL and company fall back to "k.A.".

    With raw=False (the default), return a line for the LaTeX
    frontmatter naming the print-on-demand company. The line is empty
    for unknown companies. With raw=True, return the tuple
    (shop_url, company) instead.
    """

    SHOPLINK_PATTERN = re.compile('<a href="(?P<book_url>.*?)">(?P<company>.*?)</a>')

    # a missing database value (None) is treated like an empty entry
    shoplink_match = re.match(SHOPLINK_PATTERN, input_string or "")

    if shoplink_match is None:
        company = shop_url = "k.A."
    else:
        shop_url = shoplink_match.group('book_url')
        company = shoplink_match.group('company')

    if company == "epubli.de":
        shoplink_line = r"Neopubli GmbH, Berlin\par\url{%s}" % shop_url
    elif company == "pro-business.com":
        shoplink_line = r"PRO BUSINESS digital printing Deutschland GmbH, Berlin\par\url{%s}" % shop_url
    else:
        shoplink_line = ""

    if raw:
        return(shop_url, company)

    return(shoplink_line)
# def format_shoplink ends here

def download_cover_image(image_url):
Expand Down Expand Up @@ -336,6 +371,19 @@ def download_chapter_pdf(chapter_url, destination):
print_error("Program received an HTTP Error 403: Forbidden. Maybe there are no chapter files?")
# def download_chapter_pdf

def file_base64(filepath):
    """Base64 encode a file
    https://code.tutsplus.com/tutorials/base64-encoding-and-decoding-using-python--cms-25588

    Read the file at filepath in binary mode and return its content
    as base64 encoded bytes. As with the former encodestring, the
    output has a newline after every 76 characters and at the end.
    """
    import base64

    # the context manager closes the file even if reading fails
    with open(filepath, "rb") as binary_file:
        file_content = binary_file.read()

    # encodestring was removed in Python 3.9, encodebytes is its
    # replacement and yields the same output
    base_64_encode = base64.encodebytes(file_content)

    return(base_64_encode)
# def file_base64 ends here

def run_latex(command):
"""Compile the latex"""

Expand Down Expand Up @@ -390,7 +438,7 @@ def add_pdf_info(pdf_filename, list_of_authors, title_for_pdf, subject_string):
os.rename(pdf_filename + '_out.pdf', pdf_filename + '.pdf')
# def add_pdf_info ends here

def main(eoa_publication):
def create_chapter_frontmatter(eoa_publication):
"""Main function"""

# validate input
Expand All @@ -402,7 +450,6 @@ def main(eoa_publication):

eoa_pub_id = get_publication_id(eoa_publication, eoa_cursor)
eoa_publication_info = get_publication_info(eoa_pub_id, eoa_cursor)
# print(eoa_publication_info)

base_url = which_publisher(eoa_publication_info[0][8])[0]

Expand Down Expand Up @@ -449,11 +496,11 @@ def main(eoa_publication):
formatted_chapter_title, authors_line = format_chapter_info(chapter)

if len(authors_line) == 0:
authors_line = item_for_template[0]
authors_line = item_for_template["bookauthors"]

formatted_chapter_authors = r"\emph{%s:}" % authors_line

geometry_string, fontsize_string = choose_geometry(item_for_template[5])
geometry_string, fontsize_string = choose_geometry(item_for_template["series"])

frontmatter_template_string = string.Template(frontmatter_template)
# fill in the blanks
Expand All @@ -462,18 +509,18 @@ def main(eoa_publication):
GEOMETRY_SETTINGS=geometry_string,
FORMATTED_CHAPTER_TITLE=formatted_chapter_title,
CHAPTER_AUTHORS_LINE=formatted_chapter_authors,
FORMAT_AUTHORS=item_for_template[0],
FORMATTED_SHOPLINK=item_for_template[11],
LICENCE=item_for_template[10],
PUB_SUFFIX=item_for_template[1],
FORMAT_TITLE=item_for_template[2],
FORMAT_SUBTITLE=item_for_template[3].replace(" : ", "~:~"),
PUBLISHER_STRING=item_for_template[4],
EOA_SERIES=item_for_template[5],
SERIES_NUMBER=item_for_template[6],
ISBN_CODE=item_for_template[7],
PUB_DATE=item_for_template[8],
PUBLICATION_URL=item_for_template[9])
FORMAT_AUTHORS=item_for_template["bookauthors"],
FORMATTED_SHOPLINK=item_for_template["shoplink"],
LICENCE=item_for_template["licence"],
PUB_SUFFIX=item_for_template["pubsuffix"],
FORMAT_TITLE=item_for_template["booktitle"],
FORMAT_SUBTITLE=item_for_template["booksubtitle"].replace(" : ", "~:~"),
PUBLISHER_STRING=item_for_template["publisher"],
EOA_SERIES=item_for_template["series"],
SERIES_NUMBER=item_for_template["number"],
ISBN_CODE=item_for_template["isbn"],
PUB_DATE=item_for_template["pubdate"],
PUBLICATION_URL=item_for_template["url"])

outfile.write(frontmatter_replacement)
outfile.close()
Expand Down Expand Up @@ -509,7 +556,69 @@ def main(eoa_publication):
os.unlink("Coverimage.jpg")

os.chdir("..")
# def main ends here
# def create_chapter_frontmatter ends here

def create_omp_native_xml(eoa_publication):
    """Use the database infos for creating input for OMP

    Look up the publication given as eoa_publication in the database,
    fill the template ./data/native_template.xml with its data and
    write the result to ./generated_files/<eoa_publication>.xml.
    """
    # validate input
    check_publication(eoa_publication)

    # setting up database, the DictCursor allows access to the
    # columns by their name
    eoa_db = connect_db()
    eoa_cursor = eoa_db.cursor(cursor_factory=psycopg2.extras.DictCursor)

    eoa_pub_id = get_publication_id(eoa_publication, eoa_cursor)
    eoa_publication_info = get_publication_info(eoa_pub_id, eoa_cursor)[0]

    item_for_template = format_publication_info(eoa_publication_info)

    supplierurl, suppliercomp = format_shoplink(eoa_publication_info["Shoplink"], raw=True)

    # NOTE(review): subtitle and abstract are inserted into the XML
    # without escaping - confirm that they never contain a bare & or <.
    subtitle = eoa_publication_info["Subtitle"]
    if subtitle:
        onix_subtitle = """<onix:Subtitle>%s</onix:Subtitle>""" % subtitle
        omp_subtitle = """<subtitle locale="en_US">%s</subtitle>""" % subtitle
    else:
        omp_subtitle = onix_subtitle = ""

    # the template file
    with open("./data/native_template.xml", "r") as tmp_template:
        frontmatter_template = tmp_template.read()

    frontmatter_template_string = string.Template(frontmatter_template)
    # fill in the blanks
    frontmatter_replacement = frontmatter_template_string.substitute(
        INTERNAL_ID=item_for_template["number"],
        FORMAT_AUTHORS=item_for_template["bookauthors"],
        FORMAT_TITLE=item_for_template["booktitle"],
        OMP_SUBTITLE=omp_subtitle,
        ONIX_SUBTITLE=onix_subtitle,
        PUBLISHER_STRING=item_for_template["publisher"],
        EOA_SERIES=item_for_template["series"],
        SERIES_NUMBER=item_for_template["number"],
        ISBN_CODE=item_for_template["isbn"],
        PUB_DATE=item_for_template["pubdate"],
        PUBLICATION_URL=item_for_template["url"],
        ABSTRACT=eoa_publication_info["Descriptionlong"].replace("<br/>", ""),
        # file_base64 returns bytes; decode them so that the template
        # does not receive the b'...' representation
        BASE64_PDF=file_base64("./data/dummy.pdf").decode("ascii"),
        PRICE=eoa_publication_info["Price"],
        TODAY=datetime.today().strftime("%Y-%m-%d"),
        SUPPLIER_COMP=suppliercomp,
        SUPPLIER_URL=supplierurl,
        PAGES=eoa_publication_info["Pages"],
        # %s instead of %d: works for a number stored as integer as
        # well as for one stored as string
        SUBMISSION_NAME="%s_%s_submission" % (item_for_template["series"], item_for_template["number"]),
        PUBDATE_00=eoa_publication_info["Datepublished"].strftime("%Y%m%d")
        )

    xml_filename = eoa_publication + ".xml"

    # open the output only after the substitution has succeeded, so
    # that a failure does not leave an empty file behind
    with open("./generated_files/" + xml_filename, "w") as outfile:
        outfile.write(frontmatter_replacement)
# def create_omp_native_xml ends here

if __name__ == '__main__':
if len(sys.argv) == 1:
Expand All @@ -518,5 +627,6 @@ def main(eoa_publication):
elif len(sys.argv) > 2:
print_error("You can work with only one publication at a time!")
sys.exit()
main(sys.argv[-1])
create_omp_native_xml(sys.argv[-1])
# create_chapter_frontmatter(sys.argv[-1])
# finis

0 comments on commit 97f5fad

Please sign in to comment.