From 3582319f452331676e2fc2c4132a883a7a6ef886 Mon Sep 17 00:00:00 2001 From: kthoden Date: Mon, 13 Jan 2020 15:50:05 +0100 Subject: [PATCH 1/5] Nicer formatting of publication.cfg --- src/tei2imxml.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tei2imxml.py b/src/tei2imxml.py index c160b82..04c48cd 100755 --- a/src/tei2imxml.py +++ b/src/tei2imxml.py @@ -102,7 +102,7 @@ def get_field(xml_tree, query_path, mandatory=False, findall=False, noformat=Fal else: tmp_field = xml_tree.xpath(query_path, namespaces=NS_MAP) if len(tmp_field) > 0: - return_string = tmp_field[0] + return_string = sanitize_data_string(tmp_field[0]) else: if mandatory is True: sys.exit("Field stored in %s is mandatory. Exiting." % query_path) From 4cf9b002533160a318666ce7f76ceb86d70c0fad Mon Sep 17 00:00:00 2001 From: kthoden Date: Mon, 13 Jan 2020 15:50:17 +0100 Subject: [PATCH 2/5] Add info about landing page to config --- src/tei2imxml.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/tei2imxml.py b/src/tei2imxml.py index 04c48cd..c466bdf 100755 --- a/src/tei2imxml.py +++ b/src/tei2imxml.py @@ -132,6 +132,7 @@ def get_field(xml_tree, query_path, mandatory=False, findall=False, noformat=Fal info_dict['eoa_detail_desc'] = get_field(xml_tree, "//t:teiHeader/t:profileDesc/t:abstract[@n='detailed']/p/text()") info_dict['eoa_additional_info'] = get_field(xml_tree, "//t:teiHeader/t:profileDesc/t:abstract[@n='additional']/p/text()") info_dict['eoa_dedication'] = get_field(xml_tree, "//t:text/t:front/t:div[@type='dedication']/t:ab/text()") + info_dict['eoa_landingpage'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:publicationStmt/t:publisher/t:orgName[@n='Press']/@ref") info_dict['eoa_submitters'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:editor[@role='submitter']/@ref", findall=True) info_dict['eoa_publicationmanagers'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:editor[@role='publicationmanager']/@ref", findall=True) @@ -175,6 +176,7 @@ def make_publication_cfg(info_dict, translation_file): technical_config['Shoplink'] = """{1}""".format(info_dict['eoa_shoplink_url'], info_dict['eoa_shoplink_text']) #ok technical_config['Language'] = info_dict['eoa_language'] #ok technical_config['License'] = info_dict['eoa_license'].split("/")[4] #ok + technical_config['LandingPage'] = f"{info_dict['eoa_landingpage']}/{info_dict['eoa_series'].lower()}/{info_dict['eoa_number']}/index.html" general_config['BriefDescription'] = info_dict['eoa_brief_desc'] #ok if info_dict['eoa_submitters'] is not None: From 9552d242a609c2d277b0882f6ef1254e2509d8ce Mon Sep 17 00:00:00 2001 From: kthoden Date: Mon, 13 Jan 2020 15:50:29 +0100 Subject: [PATCH 3/5] Not only warn about too many keywords, but also use some of them --- src/tei2imxml.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/tei2imxml.py b/src/tei2imxml.py index c466bdf..4c4e565 100755 --- a/src/tei2imxml.py +++ b/src/tei2imxml.py @@ -187,9 +187,10 @@ def make_publication_cfg(info_dict, translation_file): if len(info_dict['eoa_keywords']) > 8: logging.warning("Too many keywords. Up to 8 are allowed. Using the first 8.") else: - for keyword in info_dict['eoa_keywords'][:7]: - keyword_label = "Keyword" + str(info_dict['eoa_keywords'].index(keyword) + 1) - general_config[keyword_label] = keyword + pass + for keyword in info_dict['eoa_keywords'][:7]: + keyword_label = "Keyword" + str(info_dict['eoa_keywords'].index(keyword) + 1) + general_config[keyword_label] = keyword general_config['DetailedDescription'] = info_dict['eoa_detail_desc'] #ok general_config['AdditionalInformation'] = info_dict['eoa_additional_info'] #ok From 1c507dbb086689af13ddfa7480515e95446b0ce6 Mon Sep 17 00:00:00 2001 From: kthoden Date: Tue, 14 Jan 2020 14:58:07 +0100 Subject: [PATCH 4/5] Enable hyperimage support for epub --- src/imxml2epub.py | 139 +++++++++++++++++++++++++++++++--------------- 1 file changed, 93 insertions(+), 46 deletions(-) diff --git a/src/imxml2epub.py b/src/imxml2epub.py index 82a86e6..0e9b316 100755 --- a/src/imxml2epub.py +++ b/src/imxml2epub.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -*- coding: utf-8; mode: python -*- -# Time-stamp: <2019-12-18 10:13:44 (kthoden)> +# Time-stamp: <2020-01-14 14:56:34 (kthoden)> """ Convert a customized DocBook XML file into a set of files that constitute the contents of an EPUB file. @@ -31,6 +31,7 @@ SCRIPT_PATH = Path( __file__ ) SCRIPT_NAME = SCRIPT_PATH.stem + DEFAULT_INPUT_DIR = \ Path(os.environ['INPUT_DIR'] if 'INPUT_DIR' in os.environ else './input') @@ -97,6 +98,13 @@ help="Specify the directory with files of the font (the font itself, License)", ) +parser.add_argument( + "-him", "--hyperimage", + help="Link hyperlink references to online version.", + action="store_true" + ) + + args = parser.parse_args() config_file = args.CONFIG_FILE @@ -179,6 +187,12 @@ dictPagelabels = data["pagelabeldict"] +if args.hyperimage: + logging.info("Enabled Hyperimage support") +else: + pass + + def get_mimetype(filename_suffix): """Return mimetype of image""" if filename_suffix.lower() == ".jpg": @@ -526,6 +540,7 @@ def add_css_snippet(css_snippet, css_file): publication_series = cfgPublication.get("Technical", "Serie") publication_number = cfgPublication.get("Technical", "Number") publication_license = cfgPublication.get("Technical", "License") +publication_landingpage = cfgPublication.get("Technical", "LandingPage") try: publication_isbn = cfgPublication.get("Technical", "ISBN-epub") except: @@ -738,7 +753,9 @@ def add_css_snippet(css_snippet, css_file): xmlParagraph.find("head").tag = "h5" logging.info(f"{logseparator}Preparing Figures") -xmlFigures = xmlEbookTree.xpath(".//EOAfigure[not(@type='hionly')] | .//EOAlsfigure[not(@type='hionly')]") +xmlFigures = xmlEbookTree.xpath(".//EOAfigure[not(contains(@type,'hionly'))]") +libeoaconvert.debug_xml_here(xmlEbookTree, "find_eoafigures", DEBUG_DIR) +logging.info("Found %s figures", len(xmlFigures)) for xmlFigure in xmlFigures: # Copy File of the Image # If it's in a subfolder, name of folder and name of image will be merged @@ -808,9 +825,10 @@ def add_css_snippet(css_snippet, css_file): # Change the tag of the parent

-Tag to

so that it may be removed #xmlFigure.getparent().tag = "div" -xml_figures_hyperimage = xmlEbookTree.xpath(".//EOAfigure[@type='hionly'] | .//EOAlsfigure[@type='hionly']") -logging.debug("found %s hyperimage figures" % len(xml_figures_hyperimage)) +xml_figures_hyperimage = xmlEbookTree.xpath(".//EOAfigure[contains(@type,'hionly')]") +logging.info("Found %s hyperimage figures", len(xml_figures_hyperimage)) for fig in xml_figures_hyperimage: + fig.clear() fig.tag = "EOAhifigure" logging.info(f"{logseparator}Preparing not numbered Figures") @@ -1659,10 +1677,9 @@ class FootnoteError(Exception): logging.info(f"{logseparator}Preparing Cross-References") for xmlChapter in xmlChapters: - xmlReferences = xmlChapter.findall(".//EOAref") + xmlReferences = xmlChapter.xpath(".//EOAref[not(parent::EOAref)]") for xmlReference in xmlReferences: - # the new stuff # label_text = xmlReference.find("Label").text[1:] # logging.debug("label text is %s" % label_text) @@ -1678,47 +1695,77 @@ class FootnoteError(Exception): # eoa_id = eoa_id_element.get("id") # end of the new stuff + hitarget_id_list = xmlReference.xpath("./ref/@hitarget") - logging.info("XXXXXXXX") - strResult = "!!! Cross Reference !!!" - - xmlReferenceLabel = xmlReference.find("Label") - xmlReferenceLabelText = xmlReferenceLabel.text - - xmlReferenceRef = xmlReference.find("ref") - xmlReferenceRefTarget = xmlReferenceRef.get("target") - - if xmlReferenceLabelText in dictEquations: - logging.info("Verweis auf Array gefunden:" + xmlReferenceLabelText) - strResult = dictEquations[xmlReferenceLabelText] - if xmlReferenceRefTarget in dictEquations: - logging.info("Verweis auf Equation gefunden:" + xmlReferenceRefTarget) - strResult = dictEquations[xmlReferenceRefTarget] - if xmlReferenceRefTarget in dictLists: - logging.info("Verweis auf Liste gefunden") - strResult = dictLists[xmlReferenceRefTarget] - if xmlReferenceRefTarget in dictChapters: - logging.info("Verweis auf Kapitel gefunden") - strResult = dictChapters[xmlReferenceRefTarget] - if xmlReferenceRefTarget in dictSections: - logging.info("Verweis auf Section gefunden") - strResult = dictSections[xmlReferenceRefTarget] - if xmlReferenceRefTarget in dictFigures: - logging.info("Verweis auf Abbildung gefunden") - strResult = dictFigures[xmlReferenceRefTarget] - if xmlReferenceRefTarget in dictFootnotes: - logging.info("Verweis auf Fussnote gefunden") - strResult = dictFootnotes[xmlReferenceRefTarget] - if xmlReferenceRefTarget in dictTheorems: - logging.info("Verweis auf Theorem gefunden") - strResult = dictTheorems[xmlReferenceRefTarget] - if xmlReferenceRefTarget in dictTables: - logging.info("Verweis auf Tabelle gefunden") - strResult = dictTables[xmlReferenceRefTarget] - tmpTail = xmlReference.tail or "" - #tmpTail = tmpTail.strip() + if len(hitarget_id_list) == 1: + hitarget_id = hitarget_id_list[0] + else: + hitarget_id = None + + reference_type = xmlReference.get("type") + if reference_type == "text": + tmpTail = xmlReference.tail or "" + strResult = xmlReference.text + elif reference_type == "collage": + tmpTail = xmlReference.tail or "" + logging.debug("Found reference to a Hyperimage collage.") + subreferences = xmlReference.xpath("./EOAref[@type='number']") + strResult = "" + for subref in subreferences: + subref_tail = subref.tail or "" + subref_target = subref.xpath("./ref/@target")[0] + target_string = dictFigures[subref_target] + strResult += f"{target_string}{subref_tail}" + elif reference_type == "number": + logging.info("XXXXXXXX") + strResult = "!!! Cross Reference !!!" + + xmlReferenceLabel = xmlReference.find("Label") + xmlReferenceLabelText = xmlReferenceLabel.text + + xmlReferenceRef = xmlReference.find("ref") + xmlReferenceRefTarget = xmlReferenceRef.get("target") + + if xmlReferenceLabelText in dictEquations: + logging.info("Verweis auf Array gefunden:" + xmlReferenceLabelText) + strResult = dictEquations[xmlReferenceLabelText] + if xmlReferenceRefTarget in dictEquations: + logging.info("Verweis auf Equation gefunden:" + xmlReferenceRefTarget) + strResult = dictEquations[xmlReferenceRefTarget] + if xmlReferenceRefTarget in dictLists: + logging.info("Verweis auf Liste gefunden") + strResult = dictLists[xmlReferenceRefTarget] + if xmlReferenceRefTarget in dictChapters: + logging.info("Verweis auf Kapitel gefunden") + strResult = dictChapters[xmlReferenceRefTarget] + if xmlReferenceRefTarget in dictSections: + logging.info("Verweis auf Section gefunden") + strResult = dictSections[xmlReferenceRefTarget] + if xmlReferenceRefTarget in dictFigures: + logging.info("Verweis auf Abbildung gefunden") + strResult = dictFigures[xmlReferenceRefTarget] + if xmlReferenceRefTarget in dictFootnotes: + logging.info("Verweis auf Fussnote gefunden") + strResult = dictFootnotes[xmlReferenceRefTarget] + if xmlReferenceRefTarget in dictTheorems: + logging.info("Verweis auf Theorem gefunden") + strResult = dictTheorems[xmlReferenceRefTarget] + if xmlReferenceRefTarget in dictTables: + logging.info("Verweis auf Tabelle gefunden") + strResult = dictTables[xmlReferenceRefTarget] + tmpTail = xmlReference.tail or "" + #tmpTail = tmpTail.strip() + else: + logging.error("Found unknown reference type: %s. Exiting", reference_type) + sys.exit(0) logging.info("XXXXXXXX") xmlReference.clear() + if args.hyperimage and hitarget_id and reference_type in ["collage", "number"]: + hyperimage_link = f"{publication_landingpage[:-11]}/{intChapterNumber - 1}/index.html#{hitarget_id}" + xmlReference.tag = "a" + xmlReference.set("href", hyperimage_link) + else: + pass xmlReference.text = strResult xmlReference.tail = tmpTail @@ -1760,8 +1807,8 @@ class FootnoteError(Exception): xmlIndexentry.clear() xmlIndexentry.tail = tmpTail etree.strip_tags(xmlEbookTree, "EOAlabel", "EOAindex", "EOApageref", "EOAcitenumeric", "EOAtable", "EOAref", "note", "div", "div2", "div3", "div4", "div5", "citetext", "newpage", "EOAciteyear", "EOAtablelabel" , "hi", "pagebreak", "page", "pagestyle", "EOAcitation", "EOAciteauthoryear", "EOAcitemanual", "EOAprintbibliography", "EOAindexperson", "EOAprintindex", "EOAindexlocation", "EOAprintpersonindex", "EOAprintlocationindex","anchor", "temp", "EOAletterhead", "EOAhifigure", "EOAtocentry") -etree.strip_attributes(xmlEbookTree, "id-text", "noindent", "type", "label", "spacebefore", "rend") # also contained "id" -etree.strip_elements(xmlEbookTree, "citekey", with_tail=False) +etree.strip_attributes(xmlEbookTree, "id-text", "noindent", "type", "label", "spacebefore", "rend", "hielement") # also contained "id" +etree.strip_elements(xmlEbookTree, "citekey", "originalcontents", with_tail=False) logging.info("Write every Part and Chapter into one file") xmlChapters = xmlEbookTree.findall("//div1") From 221c479f8ada3715cf8e499910c3397c9ae69739 Mon Sep 17 00:00:00 2001 From: kthoden Date: Tue, 14 Jan 2020 16:54:48 +0100 Subject: [PATCH 5/5] non-numbered sections --- src/imxml2epub.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/src/imxml2epub.py b/src/imxml2epub.py index 0e9b316..55c3385 100755 --- a/src/imxml2epub.py +++ b/src/imxml2epub.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -*- coding: utf-8; mode: python -*- -# Time-stamp: <2020-01-14 14:56:34 (kthoden)> +# Time-stamp: <2020-01-14 16:18:28 (kthoden)> """ Convert a customized DocBook XML file into a set of files that constitute the contents of an EPUB file. @@ -721,21 +721,25 @@ def add_css_snippet(css_snippet, css_file): xmlSections = xmlEbookTree.findall(".//div2") for xmlSection in xmlSections: xmlSection.find("head").tag = "h2" - if xmlSection.get("rend") != "nonumber": - idSection = xmlSection.get("id") - strHeadline = xmlSection.find("h2").text or "" - logging.info(strHeadline) + idSection = xmlSection.get("id") + strHeadline = xmlSection.find("h2").text or "" + logging.info(strHeadline) + if xmlSection.get("n") != "nonumber": xmlSection.find("h2").text = str(dictSections[idSection]) + " " + strHeadline + else: + xmlSection.find("h2").text = strHeadline logging.info(f"{logseparator}Convert EOAsubsection to H3") xmlSubsections = xmlEbookTree.findall(".//div3") for xmlSubsection in xmlSubsections: xmlSubsection.find("head").tag = "h3" - if xmlSubsection.get("rend") != "nonumber": - idSection = xmlSubsection.get("id") - strHeadline = xmlSubsection.find("h3").text or "" - logging.info(strHeadline) + idSection = xmlSubsection.get("id") + strHeadline = xmlSubsection.find("h3").text or "" + logging.info(strHeadline) + if xmlSubsection.get("n") != "nonumber": xmlSubsection.find("h3").text = str(dictSections[idSection]) + " " + strHeadline + else: + xmlSubsection.find("h3").text = strHeadline logging.info(f"{logseparator}Convert EOAsubsubsection to H4") xmlSubsubsections = xmlEbookTree.findall(".//div4")