From 3582319f452331676e2fc2c4132a883a7a6ef886 Mon Sep 17 00:00:00 2001
From: kthoden <kthoden@gwdg.de>
Date: Mon, 13 Jan 2020 15:50:05 +0100
Subject: [PATCH 1/5] Nicer formatting of publication.cfg

---
 src/tei2imxml.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/tei2imxml.py b/src/tei2imxml.py
index c160b82..04c48cd 100755
--- a/src/tei2imxml.py
+++ b/src/tei2imxml.py
@@ -102,7 +102,7 @@ def get_field(xml_tree, query_path, mandatory=False, findall=False, noformat=Fal
         else:
             tmp_field = xml_tree.xpath(query_path, namespaces=NS_MAP)
             if len(tmp_field) > 0:
-                return_string = tmp_field[0]
+                return_string = sanitize_data_string(tmp_field[0])
             else:
                 if mandatory is True:
                     sys.exit("Field stored in %s is mandatory. Exiting." % query_path)

From 4cf9b002533160a318666ce7f76ceb86d70c0fad Mon Sep 17 00:00:00 2001
From: kthoden <kthoden@gwdg.de>
Date: Mon, 13 Jan 2020 15:50:17 +0100
Subject: [PATCH 2/5] Add info about landing page to config

---
 src/tei2imxml.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/tei2imxml.py b/src/tei2imxml.py
index 04c48cd..c466bdf 100755
--- a/src/tei2imxml.py
+++ b/src/tei2imxml.py
@@ -132,6 +132,7 @@ def get_field(xml_tree, query_path, mandatory=False, findall=False, noformat=Fal
     info_dict['eoa_detail_desc'] = get_field(xml_tree, "//t:teiHeader/t:profileDesc/t:abstract[@n='detailed']/p/text()")
     info_dict['eoa_additional_info'] = get_field(xml_tree, "//t:teiHeader/t:profileDesc/t:abstract[@n='additional']/p/text()")
     info_dict['eoa_dedication'] = get_field(xml_tree, "//t:text/t:front/t:div[@type='dedication']/t:ab/text()")
+    info_dict['eoa_landingpage'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:publicationStmt/t:publisher/t:orgName[@n='Press']/@ref")
 
     info_dict['eoa_submitters'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:editor[@role='submitter']/@ref", findall=True)
     info_dict['eoa_publicationmanagers'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:editor[@role='publicationmanager']/@ref", findall=True)
@@ -175,6 +176,7 @@ def make_publication_cfg(info_dict, translation_file):
     technical_config['Shoplink'] = """<a href="{0}">{1}</a>""".format(info_dict['eoa_shoplink_url'], info_dict['eoa_shoplink_text']) #ok
     technical_config['Language'] = info_dict['eoa_language']   #ok
     technical_config['License'] = info_dict['eoa_license'].split("/")[4]     #ok
+    technical_config['LandingPage'] = f"{info_dict['eoa_landingpage']}/{info_dict['eoa_series'].lower()}/{info_dict['eoa_number']}/index.html"
 
     general_config['BriefDescription'] = info_dict['eoa_brief_desc'] #ok
     if info_dict['eoa_submitters'] is not None:

From 9552d242a609c2d277b0882f6ef1254e2509d8ce Mon Sep 17 00:00:00 2001
From: kthoden <kthoden@gwdg.de>
Date: Mon, 13 Jan 2020 15:50:29 +0100
Subject: [PATCH 3/5] Not only warn about too many keywords, but also use some
 of them

---
 src/tei2imxml.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/tei2imxml.py b/src/tei2imxml.py
index c466bdf..4c4e565 100755
--- a/src/tei2imxml.py
+++ b/src/tei2imxml.py
@@ -187,9 +187,10 @@ def make_publication_cfg(info_dict, translation_file):
     if len(info_dict['eoa_keywords']) > 8:
         logging.warning("Too many keywords. Up to 8 are allowed. Using the first 8.")
     else:
-        for keyword in info_dict['eoa_keywords'][:7]:
-            keyword_label = "Keyword" + str(info_dict['eoa_keywords'].index(keyword) + 1)
-            general_config[keyword_label] = keyword
+        pass
+    for keyword_number, keyword in enumerate(info_dict['eoa_keywords'][:8], start=1):  # keep the first 8, as the warning above announces
+        keyword_label = "Keyword" + str(keyword_number)  # numbered by position, so a repeated keyword still gets its own label
+        general_config[keyword_label] = keyword
 
     general_config['DetailedDescription'] = info_dict['eoa_detail_desc'] #ok
     general_config['AdditionalInformation'] = info_dict['eoa_additional_info'] #ok

From 1c507dbb086689af13ddfa7480515e95446b0ce6 Mon Sep 17 00:00:00 2001
From: kthoden <kthoden@gwdg.de>
Date: Tue, 14 Jan 2020 14:58:07 +0100
Subject: [PATCH 4/5] Enable hyperimage support for epub

---
 src/imxml2epub.py | 139 +++++++++++++++++++++++++++++++---------------
 1 file changed, 93 insertions(+), 46 deletions(-)

diff --git a/src/imxml2epub.py b/src/imxml2epub.py
index 82a86e6..0e9b316 100755
--- a/src/imxml2epub.py
+++ b/src/imxml2epub.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8; mode: python -*-
-# Time-stamp: <2019-12-18 10:13:44 (kthoden)>
+# Time-stamp: <2020-01-14 14:56:34 (kthoden)>
 
 """ Convert a customized DocBook XML file into a set of files that
 constitute the contents of an EPUB file.
@@ -31,6 +31,7 @@
 SCRIPT_PATH = Path( __file__ )
 SCRIPT_NAME = SCRIPT_PATH.stem
 
+
 DEFAULT_INPUT_DIR = \
     Path(os.environ['INPUT_DIR'] if 'INPUT_DIR' in os.environ else './input')
 
@@ -97,6 +98,13 @@
         help="Specify the directory with files of the font (the font itself, License)",
 )
 
+parser.add_argument(
+        "-him", "--hyperimage",
+        help="Link hyperlink references to online version.",
+        action="store_true"
+    )
+
+
 args = parser.parse_args()
 
 config_file = args.CONFIG_FILE
@@ -179,6 +187,12 @@
 dictPagelabels = data["pagelabeldict"]
 
 
+if args.hyperimage:
+    logging.info("Enabled Hyperimage support")
+else:
+    pass
+
+
 def get_mimetype(filename_suffix):
     """Return mimetype of image"""
     if filename_suffix.lower() == ".jpg":
@@ -526,6 +540,7 @@ def add_css_snippet(css_snippet, css_file):
 publication_series = cfgPublication.get("Technical", "Serie")
 publication_number = cfgPublication.get("Technical", "Number")
 publication_license = cfgPublication.get("Technical", "License")
+publication_landingpage = cfgPublication.get("Technical", "LandingPage")
 try:
     publication_isbn = cfgPublication.get("Technical", "ISBN-epub")
 except:
@@ -738,7 +753,9 @@ def add_css_snippet(css_snippet, css_file):
     xmlParagraph.find("head").tag = "h5"
 
 logging.info(f"{logseparator}Preparing Figures")
-xmlFigures = xmlEbookTree.xpath(".//EOAfigure[not(@type='hionly')] | .//EOAlsfigure[not(@type='hionly')]")
+xmlFigures = xmlEbookTree.xpath(".//EOAfigure[not(contains(@type,'hionly'))]")
+libeoaconvert.debug_xml_here(xmlEbookTree, "find_eoafigures", DEBUG_DIR)
+logging.info("Found %s figures", len(xmlFigures))
 for xmlFigure in xmlFigures:
     # Copy File of the Image
     # If it's in a subfolder, name of folder and name of image will be merged
@@ -808,9 +825,10 @@ def add_css_snippet(css_snippet, css_file):
     # Change the tag of the parent <p>-Tag to <div> so that it may be removed
     #xmlFigure.getparent().tag = "div"
 
-xml_figures_hyperimage = xmlEbookTree.xpath(".//EOAfigure[@type='hionly'] | .//EOAlsfigure[@type='hionly']")
-logging.debug("found %s hyperimage figures" % len(xml_figures_hyperimage))
+xml_figures_hyperimage = xmlEbookTree.xpath(".//EOAfigure[contains(@type,'hionly')]")
+logging.info("Found %s hyperimage figures", len(xml_figures_hyperimage))
 for fig in xml_figures_hyperimage:
+    fig.clear()
     fig.tag = "EOAhifigure"
 
 logging.info(f"{logseparator}Preparing not numbered Figures")
@@ -1659,10 +1677,9 @@ class FootnoteError(Exception):
 logging.info(f"{logseparator}Preparing Cross-References")
 
 for xmlChapter in xmlChapters:
-    xmlReferences = xmlChapter.findall(".//EOAref")
+    xmlReferences = xmlChapter.xpath(".//EOAref[not(parent::EOAref)]")
     for xmlReference in xmlReferences:
 
-
         # the new stuff
         # label_text = xmlReference.find("Label").text[1:]
         # logging.debug("label text is %s" % label_text)
@@ -1678,47 +1695,77 @@ class FootnoteError(Exception):
         # eoa_id = eoa_id_element.get("id")
         # end of the new stuff
 
+        hitarget_id_list = xmlReference.xpath("./ref/@hitarget")
 
-        logging.info("XXXXXXXX")
-        strResult = "!!! Cross Reference !!!"
-
-        xmlReferenceLabel = xmlReference.find("Label")
-        xmlReferenceLabelText = xmlReferenceLabel.text
-
-        xmlReferenceRef = xmlReference.find("ref")
-        xmlReferenceRefTarget = xmlReferenceRef.get("target")
-
-        if xmlReferenceLabelText in dictEquations:
-            logging.info("Verweis auf Array gefunden:" + xmlReferenceLabelText)
-            strResult = dictEquations[xmlReferenceLabelText]
-        if xmlReferenceRefTarget in dictEquations:
-            logging.info("Verweis auf Equation gefunden:" + xmlReferenceRefTarget)
-            strResult = dictEquations[xmlReferenceRefTarget]
-        if xmlReferenceRefTarget in dictLists:
-            logging.info("Verweis auf Liste gefunden")
-            strResult = dictLists[xmlReferenceRefTarget]
-        if xmlReferenceRefTarget in dictChapters:
-            logging.info("Verweis auf Kapitel gefunden")
-            strResult = dictChapters[xmlReferenceRefTarget]
-        if xmlReferenceRefTarget in dictSections:
-            logging.info("Verweis auf Section gefunden")
-            strResult = dictSections[xmlReferenceRefTarget]
-        if xmlReferenceRefTarget in dictFigures:
-            logging.info("Verweis auf Abbildung gefunden")
-            strResult = dictFigures[xmlReferenceRefTarget]
-        if xmlReferenceRefTarget in dictFootnotes:
-            logging.info("Verweis auf Fussnote gefunden")
-            strResult = dictFootnotes[xmlReferenceRefTarget]
-        if xmlReferenceRefTarget in dictTheorems:
-            logging.info("Verweis auf Theorem gefunden")
-            strResult = dictTheorems[xmlReferenceRefTarget]
-        if xmlReferenceRefTarget in dictTables:
-            logging.info("Verweis auf Tabelle gefunden")
-            strResult = dictTables[xmlReferenceRefTarget]
-        tmpTail = xmlReference.tail or ""
-        #tmpTail = tmpTail.strip()
+        if len(hitarget_id_list) == 1:
+            hitarget_id = hitarget_id_list[0]
+        else:
+            hitarget_id = None
+
+        reference_type = xmlReference.get("type")
+        if reference_type == "text":
+            tmpTail = xmlReference.tail or ""
+            strResult = xmlReference.text
+        elif reference_type == "collage":
+            tmpTail = xmlReference.tail or ""
+            logging.debug("Found reference to a Hyperimage collage.")
+            subreferences = xmlReference.xpath("./EOAref[@type='number']")
+            strResult = ""
+            for subref in subreferences:
+                subref_tail = subref.tail or ""
+                subref_target = subref.xpath("./ref/@target")[0]
+                target_string = dictFigures[subref_target]
+                strResult += f"{target_string}{subref_tail}"
+        elif reference_type == "number":
+            logging.info("XXXXXXXX")
+            strResult = "!!! Cross Reference !!!"
+
+            xmlReferenceLabel = xmlReference.find("Label")
+            xmlReferenceLabelText = xmlReferenceLabel.text
+
+            xmlReferenceRef = xmlReference.find("ref")
+            xmlReferenceRefTarget = xmlReferenceRef.get("target")
+
+            if xmlReferenceLabelText in dictEquations:
+                logging.info("Verweis auf Array gefunden:" + xmlReferenceLabelText)
+                strResult = dictEquations[xmlReferenceLabelText]
+            if xmlReferenceRefTarget in dictEquations:
+                logging.info("Verweis auf Equation gefunden:" + xmlReferenceRefTarget)
+                strResult = dictEquations[xmlReferenceRefTarget]
+            if xmlReferenceRefTarget in dictLists:
+                logging.info("Verweis auf Liste gefunden")
+                strResult = dictLists[xmlReferenceRefTarget]
+            if xmlReferenceRefTarget in dictChapters:
+                logging.info("Verweis auf Kapitel gefunden")
+                strResult = dictChapters[xmlReferenceRefTarget]
+            if xmlReferenceRefTarget in dictSections:
+                logging.info("Verweis auf Section gefunden")
+                strResult = dictSections[xmlReferenceRefTarget]
+            if xmlReferenceRefTarget in dictFigures:
+                logging.info("Verweis auf Abbildung gefunden")
+                strResult = dictFigures[xmlReferenceRefTarget]
+            if xmlReferenceRefTarget in dictFootnotes:
+                logging.info("Verweis auf Fussnote gefunden")
+                strResult = dictFootnotes[xmlReferenceRefTarget]
+            if xmlReferenceRefTarget in dictTheorems:
+                logging.info("Verweis auf Theorem gefunden")
+                strResult = dictTheorems[xmlReferenceRefTarget]
+            if xmlReferenceRefTarget in dictTables:
+                logging.info("Verweis auf Tabelle gefunden")
+                strResult = dictTables[xmlReferenceRefTarget]
+            tmpTail = xmlReference.tail or ""
+            #tmpTail = tmpTail.strip()
+        else:
+            logging.error("Found unknown reference type: %s. Exiting", reference_type)
+            sys.exit(1)  # non-zero status: aborting on an unknown reference type is a failure, not a clean exit
         logging.info("XXXXXXXX")
         xmlReference.clear()
+        if args.hyperimage and hitarget_id and reference_type in ["collage", "number"]:  # link only with --hyperimage and exactly one ./ref/@hitarget
+            hyperimage_link = f"{publication_landingpage[:-11]}/{intChapterNumber - 1}/index.html#{hitarget_id}"  # [:-11] cuts 11 trailing characters, presumably "/index.html" - TODO confirm the LandingPage config value always ends that way
+            xmlReference.tag = "a"  # turn the cleared reference element into an HTML anchor
+            xmlReference.set("href", hyperimage_link)
+        else:
+            pass  # no link requested or no unique target: the element keeps its tag
         xmlReference.text = strResult
         xmlReference.tail = tmpTail
 
@@ -1760,8 +1807,8 @@ class FootnoteError(Exception):
     xmlIndexentry.clear()
     xmlIndexentry.tail = tmpTail
 etree.strip_tags(xmlEbookTree, "EOAlabel", "EOAindex", "EOApageref", "EOAcitenumeric", "EOAtable", "EOAref",  "note", "div", "div2", "div3", "div4", "div5", "citetext", "newpage", "EOAciteyear", "EOAtablelabel" , "hi", "pagebreak", "page", "pagestyle", "EOAcitation", "EOAciteauthoryear", "EOAcitemanual", "EOAprintbibliography", "EOAindexperson", "EOAprintindex", "EOAindexlocation", "EOAprintpersonindex", "EOAprintlocationindex","anchor", "temp", "EOAletterhead", "EOAhifigure", "EOAtocentry")
-etree.strip_attributes(xmlEbookTree, "id-text", "noindent", "type", "label", "spacebefore", "rend") # also contained "id"
-etree.strip_elements(xmlEbookTree, "citekey", with_tail=False)
+etree.strip_attributes(xmlEbookTree, "id-text", "noindent", "type", "label", "spacebefore", "rend", "hielement") # also contained "id"
+etree.strip_elements(xmlEbookTree, "citekey", "originalcontents", with_tail=False)
 
 logging.info("Write every Part and Chapter into one file")
 xmlChapters = xmlEbookTree.findall("//div1")

From 221c479f8ada3715cf8e499910c3397c9ae69739 Mon Sep 17 00:00:00 2001
From: kthoden <kthoden@gwdg.de>
Date: Tue, 14 Jan 2020 16:54:48 +0100
Subject: [PATCH 5/5] non-numbered sections

---
 src/imxml2epub.py | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/src/imxml2epub.py b/src/imxml2epub.py
index 0e9b316..55c3385 100755
--- a/src/imxml2epub.py
+++ b/src/imxml2epub.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8; mode: python -*-
-# Time-stamp: <2020-01-14 14:56:34 (kthoden)>
+# Time-stamp: <2020-01-14 16:18:28 (kthoden)>
 
 """ Convert a customized DocBook XML file into a set of files that
 constitute the contents of an EPUB file.
@@ -721,21 +721,25 @@ def add_css_snippet(css_snippet, css_file):
 xmlSections = xmlEbookTree.findall(".//div2")
 for xmlSection in xmlSections:
     xmlSection.find("head").tag = "h2"
-    if xmlSection.get("rend") != "nonumber":
-        idSection = xmlSection.get("id")
-        strHeadline = xmlSection.find("h2").text or ""
-        logging.info(strHeadline)
+    idSection = xmlSection.get("id")
+    strHeadline = xmlSection.find("h2").text or ""
+    logging.info(strHeadline)
+    if xmlSection.get("n") != "nonumber":
         xmlSection.find("h2").text = str(dictSections[idSection]) + " " + strHeadline
+    else:
+        xmlSection.find("h2").text = strHeadline
 
 logging.info(f"{logseparator}Convert EOAsubsection to H3")
 xmlSubsections = xmlEbookTree.findall(".//div3")
 for xmlSubsection in xmlSubsections:
     xmlSubsection.find("head").tag = "h3"
-    if xmlSubsection.get("rend") != "nonumber":
-        idSection = xmlSubsection.get("id")
-        strHeadline = xmlSubsection.find("h3").text or ""
-        logging.info(strHeadline)
+    idSection = xmlSubsection.get("id")
+    strHeadline = xmlSubsection.find("h3").text or ""
+    logging.info(strHeadline)
+    if xmlSubsection.get("n") != "nonumber":
         xmlSubsection.find("h3").text = str(dictSections[idSection]) + " " + strHeadline
+    else:
+        xmlSubsection.find("h3").text = strHeadline
 
 logging.info(f"{logseparator}Convert EOAsubsubsection to H4")
 xmlSubsubsections = xmlEbookTree.findall(".//div4")