diff --git a/src/imxml2epub.py b/src/imxml2epub.py
index 268445c..ebe65d1 100755
--- a/src/imxml2epub.py
+++ b/src/imxml2epub.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python3
# -*- coding: utf-8; mode: python -*-
-# Time-stamp: <2019-12-18 10:13:44 (kthoden)>
+# Time-stamp: <2020-01-14 16:18:28 (kthoden)>
""" Convert a customized DocBook XML file into a set of files that
constitute the contents of an EPUB file.
@@ -31,6 +31,7 @@
SCRIPT_PATH = Path( __file__ )
SCRIPT_NAME = SCRIPT_PATH.stem
+
DEFAULT_INPUT_DIR = \
Path(os.environ['INPUT_DIR'] if 'INPUT_DIR' in os.environ else './input')
@@ -97,6 +98,13 @@
help="Specify the directory with files of the font (the font itself, License)",
)
+parser.add_argument(
+ "-him", "--hyperimage",
+ help="Link hyperlink references to online version.",
+ action="store_true"
+ )
+
+
args = parser.parse_args()
config_file = args.CONFIG_FILE
@@ -179,6 +187,12 @@
dictPagelabels = data["pagelabeldict"]
+if args.hyperimage:
+ logging.info("Enabled Hyperimage support")
+else:
+ pass
+
+
def get_mimetype(filename_suffix):
"""Return mimetype of image"""
if filename_suffix.lower() == ".jpg":
@@ -526,6 +540,7 @@ def add_css_snippet(css_snippet, css_file):
publication_series = cfgPublication.get("Technical", "Serie")
publication_number = cfgPublication.get("Technical", "Number")
publication_license = cfgPublication.get("Technical", "License")
+publication_landingpage = cfgPublication.get("Technical", "LandingPage")
try:
publication_isbn = cfgPublication.get("Technical", "ISBN-epub")
except:
@@ -706,21 +721,25 @@ def add_css_snippet(css_snippet, css_file):
xmlSections = xmlEbookTree.findall(".//div2")
for xmlSection in xmlSections:
xmlSection.find("head").tag = "h2"
- if xmlSection.get("rend") != "nonumber":
- idSection = xmlSection.get("id")
- strHeadline = xmlSection.find("h2").text or ""
- logging.info(strHeadline)
+ idSection = xmlSection.get("id")
+ strHeadline = xmlSection.find("h2").text or ""
+ logging.info(strHeadline)
+ if xmlSection.get("n") != "nonumber":
xmlSection.find("h2").text = str(dictSections[idSection]) + " " + strHeadline
+ else:
+ xmlSection.find("h2").text = strHeadline
logging.info(f"{logseparator}Convert EOAsubsection to H3")
xmlSubsections = xmlEbookTree.findall(".//div3")
for xmlSubsection in xmlSubsections:
xmlSubsection.find("head").tag = "h3"
- if xmlSubsection.get("rend") != "nonumber":
- idSection = xmlSubsection.get("id")
- strHeadline = xmlSubsection.find("h3").text or ""
- logging.info(strHeadline)
+ idSection = xmlSubsection.get("id")
+ strHeadline = xmlSubsection.find("h3").text or ""
+ logging.info(strHeadline)
+ if xmlSubsection.get("n") != "nonumber":
xmlSubsection.find("h3").text = str(dictSections[idSection]) + " " + strHeadline
+ else:
+ xmlSubsection.find("h3").text = strHeadline
logging.info(f"{logseparator}Convert EOAsubsubsection to H4")
xmlSubsubsections = xmlEbookTree.findall(".//div4")
@@ -738,7 +757,9 @@ def add_css_snippet(css_snippet, css_file):
xmlParagraph.find("head").tag = "h5"
logging.info(f"{logseparator}Preparing Figures")
-xmlFigures = xmlEbookTree.xpath(".//EOAfigure[not(@type='hionly')] | .//EOAlsfigure[not(@type='hionly')]")
+xmlFigures = xmlEbookTree.xpath(".//EOAfigure[not(contains(@type,'hionly'))]")
+libeoaconvert.debug_xml_here(xmlEbookTree, "find_eoafigures", DEBUG_DIR)
+logging.info("Found %s figures", len(xmlFigures))
for xmlFigure in xmlFigures:
# Copy File of the Image
# If it's in a subfolder, name of folder and name of image will be merged
@@ -808,9 +829,10 @@ def add_css_snippet(css_snippet, css_file):
# Change the tag of the parent so that it may be removed
#xmlFigure.getparent().tag = "div"
-xml_figures_hyperimage = xmlEbookTree.xpath(".//EOAfigure[@type='hionly'] | .//EOAlsfigure[@type='hionly']")
-logging.debug("found %s hyperimage figures" % len(xml_figures_hyperimage))
+xml_figures_hyperimage = xmlEbookTree.xpath(".//EOAfigure[contains(@type,'hionly')]")
+logging.info("Found %s hyperimage figures", len(xml_figures_hyperimage))
for fig in xml_figures_hyperimage:
+ fig.clear()
fig.tag = "EOAhifigure"
logging.info(f"{logseparator}Preparing not numbered Figures")
@@ -1659,10 +1681,9 @@ class FootnoteError(Exception):
logging.info(f"{logseparator}Preparing Cross-References")
for xmlChapter in xmlChapters:
- xmlReferences = xmlChapter.findall(".//EOAref")
+ xmlReferences = xmlChapter.xpath(".//EOAref[not(parent::EOAref)]")
for xmlReference in xmlReferences:
-
# the new stuff
# label_text = xmlReference.find("Label").text[1:]
# logging.debug("label text is %s" % label_text)
@@ -1678,47 +1699,77 @@ class FootnoteError(Exception):
# eoa_id = eoa_id_element.get("id")
# end of the new stuff
+ hitarget_id_list = xmlReference.xpath("./ref/@hitarget")
- logging.info("XXXXXXXX")
- strResult = "!!! Cross Reference !!!"
-
- xmlReferenceLabel = xmlReference.find("Label")
- xmlReferenceLabelText = xmlReferenceLabel.text
-
- xmlReferenceRef = xmlReference.find("ref")
- xmlReferenceRefTarget = xmlReferenceRef.get("target")
-
- if xmlReferenceLabelText in dictEquations:
- logging.info("Verweis auf Array gefunden:" + xmlReferenceLabelText)
- strResult = dictEquations[xmlReferenceLabelText]
- if xmlReferenceRefTarget in dictEquations:
- logging.info("Verweis auf Equation gefunden:" + xmlReferenceRefTarget)
- strResult = dictEquations[xmlReferenceRefTarget]
- if xmlReferenceRefTarget in dictLists:
- logging.info("Verweis auf Liste gefunden")
- strResult = dictLists[xmlReferenceRefTarget]
- if xmlReferenceRefTarget in dictChapters:
- logging.info("Verweis auf Kapitel gefunden")
- strResult = dictChapters[xmlReferenceRefTarget]
- if xmlReferenceRefTarget in dictSections:
- logging.info("Verweis auf Section gefunden")
- strResult = dictSections[xmlReferenceRefTarget]
- if xmlReferenceRefTarget in dictFigures:
- logging.info("Verweis auf Abbildung gefunden")
- strResult = dictFigures[xmlReferenceRefTarget]
- if xmlReferenceRefTarget in dictFootnotes:
- logging.info("Verweis auf Fussnote gefunden")
- strResult = dictFootnotes[xmlReferenceRefTarget]
- if xmlReferenceRefTarget in dictTheorems:
- logging.info("Verweis auf Theorem gefunden")
- strResult = dictTheorems[xmlReferenceRefTarget]
- if xmlReferenceRefTarget in dictTables:
- logging.info("Verweis auf Tabelle gefunden")
- strResult = dictTables[xmlReferenceRefTarget]
- tmpTail = xmlReference.tail or ""
- #tmpTail = tmpTail.strip()
+ if len(hitarget_id_list) == 1:
+ hitarget_id = hitarget_id_list[0]
+ else:
+ hitarget_id = None
+
+ reference_type = xmlReference.get("type")
+ if reference_type == "text":
+ tmpTail = xmlReference.tail or ""
+ strResult = xmlReference.text
+ elif reference_type == "collage":
+ tmpTail = xmlReference.tail or ""
+ logging.debug("Found reference to a Hyperimage collage.")
+ subreferences = xmlReference.xpath("./EOAref[@type='number']")
+ strResult = ""
+ for subref in subreferences:
+ subref_tail = subref.tail or ""
+ subref_target = subref.xpath("./ref/@target")[0]
+ target_string = dictFigures[subref_target]
+ strResult += f"{target_string}{subref_tail}"
+ elif reference_type == "number":
+ logging.info("XXXXXXXX")
+ strResult = "!!! Cross Reference !!!"
+
+ xmlReferenceLabel = xmlReference.find("Label")
+ xmlReferenceLabelText = xmlReferenceLabel.text
+
+ xmlReferenceRef = xmlReference.find("ref")
+ xmlReferenceRefTarget = xmlReferenceRef.get("target")
+
+ if xmlReferenceLabelText in dictEquations:
+ logging.info("Verweis auf Array gefunden:" + xmlReferenceLabelText)
+ strResult = dictEquations[xmlReferenceLabelText]
+ if xmlReferenceRefTarget in dictEquations:
+ logging.info("Verweis auf Equation gefunden:" + xmlReferenceRefTarget)
+ strResult = dictEquations[xmlReferenceRefTarget]
+ if xmlReferenceRefTarget in dictLists:
+ logging.info("Verweis auf Liste gefunden")
+ strResult = dictLists[xmlReferenceRefTarget]
+ if xmlReferenceRefTarget in dictChapters:
+ logging.info("Verweis auf Kapitel gefunden")
+ strResult = dictChapters[xmlReferenceRefTarget]
+ if xmlReferenceRefTarget in dictSections:
+ logging.info("Verweis auf Section gefunden")
+ strResult = dictSections[xmlReferenceRefTarget]
+ if xmlReferenceRefTarget in dictFigures:
+ logging.info("Verweis auf Abbildung gefunden")
+ strResult = dictFigures[xmlReferenceRefTarget]
+ if xmlReferenceRefTarget in dictFootnotes:
+ logging.info("Verweis auf Fussnote gefunden")
+ strResult = dictFootnotes[xmlReferenceRefTarget]
+ if xmlReferenceRefTarget in dictTheorems:
+ logging.info("Verweis auf Theorem gefunden")
+ strResult = dictTheorems[xmlReferenceRefTarget]
+ if xmlReferenceRefTarget in dictTables:
+ logging.info("Verweis auf Tabelle gefunden")
+ strResult = dictTables[xmlReferenceRefTarget]
+ tmpTail = xmlReference.tail or ""
+ #tmpTail = tmpTail.strip()
+ else:
+ logging.error("Found unknown reference type: %s. Exiting", reference_type)
+ sys.exit(1) # non-zero status: an unknown reference type is a failure, not a clean exit
logging.info("XXXXXXXX")
xmlReference.clear()
+ if args.hyperimage and hitarget_id and reference_type in ["collage", "number"]:
+ hyperimage_link = f"{publication_landingpage[:-11]}/{intChapterNumber - 1}/index.html#{hitarget_id}"
+ xmlReference.tag = "a"
+ xmlReference.set("href", hyperimage_link)
+ else:
+ pass
xmlReference.text = strResult
xmlReference.tail = tmpTail
@@ -1760,8 +1811,8 @@ class FootnoteError(Exception):
xmlIndexentry.clear()
xmlIndexentry.tail = tmpTail
etree.strip_tags(xmlEbookTree, "EOAlabel", "EOAindex", "EOApageref", "EOAcitenumeric", "EOAtable", "EOAref", "note", "div", "div2", "div3", "div4", "div5", "citetext", "newpage", "EOAciteyear", "EOAtablelabel" , "hi", "pagebreak", "page", "pagestyle", "EOAcitation", "EOAciteauthoryear", "EOAcitemanual", "EOAprintbibliography", "EOAindexperson", "EOAprintindex", "EOAindexlocation", "EOAprintpersonindex", "EOAprintlocationindex","anchor", "temp", "EOAletterhead", "EOAhifigure", "EOAtocentry")
-etree.strip_attributes(xmlEbookTree, "id-text", "noindent", "type", "label", "spacebefore", "rend") # also contained "id"
-etree.strip_elements(xmlEbookTree, "citekey", with_tail=False)
+etree.strip_attributes(xmlEbookTree, "id-text", "noindent", "type", "label", "spacebefore", "rend", "hielement") # also contained "id"
+etree.strip_elements(xmlEbookTree, "citekey", "originalcontents", with_tail=False)
logging.info("Write every Part and Chapter into one file")
xmlChapters = xmlEbookTree.findall("//div1")
diff --git a/src/tei2imxml.py b/src/tei2imxml.py
index 97d8df5..90b484e 100755
--- a/src/tei2imxml.py
+++ b/src/tei2imxml.py
@@ -101,7 +101,7 @@ def get_field(xml_tree, query_path, mandatory=False, findall=False, noformat=Fal
else:
tmp_field = xml_tree.xpath(query_path, namespaces=NS_MAP)
if len(tmp_field) > 0:
- return_string = tmp_field[0]
+ return_string = sanitize_data_string(tmp_field[0])
else:
if mandatory is True:
sys.exit("Field stored in %s is mandatory. Exiting." % query_path)
@@ -131,6 +131,7 @@ def get_field(xml_tree, query_path, mandatory=False, findall=False, noformat=Fal
info_dict['eoa_detail_desc'] = get_field(xml_tree, "//t:teiHeader/t:profileDesc/t:abstract[@n='detailed']/p/text()")
info_dict['eoa_additional_info'] = get_field(xml_tree, "//t:teiHeader/t:profileDesc/t:abstract[@n='additional']/p/text()")
info_dict['eoa_dedication'] = get_field(xml_tree, "//t:text/t:front/t:div[@type='dedication']/t:ab/text()")
+ info_dict['eoa_landingpage'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:publicationStmt/t:publisher/t:orgName[@n='Press']/@ref")
info_dict['eoa_submitters'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:editor[@role='submitter']/@ref", findall=True)
info_dict['eoa_publicationmanagers'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:editor[@role='publicationmanager']/@ref", findall=True)
@@ -174,6 +175,7 @@ def make_publication_cfg(info_dict, translation_file):
technical_config['Shoplink'] = """
{1}""".format(info_dict['eoa_shoplink_url'], info_dict['eoa_shoplink_text']) #ok
technical_config['Language'] = info_dict['eoa_language'] #ok
technical_config['License'] = info_dict['eoa_license'].split("/")[4] #ok
+ technical_config['LandingPage'] = f"{info_dict['eoa_landingpage']}/{info_dict['eoa_series'].lower()}/{info_dict['eoa_number']}/index.html"
general_config['BriefDescription'] = info_dict['eoa_brief_desc'] #ok
if info_dict['eoa_submitters'] is not None:
@@ -184,9 +186,10 @@ def make_publication_cfg(info_dict, translation_file):
if len(info_dict['eoa_keywords']) > 8:
logging.warning("Too many keywords. Up to 8 are allowed. Using the first 8.")
else:
- for keyword in info_dict['eoa_keywords'][:7]:
- keyword_label = "Keyword" + str(info_dict['eoa_keywords'].index(keyword) + 1)
- general_config[keyword_label] = keyword
+ pass
+ for keyword_number, keyword in enumerate(info_dict['eoa_keywords'][:8], start=1): # keep the first 8 keywords, matching the limit stated in the warning
+ keyword_label = "Keyword" + str(keyword_number) # numbered by position, so a repeated keyword does not reuse a label
+ general_config[keyword_label] = keyword
general_config['DetailedDescription'] = info_dict['eoa_detail_desc'] #ok
general_config['AdditionalInformation'] = info_dict['eoa_additional_info'] #ok