From 20a21c37f1ab54f040da338e3d89200d8d30c876 Mon Sep 17 00:00:00 2001 From: kthoden Date: Wed, 31 Jul 2019 14:58:26 +0200 Subject: [PATCH] Hyperimage figure handling, mostly collages --- imxml2django.py | 50 ++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 47 insertions(+), 3 deletions(-) diff --git a/imxml2django.py b/imxml2django.py index 49ec48c..0b82359 100755 --- a/imxml2django.py +++ b/imxml2django.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -*- coding: utf-8; mode: python -*- -# Time-stamp: <2019-07-30 16:05:12 (kthoden)> +# Time-stamp: <2019-07-31 14:46:18 (kthoden)> """ Create an XML file that can be inserted into the Django database @@ -387,7 +387,7 @@ def djangoParseObject(xmlElement, indent=False, listtype=None, listnumber=0, uid figure_type = xmlElement.get("type") # Copy Image if figure_type == "hionly" or figure_type == "hionlycollage": - logging.debug(f"Found hyperimage figure ({figure_type}), continuing") + logging.debug(f"Found hyperimage figure ({figure_type}), no need to copy them.") pass else: strImageFileString = xmlElement.find(".//file").text @@ -415,17 +415,29 @@ def djangoParseObject(xmlElement, indent=False, listtype=None, listnumber=0, uid logging.debug("The filename is %s" % xmlEOAfigure.get("file")) else: xmlEOAfigure.set("file", strImageFileDir + strImageFileName) + + if figure_type == "hionly" or figure_type == "hionlycollage": + logging.debug(f"Found hyperimage figure ({figure_type}), no need for caption and size information.") + pass + else: xmlEOAfigure.set("width", xmlElement.find(".//width").text + "px;") xmlEOAfigure.set("order", str(intObjectNumber)) xmlEOAfigure.append(xmlElement.find(".//caption")) xmlResult.append(xmlEOAfigure) - intObjectNumber += 1 # Insert visual Number and uid strFigureNumber = dictFigures[xmlElement.find(".//anchor").get("id")] xmlEOAfigure.set("number", strFigureNumber) strFigureUID = xmlElement.find(".//anchor").get("id") xmlEOAfigure.set("id", strFigureUID) + + hi_figure_types = ["hitrue", "hionly", "hionlycollage"] + + if figure_type in hi_figure_types: + xmlEOAfigure.set("hielement", xmlElement.get("hielement")) + else: + pass + elif xmlElement.findall(".//EOAtable"): xmlResult = etree.Element("EOAtable") xmlRawTable = xmlElement.find(".//table") @@ -1512,6 +1524,27 @@ def bring_footnote_down_django(footnote, fragment, footnote_number, object_numbe # Substitute References with their targets (wit links) for xmlEOAchapter in xmlEOAchapters: + # for hyperimage collages + originalcontents = xmlEOAchapter.findall(".//originalcontents") + if originalcontents is not None: + for originalcontent in originalcontents: + previous_element = originalcontent.getprevious() + if originalcontent.getparent().tag == "EOAref": + pass + elif previous_element.tag != "EOAref": + logging.error("Found a stray originalcontents element.") + else: + oc_tail = originalcontent.tail + originalcontent.tail = "" + previous_element.append(originalcontent) + if previous_element.tail is not None: + logging.warning("Appending the old tail of EOAref") + previous_element.tail += oc_tail + else: + previous_element.tail = oc_tail + else: + logging.debug("No originalcontents elements found.") + xmlReferences = xmlEOAchapter.findall(".//EOAref") for xmlReference in xmlReferences: strResult = "!!! Cross Reference !!!" @@ -1633,6 +1666,10 @@ def bring_footnote_down_django(footnote, fragment, footnote_number, object_numbe tmpTail = xmlReference.tail or "" originalcontents = xmlReference.find("originalcontents") + if xmlReference.get("type") == "collage": + ref_is_collage = True + else: + ref_is_collage = False xmlReference.clear() if originalcontents is not None: logging.info("Found originalcontents") @@ -1658,6 +1695,10 @@ def bring_footnote_down_django(footnote, fragment, footnote_number, object_numbe logging.warning("strObjectOrder is missing!") xmlReference.set("href", href_string) + if ref_is_collage: + xmlReference.set("type", "collage") + else: + pass logging.info("----------------------------------------------") logging.info("Processing Page References") @@ -1857,6 +1898,9 @@ def bring_footnote_down_django(footnote, fragment, footnote_number, object_numbe ############################################################################ # TODO: Die unnötigen Attribute wie id löschen # TODO: Die unnötigen Tags wie EOAlabel löschen +collagelinks = xmlDjangoTree.xpath(".//a[@type='collage']/originalcontents/a") +for link in collagelinks: + link.tag = "temp" etree.strip_tags(xmlDjangoTree, "temp", "citetext", "EOAprintbibliography", "originalcontents") etree.strip_elements(xmlDjangoTree, "citekey", with_tail=False) etree.strip_attributes(xmlDjangoTree, "id-text", "id", "noindent", "type", "label", "spacebefore")#, "rend")