diff --git a/src/imxml2django.py b/src/imxml2django.py index 82eb8f4..9fc503d 100755 --- a/src/imxml2django.py +++ b/src/imxml2django.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -*- coding: utf-8; mode: python -*- -# Time-stamp: <2021-07-08 19:20:28 (kthoden)> +# Time-stamp: <2021-07-09 11:16:13 (kthoden)> """ Create an XML file that can be inserted into the Django database @@ -1615,17 +1615,23 @@ def bring_footnote_down_django(footnote, fragment, footnote_number, object_numbe else: previous_element.tail = oc_tail else: - logging.debug("No originalcontents elements found.") + logging.info("No originalcontents elements found.") xmlReferences = xmlEOAchapter.findall(".//EOAref") for xmlReference in xmlReferences: strResult = "!!! Cross Reference !!!" strChapterOrder = "" strObjectOrder = "" + ref_is_text = False + ref_is_collage = False + + reference_type = xmlReference.get("type") + originalcontents = xmlReference.find("originalcontents") xmlReferenceLabel = xmlReference.find("Label") xmlReferenceLabelText = xmlReferenceLabel.text xmlReferenceRef = xmlReference.find("ref") xmlReferenceRefTarget = xmlReferenceRef.get("target") + if xmlReferenceLabelText in dictEquations: # Grab Number from Dictionary strResult = dictEquations[xmlReferenceLabelText] @@ -1639,7 +1645,7 @@ def bring_footnote_down_django(footnote, fragment, footnote_number, object_numbe if xmlParent.tag == "EOAchapter": strChapterOrder = xmlParent.get("order") strObjectOrder = xmlEOAequation.get("order") - if xmlReferenceRefTarget in dictEquations: + elif xmlReferenceRefTarget in dictEquations: # Grab Number from Dictionary strResult = dictEquations[xmlReferenceRefTarget] # Go through all equations and find the corresponding Equation @@ -1652,7 +1658,7 @@ def bring_footnote_down_django(footnote, fragment, footnote_number, object_numbe if xmlParent.tag == "EOAchapter": strChapterOrder = xmlParent.get("order") strObjectOrder = xmlEOAequation.get("order") - if xmlReferenceRefTarget in dictLists: + elif xmlReferenceRefTarget in dictLists: logging.debug("Found link to list.") strResult = dictLists[xmlReferenceRefTarget] xmlEOAlistitem = xmlEOAdocument.xpath("//EOAchapter/*[contains(@id, $targetuid)]", targetuid = xmlReferenceRefTarget)[0] @@ -1660,15 +1666,34 @@ def bring_footnote_down_django(footnote, fragment, footnote_number, object_numbe if xmlParent.tag == "EOAchapter": strChapterOrder = xmlParent.get("order") strObjectOrder = xmlEOAlistitem.get("order") - if xmlReferenceRefTarget in dictChapters: + elif xmlReferenceRefTarget in dictChapters: logging.debug("Found link to chapter.") strResult = dictChapters[xmlReferenceRefTarget] - for xmlEOAchapter in xmlEOAdocument.findall(".//EOAchapter"): - if xmlEOAchapter.get("id") == xmlReferenceRefTarget: - logging.debug("Successfully handled link to a chapter: %s" % strResult) - strObjectOrder = "top" - strChapterOrder = xmlEOAchapter.get("order") - if xmlReferenceRefTarget in dictTheorems: + xmlEOAchapter = xmlEOAdocument.xpath(f".//EOAchapter[@id='{xmlReferenceRefTarget}']") + if len(xmlEOAchapter) == 0: + logging.warning("There seems to be no corresponding id for %s." % xmlReferenceRefTarget) + # if uid is the one from the anchor after the head + # element, that anchor element has been removed by now + # and we need to find the corresponding element by + # string comparison in the dictionary + same_sr = [i for i in dictChapters if dictChapters[i] == strResult] + same_sr.remove(xmlReferenceRefTarget) + if len(same_sr) == 0: + logging.error("id cannot be found.") + elif len(same_sr) > 1: + logging.error("id is ambiguous.") + else: + logging.info(f"Using {same_sr[0]} instead.") + right_chapter = xmlEOAdocument.xpath(f".//EOAchapter[@id='{same_sr[0]}']")[0] + elif len(xmlEOAchapter) > 1: + logging.error("The xml:id %s is assigned more than once. This is not allowed. Exiting." % xmlReferenceLabelText) + sys.exit(2) + else: + right_chapter = xmlEOAchapter[0] + + strChapterOrder = right_chapter.get("order") + strObjectOrder = "top" + elif xmlReferenceRefTarget in dictTheorems: logging.debug("Found link to ein Theorem") strResult = dictTheorems[xmlReferenceRefTarget] for xmlEOAtheorem in xmlEOAdocument.findall(".//EOAtheorem"): @@ -1678,18 +1703,33 @@ def bring_footnote_down_django(footnote, fragment, footnote_number, object_numbe if xmlParent.tag == "EOAchapter": strObjectOrder = xmlEOAtheorem.get("order") strChapterOrder = xmlParent.get("order") - if xmlReferenceRefTarget in dictSections: + elif xmlReferenceRefTarget in dictSections: logging.debug("Found link to section") + logging.info(f"It's {xmlReferenceRefTarget}") strResult = dictSections[xmlReferenceRefTarget] - xmlEOAsections = xmlEOAdocument.findall(".//EOAsection") - for xmlEOAsection in xmlEOAsections: - tmpReferenceRefTarget = xmlEOAsection.get("id") - if xmlReferenceRefTarget == tmpReferenceRefTarget: - logging.debug("Successfully handled link to section: %s " % strResult) - for xmlParent in xmlEOAsection.iterancestors(): - if xmlParent.tag == "EOAchapter": - strChapterOrder = xmlParent.get("order") - strObjectOrder = xmlEOAsection.get("order") + xmlEOAsection = xmlEOAdocument.xpath(f".//EOAsection[@id='{xmlReferenceRefTarget}']") + if len(xmlEOAsection) == 0: + logging.warning("There seems to be no corresponding id for %s." % xmlReferenceRefTarget) + # see explanation at dictChapters + same_sr = [i for i in dictSections if dictSections[i] == strResult] + same_sr.remove(xmlReferenceRefTarget) + if len(same_sr) == 0: + logging.error("id cannot be found.") + elif len(same_sr) > 1: + logging.error("id is ambiguous.") + else: + logging.info(f"Using {same_sr[0]} instead.") + right_section = xmlEOAdocument.xpath(f".//EOAsection[@id='{same_sr[0]}']")[0] + elif len(xmlEOAsection) > 1: + logging.error("The xml:id %s is assigned more than once. This is not allowed. Exiting." % xmlReferenceLabelText) + sys.exit(2) + else: + right_section = xmlEOAsection[0] + + for xmlParent in right_section.iterancestors(): + if xmlParent.tag == "EOAchapter": + strChapterOrder = xmlParent.get("order") + strObjectOrder = right_section.get("order") xmlEOAsubsections = xmlEOAdocument.findall(".//EOAsubsection") for xmlEOAsubsection in xmlEOAsubsections: tmpReferenceRefTarget = xmlEOAsubsection.get("id") @@ -1699,7 +1739,7 @@ def bring_footnote_down_django(footnote, fragment, footnote_number, object_numbe if xmlParent.tag == "EOAchapter": strChapterOrder = xmlParent.get("order") strObjectOrder = xmlEOAsubsection.get("order") - if xmlReferenceRefTarget in dictFigures: + elif xmlReferenceRefTarget in dictFigures: logging.debug("Found link to figure") strResult = dictFigures[xmlReferenceRefTarget] xmlEOAfigures = xmlEOAdocument.findall(".//EOAfigure") @@ -1711,7 +1751,7 @@ def bring_footnote_down_django(footnote, fragment, footnote_number, object_numbe if xmlParent.tag == "EOAchapter": strChapterOrder = xmlParent.get("order") strObjectOrder = xmlEOAfigure.get("order") - if xmlReferenceRefTarget in dictFootnotes: + elif xmlReferenceRefTarget in dictFootnotes: logging.debug("Found link to footnote") strResult = dictFootnotes[xmlReferenceRefTarget] xmlEOAfootnotes = xmlEOAdocument.findall(".//EOAfootnote") @@ -1723,7 +1763,7 @@ def bring_footnote_down_django(footnote, fragment, footnote_number, object_numbe if xmlParent.tag == "EOAchapter": strChapterOrder = xmlParent.get("order") strObjectOrder = xmlEOAfootnote.get("order") - if xmlReferenceRefTarget in dictTables: + elif xmlReferenceRefTarget in dictTables: logging.debug("Found link to table") strResult = dictTables[xmlReferenceRefTarget] xmlEOAtables = xmlEOAdocument.findall(".//EOAtable") @@ -1735,27 +1775,63 @@ def bring_footnote_down_django(footnote, fragment, footnote_number, object_numbe if xmlParent.tag == "EOAchapter": strChapterOrder = xmlParent.get("order") strObjectOrder = xmlEOAtable.get("order") - tmpTail = xmlReference.tail or "" - - originalcontents = xmlReference.find("originalcontents") - - ref_is_text = False - ref_is_collage = False - - reference_type = xmlReference.get("type") - if reference_type == "collage": - ref_is_collage = True - elif reference_type == "text": - ref_is_text = True - all_children = list(xmlReference) - text_has_children = all_children[:-2] - if text_has_children: - reference_text = xmlReference.text - textref_innards = list() - for xml_child in text_has_children: - textref_innards.append(xml_child) + else: + logging.debug("Found this other reference") + if reference_type == "collage": + logging.info(f"{xmlReferenceRefTarget} is a collage") + ref_is_collage = True + elif reference_type == "text": + logging.debug(f"{xmlReferenceRefTarget} is a text link") + ref_is_text = True + xmlReferenceRef = xmlReference.find("ref") + xmlReferenceRefTarget = xmlReferenceRef.get("target") + xmlReferenceLabel = xmlReference.find("Label") + xmlReferenceLabelText = xmlReferenceLabel.text + + pararef = xmlDjangoTree.xpath("//*[@id='%s']" % xmlReferenceRefTarget) + + if len(pararef) == 0: + logging.warning("There seems to be no corresponding xml:id for %s." % xmlReferenceRefTarget) + failed_ids.append(f"{xmlReferenceRefTarget} ({xmlReferenceLabelText})\n") + elif len(pararef) > 1: + logging.error("The xml:id %s is assigned more than once. This is not allowed. Exiting." % xmlReferenceLabelText) + sys.exit(2) + else: + for xmlParent in pararef[0].iterancestors(): + if xmlParent.tag == "EOAchapter": + strChapterOrder = xmlParent.get("order") + for xmlParent in pararef[0].iterancestors(): + if xmlParent.tag == "EOAparagraph": + strObjectOrder = xmlParent.get("order") + + all_children = list(xmlReference) + text_has_children = all_children[:-2] + if text_has_children: + reference_text = xmlReference.text + textref_innards = list() + for xml_child in text_has_children: + textref_innards.append(xml_child) + else: + reference_text = xmlReference.text.strip() else: - reference_text = xmlReference.text.strip() + guessref = xmlDjangoTree.xpath("//*[@id='%s']" % xmlReferenceRefTarget) + if len(guessref) == 0: + logging.warning("There seems to be no corresponding xml:id for %s." % xmlReferenceLabelText) + failed_ids.append(xmlReferenceLabelText + "\n") + elif len(guessref) > 1: + logging.error("The xml:id %s is assigned more than once. This is not allowed. Exiting." % xmlReferenceLabelText) + sys.exit(2) + else: + for xmlParent in guessref[0].iterancestors(): + if xmlParent.tag == "EOAparagraph": + strObjectOrder = xmlParent.get("order") + id_container = xmlParent.xpath("preceding-sibling::EOAsection[1]")[0] + section_id = id_container.get("id") + strResult = dictSections[section_id] + if xmlParent.tag == "EOAchapter": + strChapterOrder = xmlParent.get("order") + + tmpTail = xmlReference.tail or "" xmlReference.clear() if originalcontents is not None: @@ -1774,13 +1850,11 @@ def bring_footnote_down_django(footnote, fragment, footnote_number, object_numbe # hyperimage if xmlReferenceRef.get("data-hilayer"): xmlReference.set("data-hilayer", xmlReferenceRef.get("data-hilayer")) - if xmlReference.text: - logging.debug(xmlReference.text) - xmlReference.text - if xmlReferenceRef.get("hitarget"): + elif xmlReferenceRef.get("hitarget"): xmlReference.set("class", "HILink") href_string = "#" + xmlReferenceRef.get("hitarget") - elif strObjectOrder: + + if strObjectOrder: href_string = "../" + strChapterOrder + "/index.html#" + strObjectOrder else: href_string = "strChapterOrder missing"