# Provenance: introduced in src/eoatex2imxml.py by commit
# 6d05febd36c3862a53c973cb80b5467ddf361a6a
# (kthoden, Fri, 9 Jul 2021 11:03:30 +0200):
#   "Move anchors that are direct children of divs deeper into tree.
#    If not deeper in tree, they are removed at a later point."
# The same commit invokes the function once at module level as
# ``move_anchors(xmlTree)``, directly after the ``etree.strip_tags`` /
# ``etree.strip_elements`` clean-up of ``xmlTree``.


def move_anchors(xml_tree):
    """Move stray anchors into the next paragraph.

    An ``anchor`` element is treated as stray when its parent is not a
    ``p``, ``EOAfigure`` or ``hi`` element.  Every stray anchor is
    re-inserted as the first child element of its nearest following
    sibling ``p``.  A stray anchor that has no following sibling ``p`` is
    left where it is and reported with a warning.

    Text that directly follows a stray anchor (its ``tail``) is kept at
    its original place in the document instead of travelling into the
    paragraph together with the anchor.

    Args:
        xml_tree: Tree or element that offers the lxml-style ``xpath()``
            method.  It is modified in place.

    Returns:
        None.
    """
    accepted_parents = ("p", "EOAfigure", "hi")

    all_anchors = xml_tree.xpath(".//anchor")
    logging.info(f"Found {len(all_anchors)} anchors.")
    for anchor in all_anchors:
        anchor_parent = anchor.getparent()
        if anchor_parent.tag in accepted_parents:
            # Already inside an accepted container; nothing to do.
            continue

        anchor_pos = libeoaconvert.get_place_in_xml_tree(anchor, anchor_parent)
        logging.debug(f"Anchor parent is {anchor_parent.tag}, anchor is at position {anchor_pos}.")

        # An empty result is the only expected failure, so test for it
        # explicitly rather than catching IndexError around the whole move.
        following_paragraphs = anchor.xpath("following-sibling::p[1]")
        if not following_paragraphs:
            logging.warning(f"This didn't work out: {anchor.get('id')}")
            continue
        next_paragraph = following_paragraphs[0]
        para_pos = libeoaconvert.get_place_in_xml_tree(next_paragraph, anchor_parent)

        # lxml moves an element together with its tail text.  Detach the
        # tail first and leave it where it was, otherwise text that
        # followed the anchor would end up in the middle of the paragraph.
        stray_tail = anchor.tail
        if stray_tail:
            anchor.tail = None
            previous_sibling = anchor.getprevious()
            if previous_sibling is not None:
                previous_sibling.tail = (previous_sibling.tail or "") + stray_tail
            else:
                anchor_parent.text = (anchor_parent.text or "") + stray_tail

        next_paragraph.insert(0, anchor)
        logging.info(f"Moved the anchor {para_pos-anchor_pos} positions down the tree.")
# def move_anchors ends here