From bcadf508d9129bf1fa05742e070af56f187b311f Mon Sep 17 00:00:00 2001 From: kthoden Date: Mon, 2 Mar 2020 13:09:03 +0100 Subject: [PATCH] Process citedRange with inline markup --- src/tei2imxml.py | 44 ++++++++++++++++++++++++++++---------- src/utils/libeoaconvert.py | 10 +++++++++ 2 files changed, 43 insertions(+), 11 deletions(-) diff --git a/src/tei2imxml.py b/src/tei2imxml.py index 6793bbd..3c3e8f5 100755 --- a/src/tei2imxml.py +++ b/src/tei2imxml.py @@ -764,15 +764,24 @@ def handle_refs_default(ref): else: pass + cited_range_children = False + if len(cited_range) > 0: - if cited_range[0].text is not None and cited_range[0].get("from") is not None: + has_content = libeoaconvert.has_text_or_children(cited_range[0]) + + if has_content and cited_range[0].get("from") is not None: logging.error("You must not use 'from' attribute and text in citedRange at the same time. Exiting.") sys.exit(1) - elif cited_range[0].text is not None: - # might contain markup! - pagerange = ", {}".format(cited_range[0].text) - # clear the text - cited_range[0].text = "" + elif has_content: + if len(cited_range[0].getchildren()) > 0: + cited_range_children = True + cited_range[0].tag = "tagtobestripped" + pagerange = f""", {etree.tostring(cited_range[0]).decode("utf-8").strip()}""" + print("pagerange", pagerange) + else: + pagerange = ", {}".format(cited_range[0].text) + # clear the text + cited_range[0].text = "" elif cited_range[0].get("from") is not None: pagerange_start = cited_range[0].get("from") pagerange_end = cited_range[0].get("to") @@ -781,18 +790,27 @@ def handle_refs_default(ref): if cite_render == 'year': try: - formatted_citation = cited_data[citekey][1] + pagerange + if cited_range_children: + formatted_citation = etree.fromstring(f"{cited_data[citekey][1]}{pagerange}") + else: + formatted_citation = cited_data[citekey][1] + pagerange except KeyError: logging.error("Citekey %s was not found in the references. Exiting." % citekey) sys.exit(1) else: try: - formatted_citation = cited_data[citekey][0] + pagerange + if cited_range_children: + formatted_citation = etree.fromstring(f"{cited_data[citekey][0]}{pagerange}") + else: + formatted_citation = cited_data[citekey][0] + pagerange except KeyError: logging.error("Citekey %s was not found in the references. Exiting." % citekey) sys.exit(1) - sanitized_citation_string = sanitize_data_string(formatted_citation) + if cited_range_children: + pass + else: + sanitized_citation_string = sanitize_data_string(formatted_citation) if olddesign == True: """ 0: + htoc = True + else: + htoc = False + return htoc +# def has_text_or_children ends here