From 3c9bf3213504937e556135dfc63b9e5a79f4792d Mon Sep 17 00:00:00 2001
From: kthoden
Date: Wed, 27 May 2020 12:56:39 +0200
Subject: [PATCH] Observe only main index entry for keyword

Select only top-level t:index elements (those without a t:index
ancestor), so nested index entries are not processed as separate
entries.

Read the sortKey attribute without raising when it is absent:
- entry.find() returns None if the entry has no t:term child, so guard
  the attribute lookup instead of calling .get() on None.
- Element.get() returns None for a missing attribute, so test the value
  for truthiness instead of calling len() on it. An empty sortKey is
  treated like a missing one and produces no "@" prefix.
---
 src/tei2imxml.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/tei2imxml.py b/src/tei2imxml.py
index 7688d46..f45d2f5 100755
--- a/src/tei2imxml.py
+++ b/src/tei2imxml.py
@@ -1272,7 +1272,7 @@ def handle_refs_default(ref):
     ############
     # Indexing #
     ############
-    index_entries = xml_tree.xpath("//t:body//t:index", namespaces=NS_MAP)
+    index_entries = xml_tree.xpath("//t:body//t:index[not(ancestor::t:index)]", namespaces=NS_MAP)
     for entry in index_entries:
         index_type = entry.get("indexName")
         tagname = "EOAindex"
@@ -1281,13 +1281,13 @@ def handle_refs_default(ref):
         entry.tag = tagname
         entry_tail = entry.tail
         entry_content = entry.find("t:term", namespaces=NS_MAP)
-        try:
-            sortkey = entry_content.get("sortKey")
-        except AttributeError:
-            sortkey = ""
         entry_text = ""
-        if len(sortkey) > 0:
+        # entry.find() returns None when the index entry has no t:term child
+        sortkey = None if entry_content is None else entry_content.get("sortKey")
+        if not sortkey:
+            logging.info("No sortkey found")
+        else:
             entry_text = f"{sortkey}@"
         # markup ignored for now
         remainder = libeoaconvert.gettext(entry_content)
         entry_text += remainder