From 3e2e66f17ee515901018b052633695abf2a42296 Mon Sep 17 00:00:00 2001
From: kthoden
Date: Wed, 4 Mar 2020 11:55:41 +0100
Subject: [PATCH] Rudimentary index handling

---
 src/tei2imxml.py | 26 +++++++++++++++++++++++---
 1 file changed, 23 insertions(+), 3 deletions(-)

diff --git a/src/tei2imxml.py b/src/tei2imxml.py
index 86ab4c5..437ad4f 100755
--- a/src/tei2imxml.py
+++ b/src/tei2imxml.py
@@ -1085,11 +1085,31 @@ def handle_refs_default(ref):
     ############
     # Indexing #
     ############
-    # indexName="person">James Joyce"
     index_entries = xml_tree.xpath("//t:body//t:index", namespaces=NS_MAP)
     for entry in index_entries:
-        # hier weiter!
-        pass
+        # An <index> without a <term> has nothing to convert; skip it.
+        entry_content = entry.find("t:term", namespaces=NS_MAP)
+        if entry_content is None:
+            continue
+        # "keyword" (or a missing indexName) maps to plain EOAindex;
+        # any other index type is appended to the tag name.
+        index_type = entry.get("indexName")
+        tagname = "EOAindex"
+        if index_type and index_type != "keyword":
+            tagname += index_type
+        entry.tag = tagname
+        # clear() below also resets the tail, so remember it here.
+        entry_tail = entry.tail
+        # get() returns None rather than raising when sortKey is absent.
+        sortkey = entry_content.get("sortKey") or ""
+        # A sort key, if present, is prefixed as "sortkey@".
+        entry_text = f"{sortkey}@" if sortkey else ""
+        # markup ignored for now
+        entry_text += libeoaconvert.gettext(entry_content)
+        # re-assemble
+        entry.clear()
+        entry.text = entry_text
+        entry.tail = entry_tail
 
     ########
     # Math #