#!/usr/bin/env python3
# -*- coding: utf-8; mode: python -*-
#
# Recovered from the git patch "Add XML test suite"
# (commit b4ee8f47bb93523c8375e64742b1c769a8d90ac6, kthoden,
# Thu, 17 Dec 2020 10:58:24 +0100), which creates this file as testxml.py.

"""A test suite for all sorts of XML modifications."""

from lxml import etree

NSMAP = {
    "tei": "http://www.tei-c.org/ns/1.0",
    "eoa": "http://www.edition-open-access.de/ns",
}


def remove_namespace(doc, namespace):
    """Remove namespace in the passed document in place.

    Every element reachable from ``doc`` (``doc`` itself included) whose tag
    starts with ``{namespace}`` has that prefix stripped from its tag.

    Args:
        doc: Element whose subtree is modified in place.
        namespace: Namespace URI, without the surrounding braces.

    Returns:
        None; the tree is changed in place.

    Thank you, https://homework.nwsnet.de/releases/45be/
    """
    ns = "{%s}" % namespace
    nsl = len(ns)
    # iter() is the supported replacement for the deprecated getiterator().
    for elem in doc.iter():
        tag = elem.tag
        # In lxml, comments and processing instructions do not carry a string
        # tag, so they are skipped instead of failing on .startswith().
        if isinstance(tag, str) and tag.startswith(ns):
            elem.tag = tag[nsl:]
# def remove_namespace ends here

# example (import ElementTree as ET)
# elem = ET.fromstring(some_xml_string)
# remove_namespace(elem, u'http://earth.google.com/kml/2.0')

# NOTE(review): as they stand, none of the sample strings below contain any
# markup, so etree.fromstring() will reject them. Presumably the XML tags
# were lost before this copy was made -- restore them from the original file.
ab_string = """A Text B TextB Tail."""
cde_string = """D Tail E TextE Tail."""
fragment_string = """

Klein anfangen weiter.

"""
fragment_string2 = """

Klein anfangen weiter.

"""

ab = etree.fromstring(ab_string)
cde = etree.fromstring(cde_string)
fragment = etree.fromstring(fragment_string)
fragment2 = etree.fromstring(fragment_string2)

xmlbit = fragment2

# Strips the TEI namespace from xmlbit and all of its descendants, so the
# children handled in the loop below need no second pass.
remove_namespace(xmlbit, NSMAP["tei"])

first_text = xmlbit.text
# .text is None when there is no text before the first child; fall back to an
# empty string so the literal "None" does not end up in the output.
output = first_text or ""
# list(element) replaces the deprecated getchildren().
fchildren = list(xmlbit)

for child in fchildren:
    # Serialize without the tail: text following the element would otherwise
    # end up after the root of the re-parsed snippet, which is not well-formed.
    ct = etree.tostring(child, with_tail=False).decode("utf-8")
    forget = ct
    print(forget, "forget")
    # Re-parsing yields a detached copy that can be modified freely.
    geton = etree.fromstring(forget)
    if geton.tag == "hi":
        print("yes")
        if geton.get("rend") == "italic":
            # Turn <hi rend="italic"> into <em>.
            geton.tag = "{}em"
            del geton.attrib["rend"]
            print(geton.tag)
    else:
        print("no", geton.tag)

    # Re-attach the tail so the text between the children is kept (and
    # escaped by the serializer).
    geton.tail = child.tail
    output += etree.tostring(geton).decode("utf-8")

print(output, "end")