Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Add XML test suite
  • Loading branch information
kthoden committed Dec 17, 2020
1 parent 4747bb5 commit b4ee8f4
Showing 1 changed file with 63 additions and 0 deletions.
63 changes: 63 additions & 0 deletions testxml.py
@@ -0,0 +1,63 @@
#!/usr/bin/env python3
# -*- coding: utf-8; mode: python -*-

"""A test suite for all sorts of XML modifications."""

from lxml import etree

# Prefix-to-URI table for the XML namespaces referenced by this script.
NSMAP = dict(
    tei="http://www.tei-c.org/ns/1.0",
    eoa="http://www.edition-open-access.de/ns",
)

def remove_namespace(doc, namespace):
    """Strip ``namespace`` from the element tags of ``doc``, in place.

    Every element reachable through ``doc.iter()`` whose tag is written in
    Clark notation as ``{namespace}local`` is renamed to plain ``local``.
    Tags in other namespaces, attribute names, text and tails are left
    untouched.

    Thank you, https://homework.nwsnet.de/releases/45be/

    Args:
        doc: An element (or tree) exposing ``iter()``, e.g. from lxml or
            ``xml.etree.ElementTree``.
        namespace: The namespace URI to remove, without curly braces.

    Returns:
        None. ``doc`` is modified in place.
    """
    prefix = "{%s}" % namespace
    # iter() replaces getiterator(), which is deprecated in lxml and was
    # removed from the standard library's ElementTree in Python 3.9.
    for elem in doc.iter():
        tag = elem.tag
        # Comments and processing instructions carry a factory function
        # as their tag rather than a string; they have no namespace to strip.
        if isinstance(tag, str) and tag.startswith(prefix):
            elem.tag = tag[len(prefix):]
# def remove_namespace ends here

# example (import ElementTree as ET)
# elem = ET.fromstring(some_xml_string)
# remove_namespace(elem, u'http://earth.google.com/kml/2.0')

# Sample inputs: small XML snippets mixing element text and tail text.
# The two fragment strings differ only in that the first one additionally
# binds the "tei" prefix to the same URI as the default namespace.
ab_string = """<a>A Text <b>B Text</b>B Tail.</a>"""
cde_string = """<c><d/>D Tail <e>E Text</e>E Tail.</c>"""
fragment_string = """<p xmlns="http://www.tei-c.org/ns/1.0" xmlns:tei="http://www.tei-c.org/ns/1.0">Klein <hi rend="italic">anfangen</hi> weiter.</p>"""
fragment_string2 = """<p xmlns="http://www.tei-c.org/ns/1.0">Klein <hi rend="italic">anfangen</hi> weiter.</p>"""

# Parse every sample up front. Only fragment2 is used below; ab, cde and
# fragment are parsed but not referenced again in the visible script.
ab = etree.fromstring(ab_string)
cde = etree.fromstring(cde_string)
fragment = etree.fromstring(fragment_string)
fragment2 = etree.fromstring(fragment_string2)

# The element the rest of the script operates on.
xmlbit = fragment2

# Strip the TEI namespace from the tags in xmlbit.
remove_namespace(xmlbit, NSMAP['tei'])

# Start the output with the text that precedes the first child element.
# NOTE(review): the f-string renders a missing text (None) as the literal
# string "None" - confirm that is acceptable for elements without text.
first_text = xmlbit.text
output = f"{first_text}"
# NOTE(review): lxml documents getchildren() as deprecated in favour of
# list(element) - confirm before relying on it.
fchildren = xmlbit.getchildren()

# NOTE(review): indentation was lost in this copy of the file, so the extent
# of the loop body and which `if` the `else` belongs to cannot be determined
# from here - check against the original commit.
for child in fchildren:
# Presumably redundant: the call on xmlbit above should already have visited
# every descendant - verify.
remove_namespace(child, NSMAP['tei'])
# Serialise the child, then parse that string again as a standalone element.
# NOTE(review): lxml's tostring() includes the element's tail text by default
# (with_tail=True), so `forget` may contain text after the closing tag, which
# fromstring() below may reject - confirm.
# NOTE(review): the serialised child may also carry namespace declarations
# inherited from its parent, which would affect the tag comparison below -
# confirm.
ct = etree.tostring(child).decode('utf-8')
# ct[:] is a plain copy, so `forget` holds the same text as `ct`.
forget = f"{ct[:]}"
print(forget, "forget")
geton = etree.fromstring(forget)
# As far as the visible conditions show, the intended transformation is:
# <hi rend="italic"> is renamed to <em> and its rend attribute is removed.
if geton.tag == "hi":
print("yes")
if geton.get("rend") == "italic":
# NOTE(review): "{}em" presumably means "em" in no namespace - confirm that
# lxml accepts the empty-braces form.
geton.tag = "{}em"
del geton.attrib["rend"]
print(geton.tag)
else:
print("no", geton.tag)

# Append the serialised (possibly renamed) element to the accumulated output.
output += f"{etree.tostring(geton).decode('utf-8')}"

print(output, "end")

0 comments on commit b4ee8f4

Please sign in to comment.