Skip to content
Permalink
b4ee8f47bb
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
63 lines (48 sloc) 1.88 KB
#!/usr/bin/env python3
# -*- coding: utf-8; mode: python -*-
"""A test suite for all sorts of XML modifications."""
from lxml import etree
# Maps the namespace prefixes used by this module to their namespace URIs.
NSMAP = {
    "tei": "http://www.tei-c.org/ns/1.0",
    "eoa": "http://www.edition-open-access.de/ns",
}
def remove_namespace(doc, namespace):
    """Remove namespace in the passed document in place.

    Every element below (and including) ``doc`` whose tag is qualified with
    ``namespace`` -- i.e. starts with ``{namespace}`` -- gets that prefix
    stripped, leaving only the local name.  Tags in other namespaces,
    attributes, text and tails are not touched.

    Args:
        doc: An element or element tree offering ``iter()``.
        namespace: The namespace URI to strip, without curly braces.

    Returns:
        None; ``doc`` is modified in place.

    Thank you, https://homework.nwsnet.de/releases/45be/
    """
    ns = "{%s}" % namespace
    # iter() replaces the deprecated getiterator().
    for elem in doc.iter():
        tag = elem.tag
        # Comments and processing instructions carry a callable rather than
        # a string as their tag; they have no namespace to strip.
        if isinstance(tag, str) and tag.startswith(ns):
            elem.tag = tag[len(ns):]
# def remove_namespace ends here
# example (import ElementTree as ET)
# elem = ET.fromstring(some_xml_string)
# remove_namespace(elem, u'http://earth.google.com/kml/2.0')
# --- Scratch experiment ---------------------------------------------------
# Strips the TEI namespace from a small fragment and rewrites
# <hi rend="italic"> children as <em>, printing intermediate results.
# NOTE(review): this copy of the file had lost its indentation; the nesting
# below is reconstructed from the statements themselves -- please confirm.
ab_string = """<a>A Text <b>B Text</b>B Tail.</a>"""
cde_string = """<c><d/>D Tail <e>E Text</e>E Tail.</c>"""
fragment_string = """<p xmlns="http://www.tei-c.org/ns/1.0" xmlns:tei="http://www.tei-c.org/ns/1.0">Klein <hi rend="italic">anfangen</hi> weiter.</p>"""
fragment_string2 = """<p xmlns="http://www.tei-c.org/ns/1.0">Klein <hi rend="italic">anfangen</hi> weiter.</p>"""

ab = etree.fromstring(ab_string)
cde = etree.fromstring(cde_string)
fragment = etree.fromstring(fragment_string)
fragment2 = etree.fromstring(fragment_string2)

xmlbit = fragment2
remove_namespace(xmlbit, NSMAP['tei'])

# An element without leading text has ``text`` set to None; fall back to ""
# so the literal string "None" never ends up in the output.
first_text = xmlbit.text or ""
output = first_text

# list(element) replaces the deprecated getchildren().
fchildren = list(xmlbit)
for child in fchildren:
    remove_namespace(child, NSMAP['tei'])
    # Serialised with its tail (the default) the child is markup followed by
    # loose text, which is not a well-formed document and cannot be parsed
    # again.  Keep that form for the debug print only and re-parse the
    # tail-less serialisation instead.
    ct = etree.tostring(child).decode('utf-8')
    forget = etree.tostring(child, with_tail=False).decode('utf-8')
    # Both strings come from the same serialiser, so whatever follows the
    # element markup in ``ct`` is the (already escaped) tail.
    tail_markup = ct[len(forget):]
    print(ct, "forget")
    geton = etree.fromstring(forget)
    if geton.tag == "hi":
        print("yes")
        if geton.get("rend") == "italic":
            # presumably "{}em" means "em in no namespace" -- TODO confirm
            # against the lxml documentation.
            geton.tag = "{}em"
            del geton.attrib["rend"]
        print(geton.tag)
    else:
        print("no", geton.tag)
    # The re-parsed element is a document root and has no tail, so the
    # original tail is appended explicitly.
    output += etree.tostring(geton).decode('utf-8') + tail_markup
print(output, "end")