diff --git a/tei2imxml.py b/tei2imxml.py index 5270c60..5daa47b 100644 --- a/tei2imxml.py +++ b/tei2imxml.py @@ -618,17 +618,20 @@ def assign_ids(xml_tree, data): sections = chapter.findall(".//div2") section_counter = 1 for section in sections: - section_number = "%d.%d" % (chapter_counter, section_counter) - section.set("id-text", section_number) - secdict[section.get("id")] = section_number - subsection_counter = 1 - subsections = section.findall(".//div3") + if section.get('rend') != "nonumber": + section_number = "%d.%d" % (chapter_counter, section_counter) + section.set("id-text", section_number) + secdict[section.get("id")] = section_number + + subsection_counter = 1 + subsections = section.findall(".//div3") for subsection in subsections: - subsection_number = "%d.%d.%d" % (chapter_counter, section_counter, subsection_counter) - subsection.set("id-text", subsection_number) - secdict[subsection.get("id")] = subsection_number - subsection_counter += 1 + if subsection.get('rend') != "nonumber": + subsection_number = "%d.%d.%d" % (chapter_counter, section_counter, subsection_counter) + subsection.set("id-text", subsection_number) + secdict[subsection.get("id")] = subsection_number + subsection_counter += 1 section_counter += 1 chapter_counter += 1 @@ -899,7 +902,10 @@ def fix_bib_entries(div_snippet): element_counter += 1 assigned_ids, data_to_pickle = assign_ids(resulting_tree, data) - xml_root = assigned_ids.getroot() + + updated_xml_tree = update_ids(assigned_ids) + + xml_root = updated_xml_tree.getroot() xml_root.tag = "Book" @@ -931,7 +937,7 @@ def fix_bib_entries(div_snippet): logging.info("Wrote %s." % output_filename) # Remove namespace info (brute force solution) - bad_ns_string = 'xmlns="http://www.tei-c.org/ns/1.0"' + bad_ns_string = ' xmlns="http://www.tei-c.org/ns/1.0"' with open(output_filename, 'r') as textfile: xml_as_string = textfile.read()