Permalink
Browse files

Chapter formatting and cleanup

  • Loading branch information...
kthoden committed May 29, 2018
1 parent 3e07e8c commit 60bee922acf62d0dea41a2f35d7ce97dd0c92396
Showing with 17 additions and 11 deletions.
  1. +17 −11 tei2imxml.py
View
@@ -618,17 +618,20 @@ def assign_ids(xml_tree, data):
sections = chapter.findall(".//div2")
section_counter = 1
for section in sections:
section_number = "%d.%d" % (chapter_counter, section_counter)
section.set("id-text", section_number)
secdict[section.get("id")] = section_number
subsection_counter = 1
subsections = section.findall(".//div3")
if section.get('rend') != "nonumber":
section_number = "%d.%d" % (chapter_counter, section_counter)
section.set("id-text", section_number)
secdict[section.get("id")] = section_number
subsection_counter = 1
subsections = section.findall(".//div3")
for subsection in subsections:
subsection_number = "%d.%d.%d" % (chapter_counter, section_counter, subsection_counter)
subsection.set("id-text", subsection_number)
secdict[subsection.get("id")] = subsection_number
subsection_counter += 1
if subsection.get('rend') != "nonumber":
subsection_number = "%d.%d.%d" % (chapter_counter, section_counter, subsection_counter)
subsection.set("id-text", subsection_number)
secdict[subsection.get("id")] = subsection_number
subsection_counter += 1
section_counter += 1
chapter_counter += 1
@@ -899,7 +902,10 @@ def fix_bib_entries(div_snippet):
element_counter += 1
assigned_ids, data_to_pickle = assign_ids(resulting_tree, data)
xml_root = assigned_ids.getroot()
updated_xml_tree = update_ids(assigned_ids)
xml_root = updated_xml_tree.getroot()
xml_root.tag = "Book"
@@ -931,7 +937,7 @@ def fix_bib_entries(div_snippet):
logging.info("Wrote %s." % output_filename)
# Remove namespace info (brute force solution)
bad_ns_string = 'xmlns="http://www.tei-c.org/ns/1.0"'
bad_ns_string = ' xmlns="http://www.tei-c.org/ns/1.0"'
with open(output_filename, 'r') as textfile:
xml_as_string = textfile.read()

0 comments on commit 60bee92

Please sign in to comment.