More command line options

The highest level of document division (part or chapter) can now be specified on the command line.
EditionOpenAccess · May 29, 2018 · ba29277 · ba29277
1 parent 914ef82
commit ba29277
Showing 1 changed file with 58 additions and 24 deletions.
diff --git a/fix_tei.py b/fix_tei.py
@@ -271,25 +271,44 @@ def cleanup_xml(xml_tree):
     return xml_tree
 # def cleanup_xml ends here
 
-def fix_document_structure(xml_tree):
+def fix_document_structure(xml_tree, highest_level):
     """Insert div types"""
 
     # Unsure here, but maybe have a rule that one file is one chapter,
     # so the highest level would be sections
 
-    chapter_divs = xml_tree.xpath("//t:body/t:div", namespaces=NS_MAP)
-    section_divs = xml_tree.xpath("//t:body/t:div/t:div", namespaces=NS_MAP)
-    subsection_divs = xml_tree.xpath("//t:body/t:div/t:div/t:div", namespaces=NS_MAP)
-    subsubsection_divs = xml_tree.xpath("//t:body/t:div/t:div/t:div/t:div", namespaces=NS_MAP)
-
-    for chapter in chapter_divs:
-        chapter.set("type", "chapter")
-    for section in section_divs:
-        section.set("type", "section")
-    for subsection in subsection_divs:
-        subsection.set("type", "subsection")
-    for subsubsection in subsubsection_divs:
-        subsubsection.set("type", "subsubsection")
+    if highest_level == "chapter":
+        chapter_divs = xml_tree.xpath("//t:body/t:div", namespaces=NS_MAP)
+        section_divs = xml_tree.xpath("//t:body/t:div/t:div", namespaces=NS_MAP)
+        subsection_divs = xml_tree.xpath("//t:body/t:div/t:div/t:div", namespaces=NS_MAP)
+        subsubsection_divs = xml_tree.xpath("//t:body/t:div/t:div/t:div/t:div", namespaces=NS_MAP)
+
+        for chapter in chapter_divs:
+            chapter.set("type", "chapter")
+        for section in section_divs:
+            section.set("type", "section")
+        for subsection in subsection_divs:
+            subsection.set("type", "subsection")
+        for subsubsection in subsubsection_divs:
+            subsubsection.set("type", "subsubsection")
+
+    elif highest_level == "part":
+        part_divs = xml_tree.xpath("//t:body/t:div", namespaces=NS_MAP)
+        chapter_divs = xml_tree.xpath("//t:body/t:div/t:div", namespaces=NS_MAP)
+        section_divs = xml_tree.xpath("//t:body/t:div/t:div/t:div", namespaces=NS_MAP)
+        subsection_divs = xml_tree.xpath("//t:body/t:div/t:div/t:div/t:div", namespaces=NS_MAP)
+        subsubsection_divs = xml_tree.xpath("//t:body/t:div/t:div/t:div/t:div/t:div", namespaces=NS_MAP)
+
+        for part in part_divs:
+            part.set("type", "part")
+        for chapter in chapter_divs:
+            chapter.set("type", "chapter")
+        for section in section_divs:
+            section.set("type", "section")
+        for subsection in subsection_divs:
+            subsection.set("type", "subsection")
+        for subsubsection in subsubsection_divs:
+            subsubsection.set("type", "subsubsection")
 
     # section_divs = xml_tree.xpath("//t:body/t:div", namespaces=NS_MAP)
     # subsection_divs = xml_tree.xpath("//t:body/t:div/t:div", namespaces=NS_MAP)
@@ -377,11 +396,18 @@ def main():
     """The main bit"""
 
     parser = argparse.ArgumentParser()
+    parser.add_argument("-d", "--dochighestorder", default='chapter', help="Specify which divider is at the highest level, possible values: part, chapter. Default is chapter.")
+    parser.add_argument("-f", "--finalize", help="Finalize a publication.", action="store_true")
     parser.add_argument("teifile", help="Output from oxgarage/metypeset, an TEI XML file.")
     parser.add_argument("bibfile", help="The bibliography database of the publication.")
     parser.add_argument("figdir", help="The directory that contains the figures belonging to the publication.")
     args = parser.parse_args()
 
+    highest_level = args.dochighestorder
+    if highest_level not in ["chapter", "part"]:
+        sys.stderr.write("Specify either 'chapter' or 'part' as highest level. Exiting")
+        sys.exit()
+
     if not os.path.exists(TMP_DIR):
         os.makedirs(TMP_DIR)
 
@@ -432,18 +458,23 @@ def main():
         print("-"*60)
         exit()
 
-    all_figures = xml_tree2.xpath("//t:graphic", namespaces=NS_MAP)
-    bad_figures = make_figure_elements(all_figures, args.figdir)
+    if args.finalize:
+        pass
+    else:
+        all_figures = xml_tree2.xpath("//t:graphic", namespaces=NS_MAP)
+        bad_figures = make_figure_elements(all_figures, args.figdir)
 
-    report["bad_figures"] = bad_figures
+        report["bad_figures"] = bad_figures
 
     all_references = xml_tree2.xpath("//t:bibl", namespaces=NS_MAP)
-    bad_pageref = parse_cited_range(all_references)
-
-    report["bad_pageref"] = bad_pageref
+    if args.finalize:
+        pass
+    else:
+        bad_pageref = parse_cited_range(all_references)
+        report["bad_pageref"] = bad_pageref
 
-    tei_header = xml_tree2.xpath("//t:teiHeader", namespaces=NS_MAP)
-    fix_tei_header(tei_header[0], str(args.bibfile))
+        tei_header = xml_tree2.xpath("//t:teiHeader", namespaces=NS_MAP)
+        fix_tei_header(tei_header[0], str(args.bibfile))
 
     etree.strip_tags(xml_tree2, "tagtobestripped")
 
@@ -473,14 +504,17 @@ def main():
         # Pickle the 'data' dictionary using the highest protocol available.
         pickle.dump(data_to_pickle, f, pickle.HIGHEST_PROTOCOL)
 
-    fix_document_structure(xml_tree2)
+    fix_document_structure(xml_tree2, highest_level)
     # output
     output = args.teifile.replace(".xml", "-out.xml")
     tree = etree.ElementTree(xml_tree2)
     tree.write(output, pretty_print=True, xml_declaration=True,encoding="utf-8")
     logging.info("Wrote %s." % output)
 
-    evaluate_report(report)
+    if args.finalize:
+        pass
+    else:
+        evaluate_report(report)
 # def main ends here
 
 if __name__ == '__main__':