diff --git a/prepare_tei.py b/prepare_tei.py index 3e9d7e5..55b9024 100644 --- a/prepare_tei.py +++ b/prepare_tei.py @@ -17,6 +17,7 @@ from datetime import datetime import bibtexparser import argparse +import traceback logging.basicConfig(level=logging.INFO, format=' %(asctime)s - %(levelname)s - %(message)s') @@ -315,18 +316,29 @@ def main(): logging.info("Wrote %s." % debug_output) # check for wellformedness, read again as xml - xml_tree2 = etree.fromstring(mod_string2) + try: + xml_tree2 = etree.fromstring(mod_string2) + except etree.XMLSyntaxError: + print("\nXML syntax error when trying to parse modified tree. Dumped it to %s." % debug_output) + print("-"*60) + traceback.print_exc(file=sys.stdout) + print("-"*60) + exit() all_figures = xml_tree2.xpath("//t:graphic", namespaces=NS_MAP) - make_figure_elements(all_figures, args.figdir) + bad_figures = make_figure_elements(all_figures, args.figdir) + + report["bad_figures"] = bad_figures all_references = xml_tree2.xpath("//t:bibl", namespaces=NS_MAP) - parse_cited_range(all_references) + bad_pageref = parse_cited_range(all_references) + + report["bad_pageref"] = bad_pageref etree.strip_tags(xml_tree2, "tagtobestripped") tei_header = xml_tree2.xpath("//t:teiHeader", namespaces=NS_MAP) - fix_tei_header(tei_header[0]) + fix_tei_header(tei_header[0], str(args.bibfile)) dictChapters = {} dictEquations = {}