From e2b1b6c691c262b4340418661236df1430c1a954 Mon Sep 17 00:00:00 2001 From: Klaus Thoden Date: Wed, 7 Mar 2018 11:52:15 +0100 Subject: [PATCH] Some additional changes --- tei2django.py | 2 +- transform_xml.py | 28 +++++++++++++++++++--------- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/tei2django.py b/tei2django.py index 45a5cc2..c0c9215 100644 --- a/tei2django.py +++ b/tei2django.py @@ -13,7 +13,7 @@ import mkimage OUTPUT_DIR = "./CONVERT" -XSL_FILE = "./data/tei2django.xsl" +XSL_FILE = os.path.dirname(sys.argv[0]) + "/data/tei2django.xsl" FIGURE_DIR = "./data/images" def process_formulas(xml_tree): diff --git a/transform_xml.py b/transform_xml.py index 87e0f17..2197cd4 100644 --- a/transform_xml.py +++ b/transform_xml.py @@ -295,7 +295,7 @@ def transform_body(xml_tree, cited_data, publang): hi.tag = "EOAdown" del hi.attrib["rend"] else: - logging.debug("The rend attribute in hi has the value %s. This is not supported" % rend_attribute) + logging.info("The rend attribute in hi has the value %s. This is not supported" % rend_attribute) return xml_tree # def transform_body ends here @@ -429,11 +429,6 @@ def add_bibliography(xml_tree, refs_for_bib_chapter): bib_data["source"] = xml_tree.xpath("//t:teiHeader/t:fileDesc/t:sourceDesc/t:listBibl/@source", namespaces=NS_MAP)[0] bib_data["type"] = xml_tree.xpath("//t:teiHeader/t:fileDesc/t:sourceDesc/t:listBibl/@type", namespaces=NS_MAP)[0] - command = "pandoc -o %sformatted_citations.html -t html --filter=pandoc-citeproc --bibliography=%s --csl=%s %s" % (OUTPUT_DIR + os.path.sep, bib_data["source"], CSL_FILE, OUTPUT_DIR + os.path.sep + "used_citations.md") - arguments = shlex.split(command) - logging.info("Using external command pandoc.") - subprocess.call(arguments) - # json interim_bib_json_file = "tmp-bib.json" citeproc_command = "pandoc-citeproc --bib2json %s" % bib_data["source"] @@ -442,12 +437,18 @@ def add_bibliography(xml_tree, refs_for_bib_chapter): citeproc_json = citeproc_process.stdout.read() citations_json = json.loads(citeproc_json) + if bib_data["type"] not in ["monograph", "anthology", "monograph-numeric", "anthology-numeric"]: + print("The bibliography type %s is not allowed." % bib_data["type"]) + + command = "pandoc -o %sformatted_citations.html -t html --filter=pandoc-citeproc --bibliography=%s --csl=%s %s" % (OUTPUT_DIR + os.path.sep, bib_data["source"], CSL_FILE, OUTPUT_DIR + os.path.sep + "used_citations.md") + arguments = shlex.split(command) + logging.info("Using external command pandoc.") + subprocess.call(arguments) + # refs for bib_chapter contains formatted reference entries cited_dict, refs_for_bib_chapter = format_citations(set(used_citekeys), citations_json) # render_reference(all_references, cited_dict) - assert(bib_data["type"] in ["monograph", "anthology", "monograph-numeric", "anthology-numeric"]) - tei_body = xml_tree.xpath("//t:text", namespaces=NS_MAP)[0] body_transformed = transform_body(tei_body, cited_dict, publang=publication_language) @@ -487,5 +488,14 @@ def add_bibliography(xml_tree, refs_for_bib_chapter): # resulting_tree.write(output_filename, pretty_print=True, xml_declaration=True,encoding="utf-8") final_tree.write(output_filename, pretty_print=True, xml_declaration=True,encoding="utf-8") - logging.debug("Wrote %s." % output_filename) + logging.info("Wrote %s." % output_filename) + + bad_ns_string = 'xmlns="http://www.tei-c.org/ns/1.0"' + with open(output_filename, 'r') as textfile: + xml_as_string = textfile.read() + + removed_namespace = xml_as_string.replace(bad_ns_string, "") + + with open(output_filename, 'w') as amended_textfile: + amended_textfile.write(removed_namespace) # finis