def process_inline_equations(xml_tree, xml_chapters, template_path, temp_dir, output_dir):
    """Typeset all inline TeX formulas and rewrite them as ``EOAineq`` elements.

    Every ``t:formula`` element with ``rend='inline'`` and ``notation='tex'``
    found in the chapters is turned in place into an ``EOAineq`` element
    carrying the cleaned TeX source (``TeX`` attribute) and the name of the
    PNG rendering (``src`` attribute). All formulas are then typeset in one
    xelatex run (one formula per page), the resulting PDF is split into
    pages, and each page is cropped with ``PDFCROP_EXEC`` and converted to
    PNG with ``GM_PATH``.

    Args:
        xml_tree: tree that is searched once for inline formulas below
            ``t:body``; only used to decide whether there is any work.
        xml_chapters: iterable of chapter elements; formulas are numbered
            per chapter, chapters counting from 1.
        template_path: path object of the directory containing
            ``formula.tex``, a ``string.Template`` with a ``DERINHALT``
            placeholder.
        temp_dir: path object; ``formulas2png`` and ``xelatex-run.log``
            are created below it.
        output_dir: path object; the PNG files are written to its
            ``items`` subdirectory.

    Raises:
        subprocess.CalledProcessError: if xelatex, pdfcrop or gm exit with
            a non-zero status.
    """
    all_ineq = xml_tree.xpath(
        "//t:body//t:formula[@rend='inline' and @notation='tex']",
        namespaces=NS_MAP,
    )

    if not all_ineq:
        logging.info("Found no EOAineq. Continuing")
        return

    logging.info("Found " + str(len(all_ineq)) + " formulas")

    tex_equation, dict_eoa_ineqs = _collect_inline_equations(xml_chapters)

    if not dict_eoa_ineqs:
        # The tree contains formulas, but none of them sits inside the
        # given chapters: there is nothing to typeset.
        logging.warning("No inline equations found inside the chapters. Continuing")
        return

    _render_inline_equations(
        tex_equation, dict_eoa_ineqs, template_path, temp_dir, output_dir
    )
# def process_inline_equations ends here


def _collect_inline_equations(xml_chapters):
    """Rewrite the inline formulas of all chapters and gather their TeX code.

    Returns a tuple ``(tex_equation, dict_eoa_ineqs)``: the LaTeX source
    with one formula per page, and a dict mapping the running page number
    (starting at 1) to the base file name of that formula.
    """
    # NOTE(review): the last pattern starts with ``\s``, which the regex
    # engine reads as "whitespace", not as a literal ``|s``. The value is
    # kept exactly as it was; confirm whether ``\|slashed\|`` was intended.
    rebound_commands = (
        (r"\|ket\|", r"\\ket"),
        (r"\|braket\|", r"\\braket"),
        (r"\|bra\|", r"\\bra"),
        (r"\|Bra\|", r"\\Bra"),
        (r"\|Ket\|", r"\\Ket"),
        (r"\slashed\|", r"\\slashed"),
    )

    tex_pages = []
    dict_eoa_ineqs = {}
    eoa_ineq_running_order = 1

    for chapter_number, xml_chapter in enumerate(xml_chapters, start=1):
        logging.info("Chapter " + str(chapter_number))
        inline_equations = xml_chapter.xpath(
            ".//t:formula[@rend='inline' and @notation='tex']",
            namespaces=NS_MAP,
        )
        inline_equation_number = 1
        for equation in inline_equations:
            libeoaconvert.progress(
                inline_equation_number,
                len(inline_equations),
                "Processing EOAineq %s of %s."
                % (inline_equation_number, len(inline_equations)),
            )

            # ``text`` is None for an empty element; drop blank lines,
            # which are not allowed inside inline math.
            raw_formula = equation.text or ""
            tex_formula = os.linesep.join(
                [s for s in raw_formula.splitlines() if s]
            )

            if not tex_formula:
                # An empty formula would be emitted as ``$$``, which opens
                # display math and breaks the whole LaTeX run.
                logging.warning(
                    "Skipping empty inline formula in chapter %s", chapter_number
                )
                continue

            # this occurred once in sources 11
            tex_formula = tex_formula.replace(r"\@root", r"\root")

            # One formula per page, so the PDF can be split afterwards.
            tex_pages.append(f"${tex_formula}$\n\\newpage\n")

            # Remember which page belongs to which file name.
            equation_filename = f"EOAineq_{chapter_number}_{inline_equation_number}"
            dict_eoa_ineqs[eoa_ineq_running_order] = equation_filename

            # Prepare XML: clear() also drops the tail, so restore it.
            equation_tail = equation.tail
            equation.clear()
            equation.tag = "EOAineq"
            equation.tail = equation_tail
            equation.set("src", f"{equation_filename}.png")
            equation.set("TeX", tex_formula)

            eoa_ineq_running_order += 1
            inline_equation_number += 1

    tex_equation = "".join(tex_pages)
    for pattern, replacement in rebound_commands:
        tex_equation = re.sub(pattern, replacement, tex_equation)

    return tex_equation, dict_eoa_ineqs


def _render_inline_equations(tex_equation, dict_eoa_ineqs, template_path, temp_dir, output_dir):
    """Typeset the collected formulas and write one PNG per formula."""
    with open(template_path / "formula.tex", "r", encoding="utf-8") as formula_file:
        template = formula_file.read()

    formula_tmp_dir = temp_dir / "formulas2png"
    formula_tmp_dir.mkdir(parents=True, exist_ok=True)

    # Make directory items if it doesn't already exist
    items_dir = output_dir / "items"
    items_dir.mkdir(parents=True, exist_ok=True)

    eoainline_file_path = formula_tmp_dir / "EOAinline.tex"
    document = string.Template(template).substitute(DERINHALT=tex_equation)
    with open(eoainline_file_path, "w", encoding="utf-8") as tex_file:
        tex_file.write(document)

    number_of_equations = len(dict_eoa_ineqs)

    # The output of all external tools goes into one log file. Commands are
    # passed as argument lists so that paths containing spaces survive.
    with open(temp_dir / 'xelatex-run.log', 'w') as log_file:
        logging.info("Typesetting all Inline Equations")
        subprocess.check_call(
            ["xelatex", "--halt-on-error", str(eoainline_file_path.absolute())],
            cwd=formula_tmp_dir,
            stdout=log_file,
        )

        logging.info("Splitting all Inline Equations")
        libeoaconvert.pdf_burst("EOAinline.pdf", formula_tmp_dir)

        logging.info("Converting %s split pages into PNG-Images" % number_of_equations)
        for counter, (running_order, equation_filename) in enumerate(
                dict_eoa_ineqs.items(), start=1):
            libeoaconvert.progress(
                counter,
                number_of_equations,
                "Splitting all inline equations, image %s of %s"
                % (counter, number_of_equations),
            )

            burst_page = formula_tmp_dir / ("EOAformulas_" + str(running_order) + ".pdf")
            cropped_pdf = formula_tmp_dir / (equation_filename + ".pdf")
            png_file = items_dir / (equation_filename + ".png")

            subprocess.check_call(
                [PDFCROP_EXEC, str(burst_page.absolute()), str(cropped_pdf.absolute())],
                cwd=formula_tmp_dir,
                stdout=log_file,
            )
            subprocess.check_call(
                [GM_PATH, "convert", "-density", "144",
                 str(cropped_pdf.absolute()), str(png_file.absolute())],
                cwd=formula_tmp_dir,
                stdout=log_file,
            )
@@ -540,7 +657,7 @@ def resolve_choice_to_expan(element_containing_choice, original_element): # def resolve_choice_to_expan ends here -def transform_body(xml_tree, cited_data, translation_file, xml_hyperimagexml_code, olddesign, publang, hyperimage=False): +def transform_body(xml_tree, cited_data, translation_file, template_path, xml_hyperimagexml_code, olddesign, publang, temp_dir, output_dir, hyperimage=False): """Transform the body of XML document into IntermediateXML file""" def retain_original_contents(ref): @@ -1115,17 +1232,7 @@ def handle_refs_default(ref): # Math # ######## - # - inline_equations = xml_tree.xpath("//t:body//t:formula[@rend='inline' and @notation='tex']", namespaces=NS_MAP) - for equation in inline_equations: - tex_formula = equation.text - formula_tail = equation.tail - equation.clear() - equation.tag = "EOAineq" - equation.set("TeX", tex_formula) - png_file = "oh dear" - equation.set("src", png_file) - equation.tail = formula_tail + process_inline_equations(xml_tree, eoa_chapters, template_path, temp_dir, output_dir) block_equations = xml_tree.xpath("//t:body//t:ab[@type='equation']", namespaces=NS_MAP) for equation in block_equations: @@ -1544,6 +1651,7 @@ def main(): sys.exit(1) TRANSLATION_FILE = BASE_DIR / CONFIG['Auxiliaries']['TRANSLATIONS'] + TEMPLATE_PATH = BASE_DIR / CONFIG['Auxiliaries']['template_path'] CSL_FILE = BASE_DIR / CONFIG['Auxiliaries']['CSL_FILE'] xml_tree = etree.parse(str(INPUT_PATH)) @@ -1642,9 +1750,12 @@ def main(): tei_body, cited_dict, TRANSLATION_FILE, + TEMPLATE_PATH, HI_XML_FILE, args.eoa_classic, publang=publication_language, + temp_dir = TEMP_DIR, + output_dir = OUTPUT_DIR, hyperimage=args.hyperimage ) libeoaconvert.debug_xml_here(body_transformed_tmp, "body_transformed", DEBUG_DIR)