Skip to content

Commit

Permalink
Inline equations
Browse files Browse the repository at this point in the history
  • Loading branch information
kthoden committed Mar 4, 2020
1 parent 4767017 commit decf367
Showing 1 changed file with 123 additions and 12 deletions.
135 changes: 123 additions & 12 deletions src/tei2imxml.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import pickle
import shutil
import shlex
import string
import argparse
import configparser
from datetime import datetime
Expand Down Expand Up @@ -55,10 +56,126 @@
# EOA_SCRIPTS_DIR = \
# Path(os.environ['EOA_SCRIPTS_DIR'])

# External command-line tools that process_inline_equations calls to crop
# the per-formula PDF pages and to convert them into PNG images.
PDFCROP_EXEC = "pdfcrop" # part of the TeX Live distribution
GM_PATH = "gm" # invoked as "gm convert"; presumably GraphicsMagick - confirm

# TEI namespace URI, and the prefix map ("t") handed to xpath() calls.
ns_tei = "http://www.tei-c.org/ns/1.0"
NS_MAP = {"t" : ns_tei}


def process_inline_equations(xml_tree, xml_chapters, template_path, temp_dir, output_dir):
    """Render inline TeX formulas as PNG images and rewrite them as EOAineq elements.

    Every ``formula`` element with ``rend='inline'`` and ``notation='tex'``
    found in the chapters is modified in place: its tag becomes ``EOAineq``,
    the cleaned TeX source is stored in the ``TeX`` attribute and the name of
    the image file in ``src`` (``EOAineq_<chapter>_<number>.png``).

    All formulas are written into one TeX document (one formula per page,
    separated by ``\\newpage``), typeset with a single xelatex run, split
    into pages by ``libeoaconvert.pdf_burst`` and then, page by page, cropped
    with ``PDFCROP_EXEC`` and converted to PNG with ``GM_PATH convert``.

    :param xml_tree: tree that is queried once to find out whether the TEI
        body contains any inline TeX formula at all.
    :param xml_chapters: iterable of chapter elements; chapters are numbered
        from 1 in iteration order.
    :param template_path: directory (path object) containing ``formula.tex``,
        a ``string.Template`` with a ``DERINHALT`` placeholder.
    :param temp_dir: directory (path object) for ``xelatex-run.log`` and the
        ``formulas2png`` working directory.
    :param output_dir: directory (path object); the PNG files are written to
        its ``items`` subdirectory.
    :raises subprocess.CalledProcessError: if xelatex, pdfcrop or the image
        conversion exits with a non-zero status.
    :returns: None
    """
    all_ineq = xml_tree.xpath("//t:body//t:formula[@rend='inline' and @notation='tex']", namespaces=NS_MAP)

    if not all_ineq:
        logging.info("Found no EOAineq. Continuing")
        return

    logging.info("Found " + str(len(all_ineq)) + " formulas")

    # Position i (0-based) holds the file name of the formula that ends up on
    # page i + 1 of the typeset document.
    equation_filenames = []
    # One TeX snippet per formula; joined once instead of growing a string.
    tex_pages = []

    for chapter_number, xml_chapter in enumerate(xml_chapters, start=1):
        logging.info("Chapter " + str(chapter_number))
        inline_equations = xml_chapter.xpath(".//t:formula[@rend='inline' and @notation='tex']", namespaces=NS_MAP)
        equation_count = len(inline_equations)
        for inline_equation_number, equation in enumerate(inline_equations, start=1):
            libeoaconvert.progress(inline_equation_number, equation_count, "Processing EOAineq %s of %s." % (inline_equation_number, equation_count))

            if equation.text is None:
                # An element without text cannot be typeset; leave it
                # untouched instead of failing on None.splitlines().
                logging.warning("Skipping inline formula %s in chapter %s: no TeX content", inline_equation_number, chapter_number)
                continue

            # Drop empty lines: a blank line inside $...$ ends the paragraph
            # and breaks the TeX run.
            tex_formula = os.linesep.join([s for s in equation.text.splitlines() if s])

            # this occurred once in sources 11
            tex_formula = tex_formula.replace(r"\@root", r"\root")

            tex_pages.append(f"${tex_formula}$\n\\newpage\n")
            equation_filename = f"EOAineq_{chapter_number}_{inline_equation_number}"
            equation_filenames.append(equation_filename)

            # clear() also wipes the tail text, so save and restore it.
            equation_tail = equation.tail
            equation.clear()
            equation.tag = "EOAineq"
            equation.tail = equation_tail
            equation.set("src", f"{equation_filename}.png")
            equation.set("TeX", tex_formula)

    tex_equation = "".join(tex_pages)

    # Regex pattern -> replacement.  Raw strings keep exactly the same
    # pattern text as before while avoiding invalid-escape warnings.
    rebound_commands = {
        r"\|ket\|": r"\\ket",
        r"\|braket\|": r"\\braket",
        r"\|bra\|": r"\\bra",
        r"\|Bra\|": r"\\Bra",
        r"\|Ket\|": r"\\Ket",
        # NOTE(review): as a regex, "\s" matches a whitespace character, so
        # this matches "<whitespace>lashed|" and not "|slashed|" as the
        # other entries suggest.  Kept unchanged - confirm the intent.
        r"\slashed\|": r"\\slashed",
    }
    for pattern, replacement in rebound_commands.items():
        tex_equation = re.sub(pattern, replacement, tex_equation)

    with open(template_path / "formula.tex", "r", encoding="utf-8") as formula_file:
        template = formula_file.read()

    formula_tmp_dir = temp_dir / "formulas2png"
    os.makedirs(formula_tmp_dir, exist_ok=True)

    # Make directory items if it doesn't already exist
    items_dir = output_dir / "items"
    os.makedirs(items_dir, exist_ok=True)

    tex_document = string.Template(template).substitute(DERINHALT=tex_equation)
    eoainline_file_path = formula_tmp_dir / "EOAinline.tex"
    with open(eoainline_file_path, "w", encoding="utf-8") as tex_file:
        tex_file.write(tex_document)

    # Argument lists are assembled directly so that paths containing
    # whitespace stay single arguments; only the configured executables go
    # through shlex.split, so they may still carry extra options.
    with open(temp_dir / 'xelatex-run.log', 'w') as log_file:
        logging.info("Typesetting all Inline Equations")
        subprocess.check_call(
            ["xelatex", "--halt-on-error", str(eoainline_file_path.absolute())],
            cwd=formula_tmp_dir,
            stdout=log_file,
        )

        logging.info("Splitting all Inline Equations")
        libeoaconvert.pdf_burst("EOAinline.pdf", formula_tmp_dir)

        total_images = len(equation_filenames)
        logging.info("Converting %s split pages into PNG-Images" % total_images)
        for running_order, equation_filename in enumerate(equation_filenames, start=1):
            libeoaconvert.progress(running_order, total_images, "Splitting all inline equations, image %s of %s" % (running_order, total_images))

            # The split pages are expected under the name EOAformulas_<n>.pdf.
            split_pdf = (formula_tmp_dir / ("EOAformulas_" + str(running_order) + ".pdf")).absolute()
            cropped_pdf = (formula_tmp_dir / (equation_filename + ".pdf")).absolute()
            png_image = (items_dir / (equation_filename + ".png")).absolute()

            subprocess.check_call(
                shlex.split(PDFCROP_EXEC) + [str(split_pdf), str(cropped_pdf)],
                cwd=formula_tmp_dir,
                stdout=log_file,
            )
            subprocess.check_call(
                shlex.split(GM_PATH) + ["convert", "-density", "144", str(cropped_pdf), str(png_image)],
                cwd=formula_tmp_dir,
                stdout=log_file,
            )
# def process_inline_equations ends here

def get_publication_info(xml_tree, translation_file):
"""Query the TEI document for metadata fields.
Expand Down Expand Up @@ -540,7 +657,7 @@ def resolve_choice_to_expan(element_containing_choice, original_element):
# def resolve_choice_to_expan ends here


def transform_body(xml_tree, cited_data, translation_file, xml_hyperimagexml_code, olddesign, publang, hyperimage=False):
def transform_body(xml_tree, cited_data, translation_file, template_path, xml_hyperimagexml_code, olddesign, publang, temp_dir, output_dir, hyperimage=False):
"""Transform the body of XML document into IntermediateXML file"""

def retain_original_contents(ref):
Expand Down Expand Up @@ -1115,17 +1232,7 @@ def handle_refs_default(ref):
# Math #
########

# <EOAineq src="EOAineq_7_1.png" TeX="\sqrt{9} = 3"/>
inline_equations = xml_tree.xpath("//t:body//t:formula[@rend='inline' and @notation='tex']", namespaces=NS_MAP)
for equation in inline_equations:
tex_formula = equation.text
formula_tail = equation.tail
equation.clear()
equation.tag = "EOAineq"
equation.set("TeX", tex_formula)
png_file = "oh dear"
equation.set("src", png_file)
equation.tail = formula_tail
process_inline_equations(xml_tree, eoa_chapters, template_path, temp_dir, output_dir)

block_equations = xml_tree.xpath("//t:body//t:ab[@type='equation']", namespaces=NS_MAP)
for equation in block_equations:
Expand Down Expand Up @@ -1544,6 +1651,7 @@ def main():
sys.exit(1)

TRANSLATION_FILE = BASE_DIR / CONFIG['Auxiliaries']['TRANSLATIONS']
TEMPLATE_PATH = BASE_DIR / CONFIG['Auxiliaries']['template_path']
CSL_FILE = BASE_DIR / CONFIG['Auxiliaries']['CSL_FILE']

xml_tree = etree.parse(str(INPUT_PATH))
Expand Down Expand Up @@ -1642,9 +1750,12 @@ def main():
tei_body,
cited_dict,
TRANSLATION_FILE,
TEMPLATE_PATH,
HI_XML_FILE,
args.eoa_classic,
publang=publication_language,
temp_dir = TEMP_DIR,
output_dir = OUTPUT_DIR,
hyperimage=args.hyperimage
)
libeoaconvert.debug_xml_here(body_transformed_tmp, "body_transformed", DEBUG_DIR)
Expand Down

0 comments on commit decf367

Please sign in to comment.