Skip to content
Navigation Menu
Toggle navigation
Sign in
In this repository
All GitHub Enterprise
↵
Jump to
↵
No suggested jump to results
In this repository
All GitHub Enterprise
↵
Jump to
↵
In this organization
All GitHub Enterprise
↵
Jump to
↵
In this repository
All GitHub Enterprise
↵
Jump to
↵
Sign in
Reseting focus
You signed in with another tab or window.
Reload
to refresh your session.
You signed out in another tab or window.
Reload
to refresh your session.
You switched accounts on another tab or window.
Reload
to refresh your session.
Dismiss alert
{{ message }}
EditionOpenAccess
/
EOASkripts
Public
Notifications
You must be signed in to change notification settings
Fork
0
Star
2
Code
Issues
34
Pull requests
0
Actions
Security
Insights
Additional navigation options
Code
Issues
Pull requests
Actions
Security
Insights
Files
2191f31
docs
scripts
src
bibformat
config
data
stylesheets
utils
bib_add_keyword.py
create_tmpbib.py
eoatex2imxml.py
eoatex2pdf.py
find_chapters.py
fix_tei.py
idassigner.py
imxml2django.py
imxml2epub.py
imxml2tei.py
mkimage.py
parsezotero.py
process_eoa_latex.py
process_tei.py
tei2eoatex.py
tei2html.py
tei2imxml.py
tei_add_bibl.py
tei_pickle.py
.dockerignore
.gitignore
.init-container.sh
Dockerfile
LICENSE
README.md
dependencies.conf
docker-compose.yaml
requirements.txt
Breadcrumbs
EOASkripts
/
src
/
eoatex2imxml.py
Copy path
Blame
Blame
Latest commit
kthoden
Set reftype, type is necessary because of hyperimage in XML workflow
Apr 9, 2020
2191f31
·
Apr 9, 2020
History
History
executable file
·
1863 lines (1661 loc) · 84 KB
Breadcrumbs
EOASkripts
/
src
/
eoatex2imxml.py
Top
File metadata and controls
Code
Blame
executable file
·
1863 lines (1661 loc) · 84 KB
Raw
#!/usr/bin/env python3 # -*- coding: utf-8; mode: python -*- # Time-stamp: <2020-04-08 19:03:16 (kthoden)> """ Converts Latex files into a customized DocBook XML file. The program depends on the external program tralics for the conversion as well as xelatex, pdfcrop (part of latex distributions) and pandoc-citeproc for additional formatting. """ # license? __version__= "1.0" __author__ = "Klaus Thoden" __date__="20171205" # can the job done by BeautifulSoup also be done by lxml.html soupparser? # as described in http://infohost.nmt.edu/tcc/help/pubs/pylxml/web/soupparser.html # from lxml.html import soupparser from utils.libeoabibitem import Bibitem import utils.libeoaconvert as libeoaconvert from utils.load_config import load_config, exec_command, check_executable, copy_dir_overwrite, ToLog, ToFile import utils.bib2html as bib2html # imports import argparse from lxml import etree from bs4 import BeautifulSoup import glob import os import re import string import shlex import json import subprocess import sys import shutil import logging import pickle from pathlib import Path import time BASE_DIR = Path( __file__ ).resolve().parent SCRIPT_PATH = Path( __file__ ) SCRIPT_NAME = SCRIPT_PATH.stem DEFAULT_INPUT_DIR = \ Path(os.environ['INPUT_DIR'] if 'INPUT_DIR' in os.environ else './input') DEFAULT_OUTPUT_DIR = \ Path(os.environ['OUTPUT_DIR'] if 'OUTPUT_DIR' in os.environ else './output') DEFAULT_DEPENDENCIES_DIR = \ Path(os.environ['DEPENDENCIES_DIR'] if 'DEPENDENCIES_DIR' in os.environ else './dependencies') ##################### # Parsing arguments # ##################### parser = argparse.ArgumentParser( formatter_class=argparse.ArgumentDefaultsHelpFormatter ) parser.add_argument( "-c", "--config", default = BASE_DIR / "config" / "eoaconvert.cfg", help="Name of config file" ) parser.add_argument( "--log-level", default = "INFO", help="log level: choose between DEBUG, INFO, WARNING, ERROR, CRITICAL" ) parser.add_argument( "-f", "--filename", default = Path("*.tex"), type = Path, help = "xml file inside INPUT_DIR, or absolute path. Patterns like '*.xml' are also acceptable" ) parser.add_argument( "--latex-dir", type = Path, help="directory where to find the output generated by eoatex2pdf.py. default: {DEFAULT_OUTPUT_DIR}/INPUT_DIR/pdf" ) parser.add_argument( "-o", "--output-dir", type = Path, help = f"output directory. default: {DEFAULT_OUTPUT_DIR}/INPUT_DIR/imxml" ) parser.add_argument( "-t", "--trash", help="Remove temporary files." ) parser.add_argument( "-n", "--no-bib4ht", action="store_true", help="Skip creation of bibliography, rely on already present HTML files." ) parser.add_argument( "-classic", "--eoa-classic", action="store_true", help="Embed webdesign of EOA1.0 into XML" ) parser.add_argument( "INPUT_DIR", help = "directory containing the publication (including resources like pictures, etc.)", type = Path, ) args = parser.parse_args() CONFIG_FILE = args.config print("The configfile is %s." % CONFIG_FILE) # current biber is not compatible with this code # switch TeX distribution to TeXLive2016, # run biber_2.1 -O biber2-1n.bbl $INPUT to obtain this file BIBERFILE = "biber2-1.bbl" ######################## # Paths to executables # ######################## GM_PATH = "gm" TRALICS_PATH_EXEC = "tralics" PDFCROP_EXEC = "pdfcrop" # (part of texlive distribution): # TL_PATH = CONFIG['Executables']['texlive'] # TEXBIN_PATH = CONFIG['Executables']['texbin'] ############################ # Paths: ############################ INPUT_DIR = args.INPUT_DIR INPUT_PATH = args.filename INPUT_PATH = \ args.filename if args.filename . is_absolute() else list(INPUT_DIR . glob( str(args.filename) ))[0] OUTPUT_DIR = \ args.output_dir if args.output_dir is not None else (DEFAULT_OUTPUT_DIR / INPUT_DIR.resolve().stem) / "imxml" LATEX_DIR = \ args.latex_dir if args.latex_dir is not None else (DEFAULT_OUTPUT_DIR / INPUT_DIR.resolve().stem) / "pdf" LOG_DIR = OUTPUT_DIR / "log" LOG_FILE = (LOG_DIR / SCRIPT_NAME) . with_suffix( ".log" ) TEMP_DIR = OUTPUT_DIR / "tmp_files" DEBUG_DIR = OUTPUT_DIR / "debug" # where to output the xml file: XML_FILE = (OUTPUT_DIR / INPUT_PATH.name) .with_suffix( ".xml" ) BIB2HTML_FILENAME = "temp" ################################## # Reading the configuration file # ################################## CONFIG = load_config( CONFIG_FILE, args.log_level, LOG_FILE, ) ############################ # Paths to auxiliary files # ############################ TRALICS_PATH_LIB = BASE_DIR / CONFIG['Auxiliaries']['TRALICS_PATH_LIB'] TEMPLATE_PATH = BASE_DIR / CONFIG['Auxiliaries']['template_path'] SUPPORT_PATH = BASE_DIR / CONFIG['Auxiliaries']['support_path'] ################################################# # Checking for existance of tools and libraries # ################################################# # sanity check: logging.debug("PATH: {}".format( os.environ['PATH'] )) check_executable( GM_PATH ) check_executable( TRALICS_PATH_EXEC ) check_executable( PDFCROP_EXEC ) logging.info( "checking executables 'utils.bib2html' needs...:" ) bib2html.check_executables() if not os.path.exists(TRALICS_PATH_LIB): logging.error(f"Cannot find the Tralics configuration at {TRALICS_PATH_LIB}. Exiting.") sys.exit() ################################## # Setting up various directories # ################################## if not os.path.exists(OUTPUT_DIR): os.mkdir( OUTPUT_DIR ) if not os.path.exists(TEMP_DIR): os.mkdir( TEMP_DIR ) if not os.path.exists( DEBUG_DIR ): os.mkdir( DEBUG_DIR ) if os.path.exists(INPUT_DIR / "publication.cfg"): shutil.copy(INPUT_DIR / "publication.cfg", OUTPUT_DIR) logging.info(f"Copied from {INPUT_DIR}.") else: logging.error(f"Found no publication.cfg in {INPUT_DIR}. Exiting") sys.exit( 1 ) if os.path.exists(INPUT_DIR / "Cover.jpg"): shutil.copy(INPUT_DIR / "Cover.jpg", OUTPUT_DIR / "Cover.jpg") logging.info("Copied cover imag from input directory.") elif os.path.exists(INPUT_DIR / "images/Cover.jpg"): shutil.copy(INPUT_DIR / "images/Cover.jpg", OUTPUT_DIR / "Cover.jpg") logging.info("Copied cover image from publication directory.") else: logging.error("No coverfile found. You can create a temporary one with the mkimage.py script") sys.exit( 1 ) # Copy Support-Files from /Library/MPIWG to current directory shutil.copy(SUPPORT_PATH / "classes.dtd", OUTPUT_DIR) shutil.copy(SUPPORT_PATH / "mathml2-qname-1.mod", OUTPUT_DIR) shutil.copy(SUPPORT_PATH / "mathml2.dtd", OUTPUT_DIR) copy_dir_overwrite(SUPPORT_PATH / "html", (OUTPUT_DIR / "html")) copy_dir_overwrite(SUPPORT_PATH / "iso8879", (OUTPUT_DIR / "iso8879")) copy_dir_overwrite(SUPPORT_PATH / "iso9573-13", (OUTPUT_DIR / "iso9573-13")) copy_dir_overwrite(SUPPORT_PATH / "mathml", (OUTPUT_DIR / "mathml")) ######################################## # Certain functions for specific tasks # ######################################## def getchildren(xmlElement): """Include all subelements""" 1 + 1 return xmlElement # def getchildren ends here def TeX2PNG(LaTeXCode, Type, Chapter, Number): """Function to render LaTeX-Code into PNG-Files, returns PNG-Filename (epub & django)""" # logging.info( f"TeX2PNG({LaTeXCode}, {Type}, {Chapter}, {Number})" ) # Dictionary contains Type:begin/end Types = { "EOAineq" : ["$", "$"], "EOAequation" : ["\\begin{equation*}", "\\end{equation*}"], "EOAequationnonumber" : ["\\begin{equation*}", "\\end{equation*}"], "EOAequationarray" : ["\\begin{align*}", "\\end{align*}"], "EOAequationarraynonumber" : ["\\begin{align*}", "\\end{align*}"] } LaTeXCode = Types[Type][0] + LaTeXCode + Types[Type][1] dictRebindedCommands = { "\|ket\|" : r"\\ket", "\|braket\|" : r"\\braket", "\|bra\|" : r"\\bra", "\|Bra\|" : r"\\Bra", "\|Ket\|" : r"\\Ket", "\slashed\|" : r"\\slashed" } for strCommand in dictRebindedCommands.keys(): LaTeXCode = re.sub(strCommand, dictRebindedCommands[strCommand], LaTeXCode) # Open plain LaTeX-Template tmp = open(TEMPLATE_PATH / "formula.tex", "r") Template = tmp.read() tmp.close() # Get tmp-directory for this user account # tmpDir = os.getenv("TMPDIR") if not os.path.exists( TEMP_DIR / "formulas2png" ): os.mkdir( TEMP_DIR / "formulas2png" ) # use local tmpdir formula_tmp_dir = TEMP_DIR / "formulas2png" # Make directory items if it doesn't already exist items_dir = OUTPUT_DIR / "items" if not os.path.exists( items_dir ): os.mkdir( items_dir ) s = string.Template(Template) e = s.substitute(DERINHALT=LaTeXCode) tmpFile = formula_tmp_dir / (Type + "_" + str(Chapter) + "_" + str(Number) + ".tex" ) tmp = open(tmpFile, "w") tmp.write(e) tmp.close() Kommando = "xelatex --halt-on-error " + str(tmpFile.absolute()) Argumente = shlex.split(Kommando) # Redirecting stderr to save XeLaTeX-Output Datei = open(TEMP_DIR / 'xelatex-run.log', 'w') Ergebnis = subprocess.check_call(Argumente,cwd=formula_tmp_dir,stdout=Datei) if Ergebnis == 0: logging.info("Successfully converted formula " + Type + str(Chapter) + "_" + str(Number)) if Ergebnis == 1: logging.error("Failed to convert formula " + Type + str(Chapter) + "_" + str(Number)) Kommando = "{cmd} {arg1} {arg2}".format( cmd=PDFCROP_EXEC, arg1=(formula_tmp_dir / (Type + "_" + str(Chapter) + "_" + str(Number) + ".pdf")).absolute(), arg2=(formula_tmp_dir / (Type + "_" + str(Chapter) + "_" + str(Number) + "a.pdf")).absolute() ) # Kommando = PDFCROP_EXEC + " " + formula_tmp_dir + Type + "_" + str(Chapter) + "_" + str(Number) + ".pdf " + formula_tmp_dir + Type + "_" + str(Chapter) + "_" + str(Number) + "a.pdf" Argumente = shlex.split(Kommando) subprocess.check_call(Argumente,cwd=formula_tmp_dir,stdout=Datei) Kommando = "{cmd} convert -density 144 {arg1} {arg2}".format( cmd=GM_PATH, arg1 = (formula_tmp_dir / (Type + "_" + str(Chapter) + "_" + str(Number) + "a.pdf")).absolute(), arg2 = (items_dir / (Type + "_" + str(Chapter) + "_" + str(Number) + ".png")).absolute() ) # Kommando = GM_PATH + " convert -density 144 " + formula_tmp_dir + Type + "_" + str(Chapter) + "_" + str(Number) + "a.pdf " + os.getenv("PWD") + "/items/" + Type + "_" + str(Chapter) + "_" + str(Number) + ".png" Argumente = shlex.split(Kommando) subprocess.check_call(Argumente,cwd=formula_tmp_dir,stdout=Datei) # logging.info("TeX2PNG done") return LaTeXCode # def TeX2PNG ends here def make_latex_bibl_file( bib_database, set_citations, files ): """Construct a separate latex file with bibliography. The HTML bibliography is still not perfectly formatted like the LaTeX version. To check both files, a separate file is made that and which is then also converted in the various formats. """ string_citations = ", ".join(set_citations) for (input_path, output_path) in files: with open(input_path, "r") as tmp_latex: largebib_template = tmp_latex.read() largebib_template_string = string.Template( largebib_template ) largebib_replacement = largebib_template_string.substitute( INSERT_BIB_DATABASE = bib_database, INSERT_CITEKEYS = string_citations ) with open(output_path, "w") as tmp_latex: tmp_latex.write(largebib_replacement) # def make_latex_bibl_file ends here def sanitize_bibentry(bibEntry): """Some additional cleanup actions""" bibEntry = bibEntry.replace(". , ", ", ") bibEntry = bibEntry.replace("vols..", "vols.") return(bibEntry.strip()) # def sanitize_bibentry ends here def cleanup(): """Remove support files""" try: os.remove(OUTPUT_DIR / "classes.dtd") os.remove(OUTPUT_DIR / "mathml2-qname-1.mod") os.remove(OUTPUT_DIR / "mathml2.dtd") shutil.rmtree(OUTPUT_DIR / "html") shutil.rmtree(OUTPUT_DIR / "iso8879") shutil.rmtree(OUTPUT_DIR / "iso9573-13") shutil.rmtree(OUTPUT_DIR / "mathml") # shutil.rmtree((os.getcwd() + "/mathml2")) logging.debug("Removed support files.") except: logging.info("No temporary files were found.") # def cleanup ends here def reduce_element_tag(xml_element): """Remove attributes from root and make root a one letter tag""" xml_element.tag = "t" xml_element.attrib.clear() return xml_element # def reduce_element_tag ends here # Remove temporary files, neccessary for troubleshooting if args.trash == "temp": cleanup() sys.exit() ############################################################## # Preparing the main document # ############################################################## # .tex -> .xml def run_tralics( input_file, TRALICS_PATH_LIB, log_path, output_dir = OUTPUT_DIR, ): fixed_tex_file_path = output_dir / input_file.name libeoaconvert.enable_preamble( input_file, fixed_tex_file_path, "xml" ) # Convert TeX to XML via Tralics logging.info( f"executing {TRALICS_PATH_EXEC}. log file: {log_path}" ) exec_command( "{cmd} -confdir {conf_dir}/tralics_conf -config {conf_dir}/tralics.tcf -utf8 -utf8output -output_dir={output_dir} -input_dir={input_dir} -input_file={input_file}".format( cmd = TRALICS_PATH_EXEC, # log_file = log_filename, conf_dir = TRALICS_PATH_LIB, output_dir = output_dir, input_dir = input_file.parent, input_file = fixed_tex_file_path, ), output_to = ToFile( log_path ), exit_code_ok = lambda _: True ) # .tex -> .xml run_tralics( input_file = INPUT_PATH, TRALICS_PATH_LIB = TRALICS_PATH_LIB, log_path = LOG_DIR / SCRIPT_NAME / (INPUT_PATH.stem + "-tralics.log"), output_dir = OUTPUT_DIR ) def fix_underscore_and_eoatranscripted( xml_file ): # Fix underscore und fix EOAtranscripted tmpFile = open (xml_file, "r") tmpText = tmpFile.read() tmpFile.close() tmpText = re.sub(r"<error n='_' l='(.*?)' c='Missing dollar'/>", "_", tmpText) tmpText = re.sub(r"<error n='\\par' l='(.*?)' c='Invalid \\par command: paragraph not started'/>", "", tmpText) tmpFile = open (xml_file, "w") tmpFile.write(tmpText) tmpFile.close() fix_underscore_and_eoatranscripted( xml_file = XML_FILE ) def parseXML( input_file ): # Complete XML-Document in xmlTree xmlParser = etree.XMLParser(no_network=False,load_dtd=True) #resolve_entities=False xmlTree = etree.parse(str(input_file), xmlParser) # Cleanup of not needed tags in advance. To be cleaned: <error> etree.strip_elements(xmlTree, with_tail=False, *['error']) return xmlTree def xmltrans_move_eoalanguage( xmlChapters ): for intChapterNumber, xmlChapter in enumerate(xmlChapters, start=1): xmlLanguage = xmlChapter.find(".//EOAlanguage") if xmlLanguage is not None: strLanguage = xmlLanguage.text or "english" xmlChapter.set("language", strLanguage) xmlLanguage.text = None logging.info("The language of Chapter %d is %s." % (intChapterNumber, strLanguage)) xmlChapter = etree.strip_tags(xmlChapter, "EOAlanguage") ############################################################## # Numbering and Typesetting various Elements # ############################################################## # Figure out how to number (like essay or regular) def get_series( xmlTree ): try: return (xmlTree.find(".//EOAseries").text or "regular") except AttributeError: logging.error("\n\nYou are most probably using the preamble for the PDF output. Exiting.") sys.exit() def number_chapters( xmlChapters ): dictChapters = {} Chapternumber = 1 for xmlChapter in xmlChapters: if xmlChapter.get('rend') != "nonumber": Chapteruid = xmlChapter.get('id') dictChapters[Chapteruid] = str(Chapternumber) Chapternumber += 1 return dictChapters # EOAequation, EOAsubequation and EOAequationarray Numbering per Chapter def process_equations( xmlChapters, dictChapters, strNumberingType): dictEquations = {} for intChapterNumber, xmlChapter in enumerate(xmlChapters, start=1): intEquationnumber = 1 xmlDinge = xmlChapter.xpath(".//EOAequation | .//EOAequationarray | .//EOAsubequations") logging.info("Working on Chapter %d which contains %d formulæ." % (intChapterNumber, len(xmlDinge))) for xmlDing in xmlDinge: if xmlDing.tag == "EOAequationarray": # tmpNumberinArray is only being used for filename tmpNumberinArray = intEquationnumber # tmpDictNumberLabel used to insert the attribute value into <EOAequation> tmpDictNumberLabel = {} # Numbering is being done by <mtr>-Tags xmlMathmlrows = xmlDing.findall(".//{http://www.w3.org/1998/Math/MathML}mtr") for xmlMathmlrow in xmlMathmlrows: if "Label" in xmlMathmlrow.attrib: # Label dem Dictionary für die Euqations hinzufügen if xmlChapter.get("rend") != "nonumber": dictEquations[xmlMathmlrow.get("Label")] = str(dictChapters[xmlChapter.get('id')]) + "." + str(intEquationnumber) tmpDictNumberLabel[str(dictChapters[xmlChapter.get('id')]) + "." + str(intEquationnumber)] = xmlMathmlrow.get("Label") if xmlChapter.get("rend") == "nonumber": dictEquations[xmlMathmlrow.get("Label")] = str(intEquationnumber) tmpDictNumberLabel[str(intEquationnumber)] = xmlMathmlrow.get("Label") intEquationnumber += 1 xmlRohTeX = xmlDing.find(".//texmath") xmlNew = etree.Element('EOAequationarray') # Blank lines need to be removed otherwise TeX won't work textSourcecode = os.linesep.join([s for s in xmlRohTeX.text.splitlines() if s]) # \rowattributeunknown has to be deleted, its an artefact textSourcecode = re.sub("\\\\rowattributeunknown", "", textSourcecode) # Push Down loop to parse the raw code textFormel = "" boolBackslash = False for Buchstabe in textSourcecode: if Buchstabe == "\n": continue if Buchstabe == "\\": if boolBackslash == False: textFormel += Buchstabe boolBackslash = True continue if boolBackslash == True: textFormel += Buchstabe str_latexcode = TeX2PNG(textFormel, "EOAequationarray", str(intChapterNumber), str(tmpNumberinArray)) if xmlChapter.get("rend") != "nonumber": tmpXML = etree.Element("EOAequation", filename=("EOAequationarray" + "_" + str(intChapterNumber) + "_" + str(tmpNumberinArray) + ".png"), number=(str(dictChapters[xmlChapter.get('id')]) + "." + str(tmpNumberinArray))) if xmlChapter.get("rend") == "nonumber": tmpXML = etree.Element("EOAequation", filename=("EOAequationarray" + "_" + str(intChapterNumber) + "_" + str(tmpNumberinArray) + ".png"), number=(str(tmpNumberinArray))) tmpXML.set("TeX", str_latexcode) # Put Label into EOAequation if xmlChapter.get("rend") != "nonumber": strTempKey = str(dictChapters[xmlChapter.get('id')]) + "." + str(tmpNumberinArray) if xmlChapter.get("rend") == "nonumber": strTempKey = str(tmpNumberinArray) if strTempKey in tmpDictNumberLabel: #tmpXML.set("label", tmpDictNumberLabel[(str(dictChapters[xmlChapter.get('id')]) + "." + str(tmpNumberinArray))]) tmpXML.set("label", tmpDictNumberLabel[strTempKey]) xmlNew.append(tmpXML) textFormel = "" boolBackslash = False tmpNumberinArray += 1 continue if Buchstabe != "\\": textFormel += Buchstabe boolBackslash = False # Typeset last equation str_latexcode = TeX2PNG(textFormel, "EOAequationarray", str(intChapterNumber), str(tmpNumberinArray)) if xmlChapter.get("rend") != "nonumber": tmpXML = etree.Element("EOAequation", filename=("EOAequationarray" + "_" + str(intChapterNumber) + "_" + str(tmpNumberinArray) + ".png"), number=(dictChapters[xmlChapter.get('id')] + "." + str(tmpNumberinArray))) if xmlChapter.get("rend") == "nonumber": tmpXML = etree.Element("EOAequation", filename=("EOAequationarray" + "_" + str(intChapterNumber) + "_" + str(tmpNumberinArray) + ".png"), number=(str(tmpNumberinArray))) tmpXML.set("TeX", str_latexcode) # Put Label into EOAequation if xmlChapter.get("rend") != "nonumber": strTempKey = str(dictChapters[xmlChapter.get('id')]) + "." + str(tmpNumberinArray) if xmlChapter.get("rend") == "nonumber": strTempKey = str(tmpNumberinArray) if strTempKey in tmpDictNumberLabel: logging.info(strTempKey) logging.info(tmpDictNumberLabel) logging.info(dictChapters) tmpXML.set("label", tmpDictNumberLabel[strTempKey]) xmlNew.append(tmpXML) xmlDing.getparent().replace(xmlDing, xmlNew) # enclosing <p>-Tag of the Subequations is not wanted, transformed to <temp> to be deleted later on #xmlNew.getparent().tag = "temp" continue if xmlDing.tag == "EOAsubequations": # Enclosing <p>-Tag of the EOAsubequations needs to be removed xmlDing.getparent().tag = "temp" xmlSubequations = xmlDing.findall('.//EOAequation') listCharacters = ['a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z'] tmpI = 0 # Insert Number of this Subequation into dictEquations xmlAnchor = xmlDing.find(".//anchor") logging.info(xmlAnchor) if xmlChapter.get("rend") != "nonumber": dictEquations[xmlAnchor.get('id')] = dictChapters[xmlChapter.get('id')] + "." + str(intEquationnumber) if xmlChapter.get("rend") == "nonumber": dictEquations[xmlAnchor.get('id')] = str(intEquationnumber) # Delete anchor xmlAnchor.getparent().remove(xmlAnchor) for xmlSubequation in xmlSubequations: # Enclosing <p>-Tag of the EOAsubequation needs to be removed #xmlSubequation.getparent().tag = "temp" # Numbering Subequations with characters strSubequationNumber = str(intEquationnumber) + listCharacters[tmpI] tmpI += 1 textSourcecode = xmlSubequation.find('.//texmath').text # Blank lines need to be removed otherwise TeX won't work textSourcecode = os.linesep.join([s for s in textSourcecode.splitlines() if s]) str_latexcode = TeX2PNG(textSourcecode, "EOAequation", str(intChapterNumber), strSubequationNumber) xmlAnchor = xmlSubequation.find(".//anchor") # Clear Equation xmlSubequation.clear() if xmlChapter.get("rend") != "nonumber": xmlSubequation.set("filename", "EOAequation" + "_" + str(intChapterNumber) + "_" + strSubequationNumber + ".png") xmlSubequation.set("number", dictChapters[xmlChapter.get('id')] + "." + strSubequationNumber) xmlSubequation.set("uid", xmlAnchor.get('id')) if xmlChapter.get("rend") == "nonumber": xmlSubequation.set("filename", "EOAequation" + "_" + str(intChapterNumber) + "_" + strSubequationNumber + ".png") xmlSubequation.set("number", strSubequationNumber) xmlSubequation.set("uid", xmlAnchor.get('id')) xmlSubequation.set("id", xmlAnchor.get('id')) xmlSubequation.set("TeX", str_latexcode) # Insert Number of this Equation into dictEquations if strNumberingType == "regular": dictEquations[xmlAnchor.get('id')] = str(dictChapters[xmlChapter.get('id')]) + "." + strSubequationNumber if strNumberingType == "essay": dictEquations[xmlAnchor.get('id')] = strSubequationNumber # TODO: Anchor direkt unter Subequation aufheben, und der ersten Equation zuordnen, so dass auf 8.16 bei 8.16a und 8.16b verlinkt werden kann xmlDing.tag = "temp" # enclosing <p>-Tag of the Subequations is not wanted, transformed to <temp> to be deleted later on #xmlDing.getparent().tag = "temp" intEquationnumber += 1 continue if xmlDing.tag == "EOAequation": # Check, if Equation has already been found in a Subeqation xmlAnchor = xmlDing.find("anchor") if xmlAnchor == None: continue if xmlAnchor.get('id') in dictEquations: continue if xmlDing.find('.//texmath') is not None: textSourcecode = xmlDing.find('.//texmath').text else: textSourcecode = xmlDing.text # Blank lines need to be removed otherwise TeX won't work textSourcecode = os.linesep.join([s for s in textSourcecode.splitlines() if s]) str_latexcode = TeX2PNG(textSourcecode, "EOAequation", intChapterNumber, intEquationnumber) #print ("Got:") #print (str_latexcode) if xmlChapter.get("rend") != "nonumber": xmlDing.set("filename", "EOAequation" + "_" + str(intChapterNumber) + "_" + str(intEquationnumber) + ".png") xmlDing.set("number", dictChapters[xmlChapter.get('id')] + "." + str(intEquationnumber)) xmlDing.set("uid", xmlAnchor.get('id')) if xmlChapter.get("rend") == "nonumber": xmlDing.set("filename", "EOAequation" + "_" + str(intChapterNumber) + "_" + str(intEquationnumber) + ".png") xmlDing.set("number", str(intEquationnumber)) xmlDing.set("uid", xmlAnchor.get('id')) xmlDing.set("id", xmlAnchor.get('id')) xmlDing.set("TeX", str_latexcode) #xmlDing.getparent().replace(xmlDing, xmlNew) # Insert Number of this Equation into dictEquations if strNumberingType == "regular": dictEquations[xmlAnchor.get('id')] = \ str(dictChapters[xmlChapter.get('id')]) + "." + str(intEquationnumber) if strNumberingType == "essay": dictEquations[xmlAnchor.get('id')] = str(intEquationnumber) intEquationnumber += 1 continue return dictEquations def process_unnumbered_equations( xmlChapters ): for intChapterNumber, xmlChapter in enumerate(xmlChapters, start=1): tempImagenumber = 1 xmlDinge = xmlChapter.xpath(".//EOAequationnonumber | .//EOAequationarraynonumber") logging.info("Working on Chapter %d which contains %d formulæ." % (intChapterNumber, len(xmlDinge))) # print ("Working on Chapter " + str(intChapterNumber)) # print ("Es wurden " + str(len(xmlDinge)) + " Formeln gefunden") for xmlDing in xmlDinge: if xmlDing.tag == "EOAequationarraynonumber": if xmlDing.find(".//texmath") is not None: textSourcecode = xmlDing.find(".//texmath").text else: textSourcecode = xmlDing.text xmlNew = etree.Element('EOAequationarraynonumber') # Blank lines need to be removed otherwise TeX won't work textSourcecode = os.linesep.join([s for s in textSourcecode.splitlines() if s]) # \rowattributeunknown has to be deleted, its an artefact textSourcecode = re.sub("\\\\rowattributeunknown", "", textSourcecode) # TODO: HIer überprüfen, ob und inwiefern es ausreichend ist, EOAequationarraynonumber in eine Grafik zu packen str_latexcode = TeX2PNG(textSourcecode, "EOAequationarraynonumber", str(intChapterNumber), str(tempImagenumber)) xmlNew = etree.Element("EOAequationnonumber", filename=("EOAequationarraynonumber" + "_" + str(intChapterNumber) + "_" + str(tempImagenumber) + ".png")) xmlNew.set("TeX", str_latexcode) xmlDing.getparent().replace(xmlDing, xmlNew) tempImagenumber += 1 continue ''' # Push Down loop to parse the raw code (Wird vorerst nicht ausgeführt) textFormel = "" boolBackslash = False for Buchstabe in textSourcecode: if Buchstabe == "\n": continue if Buchstabe == "\\": if boolBackslash == False: textFormel += Buchstabe boolBackslash = True continue if boolBackslash == True: textFormel += Buchstabe str_latexcode = TeX2PNG(textFormel, "EOAequationarraynonumber", str(intChapterNumber), str(tempImagenumber)) tmpXML = etree.Element("EOAequationnonumber", filename=("EOAequationarraynonumber" + "_" + str(intChapterNumber) + "_" + str(tempImagenumber) + ".png")) tmpXML.set("TeX", str_latexcode) xmlNew.append(tmpXML) textFormel = "" boolBackslash = False tempImagenumber += 1 continue if Buchstabe != "\\": textFormel += Buchstabe boolBackslash = False # Typeset last equation str_latexcode = TeX2PNG(textFormel, "EOAequationarraynonumber", str(intChapterNumber), str(tempImagenumber)) tmpXML = etree.Element("EOAequationnonumber", filename=("EOAequationarraynonumber" + "_" + str(intChapterNumber) + "_" + str(tempImagenumber) + ".png")) tmpXML.set("TeX", str_latexcode) xmlNew.append(tmpXML) xmlDing.getparent().replace(xmlDing, xmlNew) continue ''' if xmlDing.tag == "EOAequationnonumber": textSourcecode = xmlDing.find('.//texmath').text # Blank lines need to be removed otherwise TeX won't work textSourcecode = os.linesep.join([s for s in textSourcecode.splitlines() if s]) str_latexcode = TeX2PNG(textSourcecode, "EOAequationnonumber", str(intChapterNumber), tempImagenumber) # TODO: HTML-Code für das fertige Bild einfügen (Ist dieser ToDo noch aktuell?) xmlNew = etree.Element("EOAequationnonumber", filename=("EOAequationnonumber" + "_" + str(intChapterNumber) + "_" + str(tempImagenumber) + ".png")) xmlNew.set("TeX", str_latexcode) xmlDing.getparent().replace(xmlDing, xmlNew) tempImagenumber += 1 continue def process_inline_equations( xmlChapters ): intEOAineqRunningOrder = 1 dictEOAineqs = {} strTeXEquations = "" all_ineq = xmlTree.findall(".//EOAineq") # if all_ineq is not None: if len(all_ineq) > 0: logging.info("Found " + str(len(all_ineq)) + " formulas") for intChapterNumber, xmlChapter in enumerate(xmlChapters, start=1): logging.info("Chapter " + str(intChapterNumber)) xmlEOAineqs = xmlChapter.findall(".//EOAineq") intEOAineqnumber = 1 for xmlEOAineq in xmlEOAineqs: if xmlEOAineq.find('.//texmath') is not None: strSourceCode = xmlEOAineq.find('.//texmath').text else: strSourceCode = xmlEOAineq.text libeoaconvert.progress(intEOAineqnumber, len(xmlEOAineqs),"Processing EOAineq %s of %s." % (intEOAineqnumber, len(xmlEOAineqs))) strSourceCode = os.linesep.join([s for s in strSourceCode.splitlines() if s]) # this occurred once in sources 11 strSourceCode = strSourceCode.replace(r"\@root", r"\root") strTeXEquations = strTeXEquations + "$" + strSourceCode + "$\n\\newpage\n" # Add intEOAineqRunningOrder : Filename to dictionary strFilename = "EOAineq_" + str(intChapterNumber) + "_" + str(intEOAineqnumber) dictEOAineqs[intEOAineqRunningOrder] = strFilename # Prepare XML tmpTail = xmlEOAineq.tail xmlEOAineq.clear() xmlEOAineq.tail = tmpTail xmlEOAineq.set("src", strFilename + ".png") xmlEOAineq.set("TeX", strSourceCode) # increment integers intEOAineqRunningOrder += 1 intEOAineqnumber +=1 dictRebindedCommands = { "\|ket\|" : r"\\ket", "\|braket\|" : r"\\braket", "\|bra\|" : r"\\bra", "\|Bra\|" : r"\\Bra", "\|Ket\|" : r"\\Ket", "\slashed\|" : r"\\slashed" } for strCommand in dictRebindedCommands.keys(): strTeXEquations = re.sub(strCommand, dictRebindedCommands[strCommand], strTeXEquations) tmp = open(TEMPLATE_PATH / "formula.tex", "r") Template = tmp.read() tmp.close() # Get tmp-directory for this user account # tmpDir = os.getenv("TMPDIR") # use local tmpdir formula_tmp_dir = TEMP_DIR / "formulas2png" if not os.path.exists( formula_tmp_dir ): os.mkdir( formula_tmp_dir ) # Make directory items if it doesn't already exist items_dir = OUTPUT_DIR / "items" if not os.path.exists( items_dir): os.mkdir( items_dir ) s = string.Template(Template) e = s.substitute(DERINHALT=strTeXEquations) tmpFile = formula_tmp_dir / "EOAinline.tex" tmp = open(tmpFile, "w") tmp.write(e) tmp.close() logging.info("Typesetting all Inline Equations") Kommando = "xelatex --halt-on-error " + str(tmpFile.absolute()) Argumente = shlex.split(Kommando) Datei = open(TEMP_DIR / 'xelatex-run.log', 'w') Ergebnis = subprocess.check_call(Argumente,cwd=formula_tmp_dir,stdout=Datei) logging.info("Splitting all Inline Equations") libeoaconvert.pdf_burst("EOAinline.pdf", formula_tmp_dir) logging.info("Converting %s split pages into PNG-Images" % len(dictEOAineqs.keys())) counter_dictEOAineqs = 1 for intRunningOrder in dictEOAineqs.keys(): # provide more status information here in output! libeoaconvert.progress(counter_dictEOAineqs, len(dictEOAineqs.keys()),"Splitting all inline equations, image %s of %s" % (counter_dictEOAineqs, len(dictEOAineqs.keys()))) Kommando = "{cmd} {arg1} {arg2}".format( cmd = PDFCROP_EXEC, arg1 = (formula_tmp_dir / ("EOAformulas_" + str(intRunningOrder) + ".pdf")).absolute(), arg2 = (formula_tmp_dir / (dictEOAineqs[intRunningOrder] + ".pdf")).absolute() ) # Kommando = PDFCROP_EXEC + " " + formula_tmp_dir + "EOAformulas_" + str(intRunningOrder) + ".pdf " + formula_tmp_dir + dictEOAineqs[intRunningOrder] + ".pdf" Argumente = shlex.split(Kommando) subprocess.check_call(Argumente,cwd=formula_tmp_dir,stdout=Datei) Kommando = "{cmd} convert -density 144 {arg1} {arg2}".format( cmd = GM_PATH, arg1 = (formula_tmp_dir / (dictEOAineqs[intRunningOrder] + ".pdf")).absolute(), arg2 = (items_dir / (dictEOAineqs[intRunningOrder] + ".png")).absolute() ) #Kommando = GM_PATH + " convert -density 144 " + formula_tmp_dir + dictEOAineqs[intRunningOrder] + ".pdf " + os.getenv("PWD") + "/items/" + dictEOAineqs[intRunningOrder] + ".png" Argumente = shlex.split(Kommando) subprocess.check_call(Argumente,cwd=formula_tmp_dir,stdout=Datei) counter_dictEOAineqs += 1 else: logging.info("Found no EOAineq. Continuing") def process_eoachem( xmlChapters ): int_EOAchem_running_order = 1 dictEOAchems = {} str_tex_chem = "" all_chem = xmlTree.findall(".//EOAchem") # if all_chem is not None: if len(all_chem) > 0: logging.info("Found " + str(len(all_chem)) + " chemical formulas") for intChapterNumber, xmlChapter in enumerate(xmlChapters, start=1): logging.info("Chapter " + str(intChapterNumber)) xmlEOAchems = xmlChapter.findall(".//EOAchem") int_EOAchem_number = 1 for xml_EOAchem in xmlEOAchems: str_chem_text = xml_EOAchem.text libeoaconvert.progress(int_EOAchem_number, len(xmlEOAchems),"Processing EOAchem %s of %s." % (int_EOAchem_number, len(xmlEOAchems))) str_chem_text = os.linesep.join([s for s in str_chem_text.splitlines() if s]) str_tex_chem = str_tex_chem + "\ce{" + str_chem_text + "}\n\\newpage\n" # Add int_EOAchem_running_order : Filename to dictionary strFilename = "EOAchem_" + str(intChapterNumber) + "_" + str(int_EOAchem_number) dictEOAchems[int_EOAchem_running_order] = strFilename # Prepare XML tmpTail = xml_EOAchem.tail xml_EOAchem.clear() xml_EOAchem.tail = tmpTail xml_EOAchem.set("src", strFilename + ".png") xml_EOAchem.set("TeX", str_chem_text) # increment integers int_EOAchem_running_order += 1 int_EOAchem_number +=1 tmp = open(TEMPLATE_PATH / "formula.tex", "r") Template = tmp.read() tmp.close() # Get tmp-directory for this user account # tmpDir = os.getenv("TMPDIR") # use local tmpdir formula_tmp_dir = TEMP_DIR / "formulas2png/" # Make directory items if it doesn't already exist items_dir = OUTPUT_DIR / "items" if not os.path.exists( items_dir ): os.mkdir( items_dir ) s = string.Template(Template) e = s.substitute(DERINHALT=str_tex_chem) tmpFile = formula_tmp_dir / "EOAchem.tex" tmp = open(tmpFile, "w") tmp.write(e) tmp.close() logging.info("Typesetting all inline Chemical formulas") Kommando = "xelatex --halt-on-error " + str(tmpFile.absolute()) Argumente = shlex.split(Kommando) Datei = open(TEMP_DIR / 'xelatex-run.log', 'w') Ergebnis = subprocess.check_call(Argumente,cwd=formula_tmp_dir,stdout=Datei) logging.info("Splitting all Inline Chemical formulas") libeoaconvert.pdf_burst("EOAchem.pdf", formula_tmp_dir) logging.info("Converting %s split pages into PNG-Images" % len(dictEOAchems.keys())) counter_dictEOAchems = 1 for intRunningOrder in dictEOAchems.keys(): # provide more status information here in output! libeoaconvert.progress(counter_dictEOAchems, len(dictEOAchems.keys()),"Splitting all inline equations, image %s of %s" % (counter_dictEOAchems, len(dictEOAchems.keys()))) Kommando = "{cmd} {arg1} {arg2}".format( cmd=PDFCROP_EXEC, arg1=(formula_tmp_dir / ("EOAformulas_" + str(intRunningOrder) + ".pdf")).absolute(), arg2=(formula_tmp_dir / (dictEOAchems[intRunningOrder] + ".pdf")).absolute() ) # Kommando = PDFCROP_EXEC + " " + formula_tmp_dir + "EOAformulas_" + str(intRunningOrder) + ".pdf " + formula_tmp_dir + dictEOAchems[intRunningOrder] + ".pdf" Argumente = shlex.split(Kommando) subprocess.check_call(Argumente,cwd=formula_tmp_dir,stdout=Datei) Kommando = "{cmd} convert -density 144 {arg1} {arg2}".format( cmd=GM_PATH, arg1 = (formula_tmp_dir / (dictEOAchems[intRunningOrder] + ".pdf")).absolute(), arg2 = (items_dir / (dictEOAchems[intRunningOrder] + ".png")).absolute() ) # Kommando = GM_PATH + " convert -density 144 " + formula_tmp_dir + dictEOAchems[intRunningOrder] + ".pdf " + os.getenv("PWD") + "/items/" + dictEOAchems[intRunningOrder] + ".png" Argumente = shlex.split(Kommando) subprocess.check_call(Argumente,cwd=formula_tmp_dir,stdout=Datei) counter_dictEOAchems += 1 else: logging.info("Found no EOAchem. Continuing") def process_figures( xmlChapters ): dictFigures = {} for xmlChapter in xmlChapters: Figurenumber = 1 xmlFigures = xmlChapter.xpath(".//EOAfigure | .//EOAlsfigure") for xmlFigure in xmlFigures: shortcaption = xmlFigure.find("shortcaption") if shortcaption and shortcaption.text == "1": shortcaption.tag = "temp" xmlAnchor = xmlFigure.find("anchor") # Check if Figure is in a numbered Chapter # Otherwise just put the Number of the figure if xmlChapter.get('id'): dictFigures[xmlAnchor.get('id')] = \ str(dictChapters[xmlChapter.get('id')]) + "." + str(Figurenumber) else: dictFigures[xmlAnchor.get('id')] = str(Figurenumber) xmlFigure.set("id", xmlAnchor.get("id")) Figurenumber += 1 return dictFigures def number_theorems( xmlChapters ): dictTheorems = {} for xmlChapter in xmlChapters: xmlTheorems = xmlChapter.findall(".//theorem") for xmlTheorem in xmlTheorems: strUID = xmlTheorem.get("id") strNumber = xmlTheorem.get("id-text") dictTheorems[strUID] = strNumber return dictTheorems def number_sections( xmlChapters ): dictSections = {} intChapterNumber = 1 for xmlChapter in xmlChapters: strUID = xmlChapter.get("id") #dictChapters[strUID] = str(intChapterNumber) xmlSections = xmlChapter.findall("div2") intSectionNumber = 1 for xmlSection in xmlSections: if xmlSection.get("rend") == "nonumber": continue strUID = xmlSection.get("id") if xmlChapter.get("rend") != "nonumber": dictSections[strUID] = str(intChapterNumber) + "." + str(intSectionNumber) if xmlChapter.get("rend") == "nonumber": dictSections[strUID] = str(intSectionNumber) xmlSubsections = xmlSection.findall("div3") intSubsectionNumber = 1 for xmlSubsection in xmlSubsections: if xmlSubsection.get("rend") == "nonumber": continue strUID = xmlSubsection.get("id") if xmlChapter.get("rend") != "nonumber": dictSections[strUID] = str(intChapterNumber) + "." + str(intSectionNumber) + "." + str(intSubsectionNumber) if xmlChapter.get("rend") == "nonumber": dictSections[strUID] = str(intSectionNumber) + "." + str(intSubsectionNumber) intSubsectionNumber += 1 intSectionNumber += 1 if xmlChapter.get("rend") != "nonumber": intChapterNumber += 1 return dictSections def number_footnotes( xmlChapters ): dictFootnotes = {} for xmlChapter in xmlChapters: intNoteNumber = 1 xmlFootnotes = xmlChapter.findall(".//note") for xmlFootnote in xmlFootnotes: strUID = xmlFootnote.get("id") dictFootnotes[strUID] = str(intNoteNumber) intNoteNumber += 1 return dictFootnotes def number_lists( xmlChapters ): dictLists = {} for xmlChapter in xmlChapters: xmlListitems = xmlChapter.findall(".//item") for xmlListitem in xmlListitems: strUID = xmlListitem.get("id") strItemNumber = xmlListitem.get("id-text") dictLists[strUID] = strItemNumber return dictLists def process_page_references( latex_dir, set_citations ): dictPagelabels = {} listAuxFiles = glob.glob( str(latex_dir / "*.aux") ) if len(listAuxFiles) == 0: raise( Exception("No aux file found. Exiting") ) else: for strFile in listAuxFiles: tmpFile = open(strFile, "r") lines = tmpFile.readlines() tmpFile.close() for line in lines: # hyperref makes the lines much much longer # \newlabel{facsim033}{{\caption@xref {facsim033}{ on input line 37}}{231}{Secondary Literature}{figure.caption.87}{}} # \newlabel{BL}{{\caption@xref {facsim033}{ on input line 37}}{231}{Secondary Literature}{figure.caption.87}{}} # \newlabel{BL}{{1.1}{4}{Forschungsüberblick zur Literatur über Alvarus Thomas}{section.1.1}{}} # \newlabel{BL}{{1.1}{4}} matched_label = re.match(r'\\newlabel\{(.*?)\}\{\{(.*?)\}\{(.*?)\}\}\{(.*?)\}', line) # matchObjectLabel = re.match(r'\newlabel\{(.*?)\}', line) if matched_label: # matchObjectPage = re.match(r'(.*?)\}\{(\d{1,})\}\}$', line) # if matchObjectPage: dictPagelabels[matched_label.group(1)] = matched_label.group(4) # parsing out information on cite works matched_citation = re.match(r'\\abx@aux@cite{(.*?)}', line) if matched_citation is not None: set_citations.add(matched_citation.group(1)) return dictPagelabels def number_tables( xmlChapters ): dictTables = {} for intChapterNumber, xmlChapter in enumerate(xmlChapters): intTableNumber = 1 xmlTables = xmlChapter.findall(".//EOAtable") for xmlTable in xmlTables: xmlTableLabel = xmlTable.find(".//EOAtablelabel") xml_table_id = xmlTable.xpath(".//table/@id")[0] strTableCaption = xmlTable.find(".//EOAtablecaption").text if strTableCaption == "nonumber": continue if not xmlTableLabel.text or xmlTableLabel.text == "": xmlTableLabel.text = "table" + str(intChapterNumber) + str(intTableNumber) # the question is vexed: label or id! # strUID = xmlTableLabel.text strUID = xml_table_id logging.debug(f"XML table label: {strUID}") if xmlChapter.get("rend") != "nonumber": dictTables[strUID] = dictChapters[xmlChapter.get('id')] + "." + str(intTableNumber) elif xmlChapter.get("rend") == "nonumber": dictTables[strUID] = str(intTableNumber) intTableNumber += 1 logging.debug(f"Tables in this chapter: {dictTables}.") return dictTables def process_references(xmlTree): """Use id rather than label string in references""" eoarefs = xmlTree.xpath("//EOAref") for ref in eoarefs: ref.set("type", "number") ref_element = ref.find("./ref") target_attribute = ref.xpath("./ref/@target") ref_label = ref.find("./Label").text if not target_attribute: logging.debug(ref_label) try: the_label = xmlTree.xpath(f"//*[@place='{ref_label}']/@id")[0] except IndexError: ref_label = ref_label.replace("_", "") the_label = xmlTree.xpath(f"//*[@place='{ref_label}']/@id")[0] ref_element.set("target", the_label) else: pass # def process_references ends here ############################################################## # Preparing the Bibliography # ############################################################## def bibl_info_from_xml( xmlTree ): if xmlTree.find(".//EOAbibliographydatabase") is not None: bib_database = xmlTree.find(".//EOAbibliographydatabase").text else: return None bib_type = xmlTree.find(".//EOAbibliographytype").text if bib_type not in ["monograph", "anthology", "monograph-numeric", "anthology-numeric"]: raise( Exception(f"The bibtype must be one of {','.join[bib_type]}. Exiting") ) return (bib_type, bib_database) # .bib -> .json # (return json data as python dict) def write_json_bibl( bib_database, output_file, ): # the new solution: pandoc-citeproc # interim_bib_json_file = INPUT_PATH_NO_EXT + "-bib.json" citeproc_command = "pandoc-citeproc --bib2json %s" % bib_database + ".bib" logging.debug(f"Running citeproc with the following command: {citeproc_command}") citeproc_arguments = shlex.split(citeproc_command) env = os.environ.copy() env['LANG'] = 'en_US.UTF-8' citeproc_process = subprocess.Popen( citeproc_arguments, env = env, stdout=subprocess.PIPE ) citeproc_json = citeproc_process.stdout.read() citations_json = json.loads(citeproc_json) # for x in citations_json: # print(x["title"]) logging.debug(f"Dumping bib json file: {output_file}.") with open(output_file, 'w') as ibjf: json.dump(citeproc_json.decode('utf-8'), ibjf) return citations_json def insert_bibliographies( xml_element, language, citations_json, ## paths: bib_file, tex_template, temp_dir, output_file, log_dir, ): keyword_to_print_bibl_el = find_print_bibliography( xml_element, citations_json ) # if not language: # logging.warning("Missing a language. Falling back to English.") # language = "en" logging.info( "keywords:" ) logging.info( keyword_to_print_bibl_el.keys() ) if len(keyword_to_print_bibl_el) > 0: logging.debug(f"Found {libeoaconvert.plural(len(keyword_to_print_bibl_el), 'bibliography', plural='bibliographies')}.") citekeys = set(citekeys_from_xml( xml_element )) bib_keywords_sanity_check( keyword_to_print_bibl_el, citations_json, citekeys ) tei_bibl_file = (temp_dir / output_file.name) . with_suffix( ".tei" ) bib2html.bib2tei( bib_file = bib_file, citekeys = citekeys, tex_template = tex_template, language = language, temp_dir = temp_dir, output_file = tei_bibl_file, log_dir = log_dir, keywords = keyword_to_print_bibl_el.keys() ) bib2html.teibib_to_eoa1( tei_bibl_file, output_file = output_file ) htmlbib_tree = etree.parse( str(output_file) ) formatted_bibliographies = {} for keyword in keyword_to_print_bibl_el.keys(): formatted_bibliographies[keyword] = \ htmlbib_tree.xpath("//div[@id = 'refs' and @class = 'references']")[0] add_bibliography_to_xml( keyword_to_print_bibl_el, xml_element, citations_json, formatted_bibliographies ) else: # create an empty file logging.debug("No bibliography found.") # open(TEMP_DIR / (tmp_citation_filename + "_nocitations"), 'a').close() return keyword_to_print_bibl_el def citekeys_from_xml( chapter_element ): citekeys = chapter_element.xpath(".//citekey/text()") nocite_elements = chapter_element.xpath(".//nocite") logging.info( "citekeys: ") logging.info( len( citekeys ) ) if nocite_elements: logging.debug(f"Found {libeoaconvert.plural(len(nocite_elements), 'nocite command')}.") nocitekeys = [] for nocite in nocite_elements: keys = [x.strip() for x in nocite.text.split(",")] nocitekeys += keys nocite.tag = "elementtobestripped" logging.debug(f"Found {libeoaconvert.plural(len(set(nocitekeys)), 'nocite key')}.") logging.debug(f"Adding nocite keys to the other cite keys.") citekeys += nocitekeys return citekeys def find_print_bibliography( xml_context_element, citations_json ): """ return a dict keyword -> print_bibl_el """ ret_list = {} print_bibl_elements = xml_context_element.findall(".//EOAprintbibliography") if len(print_bibl_elements) == 0: logging.error("No EOAprintbibliography found. Maybe you commented it out? Exiting") sys.exit(0) else: for print_bibl_el in print_bibl_elements: keyword = print_bibl_el.get("keyword") if keyword is not None: ret_list[keyword] = print_bibl_el else: ret_list[''] = print_bibl_el return ret_list def bib_keywords_sanity_check( keyword_to_print_bibl_el, # print_bibl_elements, citations_json, citekeys ): if len(keyword_to_print_bibl_el) == 1 and list(keyword_to_print_bibl_el.keys())[0] != "": logging.warning("Found a keyword in bibliography although there is only one.") for keyword, print_bibl_element in keyword_to_print_bibl_el.items(): logging.debug( f"print_bibl_element: {etree.tostring(print_bibl_element)}" ) if len(keyword_to_print_bibl_el) > 1 and keyword == "": logging.error(f"No bibliography keyword found. Since there is more than one bibliography, all bibliographies are required to have a keyword. Exiting ") sys.exit(1) if keyword != "": logging.debug(f"Found bibliography keyword {keyword}") # just for debugging (?): logging.info("We want to collect the entries matching the keywords from the database.") citations_with_keyword = [] citations_without_keyword = [] # citations_with_keyword = [x["id"] for x in citations_json if keyword in x["keyword"]] for cj in citations_json: try: if keyword in cj["keyword"]: citations_with_keyword.append(cj["id"]) except KeyError: logging.warning(f"Index entry {cj['id']} has no keyword. As long as it is not cited, this is no problem.") citations_without_keyword.append(cj["id"]) pass logging.debug(f"Found {libeoaconvert.plural(len(citations_with_keyword), 'citation')} with keyword {keyword} in database.") cited_works_without_keyword = [x for x in citations_without_keyword if x in citekeys] if cited_works_without_keyword: logging.error(f"Found {libeoaconvert.plural(len(cited_works_without_keyword), 'work')} that are cited but have no keyword. Please assign one.") sys.exit(1) citations_to_format = [x for x in citations_with_keyword if x in citekeys] logging.debug(f"Found {libeoaconvert.plural(len(citations_to_format), 'citation')} with keyword {keyword} that are actually cited.") def add_bibliography_to_xml( keyword_to_print_bibl_el, chapter_element, citations_json, formatted_bibliographies ): for keyword, print_bibl_el in keyword_to_print_bibl_el.items(): formatted_bibl = formatted_bibliographies[keyword] logging.debug( f"insert formatted bibliography for keyword {keyword}:" ) logging.debug( etree.tostring( formatted_bibl ) ) fixed_entries = libeoaconvert.fix_bib_entries( formatted_bibl ) print_bibl_el.clear() if keyword != "" : print_bibl_el.set("keyword", keyword) print_bibl_el.tag = "div" bib_parent = print_bibl_el.getparent() bib_parent.tag = "div" for entry in fixed_entries: print_bibl_el.append(entry) ############################################################## # actual script # ############################################################## xmlTree = parseXML( input_file = XML_FILE ) process_references(xmlTree) xmlChapters = xmlTree.findall("//div1") logging.info("-----------------------------------------------------") logging.info("Move EOAlanguage from <head> into attribute of EOAchapter") xmltrans_move_eoalanguage( xmlChapters ) strSerie = get_series( xmlTree ) if strSerie == "Essay": strNumberingType = "essay" else: strNumberingType = "regular" set_citations = set() logging.info("-----------------------------------------------------") logging.info("Numbering Chapters") dictChapters = number_chapters( xmlChapters ) # print( dictSections ) logging.info("-----------------------------------------------------") logging.info("Processing .//EOAequation | .//EOAequationarray | .//EOAsubequations") dictEquations = process_equations( xmlChapters, dictChapters, strNumberingType ) logging.info("-----------------------------------------------------") logging.info("Processing .//EOAequationnonumber | .//EOAequationarraynonumber") process_unnumbered_equations( xmlChapters ) logging.info("-----------------------------------------------------") logging.info("Converting EOAineq") process_inline_equations( xmlChapters ) logging.info("-----------------------------------------------------") logging.info("Converting EOAchem") process_eoachem( xmlChapters ) logging.info("-----------------------------------------------------") logging.info("EOAFigure Numbering per Chapter") dictFigures = process_figures( xmlChapters ) logging.info("-----------------------------------------------------") logging.info( "Numbering Theorems" ) dictTheorems = number_theorems( xmlChapters ) logging.info("-----------------------------------------------------") logging.info("Section, Subsection,... Numbering per Chapter") dictSections = number_sections( xmlChapters ) logging.info("-----------------------------------------------------") logging.info("Numbering of Footnotes per Chapter") dictFootnotes = number_footnotes( xmlChapters ) # here was OU's footnote code, now in libeoaconvert # def get_bigfoot_data(chapter) # bigfoot needs to be integrated into # 'fndict': {'uid11': '2', 'uid12': '3', 'uid9': '1'}, logging.info("-----------------------------------------------------") logging.info("Numbering of Lists per Chapter") dictLists = number_lists( xmlChapters ) logging.info("-----------------------------------------------------") logging.info("Working on Page Numbers for References") dictPagelabels = process_page_references( LATEX_DIR, set_citations ) logging.info("page labels:") logging.info(dictPagelabels) logging.info("citations:") logging.info(set_citations) logging.info("-----------------------------------------------------") logging.info("Numbering of Tables per Chapter") dictTables = number_tables( xmlChapters ) ############################################################## # Preparing the Bibliography # ############################################################## bibl_info = bibl_info_from_xml( xmlTree ) if bibl_info is None: logging.warning("No bibliography database found.") elif args.no_bib4ht: logging.warning("Proceeding without typesetting bibligraphy.") else: (bib_type, bib_database) = bibl_info logging.debug(f"bib type is {bib_type}") logging.info( ".bib -> .json") citations_json = write_json_bibl( INPUT_DIR / bib_database, output_file = TEMP_DIR / (INPUT_PATH.stem + "-bib.json") ) ## only for debugging (?) make_latex_bibl_file( bib_database = bib_database, set_citations = set_citations, files = [ (TEMPLATE_PATH / "largebib.tex", DEBUG_DIR / "debug_onlybib.tex"), (TEMPLATE_PATH / "largebib-xml.tex", DEBUG_DIR / "debug_onlybib-xml.tex"), ] ) # If Bibliography-Type is monograph search for EOAbibliography and make it all if bib_type == "monograph": keyword_to_print_bibl_el = insert_bibliographies( xmlTree, # use language of the first chapter: xmlChapters[0].get( "language" ), citations_json, ## paths: bib_file = (INPUT_DIR / bib_database).with_suffix( ".bib" ), tex_template = BASE_DIR / "data" / "aux" / "bibliography4ht.tex", temp_dir = TEMP_DIR / "bib2html" / "monograph-tmp", output_file = TEMP_DIR / "bib2html" / "used_citations-monograph.html", log_dir = LOG_DIR / SCRIPT_NAME / "bib2html", ) # If Bibliography-Type is anthology search for EOAbibliography and make one per chapter elif bib_type == "anthology": for intChapterNumber, xmlChapter in enumerate(xmlChapters, start = 1): logging.debug(f"Looking at chapter {intChapterNumber}.") keyword_to_print_bibl_el = insert_bibliographies( xmlChapter, xmlChapter.get("language"), citations_json, ## paths: bib_file = (INPUT_DIR / bib_database).with_suffix( ".bib" ), tex_template = BASE_DIR / "data" / "aux" / "bibliography4ht.tex", temp_dir = TEMP_DIR / "bib2html" / "chapter_{:02d}-tmp".format( intChapterNumber ), output_file = TEMP_DIR / "bib2html" / "used_citations-anthology-chapter_{:02d}.html".format( intChapterNumber ), log_dir = LOG_DIR / SCRIPT_NAME / "bib2html" ) """ <div2 rend="nonumber"> <head>References</head> <div> <EOAprintbibliography/> <div> <p class="bibliography">Abril Castelló, Vidal (1987). Las Casas contra Vitoria, 1550–1552: La revolución de la duodécima réplica. Causas y consecuencias. <i>Revista de Indias</i> 47(179):83–101.</p> <p class="bibliography">Agrawal, Arun (1995). Dismantling the Divide Between Indigenous and Scientific Knowledge. <i>Development and Change</i> 26:413–439.</p> </div> </div> </div2> """ # for the time being strCitation = "" # Bibliographies are done, now for the citations # turn # <EOAciteyear><citekey>Renn2012a</citekey><page/></EOAciteyear> # into # <span rel="popover" class="citation" citekey="Renn2012a" data-toggle="popover" html="true" data-placement="bottom" data-title="Renn 2012" data-content="The Globalization of Knowledge in History">2012</span> if bib_type == "anthology" or bib_type == "monograph": if bib_type == "monograph": tmp_citation_filename = "used_citations-monograph" tmp_path_html = (TEMP_DIR / "bib2html" / tmp_citation_filename) .with_suffix( ".html" ) with open(tmp_path_html, "r") as formatted_citations: form_cit = BeautifulSoup(formatted_citations, "html.parser") intChapterNumber = 1 for xmlChapter in xmlChapters: logging.info("-----------------------------------------------------") logging.info("Processing References for Chapter " + str(intChapterNumber)) xmlCitations = xmlChapter.xpath(".//EOAciteauthoryear | .//EOAciteyear | .//EOAcitemanual") logging.debug(f"Found {libeoaconvert.plural(len(xmlCitations), 'citation')}.") if bib_type == "anthology": tmp_citation_filename = "used_citations-anthology-chapter_{:02d}".format(intChapterNumber) tmp_path_html = (TEMP_DIR / "bib2html" / tmp_citation_filename ) .with_suffix( ".html" ) logging.debug(f"Looking for file {tmp_path_html}.") # no_cite_path = TEMP_DIR / "bib2html" / (tmp_citation_filename + "_nocitations") if os.path.exists(tmp_path_html): with open(tmp_path_html, "r") as formatted_citations: form_cit = BeautifulSoup(formatted_citations, "html.parser") else: logging.debug("no citations in this chapter") intChapterNumber += 1 continue ''' elif os.path.exists(no_cite_path): logging.debug("no citations in this chapter") intChapterNumber += 1 continue ''' counter_citations = 1 for xmlCitation in xmlCitations: string_citekey = xmlCitation.find("./citekey").text libeoaconvert.progress(counter_citations, len(xmlCitations),"Processing reference %s of %s: %s" % (counter_citations, len(xmlCitations), string_citekey)) # If Bibliography-Type is anthology find Refsection for this Chapter ############### # old version # ############### # if bib_type == "anthology": # print("Yes, it's anthology time!") # xmlRefsections = xmlBibTree.findall(".//refsection") # for xmlRefsection in xmlRefsections: # if xmlRefsection.find(".//number").text == str(intChapterNumber): # break # xml_bib_entries = xmlRefsection.findall(".//entry") ################### # end old version # ################### # If Bibliography-Type is monograph find all entries, forget about refsection ############### # new version # ############### # string_citekey = xmlCitation.find("./citekey").text for entry in citations_json: if entry["id"] == string_citekey: current_citation = entry strTitle = current_citation["title"] # [1:-1] to remove parentheses around citations try: citeauthoryear_value = form_cit.select("#citeauthoryear ~ p > span[data-cites='%s']" % string_citekey)[0].text strTitle = form_cit.select("#citefull ~ p[data-cites='%s']" % string_citekey)[0].text # strTitle_element = form_cit.select("#citefull ~ p[data-cites='%s']" % string_citekey)[0] # citeauthoryear_value = form_cit.select("#citeauthoryear ~ p > span[data-cites='%s']" % string_citekey)[0].text[1:-1] except IndexError: logging.error("Could not find {}. Exiting.".format(string_citekey)) sys.exit() data_title_value = citeauthoryear_value if xmlCitation.tag == "EOAciteauthoryear": strCitation = citeauthoryear_value elif xmlCitation.tag == "EOAciteyear": strCitation = form_cit.select("#citeyear ~ p > span[data-cites='%s']" % string_citekey)[0].text elif xmlCitation.tag == "EOAcitemanual": cite_text = xmlCitation.find("citetext") if cite_text.getchildren(): tmp_string = xmlCitation.find("citetext") tmp_string = cite_text.getchildren()[0] strCitation = etree.tostring(tmp_string) # BAUSTELLE!!!!! # tmp_string2 = etree.tostring(tmp_string) # tmp_string3 = tmp_string2.decode() # strCitation = tmp_string3.replace("<", "<") else: strCitation = xmlCitation.find("citetext").text logging.info( "formatted citation: {}".format( strCitation ) ) if xmlCitation.find("./page") is not None and xmlCitation.find("./page").text is not None: pages_text = libeoaconvert.gettext(xmlCitation.find("./page")) strCitation = strCitation + ", " + pages_text data_title_value = data_title_value + ", " + pages_text # strCitation = strCitation + ", " + xmlCitation.find("./page").text ###################### # end of new version # ###################### # Hier den XML-Tag durch die Quellenangabe ersetzen tmpTail = xmlCitation.tail xmlCitation.clear() if args.eoa_classic: xmlCitation.tag = "span" xmlCitation.set("rel", "popover") xmlCitation.set("class", "citation") # Create Link to be used for website in a popover xmlCitation.set("data-toggle", "popover") xmlCitation.set("html", "true") xmlCitation.set("data-placement", "bottom") else: # this is taken from tei2imxml! """ <a class="publications-popup-text" data-title="Halliday and Resnick 1977, 232" data-content="Physics">Halliday and Resnick 1977, 232</a>""" xmlCitation.tag = "a" xmlCitation.set("class", "publications-popup-text") # citation.set("data-content", cited_data[citekey][2]) # citation.text = sanitized_citation_string # end of taken from imxml # str_title_element = reduce_element_tag(etree.fromstring(str(strTitle_element))) # element_string = etree.tostring(str_title_element) # strTitle = libeoaconvert.escape_xml(element_string[3:-4]) try: xmlCitation.set("data-content", strTitle) except: xmlCitation.set("data-content", "missing") xmlCitation.text = strCitation xmlCitation.tail = tmpTail xmlCitation.set("citekey", string_citekey) xmlCitation.set("data-title", data_title_value) counter_citations += 1 intChapterNumber += 1 # If Bibliography-Type is monograph-numeric search for EOAbibliography and make it all if bib_type == "monograph-numeric": if xmlTree.find(".//EOAprintbibliography") is not None: dictCitekeysNumbers = {} dictCitekeysTitles = {} xmlBibliography = xmlTree.find(".//EOAprintbibliography") xmlBibliography.clear() xmlBibliography.tag = "div" xmlBibliography.getparent().tag = "div" xml_bib_entries = xmlBibTree.findall(".//entry") intNumberOfEntry = 1 for xmlEntry in xml_bib_entries: # Go through all entries and assign a number to the citekey bibEntry = Bibitem(xmlEntry) strCitekey = bibEntry.citekey() dictCitekeysNumbers[strCitekey] = str(intNumberOfEntry) dictCitekeysTitles[strCitekey] = str(bibEntry.title()) strNewentry = "<p class=\"bibliography\">[" + str(intNumberOfEntry) + "] " + createBibEntryNumeric(bibEntry) + "</p>" xmlNew = etree.fromstring(strNewentry) xmlBibliography.append(xmlNew) intNumberOfEntry += 1 # Now for the references via EOAcitenumeric xmlCitenumerics = xmlTree.findall(".//EOAcitenumeric") for xmlCitenumeric in xmlCitenumerics: logging.info(etree.tostring(xmlCitenumeric)) strPopover = "" tmpCitekeys = xmlCitenumeric.find(".//citekey").text tmpCitekeys = re.sub(" ", "", tmpCitekeys) tmpCitekeys = re.sub("\n", "", tmpCitekeys) listCitekeys = re.split("\,", tmpCitekeys) listCitenumbers = [] for strCitekey in listCitekeys: listCitenumbers.append(dictCitekeysNumbers[strCitekey]) # Create Text to be used on the website in a popover strPopover = strPopover + "[" + dictCitekeysNumbers[strCitekey] + "] " + dictCitekeysTitles[strCitekey] + " " listCitenumbers = sorted(listCitenumbers, key=int) strResult = "[" + listCitenumbers[0] intNumberOfSequentialCite = 0 for i in range(1,len(listCitenumbers)): intPreviousCitenumber = int(listCitenumbers[i-1]) intCurrentCitenumber = int(listCitenumbers[i]) if i == (len(listCitenumbers)-1): if (intPreviousCitenumber + 1) == intCurrentCitenumber: if intNumberOfSequentialCite == 0: strResult = strResult + "," + str(listCitenumbers[i]) else: strResult = strResult + "-" + str(listCitenumbers[i]) intNumberOfSequentialCite == 0 else: strResult = strResult + "," + str(listCitenumbers[i]) break intNextCitenumber = int(listCitenumbers[i+1]) if (intCurrentCitenumber + 1) != intNextCitenumber: if intNumberOfSequentialCite != 0: strResult = strResult + "-" + str(intCurrentCitenumber) intNumberOfSequentialCite = 0 else: strResult = strResult + "," + str(intCurrentCitenumber) continue if (intPreviousCitenumber + 1) == intCurrentCitenumber: intNumberOfSequentialCite += 1 continue else: strResult = strResult + "," + str(intCurrentCitenumber) intNumberOfSequentialCite = 0 strResult = strResult + "]" xmlCitenumeric.text = strResult # Create Link to be used for website xmlCitenumeric.set("data-toggle", "popover") xmlCitenumeric.set("html", "true") xmlCitenumeric.set("data-content", strPopover) xmlCitenumeric.set("class","citation") xmlCitenumeric.set("data-placement", "bottom") xmlCitenumeric.set("data-title", strResult) # author is missing! # print("xmlBibliography") # print(etree.tostring(xmlBibliography)) # input() # Numeric citations for the individual chapters if bib_type == "anthology-numeric": intChapterNumber = 1 for xmlChapter in xmlChapters: logging.info("Processing Bibliography") if xmlChapter.find(".//EOAprintbibliography") is not None: dictCitekeysNumbers = {} dictCitekeysTitles = {} xmlBibliography = xmlChapter.find(".//EOAprintbibliography") #xmlBibliography.clear() xmlBibliography.tag = "div" xmlBibliography.getparent().tag = "div" xmlRefsections = xmlBibTree.findall(".//refsection") for xmlRefsection in xmlRefsections: if xmlRefsection.find(".//number").text == str(intChapterNumber): break xml_bib_entries = xmlRefsection.findall(".//entry") intNumberOfEntry = 1 for xmlEntry in xml_bib_entries: # Go through all entries and assign a number to the citekey bibEntry = Bibitem(xmlEntry) strCitekey = bibEntry.citekey() dictCitekeysNumbers[strCitekey] = str(intNumberOfEntry) dictCitekeysTitles[strCitekey] = str(bibEntry.title()) strNewentry = "<p class=\"bibliography\">[" + str(intNumberOfEntry) + "] " + createBibEntryNumeric(bibEntry) + "</p>" xmlNew = etree.fromstring(strNewentry) xmlBibliography.append(xmlNew) intNumberOfEntry += 1 # Now for the references via EOAcitenumeric xmlCitenumerics = xmlChapter.xpath(".//EOAcitenumeric | .//EOAciteauthoryear | .//EOAciteyear") logging.info("Found numeric citation in chapter " + str(intChapterNumber)) for xmlCitenumeric in xmlCitenumerics: strPopover = "" tmpCitekeys = xmlCitenumeric.find(".//citekey").text tmpCitekeys = re.sub(" ", "", tmpCitekeys) tmpCitekeys = re.sub("\n", "", tmpCitekeys) logging.info(tmpCitekeys) listCitekeys = re.split("\,", tmpCitekeys) listCitenumbers = [] for strCitekey in listCitekeys: logging.info(strCitekey) listCitenumbers.append(dictCitekeysNumbers[strCitekey]) # Create Text to be used on the website in a popover strPopover = strPopover + "[" + dictCitekeysNumbers[strCitekey] + "] " + dictCitekeysTitles[strCitekey] + " " listCitenumbers = sorted(listCitenumbers, key=int) strResult = "[" + listCitenumbers[0] intNumberOfSequentialCite = 0 for i in range(1,len(listCitenumbers)): intPreviousCitenumber = int(listCitenumbers[i-1]) intCurrentCitenumber = int(listCitenumbers[i]) if i == (len(listCitenumbers)-1): if (intPreviousCitenumber + 1) == intCurrentCitenumber: if intNumberOfSequentialCite == 0: strResult = strResult + "," + str(listCitenumbers[i]) else: strResult = strResult + "-" + str(listCitenumbers[i]) intNumberOfSequentialCite == 0 else: strResult = strResult + "," + str(listCitenumbers[i]) break intNextCitenumber = int(listCitenumbers[i+1]) if (intCurrentCitenumber + 1) != intNextCitenumber: if intNumberOfSequentialCite != 0: strResult = strResult + "-" + str(intCurrentCitenumber) intNumberOfSequentialCite = 0 else: strResult = strResult + "," + str(intCurrentCitenumber) continue if (intPreviousCitenumber + 1) == intCurrentCitenumber: intNumberOfSequentialCite += 1 continue else: strResult = strResult + "," + str(intCurrentCitenumber) intNumberOfSequentialCite = 0 strResult = strResult + "]" xmlCitenumeric.text = strResult # Create Link to be used for website in a popover xmlCitenumeric.set("data-toggle", "popover") xmlCitenumeric.set("data-placement", "bottom") xmlCitenumeric.set("data-title", " " + strResult) xmlCitenumeric.set("data-content", strPopover) xmlCitenumeric.set("class","citation") intChapterNumber += 1 # index must be child of div0! # def do_something_funny_about_indices(): # print_bibl_elements = xmlTree.findall(".//EOAprintbibliography") # if len(print_bibl_elements) > 0: # bib_parent_element = print_bibl_elements[0].getparent() # # bib_parent_element = xmlBibliographies[0].getparent() # upper_div = bib_parent_element.xpath("./ancestor::div1")[0] # previous_div0 = upper_div.getparent() # # possible culprit for not finding the index # # other_content = bib_parent_element.xpath(".//EOAtocentry | .//EOAprintpersonindex | .//EOAprintlocationindex | .//EOAprintindex") # other_content = upper_div.xpath(".//EOAtocentry | .//EOAprintpersonindex | .//EOAprintlocationindex | .//EOAprintindex") # if len(other_content) > 0: # for element in other_content: # previous_div0.append(element) # else: # logging.debug("Nothing funny about indices.") # do_something_funny_about_indices() def make_indices_child_of_div0(): """Move index commands to a higher location""" index_sections = xmlTree.xpath(" .//EOAprintpersonindex | .//EOAprintlocationindex | .//EOAprintindex") logging.debug(f"Found {libeoaconvert.plural(len(index_sections), 'index section')}.") if index_sections: for section in index_sections: try: parent_div = section.xpath("./ancestor::div0")[0] except IndexError: logging.warning("Index is not embedded in div0, but div1 ") parent_div = section.xpath("./ancestor::div1")[0] parent_div.append(section) libeoaconvert.debug_xml_here( xmlTree, "indexmover", DEBUG_DIR) make_indices_child_of_div0() etree.strip_tags(xmlTree, "tagtobestripped") etree.strip_elements(xmlTree, "elementtobestripped", with_tail=False) # here followed the conversion to epub and the conversion to django.xml # both parts were removed and put into separate files. intermediate_file_pre = TEMP_DIR / "IntermediateXMLFile_pre.xml" intermediate_file = OUTPUT_DIR / "IntermediateXMLFile.xml" ergebnisdatei = open(intermediate_file_pre, "w") ergebnis = etree.tostring(xmlTree, pretty_print=True, encoding="unicode") ergebnisdatei.write(ergebnis) ergebnisdatei.close() # replacing a milestone element by a closing and opening combination with open(intermediate_file_pre, 'r') as tmp_file: filedata = tmp_file.read() # add XML declaration filedata_declaration = "<?xml version='1.0' encoding='UTF-8'?>\n" + filedata # Replace the target string filedata_clean = filedata_declaration.replace('<msparbreak/>', '</p><p>') # Write the file out again with open(intermediate_file, 'w') as outfile: outfile.write(filedata_clean) # saving some data data_to_pickle = {'chapterdict' : dictChapters, 'eqdict' : dictEquations, 'listdict' : dictLists, 'theoremdict' : dictTheorems, 'figdict' : dictFigures, 'secdict' : dictSections, 'fndict' : dictFootnotes, 'tabdict' : dictTables, 'pagelabeldict' : dictPagelabels } with open(TEMP_DIR / 'data.pickle', 'wb') as f: # Pickle the 'data' dictionary using the highest protocol available. pickle.dump(data_to_pickle, f, pickle.HIGHEST_PROTOCOL) grep_command = "grep -A1 -B2 'argument of \\\EOAfn' {}".format( LOG_DIR / SCRIPT_NAME / (INPUT_PATH.stem + "-tralics.log") ) grep_command_arguments = shlex.split(grep_command) grep_result = subprocess.Popen(grep_command_arguments, stdout=subprocess.PIPE) grep_output = grep_result.stdout.read() if len(grep_output) > 0: logging.info("\n===\nFootnotes with paragraphs were found. They have to be replaced by the \EOAfnpar command.\n") logging.info(grep_output.decode("utf-8")) logging.info("===\n") logging.info("Removing temporary files.") cleanup() logging.info("Done!") sys.exit()
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
You can’t perform that action at this time.