diff --git a/eoatex2imxml.py b/eoatex2imxml.py index a247d37..4e667f0 100755 --- a/eoatex2imxml.py +++ b/eoatex2imxml.py @@ -21,7 +21,7 @@ from utils.libeoabibitem import Bibitem import utils.libeoaconvert as libeoaconvert -from utils.load_config import load_config +from utils.load_config import load_config, exec_command, check_executable, copy_dir_overwrite # imports import argparse @@ -36,19 +36,22 @@ import subprocess import sys import shutil -import time import logging import pickle from pathlib import Path +import time -BASE_DIR = Path( os.path.realpath(__file__) ).parent +BASE_DIR = Path( __file__ ).resolve().parent +SCRIPT_PATH = Path( __file__ ) +SCRIPT_NAME = SCRIPT_PATH.stem -############################################################### -# Preperation of certain files and some checks in advance -############################################################### +##################### +# Parsing arguments # +##################### -# Options for the command line: filename / configfile -parser = argparse.ArgumentParser() +parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter +) parser.add_argument( "-c", "--config", default = BASE_DIR / "config" / "eoaconvert.cfg", @@ -56,12 +59,12 @@ ) parser.add_argument( "-l", "--log-file", - default = "eoaconvert.log", + default = Path("logs", SCRIPT_NAME).with_suffix(".log"), help="logfile" ) parser.add_argument( "--log-level", - default = "DEBUG", + default = "INFO", help="log level: choose between DEBUG, INFO, WARNING, ERROR, CRITICAL" ) parser.add_argument( @@ -79,6 +82,16 @@ required = True, help="Name of main EOATeX file (without suffix!)." ) +parser.add_argument( + "--latex-dir", + default = "./latex-out", + help="directory where to find the output generated by eoatex2pdf.py" +) +parser.add_argument( + "-o", "--output-dir", + default = "./imxml", + help="where to dump all output files" +) parser.add_argument( "-t", "--trash", help="Remove temporary files." @@ -86,28 +99,24 @@ args = parser.parse_args() -config_file = args.config -logging.debug("The configfile is %s." % config_file) +CONFIG_FILE = args.config + +print("The configfile is %s." % CONFIG_FILE) # current biber is not compatible with this code # switch TeX distribution to TeXLive2016, # run biber_2.1 -O biber2-1n.bbl $INPUT to obtain this file BIBERFILE = "biber2-1.bbl" -######################## -# Constant directories # -######################## -CONVERT_DIR = os.getcwd() + os.path.sep + "CONVERT" - ################################## # Reading the configuration file # ################################## CONFIG = load_config( - config_file, - args.log_file, + CONFIG_FILE, args.log_level, + args.log_file, ) ######################## @@ -115,8 +124,7 @@ ######################## GM_PATH = "gm" TRALICS_PATH_EXEC = "tralics" -# (part of texlive distribution): -PDFCROP_EXEC = "pdfcrop" +PDFCROP_EXEC = "pdfcrop" # (part of texlive distribution): # TL_PATH = CONFIG['Executables']['texlive'] # TEXBIN_PATH = CONFIG['Executables']['texbin'] @@ -128,19 +136,95 @@ TEMPLATE_PATH = BASE_DIR / CONFIG['Auxiliaries']['template_path'] SUPPORT_PATH = BASE_DIR / CONFIG['Auxiliaries']['support_path'] +############################ +# Paths: +############################ +INPUT_DIR = Path( args.filename ).resolve().parent +INPUT_PATH_NO_EXT = args.filename +OUTPUT_DIR = Path( args.output_dir ) +LATEX_DIR = Path ( args.latex_dir ) + +CONVERT_DIR = OUTPUT_DIR / "CONVERT" +# CONVERT_DIR = os.getcwd() + os.path.sep + "CONVERT" +TEMP_DIR = OUTPUT_DIR / "tmp_files" +DEBUG_DIR = OUTPUT_DIR / "debug" + +# where to output the xml file: +XML_FILE = OUTPUT_DIR / (INPUT_PATH_NO_EXT + ".xml") + + +################################################# +# Checking for existance of tools and libraries # +################################################# # sanity check: -print("PATH: {}".format( os.environ['PATH'] )) -Kommando = f"which {GM_PATH}" -Argumente = shlex.split(Kommando) -ret = subprocess.run(Argumente).returncode -if ret != 0: - raise( Exception( f"PROGRAM not found: {GM_PATH}" ) ) -Kommando = f"which {TRALICS_PATH_EXEC}" -Argumente = shlex.split(Kommando) -ret = subprocess.run(Argumente).returncode -if ret != 0: - raise( Exception( f"PROGRAM not found: {TRALICS_PATH_EXEC}" ) ) +logging.debug("PATH: {}".format( os.environ['PATH'] )) +check_executable( GM_PATH ) +check_executable( TRALICS_PATH_EXEC ) +check_executable( PDFCROP_EXEC ) + +if not os.path.exists(TRALICS_PATH_LIB): + logging.error(f"Cannot find the Tralics configuration at {TRALICS_PATH_LIB}. Exiting.") + sys.exit() + +################################## +# Setting up various directories # +################################## + +if not os.path.exists(OUTPUT_DIR): + os.mkdir( OUTPUT_DIR ) +if not os.path.exists(TEMP_DIR): + os.mkdir( TEMP_DIR ) +if not os.path.exists( TEMP_DIR / "formulas2png" ): + os.mkdir( TEMP_DIR / "formulas2png" ) +if not os.path.exists( DEBUG_DIR ): + os.mkdir( DEBUG_DIR ) + +''' +# Check for folder and necessary files +if not os.path.exists(CONVERT_DIR): + logging.info(f"The directory {CONVERT_DIR} has not been created yet. Creating it for you") + time.sleep(1) + os.makedirs(CONVERT_DIR) +if not os.path.exists(CONVERT_DIR / "publication.cfg"): + logging.info(f"The publication.cfg file is missing in {CONVERT_DIR} directory.") + if os.path.exists(INPUT_DIR / "publication.cfg"): + shutil.copy(INPUT_DIR / "publication.cfg", CONVERT_DIR) + logging.info(f"Copied from {INPUT_DIR}.") + else: + logging.error("Found no publication.cfg. Exiting") + sys.exit() +if not os.path.exists(CONVERT_DIR / "cover.jpg"): + logging.info(f"The file cover.jpg in {CONVERT_DIR} directory is missing.") + if os.path.exists(INPUT_DIR / "Cover.jpg"): + shutil.copy("Cover.jpg", CONVERT_DIR / "cover.jpg") + logging.info("Copied from current directory.") + else: + logging.error("No coverfile found. You can create a temporary one with the mkimage.py script") + sys.exit() +# if os.path.exists(os.getcwd() + "/pre_xml.tex") == False: +# print ("pre_xml fehlt") +# sys.exit() +''' + +# Copy Support-Files from /Library/MPIWG to current directory +shutil.copy(SUPPORT_PATH / "classes.dtd", OUTPUT_DIR) +shutil.copy(SUPPORT_PATH / "mathml2-qname-1.mod", OUTPUT_DIR) +shutil.copy(SUPPORT_PATH / "mathml2.dtd", OUTPUT_DIR) +copy_dir_overwrite(SUPPORT_PATH / "html", (OUTPUT_DIR / "html")) +copy_dir_overwrite(SUPPORT_PATH / "iso8879", (OUTPUT_DIR / "iso8879")) +copy_dir_overwrite(SUPPORT_PATH / "iso9573-13", (OUTPUT_DIR / "iso9573-13")) +copy_dir_overwrite(SUPPORT_PATH / "mathml", (OUTPUT_DIR / "mathml")) +''' +shutil.copy(SUPPORT_PATH / "classes.dtd", os.getcwd()) +shutil.copy(SUPPORT_PATH / "mathml2-qname-1.mod", os.getcwd()) +shutil.copy(SUPPORT_PATH / "mathml2.dtd", os.getcwd()) +copy_dir_overwrite(SUPPORT_PATH / "html", (os.getcwd() + "/html")) +copy_dir_overwrite(SUPPORT_PATH / "iso8879", (os.getcwd() + "/iso8879")) +copy_dir_overwrite(SUPPORT_PATH / "iso9573-13", (os.getcwd() + "/iso9573-13")) +copy_dir_overwrite(SUPPORT_PATH / "mathml", (os.getcwd() + "/mathml")) +# shutil.copytree(SUPPORT_PATH + "mathml2", (os.getcwd() + "/mathml2")) +''' ######################################## # Certain functions for specific tasks # @@ -154,6 +238,7 @@ def getchildren(xmlElement): def TeX2PNG(LaTeXCode, Type, Chapter, Number): """Function to render LaTeX-Code into PNG-Files, returns PNG-Filename (epub & django)""" + # logging.info( f"TeX2PNG({LaTeXCode}, {Type}, {Chapter}, {Number})" ) # Dictionary contains Type:begin/end Types = { "EOAineq" : ["$", "$"], @@ -181,33 +266,45 @@ def TeX2PNG(LaTeXCode, Type, Chapter, Number): # Get tmp-directory for this user account # tmpDir = os.getenv("TMPDIR") # use local tmpdir - formula_tmp_dir = os.getcwd() + "/tmp_files/formulas2png/" + formula_tmp_dir = TEMP_DIR / "formulas2png" # Make directory items if it doesn't already exist - if not os.path.exists(os.getcwd() + "/items"): - os.mkdir(os.getcwd() + "/items") + items_dir = OUTPUT_DIR / "items" + if not os.path.exists( items_dir ): + os.mkdir( items_dir ) s = string.Template(Template) e = s.substitute(DERINHALT=LaTeXCode) - tmpFile = formula_tmp_dir + Type + "_" + str(Chapter) + "_" + str(Number) + ".tex" + tmpFile = formula_tmp_dir / (Type + "_" + str(Chapter) + "_" + str(Number) + ".tex" ) tmp = open(tmpFile, "w") tmp.write(e) tmp.close() - Kommando = "xelatex --halt-on-error " + tmpFile + Kommando = "xelatex --halt-on-error " + str(tmpFile.absolute()) Argumente = shlex.split(Kommando) # Redirecting stderr to save XeLaTeX-Output - Datei = open('tmp_files/xelatex-run.log', 'w') + Datei = open(TEMP_DIR / 'xelatex-run.log', 'w') Ergebnis = subprocess.call(Argumente,cwd=formula_tmp_dir,stdout=Datei) if Ergebnis == 0: - print("Successfully converted formula " + Type + str(Chapter) + "_" + str(Number)) + logging.info("Successfully converted formula " + Type + str(Chapter) + "_" + str(Number)) if Ergebnis == 1: - print("[ERROR]: Failed to convert formula " + Type + str(Chapter) + "_" + str(Number)) - Kommando = PDFCROP_EXEC + " " + formula_tmp_dir + Type + "_" + str(Chapter) + "_" + str(Number) + ".pdf " + formula_tmp_dir + Type + "_" + str(Chapter) + "_" + str(Number) + "a.pdf" + logging.error("Failed to convert formula " + Type + str(Chapter) + "_" + str(Number)) + Kommando = "{cmd} {arg1} {arg2}".format( + cmd=PDFCROP_EXEC, + arg1=(formula_tmp_dir / (Type + "_" + str(Chapter) + "_" + str(Number) + ".pdf")).absolute(), + arg2=(formula_tmp_dir / (Type + "_" + str(Chapter) + "_" + str(Number) + "a.pdf")).absolute() + ) + # Kommando = PDFCROP_EXEC + " " + formula_tmp_dir + Type + "_" + str(Chapter) + "_" + str(Number) + ".pdf " + formula_tmp_dir + Type + "_" + str(Chapter) + "_" + str(Number) + "a.pdf" Argumente = shlex.split(Kommando) subprocess.call(Argumente,cwd=formula_tmp_dir,stdout=Datei) - Kommando = GM_PATH + " convert -density 144 " + formula_tmp_dir + Type + "_" + str(Chapter) + "_" + str(Number) + "a.pdf " + os.getenv("PWD") + "/items/" + Type + "_" + str(Chapter) + "_" + str(Number) + ".png" + Kommando = "{cmd} convert -density 144 {arg1} {arg2}".format( + cmd=GM_PATH, + arg1 = (formula_tmp_dir / (Type + "_" + str(Chapter) + "_" + str(Number) + "a.pdf")).absolute(), + arg2 = (items_dir / (Type + "_" + str(Chapter) + "_" + str(Number) + ".png")).absolute() + ) + # Kommando = GM_PATH + " convert -density 144 " + formula_tmp_dir + Type + "_" + str(Chapter) + "_" + str(Number) + "a.pdf " + os.getenv("PWD") + "/items/" + Type + "_" + str(Chapter) + "_" + str(Number) + ".png" Argumente = shlex.split(Kommando) subprocess.call(Argumente,cwd=formula_tmp_dir,stdout=Datei) + # logging.info("TeX2PNG done") return LaTeXCode # def TeX2PNG ends here @@ -235,12 +332,12 @@ def make_bibchecker(bib_database, set_citations): largebib_template_string_xml = string.Template(largebib_xml_template) largebib_replacement_xml = largebib_template_string_xml.substitute(INSERT_BIB_DATABASE = bib_database, INSERT_CITEKEYS = string_citations) - tmp_latex_file = "%s/debug/debug_onlybib.tex" % (os.getcwd()) + tmp_latex_file = DEBUG_DIR / "debug_onlybib.tex" tmp_latex = open(tmp_latex_file, "w") tmp_latex.write(largebib_replacement) tmp_latex.close() - tmp_xml_file = "%s/debug/debug_onlybib-xml.tex" % (os.getcwd()) + tmp_xml_file = DEBUG_DIR / "debug_onlybib-xml.tex" tmp_xml = open(tmp_xml_file, "w") tmp_xml.write(largebib_replacement_xml) tmp_xml.close() @@ -259,7 +356,7 @@ def createBibEntryAuthorYear(bibEntry, boolSameAuthor): """Function to create a complete Entry of a publication (epub & django) for author-year citation""" strBibEntry = "" if bibEntry.entrytype() not in libeoaconvert.allowed_bibentry_types: - print("[ERROR]: You cannot use the entry type %s in entry %s. Allowed entry types are: %s.\n" % (bibEntry.entrytype(), bibEntry.citekey() , ", ".join(libeoaconvert.allowed_bibentry_types))) + logging.error("You cannot use the entry type %s in entry %s. Allowed entry types are: %s.\n" % (bibEntry.entrytype(), bibEntry.citekey() , ", ".join(libeoaconvert.allowed_bibentry_types))) sys.exit() bool_edited_book = False @@ -319,7 +416,7 @@ def createBibEntryNumeric(bibEntry): strBibEntry = "" if bibEntry.entrytype() not in libeoaconvert.allowed_bibentry_types: - print("[ERROR]: You cannot use the entry type %s in entry %s. Allowed entry types are: %s.\n" % (bibEntry.entrytype(), bibEntry.citekey() , ", ".join(libeoaconvert.allowed_bibentry_types))) + logging.error("You cannot use the entry type %s in entry %s. Allowed entry types are: %s.\n" % (bibEntry.entrytype(), bibEntry.citekey() , ", ".join(libeoaconvert.allowed_bibentry_types))) sys.exit() strAuthor = bibEntry.fullauthorfirstlast() @@ -349,14 +446,14 @@ def pdf_burst(input_file, tmpDir): """Split PDF file into single pages""" from PyPDF2 import PdfFileWriter, PdfFileReader - input1 = PdfFileReader(open(tmpDir + input_file, "rb")) + input1 = PdfFileReader(open(tmpDir / input_file, "rb")) logging.debug("Input is %s and has %d pages." % (input_file, input1.getNumPages())) for pageno in range(input1.getNumPages()): output = PdfFileWriter() output.addPage(input1.getPage(pageno)) - output_filename = tmpDir + "EOAformulas_%d.pdf" % (pageno + 1) + output_filename = tmpDir / ("EOAformulas_%d.pdf" % (pageno + 1)) output_stream = open(output_filename, 'wb') output.write(output_stream) output_stream.close() @@ -382,18 +479,18 @@ def progress(count, total, status=''): def cleanup(): """Remove support files""" try: - os.remove((os.getcwd() + "/classes.dtd")) - os.remove((os.getcwd() + "/mathml2-qname-1.mod")) - os.remove((os.getcwd() + "/mathml2.dtd")) - shutil.rmtree((os.getcwd() + "/html")) - shutil.rmtree((os.getcwd() + "/iso8879")) - shutil.rmtree((os.getcwd() + "/iso9573-13")) - shutil.rmtree((os.getcwd() + "/mathml")) + os.remove(OUTPUT_DIR / "classes.dtd") + os.remove(OUTPUT_DIR / "mathml2-qname-1.mod") + os.remove(OUTPUT_DIR / "mathml2.dtd") + shutil.rmtree(OUTPUT_DIR / "html") + shutil.rmtree(OUTPUT_DIR / "iso8879") + shutil.rmtree(OUTPUT_DIR / "iso9573-13") + shutil.rmtree(OUTPUT_DIR / "mathml") # shutil.rmtree((os.getcwd() + "/mathml2")) logging.debug("Removed support files.") except: - print("No temporary files were found.") + logging.info("No temporary files were found.") # def cleanup ends here # Remove temporary files, neccessary for troubleshooting @@ -401,96 +498,89 @@ def cleanup(): cleanup() sys.exit() -################################################# -# Checking for existance of tools and libraries # -################################################# -if not os.path.exists(TRALICS_PATH_LIB): - print("Cannot find the Tralics configuration at %s. Exiting." % TRALICS_PATH_LIB) - sys.exit() - -################################## -# Setting up various directories # -################################## - -if not os.path.exists("tmp_files"): - os.mkdir(os.path.expanduser("tmp_files")) -if not os.path.exists("tmp_files/formulas2png/"): - os.mkdir(os.path.expanduser("tmp_files/formulas2png/")) -if not os.path.exists(os.getcwd() + "/debug"): - os.mkdir(os.getcwd() + "/debug") - -# Check for folder and necessary files -if not os.path.exists(CONVERT_DIR): - print("The directory CONVERT has not been created yet. Creating it for you") - time.sleep(1) - os.makedirs(CONVERT_DIR) -if not os.path.exists(CONVERT_DIR + os.path.sep + "publication.cfg"): - print("The publication.cfg file is missing in CONVERT directory.") - if os.path.exists(os.getcwd() + os.path.sep + "publication.cfg"): - shutil.copy("publication.cfg", CONVERT_DIR) - print("Copied from current directory.") - else: - print("Found no publication.cfg. Exiting") - sys.exit() -if not os.path.exists(CONVERT_DIR + os.path.sep + "Cover.jpg"): - print("The file Cover.jpg in CONVERT directory is missing.") - if os.path.exists(os.getcwd() + os.path.sep + "Cover.jpg"): - shutil.copy("Cover.jpg", CONVERT_DIR + os.path.sep + "cover.jpg") - print("Copied from current directory.") - else: - print("No coverfile found. You can create a temporary one with the mkimage.py script") - sys.exit() -# if os.path.exists(os.getcwd() + "/pre_xml.tex") == False: -# print ("pre_xml fehlt") -# sys.exit() - - -def copy_dir_overwrite( src, dst ): - if os.path.exists( dst ): - shutil.rmtree( dst ) - shutil.copytree( src, dst) - -# Copy Support-Files from /Library/MPIWG to current directory -shutil.copy(SUPPORT_PATH / "classes.dtd", os.getcwd()) -shutil.copy(SUPPORT_PATH / "mathml2-qname-1.mod", os.getcwd()) -shutil.copy(SUPPORT_PATH / "mathml2.dtd", os.getcwd()) -copy_dir_overwrite(SUPPORT_PATH / "html", (os.getcwd() + "/html")) -copy_dir_overwrite(SUPPORT_PATH / "iso8879", (os.getcwd() + "/iso8879")) -copy_dir_overwrite(SUPPORT_PATH / "iso9573-13", (os.getcwd() + "/iso9573-13")) -copy_dir_overwrite(SUPPORT_PATH / "mathml", (os.getcwd() + "/mathml")) -# shutil.copytree(SUPPORT_PATH + "mathml2", (os.getcwd() + "/mathml2")) - ############################################################## # Preparing the main document # ############################################################## -# Convert TeX to XML via Tralics -Kommando = "%s -log_file %s -confdir %s/tralics_conf -config %s/tralics.tcf -utf8 -utf8output %s.tex" % (TRALICS_PATH_EXEC, args.filename + "-tralics.log", TRALICS_PATH_LIB, TRALICS_PATH_LIB, args.filename) -Argumente = shlex.split(Kommando) -Prozess = subprocess.call(Argumente) -logging.debug(f"Tralics command: {Kommando}") - -# Fix underscore und fix EOAtranscripted -tmpFile = open ((args.filename) + ".xml", "r") -tmpText = tmpFile.read() -tmpFile.close() +# .tex -> .xml +def run_tralics( + input_file, + TRALICS_PATH_LIB, + TRALICS_LOG_PATH, + output_dir = OUTPUT_DIR, +): + fixed_tex_file_path = output_dir / Path(input_file).name + libeoaconvert.enable_preamble( + input_file, + fixed_tex_file_path, + "xml" + ) + # other possible tralics options: + # -output_dir DIR + # -output_file FILENAME + + # Convert TeX to XML via Tralics + + logging.info( f"executing {TRALICS_PATH_EXEC}. log file: {TRALICS_LOG_PATH}" ) + # input_dir = Path(input_file).absolute().parent + exec_command( + "{cmd} -log_file {log_file} -confdir {conf_dir}/tralics_conf -config {conf_dir}/tralics.tcf -utf8 -utf8output -output_dir={output_dir} -input_dir={input_dir} -input_file={input_file}".format( + cmd = TRALICS_PATH_EXEC, + log_file = TRALICS_LOG_PATH, + conf_dir = TRALICS_PATH_LIB, + output_dir = output_dir, + input_dir = output_dir, + input_file = input_file, + ), + ignore_fail = True # :-D + ) + ''' + exec_command( + f"{TRALICS_PATH_EXEC} -log_file {TRALICS_LOG_PATH} -confdir {TRALICS_PATH_LIB}/tralics_conf -config {TRALICS_PATH_LIB}/tralics.tcf -utf8 -utf8output -output_dir={output_dir} -input_dir={input_dir} -input_file={input_file}", + ignore_fail = True # :-D + ) + Kommando = \ + f"{TRALICS_PATH_EXEC} -log_file {TRALICS_LOG_PATH} -confdir {TRALICS_PATH_LIB}/tralics_conf -config {TRALICS_PATH_LIB}/tralics.tcf -utf8 -utf8output {input_file}" + Argumente = shlex.split(Kommando) + Prozess = subprocess.call(Argumente) + logging.debug(f"Tralics command: {Kommando}") + ''' + +run_tralics( + input_file = INPUT_PATH_NO_EXT + '.tex', + TRALICS_PATH_LIB = TRALICS_PATH_LIB, + TRALICS_LOG_PATH = (INPUT_PATH_NO_EXT + "-tralics.log"), + output_dir = OUTPUT_DIR +) -tmpText = re.sub(r"", "_", tmpText) -tmpText = re.sub(r"", "", tmpText) -tmpFile = open ((args.filename) + ".xml", "w") -tmpFile.write(tmpText) -tmpFile.close() +def fix_underscore_and_eoatranscripted( + xml_file +): + # Fix underscore und fix EOAtranscripted + tmpFile = open (xml_file, "r") + tmpText = tmpFile.read() + tmpFile.close() + + tmpText = re.sub(r"", "_", tmpText) + tmpText = re.sub(r"", "", tmpText) + tmpFile = open (xml_file, "w") + tmpFile.write(tmpText) + tmpFile.close() + +fix_underscore_and_eoatranscripted( + xml_file = XML_FILE +) # Complete XML-Document in xmlTree xmlParser = etree.XMLParser(no_network=False,load_dtd=True) #resolve_entities=False -xmlTree = etree.parse((args.filename + ".xml"), xmlParser) +xmlTree = etree.parse(str(XML_FILE), xmlParser) xmlChapters = xmlTree.findall("//div1") # Cleanup of not needed tags in advance. To be cleaned: etree.strip_elements(xmlTree, with_tail=False, *['error']) -print("-----------------------------------------------------") -print("Move EOAlanguage from into attribute of EOAchapter") +logging.info("-----------------------------------------------------") +logging.info("Move EOAlanguage from into attribute of EOAchapter") intChapterNumber = 1 for xmlChapter in xmlChapters: xmlLanguage = xmlChapter.find(".//EOAlanguage") @@ -498,7 +588,7 @@ def copy_dir_overwrite( src, dst ): strLanguage = xmlLanguage.text or "english" xmlChapter.set("language", strLanguage) xmlLanguage.text = None - print("The language of Chapter %d is %s." % (intChapterNumber, strLanguage)) + logging.info("The language of Chapter %d is %s." % (intChapterNumber, strLanguage)) xmlChapter = etree.strip_tags(xmlChapter, "EOAlanguage") intChapterNumber += 1 @@ -510,7 +600,7 @@ def copy_dir_overwrite( src, dst ): try: strSerie = xmlTree.find(".//EOAseries").text or "regular" except AttributeError: - print("\n\nYou are most probably using the preamble for the PDF output. Exiting.") + logging.error("\n\nYou are most probably using the preamble for the PDF output. Exiting.") sys.exit() if strSerie == "Essay": @@ -531,8 +621,8 @@ def copy_dir_overwrite( src, dst ): set_citations = set() -print("-----------------------------------------------------") -print("Numbering Chapters") +logging.info("-----------------------------------------------------") +logging.info("Numbering Chapters") Chapternumber = 1 for xmlChapter in xmlChapters: if xmlChapter.get('rend') != "nonumber": @@ -542,12 +632,12 @@ def copy_dir_overwrite( src, dst ): # EOAequation, EOAsubequation and EOAequationarray Numbering per Chapter intChapterNumber = 1 -print("-----------------------------------------------------") -print("Processing .//EOAequation | .//EOAequationarray | .//EOAsubequations") +logging.info("-----------------------------------------------------") +logging.info("Processing .//EOAequation | .//EOAequationarray | .//EOAsubequations") for xmlChapter in xmlChapters: intEquationnumber = 1 xmlDinge = xmlChapter.xpath(".//EOAequation | .//EOAequationarray | .//EOAsubequations") - print("Working on Chapter %d which contains %d formulæ." % (intChapterNumber, len(xmlDinge))) + logging.info("Working on Chapter %d which contains %d formulæ." % (intChapterNumber, len(xmlDinge))) for xmlDing in xmlDinge: if xmlDing.tag == "EOAequationarray": # tmpNumberinArray is only being used for filename @@ -620,9 +710,9 @@ def copy_dir_overwrite( src, dst ): if xmlChapter.get("rend") == "nonumber": strTempKey = str(tmpNumberinArray) if strTempKey in tmpDictNumberLabel: - print(strTempKey) - print(tmpDictNumberLabel) - print(dictChapters) + logging.info(strTempKey) + logging.info(tmpDictNumberLabel) + logging.info(dictChapters) tmpXML.set("label", tmpDictNumberLabel[strTempKey]) xmlNew.append(tmpXML) xmlDing.getparent().replace(xmlDing, xmlNew) @@ -637,7 +727,7 @@ def copy_dir_overwrite( src, dst ): tmpI = 0 # Insert Number of this Subequation into dictEquations xmlAnchor = xmlDing.find(".//anchor") - print(xmlAnchor) + logging.info(xmlAnchor) if xmlChapter.get("rend") != "nonumber": dictEquations[xmlAnchor.get('id')] = dictChapters[xmlChapter.get('id')] + "." + str(intEquationnumber) if xmlChapter.get("rend") == "nonumber": @@ -716,12 +806,12 @@ def copy_dir_overwrite( src, dst ): intChapterNumber += 1 intChapterNumber = 1 -print("-----------------------------------------------------") -print("Processing .//EOAequationnonumber | .//EOAequationarraynonumber") +logging.info("-----------------------------------------------------") +logging.info("Processing .//EOAequationnonumber | .//EOAequationarraynonumber") for xmlChapter in xmlChapters: tempImagenumber = 1 xmlDinge = xmlChapter.xpath(".//EOAequationnonumber | .//EOAequationarraynonumber") - print("Working on Chapter %d which contains %d formulæ." % (intChapterNumber, len(xmlDinge))) + logging.info("Working on Chapter %d which contains %d formulæ." % (intChapterNumber, len(xmlDinge))) # print ("Working on Chapter " + str(intChapterNumber)) # print ("Es wurden " + str(len(xmlDinge)) + " Formeln gefunden") for xmlDing in xmlDinge: @@ -788,8 +878,8 @@ def copy_dir_overwrite( src, dst ): continue intChapterNumber += 1 -print("-----------------------------------------------------") -print("Converting EOAineq") +logging.info("-----------------------------------------------------") +logging.info("Converting EOAineq") intChapterNumber = 1 intEOAineqRunningOrder = 1 dictEOAineqs = {} @@ -797,10 +887,10 @@ def copy_dir_overwrite( src, dst ): all_ineq = xmlTree.findall(".//EOAineq") # if all_ineq is not None: if len(all_ineq) > 0: - print("Found " + str(len(all_ineq)) + " formulas") + logging.info("Found " + str(len(all_ineq)) + " formulas") for xmlChapter in xmlChapters: - print("Chapter " + str(intChapterNumber)) + logging.info("Chapter " + str(intChapterNumber)) xmlEOAineqs = xmlChapter.findall(".//EOAineq") intEOAineqnumber = 1 for xmlEOAineq in xmlEOAineqs: @@ -848,46 +938,58 @@ def copy_dir_overwrite( src, dst ): # Get tmp-directory for this user account # tmpDir = os.getenv("TMPDIR") # use local tmpdir - formula_tmp_dir = os.getcwd() + "/tmp_files/formulas2png/" + formula_tmp_dir = TEMP_DIR / "formulas2png" # Make directory items if it doesn't already exist - if not os.path.exists(os.getcwd() + "/items"): - os.mkdir(os.getcwd() + "/items") + items_dir = OUTPUT_DIR / "items" + if not os.path.exists( items_dir): + os.mkdir( items_dir ) s = string.Template(Template) e = s.substitute(DERINHALT=strTeXEquations) - tmpFile = formula_tmp_dir + "EOAinline.tex" + tmpFile = formula_tmp_dir / "EOAinline.tex" tmp = open(tmpFile, "w") tmp.write(e) tmp.close() - print("Typesetting all Inline Equations") - Kommando = "xelatex --halt-on-error " + tmpFile + logging.info("Typesetting all Inline Equations") + Kommando = "xelatex --halt-on-error " + str(tmpFile.absolute()) Argumente = shlex.split(Kommando) - Datei = open('tmp_files/xelatex-run.log', 'w') + Datei = open(TEMP_DIR / 'xelatex-run.log', 'w') Ergebnis = subprocess.call(Argumente,cwd=formula_tmp_dir,stdout=Datei) - print("Splitting all Inline Equations") + logging.info("Splitting all Inline Equations") pdf_burst("EOAinline.pdf", formula_tmp_dir) - print("Converting %s split pages into PNG-Images" % len(dictEOAineqs.keys())) + logging.info("Converting %s split pages into PNG-Images" % len(dictEOAineqs.keys())) counter_dictEOAineqs = 1 for intRunningOrder in dictEOAineqs.keys(): # provide more status information here in output! progress(counter_dictEOAineqs, len(dictEOAineqs.keys()),"Splitting all inline equations, image %s of %s" % (counter_dictEOAineqs, len(dictEOAineqs.keys()))) - Kommando = PDFCROP_EXEC + " " + formula_tmp_dir + "EOAformulas_" + str(intRunningOrder) + ".pdf " + formula_tmp_dir + dictEOAineqs[intRunningOrder] + ".pdf" + Kommando = "{cmd} {arg1} {arg2}".format( + cmd = PDFCROP_EXEC, + arg1 = (formula_tmp_dir / ("EOAformulas_" + str(intRunningOrder) + ".pdf")).absolute(), + arg2 = (formula_tmp_dir / (dictEOAineqs[intRunningOrder] + ".pdf")).absolute() + ) + # Kommando = PDFCROP_EXEC + " " + formula_tmp_dir + "EOAformulas_" + str(intRunningOrder) + ".pdf " + formula_tmp_dir + dictEOAineqs[intRunningOrder] + ".pdf" Argumente = shlex.split(Kommando) subprocess.call(Argumente,cwd=formula_tmp_dir,stdout=Datei) - Kommando = GM_PATH + " convert -density 144 " + formula_tmp_dir + dictEOAineqs[intRunningOrder] + ".pdf " + os.getenv("PWD") + "/items/" + dictEOAineqs[intRunningOrder] + ".png" + + Kommando = "{cmd} convert -density 144 {arg1} {arg2}".format( + cmd = GM_PATH, + arg1 = (formula_tmp_dir / (dictEOAineqs[intRunningOrder] + ".pdf")).absolute(), + arg2 = (items_dir / (dictEOAineqs[intRunningOrder] + ".png")).absolute() + ) + #Kommando = GM_PATH + " convert -density 144 " + formula_tmp_dir + dictEOAineqs[intRunningOrder] + ".pdf " + os.getenv("PWD") + "/items/" + dictEOAineqs[intRunningOrder] + ".png" Argumente = shlex.split(Kommando) subprocess.call(Argumente,cwd=formula_tmp_dir,stdout=Datei) counter_dictEOAineqs += 1 else: - print("Found no EOAineq. Continuing") + logging.info("Found no EOAineq. Continuing") ########### # Formula # ########### -print("-----------------------------------------------------") -print("Converting EOAchem") +logging.info("-----------------------------------------------------") +logging.info("Converting EOAchem") intChapterNumber = 1 int_EOAchem_running_order = 1 dictEOAchems = {} @@ -895,10 +997,10 @@ def copy_dir_overwrite( src, dst ): all_chem = xmlTree.findall(".//EOAchem") # if all_chem is not None: if len(all_chem) > 0: - print("Found " + str(len(all_chem)) + " chemical formulas") + logging.info("Found " + str(len(all_chem)) + " chemical formulas") for xmlChapter in xmlChapters: - print("Chapter " + str(intChapterNumber)) + logging.info("Chapter " + str(intChapterNumber)) xmlEOAchems = xmlChapter.findall(".//EOAchem") int_EOAchem_number = 1 for xml_EOAchem in xmlEOAchems: @@ -929,46 +1031,58 @@ def copy_dir_overwrite( src, dst ): # Get tmp-directory for this user account # tmpDir = os.getenv("TMPDIR") # use local tmpdir - formula_tmp_dir = os.getcwd() + "/tmp_files/formulas2png/" + formula_tmp_dir = TEMP_DIR / "formulas2png/" # Make directory items if it doesn't already exist - if not os.path.exists(os.getcwd() + "/items"): - os.mkdir(os.getcwd() + "/items") + items_dir = OUTPUT_DIR / "items" + if not os.path.exists( items_dir ): + os.mkdir( items_dir ) s = string.Template(Template) e = s.substitute(DERINHALT=str_tex_chem) - tmpFile = formula_tmp_dir + "EOAchem.tex" + tmpFile = formula_tmp_dir / "EOAchem.tex" tmp = open(tmpFile, "w") tmp.write(e) tmp.close() - print("Typesetting all inline Chemical formulas") - Kommando = "xelatex --halt-on-error " + tmpFile + logging.info("Typesetting all inline Chemical formulas") + Kommando = "xelatex --halt-on-error " + str(tmpFile.absolute()) Argumente = shlex.split(Kommando) - Datei = open('tmp_files/xelatex-run.log', 'w') + Datei = open(TEMP_DIR / 'xelatex-run.log', 'w') Ergebnis = subprocess.call(Argumente,cwd=formula_tmp_dir,stdout=Datei) - print("Splitting all Inline Chemical formulas") + logging.info("Splitting all Inline Chemical formulas") pdf_burst("EOAchem.pdf", formula_tmp_dir) - print("Converting %s split pages into PNG-Images" % len(dictEOAchems.keys())) + logging.info("Converting %s split pages into PNG-Images" % len(dictEOAchems.keys())) counter_dictEOAchems = 1 for intRunningOrder in dictEOAchems.keys(): # provide more status information here in output! progress(counter_dictEOAchems, len(dictEOAchems.keys()),"Splitting all inline equations, image %s of %s" % (counter_dictEOAchems, len(dictEOAchems.keys()))) - Kommando = PDFCROP_EXEC + " " + formula_tmp_dir + "EOAformulas_" + str(intRunningOrder) + ".pdf " + formula_tmp_dir + dictEOAchems[intRunningOrder] + ".pdf" + Kommando = "{cmd} {arg1} {arg2}".format( + cmd=PDFCROP_EXEC, + arg1=(formula_tmp_dir / ("EOAformulas_" + str(intRunningOrder) + ".pdf")).absolute(), + arg2=(formula_tmp_dir / (dictEOAchems[intRunningOrder] + ".pdf")).absolute() + ) + # Kommando = PDFCROP_EXEC + " " + formula_tmp_dir + "EOAformulas_" + str(intRunningOrder) + ".pdf " + formula_tmp_dir + dictEOAchems[intRunningOrder] + ".pdf" Argumente = shlex.split(Kommando) subprocess.call(Argumente,cwd=formula_tmp_dir,stdout=Datei) - Kommando = GM_PATH + " convert -density 144 " + formula_tmp_dir + dictEOAchems[intRunningOrder] + ".pdf " + os.getenv("PWD") + "/items/" + dictEOAchems[intRunningOrder] + ".png" + + Kommando = "{cmd} convert -density 144 {arg1} {arg2}".format( + cmd=GM_PATH, + arg1 = (formula_tmp_dir / (dictEOAchems[intRunningOrder] + ".pdf")).absolute(), + arg2 = (items_dir / (dictEOAchems[intRunningOrder] + ".png")).absolute() + ) + # Kommando = GM_PATH + " convert -density 144 " + formula_tmp_dir + dictEOAchems[intRunningOrder] + ".pdf " + os.getenv("PWD") + "/items/" + dictEOAchems[intRunningOrder] + ".png" Argumente = shlex.split(Kommando) subprocess.call(Argumente,cwd=formula_tmp_dir,stdout=Datei) counter_dictEOAchems += 1 else: - print("Found no EOAchem. Continuing") + logging.info("Found no EOAchem. Continuing") ############### # Formula end # ############### -print("-----------------------------------------------------") -print("EOAFigure Numbering per Chapter") +logging.info("-----------------------------------------------------") +logging.info("EOAFigure Numbering per Chapter") for xmlChapter in xmlChapters: Figurenumber = 1 xmlFigures = xmlChapter.xpath(".//EOAfigure | .//EOAlsfigure") @@ -984,8 +1098,8 @@ def copy_dir_overwrite( src, dst ): xmlFigure.set("id", xmlAnchor.get("id")) Figurenumber += 1 -print("-----------------------------------------------------") -print("Numbering Theorems") +logging.info("-----------------------------------------------------") +logging.info("Numbering Theorems") for xmlChapter in xmlChapters: xmlTheorems = xmlChapter.findall(".//theorem") for xmlTheorem in xmlTheorems: @@ -993,8 +1107,8 @@ def copy_dir_overwrite( src, dst ): strNumber = xmlTheorem.get("id-text") dictTheorems[strUID] = strNumber -print("-----------------------------------------------------") -print("Section, Subsection,... Numbering per Chapter") +logging.info("-----------------------------------------------------") +logging.info("Section, Subsection,... Numbering per Chapter") intChapterNumber = 1 for xmlChapter in xmlChapters: strUID = xmlChapter.get("id") @@ -1024,8 +1138,8 @@ def copy_dir_overwrite( src, dst ): if xmlChapter.get("rend") != "nonumber": intChapterNumber += 1 -print("-----------------------------------------------------") -print("Numbering of Footnotes per Chapter") +logging.info("-----------------------------------------------------") +logging.info("Numbering of Footnotes per Chapter") intChapterNumber = 1 for xmlChapter in xmlChapters: intNoteNumber = 1 @@ -1042,8 +1156,8 @@ def copy_dir_overwrite( src, dst ): # 'fndict': {'uid11': '2', 'uid12': '3', 'uid9': '1'}, -print("-----------------------------------------------------") -print("Numbering of Lists per Chapter") +logging.info("-----------------------------------------------------") +logging.info("Numbering of Lists per Chapter") for xmlChapter in xmlChapters: xmlListitems = xmlChapter.findall(".//item") for xmlListitem in xmlListitems: @@ -1051,9 +1165,9 @@ def copy_dir_overwrite( src, dst ): strItemNumber = xmlListitem.get("id-text") dictLists[strUID] = strItemNumber -print("-----------------------------------------------------") -print("Working on Page Numbers for References") -listAuxFiles = glob.glob(os.getcwd() + "/*.aux") +logging.info("-----------------------------------------------------") +logging.info("Working on Page Numbers for References") +listAuxFiles = glob.glob( str(LATEX_DIR /"*.aux") ) if len(listAuxFiles) == 0: logging.error("No aux file found. Exiting") sys.exit(1) @@ -1077,11 +1191,11 @@ def copy_dir_overwrite( src, dst ): if matched_citation is not None: set_citations.add(matched_citation.group(1)) -print(dictPagelabels) -print(set_citations) +logging.info(dictPagelabels) +logging.info(set_citations) -print("-----------------------------------------------------") -print("Numbering of Tables per Chapter") +logging.info("-----------------------------------------------------") +logging.info("Numbering of Tables per Chapter") intChapterNumber = 1 for xmlChapter in xmlChapters: intTableNumber = 1 @@ -1120,7 +1234,7 @@ def copy_dir_overwrite( src, dst ): sys.exit() # the new solution: pandoc-citeproc -interim_bib_json_file = (args.filename) + "-bib.json" +interim_bib_json_file = INPUT_PATH_NO_EXT + "-bib.json" citeproc_command = "pandoc-citeproc --bib2json %s" % bib_database + ".bib" logging.debug(f"Running citeproc with the following command: {citeproc_command}") citeproc_arguments = shlex.split(citeproc_command) @@ -1140,11 +1254,11 @@ def copy_dir_overwrite( src, dst ): # the old solution # #################### # # Copy interim .bbl-File to interim bib.tex file -# interim_bibtex_file = (args.filename) + "bib.tex" +# interim_bibtex_file = INPUT_PATH_NO_EXT + "bib.tex" # try: # shutil.copy(BIBERFILE, interim_bibtex_file) # except FileNotFoundError: -# print("%s has not been created yet. Switch TeX distribution to TeXLive2016, run biber_2.1 -O biber2-1.bbl %s to obtain this file" % (BIBERFILE, args.filename)) +# print("%s has not been created yet. Switch TeX distribution to TeXLive2016, run biber_2.1 -O biber2-1.bbl %s to obtain this file" % (BIBERFILE, INPUT_PATH_NO_EXT)) # # Read all lines of Bibliographic TeX # tmpFile = open(interim_bibtex_file, "r") # tmpLines = tmpFile.readlines() @@ -1161,11 +1275,11 @@ def copy_dir_overwrite( src, dst ): # tmpFile.close() # # TeX has been sanitized, now tralics to make it intermediate XML # print("TeX has been sanitized, now tralics to make it intermediate XML") -# Kommando = "%s -log_file %s -confdir %s/tralics_conf -config %s/tralics.tcf -utf8 -utf8output -entnames=false %sbib.tex" % (TRALICS_PATH_EXEC, args.filename + "-bib-tralics.log", TRALICS_PATH_LIB, TRALICS_PATH_LIB, args.filename) +# Kommando = "%s -log_file %s -confdir %s/tralics_conf -config %s/tralics.tcf -utf8 -utf8output -entnames=false %sbib.tex" % (TRALICS_PATH_EXEC, INPUT_PATH_NO_EXT + "-bib-tralics.log", TRALICS_PATH_LIB, TRALICS_PATH_LIB, INPUT_PATH_NO_EXT) # Argumente = shlex.split(Kommando) # Prozess = subprocess.call(Argumente) # # Sanitize XML to make it useable -# tmpFile = open((args.filename) + "bib.xml", "r") +# tmpFile = open((INPUT_PATH_NO_EXT) + "bib.xml", "r") # tmpContent = tmpFile.read() # tmpFile.close() # listReplace = [ r"", @@ -1200,13 +1314,13 @@ def copy_dir_overwrite( src, dst ): # # Put back Ampersand # tmpContent = re.sub(r"&", "&", tmpContent) -# tmpFile = open((args.filename) + "bib.xml", "w") +# tmpFile = open((INPUT_PATH_NO_EXT) + "bib.xml", "w") # tmpFile.write(tmpContent) # tmpFile.close() # # TeXML has been sanitized, now load xml-Tree # xmlParser2 = etree.XMLParser(no_network=False,load_dtd=False) -# xmlBibTree = etree.parse((args.filename + "bib.xml"), xmlParser2) +# xmlBibTree = etree.parse((INPUT_PATH_NO_EXT + "bib.xml"), xmlParser2) # xml_bib_entries = xmlBibTree.findall(".//entry") ########################### @@ -1254,8 +1368,8 @@ def print_bibliography( else: citations_to_format = set(citekeys) - print( "citekeys: ") - print( len( citekeys ) ) + logging.info( "citekeys: ") + logging.info( len( citekeys ) ) csl_file = BASE_DIR / CONFIG['Auxiliaries']['CSL_FILE'] formatted_references = libeoaconvert.format_citations( citations_to_format, @@ -1272,7 +1386,7 @@ def print_bibliography( # If Bibliography-Type is monograph search for EOAbibliography and make it all if bib_type == "monograph": - tmp_citation_filename = "used_citations-monograph" + tmp_citation_filename = TEMP_DIR / "used_citations-monograph" if xmlTree.find(".//EOAprintbibliography") is not None: # to insert here: with keywords we can have multiple bibliographies xmlBibliography = xmlTree.find(".//EOAprintbibliography") @@ -1289,7 +1403,7 @@ def print_bibliography( intChapterNumber = 1 for xmlChapter in xmlChapters: logging.debug(f"Looking at chapter {intChapterNumber}.") - tmp_citation_filename = "used_citations-anthology-chapter_{:02d}".format(intChapterNumber) + tmp_citation_filename = TEMP_DIR / ("used_citations-anthology-chapter_{:02d}".format(intChapterNumber)) if xmlChapter.find(".//EOAprintbibliography") is not None: xmlBibliography = xmlChapter.find(".//EOAprintbibliography") @@ -1302,7 +1416,7 @@ def print_bibliography( else: # create an empty file logging.debug("No bibliography found.") - open("tmp_files" + os.path.sep + tmp_citation_filename + "_nocitations", 'a').close() + open(TEMP_DIR / (tmp_citation_filename + "_nocitations"), 'a').close() """ @@ -1364,19 +1478,19 @@ def print_bibliography( if bib_type == "monograph": tmp_citation_filename = "used_citations-monograph" - tmp_path_html = "tmp_files" + os.path.sep + tmp_citation_filename + ".html" + tmp_path_html = TEMP_DIR / (tmp_citation_filename + ".html") with open(tmp_path_html, "r") as formatted_citations: form_cit = BeautifulSoup(formatted_citations, "html.parser") for xmlChapter in xmlChapters: - print("-----------------------------------------------------") - print("Processing References for Chapter " + str(intChapterNumber)) + logging.info("-----------------------------------------------------") + logging.info("Processing References for Chapter " + str(intChapterNumber)) xmlCitations = xmlChapter.xpath(".//EOAciteauthoryear | .//EOAciteyear | .//EOAcitemanual") if bib_type == "anthology": tmp_citation_filename = "used_citations-anthology-chapter_{:02d}".format(intChapterNumber) - tmp_path_html = "tmp_files" + os.path.sep + tmp_citation_filename + ".html" - no_cite_path = "tmp_files" + os.path.sep + tmp_citation_filename + "_nocitations" + tmp_path_html = TEMP_DIR / (tmp_citation_filename + ".html") + no_cite_path = TEMP_DIR / (tmp_citation_filename + "_nocitations") if os.path.exists(tmp_path_html): with open(tmp_path_html, "r") as formatted_citations: form_cit = BeautifulSoup(formatted_citations, "html.parser") @@ -1460,7 +1574,7 @@ def print_bibliography( try: citeauthoryear_value = form_cit.select("#citeauthoryear ~ p > span[data-cites='%s']" % string_citekey)[0].text[1:-1] except IndexError: - print("Could not find {}. Exiting.".format(string_citekey)) + logging.error("Could not find {}. Exiting.".format(string_citekey)) sys.exit() data_title_value = citeauthoryear_value if xmlCitation.tag == "EOAciteauthoryear": @@ -1535,7 +1649,7 @@ def print_bibliography( # Now for the references via EOAcitenumeric xmlCitenumerics = xmlTree.findall(".//EOAcitenumeric") for xmlCitenumeric in xmlCitenumerics: - print(etree.tostring(xmlCitenumeric)) + logging.info(etree.tostring(xmlCitenumeric)) strPopover = "" tmpCitekeys = xmlCitenumeric.find(".//citekey").text tmpCitekeys = re.sub(" ", "", tmpCitekeys) @@ -1595,7 +1709,7 @@ def print_bibliography( if bib_type == "anthology-numeric": intChapterNumber = 1 for xmlChapter in xmlChapters: - print("Processing Bibliography") + logging.info("Processing Bibliography") if xmlChapter.find(".//EOAprintbibliography") is not None: dictCitekeysNumbers = {} dictCitekeysTitles = {} @@ -1621,17 +1735,17 @@ def print_bibliography( intNumberOfEntry += 1 # Now for the references via EOAcitenumeric xmlCitenumerics = xmlChapter.xpath(".//EOAcitenumeric | .//EOAciteauthoryear | .//EOAciteyear") - print("Found numeric citation in chapter " + str(intChapterNumber)) + logging.info("Found numeric citation in chapter " + str(intChapterNumber)) for xmlCitenumeric in xmlCitenumerics: strPopover = "" tmpCitekeys = xmlCitenumeric.find(".//citekey").text tmpCitekeys = re.sub(" ", "", tmpCitekeys) tmpCitekeys = re.sub("\n", "", tmpCitekeys) - print(tmpCitekeys) + logging.info(tmpCitekeys) listCitekeys = re.split("\,", tmpCitekeys) listCitenumbers = [] for strCitekey in listCitekeys: - print(strCitekey) + logging.info(strCitekey) listCitenumbers.append(dictCitekeysNumbers[strCitekey]) # Create Text to be used on the website in a popover strPopover = strPopover + "[" + dictCitekeysNumbers[strCitekey] + "] " + dictCitekeysTitles[strCitekey] + " " @@ -1693,8 +1807,8 @@ def print_bibliography( # here followed the conversion to epub and the conversion to django.xml # both parts were removed and put into separate files. -intermediate_file_pre = "tmp_files/IntermediateXMLFile_pre.xml" -intermediate_file = "tmp_files/IntermediateXMLFile.xml" +intermediate_file_pre = TEMP_DIR / "IntermediateXMLFile_pre.xml" +intermediate_file = TEMP_DIR / "IntermediateXMLFile.xml" ergebnisdatei = open(intermediate_file_pre, "w") ergebnis = etree.tostring(xmlTree, pretty_print=True, encoding="unicode") ergebnisdatei.write(ergebnis) @@ -1726,20 +1840,23 @@ def print_bibliography( 'pagelabeldict' : dictPagelabels } -with open('tmp_files/data.pickle', 'wb') as f: +with open(TEMP_DIR / 'data.pickle', 'wb') as f: # Pickle the 'data' dictionary using the highest protocol available. pickle.dump(data_to_pickle, f, pickle.HIGHEST_PROTOCOL) -grep_command = "grep -A1 -B2 'argument of \\\EOAfn' %s-tralics.log" % args.filename +grep_command = "grep -A1 -B2 'argument of \\\EOAfn' {}".format( + # INPUT_PATH_NO_EXT + OUTPUT_DIR / (INPUT_PATH_NO_EXT + "-tralics.log") +) grep_command_arguments = shlex.split(grep_command) grep_result = subprocess.Popen(grep_command_arguments, stdout=subprocess.PIPE) grep_output = grep_result.stdout.read() if len(grep_output) > 0: - print("\n===\nFootnotes with paragraphs were found. They have to be replaced by the \EOAfnpar command.\n") - print(grep_output.decode("utf-8")) - print("===\n") + logging.info("\n===\nFootnotes with paragraphs were found. They have to be replaced by the \EOAfnpar command.\n") + logging.info(grep_output.decode("utf-8")) + logging.info("===\n") -print("Removing temporary files.") +logging.info("Removing temporary files.") cleanup() -print("Done!") +logging.info("Done!") sys.exit() diff --git a/eoatex2pdf.py b/eoatex2pdf.py new file mode 100755 index 0000000..f07a9e8 --- /dev/null +++ b/eoatex2pdf.py @@ -0,0 +1,89 @@ +#!/usr/bin/env python3 + +from utils.load_config import load_config, check_executable, exec_command, copy_dir_overwrite +import utils.libeoaconvert as libeoaconvert + +import argparse +from pathlib import Path +import logging +import os +import shutil +import pathlib + + +BASE_DIR = Path( __file__ ).resolve().parent +SCRIPT_PATH = Path( __file__ ) +SCRIPT_NAME = SCRIPT_PATH.stem + +def main( + input_file, + output_dir +): + check_executable( "xelatex" ) + if( not os.path.exists( output_dir ) ): + os.makedirs( output_dir ) + fixed_file_path = Path(output_dir) / input_file + libeoaconvert.enable_preamble( + input_file, + fixed_file_path, + "pdf" + ) + copy_dir_overwrite( + Path(input_file).parent / "texfiles", + Path(output_dir) / "texfiles" + ) + + exec_command( + f"xelatex --output-directory={output_dir} {fixed_file_path}", + ) + exec_command( + "biber {}".format( Path(input_file).stem ), + wd = output_dir + ) + exec_command( + f"xelatex --output-directory={output_dir} {fixed_file_path}", + ) + exec_command( + f"xelatex --output-directory={output_dir} {fixed_file_path}", + ) + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + parser.add_argument( + "-f", "--filename", + required = True, + help="Name of main EOATeX file" + ) + parser.add_argument( + "-o", "--output-dir", + default = "./latex-out" + ) + parser.add_argument( + "-c", "--config", + default = BASE_DIR / "config" / "eoaconvert.cfg", + help="Name of config file" + ) + parser.add_argument( + "-l", "--log-file", + default = Path("logs", SCRIPT_NAME).with_suffix(".log"), + help="logfile" + ) + parser.add_argument( + "--log-level", + default = "INFO", + help="log level: choose between DEBUG, INFO, WARNING, ERROR, CRITICAL" + ) + args = parser.parse_args() + + load_config( + args.config, + args.log_level, + args.log_file + ) + + main( + input_file = args.filename, + output_dir = args.output_dir + ) diff --git a/imxml2django.py b/imxml2django.py index 3f9f9d8..d56d24e 100755 --- a/imxml2django.py +++ b/imxml2django.py @@ -11,7 +11,7 @@ """ -from utils.load_config import load_config +from utils.load_config import load_config, exec_command, check_executable import utils.libeoaconvert as libeoaconvert import pickle @@ -27,14 +27,19 @@ from copy import deepcopy from lxml import etree from pathlib import Path +import time -BASE_DIR = Path( os.path.realpath(__file__) ).parent +BASE_DIR = Path( __file__ ).resolve().parent +SCRIPT_PATH = Path( __file__ ) +SCRIPT_NAME = SCRIPT_PATH.name ##################### # Parsing arguments # ##################### -parser = argparse.ArgumentParser() +parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter +) parser.add_argument( "-c", "--config", default = BASE_DIR / "config" / "eoaconvert.cfg", @@ -44,12 +49,12 @@ ) parser.add_argument( "-l", "--log-file", - default = "eoaconvert.log", + default = Path("logs", SCRIPT_NAME).with_suffix(".log"), help="logfile" ) parser.add_argument( "--log-level", - default = "DEBUG", + default = "INFO", help="log level: choose between DEBUG, INFO, WARNING, ERROR, CRITICAL" ) parser.add_argument( @@ -57,32 +62,103 @@ help="Check the publication.cfg for completeness.", action="store_true" ) +parser.add_argument( + "--publication-dir", + default = ".", + help="directory containing publication.cfg and the Cover.jpg" +) +parser.add_argument( + "-i", "--input-dir", + default = "./imxml", + help="directory containing the intermediate xml generated by eoatex2imxml.py" +) +parser.add_argument( + "-o", "--output-dir", + default = "./django", + help="where to dump all output files" +) args = parser.parse_args() config_file = args.CONFIG_FILE -logging.debug("The configfile is %s." % config_file) +print("The configfile is %s." % config_file) ################################## # Reading the configuration file # ################################## CONFIG = load_config( config_file, - args.log_file, args.log_level, + args.log_file, ) ######################## # Paths to executables # ######################## GM_PATH = "gm" -PDFCROP_EXEC = "pdfcrop" +PDFCROP_EXEC = "pdfcrop" # (part of texlive distribution): + +############################ +# Paths: +############################ +INPUT_DIR = Path( args.input_dir ) +OUTPUT_DIR = Path( args.output_dir ) +PUBLICATION_DIR = Path( args.publication_dir ) +TEMP_DIR = OUTPUT_DIR / "tmp_files" +CONVERT_DIR = OUTPUT_DIR / "CONVERT" +DEBUG_DIR = OUTPUT_DIR / "debug" + +############################ +# Paths to auxiliary files # +############################ +TRANSLATION_FILE = BASE_DIR / CONFIG['Auxiliaries']['TRANSLATIONS'] + +# prepare: +logging.debug("PATH: {}".format( os.environ['PATH'] )) +check_executable( GM_PATH ) +check_executable( PDFCROP_EXEC ) + +if not TEMP_DIR.exists(): + os.makedirs( TEMP_DIR ) +if not CONVERT_DIR.exists(): + os.makedirs( CONVERT_DIR ) +if not DEBUG_DIR.exists(): + os.makedirs( DEBUG_DIR ) + +# Check for folder and necessary files +if not os.path.exists(CONVERT_DIR): + logging.info(f"The directory {CONVERT_DIR} has not been created yet. Creating it for you") + time.sleep(1) + os.makedirs(CONVERT_DIR) +if not os.path.exists(CONVERT_DIR / "publication.cfg"): + logging.info(f"The publication.cfg file is missing in {CONVERT_DIR} directory.") + if os.path.exists(PUBLICATION_DIR / "publication.cfg"): + shutil.copy(PUBLICATION_DIR / "publication.cfg", CONVERT_DIR) + logging.info(f"Copied from {PUBLICATION_DIR}.") + else: + logging.error("Found no publication.cfg. Exiting") + sys.exit( 1 ) +if not os.path.exists(CONVERT_DIR / "cover.jpg"): + logging.info(f"The file cover.jpg in {CONVERT_DIR} directory is missing.") + if os.path.exists(PUBLICATION_DIR / "Cover.jpg"): + shutil.copy(PUBLICATION_DIR / "Cover.jpg", CONVERT_DIR / "cover.jpg") + logging.info("Copied from current directory.") + else: + logging.error("No coverfile found. You can create a temporary one with the mkimage.py script") + sys.exit( 1 ) +# if os.path.exists(os.getcwd() + "/pre_xml.tex") == False: +# print ("pre_xml fehlt") +# sys.exit() + +# if os.path.exists(os.getcwd() + "/pre_xml.tex") == False: +# print ("pre_xml fehlt") +# sys.exit() ########################################### # Loading data from first conversion step # ########################################### -with open('tmp_files' + os.path.sep + 'data.pickle', 'rb') as f: +with open(INPUT_DIR / "tmp_files" / 'data.pickle', 'rb') as f: data = pickle.load(f) dictChapters = data["chapterdict"] @@ -95,12 +171,16 @@ dictTables = data["tabdict"] dictPagelabels = data["pagelabeldict"] -if not os.path.exists(os.getcwd() + os.path.sep + "debug"): - os.mkdir(os.getcwd() + os.path.sep + "debug") +if not os.path.exists(DEBUG_DIR): + os.mkdir(DEBUG_DIR) -xmlTree = etree.parse("tmp_files" + os.path.sep + "IntermediateXMLFile.xml") +xmlTree = etree.parse( str(INPUT_DIR / "tmp_files" / "IntermediateXMLFile.xml") ) -libeoaconvert.debug_xml_here(xmlTree, "fresh") +libeoaconvert.debug_xml_here( + xmlTree, + "fresh", + DEBUG_DIR +) print(""" ############################################################################ @@ -108,11 +188,11 @@ ############################################################################ """) # Create django File Structure -if os.path.exists(os.getcwd() + os.path.sep + "CONVERT" + os.path.sep + "django") == False: - os.mkdir(os.getcwd() + os.path.sep + "CONVERT" + os.path.sep + "django") - os.mkdir(os.getcwd() + os.path.sep + "CONVERT" + os.path.sep + "django" + os.path.sep + "images") - os.mkdir(os.getcwd() + os.path.sep + "CONVERT" + os.path.sep + "django" + os.path.sep + "images" + os.path.sep + "embedded") - os.mkdir(os.getcwd() + os.path.sep + "CONVERT" + os.path.sep + "django" + os.path.sep + "files") +if not os.path.exists(CONVERT_DIR / "django"): + os.mkdir(CONVERT_DIR / "django") + os.mkdir(CONVERT_DIR / "django" / "images") + os.mkdir(CONVERT_DIR / "django" / "images" / "embedded") + os.mkdir(CONVERT_DIR / "django" / "files") # Create empty xmlTree xmlEOAdocument = etree.Element("EOAdocument") @@ -120,9 +200,13 @@ etree.strip_attributes(xmlTree, "noindent") # Remove temp-Tag etree.strip_tags(xmlTree, "temp") -libeoaconvert.debug_xml_here(xmlTree, "afterstriptags") +libeoaconvert.debug_xml_here( + xmlTree, + "afterstriptags", + DEBUG_DIR +) # Write Temporary XML-Maintree -ergebnisdatei = open("tmp_files" + os.path.sep + "Devel_django.xml", "w") +ergebnisdatei = open(TEMP_DIR / "Devel_django.xml", "w") ergebnis = etree.tostring(xmlTree, pretty_print=True, encoding="unicode") ergebnisdatei.write(ergebnis) ergebnisdatei.close() @@ -174,7 +258,7 @@ def debug_chapters(xmlEOAchapters): chap_num = 1 for chapter in xmlEOAchapters: - tmp_filename = "%s/debug/debug-chapter-%02d.xml" % (os.getcwd(), chap_num) + tmp_filename = DEBUG_DIR / ("debug-chapter-%02d.xml" % chap_num) tmp_file = open (tmp_filename, "w") tmp_result = etree.tostring(chapter, pretty_print=True, encoding="unicode") tmp_file.write(tmp_result) @@ -242,13 +326,20 @@ def djangoParseObject(xmlElement, indent=False, listtype=None, listnumber=0, uid logging.debug(xmlFigures) if xmlFigures is not None: for xmlFigure in xmlFigures: + # example 'images/1.jpg' strImageFileString = xmlFigure.find(".//file").text strImageFileString = strImageFileString.rstrip("\n") strImageFileDir = os.path.dirname(strImageFileString) strImageFileDir = re.sub("/", "", strImageFileDir) strImageFileName = os.path.basename(strImageFileString) strImageFileNamewoSuffix = os.path.splitext(strImageFileName)[0] - strCommand = GM_PATH + " convert " + os.getcwd() + "/" + strImageFileString + " -resize 250x250\\> " + os.getcwd() + "/CONVERT/django/images/embedded/" + strImageFileDir + strImageFileName + strCommand = "{cmd} convert {arg1} -resize 250x250\\> {arg2}".format( + cmd = GM_PATH, + arg1 = PUBLICATION_DIR / strImageFileString, + arg2 = CONVERT_DIR / "django/images/embedded" / (strImageFileDir + strImageFileName), + ) + + # strCommand = GM_PATH + " convert " + os.getcwd() + "/" + strImageFileString + " -resize 250x250\\> " + os.getcwd() + "/CONVERT/django/images/embedded/" + strImageFileDir + strImageFileName listArguments = shlex.split(strCommand) subprocess.check_output(listArguments, shell=False) tmpStrTail = xmlFigure.tail @@ -284,7 +375,11 @@ def djangoParseObject(xmlElement, indent=False, listtype=None, listnumber=0, uid strImageFileDir = re.sub("/", "", strImageFileDir) strImageFileName = os.path.basename(strImageFileString) strImageFileNamewoSuffix = os.path.splitext(strImageFileName)[0] - shutil.copy(os.getcwd() + "/" + strImageFileString, os.getcwd() + "/CONVERT/django/images/" + strImageFileDir + strImageFileName) + shutil.copy( + PUBLICATION_DIR / strImageFileString, + CONVERT_DIR / "django/images" / (strImageFileDir + strImageFileName) + ) + # shutil.copy(os.getcwd() + "/" + strImageFileString, os.getcwd() + "/CONVERT/django/images/" + strImageFileDir + strImageFileName) xmlEOAfigure.set("file", strImageFileDir + strImageFileName) xmlEOAfigure.set("width", xmlElement.find(".//width").text + "px;") xmlEOAfigure.set("order", str(intObjectNumber)) @@ -305,12 +400,21 @@ def djangoParseObject(xmlElement, indent=False, listtype=None, listnumber=0, uid strImageFileDir = re.sub("/", "", strImageFileDir) strImageFileName = os.path.basename(strImageFileString) strImageFileNamewoSuffix = os.path.splitext(strImageFileName)[0] - shutil.copy(os.getcwd() + "/" + strImageFileString, os.getcwd() + "/CONVERT/django/images/" + strImageFileDir + strImageFileName) + shutil.copy( + PUBLICATION_DIR / strImageFileString, + CONVERT_DIR / "django/images" / (strImageFileDir + strImageFileName) + ) logging.debug("Django figure %s." % strImageFileName) # yellow if os.path.splitext(strImageFileName)[1].lower() == ".pdf": logging.debug("Found a PDF file") - strImageFilepath = libeoaconvert.sanitizeImage(os.getcwd() + "/CONVERT/django/images/" + strImageFileDir + strImageFileName, GM_PATH, PDFCROP_EXEC) + strImageFilepath = libeoaconvert.sanitizeImage( + CONVERT_DIR / "django/images" / (strImageFileDir + strImageFileName), + TEMP_DIR, + # os.getcwd() + "/CONVERT/django/images/" + strImageFileDir + strImageFileName, + GM_PATH, + PDFCROP_EXEC + ) xmlEOAfigure.set("file", strImageFileDir + strImageFileName.replace(".pdf", ".png")) logging.debug("The filename is %s" % xmlEOAfigure.get("file")) else: @@ -546,7 +650,11 @@ def djangoParseObject(xmlElement, indent=False, listtype=None, listnumber=0, uid xmlEOAequation.set("filename", xmlEquation.get("filename")) if xmlEquation.get("label") is not None: xmlEOAequation.set("label", xmlEquation.get("label")) - shutil.copy(os.getcwd() + "/items/" + xmlEquation.get("filename"), os.getcwd() + "/CONVERT/django/images/") + shutil.copy( + INPUT_DIR / "items" /xmlEquation.get("filename"), + CONVERT_DIR / "django/images/" + ) + # shutil.copy(os.getcwd() + "/items/" + xmlEquation.get("filename"), os.getcwd() + "/CONVERT/django/images/") xmlEOAequation.set("TeX", xmlEquation.get("TeX")) if xmlEquation.get("label") is not None: xmlEOAequation.set("label", xmlEquation.get("label")) @@ -559,7 +667,11 @@ def djangoParseObject(xmlElement, indent=False, listtype=None, listnumber=0, uid intObjectNumber += 1 xmlEOAequation.set("number", "") xmlEOAequation.set("filename", xmlEquation.get("filename")) - shutil.copy(os.getcwd() + "/items/" + xmlEquation.get("filename"), os.getcwd() + "/CONVERT/django/images/") + shutil.copy( + INPUT_DIR / "items" / xmlEquation.get("filename"), + CONVERT_DIR / "django/images/" + ) + # shutil.copy(os.getcwd() + "/items/" + xmlEquation.get("filename"), os.getcwd() + "/CONVERT/django/images/") xmlEOAequation.set("TeX", xmlEquation.get("TeX")) xmlResult.append(xmlEOAequation) elif xmlElement.tag == "EOAequationnonumber": @@ -569,7 +681,11 @@ def djangoParseObject(xmlElement, indent=False, listtype=None, listnumber=0, uid intObjectNumber += 1 xmlResult.set("filename", xmlElement.get("filename")) xmlResult.set("TeX", xmlElement.get("TeX")) - shutil.copy(os.getcwd() + "/items/" + xmlElement.get("filename"), os.getcwd() + "/CONVERT/django/images/") + shutil.copy( + INPUT_DIR / "items" / xmlElement.get("filename"), + CONVERT_DIR / "django/images/" + ) + # shutil.copy(os.getcwd() + "/items/" + xmlElement.get("filename"), os.getcwd() + "/CONVERT/django/images/") xmlResult.set("number", "") elif xmlElement.findall(".//EOAequation"): # Process various Equations which may be encapsulated within

@@ -584,7 +700,11 @@ def djangoParseObject(xmlElement, indent=False, listtype=None, listnumber=0, uid xmlEOAequation.set("TeX", xmlEquation.get("TeX")) if xmlEquation.get("uid") is not None: xmlEOAequation.set("uid", xmlEquation.get("uid")) - shutil.copy(os.getcwd() + "/items/" + xmlEquation.get("filename"), os.getcwd() + "/CONVERT/django/images/") + shutil.copy( + INPUT_DIR / "items" / xmlEquation.get("filename"), + CONVERT_DIR / "django/images/" + ) + # shutil.copy(os.getcwd() + "/items/" + xmlEquation.get("filename"), os.getcwd() + "/CONVERT/django/images/") xmlEOAequation.set("filename", xmlEquation.get("filename")) xmlResult.append(xmlEOAequation) elif xmlElement.tag == "EOAequation": @@ -596,7 +716,11 @@ def djangoParseObject(xmlElement, indent=False, listtype=None, listnumber=0, uid xmlResult.set("TeX", xmlElement.get("TeX")) if xmlElement.get("uid") is not None: xmlResult.set("uid", xmlElement.get("uid")) - shutil.copy(os.getcwd() + "/items/" + xmlElement.get("filename"), os.getcwd() + "/CONVERT/django/images/") + shutil.copy( + INPUT_DIR / "items" / xmlElement.get("filename"), + CONVERT_DIR / "django/images/" + ) + # shutil.copy(os.getcwd() + "/items/" + xmlElement.get("filename"), os.getcwd() + "/CONVERT/django/images/") xmlResult.set("filename", xmlElement.get("filename")) elif xmlElement.tag == "div3": xmlResult = etree.Element("EOAsubsection") @@ -669,7 +793,7 @@ def djangoParseObject(xmlElement, indent=False, listtype=None, listnumber=0, uid intObjectNumber += 1 xmlResult = xmlElement else: - print("SPECIAL: %s - %s" % (xmlElement, xmlElement.text)) + logging.info("SPECIAL: %s - %s" % (xmlElement, xmlElement.text)) xmlResult = xmlElement if indent==True: @@ -751,7 +875,7 @@ def make_index(index_hits, index_type): xmlEOAindexentry.set("display", dictIndex[strSortedKey]["display_string"]) for xmlMainelement in dictIndex[strSortedKey]["listMainentries"]: - print(xmlMainelement.get("chapterorder") + ":" + xmlMainelement.get("elementorder")) + logging.info(xmlMainelement.get("chapterorder") + ":" + xmlMainelement.get("elementorder")) xmlEOAindexlink = etree.Element("EOAindexlink") xmlEOAindexlink.set("chapterorder", xmlMainelement.get("chapterorder")) @@ -822,7 +946,7 @@ def check_publication_cfg(configuration_file): config = configparser.ConfigParser() try: - config.read("CONVERT" + os.path.sep + configuration_file) + config.read(configuration_file) except configparser.ParsingError as err: logging.error(err) @@ -869,8 +993,8 @@ def check_publication_cfg(configuration_file): xmlEOAchapter.set("number", dictChapters[xmlChapter.get("id")]) else: xmlEOAchapter.set("number", "") - print("-----------------------------------------------------") - print(gettext(xmlChapterHeadline)) + logging.info("-----------------------------------------------------") + logging.info(gettext(xmlChapterHeadline)) xmlEOAchapter.append(djangoParseHeadline(xmlChapterHeadline)) # Deal with EOAauthor if xmlChapter.find(".//EOAauthor") is not None: @@ -959,10 +1083,14 @@ def check_publication_cfg(configuration_file): xmlEOAchapter.append(djangoParseObject(xmlChapterChild)) intChapterNumber += 1 -libeoaconvert.debug_xml_here(xmlTree, "afterchapter") +libeoaconvert.debug_xml_here( + xmlTree, + "afterchapter", + DEBUG_DIR +) -print("----------------------------------------------") -print("Processing Facsimile Parts") +logging.info("----------------------------------------------") +logging.info("Processing Facsimile Parts") listModes = ["text", "textPollux", "xml"] strBasicURL = "http://mpdl-system.mpiwg-berlin.mpg.de/mpdl/interface/page-fragment.xql?document=" @@ -998,7 +1126,11 @@ def check_publication_cfg(configuration_file): strImageFileDir = os.path.dirname(strImageFile) strImageFileDir = re.sub("/", "", strImageFileDir) strImageFileName = os.path.basename(strImageFile) - shutil.copy(os.getcwd() + "/" + strImageFile, os.getcwd() + "/CONVERT/django/images/" + strImageFileDir + strImageFileName) + shutil.copy( + INPUT_DIR / strImageFile, + CONVERT_DIR / "django/images" / (strImageFileDir + strImageFileName) + ) + # shutil.copy(os.getcwd() + "/" + strImageFile, os.getcwd() + "/CONVERT/django/images/" + strImageFileDir + strImageFileName) intObjectNumber += 1 # Download transcription for this Page if xmlFacsimilepage.find(".//fulltext").text is not None: @@ -1023,7 +1155,12 @@ def check_publication_cfg(configuration_file): intFacImgNumber = 1 for xmlImage in xmlImages: strImageSrc = xmlImage.get("src") - strCommand = "curl " + strImageSrc + " -o CONVERT/django/images/facsupplements_" + str(intFacNumber) + "_" + str(intFacPageNumber) + "_" + str(intFacImgNumber) + ".jpg" + strCommand = "{cmd} {src} -o {dst}".format( + cmd = curl, + src = strImageSrc, + dst = CONVERT_DIR / "django/images" / ("facsupplements_" + str(intFacNumber) + "_" + str(intFacPageNumber) + "_" + str(intFacImgNumber) + ".jpg") + ) + # strCommand = "curl " + strImageSrc + " -o CONVERT/django/images/facsupplements_" + str(intFacNumber) + "_" + str(intFacPageNumber) + "_" + str(intFacImgNumber) + ".jpg" listArguments = shlex.split(strCommand) try: exeShell = subprocess.check_output(listArguments, shell=False, universal_newlines=True) @@ -1041,8 +1178,8 @@ def check_publication_cfg(configuration_file): xmlEOAfacsimilepart.append(xmlEOAfacsimilepage) intFacNumber =+ 1 etree.strip_tags(xmlDjangoTree, "temp") -print("----------------------------------------------") -print("Processing and linking Footnotes for django") +logging.info("----------------------------------------------") +logging.info("Processing and linking Footnotes for django") def bring_footnote_down_django(footnote, fragment, footnote_number, object_number, unique_id, destination): """ @@ -1095,7 +1232,6 @@ def bring_footnote_down_django(footnote, fragment, footnote_number, object_numbe debug_chapters(xmlEOAchapters) -TRANSLATION_FILE = BASE_DIR / CONFIG['Auxiliaries']['TRANSLATIONS'] translation_xml = etree.parse( str( TRANSLATION_FILE ) ) dictLangFootnotes = translation_xml.find("//entry[@name='footnotes']").attrib @@ -1172,7 +1308,11 @@ def bring_footnote_down_django(footnote, fragment, footnote_number, object_numbe xmlEOAfootnote.text = strFootnoteText for xmlElement in xmlFootnoteContent: if xmlElement.tag == "EOAequationnonumber": - shutil.copy(os.getcwd() + "/items/" + xmlElement.get("filename"), os.getcwd() + "/CONVERT/django/images/") + shutil.copy( + PUBLICATION_DIR / "items" / xmlElement.get("filename"), + CONVERT_DIR / "django/images/" + ) + # shutil.copy(os.getcwd() + "/items/" + xmlElement.get("filename"), os.getcwd() + "/CONVERT/django/images/") xmlEOAfootnote.append(xmlElement) xmlResult.append(xmlEOAfootnote) intFootnoteNumber += 1 @@ -1181,15 +1321,15 @@ def bring_footnote_down_django(footnote, fragment, footnote_number, object_numbe # Remove temp-Tag etree.strip_tags(xmlDjangoTree, "temp") -# print("----------------------------------------------") -# print("Processing Verses") +# logging.info("----------------------------------------------") +# logging.info("Processing Verses") # for xmlEOAchapter in xmlEOAchapters: # verses = xmlEOAchapter.findall(".//EOAverse") -# print("Found lotsa verses: ", len(verses)) +# logging.info("Found lotsa verses: ", len(verses)) -print("----------------------------------------------") -print("Processing various Elements") +logging.info("----------------------------------------------") +logging.info("Processing various Elements") for xmlEOAchapter in xmlEOAchapters: xmlEmphasized = xmlEOAchapter.findall(".//hi") @@ -1243,7 +1383,11 @@ def bring_footnote_down_django(footnote, fragment, footnote_number, object_numbe xmlInlineEquation.tag = "img" xmlInlineEquation.set("class", "EOAineq") xmlInlineEquation.set("alt", xmlInlineEquation.get("TeX")) - shutil.copy(os.getcwd() + "/items/" + xmlInlineEquation.get("src"), os.getcwd() + "/CONVERT/django/images/" + xmlInlineEquation.get("src")) + shutil.copy( + INPUT_DIR / "items" / xmlInlineEquation.get("src"), + CONVERT_DIR / "django/images" / xmlInlineEquation.get("src") + ) + # shutil.copy(os.getcwd() + "/items/" + xmlInlineEquation.get("src"), os.getcwd() + "/CONVERT/django/images/" + xmlInlineEquation.get("src")) # Convert EOAchem into appropriate IMG-Tags xml_inline_chems = xmlEOAchapter.findall(".//EOAchem") @@ -1251,7 +1395,11 @@ def bring_footnote_down_django(footnote, fragment, footnote_number, object_numbe xml_inline_chem.tag = "img" xml_inline_chem.set("class", "EOAineq") xml_inline_chem.set("alt", xml_inline_chem.get("TeX")) - shutil.copy(os.getcwd() + "/items/" + xml_inline_chem.get("src"), os.getcwd() + "/CONVERT/django/images/" + xml_inline_chem.get("src")) + shutil.copy( + INPUT_DIR / "items" / xml_inline_chem.get("src"), + CONVERT_DIR / "django/images" / xml_inline_chem.get("src") + ) + # shutil.copy(os.getcwd() + "/items/" + xml_inline_chem.get("src"), os.getcwd() + "/CONVERT/django/images/" + xml_inline_chem.get("src")) # Convert EOAinline into appropriate IMG-Tags xmlInlineElements = xmlEOAchapter.findall(".//EOAinline") @@ -1265,9 +1413,14 @@ def bring_footnote_down_django(footnote, fragment, footnote_number, object_numbe strInlineElementDirName = os.path.dirname(strInlineElementFilePath) xmlInlineElement.text = None xmlInlineElement.set("src", strInlineElementDirName + strInlineElementFileName) - shutil.copy(os.getcwd() + "/" + strInlineElementDirName + "/" + strInlineElementFileName, os.getcwd() + "/CONVERT/django/images/embedded/" + strInlineElementDirName + strInlineElementFileName) - strNewImagePath = os.getcwd() + "/CONVERT/django/images/embedded/" + strInlineElementDirName + strInlineElementFileName - strCommand = GM_PATH + " convert " + strNewImagePath + " -resize 20x20 " + strNewImagePath + shutil.copy( + PUBLICATION_DIR / strInlineElementDirName / strInlineElementFileName, + CONVERT_DIR / "django/images/embedded" / (strInlineElementDirName + strInlineElementFileName) + ) + # shutil.copy(os.getcwd() + "/" + strInlineElementDirName + "/" + strInlineElementFileName, os.getcwd() + "/CONVERT/django/images/embedded/" + strInlineElementDirName + strInlineElementFileName) + strNewImagePath = CONVERT_DIR / "django/images/embedded" / (strInlineElementDirName + strInlineElementFileName) + # strNewImagePath = os.getcwd() + "/CONVERT/django/images/embedded/" + strInlineElementDirName + strInlineElementFileName + strCommand = GM_PATH + " convert " + str(strNewImagePath) + " -resize 20x20 " + str(strNewImagePath) listArguments = shlex.split(strCommand) subprocess.check_output(listArguments, shell=False) # Change EOAcitenumeric into a span to create approriate link @@ -1295,8 +1448,8 @@ def bring_footnote_down_django(footnote, fragment, footnote_number, object_numbe xmlEOAcitemanual.set("class", "citation") xmlEOAcitemanual.set("rel", "popover") -print("----------------------------------------------") -print("Processing Cross References") +logging.info("----------------------------------------------") +logging.info("Processing Cross References") # Substitute References with their targets (wit links) for xmlEOAchapter in xmlEOAchapters: @@ -1425,8 +1578,8 @@ def bring_footnote_down_django(footnote, fragment, footnote_number, object_numbe xmlReference.tag = "a" xmlReference.set("href", "../" + strChapterOrder + "/index.html#" + strObjectOrder) -print("----------------------------------------------") -print("Processing Page References") +logging.info("----------------------------------------------") +logging.info("Processing Page References") for xmlEOAchapter in xmlEOAchapters: xmlPageReferences = xmlEOAchapter.findall(".//EOApageref") @@ -1454,8 +1607,8 @@ def bring_footnote_down_django(footnote, fragment, footnote_number, object_numbe logging.debug(strFacsimileOrder) xmlReference.set("href", "../" + strPartOrder + "/" + strFacsimileOrder + ".html") -print("----------------------------------------------") -print("Normalizing Index Entries") +logging.info("----------------------------------------------") +logging.info("Normalizing Index Entries") for xmlEOAchapter in xmlEOAchapters: xml_EOA_indices = xmlEOAchapter.xpath(".//EOAindex | .//EOAindexperson | .//EOAindexlocation") @@ -1515,12 +1668,12 @@ def bring_footnote_down_django(footnote, fragment, footnote_number, object_numbe xmlEOAindex.set("elementorder", xmlParent.get("order")) if xmlParent.get("order") != None and xmlParent.tag == "EOAchapter": xmlEOAindex.set("chapterorder", xmlParent.get("order")) - # print(etree.tostring(xmlEOAindex)) + # logging.info(etree.tostring(xmlEOAindex)) etree.strip_tags(xmlDjangoTree, "temp") -print("----------------------------------------------") -print("Removing Duplicate Index Entries") +logging.info("----------------------------------------------") +logging.info("Removing Duplicate Index Entries") for xmlEOAchapter in xmlEOAchapters: for xmlChild in xmlEOAchapter.iterchildren(): @@ -1539,8 +1692,8 @@ def bring_footnote_down_django(footnote, fragment, footnote_number, object_numbe else: dictEntries[strEntry] = listEntry -print("----------------------------------------------") -print("Removing Index Entries in Footnotes") +logging.info("----------------------------------------------") +logging.info("Removing Index Entries in Footnotes") for xmlEOAchapter in xmlEOAchapters: for xmlChild in xmlEOAchapter.iterchildren(): @@ -1552,31 +1705,43 @@ def bring_footnote_down_django(footnote, fragment, footnote_number, object_numbe xmlEOAindex.tag = "temp" logging.debug("Found index in footnote") -print("----------------------------------------------") -print("Sorting and Creating Regular Index") +logging.info("----------------------------------------------") +logging.info("Sorting and Creating Regular Index") xml_regular_EOAindices = xmlDjangoTree.findall("//EOAindex") if len(xml_regular_EOAindices) != 0:# is not None: logging.debug("Sorting %s entries for regular index." % str(len(xml_regular_EOAindices))) xml_eoa_print_regular_index = make_index(xml_regular_EOAindices, index_type = "regular") -libeoaconvert.debug_xml_here(xmlDjangoTree, "djangotree") -libeoaconvert.debug_xml_here(xmlEOAdocument, "xmleoadocument") -libeoaconvert.debug_xml_here(xmlTree, "xmltree") +libeoaconvert.debug_xml_here( + xmlDjangoTree, + "djangotree", + DEBUG_DIR +) +libeoaconvert.debug_xml_here( + xmlEOAdocument, + "xmleoadocument", + DEBUG_DIR +) +libeoaconvert.debug_xml_here( + xmlTree, + "xmltree", + DEBUG_DIR +) # If EOAprintindex is found, append xml_eoa_print_regular_index to xmlEOAdocument xmlPrintindex = xmlTree.find(".//EOAprintindex") if xmlPrintindex is not None != 0: # Remove

from xmlDjangoTree - print("found an index") + logging.info("found an index") xmlPrintindex.tag = "temp" xmlPrintindex.getparent().tag = "temp" xmlEOAdocument.append(xml_eoa_print_regular_index) else: - print("found no index") + logging.info("found no index") -print("----------------------------------------------") -print("Sorting and Creating Person Index") +logging.info("----------------------------------------------") +logging.info("Sorting and Creating Person Index") xml_person_EOAindices = xmlDjangoTree.findall("//EOAindexperson") if len(xml_person_EOAindices) != 0:# is not None: @@ -1592,8 +1757,8 @@ def bring_footnote_down_django(footnote, fragment, footnote_number, object_numbe xmlEOAdocument.append(xml_eoa_print_person_index) # doing the same for location index -print("----------------------------------------------") -print("Sorting and Creating Location Index") +logging.info("----------------------------------------------") +logging.info("Sorting and Creating Location Index") xml_location_EOAindices = xmlDjangoTree.findall("//EOAindexlocation") if len(xml_location_EOAindices) != 0:# is not None: @@ -1619,13 +1784,13 @@ def bring_footnote_down_django(footnote, fragment, footnote_number, object_numbe # Save xmlDjangoTree # ############################################################################ -tmpFile = open("CONVERT/django/Django.xml", "w") +tmpFile = open( CONVERT_DIR / "django/Django.xml", "w") tmpResult = etree.tostring(xmlDjangoTree, pretty_print=True, encoding="unicode") tmpFile.write(tmpResult) tmpFile.close() logging.debug("Wrote Django.xml") if args.checkpublicationcfg: - check_publication_cfg("publication.cfg") + check_publication_cfg(PUBLICATION_DIR / "publication.cfg") else: pass diff --git a/imxml2epub.py b/imxml2epub.py index 15506e3..67c33d0 100755 --- a/imxml2epub.py +++ b/imxml2epub.py @@ -24,15 +24,19 @@ from copy import deepcopy from lxml import etree from pathlib import Path +import configparser -BASE_DIR = Path( os.path.realpath(__file__) ).parent -SCRIPT_NAME = Path( __file__).stem +BASE_DIR = Path( __file__ ).resolve().parent +SCRIPT_PATH = Path( __file__ ) +SCRIPT_NAME = SCRIPT_PATH.stem ##################### # Parsing arguments # ##################### -parser = argparse.ArgumentParser() +parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter +) parser.add_argument( "-c", "--config", dest="CONFIG_FILE", @@ -42,7 +46,7 @@ ) parser.add_argument( "-l", "--log-file", - default = SCRIPT_NAME + ".log" , + default = Path( "logs", SCRIPT_NAME).with_suffix( ".log" ), help="logfile" ) parser.add_argument( @@ -50,6 +54,23 @@ default = "DEBUG", help="log level: choose between DEBUG, INFO, WARNING, ERROR, CRITICAL" ) + +parser.add_argument( + "--publication-dir", + default = ".", + help="directory containing publication.cfg and the Cover.jpg" +) +parser.add_argument( + "-i", "--input-dir", + default = "./imxml", + help="directory containing the intermediate xml generated by eoatex2imxml.py" +) +parser.add_argument( + "-o", "--output-dir", + default = "./epub", + help="where to dump all output files" +) + parser.add_argument( "-f", "--font", help="Font to be used, default is TeX Gyre Termes", @@ -65,14 +86,6 @@ config_file = args.CONFIG_FILE -''' -if args.CONFIG_FILE is not None: - CONFIG_FILE = os.path.abspath(args.CONFIG_FILE) -else: - # CONFIG_FILE = "/Users/kthoden/EOAKram/dev/EOASkripts/Skripten/eoaconvert.cfg" - CONFIG_FILE = os.path.dirname(sys.argv[0]) + "/config/eoaconvert.cfg" -''' - print("The config file is ", config_file) ################################## @@ -81,22 +94,16 @@ CONFIG = load_config( config_file, - args.log_file, args.log_level, + args.log_file, ) -# CONFIG = configparser.ConfigParser() -# CONFIG.read(CONFIG_FILE) - -# CONFIG = configparser.ConfigParser() -# CONFIG.read("/Users/kthoden/EOAKram/dev/EOASkripts/Skripten/eoaconvert.cfg") - ######################## # Paths to executables # ######################## -EPUB_FILES = os.path.dirname(sys.argv[0]) + "/data/epub_files/" +EPUB_FILES = BASE_DIR / "data/epub_files/" +# EPUB_FILES = os.path.dirname(sys.argv[0]) + "/data/epub_files/" -# TEMPLATE_PATH = CONFIG['Auxiliaries']['template_path'] GM_PATH = "gm" PDFCROP_EXEC = "pdfcrop" # TL_PATH = CONFIG['Executables']['texlive'] @@ -106,16 +113,58 @@ # SUPPORT_PATH = CONFIG['Executables']['support_path'] # AUX_TeX_FILES_PATH = CONFIG['Executables']['aux_tex_files_path'] -print(GM_PATH) +############################ +# Paths: +############################ +INPUT_DIR = Path( args.input_dir ) +OUTPUT_DIR = Path( args.output_dir ) +PUBLICATION_DIR = Path( args.publication_dir ) +TEMP_DIR = OUTPUT_DIR / "tmp_files" +CONVERT_DIR = OUTPUT_DIR / "CONVERT" +# DEBUG_DIR = OUTPUT_DIR / "debug" + +if not TEMP_DIR.exists(): + os.makedirs( TEMP_DIR ) +if not CONVERT_DIR.exists(): + os.makedirs( CONVERT_DIR ) + +# Check for folder and necessary files +if not os.path.exists(CONVERT_DIR): + logging.info(f"The directory {CONVERT_DIR} has not been created yet. Creating it for you") + time.sleep(1) + os.makedirs(CONVERT_DIR) +if not os.path.exists(CONVERT_DIR / "publication.cfg"): + logging.info(f"The publication.cfg file is missing in {CONVERT_DIR} directory.") + if os.path.exists(PUBLICATION_DIR / "publication.cfg"): + shutil.copy(PUBLICATION_DIR / "publication.cfg", CONVERT_DIR) + logging.info(f"Copied from {PUBLICATION_DIR}.") + else: + logging.error("Found no publication.cfg. Exiting") + sys.exit( 1 ) +if not os.path.exists(CONVERT_DIR / "cover.jpg"): + logging.info(f"The file cover.jpg in {CONVERT_DIR} directory is missing.") + if os.path.exists(PUBLICATION_DIR / "Cover.jpg"): + shutil.copy(PUBLICATION_DIR / "Cover.jpg", CONVERT_DIR / "cover.jpg") + logging.info("Copied from current directory.") + else: + logging.error("No coverfile found. You can create a temporary one with the mkimage.py script") + sys.exit( 1 ) +# if os.path.exists(os.getcwd() + "/pre_xml.tex") == False: +# print ("pre_xml fehlt") +# sys.exit() + lang_dict = {"fig" : {"en" : "Fig.", "de" : "Abb."}} +Datei = open( TEMP_DIR / 'intermediate.log', 'w') +''' tmpDir = os.getcwd() + "/tmp_files/" Datei = open('tmp_files/intermediate.log', 'w') +''' -xmlTree = etree.parse("tmp_files/IntermediateXMLFile.xml") +xmlTree = etree.parse( str(INPUT_DIR / "tmp_files" / "IntermediateXMLFile.xml") ) -with open('tmp_files/data.pickle', 'rb') as f: +with open(INPUT_DIR / "tmp_files" / 'data.pickle', 'rb') as f: data = pickle.load(f) dictSections = data["secdict"] @@ -219,7 +268,7 @@ def addToContentopf(contentopf, Filename, FileID, Mediatype): xmlItem = etree.Element("item") xmlItem.set("id", FileID) xmlItem.set("media-type", dictMediatypes[Mediatype]) - xmlItem.set("href", Filename) + xmlItem.set("href", str(Filename)) xmlManifest.append(xmlItem) # logging.debug("Added %s, with FileID %s" % (Filename, FileID)) @@ -260,20 +309,25 @@ def addToTocncx(tocncx, Label, intTechnicalChapterNumber): ############################################################## # Create folder structure for ebook -if os.path.exists(os.getcwd() + "/CONVERT/epub") == False: - os.mkdir(os.getcwd() + "/CONVERT/epub") - os.mkdir(os.getcwd() + "/CONVERT/epub/META-INF") - os.mkdir(os.getcwd() + "/CONVERT/epub/OEBPS") - os.mkdir(os.getcwd() + "/CONVERT/epub/OEBPS/images") - os.mkdir(os.getcwd() + "/CONVERT/epub/OEBPS/fonts") +if not os.path.exists(CONVERT_DIR / "epub"): + os.mkdir(CONVERT_DIR / "epub") + os.mkdir(CONVERT_DIR / "epub" / "META-INF" ) + os.mkdir(CONVERT_DIR / "epub" / "OEBPS" ) + os.mkdir(CONVERT_DIR / "epub" / "OEBPS" / "images" ) + os.mkdir(CONVERT_DIR / "epub" / "OEBPS" / "fonts" ) +# Copy containter.xml and mimetype +shutil.copy(EPUB_FILES / "epubcontainer.xml", CONVERT_DIR / "epub/META-INF/container.xml") +shutil.copy(EPUB_FILES / "epubmimetype", CONVERT_DIR / "epub/mimetype") +''' # Copy containter.xml and mimetype shutil.copy(EPUB_FILES + "epubcontainer.xml", os.getcwd() + "/CONVERT/epub/META-INF/container.xml") shutil.copy(EPUB_FILES + "epubmimetype", os.getcwd() + "/CONVERT/epub/mimetype") +''' # Preparing content.opf xmlContentopfParser = etree.XMLParser(no_network=False,load_dtd=False) -contentopf = etree.parse(EPUB_FILES + "epubcontentopf.xml", xmlContentopfParser) +contentopf = etree.parse( str(EPUB_FILES/"epubcontentopf.xml"), xmlContentopfParser) # This list includes all files which have already been included to avoid duplicates listContentopf = [] @@ -286,30 +340,56 @@ def addToTocncx(tocncx, Label, intTechnicalChapterNumber): if args.font == "termes": font_files = termes_fonts - shutil.copy(EPUB_FILES + "eoa-epub-termes.css", os.getcwd() + "/CONVERT/epub/OEBPS/eoa-epub.css") + shutil.copy( + EPUB_FILES / "eoa-epub-termes.css", + CONVERT_DIR / "epub/OEBPS/eoa-epub.css" + ) + # shutil.copy(EPUB_FILES + "eoa-epub-termes.css", os.getcwd() + "/CONVERT/epub/OEBPS/eoa-epub.css") elif args.font == "libertine": - shutil.copy(EPUB_FILES + "eoa-epub-libertine.css", os.getcwd() + "/CONVERT/epub/OEBPS/eoa-epub.css") + shutil.copy( + EPUB_FILES / "eoa-epub-libertine.css", + CONVERT_DIR / "epub/OEBPS/eoa-epub.css" + ) + # shutil.copy(EPUB_FILES + "eoa-epub-libertine.css", os.getcwd() + "/CONVERT/epub/OEBPS/eoa-epub.css") font_files = libertine_fonts else: logging.info("Font not recognized, falling back to default.") - shutil.copy(EPUB_FILES + "eoa-epub-termes.css", os.getcwd() + "/CONVERT/epub/OEBPS/eoa-epub.css") + shutil.copy( + EPUB_FILES / "eoa-epub-termes.css", + CONVERT_DIR / "epub/OEBPS/eoa-epub.css" + ) + # shutil.copy(EPUB_FILES + "eoa-epub-termes.css", os.getcwd() + "/CONVERT/epub/OEBPS/eoa-epub.css") otf_id_counter = 1 txt_id_counter = 1 for fontfile in font_files: - shutil.copy(EPUB_FILES + fontfile, os.getcwd() + "/CONVERT/epub/OEBPS/fonts/") + shutil.copy( + EPUB_FILES / fontfile, + CONVERT_DIR / "epub/OEBPS/fonts/" + ) + # shutil.copy(EPUB_FILES + fontfile, os.getcwd() + "/CONVERT/epub/OEBPS/fonts/") base_file_name, file_extension = os.path.splitext(fontfile) if file_extension == ".otf": - contentopf = addToContentopf(contentopf, "fonts/" + fontfile, "otf-font" + str(otf_id_counter), file_extension[1:]) + contentopf = addToContentopf( + contentopf, + Path("fonts") / fontfile, + "otf-font" + str(otf_id_counter), + file_extension[1:] + ) otf_id_counter += 1 elif file_extension == ".txt": - contentopf = addToContentopf(contentopf, "fonts/" + fontfile, "font-txt" + str(txt_id_counter), file_extension[1:]) + contentopf = addToContentopf( + contentopf, + Path("fonts") / fontfile, + "font-txt" + str(txt_id_counter), + file_extension[1:] + ) txt_id_counter += 1 else: - print("Other file found. Exiting") + logging.error("Other file found. Exiting") sys.exit() # shutil.copy(EPUB_FILES + "texgyretermes-bold.otf", os.getcwd() + "/CONVERT/epub/OEBPS/fonts/") @@ -325,14 +405,16 @@ def addToTocncx(tocncx, Label, intTechnicalChapterNumber): # Preparing toc.ncx xmlTocncxParser = etree.XMLParser(no_network=False,load_dtd=False) -tocncx = etree.parse(EPUB_FILES + "epubtocncx.xml", xmlTocncxParser) +tocncx = etree.parse(str(EPUB_FILES / "epubtocncx.xml"), xmlTocncxParser) + -print("-----------------------------------------------------") -print("Preparing content.opf") + +logging.info("-----------------------------------------------------") +logging.info("Preparing content.opf") xmlMetadata = contentopf.find(".//{http://www.idpf.org/2007/opf}metadata") # Prepare Metadata based on Publication.cfg cfgPublication = configparser.RawConfigParser() -cfgPublication.read(os.getcwd() + "/CONVERT/publication.cfg") +cfgPublication.read(CONVERT_DIR / "publication.cfg") # Prepare Author String strAuthorString = cfgPublication.get("Authors", "Author1") if cfgPublication.get("Authors", "Author2") != "": @@ -404,20 +486,25 @@ def addToTocncx(tocncx, Label, intTechnicalChapterNumber): xmlItem.set("href", "images/cover.jpg") xmlItem.set("media-type", "image/jpeg") xmlManifest.append(xmlItem) -shutil.copy(os.getcwd() + "/CONVERT/Cover.jpg", os.getcwd() + "/CONVERT/epub/OEBPS/images/") +shutil.copy( + CONVERT_DIR / "cover.jpg", + CONVERT_DIR / "epub/OEBPS/images/" +) +# shutil.copy(os.getcwd() + "/CONVERT/Cover.jpg", os.getcwd() + "/CONVERT/epub/OEBPS/images/") xmlItem = etree.Element("item") xmlItem.set("id", "cover") xmlItem.set("href", "cover.xhtml") xmlItem.set("media-type", "application/xhtml+xml") xmlManifest.append(xmlItem) -shutil.copy(EPUB_FILES + "epubcover.xhtml", os.getcwd() + "/CONVERT/epub/OEBPS/cover.xhtml") -print("-------------------") -print("Preparing intro.xhtml") -print("-------------------") +shutil.copy(EPUB_FILES / "epubcover.xhtml", CONVERT_DIR / "epub/OEBPS/cover.xhtml") +# shutil.copy(EPUB_FILES + "epubcover.xhtml", os.getcwd() + "/CONVERT/epub/OEBPS/cover.xhtml") +logging.info("-------------------") +logging.info("Preparing intro.xhtml") +logging.info("-------------------") if cfgPublication.get("Technical", "Serie") == "Sources": - tmpFilePath = EPUB_FILES + "epubintro-sources.xhtml" + tmpFilePath = EPUB_FILES / "epubintro-sources.xhtml" else: - tmpFilePath = EPUB_FILES + "epubintro.xhtml" + tmpFilePath = EPUB_FILES / "epubintro.xhtml" tmpFile = open(tmpFilePath, "r") strIntroHTML = tmpFile.read() tmpFile.close() @@ -437,12 +524,13 @@ def addToTocncx(tocncx, Label, intTechnicalChapterNumber): # else: # strIntroHTML = re.sub("AdditionalInformation", "", strIntroHTML) -tmpFilePath = os.getcwd() + "/CONVERT/epub/OEBPS/intro.xhtml" +tmpFilePath = CONVERT_DIR / "epub/OEBPS/intro.xhtml" +# tmpFilePath = os.getcwd() + "/CONVERT/epub/OEBPS/intro.xhtml" tmpFile = open(tmpFilePath, "w") tmpFile.write(strIntroHTML) -print("-------------------") -print("Preparing toc.ncx") -print("-------------------") +logging.info("-------------------") +logging.info("Preparing toc.ncx") +logging.info("-------------------") xmlHead = tocncx.find("//{http://www.daisy.org/z3986/2005/ncx/}head") xmlMeta = etree.Element("meta") xmlMeta.set("name", "dtb:uid") @@ -472,14 +560,14 @@ def addToTocncx(tocncx, Label, intTechnicalChapterNumber): # Convert Chapters, Sections, Subsections and Subsubsections to h1, h2, h3, h4 # Insert Number from Dictionary where needed -print("-----------------------------------------------------") -print("Convert EOAChapter to H1") +logging.info("-----------------------------------------------------") +logging.info("Convert EOAChapter to H1") for xmlChapter in xmlChapters: xmlChapter.find("head").tag = "h1" if xmlChapter.get("rend") != "nonumber": idChapter = xmlChapter.get("id") - # print(idChapter + " konvertierung into h1") - # print(dictChapters[idChapter]) + # logging.info(idChapter + " konvertierung into h1") + # logging.info(dictChapters[idChapter]) strHeadline = xmlChapter.find("h1").text or "" xmlChapter.find("h1").text = str(dictChapters[idChapter]) + ". " + strHeadline if xmlChapter.find(".//EOAauthor") is not None: @@ -491,32 +579,32 @@ def addToTocncx(tocncx, Label, intTechnicalChapterNumber): xmlChapter.find(".//EOAauthor").text = "" xmlChapter = etree.strip_tags(xmlChapter, "EOAauthor") -# print(dictSections) +# logging.info(dictSections) -print("-----------------------------------------------------") -print("Convert EOAsection to H2") +logging.info("-----------------------------------------------------") +logging.info("Convert EOAsection to H2") xmlSections = xmlEbookTree.findall(".//div2") for xmlSection in xmlSections: xmlSection.find("head").tag = "h2" if xmlSection.get("rend") != "nonumber": idSection = xmlSection.get("id") strHeadline = xmlSection.find("h2").text or "" - print(strHeadline) + logging.info(strHeadline) xmlSection.find("h2").text = str(dictSections[idSection]) + " " + strHeadline -print("-----------------------------------------------------") -print("Convert EOAsubsection to H3") +logging.info("-----------------------------------------------------") +logging.info("Convert EOAsubsection to H3") xmlSubsections = xmlEbookTree.findall(".//div3") for xmlSubsection in xmlSubsections: xmlSubsection.find("head").tag = "h3" if xmlSubsection.get("rend") != "nonumber": idSection = xmlSubsection.get("id") strHeadline = xmlSubsection.find("h3").text or "" - print(strHeadline) + logging.info(strHeadline) xmlSubsection.find("h3").text = str(dictSections[idSection]) + " " + strHeadline -print("-----------------------------------------------------") -print("Convert EOAsubsubsection to H4") +logging.info("-----------------------------------------------------") +logging.info("Convert EOAsubsubsection to H4") xmlSubsubsections = xmlEbookTree.findall(".//div4") for xmlSubsubsection in xmlSubsubsections: xmlSubsubsection.find("head").tag = "h4" @@ -525,15 +613,15 @@ def addToTocncx(tocncx, Label, intTechnicalChapterNumber): #strHeadline = xmlSubsection.find("h4").text #xmlSubsection.find("h3").text = str(dictSections[idSection]) + " " + strHeadline -print("-----------------------------------------------------") -print("Convert EOAparagraph to H5") +logging.info("-----------------------------------------------------") +logging.info("Convert EOAparagraph to H5") xmlParagraphs = xmlEbookTree.findall(".//div5") for xmlParagraph in xmlParagraphs: - print("Found a paragraph.") + logging.info("Found a paragraph.") xmlParagraph.find("head").tag = "h5" -print("-----------------------------------------------------") -print("Preparing Figures") +logging.info("-----------------------------------------------------") +logging.info("Preparing Figures") xmlFigures = xmlEbookTree.xpath(".//EOAfigure[not(@type='hionly')] | .//EOAlsfigure[not(@type='hionly')]") for xmlFigure in xmlFigures: # Copy File of the Image @@ -545,21 +633,42 @@ def addToTocncx(tocncx, Label, intTechnicalChapterNumber): strImageFileDir = re.sub("/", "", strImageFileDir) strImageFileName = os.path.basename(strImageFileString) strImageFileNamewoSuffix, strImageFileName_Suffix = os.path.splitext(strImageFileName) - shutil.copy(os.getcwd() + "/" + strImageFileString, os.getcwd() + "/CONVERT/epub/OEBPS/images/" + strImageFileDir + strImageFileName) + shutil.copy( + PUBLICATION_DIR / strImageFileString, + CONVERT_DIR / "epub/OEBPS/images" / (strImageFileDir + strImageFileName) + ) + # shutil.copy(os.getcwd() + "/" + strImageFileString, os.getcwd() + "/CONVERT/epub/OEBPS/images/" + strImageFileDir + strImageFileName) if strImageFileName_Suffix.lower() == ".jpg": extension_and_mime = "jpg" elif strImageFileName_Suffix.lower() in [".png", ".pdf"]: extension_and_mime = "png" else: - print("Found an unrecognized image suffix: %s" % strImageFileName_Suffix) + logging.info("Found an unrecognized image suffix: %s" % strImageFileName_Suffix) sys.exit() - strImageFilepath = libeoaconvert.sanitizeImage(os.getcwd() + "/CONVERT/epub/OEBPS/images/" + strImageFileDir + strImageFileName, GM_PATH, PDFCROP_EXEC) + strImageFilepath = libeoaconvert.sanitizeImage( + CONVERT_DIR / "epub/OEBPS/images" / (strImageFileDir + strImageFileName), + TEMP_DIR, + GM_PATH, + PDFCROP_EXEC, + ) + # strImageFilepath = libeoaconvert.sanitizeImage(os.getcwd() + "/CONVERT/epub/OEBPS/images/" + strImageFileDir + strImageFileName, GM_PATH, PDFCROP_EXEC) # Add copied file to contentopf + content_opf_filename = Path ("images") / "{}{}.{}".format(strImageFileDir, strImageFileNamewoSuffix, extension_and_mime) + content_opf_fileid = "{}{}{}".format(strImageFileDir, strImageFileNamewoSuffix, extension_and_mime) + + contentopf = addToContentopf( + contentopf, + content_opf_filename, + content_opf_fileid, + extension_and_mime + ) + ''' content_opf_filename = "images" + os.path.sep + "{}{}.{}".format(strImageFileDir, strImageFileNamewoSuffix, extension_and_mime) content_opf_fileid = "{}{}{}".format(strImageFileDir, strImageFileNamewoSuffix, extension_and_mime) contentopf = addToContentopf(contentopf, content_opf_filename, content_opf_fileid, extension_and_mime) + ''' idFigure = xmlFigure.find(".//anchor").get("id") intFigureNumber = dictFigures[idFigure] @@ -593,8 +702,8 @@ def addToTocncx(tocncx, Label, intTechnicalChapterNumber): for fig in xml_figures_hyperimage: fig.tag = "EOAhifigure" -print("-----------------------------------------------------") -print("Preparing not numbered Figures") +logging.info("-----------------------------------------------------") +logging.info("Preparing not numbered Figures") xmlFigures = xmlEbookTree.findall(".//EOAfigurenonumber") for xmlFigure in xmlFigures: # Copy File of the Image @@ -605,10 +714,26 @@ def addToTocncx(tocncx, Label, intTechnicalChapterNumber): strImageFileDir = re.sub("/", "", strImageFileDir) strImageFileName = os.path.basename(strImageFileString) strImageFileNamewoSuffix = os.path.splitext(strImageFileName)[0] - shutil.copy(os.getcwd() + "/" + strImageFileString, os.getcwd() + "/CONVERT/epub/OEBPS/images/" + strImageFileDir + strImageFileName) - strImageFilepath = libeoaconvert.sanitizeImage(os.getcwd() + "/CONVERT/epub/OEBPS/images/" + strImageFileDir + strImageFileName, GM_PATH, PDFCROP_EXEC) + shutil.copy( + PUBLICATION_DIR / strImageFileString, + CONVERT_DIR / "epub/OEBPS/images" / (strImageFileDir + strImageFileName) + ) + strImageFilepath = libeoaconvert.sanitizeImage( + CONVERT_DIR / "epub/OEBPS/images" / (strImageFileDir + strImageFileName), + TEMP_DIR, + GM_PATH, + PDFCROP_EXEC + ) + # shutil.copy(os.getcwd() + "/" + strImageFileString, os.getcwd() + "/CONVERT/epub/OEBPS/images/" + strImageFileDir + strImageFileName) + # strImageFilepath = libeoaconvert.sanitizeImage(os.getcwd() + "/CONVERT/epub/OEBPS/images/" + strImageFileDir + strImageFileName, GM_PATH, PDFCROP_EXEC) # Add copied file to contentopf - contentopf = addToContentopf(contentopf, "images/" + strImageFileDir + strImageFileNamewoSuffix + ".jpg", strImageFileDir + strImageFileNamewoSuffix + "-nonumber-jpg", "jpg") + contentopf = addToContentopf( + contentopf, + "images/" + strImageFileDir + strImageFileNamewoSuffix + ".jpg", + strImageFileDir + strImageFileNamewoSuffix + "-nonumber-jpg", + "jpg" + ) + # contentopf = addToContentopf(contentopf, "images/" + strImageFileDir + strImageFileNamewoSuffix + ".jpg", strImageFileDir + strImageFileNamewoSuffix + "-nonumber-jpg", "jpg") logging.debug("Added a nonumber figure") strImageWidth = xmlFigure.find(".//width").text strImageWidth = strImageWidth.rstrip("\n") @@ -620,8 +745,8 @@ def addToTocncx(tocncx, Label, intTechnicalChapterNumber): xmlFigureImage.set("style", "width: " + strImageWidth + "%") xmlFigure.append(xmlFigureImage) -print("-----------------------------------------------------") -print("Preparing Footnotes") +logging.info("-----------------------------------------------------") +logging.info("Preparing Footnotes") def alph_footnote_index(fndex): """ @@ -662,11 +787,23 @@ def replace_footnote_equations(footnote): equation.tag = "p" img = etree.Element("img", src="images/%s" % filename, alt="") equation.append(img) + shutil.copy( + PUBLICATION_DIR / "items" / filename, + CONVERT_DIR / "epub/DEBPS/images" / filename + ) + result = addToContentopf( + result, + "images/" + filename, + filename, + "png" + ) + ''' cwd = os.getcwd() shutil.copy("%s/items/%s" % (cwd, filename), "%s/CONVERT/epub/DEBPS/images/%s" % (cwd, filename)) result = addToContentopf(result, "images/" + filename, filename, "png") + ''' - print("einmal durch replace_footnote_equations") + logging.info("einmal durch replace_footnote_equations") return result # def replace_footnote_equations ends here @@ -769,7 +906,7 @@ class FootnoteError(Exception): for xmlChapter in xmlChapters: groupings = libeoaconvert.get_bigfoot_data(xmlChapter) xmlFootnotes = list(xmlChapter.findall(".//note")) - print("here come the footnotes. found", len(xmlFootnotes)) + logging.info("here come the footnotes. found" + str(len(xmlFootnotes))) has_old = 0 != len(xmlFootnotes) has_new = 0 != len( [ # flatten the association list whose values are lists, so we can take the length @@ -820,7 +957,11 @@ class FootnoteError(Exception): xmlEquationnonumber.tag = "p" xmlIMG = etree.Element("img", src="images/"+ strFilename, alt="") xmlEquationnonumber.append(xmlIMG) - shutil.copy(os.getcwd() + "/items/" + strFilename, os.getcwd() + "/CONVERT/epub/OEBPS/images/" + strFilename) + shutil.copy( + PUBLICATION_DIR / "items" / strFilename, + CONVERT_DIR / "epub/OEBPS/images" / strFilename + ) + # shutil.copy(os.getcwd() + "/items/" + strFilename, os.getcwd() + "/CONVERT/epub/OEBPS/images/" + strFilename) contentopf = addToContentopf(contentopf, "images/" + strFilename, strFilename, "png") tmp_fnstring = "fn" + str(intFootnoteNumber) @@ -865,8 +1006,8 @@ class FootnoteError(Exception): intTechnicalChapterNumber += 1 -print("-----------------------------------------------------") -print("Preparing Lists") +logging.info("-----------------------------------------------------") +logging.info("Preparing Lists") for xmlChapter in xmlChapters: xmlLists = xmlChapter.findall(".//list") for xmlList in xmlLists: @@ -885,8 +1026,8 @@ class FootnoteError(Exception): for xmlListItem in xmlListItems: xmlListItem.tag = "li" -print("-----------------------------------------------------") -print("Preparing Descriptions") +logging.info("-----------------------------------------------------") +logging.info("Preparing Descriptions") for xmlChapter in xmlChapters: xmlDescriptions = xmlChapter.findall(".//list") for xmlDescription in xmlDescriptions: @@ -900,8 +1041,8 @@ class FootnoteError(Exception): del xmlChild.attrib["id"] del xmlChild.attrib["id-text"] -print("-----------------------------------------------------") -print("Preparing Blockquotes") +logging.info("-----------------------------------------------------") +logging.info("Preparing Blockquotes") xmlParagraphs = xmlEbookTree.findall(".//p") for xmlParagraph in xmlParagraphs: if xmlParagraph.get("rend") == "quoted": @@ -920,8 +1061,8 @@ class FootnoteError(Exception): xmlNew.tail = strParagraphTail xmlParagraph.append(xmlNew) -print("-----------------------------------------------------") -print("Preparing Theorems") +logging.info("-----------------------------------------------------") +logging.info("Preparing Theorems") for xmlChapter in xmlChapters: xmlTheorems = xmlChapter.findall(".//theorem") for xmlTheorem in xmlTheorems: @@ -939,8 +1080,8 @@ class FootnoteError(Exception): del xmlTheorem.attrib["id"] etree.strip_tags(xmlTheorem, "p") -print("-----------------------------------------------------") -print("Preparing Hyperlinks") +logging.info("-----------------------------------------------------") +logging.info("Preparing Hyperlinks") for xmlChapter in xmlChapters: xmlHyperlinks = xmlChapter.findall(".//xref") for xmlHyperlink in xmlHyperlinks: @@ -954,8 +1095,8 @@ class FootnoteError(Exception): etree.strip_elements(xmlHyperlink, with_tail=True, *['allowbreak']) xmlHyperlink.text = strURL -print("-----------------------------------------------------") -print("Convert emphasized text") +logging.info("-----------------------------------------------------") +logging.info("Convert emphasized text") for xmlChapter in xmlChapters: xmlItalics = xmlChapter.findall(".//hi") for xmlItalic in xmlItalics: @@ -963,8 +1104,8 @@ class FootnoteError(Exception): xmlItalic.tag = "em" del xmlItalic.attrib["rend"] -print("-----------------------------------------------------") -print("Convert bold text") +logging.info("-----------------------------------------------------") +logging.info("Convert bold text") for xmlChapter in xmlChapters: xmlBolds = xmlChapter.findall(".//hi") for xmlBold in xmlBolds: @@ -972,38 +1113,38 @@ class FootnoteError(Exception): xmlBold.tag = "b" del xmlBold.attrib["rend"] -print("-----------------------------------------------------") -print("Convert EOAup to ") +logging.info("-----------------------------------------------------") +logging.info("Convert EOAup to ") for xmlChapter in xmlChapters: xmlUps = xmlChapter.findall(".//EOAup") for xmlUp in xmlUps: xmlUp.tag = "sup" -print("-----------------------------------------------------") -print("Convert EOAdown to ") +logging.info("-----------------------------------------------------") +logging.info("Convert EOAdown to ") for xmlChapter in xmlChapters: xmlDowns = xmlChapter.findall(".//EOAdown") for xmlDown in xmlDowns: xmlDown.tag = "sub" -print("-----------------------------------------------------") -print("Convert EOAst to ") +logging.info("-----------------------------------------------------") +logging.info("Convert EOAst to ") for xmlChapter in xmlChapters: xmlStrikeouts = xmlChapter.findall(".//EOAst") for xmlStrikeout in xmlStrikeouts: xmlStrikeout.tag = "span" xmlStrikeout.set("style", "text-decoration: line-through;") -print("-----------------------------------------------------") -print("Convert EOAls to something nice") +logging.info("-----------------------------------------------------") +logging.info("Convert EOAls to something nice") for xmlChapter in xmlChapters: xmlLetterspaceds = xmlChapter.findall(".//EOAls") for xmlLetterspaced in xmlLetterspaceds: xmlLetterspaced.tag = "span" xmlLetterspaced.set("style", "letter-spacing: 0.5em;") -print("-----------------------------------------------------") -print("Convert EOAcaps to something nice") +logging.info("-----------------------------------------------------") +logging.info("Convert EOAcaps to something nice") for xmlChapter in xmlChapters: xmlLetterspaceds = xmlChapter.findall(".//EOAcaps") for xmlLetterspaced in xmlLetterspaceds: @@ -1011,32 +1152,40 @@ class FootnoteError(Exception): xmlLetterspaced.set("style", "font-variant:small-caps;") -print("-----------------------------------------------------") -print("Convert EOAineq into appropriate IMG-Tags") +logging.info("-----------------------------------------------------") +logging.info("Convert EOAineq into appropriate IMG-Tags") for xmlChapter in xmlChapters: xmlInlineEquations = xmlChapter.findall(".//EOAineq") for xmlInlineEquation in xmlInlineEquations: xmlInlineEquation.tag = "img" xmlInlineEquation.set("alt", xmlInlineEquation.get("TeX")) del xmlInlineEquation.attrib["TeX"] - shutil.copy(os.getcwd() + "/items/" + xmlInlineEquation.get("src"), os.getcwd() + "/CONVERT/epub/OEBPS/images/" + xmlInlineEquation.get("src")) + shutil.copy( + PUBLICATION_DIR / "items" / xmlInlineEquation.get("src"), + CONVERT_DIR / "epub/OEBPS/images" / xmlInlineEquation.get("src") + ) + # shutil.copy(os.getcwd() + "/items/" + xmlInlineEquation.get("src"), os.getcwd() + "/CONVERT/epub/OEBPS/images/" + xmlInlineEquation.get("src")) xmlInlineEquation.set("src", "images/" + xmlInlineEquation.get("src")) contentopf = addToContentopf(contentopf, xmlInlineEquation.get("src"), xmlInlineEquation.get("src"), "png") -print("-----------------------------------------------------") -print("Convert EOAchem into appropriate IMG-Tags") +logging.info("-----------------------------------------------------") +logging.info("Convert EOAchem into appropriate IMG-Tags") for xmlChapter in xmlChapters: xml_inline_chems = xmlChapter.findall(".//EOAchem") for xml_inline_chem in xml_inline_chems: xml_inline_chem.tag = "img" xml_inline_chem.set("alt", xml_inline_chem.get("TeX")) del xml_inline_chem.attrib["TeX"] - shutil.copy(os.getcwd() + "/items/" + xml_inline_chem.get("src"), os.getcwd() + "/CONVERT/epub/OEBPS/images/" + xml_inline_chem.get("src")) + shutil.copy( + PUBLICATION_DIR / "items" / xml_inline_chem.get("src"), + CONVERT_DIR / "epub/OEBPS/images" / xml_inline_chem.get("src") + ) + # shutil.copy(os.getcwd() + "/items/" + xml_inline_chem.get("src"), os.getcwd() + "/CONVERT/epub/OEBPS/images/" + xml_inline_chem.get("src")) xml_inline_chem.set("src", "images/" + xml_inline_chem.get("src")) contentopf = addToContentopf(contentopf, xml_inline_chem.get("src"), xml_inline_chem.get("src"), "png") -print("-----------------------------------------------------") -print("Convert EOAinline into appropriate IMG-Tags") +logging.info("-----------------------------------------------------") +logging.info("Convert EOAinline into appropriate IMG-Tags") for xmlChapter in xmlChapters: xmlInlineElements = xmlChapter.findall(".//EOAinline") for xmlInlineElement in xmlInlineElements: @@ -1050,11 +1199,16 @@ class FootnoteError(Exception): strInlineElementFileName = os.path.basename(strInlineElementFilePath) strInlineElementDirName = os.path.dirname(strInlineElementFilePath) - strNewImagePath = os.getcwd() + "/CONVERT/epub/OEBPS/images/" + strInlineElementDirName + strInlineElementFileName + strNewImagePath = CONVERT_DIR / "epub/OEBPS/images" / (strInlineElementDirName + strInlineElementFileName) + # strNewImagePath = os.getcwd() + "/CONVERT/epub/OEBPS/images/" + strInlineElementDirName + strInlineElementFileName # trouble when there are subdirectories in Image path! # some thing goes wrong here: Images/png_300dpi/A.png - shutil.copy(os.getcwd() + "/" + strInlineElementDirName + "/" + strInlineElementFileName, strNewImagePath) + shutil.copy( + PUBLICATION_DIR / strInlineElementDirName / strInlineElementFileName, + strNewImagePath + ) + # shutil.copy(os.getcwd() + "/" + strInlineElementDirName + "/" + strInlineElementFileName, strNewImagePath) # strNewImagePath = os.getcwd() + "/CONVERT/epub/OEBPS/images/" + strInlineElementDirName + strInlineElementFileName strCommand = GM_PATH + " convert " + strNewImagePath + " -resize 20x20 " + strNewImagePath listArguments = shlex.split(strCommand) @@ -1068,11 +1222,11 @@ class FootnoteError(Exception): contentopf = addToContentopf(contentopf, "images/" + strInlineElementDirName + strInlineElementFileName, xmlInlineElement.text, extension) xmlInlineElement.text = "" -print("-----------------------------------------------------") -print("Preparing Verses") +logging.info("-----------------------------------------------------") +logging.info("Preparing Verses") for xmlChapter in xmlChapters: xml_verses = xmlChapter.findall(".//EOAverse") - print(len(xml_verses)) + logging.info(len(xml_verses)) for xml_verse in xml_verses: xml_verse_children = xml_verse.getchildren() for line in xml_verse_children[:-1]: @@ -1082,15 +1236,19 @@ class FootnoteError(Exception): xml_verse.tag = "p" xml_verse.set("class", "verse") -print("-----------------------------------------------------") -print("Preparing Equations") +logging.info("-----------------------------------------------------") +logging.info("Preparing Equations") for xmlChapter in xmlChapters: xmlEquations = xmlChapter.findall(".//EOAequation") for xmlEquation in xmlEquations: strNumber = xmlEquation.get("number") strFilename = xmlEquation.get("filename") # Copy image of Equation - shutil.copy(os.getcwd() + "/items/" + strFilename, os.getcwd() + "/CONVERT/epub/OEBPS/images/" + strFilename) + shutil.copy( + PUBLICATION_DIR / "items" / strFilename, + CONVERT_DIR / "epub/OEBPS/images" / strFilename + ) + # shutil.copy(os.getcwd() + "/items/" + strFilename, os.getcwd() + "/CONVERT/epub/OEBPS/images/" + strFilename) contentopf = addToContentopf(contentopf, "images/" + strFilename, strFilename, "png") # Find out Number of Equation to be appended in the last step strEquationNumber = xmlEquation.get("number") @@ -1112,7 +1270,11 @@ class FootnoteError(Exception): for xmlEquation in xmlEquations: strFilename = xmlEquation.get("filename") # Copy image of Equation - shutil.copy(os.getcwd() + "/items/" + strFilename, os.getcwd() + "/CONVERT/epub/OEBPS/images/" + strFilename) + shutil.copy( + PUBLICATION_DIR / "items" / strFilename, + CONVERT_DIR / "epub/OEBPS/images" / strFilename + ) + # shutil.copy(os.getcwd() + "/items/" + strFilename, os.getcwd() + "/CONVERT/epub/OEBPS/images/" + strFilename) contentopf = addToContentopf(contentopf, "images/" + strFilename, strFilename, "png") # Rework XML xmlEquation.clear() @@ -1132,8 +1294,8 @@ class FootnoteError(Exception): xmlEquationarray.tag = "div" -print("-----------------------------------------------------") -print("Preparing Letterheads") +logging.info("-----------------------------------------------------") +logging.info("Preparing Letterheads") for xmlChapter in xmlChapters: xmlLetterheads = xmlChapter.xpath(".//EOAletterhead") for xmlLetterhead in xmlLetterheads: @@ -1154,20 +1316,20 @@ class FootnoteError(Exception): xmlLetterhead.insert(0, xmlHR) xmlLetterhead.insert(5, xmlHR2) -print("-----------------------------------------------------") -print("Preparing Transcriptions") +logging.info("-----------------------------------------------------") +logging.info("Preparing Transcriptions") # TODO: May need rework concerning the right Column for xmlChapter in xmlChapters: etree.strip_elements(xmlChapter, "Facsimilelink") xmlTranscriptions = xmlChapter.xpath(".//EOAtranscripted") for xmlTranscription in xmlTranscriptions: - print("Processing Transcription") - #print (etree.tostring(xmlTranscription)) + logging.info("Processing Transcription") + #logging.info (etree.tostring(xmlTranscription)) xmlTranscription.tag = "table" xmlHeader = xmlTranscription.find(".//EOAtranscriptedheader") xmlHeader.tag = "tr" xmlLeftHeader = xmlTranscription.find(".//Leftheader") - # print(xmlLeftHeader.text) + # logging.info(xmlLeftHeader.text) xmlLeftHeader.tag = "td" xmlLeftHeader.set("style", "width: 50%") xmlRightHeader = xmlTranscription.find(".//Rightheader") @@ -1186,7 +1348,7 @@ class FootnoteError(Exception): for xmlElement in xmlTemp.iterchildren(): if xmlElement.tag == "pagebreak": boolRightColumn = True - print("Spaltenwechsel!") + logging.info("Spaltenwechsel!") continue if boolRightColumn == False: xmlLeftColumn.append(xmlElement) @@ -1200,15 +1362,15 @@ class FootnoteError(Exception): # Remove -print("-----------------------------------------------------") -print("Preparing Tables") +logging.info("-----------------------------------------------------") +logging.info("Preparing Tables") intChapterNumber = 1 for xmlChapter in xmlChapters: xmlTables = xmlChapter.findall(".//EOAtable") for xmlTable in xmlTables: xmlRawTable = xmlTable.find(".//table") strTableCaption = xmlTable.find(".//EOAtablecaption").text or "" - # print("Working on ", strTableCaption) + # logging.info("Working on ", strTableCaption) if strTableCaption != "nonumber": intTableNumber = dictTables[xmlTable.find(".//EOAtablelabel").text] xmlTableCaption = etree.Element("p") @@ -1218,7 +1380,7 @@ class FootnoteError(Exception): xmlTableCaption.append(xmlChild) xmlRawTable.addnext(xmlTableCaption) else: - print("Table has no caption") + logging.info("Table has no caption") xmlTable.find(".//EOAtablecaption").clear() xmlTable.remove(xmlTable.find(".//EOAtablecaption")) xmlTable.find(".//EOAtablelabel").clear() @@ -1268,7 +1430,7 @@ class FootnoteError(Exception): for xmlTableRow in xmlTableRows: xmlTableCells = xmlTableRow.findall(".//cell") intCurrentColumn = 1 - print(listColumnAlignments) + logging.info(listColumnAlignments) for xmlTableCell in xmlTableCells: xmlTableCell.tag = "td" xmlTableCell.set("align",listColumnAlignments[intCurrentColumn]) @@ -1293,11 +1455,11 @@ class FootnoteError(Exception): # libeoaconvert.deb_var(xtp) xmlTableParent.getparent().remove(xmlTableParent) - print("Finished with that table.") + logging.info("Finished with that table.") intChapterNumber += 1 -print("-----------------------------------------------------") -print("Preparing Facsimiles") +logging.info("-----------------------------------------------------") +logging.info("Preparing Facsimiles") xmlParts = xmlEbookTree.findall(".//div0") for xmlPart in xmlParts: xmlFacsimiles = xmlPart.findall(".//EOAfacsimilepage") @@ -1313,9 +1475,19 @@ class FootnoteError(Exception): strImageFileDir = re.sub("/", "", strImageFileDir) strImageFileName = os.path.basename(strImageFile) # hier - shutil.copy(os.getcwd() + "/" + strImageFile, os.getcwd() + "/CONVERT/epub/OEBPS/images/" + strImageFileDir + strImageFileName) - - strImageFilepath = libeoaconvert.sanitizeImage(os.getcwd() + "/CONVERT/epub/OEBPS/images/" + strImageFileDir + strImageFileName, GM_PATH, PDFCROP_EXEC) + shutil.copy( + PUBLICATION_DIR / strImageFile, + CONVERT_DIR / "epub/OEBPS/images" / (strImageFileDir + strImageFileName) + ) + # shutil.copy(os.getcwd() + "/" + strImageFile, os.getcwd() + "/CONVERT/epub/OEBPS/images/" + strImageFileDir + strImageFileName) + + strImageFilepath = libeoaconvert.sanitizeImage( + CONVERT_DIR / "epub/OEBPS/images" / (strImageFileDir + strImageFileName), + TEMP_DIR, + GM_PATH, + PDFCROP_EXEC + ) + # strImageFilepath = libeoaconvert.sanitizeImage(os.getcwd() + "/CONVERT/epub/OEBPS/images/" + strImageFileDir + strImageFileName, GM_PATH, PDFCROP_EXEC) # Add copied file to contentopf img_base_file_name, img_file_extension = os.path.splitext(strImageFileName) @@ -1339,8 +1511,8 @@ class FootnoteError(Exception): facsimile_image_element.set("alt", "Facsimile page " + facsimile_pagenumber) xmlFacsimile.getparent().replace(xmlFacsimile, facsimile_image_element) -print("-----------------------------------------------------") -print("Preparing Cross-References") +logging.info("-----------------------------------------------------") +logging.info("Preparing Cross-References") for xmlChapter in xmlChapters: xmlReferences = xmlChapter.findall(".//EOAref") @@ -1354,7 +1526,7 @@ class FootnoteError(Exception): # corresponding_eoa_id_element = xmlTree.xpath("//*[@xml:id='{}']".format(label_text)) # if len(corresponding_eoa_id_element) > 1: - # print("The xml:id %s has been assigned more than once. This is not allowed. Exiting." % corresponding_eoa_id_element) + # logging.info("The xml:id %s has been assigned more than once. This is not allowed. Exiting." % corresponding_eoa_id_element) # sys.exit() # else: # eoa_id_element = corresponding_eoa_id_element[0] @@ -1363,7 +1535,7 @@ class FootnoteError(Exception): # end of the new stuff - print("XXXXXXXX") + logging.info("XXXXXXXX") strResult = "!!! Cross Reference !!!" xmlReferenceLabel = xmlReference.find("Label") @@ -1373,35 +1545,35 @@ class FootnoteError(Exception): xmlReferenceRefTarget = xmlReferenceRef.get("target") if xmlReferenceLabelText in dictEquations: - print("Verweis auf Array gefunden:" + xmlReferenceLabelText) + logging.info("Verweis auf Array gefunden:" + xmlReferenceLabelText) strResult = dictEquations[xmlReferenceLabelText] if xmlReferenceRefTarget in dictEquations: - print("Verweis auf Equation gefunden:" + xmlReferenceRefTarget) + logging.info("Verweis auf Equation gefunden:" + xmlReferenceRefTarget) strResult = dictEquations[xmlReferenceRefTarget] if xmlReferenceRefTarget in dictLists: - print("Verweis auf Liste gefunden") + logging.info("Verweis auf Liste gefunden") strResult = dictLists[xmlReferenceRefTarget] if xmlReferenceRefTarget in dictChapters: - print("Verweis auf Kapitel gefunden") + logging.info("Verweis auf Kapitel gefunden") strResult = dictChapters[xmlReferenceRefTarget] if xmlReferenceRefTarget in dictSections: - print("Verweis auf Section gefunden") + logging.info("Verweis auf Section gefunden") strResult = dictSections[xmlReferenceRefTarget] if xmlReferenceRefTarget in dictFigures: - print("Verweis auf Abbildung gefunden") + logging.info("Verweis auf Abbildung gefunden") strResult = dictFigures[xmlReferenceRefTarget] if xmlReferenceRefTarget in dictFootnotes: - print("Verweis auf Fussnote gefunden") + logging.info("Verweis auf Fussnote gefunden") strResult = dictFootnotes[xmlReferenceRefTarget] if xmlReferenceRefTarget in dictTheorems: - print("Verweis auf Theorem gefunden") + logging.info("Verweis auf Theorem gefunden") strResult = dictTheorems[xmlReferenceRefTarget] if xmlReferenceLabelText in dictTables: - print("Verweis auf Tabelle gefunden") + logging.info("Verweis auf Tabelle gefunden") strResult = dictTables[xmlReferenceLabelText] tmpTail = xmlReference.tail or "" #tmpTail = tmpTail.strip() - print("XXXXXXXX") + logging.info("XXXXXXXX") xmlReference.clear() xmlReference.text = strResult xmlReference.tail = tmpTail @@ -1413,11 +1585,11 @@ class FootnoteError(Exception): strResult = "!!! Page Reference !!!" xmlReferenceLabel = xmlReference.find("Label") xmlReferenceLabelText = xmlReferenceLabel.text - print(xmlReferenceLabelText) + logging.info(xmlReferenceLabelText) xmlReferenceRef = xmlReference.find("ref") xmlReferenceRefTarget = xmlReferenceRef.get("target") if xmlReferenceLabelText in dictPagelabels: - print("Verweis auf Seite gefunden: " + xmlReferenceLabelText) + logging.info("Verweis auf Seite gefunden: " + xmlReferenceLabelText) strResult = dictPagelabels[xmlReferenceLabelText] tmpTail = xmlReference.tail or "" xmlReference.clear() @@ -1437,8 +1609,8 @@ class FootnoteError(Exception): # Finish ePub Conversion, save File # ############################################################## -print("-----------------------------------------------------") -print("Cleaning up XML") +logging.info("-----------------------------------------------------") +logging.info("Cleaning up XML") xmlIndexentries = xmlEbookTree.xpath(".//EOAindex | .//EOAindexperson | .//EOAindexlocation") for xmlIndexentry in xmlIndexentries: tmpTail = xmlIndexentry.tail or "" @@ -1454,7 +1626,7 @@ class FootnoteError(Exception): intTechnicalChapterNumber = 1 for xmlChapter in xmlChapters: # Load xmlHTMLTemplate - htmlChapter = etree.parse(EPUB_FILES + "epubchapter.xml", xmlChapterParser) + htmlChapter = etree.parse( str(EPUB_FILES / "epubchapter.xml"), xmlChapterParser) # Find out, if it's inside a part. If Part has not been worked on, then do it xmlChapterParent = xmlChapter.getparent() if xmlChapterParent.tag == "div0" and xmlChapterParent.get("id") not in listParts: @@ -1465,7 +1637,8 @@ class FootnoteError(Exception): xmlNew.text = strPartTitle htmlChapter.find(".//" + htmlns + "body").append(xmlNew) # Save Part - tmpFileName = os.getcwd() + "/CONVERT/epub/OEBPS/chapter" + (str(intTechnicalChapterNumber)) + ".xhtml" + tmpFileName = CONVERT_DIR / ("epub/OEBPS/chapter" + (str(intTechnicalChapterNumber)) + ".xhtml") + # tmpFileName = os.getcwd() + "/CONVERT/epub/OEBPS/chapter" + (str(intTechnicalChapterNumber)) + ".xhtml" tmpFile = open (tmpFileName, "w") tmpResult = etree.tostring(htmlChapter, pretty_print=True, encoding="unicode") tmpFile.write(tmpResult) @@ -1475,14 +1648,15 @@ class FootnoteError(Exception): contentopf = addToContentopf(contentopf, "chapter" + str(intTechnicalChapterNumber) + ".xhtml", "chapter" + str(intTechnicalChapterNumber), "xml") intTechnicalChapterNumber += 1 # Reset htmlChapter - htmlChapter = etree.parse(EPUB_FILES + "epubchapter.xml", xmlChapterParser) + htmlChapter = etree.parse(str(EPUB_FILES / "epubchapter.xml"), xmlChapterParser) # Aus div1 alle kinder auslesen und an htmlChapter dran hängen xmlChildren = xmlChapter.getchildren() for xmlChild in xmlChildren: # Using Deepcopy, coz a simple append will delete the original htmlChapter.find(".//" + htmlns + "body").append(deepcopy(xmlChild)) # Save Chapter - tmpFileName = os.getcwd() + "/CONVERT/epub/OEBPS/chapter" + (str(intTechnicalChapterNumber)) + ".xhtml" + tmpFileName = CONVERT_DIR / ("epub/OEBPS/chapter" + (str(intTechnicalChapterNumber)) + ".xhtml") + # tmpFileName = os.getcwd() + "/CONVERT/epub/OEBPS/chapter" + (str(intTechnicalChapterNumber)) + ".xhtml" tmpFile = open (tmpFileName, "w") tmpResult = etree.tostring(htmlChapter, pretty_print=True, encoding="unicode") tmpFile.write(tmpResult) @@ -1496,9 +1670,9 @@ class FootnoteError(Exception): # Convert Facsimile-Parts xmlParts = xmlEbookTree.findall("//div0") for xmlPart in xmlParts: - print("-------------") - print("Working on Facsimile-Part") - print("-------------") + logging.info("-------------") + logging.info("Working on Facsimile-Part") + logging.info("-------------") # check if it has a child element EOAfacsimilepart if bool(xmlPart.findall(".//EOAfacsimilepart")): htmlChapter = etree.parse(EPUB_FILES + "epubchapter.xml", xmlChapterParser) @@ -1512,13 +1686,15 @@ class FootnoteError(Exception): # Using Deepcopy, coz a simple append will delete the original htmlChapter.find(".//" + htmlns + "body").append(deepcopy(xmlChild)) # Save Chapter - tmpFileName = os.getcwd() + "/CONVERT/epub/OEBPS/chapter" + (str(intTechnicalChapterNumber)) + ".xhtml" + tmpFileName = CONVERT / ("epub/OEBPS/chapter" + (str(intTechnicalChapterNumber)) + ".xhtml") + # tmpFileName = os.getcwd() + "/CONVERT/epub/OEBPS/chapter" + (str(intTechnicalChapterNumber)) + ".xhtml" tmpFile = open (tmpFileName, "w") tmpResult = etree.tostring(htmlChapter, pretty_print=True, encoding="unicode") tmpFile.write(tmpResult) tmpFile.close() # Save Chapter - tmpFileName = os.getcwd() + "/CONVERT/epub/OEBPS/chapter" + (str(intTechnicalChapterNumber)) + ".xhtml" + tmpFileName = CONVERT_DIR / ("epub/OEBPS/chapter" + (str(intTechnicalChapterNumber)) + ".xhtml") + # tmpFileName = os.getcwd() + "/CONVERT/epub/OEBPS/chapter" + (str(intTechnicalChapterNumber)) + ".xhtml" tmpFile = open (tmpFileName, "w") tmpResult = etree.tostring(htmlChapter, pretty_print=True, encoding="unicode") tmpFile.write(tmpResult) @@ -1530,14 +1706,16 @@ class FootnoteError(Exception): intTechnicalChapterNumber += 1 # Saving toc.ncx -tmpFileName = os.getcwd() + "/CONVERT/epub/OEBPS/toc.ncx" +tmpFileName = CONVERT_DIR / "epub/OEBPS/toc.ncx" +# tmpFileName = os.getcwd() + "/CONVERT/epub/OEBPS/toc.ncx" tmpFile = open (tmpFileName, "w") tmpResult = etree.tostring(tocncx, pretty_print=True, encoding="unicode") tmpFile.write(tmpResult) tmpFile.close() # Saving content.opf -tmpFileName = os.getcwd() + "/CONVERT/epub/OEBPS/content.opf" +tmpFileName = CONVERT_DIR / "epub/OEBPS/content.opf" +# tmpFileName = os.getcwd() + "/CONVERT/epub/OEBPS/content.opf" tmpFile = open (tmpFileName, "w") tmpResult = etree.tostring(contentopf, pretty_print=True, encoding="unicode") tmpFile.write(tmpResult) @@ -1548,7 +1726,7 @@ class FootnoteError(Exception): ############################################################################ # Write Temporary XML-Tree -ergebnisdatei = open("tmp_files/Devel_ebook.xml", "w") +ergebnisdatei = open(TEMP_DIR / "Devel_ebook.xml", "w") ergebnis = etree.tostring(xmlEbookTree, pretty_print=True, encoding="unicode") ergebnisdatei.write(ergebnis) ergebnisdatei.close() diff --git a/utils/libeoaconvert.py b/utils/libeoaconvert.py index a5b0f48..6c5c117 100644 --- a/utils/libeoaconvert.py +++ b/utils/libeoaconvert.py @@ -23,6 +23,20 @@ ######################### allowed_bibentry_types = ["book", "booklet", "report", "thesis", "misc", "incollection", "inproceedings", "article", "newspaper"] +def enable_preamble( + input_file, + output_file, + pdf_or_xml +): + with open( input_file, "r" ) as i: + with open( output_file, "w" ) as o: + if( pdf_or_xml == "pdf" ): + o.write( "\input{preambel/pre_eoa}\n" ) + else: + o.write( "\input{preambel/pre_xml}\n" ) + o.write( i.read() ) + + def get_bigfoot_data(chapter): """ footnotes are per-chapter @@ -49,36 +63,37 @@ def get_bigfoot_data(chapter): def sanitizeImage( strImagepath, + tmp_dir, GM_PATH, PDFCROP_EXEC, # TL_PATH ): """Adjust and convert image for epub standard""" - if not os.path.exists("tmp_files/tmp_images/"): - os.makedirs(os.path.expanduser("tmp_files/tmp_images/")) + if not os.path.exists(Path(tmp_dir) / "tmp_images/"): + os.makedirs(os.path.expanduser(Path(tmp_dir) / "tmp_images/")) - tmp_image_dir = os.getcwd() + "/tmp_files/tmp_images/" - xelatex_sanitizeimage_logfile = open('tmp_files/xelatex-run-images.log', 'w') + tmp_image_dir = Path(tmp_dir) / "tmp_images/" + xelatex_sanitizeimage_logfile = open( Path(tmp_dir) / 'xelatex-run-images.log', 'w') logging.debug(strImagepath) - strCommand = GM_PATH + " identify -format \"%w\" " + strImagepath + strCommand = GM_PATH + " identify -format \"%w\" " + str(strImagepath) listArguments = shlex.split(strCommand) exeShell = subprocess.check_output(listArguments, shell=False, universal_newlines=True) intImageWidth = int(exeShell) if intImageWidth > 700: - strCommand = GM_PATH + " convert " + strImagepath + " -resize 700x\\> " + strImagepath + strCommand = GM_PATH + " convert " + str(strImagepath) + " -resize 700x\\> " + str(strImagepath) listArguments = shlex.split(strCommand) subprocess.check_output(listArguments, shell=False) - strCommand = GM_PATH + " identify -format \"%h\" " + strImagepath + strCommand = GM_PATH + " identify -format \"%h\" " + str(strImagepath) listArguments = shlex.split(strCommand) exeShell = subprocess.check_output(listArguments, shell=False, universal_newlines=True) intImageHeight = int(exeShell) if intImageHeight > 1000: - strCommand = GM_PATH + " convert " + strImagepath + " -resize x1000\\> " + strImagepath + strCommand = GM_PATH + " convert " + str(strImagepath) + " -resize x1000\\> " + str(strImagepath) listArguments = shlex.split(strCommand) subprocess.check_output(listArguments, shell=False) - strCommand = GM_PATH + " identify -format \"%m\" " + strImagepath + strCommand = GM_PATH + " identify -format \"%m\" " + str(strImagepath) listArguments = shlex.split(strCommand) exeShell = subprocess.check_output(listArguments, shell=False, universal_newlines=True) strFileFormat = str(exeShell) @@ -102,10 +117,10 @@ def sanitizeImage( # os.remove(strImagepath) # strImagepath = strNewImagepath + ".png" elif strFileFormat == "PDF": - strNewImagepath = os.path.splitext(strImagepath)[0] + strNewImagepath = os.path.splitext(str(strImagepath))[0] clipped_file = strImagepath.replace(".pdf", "-clipped.pdf") - Kommando = PDFCROP_EXEC + " --margins 10 --clip --hires " + strImagepath + " " + clipped_file + Kommando = PDFCROP_EXEC + " --margins 10 --clip --hires " + str(strImagepath) + " " + clipped_file logging.debug(Kommando) Argumente = shlex.split(Kommando) @@ -177,11 +192,17 @@ def plural(num, noun, plural=None): # def plural ends here -def format_citations_tex4ht(used_citekeys, bibdata, language, tmp_filename): +def format_citations_tex4ht( + used_citekeys, + bibdata, + language, + tmp_filename, + tmp_dir +): """Return a formatted xmlstring of the used citations""" - tmp_path_md = "tmp_files" + os.path.sep + tmp_filename + ".tex" - tmp_path_html = "tmp_files" + os.path.sep + tmp_filename + ".html" + tmp_path_md = Path(tmp_dir) / (tmp_filename + ".tex") + tmp_path_html = Path(tmp_dir) / (tmp_filename + ".html") return references # def format_citations_tex4ht ends here @@ -195,8 +216,12 @@ def format_citations( ): """Return a formatted xmlstring of the used citations""" + tmp_path_md = tmp_filename.with_suffix(".md") + tmp_path_html = tmp_filename.with_suffix(".html") + ''' tmp_path_md = "tmp_files" + os.path.sep + tmp_filename + ".md" tmp_path_html = "tmp_files" + os.path.sep + tmp_filename + ".html" + ''' md_file_header = "---\nlang: %s\ntitle: Citations\n...\n\n" % two_letter_language(language) @@ -242,20 +267,27 @@ def fix_bib_entries(div_snippet): return div_snippet # def fix_bib_entries ends here -def debug_xml_here(xml_tree, xml_filename): +def debug_xml_here( + xml_tree, + xml_filename, + output_dir +): """Dump current state of an XML tree into a file for inspection""" + ''' if not os.path.exists("debug"): os.makedirs(os.path.expanduser("debug")) xml_path = "%s/debug/debug_%s.xml" % (os.getcwd(), xml_filename) + ''' + xml_path = (Path(output_dir) / xml_filename).with_suffix( ".xml") if isinstance(xml_tree, etree._ElementTree): pass else: xml_tree = etree.ElementTree(xml_tree) - xml_tree.write(xml_path, pretty_print=True, xml_declaration=True,encoding="utf-8") + xml_tree.write( str(xml_path), pretty_print=True, xml_declaration=True,encoding="utf-8") logging.info("Wrote %s." % xml_path) # def debug_xml_here ends here diff --git a/utils/load_config.py b/utils/load_config.py index 13c5877..f1d3ebd 100644 --- a/utils/load_config.py +++ b/utils/load_config.py @@ -4,8 +4,76 @@ import configparser import logging +import subprocess +from subprocess import PIPE, STDOUT from pathlib import Path import os +import shlex +import time +import shutil + +import tempfile + +def copy_file_overwrite( src, dst ): + if os.path.exists( dst ): + shutil.rmtree( dst ) + shutil.copy( src, dst) + +def copy_dir_overwrite( src, dst ): + if os.path.exists( dst ): + shutil.rmtree( dst ) + shutil.copytree( src, dst) + +def exec_command( + command, + error_msg = "ERROR while running {command}", + wd = None, + log_level = "INFO", + ignore_fail = False + ): + logging.log( + getattr(logging,log_level), + f"executing '{command}'", + ) + + arguments = shlex.split(command) + process = subprocess.Popen( + arguments, + cwd = wd, + stdout=PIPE, + stderr=PIPE + ) + + def check_io(): + while True: + output = process.stdout.readline() + if output: + logging.debug( output.decode().strip() ) + else: + break + + # keep checking stdout/stderr until the child exits + while process.poll() is None: + check_io() + + ret = process.wait() # 0 means success + if (not ignore_fail) and ret != 0: + raise( Exception( error_msg.format( command=command ) ) ) + +def check_executable( executable_name ): + logging.info(f"checking executable {executable_name}...") + exec_command( + f"which {executable_name}", + f"PROGRAM not found: {executable_name}", + log_level = "NOTSET" + ) + logging.info("ok :-)") + +def check_file( filename ): + logging.info(f"checking '{filename}'...") + if not os.path.exists( filename ): + raise( Exception( f"'{filename}' is missing!" ) ) + logging.info("ok :-)") ################################## # Reading the configuration file # @@ -13,21 +81,42 @@ def load_config( cfg_file, - LOGFILE, - LOGLEVEL + log_level, # log level in the terminal + log_file, + log_level_file = logging.DEBUG ): - BASE_DIR = Path( os.path.realpath(__file__) ).parent.parent CONFIG = configparser.ConfigParser() CONFIG.read( cfg_file ) ###################### # Setting up logging # ###################### - # LOGFILE = CONFIG['General']['logfile'] - # LOGLEVEL = CONFIG['General']['loglevel'] - logging.basicConfig(level=LOGLEVEL, format='%(asctime)s - %(levelname)s - %(message)s') + log_dir = Path(log_file).absolute().parent + if not (log_dir.exists() and log_dir.is_dir()): + os.makedirs( log_dir ) + time.sleep( 1 ) + + # always log to file: + logging.basicConfig( + level=logging.DEBUG, + format='%(asctime)s - %(levelname)s - %(message)s', + filename = log_file, + filemode = "w" + ) + + rootLogger = logging.getLogger() + + # set up logging to terminal: + terminal_formatter = \ + logging.Formatter( + "%(levelname)s - %(message)s" + ) + consoleHandler = logging.StreamHandler() + consoleHandler.setFormatter(terminal_formatter) + consoleHandler.setLevel( log_level ) + rootLogger.addHandler(consoleHandler) # Setup of various dictionaries for localization of various elements # dictLangFootnotes = {"it" : "Note a piè pagina", "fr" : "notes en bas de page", "de" : "Fußnoten", "en" : "Footnotes"}