diff --git a/config/eoaconvert.cfg b/config/eoaconvert.cfg index c2353da..0793ffa 100644 --- a/config/eoaconvert.cfg +++ b/config/eoaconvert.cfg @@ -1,20 +1,22 @@ [General] # logging -logfile: ./eoaconvert.log +# logfile: ./eoaconvert.log # choose between DEBUG, INFO, WARNING, ERROR, CRITICAL -loglevel: DEBUG +# loglevel: DEBUG [Executables] -graphicsmagic: /usr/local/bin/gm -texlive: /usr/local/texlive/2017/ -texbin: /Library/TeX/texbin/ -TRALICS_PATH_EXEC: /Users/kthoden/bin/tralics-2.15.4 +# graphicsmagic: gm +# texlive: /usr/local/texlive/2017/ +# texbin: /Library/TeX/texbin/ +# TRALICS_PATH_EXEC: /Users/kthoden/bin/tralics-2.15.4 [Auxiliaries] -TRALICS_PATH_LIB: /Users/kthoden/EOAKram/dev/EOASkripts/config/ -TEMPLATE_PATH: /Users/kthoden/EOAKram/dev/EOASkripts/data/aux/ -SUPPORT_PATH: /Users/kthoden/EOAKram/dev/EOASkripts/data/dtd_files/ -CSL_FILE: /Users/kthoden/EOAKram/dev/eoa-csl/chicago-author-date-custom.csl -TRANSLATIONS: /Users/kthoden/EOAKram/dev/EOASkripts/data/aux/translations.xml -TEI_GUIDELINES: /Users/kthoden/EOAKram/dev/thirdparty/TEI -TEI_STYLESHEETS: /Users/kthoden/EOAKram/dev/thirdparty/Stylesheets \ No newline at end of file +# relative to repo: +TRALICS_PATH_LIB: config/ +TEMPLATE_PATH: data/aux/ +SUPPORT_PATH: data/dtd_files/ +TRANSLATIONS: data/aux/translations.xml +CSL_FILE: bibformat/chicago-author-date-custom.csl + +# TEI_GUIDELINES: tei/TEI +# TEI_STYLESHEETS: tei/Stylesheets diff --git a/eoatex2imxml.py b/eoatex2imxml.py index ce61873..a247d37 100755 --- a/eoatex2imxml.py +++ b/eoatex2imxml.py @@ -19,12 +19,14 @@ # as described in http://infohost.nmt.edu/tcc/help/pubs/pylxml/web/soupparser.html # from lxml.html import soupparser +from utils.libeoabibitem import Bibitem +import utils.libeoaconvert as libeoaconvert +from utils.load_config import load_config + # imports import argparse from lxml import etree -from libeoabibitem import Bibitem from bs4 import BeautifulSoup -import libeoaconvert import glob import os import re @@ -35,34 +37,58 @@ import sys import shutil import time -import configparser import logging import pickle +from pathlib import Path -# current_directory = os.path.dirname(sys.argv[0]) -# is this better? -current_directory = os.path.realpath(__file__) +BASE_DIR = Path( os.path.realpath(__file__) ).parent -print("The script is run from {}".format(current_directory)) ############################################################### # Preperation of certain files and some checks in advance ############################################################### # Options for the command line: filename / configfile parser = argparse.ArgumentParser() -parser.add_argument("-c", "--config", help="Name of config file.") -group = parser.add_mutually_exclusive_group() -group.add_argument("-f", "--filename", help="Name of main EOATeX file.") -group.add_argument("-t", "--trash", help="Remove temporary files.") +parser.add_argument( + "-c", "--config", + default = BASE_DIR / "config" / "eoaconvert.cfg", + help="Name of config file" +) +parser.add_argument( + "-l", "--log-file", + default = "eoaconvert.log", + help="logfile" +) +parser.add_argument( + "--log-level", + default = "DEBUG", + help="log level: choose between DEBUG, INFO, WARNING, ERROR, CRITICAL" +) +parser.add_argument( + "--tei-guidelines", + default = "tei/TEI", + help="path to the https://github.com/TEIC/TEI" +) +parser.add_argument( + "--tei-stylesheets", + default = "tei/Stylesheets", + help="path to the https://github.com/TEIC/Stylesheets" +) +parser.add_argument( + "-f", "--filename", + required = True, + help="Name of main EOATeX file (without suffix!)." +) +parser.add_argument( + "-t", "--trash", + help="Remove temporary files." +) args = parser.parse_args() -if args.config is not None: - CONFIG_FILE = os.path.abspath(args.config) -else: - CONFIG_FILE = os.path.dirname(sys.argv[0]) + os.path.sep.join(["", "config", "eoaconvert.cfg"]) +config_file = args.config -logging.debug("The configfile is %s." % CONFIG_FILE) +logging.debug("The configfile is %s." % config_file) # current biber is not compatible with this code # switch TeX distribution to TeXLive2016, @@ -77,37 +103,44 @@ ################################## # Reading the configuration file # ################################## -CONFIG = configparser.ConfigParser() -CONFIG.read(CONFIG_FILE) -###################### -# Setting up logging # -###################### -LOGFILE = CONFIG['General']['logfile'] -LOGLEVEL = CONFIG['General']['loglevel'] - -# numeric_level = getattr(logging, LOGLEVEL.upper(), None) -# if not isinstance(numeric_level, int): -# raise ValueError('Invalid log level: %s' % loglevel) -logging.basicConfig(filename=LOGFILE, level=LOGLEVEL, format='%(asctime)s - %(levelname)s - %(message)s') +CONFIG = load_config( + config_file, + args.log_file, + args.log_level, +) ######################## # Paths to executables # ######################## -GM_PATH = CONFIG['Executables']['graphicsmagic'] -TL_PATH = CONFIG['Executables']['texlive'] -TEXBIN_PATH = CONFIG['Executables']['texbin'] -TRALICS_PATH_EXEC = CONFIG['Executables']['tralics_path_exec'] +GM_PATH = "gm" +TRALICS_PATH_EXEC = "tralics" +# (part of texlive distribution): +PDFCROP_EXEC = "pdfcrop" + +# TL_PATH = CONFIG['Executables']['texlive'] +# TEXBIN_PATH = CONFIG['Executables']['texbin'] ############################ # Paths to auxiliary files # ############################ -TRALICS_PATH_LIB = CONFIG['Auxiliaries']['TRALICS_PATH_LIB'] -TEMPLATE_PATH = CONFIG['Auxiliaries']['template_path'] -SUPPORT_PATH = CONFIG['Auxiliaries']['support_path'] -# AUX_TeX_FILES_PATH = CONFIG['Auxiliaries']['aux_tex_files_path'] +TRALICS_PATH_LIB = BASE_DIR / CONFIG['Auxiliaries']['TRALICS_PATH_LIB'] +TEMPLATE_PATH = BASE_DIR / CONFIG['Auxiliaries']['template_path'] +SUPPORT_PATH = BASE_DIR / CONFIG['Auxiliaries']['support_path'] -# interimResult = "" + +# sanity check: +print("PATH: {}".format( os.environ['PATH'] )) +Kommando = f"which {GM_PATH}" +Argumente = shlex.split(Kommando) +ret = subprocess.run(Argumente).returncode +if ret != 0: + raise( Exception( f"PROGRAM not found: {GM_PATH}" ) ) +Kommando = f"which {TRALICS_PATH_EXEC}" +Argumente = shlex.split(Kommando) +ret = subprocess.run(Argumente).returncode +if ret != 0: + raise( Exception( f"PROGRAM not found: {TRALICS_PATH_EXEC}" ) ) ######################################## # Certain functions for specific tasks # @@ -142,7 +175,7 @@ def TeX2PNG(LaTeXCode, Type, Chapter, Number): LaTeXCode = re.sub(strCommand, dictRebindedCommands[strCommand], LaTeXCode) # Open plain LaTeX-Template - tmp = open(TEMPLATE_PATH + "formula.tex", "r") + tmp = open(TEMPLATE_PATH / "formula.tex", "r") Template = tmp.read() tmp.close() # Get tmp-directory for this user account @@ -159,7 +192,7 @@ def TeX2PNG(LaTeXCode, Type, Chapter, Number): tmp = open(tmpFile, "w") tmp.write(e) tmp.close() - Kommando = TEXBIN_PATH + "xelatex --halt-on-error " + tmpFile + Kommando = "xelatex --halt-on-error " + tmpFile Argumente = shlex.split(Kommando) # Redirecting stderr to save XeLaTeX-Output Datei = open('tmp_files/xelatex-run.log', 'w') @@ -168,7 +201,7 @@ def TeX2PNG(LaTeXCode, Type, Chapter, Number): print("Successfully converted formula " + Type + str(Chapter) + "_" + str(Number)) if Ergebnis == 1: print("[ERROR]: Failed to convert formula " + Type + str(Chapter) + "_" + str(Number)) - Kommando = TL_PATH + "texmf-dist/scripts/pdfcrop/pdfcrop.pl " + formula_tmp_dir + Type + "_" + str(Chapter) + "_" + str(Number) + ".pdf " + formula_tmp_dir + Type + "_" + str(Chapter) + "_" + str(Number) + "a.pdf" + Kommando = PDFCROP_EXEC + " " + formula_tmp_dir + Type + "_" + str(Chapter) + "_" + str(Number) + ".pdf " + formula_tmp_dir + Type + "_" + str(Chapter) + "_" + str(Number) + "a.pdf" Argumente = shlex.split(Kommando) subprocess.call(Argumente,cwd=formula_tmp_dir,stdout=Datei) Kommando = GM_PATH + " convert -density 144 " + formula_tmp_dir + Type + "_" + str(Chapter) + "_" + str(Number) + "a.pdf " + os.getenv("PWD") + "/items/" + Type + "_" + str(Chapter) + "_" + str(Number) + ".png" @@ -186,11 +219,11 @@ def make_bibchecker(bib_database, set_citations): which is then also converted in the various formats. """ - tmp_latex = open(TEMPLATE_PATH + "largebib.tex", "r") + tmp_latex = open(TEMPLATE_PATH / "largebib.tex", "r") largebib_template = tmp_latex.read() tmp_latex.close() - tmp_xml = open(TEMPLATE_PATH + "largebib-xml.tex", "r") + tmp_xml = open(TEMPLATE_PATH / "largebib-xml.tex", "r") largebib_xml_template = tmp_xml.read() tmp_xml.close() @@ -399,7 +432,7 @@ def cleanup(): else: print("Found no publication.cfg. Exiting") sys.exit() -if not os.path.exists(CONVERT_DIR + os.path.sep + "cover.jpg"): +if not os.path.exists(CONVERT_DIR + os.path.sep + "Cover.jpg"): print("The file Cover.jpg in CONVERT directory is missing.") if os.path.exists(os.getcwd() + os.path.sep + "Cover.jpg"): shutil.copy("Cover.jpg", CONVERT_DIR + os.path.sep + "cover.jpg") @@ -411,14 +444,20 @@ def cleanup(): # print ("pre_xml fehlt") # sys.exit() + +def copy_dir_overwrite( src, dst ): + if os.path.exists( dst ): + shutil.rmtree( dst ) + shutil.copytree( src, dst) + # Copy Support-Files from /Library/MPIWG to current directory -shutil.copy(SUPPORT_PATH + "classes.dtd", os.getcwd()) -shutil.copy(SUPPORT_PATH + "mathml2-qname-1.mod", os.getcwd()) -shutil.copy(SUPPORT_PATH + "mathml2.dtd", os.getcwd()) -shutil.copytree(SUPPORT_PATH + "html", (os.getcwd() + "/html")) -shutil.copytree(SUPPORT_PATH + "iso8879", (os.getcwd() + "/iso8879")) -shutil.copytree(SUPPORT_PATH + "iso9573-13", (os.getcwd() + "/iso9573-13")) -shutil.copytree(SUPPORT_PATH + "mathml", (os.getcwd() + "/mathml")) +shutil.copy(SUPPORT_PATH / "classes.dtd", os.getcwd()) +shutil.copy(SUPPORT_PATH / "mathml2-qname-1.mod", os.getcwd()) +shutil.copy(SUPPORT_PATH / "mathml2.dtd", os.getcwd()) +copy_dir_overwrite(SUPPORT_PATH / "html", (os.getcwd() + "/html")) +copy_dir_overwrite(SUPPORT_PATH / "iso8879", (os.getcwd() + "/iso8879")) +copy_dir_overwrite(SUPPORT_PATH / "iso9573-13", (os.getcwd() + "/iso9573-13")) +copy_dir_overwrite(SUPPORT_PATH / "mathml", (os.getcwd() + "/mathml")) # shutil.copytree(SUPPORT_PATH + "mathml2", (os.getcwd() + "/mathml2")) ############################################################## @@ -803,7 +842,7 @@ def cleanup(): for strCommand in dictRebindedCommands.keys(): strTeXEquations = re.sub(strCommand, dictRebindedCommands[strCommand], strTeXEquations) - tmp = open(TEMPLATE_PATH + "formula.tex", "r") + tmp = open(TEMPLATE_PATH / "formula.tex", "r") Template = tmp.read() tmp.close() # Get tmp-directory for this user account @@ -821,7 +860,7 @@ def cleanup(): tmp.write(e) tmp.close() print("Typesetting all Inline Equations") - Kommando = TEXBIN_PATH + "xelatex --halt-on-error " + tmpFile + Kommando = "xelatex --halt-on-error " + tmpFile Argumente = shlex.split(Kommando) Datei = open('tmp_files/xelatex-run.log', 'w') Ergebnis = subprocess.call(Argumente,cwd=formula_tmp_dir,stdout=Datei) @@ -832,7 +871,7 @@ def cleanup(): for intRunningOrder in dictEOAineqs.keys(): # provide more status information here in output! progress(counter_dictEOAineqs, len(dictEOAineqs.keys()),"Splitting all inline equations, image %s of %s" % (counter_dictEOAineqs, len(dictEOAineqs.keys()))) - Kommando = TL_PATH + "texmf-dist/scripts/pdfcrop/pdfcrop.pl " + formula_tmp_dir + "EOAformulas_" + str(intRunningOrder) + ".pdf " + formula_tmp_dir + dictEOAineqs[intRunningOrder] + ".pdf" + Kommando = PDFCROP_EXEC + " " + formula_tmp_dir + "EOAformulas_" + str(intRunningOrder) + ".pdf " + formula_tmp_dir + dictEOAineqs[intRunningOrder] + ".pdf" Argumente = shlex.split(Kommando) subprocess.call(Argumente,cwd=formula_tmp_dir,stdout=Datei) Kommando = GM_PATH + " convert -density 144 " + formula_tmp_dir + dictEOAineqs[intRunningOrder] + ".pdf " + os.getenv("PWD") + "/items/" + dictEOAineqs[intRunningOrder] + ".png" @@ -884,7 +923,7 @@ def cleanup(): int_EOAchem_number +=1 intChapterNumber += 1 - tmp = open(TEMPLATE_PATH + "formula.tex", "r") + tmp = open(TEMPLATE_PATH / "formula.tex", "r") Template = tmp.read() tmp.close() # Get tmp-directory for this user account @@ -902,7 +941,7 @@ def cleanup(): tmp.write(e) tmp.close() print("Typesetting all inline Chemical formulas") - Kommando = TEXBIN_PATH + "xelatex --halt-on-error " + tmpFile + Kommando = "xelatex --halt-on-error " + tmpFile Argumente = shlex.split(Kommando) Datei = open('tmp_files/xelatex-run.log', 'w') Ergebnis = subprocess.call(Argumente,cwd=formula_tmp_dir,stdout=Datei) @@ -913,7 +952,7 @@ def cleanup(): for intRunningOrder in dictEOAchems.keys(): # provide more status information here in output! progress(counter_dictEOAchems, len(dictEOAchems.keys()),"Splitting all inline equations, image %s of %s" % (counter_dictEOAchems, len(dictEOAchems.keys()))) - Kommando = TL_PATH + "texmf-dist/scripts/pdfcrop/pdfcrop.pl " + formula_tmp_dir + "EOAformulas_" + str(intRunningOrder) + ".pdf " + formula_tmp_dir + dictEOAchems[intRunningOrder] + ".pdf" + Kommando = PDFCROP_EXEC + " " + formula_tmp_dir + "EOAformulas_" + str(intRunningOrder) + ".pdf " + formula_tmp_dir + dictEOAchems[intRunningOrder] + ".pdf" Argumente = shlex.split(Kommando) subprocess.call(Argumente,cwd=formula_tmp_dir,stdout=Datei) Kommando = GM_PATH + " convert -density 144 " + formula_tmp_dir + dictEOAchems[intRunningOrder] + ".pdf " + os.getenv("PWD") + "/items/" + dictEOAchems[intRunningOrder] + ".png" @@ -1002,9 +1041,6 @@ def cleanup(): # bigfoot needs to be integrated into # 'fndict': {'uid11': '2', 'uid12': '3', 'uid9': '1'}, -# the new-style footnotes that use LaTeX bigfoot show up in the following order: -footnote_groups = ["decimal", "lower-latin"] - print("-----------------------------------------------------") print("Numbering of Lists per Chapter") @@ -1179,71 +1215,72 @@ def cleanup(): make_bibchecker(bib_database, set_citations) +def print_bibliography( + print_bibl_element, + xml_context, + tmp_citation_filename +): + bibliography_keyword = print_bibl_element.get("keyword") + if bibliography_keyword: + logging.debug(f"Found bibliography keyword {bibliography_keyword}") + else: + logging.debug(f"No bibliography keyword found") + + print_bibl_element.getparent().tag = "div" + xmlBibliographyDiv = etree.Element("div") + xmlBibliography.addnext(xmlBibliographyDiv) + + citekeys = xml_context.xpath(".//citekey/text()") + nocite_elements = xml_context.xpath(".//nocite") + + # citekeys = xmlChapter.xpath(".//citekey/text()") + # nocite_elements = xmlChapter.xpath(".//nocite") + + if nocite_elements: + logging.debug(f"Found {libeoaconvert.plural(len(nocite_elements), 'nocite command')}.") + nocitekeys = [] + for nocite in nocite_elements: + keys = [x.strip() for x in nocite.text.split(",")] + nocitekeys += keys + nocite.tag = "elementtobestripped" + logging.debug(f"Found {libeoaconvert.plural(len(set(nocitekeys)), 'nocite key')}.") + logging.debug(f"Adding nocite keys to the other cite keys.") + citekeys += nocitekeys + + if bibliography_keyword: + logging.info("We want to collect the entries matching the keywords from the database.") + citations_to_format = [x["id"] for x in citations_json if x["keyword"] == bibliography_keyword] + logging.debug(f"Found {libeoaconvert.plural(len(citations_to_format), 'nocite citation')} in database.") + else: + citations_to_format = set(citekeys) + + print( "citekeys: ") + print( len( citekeys ) ) + csl_file = BASE_DIR / CONFIG['Auxiliaries']['CSL_FILE'] + formatted_references = libeoaconvert.format_citations( + citations_to_format, + bib_database + ".bib", + strLanguage, + tmp_citation_filename, + csl_file + )[0] + + fixed_entries = libeoaconvert.fix_bib_entries(formatted_references) + for entry in fixed_entries: + xmlBibliographyDiv.append(entry) + # If Bibliography-Type is monograph search for EOAbibliography and make it all if bib_type == "monograph": + tmp_citation_filename = "used_citations-monograph" if xmlTree.find(".//EOAprintbibliography") is not None: # to insert here: with keywords we can have multiple bibliographies xmlBibliography = xmlTree.find(".//EOAprintbibliography") - bibliography_keyword = xmlBibliography.get("keyword") - if bibliography_keyword: - logging.debug(f"Found bibliography keyword {bibliography_keyword}") - else: - logging.debug(f"No bibliography keyword found") - - xmlBibliography.clear() - xmlBibliography.tag = "div" - - bib_parent = xmlBibliography.getparent() - bib_parent.tag = "div" - - xmlBibliographyDiv = etree.Element("div") - xmlBibliography.addnext(xmlBibliographyDiv) - ############### - # new version # - ############### - tmp_citation_filename = "used_citations-monograph" - citations_to_format = [x["id"] for x in citations_json if x["keyword"] == bibliography_keyword] - formatted_references = libeoaconvert.format_citations(citations_to_format, bib_database + ".bib", strLanguage, tmp_citation_filename)[0] - - fixed_entries = libeoaconvert.fix_bib_entries(formatted_references) - for entry in fixed_entries: - xmlBibliographyDiv.append(entry) - ###################### - # end of new version # - ###################### - - ############### - # old version # - ############### - # xml_bib_entries = xmlBibTree.findall(".//entry") - # intNumberOfEntry = 0 - # for xmlEntry in xml_bib_entries: - # if intNumberOfEntry == 0: - # # Don't check for previous author if first entry of the Bibliography - # bibEntry = Bibitem(xmlEntry) - # strNewentry = "

" + createBibEntryAuthorYear(bibEntry, boolSameAuthor=False) + "

" - # else: - # bibEntry = Bibitem(xmlEntry) - # # Check if author of previous Entry is the same - # bibEntryPrevious = Bibitem(xml_bib_entries[intNumberOfEntry - 1]) - # if bibEntry.fullauthorlastfirst()[0] == bibEntryPrevious.fullauthorlastfirst()[0]: - # strNewentry = "

" + createBibEntryAuthorYear(bibEntry, boolSameAuthor=True) + "

" - # elif bibEntryPrevious.fullauthorlastfirst()[0] == bibEntry.fullauthorlastfirst()[0]: - # strNewentry = "

" + createBibEntryAuthorYear(bibEntry, boolSameAuthor=True) + "

" - # elif bibEntry.fullauthorlastfirst()[0] == bibEntryPrevious.fullauthorlastfirst()[0]: - # strNewentry = "

" + createBibEntryAuthorYear(bibEntry, boolSameAuthor=True) + "

" - # else: - # strNewentry = "

" + createBibEntryAuthorYear(bibEntry, boolSameAuthor=False) + "

" - - # xmlNew = etree.fromstring(strNewentry) - # next one writes the bibliography into the document - # xmlBibliography.append(xmlNew) - - # intNumberOfEntry += 1 - ###################### - # end of old version # - ###################### + print_bibliography( + xmlBibliography, + xmlTree, + tmp_citation_filename + ) # If Bibliography-Type is anthology search for EOAbibliography and make one per chapter @@ -1255,42 +1292,12 @@ def cleanup(): tmp_citation_filename = "used_citations-anthology-chapter_{:02d}".format(intChapterNumber) if xmlChapter.find(".//EOAprintbibliography") is not None: xmlBibliography = xmlChapter.find(".//EOAprintbibliography") - bibliography_keyword = xmlBibliography.get("keyword") - if bibliography_keyword: - logging.debug(f"Found bibliography keyword {bibliography_keyword}") - else: - logging.debug(f"No bibliography keyword found") - - xmlBibliography.getparent().tag = "div" - xmlBibliographyDiv = etree.Element("div") - xmlBibliography.addnext(xmlBibliographyDiv) - - citekeys = xmlChapter.xpath(".//citekey/text()") - - nocite_elements = xmlChapter.xpath(".//nocite") - - if nocite_elements: - logging.debug(f"Found {libeoaconvert.plural(len(nocite_elements), 'nocite command')}.") - nocitekeys = [] - for nocite in nocite_elements: - keys = [x.strip() for x in nocite.text.split(",")] - nocitekeys += keys - nocite.tag = "elementtobestripped" - logging.debug(f"Found {libeoaconvert.plural(len(set(nocitekeys)), 'nocite key')}.") - logging.debug(f"Adding nocite keys to the other cite keys.") - citekeys += nocitekeys - - if bibliography_keyword: - logging.info("We want to collect the entries matching the keywords from the database.") - citations_to_format = [x["id"] for x in citations_json if x["keyword"] == bibliography_keyword] - logging.debug(f"Found {libeoaconvert.plural(len(citations_to_format), 'nocite citation')} in database.") - else: - citations_to_format = set(citekeys) - formatted_references = libeoaconvert.format_citations(citations_to_format, bib_database + ".bib", strLanguage, tmp_citation_filename)[0] - fixed_entries = libeoaconvert.fix_bib_entries(formatted_references) - for entry in fixed_entries: - xmlBibliographyDiv.append(entry) + print_bibliography( + xmlBibliography, + xmlChapter, + tmp_citation_filename + ) else: # create an empty file diff --git a/imxml2django.py b/imxml2django.py index 64a437e..3f9f9d8 100755 --- a/imxml2django.py +++ b/imxml2django.py @@ -11,6 +11,9 @@ """ +from utils.load_config import load_config +import utils.libeoaconvert as libeoaconvert + import pickle import os import sys @@ -20,46 +23,61 @@ import subprocess import argparse import configparser -import libeoaconvert import logging from copy import deepcopy from lxml import etree +from pathlib import Path + +BASE_DIR = Path( os.path.realpath(__file__) ).parent ##################### # Parsing arguments # ##################### + parser = argparse.ArgumentParser() -parser.add_argument("-c", "--config", dest="CONFIG_FILE", help="Name of configuration file", metavar="CONFIGURATION") -parser.add_argument("-p", "--checkpublicationcfg", help="Check the publication.cfg for completeness.", action="store_true") +parser.add_argument( + "-c", "--config", + default = BASE_DIR / "config" / "eoaconvert.cfg", + dest="CONFIG_FILE", + help="Name of configuration file", + metavar="CONFIGURATION" +) +parser.add_argument( + "-l", "--log-file", + default = "eoaconvert.log", + help="logfile" +) +parser.add_argument( + "--log-level", + default = "DEBUG", + help="log level: choose between DEBUG, INFO, WARNING, ERROR, CRITICAL" +) +parser.add_argument( + "-p", "--checkpublicationcfg", + help="Check the publication.cfg for completeness.", + action="store_true" +) args = parser.parse_args() -if args.CONFIG_FILE is not None: - CONFIG_FILE = os.path.abspath(args.CONFIG_FILE) -else: - CONFIG_FILE = os.path.dirname(sys.argv[0]) + os.path.sep + "config" + os.path.sep + "eoaconvert.cfg" +config_file = args.CONFIG_FILE + +logging.debug("The configfile is %s." % config_file) ################################## # Reading the configuration file # ################################## -CONFIG = configparser.ConfigParser() -CONFIG.read(CONFIG_FILE) - -###################### -# Setting up logging # -###################### -LOGFILE = CONFIG['General']['logfile'] -LOGLEVEL = CONFIG['General']['loglevel'] - -logging.basicConfig(level=LOGLEVEL, format='%(asctime)s - %(levelname)s - %(message)s') - -logging.debug("The configfile is%s." % CONFIG_FILE) +CONFIG = load_config( + config_file, + args.log_file, + args.log_level, +) ######################## # Paths to executables # ######################## -GM_PATH = CONFIG['Executables']['graphicsmagic'] -TL_PATH = CONFIG['Executables']['texlive'] +GM_PATH = "gm" +PDFCROP_EXEC = "pdfcrop" ########################################### # Loading data from first conversion step # @@ -292,7 +310,7 @@ def djangoParseObject(xmlElement, indent=False, listtype=None, listnumber=0, uid # yellow if os.path.splitext(strImageFileName)[1].lower() == ".pdf": logging.debug("Found a PDF file") - strImageFilepath = libeoaconvert.sanitizeImage(os.getcwd() + "/CONVERT/django/images/" + strImageFileDir + strImageFileName, GM_PATH, TL_PATH) + strImageFilepath = libeoaconvert.sanitizeImage(os.getcwd() + "/CONVERT/django/images/" + strImageFileDir + strImageFileName, GM_PATH, PDFCROP_EXEC) xmlEOAfigure.set("file", strImageFileDir + strImageFileName.replace(".pdf", ".png")) logging.debug("The filename is %s" % xmlEOAfigure.get("file")) else: @@ -1077,6 +1095,10 @@ def bring_footnote_down_django(footnote, fragment, footnote_number, object_numbe debug_chapters(xmlEOAchapters) +TRANSLATION_FILE = BASE_DIR / CONFIG['Auxiliaries']['TRANSLATIONS'] +translation_xml = etree.parse( str( TRANSLATION_FILE ) ) +dictLangFootnotes = translation_xml.find("//entry[@name='footnotes']").attrib + for xmlEOAchapter in xmlEOAchapters: groupings = libeoaconvert.get_bigfoot_data(xmlEOAchapter) has_old = 0 != len(xmlEOAchapter.findall(".//note")) @@ -1108,7 +1130,7 @@ def bring_footnote_down_django(footnote, fragment, footnote_number, object_numbe intObjectNumber += 1 xmlHead = etree.Element("head") - xmlHead.text = libeoaconvert.dictLangFootnotes[libeoaconvert.two_letter_language(xmlEOAchapter.get("language"))] + xmlHead.text = dictLangFootnotes[libeoaconvert.two_letter_language(xmlEOAchapter.get("language"))] xmlEOAsection.append(xmlHead) xmlResult.append(xmlEOAsection) diff --git a/imxml2epub.py b/imxml2epub.py index fb3b6f6..15506e3 100755 --- a/imxml2epub.py +++ b/imxml2epub.py @@ -9,42 +9,84 @@ with eoatex2imxml or tei2imxml. """ +from utils.load_config import load_config +import utils.libeoaconvert as libeoaconvert + import os import sys import argparse import re import shutil -import configparser import pickle import shlex import subprocess import logging from copy import deepcopy from lxml import etree -import libeoaconvert +from pathlib import Path + +BASE_DIR = Path( os.path.realpath(__file__) ).parent +SCRIPT_NAME = Path( __file__).stem ##################### # Parsing arguments # ##################### + parser = argparse.ArgumentParser() -parser.add_argument("-c", "--config", dest="CONFIG_FILE", help="Name of configuration file", metavar="CONFIGURATION") -parser.add_argument("-f", "--font", help="Font to be used, default is TeX Gyre Termes", default="termes") -parser.add_argument("-nc", "--nocaption", help="No captions for figures.", action="store_true") +parser.add_argument( + "-c", "--config", + dest="CONFIG_FILE", + default = BASE_DIR / "config" / "eoaconvert.cfg", + help="Name of configuration file", + metavar="CONFIGURATION" +) +parser.add_argument( + "-l", "--log-file", + default = SCRIPT_NAME + ".log" , + help="logfile" +) +parser.add_argument( + "--log-level", + default = "DEBUG", + help="log level: choose between DEBUG, INFO, WARNING, ERROR, CRITICAL" +) +parser.add_argument( + "-f", "--font", + help="Font to be used, default is TeX Gyre Termes", + default="termes" +) +parser.add_argument( + "-nc", "--nocaption", + help="No captions for figures.", + action="store_true" +) + args = parser.parse_args() +config_file = args.CONFIG_FILE + +''' if args.CONFIG_FILE is not None: CONFIG_FILE = os.path.abspath(args.CONFIG_FILE) else: # CONFIG_FILE = "/Users/kthoden/EOAKram/dev/EOASkripts/Skripten/eoaconvert.cfg" CONFIG_FILE = os.path.dirname(sys.argv[0]) + "/config/eoaconvert.cfg" +''' -print("The config file is ", CONFIG_FILE) +print("The config file is ", config_file) ################################## # Reading the configuration file # ################################## -CONFIG = configparser.ConfigParser() -CONFIG.read(CONFIG_FILE) + +CONFIG = load_config( + config_file, + args.log_file, + args.log_level, +) + +# CONFIG = configparser.ConfigParser() +# CONFIG.read(CONFIG_FILE) # CONFIG = configparser.ConfigParser() # CONFIG.read("/Users/kthoden/EOAKram/dev/EOASkripts/Skripten/eoaconvert.cfg") @@ -55,8 +97,9 @@ EPUB_FILES = os.path.dirname(sys.argv[0]) + "/data/epub_files/" # TEMPLATE_PATH = CONFIG['Auxiliaries']['template_path'] -GM_PATH = CONFIG['Executables']['graphicsmagic'] -TL_PATH = CONFIG['Executables']['texlive'] +GM_PATH = "gm" +PDFCROP_EXEC = "pdfcrop" +# TL_PATH = CONFIG['Executables']['texlive'] # TEXBIN_PATH = CONFIG['Executables']['texbin'] # TRALICS_PATH_EXEC = CONFIG['Executables']['tralics_path_exec'] # TRALICS_PATH_LIB = CONFIG['Executables']['TRALICS_PATH_LIB'] @@ -361,7 +404,7 @@ def addToTocncx(tocncx, Label, intTechnicalChapterNumber): xmlItem.set("href", "images/cover.jpg") xmlItem.set("media-type", "image/jpeg") xmlManifest.append(xmlItem) -shutil.copy(os.getcwd() + "/CONVERT/cover.jpg", os.getcwd() + "/CONVERT/epub/OEBPS/images/") +shutil.copy(os.getcwd() + "/CONVERT/Cover.jpg", os.getcwd() + "/CONVERT/epub/OEBPS/images/") xmlItem = etree.Element("item") xmlItem.set("id", "cover") xmlItem.set("href", "cover.xhtml") @@ -511,7 +554,7 @@ def addToTocncx(tocncx, Label, intTechnicalChapterNumber): print("Found an unrecognized image suffix: %s" % strImageFileName_Suffix) sys.exit() - strImageFilepath = libeoaconvert.sanitizeImage(os.getcwd() + "/CONVERT/epub/OEBPS/images/" + strImageFileDir + strImageFileName, GM_PATH, TL_PATH) + strImageFilepath = libeoaconvert.sanitizeImage(os.getcwd() + "/CONVERT/epub/OEBPS/images/" + strImageFileDir + strImageFileName, GM_PATH, PDFCROP_EXEC) # Add copied file to contentopf content_opf_filename = "images" + os.path.sep + "{}{}.{}".format(strImageFileDir, strImageFileNamewoSuffix, extension_and_mime) content_opf_fileid = "{}{}{}".format(strImageFileDir, strImageFileNamewoSuffix, extension_and_mime) @@ -563,7 +606,7 @@ def addToTocncx(tocncx, Label, intTechnicalChapterNumber): strImageFileName = os.path.basename(strImageFileString) strImageFileNamewoSuffix = os.path.splitext(strImageFileName)[0] shutil.copy(os.getcwd() + "/" + strImageFileString, os.getcwd() + "/CONVERT/epub/OEBPS/images/" + strImageFileDir + strImageFileName) - strImageFilepath = libeoaconvert.sanitizeImage(os.getcwd() + "/CONVERT/epub/OEBPS/images/" + strImageFileDir + strImageFileName, GM_PATH, TL_PATH) + strImageFilepath = libeoaconvert.sanitizeImage(os.getcwd() + "/CONVERT/epub/OEBPS/images/" + strImageFileDir + strImageFileName, GM_PATH, PDFCROP_EXEC) # Add copied file to contentopf contentopf = addToContentopf(contentopf, "images/" + strImageFileDir + strImageFileNamewoSuffix + ".jpg", strImageFileDir + strImageFileNamewoSuffix + "-nonumber-jpg", "jpg") logging.debug("Added a nonumber figure") @@ -1272,7 +1315,7 @@ class FootnoteError(Exception): # hier shutil.copy(os.getcwd() + "/" + strImageFile, os.getcwd() + "/CONVERT/epub/OEBPS/images/" + strImageFileDir + strImageFileName) - strImageFilepath = libeoaconvert.sanitizeImage(os.getcwd() + "/CONVERT/epub/OEBPS/images/" + strImageFileDir + strImageFileName, GM_PATH, TL_PATH) + strImageFilepath = libeoaconvert.sanitizeImage(os.getcwd() + "/CONVERT/epub/OEBPS/images/" + strImageFileDir + strImageFileName, GM_PATH, PDFCROP_EXEC) # Add copied file to contentopf img_base_file_name, img_file_extension = os.path.splitext(strImageFileName) diff --git a/tei2imxml.py b/tei2imxml.py old mode 100644 new mode 100755 index 2582a61..7593981 --- a/tei2imxml.py +++ b/tei2imxml.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # -*- coding: utf-8; mode: python -*- """A converter from TEI to customized DocBook XML. @@ -12,6 +12,9 @@ __date__ = "20180116" __author__ = "kthoden@mpiwg-berlin.mpg.de" +from utils.load_config import load_config +import utils.libeoaconvert as libeoaconvert + import os import sys import logging @@ -20,34 +23,27 @@ import pickle import shlex import argparse -import configparser -import libeoaconvert from datetime import datetime from bs4 import BeautifulSoup from lxml import etree, objectify from lxml.html import soupparser +from pathlib import Path # things to be done # assign ids top to bottom for the following elements: # div1 div2 div3 note item table EOAfigure EOAequation formula theorem -CONFIG_FILE = os.path.dirname(sys.argv[0]) + os.path.sep + "config" + os.path.sep +"eoaconvert.cfg" -# Reading the configuration file -CONFIG = configparser.ConfigParser() -CONFIG.read(CONFIG_FILE) - -logging.basicConfig(level=logging.INFO, format=' %(asctime)s - %(levelname)s - %(message)s') - -ns_tei = "http://www.tei-c.org/ns/1.0" -NS_MAP = {"t" : ns_tei} +BASE_DIR = Path( os.path.realpath(__file__) ).parent +SCRIPT_NAME = Path( __file__).stem TMP_DIR = os.path.expanduser("tmp_files") OUTPUT_DIR = os.path.expanduser("CONVERT") -CSL_FILE = CONFIG['Auxiliaries']['CSL_FILE'] - CSV_FILE = os.path.expanduser("hi_figures.csv") +ns_tei = "http://www.tei-c.org/ns/1.0" +NS_MAP = {"t" : ns_tei} + def get_publication_info(xml_tree): """Query the TEI document for metadata fields. @@ -977,13 +973,42 @@ def fix_bib_entries(div_snippet): def main(): """Main function""" + ### data_pickle = TMP_DIR + os.path.sep + 'data.pickle' + # parse args: parser = argparse.ArgumentParser() + parser.add_argument( + "-c", "--config", + dest="CONFIG_FILE", + default = BASE_DIR / "config" / "eoaconvert.cfg", + help="Name of configuration file", + metavar="CONFIGURATION" + ) + parser.add_argument( + "-l", "--log-file", + default = SCRIPT_NAME + ".log" , + help="logfile" + ) + parser.add_argument( + "--log-level", + default = "DEBUG", + help="log level: choose between DEBUG, INFO, WARNING, ERROR, CRITICAL" + ) parser.add_argument("-d", "--pickleddata", default=data_pickle, help="Pickled data file to be used.") parser.add_argument("-him", "--hyperimage", action="store_true") parser.add_argument("teifile", help="TEI XML file to convert into DocBook XML.") args = parser.parse_args() + config_file = args.CONFIG_FILE + print("The config file is ", config_file) + + # load config: + CONFIG = load_config( + config_file, + args.log_file, + args.log_level, + ) + CSL_FILE = BASE_DIR / CONFIG['Auxiliaries']['CSL_FILE'] if not os.path.exists(TMP_DIR): os.mkdir(os.path.expanduser(TMP_DIR)) @@ -1140,5 +1165,8 @@ def main(): # def main ends here if __name__ == '__main__': + + # run main: main() + # finis diff --git a/libeoabibitem.py b/utils/libeoabibitem.py similarity index 100% rename from libeoabibitem.py rename to utils/libeoabibitem.py diff --git a/libeoaconvert.py b/utils/libeoaconvert.py similarity index 87% rename from libeoaconvert.py rename to utils/libeoaconvert.py index 6290594..a5b0f48 100644 --- a/libeoaconvert.py +++ b/utils/libeoaconvert.py @@ -11,40 +11,11 @@ import configparser from lxml import etree from lxml.html import soupparser +from pathlib import Path -################################## -# Reading the configuration file # -################################## -CONFIG_FILE = os.path.dirname(os.path.realpath(__file__)) + os.path.sep + "config" + os.path.sep + "eoaconvert.cfg" -# CONFIG_FILE = os.path.abspath(os.path.dirname(sys.argv[0])) + "/config/ -CONFIG = configparser.ConfigParser() -CONFIG.read(CONFIG_FILE) - -###################### -# Setting up logging # -###################### -LOGFILE = CONFIG['General']['logfile'] -LOGLEVEL = CONFIG['General']['loglevel'] - -CSL_FILE = CONFIG['Auxiliaries']['CSL_FILE'] -TRANSLATION_FILE = CONFIG['Auxiliaries']['TRANSLATIONS'] - -logging.basicConfig(level=LOGLEVEL, format='%(asctime)s - %(levelname)s - %(message)s') - -# Setup of various dictionaries for localization of various elements -# dictLangFootnotes = {"it" : "Note a piè pagina", "fr" : "notes en bas de page", "de" : "Fußnoten", "en" : "Footnotes"} -# dict_and = {"en" : "and", "de" : "und", "fr" : "et", "it" : "e"} -# dict_ed = {"en" : "ed.", "de" : "Hrsg."} -# dict_eds = {"en" : "eds.", "de" : "Hrsg."} - -# use the translation file that is used also for XSL -translation_xml = etree.parse(TRANSLATION_FILE) -dictLangFootnotes = translation_xml.find("//entry[@name='footnotes']").attrib -dict_and = translation_xml.find("//entry[@name='and']").attrib -dict_ed = translation_xml.find("//entry[@name='editor-abbr']").attrib -dict_eds = translation_xml.find("//entry[@name='editors-abbr']").attrib # the new-style footnotes that use LaTeX bigfoot show up in the following order: +# global variables footnote_groups = ["decimal", "lower-latin"] ######################### @@ -76,7 +47,12 @@ def get_bigfoot_data(chapter): ] # def get_bigfoot_data ends here -def sanitizeImage(strImagepath, GM_PATH, TL_PATH): +def sanitizeImage( + strImagepath, + GM_PATH, + PDFCROP_EXEC, + # TL_PATH +): """Adjust and convert image for epub standard""" if not os.path.exists("tmp_files/tmp_images/"): @@ -129,7 +105,7 @@ def sanitizeImage(strImagepath, GM_PATH, TL_PATH): strNewImagepath = os.path.splitext(strImagepath)[0] clipped_file = strImagepath.replace(".pdf", "-clipped.pdf") - Kommando = TL_PATH + "texmf-dist/scripts/pdfcrop/pdfcrop.pl --margins 10 --clip --hires " + strImagepath + " " + clipped_file + Kommando = PDFCROP_EXEC + " --margins 10 --clip --hires " + strImagepath + " " + clipped_file logging.debug(Kommando) Argumente = shlex.split(Kommando) @@ -210,7 +186,13 @@ def format_citations_tex4ht(used_citekeys, bibdata, language, tmp_filename): return references # def format_citations_tex4ht ends here -def format_citations(used_citekeys, bibdata, language, tmp_filename): +def format_citations( + used_citekeys, + bibdata, + language, + tmp_filename, + csl_file +): """Return a formatted xmlstring of the used citations""" tmp_path_md = "tmp_files" + os.path.sep + tmp_filename + ".md" @@ -231,7 +213,7 @@ def format_citations(used_citekeys, bibdata, language, tmp_filename): # citation_formatter.write("@%s\n" % entry) citation_formatter.write("\n# References\n") - command = "pandoc -o %s -t html --filter=pandoc-citeproc --bibliography=%s --csl=%s %s" % (tmp_path_html, bibdata, CSL_FILE, tmp_path_md) + command = "pandoc -o %s -t html --filter=pandoc-citeproc --bibliography=%s --csl=%s %s" % (tmp_path_html, bibdata, csl_file, tmp_path_md) arguments = shlex.split(command) logging.info("Using external command pandoc with command %s" % command) subprocess.call(arguments) diff --git a/utils/load_config.py b/utils/load_config.py new file mode 100644 index 0000000..13c5877 --- /dev/null +++ b/utils/load_config.py @@ -0,0 +1,45 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8; mode: python -*- + +import configparser +import logging + +from pathlib import Path +import os + +################################## +# Reading the configuration file # +################################## + +def load_config( + cfg_file, + LOGFILE, + LOGLEVEL + ): + + BASE_DIR = Path( os.path.realpath(__file__) ).parent.parent + CONFIG = configparser.ConfigParser() + CONFIG.read( cfg_file ) + + ###################### + # Setting up logging # + ###################### + # LOGFILE = CONFIG['General']['logfile'] + # LOGLEVEL = CONFIG['General']['loglevel'] + + logging.basicConfig(level=LOGLEVEL, format='%(asctime)s - %(levelname)s - %(message)s') + + # Setup of various dictionaries for localization of various elements + # dictLangFootnotes = {"it" : "Note a piè pagina", "fr" : "notes en bas de page", "de" : "Fußnoten", "en" : "Footnotes"} + # dict_and = {"en" : "and", "de" : "und", "fr" : "et", "it" : "e"} + # dict_ed = {"en" : "ed.", "de" : "Hrsg."} + # dict_eds = {"en" : "eds.", "de" : "Hrsg."} + + # use the translation file that is used also for XSL + # translation_xml = etree.parse( str( TRANSLATION_FILE ) ) + # dictLangFootnotes = translation_xml.find("//entry[@name='footnotes']").attrib + # dict_and = translation_xml.find("//entry[@name='and']").attrib + # dict_ed = translation_xml.find("//entry[@name='editor-abbr']").attrib + # dict_eds = translation_xml.find("//entry[@name='editors-abbr']").attrib + + return CONFIG