src/eoatex2imxml.py

#!/usr/bin/env python3
# -*- coding: utf-8; mode: python -*-
# Time-stamp: <2021-07-09 13:47:28 (kthoden)>

"""
Converts Latex files into a customized DocBook XML file.

The program depends on the external program tralics for the conversion
as well as xelatex, pdfcrop (part of latex distributions) and
pandoc-citeproc for additional formatting.
"""

# license?
__version__= "1.0"
__author__ = "Klaus Thoden"
__date__="20171205"

# can the job done by BeautifulSoup also be done by lxml.html soupparser?
# as described in http://infohost.nmt.edu/tcc/help/pubs/pylxml/web/soupparser.html
# from lxml.html import soupparser

from utils.libeoabibitem import Bibitem
import utils.libeoaconvert as libeoaconvert
from utils.load_config import load_config, exec_command, check_executable, copy_dir_overwrite, ToLog, ToFile
import utils.bib2html as bib2html

# imports
import argparse
from lxml import etree
from bs4 import BeautifulSoup
import glob
import os
import re
import string
import shlex
import json
import subprocess
import sys
import shutil
import logging
import pickle
from pathlib import Path
import time

# Location of this script: its path, its name without extension, and the
# resolved directory it lives in.
SCRIPT_PATH = Path(__file__)
SCRIPT_NAME = SCRIPT_PATH.stem
BASE_DIR = SCRIPT_PATH.resolve().parent

# Default directories; each one can be overridden through the environment
# variable of the same name.
DEFAULT_INPUT_DIR = Path(os.environ.get('INPUT_DIR', './input'))
DEFAULT_OUTPUT_DIR = Path(os.environ.get('OUTPUT_DIR', './output'))
DEFAULT_DEPENDENCIES_DIR = Path(os.environ.get('DEPENDENCIES_DIR', './dependencies'))

#####################
# Parsing arguments #
#####################

# Command line interface.  ArgumentDefaultsHelpFormatter adds information
# about the default value to the help text of each argument.
parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
)
parser.add_argument(
        "-c", "--config",
        default = BASE_DIR / "config" / "eoaconvert.cfg",
        help="Name of config file"
)
parser.add_argument(
        "--log-level",
        default = "INFO",
        help="log level: choose between DEBUG, INFO, WARNING, ERROR, CRITICAL"
)
# The input of this script is the LaTeX source, hence the "*.tex" default.
parser.add_argument(
        "-f", "--filename",
        default = Path("*.tex"),
        type = Path,
        help = "tex file inside INPUT_DIR, or absolute path. Patterns like '*.tex' are also acceptable"
)
# f-string, so that the actual default output directory shows up in the help.
parser.add_argument(
        "--latex-dir",
        type = Path,
        help = f"directory where to find the output generated by eoatex2pdf.py. default: {DEFAULT_OUTPUT_DIR}/INPUT_DIR/pdf"
)
parser.add_argument(
        "-o", "--output-dir",
        type = Path,
        help = f"output directory. default: {DEFAULT_OUTPUT_DIR}/INPUT_DIR/imxml"
)
# Takes a value; the script reacts to the value "temp".
parser.add_argument(
        "-t", "--trash",
        help="Remove temporary files."
)
parser.add_argument(
        "-nb", "--no-bib4ht",
        action="store_true",
        help="Skip creation of bibliography, rely on already present HTML files."
)
parser.add_argument(
        "-nm", "--no-math",
        action="store_true",
        help="Skip processing of equations"
)
parser.add_argument(
        "-nr", "--no-references",
        action="store_true",
        help="Skip processing of references."
)
parser.add_argument(
        "-classic", "--eoa-classic",
        action="store_true",
        help="Embed webdesign of EOA1.0 into XML"
)
parser.add_argument(
        "INPUT_DIR",
        help = "directory containing the publication (including resources like pictures, etc.)",
        type = Path,
)

args = parser.parse_args()


# Configuration file as given on the command line (or its default).
CONFIG_FILE = args.config

print(f"The configfile is {CONFIG_FILE}.")

########################
# Paths to executables #
########################
# Names of the external programs; they are expected to be found on PATH.
# pdfcrop is part of the texlive distribution.
GM_PATH, TRALICS_PATH_EXEC, PDFCROP_EXEC = "gm", "tralics", "pdfcrop"

# TL_PATH = CONFIG['Executables']['texlive']
# TEXBIN_PATH = CONFIG['Executables']['texbin']

############################
# Paths:
############################
INPUT_DIR = args.INPUT_DIR
# An absolute --filename is used verbatim.  Anything else is treated as a
# glob pattern relative to INPUT_DIR, of which the first match is taken.
if args.filename.is_absolute():
    INPUT_PATH = args.filename
else:
    INPUT_PATH = next(INPUT_DIR.glob(str(args.filename)), None)
    if INPUT_PATH is None:
        # load_config() has not been called yet at this point, so the problem
        # is reported on stderr (sys.exit with a message exits with status 1).
        sys.exit(f"Found no file matching '{args.filename}' in {INPUT_DIR}. Exiting.")
# Unless given explicitly, output and LaTeX directories live below
# DEFAULT_OUTPUT_DIR in a directory named after the input directory.
OUTPUT_DIR = \
    args.output_dir if args.output_dir is not None else (DEFAULT_OUTPUT_DIR / INPUT_DIR.resolve().stem) / "imxml"
LATEX_DIR = \
    args.latex_dir if args.latex_dir is not None else (DEFAULT_OUTPUT_DIR / INPUT_DIR.resolve().stem) / "pdf"
LOG_DIR = OUTPUT_DIR / "log"
LOG_FILE = (LOG_DIR / SCRIPT_NAME) . with_suffix( ".log" )

TEMP_DIR = OUTPUT_DIR / "tmp_files"
DEBUG_DIR = OUTPUT_DIR / "debug"

# where to output the xml file:
XML_FILE = (OUTPUT_DIR / INPUT_PATH.name) .with_suffix( ".xml" )

BIB2HTML_FILENAME = "temp"

##################################
# Reading the configuration file #
##################################

# The loader receives the config file together with the log level and the
# log file location.
CONFIG = load_config(CONFIG_FILE, args.log_level, LOG_FILE)

############################
# Paths to auxiliary files #
############################
# All three entries of the 'Auxiliaries' section are relative to BASE_DIR.
TRALICS_PATH_LIB, TEMPLATE_PATH, SUPPORT_PATH = (
    BASE_DIR / CONFIG['Auxiliaries'][key]
    for key in ('TRALICS_PATH_LIB', 'template_path', 'support_path')
)


#################################################
# Checking for existence of tools and libraries #
#################################################

# sanity check:
# Log the search path (an unset PATH must not abort the script here), then
# verify that every external program is available.
logging.debug("PATH: {}".format( os.environ.get('PATH', '') ))
check_executable( GM_PATH )
check_executable( TRALICS_PATH_EXEC )
check_executable( PDFCROP_EXEC )
logging.info( "checking executables 'utils.bib2html' needs...:" )
bib2html.check_executables()

if not os.path.exists(TRALICS_PATH_LIB):
    logging.error(f"Cannot find the Tralics configuration at {TRALICS_PATH_LIB}. Exiting.")
    # Non-zero status, like the other fatal errors of this script.
    sys.exit( 1 )

##################################
# Setting up various directories #
##################################

# Create the working directories.  makedirs also creates missing parent
# directories and does not fail if a directory already exists.
os.makedirs( OUTPUT_DIR, exist_ok=True )
os.makedirs( TEMP_DIR, exist_ok=True )
os.makedirs( DEBUG_DIR, exist_ok=True )

# publication.cfg is mandatory and is copied next to the generated files.
if os.path.exists(INPUT_DIR / "publication.cfg"):
    shutil.copy(INPUT_DIR / "publication.cfg", OUTPUT_DIR)
    logging.info(f"Copied from {INPUT_DIR}.")
else:
    logging.error(f"Found no publication.cfg in {INPUT_DIR}. Exiting")
    sys.exit( 1 )
# The cover image is mandatory as well; it is looked up in INPUT_DIR first
# and in INPUT_DIR/images second.
if os.path.exists(INPUT_DIR /  "Cover.jpg"):
    shutil.copy(INPUT_DIR / "Cover.jpg", OUTPUT_DIR / "Cover.jpg")
    logging.info("Copied cover image from input directory.")
elif os.path.exists(INPUT_DIR /  "images/Cover.jpg"):
    shutil.copy(INPUT_DIR / "images/Cover.jpg", OUTPUT_DIR / "Cover.jpg")
    logging.info("Copied cover image from publication directory.")
else:
    logging.error("No coverfile found. You can create a temporary one with the mkimage.py script")
    sys.exit( 1 )

# Copy the support files (DTD files and auxiliary directories) from
# SUPPORT_PATH into the output directory.
shutil.copy(SUPPORT_PATH / "classes.dtd", OUTPUT_DIR)
shutil.copy(SUPPORT_PATH / "mathml2-qname-1.mod", OUTPUT_DIR)
shutil.copy(SUPPORT_PATH / "mathml2.dtd", OUTPUT_DIR)
copy_dir_overwrite(SUPPORT_PATH / "html", (OUTPUT_DIR / "html"))
copy_dir_overwrite(SUPPORT_PATH / "iso8879", (OUTPUT_DIR / "iso8879"))
copy_dir_overwrite(SUPPORT_PATH / "iso9573-13", (OUTPUT_DIR / "iso9573-13"))
copy_dir_overwrite(SUPPORT_PATH / "mathml", (OUTPUT_DIR / "mathml"))

########################################
# Certain functions for specific tasks #
########################################

def TeX2PNG(LaTeXCode, Type, Chapter, Number):
    """Render LaTeX code into a PNG file (epub & django).

    The formula is wrapped in the math delimiters belonging to ``Type``,
    inserted into the ``formula.tex`` template, compiled with xelatex,
    cropped with pdfcrop and converted to PNG with ``gm convert``.
    Intermediate files are written to TEMP_DIR/formulas2png, the PNG to
    OUTPUT_DIR/items/<Type>_<Chapter>_<Number>.png.

    Args:
        LaTeXCode: formula source without the surrounding math delimiters.
        Type: one of "EOAineq", "EOAequation", "EOAequationnonumber",
            "EOAequationarray" or "EOAequationarraynonumber".
        Chapter: chapter number, only used to build the file names.
        Number: formula number, only used to build the file names.

    Returns:
        The formula source as it was typeset, i.e. including the math
        delimiters and with the placeholder commands replaced.

    Raises:
        KeyError: if ``Type`` is not one of the known formula types.
        subprocess.CalledProcessError: if one of the external programs
            returns a non-zero exit status.
    """
    # logging.info( f"TeX2PNG({LaTeXCode}, {Type}, {Chapter}, {Number})" )
    # Dictionary contains Type:begin/end
    Types = {
    "EOAineq" : ["$", "$"],
    "EOAequation" : ["\\begin{equation*}", "\\end{equation*}"],
    "EOAequationnonumber" : ["\\begin{equation*}", "\\end{equation*}"],
    "EOAequationarray" : ["\\begin{align*}", "\\end{align*}"],
    "EOAequationarraynonumber" : ["\\begin{align*}", "\\end{align*}"]
    }
    LaTeXCode = Types[Type][0] + LaTeXCode + Types[Type][1]
    # Regular expressions for placeholders of the form |command| and the
    # LaTeX command each of them is turned into.  Raw strings avoid invalid
    # escape sequences.
    # NOTE(review): the key for \slashed used to be "\slashed\|", which as a
    # regular expression matches whitespace followed by "lashed|"; it now
    # follows the |command| form of the other keys - confirm this intent.
    dictRebindedCommands = {
    r"\|ket\|" : r"\\ket",
    r"\|braket\|" : r"\\braket",
    r"\|bra\|" : r"\\bra",
    r"\|Bra\|" : r"\\Bra",
    r"\|Ket\|" : r"\\Ket",
    r"\|slashed\|" : r"\\slashed",
    r"\|cancel\|" : r"\\cancel"
    }
    for strCommand, strReplacement in dictRebindedCommands.items():
        LaTeXCode = re.sub(strCommand, strReplacement, LaTeXCode)

    # Read the plain LaTeX template
    with open(TEMPLATE_PATH / "formula.tex", "r") as tmp:
        Template = tmp.read()

    # Working directory for the intermediate files (local, not the user's
    # TMPDIR) and target directory for the PNG files
    formula_tmp_dir = TEMP_DIR / "formulas2png"
    os.makedirs( formula_tmp_dir, exist_ok=True )
    items_dir = OUTPUT_DIR / "items"
    os.makedirs( items_dir, exist_ok=True )

    # Fill the template and write the file to be compiled
    e = string.Template(Template).substitute(DERINHALT=LaTeXCode)
    strBasename = Type + "_" + str(Chapter) + "_" + str(Number)
    tmpFile = formula_tmp_dir / (strBasename + ".tex")
    with open(tmpFile, "w") as tmp:
        tmp.write(e)

    pdfFile = (formula_tmp_dir / (strBasename + ".pdf")).absolute()
    croppedFile = (formula_tmp_dir / (strBasename + "a.pdf")).absolute()
    pngFile = (items_dir / (strBasename + ".png")).absolute()

    # The standard output of all three programs is saved in one log file,
    # which is rewritten on every call.  Commands are passed as argument
    # lists so that paths containing spaces stay intact.
    with open(TEMP_DIR / 'xelatex-run.log', 'w') as Datei:
        try:
            subprocess.check_call(
                ["xelatex", "--halt-on-error", str(tmpFile.absolute())],
                cwd=formula_tmp_dir,
                stdout=Datei
            )
        except subprocess.CalledProcessError:
            # check_call signals failure by raising, not by its return value
            logging.error("Failed to convert formula " + Type + str(Chapter) + "_" + str(Number))
            raise
        logging.info("Successfully converted formula " + Type + str(Chapter) + "_" + str(Number))
        subprocess.check_call(
            [PDFCROP_EXEC, str(pdfFile), str(croppedFile)],
            cwd=formula_tmp_dir,
            stdout=Datei
        )
        subprocess.check_call(
            [GM_PATH, "convert", "-density", "144", str(croppedFile), str(pngFile)],
            cwd=formula_tmp_dir,
            stdout=Datei
        )

    # logging.info("TeX2PNG done")
    return LaTeXCode
# def TeX2PNG ends here

def make_latex_bibl_file(
        bib_database,
        set_citations,
        files
):
    """Fill the bibliography templates and write the resulting LaTeX files.

    Every entry of ``files`` is a pair (template path, output path).  In each
    template, the placeholder INSERT_BIB_DATABASE is replaced by
    ``bib_database`` and INSERT_CITEKEYS by the citation keys joined with
    ", ".  The result is written to the output path.

    The separate file exists because the HTML bibliography is still not
    formatted exactly like the LaTeX version and both need to be compared.
    """
    placeholder_values = {
        "INSERT_BIB_DATABASE": bib_database,
        "INSERT_CITEKEYS": ", ".join(set_citations),
    }
    for template_path, target_path in files:
        with open(template_path, "r") as template_file:
            template_text = template_file.read()
        filled_text = string.Template(template_text).substitute(placeholder_values)
        with open(target_path, "w") as target_file:
            target_file.write(filled_text)
# def make_latex_bibl_file ends here

def sanitize_bibentry(bibEntry):
    """Repair known punctuation glitches in a bibliography entry.

    Returns the entry with ". , " turned into ", " and "vols.." turned into
    "vols.", stripped of surrounding whitespace.
    """
    repairs = (
        (". , ", ", "),
        ("vols..", "vols."),
    )
    cleaned = bibEntry
    for broken, repaired in repairs:
        cleaned = cleaned.replace(broken, repaired)
    return cleaned.strip()
# def sanitize_bibentry ends here


def cleanup():
    """Remove the support files and directories from OUTPUT_DIR.

    Every item is removed independently, so a missing item does not prevent
    the removal of the remaining ones.  Items that exist but cannot be
    removed are reported with a warning; the function itself never raises
    because of them.
    """
    # (function used for the removal, name below OUTPUT_DIR)
    targets = (
        (os.remove, "classes.dtd"),
        (os.remove, "mathml2-qname-1.mod"),
        (os.remove, "mathml2.dtd"),
        (shutil.rmtree, "html"),
        (shutil.rmtree, "iso8879"),
        (shutil.rmtree, "iso9573-13"),
        (shutil.rmtree, "mathml"),
    )
    removed_any = False
    for remove, name in targets:
        target = OUTPUT_DIR / name
        try:
            remove(target)
        except FileNotFoundError:
            # nothing to do for this item
            continue
        except OSError as error:
            logging.warning(f"Could not remove {target}: {error}")
            continue
        removed_any = True

    if removed_any:
        logging.debug("Removed support files.")
    else:
        logging.info("No temporary files were found.")
# def cleanup ends here


def reduce_element_tag(xml_element):
    """Strip all attributes from the element and rename it to "t".

    The element is modified in place and returned.
    """
    xml_element.attrib.clear()
    xml_element.tag = "t"
    return xml_element
# def reduce_element_tag ends here


# Housekeeping mode, useful for troubleshooting: with "--trash temp" the
# script only removes the support files via cleanup() and then stops,
# without converting anything.
if args.trash == "temp":
    cleanup()
    sys.exit()

##############################################################
#                  Preparing the main document               #
##############################################################

# .tex -> .xml
def run_tralics(
        input_file,
        TRALICS_PATH_LIB,
        log_path,
        output_dir = OUTPUT_DIR,
):
    """Convert a TeX file to XML by running Tralics.

    ``libeoaconvert.enable_preamble`` is called first with ``input_file``,
    a path of the same file name inside ``output_dir`` and the string "xml"
    (presumably it writes a copy of the source with the XML preamble
    enabled - confirm in utils.libeoaconvert).  Tralics is then run on that
    copy with the configuration found in ``TRALICS_PATH_LIB``; its output is
    sent to ``log_path``.  Any exit code of Tralics is accepted.
    """
    prepared_tex = output_dir / input_file.name
    libeoaconvert.enable_preamble(input_file, prepared_tex, "xml")

    # Convert TeX to XML via Tralics
    logging.info( f"executing {TRALICS_PATH_EXEC}. log file: {log_path}" )
    tralics_command = (
        f"{TRALICS_PATH_EXEC}"
        f" -confdir {TRALICS_PATH_LIB}/tralics_conf"
        f" -config {TRALICS_PATH_LIB}/tralics.tcf"
        f" -utf8 -utf8output"
        f" -output_dir={output_dir}"
        f" -input_dir={input_file.parent}"
        f" -input_file={prepared_tex}"
    )
    exec_command(
        tralics_command,
        output_to = ToFile( log_path ),
        # every exit code counts as success
        exit_code_ok = lambda _exit_code: True
    )

# .tex -> .xml; the Tralics log goes into a directory named after this script
run_tralics(
    INPUT_PATH,
    TRALICS_PATH_LIB,
    LOG_DIR / SCRIPT_NAME / f"{INPUT_PATH.stem}-tralics.log",
    OUTPUT_DIR,
)

def fix_underscore_and_eoatranscripted(
        xml_file
):
    """Replace two kinds of Tralics <error/> elements in ``xml_file``.

    The file is rewritten in place:

    - a "Missing dollar" error reported for an underscore becomes a plain
      underscore,
    - an "Invalid \\par command: paragraph not started" error is removed.

    The file is read and written as UTF-8, which is what Tralics is asked
    to produce (-utf8output).
    """
    with open(xml_file, "r", encoding="utf-8") as tmpFile:
        tmpText = tmpFile.read()

    tmpText = re.sub(r"<error n='_' l='(.*?)' c='Missing dollar'/>", "_", tmpText)
    tmpText = re.sub(r"<error n='\\par' l='(.*?)' c='Invalid \\par command: paragraph not started'/>", "", tmpText)

    with open(xml_file, "w", encoding="utf-8") as tmpFile:
        tmpFile.write(tmpText)

# Repair the XML file in place before it is parsed.
fix_underscore_and_eoatranscripted(XML_FILE)

def parseXML( input_file ):
    """Parse ``input_file`` and return the tree without <error> elements.

    The parser loads the DTD and is allowed to access the network.  All
    <error> elements are removed in advance; their tail text is kept.
    """
    dtd_loading_parser = etree.XMLParser(load_dtd=True, no_network=False)
    document = etree.parse(str(input_file), dtd_loading_parser)
    etree.strip_elements(document, 'error', with_tail=False)
    return document

def xmltrans_move_eoalanguage( xmlChapters ):
    """Turn the <EOAlanguage> element of each chapter into an attribute.

    For every chapter, the text of the first <EOAlanguage> descendant
    ("english" if it is empty) is stored in the chapter's "language"
    attribute and the element's text is cleared.  Afterwards the
    <EOAlanguage> tags are stripped from the chapter.
    """
    for chapter_number, chapter in enumerate(xmlChapters, start=1):
        language_element = chapter.find(".//EOAlanguage")
        if language_element is not None:
            language = language_element.text
            if not language:
                language = "english"
            chapter.set("language", language)
            language_element.text = None
            logging.info("The language of Chapter %d is %s." % (chapter_number, language))
        etree.strip_tags(chapter, "EOAlanguage")


##############################################################
#      Numbering and Typesetting various Elements            #
##############################################################

# Figure out how to number (like essay or regular)
def get_series( xmlTree ):
    """Return the numbering scheme of the publication.

    The scheme is the text of the first <EOAseries> element, or "regular"
    if that element is empty.  If the element cannot be found, an error is
    logged and the script exits with status 1.
    """
    try:
        strSeries = xmlTree.find(".//EOAseries").text
    except AttributeError:
        logging.error("\n\nYou are most probably using the preamble for the PDF output. Exiting.")
        # a missing element is a failure, so do not exit with status 0
        sys.exit( 1 )
    return strSeries or "regular"

def number_chapters( xmlChapters ):
    """Assign consecutive numbers to all chapters that are to be numbered.

    Chapters whose "rend" attribute is "nonumber" are skipped and do not
    consume a number.  Returns a dictionary that maps the id of each
    numbered chapter to its number (as a string).  The id of the first
    anchor that follows a <head> sibling inside the chapter, if there is
    one, is mapped to the same number.
    """
    chapter_numbers = {}
    next_number = 1
    for chapter in xmlChapters:
        if chapter.get('rend') == "nonumber":
            continue
        number_label = str(next_number)
        next_number += 1
        chapter_numbers[chapter.get('id')] = number_label
        anchor_ids = chapter.xpath(".//anchor[preceding-sibling::head]/@id")
        if len(anchor_ids) > 0:
            chapter_numbers[anchor_ids[0]] = number_label
    return chapter_numbers

# EOAequation, EOAsubequation and EOAequationarray Numbering per Chapter
def process_equations( xmlChapters, dictChapters, strNumberingType, donotgenerate):
    """Number and typeset the numbered equations of all chapters.

    Handles <EOAequation>, <EOAequationarray> and <EOAsubequations>
    elements.  The equation counter restarts at 1 in every chapter.  Unless
    the chapter has rend="nonumber", equation numbers are prefixed with the
    chapter number taken from ``dictChapters``.  The XML elements are
    modified in place.

    Args:
        xmlChapters: the chapter elements; their position in this sequence
            (starting at 1) is used in the image file names.
        dictChapters: maps chapter ids to chapter number strings
            (presumably the result of number_chapters() - verify against
            the caller).
        strNumberingType: "regular" or "essay"; decides whether the numbers
            stored for <EOAequation> anchors carry the chapter prefix.
        donotgenerate: if true, no images are rendered and a placeholder
            file name and TeX string are used instead.

    Returns:
        dictEquations, which maps labels and anchor ids to equation numbers
        (strings).
    """
    dictEquations = {}
    for intChapterNumber, xmlChapter in enumerate(xmlChapters, start=1):
        intEquationnumber = 1
        xmlDinge = xmlChapter.xpath(".//EOAequation | .//EOAequationarray | .//EOAsubequations")
        logging.info("Working on Chapter %d which contains %d formulæ." % (intChapterNumber, len(xmlDinge)))
        for xmlDing in xmlDinge:
            if xmlDing.tag == "EOAequationarray":
                # tmpNumberinArray is only being used for filename
                tmpNumberinArray = intEquationnumber
                # tmpDictNumberLabel used to insert the attribute value into <EOAequation>
                tmpDictNumberLabel = {}
                # Numbering is being done by <mtr>-Tags
                xmlMathmlrows = xmlDing.findall(".//{http://www.w3.org/1998/Math/MathML}mtr")
                for xmlMathmlrow in xmlMathmlrows:
                    if "Label" in xmlMathmlrow.attrib:
                        # Add the label to the dictionary of equations
                        if xmlChapter.get("rend") != "nonumber":
                            dictEquations[xmlMathmlrow.get("Label")] = str(dictChapters[xmlChapter.get('id')]) + "." + str(intEquationnumber)
                            tmpDictNumberLabel[str(dictChapters[xmlChapter.get('id')]) + "." + str(intEquationnumber)] = xmlMathmlrow.get("Label")
                        if xmlChapter.get("rend") == "nonumber":
                            dictEquations[xmlMathmlrow.get("Label")] = str(intEquationnumber)
                            tmpDictNumberLabel[str(intEquationnumber)] = xmlMathmlrow.get("Label")
                    # every row consumes an equation number, labelled or not
                    intEquationnumber += 1
                xmlRohTeX = xmlDing.find(".//texmath")
                xmlNew = etree.Element('EOAequationarray')
                # Blank lines need to be removed otherwise TeX won't work
                textSourcecode = os.linesep.join([s for s in xmlRohTeX.text.splitlines() if s])
                # \rowattributeunknown has to be deleted, its an artefact
                textSourcecode = re.sub("\\\\rowattributeunknown", "", textSourcecode)
                # Push Down loop to parse the raw code: the source is cut
                # after every double backslash and each piece is typeset as
                # an equation of its own
                textFormel = ""
                boolBackslash = False
                for Buchstabe in textSourcecode:
                    if Buchstabe == "\n":
                        continue
                    if Buchstabe == "\\":
                        if boolBackslash == False:
                            # first backslash: remember it and look at the next character
                            textFormel += Buchstabe
                            boolBackslash = True
                            continue
                        if boolBackslash == True:
                            # second backslash in a row: the row is complete
                            textFormel += Buchstabe
                            if donotgenerate:
                                str_latexcode = "Formula not generated"
                                eq_filename = "EOAformulanotgenerated.png"
                            else:
                                str_latexcode = TeX2PNG(textFormel, "EOAequationarray", str(intChapterNumber), str(tmpNumberinArray))
                                eq_filename = "EOAequationarray" + "_" + str(intChapterNumber) + "_" + str(tmpNumberinArray) + ".png"
                            if xmlChapter.get("rend") != "nonumber":
                                tmpXML = etree.Element("EOAequation", filename=eq_filename, number=(str(dictChapters[xmlChapter.get('id')]) + "." + str(tmpNumberinArray)))
                            elif xmlChapter.get("rend") == "nonumber":
                                tmpXML = etree.Element("EOAequation", filename=eq_filename, number=(str(tmpNumberinArray)))
                            tmpXML.set("TeX", str_latexcode)
                            # Put Label into EOAequation
                            if xmlChapter.get("rend") != "nonumber":
                                strTempKey = str(dictChapters[xmlChapter.get('id')]) + "." + str(tmpNumberinArray)
                            elif xmlChapter.get("rend") == "nonumber":
                                strTempKey = str(tmpNumberinArray)
                            if strTempKey in tmpDictNumberLabel:
                                #tmpXML.set("label", tmpDictNumberLabel[(str(dictChapters[xmlChapter.get('id')]) + "." + str(tmpNumberinArray))])
                                tmpXML.set("label", tmpDictNumberLabel[strTempKey])
                            xmlNew.append(tmpXML)
                            textFormel = ""
                            boolBackslash = False
                            tmpNumberinArray += 1
                            continue
                    if Buchstabe != "\\":
                        textFormel += Buchstabe
                        boolBackslash = False
                # Typeset last equation
                if donotgenerate:
                    str_latexcode = "Formula not generated"
                    eq_filename = "EOAformulanotgenerated.png"
                else:
                    str_latexcode = TeX2PNG(textFormel, "EOAequationarray", str(intChapterNumber), str(tmpNumberinArray))
                    eq_filename = "EOAequationarray" + "_" + str(intChapterNumber) + "_" + str(tmpNumberinArray) + ".png"
                if xmlChapter.get("rend") != "nonumber":
                    tmpXML = etree.Element("EOAequation", filename=eq_filename, number=(dictChapters[xmlChapter.get('id')] + "." + str(tmpNumberinArray)))
                elif xmlChapter.get("rend") == "nonumber":
                    tmpXML = etree.Element("EOAequation", filename=eq_filename, number=(str(tmpNumberinArray)))
                tmpXML.set("TeX", str_latexcode)
                # Put Label into EOAequation
                if xmlChapter.get("rend") != "nonumber":
                    strTempKey = str(dictChapters[xmlChapter.get('id')]) + "." + str(tmpNumberinArray)
                elif xmlChapter.get("rend") == "nonumber":
                    strTempKey = str(tmpNumberinArray)
                if strTempKey in tmpDictNumberLabel:
                    logging.info(strTempKey)
                    logging.info(tmpDictNumberLabel)
                    logging.info(dictChapters)
                    tmpXML.set("label", tmpDictNumberLabel[strTempKey])
                xmlNew.append(tmpXML)
                # the original array is replaced by the list of single equations
                xmlDing.getparent().replace(xmlDing, xmlNew)
                # enclosing <p>-Tag of the Subequations is not wanted, transformed to <temp> to be deleted later on
                #xmlNew.getparent().tag = "temp"
                continue
            if xmlDing.tag == "EOAsubequations":
                # Enclosing <p>-Tag of the EOAsubequations needs to be removed
                xmlDing.getparent().tag = "temp"
                xmlSubequations = xmlDing.findall('.//EOAequation')
                # NOTE(review): a group with more than 26 subequations would
                # raise an IndexError when indexing this list below
                listCharacters = ['a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z']
                tmpI = 0
                # Insert Number of this Subequation into dictEquations
                xmlAnchor = xmlDing.find(".//anchor")
                if xmlChapter.get("rend") != "nonumber":
                    dictEquations[xmlAnchor.get('id')] = dictChapters[xmlChapter.get('id')] + "." + str(intEquationnumber)
                if xmlChapter.get("rend") == "nonumber":
                    dictEquations[xmlAnchor.get('id')] = str(intEquationnumber)
                # Delete anchor
                xmlAnchor.getparent().remove(xmlAnchor)
                for xmlSubequation in xmlSubequations:
                    # Enclosing <p>-Tag of the EOAsubequation needs to be removed
                    #xmlSubequation.getparent().tag = "temp"
                    # Numbering Subequations with characters
                    strSubequationNumber = str(intEquationnumber) + listCharacters[tmpI]
                    tmpI += 1
                    textSourcecode = xmlSubequation.find('.//texmath').text
                    # Blank lines need to be removed otherwise TeX won't work
                    textSourcecode = os.linesep.join([s for s in textSourcecode.splitlines() if s])
                    if donotgenerate:
                        str_latexcode = "Formula not generated"
                        eq_filename = "EOAformulanotgenerated.png"
                    else:
                        str_latexcode = TeX2PNG(textSourcecode, "EOAequation", str(intChapterNumber), strSubequationNumber)
                        eq_filename = "EOAequation" + "_" + str(intChapterNumber) + "_" + strSubequationNumber + ".png"
                    # fetch the anchor before the element is cleared
                    xmlAnchor = xmlSubequation.find(".//anchor")
                    # Clear Equation
                    xmlSubequation.clear()
                    if xmlChapter.get("rend") != "nonumber":
                        xmlSubequation.set("filename", eq_filename)
                        xmlSubequation.set("number", dictChapters[xmlChapter.get('id')] + "." + strSubequationNumber)
                        xmlSubequation.set("uid", xmlAnchor.get('id'))
                    if xmlChapter.get("rend") == "nonumber":
                        xmlSubequation.set("filename", eq_filename)
                        xmlSubequation.set("number", strSubequationNumber)
                        xmlSubequation.set("uid", xmlAnchor.get('id'))
                    xmlSubequation.set("id", xmlAnchor.get('id'))
                    xmlSubequation.set("TeX", str_latexcode)
                    # Insert Number of this Equation into dictEquations
                    if strNumberingType == "regular":
                        dictEquations[xmlAnchor.get('id')] = str(dictChapters[xmlChapter.get('id')]) + "." + strSubequationNumber
                    if strNumberingType == "essay":
                        dictEquations[xmlAnchor.get('id')] = strSubequationNumber
                # TODO: keep the anchor directly below the subequations and assign it to the first equation, so that 8.16 can link to 8.16a and 8.16b
                xmlDing.tag = "temp"
                # enclosing <p>-Tag of the Subequations is not wanted, transformed to <temp> to be deleted later on
                #xmlDing.getparent().tag = "temp"
                # the whole group of subequations consumes one equation number
                intEquationnumber += 1
                continue
            if xmlDing.tag == "EOAequation":
                # Check, if Equation has already been found in a Subeqation
                xmlAnchor = xmlDing.find("anchor")
                # NOTE(review): "is None" would be the idiomatic comparison
                if xmlAnchor == None:
                    continue
                if xmlAnchor.get('id') in dictEquations:
                    continue
                # use the <texmath> descendant if there is one, otherwise the element's own text
                if xmlDing.find('.//texmath') is not None:
                    textSourcecode = xmlDing.find('.//texmath').text
                else:
                    textSourcecode = xmlDing.text
                # Blank lines need to be removed otherwise TeX won't work
                textSourcecode = os.linesep.join([s for s in textSourcecode.splitlines() if s])
                if donotgenerate:
                    str_latexcode = "Formula not generated"
                    eq_filename = "EOAformulanotgenerated.png"
                else:
                    str_latexcode = TeX2PNG(textSourcecode, "EOAequation", intChapterNumber, intEquationnumber)
                    eq_filename = "EOAequation" + "_" + str(intChapterNumber) + "_" + str(intEquationnumber) + ".png"
                #print ("Got:")
                #print (str_latexcode)
                if xmlChapter.get("rend") != "nonumber":
                    xmlDing.set("filename", eq_filename)
                    xmlDing.set("number", dictChapters[xmlChapter.get('id')] + "." + str(intEquationnumber))
                    xmlDing.set("uid", xmlAnchor.get('id'))
                if xmlChapter.get("rend") == "nonumber":
                    xmlDing.set("filename", eq_filename)
                    xmlDing.set("number", str(intEquationnumber))
                    xmlDing.set("uid", xmlAnchor.get('id'))
                xmlDing.set("id", xmlAnchor.get('id'))
                xmlDing.set("TeX", str_latexcode)
                #xmlDing.getparent().replace(xmlDing, xmlNew)
                # Insert Number of this Equation into dictEquations
                if strNumberingType == "regular":
                    dictEquations[xmlAnchor.get('id')] = \
                        str(dictChapters[xmlChapter.get('id')]) + "." + str(intEquationnumber)
                if strNumberingType == "essay":
                    dictEquations[xmlAnchor.get('id')] = str(intEquationnumber)
                intEquationnumber += 1
                continue
    return dictEquations

def process_unnumbered_equations( xmlChapters, donotgenerate ):
    """Typeset the unnumbered equations of all chapters.

    Every <EOAequationnonumber> and <EOAequationarraynonumber> element is
    replaced by a new <EOAequationnonumber> element carrying the attributes
    "filename" (the rendered image) and "TeX" (the typeset source).  An
    equation array is rendered as one single image.  The image counter
    restarts at 1 in every chapter.

    Args:
        xmlChapters: the chapter elements; their position in this sequence
            (starting at 1) is used in the image file names.
        donotgenerate: if true, no images are rendered and a placeholder
            file name and TeX string are used instead.
    """
    for intChapterNumber, xmlChapter in enumerate(xmlChapters, start=1):
        tempImagenumber = 1
        xmlDinge = xmlChapter.xpath(".//EOAequationnonumber | .//EOAequationarraynonumber")
        logging.info("Working on Chapter %d which contains %d formulæ." % (intChapterNumber, len(xmlDinge)))
        for xmlDing in xmlDinge:
            strTag = xmlDing.tag
            if strTag not in ("EOAequationarraynonumber", "EOAequationnonumber"):
                continue
            # The source is taken from the <texmath> descendant if there is
            # one, otherwise from the text of the element itself.
            xmlTexmath = xmlDing.find(".//texmath")
            if xmlTexmath is not None:
                textSourcecode = xmlTexmath.text
            else:
                textSourcecode = xmlDing.text
            # Blank lines need to be removed otherwise TeX won't work
            textSourcecode = os.linesep.join([s for s in textSourcecode.splitlines() if s])
            if strTag == "EOAequationarraynonumber":
                # \rowattributeunknown has to be deleted, its an artefact
                textSourcecode = re.sub("\\\\rowattributeunknown", "", textSourcecode)
                # TODO: check whether it is sufficient to put an
                # EOAequationarraynonumber into one single image
            if donotgenerate:
                str_latexcode = "Formula not generated"
                eq_filename = "EOAformulanotgenerated.png"
            else:
                str_latexcode = TeX2PNG(textSourcecode, strTag, str(intChapterNumber), str(tempImagenumber))
                eq_filename = strTag + "_" + str(intChapterNumber) + "_" + str(tempImagenumber) + ".png"
            # TODO: insert HTML code for the finished image (is this TODO still relevant?)
            xmlNew = etree.Element("EOAequationnonumber", filename=eq_filename)
            xmlNew.set("TeX", str_latexcode)
            xmlDing.getparent().replace(xmlDing, xmlNew)
            tempImagenumber += 1


def process_inline_equations( xmlChapters, donotgenerate ):
    """Turn all inline equations (EOAineq) into image references.

    Every ``EOAineq`` element found in the chapters is emptied in place
    (its tail text is kept) and receives two attributes: ``src``, the
    name of the PNG file, and ``TeX``, the source code of the formula.

    Unless ``donotgenerate`` is set, the formulas are collected into one
    LaTeX document with one formula per page.  That document is compiled
    with xelatex, split into single pages, cropped and converted to PNG
    files in ``OUTPUT_DIR / "items"``.

    The function reads the module-level names ``xmlTree``,
    ``TEMPLATE_PATH``, ``TEMP_DIR``, ``OUTPUT_DIR``, ``PDFCROP_EXEC`` and
    ``GM_PATH``.

    :param xmlChapters: iterable of chapter elements searched for EOAineq
    :param donotgenerate: if true, no image is rendered and every formula
        refers to ``EOAformulanotgenerated.png``
    :raises subprocess.CalledProcessError: if one of the external
        programs exits with a non-zero status
    """
    intEOAineqRunningOrder = 1
    dictEOAineqs = {}
    strTeXEquations = ""
    # The overall count comes from the module-level tree, the actual work
    # below is done on the chapters passed in.
    all_ineq = xmlTree.findall(".//EOAineq")

    if len(all_ineq) == 0:
        logging.info("Found no EOAineq. Continuing")
        return

    logging.info("Found " + str(len(all_ineq)) + " formulas")

    for intChapterNumber, xmlChapter in enumerate(xmlChapters, start=1):
        logging.info("Chapter " + str(intChapterNumber))
        xmlEOAineqs = xmlChapter.findall(".//EOAineq")
        for intEOAineqnumber, xmlEOAineq in enumerate(xmlEOAineqs, start=1):
            if donotgenerate:
                strFilename = "EOAformulanotgenerated"
                strSourceCode = "Formula not generated"
            else:
                xmlTexmath = xmlEOAineq.find('.//texmath')
                if xmlTexmath is not None:
                    strSourceCode = xmlTexmath.text
                else:
                    strSourceCode = xmlEOAineq.text

                libeoaconvert.progress(intEOAineqnumber, len(xmlEOAineqs),"Processing EOAineq %s of %s." % (intEOAineqnumber, len(xmlEOAineqs)))

                # Blank lines are dropped from the formula source
                strSourceCode = os.linesep.join([s for s in strSourceCode.splitlines() if s])

                # this occurred once in sources 11
                strSourceCode = strSourceCode.replace(r"\@root", r"\root")

                # One formula per page, so that page N of the PDF is formula N
                strTeXEquations = strTeXEquations + "$" + strSourceCode + "$\n\\newpage\n"
                strFilename = "EOAineq_" + str(intChapterNumber) + "_" + str(intEOAineqnumber)
            # Add intEOAineqRunningOrder : Filename to dictionary
            dictEOAineqs[intEOAineqRunningOrder] = strFilename
            # Prepare XML: clear() also drops the tail, so it is restored
            tmpTail = xmlEOAineq.tail
            xmlEOAineq.clear()
            xmlEOAineq.tail = tmpTail
            xmlEOAineq.set("src", strFilename + ".png")
            xmlEOAineq.set("TeX", strSourceCode)
            intEOAineqRunningOrder += 1

    if donotgenerate:
        logging.info("Skipping creation of png files.")
        return

    # Keys are regular expressions, values are re.sub replacement strings.
    # Raw strings keep the exact same patterns without relying on invalid
    # string escapes such as "\|".
    dictRebindedCommands = {
        r"\|ket\|" : r"\\ket",
        r"\|braket\|" : r"\\braket",
        r"\|bra\|" : r"\\bra",
        r"\|Bra\|" : r"\\Bra",
        r"\|Ket\|" : r"\\Ket",
        # NOTE(review): unlike the other keys this one starts with "\s"
        # (regex whitespace class) instead of "\|". Kept unchanged;
        # confirm whether r"\|slashed\|" was intended.
        r"\slashed\|" : r"\\slashed",
        r"\|cancel\|" : r"\\cancel"
    }
    for strCommand, strReplacement in dictRebindedCommands.items():
        strTeXEquations = re.sub(strCommand, strReplacement, strTeXEquations)

    with open(TEMPLATE_PATH / "formula.tex", "r") as tmp:
        Template = tmp.read()

    # use local tmpdir
    formula_tmp_dir = TEMP_DIR / "formulas2png"
    os.makedirs( formula_tmp_dir, exist_ok=True )

    # Make directory items if it doesn't already exist
    items_dir = OUTPUT_DIR / "items"
    os.makedirs( items_dir, exist_ok=True )

    e = string.Template(Template).substitute(DERINHALT=strTeXEquations)
    tmpFile = formula_tmp_dir / "EOAeq.tex"
    with open(tmpFile, "w") as tmp:
        tmp.write(e)

    # Argument lists are built directly so that paths containing
    # whitespace are passed on as a single argument.
    listPdfcrop = shlex.split(str(PDFCROP_EXEC))
    listConvert = shlex.split(str(GM_PATH)) + ["convert", "-density", "144"]
    intTotal = len(dictEOAineqs)

    # The log file is closed again even if one of the programs fails
    with open(TEMP_DIR / 'xelatex-run.log', 'w') as Datei:
        logging.info("Typesetting all inline equations (EOAineq)")
        subprocess.check_call(
            ["xelatex", "--halt-on-error", str(tmpFile.absolute())],
            cwd=formula_tmp_dir,
            stdout=Datei
        )
        logging.info("Splitting all inline equations, (EOAineq)")
        libeoaconvert.pdf_burst("EOAeq.pdf", formula_tmp_dir)
        logging.info("Converting %s split pages into PNG-Images" % intTotal)
        for intCounter, (intRunningOrder, strBasename) in enumerate(dictEOAineqs.items(), start=1):
            libeoaconvert.progress(intCounter, intTotal,"Splitting all inline equations, image %s of %s" % (intCounter, intTotal))
            strPagePdf = str((formula_tmp_dir / ("EOAformulas_" + str(intRunningOrder) + ".pdf")).absolute())
            strCroppedPdf = str((formula_tmp_dir / (strBasename + ".pdf")).absolute())
            strPng = str((items_dir / (strBasename + ".png")).absolute())

            subprocess.check_call(listPdfcrop + [strPagePdf, strCroppedPdf],cwd=formula_tmp_dir,stdout=Datei)
            subprocess.check_call(listConvert + [strCroppedPdf, strPng],cwd=formula_tmp_dir,stdout=Datei)
# def process_inline_equations ends here


def process_eoachem( xmlChapters, donotgenerate ):
    """Turn all chemical formulas (EOAchem) into image references.

    Every ``EOAchem`` element found in the chapters is emptied in place
    (its tail text is kept) and receives two attributes: ``src``, the
    name of the PNG file, and ``TeX``, the source of the formula.

    Unless ``donotgenerate`` is set, the formulas are wrapped in ``\\ce{}``
    and collected into one LaTeX document with one formula per page.  That
    document is compiled with xelatex, split into single pages, cropped
    and converted to PNG files in ``OUTPUT_DIR / "items"``.

    The function reads the module-level names ``xmlTree``,
    ``TEMPLATE_PATH``, ``TEMP_DIR``, ``OUTPUT_DIR``, ``PDFCROP_EXEC`` and
    ``GM_PATH``.

    :param xmlChapters: iterable of chapter elements searched for EOAchem
    :param donotgenerate: if true, no image is rendered and every formula
        refers to ``EOAformulanotgenerated.png``
    :raises subprocess.CalledProcessError: if one of the external
        programs exits with a non-zero status
    """
    int_EOAchem_running_order = 1
    dictEOAchems = {}
    str_tex_chem = ""
    # The overall count comes from the module-level tree, the actual work
    # below is done on the chapters passed in.
    all_chem = xmlTree.findall(".//EOAchem")

    if len(all_chem) == 0:
        logging.info("Found no EOAchem. Continuing")
        return

    logging.info("Found " + str(len(all_chem)) + " chemical formulas")

    for intChapterNumber, xmlChapter in enumerate(xmlChapters, start=1):
        logging.info("Chapter " + str(intChapterNumber))
        xmlEOAchems = xmlChapter.findall(".//EOAchem")
        for int_EOAchem_number, xml_EOAchem in enumerate(xmlEOAchems, start=1):
            if donotgenerate:
                strFilename = "EOAformulanotgenerated"
                str_chem_text = "Formula not generated"
            else:
                str_chem_text = xml_EOAchem.text

                libeoaconvert.progress(int_EOAchem_number, len(xmlEOAchems),"Processing EOAchem %s of %s." % (int_EOAchem_number, len(xmlEOAchems)))

                # Blank lines are dropped from the formula source
                str_chem_text = os.linesep.join([s for s in str_chem_text.splitlines() if s])
                # One formula per page, so that page N of the PDF is formula N
                str_tex_chem = str_tex_chem + "\\ce{" + str_chem_text + "}\n\\newpage\n"
                strFilename = "EOAchem_" + str(intChapterNumber) + "_" + str(int_EOAchem_number)
            # Add int_EOAchem_running_order : Filename to dictionary
            dictEOAchems[int_EOAchem_running_order] = strFilename
            # Prepare XML: clear() also drops the tail, so it is restored
            tmpTail = xml_EOAchem.tail
            xml_EOAchem.clear()
            xml_EOAchem.tail = tmpTail
            xml_EOAchem.set("src", strFilename + ".png")
            xml_EOAchem.set("TeX", str_chem_text)
            int_EOAchem_running_order += 1

    if donotgenerate:
        logging.info("Skipping creation of png files.")
        return

    with open(TEMPLATE_PATH / "formula.tex", "r") as tmp:
        Template = tmp.read()

    # use local tmpdir; it is created here because no other step is
    # guaranteed to have done so before this function runs
    formula_tmp_dir = TEMP_DIR / "formulas2png/"
    os.makedirs( formula_tmp_dir, exist_ok=True )

    # Make directory items if it doesn't already exist
    items_dir = OUTPUT_DIR / "items"
    os.makedirs( items_dir, exist_ok=True )

    e = string.Template(Template).substitute(DERINHALT=str_tex_chem)
    tmpFile = formula_tmp_dir / "EOAchem.tex"
    with open(tmpFile, "w") as tmp:
        tmp.write(e)

    # Argument lists are built directly so that paths containing
    # whitespace are passed on as a single argument.
    listPdfcrop = shlex.split(str(PDFCROP_EXEC))
    listConvert = shlex.split(str(GM_PATH)) + ["convert", "-density", "144"]
    intTotal = len(dictEOAchems)

    # The log file is closed again even if one of the programs fails
    with open(TEMP_DIR / 'xelatex-run.log', 'w') as Datei:
        logging.info("Typesetting all inline Chemical formulas")
        subprocess.check_call(
            ["xelatex", "--halt-on-error", str(tmpFile.absolute())],
            cwd=formula_tmp_dir,
            stdout=Datei
        )
        logging.info("Splitting all Inline Chemical formulas")
        libeoaconvert.pdf_burst("EOAchem.pdf", formula_tmp_dir)
        logging.info("Converting %s split pages into PNG-Images" % intTotal)
        for intCounter, (intRunningOrder, strBasename) in enumerate(dictEOAchems.items(), start=1):
            libeoaconvert.progress(intCounter, intTotal,"Splitting all inline equations, image %s of %s" % (intCounter, intTotal))
            strPagePdf = str((formula_tmp_dir / ("EOAformulas_" + str(intRunningOrder) + ".pdf")).absolute())
            strCroppedPdf = str((formula_tmp_dir / (strBasename + ".pdf")).absolute())
            strPng = str((items_dir / (strBasename + ".png")).absolute())

            subprocess.check_call(listPdfcrop + [strPagePdf, strCroppedPdf],cwd=formula_tmp_dir,stdout=Datei)
            subprocess.check_call(listConvert + [strCroppedPdf, strPng],cwd=formula_tmp_dir,stdout=Datei)
# def process_eoachem ends here


def process_figures( xmlChapters ):
    """Number the figures of each chapter and return the numbers by anchor id.

    Figures (``EOAfigure`` and ``EOAlsfigure``) are counted per chapter,
    starting at 1.  If the chapter has an ``id`` attribute, the number is
    prefixed with the chapter number looked up in the module-level
    ``dictChapters``, otherwise the bare figure number is used.

    Side effects on the tree: each figure gets the id of its ``anchor``
    child as its own ``id`` attribute, and a ``shortcaption`` child whose
    text is ``"1"`` is renamed to ``temp``.

    :param xmlChapters: iterable of chapter elements
    :returns: dict mapping anchor id to figure number (as string)
    """
    dictFigures = {}
    for xmlChapter in xmlChapters:
        xmlFigures = xmlChapter.xpath(".//EOAfigure | .//EOAlsfigure")
        for Figurenumber, xmlFigure in enumerate(xmlFigures, start=1):
            shortcaption = xmlFigure.find("shortcaption")
            # Compare with None explicitly: an lxml element without child
            # elements is falsy, so a plain truth test would skip a
            # shortcaption that contains only text.
            if shortcaption is not None and shortcaption.text == "1":
                shortcaption.tag = "temp"
            strAnchorID = xmlFigure.find("anchor").get("id")
            strChapterID = xmlChapter.get('id')
            # Chapters with an id contribute their chapter number,
            # otherwise just put the number of the figure
            if strChapterID:
                dictFigures[strAnchorID] = \
                    str(dictChapters[strChapterID]) + "." + str(Figurenumber)
            else:
                dictFigures[strAnchorID] = str(Figurenumber)
            xmlFigure.set("id", strAnchorID)
    return dictFigures

def number_theorems( xmlChapters ):
    """Return a dict mapping the ``id`` of every theorem to its ``id-text``.

    All ``theorem`` descendants of the given chapters are considered; the
    number itself is taken over from the ``id-text`` attribute.
    """
    return {
        theorem.get("id"): theorem.get("id-text")
        for chapter in xmlChapters
        for theorem in chapter.findall(".//theorem")
    }

def number_sections( xmlChapters ):
    """Number sections (div2) and subsections (div3) of all chapters.

    Chapters are counted from 1; a chapter with ``rend="nonumber"`` does
    not advance the chapter counter and its sections are numbered without
    a chapter prefix.  Sections and subsections with ``rend="nonumber"``
    are skipped and do not advance their counters.  If a section contains
    an ``anchor`` that follows a ``head`` sibling, the id of the first
    such anchor is registered with the section's number as well.

    :returns: dict mapping element id to its number (as string)
    """
    section_numbers = {}
    chapter_no = 1
    for chapter in xmlChapters:
        chapter_is_numbered = chapter.get("rend") != "nonumber"
        # Only numbered chapters contribute a leading "<chapter>."
        prefix = (str(chapter_no) + ".") if chapter_is_numbered else ""

        section_no = 1
        for section in chapter.findall("div2"):
            if section.get("rend") == "nonumber":
                continue
            section_label = prefix + str(section_no)
            section_numbers[section.get("id")] = section_label

            anchor_ids = section.xpath(".//anchor[preceding-sibling::head]/@id")
            if len(anchor_ids) > 0:
                section_numbers[anchor_ids[0]] = section_label

            subsection_no = 1
            for subsection in section.findall("div3"):
                if subsection.get("rend") == "nonumber":
                    continue
                section_numbers[subsection.get("id")] = section_label + "." + str(subsection_no)
                subsection_no += 1
            section_no += 1

        if chapter_is_numbered:
            chapter_no += 1
    return section_numbers

def number_footnotes( xmlChapters ):
    """Number the footnotes of each chapter, restarting at 1 per chapter.

    :returns: dict mapping the ``id`` of every ``note`` element to its
        number (as string)
    """
    footnote_numbers = {}
    for chapter in xmlChapters:
        for position, footnote in enumerate(chapter.findall(".//note"), start=1):
            footnote_numbers[footnote.get("id")] = str(position)
    return footnote_numbers


def number_lists( xmlChapters ):
    """Return a dict mapping the ``id`` of every list item to its ``id-text``.

    All ``item`` descendants of the given chapters are considered; the
    number itself is taken over from the ``id-text`` attribute.
    """
    return {
        list_item.get("id"): list_item.get("id-text")
        for chapter in xmlChapters
        for list_item in chapter.findall(".//item")
    }

def process_page_references( latex_dir, set_citations ):
    """Read page numbers of labels and cited keys from the LaTeX aux files.

    All ``*.aux`` files below ``latex_dir`` are read line by line.  For
    every ``\\newlabel`` line in the long (hyperref) format the page number
    is stored under the label name.  For every ``\\abx@aux@cite`` line the
    cited key is added to ``set_citations``.

    :param latex_dir: ``pathlib.Path`` of the directory searched
        recursively for aux files
    :param set_citations: set that is extended in place with cited keys
    :returns: dict mapping label name to page number (as string)
    :raises Exception: if no aux file is found
    """
    # hyperref makes the lines much much longer
    # \newlabel{facsim033}{{\caption@xref {facsim033}{ on input line 37}}{231}{Secondary Literature}{figure.caption.87}{}}
    # \newlabel{BL}{{1.1}{4}{Forschungsüberblick zur Literatur über Alvarus Thomas}{section.1.1}{}}
    # The short form without hyperref, \newlabel{BL}{{1.1}{4}}, is not
    # matched by this pattern.
    # NOTE(review): the groups are matched non-greedily up to the next
    # "}{", so nested braces (as in the \caption@xref example) probably
    # end up in the wrong group - verify against real aux files.
    auxpattern = re.compile(r"\\newlabel\{(?P<label>.*?)\}\{\{(?P<num1>.*?)\}\{(?P<pagenumber>.*?)\}\{(?P<sectionname>.*?)\}\{(?P<another>.*?)\}\{(?P<whatever>.*?)\}\}")
    citepattern = re.compile(r'\\abx@aux@cite{(.*?)}')

    dictPagelabels = {}
    listAuxFiles = [str(x) for x in latex_dir.rglob('*.aux')]
    if len(listAuxFiles) == 0:
        raise Exception("No aux file found. Exiting")

    for strFile in listAuxFiles:
        with open(strFile, "r") as tmpFile:
            for line in tmpFile:
                matched_label = auxpattern.match(line)
                if matched_label:
                    dictPagelabels[matched_label.group('label')] = matched_label.group('pagenumber')
                # parsing out information on cited works
                matched_citation = citepattern.match(line)
                if matched_citation is not None:
                    set_citations.add(matched_citation.group(1))
    return dictPagelabels

def number_tables( xmlChapters ):
    """Number the tables of each chapter, restarting at 1 per chapter.

    Tables whose ``EOAtablecaption`` text is ``nonumber`` are skipped and
    do not advance the counter.  An empty ``EOAtablelabel`` is filled with
    a generated label.  In chapters with ``rend="nonumber"`` the bare
    table number is used, otherwise it is prefixed with the chapter number
    looked up in the module-level ``dictChapters``.

    :returns: dict mapping the id of the ``table`` element to the table
        number (as string)
    """
    table_numbers = {}
    for chapter_index, chapter in enumerate(xmlChapters):
        table_no = 0
        for table in chapter.findall(".//EOAtable"):
            label_element = table.find(".//EOAtablelabel")
            table_id = table.xpath(".//table/@id")[0]
            if table.find(".//EOAtablecaption").text == "nonumber":
                continue
            table_no += 1
            if not label_element.text:
                label_element.text = "table" + str(chapter_index) + str(table_no)
            # the question is vexed: label or id!
            # For now the id of the table element is used as key.
            logging.debug(f"XML table label: {table_id}")
            if chapter.get("rend") == "nonumber":
                table_numbers[table_id] = str(table_no)
            else:
                table_numbers[table_id] = dictChapters[chapter.get('id')] + "." + str(table_no)
            logging.debug(f"Tables in this chapter: {table_numbers}.")
    return table_numbers


def process_references(xmlTree):
    """Use id rather than label string in references

    The ``type`` attribute of every ``EOAref`` is normalised to either
    ``text`` or ``number``.  If the ``ref`` child has no ``target``
    attribute, the element whose ``place`` attribute equals the text of
    the ``Label`` child is looked up and its ``id`` is written into
    ``target``.  If nothing is found, the lookup is repeated with all
    underscores removed from the label.
    """

    for eoaref in xmlTree.xpath("//EOAref"):
        normalised_type = "text" if eoaref.get("type") == "text" else "number"
        eoaref.set("type", normalised_type)

        inner_ref = eoaref.find("./ref")
        existing_target = eoaref.xpath("./ref/@target")
        label = eoaref.find("./Label").text
        if existing_target:
            # the reference already points to an id
            continue

        logging.info(label)
        matching_ids = xmlTree.xpath(f"//*[@place='{label}']/@id")
        if len(matching_ids) == 0:
            # second attempt: same label without underscores
            label = label.replace("_", "")
            matching_ids = xmlTree.xpath(f"//*[@place='{label}']/@id")
        inner_ref.set("target", matching_ids[0])
# def process_references ends here


def process_pararefs(xmlTree):
    """Write parallel text information into paragraph tag.

    All attributes of every ``EOApararef`` element are copied to the first
    ``p`` element returned by its ``ancestor::p`` axis.  The pararef
    itself is renamed to ``elementtobestripped``.

    :raises IndexError: if an ``EOApararef`` has no ``p`` ancestor
    """

    pararefs = xmlTree.xpath("//EOApararef")
    for pararef in pararefs:
        para_parent = pararef.xpath("./ancestor::p")[0]
        # this shortcut does not seem to work
        # libeoaconvert.transfer_xml_attributes(pararef, para_parent)
        for attrib, value in pararef.attrib.items():
            # report through logging instead of printing to stdout
            logging.debug("Copying attribute %s of EOApararef to paragraph", attrib)
            para_parent.attrib[attrib] = value

        pararef.tag = "elementtobestripped"
# def process_pararefs ends here


def process_eoabox(xmlChapters):
    """Move contents of EOAbox into regular document structure

    The EOAbox element is the child of a p tag and contains itself p
    tags.  The ``head`` found in the first child of the box becomes a
    paragraph with style ``boxhead``, all further children get the style
    ``box``.  Afterwards the wrappers (the former parent of the head, the
    box itself and the paragraph around the box) are removed.

    The boxes are looked up in the module-level ``xmlTree``; the
    ``xmlChapters`` argument is not used.
    """

    boxes = xmlTree.findall(".//EOAbox")
    if not boxes:
        logging.info("Document contains no info boxes.")
        return

    logging.info(f"Found {libeoaconvert.plural(len(boxes), 'box', plural='boxes')}.")

    for eoabox in boxes:
        surrounding_paragraph = eoabox.getparent()
        surrounding_paragraph.tag = "elementtobestripped"

        children = list(eoabox)
        heading = children[0].find("head")
        heading.set("style", "boxhead")
        heading.tag = "p"
        libeoaconvert.remove_wrapping_element(heading.getparent())

        for child in children[1:]:
            child.set("style", "box")

        libeoaconvert.remove_wrapping_element(eoabox)
        libeoaconvert.remove_wrapping_element(surrounding_paragraph)
    return
# def process_eoabox ends here

def make_indices_child_of_div0():
    """Move index commands to a higher location

    Every ``EOAprintpersonindex``, ``EOAprintlocationindex`` and
    ``EOAprintindex`` element of the module-level ``xmlTree`` is appended
    to its ``div0`` ancestor.  If there is none, the ``div1`` ancestor is
    used instead and a warning is logged.
    """

    index_sections = xmlTree.xpath(" .//EOAprintpersonindex | .//EOAprintlocationindex | .//EOAprintindex")
    logging.debug(f"Found {libeoaconvert.plural(len(index_sections), 'index section')}.")

    for index_section in index_sections:
        enclosing_div0 = index_section.xpath("./ancestor::div0")
        if enclosing_div0:
            new_parent = enclosing_div0[0]
        else:
            logging.warning("Index is not embedded in div0, but div1 ")
            new_parent = index_section.xpath("./ancestor::div1")[0]
        new_parent.append(index_section)
# def make_indices_child_of_div0 ends here

def check_footnote_paragraphs():
    """Check if footnotes contain more than one paragraph

    The tralics log file of the current input
    (``LOG_DIR / SCRIPT_NAME / <input stem>-tralics.log``) is searched
    with grep for lines containing ``argument of \\EOAfn``.  Matches are
    reported as warnings together with their surrounding lines.
    """

    tralics_log = LOG_DIR / SCRIPT_NAME / (INPUT_PATH.stem + "-tralics.log")
    # The argument list is passed directly, so a log path containing
    # whitespace stays one argument.  The doubled backslash is grep's
    # notation for a literal backslash.
    grep_command_arguments = ["grep", "-A1", "-B2", r"argument of \\EOAfn", str(tralics_log)]
    # run() waits for grep to finish; its exit status is deliberately
    # not checked because grep exits with 1 when nothing matches
    grep_result = subprocess.run(grep_command_arguments, stdout=subprocess.PIPE)
    grep_output = grep_result.stdout
    if len(grep_output) > 0:
        logging.warning("\n===\nFootnotes with paragraphs were found. They have to be replaced by the \\EOAfnpar command.\n")
        logging.warning(grep_output.decode("utf-8"))
        logging.warning("===\n")
    else:
        logging.info("Footnote check found no errors")
# def check_footnote_paragraphs ends here


def move_anchors(xml_tree):
    """Move stray anchors into paragraph

    An anchor whose parent is neither ``p``, ``EOAfigure`` nor ``hi`` is
    inserted as first child of its next following ``p`` sibling.  If that
    fails with an IndexError, a warning with the id of the anchor is
    logged and the anchor stays where it is.
    """

    anchors = xml_tree.xpath(".//anchor")
    logging.info(f"Found {len(anchors)} anchors.")
    for anchor in anchors:
        parent = anchor.getparent()
        if parent.tag in ["p", "EOAfigure", "hi"]:
            # already in an acceptable place
            continue

        anchor_pos = libeoaconvert.get_place_in_xml_tree(anchor, parent)
        logging.debug(f"Anchor parent is {parent.tag}, anchor is at position {anchor_pos}.")
        try:
            target_paragraph = anchor.xpath("following-sibling::p[1]")[0]
            para_pos = libeoaconvert.get_place_in_xml_tree(target_paragraph, parent)
            target_paragraph.insert(0, anchor)
            logging.info(f"Moved the anchor {libeoaconvert.plural(para_pos-anchor_pos, 'position')} down the tree.")
        except IndexError:
            logging.warning(f"This didn't work out: {anchor.get('id')}")
# def move_anchors ends here


##############################################################
#                  Preparing the Bibliography                #
##############################################################

def bibl_info_from_xml(
    xmlTree
):
    """Read bibliography database and bibliography type from the XML tree.

    :param xmlTree: tree or element searched for ``EOAbibliographydatabase``
        and ``EOAbibliographytype``
    :returns: tuple ``(bib_type, bib_database)``, or ``None`` if the tree
        contains no ``EOAbibliographydatabase`` element
    :raises Exception: if the bibliography type is not one of the
        supported values
    """
    allowed_bib_types = ["monograph", "anthology", "monograph-numeric", "anthology-numeric"]

    bib_database_element = xmlTree.find(".//EOAbibliographydatabase")
    if bib_database_element is None:
        return None
    bib_database = bib_database_element.text
    logging.info("The bibliography database is %s", bib_database)

    bib_type = xmlTree.find(".//EOAbibliographytype").text
    if bib_type not in allowed_bib_types:
        # list the accepted values in the message
        raise Exception(f"The bibtype must be one of {','.join(allowed_bib_types)}. Exiting")
    logging.info("The bibliography type is %s", bib_type)

    return (bib_type, bib_database)

# .bib -> .json
# (return json data as python dict)
def write_json_bibl(
    bib_database,
    output_file,
):
    """Convert the bibtex database to JSON with pandoc-citeproc.

    ``pandoc-citeproc --bib2json`` is run on ``<bib_database>.bib`` with
    ``LANG`` set to ``en_US.UTF-8``.  Its output is parsed and returned,
    and additionally written to ``output_file``.

    :param bib_database: path of the database without the ``.bib`` suffix
    :param output_file: path of the JSON file that is written
    :returns: the parsed JSON data
    :raises json.JSONDecodeError: if the output of pandoc-citeproc is not
        valid JSON
    """
    # the new solution: pandoc-citeproc
    # The argument list is passed directly, so a database path containing
    # whitespace stays one argument.
    citeproc_arguments = ["pandoc-citeproc", "--bib2json", str(bib_database) + ".bib"]
    logging.debug(f"Running citeproc with the following command: {' '.join(citeproc_arguments)}")
    env = os.environ.copy()
    env['LANG'] = 'en_US.UTF-8'
    # run() waits for the process to finish before its output is used
    citeproc_process = subprocess.run(
            citeproc_arguments,
            env = env,
            stdout=subprocess.PIPE
    )
    citeproc_json = citeproc_process.stdout
    citations_json = json.loads(citeproc_json)

    logging.debug(f"Dumping bib json file: {output_file}.")
    with open(output_file, 'w') as ibjf:
        # NOTE(review): this dumps the JSON text as one JSON string, i.e.
        # the file content is double encoded. Kept as is; confirm what
        # the readers of this file expect.
        json.dump(citeproc_json.decode('utf-8'), ibjf)
    return citations_json

def insert_bibliographies(
        xml_element,
        language,
        citations_json,
        ## paths:
        bib_file,
        tex_template,
        temp_dir,
        output_file,
        log_dir,
):
    """Format the bibliography and insert it into ``xml_element``.

    The ``EOAprintbibliography`` elements below ``xml_element`` are
    collected by keyword.  If there is at least one, the cited keys are
    gathered and checked against the keywords, the bibliography is
    converted via ``bib2html`` (first to a TEI file in ``temp_dir``, then
    to ``output_file``) and the resulting references are inserted in
    place of the ``EOAprintbibliography`` elements.

    :returns: dict mapping keyword to ``EOAprintbibliography`` element
    """
    keyword_to_print_bibl_el = find_print_bibliography(
            xml_element,
            citations_json
    )

    if len(keyword_to_print_bibl_el) == 0:
        logging.debug("No bibliography found.")
        return keyword_to_print_bibl_el

    logging.info(f"Found {libeoaconvert.plural(len(keyword_to_print_bibl_el), 'bibliography', plural='bibliographies')}.")
    # A single bibliography registered under the empty keyword means
    # that no keywords are in use.
    if list(keyword_to_print_bibl_el) == [""]:
        logging.info( "No keywords found" )
    else:
        logging.info( "Found Keywords:" )
        logging.info( keyword_to_print_bibl_el.keys() )

    citekeys = set(citekeys_from_xml( xml_element ))
    bib_keywords_sanity_check(
            keyword_to_print_bibl_el,
            citations_json,
            citekeys
    )

    tei_bibl_file = (temp_dir / output_file.name).with_suffix( ".tei" )
    bib2html.bib2tei(
        bib_file = bib_file,
        citekeys = citekeys,
        tex_template = tex_template,
        language = language,
        temp_dir = temp_dir,
        output_file = tei_bibl_file,
        log_dir = log_dir,
        keywords = keyword_to_print_bibl_el.keys()
    )
    bib2html.teibib_to_eoa1(
            tei_bibl_file,
            output_file = output_file
    )
    htmlbib_tree = etree.parse( str(output_file) )

    # Every keyword is mapped to the first references div of the
    # generated file.
    formatted_bibliographies = {
        keyword: htmlbib_tree.xpath("//div[@id = 'refs' and @class = 'references']")[0]
        for keyword in keyword_to_print_bibl_el.keys()
    }

    add_bibliography_to_xml(
        keyword_to_print_bibl_el,
        xml_element,
        citations_json,
        formatted_bibliographies
    )
    return keyword_to_print_bibl_el

def citekeys_from_xml(
        chapter_element
):
    """Return the list of all citation keys used below ``chapter_element``.

    The list contains the text of every ``citekey`` element, followed by
    the comma separated keys of every ``nocite`` element.  Duplicates are
    not removed.  As a side effect all ``nocite`` elements are renamed to
    ``elementtobestripped``.
    """
    citekeys = chapter_element.xpath(".//citekey/text()")
    nocite_elements = chapter_element.xpath(".//nocite")

    logging.info( "Citekeys found: ")
    logging.info( len( citekeys ) )

    if not nocite_elements:
        return citekeys

    logging.debug(f"Found {libeoaconvert.plural(len(nocite_elements), 'nocite command')}.")
    nocitekeys = []
    for nocite_element in nocite_elements:
        nocitekeys.extend(key.strip() for key in nocite_element.text.split(","))
        nocite_element.tag = "elementtobestripped"
    logging.debug(f"Found {libeoaconvert.plural(len(set(nocitekeys)), 'nocite key')}.")
    logging.debug(f"Adding nocite keys to the other cite keys.")
    citekeys += nocitekeys
    return citekeys

def find_print_bibliography(
        xml_context_element,
        citations_json
):
    """
    return a dict keyword -> print_bibl_el

    All ``EOAprintbibliography`` elements below ``xml_context_element``
    are collected under the value of their ``keyword`` attribute; an
    element without that attribute is stored under the empty string.

    If there is no ``EOAprintbibliography`` although the context contains
    ``citekey`` elements, an error is logged and the program is terminated
    with exit status 1.

    :param citations_json: not used by this function
    """
    ret_list = {}
    print_bibl_elements = xml_context_element.findall(".//EOAprintbibliography")

    if len(print_bibl_elements) == 0:
        citations = xml_context_element.findall(".//citekey")
        if len(citations) > 0:
            logging.error("No EOAprintbibliography found, but there are citations. Maybe you commented it out? Exiting")
            # this is an error, so do not report success to the caller
            sys.exit(1)
        logging.info("Found chapter or publication without bibliography.")
        return ret_list

    for print_bibl_el in print_bibl_elements:
        keyword = print_bibl_el.get("keyword")
        ret_list[keyword if keyword is not None else ''] = print_bibl_el
    return ret_list

def bib_keywords_sanity_check(
        keyword_to_print_bibl_el,
        # print_bibl_elements,
        citations_json,
        citekeys

):
    """Check that bibliography keywords and database entries fit together.

    A single bibliography that carries a keyword only yields a warning.
    With more than one bibliography, a bibliography without keyword
    terminates the program with exit status 1.  The same happens if a
    cited work has no ``keyword`` field in ``citations_json`` while a
    bibliography with keyword is processed.

    :param keyword_to_print_bibl_el: dict keyword -> EOAprintbibliography
    :param citations_json: list of database entries (dicts with ``id``
        and optionally ``keyword``)
    :param citekeys: collection of the keys that are actually cited
    """
    bibliography_count = len(keyword_to_print_bibl_el)
    if bibliography_count == 1 and list(keyword_to_print_bibl_el.keys())[0] != "":
        logging.warning("Found a keyword in bibliography although there is only one.")

    for keyword, print_bibl_element in keyword_to_print_bibl_el.items():
        logging.debug( f"print_bibl_element: {etree.tostring(print_bibl_element)}" )

        if keyword == "":
            if bibliography_count > 1:
                logging.error(f"No bibliography keyword found. Since there is more than one bibliography, all bibliographies are required to have a keyword. Exiting ")
                sys.exit(1)
            # nothing more to check for a bibliography without keyword
            continue

        logging.debug(f"Found bibliography keyword {keyword}")

        # just for debugging (?):
        logging.info("We want to collect the entries matching the keywords from the database.")

        entries_with_keyword = []
        entries_lacking_keyword = []
        for entry in citations_json:
            try:
                entry_keywords = entry["keyword"]
            except KeyError:
                logging.warning(f"Index entry {entry['id']} has no keyword. As long as it is not cited, this is no problem.")
                entries_lacking_keyword.append(entry["id"])
            else:
                if keyword in entry_keywords:
                    entries_with_keyword.append(entry["id"])
        logging.debug(f"Found {libeoaconvert.plural(len(entries_with_keyword), 'citation')} with keyword {keyword} in database.")

        cited_works_without_keyword = [key for key in entries_lacking_keyword if key in citekeys]
        if cited_works_without_keyword:
            logging.error(f"Found {libeoaconvert.plural(len(cited_works_without_keyword), 'work')} that are cited but have no keyword. Please assign one.")
            sys.exit(1)

        citations_to_format = [key for key in entries_with_keyword if key in citekeys]
        logging.debug(f"Found {libeoaconvert.plural(len(citations_to_format), 'citation')} with keyword {keyword} that are actually cited.")

def add_bibliography_to_xml(
        keyword_to_print_bibl_el,
        chapter_element,
        citations_json,
        formatted_bibliographies
):
    """Fill every bibliography placeholder with its formatted entries.

    For each keyword in ``keyword_to_print_bibl_el`` the formatted
    bibliography of the same keyword is taken from
    ``formatted_bibliographies`` and run through
    ``libeoaconvert.fix_bib_entries``.  The placeholder element is then
    emptied, retagged as ``div`` (just like its parent) and receives the
    fixed entries as children.  A non-empty keyword is stored in the
    ``keyword`` attribute of the placeholder.

    ``chapter_element`` and ``citations_json`` are part of the signature
    but are not used in this function.
    """
    for bibl_keyword in keyword_to_print_bibl_el:
        placeholder = keyword_to_print_bibl_el[bibl_keyword]
        bibliography = formatted_bibliographies[bibl_keyword]

        logging.debug( f"insert formatted bibliography for keyword {bibl_keyword}:" )
        logging.debug( etree.tostring( bibliography ) )

        # The entries are prepared before the placeholder is wiped.
        entries = libeoaconvert.fix_bib_entries( bibliography )

        # clear() drops attributes, text and children, so the keyword
        # attribute has to be written afterwards.
        placeholder.clear()
        if bibl_keyword != "" :
            placeholder.set("keyword", bibl_keyword)
        placeholder.tag = "div"
        placeholder.getparent().tag = "div"

        for bib_entry in entries:
            placeholder.append(bib_entry)

##############################################################
#                  actual script                             #
##############################################################

# Load XML_FILE into the tree that all following steps work on.
xmlTree = parseXML( input_file = XML_FILE )

# Reference processing is skipped when args.no_references is set.
if not args.no_references:
    process_references(xmlTree)
    process_pararefs(xmlTree)

# Every div1 element is treated as one chapter.
xmlChapters = xmlTree.findall("//div1")

logging.info("-----------------------------------------------------")
logging.info("Move EOAlanguage from <head> into attribute of EOAchapter")
xmltrans_move_eoalanguage( xmlChapters )

# The series "Essay" gets its own numbering type, all others are "regular".
strSerie = get_series( xmlTree )
strNumberingType = "essay" if strSerie == "Essay" else "regular"

# Starts out empty; handed to process_page_references() and
# make_latex_bibl_file() further down.
set_citations = set()

libeoaconvert.logdivider()
logging.info("Numbering Chapters")
dictChapters = number_chapters( xmlChapters )

if not args.no_math:
    logging.info("Carry on with generation of equations")
else:
    # Equations are not rendered: write a small white image carrying a red
    # "Formula not generated" note into the "items" directory of the output.
    dictEquations = {}
    from PIL import Image, ImageFont, ImageDraw
    tmp_img = Image.new("RGB", (130, 10), (255,255,255))
    font_colour = (255,0,0)
    text_canvas = ImageDraw.Draw(tmp_img)
    text_canvas.text((2, 0), "Formula not generated", fill=font_colour)
    items_dir = OUTPUT_DIR / "items"
    # Create the target directory only when it is not there yet.
    if not os.path.exists( items_dir ):
        os.mkdir( items_dir )
    tmp_img.save(items_dir / "EOAformulanotgenerated.png")

# Processing pipeline over the chapters.  Each step logs what it is about to
# do and then calls exactly one function on xmlChapters.  The dictionaries
# returned by the numbering steps are kept in module-level names and are
# pickled near the end of the script.
# NOTE(review): the steps presumably modify the chapter elements in place, so
# their order should not be changed without checking the called functions.

# Numbered equations; args.no_math is passed on to the function.
libeoaconvert.logdivider()
logging.info("Processing .//EOAequation | .//EOAequationarray | .//EOAsubequations")
dictEquations = process_equations( xmlChapters, dictChapters, strNumberingType, args.no_math )

# Equations without a number.
logging.info("-----------------------------------------------------")
logging.info("Processing .//EOAequationnonumber | .//EOAequationarraynonumber")
process_unnumbered_equations( xmlChapters, args.no_math )

# Inline equations.
logging.info("-----------------------------------------------------")
logging.info("Converting EOAineq")
process_inline_equations( xmlChapters, args.no_math )

# Chemical formulas.
logging.info("-----------------------------------------------------")
logging.info("Converting EOAchem")
process_eoachem( xmlChapters, args.no_math )

# Boxes.
logging.info("-----------------------------------------------------")
logging.info("Converting EOAbox")
process_eoabox( xmlChapters )

# Figures.
logging.info("-----------------------------------------------------")
logging.info("EOAFigure Numbering per Chapter")
dictFigures = process_figures( xmlChapters )

# Theorems.
logging.info("-----------------------------------------------------")
logging.info( "Numbering Theorems" )
dictTheorems = number_theorems( xmlChapters )

# Sections and subsections.
logging.info("-----------------------------------------------------")
logging.info("Section, Subsection,... Numbering per Chapter")
dictSections = number_sections( xmlChapters )

# Footnotes.
logging.info("-----------------------------------------------------")
logging.info("Numbering of Footnotes per Chapter")
dictFootnotes = number_footnotes( xmlChapters )

# The footnote code written by OU used to live here; it has been moved to
# libeoaconvert:
# def get_bigfoot_data(chapter)

# TODO: the bigfoot data still has to be integrated into the footnote
# dictionary, which looks like this:
# 'fndict': {'uid11': '2', 'uid12': '3', 'uid9': '1'},


# Lists.
logging.info("-----------------------------------------------------")
logging.info("Numbering of Lists per Chapter")
dictLists = number_lists( xmlChapters )

# Page labels for references.  set_citations is still empty at this point;
# presumably process_page_references() fills it -- confirm in that function.
logging.info("-----------------------------------------------------")
logging.info("Working on Page Numbers for References")
dictPagelabels = process_page_references( LATEX_DIR, set_citations )
logging.info("page labels:")
logging.info(dictPagelabels)

logging.info("citations:")
logging.info(set_citations)

# Tables.
logging.info("-----------------------------------------------------")
logging.info("Numbering of Tables per Chapter")
dictTables = number_tables( xmlChapters )


##############################################################
#                  Preparing the Bibliography                #
##############################################################

def _format_citenumber_ranges(listCitenumbers):
    """Render sorted citation numbers as one bracketed string.

    ``listCitenumbers`` must be a non-empty list of numeric strings in
    ascending order (assumed to be distinct).  Runs of three or more
    consecutive numbers are collapsed to ``first-last``, all other numbers
    are separated by commas, e.g. ``["1", "2", "3", "5"]`` becomes
    ``"[1-3,5]"``.
    """
    strResult = "[" + listCitenumbers[0]
    # Counts how many numbers of the current run have been held back.
    intNumberOfSequentialCite = 0
    intLastIndex = len(listCitenumbers) - 1
    for i in range(1, len(listCitenumbers)):
        intPreviousCitenumber = int(listCitenumbers[i-1])
        intCurrentCitenumber = int(listCitenumbers[i])
        if i == intLastIndex:
            # The last number closes an open run with "-", in every other
            # case it is appended after a comma.
            if (intPreviousCitenumber + 1) == intCurrentCitenumber and intNumberOfSequentialCite != 0:
                strResult = strResult + "-" + str(listCitenumbers[i])
            else:
                strResult = strResult + "," + str(listCitenumbers[i])
            break
        intNextCitenumber = int(listCitenumbers[i+1])
        if (intCurrentCitenumber + 1) != intNextCitenumber:
            # A run, if one is open, ends at the current number.
            if intNumberOfSequentialCite != 0:
                strResult = strResult + "-" + str(intCurrentCitenumber)
                intNumberOfSequentialCite = 0
            else:
                strResult = strResult + "," + str(intCurrentCitenumber)
            continue
        if (intPreviousCitenumber + 1) == intCurrentCitenumber:
            # In the middle of a run: nothing is written yet.
            intNumberOfSequentialCite += 1
            continue
        # The current number may be the start of a new run.
        strResult = strResult + "," + str(intCurrentCitenumber)
        intNumberOfSequentialCite = 0
    return strResult + "]"


logging.info("Final big step: preparing the bibliography")

bibl_info = bibl_info_from_xml(
        xmlTree
)

# Maps bibliography keywords to their placeholder elements.  It is set up
# here because only the "monograph" and "anthology" types assign it below,
# while the check before the citation handling runs for every type.
keyword_to_print_bibl_el = {}

if bibl_info is None:
    logging.warning("No bibliography database found.")
elif args.no_bib4ht:
    logging.warning("Proceeding without typesetting bibligraphy.")
else:
    (bib_type, bib_database) = bibl_info
    logging.debug(f"bib type is {bib_type}")

    logging.info( ".bib -> .json")
    citations_json = write_json_bibl(
            INPUT_DIR / bib_database,
            output_file = TEMP_DIR / (INPUT_PATH.stem + "-bib.json")
    )

    ## only for debugging (?)
    make_latex_bibl_file(
            bib_database = bib_database,
            set_citations = set_citations,
            files = [
                (TEMPLATE_PATH / "largebib.tex", DEBUG_DIR / "debug_onlybib.tex"),
                (TEMPLATE_PATH / "largebib-xml.tex", DEBUG_DIR / "debug_onlybib-xml.tex"),
            ]
    )

    # If Bibliography-Type is monograph search for EOAbibliography and make it all
    if bib_type == "monograph":
        keyword_to_print_bibl_el = insert_bibliographies(
                xmlTree,
                # use language of the first chapter:
                xmlChapters[0].get( "language" ),
                citations_json,
                ## paths:
                bib_file = (INPUT_DIR / bib_database).with_suffix( ".bib" ),
                tex_template = BASE_DIR / "data" / "aux" / "bibliography4ht.tex",
                temp_dir = TEMP_DIR / "bib2html" / "monograph-tmp",
                output_file = TEMP_DIR / "bib2html" / "used_citations-monograph.html",
                log_dir = LOG_DIR / SCRIPT_NAME / "bib2html",
        )

    # If Bibliography-Type is anthology search for EOAbibliography and make one per chapter
    elif bib_type == "anthology":
        # NOTE(review): the result of every chapter replaces the one of the
        # chapter before, so the check further down only looks at the last
        # chapter -- confirm that this is intended.
        for intChapterNumber, xmlChapter in enumerate(xmlChapters, start = 1):
            logging.info(f"Looking at chapter {intChapterNumber}.")
            keyword_to_print_bibl_el = insert_bibliographies(
                    xmlChapter,
                    xmlChapter.get("language"),
                    citations_json,
                    ## paths:
                    bib_file = (INPUT_DIR / bib_database).with_suffix( ".bib" ),
                    tex_template = BASE_DIR / "data" / "aux" / "bibliography4ht.tex",
                    temp_dir = TEMP_DIR / "bib2html" / "chapter_{:02d}-tmp".format( intChapterNumber ),
                    output_file = TEMP_DIR / "bib2html" / "used_citations-anthology-chapter_{:02d}.html".format( intChapterNumber ),
                    log_dir = LOG_DIR / SCRIPT_NAME / "bib2html"
            )

    # for the time being
    strCitation = ""

    # Bibliographies are done, now for the citations
    # turn
    # <EOAciteyear><citekey>Renn2012a</citekey><page/></EOAciteyear>
    # into
    # <span rel="popover" class="citation" citekey="Renn2012a" data-toggle="popover" html="true" data-placement="bottom" data-title="Renn 2012" data-content="The Globalization of Knowledge in History">2012</span>

    if keyword_to_print_bibl_el and bib_type in ("anthology", "monograph"):

        if bib_type == "monograph":
            # One file with formatted citations for the whole book.
            tmp_citation_filename = "used_citations-monograph"
            tmp_path_html = (TEMP_DIR / "bib2html" / tmp_citation_filename) .with_suffix( ".html" )
            with open(tmp_path_html, "r") as formatted_citations:
                form_cit = BeautifulSoup(formatted_citations, "html.parser")

        for intChapterNumber, xmlChapter in enumerate(xmlChapters, start = 1):
            logging.info("-----------------------------------------------------")
            logging.info("Processing References for Chapter " + str(intChapterNumber))
            xmlCitations = xmlChapter.xpath(".//EOAciteauthoryear | .//EOAciteyear | .//EOAcitemanual")
            logging.debug(f"Found {libeoaconvert.plural(len(xmlCitations), 'citation')}.")
            if bib_type == "anthology":
                # One file with formatted citations per chapter; a chapter
                # without such a file is skipped.
                tmp_citation_filename = "used_citations-anthology-chapter_{:02d}".format(intChapterNumber)
                tmp_path_html = (TEMP_DIR / "bib2html" / tmp_citation_filename ) .with_suffix( ".html" )
                logging.debug(f"Looking for file {tmp_path_html}.")
                if not tmp_path_html.exists():
                    logging.debug("no citations in this chapter")
                    continue
                with open(tmp_path_html, "r") as formatted_citations:
                    form_cit = BeautifulSoup(formatted_citations, "html.parser")

            for counter_citations, xmlCitation in enumerate(xmlCitations, start = 1):
                string_citekey = xmlCitation.find("./citekey").text
                libeoaconvert.progress(counter_citations, len(xmlCitations),"Processing reference %s of %s: %s" % (counter_citations, len(xmlCitations), string_citekey))

                # Author-year form and full reference come from the
                # formatted HTML; a key that is missing there is fatal.
                try:
                    citeauthoryear_value = form_cit.select("#citeauthoryear ~ p > span[data-cites='%s']" % string_citekey)[0].text
                    strTitle = form_cit.select("#citefull ~ p[data-cites='%s']" % string_citekey)[0].text
                except IndexError:
                    logging.error("Could not find {}. Exiting.".format(string_citekey))
                    # Non-zero status, so that callers can detect the failure.
                    sys.exit(1)
                data_title_value = citeauthoryear_value
                if xmlCitation.tag == "EOAciteauthoryear":
                    strCitation = citeauthoryear_value
                elif xmlCitation.tag == "EOAciteyear":
                    strCitation = form_cit.select("#citeyear ~ p > span[data-cites='%s']" % string_citekey)[0].text
                elif xmlCitation.tag == "EOAcitemanual":
                    cite_text = xmlCitation.find("citetext")
                    if len(cite_text) > 0:
                        # Work in progress: the first child of citetext is
                        # serialised.  encoding="unicode" yields a str, which
                        # is needed for the string concatenation below.
                        tmp_string = cite_text[0]
                        strCitation = etree.tostring(tmp_string, encoding="unicode")
                    else:
                        strCitation = cite_text.text
                logging.info( "formatted citation: {}".format( strCitation ) )

                # A non-empty <page> is added to the citation and its title.
                xmlPage = xmlCitation.find("./page")
                if xmlPage is not None and xmlPage.text is not None:
                    pages_text = libeoaconvert.gettext(xmlPage)
                    strCitation = strCitation + ", " + pages_text
                    data_title_value = data_title_value + ", " + pages_text

                # Replace the XML tag with the formatted reference.  clear()
                # also removes the tail, which is put back afterwards.
                tmpTail = xmlCitation.tail
                xmlCitation.clear()

                if args.eoa_classic:
                    xmlCitation.tag = "span"
                    xmlCitation.set("rel", "popover")
                    xmlCitation.set("class", "citation")
                    # Create Link to be used for website in a popover
                    xmlCitation.set("data-toggle", "popover")
                    xmlCitation.set("html", "true")
                    xmlCitation.set("data-placement", "bottom")
                else:
                    # this is taken from tei2imxml!
                    # <a class="publications-popup-text" data-title="Halliday and
                    # Resnick 1977, 232" data-content="Physics">Halliday and Resnick
                    # 1977, 232</a>
                    xmlCitation.tag = "a"
                    xmlCitation.set("class", "publications-popup-text")

                # A value that cannot be stored as an attribute is replaced
                # by a marker instead of aborting the conversion.
                try:
                    xmlCitation.set("data-content", strTitle)
                except (TypeError, ValueError):
                    xmlCitation.set("data-content", "missing")
                xmlCitation.text = strCitation
                xmlCitation.tail = tmpTail

                xmlCitation.set("citekey", string_citekey)
                xmlCitation.set("data-title", data_title_value)

    else:
        logging.debug("No bibliography to insert")

    # If Bibliography-Type is monograph-numeric search for EOAbibliography and make it all
    if bib_type == "monograph-numeric":
        xmlBibliography = xmlTree.find(".//EOAprintbibliography")
        if xmlBibliography is not None:
            dictCitekeysNumbers = {}
            dictCitekeysTitles = {}
            xmlBibliography.clear()
            xmlBibliography.tag = "div"
            xmlBibliography.getparent().tag = "div"
            # NOTE(review): xmlBibTree and createBibEntryNumeric are not
            # defined in this part of the file -- confirm they still exist.
            xml_bib_entries = xmlBibTree.findall(".//entry")
            for intNumberOfEntry, xmlEntry in enumerate(xml_bib_entries, start = 1):
                # Go through all entries and assign a number to the citekey
                bibEntry = Bibitem(xmlEntry)
                strCitekey = bibEntry.citekey()
                dictCitekeysNumbers[strCitekey] = str(intNumberOfEntry)
                dictCitekeysTitles[strCitekey] = str(bibEntry.title())
                strNewentry = "<p class=\"bibliography\">[" + str(intNumberOfEntry) + "] " + createBibEntryNumeric(bibEntry) + "</p>"
                xmlNew = etree.fromstring(strNewentry)
                xmlBibliography.append(xmlNew)
            # Now for the references via EOAcitenumeric
            xmlCitenumerics = xmlTree.findall(".//EOAcitenumeric")
            for xmlCitenumeric in xmlCitenumerics:
                logging.info(etree.tostring(xmlCitenumeric))
                strPopover = ""
                # The citekeys come as one comma-separated string; blanks
                # and line breaks are dropped before splitting.
                tmpCitekeys = xmlCitenumeric.find(".//citekey").text
                tmpCitekeys = tmpCitekeys.replace(" ", "").replace("\n", "")
                listCitekeys = tmpCitekeys.split(",")
                listCitenumbers = []
                for strCitekey in listCitekeys:
                    listCitenumbers.append(dictCitekeysNumbers[strCitekey])
                    # Create Text to be used on the website in a popover
                    strPopover = strPopover + "[" + dictCitekeysNumbers[strCitekey] + "] " + dictCitekeysTitles[strCitekey] + " "
                listCitenumbers = sorted(listCitenumbers, key=int)
                strResult = _format_citenumber_ranges(listCitenumbers)
                xmlCitenumeric.text = strResult
                # Create Link to be used for website
                xmlCitenumeric.set("data-toggle", "popover")
                xmlCitenumeric.set("html", "true")
                xmlCitenumeric.set("data-content", strPopover)
                xmlCitenumeric.set("class","citation")
                xmlCitenumeric.set("data-placement", "bottom")
                xmlCitenumeric.set("data-title", strResult)

    # Numeric citations for the individual chapters
    if bib_type == "anthology-numeric":
        for intChapterNumber, xmlChapter in enumerate(xmlChapters, start = 1):
            logging.info("Processing Bibliography")
            xmlBibliography = xmlChapter.find(".//EOAprintbibliography")
            if xmlBibliography is None:
                continue
            dictCitekeysNumbers = {}
            dictCitekeysTitles = {}
            # Unlike in the monograph case, the placeholder is not cleared.
            xmlBibliography.tag = "div"
            xmlBibliography.getparent().tag = "div"
            # NOTE(review): xmlBibTree and createBibEntryNumeric are not
            # defined in this part of the file -- confirm they still exist.
            xmlRefsections = xmlBibTree.findall(".//refsection")
            # Pick the refsection carrying the number of this chapter.
            # NOTE(review): when none matches, the last refsection is used.
            for xmlRefsection in xmlRefsections:
                if xmlRefsection.find(".//number").text == str(intChapterNumber):
                    break
            xml_bib_entries = xmlRefsection.findall(".//entry")
            for intNumberOfEntry, xmlEntry in enumerate(xml_bib_entries, start = 1):
                # Go through all entries and assign a number to the citekey
                bibEntry = Bibitem(xmlEntry)
                strCitekey = bibEntry.citekey()
                dictCitekeysNumbers[strCitekey] = str(intNumberOfEntry)
                dictCitekeysTitles[strCitekey] = str(bibEntry.title())
                strNewentry = "<p class=\"bibliography\">[" + str(intNumberOfEntry) + "] " + createBibEntryNumeric(bibEntry) + "</p>"
                xmlNew = etree.fromstring(strNewentry)
                xmlBibliography.append(xmlNew)
            # Now for the references via EOAcitenumeric
            xmlCitenumerics = xmlChapter.xpath(".//EOAcitenumeric | .//EOAciteauthoryear | .//EOAciteyear")
            logging.info("Found numeric citation in chapter " + str(intChapterNumber))
            for xmlCitenumeric in xmlCitenumerics:
                strPopover = ""
                # The citekeys come as one comma-separated string; blanks
                # and line breaks are dropped before splitting.
                tmpCitekeys = xmlCitenumeric.find(".//citekey").text
                tmpCitekeys = tmpCitekeys.replace(" ", "").replace("\n", "")
                logging.info(tmpCitekeys)
                listCitekeys = tmpCitekeys.split(",")
                listCitenumbers = []
                for strCitekey in listCitekeys:
                    logging.info(strCitekey)
                    listCitenumbers.append(dictCitekeysNumbers[strCitekey])
                    # Create Text to be used on the website in a popover
                    strPopover = strPopover + "[" + dictCitekeysNumbers[strCitekey] + "] " + dictCitekeysTitles[strCitekey] + " "
                listCitenumbers = sorted(listCitenumbers, key=int)
                strResult = _format_citenumber_ranges(listCitenumbers)
                xmlCitenumeric.text = strResult
                # Create Link to be used for website in a popover
                xmlCitenumeric.set("data-toggle", "popover")
                xmlCitenumeric.set("data-placement", "bottom")
                xmlCitenumeric.set("data-title", " " + strResult)
                xmlCitenumeric.set("data-content", strPopover)
                xmlCitenumeric.set("class","citation")

#################################
# Almost done, this is the coda #
#################################

# index must be child of div0!
# def do_something_funny_about_indices():
#     print_bibl_elements = xmlTree.findall(".//EOAprintbibliography")
#     if len(print_bibl_elements) > 0:
#         bib_parent_element = print_bibl_elements[0].getparent()
#         # bib_parent_element = xmlBibliographies[0].getparent()
#         upper_div = bib_parent_element.xpath("./ancestor::div1")[0]
#         previous_div0 = upper_div.getparent()
#         # possible culprit for not finding the index
#         # other_content = bib_parent_element.xpath(".//EOAtocentry | .//EOAprintpersonindex | .//EOAprintlocationindex | .//EOAprintindex")
#         other_content = upper_div.xpath(".//EOAtocentry | .//EOAprintpersonindex | .//EOAprintlocationindex | .//EOAprintindex")
#         if len(other_content) > 0:
#             for element in other_content:
#                 previous_div0.append(element)
#     else:
#         logging.debug("Nothing funny about indices.")

# do_something_funny_about_indices()

# Dump the tree for debugging before the indices are moved.
libeoaconvert.debug_xml_here( xmlTree, "indexmover", DEBUG_DIR)

make_indices_child_of_div0()
# Remove the helper markup: "tagtobestripped" tags are dropped while their
# content stays, "elementtobestripped" elements go completely but their tail
# text is kept.
etree.strip_tags(xmlTree, "tagtobestripped")
etree.strip_elements(xmlTree, "elementtobestripped", with_tail=False)

move_anchors(xmlTree)

# here followed the conversion to epub and the conversion to django.xml
# both parts were removed and put into separate files.

intermediate_file_pre = TEMP_DIR / "IntermediateXMLFile_pre.xml"
intermediate_file = OUTPUT_DIR / "IntermediateXMLFile.xml"

# Both files are written as UTF-8 explicitly: the XML declaration added
# below promises that encoding, whatever the locale of the machine is.
ergebnis = etree.tostring(xmlTree, pretty_print=True, encoding="unicode")
with open(intermediate_file_pre, "w", encoding="utf-8") as ergebnisdatei:
    ergebnisdatei.write(ergebnis)

# The serialised tree is still in memory, so the file just written does not
# have to be read again.
filedata = ergebnis

# add XML declaration
filedata_declaration = "<?xml version='1.0' encoding='UTF-8'?>\n" + filedata

# replacing a milestone element by a closing and opening combination
filedata_clean = filedata_declaration.replace('<msparbreak/>', '</p><p>')

# Write the final intermediate file
with open(intermediate_file, 'w', encoding="utf-8") as outfile:
    outfile.write(filedata_clean)

# Keep the numbering dictionaries collected during the run: they are
# bundled under fixed keys and pickled into the temporary directory.
data_to_pickle = dict(
    chapterdict = dictChapters,
    eqdict = dictEquations,
    listdict = dictLists,
    theoremdict = dictTheorems,
    figdict = dictFigures,
    secdict = dictSections,
    fndict = dictFootnotes,
    tabdict = dictTables,
    pagelabeldict = dictPagelabels,
)

with (TEMP_DIR / 'data.pickle').open('wb') as f:
    # The highest pickle protocol available is used.
    pickle.dump(data_to_pickle, f, protocol=pickle.HIGHEST_PROTOCOL)

check_footnote_paragraphs()

# Tidy up and leave.
logging.info("Removing temporary files.")
cleanup()
logging.info("Done!")
sys.exit()