imxml2django.py

#!/usr/bin/env python3
# -*- coding: utf-8; mode: python -*-
# Time-stamp: <2021-10-26 14:09:17 (kthoden)>

"""
Create an XML file that can be inserted into the Django database
of an EOAv1 installation.

Input file is a customized DocBook XML that has been generated either
with eoatex2imxml or tei2imxml.

"""

from utils.load_config import load_config, exec_command, check_executable
import utils.libeoaconvert as libeoaconvert

import pickle
import os
import sys
import re
import shutil
import shlex
import subprocess
import argparse
import configparser
import logging
from copy import deepcopy
from lxml import etree
from pathlib import Path
import time

# Locations derived from where this script itself lives.
SCRIPT_PATH = Path(__file__)
SCRIPT_NAME = SCRIPT_PATH.name
BASE_DIR = SCRIPT_PATH.resolve().parent

# Default directories: the INPUT_DIR / OUTPUT_DIR environment variables win
# over the relative fallbacks './input' and './output'.
DEFAULT_INPUT_DIR = Path(os.environ.get('INPUT_DIR', './input'))

DEFAULT_OUTPUT_DIR = Path(os.environ.get('OUTPUT_DIR', './output'))

#####################
# Parsing arguments #
#####################

# Command line interface.  The formatter appends argument defaults to the
# generated --help text.
parser = argparse.ArgumentParser(
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)

# -c/--config: configuration file, defaults to config/eoaconvert.cfg next to
# this script.
parser.add_argument(
    "-c",
    "--config",
    dest="CONFIG_FILE",
    metavar="CONFIGURATION",
    type=Path,
    default=BASE_DIR / "config" / "eoaconvert.cfg",
    help="Name of configuration file",
)

# --log-level: passed on to load_config() further down.
parser.add_argument(
    "--log-level",
    default="INFO",
    help="log level: choose between DEBUG, INFO, WARNING, ERROR, CRITICAL",
)

# -p/--checkpublicationcfg: boolean switch.
parser.add_argument(
    "-p",
    "--checkpublicationcfg",
    action="store_true",
    help="Check the publication.cfg for completeness.",
)

# -i/--input-dir and -o/--output-dir: optional overrides; when omitted the
# script derives both from DEFAULT_OUTPUT_DIR and the publication directory.
parser.add_argument(
    "-i",
    "--input-dir",
    type=Path,
    help=f"directory containing some intermediate xml created by previous steps. default: {DEFAULT_OUTPUT_DIR}/PUBLICATION_NAME/imxml",
)
parser.add_argument(
    "-o",
    "--output-dir",
    type=Path,
    help=f"output directory. default: {DEFAULT_OUTPUT_DIR}/PUBLICATION_NAME/django",
)

# Mandatory positional argument: the publication directory.
parser.add_argument(
    "PUBLICATION_DIR",
    type=Path,
    help="directory containing the publication (including resources like pictures, etc.)",
)

args = parser.parse_args()

########################
# Paths to executables #
########################
# Names of the external programs; they are looked up via check_executable()
# later in this script.
GM_PATH = "gm"
PDFCROP_EXEC = "pdfcrop"  # (part of texlive distribution)

############################
# Paths:
############################
PUBLICATION_DIR = args.PUBLICATION_DIR

# Explicit --input-dir / --output-dir win; otherwise both default to a
# sub-directory of DEFAULT_OUTPUT_DIR named after the stem of the resolved
# publication directory.
if args.input_dir is not None:
    INPUT_DIR = args.input_dir
else:
    INPUT_DIR = DEFAULT_OUTPUT_DIR / PUBLICATION_DIR.resolve().stem / "imxml"

if args.output_dir is not None:
    OUTPUT_DIR = args.output_dir
else:
    OUTPUT_DIR = DEFAULT_OUTPUT_DIR / PUBLICATION_DIR.resolve().stem / "django"

# Everything this script produces besides the result lives below OUTPUT_DIR.
LOG_DIR = OUTPUT_DIR / "log"
LOG_FILE = (LOG_DIR / SCRIPT_NAME).with_suffix(".log")
TEMP_DIR = OUTPUT_DIR / "tmp_files"
DEBUG_DIR = OUTPUT_DIR / "debug"

config_file = args.CONFIG_FILE

print("The configfile is %s." % config_file)

##################################
# Reading the configuration file #
##################################
CONFIG = load_config(config_file, args.log_level, LOG_FILE)

############################
# Paths to auxiliary files #
############################
# Translation table, located relative to this script as named in the
# [Auxiliaries] section of the configuration.
TRANSLATION_FILE = BASE_DIR / CONFIG['Auxiliaries']['TRANSLATIONS']

# prepare:
logging.debug("PATH: {}".format( os.environ['PATH'] ))
check_executable( GM_PATH )
check_executable( PDFCROP_EXEC )

# exist_ok=True replaces the former check-then-create sequence, so a
# directory that already exists (or appears concurrently) is not an error.
TEMP_DIR.mkdir(parents=True, exist_ok=True)
DEBUG_DIR.mkdir(parents=True, exist_ok=True)

# Check for folder and necessary files
# Only report publication.cfg as missing when it really is; the copy from
# INPUT_DIR below still always happens and refreshes any existing copy.
if not os.path.exists(OUTPUT_DIR / "publication.cfg"):
    logging.info("The publication.cfg file is missing in django directory.")
if os.path.exists(INPUT_DIR / "publication.cfg"):
    shutil.copy(INPUT_DIR / "publication.cfg", OUTPUT_DIR)
    logging.info(f"Copied from {INPUT_DIR}.")
else:
    logging.error(f"Found no publication.cfg in {INPUT_DIR}. Exiting")
    sys.exit( 1 )

# The cover image is mandatory as well; abort when the input has none.
if os.path.exists(INPUT_DIR / "Cover.jpg"):
    shutil.copy(INPUT_DIR / "Cover.jpg", OUTPUT_DIR / "Cover.jpg")
    logging.info("Copied cover image from input directory.")
else:
    logging.error("No coverfile found. You can create a temporary one with the mkimage.py script")
    sys.exit( 1 )

###########################################
# Loading data from first conversion step #
###########################################
# NOTE(review): pickle.load can execute arbitrary code contained in the file.
# This is only safe as long as data.pickle was written by a trusted earlier
# conversion step -- confirm that INPUT_DIR is never attacker-controlled.
with open(INPUT_DIR / "tmp_files" / 'data.pickle', 'rb') as f:
    data = pickle.load(f)

# Unpack the pickled lookup tables into module-level names; most of them are
# declared `global` inside djangoParseObject() below.
dictChapters = data["chapterdict"]
dictEquations = data["eqdict"]
dictLists = data["listdict"]
dictTheorems = data["theoremdict"]
dictSections = data["secdict"]
dictFigures = data["figdict"]
dictFootnotes = data["fndict"]
dictTables = data["tabdict"]
dictPagelabels = data["pagelabeldict"]

# Safety net: make sure the debug directory exists before anything is
# written into it (the preparation code earlier in the script also creates it).
if not os.path.exists(DEBUG_DIR):
    os.mkdir(DEBUG_DIR)

# Parse the intermediate XML produced by the previous conversion step.
xmlTree = etree.parse( str(INPUT_DIR / "IntermediateXMLFile.xml") )

# Presumably writes a snapshot of the untouched tree into DEBUG_DIR under
# the label "fresh" -- verify against libeoaconvert.debug_xml_here.
libeoaconvert.debug_xml_here(
        xmlTree,
        "fresh",
        DEBUG_DIR
)

print("""
############################################################################
#          Convert tralics-XML to Django Data Structure                    #
############################################################################
""")
# Make sure the output sub-directories exist.  parents=True also creates
# "images" for "images/embedded"; exist_ok=True makes reruns harmless.
(OUTPUT_DIR / "images" / "embedded").mkdir(parents=True, exist_ok=True)
(OUTPUT_DIR / "files").mkdir(parents=True, exist_ok=True)

# Create empty xmlTree
xmlEOAdocument = etree.Element("EOAdocument")
xmlDjangoTree = etree.ElementTree(xmlEOAdocument)
etree.strip_attributes(xmlTree, "noindent")
# Remove temp-Tag
etree.strip_tags(xmlTree, "temp")
libeoaconvert.debug_xml_here(
        xmlTree,
        "afterstriptags",
        DEBUG_DIR
)
# Write Temporary XML-Maintree
# The tree is serialised to a str without XML declaration, so the file is
# written explicitly as UTF-8 (the XML default) instead of the locale
# encoding; the context manager closes the handle even if writing fails.
ergebnis = etree.tostring(xmlTree, pretty_print=True, encoding="unicode")
with open(TEMP_DIR / "Devel_django.xml", "w", encoding="utf-8") as ergebnisdatei:
    ergebnisdatei.write(ergebnis)

# Find all Chapters from the original tralics XML
# ".//div1" is what lxml turns the former "//div1" into anyway (ignoring the
# root element), but without emitting a FutureWarning.
xmlChapters = xmlTree.findall(".//div1")

def replace_footnote_with_sup(note):
    """
    Turn the given footnote element into a bare ``sup`` element, in place.

    The element's ``clear()`` method is called, after which the tail text it
    had before the call is put back and its tag is set to ``sup``.  Nothing
    is returned; the caller keeps working with the same element object.

    This small step was the part that several footnote code paths had in
    common.
    """
    trailing_text = note.tail
    note.clear()
    note.tag, note.tail = "sup", trailing_text
# def replace_footnote_with_sup ends here

def alph_footnote_index(fndex):
    """
    Return the lowercase Latin footnote marker for a zero-based index.

    Lowercase Latin footnotes need to support more than 26 values, so after
    'z' the markers continue with 'aa', 'ab', ... (like spreadsheet columns).

    :param fndex: zero-based footnote index, must not be negative
    :returns: the marker string
    :raises ValueError: if ``fndex`` is negative (this used to recurse
        without bound)

    >>> alph_footnote_index(0)
    'a'
    >>> alph_footnote_index(1)
    'b'
    >>> alph_footnote_index(24)
    'y'
    >>> alph_footnote_index(25)
    'z'
    >>> alph_footnote_index(26)
    'aa'
    >>> alph_footnote_index(27)
    'ab'
    """
    if fndex < 0:
        raise ValueError("footnote index must not be negative: %r" % (fndex,))
    alphabet = "abcdefghijklmnopqrstuvwxyz"
    letters = []
    while True:
        fndex, remainder = divmod(fndex, len(alphabet))
        # Letters are produced from the last position to the first.
        letters.append(alphabet[remainder])
        if not fndex:
            break
        # There is no "zero" letter: a quotient of 1 means the next position
        # to the left is 'a', hence the shift by one.
        fndex -= 1
    return "".join(reversed(letters))
# def alph_footnote_index ends here

def debug_chapters(xmlEOAchapters):
    """Write individual chapters to files

    Each chapter is pretty-printed into its own file
    ``debug-chapter-NN.xml`` inside DEBUG_DIR, numbered from 01 in
    iteration order.

    :param xmlEOAchapters: iterable of chapter elements to serialise
    """
    for chap_num, chapter in enumerate(xmlEOAchapters, start=1):
        tmp_filename = DEBUG_DIR / ("debug-chapter-%02d.xml" % chap_num)
        tmp_result = etree.tostring(chapter, pretty_print=True, encoding="unicode")
        # Written explicitly as UTF-8 rather than in the locale encoding; the
        # context manager closes the file even if the write fails.
        with open(tmp_filename, "w", encoding="utf-8") as tmp_file:
            tmp_file.write(tmp_result)
# def debug_chapters ends here


def djangoParseObject(xmlElement, indent=False, listtype=None, listnumber=0, uid=None):
    # Get Dictionaries of Numbers via Global Variables
    global dictChapters
    global dictFigures
    global dictEquations
    global dictSections
    global dictFootnotes
    global dictPagelabels
    global dictTables
    global dictLists
    global intObjectNumber
    # Check what kind of Element we have and change the data

    if isinstance(xmlElement.tag, str):
        if xmlElement.tag == "EOAtranscripted":
            xmlResult = etree.Element("temp")
            xmlEOATranscription = etree.Element("EOAtranscription")
            xmlEOATranscription.set("order", str(intObjectNumber))
            intObjectNumber += 1
            xmlLeftheader = xmlElement.find(".//Leftheader")
            etree.strip_tags(xmlLeftheader, "p")
            xmlEOATranscription.append(xmlLeftheader)
            xmlRightheader = xmlElement.find(".//Rightheader")
            etree.strip_tags(xmlRightheader, "p")
            xmlEOATranscription.append(xmlRightheader)
            xmlTranscriptedtext = xmlElement.find(".//EOAtranscriptedtext")
            # change \n\n into </p><p> and pagebreak into </p><pagebreak><p> to create some valid markup
            strTranscriptedtext = etree.tostring(xmlTranscriptedtext, encoding="unicode")
            #strTranscriptedtext = re.sub (r"\n\n", "</p><p>", str(strTranscriptedtext))
            #strTranscriptedtext = re.sub (r"<p><pagebreak/></p>", "<pagebreak/>", strTranscriptedtext)
            xmlLeftColumn = etree.Element("EOAtranscriptionleft")
            xmlRightColumn = etree.Element("EOAtranscriptionright")
            boolRightColumn = False
            xmlTemp = etree.XML(str(strTranscriptedtext))
            for xmlElement in xmlTemp.iterchildren():
                if xmlElement.tag == "pagebreak":
                    boolRightColumn = True
                    continue
                if boolRightColumn == False:
                    xmlLeftColumn.append(xmlElement)
                if boolRightColumn == True:
                    xmlRightColumn.append(xmlElement)
            xmlEOATranscription.append(xmlLeftColumn)
            xmlEOATranscription.append(xmlRightColumn)
            # Convert Images within the transcription
            logging.debug("EOAfigurenonumber")
            xmlFigures = xmlEOATranscription.findall(".//EOAfigurenonumber")
            logging.debug(xmlFigures)
            if xmlFigures is not None:
                for xmlFigure in xmlFigures:
                    # example 'images/1.jpg'
                    strImageFileString = xmlFigure.find(".//file").text
                    strImageFileString = strImageFileString.rstrip("\n")
                    strImageFileDir = os.path.dirname(strImageFileString)
                    strImageFileDir = re.sub("/", "", strImageFileDir)
                    strImageFileName = os.path.basename(strImageFileString)
                    strImageFileNamewoSuffix = os.path.splitext(strImageFileName)[0]
                    strCommand = "{cmd} convert {arg1} -resize 250x250\\> {arg2}".format(
                            cmd = GM_PATH,
                            arg1 = PUBLICATION_DIR / strImageFileString,
                            arg2 = OUTPUT_DIR / "images/embedded" / (strImageFileDir + strImageFileName),
                    )


                    listArguments = shlex.split(strCommand)
                    subprocess.check_output(listArguments, shell=False)
                    tmpStrTail = xmlFigure.tail
                    xmlFigure.clear()
                    xmlFigure.tag = "img"
                    xmlFigure.set("src", strImageFileDir + strImageFileName)
                    xmlFigure.set("alt", "")
            xmlResult.append(xmlEOATranscription)
        elif xmlElement.tag == "EOAletterhead":
            xmlResult = etree.Element("temp")
            xmlEOAletterhead = etree.Element("EOAletterhead")
            xmlEOAletterrecipient = xmlElement.find(".//Recipient")
            xmlEOAletterhead.append(xmlEOAletterrecipient)
            xmlEOAletterarchive = xmlElement.find(".//Archive")
            xmlEOAletterhead.append(xmlEOAletterarchive)
            xmlEOAletteradditional = xmlElement.find(".//Additional")
            xmlEOAletterhead.append(xmlEOAletteradditional)
            xmlEOAletterpages = xmlElement.find(".//Pages")
            xmlEOAletterhead.append(xmlEOAletterpages)
            xmlEOAletterhead.set("order", str(intObjectNumber))
            intObjectNumber += 1
            xmlResult.append(xmlEOAletterhead)

        elif xmlElement.tag == "EOAfigurenonumber":
            xmlResult = etree.Element("temp")
            xmlEOAfigure = etree.Element("EOAfigurenonumber")
            # Copy Image
            strImageFileString = xmlElement.find(".//file").text
            strImageFileString = strImageFileString.rstrip("\n")
            strImageFileDir = os.path.dirname(strImageFileString)
            strImageFileDir = re.sub("/", "", strImageFileDir)
            strImageFileName = os.path.basename(strImageFileString)
            strImageFileNamewoSuffix = os.path.splitext(strImageFileName)[0]
            shutil.copy(
                    PUBLICATION_DIR / strImageFileString,
                    OUTPUT_DIR / "images" / (strImageFileDir + strImageFileName)
            )
            style_attribute = xmlElement.get("style")
            if style_attribute is not None:
                xmlEOAfigure.set("style", style_attribute)
            xmlEOAfigure.set("file", strImageFileDir + strImageFileName)
            xmlEOAfigure.set("width", xmlElement.find(".//width").text + "px;")
            xmlEOAfigure.set("order", str(intObjectNumber))
            intObjectNumber += 1
            xmlResult.append(xmlEOAfigure)
        elif xmlElement.tag == "EOAfigure":
            hi_figure_types = ["hitrue", "hionly", "hionlycollage", "hionlysub"]
            xmlResult = etree.Element("temp")
            # Create basic Element EOAfigure
            xmlEOAfigure = etree.Element("EOAfigure")
            figure_type = xmlElement.get("type")
            strImageFileString = xmlElement.find(".//file").text
            strImageFileString = strImageFileString.rstrip("\n")
            strImageFileDir = os.path.dirname(strImageFileString)
            strImageFileDir = re.sub("/", "", strImageFileDir)
            strImageFileName = os.path.basename(strImageFileString)
            logging.debug("This is figure %s", strImageFileName)
            strImageFileNamewoSuffix = os.path.splitext(strImageFileName)[0]
            # Copy Image
            if figure_type in ["hionly", "hionlycollage", "hionlysub"]:
                logging.debug(f"Found hyperimage figure ({figure_type}), no need to copy them.")
                xmlEOAfigure.set("file", strImageFileDir + strImageFileName)
                pass
            else:
                shutil.copy(
                        PUBLICATION_DIR / strImageFileString,
                        OUTPUT_DIR / "images" / (strImageFileDir + strImageFileName)
                )
                logging.debug("Django figure %s." % strImageFileName)
                # yellow
                if os.path.splitext(strImageFileName)[1].lower() == ".pdf":
                    logging.debug(f"""Found a PDF file: {OUTPUT_DIR / "images" / (strImageFileDir + strImageFileName)}""")
                    strImageFilepath = libeoaconvert.sanitizeImage(
                            OUTPUT_DIR / "images" / (strImageFileDir + strImageFileName),
                            TEMP_DIR,
                            # os.getcwd() + "/CONVERT/django/images/" + strImageFileDir + strImageFileName,
                            GM_PATH,
                            PDFCROP_EXEC
                    )
                    xmlEOAfigure.set("file", strImageFileDir + strImageFileName.replace(".pdf", ".png"))
                    logging.debug("The filename is %s" % xmlEOAfigure.get("file"))
                else:
                    xmlEOAfigure.set("file", strImageFileDir + strImageFileName)

            if figure_type in hi_figure_types:
                xmlEOAfigure.set("hielement", xmlElement.get("hielement"))

            if figure_type in ["hionly", "hionlycollage", "hionlysub"]:
                logging.debug(f"Found hyperimage figure ({figure_type}), no need for caption and size information.")
                strFigureNumber = dictFigures[xmlElement.find(".//anchor").get("id")]
                xmlEOAfigure.set("number", strFigureNumber)
            else:
                xmlEOAfigure.set("width", xmlElement.find(".//width").text + "px;")
                xmlEOAfigure.append(xmlElement.find(".//caption"))
                # Insert visual Number and uid
                strFigureNumber = dictFigures[xmlElement.find(".//anchor").get("id")]
                xmlEOAfigure.set("number", strFigureNumber)
                strFigureUID = xmlElement.find(".//anchor").get("id")
                xmlEOAfigure.set("id", strFigureUID)

            xmlEOAfigure.set("order", str(intObjectNumber))
            xmlResult.append(xmlEOAfigure)
            intObjectNumber += 1

        elif xmlElement.findall(".//EOAtable"):
            xmlResult = etree.Element("EOAtable")
            xmlRawTable = xmlElement.find(".//table")
            xmlResult.set("order", str(intObjectNumber))
            intObjectNumber += 1
            xmlResult.append(xmlRawTable)
            # Copy Number, Label and Caption
            if xmlElement.find(".//EOAtablecaption").text != "nonumber":
                xmlResult.append(xmlElement.find(".//EOAtablecaption"))
                xmlResult.set("label", xmlElement.find(".//EOAtablelabel").text)
                table_id = xmlRawTable.get("id")
                table_label = xmlRawTable.get("id")
                xmlResult.set("number", dictTables[table_label])
                xmlResult.set("id", xmlRawTable.get("id"))
            else:
                xmlElement.set("numbering", "false")
            #if xmlElement.find(".//EOAtablelabel").text is not None:
            # Transform width of Columns
            strColumnString = xmlElement.find(".//EOAtablecolumns").text
            strColumnString = re.sub(r"\|", "", strColumnString)
            reMatchObjects = re.findall(r'([L|R|C].*?[c|m]m)', strColumnString)
            intTableWidth = 0
            listColumnAlignments = [None]
            listColumnWidths = [None]
            intNumberOfColumns = 0
            for strColumnDefinition in reMatchObjects:
                strColumnDefinition = strColumnDefinition.rstrip("cm")
                logging.info(strColumnDefinition)
                strColumnAlignment = strColumnDefinition[0]
                if strColumnAlignment == "L":
                    strColumnAlignment = "left"
                if strColumnAlignment == "C":
                    strColumnAlignment = "center"
                if strColumnAlignment == "R":
                    strColumnAlignment = "right"
                listColumnAlignments.append(strColumnAlignment)
                intColumnWidth = int(float(strColumnDefinition.lstrip("LRC")) * 75)
                listColumnWidths.append(intColumnWidth)
                intTableWidth += intColumnWidth
                intNumberOfColumns += 1
            xmlRawTable.set("width", str(intTableWidth))
            # Figure out and deal with the Header
            xmlHeader = xmlRawTable.find(".//row/cell/tableheader")
            if xmlHeader is not None:
                xmlHeader.text = ""
                xmlHeader.getparent().text = xmlHeader.tail
                xmlHeader.getparent().remove(xmlHeader)
                xmlFirstRow = xmlRawTable.find(".//row")
                xmlFirstRow.tag = "tr"
                xmlFirstRowCells = xmlFirstRow.findall(".//cell")
                for xmlFirstRowCell in xmlFirstRowCells:
                    xmlFirstRowCell.tag = "th"
            # Now Deal with the rest of the rows
            xmlTableRows = xmlRawTable.findall(".//row")
            for xmlTableRow in xmlTableRows:
                xmlTableCells = xmlTableRow.findall(".//cell")
                intCurrentColumn = 1
                for xmlTableCell in xmlTableCells:
                    xmlTableCell.tag = "td"
                    xmlTableCell.set("align",listColumnAlignments[intCurrentColumn])
                    xmlTableCell.set("style","width: " + str(listColumnWidths[intCurrentColumn]) + ";")
                    # Deal with multicolumn
                    if xmlTableCell.get("cols") is not None:
                        xmlTableCell.set("colspan", xmlTableCell.get("cols"))
                    if intCurrentColumn > len(xmlTableCells):
                        intCurrentColumn = 1
                    # Deal with multicolumn again, increase intCurrentColumn by the columns being spanned
                    elif xmlTableCell.get("cols") is not None:
                        intCurrentColumn = intCurrentColumn + int(xmlTableCell.get("cols"))
                        del xmlTableCell.attrib["cols"]
                    else:
                        intCurrentColumn += 1
                    # deal with multirow
                    if xmlTableCell.get("rowspan") is not None:
                        cellchildren = xmlTableCell.getchildren()
                        for child in cellchildren:
                            if child.tag == "figure":
                                child.tag = "img"
                                imagepath = f"{child.get('file')}.{child.get('extension')}"
                                logging.debug(f"{imagepath}")
                                strImageFileDir = os.path.dirname(imagepath)
                                strImageFileDir = re.sub("/", "", strImageFileDir)
                                strImageFileName = os.path.basename(imagepath)
                                logging.debug(f"{strImageFileDir} and {strImageFileName}")
                                shutil.copy(
                                    PUBLICATION_DIR / imagepath,
                                    OUTPUT_DIR / "images" / (strImageFileDir + strImageFileName)
                                )
                                if child.get('extension') == "pdf":
                                    strImageFilepath = libeoaconvert.sanitizeImage(
                                        OUTPUT_DIR / "images" / (strImageFileDir + strImageFileName),
                                        TEMP_DIR, GM_PATH, PDFCROP_EXEC
                                    )
                                    child.set("src", f"{strImageFileDir + strImageFileName}".replace(".pdf", ".png"))
                                else:
                                    child.set("src", strImageFileDir)
                                child.set("width", f"{str(listColumnWidths[intCurrentColumn])}px")
                                del child.attrib["rend"]
                                del child.attrib["file"]
                                del child.attrib["extension"]
                xmlTableRow.tag = "tr"
                xmlTableRow.set("valign", "top")
        elif xmlElement.tag == "list" and xmlElement.get('type') != 'description':
            xmlResult = etree.Element("temp")
            if xmlElement.get('type') == 'ordered':

                # Change first item into EOAlistfirstitem
                xmlFirstItem = xmlElement.find("..//item")
                xmlFirstItemElement = xmlFirstItem.getchildren()[0]

                xmlResult.append(djangoParseObject(xmlFirstItemElement,indent=True, listtype="ordered", listnumber=xmlFirstItem.get("label"), uid=xmlFirstItem.get("id")))
                # Process Child Elements which are Part of this item
                if len(xmlFirstItem.getchildren()) >= 1:
                    for xmlChild in xmlFirstItem.iterchildren():
                        xmlResult.append(djangoParseObject(xmlChild,indent=True))
                xmlFirstItem.getparent().remove(xmlFirstItem)
                # Process remaining items in this list
                tmpIntNumber = 2
                for xmlItem in xmlElement.iterchildren():
                    xmlItemElement = xmlItem.getchildren()[0]
                    xmlResult.append(djangoParseObject(xmlItemElement,indent=True,listtype="ordered",listnumber=xmlItem.get("label"), uid=xmlItem.get("id")))
                    tmpIntNumber += 1
                    if len(xmlItem.getchildren()) >= 1:
                        for xmlChild in xmlItem.iterchildren():
                            xmlResult.append(djangoParseObject(xmlChild, indent=True))
                    xmlItem.getparent().remove(xmlItem)
            if xmlElement.get('type') == 'simple':
                xml_first_child = xmlElement.getchildren()[0]

                if xml_first_child.tag == 'item':
                    logging.debug("a simple list with no special items")
                    # Change first item into EOAlistfirstitem
                    xmlFirstItem = xmlElement.find("..//item")
                    xmlFirstItemElement = xmlFirstItem.getchildren()[0]
                    xmlResult.append(djangoParseObject(xmlFirstItemElement,indent=True,listtype="unordered", listnumber="-"))
                    # Process Child Elements which are Part of this item
                    if len(xmlFirstItem.getchildren()) >= 1:
                        logging.debug("len xmlFirstItem.getchildren is greater or equal 1")
                        for xmlChild in xmlFirstItem.iterchildren():
                            xmlResult.append(djangoParseObject(xmlChild,indent=True))
                    xmlFirstItem.getparent().remove(xmlFirstItem)
                    for xmlItem in xmlElement.iterchildren():
                        xmlItemElement = xmlItem.getchildren()[0]
                        xmlResult.append(djangoParseObject(xmlItemElement,indent=True))
                        if len(xmlItem.getchildren()) >= 1:
                            for xmlChild in xmlItem.iterchildren():
                                xmlResult.append(djangoParseObject(xmlChild,indent=True))
                        xmlItem.getparent().remove(xmlItem)

                ####################
                # Work in progress #
                ####################
                elif xml_first_child.tag == 'label':
                    logging.debug("a simple list with named items")

                    # Change first item into EOAlistfirstitem
                    xmlFirstItem = xmlElement.find("..//item")
                    xmlFirstItemElement = xmlFirstItem.getchildren()[0]
                    logging.debug(xmlFirstItemElement.text)

                    # debugging
                    logging.debug(etree.tostring(xmlFirstItemElement))
                    # end of debugging

                    xml_first_label = xmlElement.find("..//label")
                    listnumber_text = xml_first_label.text

                    xmlResult.append(djangoParseObject(xmlFirstItemElement,indent=True,listtype="unordered custom", listnumber=listnumber_text))

                    logging.debug("The length of the children of the first item: %s." % len(xmlFirstItem.getchildren()))

                    # Process Child Elements which are Part of this item
                    if len(xmlFirstItem.getchildren()) >= 1:
                        logging.debug("len xmlFirstItem.getchildren is greater or equal 1")
                        for xmlChild in xmlFirstItem.iterchildren():
                            xmlResult.append(djangoParseObject(xmlChild,indent=True))

                    xmlFirstItem.getparent().remove(xmlFirstItem)
                    xml_first_label.getparent().remove(xml_first_label)

                    all_the_labels = xmlElement.findall("label")
                    all_the_items = xmlElement.findall("item")

                    logging.debug("itemlength %s." % len(all_the_items))
                    logging.debug("labellength %s." % len(all_the_labels))

                    for listlabel, listitem in zip(all_the_labels, all_the_items):
                        logging.debug("listitem text %s." % listitem.text)
                        logging.debug("listlabel text %s." % listlabel.text)
                        xml_item_element = listitem.getchildren()[0]
                        xmlResult.append(djangoParseObject(xml_item_element, indent=True, listnumber=listlabel.text))

                        listlabel.getparent().remove(listlabel)
                        listitem.getparent().remove(listitem)

                    # for xmlItem in xmlElement.iterchildren():
                    #     print("So many items have we: ", len(xmlItem))
                    #     xmlItemElement = xmlItem.getchildren()[0]
                    #     xmlResult.append(djangoParseObject(xmlItemElement,indent=True))
                    #     if len(xmlItem.getchildren()) >= 1:
                    #         for xmlChild in xmlItem.iterchildren():
                    #             xmlResult.append(djangoParseObject(xmlChild,indent=True))
                    #     xmlItem.getparent().remove(xmlItem)
                ###########################
                # End of work in progress #
                ###########################

        elif xmlElement.tag == "list" and xmlElement.get('type') == 'description':
            logging.debug("A description")
            xmlResult = etree.Element("temp")
            while len(xmlElement.getchildren()) != 0:
                xmlDescription = etree.Element("EOAdescription")
                xmlDescription.set("order", str(intObjectNumber))
                xmlLabel = xmlElement.getchildren()[0]
                label_children = xmlLabel.getchildren()
                if label_children:
                    last_child = label_children[-1]
                    if last_child.tail.endswith(":"):
                        last_child.tail = last_child.tail[:-1]
                else:
                    if xmlLabel.text.endswith(":"):
                        xmlLabel.text = xmlLabel.text[:-1]
                xmlItem = xmlElement.getchildren()[1]
                if len(xmlItem.getchildren()) > 0:
                    xmlContent = xmlItem.getchildren()[0]
                else:
                    xmlContent = etree.Element("p")
                xmlLabel.tag = "description"
                xmlDescription.append(xmlLabel)
                xmlDescription.append(xmlContent)
                xmlResult.append(xmlDescription)
                intObjectNumber += 1
                if len(xmlItem.getchildren()) > 0:
                    for xmlChild in xmlItem.iterchildren():
                        xmlResult.append(djangoParseObject(xmlChild,indent=True))
                xmlItem.getparent().remove(xmlItem)
        elif xmlElement.tag == "theorem":
            xmlTheoremHead = xmlElement.find(".//head")
            xmlTheoremText = xmlElement.find(".//p")
            strTheoremNumber = xmlElement.get("id-text")
            strTheoremID = xmlElement.get("id")
            xmlResult = etree.Element("EOAtheorem")
            xmlResult.append(xmlTheoremHead)
            xmlResult.append(xmlTheoremText)
            xmlResult.set("order", str(intObjectNumber))
            xmlResult.set("number", strTheoremNumber)
            xmlResult.set("uid", strTheoremID)
            intObjectNumber += 1
        elif xmlElement.findall(".//EOAequationarray"):
            xmlResult = etree.Element("temp")
            for xmlEquation in xmlElement.findall(".//EOAequation"):
                xmlEOAequation = etree.Element("EOAequation")
                xmlEOAequation.set("order", str(intObjectNumber))
                intObjectNumber += 1
                xmlEOAequation.set("number", xmlEquation.get("number"))
                xmlEOAequation.set("filename", xmlEquation.get("filename"))
                if xmlEquation.get("label") is not None:
                    xmlEOAequation.set("label", xmlEquation.get("label"))
                shutil.copy(
                        INPUT_DIR / "items" /xmlEquation.get("filename"),
                        OUTPUT_DIR / "images/"
                )
                # shutil.copy(os.getcwd() + "/items/" + xmlEquation.get("filename"), os.getcwd() + "/CONVERT/django/images/")
                xmlEOAequation.set("TeX", xmlEquation.get("TeX"))
                if xmlEquation.get("label") is not None:
                    xmlEOAequation.set("label", xmlEquation.get("label"))
                xmlResult.append(xmlEOAequation)
        elif xmlElement.findall(".//EOAequationarraynonumber"):
            xmlResult = etree.Element("temp")
            for xmlEquation in xmlElement.findall(".//EOAequationarraynonumber"):
                xmlEOAequation = etree.Element("EOAequation")
                xmlEOAequation.set("order", str(intObjectNumber))
                intObjectNumber += 1
                xmlEOAequation.set("number", "")
                xmlEOAequation.set("filename", xmlEquation.get("filename"))
                shutil.copy(
                        INPUT_DIR / "items" / xmlEquation.get("filename"),
                        OUTPUT_DIR / "images/"
                )
                # shutil.copy(os.getcwd() + "/items/" + xmlEquation.get("filename"), os.getcwd() + "/CONVERT/django/images/")
                xmlEOAequation.set("TeX", xmlEquation.get("TeX"))
                xmlResult.append(xmlEOAequation)
        elif xmlElement.tag == "EOAequationnonumber":
            # Process one EOAequation which is not encapsulated
            xmlResult = etree.Element("EOAequation")
            xmlResult.set("order", str(intObjectNumber))
            intObjectNumber += 1
            xmlResult.set("filename", xmlElement.get("filename"))
            xmlResult.set("TeX", xmlElement.get("TeX"))
            shutil.copy(
                        INPUT_DIR / "items" / xmlElement.get("filename"),
                        OUTPUT_DIR / "images/"
                )
            # shutil.copy(os.getcwd() + "/items/" + xmlElement.get("filename"), os.getcwd() + "/CONVERT/django/images/")
            xmlResult.set("number", "")
        elif xmlElement.findall(".//EOAequation"):
            # Process various Equations which may be encapsulated within <p>
            xmlEquations = xmlElement.findall(".//EOAequation")
            xmlResult = etree.Element("temp")
            for xmlEquation in xmlEquations:
                # Create basic Element EOAequation
                xmlEOAequation = etree.Element("EOAequation")
                xmlEOAequation.set("order", str(intObjectNumber))
                intObjectNumber += 1
                xmlEOAequation.set("number", xmlEquation.get("number"))
                xmlEOAequation.set("TeX", xmlEquation.get("TeX"))
                if xmlEquation.get("uid") is not None:
                    xmlEOAequation.set("uid", xmlEquation.get("uid"))
                shutil.copy(
                        INPUT_DIR / "items" / xmlEquation.get("filename"),
                        OUTPUT_DIR / "images/"
                )
                # shutil.copy(os.getcwd() + "/items/" + xmlEquation.get("filename"), os.getcwd() + "/CONVERT/django/images/")
                xmlEOAequation.set("filename", xmlEquation.get("filename"))
                xmlResult.append(xmlEOAequation)
        elif xmlElement.tag == "EOAequation":
            # Process one EOAequation which is not encapsulated
            xmlResult = etree.Element("EOAequation")
            xmlResult.set("order", str(intObjectNumber))
            intObjectNumber += 1
            xmlResult.set("number", xmlElement.get("number"))
            xmlResult.set("TeX", xmlElement.get("TeX"))
            if xmlElement.get("uid") is not None:
                xmlResult.set("uid", xmlElement.get("uid"))
            shutil.copy(
                    INPUT_DIR / "items" / xmlElement.get("filename"),
                    OUTPUT_DIR / "images/"
            )
            # shutil.copy(os.getcwd() + "/items/" + xmlElement.get("filename"), os.getcwd() + "/CONVERT/django/images/")
            xmlResult.set("filename", xmlElement.get("filename"))
        elif xmlElement.tag == "div3":
            xmlResult = etree.Element("EOAsubsection")
            xmlResult.set("order", str(intObjectNumber))
            intObjectNumber += 1
            xmlResult.append(xmlElement.find("head"))
            for xmlChild in xmlElement.iterchildren():
                xmlResult.append(djangoParseObject(xmlChild))
        elif xmlElement.tag == "div4":
            xmlResult = etree.Element("EOAsubsubsection")
            xmlResult.set("order", str(intObjectNumber))
            intObjectNumber += 1
            xmlResult.append(xmlElement.find("head"))
            for xmlChild in xmlElement.iterchildren():
                xmlResult.append(djangoParseObject(xmlChild))
        elif xmlElement.tag == "epigraph":
            xmlResult = etree.Element("EOAparagraph")
            xmlResult.set("class", "epigraph")
            xmlResult.set("order", str(intObjectNumber))
            intObjectNumber += 1

            x_children = xmlElement.getchildren()
            first_element = True
            for child in x_children:
                if child.tag == "p":
                    child.tag = "tagtobestripped"
                    linebreak = etree.Element("br")
                    xmlResult.append(linebreak)
                    if not first_element:
                        paragraphbreak = etree.Element("br")
                        xmlResult.append(paragraphbreak)
                    xmlResult.append(deepcopy(child))
                elif child.tag == "EOAverse":
                    if not first_element:
                        paragraphbreak = etree.Element("br")
                        xmlResult.append(paragraphbreak)
                    verse_result = treat_verselines(child)
                    xmlResult.append(verse_result)
                first_element = False
        elif xmlElement.tag == "EOAverse":
            xmlResult = etree.Element("EOAparagraph")
            if xmlElement.get("class") is not None:
                xmlResult.set("class", xmlElement.get("class"))
            xmlResult.set("style", "verse")
            xmlResult.set("order", str(intObjectNumber))
            intObjectNumber += 1

            xml_verselines = xmlElement.findall("p")
            xmlResult.append(deepcopy(xml_verselines[0]))
            for xml_verseline in xml_verselines[1:]:
                linebreak = etree.Element("br")
                xmlResult.append(linebreak)
                copied_line = deepcopy(xml_verseline)
                xmlResult.append(copied_line)
            etree.strip_tags(xmlResult, "p")

        elif xmlElement.get("style") == "boxhead":
            xmlElement.tag = "b"
            del xmlElement.attrib["style"]
            wrapping_paragraph = etree.Element("EOAparagraph")
            wrapping_paragraph.set("style", "box")
            libeoaconvert.wrap_into_element(wrapping_paragraph, xmlElement)
            wrapping_paragraph.set("order", str(intObjectNumber))
            intObjectNumber += 1
            xmlResult = wrapping_paragraph
        elif xmlElement.tag == "p" and xmlElement.get("class") == "divider":
            xmlElement.tag = "EOAparagraph"
            xmlElement.set("order", str(intObjectNumber))
            intObjectNumber += 1
            xmlResult = xmlElement
        elif xmlElement.tag == "EOAtocentry":
            # throw them out for the time being
            xmlResult = etree.Element("temp")
        elif xmlElement.tag == "pagebreak":
            # throw them out for the time being
            xmlResult = etree.Element("temp")
        else:
            if xmlElement.getchildren() == [] and not xmlElement.text:
                logging.debug(f"Removing empty paragraph")
                xmlResult = etree.Element("temp")
            else:
                xmlElement.tag = "EOAparagraph"
                logging.debug(f"The beginning of this paragraph is: '{libeoaconvert.gettext(xmlElement)[:40]}…'")
                quoted_paragraph = xmlElement.get("rend")
                if quoted_paragraph is not None and quoted_paragraph == "quoted":
                    xmlElement.set("rend", "quoted")
                xmlElement.set("order", str(intObjectNumber))
                intObjectNumber += 1
                xmlResult = xmlElement
    else:
        logging.info("SPECIAL: %s - %s" % (xmlElement, xmlElement.text))
        xmlResult = xmlElement

    if indent==True:
        xmlResult.set("indent", "True")
    if listtype != None:
        xmlResult.set("listtype", listtype)
    if listnumber != 0:
        xmlResult.set("listnumber", listnumber)
    if uid != None:
        xmlResult.set("id", uid)

    return xmlResult
# def djangoParseObject ends here

def make_index(index_hits, index_type):
    """Build the printable index element from a collection of index hits.

    Every hit is read through ``.get()`` for the attributes ``main``,
    ``display``, ``secondary``, ``chapterorder``, ``elementorder`` and
    ``bold``.  Hits are grouped by their main entry, the main entries are
    sorted case-insensitively and distributed over one ``EOAindexsection``
    per initial character.

    Args:
        index_hits: iterable of index elements found in the document.
        index_type: ``"regular"`` yields an ``EOAprintindex`` root; any
            other value is spliced into the tag name as
            ``EOAprint<index_type>index``.

    Returns:
        The root element of the index.  It has no children when there is
        no usable hit.
    """

    dictIndex = {}

    for xmlEOAindex in index_hits:
        strMainEntry = xmlEOAindex.get("main")
        str_display_entry = xmlEOAindex.get("display")
        # A missing attribute (None) is handled like an empty one.
        if not strMainEntry:
            if not str_display_entry:
                # Nothing to sort by and nothing to show: such an entry
                # cannot be placed in the index at all.
                logging.warning("Index found without main entry and without display string. Skipping it.")
                continue
            strMainEntry = str_display_entry
            logging.warning("Index found without main entry, only display string. Using display string for sorting.")

        # Create the bookkeeping record on first sight of a main entry
        dictEntry = dictIndex.setdefault(
            strMainEntry,
            {"display_string": "", "listMainentries": [], "dictSubentries": {}},
        )

        # store the display string here; the most recent hit wins
        if str_display_entry is not None:
            dictEntry["display_string"] = str_display_entry
        else:
            dictEntry["display_string"] = strMainEntry

        strSubEntry = xmlEOAindex.get("secondary")
        if strSubEntry is None:
            # entry has no subentry: it is a hit for the main entry itself
            dictEntry["listMainentries"].append(xmlEOAindex)
        else:
            # entry has a subentry: collect it on the second level
            dictEntry["dictSubentries"].setdefault(strSubEntry, []).append(xmlEOAindex)

    # Sort the main index
    listSortedKeys = sorted(dictIndex, key=str.lower)

    if index_type == "regular":
        new_index_element = "EOAprintindex"
    else:
        new_index_element = "EOAprint%sindex" % index_type

    # Create new and empty xmlTree for xmlEOAindex
    xmlEOAprintindex = etree.Element(new_index_element)
    xmlEOAindexsection = None
    listFirstChars = []

    for strSortedKey in listSortedKeys:
        strFirstChar = strSortedKey[0].upper()
        if strFirstChar not in listFirstChars:
            logging.debug("Beginning a new letter: %s." % strFirstChar)
            listFirstChars.append(strFirstChar)
            # Flush the section of the previous letter before opening a new one
            if xmlEOAindexsection is not None:
                xmlEOAprintindex.append(xmlEOAindexsection)
            xmlEOAindexsection = etree.Element("EOAindexsection")
            xmlEOAindexsection.set("Character", strFirstChar)

        # beginning a new entry
        dictEntry = dictIndex[strSortedKey]
        xmlEOAindexentry = etree.Element("EOAindexentry")
        xmlEOAindexentry.set("main", strSortedKey)
        xmlEOAindexentry.set("display", dictEntry["display_string"])
        logging.debug("Index entry: %s." % strSortedKey)

        for xmlMainelement in dictEntry["listMainentries"]:
            strChapterOrder = xmlMainelement.get("chapterorder")
            strElementOrder = xmlMainelement.get("elementorder")
            logging.info("%s:%s", strChapterOrder, strElementOrder)

            xmlEOAindexlink = etree.Element("EOAindexlink")
            xmlEOAindexlink.set("chapterorder", strChapterOrder)
            xmlEOAindexlink.set("elementorder", strElementOrder)
            if xmlMainelement.get("bold") is not None:
                xmlEOAindexlink.set("bold", "True")
            xmlEOAindexentry.append(xmlEOAindexlink)

        # If there are any subentries, process them now
        if dictEntry["dictSubentries"]:
            logging.debug("Processing Subentries")
            for strSortedSubKey in sorted(dictEntry["dictSubentries"]):
                xmlEOAindexsubentry = etree.Element("EOAindexsubentry")
                xmlEOAindexsubentry.set("secondary", strSortedSubKey)
                for xmlSubElement in dictEntry["dictSubentries"][strSortedSubKey]:
                    # Insert the links pointing to the subentry
                    xmlEOAindexlink = etree.Element("EOAindexlink")
                    xmlEOAindexlink.set("chapterorder", xmlSubElement.get("chapterorder"))
                    xmlEOAindexlink.set("elementorder", xmlSubElement.get("elementorder"))
                    if xmlSubElement.get("bold") is not None:
                        xmlEOAindexlink.set("bold", "True")
                    xmlEOAindexsubentry.append(xmlEOAindexlink)
                    logging.debug(xmlSubElement.get("secondary"))
                xmlEOAindexentry.append(xmlEOAindexsubentry)
        xmlEOAindexsection.append(xmlEOAindexentry)

    # The last section is still pending; there is none for an empty index
    if xmlEOAindexsection is not None:
        xmlEOAprintindex.append(xmlEOAindexsection)

    return xmlEOAprintindex
# def make_index ends here

def djangoParseHeadline(xmlElement):
    """Split the ``EOAauthor`` of a headline into one element per author.

    The first ``EOAauthor`` found below ``xmlElement`` is removed.  Its
    text is cut at commas and at the conjunctions ``and``/``und``, and
    each resulting name is appended to ``xmlElement`` as a separate
    ``EOAauthor`` element, with surrounding whitespace removed.  Empty
    fragments (for instance after a trailing comma) are dropped.

    Args:
        xmlElement: the headline element; it is modified in place.

    Returns:
        The same ``xmlElement`` that was passed in.
    """
    # Parse EOAauthor and append it to the Chapter Information
    xmlAuthors = xmlElement.find(".//EOAauthor")
    if xmlAuthors is None:
        return xmlElement

    # An empty <EOAauthor/> has no text at all
    strAuthors = xmlAuthors.text or ""
    xmlElement.remove(xmlAuthors)
    # Normalise every separator to a plain comma before splitting
    strAuthors = re.sub("(, and | and | und )", ",", strAuthors)
    listAuthors = [strName.strip() for strName in strAuthors.split(",")]
    logging.debug(listAuthors)

    for strAuthor in listAuthors:
        if not strAuthor:
            continue
        xmlAuthor = etree.Element("EOAauthor")
        xmlAuthor.text = strAuthor
        xmlElement.append(xmlAuthor)

    return xmlElement
# def djangoParseHeadline ends here

def check_publication_cfg(configuration_file):
    """Report every expected option that the publication config lacks.

    The file is read with ``configparser``.  For each option expected in
    the sections ``Technical``, ``General`` and ``Authors`` an error is
    logged when looking it up raises ``KeyError``.  Parsing errors are
    logged as well.  Nothing is returned.

    This function is adapted from the publicationimport script.
    """

    logging.debug("Checking configuration file %s.", configuration_file)

    parsed_cfg = configparser.ConfigParser()
    try:
        parsed_cfg.read(configuration_file)
    except configparser.ParsingError as err:
        logging.error(err)

    # section name -> options that have to be present in that section
    required_options = {
        "Technical": [
            "Serie", "Number", "Title", "Subtitle", "PublicationDate",
            "Language", "License", "ISBN", "Price", "Shoplink",
        ],
        "General": [
            "BriefDescription", "DetailedDescription", "Submitter",
            "EditorialCoordination", "Copyediting", "Translator", "Dedication",
        ],
        "Authors": [
            "Author1", "Author2", "Author3", "Author4", "Author5", "Zusatz",
        ],
    }

    for section_name, option_names in required_options.items():
        for option_name in option_names:
            try:
                # The lookup itself is the check; the value is not used
                parsed_cfg[section_name][option_name]
            except KeyError:
                logging.error("%s is missing in configuration.", option_name)
# def check_publication_cfg ends here


def treat_verselines(verse_element):
    """Dissolve verselines to lines with linebreak milestones.

    Every direct ``p`` child of ``verse_element`` is renamed to
    ``tagtobestripped`` (in place, so the input is modified) and a copy
    of it is collected in a new ``tagtobestripped`` wrapper, with a
    ``br`` element between consecutive lines.

    Args:
        verse_element: element whose direct ``p`` children are the lines.

    Returns:
        The new wrapper element.  It is empty when ``verse_element`` has
        no ``p`` children.
    """

    xml_result = etree.Element("tagtobestripped")

    for position, xml_verseline in enumerate(verse_element.findall("p")):
        xml_verseline.tag = "tagtobestripped"
        # A linebreak goes between lines, not in front of the first one
        if position > 0:
            xml_result.append(etree.Element("br"))
        xml_result.append(deepcopy(xml_verseline))

    return xml_result
# def treat_verselines ends here

def bring_footnote_down_django(footnote, fragment, footnote_number, object_number, unique_id, destination):
    """Replace an inline footnote by a link and append its content to ``destination``.

    captures reusable behavior from the existing code
    potentially, some of the old code could be replaced by calls to this helper

    usage: intObjectNumber = bring_footnote_down_django(xmlFootnote, "fn"+str(intFootnoteNumber), str(intFootnoteNumber), intObjectNumber, tmpStrUID, xmlResult)
    unfortunately, returning the result seemed like a better idea than mutating the global variable

    Args:
        footnote: the footnote element inside the running text.  It is
            handed to ``replace_footnote_with_sup``, receives the class
            ``footnote`` and gets an ``a`` child linking to ``fragment``.
        fragment: target of the link, without the leading ``#``.
        footnote_number: text shown in the link, also stored as the
            ``number`` of the new ``EOAfootnote``.
        object_number: running order number to assign to the new element.
        unique_id: value for the ``id`` attribute of the new element.
        destination: element the new ``EOAfootnote`` is appended to.

    Returns:
        ``object_number`` incremented by one.

    Raises:
        StopIteration: if no ancestor of ``footnote`` has an ``order``
            attribute to anchor the footnote to.
    """

    # Remember the content before the footnote element is rewritten
    kids = list(footnote)
    footnote_text = footnote.text or ""
    replace_footnote_with_sup(footnote)
    footnote.set("class", "footnote")
    anchor = etree.Element("a")
    anchor.set("href", "#" + fragment) # "fn" + str(intFootnoteNumber)
    anchor.text = footnote_number # str(intFootnoteNumber)
    footnote.append(anchor)
    foot = etree.Element("EOAfootnote")
    foot.set("order", str(object_number))
    object_number += 1
    foot.set("number", footnote_number)
    # The footnote is anchored to the nearest ancestor that has an order
    anchor_number = next(
        parent.get("order")
        for parent in footnote.iterancestors()
        if parent.get("order") is not None
    )
    foot.set("anchor", anchor_number)
    foot.set("id", unique_id)
    foot.text = footnote_text
    for kid in kids:
        if kid.tag == "EOAequationnonumber":
            # Same source and target as every other equation image copied
            # in this script, independent of the current working directory
            shutil.copy(
                INPUT_DIR / "items" / kid.get("filename"),
                OUTPUT_DIR / "images/"
            )
        foot.append(kid)
    destination.append(foot)

    return object_number
# def bring_footnote_down_django ends here

###############################
# End of function definitions #
###############################

# Iterate over Chapters, Sections, Subsections, and Subsubsections and
# put all on one level: every heading and every object becomes a direct
# child of its EOAchapter, in document order.
intChapterNumber = 1
listPartIDs = []
for xmlChapter in xmlChapters:
    # The running order of the elements restarts with every chapter
    intObjectNumber = 1
    # Process Chapter Title
    xmlEOAchapter = etree.Element("EOAchapter")
    xmlEOAchapter.set("type","regular")

    # Chapters without a language attribute are treated as English
    xmlLanguage = xmlChapter.get("language")
    if xmlLanguage is not None:
        # KT changing this after separating the big script
        strLanguage = xmlLanguage #or "english"
    else:
        strLanguage = "english"

    xmlEOAchapter.set("language", strLanguage)
    xmlEOAchapter.set("order", str(intChapterNumber))
    if xmlChapter.get("rend") != "nonumber":
        xmlEOAchapter.set("id", xmlChapter.get("id"))
    xmlChapterHeadline = xmlChapter.find(".//head")
    if xmlChapter.get("id") in dictChapters:
        xmlEOAchapter.set("number", dictChapters[xmlChapter.get("id")])
    else:
        xmlEOAchapter.set("number", "")
    logging.info("-----------------------------------------------------")
    logging.info(libeoaconvert.gettext(xmlChapterHeadline))
    xmlEOAchapter.append(djangoParseHeadline(xmlChapterHeadline))
    # Deal with EOAauthor
    if xmlChapter.find(".//EOAauthor") is not None:
        xmlEOAchapter.append(xmlChapter.find(".//EOAauthor"))
    # Attach enclosing Part to Chapter, see django structure for this purpose.
    # Only the first chapter of a part receives the part headline.
    if xmlChapter.getparent().tag == "div0":
        if xmlChapter.getparent().get("id") not in listPartIDs:
            listPartIDs.append(xmlChapter.getparent().get("id"))
            xmlPartHeadline = xmlChapter.getparent().find("head")
            xmlPartHeadline.tag = "EOAparthtml"
            xmlEOAchapter.append(xmlPartHeadline)
    # Append Chapter to xmlEOAdocument
    xmlEOAdocument.append(xmlEOAchapter)
    # iterate over children of Chapter
    for xmlChapterChild in xmlChapter.iterchildren():
        if xmlChapterChild.tag == "div2":
            # Process Section Title
            xmlEOAsection = etree.Element("EOAsection")
            xmlEOAsection.set("order", str(intObjectNumber))
            if xmlChapterChild.get("rend") != "nonumber":
                xmlEOAsection.set("id", xmlChapterChild.get("id"))
                xmlEOAsection.set("number", dictSections[xmlChapterChild.get("id")])
            intObjectNumber += 1
            # Take the heading of this section itself. Searching the whole
            # chapter only finds it as long as every earlier heading has
            # already been moved out of the chapter.
            xmlHead = xmlChapterChild.find(".//head")
            logging.debug("Section '%s'" % libeoaconvert.gettext(xmlHead))
            xmlEOAsection.append(djangoParseHeadline(xmlHead))
            xmlEOAchapter.append(xmlEOAsection)
            # Iterate over Children of Section
            for xmlSectionChild in xmlChapterChild.iterchildren():
                if xmlSectionChild.tag == "div3":
                    # Process Subsection Title
                    xmlEOAsubsection = etree.Element("EOAsubsection")
                    xmlEOAsubsection.set("order", str(intObjectNumber))
                    if xmlSectionChild.get("rend") != "nonumber":
                        xmlEOAsubsection.set("id", xmlSectionChild.get("id"))
                        xmlEOAsubsection.set("number", dictSections[xmlSectionChild.get("id")])
                    intObjectNumber += 1
                    xmlHead = xmlSectionChild.find(".//head")
                    logging.debug("Subsection '%s'" % libeoaconvert.gettext(xmlHead))
                    xmlEOAsubsection.append(djangoParseHeadline(xmlHead))
                    xmlEOAchapter.append(xmlEOAsubsection)
                    # Iterate over children of Subsection
                    for xmlSubsectionChild in xmlSectionChild.iterchildren():
                        if xmlSubsectionChild.tag == "div4":
                            # Process Subsubsection Title
                            xmlEOAsubsubsection = etree.Element("EOAsubsubsection")
                            xmlEOAsubsubsection.set("order", str(intObjectNumber))
                            intObjectNumber += 1
                            xmlHead = xmlSubsectionChild.find(".//head")
                            logging.debug(libeoaconvert.gettext(xmlHead))
                            xmlEOAsubsubsection.append(djangoParseHeadline(xmlHead))
                            xmlEOAchapter.append(xmlEOAsubsubsection)
                            # Iterate over children of Subsubsection
                            for xmlSubsubsectionChild in xmlSubsectionChild.iterchildren():
                                xmlEOAchapter.append(djangoParseObject(xmlSubsubsectionChild))
                        else:
                            xmlEOAchapter.append(djangoParseObject(xmlSubsectionChild))
                elif xmlSectionChild.tag == "div4":
                    # A subsubsection placed directly below a section
                    xmlEOAsubsubsection = etree.Element("EOAsubsubsection")
                    xmlEOAsubsubsection.set("order", str(intObjectNumber))
                    intObjectNumber += 1
                    xmlHead = xmlSectionChild.find(".//head")
                    xmlEOAsubsubsection.append(djangoParseHeadline(xmlHead))
                    xmlEOAchapter.append(xmlEOAsubsubsection)
                    # Iterate over children of Subsubsection
                    for xmlSubsubsectionChild in xmlSectionChild.iterchildren():
                        if xmlSubsubsectionChild.tag == "div5":
                            logging.debug("jubel")
                            # although it's div5, promote it to subsubsection
                            xmlEOAparasection = etree.Element("EOAsubsubsection")
                            xmlEOAparasection.set("order", str(intObjectNumber))
                            intObjectNumber += 1
                            xmlHead = xmlSubsubsectionChild.find(".//head")
                            logging.debug(libeoaconvert.gettext(xmlHead))
                            xmlEOAparasection.append(djangoParseHeadline(xmlHead))
                            xmlEOAchapter.append(xmlEOAparasection)
                            for xmlParasectionChild in xmlSubsubsectionChild.iterchildren():
                                xmlEOAchapter.append(djangoParseObject(xmlParasectionChild))
                        else:
                            xmlEOAchapter.append(djangoParseObject(xmlSubsubsectionChild))
                else:
                    xmlEOAchapter.append(djangoParseObject(xmlSectionChild))
        else:
            xmlEOAchapter.append(djangoParseObject(xmlChapterChild))
    intChapterNumber += 1

# Dump the tree as it looks after all chapters have been processed
libeoaconvert.debug_xml_here(
        xmlTree,
        "afterchapter",
        DEBUG_DIR
)

logging.info("----------------------------------------------")
logging.info("Processing Facsimile Parts")

# For every div0 that contains facsimiles, create an EOAfacsimilepart with
# one EOAfacsimilepage per page: the page image is copied to the output
# directory and, where a transcription is referenced as "document,page",
# it is downloaded in each of the modes below together with its images.
listModes = ["text", "textPollux", "xml"]
strBasicURL = "http://mpdl-system.mpiwg-berlin.mpg.de/mpdl/interface/page-fragment.xql?document="
parserECHO = etree.XMLParser()
# objectify is used for the downloaded transcriptions only and is not
# provided by the import block at the top of the file.
from lxml import objectify

xmlParts = xmlTree.findall("//div0")
intFacNumber = 1
for xmlPart in xmlParts:
    intObjectNumber = 1
    intFacPartNumber = 1
    if xmlPart.find(".//EOAfacsimilepart") is None:
        continue
    xmlEOAfacsimilepart = etree.Element("EOAfacsimilepart")
    # Facsimile parts continue the numbering of the chapters
    xmlEOAfacsimilepart.set("order", str(intChapterNumber))
    xmlEOAfacsimileparthead = xmlPart.find(".//head")
    for xmlChild in xmlEOAfacsimileparthead:
        if xmlChild.tag == "hi":
            xmlChild.tag = "em"
            # the attribute need not be present
            xmlChild.attrib.pop("rend", None)
    xmlEOAfacsimilepart.append(xmlEOAfacsimileparthead)
    intChapterNumber += 1
    xmlEOAdocument.append(xmlEOAfacsimilepart)
    xmlFacsimilepages = xmlPart.findall(".//EOAfacsimilepage")
    intFacPageNumber = 1
    for xmlFacsimilepage in xmlFacsimilepages:
        strImageFile = xmlFacsimilepage.find(".//file").text
        strLabel = xmlFacsimilepage.find(".//label").text
        strPagenumber = xmlFacsimilepage.find(".//pagenumber").text or ""
        xmlEOAfacsimilepage = etree.Element("EOAfacsimilepage")
        xmlEOAfacsimilepage.set("order", str(intObjectNumber))
        # TODO: somehow deal with a (missing) file suffix here, and convert files if necessary
        strImageFile = strImageFile.rstrip("\n")
        # The directory part is fused into the file name (slashes removed),
        # so all images end up side by side in one flat directory
        strImageFileDir = os.path.dirname(strImageFile)
        strImageFileDir = re.sub("/", "", strImageFileDir)
        strImageFileName = os.path.basename(strImageFile)
        shutil.copy(
                PUBLICATION_DIR / strImageFile,
                OUTPUT_DIR / "images" / (strImageFileDir + strImageFileName)
        )
        intObjectNumber += 1
        # Download transcription for this Page
        fulltext_string = xmlFacsimilepage.find(".//fulltext").text
        if fulltext_string is not None:
            logging.debug(f"Found a link to full text: {fulltext_string}")
            if "," not in fulltext_string:
                logging.info("Fulltext is linked in the document.")
                # continue here!!! (this case is not handled yet)
            else:
                # "document,page"
                strFacsimileURL = re.split(",", fulltext_string)[0]
                strFacsimilePage = re.split(",", fulltext_string)[1]
                for strMode in listModes:
                    strURL = strBasicURL + strFacsimileURL + "&pn=" + strFacsimilePage + "&mode=" + strMode
                    logging.debug("Processing Facsimile : " + strURL)
                    xmlECHOtree = etree.parse(strURL, parserECHO)
                    # Remove ECHO-namespaces
                    objectify.deannotate(xmlECHOtree, xsi_nil=True)
                    etree.cleanup_namespaces(xmlECHOtree)
                    xmlDivs = xmlECHOtree.findall(".//div")
                    for xmlDiv in xmlDivs:
                        if xmlDiv.get("class") == "pageContent":
                            # Create new EOA-Element
                            xmlEOAfacsimileelement =  etree.Element("EOAfacsimileelement")
                            xmlEOAfacsimileelement.set("type", strMode)
                            # Fix Images in the <div>-Element
                            xmlImages = xmlDiv.findall(".//img")
                            intFacImgNumber = 1
                            for xmlImage in xmlImages:
                                strImageSrc = xmlImage.get("src")
                                strSupplementName = "facsupplements_" + str(intFacNumber) + "_" + str(intFacPageNumber) + "_" + str(intFacImgNumber) + ".jpg"
                                # The src comes from a remote document and the
                                # target may contain spaces: pass both as single
                                # arguments instead of re-splitting a command line
                                listArguments = shlex.split(str(curl)) + [
                                        str(strImageSrc),
                                        "-o",
                                        str(OUTPUT_DIR / "images" / strSupplementName),
                                ]
                                try:
                                    subprocess.check_output(listArguments, shell=False, universal_newlines=True)
                                except (subprocess.SubprocessError, OSError, ValueError):
                                    # Download failed: the element is renamed so
                                    # that it is stripped together with the other
                                    # "temp" elements below
                                    xmlImage.tag = "temp"
                                else:
                                    # Point the img element to the local copy
                                    xmlImage.set("src", strSupplementName)
                                intFacImgNumber += 1
                            xmlEOAfacsimileelement.append(xmlDiv)
                            xmlEOAfacsimilepage.append(xmlEOAfacsimileelement)
        intFacPageNumber += 1
        xmlEOAfacsimilepage.set("file", strImageFileDir + strImageFileName)
        xmlEOAfacsimilepage.set("label", str(strLabel))
        xmlEOAfacsimilepage.set("pagenumber", str(strPagenumber))
        xmlEOAfacsimilepart.append(xmlEOAfacsimilepage)
        # Advance the counter ("=+ 1" merely assigned 1 again, so the
        # supplement file names of different parts could collide)
        intFacNumber += 1
etree.strip_tags(xmlDjangoTree, "temp")
logging.info("----------------------------------------------")
logging.info("Processing and linking Footnotes for django")

# Footnotes are handled chapter by chapter on the converted document
xmlEOAchapters = xmlEOAdocument.findall(".//EOAchapter")

# debug_chapters(xmlEOAchapters)

# The attributes of the 'footnotes' entry in the translation file supply
# the heading of the footnote section; they are looked up by the
# two-letter language code of each chapter further down.
translation_xml = etree.parse( str( TRANSLATION_FILE ) )
dictLangFootnotes = translation_xml.find("//entry[@name='footnotes']").attrib

for xmlEOAchapter in xmlEOAchapters:
    groupings = libeoaconvert.get_bigfoot_data(xmlEOAchapter)
    has_old = 0 != len(xmlEOAchapter.findall(".//note[@place='Inline']"))
    has_new = 0 != len(
        [ # flatten
            note
            for grouping, notes in groupings
            for note in notes
        ]
    )
    # XOR falls through, AND is an error (that should have already been thrown during the epub phase), and NOR skips to the next chapter
    if has_old:
        if has_new:
            raise FootnoteError("This chapter contains both old-style footnotes and new-style footnotes")
    else:
        if not has_new:
            continue
    # Find out running order of last item the chapter
    # Hier pro FN zunächst die EOAequationnonumber in <p> korrigieren
    # Dann pro FN die Kindelemente abarbeiten und an die neue FN dran hängen
    # Ggf. aufpassen, ob ein Absatz mit indent versehen ist, dann blockquote drum herum machen
    xmlElement = xmlEOAchapter[(len(xmlEOAchapter)-1)]
    logging.debug(etree.tostring(xmlElement))
    intObjectNumber = (int(xmlElement.get("order")) + 1)
    intFootnoteNumber = 1
    xmlResult = etree.Element("temp")
    xmlEOAsection = etree.Element("EOAsection")
    xmlEOAsection.set("order", str(intObjectNumber))
    intObjectNumber += 1
    xmlHead = etree.Element("head")

    xmlHead.text = dictLangFootnotes[libeoaconvert.two_letter_language(xmlEOAchapter.get("language"))]

    xmlEOAsection.append(xmlHead)
    xmlResult.append(xmlEOAsection)

    for grouping, notes in groupings:
        for index, note in enumerate(notes):
            # do for the new-style notes what the old code did for the other footnotes
            fntext = str(index+1)
            if "lower-latin" == grouping:
                fntext = alph_footnote_index(index)
            unique_id = "fn%s" % fntext
            intObjectNumber = bring_footnote_down_django(note, unique_id, fntext, intObjectNumber, unique_id, xmlResult)

    intFootnoteNumber = 1
    xmlFootnotes = xmlEOAchapter.findall(".//note[@place='Inline']")
    for xmlFootnote in xmlFootnotes:
        tmpStrUID = xmlFootnote.get("id")
        logging.debug(f"Looking at footnote {tmpStrUID}.")

        xml_EOA_indices = xmlFootnote.xpath(".//EOAindex | .//EOAindexperson | .//EOAindexlocation")
        for xmlEOAindex in xml_EOA_indices:
            logging.debug("Removing index entry in footnote.")
            xmlEOAindex.tag = "elementtoberemoved"
            etree.strip_elements(xmlFootnote, "elementtoberemoved", with_tail=False)

        xmlFootnoteContent = list(xmlFootnote)
        strFootnoteText = xmlFootnote.text or ""
        tmpTail = xmlFootnote.tail
        xmlFootnote.clear()
        xmlFootnote.tail = tmpTail
        xmlFootnote.tag = "sup"
        xmlFootnote.set("class", "footnote")
        xmlFootnoteLink = etree.Element("a")
        xmlFootnoteLink.set("href", "#fn" + str(intFootnoteNumber))
        xmlFootnoteLink.text = str(intFootnoteNumber)
        xmlFootnote.append(xmlFootnoteLink)
        xmlEOAfootnote = etree.Element("EOAfootnote")
        xmlEOAfootnote.set("order", str(intObjectNumber))
        intObjectNumber += 1
        xmlEOAfootnote.set("number", str(intFootnoteNumber))
        for xmlParent in xmlFootnote.iterancestors():
            if xmlParent.get("order") is not None:
                strFootnoteAnchorNumber = xmlParent.get("order")
                break
        xmlEOAfootnote.set("anchor", strFootnoteAnchorNumber)
        xmlEOAfootnote.set("id", tmpStrUID)
        xmlEOAfootnote.text = strFootnoteText
        for xmlElement in xmlFootnoteContent:
            if xmlElement.tag != "p":
                surrounding_p = etree.fromstring("""<p></p>""")
                if xmlElement.tag == "EOAequationnonumber":
                    shutil.copy(
                            PUBLICATION_DIR / "items" / xmlElement.get("filename"),
                            OUTPUT_DIR / "images/"
                    )
                elif xmlElement.tag == "EOAverse":
                    verse_tail = xmlElement.tail
                    xmlElement.tail = ""
                    xmlElement.tag = "span"
                    xmlElement.set("style", "verse")
                    versecontent_bytes = etree.tostring(xmlElement)
                    versecontent_string = versecontent_bytes.decode("utf-8")
                    xml_verselines = versecontent_string.split("\n")
                    logging.debug("Removing surrounding EOAverse tags")
                    xml_result_string = xml_verselines[0]
                    for xml_verseline in xml_verselines[1:]:
                        xml_result_string += f"<br/>{xml_verseline}"
                    verse_paragraph = etree.fromstring(xml_result_string)
                    xmlElement.tail = verse_tail
                    surrounding_p.append(xmlElement)
                elif xmlElement.tag == "span":
                    surrounding_p.append(xmlElement)

                elif xmlElement.tag == "EOAfigurenonumber":
                    surrounding_p = etree.fromstring("""<img/>""")
                    strImageFileString = xmlElement.find(".//file").text
                    strImageFileString = strImageFileString.rstrip("\n")
                    strImageFileDir = os.path.dirname(strImageFileString)
                    strImageFileDir = re.sub("/", "", strImageFileDir)
                    strImageFileName = os.path.basename(strImageFileString)
                    strImageFileNamewoSuffix = os.path.splitext(strImageFileName)[0]
                    shutil.copy(
                        PUBLICATION_DIR / strImageFileString,
                        OUTPUT_DIR / "images" / "embedded" / (strImageFileDir + strImageFileName)
                    )
                    surrounding_p.set("src", strImageFileDir + strImageFileName)
                    surrounding_p.set("width", xmlElement.find(".//width").text + "%;")
                xmlElement = surrounding_p
            else:
                logging.debug("Footnote paragraph")
            xmlEOAfootnote.append(xmlElement)
        xmlResult.append(xmlEOAfootnote)

        footnote_as_plain_text = libeoaconvert.remove_all_tags(xmlEOAfootnote)
        if len(footnote_as_plain_text) > 200:
            maybe_truncated_footnote_as_plain_text = footnote_as_plain_text[:200] + "…"
        else:
            maybe_truncated_footnote_as_plain_text = footnote_as_plain_text
        xmlFootnoteLink.set("title", maybe_truncated_footnote_as_plain_text)
        intFootnoteNumber += 1
    xmlEOAchapter.append(xmlResult)

# Remove temp-Tag
etree.strip_tags(xmlDjangoTree, "temp")

logging.info("----------------------------------------------")
logging.info("Processing various Elements")

# Rewrite the intermediate inline markup of every chapter into the HTML-like
# elements used on the Django side, and copy the image files those elements
# reference into OUTPUT_DIR / "images".
for xmlEOAchapter in xmlEOAchapters:
    # <hi rend="it|bold|red"> -> <em> / <b> / <span class="red">; any other
    # rend value is left untouched.
    xmlEmphasized = xmlEOAchapter.findall(".//hi")
    for xmlEmph in xmlEmphasized:
        rend_attribute = xmlEmph.get("rend")
        if rend_attribute == "it":
            xmlEmph.tag = "em"
            del xmlEmph.attrib["rend"]
        elif rend_attribute == "bold":
            xmlEmph.tag = "b"
            del xmlEmph.attrib["rend"]
        elif rend_attribute == "red":
            xmlEmph.tag = "span"
            xmlEmph.set("class", "red")
            del xmlEmph.attrib["rend"]
    xmlHyperlinks = xmlEOAchapter.findall(".//xref")
    for xmlHyperlink in xmlHyperlinks:
        libeoaconvert.format_hyperlinks_django_epub(xmlHyperlink, strLanguage)
    # Convert EOAup to <sup>
    xmlUps = xmlEOAchapter.findall(".//EOAup")
    for xmlUp in xmlUps:
        xmlUp.tag = "sup"
    # Convert EOAdown to <sub>
    xmlDowns = xmlEOAchapter.findall(".//EOAdown")
    for xmlDown in xmlDowns:
        xmlDown.tag = "sub"
    # Convert EOAst (strike-out) to a styled <span>
    xmlStrikeouts = xmlEOAchapter.findall(".//EOAst")
    for xmlStrikeout in xmlStrikeouts:
        xmlStrikeout.tag = "span"
        xmlStrikeout.set("style", "text-decoration: line-through;")
    # Convert EOAls (letter-spacing) to a styled <span>
    xmlLetterspaceds = xmlEOAchapter.findall(".//EOAls")
    for xmlLetterspaced in xmlLetterspaceds:
        xmlLetterspaced.tag = "span"
        xmlLetterspaced.set("style", "letter-spacing: 0.5em;")
    # Convert EOAcaps (small caps) to a styled <span>
    xmlCaps = xmlEOAchapter.findall(".//EOAcaps")
    for xmlCap in xmlCaps:
        xmlCap.tag = "span"
        xmlCap.set("style", "font-variant:small-caps;")
    # Convert EOAineq into <img> tags; the TeX source becomes the alt text
    # and the rendered image is copied from the input items directory.
    xmlInlineEquations = xmlEOAchapter.findall(".//EOAineq")
    for xmlInlineEquation in xmlInlineEquations:
        xmlInlineEquation.tag = "img"
        xmlInlineEquation.set("class", "EOAineq")
        xmlInlineEquation.set("alt", xmlInlineEquation.get("TeX"))
        shutil.copy(
                    INPUT_DIR / "items" / xmlInlineEquation.get("src"),
                    OUTPUT_DIR / "images" / xmlInlineEquation.get("src")
            )

    # Convert EOAchem into <img> tags (same treatment and CSS class as EOAineq)
    xml_inline_chems = xmlEOAchapter.findall(".//EOAchem")
    for xml_inline_chem in xml_inline_chems:
        xml_inline_chem.tag = "img"
        xml_inline_chem.set("class", "EOAineq")
        xml_inline_chem.set("alt", xml_inline_chem.get("TeX"))
        shutil.copy(
                    INPUT_DIR / "items" / xml_inline_chem.get("src"),
                    OUTPUT_DIR / "images" / xml_inline_chem.get("src")
            )

    # Convert EOAinline into <img> tags. The element text holds the image
    # path relative to PUBLICATION_DIR.
    xmlInlineElements = xmlEOAchapter.findall(".//EOAinline")
    for xmlInlineElement in xmlInlineElements:
        xmlInlineElement.tag = "img"
        # The class used to be set to "EOAinline" and then immediately
        # overwritten; only the effective value is set now (attribute order
        # in the output is unchanged: class, then alt).
        xmlInlineElement.set("class", "eoainlineimage")
        xmlInlineElement.set("alt", "")
        strInlineElementFilePath = xmlInlineElement.text
        strInlineElementFileName = os.path.basename(strInlineElementFilePath)
        strInlineElementDirName = os.path.dirname(strInlineElementFilePath)
        # Only the last directory component is used as prefix of the
        # flattened target file name.
        strInlineElementSubDirName = os.path.dirname(strInlineElementFilePath).split(os.path.sep)[-1]
        xmlInlineElement.text = None

        if os.path.splitext(strInlineElementFileName)[1].lower() == ".pdf":
            logging.debug(f"""Found a PDF file: {PUBLICATION_DIR / strInlineElementDirName / strInlineElementFileName}""")
            strImageFilepath = libeoaconvert.sanitizeImage(
                    PUBLICATION_DIR / strInlineElementDirName / strInlineElementFileName,
                    TEMP_DIR,
                    GM_PATH,
                    PDFCROP_EXEC,
                    margin=False
            )
            # NOTE(review): the suffix test above is case-insensitive but this
            # replacement is not, so "x.PDF" keeps its name - confirm which
            # file sanitizeImage() actually writes before changing this.
            strInlineElementFileName = strInlineElementFileName.replace(".pdf", ".png")

        xmlInlineElement.set("src", strInlineElementSubDirName + strInlineElementFileName)
        logging.debug(f"{strInlineElementDirName} is dirname, {strInlineElementFileName} is filename/basepath")
        # Build source and target once so that the log message shows exactly
        # the paths that are copied (the old message reported a target built
        # from the full directory name instead of its last component).
        inline_image_source = PUBLICATION_DIR / strInlineElementDirName / strInlineElementFileName
        strNewImagePath = OUTPUT_DIR / "images/embedded" / (strInlineElementSubDirName + strInlineElementFileName)
        logging.debug(f"copy from {inline_image_source} to {strNewImagePath}")
        shutil.copy(inline_image_source, strNewImagePath)

    # The four citation flavours all become <span class="citation"
    # rel="popover"> so the frontend can attach the citation popover.
    # Change EOAcitenumeric into a span
    xmlEOAcitenumerics = xmlEOAchapter.findall(".//EOAcitenumeric")
    for xmlEOAcitenumeric in xmlEOAcitenumerics:
        xmlEOAcitenumeric.tag = "span"
        xmlEOAcitenumeric.set("class", "citation")
        xmlEOAcitenumeric.set("rel", "popover")
    # Change EOAciteauthoryear into a span
    xmlEOAciteauthoryears = xmlEOAchapter.findall(".//EOAciteauthoryear")
    for xmlEOAciteauthoryear in xmlEOAciteauthoryears:
        xmlEOAciteauthoryear.tag = "span"
        xmlEOAciteauthoryear.set("class", "citation")
        xmlEOAciteauthoryear.set("rel", "popover")
    # Change EOAciteyear into a span
    xmlEOAciteyears = xmlEOAchapter.findall(".//EOAciteyear")
    for xmlEOAciteyear in xmlEOAciteyears:
        xmlEOAciteyear.tag = "span"
        xmlEOAciteyear.set("class", "citation")
        xmlEOAciteyear.set("rel", "popover")
    # Change EOAcitemanual into a span
    xmlEOAcitemanuals = xmlEOAchapter.findall(".//EOAcitemanual")
    for xmlEOAcitemanual in xmlEOAcitemanuals:
        xmlEOAcitemanual.tag = "span"
        xmlEOAcitemanual.set("class", "citation")
        xmlEOAcitemanual.set("rel", "popover")

logging.info("----------------------------------------------")
logging.info("Processing Cross References")

libeoaconvert.debug_xml_here(
        xmlDjangoTree,
        "beforecrossreference",
        DEBUG_DIR
)

# Labels/ids that could not be resolved; filled below.
failed_ids = []
# Substitute references with their targets (with links): every <EOAref> is
# turned into an <a> whose text is the target's number (or the original text
# for text links) and whose href is "../<chapter order>/index.html#<object order>".
for xmlEOAchapter in xmlEOAchapters:
    # for hyperimage collages: an <originalcontents> element that directly
    # follows an <EOAref> is moved inside that reference.
    originalcontents = xmlEOAchapter.findall(".//originalcontents")
    # findall() always returns a list, so test for emptiness (the former
    # "is not None" test made the else branch unreachable).
    if originalcontents:
        for originalcontent in originalcontents:
            previous_element = originalcontent.getprevious()
            if originalcontent.getparent().tag == "EOAref":
                pass
            elif previous_element is None or previous_element.tag != "EOAref":
                # No preceding sibling at all also counts as stray.
                logging.error("Found a stray originalcontents element.")
            else:
                oc_tail = originalcontent.tail
                originalcontent.tail = ""
                previous_element.append(originalcontent)
                if previous_element.tail is not None:
                    logging.warning("Appending the old tail of EOAref")
                    # oc_tail is None when nothing followed the element.
                    previous_element.tail += oc_tail or ""
                else:
                    previous_element.tail = oc_tail
    else:
        logging.info("No originalcontents elements found.")

    xmlReferences = xmlEOAchapter.findall(".//EOAref")
    for xmlReference in xmlReferences:
        # Per-reference defaults; an empty strObjectOrder after the lookup
        # means the target could not be located.
        strResult = "!!! Cross Reference !!!"
        strChapterOrder = ""
        strObjectOrder = ""
        ref_is_text = False
        ref_is_collage = False

        reference_type = xmlReference.get("type")
        originalcontents = xmlReference.find("originalcontents")
        xmlReferenceLabel = xmlReference.find("Label")
        xmlReferenceLabelText = xmlReferenceLabel.text
        xmlReferenceRef = xmlReference.find("ref")
        xmlReferenceRefTarget = xmlReferenceRef.get("target")

        # The target kind is determined by which lookup dictionary knows the
        # label / target id.
        if xmlReferenceLabelText in dictEquations:
            # Grab Number from Dictionary
            strResult = dictEquations[xmlReferenceLabelText]
            # Go through all equations and find the corresponding Equation
            xmlEOAequations = xmlEOAdocument.findall(".//EOAequation")
            for xmlEOAequation in xmlEOAequations:
                tmpReferenceLabelText = xmlEOAequation.get("label")
                if xmlReferenceLabelText == tmpReferenceLabelText:
                    logging.debug("Successfully found link to array formula: %s" % strResult)
                    for xmlParent in xmlEOAequation.iterancestors():
                        if xmlParent.tag == "EOAchapter":
                            strChapterOrder = xmlParent.get("order")
                            strObjectOrder = xmlEOAequation.get("order")
        elif xmlReferenceRefTarget in dictEquations:
            # Grab Number from Dictionary
            strResult = dictEquations[xmlReferenceRefTarget]
            # Go through all equations and find the corresponding Equation
            xmlEOAequations = xmlEOAdocument.findall(".//EOAequation")
            for xmlEOAequation in xmlEOAequations:
                tmpReferenceRefTarget = xmlEOAequation.get("uid")
                if xmlReferenceRefTarget == tmpReferenceRefTarget:
                    logging.debug("Successfully found link to normal formula: %s" % strResult)
                    for xmlParent in xmlEOAequation.iterancestors():
                        if xmlParent.tag == "EOAchapter":
                            strChapterOrder = xmlParent.get("order")
                            strObjectOrder = xmlEOAequation.get("order")
        elif xmlReferenceRefTarget in dictLists:
            logging.debug("Found link to list.")
            strResult = dictLists[xmlReferenceRefTarget]
            xmlEOAlistitem = xmlEOAdocument.xpath("//EOAchapter/*[contains(@id, $targetuid)]", targetuid = xmlReferenceRefTarget)[0]
            for xmlParent in xmlEOAlistitem.iterancestors():
                if xmlParent.tag == "EOAchapter":
                    strChapterOrder = xmlParent.get("order")
                    strObjectOrder = xmlEOAlistitem.get("order")
        elif xmlReferenceRefTarget in dictChapters:
            logging.debug("Found link to chapter.")
            strResult = dictChapters[xmlReferenceRefTarget]
            # Own name for the xpath result: the old code rebound the outer
            # loop variable xmlEOAchapter to this list.
            chapter_matches = xmlEOAdocument.xpath(f".//EOAchapter[@id='{xmlReferenceRefTarget}']")
            # Reset for every reference so that a failed lookup can neither
            # raise NameError nor reuse the chapter of an earlier reference.
            right_chapter = None
            if len(chapter_matches) == 0:
                logging.warning("There seems to be no corresponding id for %s." % xmlReferenceRefTarget)
                # if uid is the one from the anchor after the head
                # element, that anchor element has been removed by now
                # and we need to find the corresponding element by
                # string comparison in the dictionary
                same_sr = [i for i in dictChapters if dictChapters[i] == strResult]
                same_sr.remove(xmlReferenceRefTarget)
                if len(same_sr) == 0:
                    logging.error("id cannot be found.")
                elif len(same_sr) > 1:
                    logging.error("id is ambiguous.")
                else:
                    logging.info(f"Using {same_sr[0]} instead.")
                    right_chapter = xmlEOAdocument.xpath(f".//EOAchapter[@id='{same_sr[0]}']")[0]
            elif len(chapter_matches) > 1:
                logging.error("The xml:id %s is assigned more than once. This is not allowed. Exiting." % xmlReferenceLabelText)
                sys.exit(2)
            else:
                right_chapter = chapter_matches[0]

            # Without a resolved chapter the orders stay empty and the
            # reference is reported as missing further down.
            if right_chapter is not None:
                strChapterOrder = right_chapter.get("order")
                strObjectOrder = "top"
        elif xmlReferenceRefTarget in dictTheorems:
            logging.debug("Found link to ein Theorem")
            strResult = dictTheorems[xmlReferenceRefTarget]
            for xmlEOAtheorem in xmlEOAdocument.findall(".//EOAtheorem"):
                if xmlEOAtheorem.get("uid") == xmlReferenceRefTarget:
                    logging.debug("Successfully handled link to a theorem: %s " % strResult)
                    for xmlParent in xmlEOAtheorem.iterancestors():
                        if xmlParent.tag == "EOAchapter":
                            strObjectOrder = xmlEOAtheorem.get("order")
                            strChapterOrder = xmlParent.get("order")
        elif xmlReferenceRefTarget in dictSections:
            logging.debug("Found link to section")
            strResult = dictSections[xmlReferenceRefTarget]
            xmlEOAsection = xmlEOAdocument.xpath(f".//EOAsection[@id='{xmlReferenceRefTarget}']")
            # Same reset as for chapters above.
            right_section = None
            if len(xmlEOAsection) == 0:
                logging.warning("There seems to be no corresponding id for %s." % xmlReferenceRefTarget)
                # see explanation at dictChapters
                same_sr = [i for i in dictSections if dictSections[i] == strResult]
                same_sr.remove(xmlReferenceRefTarget)
                if len(same_sr) == 0:
                    logging.error("id cannot be found.")
                elif len(same_sr) > 1:
                    logging.error("id is ambiguous.")
                else:
                    logging.info(f"Using {same_sr[0]} instead.")
                    right_section = xmlEOAdocument.xpath(f".//EOAsection[@id='{same_sr[0]}']")[0]
            elif len(xmlEOAsection) > 1:
                logging.error("The xml:id %s is assigned more than once. This is not allowed. Exiting." % xmlReferenceLabelText)
                sys.exit(2)
            else:
                right_section = xmlEOAsection[0]

            if right_section is not None:
                for xmlParent in right_section.iterancestors():
                    if xmlParent.tag == "EOAchapter":
                        strChapterOrder = xmlParent.get("order")
                        strObjectOrder = right_section.get("order")
            # The id may also belong to a subsection, which then wins.
            xmlEOAsubsections = xmlEOAdocument.findall(".//EOAsubsection")
            for xmlEOAsubsection in xmlEOAsubsections:
                tmpReferenceRefTarget = xmlEOAsubsection.get("id")
                if xmlReferenceRefTarget == tmpReferenceRefTarget:
                    logging.debug("Successfully handled link to subsection %s: " % strResult)
                    for xmlParent in xmlEOAsubsection.iterancestors():
                        if xmlParent.tag == "EOAchapter":
                            strChapterOrder = xmlParent.get("order")
                            strObjectOrder = xmlEOAsubsection.get("order")
        elif xmlReferenceRefTarget in dictFigures:
            logging.debug("Found link to figure")
            strResult = dictFigures[xmlReferenceRefTarget]
            xmlEOAfigures = xmlEOAdocument.findall(".//EOAfigure")
            for xmlEOAfigure in xmlEOAfigures:
                tmpReferenceRefTarget = xmlEOAfigure.get("id")
                if xmlReferenceRefTarget == tmpReferenceRefTarget:
                    logging.debug("Successfully handled link to figure: %s" % strResult)
                    for xmlParent in xmlEOAfigure.iterancestors():
                        if xmlParent.tag == "EOAchapter":
                            strChapterOrder = xmlParent.get("order")
                            strObjectOrder = xmlEOAfigure.get("order")
        elif xmlReferenceRefTarget in dictFootnotes:
            logging.debug("Found link to footnote")
            strResult = dictFootnotes[xmlReferenceRefTarget]
            xmlEOAfootnotes = xmlEOAdocument.findall(".//EOAfootnote")
            for xmlEOAfootnote in xmlEOAfootnotes:
                tmpReferenceRefTarget = xmlEOAfootnote.get("id")
                if xmlReferenceRefTarget == tmpReferenceRefTarget:
                    logging.debug("Successfully handled link to footnote: %s" % strResult)
                    for xmlParent in xmlEOAfootnote.iterancestors():
                        if xmlParent.tag == "EOAchapter":
                            strChapterOrder = xmlParent.get("order")
                            strObjectOrder = xmlEOAfootnote.get("order")
        elif xmlReferenceRefTarget in dictTables:
            logging.debug("Found link to table")
            strResult = dictTables[xmlReferenceRefTarget]
            xmlEOAtables = xmlEOAdocument.findall(".//EOAtable")
            for xmlEOAtable in xmlEOAtables:
                # Tables are matched by label, not by target id.
                tmpReferenceRefTarget = xmlEOAtable.get("label")
                if xmlReferenceLabelText == tmpReferenceRefTarget:
                    logging.debug("Successfully handled link to table: %s" % strResult)
                    for xmlParent in xmlEOAtable.iterancestors():
                        if xmlParent.tag == "EOAchapter":
                            strChapterOrder = xmlParent.get("order")
                            strObjectOrder = xmlEOAtable.get("order")
        else:
            logging.debug("Found this other reference")
            if reference_type == "collage":
                logging.info(f"{xmlReferenceRefTarget} is a collage")
                ref_is_collage = True
            elif reference_type == "text":
                logging.debug(f"{xmlReferenceRefTarget} is a text link")
                ref_is_text = True

                pararef = xmlDjangoTree.xpath("//*[@id='%s']" % xmlReferenceRefTarget)

                if len(pararef) == 0:
                    logging.warning("There seems to be no corresponding xml:id for %s." % xmlReferenceRefTarget)
                    failed_ids.append(f"{xmlReferenceRefTarget} ({xmlReferenceLabelText})\n")
                elif len(pararef) > 1:
                    logging.error("The xml:id %s is assigned more than once. This is not allowed. Exiting." % xmlReferenceLabelText)
                    sys.exit(2)
                else:
                    for xmlParent in pararef[0].iterancestors():
                        if xmlParent.tag == "EOAchapter":
                            strChapterOrder = xmlParent.get("order")
                    for xmlParent in pararef[0].iterancestors():
                        if xmlParent.tag == "EOAparagraph":
                            strObjectOrder = xmlParent.get("order")

                # Everything except the last two children is kept as link
                # content (presumably the last two are Label and ref - TODO
                # confirm against the producer of this markup).
                all_children = list(xmlReference)
                text_has_children = all_children[:-2]
                if text_has_children:
                    reference_text = xmlReference.text
                    textref_innards = list(text_has_children)
                else:
                    # The reference may start with a child and have no text.
                    reference_text = (xmlReference.text or "").strip()
            else:
                guessref = xmlDjangoTree.xpath("//*[@id='%s']" % xmlReferenceRefTarget)
                if len(guessref) == 0:
                    logging.warning("There seems to be no corresponding xml:id for %s." % xmlReferenceLabelText)
                    failed_ids.append(xmlReferenceLabelText + "\n")
                elif len(guessref) > 1:
                    logging.error("The xml:id %s is assigned more than once. This is not allowed. Exiting." % xmlReferenceLabelText)
                    sys.exit(2)
                else:
                    for xmlParent in guessref[0].iterancestors():
                        if xmlParent.tag == "EOAparagraph":
                            strObjectOrder = xmlParent.get("order")
                            # The link text is the number of the closest
                            # section heading before the paragraph.
                            id_container = xmlParent.xpath("preceding-sibling::EOAsection[1]")[0]
                            section_id = id_container.get("id")
                            strResult = dictSections[section_id]
                        if xmlParent.tag == "EOAchapter":
                            strChapterOrder = xmlParent.get("order")

        # clear() wipes text, tail, attributes and children, so the tail is
        # saved first and the wanted content is put back afterwards.
        tmpTail = xmlReference.tail or ""

        xmlReference.clear()
        if originalcontents is not None:
            logging.info("Found originalcontents")
            xmlReference.append(originalcontents)
        elif ref_is_text:
            xmlReference.text = reference_text
            if text_has_children:
                for item in reversed(textref_innards):
                    xmlReference.insert(0, item)
        else:
            xmlReference.text = strResult
        xmlReference.tail = tmpTail
        xmlReference.tag = "a"

        # hyperimage
        hitarget_href = None
        if xmlReferenceRef.get("data-hilayer"):
            xmlReference.set("data-hilayer", xmlReferenceRef.get("data-hilayer"))
        elif xmlReferenceRef.get("hitarget"):
            xmlReference.set("class", "HILink")
            hitarget_href = "#" + xmlReferenceRef.get("hitarget")

        # A resolved in-document target still takes precedence; the
        # hyperimage target is used when nothing else was found. Previously
        # it was computed and then always overwritten.
        if strObjectOrder:
            href_string = "../" + strChapterOrder + "/index.html#" + strObjectOrder
        elif hitarget_href is not None:
            href_string = hitarget_href
        else:
            href_string = "strChapterOrder missing"
            logging.warning("strObjectOrder is missing!")

        xmlReference.set("href", href_string)
        if ref_is_collage:
            xmlReference.set("type", "collage")

logging.info("----------------------------------------------")
logging.info("Processing Page References")

# Replace the content of every <EOApageref> by the page label it refers to and,
# if the label belongs to a facsimile page, turn it into a link to that page.
# The facsimile pages are not modified here, so they are looked up only once.
xmlEOAfacsimilepages = xmlEOAdocument.findall(".//EOAfacsimilepage")

for xmlEOAchapter in xmlEOAchapters:
    xmlPageReferences = xmlEOAchapter.findall(".//EOApageref")
    for xmlReference in xmlPageReferences:
        # Reset the placeholder for every reference; it used to be set once
        # per chapter, so an unresolved reference silently inherited the
        # page number of the previous one.
        strResult = "!!! Page Reference !!!"
        xmlReferenceLabel = xmlReference.find("Label")
        xmlReferenceLabelText = xmlReferenceLabel.text
        # NOTE(review): the target is read but not used in this section.
        xmlReferenceRef = xmlReference.find("ref")
        xmlReferenceRefTarget = xmlReferenceRef.get("target")
        if xmlReferenceLabelText in dictPagelabels:
            logging.debug("Found link to page: %s" % xmlReferenceLabelText)
            strResult = dictPagelabels[xmlReferenceLabelText]
        else:
            logging.warning("Page reference not fully implemented yet, see https://github.molgen.mpg.de/EditionOpenAccess/EOASkripts/issues/52")
        xmlReference.text = strResult
        # Iterate over a snapshot: children are removed inside the loop.
        for xmlChild in list(xmlReference):
            xmlReference.remove(xmlChild)
        # Check, if EOApageref points to a Facsimile-Page
        # If yes, make a href to the facsimile
        for xmlEOAfacsimilepage in xmlEOAfacsimilepages:
            if xmlEOAfacsimilepage.get("label") == xmlReferenceLabelText:
                logging.debug("Found cross reference to facsimile.")
                xmlReference.tag = "a"
                strPartOrder = xmlEOAfacsimilepage.getparent().get("order")
                strFacsimileOrder = xmlEOAfacsimilepage.get("order")
                logging.debug(strFacsimileOrder)
                xmlReference.set("href", "../" + strPartOrder + "/" + strFacsimileOrder + ".html")

logging.info("----------------------------------------------")
logging.info("Normalizing Index Entries")

# Turn the textual content of every index element into attributes. The text is
# split as "<main>[!<secondary>][|<addition>]", where main and secondary may
# each be written as "<sortkey>@<display>".
for xmlEOAchapter in xmlEOAchapters:
    xml_EOA_indices = xmlEOAchapter.xpath(".//EOAindex | .//EOAindexperson | .//EOAindexlocation")
    for xmlEOAindex in xml_EOA_indices:

        # Using the gettext function here, because of subelements
        strEOAindextext = libeoaconvert.gettext(xmlEOAindex)
        strEOAindextext = strEOAindextext.replace("\n", " ")

        # The element ends up empty: all information moves into attributes.
        index_children = list(xmlEOAindex)
        for sub_element in index_children:
            xmlEOAindex.remove(sub_element)

        xmlEOAindex.text = None
        # Plain str.split instead of re.split('\|', ...) / re.split('\!', ...):
        # same result, without invalid escape sequences in string literals.
        listFirstPart = strEOAindextext.split("|")
        tmpEntry = listFirstPart[0]
        listSecondPart = tmpEntry.split("!")
        strMainEntry = listSecondPart[0]

        # Check if a sortkey is present via @
        listSortKey = strMainEntry.split("@")
        if len(listSortKey) == 2:
            xmlEOAindex.set("main", listSortKey[0])
            xmlEOAindex.set("display", listSortKey[1])
        else:
            xmlEOAindex.set("main", strMainEntry)
        if len(listSecondPart) > 1:
            strSecondPart = listSecondPart[1]
            listSecondarySortkey = strSecondPart.split("@")
            if len(listSecondarySortkey) == 2:
                xmlEOAindex.set("secondary", listSecondarySortkey[0])
                xmlEOAindex.set("secondarydisplay", listSecondarySortkey[1])
            else:
                xmlEOAindex.set("secondary", strSecondPart)
        if len(listFirstPart) > 1:
            strAddition = listFirstPart[1]
            if strAddition == "textbf":
                xmlEOAindex.set("bold", "true")
            # Only the leading keyword is cut off. The former
            # re.sub('see', '', ...) also deleted every later occurrence of
            # "see"/"seealso" inside the referenced entry itself.
            if strAddition.startswith("seealso"):
                xmlEOAindex.set("seealso", strAddition[len("seealso"):])
                #  Entries containing seealso are omitted for the time being
                xmlEOAindex.tag = "temp"
            elif strAddition.startswith("see"):
                xmlEOAindex.set("see", strAddition[len("see"):])
                #  Entries containing see are omitted for the time being
                xmlEOAindex.tag = "temp"
        # Figure out parent chapter number and parent Element order. All
        # ancestors are visited from the inside out, so for "elementorder"
        # the outermost non-chapter ancestor carrying an order wins.
        for xmlParent in xmlEOAindex.iterancestors():
            parent_order = xmlParent.get("order")
            if parent_order is None:
                continue
            if xmlParent.tag == "EOAchapter":
                xmlEOAindex.set("chapterorder", parent_order)
            else:
                xmlEOAindex.set("elementorder", parent_order)

# Drops the entries retagged as <temp> above.
etree.strip_tags(xmlDjangoTree, "temp")

logging.info("----------------------------------------------")
logging.info("Removing Duplicate Index Entries")

# Within each direct child of a chapter, keep only the first index entry for
# a given main entry / secondary entry combination. Duplicates are retagged
# as <temp>; nothing is stripped in this section.
for xmlEOAchapter in xmlEOAchapters:
    for xmlChild in xmlEOAchapter.iterchildren():
        # main entry -> list of secondary entries already seen in this child
        dictEntries = {}
        xml_EOA_indices = xmlChild.xpath(".//EOAindex | .//EOAindexperson | .//EOAindexlocation")
        for xmlEOAindex in xml_EOA_indices:
            strEntry = xmlEOAindex.get("main")
            strSubentry = xmlEOAindex.get("secondary")
            if strEntry not in dictEntries:
                # Remember the secondary entry of the first occurrence as
                # well; it used to be dropped, so the first real duplicate
                # of a (main, secondary) pair was not detected.
                dictEntries[strEntry] = [] if strSubentry is None else [strSubentry]
                continue
            if strSubentry is None or strSubentry in dictEntries[strEntry]:
                # The see/seealso markers live on the index entry itself
                # (the old code looked them up on the container element).
                if (xmlEOAindex.get("see") is None) and (xmlEOAindex.get("seealso") is None):
                    xmlEOAindex.tag = "temp"
            else:
                dictEntries[strEntry].append(strSubentry)

logging.info("----------------------------------------------")
logging.info("Creating paragraph links")

# Every EOAparagraph with a "corresp" attribute points at another paragraph
# through that paragraph's xml:id. Each pointer is resolved into a relative
# link of the form ../<chapter order>/index.html#<paragraph order> and stored
# in an "href" attribute.
# Pick this up on publicationimport and extend the model for a field,
# probably containing the html snippet for the URL.
paragraphs_with_corresp = xmlDjangoTree.xpath("//EOAparagraph[@corresp]")
for pc in paragraphs_with_corresp:
    # The first character of the pointer is dropped (presumably a leading
    # "#" - verify against the input format).
    corresponding_attribute = pc.get("corresp")[1:]
    # Pass the id as an XPath variable instead of formatting it into the
    # expression, so that quotes in the attribute value cannot break the query.
    corresponding_paragraph = xmlDjangoTree.xpath(
        "//EOAparagraph[@xml:id=$target_id]",
        target_id=corresponding_attribute,
    )
    if len(corresponding_paragraph) == 0:
        logging.error("There is no corresponding xml:id for %s. Exiting." % corresponding_attribute)
        sys.exit(1)
    elif len(corresponding_paragraph) > 1:
        logging.error("The xml:id %s has been assigned more than once. This is not allowed. Exiting." % corresponding_paragraph[0].attrib["{http://www.w3.org/XML/1998/namespace}id"])
        sys.exit(1)

    eoa_id_element = corresponding_paragraph[0]
    paragraph_order = eoa_id_element.get("order")

    # Look the chapter up afresh for every paragraph. Without the reset, a
    # target outside of any EOAchapter would either raise a NameError (first
    # iteration) or silently reuse the chapter of the previous link.
    chapter_element = None
    for xml_parent in eoa_id_element.iterancestors():
        if xml_parent.tag == "EOAchapter":
            # as before, the last (outermost) matching ancestor wins
            chapter_element = xml_parent
    if chapter_element is None:
        logging.error("The paragraph with xml:id %s is not part of an EOAchapter. Exiting." % corresponding_attribute)
        sys.exit(1)
    chapter_order = chapter_element.get("order")

    href_text = f"../{chapter_order}/index.html#{paragraph_order}"
    pc.set("href", href_text)

# Once all links are resolved, the pointer and the xml:id are removed from
# the linking paragraphs (strip_attributes also covers their descendants).
for pc in paragraphs_with_corresp:
    etree.strip_attributes(pc, "corresp", "{http://www.w3.org/XML/1998/namespace}id")

logging.info("----------------------------------------------")
logging.info("Sorting and Creating Regular Index")

# Collect all regular index entries; the printable index is only built
# (and xml_eoa_print_regular_index only defined) when there is at least one.
xml_regular_EOAindices = xmlDjangoTree.findall("//EOAindex")
if xml_regular_EOAindices:
    number_of_entries = str(len(xml_regular_EOAindices))
    logging.debug("Sorting %s entries for regular index." % number_of_entries)
    xml_eoa_print_regular_index = make_index(
        xml_regular_EOAindices,
        index_type="regular",
    )

# Hand the current state of the three trees to the debug helper, each under
# its own label, in the same order as before.
for debug_tree, debug_label in (
        (xmlDjangoTree, "djangotree"),
        (xmlEOAdocument, "xmleoadocument"),
        (xmlTree, "xmltree"),
):
    libeoaconvert.debug_xml_here(debug_tree, debug_label, DEBUG_DIR)

# If EOAprintindex is found, append xml_eoa_print_regular_index to xmlEOAdocument
xmlPrintindex = xmlTree.find(".//EOAprintindex")
if xmlPrintindex is not None:
    # Retag the placeholder and its parent as "temp"; elements with that
    # tag are stripped during the clean-up further down.
    logging.info("found an index")
    xmlPrintindex.tag = "temp"
    xmlPrintindex.getparent().tag = "temp"
    # xml_eoa_print_regular_index only exists when index entries were
    # found; appending it unconditionally raised a NameError for a
    # publication that requests an index but contains no entries.
    if xml_regular_EOAindices:
        xmlEOAdocument.append(xml_eoa_print_regular_index)
    else:
        logging.warning("EOAprintindex found, but there are no EOAindex entries. No regular index was created.")
else:
    logging.info("found no index")

logging.info("----------------------------------------------")
logging.info("Sorting and Creating Person Index")

# Collect all person index entries; the printable index is only built
# (and xml_eoa_print_person_index only defined) when there is at least one.
xml_person_EOAindices = xmlDjangoTree.findall("//EOAindexperson")
if xml_person_EOAindices:
    xml_eoa_print_person_index = make_index(xml_person_EOAindices, index_type = "person")

# If EOAprintpersonindex is found, append xml_eoa_print_person_index to xmlEOAdocument
# NOTE(review): the placeholder is searched in xmlTree, an earlier version
# of this line searched xmlDjangoTree - confirm which tree is intended.
# The relative path ".//" matches the regular and location index lookups.
xmlPrintindex = xmlTree.find(".//EOAprintpersonindex")
if xmlPrintindex is not None:
    # Retag the placeholder and its parent as "temp"; elements with that
    # tag are stripped during the clean-up further down.
    xmlPrintindex.tag = "temp"
    xmlPrintindex.getparent().tag = "temp"
    # Guard against a placeholder without any entries, which used to end
    # in a NameError because the index had never been built.
    if xml_person_EOAindices:
        xmlEOAdocument.append(xml_eoa_print_person_index)
    else:
        logging.warning("EOAprintpersonindex found, but there are no EOAindexperson entries. No person index was created.")

# doing the same for location index
logging.info("----------------------------------------------")
logging.info("Sorting and Creating Location Index")

# Collect all location index entries; the printable index is only built
# (and xml_eoa_print_location_index only defined) when there is at least one.
xml_location_EOAindices = xmlDjangoTree.findall("//EOAindexlocation")
if xml_location_EOAindices:
    xml_eoa_print_location_index = make_index(xml_location_EOAindices, index_type = "location")

# If EOAprintlocationindex is found, append xml_eoa_print_location_index to xmlEOAdocument
xmlPrintindex = xmlTree.find(".//EOAprintlocationindex")
if xmlPrintindex is not None:
    # Retag the placeholder and its parent as "temp"; elements with that
    # tag are stripped during the clean-up further down.
    xmlPrintindex.tag = "temp"
    xmlPrintindex.getparent().tag = "temp"
    # Guard against a placeholder without any entries, which used to end
    # in a NameError because the index had never been built.
    if xml_location_EOAindices:
        xmlEOAdocument.append(xml_eoa_print_location_index)
    else:
        logging.warning("EOAprintlocationindex found, but there are no EOAindexlocation entries. No location index was created.")

############################################################################
#                              Cleaning up                                 #
############################################################################
# TODO: delete the unnecessary attributes such as id
# TODO: delete the unnecessary tags such as EOAlabel

# Links inside the originalcontents of a collage link are marked for stripping
for collage_link in xmlDjangoTree.xpath(".//a[@type='collage']/originalcontents/a"):
    collage_link.tag = "temp"

# anchor and b elements without any child node are marked for stripping, too
for empty_element in xmlDjangoTree.xpath(".//anchor[not(node())] | .//b[not(node())]"):
    empty_element.tag = "tagtobestripped"

# strip_tags dissolves the elements but keeps their text and children
tags_to_dissolve = (
    "temp",
    "citetext",
    "EOAprintbibliography",
    "originalcontents",
    "tagtobestripped",
)
etree.strip_tags(xmlDjangoTree, *tags_to_dissolve)

# strip_elements removes the elements together with their content; the
# tail text after them is kept (with_tail=False)
etree.strip_elements(xmlDjangoTree, "citekey", "elementtoberemoved", with_tail=False)

# NOTE: "rend" is currently not stripped (it was commented out of this list)
attributes_to_drop = (
    "id-text",
    "id",
    "noindent",
    "type",
    "label",
    "spacebefore",
)
etree.strip_attributes(xmlDjangoTree, *attributes_to_drop)

############################################################################
#                          Save xmlDjangoTree                             #
############################################################################

# The tree is serialised to a str without an XML declaration, so the file
# has to be UTF-8 to be read back correctly; the encoding is therefore set
# explicitly instead of depending on the locale. The with block closes the
# file even if writing fails.
tmpResult = etree.tostring(xmlDjangoTree, pretty_print=True, encoding="unicode")
with open(OUTPUT_DIR / "Django.xml", "w", encoding="utf-8") as tmpFile:
    tmpFile.write(tmpResult)
logging.debug(f"Wrote {OUTPUT_DIR}/Django.xml.")

# Report ids that could not be referenced: first in order of appearance,
# then sorted with duplicates removed.
if len(failed_ids) > 0:
    cleaned_failures = sorted(set(failed_ids))
    # The with block closes the report even if writing fails.
    with open(OUTPUT_DIR / "debug/failed_ids.txt", "w", encoding="utf-8") as tmpFile:
        # Headings are single strings and go through write(); writelines()
        # is kept for the id collections only.
        tmpFile.write(f"Missing IDs by appearance ({len(failed_ids)} in total):\n")
        # NOTE(review): no separator is added between ids - presumably each
        # entry already ends with a newline; verify where failed_ids is filled.
        tmpFile.writelines(failed_ids)
        tmpFile.write(f"\nMissing IDs sorted and uniqued ({len(cleaned_failures)} in total):\n")
        tmpFile.writelines(cleaned_failures)
    logging.debug(f"Some ids could not be referenced. Check {OUTPUT_DIR}/debug/failed_ids.txt.")

# Optionally (-p/--checkpublicationcfg) check the publication.cfg of the
# input directory for completeness; without the flag nothing happens here.
publication_cfg_requested = args.checkpublicationcfg
if publication_cfg_requested:
    check_publication_cfg(INPUT_DIR / "publication.cfg")