Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
EOASkripts/src/imxml2epub.py
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
executable file
1957 lines (1722 sloc)
81.6 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# -*- coding: utf-8; mode: python -*- | |
# Time-stamp: <2020-09-25 08:08:41 (kthoden)> | |
""" Convert a customized DocBook XML file into a set of files that | |
constitute the contents of an EPUB file. | |
Input file is a customized DocBook XML that has been generated either | |
with eoatex2imxml or tei2imxml. | |
""" | |
from utils.load_config import load_config | |
import utils.libeoaconvert as libeoaconvert | |
import os | |
import sys | |
import argparse | |
import re | |
import shutil | |
import pickle | |
import shlex | |
import string | |
import subprocess | |
import logging | |
from copy import deepcopy | |
from lxml import etree | |
from pathlib import Path | |
from PIL import ImageFont | |
import configparser | |
# Locations of this script; used to resolve bundled data and config files.
BASE_DIR = Path( __file__ ).resolve().parent
SCRIPT_PATH = Path( __file__ )
SCRIPT_NAME = SCRIPT_PATH.stem  # basename without suffix; used for the log file name
# Default working directories, overridable via the INPUT_DIR / OUTPUT_DIR
# environment variables (fall back to ./input and ./output).
DEFAULT_INPUT_DIR = \
    Path(os.environ['INPUT_DIR'] if 'INPUT_DIR' in os.environ else './input')
DEFAULT_OUTPUT_DIR = \
    Path(os.environ['OUTPUT_DIR'] if 'OUTPUT_DIR' in os.environ else './output')
#####################
# Parsing arguments #
#####################
# Command-line interface; input/output defaults are derived from
# PUBLICATION_DIR further below when not given explicitly.
parser = argparse.ArgumentParser(
    formatter_class=argparse.ArgumentDefaultsHelpFormatter
)
parser.add_argument(
    "-c", "--config",
    dest="CONFIG_FILE",
    default = BASE_DIR / "config" / "eoaconvert.cfg",
    help="Name of configuration file",
    metavar="CONFIGURATION"
)
parser.add_argument(
    "--log-level",
    default = "INFO",
    help="log level: choose between DEBUG, INFO, WARNING, ERROR, CRITICAL"
)
parser.add_argument(
    "-i", "--input-dir",
    help = f"directory containing some intermediate xml created by previous steps. default: {DEFAULT_OUTPUT_DIR}/PUBLICATION_NAME/imxml",
    type = Path,
)
parser.add_argument(
    "-o", "--output-dir",
    help = f"output directory. default: {DEFAULT_OUTPUT_DIR}/PUBLICATION_NAME/epub",
    type = Path,
)
parser.add_argument(
    "-f", "--font",
    help="Font to be used, default is TeX Gyre Termes",
    default="termes"
)
parser.add_argument(
    "-ne", "--no-epub",
    help="Disable creation of epub file.",
    action="store_true"
)
parser.add_argument(
    "--extra-font-selector",
    help="Specify the css class selector for the extra font."
)
parser.add_argument(
    "--extra-font-files-directory",
    help="Specify the directory with files of the font (the font itself, License)",
)
# Positional argument: source directory of the publication (images etc.).
parser.add_argument(
    "PUBLICATION_DIR",
    help = "directory containing the publication (including resources like pictures, etc.)",
    type = Path,
)
parser.add_argument(
    "-him", "--hyperimage",
    help="Link hyperlink references to online version.",
    action="store_true"
)
args = parser.parse_args()
########################
# Paths to executables #
########################
EPUB_FILES = BASE_DIR / "data/epub_files/"  # bundled epub templates, fonts, css
# EPUB_FILES = os.path.dirname(sys.argv[0]) + "/data/epub_files/"
GM_PATH = "gm"  # GraphicsMagick binary, used for image sanitizing
PDFCROP_EXEC = "pdfcrop"  # used for PDF figures
############################
# Paths:
############################
# Derive input/output locations from the publication directory name
# unless they were given explicitly on the command line.
PUBLICATION_DIR = args.PUBLICATION_DIR
INPUT_DIR = \
    args.input_dir if args.input_dir is not None else DEFAULT_OUTPUT_DIR / PUBLICATION_DIR.resolve().stem / "imxml"
OUTPUT_DIR = \
    args.output_dir if args.output_dir is not None else (DEFAULT_OUTPUT_DIR / PUBLICATION_DIR.resolve().stem) / "epub"
LOG_DIR = OUTPUT_DIR / "log"
LOG_FILE = (LOG_DIR / SCRIPT_NAME) . with_suffix( ".log" )
TEMP_DIR = OUTPUT_DIR / "tmp_files"
DEBUG_DIR = OUTPUT_DIR / "debug"
##################################
# Reading the configuration file #
##################################
config_file = args.CONFIG_FILE
print(f"The config file is {config_file}")
logseparator = "-"*53 + "\n"  # visual divider prepended to section log messages
# load_config also configures logging (level and log file location).
CONFIG = load_config(
    config_file,
    args.log_level,
    LOG_FILE,
    # args.log_file,
)
############################
# Paths to auxiliary files #
############################
TRANSLATION_FILE = BASE_DIR / CONFIG['Auxiliaries']['TRANSLATIONS']
# Per-language UI strings (e.g. the caption label for figures).
translation_xml = etree.parse( str( TRANSLATION_FILE ) )
if not TEMP_DIR.exists():
    os.makedirs( TEMP_DIR )
if not DEBUG_DIR.exists():
    os.makedirs( DEBUG_DIR )
# Check for folder and necessary files
if not os.path.exists( OUTPUT_DIR / "publication.cfg"):
    logging.info(f"The publication.cfg file is missing in {OUTPUT_DIR} directory.")
    if os.path.exists(INPUT_DIR / "publication.cfg"):
        shutil.copy(INPUT_DIR / "publication.cfg", OUTPUT_DIR)
        logging.info(f"Copied from {INPUT_DIR}.")
    else:
        logging.error("Found no publication.cfg. Exiting")
        sys.exit( 1 )
if not os.path.exists( OUTPUT_DIR / "Cover.jpg"):
    # NOTE(review): the check looks for "Cover.jpg" but the copy target
    # (and the later use as OEBPS/images/cover.jpg) is lowercase
    # "cover.jpg" -- if OUTPUT_DIR contains only "Cover.jpg" the later
    # copy of "cover.jpg" may fail on case-sensitive file systems.
    # TODO confirm intended casing.
    logging.info(f"The file Cover.jpg in {OUTPUT_DIR} directory is missing.")
    if os.path.exists(INPUT_DIR / "Cover.jpg"):
        shutil.copy(INPUT_DIR / "Cover.jpg", OUTPUT_DIR / "cover.jpg")
        logging.info("Copied from current directory.")
    else:
        logging.error("No coverfile found. You can create a temporary one with the mkimage.py script")
        sys.exit( 1 )
# Datei = open( TEMP_DIR / 'intermediate.log', 'w')
# Intermediate DocBook-like XML produced by eoatex2imxml or tei2imxml.
xmlTree = etree.parse( str(INPUT_DIR / "IntermediateXMLFile.xml") )
# Numbering dictionaries pickled by the previous pipeline step.
# NOTE: pickle.load is only safe on trusted input; this file is produced
# by our own toolchain.
with open(INPUT_DIR / 'tmp_files/data.pickle', 'rb') as f:
    data = pickle.load(f)
dictSections = data["secdict"]
dictEquations = data["eqdict"]
dictLists = data["listdict"]
dictChapters = data["chapterdict"]
dictFigures = data["figdict"]
dictFootnotes = data["fndict"]
dictTheorems = data["theoremdict"]
dictTables = data["tabdict"]
dictPagelabels = data["pagelabeldict"]
if args.hyperimage:
    logging.info("Enabled Hyperimage support")
else:
    pass
def get_mimetype(filename_suffix):
    """Map an image file suffix to the internal type tag ("jpg" or "png").

    PDF figures are reported as "png" because they are rasterized to PNG
    later in the pipeline.  Exits the program on an unknown suffix.
    """
    suffix = filename_suffix.lower()
    if suffix in (".jpg", ".jpeg"):
        return "jpg"
    if suffix in (".png", ".pdf"):
        return "png"
    logging.error(f"Found an unrecognized image suffix: {filename_suffix}")
    sys.exit(1)
# def get_mimetype ends here
def remove_processinginstruction(xml_tree, pi_name):
    """Strip all processing instructions named *pi_name* from *xml_tree*.

    Uses lxml's ``strip_tags`` on each PI's parent, which drops the node
    while merging its tail text back into the surrounding content
    (approach from https://stackoverflow.com/questions/31522162/).

    :param xml_tree: lxml tree, modified in place
    :param pi_name: target name of the processing instructions to remove
    :returns: the same tree, for convenient chaining
    """
    proc_insts = xml_tree.xpath("//processing-instruction('{}')".format(pi_name))
    for instruction in proc_insts:
        # For PI nodes, .tag is the ProcessingInstruction factory, which
        # strip_tags accepts as a tag selector.
        etree.strip_tags(instruction.getparent(), instruction.tag)
    logging.debug("Removed %s processing instructions of type %s." % (len(proc_insts), pi_name))
    return xml_tree
# def remove_processinginstruction ends here
def addToContentopf(contentopf, Filename, FileID, Mediatype):
    """Add a manifest (and, for XHTML, spine) entry to content.opf.

    :param contentopf: lxml tree of the content.opf file, modified in place
    :param Filename: href of the file, relative to the OEBPS directory
    :param FileID: desired manifest id; sanitized before use
    :param Mediatype: key into the media-type table below ("xml", "jpg", ...)
    :returns: the (possibly unchanged) contentopf tree

    Already-registered ids are tracked in the module-level
    ``listContentopf`` so the same file is never added twice.

    Fixes over the previous version: raw regex strings (the old "\\_"
    style escapes raise SyntaxWarning on modern Python), the duplicated
    sanitization block is gone, and ALL leading digits are stripped
    (previously at most three, which could leave an invalid id).
    """
    global listContentopf
    # Sanitize FileID: the id attribute may not contain _ . or /
    # and may not start with a digit.
    FileID = re.sub(r"[_./]", "", FileID)
    FileID = re.sub(r"^[0-9]+", "", FileID)
    if FileID in listContentopf:
        # Already present; nothing to do.
        return contentopf
    dictMediatypes = {
        "txt" : "text/plain",
        "ttf" : "application/x-font-truetype",
        "otf" : "application/vnd.ms-opentype",
        "xml" : "application/xhtml+xml",
        "jpg" : "image/jpeg",
        "png" : "image/png",
        "bitstream" : "application/octet-stream"
    }
    contentopfns = "{http://www.idpf.org/2007/opf}"
    xmlManifest = contentopf.find(".//" + contentopfns + "manifest")
    xmlItem = etree.Element("item")
    xmlItem.set("id", FileID)
    xmlItem.set("media-type", dictMediatypes[Mediatype])
    xmlItem.set("href", str(Filename))
    xmlManifest.append(xmlItem)
    # XHTML content documents must also appear in the reading order (<spine>).
    if Mediatype == "xml":
        xmlSpine = contentopf.find(".//" + contentopfns + "spine")
        xmlItemref = etree.Element("itemref")
        xmlItemref.set("idref", FileID)
        xmlSpine.append(xmlItemref)
    listContentopf.append(FileID)
    return contentopf
# def addToContentopf ends here
def addToTocncx(tocncx, Label, intTechnicalChapterNumber):
    """Append a chapter entry (navPoint) to the epub table of contents.

    :param tocncx: parsed toc.ncx document, modified in place
    :param Label: chapter title shown in the reading system's TOC
    :param intTechnicalChapterNumber: zero-based chapter index; the
        playOrder attribute is this number plus one
    :returns: the tocncx document
    """
    tocncxns = "{http://www.daisy.org/z3986/2005/ncx/}"
    chapter_tag = "chapter" + str(intTechnicalChapterNumber)
    nav_point = etree.Element("navPoint")
    nav_point.set("playOrder", str(intTechnicalChapterNumber + 1))
    nav_point.set("id", chapter_tag)
    nav_label = etree.Element("navLabel")
    label_text = etree.Element("text")
    label_text.text = Label
    nav_label.append(label_text)
    nav_point.append(nav_label)
    content = etree.Element("content")
    content.set("src", chapter_tag + ".xhtml")
    nav_point.append(content)
    tocncx.find(".//" + tocncxns + "navMap").append(nav_point)
    return tocncx
# def addToTocncx ends here
def create_epub_container(filename, OUTPUT_DIR):
    """Zip the prepared epub directory structure into an epub container.

    Creates ``<filename lowercased>-raw.epub`` inside OUTPUT_DIR,
    containing the ``mimetype`` file first, then ``META-INF/container.xml``
    and the whole ``OEBPS`` directory tree.

    :param filename: publication name used for the epub file name
    :param OUTPUT_DIR: directory holding the prepared epub file structure

    Fixes over the previous version: the zip handle is closed via a
    context manager, and the original working directory is restored even
    if zipping fails (previously an exception left the process chdir'ed
    into OUTPUT_DIR with an open file handle).
    """
    import zipfile
    epub_filename = f"{filename.lower()}-raw.epub"
    output_dir = OUTPUT_DIR.resolve()
    cwd = Path.cwd()
    os.chdir( output_dir )
    try:
        with zipfile.ZipFile(epub_filename, "w") as epubcontainer:
            # The mimetype entry must come first; ZipFile defaults to
            # ZIP_STORED, so it is written uncompressed as the OCF
            # specification requires.
            epubcontainer.write("mimetype")
            epubcontainer.write("META-INF/container.xml")
            for dirname, subdirs, files in os.walk("OEBPS"):
                epubcontainer.write(dirname)
                for contentfile in files:
                    epubcontainer.write(os.path.join(dirname, contentfile))
        logging.info(f"Wrote {output_dir}/{epub_filename}.")
    finally:
        os.chdir( cwd )
# def create_epub_container ends here
def add_fonts_to_opf(font_files, contentopf, otf_counter, txt_counter, fontdir=""):
    """Copy font-related files into OEBPS/fonts and register them in the OPF.

    :param font_files: file names to process: .otf/.ttf fonts, .txt licence
        files, or extension-less files (registered as octet-stream)
    :param contentopf: content.opf tree, extended via addToContentopf
    :param otf_counter: running manifest-id counter for font files
    :param txt_counter: running manifest-id counter for licence/other files
    :param fontdir: source directory; defaults to the bundled EPUB_FILES
    :returns: the updated (otf_counter, txt_counter) pair

    Exits the program on an unexpected file extension.
    """
    source_dir = Path(fontdir) if fontdir else Path(EPUB_FILES)
    for fontfile in font_files:
        shutil.copy(source_dir / fontfile, OUTPUT_DIR / "OEBPS/fonts/")
        file_extension = os.path.splitext(fontfile)[1]
        # Map the extension to a manifest id and media-type key.
        if file_extension == ".otf":
            file_id, mediatype = "fontfile" + str(otf_counter), "otf"
        elif file_extension == ".ttf":
            file_id, mediatype = "fontfile" + str(otf_counter), "ttf"
        elif file_extension == ".txt":
            file_id, mediatype = "font-txt" + str(txt_counter), "txt"
        elif file_extension == "":
            file_id, mediatype = "font-txt" + str(txt_counter), "bitstream"
        else:
            logging.error(f"Other file found with extension '{file_extension}'. Exiting")
            sys.exit()
        contentopf = addToContentopf(
            contentopf,
            Path("fonts") / fontfile,
            file_id,
            mediatype
        )
        if file_extension in (".otf", ".ttf"):
            otf_counter += 1
        else:
            txt_counter += 1
    return otf_counter, txt_counter
# def add_fonts_to_opf ends here
def create_extra_font_css(css_selector, extrafontfiles):
    """Build a CSS snippet that declares and applies the extra font.

    :param css_selector: class name (without leading dot) the font applies to
    :param extrafontfiles: file names from the extra font directory; exactly
        one .otf or .ttf font file is expected among them
    :returns: CSS containing a class rule plus the matching @font-face rule

    Exits if the directory does not contain exactly one font file (the
    previous version only checked for more than one and crashed with an
    IndexError when none was found).
    """
    fontfiles = [x for x in extrafontfiles if x.endswith(".otf") or x.endswith(".ttf")]
    if len(fontfiles) != 1:
        logging.error(f"Expected exactly one font file in the list, found {len(fontfiles)}. Exiting")
        sys.exit()
    fontfile = fontfiles[0]
    # Read the font family name out of the font file itself.
    font_object = ImageFont.truetype(fontfile)
    font_label = font_object.getname()[0]
    css_code = f""".{css_selector} {{
    font-family: '{font_label}';
}}

@font-face {{
    font-family: "{font_label}";
    src: url("fonts/{fontfile}");
}}
"""
    return css_code
# def create_extra_font_css ends here
def add_css_snippet(css_snippet, css_file):
    """Append a CSS snippet (preceded by a newline) to an epub CSS file.

    :param css_snippet: CSS text to append
    :param css_file: path of the CSS file, opened in append mode
    """
    with open(css_file, "a") as css_out:
        css_out.write("\n" + css_snippet)
    logging.info("Added extra code to css file")
# def add_css_snippet ends here
##############################################################
# Create .epub basic structure                               #
##############################################################
# Skeleton directories of the epub container.
if not os.path.exists( OUTPUT_DIR / "META-INF" ):
    os.mkdir( OUTPUT_DIR / "META-INF" )
if not os.path.exists( OUTPUT_DIR / "OEBPS" ):
    os.mkdir( OUTPUT_DIR / "OEBPS" )
if not os.path.exists( OUTPUT_DIR / "OEBPS" / "images" ):
    os.mkdir( OUTPUT_DIR / "OEBPS" / "images" )
if not os.path.exists( OUTPUT_DIR / "OEBPS" / "fonts" ):
    os.mkdir( OUTPUT_DIR / "OEBPS" / "fonts" )
# Copy containter.xml and mimetype
shutil.copy(EPUB_FILES / "epubcontainer.xml", OUTPUT_DIR / "META-INF/container.xml")
shutil.copy(EPUB_FILES / "epubmimetype", OUTPUT_DIR / "mimetype")
# Preparing content.opf
xmlContentopfParser = etree.XMLParser(no_network=False,load_dtd=False)
contentopf = etree.parse( str(EPUB_FILES/"epubcontentopf.xml"), xmlContentopfParser)
# This list includes all files which have already been included to avoid duplicates
listContentopf = []
#########
# Fonts #
#########
# Font sets shipped with the converter; Termes is the default.
libertine_fonts = ["GPL.txt", "LICENCE.txt", "LinLibertine_R.otf", "LinLibertine_RI.otf", "LinLibertine_RZ.otf", "LinLibertine_RZI.otf", "OFL-1.1.txt"]
termes_fonts = ["texgyretermes-bold.otf", "texgyretermes-bolditalic.otf", "texgyretermes-italic.otf", "texgyretermes-regular.otf"]
font_files = termes_fonts
mainfont_string = "TeXGyreTermes"
css_rules = EPUB_FILES / "fontfacetermes.css"
if args.font == "termes":
    logging.info("Using default font TeX Gyre Termes.")
elif args.font == "libertine":
    font_files = libertine_fonts
    mainfont_string = "Linux Libertine O"
    css_rules = EPUB_FILES / "fontfacelibertine.css"
else:
    logging.info("Font not recognized, falling back to default (TeX Gyre Termes).")
# Build the final stylesheet from the core template plus the
# font-specific @font-face rules.
with open(EPUB_FILES / "eoa-epub-core.css", "r") as template_file:
    css_template = template_file.read()
with open(css_rules, "r") as filehandler:
    fontface_string = filehandler.read()
css_template_string = string.Template(css_template)
css_replacement = css_template_string.substitute(
    MAINFONT=mainfont_string,
    FONTFACESPEC=fontface_string)
final_epub_css = OUTPUT_DIR / "OEBPS/eoa-epub.css"
with open(final_epub_css, "w") as write_css:
    write_css.write(css_replacement)
# Register the font files in the OPF manifest; the counters keep the
# generated manifest ids unique across both calls.
otf_id_counter = 1
txt_id_counter = 1
otf_id_counter, txt_id_counter = add_fonts_to_opf(font_files, contentopf, otf_id_counter, txt_id_counter)
if args.extra_font_files_directory:
    extra_fonts = os.listdir(args.extra_font_files_directory)
    otf_id_counter, txt_id_counter = add_fonts_to_opf(extra_fonts, contentopf, otf_id_counter, txt_id_counter, fontdir=args.extra_font_files_directory)
if args.extra_font_selector:
    # NOTE(review): extra_fonts is only bound when
    # --extra-font-files-directory was also given; passing
    # --extra-font-selector alone raises NameError here.
    css_selector = args.extra_font_selector
    css_snippet = create_extra_font_css(css_selector, extra_fonts)
    add_css_snippet(css_snippet, OUTPUT_DIR / "OEBPS/eoa-epub.css")
# Shortcut for namespace
htmlns = "{http://www.w3.org/1999/xhtml}"
# Load Template for Chapter HTML
xmlChapterParser = etree.XMLParser(no_network=False,load_dtd=False) #resolve_entities=False
# Preparing toc.ncx
xmlTocncxParser = etree.XMLParser(no_network=False,load_dtd=False)
tocncx = etree.parse(str(EPUB_FILES / "epubtocncx.xml"), xmlTocncxParser)
logging.info(f"{logseparator}Preparing content.opf")
xmlMetadata = contentopf.find(".//{http://www.idpf.org/2007/opf}metadata")
# Prepare Metadata based on Publication.cfg
cfgPublication = configparser.RawConfigParser()
logging.debug(f"Reading publication.cfg from {OUTPUT_DIR / 'publication.cfg'}.")
cfgPublication.read( OUTPUT_DIR / "publication.cfg")
# Frequently used publication properties.
publication_series = cfgPublication.get("Technical", "Serie")
publication_number = cfgPublication.get("Technical", "Number")
publication_license = cfgPublication.get("Technical", "License")
publication_landingpage = cfgPublication.get("Technical", "LandingPage")
# Prefer the epub-specific ISBN; fall back to the generic ISBN entry.
# The previous bare `except:` also swallowed unrelated errors such as
# KeyboardInterrupt; only the configparser lookup errors are expected.
try:
    publication_isbn = cfgPublication.get("Technical", "ISBN-epub")
except (configparser.NoOptionError, configparser.NoSectionError):
    publication_isbn = cfgPublication.get("Technical", "ISBN")
# Prepare Author String
# Join up to four authors: "A", "A and B", "A, B and C", "A, B, C and D".
strAuthorString = cfgPublication.get("Authors", "Author1")
if cfgPublication.get("Authors", "Author2") != "":
    strAuthorString = cfgPublication.get("Authors", "Author1") + " and " + cfgPublication.get("Authors", "Author2")
if cfgPublication.get("Authors", "Author3") != "":
    strAuthorString = cfgPublication.get("Authors", "Author1") + ", " + cfgPublication.get("Authors", "Author2") + " and " + cfgPublication.get("Authors", "Author3")
if cfgPublication.get("Authors", "Author4") != "":
    strAuthorString = cfgPublication.get("Authors", "Author1") + ", " + cfgPublication.get("Authors", "Author2") + ", " + cfgPublication.get("Authors", "Author3") + " and " + cfgPublication.get("Authors", "Author4")
xmlAuthor = etree.Element("{http://purl.org/dc/elements/1.1/}creator")
xmlAuthor.text = strAuthorString
xmlMetadata.append(xmlAuthor)
# Prepare Title-String
strTitleString = cfgPublication.get("Technical", "Title")
xmlTitle = etree.Element("{http://purl.org/dc/elements/1.1/}title")
xmlTitle.text = strTitleString
xmlMetadata.append(xmlTitle)
# Prepare Description via Subtitle
strSubtitleString = cfgPublication.get("Technical", "Subtitle")
if strSubtitleString != "":
    xmlSubtitle = etree.Element("{http://purl.org/dc/elements/1.1/}description")
    xmlSubtitle.text = strSubtitleString
    xmlMetadata.append(xmlSubtitle)
# Prepare Identifier (the epub ISBN doubles as the unique book id)
strIdentifier = publication_isbn
xmlIdentifier = etree.Element("{http://purl.org/dc/elements/1.1/}identifier")
xmlIdentifier.text = strIdentifier
xmlIdentifier.set("id", "BookId")
xmlMetadata.append(xmlIdentifier)
# Prepare Type
xmlType = etree.Element("{http://purl.org/dc/elements/1.1/}type")
xmlType.text = "Text"
xmlMetadata.append(xmlType)
#Prepare Date
strPublicationDate = cfgPublication.get("Technical", "PublicationDate")
xmlDate = etree.Element("{http://purl.org/dc/elements/1.1/}date")
xmlDate.text = strPublicationDate
xmlDate.set("{http://www.idpf.org/2007/opf}event", "creation")
xmlMetadata.append(xmlDate)
# Prepare Publisher
xmlPublisher = etree.Element("{http://purl.org/dc/elements/1.1/}publisher")
xmlPublisher.text = "Edition Open Access"
xmlMetadata.append(xmlPublisher)
# Prepare Rights
xmlPublisher = etree.Element("{http://purl.org/dc/elements/1.1/}rights")
xmlPublisher.text = "Published under Creative Commons by-nc-sa 3.0 Germany Licence"
xmlMetadata.append(xmlPublisher)
# Prepare Source
xmlSource = etree.Element("{http://purl.org/dc/elements/1.1/}source")
xmlSource.text = "Max Planck Research Library for the History and Development of Knowledge"
xmlMetadata.append(xmlSource)
# Prepare Subject
strSubject = cfgPublication.get("General", "Keyword1")
xmlSubject = etree.Element("{http://purl.org/dc/elements/1.1/}subject")
xmlSubject.text = strSubject
xmlMetadata.append(xmlSubject)
# Prepare Language
strLanguage = cfgPublication.get("Technical", "Language")
xmlLanguage = etree.Element("{http://purl.org/dc/elements/1.1/}language")
xmlLanguage.text = strLanguage
xmlMetadata.append(xmlLanguage)
#Prepare Cover
# The meta name="cover" entry points at the manifest id of the cover image.
xmlCover = etree.Element("meta")
xmlCover.set("content", "cover_pic")
xmlCover.set("name", "cover")
xmlMetadata.append(xmlCover)
xmlManifest = contentopf.find(".//{http://www.idpf.org/2007/opf}manifest")
xmlItem = etree.Element("item")
xmlItem.set("id", "cover_pic")
xmlItem.set("href", "images/cover.jpg")
xmlItem.set("media-type", "image/jpeg")
xmlManifest.append(xmlItem)
shutil.copy(
    OUTPUT_DIR / "cover.jpg",
    OUTPUT_DIR / "OEBPS/images/"
)
# The cover page itself is a static XHTML file shipped with the converter.
xmlItem = etree.Element("item")
xmlItem.set("id", "cover")
xmlItem.set("href", "cover.xhtml")
xmlItem.set("media-type", "application/xhtml+xml")
xmlManifest.append(xmlItem)
shutil.copy(EPUB_FILES / "epubcover.xhtml", OUTPUT_DIR / "OEBPS/cover.xhtml")
logging.info(f"{logseparator}Preparing intro.xhtml")
# The "Sources" series has its own intro template.
if publication_series == "Sources":
    tmpFilePath = EPUB_FILES / "epubintro-sources.xhtml"
else:
    tmpFilePath = EPUB_FILES / "epubintro.xhtml"
with open(tmpFilePath, "r") as tmpFile:
    strIntroHTML = tmpFile.read()
# Fill in the template placeholders.  NOTE(review): these are plain
# substring replacements, so a title that itself contains e.g. "series"
# would also be rewritten -- presumably the templates avoid collisions.
strIntroHTML = re.sub("author", strAuthorString, strIntroHTML)
strIntroHTML = re.sub("TITLE", strTitleString, strIntroHTML)
strIntroHTML = re.sub("year", cfgPublication.get("Technical", "PublicationYear"), strIntroHTML)
strIntroHTML = re.sub("series", publication_series, strIntroHTML)
strIntroHTML = re.sub("number", publication_number, strIntroHTML)
strIntroHTML = re.sub("epubisbn", publication_isbn, strIntroHTML)
if publication_license == "by-nc-sa":
    license_string = """Published under Creative Commons by-nc-sa 3.0 Germany Licence<br />
https://creativecommons.org/licenses/by-nc-sa/3.0/de/<br />"""
elif publication_license == "by-sa":
    license_string = """Published under Creative Commons Attribution-ShareAlike 4.0 International Licence<br />
https://creativecommons.org/licenses/by-sa/4.0/<br />"""
else:
    logging.error("No license found. Exiting")
    sys.exit( 1 )
strIntroHTML = re.sub("LicenseInformation", license_string, strIntroHTML)
# AdditionalInformation is optional in publication.cfg.
try:
    strIntroHTML = re.sub("AdditionalInformation", "<p>" + cfgPublication.get("General", "AdditionalInformation") + "</p>", strIntroHTML)
except configparser.NoOptionError:
    strIntroHTML = re.sub("AdditionalInformation", "", strIntroHTML)
tmpFilePath = OUTPUT_DIR / "OEBPS/intro.xhtml"
# tmpFilePath = os.getcwd() + "/CONVERT/epub/OEBPS/intro.xhtml"
# Write the finished intro page; the previous version never closed this
# file handle, relying on interpreter shutdown to flush it.
with open(tmpFilePath, "w") as tmpFile:
    tmpFile.write(strIntroHTML)
logging.info(f"{logseparator}Preparing toc.ncx")
# Fill the NCX head, title and author from the publication metadata.
xmlHead = tocncx.find("//{http://www.daisy.org/z3986/2005/ncx/}head")
xmlMeta = etree.Element("meta")
xmlMeta.set("name", "dtb:uid")
xmlMeta.set("content", publication_isbn)
xmlHead.append(xmlMeta)
xmlTitle = tocncx.find("//{http://www.daisy.org/z3986/2005/ncx/}docTitle")
xmlText = etree.Element("text")
xmlText.text = strTitleString
xmlTitle.append(xmlText)
xmlAuthor = tocncx.find("//{http://www.daisy.org/z3986/2005/ncx/}docAuthor")
xmlText = etree.Element("text")
xmlText.text = strAuthorString
xmlAuthor.append(xmlText)
##############################################################
# Convert Tralics-XML to Epub                                #
##############################################################
#xmlTree = remove_processinginstruction(xmlTree, 'hyperimage')
# Copy xmlTree to xmlEbookTree
xmlEbookTree = deepcopy(xmlTree)
# xmlChapters is a list containing all chapters
xmlChapters = xmlEbookTree.findall("//div1")
# Convert Chapters, Sections, Subsections and Subsubsections to h1, h2, h3, h4
# Insert Number from Dictionary where needed
logging.info(f"{logseparator}Convert EOAChapter to H1")
for xmlChapter in xmlChapters:
    xmlChapter.find("head").tag = "h1"
    if xmlChapter.get("rend") != "nonumber":
        idChapter = xmlChapter.get("id")
        # logging.info(idChapter + " konvertierung into h1")
        # logging.info(dictChapters[idChapter])
        # Prefix the chapter number from the pickled numbering dict.
        strHeadline = xmlChapter.find("h1").text or ""
        xmlChapter.find("h1").text = str(dictChapters[idChapter]) + ". " + strHeadline
    if xmlChapter.find(".//EOAauthor") is not None:
        # Show the chapter author in italics right after the heading.
        tmpXML = etree.Element("p")
        tmpXML.append(etree.Element("i"))
        tmpXML[0].text = xmlChapter.find(".//EOAauthor").text
        xmlChapter.insert(1, tmpXML)
        # Remove unwanted EOAauthor here
        xmlChapter.find(".//EOAauthor").text = ""
        # NOTE(review): strip_tags returns None, so this rebinds the
        # loop variable to None; harmless since it is the last use.
        xmlChapter = etree.strip_tags(xmlChapter, "EOAauthor")
# logging.info(dictSections)
logging.info(f"{logseparator}Convert EOAsection to H2")
xmlSections = xmlEbookTree.findall(".//div2")
for xmlSection in xmlSections:
    xmlSection.find("head").tag = "h2"
    idSection = xmlSection.get("id")
    strHeadline = xmlSection.find("h2").text or ""
    logging.info(strHeadline)
    if xmlSection.get("rend") != "nonumber":
        # Sections use the dotted number from dictSections, no period.
        xmlSection.find("h2").text = str(dictSections[idSection]) + " " + strHeadline
    else:
        xmlSection.find("h2").text = strHeadline
logging.info(f"{logseparator}Convert EOAsubsection to H3")
xmlSubsections = xmlEbookTree.findall(".//div3")
for xmlSubsection in xmlSubsections:
    xmlSubsection.find("head").tag = "h3"
    idSection = xmlSubsection.get("id")
    strHeadline = xmlSubsection.find("h3").text or ""
    logging.info(strHeadline)
    if xmlSubsection.get("rend") != "nonumber":
        xmlSubsection.find("h3").text = str(dictSections[idSection]) + " " + strHeadline
    else:
        xmlSubsection.find("h3").text = strHeadline
logging.info(f"{logseparator}Convert EOAsubsubsection to H4")
xmlSubsubsections = xmlEbookTree.findall(".//div4")
for xmlSubsubsection in xmlSubsubsections:
    # Sub-subsections are converted but never numbered here.
    xmlSubsubsection.find("head").tag = "h4"
    #if xmlSubsubsection.get("rend") != "nonumber":
        #idSection = xmlSubsection.get("id")
        #strHeadline = xmlSubsection.find("h4").text
        #xmlSubsection.find("h3").text = str(dictSections[idSection]) + " " + strHeadline
logging.info(f"{logseparator}Convert EOAparagraph to H5")
xmlParagraphs = xmlEbookTree.findall(".//div5")
for xmlParagraph in xmlParagraphs:
    logging.info("Found a paragraph.")
    xmlParagraph.find("head").tag = "h5"
logging.info(f"{logseparator}Dealing with dividing milestone")
# A divider paragraph is rendered as a single asterisk.
xmlParagraphs = xmlEbookTree.findall(".//p[@class='divider']")
for xmlParagraph in xmlParagraphs:
    logging.info("Found a divider.")
    xmlParagraph.text = "*"
logging.info(f"{logseparator}Preparing Figures")
# Regular figures; Hyperimage-only ("hionly") figures are handled below.
xmlFigures = xmlEbookTree.xpath(".//EOAfigure[not(contains(@type,'hionly'))]")
logging.info("Found %s figures", len(xmlFigures))
for xmlFigure in xmlFigures:
    # Copy File of the Image
    # If it's in a subfolder, name of folder and name of image will be merged
    strImageFileString = xmlFigure.find(".//file").text
    logging.debug(f"Working on image {strImageFileString}.")
    strImageFileString = strImageFileString.rstrip("\n")
    strImageFileDir = os.path.dirname(strImageFileString)
    # Remove / from path
    strImageFileDir = re.sub("/", "", strImageFileDir)
    strImageFileName = os.path.basename(strImageFileString)
    strImageFileNamewoSuffix, strImageFileName_Suffix = os.path.splitext(strImageFileName)
    if strImageFileName_Suffix == ".jpeg":
        strImageFileName = strImageFileName.replace(".jpeg", ".jpg")
    shutil.copy(
        PUBLICATION_DIR / strImageFileString,
        OUTPUT_DIR / "OEBPS/images" / (strImageFileDir + strImageFileName)
    )
    extension_and_mime = get_mimetype(strImageFileName_Suffix)
    # Normalize the copied image (external gm/pdfcrop tools).
    strImageFilepath = libeoaconvert.sanitizeImage(
        OUTPUT_DIR / "OEBPS/images" / (strImageFileDir + strImageFileName),
        TEMP_DIR,
        GM_PATH,
        PDFCROP_EXEC,
    )
    # strImageFilepath = libeoaconvert.sanitizeImage(os.getcwd() + "/CONVERT/epub/OEBPS/images/" + strImageFileDir + strImageFileName, GM_PATH, PDFCROP_EXEC)
    # Add copied file to contentopf
    content_opf_filename = Path ("images") / "{}{}.{}".format(strImageFileDir, strImageFileNamewoSuffix, extension_and_mime)
    content_opf_fileid = "{}{}{}".format(strImageFileDir, strImageFileNamewoSuffix, extension_and_mime)
    contentopf = addToContentopf(
        contentopf,
        content_opf_filename,
        content_opf_fileid,
        extension_and_mime
    )
    '''
    content_opf_filename = "images" + os.path.sep + "{}{}.{}".format(strImageFileDir, strImageFileNamewoSuffix, extension_and_mime)
    content_opf_fileid = "{}{}{}".format(strImageFileDir, strImageFileNamewoSuffix, extension_and_mime)
    contentopf = addToContentopf(contentopf, content_opf_filename, content_opf_fileid, extension_and_mime)
    '''
    idFigure = xmlFigure.find(".//anchor").get("id")
    xmlFigureCaption = xmlFigure.find(".//caption")
    intFigureNumber = dictFigures[idFigure]
    # Figure width in percent; large-scale figures always get full width.
    if xmlFigure.tag == "EOAfigure":
        strImageWidth = xmlFigure.find(".//width").text
        strImageWidth = strImageWidth.rstrip("\n")
    if xmlFigure.tag == "EOAlsfigure":
        strImageWidth = "100"
    # Replace the EOAfigure element in place by a centered <p><img/></p>.
    xmlFigure.clear()
    xmlFigure.tag = "p"
    xmlFigure.set("class", "centered_image")
    xmlFigureImage = etree.Element("img")
    xmlFigureImage.set("src", "images/" + strImageFileDir + strImageFileNamewoSuffix + "." + extension_and_mime)
    xmlFigureImage.set("alt", "")
    xmlFigureImage.set("style", "width: " + strImageWidth + "%")
    xmlFigure.append(xmlFigureImage)
    xmlFigureCaption.tag = "p"
    strFigureCaption = xmlFigureCaption.text or ""
    # FIX
    # Caption label (e.g. "Fig.") looked up in the chapter's language.
    dictLangFigures = translation_xml.find("//entry[@name='fig']").attrib
    xmlChapter = xmlFigure.xpath("./ancestor::div1")[0]
    figures_text = dictLangFigures[libeoaconvert.two_letter_language(xmlChapter.get("language"))]
    if len(strFigureCaption) == 0:
        xmlFigureCaption.text = f"{figures_text} {str(intFigureNumber)}"
    else:
        xmlFigureCaption.text = f"{figures_text} {str(intFigureNumber)}: {strFigureCaption}"
    # Put the caption paragraph right after the image paragraph.
    xmlFigure.addnext(xmlFigureCaption)
    # Change the tag of the parent <p>-Tag to <div> so that it may be removed
    #xmlFigure.getparent().tag = "div"
# Hyperimage-only figures carry no printable content in the EPUB: strip
# each one down to an empty <EOAhifigure> placeholder element.
hyperimage_figures = xmlEbookTree.xpath(".//EOAfigure[contains(@type,'hionly')]")
logging.info("Found %s hyperimage figures", len(hyperimage_figures))
for hyperimage_figure in hyperimage_figures:
    hyperimage_figure.clear()
    hyperimage_figure.tag = "EOAhifigure"
logging.info(f"{logseparator}Preparing not numbered Figures")
# Un-numbered figures (<EOAfigurenonumber>): copy the image file into the
# EPUB container, register it in content.opf and replace the element with a
# plain <p><img/></p> construct (no caption, no figure number).
xmlFigures = xmlEbookTree.findall(".//EOAfigurenonumber")
for xmlFigure in xmlFigures:
    # Copy File of the Image
    # If it's in a subfolder, name of folder and name of image will be merged
    strImageFileString = xmlFigure.find(".//file").text
    strImageFileString = strImageFileString.rstrip("\n")
    # flatten the directory part: "Images/foo.png" -> prefix "Images"
    strImageFileDir = os.path.dirname(strImageFileString)
    strImageFileDir = re.sub("/", "", strImageFileDir)
    strImageFileName = os.path.basename(strImageFileString)
    strImageFileNamewoSuffix, strImageFileName_Suffix = os.path.splitext(strImageFileName)
    if strImageFileName_Suffix == ".jpeg":
        # normalise the extension of the copied file to ".jpg"
        strImageFileName = strImageFileName.replace(".jpeg", ".jpg")
    shutil.copy(
        PUBLICATION_DIR / strImageFileString,
        OUTPUT_DIR / "OEBPS/images" / (strImageFileDir + strImageFileName)
    )
    # run the copied image through sanitizeImage (uses GraphicsMagick and
    # pdfcrop, hence the two executable paths)
    strImageFilepath = libeoaconvert.sanitizeImage(
        OUTPUT_DIR / "OEBPS/images" / (strImageFileDir + strImageFileName),
        TEMP_DIR,
        GM_PATH,
        PDFCROP_EXEC
    )
    # Add copied file to contentopf
    extension_and_mime = get_mimetype(strImageFileName_Suffix)
    # the "-nonumber-" infix keeps this manifest id distinct from the id
    # used for a numbered figure based on the same image file
    contentopf = addToContentopf(
        contentopf,
        "images/" + strImageFileDir + strImageFileNamewoSuffix + "." + extension_and_mime,
        strImageFileDir + strImageFileNamewoSuffix + "-nonumber-" + extension_and_mime,
        extension_and_mime
    )
    logging.debug("Added a nonumber figure")
    strImageWidth = xmlFigure.find(".//width").text
    strImageWidth = strImageWidth.rstrip("\n")
    # replace the whole <EOAfigurenonumber> subtree by <p><img/></p>,
    # keeping the requested width as a percentage
    xmlFigure.clear()
    xmlFigure.tag = "p"
    xmlFigureImage = etree.Element("img")
    xmlFigureImage.set("src", "images/" + strImageFileDir + strImageFileNamewoSuffix + "." + extension_and_mime)
    xmlFigureImage.set("alt", "")
    xmlFigureImage.set("style", "width: " + strImageWidth + "%")
    xmlFigure.append(xmlFigureImage)
logging.info(f"{logseparator}Preparing Footnotes")
def alph_footnote_index(fndex):
    """Return the zero-indexed lowercase-Latin label for footnote *fndex*.

    Labels run a..z, then aa, ab, ... (spreadsheet-column style), so more
    than 26 footnotes per grouping are supported.

    >>> alph_footnote_index(0)
    'a'
    >>> alph_footnote_index(1)
    'b'
    >>> alph_footnote_index(24)
    'y'
    >>> alph_footnote_index(25)
    'z'
    >>> alph_footnote_index(26)
    'aa'
    >>> alph_footnote_index(27)
    'ab'
    """
    letters = string.ascii_lowercase
    label = ""
    index = fndex
    while True:
        index, position = divmod(index, len(letters))
        label = letters[position] + label
        if not index:
            return label
        # bijective base-26: no "zero digit", so shift before recursing
        index -= 1
# def alph_footnote_index ends here
def replace_footnote_equations(footnote):
    """Replace un-numbered equations inside *footnote* by <img> references.

    Every <EOAequationnonumber> descendant of *footnote* is turned into a
    <p><img/></p> element, its pre-rendered PNG is copied into the EPUB
    container and registered in the content.opf manifest.

    Usage: contentopf = replace_footnote_equations(my_footnote)
    The function starts from the module-level `contentopf` and returns the
    updated tree; the caller must rebind the global with the return value
    (returning the result seemed like a better idea than mutating the
    global variable).
    """
    result = contentopf
    for equation in footnote.findall(".//EOAequationnonumber"):
        filename = equation.get("filename")
        equation.clear()
        equation.tag = "p"
        img = etree.Element("img", src="images/%s" % filename, alt="")
        equation.append(img)
        shutil.copy(
            INPUT_DIR / "items" / filename,
            # bug fix: this copy previously targeted "DEBPS/images", a
            # directory that no other pass creates or uses -- every other
            # image copy in this script goes to "OEBPS/images"
            OUTPUT_DIR / "OEBPS/images" / filename
        )
        result = addToContentopf(
            result,
            "images/" + filename,
            filename,
            "png"
        )
    logging.info("einmal durch replace_footnote_equations")
    return result
# def replace_footnote_equations ends here
def replace_footnote_with_sup(note):
    """Turn *note* into an empty <sup> element in place.

    Element.clear() wipes text, children, attributes *and* the tail, but
    the tail text belongs to the surrounding document flow rather than to
    the footnote itself -- so it is saved first and restored afterwards.
    """
    saved_tail = note.tail
    note.clear()
    note.tag = "sup"
    note.tail = saved_tail
# def replace_footnote_with_sup ends here
def bring_footnote_down_epub(footnote, footnote_name, destination):
    """Move the body of *footnote* into *destination* and leave a link behind.

    Captures reusable behavior from the existing code; potentially, some of
    the old code could be replaced by calls to this helper.

    The in-text <note> element is replaced by a "[<a>name</a>]" superscript
    that links down to the chapter's footnote block, and its former text and
    children are appended to *destination*. Images of un-numbered equations
    inside the footnote are registered in content.opf on the way.

    usage: contentopf = bring_footnote_down_epub(my_footnote, "1", xmlNewFootnotes)
    unfortunately, returning the result seemed like a better idea than
    mutating the global variable -- the caller must rebind `contentopf`
    with the return value.
    """
    # NB: this local `contentopf` shadows the module-level name; see usage note
    contentopf = replace_footnote_equations(footnote) # see usage note
    kids = list(footnote.getchildren())
    prefix = "[%s]" % footnote_name
    # we would like to prepend this footnote identifier to the footnote element
    if footnote.text is not None:
        # if the element starts with some text anyway, prepend it there
        # footnote.text = "%s %s" % (prefix, footnote.text)
        pass
    else:
        # if, however, the element begins with a child, prepend the text at
        # the beginning of the first child instead
        if len(kids):
            first_child = kids[0]
            # child_text = prefix
            child_text = ""
            # separate them with a space, unless the child had no text to begin with
            child_suffix = first_child.text
            if child_suffix is None:
                child_suffix = ""
            else:
                child_text += " "
            child_text += child_suffix
            first_child.text = child_text
        else:
            # a totally empty footnote is weird, but who am I to judge?
            footnote.text = prefix
    # remember the footnote's own text before the element is cleared
    footnote_text = footnote.text or ""
    # turn the in-text <note> into an empty <sup>, then build the "[n]"
    # hyperlink pointing down to the footnote block
    replace_footnote_with_sup(footnote)
    footnote.text = "["
    note_link = etree.SubElement(footnote, "a")
    note_link.set("href", "#fn" + footnote_name)
    note_link.set("id", "body_fn-ref" + footnote_name)
    note_link.text = "%s" % footnote_name
    note_link.tail = "]"
    # append any text the footnote used to have to the destination
    destkids = list(destination.getchildren())
    if len(destkids):
        # if the destination has children, append after the last one's tail
        last_kid = destkids[-1]
        prefix = last_kid.tail
        if prefix is None:
            prefix = ""
        else:
            prefix += " "
        last_kid.tail = prefix + footnote_text
    else:
        # if the destination has no children, append to its text
        prefix = destination.text
        if prefix is None:
            prefix = ""
        else:
            prefix += " "
        destination.text = prefix + footnote_text
    # finally move the footnote's former children over as well
    for kid in kids:
        destination.append(kid)
    return contentopf
# def bring_footnote_down_epub ends here
class FootnoteError(Exception):
    """Raised when a chapter mixes the two supported footnote styles.

    Only one footnote flavour may be used per chapter; old-style \\EOAfn
    notes and \\EOAfnalph-style groupings must not be combined.
    """
# class FootnoteError ends here
intTechnicalChapterNumber = 1
for xmlChapter in xmlChapters:
    # Two footnote flavours exist per chapter: "old-style" <note> elements
    # and "new-style" grouped notes delivered by get_bigfoot_data().
    groupings = libeoaconvert.get_bigfoot_data(xmlChapter)
    # notes inside <opener> are excluded from the body footnotes
    xmlFootnotes = xmlChapter.xpath(".//note[not(ancestor::opener)]")
    logging.info(f"Found {str(len(xmlFootnotes))} footnotes in this chapter.")
    has_old = 0 != len(xmlFootnotes)
    has_new = 0 != len(
        [ # flatten the association list whose values are lists, so we can take the length
            note
            for grouping, notes in groupings
            for note in notes
        ]
    )
    # the XOR case falls through, the AND is an error, and the NOR skips to the next chapter
    if has_old:
        if has_new:
            raise FootnoteError("Chapter %s contains both \\EOAfn and footnotes in the style of \\EOAfnalph" % xmlChapter.get("id-text"))
    else:
        if not has_new:
            continue
    # container for the chapter-final footnote section, headed by a
    # localised "Footnotes" heading taken from the translation table
    xmlNewFootnotes = etree.Element("div")
    xmlNewFootnotesHeader = etree.Element("h3")
    dictLangFootnotes = translation_xml.find("//entry[@name='footnotes']").attrib
    xmlNewFootnotesHeader.text = dictLangFootnotes[libeoaconvert.two_letter_language(xmlChapter.get("language"))]
    xmlNewFootnotes.append(xmlNewFootnotesHeader)
    for grouping, notes in groupings:
        # do for the new-style footnotes what was being done for the old
        for index, note in enumerate(notes):
            footnote_name = str(index + 1)
            if "lower-latin" == grouping:
                # alphabetic groupings are labelled a, b, ..., z, aa, ...
                footnote_name = alph_footnote_index(index)
            # bottom-of-chapter entry: "[<a id=fnN href=#body_fn-refN>N</a>]"
            para = etree.Element("p")
            para.text = "["
            note_link = etree.SubElement(para, "a")
            note_link.set("id", "fn" + footnote_name)
            note_link.set("href", "#body_fn-ref" + footnote_name)
            note_link.text = "%s" % footnote_name
            note_link.tail = "]"
            contentopf = bring_footnote_down_epub(note, footnote_name, para)
            xmlNewFootnotes.append(para)
    # NOTE(review): tmpFileName is not used in this pass -- possibly a leftover
    tmpFileName = "chapter" + (str(intTechnicalChapterNumber)) + ".xhtml"
    intFootnoteNumber = 1
    for xmlFootnote in xmlFootnotes:
        # Not numbered Equations may appear in a footnote, need to be treated differently
        xmlEquationsnonumber = xmlFootnote.findall(".//EOAequationnonumber")
        for xmlEquationnonumber in xmlEquationsnonumber:
            strFilename = xmlEquationnonumber.get("filename")
            xmlEquationnonumber.clear()
            xmlEquationnonumber.tag = "p"
            xmlIMG = etree.Element("img", src="images/"+ strFilename, alt="")
            xmlEquationnonumber.append(xmlIMG)
            shutil.copy(
                INPUT_DIR / "items" / strFilename,
                OUTPUT_DIR / "OEBPS/images" / strFilename
            )
            contentopf = addToContentopf(contentopf, "images/" + strFilename, strFilename, "png")
        tmp_fnstring = "fn" + str(intFootnoteNumber)
        tmp_fnrefstring = "body_fn-ref" + str(intFootnoteNumber)
        # assumes every footnote has at least one child element
        xmlFirstChild = xmlFootnote.getchildren()[0]
        # this is for the reference text: prepend "[N] " (as a link back to
        # the in-text marker) to the first child of the footnote
        if xmlFirstChild.text is None:
            xmlNewFootnoteRefBottom = etree.SubElement(xmlFirstChild, "a", href = "#" + tmp_fnrefstring, id = tmp_fnstring)
            xmlFirstChild.text = "["
            xmlNewFootnoteRefBottom.text = str(intFootnoteNumber)
            xmlNewFootnoteRefBottom.tail = "] "
            xmlFirstChild.insert(0, xmlNewFootnoteRefBottom)
        else:
            xmlNewFootnoteRefBottom = etree.Element("a", href = "#" + tmp_fnrefstring, id = tmp_fnstring)
            xmlNewFootnoteRefBottom.text = str(intFootnoteNumber)
            # keep the original beginning of the footnote after the "[N] "
            beginning_of_footnote = xmlFirstChild.text
            xmlFirstChild.text = "["
            xmlNewFootnoteRefBottom.tail = "] " + beginning_of_footnote
            xmlFirstChild.insert(0, xmlNewFootnoteRefBottom)
        #Preserve tail and children of current <note>-Tag
        xmlFootnoteContentsTail = xmlFootnote.tail
        xmlFootnoteChildren = xmlFootnote.getchildren()
        # Substitute current <note> with Number: "[<a>N</a>]" superscript
        xmlFootnote.clear()
        xmlFootnote.tag = "sup"
        xmlFootnote.text = "["
        xmlFootnote.tail = xmlFootnoteContentsTail
        xmlNewFootnoteRef = etree.SubElement(xmlFootnote, "a", href = "#" + tmp_fnstring, id = tmp_fnrefstring)
        xmlNewFootnoteRef.text = str(intFootnoteNumber)
        xmlNewFootnoteRef.tail = "]"
        # move the footnote body into the chapter-final footnote block
        if len(xmlFootnoteChildren) != 0:
            for xmlFootnoteChild in xmlFootnoteChildren:
                xmlNewFootnotes.append(xmlFootnoteChild)
        intFootnoteNumber += 1
    xmlChapter.append(xmlNewFootnotes)
    intTechnicalChapterNumber += 1
logging.info(f"{logseparator}Preparing Lists")
# Ordered and simple lists become <ol>/<ul>; description lists are left
# untouched here and picked up by the following pass.
for xmlChapter in xmlChapters:
    xmlLists = xmlChapter.findall(".//list")
    for xmlList in xmlLists:
        if xmlList.get("type") == "description":
            continue
        if xmlList.get("type") == "ordered":
            xmlList.tag = "ol"
            # NOTE(review): "..//item" searches from the *parent* element's
            # subtree; ".//item" (the first item of this very list) may
            # have been intended -- verify against sample input
            xmlFirstItem = xmlList.find("..//item")
            # use the first item's number as the <ol start="..."> value
            firstitemnumber = xmlFirstItem.get("id-text")
            xmlList.set("start", firstitemnumber)
            firstitemlabel = xmlFirstItem.get("label")
            xmlFirstItem.tag = "li"
            if firstitemlabel != f"({firstitemnumber})":
                # custom label: suppress the browser-generated numbering
                xmlFirstItem.set("style","list-style:none;")
            xmlListItems = xmlList.findall(".//item")
            for xmlListItem in xmlListItems:
                xmlListItem.tag = "li"
                itemnumber = xmlListItem.get("id-text")
                itemlabel = xmlListItem.get("label")
                if itemlabel != f"({itemnumber})":
                    # custom label: hide the list marker and prepend the
                    # label to the item's first paragraph instead
                    xmlListItem.set("style","list-style:none;")
                    itemparagraph = xmlListItem.find("p")
                    paratext = itemparagraph.text
                    itemparagraph.text = f"{itemlabel} {paratext}"
        if xmlList.get("type") == "simple":
            xmlList.tag = "ul"
            xmlListItems = xmlList.findall(".//item")
            for xmlListItem in xmlListItems:
                xmlListItem.tag = "li"
logging.info(f"{logseparator}Preparing Descriptions")
# Ordering dependency: the previous pass already retagged ordered and
# simple lists to <ol>/<ul>, so the <list> elements still remaining here
# are exactly the description lists it skipped. They become <dl>/<dt>/<dd>.
for xmlChapter in xmlChapters:
    xmlDescriptions = xmlChapter.findall(".//list")
    for xmlDescription in xmlDescriptions:
        xmlDescription.tag = "dl"
        del xmlDescription.attrib["type"]
        for xmlChild in xmlDescription.iterchildren():
            if xmlChild.tag == "label":
                xmlChild.tag = "dt"
            if xmlChild.tag == "item":
                xmlChild.tag = "dd"
                # drop internal ids; they carry no meaning in the output
                del xmlChild.attrib["id"]
                if xmlChild.get("id-text"):
                    del xmlChild.attrib["id-text"]
logging.info(f"{logseparator}Preparing Blockquotes")
# Paragraphs marked rend="quoted" become <blockquote><p>...</p></blockquote>.
for quoted_para in xmlEbookTree.findall(".//p"):
    if quoted_para.get("rend") != "quoted":
        continue
    old_text = quoted_para.text
    old_tail = quoted_para.tail
    old_children = list(quoted_para)
    quoted_para.clear()
    quoted_para.tag = "blockquote"
    inner_para = etree.Element("p")
    if old_text is not None:
        inner_para.text = old_text
    for former_child in old_children:
        inner_para.append(former_child)
    if old_tail is not None:
        # NB: the tail is re-attached to the inner <p>, mirroring the
        # previous behaviour of this pass
        inner_para.tail = old_tail
    quoted_para.append(inner_para)
logging.info(f"{logseparator}Preparing Theorems")
for xmlChapter in xmlChapters:
    xmlTheorems = xmlChapter.findall(".//theorem")
    for xmlTheorem in xmlTheorems:
        xmlTheoremHead = xmlTheorem.find(".//head")
        # NOTE(review): the next three variables are never used in this pass
        strTheoremTitel = xmlTheorem.find(".//head").text
        strTheoremText = xmlTheorem.find(".//p").text
        xmlTheoremTextTail = xmlTheorem.find(".//p").tail
        strTheoremNumber = xmlTheorem.get("id-text")
        # the theorem becomes a plain paragraph with a bold
        # "<title> <number>" head
        xmlTheorem.tag = "p"
        xmlTheoremHead.tag = "b"
        xmlTheoremHead.text = xmlTheoremHead.text + " " + strTheoremNumber
        del xmlTheorem.attrib["style"]
        del xmlTheorem.attrib["type"]
        del xmlTheorem.attrib["id-text"]
        del xmlTheorem.attrib["id"]
        # dissolve the inner <p> wrappers, keeping their contents
        etree.strip_tags(xmlTheorem, "p")
logging.info(f"{logseparator}Preparing Hyperlinks")
# Rewrite every <xref> using the chapter language (default: english).
for chapter in xmlChapters:
    chapter_language = chapter.get("language")
    # KT changing this after separating the big script
    effective_language = "english" if chapter_language is None else chapter_language
    for hyperlink in chapter.findall(".//xref"):
        libeoaconvert.format_hyperlinks_django_epub(hyperlink, effective_language)
# The following passes map the remaining inline markup onto plain XHTML:
# <hi rend="it"> -> <em>, <hi rend="bold"> -> <b>, EOAup/EOAdown -> sup/sub,
# and EOAst/EOAls/EOAcaps -> styled <span> elements.
logging.info(f"{logseparator}Convert emphasized text")
for chapter in xmlChapters:
    for element in chapter.findall(".//hi"):
        if element.get("rend") == "it":
            element.tag = "em"
            del element.attrib["rend"]
logging.info(f"{logseparator}Convert bold text")
for chapter in xmlChapters:
    for element in chapter.findall(".//hi"):
        if element.get("rend") == "bold":
            element.tag = "b"
            del element.attrib["rend"]
logging.info(f"{logseparator}Convert EOAup to <sup>")
for chapter in xmlChapters:
    for element in chapter.findall(".//EOAup"):
        element.tag = "sup"
logging.info(f"{logseparator}Convert EOAdown to <sub>")
for chapter in xmlChapters:
    for element in chapter.findall(".//EOAdown"):
        element.tag = "sub"
logging.info(f"{logseparator}Convert EOAst to <span>")
for chapter in xmlChapters:
    for element in chapter.findall(".//EOAst"):
        element.tag = "span"
        element.set("style", "text-decoration: line-through;")
logging.info(f"{logseparator}Convert EOAls to something nice")
for chapter in xmlChapters:
    for element in chapter.findall(".//EOAls"):
        element.tag = "span"
        element.set("style", "letter-spacing: 0.5em;")
logging.info(f"{logseparator}Convert EOAcaps to something nice")
for chapter in xmlChapters:
    for element in chapter.findall(".//EOAcaps"):
        element.tag = "span"
        element.set("style", "font-variant:small-caps;")
logging.info(f"{logseparator}Convert EOAineq into appropriate IMG-Tags")
# PNG renderings of inline equations are copied into the container and the
# <EOAineq> elements become <img> tags; the TeX source survives as alt text.
for xmlChapter in xmlChapters:
    xmlInlineEquations = xmlChapter.findall(".//EOAineq")
    for xmlInlineEquation in xmlInlineEquations:
        xmlInlineEquation.tag = "img"
        xmlInlineEquation.set("alt", xmlInlineEquation.get("TeX"))
        del xmlInlineEquation.attrib["TeX"]
        shutil.copy(
            INPUT_DIR / "items" / xmlInlineEquation.get("src"),
            OUTPUT_DIR / "OEBPS/images" / xmlInlineEquation.get("src")
        )
        xmlInlineEquation.set("src", "images/" + xmlInlineEquation.get("src"))
        # NOTE(review): at this point "src" already carries the "images/"
        # prefix, so it is used as both href *and* manifest id -- unlike the
        # other addToContentopf calls, where the id is the bare file name;
        # verify that addToContentopf tolerates this
        contentopf = addToContentopf(contentopf, xmlInlineEquation.get("src"), xmlInlineEquation.get("src"), "png")
logging.info(f"{logseparator}Convert EOAchem into appropriate IMG-Tags")
# Same treatment for inline chemical formulae.
for xmlChapter in xmlChapters:
    xml_inline_chems = xmlChapter.findall(".//EOAchem")
    for xml_inline_chem in xml_inline_chems:
        xml_inline_chem.tag = "img"
        xml_inline_chem.set("alt", xml_inline_chem.get("TeX"))
        del xml_inline_chem.attrib["TeX"]
        shutil.copy(
            INPUT_DIR / "items" / xml_inline_chem.get("src"),
            OUTPUT_DIR / "OEBPS/images" / xml_inline_chem.get("src")
        )
        xml_inline_chem.set("src", "images/" + xml_inline_chem.get("src"))
        contentopf = addToContentopf(contentopf, xml_inline_chem.get("src"), xml_inline_chem.get("src"), "png")
logging.info(f"{logseparator}Convert EOAinline into appropriate IMG-Tags")
# Inline images: copy the file, shrink it to icon size with GraphicsMagick
# and replace the element by an <img> tag.
for xmlChapter in xmlChapters:
    xmlInlineElements = xmlChapter.findall(".//EOAinline")
    for xmlInlineElement in xmlInlineElements:
        xmlInlineElement.tag = "img"
        xmlInlineElement.set("alt", "Too late")
        strInlineElementFilePath = xmlInlineElement.text
        # remove text from element. This is visible in epub (at least in calibre's e-book-viewer)
        # however, the text is taken as id in content.opf
        # set it to nil after the addToContentopf
        strInlineElementFileName = os.path.basename(strInlineElementFilePath)
        strInlineElementDirName = os.path.dirname(strInlineElementFilePath)
        # only the last path component of the directory is kept as prefix
        strInlineElementSubDirName = os.path.dirname(strInlineElementFilePath).split(os.path.sep)[-1]
        strNewImagePath = OUTPUT_DIR / "OEBPS/images" / Path(strInlineElementSubDirName + strInlineElementFileName)
        # trouble when there are subdirectories in Image path!
        # some thing goes wrong here: <EOAinline>Images/png_300dpi/A.png</EOAinline>
        shutil.copy(
            PUBLICATION_DIR / strInlineElementDirName / strInlineElementFileName,
            strNewImagePath
        )
        # shrink the copied image in place so it fits into a text line
        strCommand = f"{GM_PATH} convert {strNewImagePath} -resize 20x20 {strNewImagePath}"
        listArguments = shlex.split(strCommand)
        subprocess.check_output(listArguments, shell=False)
        xmlInlineElement.set("src", "images/" + strInlineElementSubDirName + strInlineElementFileName)
        # contentopf, Filename, FileID, Mediatype
        # <item id="Troublemaker" media-type="image/png" href="images/inlineA.jpg"/>
        # Mediatype should not be hard coded!!!
        # base this on file extension
        extension = strInlineElementFileName.split(".")[-1]
        contentopf = addToContentopf(contentopf, "images/" + strInlineElementSubDirName + strInlineElementFileName, xmlInlineElement.text, extension)
        xmlInlineElement.text = ""
logging.info(f"{logseparator}Epigraphs")
# Epigraph wrappers are renamed to "tagtobestripped" (the tag name suggests
# a later pass removes them); the children keep an "epigraph" class.
for chapter in xmlChapters:
    for epigraph_element in chapter.findall(".//epigraph"):
        epigraph_element.tag = "tagtobestripped"
        for epigraph_child in epigraph_element:
            epigraph_child.set("class", "epigraph")
logging.info(f"{logseparator}Preparing Verses")
# A verse becomes a single <p class="verse"> whose lines are separated by
# <br/> elements instead of individual <p> wrappers.
for chapter in xmlChapters:
    for verse in chapter.findall(".//EOAverse"):
        verse_lines = verse.getchildren()
        # every line but the last gets a trailing line break before the
        # wrapping <p> elements are dissolved
        for verse_line in verse_lines[:-1]:
            verse_line.append(etree.Element("br"))
        etree.strip_tags(verse, "p")
        verse.tag = "p"
        existing_class = verse.get("class")
        if existing_class is None:
            verse.set("class", "verse")
        else:
            verse.set("class", existing_class + " verse")
logging.info(f"{logseparator}Preparing Equations")
# Numbered display equations: copy the pre-rendered PNG, register it in
# content.opf and replace <EOAequation> by <p><img/></p>, followed by a
# separate "(<number>)" paragraph.
for xmlChapter in xmlChapters:
    xmlEquations = xmlChapter.findall(".//EOAequation")
    for xmlEquation in xmlEquations:
        # NOTE(review): strNumber duplicates strEquationNumber below and is unused
        strNumber = xmlEquation.get("number")
        strFilename = xmlEquation.get("filename")
        # Copy image of Equation
        shutil.copy(
            INPUT_DIR / "items" / strFilename,
            OUTPUT_DIR / "OEBPS/images" / strFilename
        )
        contentopf = addToContentopf(contentopf, "images/" + strFilename, strFilename, "png")
        # Find out Number of Equation to be appended in the last step
        strEquationNumber = xmlEquation.get("number")
        # Rework XML
        xmlEquation.clear()
        xmlEquation.tag = "p"
        xmlEquationImage = etree.Element("img")
        xmlEquationImage.set("src", "images/" + strFilename)
        xmlEquationImage.set("alt", "")
        xmlEquation.append(xmlEquationImage)
        xmlNew = etree.Element('p')
        xmlNew.text = "(" + strEquationNumber + ")"
        xmlEquation.addnext(xmlNew)
        # Parent tag of Equation should be <div> instead of <p>, so that it may be removed
        #xmlEquation.getparent().tag = "div"
# Un-numbered display equations: same treatment as above but without the
# trailing "(<number>)" paragraph.
for xmlChapter in xmlChapters:
    xmlEquations = xmlChapter.findall(".//EOAequationnonumber")
    for xmlEquation in xmlEquations:
        strFilename = xmlEquation.get("filename")
        # Copy image of Equation
        shutil.copy(
            INPUT_DIR / "items" / strFilename,
            OUTPUT_DIR / "OEBPS/images" / strFilename
        )
        contentopf = addToContentopf(contentopf, "images/" + strFilename, strFilename, "png")
        # Rework XML
        xmlEquation.clear()
        xmlEquation.tag = "p"
        xmlEquationImage = etree.Element("img")
        xmlEquationImage.set("src", "images/" + strFilename)
        xmlEquationImage.set("alt", "")
        xmlEquation.append(xmlEquationImage)
        # Parent tag of Equation should be <div> instead of <p>, so that it may be removed
        #xmlEquation.getparent().tag = "div"
# EOAequationarray is not converted properly so far; retagging the element
# to <div> merely makes the wrapper disappear, leaving only its children.
for chapter in xmlChapters:
    for equation_array in chapter.findall(".//EOAequationarray"):
        equation_array.tag = "div"
logging.info(f"{logseparator}Preparing Letterheads")
for chapter in xmlChapters:
    for letterhead in chapter.xpath(".//EOAletterhead"):
        # each metadata field becomes a paragraph whose first child is
        # rendered in italics
        for field_tag in ("Recipient", "Archive", "Additional", "Pages"):
            field = letterhead.find(".//" + field_tag)
            field.tag = "p"
            field.getchildren()[0].tag = "em"
        # frame the letterhead block with horizontal rules
        letterhead.insert(0, etree.Element("hr"))
        letterhead.insert(5, etree.Element("hr"))
logging.info(f"{logseparator}Preparing Transcriptions")
# TODO: May need rework concerning the right Column
for xmlChapter in xmlChapters:
    # Remove <Facsimilelink>
    etree.strip_elements(xmlChapter, "Facsimilelink")
    xmlTranscriptions = xmlChapter.xpath(".//EOAtranscripted")
    for xmlTranscription in xmlTranscriptions:
        logging.info("Processing Transcription")
        #logging.info (etree.tostring(xmlTranscription))
        # a transcription becomes a two-column table: a header row
        # (left/right header) plus one body row whose content is split at
        # the <pagebreak/> marker
        xmlTranscription.tag = "table"
        xmlHeader = xmlTranscription.find(".//EOAtranscriptedheader")
        xmlHeader.tag = "tr"
        xmlLeftHeader = xmlTranscription.find(".//Leftheader")
        xmlLeftHeader.tag = "td"
        xmlLeftHeader.set("style", "width: 50%")
        xmlRightHeader = xmlTranscription.find(".//Rightheader")
        xmlRightHeader.tag = "td"
        xmlTranscriptedtext = xmlTranscription.find(".//EOAtranscriptedtext")
        # change \n\n into </p><p> and pagebreak into </p><pagebreak><p> to create some valid markup
        strTranscriptedtext = etree.tostring(xmlTranscriptedtext, encoding="unicode")
        #strTranscriptedtext = re.sub (r"\n\n\n\n", "</p><p>", str(strTranscriptedtext), re.MULTILINE)
        #strTranscriptedtext = re.sub (r"\n\n\n", "</p><p>", str(strTranscriptedtext), re.MULTILINE)
        #strTranscriptedtext = re.sub (r"\n\n", "</p><p>", str(strTranscriptedtext))
        #strTranscriptedtext = re.sub (r"<pagebreak/>", "</p><pagebreak/><p>", strTranscriptedtext)
        xmlLeftColumn = etree.Element("td")
        xmlRightColumn = etree.Element("td")
        boolRightColumn = False
        # re-parse the serialised text and distribute its children onto the
        # two columns; a <pagebreak> element flips to the right-hand column
        # (the marker itself is dropped)
        xmlTemp = etree.XML(str(strTranscriptedtext))
        for xmlElement in xmlTemp.iterchildren():
            if xmlElement.tag == "pagebreak":
                boolRightColumn = True
                logging.info("Spaltenwechsel!")
                continue
            if boolRightColumn == False:
                xmlLeftColumn.append(xmlElement)
            if boolRightColumn == True:
                xmlRightColumn.append(xmlElement)
        # rebuild <EOAtranscriptedtext> as the table's body row
        xmlTranscriptedtext.clear()
        xmlTranscriptedtext.tag = "tr"
        xmlTranscriptedtext.set("valign", "top")
        xmlTranscriptedtext.append(xmlLeftColumn)
        xmlTranscriptedtext.append(xmlRightColumn)
logging.info(f"{logseparator}Preparing Tables")
# Convert each EOAtable wrapper into plain XHTML table markup: number the
# caption, translate the LaTeX-ish column specification (e.g. "L2cm R3cm")
# into per-cell align/width attributes, and handle header rows, colspans
# and embedded figures.
intChapterNumber = 1
for xmlChapter in xmlChapters:
    xmlTables = xmlChapter.findall(".//EOAtable")
    for xmlTable in xmlTables:
        xmlRawTable = xmlTable.find(".//table")
        xml_table_id = xmlRawTable.get("id")
        strTableCaption = xmlTable.find(".//EOAtablecaption").text or ""
        if strTableCaption != "nonumber":
            # Numbered table: build "<n> <caption>" paragraph after the table.
            intTableNumber = dictTables[xml_table_id]
            xmlTableCaption = etree.Element("p")
            xmlTableCaption.text = str(intTableNumber) + " " + strTableCaption
            # NOTE(review): getchildren() returns a list, never None, so this
            # condition is always true; kept as-is to preserve behaviour.
            if xmlTable.find(".//EOAtablecaption").getchildren() is not None:
                for xmlChild in xmlTable.find(".//EOAtablecaption").iterchildren():
                    xmlTableCaption.append(xmlChild)
            xmlRawTable.addnext(xmlTableCaption)
            xmlTable.find(".//EOAtablelabel").clear()
            xmlTable.remove(xmlTable.find(".//EOAtablelabel"))
        else:
            logging.info("Table has no caption")
            xmlTable.find(".//EOAtablecaption").clear()
            xmlTable.remove(xmlTable.find(".//EOAtablecaption"))
        # Analyze Width and Alignment of the Columns
        strColumnString = xmlTable.find(".//EOAtablecolumns").text
        strColumnString = re.sub(r"\|", "", strColumnString)
        xmlTable.remove(xmlTable.find(".//EOAtablecolumns"))
        # Each match is an alignment letter plus a width, e.g. "L2.5cm".
        reMatchObjects = re.findall(r'([L|R|C].*?[c|m]m)', strColumnString)
        intTableWidth = 0
        # Index 0 is a dummy so that column numbers can start at 1 below.
        listColumnAlignments = [None]
        listColumnWidths = [None]
        intNumberOfColumns = 0
        for strColumnDefinition in reMatchObjects:
            strColumnDefinition = strColumnDefinition.rstrip("cm")
            strColumnAlignment = strColumnDefinition[0]
            if strColumnAlignment == "L":
                strColumnAlignment = "left"
            if strColumnAlignment == "C":
                strColumnAlignment = "center"
            if strColumnAlignment == "R":
                strColumnAlignment = "right"
            listColumnAlignments.append(strColumnAlignment)
            # Convert cm to px with a fixed factor of 75 px/cm.
            intColumnWidth = int(float(strColumnDefinition.lstrip("LRC")) * 75)
            listColumnWidths.append(intColumnWidth)
            intTableWidth += intColumnWidth
            intNumberOfColumns += 1
        xmlRawTable.set("width", str(intTableWidth)+"px;")
        # NOTE(review): these deletions raise KeyError if the attribute is
        # absent (only "place" is guarded) — presumably always present in
        # the intermediate XML; confirm against the generator.
        del xmlRawTable.attrib["rend"]
        del xmlRawTable.attrib["id-text"]
        del xmlRawTable.attrib["id"]
        try:
            del xmlRawTable.attrib["place"]
        except KeyError:
            pass
        # Figure out and deal with the Header
        xmlHeader = xmlRawTable.find(".//row/cell/tableheader")
        if xmlHeader is not None:
            # A <tableheader> marker means the first row is a header row:
            # remove the marker and turn the first row's cells into <th>.
            xmlHeader.text = ""
            xmlHeader.getparent().text = xmlHeader.tail
            xmlHeader.getparent().remove(xmlHeader)
            xmlFirstRow = xmlRawTable.find(".//row")
            xmlFirstRow.tag = "tr"
            xmlFirstRowCells = xmlFirstRow.findall(".//cell")
            for xmlFirstRowCell in xmlFirstRowCells:
                xmlFirstRowCell.tag = "th"
        # Now Deal with the rest of the rows
        xmlTableRows = xmlRawTable.findall(".//row")
        for xmlTableRow in xmlTableRows:
            xmlTableCells = xmlTableRow.findall(".//cell")
            # intCurrentColumn tracks the logical column (1-based) so each
            # cell can pick up the right alignment/width from the lists.
            intCurrentColumn = 1
            logging.info(listColumnAlignments)
            for xmlTableCell in xmlTableCells:
                xmlTableCell.tag = "td"
                xmlTableCell.set("align",listColumnAlignments[intCurrentColumn])
                xmlTableCell.set("style","width: " + str(listColumnWidths[intCurrentColumn]) + "px;")
                # Deal with multicolumn
                if xmlTableCell.get("cols") is not None:
                    xmlTableCell.set("colspan", xmlTableCell.get("cols"))
                if intCurrentColumn > len(xmlTableCells):
                    intCurrentColumn = 1
                # Deal with multicolumn again, increase intCurrentColumn by the columns being spanned
                elif xmlTableCell.get("cols") is not None:
                    intCurrentColumn = intCurrentColumn + int(xmlTableCell.get("cols"))
                    del xmlTableCell.attrib["cols"]
                else:
                    intCurrentColumn += 1
                # deal with multirow
                if xmlTableCell.get("rowspan") is not None:
                    cellchildren = xmlTableCell.getchildren()
                    for child in cellchildren:
                        if child.tag == "figure":
                            # A figure inside a spanning cell becomes an <img>;
                            # copy the file into OEBPS/images (directory part
                            # flattened into the file name) and register it.
                            child.tag = "img"
                            imagepath = f"{child.get('file')}.{child.get('extension')}"
                            logging.debug(f"Source image: {PUBLICATION_DIR} {imagepath}")
                            strImageFileDir = os.path.dirname(imagepath)
                            strImageFileDir = re.sub("/", "", strImageFileDir)
                            strImageFileName = os.path.basename(imagepath)
                            logging.debug(f"Meant to be copied to {OUTPUT_DIR} /images/ {strImageFileDir}{strImageFileName}")
                            shutil.copy(
                                PUBLICATION_DIR / imagepath,
                                OUTPUT_DIR / "OEBPS" / "images" / (strImageFileDir + strImageFileName)
                            )
                            if child.get('extension') == "pdf":
                                # PDFs cannot be displayed in EPUB; sanitizeImage
                                # presumably converts to PNG — confirm in libeoaconvert.
                                strImageFilepath = libeoaconvert.sanitizeImage(
                                    OUTPUT_DIR / "OEBPS" / "images" / (strImageFileDir + strImageFileName),
                                    TEMP_DIR, GM_PATH, PDFCROP_EXEC
                                )
                                strImageFileName = strImageFileName.replace(".pdf", ".png")
                            strImageFileDir = f"images/{strImageFileDir + strImageFileName}"
                            child.set("src", strImageFileDir)
                            strImageFileNamewoSuffix, strImageFileName_Suffix = os.path.splitext(strImageFileName)
                            extension_and_mime = get_mimetype(strImageFileName_Suffix)
                            contentopf = addToContentopf(
                                contentopf,
                                strImageFileDir,
                                strImageFileDir + strImageFileNamewoSuffix + "-nonumber-" + extension_and_mime,
                                extension_and_mime
                            )
                            child.set("width", f"{str(listColumnWidths[intCurrentColumn])}px")
                            del child.attrib["rend"]
                            del child.attrib["file"]
                            del child.attrib["extension"]
            xmlTableRow.tag = "tr"
            xmlTableRow.set("valign", "top")
        # Unwrap the table: move it to be a sibling of its (now empty)
        # wrapper paragraph, then drop the wrapper.
        xmlTableParent = xmlTable.getparent()
        xmlTableParent.addnext(xmlTable)
        xtp = etree.tostring(xmlTableParent)
        xmlTableParent.getparent().remove(xmlTableParent)
        logging.info("Finished with that table.")
    intChapterNumber += 1
logging.info(f"{logseparator}Preparing Facsimiles")
# Replace every EOAfacsimilepage by an <img> element.  The referenced image
# is copied into OEBPS/images/ and registered in content.opf.
xmlParts = xmlEbookTree.findall(".//div0")
for xmlPart in xmlParts:
    xmlFacsimiles = xmlPart.findall(".//EOAfacsimilepage")
    for xmlFacsimile in xmlFacsimiles:
        strImageFile = xmlFacsimile.find(".//file").text
        # NOTE(review): strFacsimileLabel is read but never used below.
        strFacsimileLabel = xmlFacsimile.find(".//label").text
        facsimile_pagenumber = xmlFacsimile.find(".//pagenumber").text or ""
        etree.strip_elements(xmlFacsimile, "file")
        etree.strip_elements(xmlFacsimile, "label")
        # TODO: somehow deal with a (missing) file suffix here, and convert
        # files if necessary.
        strImageFile = strImageFile.rstrip("\n")
        # Flatten the directory part into the file name (slashes removed)
        # so that all images live directly under OEBPS/images/.
        strImageFileDir = os.path.dirname(strImageFile)
        strImageFileDir = re.sub("/", "", strImageFileDir)
        strImageFileName = os.path.basename(strImageFile)
        shutil.copy(
            PUBLICATION_DIR / strImageFile,
            OUTPUT_DIR / "OEBPS/images" / (strImageFileDir + strImageFileName)
        )
        # sanitizeImage post-processes the copied file in place (presumably
        # normalisation/conversion — see libeoaconvert); return value unused.
        strImageFilepath = libeoaconvert.sanitizeImage(
            OUTPUT_DIR / "OEBPS/images" / (strImageFileDir + strImageFileName),
            TEMP_DIR,
            GM_PATH,
            PDFCROP_EXEC
        )
        # Add copied file to contentopf
        img_base_file_name, img_file_extension = os.path.splitext(strImageFileName)
        contentopf = addToContentopf(contentopf, "images/" + strImageFileDir + strImageFileName, strImageFileDir + strImageFileName, img_file_extension[1:])
        # Resulting markup:
        # <img src="images/..." alt="Facsimile page N" style="width: 95%; height: auto;"/>
        facsimile_image_element = etree.Element(htmlns + "img")
        facsimile_image_element.set("src", "images/" + strImageFileDir + strImageFileName)
        facsimile_image_element.set("style", "width: 95%; height: auto;")
        facsimile_image_element.set("alt", "Facsimile page " + facsimile_pagenumber)
        xmlFacsimile.getparent().replace(xmlFacsimile, facsimile_image_element)
logging.info(f"{logseparator}Preparing Cross-References")
# Resolve every outermost EOAref element into plain text (or into an <a>
# link when it points at a Hyperimage target) using the lookup dictionaries
# filled earlier in the run.  Unknown reference types abort the conversion.
# restart chapter counter
intChapterNumber = 1
for xmlChapter in xmlChapters:
    # Only outermost EOAref elements; nested ones (collage sub-references)
    # are consumed while handling their parent.
    xmlReferences = xmlChapter.xpath(".//EOAref[not(parent::EOAref)]")
    for xmlReference in xmlReferences:
        # A 'hitarget' attribute on the inner <ref> marks a Hyperimage target.
        hitarget_id_list = xmlReference.xpath("./ref/@hitarget")
        hitarget_id = hitarget_id_list[0] if len(hitarget_id_list) == 1 else None
        reference_type = xmlReference.get("type")
        if reference_type == "text":
            # Literal reference text: keep it verbatim.
            tmpTail = xmlReference.tail or ""
            strResult = xmlReference.text
        elif reference_type == "collage":
            tmpTail = xmlReference.tail or ""
            logging.debug("Found reference to a Hyperimage collage.")
            # A collage reference bundles several numeric figure references;
            # concatenate their resolved figure numbers and tails.
            subreferences = xmlReference.xpath("./EOAref[@type='number']")
            strResult = ""
            for subref in subreferences:
                subref_tail = subref.tail or ""
                subref_target = subref.xpath("./ref/@target")[0]
                target_string = dictFigures[subref_target]
                strResult += f"{target_string}{subref_tail}"
        elif reference_type == "number":
            # Placeholder that survives into the output when no dictionary
            # knows the target — makes broken references visible to editors.
            strResult = "!!! Cross Reference !!!"
            xmlReferenceLabel = xmlReference.find("Label")
            xmlReferenceLabelText = xmlReferenceLabel.text
            xmlReferenceRef = xmlReference.find("ref")
            xmlReferenceRefTarget = xmlReferenceRef.get("target")
            # The cascade order is deliberate: when a target occurs in
            # several dictionaries, the last matching lookup wins.
            if xmlReferenceLabelText in dictEquations:
                logging.info("Found link to array:" + xmlReferenceLabelText)
                strResult = dictEquations[xmlReferenceLabelText]
            if xmlReferenceRefTarget in dictEquations:
                logging.info("Found link to equation:" + xmlReferenceRefTarget)
                strResult = dictEquations[xmlReferenceRefTarget]
            if xmlReferenceRefTarget in dictLists:
                logging.info("Found link to list")
                strResult = dictLists[xmlReferenceRefTarget]
            if xmlReferenceRefTarget in dictChapters:
                logging.info("Found link to chapter")
                strResult = dictChapters[xmlReferenceRefTarget]
            if xmlReferenceRefTarget in dictSections:
                logging.info("Found link to section")
                strResult = dictSections[xmlReferenceRefTarget]
            if xmlReferenceRefTarget in dictFigures:
                logging.info("Found link to figure")
                strResult = dictFigures[xmlReferenceRefTarget]
            if xmlReferenceRefTarget in dictFootnotes:
                logging.info("Found link to footnote")
                strResult = dictFootnotes[xmlReferenceRefTarget]
            if xmlReferenceRefTarget in dictTheorems:
                logging.info("Found link to theorem")
                strResult = dictTheorems[xmlReferenceRefTarget]
            if xmlReferenceRefTarget in dictTables:
                logging.info("Found link to table")
                strResult = dictTables[xmlReferenceRefTarget]
            tmpTail = xmlReference.tail or ""
        else:
            logging.error("Found unknown reference type: %s. Exiting", reference_type)
            # BUGFIX: exit with a non-zero status — the original sys.exit(0)
            # reported success to the calling process despite the error.
            sys.exit(1)
        xmlReference.clear()
        if args.hyperimage and hitarget_id and reference_type in ["collage", "number"]:
            # Turn the reference into a link into the Hyperimage web
            # presentation of this chapter.
            hyperimage_link = f"{publication_landingpage[:-11]}/{intChapterNumber}/index.html#{hitarget_id}"
            xmlReference.tag = "a"
            xmlReference.set("href", hyperimage_link)
        # ROBUSTNESS: some dictionaries store non-string values (e.g.
        # dictTables holds numbers); lxml rejects non-string .text
        # assignments, so coerce — but leave None untouched.
        if strResult is not None and not isinstance(strResult, str):
            strResult = str(strResult)
        xmlReference.text = strResult
        xmlReference.tail = tmpTail
    intChapterNumber += 1
# Substitute Page-References with their targets
for xmlChapter in xmlChapters:
    xmlReferences = xmlChapter.findall(".//EOApageref")
    for xmlReference in xmlReferences:
        # Placeholder stays visible in the output when the label is unknown.
        strResult = "!!! Page Reference !!!"
        xmlReferenceLabel = xmlReference.find("Label")
        xmlReferenceLabelText = xmlReferenceLabel.text
        logging.info(xmlReferenceLabelText)
        # NOTE: the original also read ./ref/@target here, but the value was
        # never used and the lookup crashed when <ref> was missing — removed
        # as dead code.
        if xmlReferenceLabelText in dictPagelabels:
            logging.info("Found link to page: " + xmlReferenceLabelText)
            strResult = dictPagelabels[xmlReferenceLabelText]
        # Replace the element's content with the resolved page label while
        # keeping its tail text in place.
        tmpTail = xmlReference.tail or ""
        xmlReference.clear()
        xmlReference.text = strResult
        xmlReference.tail = tmpTail
# Correcting References to Publications
# NOTE: This may be reworked in the future to enable popups in the ebook.
# NOTE: For the time being, the span is going to be removed (EOAcitation is
# stripped from the tree in the cleanup step further down).
for xmlChapter in xmlChapters:
    # Only spans marked rel="popover" are citation references.
    for popover_span in xmlChapter.findall(".//span[@rel='popover']"):
        popover_span.tag = "EOAcitation"
##############################################################
# Finish ePub Conversion, save File                          #
##############################################################
logging.info(f"{logseparator}Cleaning up XML")
# Empty every index marker but keep its tail text so the surrounding
# sentence stays intact.
for index_marker in xmlEbookTree.xpath(".//EOAindex | .//EOAindexperson | .//EOAindexlocation"):
    kept_tail = index_marker.tail or ""
    index_marker.clear()
    index_marker.tail = kept_tail
# Unwrap purely structural/intermediate tags: their children and text are
# merged into the parent element.
tags_to_strip = (
    "EOAlabel", "EOAindex", "EOApageref", "EOAcitenumeric", "EOAtable",
    "EOAref", "note", "div", "div2", "div3", "div4", "div5", "citetext",
    "newpage", "EOAciteyear", "EOAtablelabel", "hi", "pagebreak", "page",
    "pagestyle", "EOAcitation", "EOAciteauthoryear", "EOAcitemanual",
    "EOAprintbibliography", "EOAindexperson", "EOAprintindex",
    "EOAindexlocation", "EOAprintpersonindex", "EOAprintlocationindex",
    "anchor", "temp", "EOAletterhead", "EOAhifigure", "EOAtocentry",
    "tagtobestripped",
)
etree.strip_tags(xmlEbookTree, *tags_to_strip)
# Drop intermediate-format attributes everywhere ("id" used to be in this
# list as well).
attributes_to_strip = ("id-text", "noindent", "type", "label", "spacebefore", "rend", "hielement")
etree.strip_attributes(xmlEbookTree, *attributes_to_strip)
# Remove these elements entirely, including their content (tails dropped).
etree.strip_elements(xmlEbookTree, "citekey", "originalcontents", "elementtoberemoved", with_tail=False)
logging.info("Write every Part and Chapter into one file")
# Each div1 chapter becomes its own chapterN.xhtml file; a div0 part that
# contains chapters additionally gets a heading-only file of its own, the
# first time one of its chapters is encountered.  Every file is registered
# in both toc.ncx and content.opf.
xmlChapters = xmlEbookTree.findall("//div1")
listParts = []                    # ids of div0 parts already written
intTechnicalChapterNumber = 1     # running number for chapterN.xhtml
for xmlChapter in xmlChapters:
    # Load a fresh XHTML template for this output file.
    htmlChapter = etree.parse(str(EPUB_FILES / "epubchapter.xml"), xmlChapterParser)
    # Find out if the chapter sits inside a part; if that part has not been
    # emitted yet, write a file containing only the part heading first.
    xmlChapterParent = xmlChapter.getparent()
    if xmlChapterParent.tag == "div0" and xmlChapterParent.get("id") not in listParts:
        listParts.append(xmlChapterParent.get("id"))
        strPartTitle = xmlChapterParent.find(".//head").text
        htmlChapter.find(".//" + htmlns + "title").text = strPartTitle
        xmlNew = etree.Element('h1')
        xmlNew.text = strPartTitle
        htmlChapter.find(".//" + htmlns + "body").append(xmlNew)
        # Save the part file.  FIX: write with an explicit UTF-8 encoding in
        # a context manager (the original used open() without encoding,
        # i.e. the platform default, and never closed on error).
        tmpFileName = OUTPUT_DIR / ("OEBPS/chapter" + str(intTechnicalChapterNumber) + ".xhtml")
        with open(tmpFileName, "w", encoding="utf-8") as tmpFile:
            tmpFile.write(etree.tostring(htmlChapter, pretty_print=True, encoding="unicode"))
        # Register the part file in the navigation and the manifest.
        tocncx = addToTocncx(tocncx, htmlChapter.find(".//" + htmlns + "title").text, intTechnicalChapterNumber)
        contentopf = addToContentopf(contentopf, "chapter" + str(intTechnicalChapterNumber) + ".xhtml", "chapter" + str(intTechnicalChapterNumber), "xml")
        intTechnicalChapterNumber += 1
        # Start over with a clean template for the chapter itself.
        htmlChapter = etree.parse(str(EPUB_FILES / "epubchapter.xml"), xmlChapterParser)
    # Move all children of the div1 into the template body.  deepcopy is
    # required because a plain append() would detach them from the source tree.
    for xmlChild in xmlChapter.getchildren():
        htmlChapter.find(".//" + htmlns + "body").append(deepcopy(xmlChild))
    # Save the chapter file (same UTF-8 fix as above).
    tmpFileName = OUTPUT_DIR / ("OEBPS/chapter" + str(intTechnicalChapterNumber) + ".xhtml")
    with open(tmpFileName, "w", encoding="utf-8") as tmpFile:
        tmpFile.write(etree.tostring(htmlChapter, pretty_print=True, encoding="unicode"))
    # Register the chapter in the navigation and the manifest.
    tocncx = addToTocncx(tocncx, xmlChapter.find(".//h1").text, intTechnicalChapterNumber)
    contentopf = addToContentopf(contentopf, "chapter" + str(intTechnicalChapterNumber) + ".xhtml", "chapter" + str(intTechnicalChapterNumber), "xml")
    intTechnicalChapterNumber += 1
logging.info("Convert Facsimile-Parts")
# A div0 part that contains an EOAfacsimilepart marker is written as its own
# chapterN.xhtml file and registered in toc.ncx and content.opf.
xmlParts = xmlEbookTree.findall("//div0")
for xmlPart in xmlParts:
    logging.info(f"{logseparator}Working on Facsimile-Part")
    # Only parts that actually carry an EOAfacsimilepart child are converted.
    if bool(xmlPart.findall(".//EOAfacsimilepart")):
        htmlChapter = etree.parse(str(EPUB_FILES / "epubchapter.xml"), xmlChapterParser)
        # Change EOAfacsimilepart into the part's h1 headline.
        xmlHeadline = xmlPart.find(".//EOAfacsimilepart")
        xmlHeadline.tag = "h1"
        etree.strip_elements(xmlPart, "head")
        # Move all children of the div0 into the template body; deepcopy is
        # required because a plain append() would detach them from the tree.
        for xmlChild in xmlPart.getchildren():
            htmlChapter.find(".//" + htmlns + "body").append(deepcopy(xmlChild))
        # Save the chapter.  FIX: the original wrote the identical file twice
        # in a row — collapsed into a single write, now with explicit UTF-8
        # encoding in a context manager.
        tmpFileName = OUTPUT_DIR / ("OEBPS/chapter" + str(intTechnicalChapterNumber) + ".xhtml")
        with open(tmpFileName, "w", encoding="utf-8") as tmpFile:
            tmpFile.write(etree.tostring(htmlChapter, pretty_print=True, encoding="unicode"))
        # Add to TocNCX.  BUGFIX: the original read the title via the stale
        # loop variable xmlChapter (left over from the previous loop) with
        # the odd path "..//h1", so every facsimile part got the last
        # chapter's headline; use this part's own h1 instead.
        tocncx = addToTocncx(tocncx, xmlPart.find(".//h1").text, intTechnicalChapterNumber)
        contentopf = addToContentopf(contentopf, "chapter" + str(intTechnicalChapterNumber) + ".xhtml", "chapter" + str(intTechnicalChapterNumber), "xml")
        intTechnicalChapterNumber += 1
# Persist the navigation file.
tocncx_filename = OUTPUT_DIR / "OEBPS/toc.ncx"
logging.info("Saving toc.ncx")
tocncx.write(str(tocncx_filename), pretty_print=True, xml_declaration=True, encoding="utf-8")
# Persist the package manifest.
contentopf_filename = OUTPUT_DIR / "OEBPS/content.opf"
logging.info("Saving content.opf")
contentopf.write(str(contentopf_filename), pretty_print=True, xml_declaration=True, encoding="utf-8")
############################################################################
#                         Finishing various Stuff                          #
############################################################################
# Dump the intermediate tree for debugging purposes.
devel_ebook_file = TEMP_DIR / "Devel_ebook.xml"
logging.info(f"Write Temporary XML-Tree to {devel_ebook_file}.")
xmlEbookTree.write(str(devel_ebook_file), pretty_print=True, xml_declaration=True, encoding="utf-8")
# Unless suppressed on the command line, zip everything into the .epub.
if not args.no_epub:
    create_epub_container(f"{publication_series}{publication_number}", OUTPUT_DIR)
logging.info("Finished!")
# finis