# eoaconvert.py

#!/usr/bin/env python3
# -*- coding: utf-8; mode: python -*-
# Time-stamp: <2018-01-22 18:57:49 (kthoden)>

# license?
__version__= "1.0"
__author__ = "Klaus Thoden"
__date__="20171205"
__doc__ = """Converts Latex files into Django-XML and epub files."""

# imports
from optparse import OptionParser
from lxml import etree
from lxml import objectify
from copy import deepcopy
from copy import copy
from libeoabibitem import Bibitem
import libeoaconvert
import glob
import os
import re
import string
import shlex
import json
import subprocess
import sys
import shutil
import time
import configparser
import logging
import pickle

# Directory containing this script. abspath() keeps the result non-empty
# even when the script is started as "python3 eoaconvert.py" from its own
# directory, where dirname(sys.argv[0]) alone would be "" and the default
# config path would wrongly become "/config/eoaconvert.cfg".
SCRIPT_DIR = os.path.dirname(os.path.abspath(sys.argv[0]))
print("The script is run from ", SCRIPT_DIR)

###############################################################
#  Preparation of certain files and some checks in advance
###############################################################

# Options for the command line: filename / configfile / trash
parser = OptionParser()
parser.add_option("-f", "--file", dest="filename",
                  help="Name of XML-File", metavar="FILE")
parser.add_option("-c", "--config", dest="CONFIG_FILE",
                  help="Name of Configuration-File", metavar="CONFIGURATION")
parser.add_option("-t", "--trash", dest="helpfiles",
                  help="Trash temporary files")

(options, args) = parser.parse_args()

if options.CONFIG_FILE is not None:
    # An explicitly given configuration file wins
    CONFIG_FILE = os.path.abspath(options.CONFIG_FILE)
else:
    # Default: config/eoaconvert.cfg next to this script
    CONFIG_FILE = os.path.join(SCRIPT_DIR, "config", "eoaconvert.cfg")

print("The configfile is ", CONFIG_FILE)

# current biber is not compatible with this code
# switch TeX distribution to TeXLive2016,
# run biber_2.1 -O biber2-1n.bbl $INPUT to obtain this file
BIBERFILE = "biber2-1.bbl"

##################################
# Reading the configuration file #
##################################
CONFIG = configparser.ConfigParser()
# ConfigParser.read() skips files it cannot open and returns the list of
# files that were actually parsed; fail early with a clear message instead
# of a KeyError on the first section lookup below.
if not CONFIG.read(CONFIG_FILE):
    print("[ERROR]: Cannot read the configuration file %s. Exiting." % CONFIG_FILE)
    sys.exit(1)

######################
# Setting up logging #
######################
LOGFILE = CONFIG['General']['logfile']
LOGLEVEL = CONFIG['General']['loglevel']

# Level names known to the logging module are upper case, so accept
# "debug" as well as "DEBUG" in the configuration file.
logging.basicConfig(filename=LOGFILE, level=LOGLEVEL.upper(), format='%(asctime)s - %(levelname)s - %(message)s')

########################
# Paths to executables #
########################
GM_PATH = CONFIG['Executables']['graphicsmagic']
TL_PATH = CONFIG['Executables']['texlive']
TEXBIN_PATH = CONFIG['Executables']['texbin']
TRALICS_PATH_EXEC = CONFIG['Executables']['tralics_path_exec']

############################
# Paths to auxiliary files #
############################
TRALICS_PATH_LIB = CONFIG['Auxiliaries']['TRALICS_PATH_LIB']
TEMPLATE_PATH = CONFIG['Auxiliaries']['template_path']
SUPPORT_PATH = CONFIG['Auxiliaries']['support_path']
# AUX_TeX_FILES_PATH = CONFIG['Auxiliaries']['aux_tex_files_path']

# interimResult = ""

########################################
# Certain functions for specific tasks #
########################################

def getchildren(xmlElement):
    """Hand the given element back unchanged.

    Placeholder: nothing is collected or modified here, the argument
    itself is the return value.
    """
    return xmlElement
# def getchildren ends here

def TeX2PNG(LaTeXCode, Type, Chapter, Number):
    """Render LaTeX code into a PNG file (epub & django).

    The formula is wrapped in the math environment that belongs to
    ``Type``, inserted into the ``formula.tex`` template, compiled with
    XeLaTeX inside ``tmp_files/formulas2png/``, cropped with pdfcrop and
    converted to ``items/<Type>_<Chapter>_<Number>.png`` with
    GraphicsMagick. The output of all three tools is written to
    ``tmp_files/xelatex-run.log`` (overwritten on every call).

    Args:
        LaTeXCode: formula source without the surrounding math delimiters.
        Type: one of the keys of ``Types`` below, e.g. ``"EOAequation"``.
        Chapter: chapter number, only used to build the file names.
        Number: formula number, only used to build the file names.

    Returns:
        The LaTeX code including the math delimiters, with the rebound
        commands (``|ket|`` etc.) turned back into TeX macros.

    Raises:
        KeyError: if ``Type`` is not a key of ``Types``.
    """
    # Dictionary contains Type:begin/end
    Types = {
        "EOAineq": ["$", "$"],
        "EOAequation": ["\\begin{equation*}", "\\end{equation*}"],
        "EOAequationnonumber": ["\\begin{equation*}", "\\end{equation*}"],
        "EOAequationarray": ["\\begin{align*}", "\\end{align*}"],
        "EOAequationarraynonumber": ["\\begin{align*}", "\\end{align*}"],
    }
    LaTeXCode = Types[Type][0] + LaTeXCode + Types[Type][1]
    # Regular expression -> replacement template. Raw strings keep the
    # backslashes intact without relying on invalid escape sequences.
    dictRebindedCommands = {
        r"\|ket\|": r"\\ket",
        r"\|braket\|": r"\\braket",
        r"\|bra\|": r"\\bra",
        r"\|Bra\|": r"\\Bra",
        r"\|Ket\|": r"\\Ket",
        # NOTE(review): as a regex, "\s" matches a whitespace character, so
        # this pattern matches "<whitespace>lashed|". Presumably
        # r"\|slashed\|" was intended - confirm before changing it.
        r"\slashed\|": r"\\slashed",
    }
    for strCommand, strReplacement in dictRebindedCommands.items():
        LaTeXCode = re.sub(strCommand, strReplacement, LaTeXCode)

    # Open plain LaTeX-Template
    with open(TEMPLATE_PATH + "formula.tex", "r") as tmp:
        Template = tmp.read()

    # Work below the current directory instead of a per-user tmp directory
    working_dir = os.getcwd()
    formula_tmp_dir = working_dir + "/tmp_files/formulas2png/"
    items_dir = working_dir + "/items"

    # Make directory items if it doesn't already exist
    if not os.path.exists(items_dir):
        os.mkdir(items_dir)

    e = string.Template(Template).substitute(DERINHALT=LaTeXCode)
    strBasename = Type + "_" + str(Chapter) + "_" + str(Number)
    tmpFile = formula_tmp_dir + strBasename + ".tex"
    strPDF = formula_tmp_dir + strBasename + ".pdf"
    strCroppedPDF = formula_tmp_dir + strBasename + "a.pdf"
    # The PNG goes into the same items directory that was created above
    # (the PWD environment variable may be unset or point elsewhere).
    strPNG = items_dir + "/" + strBasename + ".png"

    with open(tmpFile, "w") as tmp:
        tmp.write(e)

    # Only the configured command prefixes are split shell-style; the
    # generated file names are passed as single arguments so that blanks
    # in the working directory cannot break the command line.
    # Redirecting stdout of the tools to save the XeLaTeX output
    with open('tmp_files/xelatex-run.log', 'w') as Datei:
        Argumente = shlex.split(TEXBIN_PATH + "xelatex --halt-on-error") + [tmpFile]
        Ergebnis = subprocess.call(Argumente, cwd=formula_tmp_dir, stdout=Datei)
        if Ergebnis == 0:
            print("Successfully converted formula " + Type + str(Chapter) + "_" + str(Number))
        else:
            # Any non-zero exit status is a failure, not only status 1
            print("[ERROR]: Failed to convert formula " + Type + str(Chapter) + "_" + str(Number))

        Argumente = shlex.split(TL_PATH + "texmf-dist/scripts/pdfcrop/pdfcrop.pl") + [strPDF, strCroppedPDF]
        subprocess.call(Argumente, cwd=formula_tmp_dir, stdout=Datei)

        Argumente = shlex.split(GM_PATH + " convert -density 144") + [strCroppedPDF, strPNG]
        subprocess.call(Argumente, cwd=formula_tmp_dir, stdout=Datei)

    return LaTeXCode
# def TeX2PNG ends here

def make_bibchecker(bib_database, set_citations):
    """Construct separate LaTeX files that contain only the bibliography.

    The HTML bibliography is still not perfectly formatted like the
    LaTeX version. To compare the two, the templates ``largebib.tex`` and
    ``largebib-xml.tex`` are filled in and written to
    ``debug/debug_onlybib.tex`` and ``debug/debug_onlybib-xml.tex`` below
    the current working directory.

    Args:
        bib_database: value substituted for ``INSERT_BIB_DATABASE``.
        set_citations: iterable of cite keys; joined with ", " and
            substituted for ``INSERT_CITEKEYS``.
    """
    template_names = ("largebib.tex", "largebib-xml.tex")
    output_names = ("debug_onlybib.tex", "debug_onlybib-xml.tex")

    # Read both templates first, so nothing is written if one is missing
    templates = []
    for template_name in template_names:
        with open(TEMPLATE_PATH + template_name, "r") as template_file:
            templates.append(string.Template(template_file.read()))

    string_citations = ", ".join(set_citations)
    replacements = [
        template.substitute(INSERT_BIB_DATABASE=bib_database, INSERT_CITEKEYS=string_citations)
        for template in templates
    ]

    for output_name, replacement in zip(output_names, replacements):
        output_path = "%s/debug/%s" % (os.getcwd(), output_name)
        with open(output_path, "w") as output_file:
            output_file.write(replacement)
# def make_bibchecker ends here

def sanitize_bibentry(bibEntry):
    """Tidy up punctuation artefacts in an assembled bibliography entry.

    The replacements are applied in the listed order, afterwards leading
    and trailing whitespace is stripped.
    """
    cleaned = bibEntry
    for artefact, replacement in ((". , ", ", "), ("vols..", "vols.")):
        cleaned = cleaned.replace(artefact, replacement)

    return cleaned.strip()
# def sanitize_bibentry ends here

def createBibEntryAuthorYear(bibEntry, boolSameAuthor):
    """Create the complete entry of a publication (epub & django) for author-year citation.

    bibEntry is queried through its accessor methods (entrytype(),
    citekey(), title(), ...). boolSameAuthor selects the dash form
    ("–") of the author part instead of the full author string.

    The assembled string is printed and returned after being passed
    through sanitize_bibentry(). If the entry type is not listed in
    libeoaconvert.allowed_bibentry_types, an error is printed and the
    script exits.
    """
    strBibEntry = ""
    if bibEntry.entrytype() not in libeoaconvert.allowed_bibentry_types:
        print("[ERROR]: You cannot use the entry type %s in entry %s. Allowed entry types are: %s.\n" % (bibEntry.entrytype(), bibEntry.citekey() , ", ".join(libeoaconvert.allowed_bibentry_types)))
        # NOTE(review): sys.exit() without argument presumably reports success to the shell - confirm
        sys.exit()

    bool_edited_book = False
    book_without_author = False
    # fullauthorlastfirst() is indexed as a pair: [0] is used as the name
    # string, [1] as a postfix (or the marker "no_author_only_title")
    editor_postfix = bibEntry.fullauthorlastfirst()[1]
    if boolSameAuthor == False:
        if len(editor_postfix) != 0:
            if editor_postfix == "no_author_only_title":
                # No author at all: the title takes the place of the author
                book_without_author = True
                strAuthor = bibEntry.title()
            else:
                bool_edited_book = True
                strAuthor = bibEntry.fullauthorlastfirst()[0] + ", " + editor_postfix
        else:
            strAuthor = bibEntry.fullauthorlastfirst()[0]
    # NOTE(review): strAuthor is only assigned when boolSameAuthor compares
    # equal to False or True; the "no_author_only_title" marker is not
    # checked in the branch below - confirm that this is intended
    if boolSameAuthor == True:
        # if there is no author, but a publisher, the localized
        # postfix is returned from the function together with the name in a tuple
        if len(editor_postfix) != 0:
            bool_edited_book = True
            strAuthor = "– " + editor_postfix
        else:
            strAuthor = "–"
    # Next line good for debugging
    # print(bibEntry.citekey(), strAuthor)
    # One layout per entry type; types without a branch leave strBibEntry empty
    if bibEntry.entrytype() == "book":
        if bool_edited_book == True:
            # Edited book: the editor is already part of strAuthor, so editor() is left out
            strBibEntry = strAuthor + " (" + str(bibEntry.labelyear()) + str(bibEntry.labelyearsuffix()) + "). <i>" + str(bibEntry.title()) + "</i> " +  bibEntry.edition() + bibEntry.volumenumeric() + bibEntry.seriesnumber() + bibEntry.note() + str(bibEntry.location()) + bibEntry.pages() + ". " + bibEntry.url()
        elif book_without_author == True:
            # No author: the entry starts with the title instead of strAuthor
            strBibEntry =  "<i>" + str(bibEntry.title()) + "</i> " + " (" + str(bibEntry.labelyear()) + str(bibEntry.labelyearsuffix()) + ")." +  bibEntry.edition() + bibEntry.volumenumeric() + bibEntry.seriesnumber() + bibEntry.note()  + str(bibEntry.location()) + bibEntry.pages() + ". " + bibEntry.url()
        else:
            strBibEntry = strAuthor + " (" + str(bibEntry.labelyear()) + str(bibEntry.labelyearsuffix()) + "). <i>" + str(bibEntry.title()) + "</i> " +  bibEntry.editor() + bibEntry.edition() + bibEntry.volumenumeric() + bibEntry.seriesnumber() + bibEntry.note() + str(bibEntry.location()) + bibEntry.pages() + ". " + bibEntry.url()
    if bibEntry.entrytype() == "booklet":
        strBibEntry = strAuthor + " (" + str(bibEntry.labelyear()) + str(bibEntry.labelyearsuffix()) + "). <i>" + str(bibEntry.title()) + "</i>" + str(bibEntry.location()) + bibEntry.howpublished() + "."
    if bibEntry.entrytype() == "report":
        strBibEntry = strAuthor + " (" + bibEntry.labelyear() + bibEntry.labelyearsuffix() + "). <i>" + bibEntry.title() + "</i>"
    if bibEntry.entrytype() == "thesis":
        strBibEntry = strAuthor + " (" + bibEntry.labelyear() + bibEntry.labelyearsuffix() + "). <i>" + bibEntry.title() + "</i>" + bibEntry.thesistype() + bibEntry.institution() + "." + bibEntry.url()
    if bibEntry.entrytype() == "misc":
        strBibEntry = strAuthor + " (" + str(bibEntry.labelyear()) + str(bibEntry.labelyearsuffix()) + "). <i>" + str(bibEntry.title()) + "</i> " + bibEntry.howpublished() + ". " + bibEntry.note() + bibEntry.location() + bibEntry.thesistype() + "." + bibEntry.url()
    if bibEntry.entrytype() == "incollection":
        strBibEntry = strAuthor + " (" + bibEntry.labelyear() + bibEntry.labelyearsuffix() + "). " + bibEntry.title() + bibEntry.booktitle() + ". " + bibEntry.editor() + bibEntry.edition() + bibEntry.volumenumeric() + bibEntry.seriesnumber() + bibEntry.note() + bibEntry.location() + bibEntry.pages() + "." + bibEntry.url()
    if bibEntry.entrytype() == "inproceedings":
        strBibEntry = strAuthor + " (" + bibEntry.labelyear() + bibEntry.labelyearsuffix() + "). " + bibEntry.title() + bibEntry.booktitle() + bibEntry.editor() + bibEntry.series() + bibEntry.location() + bibEntry.pages()
    if bibEntry.entrytype() == "article":
        strBibEntry = strAuthor + " (" + str(bibEntry.labelyear()) + str(bibEntry.labelyearsuffix()) + "). " + str(bibEntry.title()) + str(bibEntry.journaltitle()) + bibEntry.volumenumberpages() + ". " + bibEntry.note() + bibEntry.url()
    if bibEntry.entrytype() == "newspaper":
        strBibEntry = strAuthor + " (" + bibEntry.labelyear() + bibEntry.labelyearsuffix() + "). <i>" + bibEntry.title() + "</i>"

    # Every assembled entry is echoed to the console
    print(strBibEntry)

    return sanitize_bibentry(strBibEntry)
# def createBibEntryAuthorYear ends here

def createBibEntryNumeric(bibEntry):
    """Create the complete entry of a publication (epub & django) for numeric citation.

    bibEntry is queried through its accessor methods. The assembled
    string is returned as is; unlike the author-year variant it is not
    passed through sanitize_bibentry(). If the entry type is not listed
    in libeoaconvert.allowed_bibentry_types, an error is printed and the
    script exits.
    """

    strBibEntry = ""
    if bibEntry.entrytype() not in libeoaconvert.allowed_bibentry_types:
        print("[ERROR]: You cannot use the entry type %s in entry %s. Allowed entry types are: %s.\n" % (bibEntry.entrytype(), bibEntry.citekey() , ", ".join(libeoaconvert.allowed_bibentry_types)))
        # NOTE(review): sys.exit() without argument presumably reports success to the shell - confirm
        sys.exit()

    # Numeric style uses the "first last" form of the author names
    strAuthor = bibEntry.fullauthorfirstlast()
    # One layout per entry type; types without a branch leave strBibEntry empty
    if bibEntry.entrytype() == "book":
        strBibEntry = strAuthor + ". <i>" + bibEntry.title() + "</i>." + bibEntry.location() + ", " + bibEntry.year()
    if bibEntry.entrytype() == "booklet":
        strBibEntry = strAuthor + ". <i>" + bibEntry.title() + "</i>. " + bibEntry.howpublished() + ". " + bibEntry.location() + ", " + bibEntry.year()
    if bibEntry.entrytype() == "report":
        strBibEntry = strAuthor + " (" + bibEntry.labelyear() + bibEntry.labelyearsuffix() + ") <i>" + bibEntry.title() + "</i>."
    if bibEntry.entrytype() == "thesis":
        strBibEntry = strAuthor + ". <i>" + bibEntry.title() + "</i>. " + bibEntry.thesistype() + bibEntry.institution() + ", " + bibEntry.year()
    if bibEntry.entrytype() == "misc":
        strBibEntry = strAuthor + ". " + bibEntry.title() + ". " + bibEntry.booktitle() + ". "
    if bibEntry.entrytype() == "incollection":
        strBibEntry = strAuthor + ". " + bibEntry.title() + ". " + bibEntry.booktitle() + bibEntry.editor() + ". " + bibEntry.location() + ", " + bibEntry.year() + ". " + bibEntry.pages() + "."
    if bibEntry.entrytype() == "inproceedings":
        strBibEntry = strAuthor + ". " + bibEntry.title() + ". " + bibEntry.booktitle() + ". " + bibEntry.volumenumeric() + bibEntry.year() + ". " + bibEntry.pages() + "."
    if bibEntry.entrytype() == "article":
        strBibEntry = strAuthor + ". " + bibEntry.title() + "<i>" + bibEntry.journaltitle() + "</i> " + bibEntry.volumenumberpages() + " (" + bibEntry.year() + "):" + bibEntry.pages() + "."
    if bibEntry.entrytype() == "newspaper":
        strBibEntry = strAuthor + " (" + bibEntry.labelyear() + bibEntry.labelyearsuffix() + ") <i>" + bibEntry.title() + "</i>."

    return strBibEntry
# def createBibEntryNumeric ends here

def pdf_burst(input_file, tmpDir):
    """Split a PDF file into single pages.

    Every page of ``tmpDir + input_file`` is written to its own file
    ``tmpDir + "EOAineq_<n>.pdf"``, where <n> starts at 1.

    Args:
        input_file: file name of the PDF, relative to tmpDir.
        tmpDir: directory prefix; it is concatenated directly with the
            file names, so it has to end with a path separator.
    """
    from PyPDF2 import PdfFileWriter, PdfFileReader

    # The input stays open while the pages are copied and is closed
    # afterwards, even if writing one of the pages fails.
    with open(tmpDir + input_file, "rb") as input_stream:
        input1 = PdfFileReader(input_stream)
        number_of_pages = input1.getNumPages()
        print("Input is %s and has %d pages." % (input_file, number_of_pages))

        for pageno in range(number_of_pages):
            output = PdfFileWriter()
            output.addPage(input1.getPage(pageno))

            output_filename = tmpDir + "EOAineq_%d.pdf" % (pageno + 1)
            with open(output_filename, 'wb') as output_stream:
                output.write(output_stream)
            logging.debug("Wrote %s." % output_filename)
# def pdf_burst ends here

def progress(count, total, status=''):
    """Draw a one-line progress bar on the command line.

    The line ends with a carriage return, so the next call overwrites it.
    Based on https://gist.github.com/vladignatyev/06860ec2040cb497f0f3
    """

    width = 60
    total_as_float = float(total)

    hashes = int(round(width * count / total_as_float))
    percentage = round(100.0 * count / total_as_float, 1)

    line = '[%s] %s%% ... %s\r' % ('#' * hashes + '-' * (width - hashes), percentage, status)
    sys.stdout.write(line)
    sys.stdout.flush()
# def progress ends here

def cleanup():
    """Remove the support files and directories from the current directory.

    Every item is removed independently, so a missing file no longer
    prevents the remaining items from being deleted. Items that do not
    exist are skipped silently; other removal problems are reported and
    skipped. If nothing at all was removed, a note is printed.
    """
    base_dir = os.getcwd()
    support_files = ("classes.dtd", "mathml2-qname-1.mod", "mathml2.dtd")
    # "mathml2" is deliberately not part of this list (it is not copied either)
    support_dirs = ("html", "iso8879", "iso9573-13", "mathml")

    removals = [(os.remove, name) for name in support_files]
    removals += [(shutil.rmtree, name) for name in support_dirs]

    removed_something = False
    for remove, name in removals:
        path = os.path.join(base_dir, name)
        try:
            remove(path)
        except FileNotFoundError:
            # Nothing to do for this item
            continue
        except OSError as err:
            print("[ERROR]: Could not remove %s: %s" % (path, err))
            continue
        removed_something = True

    if not removed_something:
        print("No temporary files were found.")
# def cleanup ends here

# Remove temporary files, necessary for troubleshooting.
# Only the literal value "temp" for -t/--trash triggers the cleanup; the
# script stops afterwards without converting anything.
if options.helpfiles == "temp":
    cleanup()
    sys.exit()

#################################################
# Checking for existence of tools and libraries #
#################################################
if not os.path.exists(TRALICS_PATH_LIB):
    print ("Cannot find the Tralics configuration at %s. Exiting." % TRALICS_PATH_LIB)
    sys.exit()

##################################
# Setting up various directories #
##################################

# All working directories are created relative to the current directory
if not os.path.exists("tmp_files"):
    os.mkdir(os.path.expanduser("tmp_files"))
if not os.path.exists("tmp_files/formulas2png/"):
    os.mkdir(os.path.expanduser("tmp_files/formulas2png/"))
if not os.path.exists(os.getcwd() + "/debug"):
    os.mkdir(os.getcwd() + "/debug")

# Check for folder and necessary files: the directory CONVERT has to exist
# and contain cover.jpg and publication.cfg (the messages are in German)
if not os.path.exists(os.getcwd() + "/CONVERT"):
    print ("Das notwendige Verzeichnis CONVERT wurde noch nicht erstellt.")
    sys.exit()
if not os.path.exists(os.getcwd() + "/CONVERT/cover.jpg"):
    print ("Die Datei cover.jpg im Verzeichnis CONVERT fehlt.")
    sys.exit()
if not os.path.exists(os.getcwd() + "/CONVERT/publication.cfg"):
    print ("Die Datei publication.cfg im Verzeichnis CONVERT fehlt.")
    sys.exit()
# if os.path.exists(os.getcwd() + "/pre_xml.tex") == False:
#     print ("pre_xml fehlt")
#     sys.exit()

# Copy support files from SUPPORT_PATH to the current directory; these are
# the same items that cleanup() removes again.
# NOTE(review): copytree presumably fails when the target directories are
# left over from an earlier run (use "-t temp" first) - confirm
shutil.copy(SUPPORT_PATH + "classes.dtd", os.getcwd())
shutil.copy(SUPPORT_PATH + "mathml2-qname-1.mod", os.getcwd())
shutil.copy(SUPPORT_PATH + "mathml2.dtd", os.getcwd())
shutil.copytree(SUPPORT_PATH + "html", (os.getcwd() + "/html"))
shutil.copytree(SUPPORT_PATH + "iso8879", (os.getcwd() + "/iso8879"))
shutil.copytree(SUPPORT_PATH + "iso9573-13", (os.getcwd() + "/iso9573-13"))
shutil.copytree(SUPPORT_PATH + "mathml", (os.getcwd() + "/mathml"))
# shutil.copytree(SUPPORT_PATH + "mathml2", (os.getcwd() + "/mathml2"))
##############################################################
#                  Preparing the main document               #
##############################################################

# Convert TeX to XML via Tralics
Kommando = "%s -log_file %s -confdir %s/tralics_conf -config %s/tralics.tcf -utf8 -utf8output %s.tex" % (TRALICS_PATH_EXEC, options.filename + "-tralics.log", TRALICS_PATH_LIB, TRALICS_PATH_LIB, options.filename)
Argumente = shlex.split(Kommando)
Prozess = subprocess.call(Argumente)

# Fix underscore and fix EOAtranscripted.
# Tralics is called with -utf8output, so the XML file is read and written
# explicitly as UTF-8 instead of relying on the locale's default encoding.
with open(options.filename + ".xml", "r", encoding="utf-8") as tmpFile:
    tmpText = tmpFile.read()

# An underscore outside math mode is reported as an error element by
# Tralics; put the literal underscore back
tmpText = re.sub(r"<error n='_' l='(.*?)' c='Missing dollar'/>", "_", tmpText)
# Error elements about \par outside of a paragraph are dropped
tmpText = re.sub(r"<error n='\\par' l='(.*?)' c='Invalid \\par command: paragraph not started'/>", "", tmpText)

with open(options.filename + ".xml", "w", encoding="utf-8") as tmpFile:
    tmpFile.write(tmpText)

# Complete XML-Document in xmlTree.
# The parser loads the DTD (load_dtd=True) and network access is not
# disabled (no_network=False).
xmlParser = etree.XMLParser(no_network=False,load_dtd=True) #resolve_entities=False
xmlTree = etree.parse((options.filename + ".xml"), xmlParser)
# Every <div1> element is treated as a chapter from here on
xmlChapters = xmlTree.findall("//div1")

# Cleanup of not needed tags in advance. To be cleaned: <error>
# (with_tail=False keeps the text that follows a removed element)
etree.strip_elements(xmlTree, with_tail=False, *['error'])

print("-----------------------------------------------------")
print("Move EOAlanguage from <head> into attribute of EOAchapter")
# Running chapter counter, only used for the console message
intChapterNumber = 1
for xmlChapter in xmlChapters:
    xmlLanguage = xmlChapter.find(".//EOAlanguage")
    if xmlLanguage is not None:
        # An empty <EOAlanguage> falls back to "english"
        strLanguage = xmlLanguage.text or "english"
        xmlChapter.set("language", strLanguage)
        # Empty the element so that no text is left once the tag is stripped
        xmlLanguage.text = None
        print("The language of Chapter %d is %s." % (intChapterNumber, strLanguage))
    # NOTE(review): the value assigned here is not used afterwards, the
    # loop variable is rebound in the next iteration
    xmlChapter = etree.strip_tags(xmlChapter, "EOAlanguage")
    intChapterNumber += 1

##############################################################
#      Numbering and Typesetting various Elements            #
##############################################################

# Figure out how to number (like essay or regular)
# A missing <EOAseries> element makes find() return None; the resulting
# AttributeError is taken as a sign that the wrong preamble was used.
try:
    strSerie = xmlTree.find(".//EOAseries").text or "regular"
except AttributeError:
    print("\n\nYou are most probably using the preamble for the PDF output. Exiting.")
    sys.exit()

# Only the exact series name "Essay" selects essay numbering
if strSerie == "Essay":
    strNumberingType = "essay"
else:
    strNumberingType = "regular"

# Dictionaries containing UIDs and Numbers
dictChapters = {}
dictFigures = {}
dictEquations = {}
dictSections = {}
dictFootnotes = {}
dictPagelabels = {}
dictTables = {}
dictLists = {}
dictTheorems = {}

set_citations = set()

print("-----------------------------------------------------")
print("Numbering Chapters")
# Chapters with rend="nonumber" get no entry in dictChapters and do not
# consume a chapter number
Chapternumber = 1
for xmlChapter in xmlChapters:
    if xmlChapter.get('rend') != "nonumber":
        Chapteruid = xmlChapter.get('id')
        dictChapters[Chapteruid] = str(Chapternumber)
        Chapternumber += 1

# EOAequation, EOAsubequation and EOAequationarray Numbering per Chapter
#
# For every chapter the numbered formulas are rendered to PNG via TeX2PNG()
# and annotated with filename, number, uid/id and the TeX source; the
# numbers are recorded in dictEquations.
# intChapterNumber counts every chapter in document order and is used for
# file names, whereas dictChapters only knows chapters that are not
# rend="nonumber" and supplies the visible chapter number.
intChapterNumber = 1
print("-----------------------------------------------------")
print("Processing .//EOAequation | .//EOAequationarray | .//EOAsubequations")
for xmlChapter in xmlChapters:
    # Equation numbers restart in every chapter
    intEquationnumber = 1
    xmlDinge = xmlChapter.xpath(".//EOAequation | .//EOAequationarray | .//EOAsubequations")
    print("Working on Chapter %d which contains %d formulæ." % (intChapterNumber, len(xmlDinge)))
    for xmlDing in xmlDinge:
        if xmlDing.tag == "EOAequationarray":
            # tmpNumberinArray is only being used for filename
            tmpNumberinArray = intEquationnumber
            # tmpDictNumberLabel used to insert the attribute value into <EOAequation>
            tmpDictNumberLabel = {}
            # Numbering is being done by <mtr>-Tags
            xmlMathmlrows = xmlDing.findall(".//{http://www.w3.org/1998/Math/MathML}mtr")
            for xmlMathmlrow in xmlMathmlrows:
                if "Label" in xmlMathmlrow.attrib:
                    # Add the label to the dictionary of equations
                    if xmlChapter.get("rend") != "nonumber":
                        dictEquations[xmlMathmlrow.get("Label")] = str(dictChapters[xmlChapter.get('id')]) + "." + str(intEquationnumber)
                        tmpDictNumberLabel[str(dictChapters[xmlChapter.get('id')]) + "." + str(intEquationnumber)] = xmlMathmlrow.get("Label")
                    if xmlChapter.get("rend") == "nonumber":
                        dictEquations[xmlMathmlrow.get("Label")] = str(intEquationnumber)
                        tmpDictNumberLabel[str(intEquationnumber)] = xmlMathmlrow.get("Label")
                # Every row consumes an equation number, labelled or not
                intEquationnumber += 1
            xmlRohTeX = xmlDing.find(".//texmath")
            xmlNew = etree.Element('EOAequationarray')
            # Blank lines need to be removed otherwise TeX won't work
            textSourcecode = os.linesep.join([s for s in xmlRohTeX.text.splitlines() if s])
            # \rowattributeunknown has to be deleted, its an artefact
            textSourcecode = re.sub("\\\\rowattributeunknown", "", textSourcecode)
            # Push Down loop to parse the raw code: the source is walked
            # character by character and cut after every double backslash;
            # each piece becomes one <EOAequation> with its own PNG.
            textFormel = ""
            # True while the previous character was a single backslash
            boolBackslash = False
            for Buchstabe in textSourcecode:
                if Buchstabe == "\n":
                    continue
                if Buchstabe == "\\":
                    if boolBackslash == False:
                        textFormel += Buchstabe
                        boolBackslash = True
                        continue
                    if boolBackslash == True:
                        # Second backslash in a row: the row is complete, typeset it
                        textFormel += Buchstabe
                        str_latexcode = TeX2PNG(textFormel, "EOAequationarray", str(intChapterNumber), str(tmpNumberinArray))
                        if xmlChapter.get("rend") != "nonumber":
                            tmpXML = etree.Element("EOAequation", filename=("EOAequationarray" + "_" + str(intChapterNumber) + "_" + str(tmpNumberinArray) + ".png"), number=(str(dictChapters[xmlChapter.get('id')]) + "." + str(tmpNumberinArray)))
                        if xmlChapter.get("rend") == "nonumber":
                            tmpXML = etree.Element("EOAequation", filename=("EOAequationarray" + "_" + str(intChapterNumber) + "_" + str(tmpNumberinArray) + ".png"), number=(str(tmpNumberinArray)))
                        tmpXML.set("TeX", str_latexcode)
                        # Put Label into EOAequation
                        if xmlChapter.get("rend") != "nonumber":
                            strTempKey = str(dictChapters[xmlChapter.get('id')]) + "." + str(tmpNumberinArray)
                        if xmlChapter.get("rend") == "nonumber":
                            strTempKey = str(tmpNumberinArray)
                        if strTempKey in tmpDictNumberLabel:
                            #tmpXML.set("label", tmpDictNumberLabel[(str(dictChapters[xmlChapter.get('id')]) + "." + str(tmpNumberinArray))])
                            tmpXML.set("label", tmpDictNumberLabel[strTempKey])
                        xmlNew.append(tmpXML)
                        # Start collecting the next row
                        textFormel = ""
                        boolBackslash = False
                        tmpNumberinArray += 1
                        continue
                if Buchstabe != "\\":
                    textFormel += Buchstabe
                    boolBackslash = False
            # Typeset last equation (the remainder after the final double backslash)
            str_latexcode = TeX2PNG(textFormel, "EOAequationarray", str(intChapterNumber), str(tmpNumberinArray))
            if xmlChapter.get("rend") != "nonumber":
                tmpXML = etree.Element("EOAequation", filename=("EOAequationarray" + "_" + str(intChapterNumber) + "_" + str(tmpNumberinArray) + ".png"), number=(dictChapters[xmlChapter.get('id')] + "." + str(tmpNumberinArray)))
            if xmlChapter.get("rend") == "nonumber":
                tmpXML = etree.Element("EOAequation", filename=("EOAequationarray" + "_" + str(intChapterNumber) + "_" + str(tmpNumberinArray) + ".png"), number=(str(tmpNumberinArray)))
            tmpXML.set("TeX", str_latexcode)
            # Put Label into EOAequation
            if xmlChapter.get("rend") != "nonumber":
                strTempKey = str(dictChapters[xmlChapter.get('id')]) + "." + str(tmpNumberinArray)
            if xmlChapter.get("rend") == "nonumber":
                strTempKey = str(tmpNumberinArray)
            if strTempKey in tmpDictNumberLabel:
                # Debugging output
                print(strTempKey)
                print(tmpDictNumberLabel)
                print(dictChapters)
                tmpXML.set("label", tmpDictNumberLabel[strTempKey])
            xmlNew.append(tmpXML)
            # The original array element is swapped for the list of single equations
            xmlDing.getparent().replace(xmlDing, xmlNew)
            # enclosing <p>-Tag of the Subequations is not wanted, transformed to <temp> to be deleted later on
            #xmlNew.getparent().tag = "temp"
            continue
        if xmlDing.tag == "EOAsubequations":
            # Enclosing <p>-Tag of the EOAsubequations needs to be removed
            xmlDing.getparent().tag = "temp"
            xmlSubequations = xmlDing.findall('.//EOAequation')
            # Suffix letters for the subequations.
            # NOTE(review): more than 26 subequations would run past the end of this list
            listCharacters = ['a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z']
            tmpI = 0
            # Insert Number of this Subequation into dictEquations
            xmlAnchor = xmlDing.find(".//anchor")
            # Debugging output
            print(xmlAnchor)
            if xmlChapter.get("rend") != "nonumber":
                dictEquations[xmlAnchor.get('id')] = dictChapters[xmlChapter.get('id')] + "." + str(intEquationnumber)
            if xmlChapter.get("rend") == "nonumber":
                dictEquations[xmlAnchor.get('id')] = str(intEquationnumber)
            # Delete anchor
            xmlAnchor.getparent().remove(xmlAnchor)
            for xmlSubequation in xmlSubequations:
                # Enclosing <p>-Tag of the EOAsubequation needs to be removed
                #xmlSubequation.getparent().tag = "temp"
                # Numbering Subequations with characters
                strSubequationNumber = str(intEquationnumber) + listCharacters[tmpI]
                tmpI += 1
                textSourcecode = xmlSubequation.find('.//texmath').text
                # Blank lines need to be removed otherwise TeX won't work
                textSourcecode = os.linesep.join([s for s in textSourcecode.splitlines() if s])
                str_latexcode = TeX2PNG(textSourcecode, "EOAequation", str(intChapterNumber), strSubequationNumber)
                # The anchor has to be fetched before the element is cleared
                xmlAnchor = xmlSubequation.find(".//anchor")
                # Clear Equation
                xmlSubequation.clear()
                if xmlChapter.get("rend") != "nonumber":
                    xmlSubequation.set("filename", "EOAequation" + "_" + str(intChapterNumber) + "_" + strSubequationNumber + ".png")
                    xmlSubequation.set("number", dictChapters[xmlChapter.get('id')] + "." + strSubequationNumber)
                    xmlSubequation.set("uid", xmlAnchor.get('id'))
                if xmlChapter.get("rend") == "nonumber":
                    xmlSubequation.set("filename", "EOAequation" + "_" + str(intChapterNumber) + "_" + strSubequationNumber + ".png")
                    xmlSubequation.set("number", strSubequationNumber)
                    xmlSubequation.set("uid", xmlAnchor.get('id'))
                xmlSubequation.set("id", xmlAnchor.get('id'))
                xmlSubequation.set("TeX", str_latexcode)
                # Insert Number of this Equation into dictEquations
                # NOTE(review): this branches on strNumberingType while the
                # attributes above branch on rend; a "nonumber" chapter has no
                # entry in dictChapters, so the "regular" lookup looks like it
                # raises KeyError for such a chapter - confirm
                if strNumberingType == "regular":
                    dictEquations[xmlAnchor.get('id')] = str(dictChapters[xmlChapter.get('id')]) + "." + strSubequationNumber
                if strNumberingType == "essay":
                    dictEquations[xmlAnchor.get('id')] = strSubequationNumber
            # TODO: Keep the anchor directly below the subequations and assign it to the first equation, so that 8.16 can be linked for 8.16a and 8.16b
            xmlDing.tag = "temp"
            # enclosing <p>-Tag of the Subequations is not wanted, transformed to <temp> to be deleted later on
            #xmlDing.getparent().tag = "temp"
            # The whole group of subequations consumes one equation number
            intEquationnumber += 1
            continue
        if xmlDing.tag == "EOAequation":
            # Check, if Equation has already been found in a Subequation
            # (those were cleared above and have no anchor child any more,
            # or their anchor id is already registered)
            xmlAnchor = xmlDing.find("anchor")
            if xmlAnchor == None:
                continue
            if xmlAnchor.get('id') in dictEquations:
                continue
            # The TeX source is either in a <texmath> child or the element's own text
            if xmlDing.find('.//texmath') is not None:
                textSourcecode = xmlDing.find('.//texmath').text
            else:
                textSourcecode = xmlDing.text
            # Blank lines need to be removed otherwise TeX won't work
            textSourcecode = os.linesep.join([s for s in textSourcecode.splitlines() if s])
            str_latexcode = TeX2PNG(textSourcecode, "EOAequation", intChapterNumber, intEquationnumber)
            #print ("Got:")
            #print (str_latexcode)
            if xmlChapter.get("rend") != "nonumber":
                xmlDing.set("filename", "EOAequation" + "_" + str(intChapterNumber) + "_" + str(intEquationnumber) + ".png")
                xmlDing.set("number", dictChapters[xmlChapter.get('id')] + "." + str(intEquationnumber))
                xmlDing.set("uid", xmlAnchor.get('id'))
            if xmlChapter.get("rend") == "nonumber":
                xmlDing.set("filename", "EOAequation" + "_" + str(intChapterNumber) + "_" + str(intEquationnumber) + ".png")
                xmlDing.set("number", str(intEquationnumber))
                xmlDing.set("uid", xmlAnchor.get('id'))
            xmlDing.set("id", xmlAnchor.get('id'))
            xmlDing.set("TeX", str_latexcode)
            #xmlDing.getparent().replace(xmlDing, xmlNew)
            # Insert Number of this Equation into dictEquations
            # NOTE(review): same mix of strNumberingType and rend as for the
            # subequations; the "regular" lookup in dictChapters looks like
            # it fails for a "nonumber" chapter - confirm
            if strNumberingType == "regular":
                dictEquations[xmlAnchor.get('id')] = \
                    str(dictChapters[xmlChapter.get('id')]) + "." + str(intEquationnumber)
            if strNumberingType == "essay":
                dictEquations[xmlAnchor.get('id')] = str(intEquationnumber)
            intEquationnumber += 1
            continue
    intChapterNumber += 1

intChapterNumber = 1
print("-----------------------------------------------------")
print("Processing .//EOAequationnonumber | .//EOAequationarraynonumber")
# Render every unnumbered display equation of every chapter with TeX2PNG and
# replace the source element by a fresh <EOAequationnonumber> element that
# carries the image file name ("filename") and the value returned by TeX2PNG
# ("TeX").
for xmlChapter in xmlChapters:
    # Image counter, restarted per chapter and shared by both element types.
    tempImagenumber = 1
    xmlDinge = xmlChapter.xpath(".//EOAequationnonumber | .//EOAequationarraynonumber")
    print("Working on Chapter %d which contains %d formulæ." % (intChapterNumber, len(xmlDinge)))
    for xmlDing in xmlDinge:
        if xmlDing.tag == "EOAequationarraynonumber":
            xmlTexmath = xmlDing.find(".//texmath")
            if xmlTexmath is not None:
                textSourcecode = xmlTexmath.text
            else:
                textSourcecode = xmlDing.text
            # Blank lines need to be removed otherwise TeX won't work
            textSourcecode = os.linesep.join([s for s in textSourcecode.splitlines() if s])
            # \rowattributeunknown has to be deleted, it's an artefact
            textSourcecode = re.sub("\\\\rowattributeunknown", "", textSourcecode)
            # The whole array is rendered as one single image. An earlier
            # variant that split the source at every "\\" and rendered one
            # image per row was parked behind an unconditional `continue`
            # and has been removed here because it could never run.
            # TODO: check whether and to what extent it is sufficient to put
            # an EOAequationarraynonumber into a single image
            str_latexcode = TeX2PNG(textSourcecode, "EOAequationarraynonumber", str(intChapterNumber), str(tempImagenumber))
            xmlNew = etree.Element("EOAequationnonumber", filename=("EOAequationarraynonumber" + "_" + str(intChapterNumber) + "_" + str(tempImagenumber) + ".png"))
            xmlNew.set("TeX", str_latexcode)
            xmlDing.getparent().replace(xmlDing, xmlNew)
            tempImagenumber += 1
            continue
        if xmlDing.tag == "EOAequationnonumber":
            # Same fallback as for the other equation types: use the
            # element's own text when there is no <texmath> descendant.
            xmlTexmath = xmlDing.find('.//texmath')
            if xmlTexmath is not None:
                textSourcecode = xmlTexmath.text
            else:
                textSourcecode = xmlDing.text
            # Blank lines need to be removed otherwise TeX won't work
            textSourcecode = os.linesep.join([s for s in textSourcecode.splitlines() if s])
            str_latexcode = TeX2PNG(textSourcecode, "EOAequationnonumber", str(intChapterNumber), tempImagenumber)
            # TODO: insert HTML code for the finished image (is this TODO still current?)
            xmlNew = etree.Element("EOAequationnonumber", filename=("EOAequationnonumber" + "_" + str(intChapterNumber) + "_" + str(tempImagenumber) + ".png"))
            xmlNew.set("TeX", str_latexcode)
            xmlDing.getparent().replace(xmlDing, xmlNew)
            tempImagenumber += 1
            continue
    intChapterNumber += 1

print("-----------------------------------------------------")
print("Converting EOAineq")
# All inline equations are collected into one TeX document (one equation per
# page), typeset in a single xelatex run, split into single-page PDFs with
# pdf_burst, and every page is then cropped and converted into
# items/EOAineq_<chapter>_<number>.png.
intChapterNumber = 1
intEOAineqRunningOrder = 1
# running order (= page number in the typeset PDF) -> image base name
dictEOAineqs = {}
strTeXEquations = ""
all_ineq = xmlTree.findall(".//EOAineq")
if len(all_ineq) > 0:
    print("Found " + str(len(all_ineq)) + " formulas")

    for xmlChapter in xmlChapters:
        print("Chapter " + str(intChapterNumber))
        xmlEOAineqs = xmlChapter.findall(".//EOAineq")
        intEOAineqnumber = 1
        for xmlEOAineq in xmlEOAineqs:

            xmlTexmath = xmlEOAineq.find('.//texmath')
            if xmlTexmath is not None:
                strSourceCode = xmlTexmath.text
            else:
                strSourceCode = xmlEOAineq.text

            progress(intEOAineqnumber, len(xmlEOAineqs),"Processing EOAineq %s of %s." % (intEOAineqnumber, len(xmlEOAineqs)))

            # Drop blank lines from the formula source
            strSourceCode = os.linesep.join([s for s in strSourceCode.splitlines() if s])
            strTeXEquations = strTeXEquations + "$" + strSourceCode + "$\n\\newpage\n"
            # Add intEOAineqRunningOrder : Filename to dictionary
            strFilename = "EOAineq_" + str(intChapterNumber) + "_" + str(intEOAineqnumber)
            dictEOAineqs[intEOAineqRunningOrder] = strFilename
            # Prepare XML: empty the element but keep the text following it
            tmpTail = xmlEOAineq.tail
            xmlEOAineq.clear()
            xmlEOAineq.tail = tmpTail
            xmlEOAineq.set("src", strFilename + ".png")
            xmlEOAineq.set("TeX", strSourceCode)
            # increment integers
            intEOAineqRunningOrder += 1
            intEOAineqnumber +=1
        intChapterNumber += 1

    # Regular expression -> replacement, applied to the collected TeX code.
    # Raw strings keep the patterns unchanged while avoiding invalid string
    # escapes.
    # NOTE(review): unlike the other keys, the last pattern starts with the
    # regex class "\s" (whitespace) instead of "\|"; it is kept as it was --
    # confirm whether r"\|slashed\|" was intended.
    dictRebindedCommands = {
        r"\|ket\|" : r"\\ket",
        r"\|braket\|" : r"\\braket",
        r"\|bra\|" : r"\\bra",
        r"\|Bra\|" : r"\\Bra",
        r"\|Ket\|" : r"\\Ket",
        r"\slashed\|" : r"\\slashed"
    }
    for strCommand, strReplacement in dictRebindedCommands.items():
        strTeXEquations = re.sub(strCommand, strReplacement, strTeXEquations)

    with open(TEMPLATE_PATH + "formula.tex", "r") as tmp:
        Template = tmp.read()
    # use local tmpdir
    formula_tmp_dir = os.getcwd() + "/tmp_files/formulas2png/"
    os.makedirs(formula_tmp_dir, exist_ok=True)

    # Make directory items if it doesn't already exist
    os.makedirs(os.getcwd() + "/items", exist_ok=True)
    s = string.Template(Template)
    e = s.substitute(DERINHALT=strTeXEquations)
    tmpFile = formula_tmp_dir + "EOAinline.tex"
    with open(tmpFile, "w") as tmp:
        tmp.write(e)
    print("Typesetting all Inline Equations")
    Kommando = TEXBIN_PATH + "xelatex --halt-on-error " + tmpFile
    Argumente = shlex.split(Kommando)
    # The log collects the output of xelatex, pdfcrop and the image
    # conversion; the with-block makes sure it is closed afterwards.
    with open('tmp_files/xelatex-run.log', 'w') as Datei:
        Ergebnis = subprocess.call(Argumente,cwd=formula_tmp_dir,stdout=Datei)
        if Ergebnis != 0:
            print("xelatex exited with status %s, see tmp_files/xelatex-run.log" % Ergebnis)
        print("Splitting all Inline Equations")
        pdf_burst("EOAinline.pdf", formula_tmp_dir)
        intNumberOfImages = len(dictEOAineqs)
        print("Converting %s split pages into PNG-Images" % intNumberOfImages)
        counter_dictEOAineqs = 1
        for intRunningOrder, strImageName in dictEOAineqs.items():
            # provide more status information here in output!
            progress(counter_dictEOAineqs, intNumberOfImages,"Splitting all inline equations, image %s of %s" % (counter_dictEOAineqs, intNumberOfImages))
            Kommando = TL_PATH + "texmf-dist/scripts/pdfcrop/pdfcrop.pl " + formula_tmp_dir + "EOAineq_" + str(intRunningOrder) + ".pdf " + formula_tmp_dir + strImageName + ".pdf"
            Argumente = shlex.split(Kommando)
            subprocess.call(Argumente,cwd=formula_tmp_dir,stdout=Datei)
            # os.getcwd() instead of the PWD environment variable: PWD may be
            # unset, and the items directory above is created below
            # os.getcwd() as well.
            Kommando = GM_PATH + " convert -density 144 " + formula_tmp_dir + strImageName + ".pdf " + os.getcwd() + "/items/" + strImageName + ".png"
            Argumente = shlex.split(Kommando)
            subprocess.call(Argumente,cwd=formula_tmp_dir,stdout=Datei)
            counter_dictEOAineqs += 1

else:
    print("Found no EOAineq. Continuing")


print("-----------------------------------------------------")
print("EOAFigure Numbering per Chapter")
# Store the printed number of every figure in dictFigures, keyed by the id of
# the figure's <anchor> child, and copy that id onto the figure element.
# The counter restarts at 1 in every chapter.
for xmlChapter in xmlChapters:
    strChapterUID = xmlChapter.get('id')
    xmlFigures = xmlChapter.xpath(".//EOAfigure | .//EOAlsfigure")
    Figurenumber = 1
    for xmlFigure in xmlFigures:
        xmlAnchor = xmlFigure.find("anchor")
        strFigureUID = xmlAnchor.get('id')
        strFigureNumber = str(Figurenumber)
        if strChapterUID:
            # Chapter has an id: prefix the figure number with the chapter's
            # entry in dictChapters. Otherwise the bare counter is used.
            strFigureNumber = str(dictChapters[strChapterUID]) + "." + strFigureNumber
        dictFigures[strFigureUID] = strFigureNumber
        xmlFigure.set("id", strFigureUID)
        Figurenumber += 1

print("-----------------------------------------------------")
print("Numbering Theorems")
# Nothing is counted here: the number of each theorem is read from its
# "id-text" attribute and stored in dictTheorems under its "id" attribute.
for xmlChapter in xmlChapters:
    for xmlTheorem in xmlChapter.findall(".//theorem"):
        strUID, strNumber = xmlTheorem.get("id"), xmlTheorem.get("id-text")
        dictTheorems[strUID] = strNumber

print("-----------------------------------------------------")
print("Section, Subsection,... Numbering per Chapter")
# Fill dictSections with "<chapter>.<section>" and
# "<chapter>.<section>.<subsection>" for every <div2>/<div3> that is not
# marked rend="nonumber". Inside a chapter marked rend="nonumber" the chapter
# part is left out, and such a chapter does not advance the chapter counter.
intChapterNumber = 1
for xmlChapter in xmlChapters:
    strUID = xmlChapter.get("id")
    boolNumberedChapter = xmlChapter.get("rend") != "nonumber"
    if boolNumberedChapter:
        strChapterPrefix = str(intChapterNumber) + "."
    else:
        strChapterPrefix = ""
    intSectionNumber = 1
    for xmlSection in xmlChapter.findall("div2"):
        if xmlSection.get("rend") == "nonumber":
            continue
        strUID = xmlSection.get("id")
        strSectionNumber = strChapterPrefix + str(intSectionNumber)
        dictSections[strUID] = strSectionNumber
        intSubsectionNumber = 1
        for xmlSubsection in xmlSection.findall("div3"):
            if xmlSubsection.get("rend") == "nonumber":
                continue
            strUID = xmlSubsection.get("id")
            dictSections[strUID] = strSectionNumber + "." + str(intSubsectionNumber)
            intSubsectionNumber += 1
        intSectionNumber += 1
    if boolNumberedChapter:
        intChapterNumber += 1

print("-----------------------------------------------------")
print("Numbering of Footnotes per Chapter")
# intChapterNumber is reset here but not used for the footnote numbers.
intChapterNumber = 1
# Footnotes are numbered 1, 2, 3, ... in document order, starting again at 1
# in every chapter; the number is stored as a string under the note's id.
for xmlChapter in xmlChapters:
    xmlFootnotes = xmlChapter.findall(".//note")
    for intNoteNumber, xmlFootnote in enumerate(xmlFootnotes, start=1):
        strUID = xmlFootnote.get("id")
        dictFootnotes[strUID] = str(intNoteNumber)


# here was OU's footnote code, now in libeoaconvert

print("-----------------------------------------------------")
print("Numbering of Lists per Chapter")
# As for theorems, the number of a list item is not computed but read from
# its "id-text" attribute and stored in dictLists under its "id" attribute.
for xmlChapter in xmlChapters:
    for xmlListitem in xmlChapter.findall(".//item"):
        strUID, strItemNumber = xmlListitem.get("id"), xmlListitem.get("id-text")
        dictLists[strUID] = strItemNumber

print("-----------------------------------------------------")
print("Working on Page Numbers for References")
# Two kinds of lines are picked out of every .aux file in the working
# directory:
#   \newlabel{BL}{{1.1}{4}}   (hyperref makes these lines much longer, e.g.
#   \newlabel{BL}{{1.1}{4}{Forschungsüberblick ...}{section.1.1}{}})
#       -> dictPagelabels[label] = third group, i.e. "4" in the example
#   \abx@aux@cite{key}
#       -> key is added to set_citations
rePagelabel = re.compile(r'\\newlabel\{(.*?)\}\{\{(.*?)\}\{(.*?)\}')
reCitation = re.compile(r'\\abx@aux@cite{(.*?)}')
listAuxFiles = glob.glob(os.getcwd() + "/*.aux")
if not listAuxFiles:
    print("No aux file found. Exiting")
    sys.exit(1)
for strFile in listAuxFiles:
    with open(strFile, "r") as tmpFile:
        lines = tmpFile.readlines()
    for line in lines:
        matched_label = rePagelabel.match(line)
        if matched_label:
            dictPagelabels[matched_label.group(1)] = matched_label.group(3)
        # parsing out information on cited works
        matched_citation = reCitation.match(line)
        if matched_citation is not None:
            set_citations.add(matched_citation.group(1))

print(dictPagelabels)
print(set_citations)

print("-----------------------------------------------------")
print("Numbering of Tables per Chapter")
# Fill dictTables with the printed number of every table, keyed by the text
# of its <EOAtablelabel>. Tables whose caption text is "nonumber" are skipped
# and do not advance the counter; a table without label text gets the
# generated label "table<chapter><table>".
intChapterNumber = 1
for xmlChapter in xmlChapters:
    intTableNumber = 1
    boolNumberedChapter = xmlChapter.get("rend") != "nonumber"
    for xmlTable in xmlChapter.findall(".//EOAtable"):
        xmlTableLabel = xmlTable.find(".//EOAtablelabel")
        strTableCaption = xmlTable.find(".//EOAtablecaption").text
        if strTableCaption == "nonumber":
            continue
        if not xmlTableLabel.text:
            xmlTableLabel.text = "table" + str(intChapterNumber) + str(intTableNumber)
        strUID = xmlTableLabel.text
        print(strUID)
        if boolNumberedChapter:
            dictTables[strUID] = dictChapters[xmlChapter.get('id')] + "." + str(intTableNumber)
        else:
            dictTables[strUID] = str(intTableNumber)
        intTableNumber += 1
        print(dictTables)
    intChapterNumber += 1

##############################################################
#                  Preparing the Bibliography                #
##############################################################
# The base name of the bibliography database (without ".bib") is the text of
# the <EOAbibliographydatabase> element.
if xmlTree.find(".//EOAbibliographydatabase") is not None:
    bib_database = xmlTree.find(".//EOAbibliographydatabase").text
    HAS_BIBLIOGRAPHY = True
else:
    print("No database found.")
    HAS_BIBLIOGRAPHY = False
    # Wait for the user to press return before going on.
    # NOTE(review): bib_database is not defined on this path, so the
    # pandoc-citeproc call below stops with a NameError -- confirm how a
    # publication without a bibliography is meant to be handled.
    input()

# the new solution: pandoc-citeproc converts the .bib file to JSON
interim_bib_json_file = (options.filename) + "-bib.json"
citeproc_command = "pandoc-citeproc --bib2json  %s" % bib_database + ".bib"
logging.debug(citeproc_command)
citeproc_arguments = shlex.split(citeproc_command)
# Used as a context manager so that the child process is waited for and its
# stdout pipe is closed once the output has been read.
with subprocess.Popen(citeproc_arguments, stdout=subprocess.PIPE) as citeproc_process:
    citeproc_json = citeproc_process.communicate()[0]

citations_json = json.loads(citeproc_json)

# for x in citations_json:
#     print(x["title"])

####################
# the old solution #
####################
# The biber output (BIBERFILE) is copied to <filename>bib.tex, trimmed, run
# through tralics, and the resulting <filename>bib.xml is cleaned up with
# regular expressions and loaded as xmlBibTree.
interim_bibtex_file = (options.filename) + "bib.tex"
try:
    shutil.copy(BIBERFILE, interim_bibtex_file)
except FileNotFoundError:
    print("%s has not been created yet. Switch TeX distribution to TeXLive2016, run biber_2.1 -O biber2-1.bbl %s to obtain this file" % (BIBERFILE, options.filename))
# Read all lines of Bibliographic TeX
with open(interim_bibtex_file, "r") as tmpFile:
    tmpLines = tmpFile.readlines()

# First line should link to Bibliographic Praeambel
tmpLines[0] = "\\include{%spre_bib}\n" % TEMPLATE_PATH
# Remove unwanted lines: indices 18 down to 1, deleted back to front so that
# the remaining indices stay valid
for intLine in reversed(range(1, 19)):
    del tmpLines[intLine]
# Save changes
with open(interim_bibtex_file, "w") as tmpFile:
    tmpFile.writelines(tmpLines)
# TeX has been sanitized, now tralics to make it intermediate XML
print("TeX has been sanitized, now tralics to make it intermediate XML")
strTralicsLog = options.filename + "-bib-tralics.log"
Kommando = "%s -log_file %s -confdir %s/tralics_conf -config %s/tralics.tcf -utf8 -utf8output -entnames=false %sbib.tex" % (TRALICS_PATH_EXEC, strTralicsLog, TRALICS_PATH_LIB, TRALICS_PATH_LIB, options.filename)
Argumente = shlex.split(Kommando)
Prozess = subprocess.call(Argumente)
# Sanitize XML to make it useable
strBibXMLFile = (options.filename) + "bib.xml"
with open(strBibXMLFile, "r") as tmpFile:
    tmpContent = tmpFile.read()
# Every pattern in this list is removed from the XML (replaced by "")
listReplace = [
    r"<math mode='display' xmlns='http://www.w3.org/1998/Math/MathML'>",
    r"<formula textype='displaymath' type='display'>",
    r"<mi>",
    r"</mi>",
    r"<mn>",
    r"<mn>",
    r"<mo>",
    r"</mo>",
    r"<mn>",
    r"</mn>",
    r"<mrow/>",
    r"<msup>",
    r"</msup>",
    r"</math>",
    r"</formula>",
    r"<formula type='inline'>",
    r"<math xmlns='http://www.w3.org/1998/Math/MathML'>",
    r"<formula textype='math' type='inline'>",
    r"<mrow>uniquename=(.*?),hash=(.*?)</mrow>",
    r"<mrow>hash=(.*?)</mrow>",
]
for strReplace in listReplace:
    tmpContent = re.sub(strReplace, "", tmpContent)

# Put Back Underscore _
tmpContent = re.sub(r"<error n='_' l='(.*?)' c='Missing dollar'/>", "_", tmpContent)

# Collapse every pair of consecutive newlines into one
tmpContent = re.sub(r"\n\n", "\n", tmpContent)

# Put back Ampersand: every "&" becomes "&amp;"
tmpContent = re.sub(r"&", "&amp;", tmpContent)
with open(strBibXMLFile, "w") as tmpFile:
    tmpFile.write(tmpContent)

# TeXML has been sanitized, now load xml-Tree
xmlParser2 = etree.XMLParser(no_network=False,load_dtd=False)
xmlBibTree = etree.parse((options.filename + "bib.xml"), xmlParser2)
xmlEntries = xmlBibTree.findall(".//entry")

###########################
# end of the old solution #
###########################

make_bibchecker(bib_database, set_citations)

# If Bibliography-Type is monograph search for EOAbibliography and make it all
if xmlTree.find(".//EOAbibliographytype").text == "monograph":
    if xmlTree.find(".//EOAprintbibliography") is not None:
        # The <EOAprintbibliography> element is emptied and turned into the
        # <div> that receives the formatted entries; its parent becomes a
        # <div> as well.
        xmlBibliography = xmlTree.find(".//EOAprintbibliography")
        xmlBibliography.clear()
        xmlBibliography.tag = "div"
        xmlBibliography.getparent().tag = "div"

        ###############
        # new version #
        ###############

        formatted_references = libeoaconvert.format_citations(set_citations, bib_database + ".bib", strLanguage)[0]

        # to check: is the order correct?
        entries = formatted_references.findall(".//div")

        # Every <div> of the formatted references becomes one
        # <p class="bibliography">; <p> tags inside it are stripped (their
        # content is kept).
        for entry in entries:
            entry.set("class", "bibliography")
            etree.strip_tags(entry, "p")
            entry.tag = "p"
            xmlBibliography.append(entry)

        ######################
        # end of new version #
        ######################

        ###############
        # old version #
        ###############
        # The entries are still built and parsed here, but the result is no
        # longer appended to the bibliography.
        xmlEntries = xmlBibTree.findall(".//entry")
        intNumberOfEntry = 0
        for xmlEntry in xmlEntries:
            bibEntry = Bibitem(xmlEntry)
            # The first entry has no predecessor to compare with; every
            # other entry is compared with the one before it on the first
            # element of fullauthorlastfirst().
            boolSameAuthor = False
            if intNumberOfEntry > 0:
                bibEntryPrevious = Bibitem(xmlEntries[intNumberOfEntry - 1])
                boolSameAuthor = bool(bibEntry.fullauthorlastfirst()[0] == bibEntryPrevious.fullauthorlastfirst()[0])
                if not boolSameAuthor:
                    print("dieser fall")
            strNewentry = "<p class=\"bibliography\">" + createBibEntryAuthorYear(bibEntry, boolSameAuthor=boolSameAuthor) + "</p>"
            if intNumberOfEntry > 0 and not boolSameAuthor:
                print("a new entry", strNewentry)

            xmlNew = etree.fromstring(strNewentry)
            # xmlBibliography.append(xmlNew)

            intNumberOfEntry += 1
        ######################
        # end of old version #
        ######################


# If Bibliography-Type is anthology search for EOAbibliography and make one per chapter
if xmlTree.find(".//EOAbibliographytype").text == "anthology":
    intChapterNumber = 1
    for xmlChapter in xmlChapters:
        if xmlChapter.find(".//EOAprintbibliography") is not None:
            xmlBibliography = xmlChapter.find(".//EOAprintbibliography")
            xmlBibliography.getparent().tag = "div"
            # The entries go into a new <div> placed directly after the
            # <EOAprintbibliography> element
            xmlBibliographyDiv = etree.Element("div")
            xmlBibliography.addnext(xmlBibliographyDiv)
            # Pick the refsection whose <number> equals the chapter number;
            # if none matches, the loop ends on the last refsection
            xmlRefsections = xmlBibTree.findall(".//refsection")
            for xmlRefsection in xmlRefsections:
                if xmlRefsection.find(".//number").text == str(intChapterNumber):
                    break
            xmlEntries = xmlRefsection.findall(".//entry")
            intNumberOfEntry = 0
            for xmlEntry in xmlEntries:
                bibEntry = Bibitem(xmlEntry)
                if intNumberOfEntry == 0:
                    # Don't check for previous author if first entry of the Bibliography
                    boolSameAuthor = False
                else:
                    # Check if author of previous Entry is the same
                    bibEntryPrevious = Bibitem(xmlEntries[intNumberOfEntry - 1])
                    boolSameAuthor = bool(bibEntry.fullauthorlastfirst() == bibEntryPrevious.fullauthorlastfirst())
                    print(bibEntry.citekey())
                strNewentry = "<p class=\"bibliography\">" + createBibEntryAuthorYear(bibEntry, boolSameAuthor=boolSameAuthor) + "</p>"
                xmlNew = etree.fromstring(strNewentry)
                xmlBibliographyDiv.append(xmlNew)
                intNumberOfEntry += 1
        intChapterNumber += 1

# for the time being
strCitation = ""

# Bibliographies are done, now for the citations.
# Every EOAciteauthoryear / EOAciteyear / EOAcitemanual element is turned
# into <span class="citation"> whose text is the citation label and whose
# attributes carry the data for a popover on the website.
strBibliographyType = xmlTree.find(".//EOAbibliographytype").text
if strBibliographyType == "anthology" or strBibliographyType == "monograph":
    intChapterNumber = 1
    for xmlChapter in xmlChapters:
        print ("-----------------------------------------------------")
        print ("Processing References for Chapter " + str(intChapterNumber))
        xmlCitations = xmlChapter.xpath(".//EOAciteauthoryear | .//EOAciteyear | .//EOAcitemanual")

        # citekey -> Bibitem for this chapter. Built once, on the first
        # citation of the chapter, instead of wrapping every entry again for
        # every single citation.
        dictBibEntries = None
        counter_citations = 1

        for xmlCitation in xmlCitations:
            string_citekey = xmlCitation.find("./citekey").text
            progress(counter_citations, len(xmlCitations),"Processing reference %s of %s: %s" % (counter_citations, len(xmlCitations), string_citekey))
            if dictBibEntries is None:
                if strBibliographyType == "anthology":
                    # Find the refsection for this chapter; if no <number>
                    # matches, the loop ends on the last refsection
                    xmlRefsections = xmlBibTree.findall(".//refsection")
                    for xmlRefsection in xmlRefsections:
                        if xmlRefsection.find(".//number").text == str(intChapterNumber):
                            break
                    xmlEntries = xmlRefsection.findall(".//entry")
                else:
                    # monograph: all entries, forget about refsection
                    xmlEntries = xmlBibTree.findall(".//entry")
                dictBibEntries = {}
                for xmlEntry in xmlEntries:
                    bibEntry = Bibitem(xmlEntry)
                    # a later entry with the same citekey wins
                    dictBibEntries[bibEntry.citekey()] = bibEntry
            # Start from a clean state so that a citekey without a matching
            # entry does not inherit label and title of the previous citation
            strCitation = ""
            strTitle = None
            bibMatch = dictBibEntries.get(string_citekey)
            if bibMatch is not None:
                if xmlCitation.tag == "EOAciteauthoryear":
                    strCitation = bibMatch.shortauthor() + " " + bibMatch.labelyear()
                    if bibMatch.labelyearsuffix() is not None:
                        strCitation = strCitation + bibMatch.labelyearsuffix()
                elif xmlCitation.tag == "EOAciteyear":
                    strCitation = bibMatch.labelyear()
                    if bibMatch.labelyearsuffix() is not None:
                        strCitation = strCitation + bibMatch.labelyearsuffix()
                elif xmlCitation.tag == "EOAcitemanual":
                    cite_text = xmlCitation.find("citetext")
                    if len(cite_text):
                        # citetext contains markup: use the serialised first
                        # child, as str (not bytes) so that the page number
                        # can be appended below
                        strCitation = etree.tostring(cite_text[0], encoding="unicode")
                    else:
                        strCitation = cite_text.text or ""
                strTitle = bibMatch.title()
            if xmlCitation.find("./page") is not None and xmlCitation.find("./page").text is not None:
                strCitation = strCitation + ", " + xmlCitation.find("./page").text
            # Replace the XML tag by the citation label here
            tmpTail = xmlCitation.tail
            xmlCitation.clear()
            xmlCitation.tag = "span"
            xmlCitation.set("rel","popover")
            xmlCitation.set("class","citation")
            xmlCitation.set("citekey", string_citekey)
            xmlCitation.text = strCitation
            xmlCitation.tail = tmpTail
            # Create Link to be used for website in a popover
            xmlCitation.set("data-toggle", "popover")
            xmlCitation.set("html", "true")
            xmlCitation.set("data-placement", "bottom")
            xmlCitation.set("data-title", strCitation)
            if strTitle is None:
                xmlCitation.set("data-content", "missing")
            else:
                try:
                    xmlCitation.set("data-content", strTitle)
                except (TypeError, ValueError):
                    # title is not something lxml accepts as attribute value
                    xmlCitation.set("data-content", "missing")
            counter_citations += 1
        intChapterNumber += 1

# Helper for the numeric citation labels
def _compress_citenumbers(listCitenumbers):
    """Return the bracketed label for a sorted list of citation numbers.

    listCitenumbers holds the numbers as strings, sorted numerically.
    Runs of three or more consecutive numbers are written as "first-last";
    shorter runs are listed individually, e.g.
    ["1", "2", "3", "5", "6"] -> "[1-3,5,6]".
    """
    listNumbers = [int(strNumber) for strNumber in listCitenumbers]
    listParts = []
    intRunStart = 0
    for intPos in range(1, len(listNumbers) + 1):
        # still inside a run of consecutive numbers
        if intPos < len(listNumbers) and listNumbers[intPos] == listNumbers[intPos - 1] + 1:
            continue
        listRun = listNumbers[intRunStart:intPos]
        if len(listRun) >= 3:
            listParts.append(str(listRun[0]) + "-" + str(listRun[-1]))
        else:
            listParts.extend(str(intNumber) for intNumber in listRun)
        intRunStart = intPos
    return "[" + ",".join(listParts) + "]"


# If Bibliography-Type is monograph-numeric search for EOAbibliography and make it all
if xmlTree.find(".//EOAbibliographytype").text == "monograph-numeric":
    if xmlTree.find(".//EOAprintbibliography") is not None:
        dictCitekeysNumbers = {}
        dictCitekeysTitles = {}
        xmlBibliography = xmlTree.find(".//EOAprintbibliography")
        xmlBibliography.clear()
        xmlBibliography.tag = "div"
        xmlBibliography.getparent().tag = "div"
        xmlEntries = xmlBibTree.findall(".//entry")
        intNumberOfEntry = 1
        for xmlEntry in xmlEntries:
            # Go through all entries and assign a number to the citekey
            bibEntry = Bibitem(xmlEntry)
            strCitekey = bibEntry.citekey()
            dictCitekeysNumbers[strCitekey] = str(intNumberOfEntry)
            dictCitekeysTitles[strCitekey] = str(bibEntry.title())
            strNewentry = "<p class=\"bibliography\">[" + str(intNumberOfEntry) + "] " + createBibEntryNumeric(bibEntry) + "</p>"
            xmlNew = etree.fromstring(strNewentry)
            xmlBibliography.append(xmlNew)
            intNumberOfEntry += 1
        # Now for the references via EOAcitenumeric
        xmlCitenumerics = xmlTree.findall(".//EOAcitenumeric")
        for xmlCitenumeric in xmlCitenumerics:
            print(etree.tostring(xmlCitenumeric))
            strPopover = ""
            # <citekey> holds a comma-separated list of keys; blanks and
            # newlines are removed before splitting
            tmpCitekeys = xmlCitenumeric.find(".//citekey").text
            tmpCitekeys = re.sub(" ", "", tmpCitekeys)
            tmpCitekeys = re.sub("\n", "", tmpCitekeys)
            listCitekeys = tmpCitekeys.split(",")
            listCitenumbers = []
            for strCitekey in listCitekeys:
                listCitenumbers.append(dictCitekeysNumbers[strCitekey])
                # Create Text to be used on the website in a popover
                strPopover = strPopover + "[" + dictCitekeysNumbers[strCitekey] + "] " + dictCitekeysTitles[strCitekey] + " "
            listCitenumbers = sorted(listCitenumbers, key=int)
            strResult = _compress_citenumbers(listCitenumbers)
            xmlCitenumeric.text = strResult
            # Create Link to be used for website
            xmlCitenumeric.set("data-toggle", "popover")
            xmlCitenumeric.set("html", "true")
            xmlCitenumeric.set("data-content", strPopover)
            xmlCitenumeric.set("class","citation")
            xmlCitenumeric.set("data-placement", "bottom")
            xmlCitenumeric.set("data-title", strResult)

# author is missing!
# print("xmlBibliography")
# print(etree.tostring(xmlBibliography))
# input()

# Numeric citations for the individual chapters
def _format_citenumber_ranges(listSortedNumbers):
    """Collapse ascending citation numbers into a bracketed label.

    Runs of three or more consecutive numbers are written as "first-last";
    shorter runs are listed one by one, separated by commas, e.g.
    ["1", "2", "3", "5", "6"] -> "[1-3,5,6]".

    listSortedNumbers -- citation numbers (int or numeric str), ascending.
    Returns the label including the square brackets; empty input gives "[]".
    """
    listNumbers = [int(number) for number in listSortedNumbers]
    if not listNumbers:
        return "[]"
    listParts = []
    intRunStart = intRunEnd = listNumbers[0]
    # The trailing None is a sentinel that flushes the final run.
    for intNumber in listNumbers[1:] + [None]:
        if intNumber is not None and intNumber == intRunEnd + 1:
            intRunEnd = intNumber
            continue
        if intRunEnd - intRunStart >= 2:
            listParts.append(str(intRunStart) + "-" + str(intRunEnd))
        elif intRunEnd != intRunStart:
            # Two neighbours only: list both rather than writing a range.
            listParts.append(str(intRunStart) + "," + str(intRunEnd))
        else:
            listParts.append(str(intRunStart))
        intRunStart = intRunEnd = intNumber
    return "[" + ",".join(listParts) + "]"


if xmlTree.find(".//EOAbibliographytype").text == "anthology-numeric":
    intChapterNumber = 1
    for xmlChapter in xmlChapters:
        print("Processing Bibliography")
        if xmlChapter.find(".//EOAprintbibliography") is not None:
            # Both lookup tables are rebuilt for every chapter, so the
            # numbering restarts at 1 in each chapter's bibliography.
            dictCitekeysNumbers = {}
            dictCitekeysTitles = {}
            xmlBibliography = xmlChapter.find(".//EOAprintbibliography")
            #xmlBibliography.clear()
            xmlBibliography.tag = "div"
            xmlBibliography.getparent().tag = "div"
            # Pick the refsection whose <number> equals the chapter counter.
            # NOTE(review): if no refsection matches, the loop falls through
            # and the last refsection is used -- confirm this is intended.
            xmlRefsections = xmlBibTree.findall(".//refsection")
            for xmlRefsection in xmlRefsections:
                if xmlRefsection.find(".//number").text == str(intChapterNumber):
                    break
            xmlEntries = xmlRefsection.findall(".//entry")
            for intNumberOfEntry, xmlEntry in enumerate(xmlEntries, start=1):
                # Go through all entries and assign a number to the citekey
                bibEntry = Bibitem(xmlEntry)
                strCitekey = bibEntry.citekey()
                dictCitekeysNumbers[strCitekey] = str(intNumberOfEntry)
                dictCitekeysTitles[strCitekey] = str(bibEntry.title())
                strNewentry = "<p class=\"bibliography\">[" + str(intNumberOfEntry) + "] " + createBibEntryNumeric(bibEntry) + "</p>"
                xmlNew = etree.fromstring(strNewentry)
                xmlBibliography.append(xmlNew)
            # Now for the references via EOAcitenumeric
            xmlCitenumerics = xmlChapter.xpath(".//EOAcitenumeric | .//EOAciteauthoryear | .//EOAciteyear")
            print("Found numeric citation in chapter " + str(intChapterNumber))
            for xmlCitenumeric in xmlCitenumerics:
                # The citekey element holds a comma-separated list of keys;
                # drop blanks and line breaks before splitting it.
                tmpCitekeys = xmlCitenumeric.find(".//citekey").text
                tmpCitekeys = tmpCitekeys.replace(" ", "").replace("\n", "")
                print(tmpCitekeys)
                listCitekeys = tmpCitekeys.split(",")
                listCitenumbers = []
                listPopoverParts = []
                for strCitekey in listCitekeys:
                    print(strCitekey)
                    # A key that is not in this chapter's refsection raises
                    # KeyError here.
                    strCitenumber = dictCitekeysNumbers[strCitekey]
                    listCitenumbers.append(strCitenumber)
                    # Create Text to be used on the website in a popover
                    listPopoverParts.append("[" + strCitenumber + "] " + dictCitekeysTitles[strCitekey] + " ")
                strPopover = "".join(listPopoverParts)
                listCitenumbers = sorted(listCitenumbers, key=int)
                strResult = _format_citenumber_ranges(listCitenumbers)
                xmlCitenumeric.text = strResult
                # Create Link to be used for website in a popover
                xmlCitenumeric.set("data-toggle", "popover")
                xmlCitenumeric.set("data-placement", "bottom")
                xmlCitenumeric.set("data-title", " " + strResult)
                xmlCitenumeric.set("data-content", strPopover)
                xmlCitenumeric.set("class","citation")
        # Chapters without a bibliography still advance the counter.
        intChapterNumber += 1

# here followed the conversion to epub and the conversion to django.xml
# both parts were removed and put into separate files.

# Serialise the processed tree before touching the output file, so a
# serialisation error does not leave a truncated file behind.
ergebnis = etree.tostring(xmlTree, pretty_print=True, encoding="unicode")
# The text carries no XML declaration, which XML parsers read as UTF-8;
# write it as UTF-8 explicitly instead of relying on the locale default.
with open("tmp_files/IntermediateXMLFile.xml", "w", encoding="utf-8") as ergebnisdatei:
    ergebnisdatei.write(ergebnis)

# Lookup tables collected during the conversion, stored for the separate
# epub and Django conversion scripts.
data_to_pickle = {'chapterdict' : dictChapters,
                  'eqdict' : dictEquations,
                  'listdict' : dictLists,
                  'theoremdict' : dictTheorems,
                  'figdict' : dictFigures,
                  'secdict' : dictSections,
                  'fndict' : dictFootnotes,
                  'tabdict' : dictTables,
                  'pagelabeldict' : dictPagelabels
                      }

with open('tmp_files/data.pickle', 'wb') as f:
    # Pickle the 'data' dictionary using the highest protocol available.
    pickle.dump(data_to_pickle, f, pickle.HIGHEST_PROTOCOL)

print("Removing temporary files.")
cleanup()
print("Done!")
sys.exit()