tei2imxml.py

#!/usr/bin/env python3
# -*- coding: utf-8; mode: python -*-

"""A converter from TEI to customized DocBook XML.

This program is used to get a TEI XML file into the EOAv1 workflow.
Out of the resulting files, the existing programs can be used to
create the output formats EPUB and Django.
"""

__version__ = "1.0"
__date__ = "20180116"
__author__ = "kthoden@mpiwg-berlin.mpg.de"

from utils.load_config import load_config
import utils.libeoaconvert as libeoaconvert
import utils.bib2html as bib2html

import os
import sys
import logging
import json
import subprocess
import pickle
import shlex
import argparse
import configparser
import bibtexparser
from datetime import datetime
from bs4 import BeautifulSoup
from lxml import etree#, objectify
#from lxml.html import soupparser
from pathlib import Path

# things to be done
# assign ids top to bottom for the following elements:
# div1 div2 div3 note item table EOAfigure EOAequation formula theorem

# NOTE(review): presumably the base name of the temporary files created for
# the bib2html run; it is not used in this part of the file -- TODO confirm.
BIB2HTML_FILENAME = "temp"

# Directory that contains this script (symlinks resolved) and the name of
# the script without its extension.
BASE_DIR = Path( os.path.realpath(__file__) ).parent
SCRIPT_NAME = Path( __file__).stem

# CSV table read by hi_lookup(). The path is relative, so the file is
# looked up in the current working directory.
CSV_FILE = os.path.expanduser("hi_figures.csv")

# TEI namespace and the prefix map handed to the XPath queries of this module.
ns_tei = "http://www.tei-c.org/ns/1.0"
NS_MAP = {"t" : ns_tei}

# NOTE: the following two statements run at import time, i.e. before any
# function of this module is called.
logging.info( "checking executables 'utils.bib2html' needs...:" )
bib2html.check_executables()


def get_publication_info(xml_tree, translation_file):
    """Query the TEI document for metadata fields.

    xml_tree is the parsed TEI document, translation_file is handed
    through to format_authors() when the names of persons are formatted.

    Return a dictionary with ``eoa_*`` keys. Single-valued fields hold
    the first XPath result, or "" if nothing was found. Multi-valued
    fields (persons, keywords) hold a list of strings, or "" if nothing
    was found. The program exits if a mandatory field is missing.
    """

    info_dict = {}

    ns_tei = "http://www.tei-c.org/ns/1.0"
    ns_cc = "http://web.resource.org/cc/"
    ns_rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
    # shadows the module-level map on purpose: this one also knows cc and rdf
    NS_MAP = {"t" : ns_tei, "c" : ns_cc, "r" : ns_rdf}

    def get_field(xml_tree, query_path, mandatory=False, findall=False, noformat=False):
        """Query XML for metadata fields.

        Default behaviour is if it fails, move on, if mandatory is set
        to True, exit the program.

        With findall, all hits are returned as a list: the text of the
        elements if noformat is set, otherwise every hit is treated as a
        reference to a person and formatted by format_authors().
        """

        hits = xml_tree.xpath(query_path, namespaces=NS_MAP)

        if findall is True:
            if len(hits) == 0:
                return ""
            if noformat is True:
                return [x.text for x in hits]
            # the language is only needed (and only looked up) for formatting names
            publang = xml_tree.xpath("//t:teiHeader/t:profileDesc/t:langUsage/t:language/@ident", namespaces=NS_MAP)[0]
            return [format_authors([person], publang, xml_tree, translation_file) for person in hits]

        if len(hits) > 0:
            return hits[0]
        if mandatory is True:
            sys.exit("Field stored in %s is mandatory. Exiting." % query_path)
        return ""
    # def get_field ends here

    # Mandatory values (according to database schema)
    info_dict['eoa_publicationdate'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:publicationStmt/t:date/@when", mandatory=True)
    info_dict['eoa_language'] = get_field(xml_tree, "//t:teiHeader/t:profileDesc/t:langUsage/t:language/@ident", mandatory=True)
    info_dict['eoa_license'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:publicationStmt/t:availability/t:licence/@target", mandatory=True)
    info_dict['eoa_number'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:seriesStmt/t:idno[@type='number']/text()", mandatory=True)
    info_dict['eoa_series'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:seriesStmt/t:title/text()", mandatory=True)
    info_dict['eoa_title'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:title[@type='main']/text()", mandatory=True)

    # Optional (according to database schema)
    info_dict['eoa_subtitle'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:title[@type='sub']/text()")
    info_dict['eoa_isbn'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:publicationStmt/t:idno[@type='isbn']/text()")
    info_dict['eoa_price'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:extent/t:measure[@type='price']/@quantity")
    info_dict['eoa_pages'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:extent/t:measure[@commodity='pages']/@quantity")
    info_dict['eoa_currency'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:extent/t:measure[@type='price']/@unit")
    info_dict['eoa_shoplink_url'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:publicationStmt/t:idno[@type='shoplink']/text()")
    info_dict['eoa_shoplink_text'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:publicationStmt/t:distributor/t:orgName/text()")
    # An unprefixed "p" in XPath 1.0 only matches elements in no namespace,
    # so paragraphs of a namespaced TEI header were never found. Matching
    # on the local name finds the paragraph with or without namespace.
    info_dict['eoa_brief_desc'] = get_field(xml_tree, "//t:teiHeader/t:profileDesc/t:abstract[@n='brief']/*[local-name()='p']/text()")
    info_dict['eoa_detail_desc'] = get_field(xml_tree, "//t:teiHeader/t:profileDesc/t:abstract[@n='detailed']/*[local-name()='p']/text()")
    info_dict['eoa_additional_info'] = get_field(xml_tree, "//t:teiHeader/t:profileDesc/t:abstract[@n='additional']/*[local-name()='p']/text()")
    info_dict['eoa_dedication'] = get_field(xml_tree, "//t:text/t:front/t:div[@type='dedication']/t:ab/text()")

    # Lists of persons (formatted names) and keywords
    info_dict['eoa_submitters'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:editor[@role='submitter']/@ref", findall=True)
    info_dict['eoa_publicationmanagers'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:editor[@role='publicationmanager']/@ref", findall=True)
    info_dict['eoa_publicationassistants'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:editor[@role='publicationassistant']/@ref", findall=True)
    info_dict['eoa_editorialcoordinators'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:editor[@role='editorialcoordinator']/@ref", findall=True)
    info_dict['eoa_copyeditors'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:editor[@role='copyeditor']/@ref", findall=True)
    info_dict['eoa_translators'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:editor[@role='translator']/@ref", findall=True)
    info_dict['eoa_keywords'] = get_field(xml_tree, "//t:teiHeader/t:profileDesc/t:textClass/t:keywords/t:list/t:item", findall=True, noformat=True)
    info_dict['eoa_authors'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:author/@ref", findall=True)
    info_dict['eoa_editors'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:editor[@role='volumeeditor']/@ref", findall=True)

    return info_dict
# def get_publication_info ends here

def make_publication_cfg(info_dict, translation_file):
    """Build the publication configuration from the collected metadata.

    info_dict is the dictionary of ``eoa_*`` fields as returned by
    get_publication_info(); translation_file is only used to look up
    the abbreviation for "editor(s)" of an edited volume.

    Return a ConfigParser object with the sections 'Technical',
    'General' and 'Authors'. The program exits if there are more than
    8 keywords, if both or neither of authors and editors are given,
    or if there are more than 5 of either.
    """

    config = configparser.ConfigParser(delimiters=(':'))
    # https://stackoverflow.com/questions/1611799/preserve-case-in-configparser
    config.optionxform=str

    # set up three main bits
    config['Technical'] = {}
    technical_config = config['Technical']
    config['General'] = {}
    general_config = config['General']
    config['Authors'] = {}
    authors_config = config['Authors']

    date_object = datetime.strptime(info_dict['eoa_publicationdate'], "%Y-%m-%d")

    # fill in the fields
    technical_config['Serie'] = info_dict['eoa_series']
    technical_config['Number'] = info_dict['eoa_number']
    technical_config['Title'] = info_dict['eoa_title']
    technical_config['Subtitle'] = info_dict['eoa_subtitle']
    technical_config['PublicationDate'] = info_dict['eoa_publicationdate']
    technical_config['PublicationYear'] = datetime.strftime(date_object, "%Y")
    technical_config['ISBN'] = info_dict['eoa_isbn']
    technical_config['Price'] = "{} {}".format(info_dict['eoa_price'], info_dict['eoa_currency'])
    technical_config['Shoplink'] = """<a href="{0}">{1}</a>""".format(info_dict['eoa_shoplink_url'], info_dict['eoa_shoplink_text'])
    technical_config['Language'] = info_dict['eoa_language']
    # the fifth component of the license URL when split at "/", e.g.
    # "by-nc-sa" in https://creativecommons.org/licenses/by-nc-sa/3.0/de/
    technical_config['License'] = info_dict['eoa_license'].split("/")[4]

    general_config['BriefDescription'] = info_dict['eoa_brief_desc']
    if info_dict['eoa_submitters'] is not None:
        general_config['Submitter'] = ", ".join(info_dict['eoa_submitters'])
    general_config['PublicationManagment'] = ", ".join(info_dict['eoa_publicationmanagers'])
    general_config['PublicationAssistants'] = ", ".join(info_dict['eoa_publicationassistants'])

    keywords = info_dict['eoa_keywords']
    if len(keywords) > 8:
        sys.exit("Too many Keywords. Up to 8 are allowed. Exiting.")
    # number the keywords by position; looking the position up with
    # list.index() gave duplicate keywords the same label
    for position, keyword in enumerate(keywords, start=1):
        general_config[f"Keyword{position}"] = keyword

    general_config['DetailedDescription'] = info_dict['eoa_detail_desc']
    general_config['AdditionalInformation'] = info_dict['eoa_additional_info']
    general_config['EditorialCoordination'] = ", ".join(info_dict['eoa_editorialcoordinators'])
    general_config['Copyediting'] = ", ".join(info_dict['eoa_copyeditors'])
    general_config['Dedication'] = info_dict['eoa_dedication']
    general_config['Translator'] = ", ".join(info_dict['eoa_translators'])

    number_of_authors = len(info_dict['eoa_authors'])
    number_of_editors = len(info_dict['eoa_editors'])

    if number_of_authors > 0 and number_of_editors > 0:
        logging.error("Found both editor and authors. This is not permitted. Exiting")
        sys.exit(1)
    elif number_of_authors == 0 and number_of_editors == 0:
        logging.error("Found neither editor nor authors. Please fill in. Exiting")
        sys.exit(1)
    elif number_of_authors > 5 or number_of_editors > 5:
        logging.error("Only a maximum of 5 authors or editors allowed. Exiting")
        sys.exit(1)

    # From here on exactly one of the two lists holds 1 to 5 names.
    edited_volume = number_of_editors > 0
    if edited_volume:
        people = info_dict['eoa_editors']
        abbreviation = "editor-abbr" if number_of_editors == 1 else "editors-abbr"
        zusatz = "({})".format(libeoaconvert.translate(abbreviation, info_dict['eoa_language'], translation_file).capitalize())
    else:
        people = info_dict['eoa_authors']
        zusatz = ""

    # Always write Author1 to Author5; unused slots stay empty.
    for position in range(5):
        author_label = "Author" + str(position + 1)
        if position < len(people):
            authors_config[author_label] = people[position]
            authors_config['Zusatz'] = zusatz
        else:
            authors_config[author_label] = ""

    return config
# def make_publication_cfg ends here


def check_bibliography(xml_tree):
    """Read the bibliography declaration from the TEI header.

    Return a dictionary with the keys "source" (target of the
    bibdatabase reference) and "type" (its type attribute). The program
    exits if the type is not one of the allowed publication types.
    """

    ref_path = "//t:teiHeader/t:fileDesc/t:sourceDesc/t:ab[@type='bibdatabase']/t:ref"
    allowed_types = ["monograph", "anthology", "monograph-numeric", "anthology-numeric"]

    bib_data = {
        "source": xml_tree.xpath(ref_path + "/@target", namespaces=NS_MAP)[0],
        "type": xml_tree.xpath(ref_path + "/@type", namespaces=NS_MAP)[0],
    }
    logging.info("The bibfile is %s and this publication type is %s." % (bib_data["source"], bib_data["type"]))

    if bib_data["type"] in allowed_types:
        return bib_data

    logging.error(f"The bibliography type {bib_data['type']} is not allowed. Exiting")
    sys.exit(1)
# def check_bibliography ends here


def render_reference(list_of_xml_elements, cited_data):
    """Provide an attribute for a formatted version of Reference.

    This will be used for output formats that don't have a bibliographic
    formatter themselves.

    Every element of list_of_xml_elements gets two new children,
    <abbr type="authoryear"> and <abbr type="year">, filled from
    cited_data, which maps a citekey to the tuple
    (authoryear_citation, year_citation, title).
    """

    for reference in list_of_xml_elements:
        # the target has the form "#citekey": drop the leading character
        citekey = reference.xpath("t:ref/@target", namespaces=NS_MAP)[0][1:]

        # here we need to get a formatted version of the entry, like it
        # would appear in the typeset version.
        # looked at: bibulous
        # pandoc-citeproc, maybe

        # Positions 0 and 1 hold the author-year and the year form.
        # Reading positions 1 and 2 put the year into "authoryear" and
        # the title into "year".
        authoryear_citation, year_citation = cited_data[citekey][:2]

        element = etree.SubElement(reference, "abbr", type="authoryear")
        element.text = authoryear_citation
        element = etree.SubElement(reference, "abbr", type="year")
        element.text = year_citation
# def render_reference ends here

def write_citation_markdown(used_citekeys, citations_filename):
    """Write markdown file with citekeys for bibliography rendering

    The file lists every citekey of used_citekeys three times, once per
    citation style (author-year, year only, year in parentheses), and
    ends with an empty "References" section. It is written as UTF-8.
    """

    md_file_header = "---\nlang: en\ntitle: Citations\n...\n\n"

    # heading of a section and the markdown citation syntax used in it
    sections = (
        ("# citeauthoryear\n", "[@%s]\n"),
        ("\n# citeyear\n", "[-@%s]\n"),
        # sentencestyle
        ("\n# yearparen\n", "@%s\n"),
    )

    # the keys are walked through three times, so a one-shot iterable
    # has to be stored first
    citekeys = list(used_citekeys)

    # explicit encoding: citekeys need not be ASCII and the result must
    # not depend on the locale of the machine
    with open(citations_filename, "w", encoding="utf-8") as citation_formatter:
        citation_formatter.write(md_file_header)
        for heading, citation_syntax in sections:
            citation_formatter.write(heading)
            citation_formatter.writelines(citation_syntax % entry for entry in citekeys)
        citation_formatter.write("\n# References\n")

    logging.info(f"Wrote citation formatter: {citations_filename}")
# def write_citation_markdown ends here

def format_reference_list(used_citekeys, html_file):
    """Return the formatted list of references found in html_file.

    The file is parsed and the first <div class="references"> element
    is returned. used_citekeys is accepted but not used here.
    """

    logging.info("Opening %s", html_file)
    # second part of function
    parsed_html = etree.parse(str(html_file))
    reference_divs = parsed_html.xpath("//div[@class='references']")

    return reference_divs[0]
# def format_reference_list ends here

def format_citations_json(used_citekeys, bibdata, html_file):
    """Return a dictionary of the used citations as formatted entries.

    bibdata is a list of bibliography entries (dictionaries with an
    "id" and usually a "title"), html_file contains the rendered
    citations. Citekeys that do not occur in bibdata are left out.

    citation_dict[citekey] = (authoryear_citation, year_citation, title)
    """

    with open(html_file, "r") as ding:
        cites = BeautifulSoup(ding, "html.parser")

    # index the entries once instead of scanning the whole list for
    # every citekey; with duplicate ids the last entry wins, as before
    entries_by_id = {bib_entry["id"]: bib_entry for bib_entry in bibdata if "id" in bib_entry}

    citation_dict = {}

    for entry in used_citekeys:
        if entry not in entries_by_id:
            continue

        logging.debug(f"""{html_file}: {entry}.""")

        # An entry without title gets an empty title. Previously the
        # variable was unbound here (or still held the title of the
        # preceding entry).
        title = entries_by_id[entry].get("title")
        if title is None:
            logging.warning("No title found for %s", entry)
            title = ""

        authoryear_citation = cites.select("#citeauthoryear ~ p > span[data-cites='%s']" % entry)[0].text
        year_citation = cites.select("#citeyear ~ p > span[data-cites='%s']" % entry)[0].text
        citation_dict[entry] = (authoryear_citation, year_citation, title)

    return citation_dict
# def format_citations_json ends here


def format_citations(used_citekeys, bibdata, html_file):
    """Return a dictionary of the used citations as formatted entries.

    bibdata maps citekeys to bibliography entries (dictionaries that
    usually have a "title"), html_file contains the rendered citations.
    Citekeys that do not occur in bibdata are left out.

    citation_dict[citekey] = (authoryear_citation, year_citation, title)
    """

    cites = etree.parse(str(html_file))

    citation_dict = {}

    for entry in used_citekeys:
        if entry not in bibdata:
            continue

        logging.debug(f"""{html_file}: {entry}.""")

        # An entry without title gets an empty title. Previously the
        # variable was unbound here (or still held the title of the
        # preceding entry).
        try:
            title = bibdata[entry]["title"]
        except KeyError:
            logging.warning("No title found for %s", entry)
            title = ""

        authoryear_citation = cites.xpath(f"//div[@class='authoryear']/p/span[@data-cites='{entry}']")[0].text
        year_citation = cites.xpath(f"//div[@class='year']/p/span[@data-cites='{entry}']")[0].text
        citation_dict[entry] = (authoryear_citation, year_citation, title)

    return citation_dict
# def format_citations ends here


def format_pagerange(pagerange_start, pagerange_end):
    """Format the two ends of a cited page range as one string.

    Either value may be None and is left out then; the end of the
    range is preceded by an en dash.
    """

    first_part = "" if pagerange_start is None else pagerange_start
    second_part = "" if pagerange_end is None else "–" + pagerange_end

    return first_part + second_part
# def format_pagerange ends here

def format_authors(list_author_id, publang, xml_tree, translation_file):
    """Look up persons in the respStmt entries and join their names.

    list_author_id contains references of the form "#id". Each name is
    rendered as "forename surname". Two names are joined by the
    translated word for "and"; longer lists are separated by commas,
    with an additional comma before the "and" in English only.
    An empty list yields an empty string.
    """

    names = []
    for author_ref in list_author_id:
        # drop the leading "#" of the reference to get the xml:id
        resp_stmt = xml_tree.xpath("//t:respStmt[@xml:id='%s']" % author_ref[1:], namespaces=NS_MAP)[0]

        surname = resp_stmt.find("t:persName/t:surname", namespaces=NS_MAP).text
        forename = resp_stmt.find("t:persName/t:forename", namespaces=NS_MAP).text
        names.append(f"{forename} {surname}")

    how_many = len(names)
    if how_many == 0:
        return ""
    if how_many == 1:
        return names[0]

    conjunction = libeoaconvert.translate("and", publang, translation_file)
    if how_many == 2:
        return "{} {} {}".format(names[0], conjunction, names[1])

    before_conjunction = ", " if publang == "en" else " "
    return ", ".join(names[:-1]) + before_conjunction + "{} {}".format(conjunction, names[-1])
# def format_authors ends here

def hi_lookup(hi_id):
    """Get hyperimage code from CSV file

    The columns of CSV_FILE are read as checked, id, number and
    elementstring. Return the elementstring of the row whose id is
    hi_id. If there is no such row, an error is logged and KeyError
    is raised.
    """

    import csv

    logging.debug("Opening %s", CSV_FILE)

    with open(CSV_FILE, newline='') as f:
        reader = csv.DictReader(f, fieldnames = ("checked","id","number","elementstring"))
        rows = list(reader)

    # NOTE(review): the first TWO rows are ignored, exactly as before
    # (the original code cut off the first row twice). If the file has
    # only one header line, the first figure can never be found -- to be
    # confirmed against a real hi_figures.csv.
    elementstrings = {row["id"]: row["elementstring"] for row in rows[2:]}

    try:
        hi_code = elementstrings[hi_id]
    except KeyError:
        # previously the lookup failed before this message could be logged
        logging.error("Could not find hi code %s", hi_id)
        raise

    return hi_code
# def hi_lookup ends here

def transform_body(xml_tree, cited_data, translation_file, publang, hyperimage=False):
    """Transform the body of XML document into IntermediateXML file

    The elements of xml_tree are renamed and rearranged in place, one
    kind of element after the other: divisions, paragraphs, citations,
    footnotes, figures, hi elements, tables and references.

    cited_data maps a citekey to the tuple
    (authoryear_citation, year_citation, title); translation_file and
    publang are used for formatting the names of chapter authors; with
    hyperimage set, references are handed to the hyperimage handler
    instead of the default one.

    Return the modified tree. The program exits on a citekey missing in
    cited_data, on a citedRange that has both text and a 'from'
    attribute, on a footnote in an unexpected parent element and on a
    reference without target.
    """

    xml_id = "{http://www.w3.org/XML/1998/namespace}id"

    def handle_refs_default(ref):
        """Handle refs the normal way"""

        target_attribute = ref.get("target")
        if not target_attribute:
            logging.error("Found a ref element without target. Exiting.")
            # a plain sys.exit() would report success to the calling process
            sys.exit(1)

        if ref.get("type") == "url":
            del ref.attrib["type"]
            del ref.attrib["target"]
            ref.tag = "xref"
            ref.set("url", target_attribute)
        else:
            ref.tag = "EOAref"
            del ref.attrib["target"]
            etree.SubElement(ref, "ref", teitarget=target_attribute)
            etree.SubElement(ref, "Label").text = target_attribute
        return
    # def handle_refs_default ends here

    def handle_refs_hyperimage(ref):
        """Treat also the special cases of hyperimage refs"""

        # nothing is transformed here yet, the reference is only reported
        logging.info("Found a ref without target, what else have we got?")

        return
    # def handle_refs_hyperimage ends here

    logging.info("Performing XML transformations of the body.")
    ######################
    # Document structure #
    ######################

    eoa_chapters = xml_tree.xpath("//t:div[@type='chapter']", namespaces=NS_MAP)
    for chapter in eoa_chapters:
        chapter.tag = "div1"
        chapter.set("language", publang)

        chapter_title = chapter.find("t:head", namespaces=NS_MAP)

        author_ids = chapter.get("resp")
        if author_ids is not None:
            list_author_id = author_ids.split(" ")
            logging.info("Found chapter author shortcuts: {}.".format(list_author_id))
            if len(list_author_id) > 0:
                author_string = format_authors(list_author_id, publang, xml_tree, translation_file)
                eoa_author = etree.Element("EOAauthor")
                eoa_author.text = author_string
                chapter_title.insert(0, eoa_author)
        else:
            logging.info("No chapter author.")
        # make the heading the first child of the chapter
        chapter.insert(0, chapter_title)

    for section in xml_tree.xpath("//t:div[@type='section']", namespaces=NS_MAP):
        section.tag = "div2"

    for subsection in xml_tree.xpath("//t:div[@type='subsection']", namespaces=NS_MAP):
        subsection.tag = "div3"

    for subsubsection in xml_tree.xpath("//t:div[@type='subsubsection']", namespaces=NS_MAP):
        subsubsection.tag = "div4"

    ##############
    # Paragraphs #
    ##############

    eoa_paragraphs = xml_tree.xpath("//t:p[not(@rend='footnote text')]", namespaces=NS_MAP)

    for paragraph in eoa_paragraphs:
        # re-assigning the tag removes the namespace
        paragraph.tag = "p"

        if paragraph.get("rend") == "Quote":
            paragraph.set("rend", "quoted")

    #############
    # Citations #
    #############

    # we need some data of the references here!
    #
    # <!--
    # <span rel="popover" class="citation" data-toggle="popover" html="true" data-placement="bottom" data-title="Descartes 1644, 37–44" data-content="Principia philosophiae.">Descartes 1644, 37–44</span>
    # -->
    # Intermediate XML:
    # <span rel="popover" class="citation" citekey="monti_tradizione_2011" data-toggle="popover" html="true" data-placement="bottom" data-title="Monti " data-content="La tradizione galileiana e lo sperimentalismo naturalistico d&#x2019;Et&#xE0; Moderna. Pratiche, teorie, linguaggi.">Monti </span>

    eoa_citations = xml_tree.xpath("//t:bibl", namespaces=NS_MAP)

    for citation in eoa_citations:
        pagerange = ""
        cited_range = citation.xpath("t:citedRange", namespaces=NS_MAP)
        citeref = citation.xpath("t:ref", namespaces=NS_MAP)
        cite_render = citeref[0].get("type")
        # the target has the form "#citekey"
        citekey = citeref[0].get("target")[1:]
        citeref[0].tag = "tagtobestripped"

        citation.tag = "span"
        citation.set("rel", "popover")
        citation.set("class", "citation")
        citation.set("citekey", citekey)
        citation.set("data-toggle", "popover")
        citation.set("html", "true")
        citation.set("data-placement", "bottom")

        if len(cited_range) > 0:
            if cited_range[0].text is not None and cited_range[0].get("from") is not None:
                logging.error("You must not use 'from' attribute and text in citedRange at the same time. Exiting.")
                sys.exit(1)
            elif cited_range[0].text is not None:
                # might contain markup!
                pagerange = ", {}".format(cited_range[0].text)
                # clear the text
                cited_range[0].text = ""
            elif cited_range[0].get("from") is not None:
                pagerange_start = cited_range[0].get("from")
                pagerange_end = cited_range[0].get("to")
                pagerange = ", " + format_pagerange(pagerange_start, pagerange_end)
            cited_range[0].tag = "tagtobestripped"

        # position 1 is the year-only form, position 0 the author-year form
        render_position = 1 if cite_render == 'year' else 0
        try:
            formatted_citation = cited_data[citekey][render_position] + pagerange
        except KeyError:
            logging.error("Citekey %s was not found in the references. Exiting." % citekey)
            sys.exit(1)

        citation.text = formatted_citation
        citation.set("data-title", formatted_citation)
        citation.set("data-content", cited_data[citekey][2])

    #############
    # Footnotes #
    #############

    eoa_footnotes = xml_tree.xpath("//t:note[@place='bottom']", namespaces=NS_MAP)

    # <note place="bottom" xml:id="ftn2" n="2">
    # <note id-text="34" id="uid40" place="Inline"><p>One reads</note>

    for footnote in eoa_footnotes:
        # re-assign tag here to get rid of namespace
        footnote.tag = "note"
        footnote.set("place", "Inline")
        footnote.set("id-text",  footnote.get("n"))

        # we assert here that the parent of a footnote is always a
        # paragraph, a quote or an item. QName yields the local name
        # whether or not the parent has a namespace or a prefix; removing
        # the prefix string from the tag mangled the namespace URI.
        fn_parent = footnote.getparent()
        fn_parent_tag = etree.QName(fn_parent).localname

        if fn_parent_tag not in ["p", "quote", "item"]:
            footnote_id = footnote.get(xml_id)
            logging.error(f"The parent of footnote '{footnote_id}' is {fn_parent_tag}. Must be a p, quote or item. Exiting.")
            sys.exit(1)

        fn_paragraphs = footnote.xpath("t:p", namespaces=NS_MAP)
        for fn_paragraph in fn_paragraphs:
            fn_paragraph.tag = "p"
            fn_paragraph.attrib.pop("rend", None)

    ###########
    # Figures #
    ###########

    # <figure><graphic url="figures/Fig.3CarceresaccidentalTraceFirenze2017.png"/><head>Latin inscription on a wall in Caceres, Spain. CIL II 697</head></figure>
    #
    # <EOAfigure id="uid21">
    #    <anchor id-text="1" id="uid21"/>
    #    <p>
    #      <caption>An example of the titles</caption>
    #      <file>images/Figure1-1_BenedettiSignature.jpg</file>
    #      <width>60</width>
    #   </p>
    # </EOAfigure>
    #
    # hyperimage
    # <figure xml:id="chap14_fig2" corresp="#chap14_fig2-hi">
    #
    # <EOAfigure file="imagesTrnkova_CAS_572.jpg" hielement="ewkJfQoJCV0KCX0KfQ==" width="60px;" order="8" number="14.2">
    # <caption>The town hall in Prague’s Academy of Sciences</caption>
    # </EOAfigure>

    eoa_figures = xml_tree.xpath("//t:figure", namespaces=NS_MAP)

    for figure in eoa_figures:
        figure_type = figure.get("type")
        if figure_type == "hionly":
            continue

        # careful, caption can contain markup!
        caption_element = figure.find("t:head", namespaces=NS_MAP)

        if caption_element is not None:
            figure.tag = "EOAfigure"
            figure.set("id", "anotheruid")
            # the attributes of the anchor are assigned later
            etree.SubElement(figure, "anchor")
            caption_element.tag = "caption"
        else:
            # <EOAfigurenonumber><p><file>images/1.jpg</file><width>33</width></p>
            figure.tag = "EOAfigurenonumber"

        fig_p_element = etree.SubElement(figure, "p")
        etree.SubElement(fig_p_element, "file").text = figure.xpath("t:graphic/@url", namespaces=NS_MAP)[0]
        etree.SubElement(fig_p_element, "width").text = "60" #whatever
        if caption_element is not None:
            # moves the caption behind file and width
            fig_p_element.append(caption_element)

        if figure_type == "hitrue":
            # display image in hyperimage viewer, not in lightbox
            # hi_id needs to be looked up in hi_figures.csv
            logging.debug("Found figure for hiviewer.")
            hi_id = figure.attrib[xml_id]
            hi_code = hi_lookup(hi_id)
            figure.set("hielement", hi_code)

        etree.strip_elements(figure, "{%s}graphic" % ns_tei)

    ##############
    # Hi-Element #
    ##############
    eoa_hi = xml_tree.xpath("//t:hi", namespaces=NS_MAP)

    for hi in eoa_hi:
        rend_attribute = hi.get("rend")

        if rend_attribute == "italic":
            hi.set("rend", "it")
        elif rend_attribute == "superscript":
            hi.tag = "EOAup"
            del hi.attrib["rend"]
        elif rend_attribute == "subscript":
            hi.tag = "EOAdown"
            del hi.attrib["rend"]
        elif rend_attribute == "bold":
            hi.tag = "EOAbold"
            del hi.attrib["rend"]
        else:
            logging.info("The rend attribute in hi has the value %s. This is not supported" % rend_attribute)

    ##########
    # Tables #
    ##########
    eoa_tables = xml_tree.xpath("//t:table", namespaces=NS_MAP)

    for table in eoa_tables:
        # collect everything that is needed before the table is emptied
        tablechildren = table.findall("t:row", namespaces=NS_MAP)
        table_caption = table.find("t:head", namespaces=NS_MAP)
        number_of_cells = len(table.findall("t:row[1]/t:cell", namespaces=NS_MAP))

        if table_caption is not None:
            table_id = table.attrib[xml_id]
            table.clear()
            table_label = etree.Element("EOAtablelabel")
            table_label.text = table_id
            table.append(table_label)
            table_caption.tag = "EOAtablecaption"
            table.tag = "EOAtable"
        else:
            table.tag = "EOAtablenonumber"
            table.clear()
        # not sure if this is evaluated later.
        etree.SubElement(table, "EOAtablecolumns").text = "L3cm" * number_of_cells
        real_table_element = etree.SubElement(table, "table")

        if table_caption is not None:
            # the caption goes between the label and the column definition
            table.insert(1, table_caption)
            real_table_element.set("place", table_id)
            real_table_element.attrib[xml_id] = table_id

        real_table_element.set("rend", "display")
        # attributes id-text and id are assigned later

        for row in tablechildren:
            if row.get("role") == "label":
                tableheader = etree.Element("tableheader")
                tableheader.text = "TRUE"
                row.insert(0, tableheader)
            # rows and cells need not have a role attribute; deleting a
            # missing attribute would raise KeyError
            row.attrib.pop("role", None)
            cells = row.findall("t:cell", namespaces=NS_MAP)
            for cell in cells:
                cell.attrib.pop("role", None)
            real_table_element.append(row)

    ##############
    # References #
    ##############
    eoa_ref = xml_tree.xpath("//t:body//t:ref", namespaces=NS_MAP)

    # The first ref of every citation was renamed in the citations part
    # above and is therefore not found by this query. The former check
    # compared the parent element with the string "bibl", which is never
    # equal, so all refs found here have always been handled.
    for ref in eoa_ref:
        if hyperimage:
            handle_refs_hyperimage(ref)
        else:
            handle_refs_default(ref)

    return xml_tree
# def transform_body ends here

def assign_ids(xml_tree, data):
    """Assign the printed numbers (``id-text``) and record them in data.

    Walks every ``div1`` (chapter) of xml_tree and numbers

    - the chapter itself, unless it has ``rend="nonumber"``,
    - figures (``EOAfigure/anchor``); the ``id`` of the anchor is also
      copied to the enclosing ``EOAfigure`` element,
    - sections (``div2``) and subsections (``div3``), unless they have
      ``rend="nonumber"``,
    - tables (``EOAtable/table``).

    Footnotes (``note``) are not renumbered, the value of their ``n``
    attribute is recorded as it is.

    The chapter and section counters advance for unnumbered elements,
    too. The subsection counter starts at 1 in every section and only
    advances for numbered subsections.

    For every element type a dictionary mapping the ``id`` attribute to
    the number is stored in data (``chapterdict``, ``figdict``, ...).
    Equations, lists, page labels and theorems are not implemented
    yet, their dictionaries stay empty.

    Return a tuple of xml_tree (modified in place) and data.
    """

    chapterdict = {}
    figdict = {}
    eqdict = {}
    fndict = {}
    listdict = {}
    pagelabeldict = {}
    secdict = {}
    tabdict = {}
    theoremdict = {}

    for chapter_counter, chapter in enumerate(xml_tree.xpath("//div1"), start=1):
        if chapter.get("rend") != "nonumber":
            chapter.set("id-text", str(chapter_counter))
            chapterdict[chapter.get("id")] = str(chapter_counter)

        figure_anchors = chapter.findall(".//EOAfigure/anchor")
        for figure_counter, anchor in enumerate(figure_anchors, start=1):
            figure_number = "%d.%d" % (chapter_counter, figure_counter)
            anchor.set("id-text", figure_number)

            # the enclosing figure element takes over the id of its anchor
            anchor.getparent().set("id", anchor.get("id"))
            figdict[anchor.get("id")] = figure_number

        for footnote in chapter.findall(".//note"):
            fndict[footnote.get("id")] = footnote.get("n")

        sections = chapter.findall(".//div2")
        for section_counter, section in enumerate(sections, start=1):
            if section.get("rend") != "nonumber":
                section_number = "%d.%d" % (chapter_counter, section_counter)
                section.set("id-text", section_number)
                secdict[section.get("id")] = section_number

            # Collect the subsections for every section, numbered or
            # not. Doing this only for numbered sections left the names
            # unbound (or pointing at the previous section's
            # subsections) whenever a section had rend="nonumber".
            subsection_counter = 1
            for subsection in section.findall(".//div3"):
                if subsection.get("rend") != "nonumber":
                    subsection_number = "%d.%d.%d" % (chapter_counter, section_counter, subsection_counter)
                    subsection.set("id-text", subsection_number)
                    secdict[subsection.get("id")] = subsection_number
                    subsection_counter += 1

        tables = chapter.findall(".//EOAtable/table")
        for table_counter, table in enumerate(tables, start=1):
            table_number = "{}.{}".format(chapter_counter, table_counter)
            table.attrib["id-text"] = table_number
            tabdict[table.get("id")] = table_number

    # not implemented yet: equation, list, pagelabel, theorem

    data["chapterdict"] = chapterdict
    data["figdict"] = figdict
    data["eqdict"] = eqdict
    data["fndict"] = fndict
    data["listdict"] = listdict
    data["pagelabeldict"] = pagelabeldict
    data["secdict"] = secdict
    data["tabdict"] = tabdict
    data["theoremdict"] = theoremdict

    return xml_tree, data
# def assign_ids ends here

def update_ids(xml_tree, ignore_ref_errors):
    """Point every EOAref to the id of the element it refers to.

    Each ``EOAref`` holds a ``Label`` element and a ``ref`` element. The
    text of the label, without its first character, is looked up as
    ``xml:id`` in xml_tree. If exactly one element carries that
    ``xml:id``, the value of its ``id`` attribute becomes the
    ``target`` of the ``ref`` element.

    If ignore_ref_errors is true, a missing ``xml:id`` sets the target
    to "??" and a duplicate ``xml:id`` leaves the reference untouched;
    both cases are logged as warnings. Otherwise the program exits
    with status 1 in both cases.

    Return xml_tree (modified in place).
    """

    references = xml_tree.findall(".//EOAref")
    logging.debug("Found %d references", len(references))

    for reference in references:
        eoa_reference = reference.find("ref")

        # drop the first character of the label (presumably a leading '#')
        label_text = reference.find("Label").text[1:]
        logging.debug("label text is %s" % label_text)

        matching_elements = xml_tree.xpath("//*[@xml:id='{}']".format(label_text))
        logging.debug("The corresponding id element is %s", matching_elements)

        if len(matching_elements) == 1:
            eoa_reference.set("target", matching_elements[0].get("id"))
        elif len(matching_elements) == 0:
            if ignore_ref_errors:
                logging.warning(f"Found no corresponding xml:id for {label_text}. Ignoring it for now.")
                eoa_reference.set("target", "??")
            else:
                logging.error("There seems to be no corresponding xml:id for %s. Exiting." % label_text)
                sys.exit(1)
        else:
            if ignore_ref_errors:
                # the reference is deliberately left without target
                logging.warning(f"The xml:id {label_text} has been assigned more than once. Ignoring it for now.")
            else:
                # report the offending id, not the list of element objects
                logging.error("The xml:id %s has been assigned more than once. This is not allowed. Exiting." % label_text)
                sys.exit(1)

    return xml_tree
# def update_ids ends here


def get_all_citations(xml_file):
    """Retrieve the keys of all citations from the TEI document.

    A citation is a ``ref`` element inside a ``bibl`` element. Its key
    is the value of the ``target`` attribute without the first
    character (the targets are written like ``#citekey``).

    Citations without ``target`` attribute are skipped with a warning.

    Return a list of the distinct keys in order of first appearance.
    """

    all_citations = xml_file.xpath("//t:bibl/t:ref", namespaces=NS_MAP)

    # a dict keeps the insertion order and makes the duplicate check cheap
    seen_citekeys = {}

    for citation in all_citations:
        target = citation.get("target")
        if target is None:
            logging.warning("Found a citation without target attribute. Skipping it.")
            continue
        seen_citekeys.setdefault(target[1:], None)

    return list(seen_citekeys)
# def get_all_citations ends here


def get_citations_per_chapter(xml_tree):
    """Collect the citation keys used in each chapter of an anthology.

    Chapters are the ``div`` elements with ``type="chapter"``. The
    result maps the ``xml:id`` of each chapter to the set of keys cited
    in it, for example

      {'chap18_schwartz': {'Blodget_1857', 'Hunter_2004', 'Nye_1994'}}

    A key is the ``target`` of a ``ref`` inside a ``bibl``, without its
    first character. The program exits with status 1 if a chapter has
    no ``xml:id``.
    """

    chapters = xml_tree.xpath("//t:div[@type='chapter']", namespaces=NS_MAP)
    logging.info(f"Found {libeoaconvert.plural(len(chapters), 'chapter')}.")

    citekeys_by_chapter = {}

    for chapter_div in chapters:
        identifiers = chapter_div.xpath("@xml:id", namespaces=NS_MAP)
        if not identifiers:
            logging.error("Found a chapter without identifier. Each chapter must have one. Exiting.")
            sys.exit(1)

        targets = chapter_div.xpath(".//t:bibl/t:ref/@target", namespaces=NS_MAP)
        # the count that is logged includes repeated citations
        logging.info(f"Found {libeoaconvert.plural(len(targets), 'reference')} in this chapter.")
        citekeys_by_chapter[identifiers[0]] = {target[1:] for target in targets}

    return citekeys_by_chapter
# def get_citations_per_chapter ends here

def convert_bibliography_to_json(bib_data, TEMP_DIR):
    """Create a JSON version of bibliography data, using pandoc-citeproc.

    Runs ``pandoc-citeproc --bib2json`` on the file ``bib_data["source"]``
    and stores its output as ``tmp-bib.json`` in TEMP_DIR.

    Return the parsed JSON data. Raises an exception of the json module
    if the output of pandoc-citeproc is not valid JSON (for example,
    because the program failed).
    """

    interim_bib_json_file = Path(TEMP_DIR) / "tmp-bib.json"

    # Pass the arguments as a list: splitting a formatted command string
    # breaks as soon as the path of the bibliography contains spaces.
    citeproc_arguments = ["pandoc-citeproc", "--bib2json", str(bib_data["source"])]

    # run() waits for the process and closes the pipe afterwards
    citeproc_result = subprocess.run(citeproc_arguments, stdout=subprocess.PIPE)
    citeproc_json = citeproc_result.stdout
    citations_json = json.loads(citeproc_json)

    # the text is decoded as UTF-8, so write it as UTF-8 regardless of the locale
    with open(interim_bib_json_file, 'w', encoding='utf-8') as json_file:
        json_file.write(citeproc_json.decode('utf-8'))

    logging.info(f"Wrote bibliography as interim json file: {interim_bib_json_file}.")

    return citations_json
# def convert_bibliography_to_json ends here


def convert_bibliography_to_dict(bib_data):
    """Read the BibTeX file of the publication into a dictionary.

    The file is taken from ``bib_data["source"]`` and parsed with
    bibtexparser. Return the ``entries_dict`` of the parsed database.
    """

    bibtex_parser = bibtexparser.bparser.BibTexParser()
    # do not ignore entries of nonstandard types (be a bit lax about them)
    bibtex_parser.ignore_nonstandard_types = False

    bibtex_path = Path(bib_data["source"])

    with open(bibtex_path) as bibtex_file:
        bibtex_database = bibtexparser.load(bibtex_file, parser=bibtex_parser)
        entries = bibtex_database.entries_dict

    return entries
# def convert_bibliography_to_dict ends here


def make_bibliography_pandoc(used_citekeys, bib_data, citations_json, output_file_root, CSL_FILE):
    """Create the HTML version of the bibliography using pandoc.

    A markdown file listing used_citekeys is written by
    write_citation_markdown and converted by pandoc with the
    pandoc-citeproc filter, the bibliography ``bib_data["source"]`` and
    the citation style CSL_FILE. Both files are named after
    output_file_root, with the suffixes ``.md`` and ``.html``.

    citations_json is not used by this function.

    Return the filename of HTML file.
    """

    citations_filename_html = Path(output_file_root).with_suffix(".html")
    citations_filename_markdown = Path(output_file_root).with_suffix(".md")
    write_citation_markdown(used_citekeys, citations_filename_markdown)

    # Pass the arguments as a list: splitting a formatted command string
    # breaks as soon as one of the paths contains spaces.
    arguments = [
        "pandoc",
        "-o", str(citations_filename_html),
        "-t", "html",
        "--filter=pandoc-citeproc",
        f"--bibliography={bib_data['source']}",
        f"--csl={CSL_FILE}",
        str(citations_filename_markdown),
    ]
    logging.info("Using external command pandoc: %s." % " ".join(arguments))

    return_code = subprocess.call(arguments)
    logging.info("Finished processing the bibtex file.")

    if return_code == 0:
        logging.info(f"Wrote {citations_filename_html}.")
    else:
        # do not claim success if pandoc reported a failure
        logging.warning(
            "pandoc exited with status %d. %s may be missing or incomplete.",
            return_code, citations_filename_html
        )

    return citations_filename_html
# def make_bibliography_pandoc ends here


def make_bibliography_tex4ht(used_citekeys, bib_data, output_file_root, publication_language, TEMP_DIR, log_dir):
    """Create the HTML version of the bibliography using tex4ht.

    The work is done by ``bib2html.main``, which gets the bibliography
    file ``bib_data["source"]``, the used citekeys, the TeX template
    ``data/aux/bibliography4ht.tex`` and the name of the publication
    language. The output file is output_file_root with the suffix
    ``.html``.

    publication_language has to be one of "de", "en", "it" or "fr",
    any other value raises a KeyError.

    Return the filename of the HTML file
    """

    language_names = {
        "de": "german",
        "en": "english",
        "it": "italian",
        "fr": "french",
    }

    html_output_path = Path(output_file_root).with_suffix(".html")
    template_path = BASE_DIR / "data" / "aux" / "bibliography4ht.tex"

    # the return value of bib2html.main is not needed here
    bib2html.main(
        bib_file=Path(bib_data["source"]),
        citekeys=used_citekeys,
        tex_template=template_path,
        language=language_names[publication_language],
        temp_dir=TEMP_DIR,
        output_file=html_output_path,
        log_dir=log_dir,
        keywords=[""],
    )

    return html_output_path
# def make_bibliography_tex4ht ends here


def add_bibliography_monograph(xml_tree, refs_for_bib_chapter):
    """Add an unnumbered chapter containing the bibliography.

    Every ``div`` found in refs_for_bib_chapter is turned into a ``p``
    of class "bibliography" (``p`` tags inside of it are stripped,
    ``em`` becomes ``i``) and moved into a new ``div1`` with the head
    "Bibliography". That chapter is inserted into the root element of
    xml_tree at the index "number of div1 elements + 1".

    Return the root element of xml_tree.
    """

    root_element = xml_tree.getroot()
    number_of_chapters = len(root_element.xpath("//div1"))

    bibliography_chapter = etree.Element("div1", rend="nonumber", language="english")
    # this needs to be configurable by language
    etree.SubElement(bibliography_chapter, "head").text = "Bibliography"
    outer_div = etree.SubElement(bibliography_chapter, "div")
    inner_div = etree.SubElement(outer_div, "div")

    for bib_entry in refs_for_bib_chapter.findall(".//div"):
        bib_entry.set("class", "bibliography")
        # remove the paragraphs inside the entry before the entry
        # itself becomes a paragraph
        etree.strip_tags(bib_entry, "p")
        bib_entry.tag = "p"
        for emphasis in bib_entry.findall(".//em"):
            emphasis.tag = "i"

        inner_div.append(bib_entry)

    root_element.insert(number_of_chapters + 1, bibliography_chapter)

    return root_element
# def add_bibliography_monograph ends here

def add_bibliography_anthology(xml_tree, formatted_references_dict):
    """Insert the formatted references into the chapters of an anthology.

    The keys of formatted_references_dict are chapter ids prefixed with
    "dict_", the values are the formatted references of that chapter.
    In each of these chapters (``div1`` with the matching ``xml:id``)
    the processing instruction ``<?eoa printbibliography?>`` marks the
    place of the bibliography: the references, cleaned up by
    fix_bib_entries, are appended in a new ``div`` to the parent of the
    processing instruction, which is then removed.

    The result has this form:

      <div><div><p id="ref-adami_storia_1737" class="bibliography">
      Adami, Andrea (1737). <i>Storia Di Volseno ...</i>. ...
      </p>

    A chapter without such a processing instruction is skipped with a
    warning.

    Return xml_tree (modified in place).
    """

    key_prefix = "dict_"

    for chapter_key, chapter_references in formatted_references_dict.items():
        # only strip the prefix: the chapter id itself may contain "dict_"
        if chapter_key.startswith(key_prefix):
            chapter_id = chapter_key[len(key_prefix):]
        else:
            chapter_id = chapter_key

        eoa_pis = xml_tree.xpath("//div1[@xml:id='%s']//processing-instruction('eoa')" % chapter_id)
        printbibliography_pis = [eoa_pi for eoa_pi in eoa_pis if eoa_pi.text == "printbibliography"]

        if not printbibliography_pis:
            # without this check, the references ended up in the chapter
            # handled before (or the program stopped with a NameError)
            logging.warning(
                "Found no 'printbibliography' processing instruction in chapter %s. Not inserting its bibliography.",
                chapter_id
            )
            continue

        # assuming there's only one; should there be several, use the last one
        bibliography_pi = printbibliography_pis[-1]
        bibliography_parent = bibliography_pi.getparent()

        fixed_references = fix_bib_entries(chapter_references)

        extra_div = etree.SubElement(bibliography_parent, "div")
        extra_div.insert(1, fixed_references)
        # remove the instruction that was matched, not just the last
        # 'eoa' instruction of the chapter
        bibliography_parent.remove(bibliography_pi)

    return xml_tree
# def add_bibliography_anthology ends here

def fix_bib_entries(div_snippet):
    """Adapt the HTML code of the formatted references.

    Every ``div`` below div_snippet becomes a ``p`` element of class
    "bibliography": ``p`` tags inside of it are stripped (their content
    is kept) and ``em`` elements are renamed to ``i``.

    Return div_snippet (modified in place).
    """

    for bib_entry in div_snippet.findall(".//div"):
        bib_entry.set("class", "bibliography")
        # strip the inner paragraphs first, then the entry itself
        # becomes the paragraph
        etree.strip_tags(bib_entry, "p")
        bib_entry.tag = "p"

        for emphasis in bib_entry.findall(".//em"):
            emphasis.tag = "i"

    return div_snippet
# def fix_bib_entries ends here

def main():
    """Convert a TEI XML file into the intermediate XML of the EOA workflow.

    The steps are:

    1. parse the command line, load the configuration, create the
       output directories and load the pickled data written by
       fix_tei.py,
    2. format the citations and the bibliography with tex4ht, either
       for the whole publication (monograph) or per chapter (anthology),
    3. transform the TEI body and add the bibliography,
    4. assign ids and numbers and resolve the references,
    5. write the pickled data, ``publication.cfg`` and
       ``IntermediateXMLFile.xml`` into the output directory.

    The program exits with status 1 if the pickled data file is missing
    or the type of the bibliography is unknown.
    """

    # parse args:
    parser = argparse.ArgumentParser()
    parser.add_argument(
            "--log-dir",
            default = Path("output/logs"),
            help="directory for log files"
    )
    parser.add_argument(
            "-c", "--config",
            dest="CONFIG_FILE",
            default = BASE_DIR / "config" / "eoaconvert.cfg",
            help="Name of configuration file",
            metavar="CONFIGURATION"
    )
    parser.add_argument(
            "-l", "--log-file",
            default = SCRIPT_NAME + ".log" ,
            help="logfile"
    )
    parser.add_argument(
            "--log-level",
            default = "DEBUG",
            help="log level: choose between DEBUG, INFO, WARNING, ERROR, CRITICAL"
    )
    parser.add_argument(
            "-f", "--filename",
            required = True,
            help="TEI XML file to convert into DocBook XML."
    )
    parser.add_argument(
            "-o", "--output-dir",
            default = "./output/imxml",
            help="where to dump all output files"
    )
    parser.add_argument(
            "-i", "--ignore-ref-errors",
            action="store_true",
            help="Ignore warnings of missing or duplicate ids."
    )

    parser.add_argument("-d", "--pickleddata", default="./output/imxml/tmp_files/data.pickle", help="Pickled data file to be used.")
    parser.add_argument("-him", "--hyperimage", action="store_true")
    args = parser.parse_args()
    config_file = args.CONFIG_FILE
    print("The config file is ", config_file)

    OUTPUT_DIR = Path( args.output_dir )
    LOG_DIR = Path( args.log_dir )

    TEMP_DIR = OUTPUT_DIR / "tmp_files"
    DEBUG_DIR = OUTPUT_DIR / "debug"

    CONFIG = load_config(
        args.CONFIG_FILE,
        args.log_level,
        (Path(args.log_dir) / SCRIPT_NAME) . with_suffix( ".log" ),
        args.log_file,
    )

    # create missing parent directories as well, the default output
    # directory is already two levels deep
    for directory in (OUTPUT_DIR, TEMP_DIR, DEBUG_DIR):
        directory.mkdir(parents=True, exist_ok=True)

    # NOTE: unpickling can execute arbitrary code. Only use data files
    # that were written by the tools of this workflow.
    try:
        with open(args.pickleddata, 'rb') as f:
            data = pickle.load(f)
    except FileNotFoundError:
        logging.error("File 'data.pickle' not found. You should run 'fix_tei.py' first. Exiting.")
        sys.exit(1)

    TRANSLATION_FILE = BASE_DIR / CONFIG['Auxiliaries']['TRANSLATIONS']
    # only needed by make_bibliography_pandoc, which is not in use at the moment
    CSL_FILE = BASE_DIR / CONFIG['Auxiliaries']['CSL_FILE']

    xml_tree = etree.parse(args.filename)

    publication_language = xml_tree.xpath("//t:teiHeader/t:profileDesc/t:langUsage/t:language/@ident", namespaces=NS_MAP)[0]

    bib_data = check_bibliography(xml_tree)
    bibliography_type = bib_data["type"]
    if bibliography_type not in ("monograph", "anthology"):
        # fail here with a clear message instead of a NameError later on
        logging.error("Unknown type of bibliography: %s. Expected 'monograph' or 'anthology'. Exiting.", bibliography_type)
        sys.exit(1)

    # citations_json = convert_bibliography_to_json(bib_data, TEMP_DIR)
    citations_dict = convert_bibliography_to_dict(bib_data)

    logging.debug("Creating bibliographies.")
    if bibliography_type == "monograph":
        used_citekeys = get_all_citations(xml_tree)
        citations_filename_root = Path(TEMP_DIR, "formatted_citations_monograph")
        # citations_filename_html = make_bibliography_pandoc(used_citekeys, bib_data, citations_json, citations_filename_root, CSL_FILE)
        citations_filename_html = make_bibliography_tex4ht(used_citekeys, bib_data, citations_filename_root, publication_language, TEMP_DIR, LOG_DIR)
        logging.info("Formatting citations now.")
        cited_dict = format_citations(used_citekeys, citations_dict, citations_filename_html)
        refs_for_bib_chapter = format_reference_list(used_citekeys, citations_filename_html)
    else:
        citations_per_chapter = get_citations_per_chapter(xml_tree)
        formatted_references_dict = {}
        all_chapter_ids = xml_tree.xpath("//t:div[@type='chapter']/@xml:id", namespaces=NS_MAP)
        cited_dict = {}

        for chapter_id in all_chapter_ids:
            used_citekeys_per_chapter = citations_per_chapter[chapter_id]
            logging.debug(f"{len(used_citekeys_per_chapter)} citations in this chapter")
            if not used_citekeys_per_chapter:
                logging.debug("No citations found, advancing to next chapter.")
                continue

            citations_filename_root = Path(TEMP_DIR, f"formatted_citations_{chapter_id}")
            # citations_filename_html_per_chapter = make_bibliography_pandoc(used_citekeys_per_chapter, bib_data, citations_json, citations_filename_root, CSL_FILE)
            citations_filename_html_per_chapter = make_bibliography_tex4ht(used_citekeys_per_chapter, bib_data, citations_filename_root, publication_language, TEMP_DIR, LOG_DIR)

            logging.info("Formatting citations now.")
            cited_dict_per_chapter = format_citations(used_citekeys_per_chapter, citations_dict, citations_filename_html_per_chapter)
            # Merge dictionaries
            cited_dict = {**cited_dict, **cited_dict_per_chapter}

            refs_for_bib_chapter = format_reference_list(used_citekeys_per_chapter, citations_filename_html_per_chapter)
            tmp_dict_key = "dict_" + chapter_id
            # create a dictionary entry containing the formatted references
            formatted_references_dict[tmp_dict_key] = refs_for_bib_chapter
            logging.debug(f"cited_dict now has {libeoaconvert.plural(len(cited_dict), 'entry', plural='entries')}.")

    tei_body = xml_tree.xpath("//t:body", namespaces=NS_MAP)[0]
    if args.hyperimage:
        logging.info("Transforming body with Hyperimage support")
    body_transformed_tmp = transform_body(tei_body, cited_dict, TRANSLATION_FILE, publang=publication_language, hyperimage=args.hyperimage)
    libeoaconvert.debug_xml_here(body_transformed_tmp, "body_transformed", DEBUG_DIR)
    body_transformed = etree.ElementTree(body_transformed_tmp)

    if bibliography_type == "monograph":
        xml_add_bib = add_bibliography_monograph(body_transformed, refs_for_bib_chapter)
    else:
        xml_add_bib = add_bibliography_anthology(body_transformed, formatted_references_dict)

    etree.strip_tags(xml_add_bib, "tagtobestripped")
    libeoaconvert.debug_xml_here(xml_add_bib, "xml_add_bib", DEBUG_DIR)

    elements_with_ids = xml_add_bib.xpath("//div1 | //div2 | //div3 | //note | //item | //table | //EOAfigure/anchor | //EOAequation | //formula | //theorem")
    for element_counter, element in enumerate(elements_with_ids, start=1):
        element.set("id", "uid" + str(element_counter))

    assigned_ids, data_to_pickle = assign_ids(xml_add_bib, data)

    updated_xml_tree = update_ids(assigned_ids, args.ignore_ref_errors)

    # add_bibliography_monograph returns the root element while
    # add_bibliography_anthology returns the tree; an element has no
    # getroot() method, so handle both.
    if etree.iselement(updated_xml_tree):
        xml_root = updated_xml_tree
    else:
        xml_root = updated_xml_tree.getroot()

    xml_root.tag = "Book"

    final_tree = etree.ElementTree(xml_root)

    with open(TEMP_DIR / "data.pickle", 'wb') as f:
        # Pickle the 'data' dictionary using the highest protocol available.
        pickle.dump(data_to_pickle, f, pickle.HIGHEST_PROTOCOL)

    publication_info = get_publication_info(xml_tree, TRANSLATION_FILE)
    config_data = make_publication_cfg(publication_info, TRANSLATION_FILE)

    output_filename = OUTPUT_DIR / "publication.cfg"
    with open(output_filename, 'w') as configfile:
        config_data.write(configfile)

    logging.info(f"Wrote {output_filename}.")

    output_filename = str(OUTPUT_DIR / "IntermediateXMLFile.xml")

    final_tree.write(output_filename, pretty_print=True, xml_declaration=True, encoding="utf-8")
    logging.info(f"Wrote {output_filename}.")

    # Remove namespace info (brute force solution). The file was
    # written as UTF-8, so it has to be read and written as UTF-8,
    # whatever the encoding of the locale is.
    bad_ns_string =  ' xmlns="http://www.tei-c.org/ns/1.0"'
    with open(output_filename, 'r', encoding="utf-8") as textfile:
        xml_as_string = textfile.read()

    removed_namespace = xml_as_string.replace(bad_ns_string, "")

    with open(output_filename, 'w', encoding="utf-8") as amended_textfile:
        amended_textfile.write(removed_namespace)
# def main ends here

if __name__ == '__main__':

    # run the conversion only if this file is executed as a script,
    # not if it is imported as a module:
    main()

# finis