From db30e89a159af608d1957233d0171e1b20416cf0 Mon Sep 17 00:00:00 2001
From: EsGeh <SamuelGfroerer@googlemail.com>
Date: Tue, 23 Apr 2019 16:14:14 +0200
Subject: [PATCH 1/7] fixed html bibliography generation: output should be the
 same as before.

---
 eoatex2imxml.py   |  3 +--
 utils/bib2html.py | 18 ++++++++++++------
 2 files changed, 13 insertions(+), 8 deletions(-)
diff --git a/eoatex2imxml.py b/eoatex2imxml.py
index f5255b3..aaf8c3c 100755
--- a/eoatex2imxml.py
+++ b/eoatex2imxml.py
@@ -1195,7 +1195,6 @@ def insert_bibliographies(
                 citations_json,
                 citekeys
         )
-        # use language of the first chapter:
         formatted_bibl_info = bib2html.main(
             bib_file = bib_file,
             citekeys = citekeys,
@@ -1402,7 +1401,6 @@ def add_bibliography_to_xml(
 if bibl_info is None:
     logging.warning("No bibliography database found.")
 else:
-
     (bib_type, bib_database) = bibl_info
     logging.debug(f"bib type is {bib_type}")
 
@@ -1426,6 +1424,7 @@ def add_bibliography_to_xml(
     if bib_type == "monograph":
         keyword_to_print_bibl_el = insert_bibliographies(
                 xmlTree,
+                # use language of the first chapter:
                 xmlChapters[0].get( "language" ),
                 citations_json,
                 ## paths:
diff --git a/utils/bib2html.py b/utils/bib2html.py
index efc1d10..b254c7f 100755
--- a/utils/bib2html.py
+++ b/utils/bib2html.py
@@ -42,7 +42,7 @@ def transform_reference(reference_element, dialect='html'):
     """Formatting transformation for reference element"""
 
     string_from_xml = etree.tostring(reference_element).decode('utf-8')
-    removed_linebreak = string_from_xml.replace("\n", "")
+    removed_linebreak = string_from_xml.replace("\n", " ")
     removed_namespace = removed_linebreak.replace('<p xmlns="http://www.w3.org/1999/xhtml" class="noindent">', '<p>')
     cleaned_element = etree.fromstring(removed_namespace)
 
@@ -269,6 +269,7 @@ def main(
             chapter_heading = BIBLIOGRAPHY_CHAPTER_NO_KEYWORD
         else:
             chapter_heading = BIBLIOGRAPHY_CHAPTER.format( keyword = keyword )
+        # '<dl class="thebibliography"> ... </dl>
         bibliography_el = xml_tree.xpath(
                 f"//x:body/x:p[text() = '{chapter_heading}']/following-sibling::x:dl[1]",
                 namespaces = NS_MAP
@@ -276,16 +277,21 @@ def main(
         if( len(bibliography_el) != 1 ):
             logging.error( f"error parsing bibliography with keyword '{keyword}'" )
             sys.exit( 1 )
-        bibliographies_dict[keyword] =  bibliography_el[0]
+        bibliography_el = bibliography_el[0]
 
-    xml_tree.xpath(f"//x:dl[@class='thebibliography']", namespaces=NS_MAP)[0]
-    reference_list = xml_tree.xpath(f"//x:dl[@class='thebibliography']", namespaces=NS_MAP)[0]
-    reference_div = create_reference_list(reference_list)
+        reference_div = create_reference_list(bibliography_el)
+        bibliographies_dict[keyword] = reference_div
 
     html_element = etree.Element("html")
     html_element.insert(0, citation_authoryear)
     html_element.insert(1, citation_year)
-    html_element.insert(2, reference_div)
+    for keyword in keywords:
+        bibl_el = etree.SubElement(
+                html_element,
+                "div",
+                **({} if keyword == "" else { 'keyword': keyword } )
+        )
+        bibl_el.append( bibliographies_dict[keyword] )
 
     # print(etree.tostring(html_element))
 

From 57587d37d6d8d6b74a77624057c6d40c1da6187a Mon Sep 17 00:00:00 2001
From: EsGeh <SamuelGfroerer@googlemail.com>
Date: Tue, 23 Apr 2019 16:28:58 +0200
Subject: [PATCH 2/7] some citations were chopped, should be ok now

---
 eoatex2imxml.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/eoatex2imxml.py b/eoatex2imxml.py
index aaf8c3c..36a72d9 100755
--- a/eoatex2imxml.py
+++ b/eoatex2imxml.py
@@ -1543,7 +1543,7 @@ def add_bibliography_to_xml(
                 if xmlCitation.tag == "EOAciteauthoryear":
                     strCitation = citeauthoryear_value
                 elif xmlCitation.tag == "EOAciteyear":
-                    strCitation = form_cit.select("#citeyear ~ p > span[data-cites='%s']" % string_citekey)[0].text[1:-1]
+                    strCitation = form_cit.select("#citeyear ~ p > span[data-cites='%s']" % string_citekey)[0].text
                 elif xmlCitation.tag == "EOAcitemanual":
                     cite_text = xmlCitation.find("citetext")
                     if cite_text.getchildren():

From 812b7e5951caf5037392fb4fd8fe5528943182f4 Mon Sep 17 00:00:00 2001
From: EsGeh <SamuelGfroerer@googlemail.com>
Date: Tue, 23 Apr 2019 16:30:07 +0200
Subject: [PATCH 3/7] cleaning up dead commented code

---
 utils/bib2html.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/utils/bib2html.py b/utils/bib2html.py
index b254c7f..f0cbce3 100755
--- a/utils/bib2html.py
+++ b/utils/bib2html.py
@@ -293,8 +293,6 @@ def main(
         )
         bibl_el.append( bibliographies_dict[keyword] )
 
-    # print(etree.tostring(html_element))
-
     tree = etree.ElementTree(html_element)
     logging.info("writing '%s'" % output_file)
     tree.write(str(output_file), pretty_print=True, xml_declaration=True, encoding="utf-8")

From 593bb1214b140212a5016e75409c650298f2aedc Mon Sep 17 00:00:00 2001
From: EsGeh <SamuelGfroerer@googlemail.com>
Date: Tue, 23 Apr 2019 17:07:46 +0200
Subject: [PATCH 4/7] dummy cover generation script picks an image with the
 right file suffix

---
 mkimage.py | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/mkimage.py b/mkimage.py
index bbfcb98..11ad51d 100755
--- a/mkimage.py
+++ b/mkimage.py
@@ -38,21 +38,20 @@
 def get_cover_image(image_path):
     """Choose a random landscape image from publications in this volume"""
 
-    import random
-
-    candidates = os.listdir(image_path)
-
+    import random, glob
+    extensions = ("png", "jpg")
+    candidates = []
+    for extension in extensions:
+        path = f"{image_path}/**/*.{extension}"
+        candidates.extend(
+            glob.glob( path, recursive = True)
+        )
     for image in candidates:
-        if image == ".DS_Store":
-            candidates.remove(image)
-            continue
-        tmp_image = Image.open(image_path + "/" + str(image))
+        tmp_image = Image.open(str(image))
         ratio = calculate_ratio(tmp_image)
         if ratio < 1:
             candidates.remove(image)
-
     chosen_image = random.choice(candidates)
-
     return chosen_image
 # def get_cover_image ends here
 
@@ -209,7 +208,7 @@ def create_cover(metadata_dict, image_directory, cover_filename, image_is_file):
     text_draw.multiline_text((ptcenter,DIMENSIONS[1]-400), press_text_joined, font=small_font, align="center")
 
     if image_is_file == False:
-        image_on_cover = Image.open(os.path.join(image_directory, get_cover_image(image_directory)))
+        image_on_cover = Image.open(get_cover_image(image_directory))
     else:
         image_on_cover = Image.open(image_directory)
 

From 5db6ad0729f9745f03d8224bae0a4c2f9d7db429 Mon Sep 17 00:00:00 2001
From: EsGeh <SamuelGfroerer@googlemail.com>
Date: Tue, 23 Apr 2019 18:50:34 +0200
Subject: [PATCH 5/7] fixed and simplified imxml2django facsimile handling

---
 imxml2django.py        |  2 +-
 utils/libeoaconvert.py | 63 ++++++++++++++++++++----------------------
 2 files changed, 31 insertions(+), 34 deletions(-)

diff --git a/imxml2django.py b/imxml2django.py
index 31e9272..82a009c 100755
--- a/imxml2django.py
+++ b/imxml2django.py
@@ -1127,7 +1127,7 @@ def check_publication_cfg(configuration_file):
         strImageFileDir = re.sub("/", "", strImageFileDir)
         strImageFileName = os.path.basename(strImageFile)
         shutil.copy(
-                INPUT_DIR / strImageFile,
+                PUBLICATION_DIR / strImageFile,
                 CONVERT_DIR / "django/images" / (strImageFileDir + strImageFileName)
         )
         # shutil.copy(os.getcwd() + "/" + strImageFile, os.getcwd() + "/CONVERT/django/images/" + strImageFileDir + strImageFileName)
diff --git a/utils/libeoaconvert.py b/utils/libeoaconvert.py
index 6ffdd92..ce10d8f 100644
--- a/utils/libeoaconvert.py
+++ b/utils/libeoaconvert.py
@@ -71,34 +71,33 @@ def sanitizeImage(
 ):
     """Adjust and convert image for epub standard"""
 
-    if not os.path.exists(Path(tmp_dir) / "tmp_images/"):
-        os.makedirs(os.path.expanduser(Path(tmp_dir) / "tmp_images/"))
+    tmp_dir = Path( tmp_dir )
+    strImagepath = Path( strImagepath )
+    if not (tmp_dir / "tmp_images").exists():
+        os.makedirs(tmp_dir / "tmp_images/")
 
-    tmp_image_dir = Path(tmp_dir) / "tmp_images/"
-    xelatex_sanitizeimage_logfile = open( Path(tmp_dir) / 'xelatex-run-images.log', 'w')
+    tmp_image_dir = tmp_dir / "tmp_images"
 
     logging.debug(strImagepath)
-    strCommand = GM_PATH + " identify -format \"%w\" " + str(strImagepath)
-    listArguments = shlex.split(strCommand)
-    exeShell = subprocess.check_output(listArguments, shell=False, universal_newlines=True)
-    intImageWidth = int(exeShell)
+    intImageWidth = int(subprocess.check_output(
+        shlex.split( f"{GM_PATH} identify -format \"%w\" {strImagepath}" ),
+        universal_newlines=True
+    ))
     if intImageWidth > 700:
-        strCommand = GM_PATH + " convert " + str(strImagepath) + " -resize 700x\\> " + str(strImagepath)
-        listArguments = shlex.split(strCommand)
-        subprocess.check_output(listArguments, shell=False)
-    strCommand = GM_PATH + " identify -format \"%h\" " + str(strImagepath)
-    listArguments = shlex.split(strCommand)
-    exeShell = subprocess.check_output(listArguments, shell=False, universal_newlines=True)
-    intImageHeight = int(exeShell)
+        exec_command(
+            f"{GM_PATH} convert {strImagepath} -resize 700x\\> {strImagepath}"
+        )
+    intImageHeight = int( subprocess.check_output(
+        shlex.split( f"{GM_PATH} identify -format \"%h\" {strImagepath}" ),
+        universal_newlines=True
+    ))
     if intImageHeight > 1000:
-        strCommand = GM_PATH + " convert " + str(strImagepath) + " -resize x1000\\> " + str(strImagepath)
-        listArguments = shlex.split(strCommand)
-        subprocess.check_output(listArguments, shell=False)
-    strCommand = GM_PATH + " identify -format \"%m\" " + str(strImagepath)
-    listArguments = shlex.split(strCommand)
-    exeShell = subprocess.check_output(listArguments, shell=False, universal_newlines=True)
-    strFileFormat = str(exeShell)
-    strFileFormat = strFileFormat.strip()
+        exec_command(
+            f"{GM_PATH} convert {strImagepath} -resize x1000\\> {strImagepath}"
+        )
+    strFileFormat = str( subprocess.check_output(
+        shlex.split( f"{GM_PATH} identify -format \"%m\" {strImagepath}" )
+    )).strip()
     if strFileFormat == "JPEG":
         pass
         # print("looking at jpeg file")
@@ -119,17 +118,15 @@ def sanitizeImage(
         # strImagepath = strNewImagepath + ".png"
     elif strFileFormat == "PDF":
         strNewImagepath = os.path.splitext(str(strImagepath))[0]
-        clipped_file = strImagepath.replace(".pdf", "-clipped.pdf")
-
-        Kommando = PDFCROP_EXEC + " --margins 10 --clip --hires " + str(strImagepath) + " " + clipped_file
-        logging.debug(Kommando)
+        clipped_file = str(strImagepath).replace(".pdf", "-clipped.pdf")
 
-        Argumente = shlex.split(Kommando)
-        subprocess.call(Argumente, cwd=tmp_image_dir, stdout=xelatex_sanitizeimage_logfile)
-
-        strCommand = GM_PATH + " convert -density 400 " + clipped_file + " " + strNewImagepath + ".png"
-        listArguments = shlex.split(strCommand)
-        subprocess.call(listArguments)
+        exec_command(
+            f"{PDFCROP_EXEC} --margins 10 --clip --hires {strImagepath} {clipped_file}",
+            # wd = tmp_image_dir
+        )
+        exec_command(
+            f"{GM_PATH} convert -density 400 {clipped_file} {strNewImagepath}.png"
+        )
         logging.debug("Removing two files: %s and %s " % (clipped_file, strImagepath))
         os.remove(clipped_file)
         os.remove(strImagepath)

From d22976608b482401c7583b09b7184884eb40ba19 Mon Sep 17 00:00:00 2001
From: EsGeh <SamuelGfroerer@googlemail.com>
Date: Wed, 24 Apr 2019 17:12:27 +0200
Subject: [PATCH 6/7] fix non-ascii problems in bibliography by representing
 non-ascii characters as xml entities

---
 bibformat/4ht/bibliography4ht.tex |  5 +++
 utils/bib2html.py                 | 54 ++++++++++++++++++++++++++++---
 2 files changed, 54 insertions(+), 5 deletions(-)

diff --git a/bibformat/4ht/bibliography4ht.tex b/bibformat/4ht/bibliography4ht.tex
index 15cc7b6..9d60796 100644
--- a/bibformat/4ht/bibliography4ht.tex
+++ b/bibformat/4ht/bibliography4ht.tex
@@ -91,6 +91,11 @@
 \bibliography{$bibfile}
 \begin{document}
 % \maketitle
+\makeatletter
+\def\hshchr{\expandafter\@gobble\string\#}
+\def\ampchr{\expandafter\@gobble\string\&}
+\def\entity#1{\HCode{\ampchr\hshchr#1;}}
+\makeatother
 
 \section{Citations}
 
diff --git a/utils/bib2html.py b/utils/bib2html.py
index f0cbce3..6f4d70e 100755
--- a/utils/bib2html.py
+++ b/utils/bib2html.py
@@ -33,6 +33,15 @@
 BIBLIOGRAPHY_CHAPTER_NO_KEYWORD = "BIBLIOGRAPHY"
 BIBLIOGRAPHY_CHAPTER = "BIBLIOGRAPHY {keyword}"
 
+def latex_escape_non_ascii( input_str ):
+    output = ""
+    for c in input_str:
+        if ord(c) > 0x7F:
+            output += "\entity{{{}}}".format( ord(c) )
+        else:
+            output += c
+    return output
+
 def check_executables():
     check_executable( "htlatex" )
     check_executable( "tidy" )
@@ -82,6 +91,7 @@ def write_dummy_latex(
         tmp_filename
 ):
     """Prepare a latex file"""
+    tmp_dir = tmp_filename.parent
 
     allcitekeys = ""
 
@@ -112,20 +122,45 @@ def write_dummy_latex(
                 f"""
 \chapter{{{chapter_heading}}}
 \printbibliography[keyword={{{keyword}}}]\n"""
+    
+    bibfile_orig = (tmp_dir / (bibfile.stem + "_orig")) . with_suffix( ".bib" )
+    bibfile_local = tmp_dir / bibfile.name
+    shutil.copyfile(
+            bibfile,
+            bibfile_orig
+    )
+    import fileinput, unicodedata
+    with open( bibfile_local, "w") as out_file:
+        for line in fileinput.input(bibfile_orig):
+            out_file.write(
+                    latex_escape_non_ascii(
+                        line
+                    )
+            )
 
     bibfile_path = \
         bibfile if bibfile.is_absolute() else Path.cwd() / bibfile
     substitions = fill_in_template.substitute(
             language = language,
             # language = translations[language],
-            bibfile = bibfile_path,
+            bibfile = bibfile.name,
+            # bibfile = bibfile_path,
             # bibfile = '../' + bibfile,
             citations = allcitekeys,
             bibliographies = bibliographies
     )
+    # (just for debugging: save with unescaped non-ascii characters)
+    with open(tmp_dir / (tmp_filename.name + ".orig"), "w") as texfile:
+        texfile.write(
+            substitions
+        )
 
     with open(tmp_filename, "w") as texfile:
-        texfile.write(substitions)
+        texfile.write(
+            latex_escape_non_ascii(
+                substitions
+            )
+        )
 
     logging.info(f"Wrote {tmp_filename}")
 # def write_dummy_latex ends here
@@ -137,7 +172,8 @@ def run_htlatex(
 ):
     """Create HTML file from temporary LaTeX file"""
     exec_command(
-        f"htxelatex {tmp_filename}.tex 'xhtml,charset=utf-8' ' -cunihtf -utf8' '' '--interaction=nonstopmode'",
+        f"htlatex {tmp_filename}.tex 'xhtml,charset=utf-8,fn-in' ' -utf8' '' '--interaction=nonstopmode'",
+        # f"htxelatex {tmp_filename}.tex 'xhtml,charset=utf-8' ' -cunihtf -utf8' '' '--interaction=nonstopmode'",
         output_to = ToFile( Path(log_dir) / "htlatex1.log" )
     )
     exec_command(
@@ -145,7 +181,8 @@ def run_htlatex(
         output_to = ToFile( Path(log_dir) / "biber.log" )
     )
     exec_command(
-        f"htxelatex {tmp_filename}.tex 'xhtml,charset=utf-8' ' -cunihtf -utf8' '' '--interaction=nonstopmode'",
+        f"htlatex {tmp_filename}.tex 'xhtml,charset=utf-8,fn-in' ' -utf8' '' '--interaction=nonstopmode'",
+        # f"htxelatex {tmp_filename}.tex 'xhtml,charset=utf-8' ' -cunihtf -utf8' '' '--interaction=nonstopmode'",
         output_to = ToFile( Path(log_dir) / "htlatex2.log" )
     )
 # def run_htlatex ends here
@@ -230,22 +267,29 @@ def main(
     wd = Path.cwd()
     log_dir = log_dir.resolve()
     os.chdir( temp_dir )
+    logging.info(f"cd {temp_dir}")
     run_htlatex(
             tmp_filename . with_suffix( "" ),
             # tmp_filename,
             log_dir = log_dir
     )
+    logging.info(f"cd {wd}")
     os.chdir( wd )
 
     tmp_path_html = temp_dir / tmp_filename . with_suffix( ".html" )
+    tmp_path_html_utf8 = (temp_dir / (str(tmp_filename) + "-utf8")) . with_suffix( ".html" )
     tmp_path_html_fixed1 = temp_dir / tmp_filename . with_suffix( ".1.html" )
     tmp_path_html_fixed2 = temp_dir / tmp_filename . with_suffix( ".2.html" )
 
+    exec_command(
+        f"iconv -f ISO-8859-1 -t UTF-8 --output={tmp_path_html_utf8} {tmp_path_html}"
+    )
+
     # htlatex seems to produce incorrect xhtml.
     # We have to fix it
     # (this will e.g. replace '&' by '&amp;'):
     exec_command(
-            f"tidy -numeric -output {tmp_path_html_fixed1} {tmp_path_html}",
+            f"tidy -numeric -output {tmp_path_html_fixed1} {tmp_path_html_utf8}",
             exit_code_ok = lambda x: x in (0,1)
     )
     import fileinput, unicodedata

From e57cce7c9a5315c1cd7401a683875f1b5857dbaa Mon Sep 17 00:00:00 2001
From: EsGeh <SamuelGfroerer@googlemail.com>
Date: Thu, 25 Apr 2019 13:31:10 +0200
Subject: [PATCH 7/7] citations in temporary html are represented as an html table

---
 utils/bib2html.py | 48 +++++++++++++++++++++++++++++++----------------
 1 file changed, 32 insertions(+), 16 deletions(-)

diff --git a/utils/bib2html.py b/utils/bib2html.py
index 6f4d70e..9964cb8 100755
--- a/utils/bib2html.py
+++ b/utils/bib2html.py
@@ -21,6 +21,7 @@
 from lxml import etree
 from pathlib import Path
 import sys
+import textwrap
 
 BASE_DIR = Path( __file__ ).resolve().parent.parent
 SCRIPT_PATH = Path( __file__ )
@@ -94,14 +95,10 @@ def write_dummy_latex(
     tmp_dir = tmp_filename.parent
 
     allcitekeys = ""
-
+    allcitekeys += "\\begin{tabular}{l l l}\n"
     for key in citekeys:
-        allcitekeys += """
-\subsection*{%s}
-\subsubsection*{authoryear}
-\cite{%s}
-\subsubsection*{year}
-\cite*{%s}\n""" % (key, key, key)
+        allcitekeys += f"\\verb|{key}|  &\\cite{{{key}}}&\\cite*{{{key}}}\\\\\n"
+    allcitekeys += "\\end{tabular}\n"
 
     with open(template_path, "r") as tmp_template:
         template = tmp_template.read()
@@ -113,15 +110,21 @@ def write_dummy_latex(
         if keyword == "":
             chapter_heading = BIBLIOGRAPHY_CHAPTER_NO_KEYWORD
             bibliographies += \
-                f"""
-\chapter{{{chapter_heading}}}
-\printbibliography\n"""
+                textwrap.dedent(
+                    f"""
+                    \chapter{{{chapter_heading}}}
+                    \printbibliography
+                    """
+                )
         else:
             chapter_heading = BIBLIOGRAPHY_CHAPTER.format( keyword=keyword )
             bibliographies += \
-                f"""
-\chapter{{{chapter_heading}}}
-\printbibliography[keyword={{{keyword}}}]\n"""
+                textwrap.dedent(
+                    f"""
+                    \chapter{{{chapter_heading}}}
+                    \printbibliography[keyword={{{keyword}}}]
+                    """
+                )
     
     bibfile_orig = (tmp_dir / (bibfile.stem + "_orig")) . with_suffix( ".bib" )
     bibfile_local = tmp_dir / bibfile.name
@@ -202,11 +205,24 @@ def create_citations(citekeys, xml_tree, style):
     p_element = etree.Element("p")
 
     for citekey in citekeys:
-        logging.debug(f"working on citekey: {citekey}" )
+        logging.debug( f"working on citekey: '{citekey}', style: '{style}'" )
+        citation_el = None
         if style == "authoryear":
-            format_citation = xml_tree.xpath(f"//x:h4[text() = '{citekey}']/following-sibling::x:p[2]/text()", namespaces=NS_MAP)[0].strip()
+            citation_el = xml_tree.xpath(
+                f"//x:table/x:tr/x:td[.//x:span[text() = '{citekey}'] ]/following-sibling::x:td[1]/text()",
+                namespaces=NS_MAP
+            )
         else:
-            format_citation = xml_tree.xpath(f"//x:h4[text() = '{citekey}']/following-sibling::x:p[3]/text()", namespaces=NS_MAP)[0].strip()
+            citation_el = xml_tree.xpath(
+                f"//x:table/x:tr/x:td[.//x:span[text() = '{citekey}'] ]/following-sibling::x:td[2]/text()",
+                    namespaces=NS_MAP
+            )
+        if( len(citation_el) == 0 ):
+            logging.error( f"error parsing formatted citation: '{citekey}', style: '{style}'" )
+            sys.exit( 1 )
+
+        format_citation = citation_el[0].strip()
+        logging.debug( f"formatted: '{format_citation}'" )
         span_element = etree.fromstring(f"""<span class="citation" data-cites="{citekey}">{format_citation}</span>""")
 
         p_element.append(span_element)