From 63c08a2ba704c4e9085d916e046eb442d57eeee1 Mon Sep 17 00:00:00 2001
From: Klaus Thoden <kthoden@mpiwg-berlin.mpg.de>
Date: Tue, 29 May 2018 17:04:37 +0200
Subject: [PATCH] New functions

---
 libeoaconvert.py | 71 ++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 60 insertions(+), 11 deletions(-)

diff --git a/libeoaconvert.py b/libeoaconvert.py
index 3829866..ff58d9f 100644
--- a/libeoaconvert.py
+++ b/libeoaconvert.py
@@ -7,6 +7,7 @@
 import shlex
 import logging
 import configparser
+from lxml import etree
 from lxml.html import soupparser
 
 ##################################
@@ -35,6 +36,9 @@
 
 # Setup of various dictionaries for localization of various elements
 dictLangFootnotes = {"it" : "Note a piè pagina", "fr" : "notes en bas de page", "de" : "Fußnoten", "en" : "Footnotes"}
+dict_and = {"en" : "and", "de" : "und", "fr" : "et", "it" : "e"}  # localized word for "and", keyed by language code
+dict_ed = {"en" : "ed.", "de" : "Hrsg."}  # localized "ed."; NOTE(review): no "fr"/"it" keys, unlike dict_and
+dict_eds = {"en" : "eds.", "de" : "Hrsg."}  # localized "eds."; NOTE(review): no "fr"/"it" keys, unlike dict_and
 
 # the new-style footnotes that use LaTeX bigfoot show up in the following order:
 footnote_groups = ["decimal", "lower-latin"]
@@ -93,13 +97,24 @@ def sanitizeImage(strImagepath, GM_PATH, TL_PATH):
     exeShell = subprocess.check_output(listArguments, shell=False, universal_newlines=True)
     strFileFormat = str(exeShell)
     strFileFormat = strFileFormat.strip()
-    if strFileFormat == "PNG":
-        strNewImagepath = os.path.splitext(strImagepath)[0]
-        strCommand = GM_PATH + " convert " + strImagepath + " " + strNewImagepath + ".jpg"
-        listArguments = shlex.split(strCommand)
-        subprocess.call(listArguments)
-        os.remove(strImagepath)
-        strImagepath = strNewImagepath + ".jpg"
+    if strFileFormat == "JPEG":
+        pass
+        # print("looking at jpeg file")
+        # strNewImagepath = os.path.splitext(strImagepath)[0]
+        # strCommand = GM_PATH + " convert " + strImagepath + " " + strNewImagepath + ".jpg"
+        # listArguments = shlex.split(strCommand)
+        # subprocess.call(listArguments)
+        # os.remove(strImagepath)
+        # strImagepath = strNewImagepath + ".jpg"
+    elif strFileFormat == "PNG":
+        pass
+        # print("looking at png file")
+        # strNewImagepath = os.path.splitext(strImagepath)[0]
+        # strCommand = GM_PATH + " convert " + strImagepath + " " + strNewImagepath + ".png"
+        # listArguments = shlex.split(strCommand)
+        # subprocess.call(listArguments)
+        # os.remove(strImagepath)
+        # strImagepath = strNewImagepath + ".png"
     elif strFileFormat == "PDF":
         strNewImagepath = os.path.splitext(strImagepath)[0]
         clipped_file = strImagepath.replace(".pdf", "-clipped.pdf")
@@ -165,12 +180,15 @@ def plural(num, noun):
         return noun + "s"
 # def plural ends here
 
-def format_citations(used_citekeys, bibdata, language):
+def format_citations(used_citekeys, bibdata, language, tmp_filename):
     """Return a formatted xmlstring of the used citations"""
 
+    tmp_path_md = "tmp_files" + os.path.sep + tmp_filename + ".md"
+    tmp_path_html = "tmp_files" + os.path.sep + tmp_filename + ".html"
+
     md_file_header = "---\nlang: %s\ntitle: Citations\n...\n\n" % two_letter_language(language)
 
-    with open("tmp_files/used_citations.md", "w") as citation_formatter:
+    with open(tmp_path_md, "w") as citation_formatter:
         citation_formatter.write(md_file_header)
         citation_formatter.write("# citeauthoryear\n")
         for entry in used_citekeys:
@@ -183,14 +201,45 @@ def format_citations(used_citekeys, bibdata, language):
         #     citation_formatter.write("@%s\n" % entry)
         citation_formatter.write("\n# References\n")
 
-    command = "pandoc -o tmp_files/formatted_citations.html -t html --filter=pandoc-citeproc --bibliography=%s --csl=%s tmp_files/used_citations.md" % (bibdata, CSL_FILE)
+    command = "pandoc -o %s -t html --filter=pandoc-citeproc --bibliography=%s --csl=%s %s" % (tmp_path_html, bibdata, CSL_FILE, tmp_path_md)
     arguments = shlex.split(command)
     logging.info("Using external command pandoc with command %s" % command)
     subprocess.call(arguments)
 
-    with open("tmp_files/formatted_citations.html", "r") as ding:
+    with open(tmp_path_html, "r") as ding:
         dd = soupparser.fromstring(ding, features="html.parser")
 
     references = dd.xpath("//div[@class='references']")
     return references
 # def format_citations ends here
+
+def fix_bib_entries(div_snippet):
+    """Rewrite the <div> entries below div_snippet as <p class="bibliography">; return div_snippet"""
+
+    # the tree is modified in place; the id attribute of each entry is kept as is
+    for entry in div_snippet.findall(".//div"):
+        entry.set("class", "bibliography")
+        # remove inner <p> wrappers (their content is kept), the entry itself becomes the <p>
+        etree.strip_tags(entry, "p")
+        entry.tag = "p"
+
+        # emphasis is expressed as <i> instead of <em>
+        for markup in entry.findall(".//em"):
+            markup.tag = "i"
+
+    return div_snippet
+# def fix_bib_entries ends here
+
+def debug_xml_here(xml_tree, xml_filename):
+    """Dump the current state of an XML tree or element into debug/debug_<xml_filename>.xml"""
+
+    debug_dir = os.path.join(os.getcwd(), "debug")
+    # the directory may be missing in a fresh working directory, writing would fail then
+    os.makedirs(debug_dir, exist_ok=True)
+    xml_path = os.path.join(debug_dir, "debug_%s.xml" % xml_filename)
+    # a bare element has no write() method, so wrap it in an ElementTree first
+    if not isinstance(xml_tree, etree._ElementTree):
+        xml_tree = etree.ElementTree(xml_tree)
+    xml_tree.write(xml_path, pretty_print=True, xml_declaration=True, encoding="utf-8")
+    logging.info("Wrote %s." % xml_path)
+# def debug_xml_here ends here