From 63c08a2ba704c4e9085d916e046eb442d57eeee1 Mon Sep 17 00:00:00 2001 From: Klaus Thoden Date: Tue, 29 May 2018 17:04:37 +0200 Subject: [PATCH] New functions --- libeoaconvert.py | 71 ++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 60 insertions(+), 11 deletions(-) diff --git a/libeoaconvert.py b/libeoaconvert.py index 3829866..ff58d9f 100644 --- a/libeoaconvert.py +++ b/libeoaconvert.py @@ -7,6 +7,7 @@ import shlex import logging import configparser +from lxml import etree from lxml.html import soupparser ################################## @@ -35,6 +36,9 @@ # Setup of various dictionaries for localization of various elements dictLangFootnotes = {"it" : "Note a piè pagina", "fr" : "notes en bas de page", "de" : "Fußnoten", "en" : "Footnotes"} +dict_and = {"en" : "and", "de" : "und", "fr" : "et", "it" : "e"} +dict_ed = {"en" : "ed.", "de" : "Hrsg."} +dict_eds = {"en" : "eds.", "de" : "Hrsg."} # the new-style footnotes that use LaTeX bigfoot show up in the following order: footnote_groups = ["decimal", "lower-latin"] @@ -93,13 +97,24 @@ def sanitizeImage(strImagepath, GM_PATH, TL_PATH): exeShell = subprocess.check_output(listArguments, shell=False, universal_newlines=True) strFileFormat = str(exeShell) strFileFormat = strFileFormat.strip() - if strFileFormat == "PNG": - strNewImagepath = os.path.splitext(strImagepath)[0] - strCommand = GM_PATH + " convert " + strImagepath + " " + strNewImagepath + ".jpg" - listArguments = shlex.split(strCommand) - subprocess.call(listArguments) - os.remove(strImagepath) - strImagepath = strNewImagepath + ".jpg" + if strFileFormat == "JPEG": + pass + # print("looking at jpeg file") + # strNewImagepath = os.path.splitext(strImagepath)[0] + # strCommand = GM_PATH + " convert " + strImagepath + " " + strNewImagepath + ".jpg" + # listArguments = shlex.split(strCommand) + # subprocess.call(listArguments) + # os.remove(strImagepath) + # strImagepath = strNewImagepath + ".jpg" + elif strFileFormat == "PNG": + pass + # 
print("looking at png file") + # strNewImagepath = os.path.splitext(strImagepath)[0] + # strCommand = GM_PATH + " convert " + strImagepath + " " + strNewImagepath + ".png" + # listArguments = shlex.split(strCommand) + # subprocess.call(listArguments) + # os.remove(strImagepath) + # strImagepath = strNewImagepath + ".png" elif strFileFormat == "PDF": strNewImagepath = os.path.splitext(strImagepath)[0] clipped_file = strImagepath.replace(".pdf", "-clipped.pdf") @@ -165,12 +180,15 @@ def plural(num, noun): return noun + "s" # def plural ends here -def format_citations(used_citekeys, bibdata, language): +def format_citations(used_citekeys, bibdata, language, tmp_filename): """Return a formatted xmlstring of the used citations""" + tmp_path_md = "tmp_files" + os.path.sep + tmp_filename + ".md" + tmp_path_html = "tmp_files" + os.path.sep + tmp_filename + ".html" + md_file_header = "---\nlang: %s\ntitle: Citations\n...\n\n" % two_letter_language(language) - with open("tmp_files/used_citations.md", "w") as citation_formatter: + with open(tmp_path_md, "w") as citation_formatter: citation_formatter.write(md_file_header) citation_formatter.write("# citeauthoryear\n") for entry in used_citekeys: @@ -183,14 +201,45 @@ def format_citations(used_citekeys, bibdata, language): # citation_formatter.write("@%s\n" % entry) citation_formatter.write("\n# References\n") - command = "pandoc -o tmp_files/formatted_citations.html -t html --filter=pandoc-citeproc --bibliography=%s --csl=%s tmp_files/used_citations.md" % (bibdata, CSL_FILE) + command = "pandoc -o %s -t html --filter=pandoc-citeproc --bibliography=%s --csl=%s %s" % (tmp_path_html, bibdata, CSL_FILE, tmp_path_md) arguments = shlex.split(command) logging.info("Using external command pandoc with command %s" % command) subprocess.call(arguments) - with open("tmp_files/formatted_citations.html", "r") as ding: + with open(tmp_path_html, "r") as ding: dd = soupparser.fromstring(ding, features="html.parser") references = 
# NOTE(review): this chunk opened with the tail of the format_citations()
# hunk, which is only partially visible here. Those tokens complete the
# dangling `references = ` of the preceding chunk and are carried over as-is:
#     dd.xpath("//div[@class='references']")
#     return references
# def format_citations ends here

def fix_bib_entries(div_snippet):
    """Modify the html code returned by pandoc-citeproc.

    Every ``div`` found below ``div_snippet`` is rewritten in place:

    * its ``class`` attribute is set to ``bibliography``,
    * ``p`` elements inside it are dissolved with ``etree.strip_tags``
      (lxml removes the tags but keeps their text and children),
    * the ``div`` itself is renamed to ``p``,
    * ``em`` elements inside it are renamed to ``i``.

    Returns the same ``div_snippet`` object that was passed in.
    """

    for entry in div_snippet.findall(".//div"):
        entry.set("class", "bibliography")
        # Dissolve the inner <p> wrappers first; afterwards the div itself
        # takes over the role of the paragraph.
        etree.strip_tags(entry, "p")
        entry.tag = "p"
        for markup in entry.findall(".//em"):
            markup.tag = "i"

    return div_snippet
# def fix_bib_entries ends here

def debug_xml_here(xml_tree, xml_filename):
    """Dump current state of an XML tree into a file for inspection.

    The tree is written to ``debug/debug_<xml_filename>.xml`` below the
    current working directory. The ``debug`` directory is created when it
    does not exist yet.

    ``xml_tree`` may be an lxml ElementTree or anything accepted by
    ``etree.ElementTree()``; the latter is wrapped into an ElementTree
    before writing. Nothing is returned.
    """

    debug_dir = os.path.join(os.getcwd(), "debug")
    # write() does not create missing directories, so make sure it is there.
    os.makedirs(debug_dir, exist_ok=True)
    xml_path = os.path.join(debug_dir, "debug_%s.xml" % xml_filename)

    if not isinstance(xml_tree, etree._ElementTree):
        xml_tree = etree.ElementTree(xml_tree)

    xml_tree.write(xml_path, pretty_print=True, xml_declaration=True, encoding="utf-8")
    logging.info("Wrote %s.", xml_path)
# def debug_xml_here ends here