Permalink
Browse files

New functions

  • Loading branch information...
kthoden committed May 29, 2018
1 parent 9092acb commit 63c08a2ba704c4e9085d916e046eb442d57eeee1
Showing with 60 additions and 11 deletions.
  1. +60 −11 libeoaconvert.py
View
@@ -7,6 +7,7 @@
import shlex
import logging
import configparser
+from lxml import etree
from lxml.html import soupparser
##################################
@@ -35,6 +36,9 @@
# Setup of various dictionaries for localization of various elements
dictLangFootnotes = {"it" : "Note a piè pagina", "fr" : "notes en bas de page", "de" : "Fußnoten", "en" : "Footnotes"}
+dict_and = {"en" : "and", "de" : "und", "fr" : "et", "it" : "e"}
+dict_ed = {"en" : "ed.", "de" : "Hrsg."}
+dict_eds = {"en" : "eds.", "de" : "Hrsg."}
# the new-style footnotes that use LaTeX bigfoot show up in the following order:
footnote_groups = ["decimal", "lower-latin"]
@@ -93,13 +97,24 @@ def sanitizeImage(strImagepath, GM_PATH, TL_PATH):
exeShell = subprocess.check_output(listArguments, shell=False, universal_newlines=True)
strFileFormat = str(exeShell)
strFileFormat = strFileFormat.strip()
- if strFileFormat == "PNG":
- strNewImagepath = os.path.splitext(strImagepath)[0]
- strCommand = GM_PATH + " convert " + strImagepath + " " + strNewImagepath + ".jpg"
- listArguments = shlex.split(strCommand)
- subprocess.call(listArguments)
- os.remove(strImagepath)
- strImagepath = strNewImagepath + ".jpg"
+ if strFileFormat == "JPEG":
+ pass
+ # print("looking at jpeg file")
+ # strNewImagepath = os.path.splitext(strImagepath)[0]
+ # strCommand = GM_PATH + " convert " + strImagepath + " " + strNewImagepath + ".jpg"
+ # listArguments = shlex.split(strCommand)
+ # subprocess.call(listArguments)
+ # os.remove(strImagepath)
+ # strImagepath = strNewImagepath + ".jpg"
+ elif strFileFormat == "PNG":
+ pass
+ # print("looking at png file")
+ # strNewImagepath = os.path.splitext(strImagepath)[0]
+ # strCommand = GM_PATH + " convert " + strImagepath + " " + strNewImagepath + ".png"
+ # listArguments = shlex.split(strCommand)
+ # subprocess.call(listArguments)
+ # os.remove(strImagepath)
+ # strImagepath = strNewImagepath + ".png"
elif strFileFormat == "PDF":
strNewImagepath = os.path.splitext(strImagepath)[0]
clipped_file = strImagepath.replace(".pdf", "-clipped.pdf")
@@ -165,12 +180,15 @@ def plural(num, noun):
return noun + "s"
# def plural ends here
-def format_citations(used_citekeys, bibdata, language):
+def format_citations(used_citekeys, bibdata, language, tmp_filename):
"""Return a formatted xmlstring of the used citations"""
+ tmp_path_md = "tmp_files" + os.path.sep + tmp_filename + ".md"
+ tmp_path_html = "tmp_files" + os.path.sep + tmp_filename + ".html"
+
md_file_header = "---\nlang: %s\ntitle: Citations\n...\n\n" % two_letter_language(language)
- with open("tmp_files/used_citations.md", "w") as citation_formatter:
+ with open(tmp_path_md, "w") as citation_formatter:
citation_formatter.write(md_file_header)
citation_formatter.write("# citeauthoryear\n")
for entry in used_citekeys:
@@ -183,14 +201,45 @@ def format_citations(used_citekeys, bibdata, language):
# citation_formatter.write("@%s\n" % entry)
citation_formatter.write("\n# References\n")
- command = "pandoc -o tmp_files/formatted_citations.html -t html --filter=pandoc-citeproc --bibliography=%s --csl=%s tmp_files/used_citations.md" % (bibdata, CSL_FILE)
+ command = "pandoc -o %s -t html --filter=pandoc-citeproc --bibliography=%s --csl=%s %s" % (tmp_path_html, bibdata, CSL_FILE, tmp_path_md)
arguments = shlex.split(command)
logging.info("Using external command pandoc with command %s" % command)
subprocess.call(arguments)
- with open("tmp_files/formatted_citations.html", "r") as ding:
+ with open(tmp_path_html, "r") as ding:
dd = soupparser.fromstring(ding, features="html.parser")
references = dd.xpath("//div[@class='references']")
return references
# def format_citations ends here
+
+def fix_bib_entries(div_snippet):
+ """Modify the html code returned by pandoc-citeproc"""
+
+ entries = div_snippet.findall(".//div")
+
+ for entry in entries:
+ entry_id = entry.get("id")
+ entry.set("class", "bibliography")
+ etree.strip_tags(entry, "p")
+ entry.tag = "p"
+ internal_markup = entry.findall(".//em")
+ for markup in internal_markup:
+ markup.tag = "i"
+
+ return div_snippet
+# def fix_bib_entries ends here
+
+def debug_xml_here(xml_tree, xml_filename):
+ """Dump current state of an XML tree into a file for inspection"""
+
+ xml_path = "%s/debug/debug_%s.xml" % (os.getcwd(), xml_filename)
+
+ if isinstance(xml_tree, etree._ElementTree):
+ pass
+ else:
+ xml_tree = etree.ElementTree(xml_tree)
+
+ xml_tree.write(xml_path, pretty_print=True, xml_declaration=True,encoding="utf-8")
+ logging.info("Wrote %s." % xml_path)
+# def debug_xml_here ends here

0 comments on commit 63c08a2

Please sign in to comment.