Permalink
Browse files

New functions

  • Loading branch information...
kthoden committed May 29, 2018
1 parent 9092acb commit 63c08a2ba704c4e9085d916e046eb442d57eeee1
Showing with 60 additions and 11 deletions.
  1. +60 −11 libeoaconvert.py
@@ -7,6 +7,7 @@
import shlex
import logging
import configparser
from lxml import etree
from lxml.html import soupparser

##################################
@@ -35,6 +36,9 @@

# Localized strings for various elements, keyed by two-letter language code.
dictLangFootnotes = {
    "it": "Note a piè pagina",
    "fr": "notes en bas de page",
    "de": "Fußnoten",
    "en": "Footnotes",
}
dict_and = dict(en="and", de="und", fr="et", it="e")
dict_ed = dict(en="ed.", de="Hrsg.")
dict_eds = dict(en="eds.", de="Hrsg.")

# Order in which the new-style footnotes (LaTeX bigfoot) show up.
footnote_groups = ["decimal", "lower-latin"]
@@ -93,13 +97,24 @@ def sanitizeImage(strImagepath, GM_PATH, TL_PATH):
exeShell = subprocess.check_output(listArguments, shell=False, universal_newlines=True)
strFileFormat = str(exeShell)
strFileFormat = strFileFormat.strip()
if strFileFormat == "PNG":
strNewImagepath = os.path.splitext(strImagepath)[0]
strCommand = GM_PATH + " convert " + strImagepath + " " + strNewImagepath + ".jpg"
listArguments = shlex.split(strCommand)
subprocess.call(listArguments)
os.remove(strImagepath)
strImagepath = strNewImagepath + ".jpg"
if strFileFormat == "JPEG":
pass
# print("looking at jpeg file")
# strNewImagepath = os.path.splitext(strImagepath)[0]
# strCommand = GM_PATH + " convert " + strImagepath + " " + strNewImagepath + ".jpg"
# listArguments = shlex.split(strCommand)
# subprocess.call(listArguments)
# os.remove(strImagepath)
# strImagepath = strNewImagepath + ".jpg"
elif strFileFormat == "PNG":
pass
# print("looking at png file")
# strNewImagepath = os.path.splitext(strImagepath)[0]
# strCommand = GM_PATH + " convert " + strImagepath + " " + strNewImagepath + ".png"
# listArguments = shlex.split(strCommand)
# subprocess.call(listArguments)
# os.remove(strImagepath)
# strImagepath = strNewImagepath + ".png"
elif strFileFormat == "PDF":
strNewImagepath = os.path.splitext(strImagepath)[0]
clipped_file = strImagepath.replace(".pdf", "-clipped.pdf")
@@ -165,12 +180,15 @@ def plural(num, noun):
return noun + "s"
# def plural ends here

def format_citations(used_citekeys, bibdata, language):
def format_citations(used_citekeys, bibdata, language, tmp_filename):
"""Return a formatted xmlstring of the used citations"""

tmp_path_md = "tmp_files" + os.path.sep + tmp_filename + ".md"
tmp_path_html = "tmp_files" + os.path.sep + tmp_filename + ".html"

md_file_header = "---\nlang: %s\ntitle: Citations\n...\n\n" % two_letter_language(language)

with open("tmp_files/used_citations.md", "w") as citation_formatter:
with open(tmp_path_md, "w") as citation_formatter:
citation_formatter.write(md_file_header)
citation_formatter.write("# citeauthoryear\n")
for entry in used_citekeys:
@@ -183,14 +201,45 @@ def format_citations(used_citekeys, bibdata, language):
# citation_formatter.write("@%s\n" % entry)
citation_formatter.write("\n# References\n")

command = "pandoc -o tmp_files/formatted_citations.html -t html --filter=pandoc-citeproc --bibliography=%s --csl=%s tmp_files/used_citations.md" % (bibdata, CSL_FILE)
command = "pandoc -o %s -t html --filter=pandoc-citeproc --bibliography=%s --csl=%s %s" % (tmp_path_html, bibdata, CSL_FILE, tmp_path_md)
arguments = shlex.split(command)
logging.info("Using external command pandoc with command %s" % command)
subprocess.call(arguments)

with open("tmp_files/formatted_citations.html", "r") as ding:
with open(tmp_path_html, "r") as ding:
dd = soupparser.fromstring(ding, features="html.parser")

references = dd.xpath("//div[@class='references']")
return references
# def format_citations ends here

def fix_bib_entries(div_snippet):
    """Modify the html code returned by pandoc-citeproc.

    Every ``div`` found below ``div_snippet`` is rewritten in place:
    its ``class`` attribute is set to ``bibliography``, ``p`` elements
    inside it are unwrapped, the ``div`` itself is renamed to ``p`` and
    every ``em`` descendant is renamed to ``i``.

    :param div_snippet: element whose descendant ``div`` elements are
        the bibliography entries (modified in place).
    :returns: the same ``div_snippet`` object, after modification.
    """

    for entry in div_snippet.findall(".//div"):
        entry.set("class", "bibliography")
        # Unwrap the inner <p> elements *before* renaming the entry to
        # <p>, so that the entry does not end up with nested paragraphs.
        etree.strip_tags(entry, "p")
        entry.tag = "p"
        for markup in entry.findall(".//em"):
            markup.tag = "i"

    return div_snippet
# def fix_bib_entries ends here

def debug_xml_here(xml_tree, xml_filename):
    """Dump current state of an XML tree into a file for inspection.

    The output is written to ``debug/debug_<xml_filename>.xml`` below the
    current working directory; the directory is created if it is missing.

    :param xml_tree: an lxml element tree, or anything accepted by
        ``etree.ElementTree()`` as its root element.
    :param xml_filename: name fragment inserted into the output file name.
    """

    xml_path = "%s/debug/debug_%s.xml" % (os.getcwd(), xml_filename)

    # Writing the tree does not create missing directories, so make sure
    # the target directory exists instead of failing on a fresh checkout.
    os.makedirs(os.path.dirname(xml_path), exist_ok=True)

    # Only element trees have the write() method used below; wrap anything
    # else into a tree first.
    if not isinstance(xml_tree, etree._ElementTree):
        xml_tree = etree.ElementTree(xml_tree)

    xml_tree.write(xml_path, pretty_print=True, xml_declaration=True, encoding="utf-8")
    logging.info("Wrote %s.", xml_path)
# def debug_xml_here ends here

0 comments on commit 63c08a2

Please sign in to comment.