Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
New functions
  • Loading branch information
Klaus Thoden committed May 29, 2018
1 parent 9092acb commit 63c08a2
Showing 1 changed file with 60 additions and 11 deletions.
71 changes: 60 additions & 11 deletions libeoaconvert.py
Expand Up @@ -7,6 +7,7 @@
import shlex import shlex
import logging import logging
import configparser import configparser
from lxml import etree
from lxml.html import soupparser from lxml.html import soupparser


################################## ##################################
Expand Down Expand Up @@ -35,6 +36,9 @@


# Setup of various dictionaries for localization of various elements # Setup of various dictionaries for localization of various elements
dictLangFootnotes = {"it" : "Note a piè pagina", "fr" : "notes en bas de page", "de" : "Fußnoten", "en" : "Footnotes"} dictLangFootnotes = {"it" : "Note a piè pagina", "fr" : "notes en bas de page", "de" : "Fußnoten", "en" : "Footnotes"}
dict_and = {"en" : "and", "de" : "und", "fr" : "et", "it" : "e"}
dict_ed = {"en" : "ed.", "de" : "Hrsg."}
dict_eds = {"en" : "eds.", "de" : "Hrsg."}


# the new-style footnotes that use LaTeX bigfoot show up in the following order: # the new-style footnotes that use LaTeX bigfoot show up in the following order:
footnote_groups = ["decimal", "lower-latin"] footnote_groups = ["decimal", "lower-latin"]
Expand Down Expand Up @@ -93,13 +97,24 @@ def sanitizeImage(strImagepath, GM_PATH, TL_PATH):
exeShell = subprocess.check_output(listArguments, shell=False, universal_newlines=True) exeShell = subprocess.check_output(listArguments, shell=False, universal_newlines=True)
strFileFormat = str(exeShell) strFileFormat = str(exeShell)
strFileFormat = strFileFormat.strip() strFileFormat = strFileFormat.strip()
if strFileFormat == "PNG": if strFileFormat == "JPEG":
strNewImagepath = os.path.splitext(strImagepath)[0] pass
strCommand = GM_PATH + " convert " + strImagepath + " " + strNewImagepath + ".jpg" # print("looking at jpeg file")
listArguments = shlex.split(strCommand) # strNewImagepath = os.path.splitext(strImagepath)[0]
subprocess.call(listArguments) # strCommand = GM_PATH + " convert " + strImagepath + " " + strNewImagepath + ".jpg"
os.remove(strImagepath) # listArguments = shlex.split(strCommand)
strImagepath = strNewImagepath + ".jpg" # subprocess.call(listArguments)
# os.remove(strImagepath)
# strImagepath = strNewImagepath + ".jpg"
elif strFileFormat == "PNG":
pass
# print("looking at png file")
# strNewImagepath = os.path.splitext(strImagepath)[0]
# strCommand = GM_PATH + " convert " + strImagepath + " " + strNewImagepath + ".png"
# listArguments = shlex.split(strCommand)
# subprocess.call(listArguments)
# os.remove(strImagepath)
# strImagepath = strNewImagepath + ".png"
elif strFileFormat == "PDF": elif strFileFormat == "PDF":
strNewImagepath = os.path.splitext(strImagepath)[0] strNewImagepath = os.path.splitext(strImagepath)[0]
clipped_file = strImagepath.replace(".pdf", "-clipped.pdf") clipped_file = strImagepath.replace(".pdf", "-clipped.pdf")
Expand Down Expand Up @@ -165,12 +180,15 @@ def plural(num, noun):
return noun + "s" return noun + "s"
# def plural ends here # def plural ends here


def format_citations(used_citekeys, bibdata, language): def format_citations(used_citekeys, bibdata, language, tmp_filename):
"""Return a formatted xmlstring of the used citations""" """Return a formatted xmlstring of the used citations"""


tmp_path_md = "tmp_files" + os.path.sep + tmp_filename + ".md"
tmp_path_html = "tmp_files" + os.path.sep + tmp_filename + ".html"

md_file_header = "---\nlang: %s\ntitle: Citations\n...\n\n" % two_letter_language(language) md_file_header = "---\nlang: %s\ntitle: Citations\n...\n\n" % two_letter_language(language)


with open("tmp_files/used_citations.md", "w") as citation_formatter: with open(tmp_path_md, "w") as citation_formatter:
citation_formatter.write(md_file_header) citation_formatter.write(md_file_header)
citation_formatter.write("# citeauthoryear\n") citation_formatter.write("# citeauthoryear\n")
for entry in used_citekeys: for entry in used_citekeys:
Expand All @@ -183,14 +201,45 @@ def format_citations(used_citekeys, bibdata, language):
# citation_formatter.write("@%s\n" % entry) # citation_formatter.write("@%s\n" % entry)
citation_formatter.write("\n# References\n") citation_formatter.write("\n# References\n")


command = "pandoc -o tmp_files/formatted_citations.html -t html --filter=pandoc-citeproc --bibliography=%s --csl=%s tmp_files/used_citations.md" % (bibdata, CSL_FILE) command = "pandoc -o %s -t html --filter=pandoc-citeproc --bibliography=%s --csl=%s %s" % (tmp_path_html, bibdata, CSL_FILE, tmp_path_md)
arguments = shlex.split(command) arguments = shlex.split(command)
logging.info("Using external command pandoc with command %s" % command) logging.info("Using external command pandoc with command %s" % command)
subprocess.call(arguments) subprocess.call(arguments)


with open("tmp_files/formatted_citations.html", "r") as ding: with open(tmp_path_html, "r") as ding:
dd = soupparser.fromstring(ding, features="html.parser") dd = soupparser.fromstring(ding, features="html.parser")


references = dd.xpath("//div[@class='references']") references = dd.xpath("//div[@class='references']")
return references return references
# def format_citations ends here # def format_citations ends here

def fix_bib_entries(div_snippet):
    """Modify the html code returned by pandoc-citeproc.

    Every ``div`` descendant of *div_snippet* is rewritten in place:

    * its ``class`` attribute is set to ``bibliography``,
    * ``p`` elements inside it are unwrapped (their text and children
      are kept, only the tags are dropped),
    * the ``div`` itself is renamed to ``p``,
    * every ``em`` element inside it is renamed to ``i``.

    Args:
        div_snippet: lxml element whose descendant ``div`` elements are
            the bibliography entries.

    Returns:
        The same *div_snippet* object, modified in place.
    """
    for entry in div_snippet.findall(".//div"):
        entry.set("class", "bibliography")
        # Unwrap inner paragraphs first so that renaming the div to <p>
        # below does not leave a <p> nested inside another <p>.
        etree.strip_tags(entry, "p")
        entry.tag = "p"
        for markup in entry.findall(".//em"):
            markup.tag = "i"

    return div_snippet
# def fix_bib_entries ends here

def debug_xml_here(xml_tree, xml_filename):
    """Dump current state of an XML tree into a file for inspection.

    The tree is written, pretty-printed and UTF-8 encoded with an XML
    declaration, to ``<cwd>/debug/debug_<xml_filename>.xml``. The
    ``debug`` directory is created if it does not exist yet.

    Args:
        xml_tree: an ``etree._ElementTree``, or any object accepted by
            ``etree.ElementTree()`` (it is wrapped before writing).
        xml_filename: name fragment used to build the output file name.
    """
    debug_dir = "%s/debug" % os.getcwd()
    xml_path = "%s/debug_%s.xml" % (debug_dir, xml_filename)

    # A missing target directory would make the write below fail, which
    # is unhelpful for a pure debugging aid.
    os.makedirs(debug_dir, exist_ok=True)

    # Only tree objects are written directly; anything else is wrapped.
    if not isinstance(xml_tree, etree._ElementTree):
        xml_tree = etree.ElementTree(xml_tree)

    xml_tree.write(xml_path, pretty_print=True, xml_declaration=True, encoding="utf-8")
    logging.info("Wrote %s.", xml_path)
# def debug_xml_here ends here

0 comments on commit 63c08a2

Please sign in to comment.