-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
use htlatex (instead of pandoc) to generate the citations
- Loading branch information
EsGeh
authored and
EsGeh
committed
Apr 6, 2019
1 parent
5730c48
commit 1793c57
Showing
2 changed files
with
226 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,208 @@ | ||
#!/usr/bin/env python3
# -*- coding: utf-8; mode: python -*-

"""
Build a formatted reference list by running htlatex on a generated LaTeX file.

The module writes a temporary LaTeX document that cites a list of citekeys
(write_dummy_latex), runs ``htlatex`` and ``biber`` on it (run_htlatex),
parses the ``thebibliography`` list of the resulting HTML file and returns
the entries wrapped in a ``<div id="refs" class="references">`` element
(main). transform_reference cleans up the markup of a single entry.
"""

# Module metadata.
__version__ = "1.0"
__date__ = "20190313"
__author__ = "kthoden@mpiwg-berlin.mpg.de"
|
||
import argparse | ||
import os | ||
import subprocess | ||
import shlex | ||
import logging | ||
import string | ||
from lxml import etree | ||
from pathlib import Path | ||
|
||
# Location of this script as given by the interpreter (not resolved).
SCRIPT_PATH = Path(__file__)
SCRIPT_NAME = SCRIPT_PATH.name

# Directory two levels above the resolved script file; used as the root
# for locating the default LaTeX template.
BASE_DIR = SCRIPT_PATH.resolve().parent.parent

# logging.basicConfig(level=logging.DEBUG, format=' %(asctime)s - %(levelname)s - %(message)s')

# Prefix -> namespace mapping handed to every XPath query in this module.
NS_MAP = dict(x='http://www.w3.org/1999/xhtml')
|
||
def transform_reference(reference_element, dialect='html'):
    """Clean up the markup of one bibliography paragraph produced by htlatex.

    The element is serialised, stripped of line breaks and of the XHTML
    namespace declaration on its ``<p class="noindent">`` start tag, and
    parsed again. On the resulting element, direct-child ``<a>`` elements are
    unwrapped (their content is kept) and direct-child spans of class
    ``ecti-1095`` / ``ectt-1095`` are renamed to semantic elements.

    Args:
        reference_element: lxml element holding one reference (a ``<p>``).
        dialect: ``'tei'`` yields ``<hi rend="italic">`` and
            ``<hi rend="monospace">``; any other value yields ``<em>`` and
            ``<code>``.

    Returns:
        The newly parsed, cleaned lxml element.
    """

    # with_tail=False: text following </p> must not be serialised, otherwise
    # re-parsing the string as a single element would fail.
    serialized = etree.tostring(reference_element, with_tail=False).decode('utf-8')
    serialized = serialized.replace("\n", "")
    serialized = serialized.replace('<p xmlns="http://www.w3.org/1999/xhtml" class="noindent">', '<p>')
    cleaned_element = etree.fromstring(serialized)

    # Links are only marked here; they are unwrapped at the very end so that
    # their text and children stay in place.
    for link in cleaned_element.xpath("a", namespaces=NS_MAP):
        link.tag = "tagtobestripped"

    # (span class, HTML replacement tag, TEI "rend" value)
    span_styles = (
        ("ecti-1095", "em", "italic"),
        ("ectt-1095", "code", "monospace"),
    )
    for css_class, html_tag, tei_rend in span_styles:
        # Match the span with and without the XHTML namespace: the namespace
        # is normally gone after the replacement above, but survives when the
        # start tag did not have the expected form.
        query = f"span[@class='{css_class}'] | x:span[@class='{css_class}']"
        for span in cleaned_element.xpath(query, namespaces=NS_MAP):
            if dialect == 'tei':
                span.tag = "hi"
                span.set("rend", tei_rend)
            else:
                span.tag = html_tag
            span.attrib.pop('class')

    etree.strip_tags(cleaned_element, "tagtobestripped")

    return cleaned_element
# def transform_reference ends here
|
||
def write_dummy_latex(
        citekeys,
        bibfile,
        language,
        template_path,
        tmp_filename
):
    """Write a LaTeX file that cites every key in *citekeys*.

    The file at *template_path* is read as a ``string.Template`` and its
    ``$language``, ``$bibfile`` and ``$citations`` placeholders are filled in.

    Args:
        citekeys: iterable of citation keys; each one gets its own section
            containing a ``\\cite`` and a ``\\cite*`` command.
        bibfile: path of the bibliography file (``str`` or ``Path``). A
            relative path is made absolute against the current working
            directory.
        language: value substituted for ``$language``.
        template_path: path of the LaTeX template.
        tmp_filename: path of the LaTeX file to write.

    Raises:
        KeyError: if the template contains a placeholder other than the
            three named above (raised by ``Template.substitute``).
    """

    # Raw strings keep the LaTeX backslashes literal instead of relying on
    # Python passing unknown escapes such as "\s" and "\c" through unchanged.
    citation_block = "\n".join((
        "",
        r"\subsection*{%s}",
        r"\subsubsection*{authoryear}",
        r"\cite{%s}",
        r"\subsubsection*{year}",
        r"\cite*{%s}",
        "",
    ))
    allcitekeys = "".join(citation_block % (key, key, key) for key in citekeys)

    with open(template_path, "r", encoding="utf-8") as template_file:
        fill_in_template = string.Template(template_file.read())

    # Accept plain strings (e.g. straight from argparse) as well as Paths.
    bibfile_path = Path(bibfile)
    if not bibfile_path.is_absolute():
        bibfile_path = Path.cwd() / bibfile_path

    filled_template = fill_in_template.substitute(
        language=language,
        bibfile=bibfile_path,
        citations=allcitekeys,
    )

    with open(tmp_filename, "w", encoding="utf-8") as texfile:
        texfile.write(filled_template)

    logging.info("Wrote %s", tmp_filename)
# def write_dummy_latex ends here
|
||
|
||
def run_htlatex(tmp_filename):
    """Create an HTML file from the temporary LaTeX file.

    Runs ``htlatex``, then ``biber``, then ``htlatex`` again in the current
    working directory (presumably so that the second pass can pick up the
    bibliography data written by biber).

    Args:
        tmp_filename: name of the LaTeX file without the ``.tex`` suffix.

    A non-zero exit status of a tool is logged as a warning; it does not
    abort the remaining steps.
    """

    # Argument lists are built directly rather than split from a command
    # string, so file names containing whitespace or quotes stay intact.
    htlatex_arguments = [
        "htlatex",
        f"{tmp_filename}.tex",
        "xhtml,charset=utf-8",
        " -cunihtf -utf8",
    ]
    biber_arguments = ["biber", str(tmp_filename)]

    steps = (
        ("htlatex", htlatex_arguments),
        ("biber", biber_arguments),
        ("htlatex", htlatex_arguments),
    )
    for tool, arguments in steps:
        command = " ".join(shlex.quote(argument) for argument in arguments)
        logging.info("Using external command %s with command %s", tool, command)
        return_code = subprocess.call(arguments)
        if return_code != 0:
            logging.warning("%s exited with status %s", tool, return_code)
# def run_htlatex ends here
|
||
|
||
def main(
        bib_file,
        citekeys,
        tex_template,
        language,
        temp_dir,
        tmp_filename = "temp"
):
    """Render the references for *citekeys* and return them as one element.

    A LaTeX file citing all keys is written into *temp_dir*, converted with
    htlatex/biber, and the ``thebibliography`` list of the resulting HTML
    file is turned into ``<div id="refs" class="references">`` containing
    one ``<div id="ref-CITEKEY">`` per entry.

    Args:
        bib_file: path of the bibliography file (``str`` or ``Path``).
        citekeys: iterable of citation keys to render.
        tex_template: path of the LaTeX template.
        language: language value handed to the template.
        temp_dir: directory for the intermediate files; created if missing.
        tmp_filename: base name (without suffix) of the intermediate files.

    Returns:
        The lxml ``<div>`` element holding all formatted references.

    Raises:
        IndexError: if the generated HTML contains no bibliography list, or
            an entry has no following ``<dd>/<p>``.
    """
    bib_file = Path(bib_file)
    temp_dir = Path(temp_dir)
    tmp_filename = Path(tmp_filename)

    temp_dir.mkdir(parents=True, exist_ok=True)

    write_dummy_latex(
        citekeys,
        bib_file,
        language,
        template_path=tex_template,
        tmp_filename=temp_dir / tmp_filename.with_suffix(".tex"),
    )

    # The tools write their output next to the input, so run them inside
    # temp_dir - and always return to the previous directory, even on error.
    previous_dir = Path.cwd()
    os.chdir(temp_dir)
    try:
        run_htlatex(tmp_filename)
    finally:
        os.chdir(previous_dir)

    tmp_path_html = (temp_dir / tmp_filename).with_suffix(".html")

    xml_tree = etree.parse(str(tmp_path_html))

    bibliographies = xml_tree.xpath("//x:dl[@class='thebibliography']", namespaces=NS_MAP)
    if not bibliographies:
        raise IndexError(f"no <dl class='thebibliography'> found in {tmp_path_html}")
    entries = bibliographies[0].xpath("x:dt", namespaces=NS_MAP)

    reference_div = etree.Element("div", {"id": "refs", "class": "references"})

    for entry in entries:
        entry_citekey = entry.get("id").replace("X0-", "")
        reference_string = entry.xpath("following-sibling::x:dd[1]/x:p", namespaces=NS_MAP)[0]
        formatted_reference = transform_reference(reference_string)
        # Build the wrapper through the API so that citekeys containing
        # XML-special characters cannot produce malformed markup.
        wrapper_div = etree.SubElement(reference_div, "div", {"id": f"ref-{entry_citekey}"})
        wrapper_div.append(formatted_reference)
    return reference_div
|
||
# NOTE(review): the bare string below is evaluated and discarded; it has no
# effect at runtime. It appears to be a sample of the target markup: the
# div#refs / div#ref-... / p nesting is what main() builds, while the <h1>
# heading is not produced anywhere in this file.
"""
<h1 id="references" class="unnumbered">References</h1>
<div id="refs" class="references">
<div id="ref-Appadurai_1986">
<p>Appadurai, Arjun, ed. (1986). <em>The Social Life of Things: Commodities in Cultural Perspective</em>. Cambridge, UK: Cambridge University Press.</p>
</div>
"""
|
||
if __name__ == '__main__':
    # Command-line entry point: render a fixed list of citekeys from the
    # given bibliography file and print the resulting markup.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "bibfile",
        type=Path,
        help="File that contains the bibliography")
    parser.add_argument(
        "--tex-template",
        type=Path,
        default=BASE_DIR / "bibformat" / "4ht" / "bibliography4ht.tex",
        help="the latex template to use for the bibliography"
    )
    parser.add_argument(
        "--temp-dir",
        type=Path,
        default="tmp_files",
        help="where to store temporary files"
    )
    args = parser.parse_args()

    # NOTE: citekeys and language are hard-coded sample values.
    citekeys = ["Edwards_2017", "Riggs:2016aa", "Bruckler:2001aa", "Zdenek:1939aa", "Caraffa_2011", "Uhlikova:2010aa", "Noll:1992aa", "Schwarz:1931aa", "Schwartz_1995", "Faber:2015ab", "Rohacek:2010aa", "Lachnit:2005aa", "Groll:1865aa", "Schlosser:1934aa", "Eitelberger:1863ab", "Wirth:1939aa", "Faber:2015aa", "Trnkova:2015aa", "Trnkova:2010aa", "Frodl:1988aa"]
    language = "de"

    # Short language code -> language name handed to the LaTeX template.
    translations = {"de": "german", "en": "english", "it": "italian", "fr": "french"}

    references_in_html = main(
        bib_file=args.bibfile,
        citekeys=citekeys,
        tex_template=args.tex_template,
        language=translations[language],
        temp_dir=args.temp_dir,
    )
    # Decode before printing so the markup itself is emitted rather than
    # the b'...' repr of a bytes object.
    print(etree.tostring(references_in_html).decode("utf-8"))
# finis