Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
EOASkripts/utils/bib2html.py
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
executable file
208 lines (168 sloc)
6.46 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# -*- coding: utf-8; mode: python -*- | |
""" | |
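Convert bibliography entries to HTML reference markup.

A temporary LaTeX file citing the requested keys is written from a template,
compiled with htlatex and biber, and the bibliography list of the resulting
HTML file is extracted and reformatted as a <div id="refs"> element.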
""" | |
__version__ = "1.0" | |
__date__ = "20190313" | |
__author__ = "kthoden@mpiwg-berlin.mpg.de" | |
import argparse | |
import os | |
import subprocess | |
import shlex | |
import logging | |
import string | |
from lxml import etree | |
from pathlib import Path | |
# Directory two levels above this script's resolved location; used below as
# the base for the default LaTeX template path.
BASE_DIR = Path( __file__ ).resolve().parent.parent
# Path of this script as given by __file__ (not resolved) and its file name.
SCRIPT_PATH = Path( __file__ )
SCRIPT_NAME = SCRIPT_PATH.name
# logging.basicConfig(level=logging.DEBUG, format=' %(asctime)s - %(levelname)s - %(message)s')
# Prefix map for XPath queries: "x" stands for the XHTML namespace.
NS_MAP = {"x" : 'http://www.w3.org/1999/xhtml'}
def transform_reference(reference_element, dialect='html'):
    """Reformat one bibliography paragraph taken from the htlatex output.

    The element is serialised, stripped of line breaks and of the XHTML
    namespace declaration on its ``<p class="noindent">`` start tag, and
    parsed again, so the paragraph and its children end up in no namespace.
    On the re-parsed element:

    * direct ``<a>`` children are unwrapped (their content is kept),
    * direct ``<span class="ecti-1095">`` children become ``<em>`` (html)
      or ``<hi rend="italic">`` (tei),
    * direct ``<span class="ectt-1095">`` children become ``<code>`` (html)
      or ``<hi rend="monospace">`` (tei).

    Args:
        reference_element: lxml element holding one formatted reference.
        dialect: ``'tei'`` selects TEI markup; any other value selects HTML.

    Returns:
        The re-parsed, cleaned lxml element (a new element, not the input).
    """
    # with_tail=False: text that follows the element belongs to its parent;
    # serialising it as well could leave content after the root element and
    # make the re-parse below fail.
    string_from_xml = etree.tostring(reference_element, with_tail=False).decode('utf-8')
    removed_linebreak = string_from_xml.replace("\n", "")
    removed_namespace = removed_linebreak.replace(
        '<p xmlns="http://www.w3.org/1999/xhtml" class="noindent">', '<p>')
    cleaned_element = etree.fromstring(removed_namespace)

    # Mark links with a throwaway tag name so strip_tags() can unwrap them.
    for link in cleaned_element.xpath("a", namespaces=NS_MAP):
        link.tag = "tagtobestripped"

    # (span class, TEI rend value, HTML tag).  The namespace declaration was
    # removed above, so the spans are matched without a namespace prefix.
    span_styles = (
        ("ecti-1095", "italic", "em"),
        ("ectt-1095", "monospace", "code"),
    )
    for css_class, tei_rend, html_tag in span_styles:
        for span in cleaned_element.xpath(f"span[@class='{css_class}']", namespaces=NS_MAP):
            if dialect == 'tei':
                span.tag = "hi"
                span.set("rend", tei_rend)
            else:
                span.tag = html_tag
            span.attrib.pop('class')

    etree.strip_tags(cleaned_element, "tagtobestripped")
    return cleaned_element
# def transform_reference ends here
def write_dummy_latex(
        citekeys,
        bibfile,
        language,
        template_path,
        tmp_filename
):
    r"""Write a LaTeX file that cites every key, based on a template.

    For each cite key a snippet with an unnumbered subsection heading, a
    ``\cite`` and a ``\cite*`` command is generated.  The snippets are
    inserted into the template together with the language and the
    bibliography path, and the result is written to ``tmp_filename``.

    Args:
        citekeys: iterable of citation keys.
        bibfile: path of the bibliography file, as ``str`` or ``Path``; a
            relative path is resolved against the current working directory.
        language: value substituted for ``$language`` in the template.
        template_path: ``string.Template``-style file; ``language``,
            ``bibfile`` and ``citations`` are the values supplied to it.
        tmp_filename: path of the LaTeX file to write.

    Raises:
        KeyError: if the template contains a placeholder other than the
            three supplied ones.
    """
    # Backslashes are doubled: "\s" and "\c" are invalid escape sequences
    # in a normal string literal.
    citation_block = (
        "\n"
        "\\subsection*{%s}\n"
        "\\subsubsection*{authoryear}\n"
        "\\cite{%s}\n"
        "\\subsubsection*{year}\n"
        "\\cite*{%s}\n"
    )
    allcitekeys = "".join(citation_block % (key, key, key) for key in citekeys)

    with open(template_path, "r", encoding="utf-8") as tmp_template:
        fill_in_template = string.Template(tmp_template.read())

    # Callers may hand over a plain string (e.g. straight from argparse).
    bibfile = Path(bibfile)
    bibfile_path = bibfile if bibfile.is_absolute() else Path.cwd() / bibfile

    substitutions = fill_in_template.substitute(
        language=language,
        bibfile=bibfile_path,
        citations=allcitekeys,
    )

    with open(tmp_filename, "w", encoding="utf-8") as texfile:
        texfile.write(substitutions)
    logging.info("Wrote %s", tmp_filename)
# def write_dummy_latex ends here
def run_htlatex(tmp_filename):
    """Create HTML file from temporary LaTeX file.

    Runs ``htlatex``, ``biber`` and ``htlatex`` again, in that order, in the
    current working directory.  A non-zero exit status of a tool is logged
    as a warning and the remaining commands are still run.

    Args:
        tmp_filename: name of the LaTeX file without the ``.tex`` suffix.
    """
    # Quote the file name so that names containing spaces or quotes survive
    # the shlex.split() below; ordinary names are left unchanged.
    tex_file = shlex.quote(f"{tmp_filename}.tex")
    htlatex_command = f"htlatex {tex_file} 'xhtml,charset=utf-8' ' -cunihtf -utf8'"
    biber_command = f"biber {shlex.quote(str(tmp_filename))}"

    # NOTE(review): the second htlatex run presumably picks up the output
    # produced by biber -- confirm against the template in use.
    for tool, command in (
            ("htlatex", htlatex_command),
            ("biber", biber_command),
            ("htlatex", htlatex_command),
    ):
        logging.info("Using external command %s with command %s", tool, command)
        return_code = subprocess.call(shlex.split(command))
        if return_code != 0:
            logging.warning("%s exited with status %s", tool, return_code)
# def run_htlatex ends here
def main(
        bib_file,
        citekeys,
        tex_template,
        language,
        temp_dir,
        tmp_filename = "temp"
):
    """Build a ``<div id="refs">`` element with one formatted entry per key.

    A LaTeX file citing ``citekeys`` is written into ``temp_dir`` and
    compiled there with ``run_htlatex``.  From the resulting HTML file the
    first ``<dl class="thebibliography">`` is read; for each ``<dt>`` in it
    the first ``<p>`` of the following ``<dd>`` is passed through
    ``transform_reference`` and wrapped in ``<div id="ref-KEY">``, where KEY
    is the ``id`` of the ``<dt>`` with ``X0-`` removed.

    Args:
        bib_file: path of the bibliography file (``str`` or ``Path``).
        citekeys: iterable of citation keys to include.
        tex_template: path of the LaTeX template.
        language: language value handed to the template.
        temp_dir: directory for the temporary files; created if missing.
        tmp_filename: base name (without suffix) of the temporary files.

    Returns:
        lxml element ``<div id="refs" class="references">`` containing the
        wrapper ``<div>`` elements.

    Raises:
        IndexError: if the HTML file contains no bibliography list, or an
            entry has no ``<dd>``/``<p>`` following its ``<dt>``.
    """
    temp_dir = Path(temp_dir)
    tmp_filename = Path(tmp_filename)
    os.makedirs(temp_dir, exist_ok=True)

    # Must happen before the chdir below: a relative bibliography path is
    # resolved against the working directory at the time of the call.
    write_dummy_latex(
        citekeys,
        Path(bib_file),
        language,
        template_path=tex_template,
        tmp_filename=temp_dir / tmp_filename.with_suffix(".tex"),
    )

    wd = Path.cwd()
    os.chdir(temp_dir)
    try:
        run_htlatex(tmp_filename)
    finally:
        # Restore the working directory even if an external tool is missing.
        os.chdir(wd)

    tmp_path_html = (temp_dir / tmp_filename).with_suffix(".html")
    xml_tree = etree.parse(str(tmp_path_html))
    reference_list = xml_tree.xpath("//x:dl[@class='thebibliography']", namespaces=NS_MAP)[0]
    entries = reference_list.xpath("x:dt", namespaces=NS_MAP)

    reference_div = etree.fromstring("""<div id="refs" class="references"></div>""")
    for entry in entries:
        entry_citekey = entry.get("id").replace("X0-", "")
        reference_string = entry.xpath("following-sibling::x:dd[1]/x:p", namespaces=NS_MAP)[0]
        formatted_reference = transform_reference(reference_string)
        # Build the wrapper through the element API rather than by pasting
        # the key into XML text, so keys containing &, < or " cannot break it.
        wrapper_div = etree.Element("div")
        wrapper_div.set("id", f"ref-{entry_citekey}")
        wrapper_div.append(formatted_reference)
        reference_div.append(wrapper_div)
    return reference_div
""" | |
<h1 id="references" class="unnumbered">References</h1> | |
<div id="refs" class="references"> | |
<div id="ref-Appadurai_1986"> | |
<p>Appadurai, Arjun, ed. (1986). <em>The Social Life of Things: Commodities in Cultural Perspective</em>. Cambridge, UK: Cambridge University Press.</p> | |
</div> | |
""" | |
if __name__ == '__main__':
    # Command line entry point: render a fixed list of cite keys from the
    # given bibliography file and print the resulting markup.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "bibfile",
        type=Path,  # downstream code calls Path methods on this value
        help="File that contains the bibliography")
    parser.add_argument(
        "--tex-template",
        default = BASE_DIR / "bibformat" / "4ht" / "bibliography4ht.tex",
        help="the latex template to use for the bibliography"
    )
    parser.add_argument(
        "--temp-dir",
        default = "tmp_files",
        help="where to store temporary files"
    )
    args = parser.parse_args()

    # Hard-coded cite keys and language; they are not read from the
    # command line.
    citekeys = ["Edwards_2017", "Riggs:2016aa", "Bruckler:2001aa", "Zdenek:1939aa", "Caraffa_2011", "Uhlikova:2010aa", "Noll:1992aa", "Schwarz:1931aa", "Schwartz_1995", "Faber:2015ab", "Rohacek:2010aa", "Lachnit:2005aa", "Groll:1865aa", "Schlosser:1934aa", "Eitelberger:1863ab", "Wirth:1939aa", "Faber:2015aa", "Trnkova:2015aa", "Trnkova:2010aa", "Frodl:1988aa"]
    language = "de"
    # Maps the short language code to the language name given to the template.
    translations = {"de" : "german", "en" : "english", "it" : "italian", "fr" : "french"}

    references_in_html = main(
        bib_file = args.bibfile,
        citekeys = citekeys,
        tex_template = args.tex_template,
        language = translations[language],
        temp_dir = args.temp_dir
    )
    # tostring() returns bytes; decode so the markup itself is printed
    # instead of a Python bytes literal (b'...').
    print(etree.tostring(references_in_html).decode('utf-8'))
# finis