Skip to content
Permalink
1793c57b61
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
executable file 208 lines (168 sloc) 6.46 KB
#!/usr/bin/env python3
# -*- coding: utf-8; mode: python -*-
"""
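Create a formatted HTML reference list from a bibliography file.

A throw-away LaTeX document citing the wanted keys is generated from a
template, converted with htlatex and biber, and the bibliography of the
resulting XHTML file is post-processed into a ``<div>`` of references.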
"""
__version__ = "1.0"
__date__ = "20190313"
__author__ = "kthoden@mpiwg-berlin.mpg.de"
import argparse
import os
import subprocess
import shlex
import logging
import string
from lxml import etree
from pathlib import Path
# Path of this script as it was invoked, and its bare file name.
SCRIPT_PATH = Path(__file__)
SCRIPT_NAME = SCRIPT_PATH.name
# Grandparent directory of the resolved script location.
BASE_DIR = SCRIPT_PATH.resolve().parent.parent
# logging.basicConfig(level=logging.DEBUG, format=' %(asctime)s - %(levelname)s - %(message)s')
# Prefix-to-URI mapping handed to every XPath query on XHTML content.
NS_MAP = {"x": "http://www.w3.org/1999/xhtml"}
def transform_reference(reference_element, dialect='html'):
    """Formatting transformation for reference element.

    The element is serialised, line breaks are removed and the opening
    ``<p xmlns="http://www.w3.org/1999/xhtml" class="noindent">`` tag is
    replaced by a plain ``<p>``; the result is parsed again. In the newly
    parsed paragraph

    * direct ``a`` children are unwrapped (their content is kept),
    * ``span`` children of class ``ecti-1095`` become ``em``, or
      ``hi rend="italic"`` for the ``tei`` dialect,
    * ``span`` children of class ``ectt-1095`` become ``code``, or
      ``hi rend="monospace"`` for the ``tei`` dialect.

    :param reference_element: lxml element holding one formatted reference
    :param dialect: ``'tei'`` selects TEI markup, any other value HTML
    :returns: the re-parsed, transformed element (the input is not modified)
    """
    string_from_xml = etree.tostring(reference_element).decode('utf-8')
    removed_linebreak = string_from_xml.replace("\n", "")
    removed_namespace = removed_linebreak.replace(
        '<p xmlns="http://www.w3.org/1999/xhtml" class="noindent">', '<p>')
    cleaned_element = etree.fromstring(removed_namespace)

    # Links are renamed to a placeholder tag first and stripped at the very
    # end, which keeps their text and children in place.
    for link in cleaned_element.xpath("a", namespaces=NS_MAP):
        link.tag = "tagtobestripped"

    italic_spans = cleaned_element.xpath(
        "span[@class='ecti-1095']", namespaces=NS_MAP)
    # Once the namespace declaration has been dropped above, the children
    # are no longer in the XHTML namespace, so the unprefixed form is the
    # one that matches. The prefixed form is kept as an alternative for
    # paragraphs whose opening tag did not match the replacement.
    monospace_spans = cleaned_element.xpath(
        "span[@class='ectt-1095'] | x:span[@class='ectt-1095']",
        namespaces=NS_MAP)

    for spans, html_tag, tei_rend in (
            (italic_spans, "em", "italic"),
            (monospace_spans, "code", "monospace"),
    ):
        for span in spans:
            if dialect == 'tei':
                span.tag = "hi"
                span.set("rend", tei_rend)
            else:
                span.tag = html_tag
            span.attrib.pop('class')

    etree.strip_tags(cleaned_element, "tagtobestripped")
    return cleaned_element
# def transform_reference ends here
def write_dummy_latex(
        citekeys,
        bibfile,
        language,
        template_path,
        tmp_filename
):
    """Prepare a latex file that cites every key in *citekeys*.

    For each key a small section is generated that contains a ``\\cite``
    and a ``\\cite*`` command. The template read from *template_path* is a
    ``string.Template`` whose placeholders ``language``, ``bibfile`` and
    ``citations`` are filled in; the result is written to *tmp_filename*.

    :param citekeys: iterable of citation keys
    :param bibfile: bibliography file, as ``str`` or ``Path``; a relative
        path is made absolute against the current working directory
    :param language: value substituted for the ``language`` placeholder
    :param template_path: path of the LaTeX template
    :param tmp_filename: path of the LaTeX file to write
    :raises KeyError: if the template uses a placeholder other than the
        three named above (raised by ``string.Template.substitute``)
    """
    # Backslashes are escaped explicitly: "\s" and "\c" are invalid escape
    # sequences in a normal string literal.
    citation_block = (
        "\n"
        "\\subsection*{%s}\n"
        "\\subsubsection*{authoryear}\n"
        "\\cite{%s}\n"
        "\\subsubsection*{year}\n"
        "\\cite*{%s}\n"
    )
    allcitekeys = "".join(
        citation_block % (key, key, key) for key in citekeys)

    # Callers may hand in a plain string (e.g. straight from argparse).
    bibfile = Path(bibfile)
    bibfile_path = bibfile if bibfile.is_absolute() else Path.cwd() / bibfile

    with open(template_path, "r", encoding="utf-8") as tmp_template:
        fill_in_template = string.Template(tmp_template.read())

    substitutions = fill_in_template.substitute(
        language=language,
        bibfile=bibfile_path,
        citations=allcitekeys,
    )

    with open(tmp_filename, "w", encoding="utf-8") as texfile:
        texfile.write(substitutions)
    logging.info("Wrote %s", tmp_filename)
# def write_dummy_latex ends here
def run_htlatex(tmp_filename):
    """Create HTML file from temporary LaTeX file.

    Runs ``htlatex``, then ``biber``, then ``htlatex`` once more on
    *tmp_filename* (presumably so that the second htlatex pass picks up
    what biber produced). The commands inherit the current working
    directory, so a relative name is looked up there.

    A non-zero exit status is logged as a warning but does not abort the
    sequence.

    :param tmp_filename: name of the LaTeX file without the ``.tex`` suffix
    """
    # Argument lists are built directly rather than by splitting a command
    # string, so names containing whitespace or quotes stay one argument.
    # The leading blank of the last htlatex option is kept as in the
    # original command line.
    htlatex = [
        "htlatex",
        f"{tmp_filename}.tex",
        "xhtml,charset=utf-8",
        " -cunihtf -utf8",
    ]
    biber = ["biber", str(tmp_filename)]

    for arguments in (htlatex, biber, htlatex):
        program = arguments[0]
        command = " ".join(shlex.quote(argument) for argument in arguments)
        logging.info(
            "Using external command %s with command %s", program, command)
        exit_code = subprocess.call(arguments)
        if exit_code != 0:
            logging.warning("%s exited with status %s", program, exit_code)
# def run_htlatex ends here
def main(
        bib_file,
        citekeys,
        tex_template,
        language,
        temp_dir,
        tmp_filename = "temp"
):
    """Build a ``<div>`` of formatted references for *citekeys*.

    A LaTeX file citing all keys is written into *temp_dir* and converted
    by ``run_htlatex`` (executed with *temp_dir* as working directory). The
    ``dl`` element of class ``thebibliography`` in the resulting HTML file
    is then read: for every ``dt`` entry the first paragraph of the
    following ``dd`` is passed through ``transform_reference`` and wrapped
    in a ``<div id="ref-KEY">``, where KEY is the ``id`` of the ``dt``
    without its ``X0-`` part.

    Markup this output is meant to fit into (the heading itself is not
    generated here)::

        <h1 id="references" class="unnumbered">References</h1>
        <div id="refs" class="references">
        <div id="ref-Appadurai_1986">
        <p>Appadurai, Arjun, ed. (1986). <em>The Social Life of Things: Commodities in Cultural Perspective</em>. Cambridge, UK: Cambridge University Press.</p>
        </div>

    :param bib_file: bibliography file, as ``str`` or ``Path``
    :param citekeys: iterable of citation keys to format
    :param tex_template: path of the LaTeX template
    :param language: language value handed to the template
    :param temp_dir: directory for intermediate files; created if missing
    :param tmp_filename: base name (no suffix) of the intermediate files
    :returns: lxml element ``<div id="refs" class="references">``
    :raises IndexError: if the HTML file has no ``thebibliography`` list or
        an entry has no paragraph in its ``dd``
    """
    # The helper that writes the LaTeX file calls Path methods on this.
    bib_file = Path(bib_file)
    temp_dir = Path(temp_dir)
    tmp_filename = Path(tmp_filename)
    temp_dir.mkdir(parents=True, exist_ok=True)

    write_dummy_latex(
        citekeys,
        bib_file,
        language,
        template_path=tex_template,
        tmp_filename=temp_dir / tmp_filename.with_suffix(".tex"),
    )

    # Always return to the previous directory, even if a tool fails to run.
    previous_dir = Path.cwd()
    os.chdir(temp_dir)
    try:
        run_htlatex(tmp_filename)
    finally:
        os.chdir(previous_dir)

    tmp_path_html = (temp_dir / tmp_filename).with_suffix(".html")
    xml_tree = etree.parse(str(tmp_path_html))
    reference_list = xml_tree.xpath(
        "//x:dl[@class='thebibliography']", namespaces=NS_MAP)[0]
    entries = reference_list.xpath("x:dt", namespaces=NS_MAP)

    reference_div = etree.fromstring(
        """<div id="refs" class="references"></div>""")
    for entry in entries:
        entry_citekey = entry.get("id").replace("X0-", "")
        reference_string = entry.xpath(
            "following-sibling::x:dd[1]/x:p", namespaces=NS_MAP)[0]
        formatted_reference = transform_reference(reference_string)
        # Set the id through the API instead of formatting it into XML
        # source, so keys containing &, < or quotes cannot break parsing.
        wrapper_div = etree.Element("div")
        wrapper_div.set("id", f"ref-{entry_citekey}")
        wrapper_div.append(formatted_reference)
        reference_div.append(wrapper_div)
    return reference_div
if __name__ == '__main__':
    # Command line entry point: format a fixed list of citation keys from
    # the given bibliography file and print the resulting markup.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "bibfile",
        # Parsed as Path because write_dummy_latex calls is_absolute() on it.
        type=Path,
        help="File that contains the bibliography")
    parser.add_argument(
        "--tex-template",
        default=BASE_DIR / "bibformat" / "4ht" / "bibliography4ht.tex",
        help="the latex template to use for the bibliography"
    )
    parser.add_argument(
        "--temp-dir",
        default="tmp_files",
        help="where to store temporary files"
    )
    args = parser.parse_args()

    citekeys = [
        "Edwards_2017", "Riggs:2016aa", "Bruckler:2001aa", "Zdenek:1939aa",
        "Caraffa_2011", "Uhlikova:2010aa", "Noll:1992aa", "Schwarz:1931aa",
        "Schwartz_1995", "Faber:2015ab", "Rohacek:2010aa", "Lachnit:2005aa",
        "Groll:1865aa", "Schlosser:1934aa", "Eitelberger:1863ab",
        "Wirth:1939aa", "Faber:2015aa", "Trnkova:2015aa", "Trnkova:2010aa",
        "Frodl:1988aa",
    ]
    language = "de"
    # Maps the short language code to the name handed to the LaTeX template.
    translations = {"de" : "german", "en" : "english", "it" : "italian", "fr" : "french"}
    temp_dir = Path(args.temp_dir)

    references_in_html = main(
        bib_file=args.bibfile,
        citekeys=citekeys,
        tex_template=args.tex_template,
        language=translations[language],
        temp_dir=temp_dir
    )
    # tostring() returns bytes; decode so the markup itself is printed
    # rather than its b'...' representation.
    print(etree.tostring(references_in_html).decode("utf-8"))
# finis