# bib2html.py

#!/usr/bin/env python3
# -*- coding: utf-8; mode: python -*-

"""
Convert a bibliography file into HTML snippets.

The script fills a LaTeX template with a table of citations and one
bibliography per keyword, runs htlatex and biber on it, repairs the
generated XHTML with iconv and tidy, and finally extracts the formatted
citations and reference lists from that output with lxml.
"""

__version__ = "1.0"
__date__ = "20190313"
__author__ = "kthoden@mpiwg-berlin.mpg.de"

from utils.load_config import exec_command, ToFile, ToLog, check_executable

import argparse
import os
import subprocess
import shlex
import logging
import string
import shutil
from lxml import etree
from pathlib import Path
import sys
import textwrap

# Directory two levels above this file (the parent of the script's directory).
BASE_DIR = Path( __file__ ).resolve().parent.parent
# Path of this script exactly as given by __file__ (not resolved), and its
# bare file name.
SCRIPT_PATH = Path( __file__ )
SCRIPT_NAME = SCRIPT_PATH.name

# logging.basicConfig(level=logging.DEBUG, format=' %(asctime)s - %(levelname)s - %(message)s')

# Prefix map used by the XPath queries that address XHTML-namespaced elements.
NS_MAP = {"x" : 'http://www.w3.org/1999/xhtml'}

# Chapter headings written into the LaTeX file by write_dummy_latex() and
# looked up again in the generated HTML by main() to locate each bibliography.
BIBLIOGRAPHY_CHAPTER_NO_KEYWORD = "BIBLIOGRAPHY"
BIBLIOGRAPHY_CHAPTER = "BIBLIOGRAPHY {keyword}"

def latex_escape_non_ascii( input_str ):
    """Replace every non-ASCII character by ``\\entity{<code point>}``.

    Characters up to and including U+007F are copied unchanged; any other
    character is replaced by the text ``\\entity{N}``, where N is the
    decimal value of ``ord()`` for that character.

    :param input_str: the text to escape.
    :returns: the escaped text, containing ASCII characters only.
    """
    # The backslash is doubled: "\e" is not a valid escape sequence and newer
    # Python versions warn about it.  join() avoids quadratic concatenation.
    return "".join(
        c if ord(c) <= 0x7F else "\\entity{{{}}}".format( ord(c) )
        for c in input_str
    )

def check_executables():
    """Call check_executable() for each external program this script runs."""
    for program in ("htlatex", "tidy", "biber"):
        check_executable( program )

def transform_reference(reference_element, dialect='html'):
    """Turn one reference paragraph into clean HTML or TEI markup.

    The element is serialised, line breaks are replaced by spaces and the
    start tag ``<p xmlns="http://www.w3.org/1999/xhtml" class="noindent">``
    is reduced to ``<p>``; the resulting string is parsed again.  In the
    re-parsed paragraph, direct ``a`` children are unwrapped (their content
    is kept), ``span`` children of class ``ecti-1095`` become ``em`` and
    those of class ``ectt-1095`` become ``code``.  With ``dialect='tei'``
    both kinds of span become ``hi`` with ``rend="italic"`` or
    ``rend="monospace"`` instead.

    :param reference_element: lxml element holding the reference paragraph.
    :param dialect: ``'tei'`` for TEI output; any other value yields HTML.
    :returns: the newly parsed, transformed lxml element.
    """

    string_from_xml = etree.tostring(reference_element).decode('utf-8')
    removed_linebreak = string_from_xml.replace("\n", " ")
    removed_namespace = removed_linebreak.replace('<p xmlns="http://www.w3.org/1999/xhtml" class="noindent">', '<p>')
    cleaned_element = etree.fromstring(removed_namespace)

    # Links are renamed first and unwrapped at the very end by strip_tags().
    links = cleaned_element.xpath("a", namespaces=NS_MAP)
    for link in links:
        link.tag = "tagtobestripped"

    ecti_span = cleaned_element.xpath("span[@class='ecti-1095']", namespaces=NS_MAP)
    for ecti in ecti_span:
        if dialect == 'tei':
            ecti.tag = "hi"
            ecti.set("rend", "italic")
        else:
            ecti.tag = "em"
        ecti.attrib.pop('class')

    # Once the namespace declaration has been removed the children carry no
    # namespace, so a query for x:span alone would never match them; both
    # forms are queried.
    ectt_span = cleaned_element.xpath(
        "span[@class='ectt-1095'] | x:span[@class='ectt-1095']",
        namespaces=NS_MAP
    )
    for ectt in ectt_span:
        if dialect == 'tei':
            ectt.tag = "hi"
            ectt.set("rend", "monospace")
        else:
            # Retag the monospace span itself (this used to assign to the
            # loop variable of the italic loop above).
            ectt.tag = "code"
        ectt.attrib.pop('class')

    etree.strip_tags(cleaned_element, "tagtobestripped")

    return cleaned_element
# def transform_reference ends here

def write_dummy_latex(
        citekeys,
        bibfile,
        language,
        keywords,
        template_path,
        tmp_filename
):
    """Prepare the LaTeX file from which the citations are rendered.

    Three things are written into the directory of ``tmp_filename``:

    * ``<stem>_orig.bib``, an unchanged copy of ``bibfile``, and a file named
      like ``bibfile`` in which non-ASCII characters are escaped by
      latex_escape_non_ascii();
    * ``<tmp_filename name>.orig``, the filled-in template before escaping
      (kept for debugging);
    * ``tmp_filename`` itself, the filled-in and escaped template.

    The template is a ``string.Template`` with the placeholders ``language``,
    ``bibfile``, ``citations`` and ``bibliographies``.

    :param citekeys: cite keys; each becomes one row of a three-column table
        (key, ``\\cite``, ``\\cite*``).
    :param bibfile: bibliography file (``str`` or ``Path``).
    :param language: value substituted for the ``language`` placeholder.
    :param keywords: one bibliography chapter is emitted per entry; the
        empty string stands for the bibliography without keyword filter.
    :param template_path: path of the LaTeX template.
    :param tmp_filename: path of the LaTeX file to write (``str`` or ``Path``).
    """
    # The command line hands over plain strings; the code below needs Path.
    bibfile = Path( bibfile )
    tmp_filename = Path( tmp_filename )
    tmp_dir = tmp_filename.parent

    table_rows = "".join(
        f"\\verb|{key}|  &\\cite{{{key}}}&\\cite*{{{key}}}\\\\\n"
        for key in citekeys
    )
    allcitekeys = "\\begin{tabular}{l l l}\n" + table_rows + "\\end{tabular}\n"

    with open(template_path, "r", encoding="utf-8") as tmp_template:
        fill_in_template = string.Template(tmp_template.read())

    bibliographies = ""
    for keyword in keywords:
        if keyword == "":
            chapter_heading = BIBLIOGRAPHY_CHAPTER_NO_KEYWORD
            print_command = "\\printbibliography"
        else:
            chapter_heading = BIBLIOGRAPHY_CHAPTER.format( keyword=keyword )
            print_command = f"\\printbibliography[keyword={{{keyword}}}]"
        # Backslashes are doubled: "\c" and "\p" are not valid escape
        # sequences and newer Python versions warn about them.
        bibliographies += f"\n\\chapter{{{chapter_heading}}}\n{print_command}\n"

    # Keep a pristine copy; the file LaTeX reads gets its non-ASCII
    # characters escaped.
    bibfile_orig = tmp_dir / (bibfile.stem + "_orig.bib")
    bibfile_local = tmp_dir / bibfile.name
    shutil.copyfile(
            bibfile,
            bibfile_orig
    )
    with open( bibfile_orig, "r", encoding="utf-8" ) as in_file, \
            open( bibfile_local, "w", encoding="utf-8" ) as out_file:
        for line in in_file:
            out_file.write( latex_escape_non_ascii( line ) )

    substitions = fill_in_template.substitute(
            language = language,
            bibfile = bibfile.name,
            citations = allcitekeys,
            bibliographies = bibliographies
    )
    # (just for debugging: save with unescaped non-ascii characters)
    with open(tmp_dir / (tmp_filename.name + ".orig"), "w", encoding="utf-8") as texfile:
        texfile.write( substitions )

    with open(tmp_filename, "w", encoding="utf-8") as texfile:
        texfile.write( latex_escape_non_ascii( substitions ) )

    logging.info(f"Wrote {tmp_filename}")
# def write_dummy_latex ends here


def run_htlatex(
        tmp_filename,
        log_dir
):
    """Run htlatex, biber and htlatex again on the temporary LaTeX file.

    Each command is passed to exec_command() together with a ToFile target
    inside ``log_dir`` (``htlatex1.log``, ``biber.log``, ``htlatex2.log``).

    :param tmp_filename: name of the LaTeX file without the ``.tex`` suffix.
    :param log_dir: directory for the three log files.
    """
    log_path = Path(log_dir)
    # (an htxelatex call with ' -cunihtf -utf8' used to be kept here as a
    # commented-out alternative)
    htlatex_call = f"htlatex {tmp_filename}.tex 'xhtml,charset=utf-8,fn-in' ' -utf8' '' '--interaction=nonstopmode'"
    biber_call = f"biber {tmp_filename}"

    steps = (
        (htlatex_call, "htlatex1.log"),
        (biber_call, "biber.log"),
        (htlatex_call, "htlatex2.log"),
    )
    for command, log_name in steps:
        exec_command(
            command,
            output_to = ToFile( log_path / log_name )
        )
# def run_htlatex ends here

def create_citations(citekeys, xml_tree, style):
    """Collect the formatted citations of one style from the parsed HTML.

    For every cite key, the table cell containing a ``span`` whose text
    equals the key is located; the text of the first following cell is the
    ``authoryear`` citation, that of the second following cell the ``year``
    citation.

    :param citekeys: cite keys to look up.
    :param xml_tree: parsed XHTML document produced from the LaTeX run.
    :param style: ``"authoryear"`` or ``"year"``.
    :returns: a ``div`` element of class ``style`` holding an ``h1`` heading
        and a ``p`` with one ``span class="citation"`` per cite key.
    :raises SystemExit: with status 1 for an unknown style or when a cite
        key cannot be found in the table.
    """

    logging.debug("creating citations")
    # Offset of the wanted cell, counted from the cell that holds the key.
    column_by_style = {"authoryear": 1, "year": 2}
    if style not in column_by_style:
        logging.error("Unrecognized citation format, choose 'authoryear' or 'year'. Exiting")
        # A bare sys.exit() would signal success to the calling process.
        sys.exit( 1 )

    surrounding_div = etree.Element("div")
    surrounding_div.set("class", style)
    heading = etree.SubElement(surrounding_div, "h1", id=f"cite{style}")
    heading.text = f"cite{style}"
    p_element = etree.SubElement(surrounding_div, "p")

    # The cite key is bound as an XPath variable so that quotes in a key
    # cannot break the expression.
    citation_xpath = (
        "//x:table/x:tr/x:td[.//x:span[text() = $citekey] ]"
        f"/following-sibling::x:td[{column_by_style[style]}]/text()"
    )

    for citekey in citekeys:
        logging.debug( f"working on citekey: '{citekey}', style: '{style}'" )
        citation_el = xml_tree.xpath(
            citation_xpath,
            namespaces=NS_MAP,
            citekey=citekey
        )
        if not citation_el:
            logging.error( f"error parsing formatted citation: '{citekey}', style: '{style}'" )
            sys.exit( 1 )

        format_citation = citation_el[0].strip()
        logging.debug( f"formatted: '{format_citation}'" )

        # Build the element through the API instead of parsing an
        # interpolated string: the text is unescaped at this point, so an
        # '&' or '<' in a citation would make the parser fail.
        span_element = etree.SubElement(p_element, "span")
        span_element.set("class", "citation")
        span_element.set("data-cites", citekey)
        span_element.text = format_citation

    return surrounding_div
# def create_citations ends here

def create_reference_list(reference_list):
    """Create the HTML snippet for a list of references.

    Every ``dt`` child of ``reference_list`` supplies the cite key (its
    ``id`` attribute without the leading ``X0-``); the first paragraph of
    the ``dd`` that follows it supplies the reference text, which is passed
    through transform_reference().

    :param reference_list: the ``dl`` element of one bibliography.
    :returns: ``<div id="refs" class="references">`` containing one
        ``<div id="ref-KEY">`` per entry.
    :raises SystemExit: with status 1 when an entry has no ``id`` or no
        reference paragraph.
    """
    id_prefix = "X0-"

    reference_div = etree.Element("div")
    reference_div.set("id", "refs")
    reference_div.set("class", "references")

    for entry in reference_list.xpath("x:dt", namespaces=NS_MAP):
        entry_id = entry.get("id")
        if entry_id is None:
            logging.error( "error parsing bibliography: entry without id" )
            sys.exit( 1 )
        # Only strip the marker at the start; a plain replace() would also
        # remove it from the middle of a cite key.
        if entry_id.startswith(id_prefix):
            entry_citekey = entry_id[len(id_prefix):]
        else:
            entry_citekey = entry_id

        paragraphs = entry.xpath("following-sibling::x:dd[1]/x:p", namespaces=NS_MAP)
        if not paragraphs:
            logging.error( f"error parsing bibliography: no reference text for '{entry_citekey}'" )
            sys.exit( 1 )

        # Built through the API so that special characters in a cite key
        # cannot produce malformed markup.
        wrapper_div = etree.SubElement(reference_div, "div", id=f"ref-{entry_citekey}")
        wrapper_div.append(transform_reference(paragraphs[0]))

    return reference_div
# def create_reference_list ends here


def main(
        bib_file,
        citekeys,
        tex_template,
        language,
        temp_dir,
        output_file,
        keywords = None,
        log_dir = "logs"
):
    """Render citations and bibliographies to HTML and write ``output_file``.

    Steps: write the LaTeX file (write_dummy_latex), run htlatex/biber inside
    ``temp_dir`` (run_htlatex), convert the result to UTF-8 with iconv,
    repair it with tidy, normalise it to Unicode form NFKD, then extract the
    citations (create_citations) and one reference list per keyword
    (create_reference_list).  A reference list looks like
    ``<div id="refs" class="references"><div id="ref-KEY"><p>...</p></div></div>``.

    :param bib_file: bibliography file (``str`` or ``Path``).
    :param citekeys: cite keys to format.
    :param tex_template: path of the LaTeX template.
    :param language: language value for the template.
    :param temp_dir: directory for intermediate files; created if missing.
    :param output_file: HTML file to write; its name without the suffix is
        also used as the name of the intermediate files.
    :param keywords: bibliography keywords; ``None`` means ``[""]``, i.e. a
        single bibliography without keyword filter.
    :param log_dir: directory for the log files (``str`` or ``Path``).
    :returns: dict with the keys ``"references"`` (keyword -> element),
        ``"citation_authoryear"`` and ``"citation_year"``.
    :raises SystemExit: with status 1 when a bibliography cannot be located.
    """
    import unicodedata

    if keywords is None:
        keywords = [""]
    bib_file = Path( bib_file )
    temp_dir = Path( temp_dir )
    output_file = Path( output_file )
    # Resolve now: the working directory changes below.  Wrapping in Path
    # makes the string default usable.
    log_dir = Path( log_dir ).resolve()

    temp_dir.mkdir( parents=True, exist_ok=True )

    tmp_filename = Path(output_file.name) . with_suffix( "" )
    # Name shared by all intermediate files.
    job_name = tmp_filename.with_suffix( "" ).name

    write_dummy_latex(
            citekeys,
            bib_file,
            language,
            keywords,
            template_path = tex_template,
            tmp_filename = temp_dir / f"{job_name}.tex"
    )

    wd = Path.cwd()
    logging.info(f"cd {temp_dir}")
    os.chdir( temp_dir )
    try:
        run_htlatex(
                Path( job_name ),
                log_dir = log_dir
        )
    finally:
        # Go back even when a command fails, so the caller's working
        # directory is never left changed.
        logging.info(f"cd {wd}")
        os.chdir( wd )

    tmp_path_html = temp_dir / f"{job_name}.html"
    # Built from the job name so it cannot coincide with tmp_path_html for
    # output names that contain additional dots.
    tmp_path_html_utf8 = temp_dir / f"{job_name}-utf8.html"
    tmp_path_html_fixed1 = temp_dir / f"{job_name}.1.html"
    tmp_path_html_fixed2 = temp_dir / f"{job_name}.2.html"

    # Quoted so that directories containing spaces survive; the htlatex
    # command already relies on exec_command() honouring shell quoting.
    html_arg = shlex.quote( str(tmp_path_html) )
    utf8_arg = shlex.quote( str(tmp_path_html_utf8) )
    fixed1_arg = shlex.quote( str(tmp_path_html_fixed1) )

    exec_command(
        f"iconv -f ISO-8859-1 -t UTF-8 --output={utf8_arg} {html_arg}"
    )

    # htlatex seems to produce incorrect xhtml.
    # We have to fix it
    # (this will e.g. replace '&' by '&amp;'):
    exec_command(
            f"tidy -numeric -output {fixed1_arg} {utf8_arg}",
            exit_code_ok = lambda x: x in (0,1)
    )

    # normalize unicode, e.g. replace ligatures (like "ﬀ" -> "ff"):
    with open( tmp_path_html_fixed1, "r", encoding="utf-8" ) as in_file, \
            open( tmp_path_html_fixed2, "w", encoding="utf-8" ) as out_file:
        for line in in_file:
            out_file.write( unicodedata.normalize("NFKD", line) )

    xml_tree = etree.parse(str(tmp_path_html_fixed2))

    citation_authoryear = create_citations(citekeys, xml_tree, "authoryear")
    citation_year = create_citations(citekeys, xml_tree, "year")

    bibliographies_dict = {}

    for keyword in keywords:
        if keyword == "":
            chapter_heading = BIBLIOGRAPHY_CHAPTER_NO_KEYWORD
        else:
            chapter_heading = BIBLIOGRAPHY_CHAPTER.format( keyword = keyword )
        # '<dl class="thebibliography"> ... </dl>
        # The heading is bound as an XPath variable so that quotes in a
        # keyword cannot break the expression.
        bibliography_el = xml_tree.xpath(
                "//x:body/x:p[text() = $heading]/following-sibling::x:dl[1]",
                namespaces = NS_MAP,
                heading = chapter_heading
        )
        if len(bibliography_el) != 1:
            logging.error( f"error parsing bibliography with keyword '{keyword}'" )
            sys.exit( 1 )

        bibliographies_dict[keyword] = create_reference_list(bibliography_el[0])

    html_element = etree.Element("html")
    html_element.append(citation_authoryear)
    html_element.append(citation_year)
    for keyword in keywords:
        bibl_el = etree.SubElement(
                html_element,
                "div",
                **({} if keyword == "" else { 'keyword': keyword } )
        )
        bibl_el.append( bibliographies_dict[keyword] )

    tree = etree.ElementTree(html_element)
    logging.info("writing '%s'" % output_file)
    tree.write(str(output_file), pretty_print=True, xml_declaration=True, encoding="utf-8")

    return {
            "references": bibliographies_dict,
            "citation_authoryear": citation_authoryear,
            "citation_year": citation_year,
    }

if __name__ == '__main__':
    # Command-line entry point: formats a fixed list of cite keys from the
    # given bibliography and prints the resulting HTML fragments.
    parser = argparse.ArgumentParser()
    parser.add_argument(
            "bibfile",
            help="File that contains the bibliography")
    parser.add_argument(
            "--tex-template",
            default = BASE_DIR / "bibformat" / "4ht" / "bibliography4ht.tex",
            help="the latex template to use for the bibliography"
    )
    parser.add_argument(
            "--temp-dir",
            default = "tmp_files",
            help="where to store temporary files"
    )
    # main() requires an output file; it was not passed at all before.
    parser.add_argument(
            "--output-file",
            default = "bibliography.html",
            help="where to write the resulting HTML file"
    )
    args = parser.parse_args()

    check_executables()

    citekeys = ["Edwards_2017", "Riggs:2016aa", "Bruckler:2001aa", "Zdenek:1939aa", "Caraffa_2011", "Uhlikova:2010aa", "Noll:1992aa", "Schwarz:1931aa", "Schwartz_1995", "Faber:2015ab", "Rohacek:2010aa", "Lachnit:2005aa", "Groll:1865aa", "Schlosser:1934aa", "Eitelberger:1863ab", "Wirth:1939aa", "Faber:2015aa", "Trnkova:2015aa", "Trnkova:2010aa", "Frodl:1988aa"]
    language = "de"

    translations = {"de" : "german", "en" : "english", "it" : "italian", "fr" : "french"}

    # Paths are handed over as Path objects because the functions called by
    # main() use Path attributes on them.
    references_in_html = main(
            bib_file = Path( args.bibfile ),
            citekeys = citekeys,
            tex_template = args.tex_template,
            language = translations[language],
            temp_dir = Path( args.temp_dir ),
            output_file = Path( args.output_file ),
            log_dir = Path( "logs" )
    )

    # main() returns a dict of elements, which cannot be serialised as a
    # whole; print each fragment.
    for fragment in (
            references_in_html["citation_authoryear"],
            references_in_html["citation_year"],
            *references_in_html["references"].values(),
    ):
        print( etree.tostring( fragment, encoding="unicode", pretty_print=True ) )
# finis
	#!/usr/bin/env python3
	# -*- coding: utf-8; mode: python -*-

	"""
	Convert a bibliography file into HTML snippets.

	The script fills a LaTeX template with a table of citations and one
	bibliography per keyword, runs htlatex and biber on it, repairs the
	generated XHTML with iconv and tidy, and finally extracts the formatted
	citations and reference lists from that output with lxml.
	"""

	__version__ = "1.0"
	__date__ = "20190313"
	__author__ = "kthoden@mpiwg-berlin.mpg.de"

	from utils.load_config import exec_command, ToFile, ToLog, check_executable

	import argparse
	import os
	import subprocess
	import shlex
	import logging
	import string
	import shutil
	from lxml import etree
	from pathlib import Path
	import sys
	import textwrap

	# Directory two levels above this file (the parent of the script's directory).
	BASE_DIR = Path( __file__ ).resolve().parent.parent
	# Path of this script exactly as given by __file__ (not resolved), and its
	# bare file name.
	SCRIPT_PATH = Path( __file__ )
	SCRIPT_NAME = SCRIPT_PATH.name

	# logging.basicConfig(level=logging.DEBUG, format=' %(asctime)s - %(levelname)s - %(message)s')

	# Prefix map used by the XPath queries that address XHTML-namespaced elements.
	NS_MAP = {"x" : 'http://www.w3.org/1999/xhtml'}

	# Chapter headings written into the LaTeX file by write_dummy_latex() and
	# looked up again in the generated HTML by main() to locate each bibliography.
	BIBLIOGRAPHY_CHAPTER_NO_KEYWORD = "BIBLIOGRAPHY"
	BIBLIOGRAPHY_CHAPTER = "BIBLIOGRAPHY {keyword}"

	def latex_escape_non_ascii( input_str ):
	    """Replace every non-ASCII character by ``\\entity{<code point>}``.

	    Characters up to and including U+007F are copied unchanged; any other
	    character is replaced by the text ``\\entity{N}``, where N is the
	    decimal value of ``ord()`` for that character.

	    :param input_str: the text to escape.
	    :returns: the escaped text, containing ASCII characters only.
	    """
	    # The backslash is doubled: "\e" is not a valid escape sequence and
	    # newer Python versions warn about it.  join() avoids quadratic
	    # concatenation.
	    return "".join(
	        c if ord(c) <= 0x7F else "\\entity{{{}}}".format( ord(c) )
	        for c in input_str
	    )

	def check_executables():
	    """Call check_executable() for each external program this script runs."""
	    # Body re-indented: the statements had lost their indentation and the
	    # function could not be parsed.
	    for program in ("htlatex", "tidy", "biber"):
	        check_executable( program )

	def transform_reference(reference_element, dialect='html'):
	    """Turn one reference paragraph into clean HTML or TEI markup.

	    The element is serialised, line breaks are replaced by spaces and the
	    start tag ``<p xmlns="http://www.w3.org/1999/xhtml" class="noindent">``
	    is reduced to ``<p>``; the resulting string is parsed again.  In the
	    re-parsed paragraph, direct ``a`` children are unwrapped (their content
	    is kept), ``span`` children of class ``ecti-1095`` become ``em`` and
	    those of class ``ectt-1095`` become ``code``.  With ``dialect='tei'``
	    both kinds of span become ``hi`` with ``rend="italic"`` or
	    ``rend="monospace"`` instead.

	    :param reference_element: lxml element holding the reference paragraph.
	    :param dialect: ``'tei'`` for TEI output; any other value yields HTML.
	    :returns: the newly parsed, transformed lxml element.
	    """

	    string_from_xml = etree.tostring(reference_element).decode('utf-8')
	    removed_linebreak = string_from_xml.replace("\n", " ")
	    removed_namespace = removed_linebreak.replace('<p xmlns="http://www.w3.org/1999/xhtml" class="noindent">', '<p>')
	    cleaned_element = etree.fromstring(removed_namespace)

	    # Links are renamed first and unwrapped at the very end by strip_tags().
	    links = cleaned_element.xpath("a", namespaces=NS_MAP)
	    for link in links:
	        link.tag = "tagtobestripped"

	    ecti_span = cleaned_element.xpath("span[@class='ecti-1095']", namespaces=NS_MAP)
	    for ecti in ecti_span:
	        if dialect == 'tei':
	            ecti.tag = "hi"
	            ecti.set("rend", "italic")
	        else:
	            ecti.tag = "em"
	        ecti.attrib.pop('class')

	    # Once the namespace declaration has been removed the children carry no
	    # namespace, so a query for x:span alone would never match them; both
	    # forms are queried.
	    ectt_span = cleaned_element.xpath(
	        "span[@class='ectt-1095'] | x:span[@class='ectt-1095']",
	        namespaces=NS_MAP
	    )
	    for ectt in ectt_span:
	        if dialect == 'tei':
	            ectt.tag = "hi"
	            ectt.set("rend", "monospace")
	        else:
	            # Retag the monospace span itself (this used to assign to the
	            # loop variable of the italic loop above).
	            ectt.tag = "code"
	        ectt.attrib.pop('class')

	    etree.strip_tags(cleaned_element, "tagtobestripped")

	    return cleaned_element
	# def transform_reference ends here

	def write_dummy_latex(
	        citekeys,
	        bibfile,
	        language,
	        keywords,
	        template_path,
	        tmp_filename
	):
	    """Prepare the LaTeX file from which the citations are rendered.

	    Three things are written into the directory of ``tmp_filename``:

	    * ``<stem>_orig.bib``, an unchanged copy of ``bibfile``, and a file
	      named like ``bibfile`` in which non-ASCII characters are escaped by
	      latex_escape_non_ascii();
	    * ``<tmp_filename name>.orig``, the filled-in template before escaping
	      (kept for debugging);
	    * ``tmp_filename`` itself, the filled-in and escaped template.

	    The template is a ``string.Template`` with the placeholders
	    ``language``, ``bibfile``, ``citations`` and ``bibliographies``.

	    :param citekeys: cite keys; each becomes one row of a three-column
	        table (key, ``\\cite``, ``\\cite*``).
	    :param bibfile: bibliography file (``str`` or ``Path``).
	    :param language: value substituted for the ``language`` placeholder.
	    :param keywords: one bibliography chapter is emitted per entry; the
	        empty string stands for the bibliography without keyword filter.
	    :param template_path: path of the LaTeX template.
	    :param tmp_filename: path of the LaTeX file to write (``str`` or ``Path``).
	    """
	    # The command line hands over plain strings; the code below needs Path.
	    bibfile = Path( bibfile )
	    tmp_filename = Path( tmp_filename )
	    tmp_dir = tmp_filename.parent

	    # "\verb|key|": the stray backslashes before the bars were removed.
	    table_rows = "".join(
	        f"\\verb|{key}|  &\\cite{{{key}}}&\\cite*{{{key}}}\\\\\n"
	        for key in citekeys
	    )
	    allcitekeys = "\\begin{tabular}{l l l}\n" + table_rows + "\\end{tabular}\n"

	    with open(template_path, "r", encoding="utf-8") as tmp_template:
	        fill_in_template = string.Template(tmp_template.read())

	    bibliographies = ""
	    for keyword in keywords:
	        if keyword == "":
	            chapter_heading = BIBLIOGRAPHY_CHAPTER_NO_KEYWORD
	            print_command = "\\printbibliography"
	        else:
	            chapter_heading = BIBLIOGRAPHY_CHAPTER.format( keyword=keyword )
	            print_command = f"\\printbibliography[keyword={{{keyword}}}]"
	        # Backslashes are doubled: "\c" and "\p" are not valid escape
	        # sequences and newer Python versions warn about them.
	        bibliographies += f"\n\\chapter{{{chapter_heading}}}\n{print_command}\n"

	    # Keep a pristine copy; the file LaTeX reads gets its non-ASCII
	    # characters escaped.
	    bibfile_orig = tmp_dir / (bibfile.stem + "_orig.bib")
	    bibfile_local = tmp_dir / bibfile.name
	    shutil.copyfile(
	            bibfile,
	            bibfile_orig
	    )
	    with open( bibfile_orig, "r", encoding="utf-8" ) as in_file, \
	            open( bibfile_local, "w", encoding="utf-8" ) as out_file:
	        for line in in_file:
	            out_file.write( latex_escape_non_ascii( line ) )

	    substitions = fill_in_template.substitute(
	            language = language,
	            bibfile = bibfile.name,
	            citations = allcitekeys,
	            bibliographies = bibliographies
	    )
	    # (just for debugging: save with unescaped non-ascii characters)
	    with open(tmp_dir / (tmp_filename.name + ".orig"), "w", encoding="utf-8") as texfile:
	        texfile.write( substitions )

	    with open(tmp_filename, "w", encoding="utf-8") as texfile:
	        texfile.write( latex_escape_non_ascii( substitions ) )

	    logging.info(f"Wrote {tmp_filename}")
	# def write_dummy_latex ends here


	def run_htlatex(
	        tmp_filename,
	        log_dir
	):
	    """Run htlatex, biber and htlatex again on the temporary LaTeX file.

	    Each command is passed to exec_command() together with a ToFile target
	    inside ``log_dir`` (``htlatex1.log``, ``biber.log``, ``htlatex2.log``).

	    :param tmp_filename: name of the LaTeX file without the ``.tex`` suffix.
	    :param log_dir: directory for the three log files.
	    """
	    # Body re-indented: the statements had lost their indentation and the
	    # function could not be parsed.
	    log_path = Path(log_dir)
	    htlatex_call = f"htlatex {tmp_filename}.tex 'xhtml,charset=utf-8,fn-in' ' -utf8' '' '--interaction=nonstopmode'"
	    biber_call = f"biber {tmp_filename}"

	    steps = (
	        (htlatex_call, "htlatex1.log"),
	        (biber_call, "biber.log"),
	        (htlatex_call, "htlatex2.log"),
	    )
	    for command, log_name in steps:
	        exec_command(
	            command,
	            output_to = ToFile( log_path / log_name )
	        )
	# def run_htlatex ends here

	def create_citations(citekeys, xml_tree, style):
	    """Collect the formatted citations of one style from the parsed HTML.

	    For every cite key, the table cell containing a ``span`` whose text
	    equals the key is located; the text of the first following cell is the
	    ``authoryear`` citation, that of the second following cell the ``year``
	    citation.

	    :param citekeys: cite keys to look up.
	    :param xml_tree: parsed XHTML document produced from the LaTeX run.
	    :param style: ``"authoryear"`` or ``"year"``.
	    :returns: a ``div`` element of class ``style`` holding an ``h1``
	        heading and a ``p`` with one ``span class="citation"`` per key.
	    :raises SystemExit: with status 1 for an unknown style or when a cite
	        key cannot be found in the table.
	    """

	    logging.debug("creating citations")
	    # Offset of the wanted cell, counted from the cell that holds the key.
	    column_by_style = {"authoryear": 1, "year": 2}
	    if style not in column_by_style:
	        logging.error("Unrecognized citation format, choose 'authoryear' or 'year'. Exiting")
	        # A bare sys.exit() would signal success to the calling process.
	        sys.exit( 1 )

	    surrounding_div = etree.Element("div")
	    surrounding_div.set("class", style)
	    heading = etree.SubElement(surrounding_div, "h1", id=f"cite{style}")
	    heading.text = f"cite{style}"
	    p_element = etree.SubElement(surrounding_div, "p")

	    # The cite key is bound as an XPath variable so that quotes in a key
	    # cannot break the expression.
	    citation_xpath = (
	        "//x:table/x:tr/x:td[.//x:span[text() = $citekey] ]"
	        f"/following-sibling::x:td[{column_by_style[style]}]/text()"
	    )

	    for citekey in citekeys:
	        logging.debug( f"working on citekey: '{citekey}', style: '{style}'" )
	        citation_el = xml_tree.xpath(
	            citation_xpath,
	            namespaces=NS_MAP,
	            citekey=citekey
	        )
	        if not citation_el:
	            logging.error( f"error parsing formatted citation: '{citekey}', style: '{style}'" )
	            sys.exit( 1 )

	        format_citation = citation_el[0].strip()
	        logging.debug( f"formatted: '{format_citation}'" )

	        # Build the element through the API instead of parsing an
	        # interpolated string: the text is unescaped at this point, so an
	        # '&' or '<' in a citation would make the parser fail.
	        span_element = etree.SubElement(p_element, "span")
	        span_element.set("class", "citation")
	        span_element.set("data-cites", citekey)
	        span_element.text = format_citation

	    return surrounding_div
	# def create_citations ends here

	def create_reference_list(reference_list):
	    """Create the HTML snippet for a list of references.

	    Every ``dt`` child of ``reference_list`` supplies the cite key (its
	    ``id`` attribute without the leading ``X0-``); the first paragraph of
	    the ``dd`` that follows it supplies the reference text, which is passed
	    through transform_reference().

	    :param reference_list: the ``dl`` element of one bibliography.
	    :returns: ``<div id="refs" class="references">`` containing one
	        ``<div id="ref-KEY">`` per entry.
	    :raises SystemExit: with status 1 when an entry has no ``id`` or no
	        reference paragraph.
	    """
	    id_prefix = "X0-"

	    reference_div = etree.Element("div")
	    reference_div.set("id", "refs")
	    reference_div.set("class", "references")

	    for entry in reference_list.xpath("x:dt", namespaces=NS_MAP):
	        entry_id = entry.get("id")
	        if entry_id is None:
	            logging.error( "error parsing bibliography: entry without id" )
	            sys.exit( 1 )
	        # Only strip the marker at the start; a plain replace() would also
	        # remove it from the middle of a cite key.
	        if entry_id.startswith(id_prefix):
	            entry_citekey = entry_id[len(id_prefix):]
	        else:
	            entry_citekey = entry_id

	        paragraphs = entry.xpath("following-sibling::x:dd[1]/x:p", namespaces=NS_MAP)
	        if not paragraphs:
	            logging.error( f"error parsing bibliography: no reference text for '{entry_citekey}'" )
	            sys.exit( 1 )

	        # Built through the API so that special characters in a cite key
	        # cannot produce malformed markup.
	        wrapper_div = etree.SubElement(reference_div, "div", id=f"ref-{entry_citekey}")
	        wrapper_div.append(transform_reference(paragraphs[0]))

	    return reference_div
	# def create_reference_list ends here


	def main(
	        bib_file,
	        citekeys,
	        tex_template,
	        language,
	        temp_dir,
	        output_file,
	        keywords = None,
	        log_dir = "logs"
	):
	    """Render citations and bibliographies to HTML and write ``output_file``.

	    Steps: write the LaTeX file (write_dummy_latex), run htlatex/biber
	    inside ``temp_dir`` (run_htlatex), convert the result to UTF-8 with
	    iconv, repair it with tidy, normalise it to Unicode form NFKD, then
	    extract the citations (create_citations) and one reference list per
	    keyword (create_reference_list).  A reference list looks like
	    ``<div id="refs" class="references"><div id="ref-KEY"><p>...</p></div></div>``.

	    :param bib_file: bibliography file (``str`` or ``Path``).
	    :param citekeys: cite keys to format.
	    :param tex_template: path of the LaTeX template.
	    :param language: language value for the template.
	    :param temp_dir: directory for intermediate files; created if missing.
	    :param output_file: HTML file to write; its name without the suffix is
	        also used as the name of the intermediate files.
	    :param keywords: bibliography keywords; ``None`` means ``[""]``, i.e. a
	        single bibliography without keyword filter.
	    :param log_dir: directory for the log files (``str`` or ``Path``).
	    :returns: dict with the keys ``"references"`` (keyword -> element),
	        ``"citation_authoryear"`` and ``"citation_year"``.
	    :raises SystemExit: with status 1 when a bibliography cannot be located.
	    """
	    import unicodedata

	    if keywords is None:
	        keywords = [""]
	    bib_file = Path( bib_file )
	    temp_dir = Path( temp_dir )
	    output_file = Path( output_file )
	    # Resolve now: the working directory changes below.  Wrapping in Path
	    # makes the string default usable.
	    log_dir = Path( log_dir ).resolve()

	    temp_dir.mkdir( parents=True, exist_ok=True )

	    tmp_filename = Path(output_file.name) . with_suffix( "" )
	    # Name shared by all intermediate files.
	    job_name = tmp_filename.with_suffix( "" ).name

	    write_dummy_latex(
	            citekeys,
	            bib_file,
	            language,
	            keywords,
	            template_path = tex_template,
	            tmp_filename = temp_dir / f"{job_name}.tex"
	    )

	    wd = Path.cwd()
	    logging.info(f"cd {temp_dir}")
	    os.chdir( temp_dir )
	    try:
	        run_htlatex(
	                Path( job_name ),
	                log_dir = log_dir
	        )
	    finally:
	        # Go back even when a command fails, so the caller's working
	        # directory is never left changed.
	        logging.info(f"cd {wd}")
	        os.chdir( wd )

	    tmp_path_html = temp_dir / f"{job_name}.html"
	    # Built from the job name so it cannot coincide with tmp_path_html for
	    # output names that contain additional dots.
	    tmp_path_html_utf8 = temp_dir / f"{job_name}-utf8.html"
	    tmp_path_html_fixed1 = temp_dir / f"{job_name}.1.html"
	    tmp_path_html_fixed2 = temp_dir / f"{job_name}.2.html"

	    # Quoted so that directories containing spaces survive; the htlatex
	    # command already relies on exec_command() honouring shell quoting.
	    html_arg = shlex.quote( str(tmp_path_html) )
	    utf8_arg = shlex.quote( str(tmp_path_html_utf8) )
	    fixed1_arg = shlex.quote( str(tmp_path_html_fixed1) )

	    exec_command(
	        f"iconv -f ISO-8859-1 -t UTF-8 --output={utf8_arg} {html_arg}"
	    )

	    # htlatex seems to produce incorrect xhtml.
	    # We have to fix it
	    # (this will e.g. replace '&' by '&amp;'):
	    exec_command(
	            f"tidy -numeric -output {fixed1_arg} {utf8_arg}",
	            exit_code_ok = lambda x: x in (0,1)
	    )

	    # normalize unicode, e.g. replace ligatures (like "ﬀ" -> "ff"):
	    with open( tmp_path_html_fixed1, "r", encoding="utf-8" ) as in_file, \
	            open( tmp_path_html_fixed2, "w", encoding="utf-8" ) as out_file:
	        for line in in_file:
	            out_file.write( unicodedata.normalize("NFKD", line) )

	    xml_tree = etree.parse(str(tmp_path_html_fixed2))

	    citation_authoryear = create_citations(citekeys, xml_tree, "authoryear")
	    citation_year = create_citations(citekeys, xml_tree, "year")

	    bibliographies_dict = {}

	    for keyword in keywords:
	        if keyword == "":
	            chapter_heading = BIBLIOGRAPHY_CHAPTER_NO_KEYWORD
	        else:
	            chapter_heading = BIBLIOGRAPHY_CHAPTER.format( keyword = keyword )
	        # '<dl class="thebibliography"> ... </dl>
	        # The heading is bound as an XPath variable so that quotes in a
	        # keyword cannot break the expression.
	        bibliography_el = xml_tree.xpath(
	                "//x:body/x:p[text() = $heading]/following-sibling::x:dl[1]",
	                namespaces = NS_MAP,
	                heading = chapter_heading
	        )
	        if len(bibliography_el) != 1:
	            logging.error( f"error parsing bibliography with keyword '{keyword}'" )
	            sys.exit( 1 )

	        bibliographies_dict[keyword] = create_reference_list(bibliography_el[0])

	    html_element = etree.Element("html")
	    html_element.append(citation_authoryear)
	    html_element.append(citation_year)
	    for keyword in keywords:
	        bibl_el = etree.SubElement(
	                html_element,
	                "div",
	                **({} if keyword == "" else { 'keyword': keyword } )
	        )
	        bibl_el.append( bibliographies_dict[keyword] )

	    tree = etree.ElementTree(html_element)
	    logging.info("writing '%s'" % output_file)
	    tree.write(str(output_file), pretty_print=True, xml_declaration=True, encoding="utf-8")

	    return {
	            "references": bibliographies_dict,
	            "citation_authoryear": citation_authoryear,
	            "citation_year": citation_year,
	    }

	if __name__ == '__main__':
	    # Command-line entry point: formats a fixed list of cite keys from the
	    # given bibliography and prints the resulting HTML fragments.
	    parser = argparse.ArgumentParser()
	    parser.add_argument(
	            "bibfile",
	            help="File that contains the bibliography")
	    parser.add_argument(
	            "--tex-template",
	            default = BASE_DIR / "bibformat" / "4ht" / "bibliography4ht.tex",
	            help="the latex template to use for the bibliography"
	    )
	    parser.add_argument(
	            "--temp-dir",
	            default = "tmp_files",
	            help="where to store temporary files"
	    )
	    # main() requires an output file; it was not passed at all before.
	    parser.add_argument(
	            "--output-file",
	            default = "bibliography.html",
	            help="where to write the resulting HTML file"
	    )
	    args = parser.parse_args()

	    check_executables()

	    citekeys = ["Edwards_2017", "Riggs:2016aa", "Bruckler:2001aa", "Zdenek:1939aa", "Caraffa_2011", "Uhlikova:2010aa", "Noll:1992aa", "Schwarz:1931aa", "Schwartz_1995", "Faber:2015ab", "Rohacek:2010aa", "Lachnit:2005aa", "Groll:1865aa", "Schlosser:1934aa", "Eitelberger:1863ab", "Wirth:1939aa", "Faber:2015aa", "Trnkova:2015aa", "Trnkova:2010aa", "Frodl:1988aa"]
	    language = "de"

	    translations = {"de" : "german", "en" : "english", "it" : "italian", "fr" : "french"}

	    # Paths are handed over as Path objects because the functions called
	    # by main() use Path attributes on them.
	    references_in_html = main(
	            bib_file = Path( args.bibfile ),
	            citekeys = citekeys,
	            tex_template = args.tex_template,
	            language = translations[language],
	            temp_dir = Path( args.temp_dir ),
	            output_file = Path( args.output_file ),
	            log_dir = Path( "logs" )
	    )

	    # main() returns a dict of elements, which cannot be serialised as a
	    # whole; print each fragment.
	    for fragment in (
	            references_in_html["citation_authoryear"],
	            references_in_html["citation_year"],
	            *references_in_html["references"].values(),
	    ):
	        print( etree.tostring( fragment, encoding="unicode", pretty_print=True ) )
	# finis