diff --git a/eoatex2imxml.py b/eoatex2imxml.py index b5007a9..ce56645 100755 --- a/eoatex2imxml.py +++ b/eoatex2imxml.py @@ -22,6 +22,7 @@ from utils.libeoabibitem import Bibitem import utils.libeoaconvert as libeoaconvert from utils.load_config import load_config, exec_command, check_executable, copy_dir_overwrite +import utils.bib2html as bib2html # imports import argparse @@ -1237,6 +1238,16 @@ def add_bibliography_to_xml( logging.info( "citekeys: ") logging.info( len( citekeys ) ) csl_file = BASE_DIR / CONFIG['Auxiliaries']['CSL_FILE'] + + formatted_references = bib2html.main( + bib_file = Path(bib_database).with_suffix( ".bib" ), + citekeys = citekeys, + tex_template = BASE_DIR / "bibformat" / "4ht" / "bibliography4ht.tex", + language = strLanguage, + temp_dir = tmp_citation_filename + ) + + ''' formatted_references = libeoaconvert.format_citations( citations_to_format, bib_database + ".bib", @@ -1244,6 +1255,7 @@ def add_bibliography_to_xml( tmp_citation_filename, csl_file )[0] + ''' fixed_entries = libeoaconvert.fix_bib_entries(formatted_references) for entry in fixed_entries: @@ -1255,16 +1267,17 @@ def add_bibliography_to_xml( if bibl_info is None: logging.warning("No bibliography database found.") +else: + + (bib_type, bib_database) = bibl_info + logging.debug(f"bib type is {bib_type}") -if bibl_info is not None: logging.info( ".bib -> .json") citations_json = write_json_bibl( bibl_info, output_file = TEMP_DIR / (INPUT_PATH_NO_EXT + "-bib.json") ) - (bib_type, bib_database) = bibl_info - logging.debug(f"bib type is {bib_type}") ## only for debugging (?) make_latex_bibl_file( bib_database = bib_database, @@ -1277,7 +1290,7 @@ def add_bibliography_to_xml( # If Bibliography-Type is monograph search for EOAbibliography and make it all if bib_type == "monograph": - tmp_citation_filename = TEMP_DIR / "used_citations-monograph" + tmp_citation_filename = TEMP_DIR / "bib2html" / "used_citations-monograph" if xmlTree.find(".//EOAprintbibliography") is not None: # to insert here: with keywords we can have multiple bibliographies xmlBibliography = xmlTree.find(".//EOAprintbibliography") @@ -1294,7 +1307,7 @@ def add_bibliography_to_xml( elif bib_type == "anthology": for intChapterNumber, xmlChapter in enumerate(xmlChapters, start = 1): logging.debug(f"Looking at chapter {intChapterNumber}.") - tmp_citation_filename = TEMP_DIR / ("used_citations-anthology-chapter_{:02d}".format(intChapterNumber)) + tmp_citation_filename = TEMP_DIR / "bib2html" / ("used_citations-anthology-chapter_{:02d}".format(intChapterNumber)) if xmlChapter.find(".//EOAprintbibliography") is not None: xmlBibliography = xmlChapter.find(".//EOAprintbibliography") diff --git a/utils/bib2html.py b/utils/bib2html.py new file mode 100755 index 0000000..7fadf0b --- /dev/null +++ b/utils/bib2html.py @@ -0,0 +1,208 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8; mode: python -*- + +""" +Docstring goes here +""" + +__version__ = "1.0" +__date__ = "20190313" +__author__ = "kthoden@mpiwg-berlin.mpg.de" + +import argparse +import os +import subprocess +import shlex +import logging +import string +from lxml import etree +from pathlib import Path + +BASE_DIR = Path( __file__ ).resolve().parent.parent +SCRIPT_PATH = Path( __file__ ) +SCRIPT_NAME = SCRIPT_PATH.name + +# logging.basicConfig(level=logging.DEBUG, format=' %(asctime)s - %(levelname)s - %(message)s') + +NS_MAP = {"x" : 'http://www.w3.org/1999/xhtml'} + +def transform_reference(reference_element, dialect='html'): + """Formatting transformation for reference element""" + + string_from_xml = etree.tostring(reference_element).decode('utf-8') + removed_linebreak = string_from_xml.replace("\n", "") + removed_namespace = removed_linebreak.replace('

', '

') + cleaned_element = etree.fromstring(removed_namespace) + + links = cleaned_element.xpath("a", namespaces=NS_MAP) + for link in links: + link.tag = "tagtobestripped" + + ecti_span = cleaned_element.xpath("span[@class='ecti-1095']", namespaces=NS_MAP) + for ecti in ecti_span: + if dialect == 'tei': + ecti.tag = "hi" + ecti.set("rend", "italic") + else: + ecti.tag = "em" + ecti.attrib.pop('class') + + ectt_span = cleaned_element.xpath("x:span[@class='ectt-1095']", namespaces=NS_MAP) + for ectt in ectt_span: + if dialect == 'tei': + ectt.tag = "hi" + ectt.set("rend", "monospace") + else: + ecti.tag = "code" + ectt.attrib.pop('class') + + etree.strip_tags(cleaned_element, "tagtobestripped") + + return cleaned_element +# def transform_reference ends here + +def write_dummy_latex( + citekeys, + bibfile, + language, + template_path, + tmp_filename +): + """Prepare a latex file""" + + allcitekeys = "" + + for key in citekeys: + allcitekeys += """ +\subsection*{%s} +\subsubsection*{authoryear} +\cite{%s} +\subsubsection*{year} +\cite*{%s}\n""" % (key, key, key) + + with open(template_path, "r") as tmp_template: + template = tmp_template.read() + + fill_in_template = string.Template(template) + + bibfile_path = \ + bibfile if bibfile.is_absolute() else Path.cwd() / bibfile + substitions = fill_in_template.substitute( + language = language, + # language = translations[language], + bibfile = bibfile_path, + # bibfile = '../' + bibfile, + citations = allcitekeys + ) + + with open(tmp_filename, "w") as texfile: + texfile.write(substitions) + + logging.info(f"Wrote {tmp_filename}") +# def write_dummy_latex ends here + + +def run_htlatex(tmp_filename): + """Create HTML file from temporary LaTeX file""" + + command = f"htlatex {tmp_filename}.tex 'xhtml,charset=utf-8' ' -cunihtf -utf8'" + arguments = shlex.split(command) + logging.info("Using external command htlatex with command %s" % command) + subprocess.call(arguments) + + command = f"biber {tmp_filename}" + arguments = shlex.split(command) + logging.info("Using external command biber with command %s" % command) + subprocess.call(arguments) + + command = f"htlatex {tmp_filename}.tex 'xhtml,charset=utf-8' ' -cunihtf -utf8'" + arguments = shlex.split(command) + logging.info("Using external command htlatex with command %s" % command) + subprocess.call(arguments) +# def run_htlatex ends here + + +def main( + bib_file, + citekeys, + tex_template, + language, + temp_dir, + tmp_filename = "temp" +): + temp_dir = Path( temp_dir ) + tmp_filename = Path( tmp_filename ) + + if not temp_dir.exists(): + os.makedirs( temp_dir ) + + write_dummy_latex( + citekeys, + bib_file, + language, + template_path = tex_template, + tmp_filename = temp_dir / tmp_filename . with_suffix( ".tex" ) + ) + + wd = Path.cwd() + os.chdir( temp_dir ) + run_htlatex(tmp_filename) + os.chdir( wd ) + + tmp_path_html = (temp_dir / tmp_filename).with_suffix( ".html" ) + + xml_tree = etree.parse( str(tmp_path_html) ) + + reference_list = xml_tree.xpath(f"//x:dl[@class='thebibliography']", namespaces=NS_MAP)[0] + entries = reference_list.xpath(f"x:dt", namespaces=NS_MAP) + + reference_div = etree.fromstring("""

""") + + for entry in entries: + entry_citekey = entry.get("id").replace("X0-", "") + reference_string = entry.xpath(f"following-sibling::x:dd[1]/x:p", namespaces=NS_MAP)[0] + formatted_reference = transform_reference(reference_string) + wrapper_div = etree.fromstring(f"""
""") + wrapper_div.append(formatted_reference) + reference_div.append(wrapper_div) + return reference_div + + """ +

References

+
+
+

Appadurai, Arjun, ed. (1986). The Social Life of Things: Commodities in Cultural Perspective. Cambridge, UK: Cambridge University Press.

+
+ """ + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument( + "bibfile", + help="File that contains the bibliography") + parser.add_argument( + "--tex-template", + default = BASE_DIR / "bibformat" / "4ht" / "bibliography4ht.tex", + help="the latex template to use for the bibliography" + ) + parser.add_argument( + "--temp-dir", + default = "tmp_files", + help="where to store temporary files" + ) + args = parser.parse_args() + citekeys = ["Edwards_2017", "Riggs:2016aa", "Bruckler:2001aa", "Zdenek:1939aa", "Caraffa_2011", "Uhlikova:2010aa", "Noll:1992aa", "Schwarz:1931aa", "Schwartz_1995", "Faber:2015ab", "Rohacek:2010aa", "Lachnit:2005aa", "Groll:1865aa", "Schlosser:1934aa", "Eitelberger:1863ab", "Wirth:1939aa", "Faber:2015aa", "Trnkova:2015aa", "Trnkova:2010aa", "Frodl:1988aa"] + language = "de" + + translations = {"de" : "german", "en" : "english", "it" : "italian", "fr" : "french"} + + temp_dir = Path( args.temp_dir ) + references_in_html = main( + bib_file = args.bibfile, + citekeys = citekeys, + tex_template = args.tex_template, + language = translations[language], + temp_dir = args.temp_dir + ) + print( etree.tostring( references_in_html ) ) +# finis