Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
use htlatex (instead of pandoc) to generate the citations
  • Loading branch information
EsGeh authored and EsGeh committed Apr 6, 2019
1 parent 5730c48 commit 1793c57
Show file tree
Hide file tree
Showing 2 changed files with 226 additions and 5 deletions.
23 changes: 18 additions & 5 deletions eoatex2imxml.py
Expand Up @@ -22,6 +22,7 @@
from utils.libeoabibitem import Bibitem
import utils.libeoaconvert as libeoaconvert
from utils.load_config import load_config, exec_command, check_executable, copy_dir_overwrite
import utils.bib2html as bib2html

# imports
import argparse
Expand Down Expand Up @@ -1237,13 +1238,24 @@ def add_bibliography_to_xml(
logging.info( "citekeys: ")
logging.info( len( citekeys ) )
csl_file = BASE_DIR / CONFIG['Auxiliaries']['CSL_FILE']

formatted_references = bib2html.main(
bib_file = Path(bib_database).with_suffix( ".bib" ),
citekeys = citekeys,
tex_template = BASE_DIR / "bibformat" / "4ht" / "bibliography4ht.tex",
language = strLanguage,
temp_dir = tmp_citation_filename
)

'''
formatted_references = libeoaconvert.format_citations(
citations_to_format,
bib_database + ".bib",
strLanguage,
tmp_citation_filename,
csl_file
)[0]
'''

fixed_entries = libeoaconvert.fix_bib_entries(formatted_references)
for entry in fixed_entries:
Expand All @@ -1255,16 +1267,17 @@ def add_bibliography_to_xml(

if bibl_info is None:
logging.warning("No bibliography database found.")
else:

(bib_type, bib_database) = bibl_info
logging.debug(f"bib type is {bib_type}")

if bibl_info is not None:
logging.info( ".bib -> .json")
citations_json = write_json_bibl(
bibl_info,
output_file = TEMP_DIR / (INPUT_PATH_NO_EXT + "-bib.json")
)

(bib_type, bib_database) = bibl_info
logging.debug(f"bib type is {bib_type}")
## only for debugging (?)
make_latex_bibl_file(
bib_database = bib_database,
Expand All @@ -1277,7 +1290,7 @@ def add_bibliography_to_xml(

# If Bibliography-Type is monograph search for EOAbibliography and make it all
if bib_type == "monograph":
tmp_citation_filename = TEMP_DIR / "used_citations-monograph"
tmp_citation_filename = TEMP_DIR / "bib2html" / "used_citations-monograph"
if xmlTree.find(".//EOAprintbibliography") is not None:
# to insert here: with keywords we can have multiple bibliographies
xmlBibliography = xmlTree.find(".//EOAprintbibliography")
Expand All @@ -1294,7 +1307,7 @@ def add_bibliography_to_xml(
elif bib_type == "anthology":
for intChapterNumber, xmlChapter in enumerate(xmlChapters, start = 1):
logging.debug(f"Looking at chapter {intChapterNumber}.")
tmp_citation_filename = TEMP_DIR / ("used_citations-anthology-chapter_{:02d}".format(intChapterNumber))
tmp_citation_filename = TEMP_DIR / "bib2html" / ("used_citations-anthology-chapter_{:02d}".format(intChapterNumber))
if xmlChapter.find(".//EOAprintbibliography") is not None:
xmlBibliography = xmlChapter.find(".//EOAprintbibliography")

Expand Down
208 changes: 208 additions & 0 deletions utils/bib2html.py
@@ -0,0 +1,208 @@
#!/usr/bin/env python3
# -*- coding: utf-8; mode: python -*-

"""
Format bibliography entries as HTML by way of htlatex.

A throw-away LaTeX document citing the requested keys is written from a
template, processed with htlatex and biber, and the bibliography list of
the resulting HTML file is converted into a ``<div id="refs">`` element
holding one ``<div id="ref-CITEKEY">`` per entry.
"""

__version__ = "1.0"
__date__ = "20190313"
__author__ = "kthoden@mpiwg-berlin.mpg.de"

import argparse
import os
import subprocess
import shlex
import logging
import string
from lxml import etree
from pathlib import Path

# Path of this script as given by the interpreter, and its bare file name.
SCRIPT_PATH = Path(__file__)
SCRIPT_NAME = SCRIPT_PATH.name
# Directory two levels above the resolved location of this file.
BASE_DIR = SCRIPT_PATH.resolve().parent.parent

# logging.basicConfig(level=logging.DEBUG, format=' %(asctime)s - %(levelname)s - %(message)s')

# Prefix-to-URI mapping used for XPath queries against XHTML documents.
NS_MAP = dict(x='http://www.w3.org/1999/xhtml')

def transform_reference(reference_element, dialect='html'):
    """Convert one htlatex reference paragraph into clean markup.

    The element is serialised, line breaks are removed and the XHTML
    namespace declaration on the opening ``<p class="noindent">`` tag is
    dropped before the string is parsed again.  In the re-parsed element

    * direct ``a`` children are unwrapped (their content is kept),
    * direct ``span`` children of class ``ecti-1095`` become ``em``
      (``hi`` with ``rend="italic"`` for the ``tei`` dialect),
    * direct ``span`` children of class ``ectt-1095`` become ``code``
      (``hi`` with ``rend="monospace"`` for the ``tei`` dialect).

    :param reference_element: lxml element holding one formatted reference
    :param dialect: ``'tei'`` selects TEI markup, any other value HTML
    :returns: the cleaned, re-parsed lxml element
    """

    string_from_xml = etree.tostring(reference_element).decode('utf-8')
    removed_linebreak = string_from_xml.replace("\n", "")
    removed_namespace = removed_linebreak.replace(
        '<p xmlns="http://www.w3.org/1999/xhtml" class="noindent">',
        '<p>'
    )
    cleaned_element = etree.fromstring(removed_namespace)

    # Links are renamed to a marker tag first and unwrapped at the very end.
    for link in cleaned_element.xpath("a", namespaces=NS_MAP):
        link.tag = "tagtobestripped"

    for ecti in cleaned_element.xpath("span[@class='ecti-1095']", namespaces=NS_MAP):
        if dialect == 'tei':
            ecti.tag = "hi"
            ecti.set("rend", "italic")
        else:
            ecti.tag = "em"
        ecti.attrib.pop('class')

    # With the namespace declaration stripped above, the spans are normally
    # un-namespaced; the prefixed variant is kept for input whose <p> tag did
    # not match the replaced string exactly.
    ectt_span = cleaned_element.xpath(
        "span[@class='ectt-1095'] | x:span[@class='ectt-1095']",
        namespaces=NS_MAP
    )
    for ectt in ectt_span:
        if dialect == 'tei':
            ectt.tag = "hi"
            ectt.set("rend", "monospace")
        else:
            # The original assigned to ``ecti`` here, retagging the wrong span.
            ectt.tag = "code"
        ectt.attrib.pop('class')

    etree.strip_tags(cleaned_element, "tagtobestripped")

    return cleaned_element
# def transform_reference ends here

def write_dummy_latex(
        citekeys,
        bibfile,
        language,
        template_path,
        tmp_filename
):
    """Write a LaTeX file that cites every key, based on a template.

    The template is read from ``template_path`` and filled in with
    ``string.Template``; it may use the placeholders ``language``,
    ``bibfile`` and ``citations``.

    :param citekeys: iterable of citation keys to cite
    :param bibfile: bibliography file (``str`` or ``Path``); a relative
        path is made absolute against the current working directory
    :param language: value substituted for the ``language`` placeholder
    :param template_path: path of the LaTeX template
    :param tmp_filename: path of the LaTeX file to write
    :raises KeyError: if the template uses an unknown placeholder
    """

    # Raw strings: the backslashes are LaTeX commands, not Python escapes.
    entry_template = "\n".join((
        "",
        r"\subsection*{%s}",
        r"\subsubsection*{authoryear}",
        r"\cite{%s}",
        r"\subsubsection*{year}",
        r"\cite*{%s}",
        "",
    ))
    allcitekeys = "".join(
        entry_template % (key, key, key) for key in citekeys
    )

    with open(template_path, "r") as tmp_template:
        template = tmp_template.read()

    fill_in_template = string.Template(template)

    # Accept plain strings as well; the command line entry point passes one.
    bibfile = Path(bibfile)
    bibfile_path = \
        bibfile if bibfile.is_absolute() else Path.cwd() / bibfile
    substitions = fill_in_template.substitute(
        language = language,
        bibfile = bibfile_path,
        citations = allcitekeys
    )

    with open(tmp_filename, "w") as texfile:
        texfile.write(substitions)

    logging.info(f"Wrote {tmp_filename}")
# def write_dummy_latex ends here


def run_htlatex(tmp_filename):
    """Create HTML file from temporary LaTeX file.

    Runs ``htlatex``, then ``biber``, then ``htlatex`` again, all in the
    current working directory.  A non-zero exit status is logged as a
    warning; it does not abort the sequence.

    :param tmp_filename: name of the LaTeX file without the ``.tex`` suffix
    """

    # Argument lists are built directly instead of splitting an interpolated
    # string, so file names containing whitespace or quotes stay intact.
    htlatex_step = (
        "htlatex",
        f"htlatex {tmp_filename}.tex 'xhtml,charset=utf-8' ' -cunihtf -utf8'",
        ["htlatex", f"{tmp_filename}.tex", "xhtml,charset=utf-8", " -cunihtf -utf8"],
    )
    biber_step = (
        "biber",
        f"biber {tmp_filename}",
        ["biber", f"{tmp_filename}"],
    )

    # htlatex runs a second time after biber, with identical arguments.
    for tool, command, arguments in (htlatex_step, biber_step, htlatex_step):
        logging.info("Using external command %s with command %s" % (tool, command))
        return_code = subprocess.call(arguments)
        if return_code != 0:
            logging.warning("%s exited with status %s", tool, return_code)
# def run_htlatex ends here


def main(
        bib_file,
        citekeys,
        tex_template,
        language,
        temp_dir,
        tmp_filename = "temp"
):
    """Format the given citations and return them as one element.

    A LaTeX file citing ``citekeys`` is written into ``temp_dir`` and
    processed there by htlatex and biber.  The ``dl`` element of class
    ``thebibliography`` in the generated HTML file is then converted into
    ``<div id="refs" class="references">`` containing one
    ``<div id="ref-CITEKEY">`` per entry.

    :param bib_file: bibliography file (``str`` or ``Path``)
    :param citekeys: iterable of citation keys to format
    :param tex_template: path of the LaTeX template
    :param language: language value handed to the template
    :param temp_dir: directory for the generated files; created if missing
    :param tmp_filename: base name of the generated files
    :returns: lxml element holding the formatted references
    :raises IndexError: if the HTML contains no bibliography list, or an
        entry has no reference paragraph
    """
    bib_file = Path(bib_file)
    temp_dir = Path(temp_dir)
    tmp_filename = Path(tmp_filename)

    temp_dir.mkdir(parents=True, exist_ok=True)

    write_dummy_latex(
        citekeys,
        bib_file,
        language,
        template_path = tex_template,
        tmp_filename = temp_dir / tmp_filename.with_suffix(".tex")
    )

    # The external tools write next to the input file, so they are run from
    # inside temp_dir; the previous directory is restored even on failure.
    wd = Path.cwd()
    os.chdir(temp_dir)
    try:
        run_htlatex(tmp_filename)
    finally:
        os.chdir(wd)

    tmp_path_html = (temp_dir / tmp_filename).with_suffix(".html")

    xml_tree = etree.parse(str(tmp_path_html))

    reference_list = xml_tree.xpath("//x:dl[@class='thebibliography']", namespaces=NS_MAP)[0]
    entries = reference_list.xpath("x:dt", namespaces=NS_MAP)

    reference_div = etree.fromstring("""<div id="refs" class="references"></div>""")

    id_prefix = "X0-"
    for entry in entries:
        entry_citekey = entry.get("id")
        # Drop only the leading marker, never a match inside the key itself.
        if entry_citekey.startswith(id_prefix):
            entry_citekey = entry_citekey[len(id_prefix):]
        reference_string = entry.xpath("following-sibling::x:dd[1]/x:p", namespaces=NS_MAP)[0]
        formatted_reference = transform_reference(reference_string)
        # Built through the API so that keys containing markup characters
        # cannot produce malformed XML.
        wrapper_div = etree.Element("div", id=f"ref-{entry_citekey}")
        wrapper_div.append(formatted_reference)
        reference_div.append(wrapper_div)
    return reference_div

"""
<h1 id="references" class="unnumbered">References</h1>
<div id="refs" class="references">
<div id="ref-Appadurai_1986">
<p>Appadurai, Arjun, ed. (1986). <em>The Social Life of Things: Commodities in Cultural Perspective</em>. Cambridge, UK: Cambridge University Press.</p>
</div>
"""

# Command line entry point: formats a fixed sample list of cite keys from
# the given bibliography file and prints the resulting markup.
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "bibfile",
        help="File that contains the bibliography")
    parser.add_argument(
        "--tex-template",
        default = BASE_DIR / "bibformat" / "4ht" / "bibliography4ht.tex",
        help="the latex template to use for the bibliography"
    )
    parser.add_argument(
        "--temp-dir",
        default = "tmp_files",
        help="where to store temporary files"
    )
    args = parser.parse_args()
    citekeys = ["Edwards_2017", "Riggs:2016aa", "Bruckler:2001aa", "Zdenek:1939aa", "Caraffa_2011", "Uhlikova:2010aa", "Noll:1992aa", "Schwarz:1931aa", "Schwartz_1995", "Faber:2015ab", "Rohacek:2010aa", "Lachnit:2005aa", "Groll:1865aa", "Schlosser:1934aa", "Eitelberger:1863ab", "Wirth:1939aa", "Faber:2015aa", "Trnkova:2015aa", "Trnkova:2010aa", "Frodl:1988aa"]
    language = "de"

    translations = {"de" : "german", "en" : "english", "it" : "italian", "fr" : "french"}

    # argparse delivers plain strings; the functions below work on paths.
    references_in_html = main(
        bib_file = Path( args.bibfile ),
        citekeys = citekeys,
        tex_template = Path( args.tex_template ),
        language = translations[language],
        temp_dir = Path( args.temp_dir )
    )
    # Serialise to text so the markup is printed, not a bytes repr.
    print( etree.tostring( references_in_html, encoding="unicode" ) )
# finis

0 comments on commit 1793c57

Please sign in to comment.