Skip to content
Permalink
380d7af310
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
executable file 428 lines (367 sloc) 11.9 KB
#!/usr/bin/env python3
# -*- coding: utf-8; mode: python -*-
"""
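Convert a BibTeX bibliography to HTML and TEI by running it through a LaTeX template with make4ht and biber, cleaning the result with tidy and transforming it with XSLT.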
"""
__version__ = "1.0"
__date__ = "20190313"
__author__ = "kthoden@mpiwg-berlin.mpg.de"
from utils.load_config import exec_command, ToFile, ToLog, check_executable
from utils.run_xslt import run_xslt
import argparse
import os
import subprocess
import shlex
import logging
import string
import shutil
from lxml import etree
from pathlib import Path
import sys
import textwrap
# Directory two levels above this file's resolved location; the stylesheets
# and the default LaTeX template are looked up relative to it.
BASE_DIR = Path( __file__ ).resolve().parent.parent
# Path of this script as it was invoked (not resolved), and its file name.
SCRIPT_PATH = Path( __file__ )
SCRIPT_NAME = SCRIPT_PATH.name
# logging.basicConfig(level=logging.DEBUG, format=' %(asctime)s - %(levelname)s - %(message)s')
# Prefix map for the XHTML namespace.
# NOTE(review): not referenced by the code visible in this file, and
# get_bibl_info binds a local variable of the same name - confirm it is needed.
NS_MAP = {"x" : 'http://www.w3.org/1999/xhtml'}
# Section headings written into the generated LaTeX file: one for a
# bibliography without keyword, one format string for a keyword bibliography.
BIBLIOGRAPHY_CHAPTER_NO_KEYWORD = "BIBLIOGRAPHY"
BIBLIOGRAPHY_CHAPTER = "BIBLIOGRAPHY-{keyword}"
def get_bibl_info(
        tei_tree
):
    """Collect cite keys and bibliography keywords from a TEI tree.

    Args:
        tei_tree: object with an lxml-style ``xpath(expr, namespaces=...)``
            method.

    Returns:
        A dict with two lists:

        * ``"citekeys"``: every ``tei:bibl/tei:ref/@target`` value with
          leading ``#`` characters removed, in document order.
        * ``"keywords"``: one entry per ``<?eoa printbibliography ...?>``
          processing instruction; the entry is the first argument with
          surrounding quotes removed, or ``""`` if there is no argument.
    """
    # Local name differs from the module-level NS_MAP (XHTML) on purpose,
    # so that constant is not shadowed here.
    tei_ns = {"tei": "http://www.tei-c.org/ns/1.0"}
    targets = tei_tree.xpath(
        ".//tei:bibl/tei:ref/@target",
        namespaces=tei_ns
    )
    citekeys = [target.lstrip("#") for target in targets]

    keywords = []
    for instruction in tei_tree.xpath(".//processing-instruction('eoa')"):
        text = str(instruction).strip()
        # Remove exactly one "<?" / "?>" delimiter pair; str.lstrip/rstrip
        # would treat their argument as a set of characters.
        if text.startswith("<?"):
            text = text[2:]
        if text.endswith("?>"):
            text = text[:-2]
        # split() without argument tolerates repeated blanks, tabs and
        # newlines between the parts of the instruction.
        tokens = text.split()
        if tokens[0:2] != ["eoa", "printbibliography"]:
            continue
        if len(tokens) > 2:
            keywords.append(tokens[2].strip('"').strip("'"))
        else:
            keywords.append("")

    return {
        "citekeys": citekeys,
        "keywords": keywords
    }
def latex_escape_non_ascii( input_str, escape_threshold = 0xFF ):
    """Replace high code points by ``\\entity{<decimal code point>}``.

    Despite the name, the default threshold is 0xFF, so Latin-1 characters
    are left untouched; pass ``escape_threshold=0x7F`` to escape everything
    that is not ASCII.

    Args:
        input_str: text to process.
        escape_threshold: characters whose code point is strictly greater
            than this value are replaced.

    Returns:
        The text with every such character replaced.
    """
    return "".join(
        "\\entity{{{}}}".format( ord(char) ) if ord(char) > escape_threshold else char
        for char in input_str
    )
def check_executables():
    """Run check_executable for htlatex, tidy and biber, in that order."""
    for program in ("htlatex", "tidy", "biber"):
        check_executable( program )
def write_dummy_latex(
        citekeys,
        bibfile,
        language,
        keywords,
        template_path,
        tmp_filename,
        dashed
):
    """Write the LaTeX job file that cites every key and prints the bibliographies.

    Side effects, all inside the directory of ``tmp_filename``:

    * the bibliography is copied verbatim to ``<stem>_orig.bib``;
    * a second copy, passed line by line through latex_escape_non_ascii,
      is written under the bibliography's own file name;
    * the filled-in template is written to ``<tmp_filename>.orig``
      (unescaped, for debugging) and, escaped, to ``tmp_filename``.

    Args:
        citekeys: keys to cite; each becomes one row of a LaTeX table.
        bibfile: path (str or Path) of the BibTeX file.
        language: value for the template's ``language`` placeholder.
        keywords: bibliography keywords; ``""`` stands for the bibliography
            without keyword. Only used when ``dashed`` is true.
        template_path: path of a ``string.Template`` file with the
            placeholders ``language``, ``bibfile``, ``usepackagebiblatex``,
            ``citations``, ``bibshorthands`` and ``bibliographies``.
        tmp_filename: path (str or Path) of the LaTeX file to write.
        dashed: selects ``dashed=true`` or ``dashed=false`` for the
            ``usepackagebiblatex`` placeholder.
    """
    # Accept plain strings as well; the code below needs Path attributes.
    bibfile = Path( bibfile )
    tmp_filename = Path( tmp_filename )
    tmp_dir = tmp_filename.parent

    # One table row per key: the key itself, \cite, \cite* and the title.
    rows = [
        f"\\verb|{key}| &\\cite{{{key}}}&\\cite*{{{key}}}&\\citefield{{{key}}}{{title}}"
        for key in citekeys
    ]
    allcitekeys = "\\begin{tabular}{l l l l}\n"
    if rows:
        # Rows are separated by "\\"; the last row gets no row terminator.
        allcitekeys += "\\\\\n".join( rows ) + "\n"
    allcitekeys += "\\end{tabular}\n"

    with open(template_path, "r") as tmp_template:
        fill_in_template = string.Template( tmp_template.read() )

    def bibliography_section( heading, options ):
        # Backslashes are written escaped: "\s" and "\p" are not valid
        # escape sequences in a normal string literal.
        return f"\n\\section*{{{heading}}}\n\\printbibliography[{options}]\n"

    # NOTE(review): the indentation of the original was lost; the final
    # `else` is read as belonging to `if dashed`, i.e. the non-dashed run
    # gets a single bibliography without keyword - confirm.
    if dashed:
        sections = []
        for keyword in keywords:
            if keyword == "":
                sections.append(
                    bibliography_section(
                        BIBLIOGRAPHY_CHAPTER_NO_KEYWORD,
                        "heading=none"
                    )
                )
            else:
                sections.append(
                    bibliography_section(
                        BIBLIOGRAPHY_CHAPTER.format( keyword=keyword ),
                        f"heading=none, keyword={{{keyword}}}"
                    )
                )
        bibliographies = "".join( sections )
    else:
        bibliographies = bibliography_section(
            BIBLIOGRAPHY_CHAPTER_NO_KEYWORD,
            "heading=none"
        )

    # Appending "_orig.bib" to the stem keeps dotted stems intact
    # ("my.refs.bib" -> "my.refs_orig.bib"); with_suffix() would cut them.
    bibfile_orig = tmp_dir / (bibfile.stem + "_orig.bib")
    bibfile_local = tmp_dir / bibfile.name
    shutil.copyfile(
        bibfile,
        bibfile_orig
    )
    # Read from the copy so the source is never open for reading and
    # writing at once, even if bibfile already lives in tmp_dir.
    with open( bibfile_orig, "r" ) as in_file, open( bibfile_local, "w" ) as out_file:
        for line in in_file:
            out_file.write( latex_escape_non_ascii( line ) )

    bibshorthands = "\n\\section*{Shorthands}\n\\printbiblist[heading=none]{shorthand}\n"
    usepackagebiblatexstring = ",dashed=true" if dashed else ",dashed=false"

    substitions = fill_in_template.substitute(
        language = language,
        # Only the file name: the escaped copy sits next to the .tex file.
        bibfile = bibfile.name,
        usepackagebiblatex = usepackagebiblatexstring,
        citations = allcitekeys,
        bibshorthands = bibshorthands,
        bibliographies = bibliographies
    )
    # (just for debugging: save with unescaped non-ascii characters)
    with open(tmp_dir / (tmp_filename.name + ".orig"), "w") as texfile:
        texfile.write( substitions )
    with open(tmp_filename, "w") as texfile:
        texfile.write( latex_escape_non_ascii( substitions ) )
    logging.info(f"Wrote {tmp_filename}")
# def write_dummy_latex ends here
def create_makefile(tmp_filename):
    """Write the make4ht build file to `tmp_filename`.

    The recipe registers a biber command and runs htlatex, biber and then
    htlatex twice more, followed by a filter on the generated html files.
    Found on https://tex.stackexchange.com/questions/162626
    """
    build_lines = (
        'local filter = require "make4ht-filter"',
        'local process = filter{"cleanspan", "fixligatures", "hruletohr"}',
        'Make:add("biber", "biber ${input}")',
        'Make:htlatex()',
        'Make:biber()',
        'Make:htlatex()',
        'Make:htlatex()',
        'Make:match("html$",process)',
    )
    # Every line, including the last one, ends with a newline.
    content = "".join(line + "\n" for line in build_lines)
    with open(tmp_filename, "w") as build_file:
        build_file.write(content)
    logging.info(f"Wrote {tmp_filename}.")
# def create_makefile ends here
def run_mk4_makefile(
        tmp_filename,
        log_dir
):
    """Run make4ht on `tmp_filename` to create HTML from LaTeX.

    `log_dir` is part of the signature but is not used by this function.
    """
    command = f"make4ht {tmp_filename}"
    exec_command( command )
# def run_mk4_makefile ends here
def run_htlatex(
        tmp_filename,
        log_dir
):
    """Create an HTML file from the temporary LaTeX file.

    Runs htlatex, biber and htlatex again on `tmp_filename`; the output of
    each step goes to its own log file inside `log_dir`.
    """
    log_path = Path(log_dir)
    htlatex_command = f"htlatex {tmp_filename}.tex 'xhtml,charset=utf-8,fn-in' ' -utf8' '' '--interaction=nonstopmode'"
    steps = (
        (htlatex_command, "htlatex1.log"),
        (f"biber {tmp_filename}", "biber.log"),
        (htlatex_command, "htlatex2.log"),
    )
    for command, log_name in steps:
        exec_command(
            command,
            output_to = ToFile( log_path / log_name )
        )
# def run_htlatex ends here
def bib2tei(
        bib_file,
        citekeys,
        language,
        temp_dir,
        output_file,
        keywords = None,
        log_dir = "logs",
        tex_template = BASE_DIR / "data" / "aux" / "bibliography4ht.tex",
):
    """Convert a BibTeX file to TEI via two intermediate HTML renderings.

    The bibliography is rendered twice with __bib2imhtml, once with
    ``dashed_status=False`` and once with ``dashed_status=True``; both
    results are then handed to __imhtml_2_tei, which writes `output_file`.

    Args:
        bib_file: path (str or Path) of the BibTeX file.
        citekeys: keys to cite.
        language: language value passed on to __bib2imhtml.
        temp_dir: directory (str or Path) for all intermediate files.
        output_file: path of the TEI file to write.
        keywords: bibliography keywords; defaults to ``[""]``.
        log_dir: directory (str or Path) for log files.
        tex_template: LaTeX template used for both renderings.
    """
    # A list literal as default would be one object shared by all calls.
    if keywords is None:
        keywords = [""]
    # The helpers call Path methods on these values (e.g. log_dir.resolve()),
    # so plain strings - including the "logs" default - are converted here.
    bib_file = Path( bib_file )
    temp_dir = Path( temp_dir )
    log_dir = Path( log_dir )

    imhtml_file = temp_dir / "imhtml-nondashed.html"
    imhtml_dashed_file = temp_dir / "imhtml-dashed.html"
    for dashed_status, target in (
            (False, imhtml_file),
            (True, imhtml_dashed_file),
    ):
        __bib2imhtml(
            bib_file = bib_file,
            citekeys = citekeys,
            tex_template = tex_template,
            language = language,
            temp_dir = temp_dir,
            output_file = target,
            dashed_status = dashed_status,
            keywords = keywords,
            log_dir = log_dir
        )
    __imhtml_2_tei(
        imhtml_file,
        imhtml_dashed_file,
        output_file = output_file
    )
def __bib2imhtml(
        bib_file,
        citekeys,
        tex_template,
        language,
        temp_dir,
        dashed_status,
        output_file,
        keywords,
        log_dir
):
    """Render a .bib file to XHTML inside `temp_dir` and write it to `output_file`.

    Steps: write the LaTeX job file, run make4ht from within `temp_dir`,
    convert the resulting HTML from ISO-8859-1 to UTF-8 with iconv and
    clean it up with tidy. Nothing is returned.

    Args:
        bib_file: path of the BibTeX file.
        citekeys: keys to cite.
        tex_template: path of the LaTeX template.
        language: one of "de", "en", "it", "fr" or the corresponding
            language name ("german", "english", "italian", "french").
        temp_dir: working directory (str or Path); created if missing.
        dashed_status: False renders job "nondashed", True job "dashed".
        output_file: path the tidied XHTML is written to.
        keywords: bibliography keywords passed on to write_dummy_latex.
        log_dir: directory (str or Path) for log files; created if missing.

    Raises:
        KeyError: if `language` is neither a known code nor a known name.
    """
    temp_dir = Path( temp_dir )
    translations = {"de" : "german", "en" : "english", "it" : "italian", "fr" : "french"}
    if language in translations.values():
        language_translated = language
    else:
        language_translated = translations[language]

    temp_dir.mkdir( parents=True, exist_ok=True )
    tmp_filename = Path( "dashed" if dashed_status else "nondashed" )
    write_dummy_latex(
        citekeys,
        bib_file,
        language_translated,
        keywords,
        template_path = tex_template,
        tmp_filename = temp_dir / tmp_filename . with_suffix( ".tex" ),
        dashed = dashed_status
    )

    # Make the log path absolute before leaving the current directory.
    log_dir = Path( log_dir ).resolve()
    log_dir.mkdir( parents=True, exist_ok=True )

    wd = Path.cwd()
    os.chdir( temp_dir )
    logging.info(f"cd {temp_dir}")
    try:
        # Both paths are relative: make4ht runs inside temp_dir.
        create_makefile( tmp_filename . with_suffix( ".mk4" ) )
        run_mk4_makefile(
            tmp_filename . with_suffix( "" ),
            log_dir = log_dir
        )
    finally:
        # Always go back, also when make4ht failed; otherwise every relative
        # path used by the caller afterwards would point into temp_dir.
        logging.info(f"cd {wd}")
        os.chdir( wd )

    tmp_path_html = temp_dir / tmp_filename . with_suffix( ".html" )
    tmp_path_html_utf8 = temp_dir / (str(tmp_filename) + "-utf8.html")
    exec_command(
        f"iconv -f ISO-8859-1 -t UTF-8 -o \"{tmp_path_html_utf8}\" \"{tmp_path_html}\"",
        output_to = ToFile( log_dir / "iconv.log" )
    )
    # htlatex seems to produce incorrect xhtml.
    # We have to fix it
    # (this will e.g. replace '&' by '&amp;'):
    # Paths are quoted like in the iconv call so that blanks do not split them.
    exec_command(
        f"tidy -numeric -output \"{output_file}\" \"{tmp_path_html_utf8}\"",
        # exit codes 0 and 1 both count as success for this call
        exit_code_ok = lambda x: x in (0,1)
    )
def __imhtml_2_tei(
        imhtml_file,
        imhtml_dashed_file,
        output_file
):
    """Transform the intermediate HTML to TEI with tex4ht_2_tei.xsl.

    `imhtml_file` is the input of the transformation; the absolute path of
    `imhtml_dashed_file` is handed to the stylesheet as parameter
    `dashed_file`. The result is written to `output_file`.
    """
    stylesheet = BASE_DIR / "stylesheets/tex4ht_2_tei.xsl"
    dashed_param = f"dashed_file={imhtml_dashed_file.absolute()}"
    run_xslt(
        imhtml_file,
        stylesheet,
        params = [ dashed_param ],
        output_file = output_file
    )
def teibib_to_eoa1(
        tei_bibl_file: Path,
        output_file: Path
):
    """Run the teibib_to_eoa1.xsl stylesheet on `tei_bibl_file`, writing `output_file`."""
    stylesheet = BASE_DIR / "stylesheets/teibib_to_eoa1.xsl"
    run_xslt(
        tei_bibl_file,
        stylesheet,
        output_file = output_file
    )
def citekeys_from_bibfile( bib_file ):
    """Return the entry keys of a BibTeX file, in file order.

    ``@comment``, ``@string`` and ``@preamble`` blocks are skipped.
    """
    import re
    entry_start = re.compile(
        r"^\s*@(\w+)\s*[{(]\s*([^,\s{}()]+)\s*,",
        re.MULTILINE
    )
    non_entries = ("comment", "string", "preamble")
    bib_text = Path( bib_file ).read_text()
    return [
        match.group(2)
        for match in entry_start.finditer( bib_text )
        if match.group(1).lower() not in non_entries
    ]


def main( argv = None ):
    """Command line entry point: convert a .bib file to TEI.

    The previous entry point called an undefined ``main`` with the undefined
    names ``citekeys`` and ``translations``. It now cites every entry found
    in the bibliography file and calls bib2tei.

    Args:
        argv: argument list; ``None`` makes argparse use ``sys.argv``.

    Returns:
        Path of the TEI file that was written.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "bibfile",
        help="File that contains the bibliography")
    parser.add_argument(
        "--tex-template",
        default = BASE_DIR / "data" / "aux" / "bibliography4ht.tex",
        help="the latex template to use for the bibliography"
    )
    parser.add_argument(
        "--temp-dir",
        default = "tmp_files",
        help="where to store temporary files"
    )
    parser.add_argument(
        "--output",
        default = None,
        help="TEI file to write (default: bibliography.tei.xml inside the temp dir)"
    )
    args = parser.parse_args( argv )
    check_executables()
    language = "de"
    temp_dir = Path( args.temp_dir )
    bib_file = Path( args.bibfile )
    if args.output is None:
        output_file = temp_dir / "bibliography.tei.xml"
    else:
        output_file = Path( args.output )
    # Keep the logs with the other temporary files and make sure the
    # directory is there before anything is written to it.
    log_dir = temp_dir / "logs"
    log_dir.mkdir( parents=True, exist_ok=True )
    bib2tei(
        bib_file = bib_file,
        citekeys = citekeys_from_bibfile( bib_file ),
        language = language,
        temp_dir = temp_dir,
        output_file = output_file,
        log_dir = log_dir,
        tex_template = Path( args.tex_template )
    )
    return output_file


if __name__ == '__main__':
    main()
# finis