Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Merge branch 'clean_and_fix' of https://github.molgen.mpg.de/EditionOpenAccess/EOASkripts into clean_and_fix
  • Loading branch information
kthoden committed Apr 29, 2019
2 parents 088deef + e57cce7 commit 962ed1f
Show file tree
Hide file tree
Showing 6 changed files with 141 additions and 77 deletions.
5 changes: 5 additions & 0 deletions bibformat/4ht/bibliography4ht.tex
Expand Up @@ -91,6 +91,11 @@
\bibliography{$bibfile}
\begin{document}
% \maketitle
\makeatletter
\def\hshchr{\expandafter\@gobble\string\#}
\def\ampchr{\expandafter\@gobble\string\&}
\def\entity#1{\HCode{\ampchr\hshchr#1;}}
\makeatother
\section{Citations}
Expand Down
5 changes: 2 additions & 3 deletions eoatex2imxml.py
Expand Up @@ -1195,7 +1195,6 @@ def insert_bibliographies(
citations_json,
citekeys
)
# use language of the first chapter:
formatted_bibl_info = bib2html.main(
bib_file = bib_file,
citekeys = citekeys,
Expand Down Expand Up @@ -1402,7 +1401,6 @@ def add_bibliography_to_xml(
if bibl_info is None:
logging.warning("No bibliography database found.")
else:

(bib_type, bib_database) = bibl_info
logging.debug(f"bib type is {bib_type}")

Expand All @@ -1426,6 +1424,7 @@ def add_bibliography_to_xml(
if bib_type == "monograph":
keyword_to_print_bibl_el = insert_bibliographies(
xmlTree,
# use language of the first chapter:
xmlChapters[0].get( "language" ),
citations_json,
## paths:
Expand Down Expand Up @@ -1544,7 +1543,7 @@ def add_bibliography_to_xml(
if xmlCitation.tag == "EOAciteauthoryear":
strCitation = citeauthoryear_value
elif xmlCitation.tag == "EOAciteyear":
strCitation = form_cit.select("#citeyear ~ p > span[data-cites='%s']" % string_citekey)[0].text[1:-1]
strCitation = form_cit.select("#citeyear ~ p > span[data-cites='%s']" % string_citekey)[0].text
elif xmlCitation.tag == "EOAcitemanual":
cite_text = xmlCitation.find("citetext")
if cite_text.getchildren():
Expand Down
2 changes: 1 addition & 1 deletion imxml2django.py
Expand Up @@ -1127,7 +1127,7 @@ def check_publication_cfg(configuration_file):
strImageFileDir = re.sub("/", "", strImageFileDir)
strImageFileName = os.path.basename(strImageFile)
shutil.copy(
INPUT_DIR / strImageFile,
PUBLICATION_DIR / strImageFile,
CONVERT_DIR / "django/images" / (strImageFileDir + strImageFileName)
)
# shutil.copy(os.getcwd() + "/" + strImageFile, os.getcwd() + "/CONVERT/django/images/" + strImageFileDir + strImageFileName)
Expand Down
21 changes: 10 additions & 11 deletions mkimage.py
Expand Up @@ -38,21 +38,20 @@
def get_cover_image(image_path):
    """Choose a random landscape image from publications in this volume.

    All ``*.png`` and ``*.jpg`` files in ``image_path`` and, recursively,
    in its subdirectories are collected.  Every file is opened and passed
    to ``calculate_ratio``; files whose ratio is below 1 are discarded
    (presumably ratio is width/height, so these are portrait images --
    confirm against ``calculate_ratio``).

    :param image_path: directory that is searched for images
    :returns: path of the chosen image as returned by ``glob.glob``
              (it already contains ``image_path`` as prefix)
    :raises IndexError: from ``random.choice`` if no image is left
    """

    import glob
    import random

    extensions = ("png", "jpg")
    found_images = []
    for extension in extensions:
        pattern = f"{image_path}/**/*.{extension}"
        found_images.extend(
            glob.glob( pattern, recursive = True)
        )

    # Build a new list instead of calling remove() on the list that is
    # being iterated: removing while iterating skips the element that
    # follows each removed one, so unsuitable images could slip through.
    candidates = []
    for image in found_images:
        # the context manager closes the file handle again
        with Image.open(str(image)) as tmp_image:
            ratio = calculate_ratio(tmp_image)
        if ratio >= 1:
            candidates.append(image)

    chosen_image = random.choice(candidates)
    return chosen_image
# def get_cover_image ends here

Expand Down Expand Up @@ -209,7 +208,7 @@ def create_cover(metadata_dict, image_directory, cover_filename, image_is_file):
text_draw.multiline_text((ptcenter,DIMENSIONS[1]-400), press_text_joined, font=small_font, align="center")

if image_is_file == False:
image_on_cover = Image.open(os.path.join(image_directory, get_cover_image(image_directory)))
image_on_cover = Image.open(get_cover_image(image_directory))
else:
image_on_cover = Image.open(image_directory)

Expand Down
122 changes: 93 additions & 29 deletions utils/bib2html.py
Expand Up @@ -21,6 +21,7 @@
from lxml import etree
from pathlib import Path
import sys
import textwrap

BASE_DIR = Path( __file__ ).resolve().parent.parent
SCRIPT_PATH = Path( __file__ )
Expand All @@ -33,6 +34,15 @@
BIBLIOGRAPHY_CHAPTER_NO_KEYWORD = "BIBLIOGRAPHY"
BIBLIOGRAPHY_CHAPTER = "BIBLIOGRAPHY {keyword}"

def latex_escape_non_ascii( input_str ):
    r"""Replace every non-ASCII character by a ``\entity{N}`` macro call.

    Characters with a code point up to 0x7F are copied unchanged.  Any
    other character is replaced by ``\entity{N}``, where ``N`` is its
    decimal code point.  The ``\entity`` macro itself has to be provided
    by the LaTeX document that includes the escaped text.

    :param input_str: text to escape
    :returns: the escaped text, containing ASCII characters only
    """
    # The backslash is written as '\\': a bare '\e' is an invalid escape
    # sequence and triggers a warning on newer Python versions.
    return "".join(
        c if ord(c) <= 0x7F else "\\entity{{{}}}".format( ord(c) )
        for c in input_str
    )

def check_executables():
check_executable( "htlatex" )
check_executable( "tidy" )
Expand All @@ -42,7 +52,7 @@ def transform_reference(reference_element, dialect='html'):
"""Formatting transformation for reference element"""

string_from_xml = etree.tostring(reference_element).decode('utf-8')
removed_linebreak = string_from_xml.replace("\n", "")
removed_linebreak = string_from_xml.replace("\n", " ")
removed_namespace = removed_linebreak.replace('<p xmlns="http://www.w3.org/1999/xhtml" class="noindent">', '<p>')
cleaned_element = etree.fromstring(removed_namespace)

Expand Down Expand Up @@ -82,16 +92,13 @@ def write_dummy_latex(
tmp_filename
):
"""Prepare a latex file"""
tmp_dir = tmp_filename.parent

allcitekeys = ""

allcitekeys += "\\begin{tabular}{l l l}\n"
for key in citekeys:
allcitekeys += """
\subsection*{%s}
\subsubsection*{authoryear}
\cite{%s}
\subsubsection*{year}
\cite*{%s}\n""" % (key, key, key)
allcitekeys += f"\\verb|{key}| &\\cite{{{key}}}&\\cite*{{{key}}}\\\\\n"
allcitekeys += "\\end{tabular}\n"

with open(template_path, "r") as tmp_template:
template = tmp_template.read()
Expand All @@ -103,29 +110,60 @@ def write_dummy_latex(
if keyword == "":
chapter_heading = BIBLIOGRAPHY_CHAPTER_NO_KEYWORD
bibliographies += \
f"""
\chapter{{{chapter_heading}}}
\printbibliography\n"""
textwrap.dedent(
f"""
\chapter{{{chapter_heading}}}
\printbibliography
"""
)
else:
chapter_heading = BIBLIOGRAPHY_CHAPTER.format( keyword=keyword )
bibliographies += \
f"""
\chapter{{{chapter_heading}}}
\printbibliography[keyword={{{keyword}}}]\n"""
textwrap.dedent(
f"""
\chapter{{{chapter_heading}}}
\printbibliography[keyword={{{keyword}}}]
"""
)

bibfile_orig = (tmp_dir / (bibfile.stem + "_orig")) . with_suffix( ".bib" )
bibfile_local = tmp_dir / bibfile.name
shutil.copyfile(
bibfile,
bibfile_orig
)
import fileinput, unicodedata
with open( bibfile_local, "w") as out_file:
for line in fileinput.input(bibfile_orig):
out_file.write(
latex_escape_non_ascii(
line
)
)

bibfile_path = \
bibfile if bibfile.is_absolute() else Path.cwd() / bibfile
substitions = fill_in_template.substitute(
language = language,
# language = translations[language],
bibfile = bibfile_path,
bibfile = bibfile.name,
# bibfile = bibfile_path,
# bibfile = '../' + bibfile,
citations = allcitekeys,
bibliographies = bibliographies
)
# (just for debugging: save with unescaped non-ascii characters)
with open(tmp_dir / (tmp_filename.name + ".orig"), "w") as texfile:
texfile.write(
substitions
)

with open(tmp_filename, "w") as texfile:
texfile.write(substitions)
texfile.write(
latex_escape_non_ascii(
substitions
)
)

logging.info(f"Wrote {tmp_filename}")
# def write_dummy_latex ends here
Expand All @@ -137,15 +175,17 @@ def run_htlatex(
):
"""Create HTML file from temporary LaTeX file"""
exec_command(
f"htxelatex {tmp_filename}.tex 'xhtml,charset=utf-8' ' -cunihtf -utf8' '' '--interaction=nonstopmode'",
f"htlatex {tmp_filename}.tex 'xhtml,charset=utf-8,fn-in' ' -utf8' '' '--interaction=nonstopmode'",
# f"htxelatex {tmp_filename}.tex 'xhtml,charset=utf-8' ' -cunihtf -utf8' '' '--interaction=nonstopmode'",
output_to = ToFile( Path(log_dir) / "htlatex1.log" )
)
exec_command(
f"biber {tmp_filename}",
output_to = ToFile( Path(log_dir) / "biber.log" )
)
exec_command(
f"htxelatex {tmp_filename}.tex 'xhtml,charset=utf-8' ' -cunihtf -utf8' '' '--interaction=nonstopmode'",
f"htlatex {tmp_filename}.tex 'xhtml,charset=utf-8,fn-in' ' -utf8' '' '--interaction=nonstopmode'",
# f"htxelatex {tmp_filename}.tex 'xhtml,charset=utf-8' ' -cunihtf -utf8' '' '--interaction=nonstopmode'",
output_to = ToFile( Path(log_dir) / "htlatex2.log" )
)
# def run_htlatex ends here
Expand All @@ -165,11 +205,24 @@ def create_citations(citekeys, xml_tree, style):
p_element = etree.Element("p")

for citekey in citekeys:
logging.debug(f"working on citekey: {citekey}" )
logging.debug( f"working on citekey: '{citekey}', style: '{style}'" )
citation_el = None
if style == "authoryear":
format_citation = xml_tree.xpath(f"//x:h4[text() = '{citekey}']/following-sibling::x:p[2]/text()", namespaces=NS_MAP)[0].strip()
citation_el = xml_tree.xpath(
f"//x:table/x:tr/x:td[.//x:span[text() = '{citekey}'] ]/following-sibling::x:td[1]/text()",
namespaces=NS_MAP
)
else:
format_citation = xml_tree.xpath(f"//x:h4[text() = '{citekey}']/following-sibling::x:p[3]/text()", namespaces=NS_MAP)[0].strip()
citation_el = xml_tree.xpath(
f"//x:table/x:tr/x:td[.//x:span[text() = '{citekey}'] ]/following-sibling::x:td[2]/text()",
namespaces=NS_MAP
)
if( len(citation_el) == 0 ):
logging.error( f"error parsing formatted citation: '{citekey}', style: '{style}'" )
sys.exit( 1 )

format_citation = citation_el[0].strip()
logging.debug( f"formatted: '{format_citation}'" )
span_element = etree.fromstring(f"""<span class="citation" data-cites="{citekey}">{format_citation}</span>""")

p_element.append(span_element)
Expand Down Expand Up @@ -230,22 +283,29 @@ def main(
wd = Path.cwd()
log_dir = log_dir.resolve()
os.chdir( temp_dir )
logging.info(f"cd {temp_dir}")
run_htlatex(
tmp_filename . with_suffix( "" ),
# tmp_filename,
log_dir = log_dir
)
logging.info(f"cd {wd}")
os.chdir( wd )

tmp_path_html = temp_dir / tmp_filename . with_suffix( ".html" )
tmp_path_html_utf8 = (temp_dir / (str(tmp_filename) + "-utf8")) . with_suffix( ".html" )
tmp_path_html_fixed1 = temp_dir / tmp_filename . with_suffix( ".1.html" )
tmp_path_html_fixed2 = temp_dir / tmp_filename . with_suffix( ".2.html" )

exec_command(
f"iconv -f ISO-8859-1 -t UTF-8 --output={tmp_path_html_utf8} {tmp_path_html}"
)

# htlatex seems to produce incorrect xhtml.
# We have to fix it
# (this will e.g. replace '&' by '&amp;'):
exec_command(
f"tidy -numeric -output {tmp_path_html_fixed1} {tmp_path_html}",
f"tidy -numeric -output {tmp_path_html_fixed1} {tmp_path_html_utf8}",
exit_code_ok = lambda x: x in (0,1)
)
import fileinput, unicodedata
Expand All @@ -269,25 +329,29 @@ def main(
chapter_heading = BIBLIOGRAPHY_CHAPTER_NO_KEYWORD
else:
chapter_heading = BIBLIOGRAPHY_CHAPTER.format( keyword = keyword )
# '<dl class="thebibliography"> ... </dl>
bibliography_el = xml_tree.xpath(
f"//x:body/x:p[text() = '{chapter_heading}']/following-sibling::x:dl[1]",
namespaces = NS_MAP
)
if( len(bibliography_el) != 1 ):
logging.error( f"error parsing bibliography with keyword '{keyword}'" )
sys.exit( 1 )
bibliographies_dict[keyword] = bibliography_el[0]
bibliography_el = bibliography_el[0]

xml_tree.xpath(f"//x:dl[@class='thebibliography']", namespaces=NS_MAP)[0]
reference_list = xml_tree.xpath(f"//x:dl[@class='thebibliography']", namespaces=NS_MAP)[0]
reference_div = create_reference_list(reference_list)
reference_div = create_reference_list(bibliography_el)
bibliographies_dict[keyword] = reference_div

html_element = etree.Element("html")
html_element.insert(0, citation_authoryear)
html_element.insert(1, citation_year)
html_element.insert(2, reference_div)

# print(etree.tostring(html_element))
for keyword in keywords:
bibl_el = etree.SubElement(
html_element,
"div",
**({} if keyword == "" else { 'keyword': keyword } )
)
bibl_el.append( bibliographies_dict[keyword] )

tree = etree.ElementTree(html_element)
logging.info("writing '%s'" % output_file)
Expand Down

0 comments on commit 962ed1f

Please sign in to comment.