diff --git a/bibformat/4ht/bibliography4ht.tex b/bibformat/4ht/bibliography4ht.tex index 15cc7b6..9d60796 100644 --- a/bibformat/4ht/bibliography4ht.tex +++ b/bibformat/4ht/bibliography4ht.tex @@ -91,6 +91,11 @@ \bibliography{$bibfile} \begin{document} % \maketitle +\makeatletter +\def\hshchr{\expandafter\@gobble\string\#} +\def\ampchr{\expandafter\@gobble\string\&} +\def\entity#1{\HCode{\ampchr\hshchr#1;}} +\makeatother \section{Citations} diff --git a/eoatex2imxml.py b/eoatex2imxml.py index f5255b3..36a72d9 100755 --- a/eoatex2imxml.py +++ b/eoatex2imxml.py @@ -1195,7 +1195,6 @@ def insert_bibliographies( citations_json, citekeys ) - # use language of the first chapter: formatted_bibl_info = bib2html.main( bib_file = bib_file, citekeys = citekeys, @@ -1402,7 +1401,6 @@ def add_bibliography_to_xml( if bibl_info is None: logging.warning("No bibliography database found.") else: - (bib_type, bib_database) = bibl_info logging.debug(f"bib type is {bib_type}") @@ -1426,6 +1424,7 @@ def add_bibliography_to_xml( if bib_type == "monograph": keyword_to_print_bibl_el = insert_bibliographies( xmlTree, + # use language of the first chapter: xmlChapters[0].get( "language" ), citations_json, ## paths: @@ -1544,7 +1543,7 @@ def add_bibliography_to_xml( if xmlCitation.tag == "EOAciteauthoryear": strCitation = citeauthoryear_value elif xmlCitation.tag == "EOAciteyear": - strCitation = form_cit.select("#citeyear ~ p > span[data-cites='%s']" % string_citekey)[0].text[1:-1] + strCitation = form_cit.select("#citeyear ~ p > span[data-cites='%s']" % string_citekey)[0].text elif xmlCitation.tag == "EOAcitemanual": cite_text = xmlCitation.find("citetext") if cite_text.getchildren(): diff --git a/imxml2django.py b/imxml2django.py index 31e9272..82a009c 100755 --- a/imxml2django.py +++ b/imxml2django.py @@ -1127,7 +1127,7 @@ def check_publication_cfg(configuration_file): strImageFileDir = re.sub("/", "", strImageFileDir) strImageFileName = os.path.basename(strImageFile) shutil.copy( - INPUT_DIR / strImageFile, + PUBLICATION_DIR / strImageFile, CONVERT_DIR / "django/images" / (strImageFileDir + strImageFileName) ) # shutil.copy(os.getcwd() + "/" + strImageFile, os.getcwd() + "/CONVERT/django/images/" + strImageFileDir + strImageFileName) diff --git a/mkimage.py b/mkimage.py index bbfcb98..11ad51d 100755 --- a/mkimage.py +++ b/mkimage.py @@ -38,21 +38,20 @@ def get_cover_image(image_path): """Choose a random landscape image from publications in this volume""" - import random - - candidates = os.listdir(image_path) - + import random, glob + extensions = ("png", "jpg") + candidates = [] + for extension in extensions: + path = f"{image_path}/**/*.{extension}" + candidates.extend( + glob.glob( path, recursive = True) + ) for image in candidates: - if image == ".DS_Store": - candidates.remove(image) - continue - tmp_image = Image.open(image_path + "/" + str(image)) + tmp_image = Image.open(str(image)) ratio = calculate_ratio(tmp_image) if ratio < 1: candidates.remove(image) - chosen_image = random.choice(candidates) - return chosen_image # def get_cover_image ends here @@ -209,7 +208,7 @@ def create_cover(metadata_dict, image_directory, cover_filename, image_is_file): text_draw.multiline_text((ptcenter,DIMENSIONS[1]-400), press_text_joined, font=small_font, align="center") if image_is_file == False: - image_on_cover = Image.open(os.path.join(image_directory, get_cover_image(image_directory))) + image_on_cover = Image.open(get_cover_image(image_directory)) else: image_on_cover = Image.open(image_directory) diff --git a/utils/bib2html.py b/utils/bib2html.py index efc1d10..9964cb8 100755 --- a/utils/bib2html.py +++ b/utils/bib2html.py @@ -21,6 +21,7 @@ from lxml import etree from pathlib import Path import sys +import textwrap BASE_DIR = Path( __file__ ).resolve().parent.parent SCRIPT_PATH = Path( __file__ ) @@ -33,6 +34,15 @@ BIBLIOGRAPHY_CHAPTER_NO_KEYWORD = "BIBLIOGRAPHY" BIBLIOGRAPHY_CHAPTER = "BIBLIOGRAPHY {keyword}" +def latex_escape_non_ascii( input_str ): + output = "" + for c in input_str: + if ord(c) > 0x7F: + output += "\entity{{{}}}".format( ord(c) ) + else: + output += c + return output + def check_executables(): check_executable( "htlatex" ) check_executable( "tidy" ) @@ -42,7 +52,7 @@ def transform_reference(reference_element, dialect='html'): """Formatting transformation for reference element""" string_from_xml = etree.tostring(reference_element).decode('utf-8') - removed_linebreak = string_from_xml.replace("\n", "") + removed_linebreak = string_from_xml.replace("\n", " ") removed_namespace = removed_linebreak.replace('
', '
') cleaned_element = etree.fromstring(removed_namespace) @@ -82,16 +92,13 @@ def write_dummy_latex( tmp_filename ): """Prepare a latex file""" + tmp_dir = tmp_filename.parent allcitekeys = "" - + allcitekeys += "\\begin{tabular}{l l l}\n" for key in citekeys: - allcitekeys += """ -\subsection*{%s} -\subsubsection*{authoryear} -\cite{%s} -\subsubsection*{year} -\cite*{%s}\n""" % (key, key, key) + allcitekeys += f"\\verb|{key}| &\\cite{{{key}}}&\\cite*{{{key}}}\\\\\n" + allcitekeys += "\\end{tabular}\n" with open(template_path, "r") as tmp_template: template = tmp_template.read() @@ -103,29 +110,60 @@ def write_dummy_latex( if keyword == "": chapter_heading = BIBLIOGRAPHY_CHAPTER_NO_KEYWORD bibliographies += \ - f""" -\chapter{{{chapter_heading}}} -\printbibliography\n""" + textwrap.dedent( + f""" + \chapter{{{chapter_heading}}} + \printbibliography + """ + ) else: chapter_heading = BIBLIOGRAPHY_CHAPTER.format( keyword=keyword ) bibliographies += \ - f""" -\chapter{{{chapter_heading}}} -\printbibliography[keyword={{{keyword}}}]\n""" + textwrap.dedent( + f""" + \chapter{{{chapter_heading}}} + \printbibliography[keyword={{{keyword}}}] + """ + ) + + bibfile_orig = (tmp_dir / (bibfile.stem + "_orig")) . with_suffix( ".bib" ) + bibfile_local = tmp_dir / bibfile.name + shutil.copyfile( + bibfile, + bibfile_orig + ) + import fileinput, unicodedata + with open( bibfile_local, "w") as out_file: + for line in fileinput.input(bibfile_orig): + out_file.write( + latex_escape_non_ascii( + line + ) + ) bibfile_path = \ bibfile if bibfile.is_absolute() else Path.cwd() / bibfile substitions = fill_in_template.substitute( language = language, # language = translations[language], - bibfile = bibfile_path, + bibfile = bibfile.name, + # bibfile = bibfile_path, # bibfile = '../' + bibfile, citations = allcitekeys, bibliographies = bibliographies ) + # (just for debugging: save with unescaped non-ascii characters) + with open(tmp_dir / (tmp_filename.name + ".orig"), "w") as texfile: + texfile.write( + substitions + ) with open(tmp_filename, "w") as texfile: - texfile.write(substitions) + texfile.write( + latex_escape_non_ascii( + substitions + ) + ) logging.info(f"Wrote {tmp_filename}") # def write_dummy_latex ends here @@ -137,7 +175,8 @@ def run_htlatex( ): """Create HTML file from temporary LaTeX file""" exec_command( - f"htxelatex {tmp_filename}.tex 'xhtml,charset=utf-8' ' -cunihtf -utf8' '' '--interaction=nonstopmode'", + f"htlatex {tmp_filename}.tex 'xhtml,charset=utf-8,fn-in' ' -utf8' '' '--interaction=nonstopmode'", + # f"htxelatex {tmp_filename}.tex 'xhtml,charset=utf-8' ' -cunihtf -utf8' '' '--interaction=nonstopmode'", output_to = ToFile( Path(log_dir) / "htlatex1.log" ) ) exec_command( @@ -145,7 +184,8 @@ def run_htlatex( output_to = ToFile( Path(log_dir) / "biber.log" ) ) exec_command( - f"htxelatex {tmp_filename}.tex 'xhtml,charset=utf-8' ' -cunihtf -utf8' '' '--interaction=nonstopmode'", + f"htlatex {tmp_filename}.tex 'xhtml,charset=utf-8,fn-in' ' -utf8' '' '--interaction=nonstopmode'", + # f"htxelatex {tmp_filename}.tex 'xhtml,charset=utf-8' ' -cunihtf -utf8' '' '--interaction=nonstopmode'", output_to = ToFile( Path(log_dir) / "htlatex2.log" ) ) # def run_htlatex ends here @@ -165,11 +205,24 @@ def create_citations(citekeys, xml_tree, style): p_element = etree.Element("p") for citekey in citekeys: - logging.debug(f"working on citekey: {citekey}" ) + logging.debug( f"working on citekey: '{citekey}', style: '{style}'" ) + citation_el = None if style == "authoryear": - format_citation = xml_tree.xpath(f"//x:h4[text() = '{citekey}']/following-sibling::x:p[2]/text()", namespaces=NS_MAP)[0].strip() + citation_el = xml_tree.xpath( + f"//x:table/x:tr/x:td[.//x:span[text() = '{citekey}'] ]/following-sibling::x:td[1]/text()", + namespaces=NS_MAP + ) else: - format_citation = xml_tree.xpath(f"//x:h4[text() = '{citekey}']/following-sibling::x:p[3]/text()", namespaces=NS_MAP)[0].strip() + citation_el = xml_tree.xpath( + f"//x:table/x:tr/x:td[.//x:span[text() = '{citekey}'] ]/following-sibling::x:td[2]/text()", + namespaces=NS_MAP + ) + if( len(citation_el) == 0 ): + logging.error( f"error parsing formatted citation: '{citekey}', style: '{style}'" ) + sys.exit( 1 ) + + format_citation = citation_el[0].strip() + logging.debug( f"formatted: '{format_citation}'" ) span_element = etree.fromstring(f"""{format_citation}""") p_element.append(span_element) @@ -230,22 +283,29 @@ def main( wd = Path.cwd() log_dir = log_dir.resolve() os.chdir( temp_dir ) + logging.info(f"cd {temp_dir}") run_htlatex( tmp_filename . with_suffix( "" ), # tmp_filename, log_dir = log_dir ) + logging.info(f"cd {wd}") os.chdir( wd ) tmp_path_html = temp_dir / tmp_filename . with_suffix( ".html" ) + tmp_path_html_utf8 = (temp_dir / (str(tmp_filename) + "-utf8")) . with_suffix( ".html" ) tmp_path_html_fixed1 = temp_dir / tmp_filename . with_suffix( ".1.html" ) tmp_path_html_fixed2 = temp_dir / tmp_filename . with_suffix( ".2.html" ) + exec_command( + f"iconv -f ISO-8859-1 -t UTF-8 --output={tmp_path_html_utf8} {tmp_path_html}" + ) + # htlatex seems to produce incorrect xhtml. # We have to fix it # (this will e.g. replace '&' by '&'): exec_command( - f"tidy -numeric -output {tmp_path_html_fixed1} {tmp_path_html}", + f"tidy -numeric -output {tmp_path_html_fixed1} {tmp_path_html_utf8}", exit_code_ok = lambda x: x in (0,1) ) import fileinput, unicodedata @@ -269,6 +329,7 @@ def main( chapter_heading = BIBLIOGRAPHY_CHAPTER_NO_KEYWORD else: chapter_heading = BIBLIOGRAPHY_CHAPTER.format( keyword = keyword ) + # '