diff --git a/bibformat/4ht/bibliography4ht.tex b/bibformat/4ht/bibliography4ht.tex index 15cc7b6..9d60796 100644 --- a/bibformat/4ht/bibliography4ht.tex +++ b/bibformat/4ht/bibliography4ht.tex @@ -91,6 +91,11 @@ \bibliography{$bibfile} \begin{document} % \maketitle +\makeatletter +\def\hshchr{\expandafter\@gobble\string\#} +\def\ampchr{\expandafter\@gobble\string\&} +\def\entity#1{\HCode{\ampchr\hshchr#1;}} +\makeatother \section{Citations} diff --git a/eoatex2imxml.py b/eoatex2imxml.py index f5255b3..36a72d9 100755 --- a/eoatex2imxml.py +++ b/eoatex2imxml.py @@ -1195,7 +1195,6 @@ def insert_bibliographies( citations_json, citekeys ) - # use language of the first chapter: formatted_bibl_info = bib2html.main( bib_file = bib_file, citekeys = citekeys, @@ -1402,7 +1401,6 @@ def add_bibliography_to_xml( if bibl_info is None: logging.warning("No bibliography database found.") else: - (bib_type, bib_database) = bibl_info logging.debug(f"bib type is {bib_type}") @@ -1426,6 +1424,7 @@ def add_bibliography_to_xml( if bib_type == "monograph": keyword_to_print_bibl_el = insert_bibliographies( xmlTree, + # use language of the first chapter: xmlChapters[0].get( "language" ), citations_json, ## paths: @@ -1544,7 +1543,7 @@ def add_bibliography_to_xml( if xmlCitation.tag == "EOAciteauthoryear": strCitation = citeauthoryear_value elif xmlCitation.tag == "EOAciteyear": - strCitation = form_cit.select("#citeyear ~ p > span[data-cites='%s']" % string_citekey)[0].text[1:-1] + strCitation = form_cit.select("#citeyear ~ p > span[data-cites='%s']" % string_citekey)[0].text elif xmlCitation.tag == "EOAcitemanual": cite_text = xmlCitation.find("citetext") if cite_text.getchildren(): diff --git a/imxml2django.py b/imxml2django.py index 31e9272..82a009c 100755 --- a/imxml2django.py +++ b/imxml2django.py @@ -1127,7 +1127,7 @@ def check_publication_cfg(configuration_file): strImageFileDir = re.sub("/", "", strImageFileDir) strImageFileName = os.path.basename(strImageFile) shutil.copy( - INPUT_DIR / strImageFile, + PUBLICATION_DIR / strImageFile, CONVERT_DIR / "django/images" / (strImageFileDir + strImageFileName) ) # shutil.copy(os.getcwd() + "/" + strImageFile, os.getcwd() + "/CONVERT/django/images/" + strImageFileDir + strImageFileName) diff --git a/mkimage.py b/mkimage.py index bbfcb98..11ad51d 100755 --- a/mkimage.py +++ b/mkimage.py @@ -38,21 +38,20 @@ def get_cover_image(image_path): """Choose a random landscape image from publications in this volume""" - import random - - candidates = os.listdir(image_path) - + import random, glob + extensions = ("png", "jpg") + candidates = [] + for extension in extensions: + path = f"{image_path}/**/*.{extension}" + candidates.extend( + glob.glob( path, recursive = True) + ) for image in candidates: - if image == ".DS_Store": - candidates.remove(image) - continue - tmp_image = Image.open(image_path + "/" + str(image)) + tmp_image = Image.open(str(image)) ratio = calculate_ratio(tmp_image) if ratio < 1: candidates.remove(image) - chosen_image = random.choice(candidates) - return chosen_image # def get_cover_image ends here @@ -209,7 +208,7 @@ def create_cover(metadata_dict, image_directory, cover_filename, image_is_file): text_draw.multiline_text((ptcenter,DIMENSIONS[1]-400), press_text_joined, font=small_font, align="center") if image_is_file == False: - image_on_cover = Image.open(os.path.join(image_directory, get_cover_image(image_directory))) + image_on_cover = Image.open(get_cover_image(image_directory)) else: image_on_cover = Image.open(image_directory) diff --git a/utils/bib2html.py b/utils/bib2html.py index efc1d10..9964cb8 100755 --- a/utils/bib2html.py +++ b/utils/bib2html.py @@ -21,6 +21,7 @@ from lxml import etree from pathlib import Path import sys +import textwrap BASE_DIR = Path( __file__ ).resolve().parent.parent SCRIPT_PATH = Path( __file__ ) @@ -33,6 +34,15 @@ BIBLIOGRAPHY_CHAPTER_NO_KEYWORD = "BIBLIOGRAPHY" BIBLIOGRAPHY_CHAPTER = "BIBLIOGRAPHY {keyword}" +def latex_escape_non_ascii( input_str ): + output = "" + for c in input_str: + if ord(c) > 0x7F: + output += "\entity{{{}}}".format( ord(c) ) + else: + output += c + return output + def check_executables(): check_executable( "htlatex" ) check_executable( "tidy" ) @@ -42,7 +52,7 @@ def transform_reference(reference_element, dialect='html'): """Formatting transformation for reference element""" string_from_xml = etree.tostring(reference_element).decode('utf-8') - removed_linebreak = string_from_xml.replace("\n", "") + removed_linebreak = string_from_xml.replace("\n", " ") removed_namespace = removed_linebreak.replace('

', '

') cleaned_element = etree.fromstring(removed_namespace) @@ -82,16 +92,13 @@ def write_dummy_latex( tmp_filename ): """Prepare a latex file""" + tmp_dir = tmp_filename.parent allcitekeys = "" - + allcitekeys += "\\begin{tabular}{l l l}\n" for key in citekeys: - allcitekeys += """ -\subsection*{%s} -\subsubsection*{authoryear} -\cite{%s} -\subsubsection*{year} -\cite*{%s}\n""" % (key, key, key) + allcitekeys += f"\\verb|{key}| &\\cite{{{key}}}&\\cite*{{{key}}}\\\\\n" + allcitekeys += "\\end{tabular}\n" with open(template_path, "r") as tmp_template: template = tmp_template.read() @@ -103,29 +110,60 @@ def write_dummy_latex( if keyword == "": chapter_heading = BIBLIOGRAPHY_CHAPTER_NO_KEYWORD bibliographies += \ - f""" -\chapter{{{chapter_heading}}} -\printbibliography\n""" + textwrap.dedent( + f""" + \chapter{{{chapter_heading}}} + \printbibliography + """ + ) else: chapter_heading = BIBLIOGRAPHY_CHAPTER.format( keyword=keyword ) bibliographies += \ - f""" -\chapter{{{chapter_heading}}} -\printbibliography[keyword={{{keyword}}}]\n""" + textwrap.dedent( + f""" + \chapter{{{chapter_heading}}} + \printbibliography[keyword={{{keyword}}}] + """ + ) + + bibfile_orig = (tmp_dir / (bibfile.stem + "_orig")) . with_suffix( ".bib" ) + bibfile_local = tmp_dir / bibfile.name + shutil.copyfile( + bibfile, + bibfile_orig + ) + import fileinput, unicodedata + with open( bibfile_local, "w") as out_file: + for line in fileinput.input(bibfile_orig): + out_file.write( + latex_escape_non_ascii( + line + ) + ) bibfile_path = \ bibfile if bibfile.is_absolute() else Path.cwd() / bibfile substitions = fill_in_template.substitute( language = language, # language = translations[language], - bibfile = bibfile_path, + bibfile = bibfile.name, + # bibfile = bibfile_path, # bibfile = '../' + bibfile, citations = allcitekeys, bibliographies = bibliographies ) + # (just for debugging: save with unescaped non-ascii characters) + with open(tmp_dir / (tmp_filename.name + ".orig"), "w") as texfile: + texfile.write( + substitions + ) with open(tmp_filename, "w") as texfile: - texfile.write(substitions) + texfile.write( + latex_escape_non_ascii( + substitions + ) + ) logging.info(f"Wrote {tmp_filename}") # def write_dummy_latex ends here @@ -137,7 +175,8 @@ def run_htlatex( ): """Create HTML file from temporary LaTeX file""" exec_command( - f"htxelatex {tmp_filename}.tex 'xhtml,charset=utf-8' ' -cunihtf -utf8' '' '--interaction=nonstopmode'", + f"htlatex {tmp_filename}.tex 'xhtml,charset=utf-8,fn-in' ' -utf8' '' '--interaction=nonstopmode'", + # f"htxelatex {tmp_filename}.tex 'xhtml,charset=utf-8' ' -cunihtf -utf8' '' '--interaction=nonstopmode'", output_to = ToFile( Path(log_dir) / "htlatex1.log" ) ) exec_command( @@ -145,7 +184,8 @@ def run_htlatex( output_to = ToFile( Path(log_dir) / "biber.log" ) ) exec_command( - f"htxelatex {tmp_filename}.tex 'xhtml,charset=utf-8' ' -cunihtf -utf8' '' '--interaction=nonstopmode'", + f"htlatex {tmp_filename}.tex 'xhtml,charset=utf-8,fn-in' ' -utf8' '' '--interaction=nonstopmode'", + # f"htxelatex {tmp_filename}.tex 'xhtml,charset=utf-8' ' -cunihtf -utf8' '' '--interaction=nonstopmode'", output_to = ToFile( Path(log_dir) / "htlatex2.log" ) ) # def run_htlatex ends here @@ -165,11 +205,24 @@ def create_citations(citekeys, xml_tree, style): p_element = etree.Element("p") for citekey in citekeys: - logging.debug(f"working on citekey: {citekey}" ) + logging.debug( f"working on citekey: '{citekey}', style: '{style}'" ) + citation_el = None if style == "authoryear": - format_citation = xml_tree.xpath(f"//x:h4[text() = '{citekey}']/following-sibling::x:p[2]/text()", namespaces=NS_MAP)[0].strip() + citation_el = xml_tree.xpath( + f"//x:table/x:tr/x:td[.//x:span[text() = '{citekey}'] ]/following-sibling::x:td[1]/text()", + namespaces=NS_MAP + ) else: - format_citation = xml_tree.xpath(f"//x:h4[text() = '{citekey}']/following-sibling::x:p[3]/text()", namespaces=NS_MAP)[0].strip() + citation_el = xml_tree.xpath( + f"//x:table/x:tr/x:td[.//x:span[text() = '{citekey}'] ]/following-sibling::x:td[2]/text()", + namespaces=NS_MAP + ) + if( len(citation_el) == 0 ): + logging.error( f"error parsing formatted citation: '{citekey}', style: '{style}'" ) + sys.exit( 1 ) + + format_citation = citation_el[0].strip() + logging.debug( f"formatted: '{format_citation}'" ) span_element = etree.fromstring(f"""{format_citation}""") p_element.append(span_element) @@ -230,22 +283,29 @@ def main( wd = Path.cwd() log_dir = log_dir.resolve() os.chdir( temp_dir ) + logging.info(f"cd {temp_dir}") run_htlatex( tmp_filename . with_suffix( "" ), # tmp_filename, log_dir = log_dir ) + logging.info(f"cd {wd}") os.chdir( wd ) tmp_path_html = temp_dir / tmp_filename . with_suffix( ".html" ) + tmp_path_html_utf8 = (temp_dir / (str(tmp_filename) + "-utf8")) . with_suffix( ".html" ) tmp_path_html_fixed1 = temp_dir / tmp_filename . with_suffix( ".1.html" ) tmp_path_html_fixed2 = temp_dir / tmp_filename . with_suffix( ".2.html" ) + exec_command( + f"iconv -f ISO-8859-1 -t UTF-8 --output={tmp_path_html_utf8} {tmp_path_html}" + ) + # htlatex seems to produce incorrect xhtml. # We have to fix it # (this will e.g. replace '&' by '&'): exec_command( - f"tidy -numeric -output {tmp_path_html_fixed1} {tmp_path_html}", + f"tidy -numeric -output {tmp_path_html_fixed1} {tmp_path_html_utf8}", exit_code_ok = lambda x: x in (0,1) ) import fileinput, unicodedata @@ -269,6 +329,7 @@ def main( chapter_heading = BIBLIOGRAPHY_CHAPTER_NO_KEYWORD else: chapter_heading = BIBLIOGRAPHY_CHAPTER.format( keyword = keyword ) + # '

...
bibliography_el = xml_tree.xpath( f"//x:body/x:p[text() = '{chapter_heading}']/following-sibling::x:dl[1]", namespaces = NS_MAP @@ -276,18 +337,21 @@ def main( if( len(bibliography_el) != 1 ): logging.error( f"error parsing bibliography with keyword '{keyword}'" ) sys.exit( 1 ) - bibliographies_dict[keyword] = bibliography_el[0] + bibliography_el = bibliography_el[0] - xml_tree.xpath(f"//x:dl[@class='thebibliography']", namespaces=NS_MAP)[0] - reference_list = xml_tree.xpath(f"//x:dl[@class='thebibliography']", namespaces=NS_MAP)[0] - reference_div = create_reference_list(reference_list) + reference_div = create_reference_list(bibliography_el) + bibliographies_dict[keyword] = reference_div html_element = etree.Element("html") html_element.insert(0, citation_authoryear) html_element.insert(1, citation_year) - html_element.insert(2, reference_div) - - # print(etree.tostring(html_element)) + for keyword in keywords: + bibl_el = etree.SubElement( + html_element, + "div", + **({} if keyword == "" else { 'keyword': keyword } ) + ) + bibl_el.append( bibliographies_dict[keyword] ) tree = etree.ElementTree(html_element) logging.info("writing '%s'" % output_file) diff --git a/utils/libeoaconvert.py b/utils/libeoaconvert.py index 6ffdd92..ce10d8f 100644 --- a/utils/libeoaconvert.py +++ b/utils/libeoaconvert.py @@ -71,34 +71,33 @@ def sanitizeImage( ): """Adjust and convert image for epub standard""" - if not os.path.exists(Path(tmp_dir) / "tmp_images/"): - os.makedirs(os.path.expanduser(Path(tmp_dir) / "tmp_images/")) + tmp_dir = Path( tmp_dir ) + strImagepath = Path( strImagepath ) + if not (tmp_dir / "tmp_images").exists(): + os.makedirs(tmp_dir / "tmp_images/") - tmp_image_dir = Path(tmp_dir) / "tmp_images/" - xelatex_sanitizeimage_logfile = open( Path(tmp_dir) / 'xelatex-run-images.log', 'w') + tmp_image_dir = tmp_dir / "tmp_images" logging.debug(strImagepath) - strCommand = GM_PATH + " identify -format \"%w\" " + str(strImagepath) - listArguments = shlex.split(strCommand) - exeShell = subprocess.check_output(listArguments, shell=False, universal_newlines=True) - intImageWidth = int(exeShell) + intImageWidth = int(subprocess.check_output( + shlex.split( f"{GM_PATH} identify -format \"%w\" {strImagepath}" ), + universal_newlines=True + )) if intImageWidth > 700: - strCommand = GM_PATH + " convert " + str(strImagepath) + " -resize 700x\\> " + str(strImagepath) - listArguments = shlex.split(strCommand) - subprocess.check_output(listArguments, shell=False) - strCommand = GM_PATH + " identify -format \"%h\" " + str(strImagepath) - listArguments = shlex.split(strCommand) - exeShell = subprocess.check_output(listArguments, shell=False, universal_newlines=True) - intImageHeight = int(exeShell) + exec_command( + f"{GM_PATH} convert {strImagepath} -resize 700x\\> {strImagepath}" + ) + intImageHeight = int( subprocess.check_output( + shlex.split( f"{GM_PATH} identify -format \"%h\" {strImagepath}" ), + universal_newlines=True + )) if intImageHeight > 1000: - strCommand = GM_PATH + " convert " + str(strImagepath) + " -resize x1000\\> " + str(strImagepath) - listArguments = shlex.split(strCommand) - subprocess.check_output(listArguments, shell=False) - strCommand = GM_PATH + " identify -format \"%m\" " + str(strImagepath) - listArguments = shlex.split(strCommand) - exeShell = subprocess.check_output(listArguments, shell=False, universal_newlines=True) - strFileFormat = str(exeShell) - strFileFormat = strFileFormat.strip() + exec_command( + f"{GM_PATH} convert {strImagepath} -resize x1000\\> {strImagepath}" + ) + strFileFormat = str( subprocess.check_output( + shlex.split( f"{GM_PATH} identify -format \"%m\" {strImagepath}" ) + )).strip() if strFileFormat == "JPEG": pass # print("looking at jpeg file") @@ -119,17 +118,15 @@ def sanitizeImage( # strImagepath = strNewImagepath + ".png" elif strFileFormat == "PDF": strNewImagepath = os.path.splitext(str(strImagepath))[0] - clipped_file = strImagepath.replace(".pdf", "-clipped.pdf") - - Kommando = PDFCROP_EXEC + " --margins 10 --clip --hires " + str(strImagepath) + " " + clipped_file - logging.debug(Kommando) + clipped_file = str(strImagepath).replace(".pdf", "-clipped.pdf") - Argumente = shlex.split(Kommando) - subprocess.call(Argumente, cwd=tmp_image_dir, stdout=xelatex_sanitizeimage_logfile) - - strCommand = GM_PATH + " convert -density 400 " + clipped_file + " " + strNewImagepath + ".png" - listArguments = shlex.split(strCommand) - subprocess.call(listArguments) + exec_command( + f"{PDFCROP_EXEC} --margins 10 --clip --hires {strImagepath} {clipped_file}", + # wd = tmp_image_dir + ) + exec_command( + f"{GM_PATH} convert -density 400 {clipped_file} {strNewImagepath}.png" + ) logging.debug("Removing two files: %s and %s " % (clipped_file, strImagepath)) os.remove(clipped_file) os.remove(strImagepath)