From 8c1a25478304dfeb0cc80c47d20993e4790a57dc Mon Sep 17 00:00:00 2001
From: Klaus Thoden
Date: Fri, 9 Feb 2018 11:29:23 +0100
Subject: [PATCH] Chemical formulas

---
Review notes (ignored by git am): the EOAchem block added to eoaconvert.py
was revised in review (file handles closed via "with", xelatex exit status
logged, tmp directory created, None-safe element text, valid "\\ce" escape).
The number of added lines is unchanged, so hunk headers and diffstat still
hold; the blob ids on the "index" lines describe the original commit.

 eoaconvert.py     | 92 +++++++++++++++++++++++++++++++++++++++++++++---
 tralics2django.py | 13 +++++--
 tralics2epub.py   | 16 +++++++--
 3 files changed, 113 insertions(+), 8 deletions(-)

diff --git a/eoaconvert.py b/eoaconvert.py
index 4efe277..89cf1f2 100755
--- a/eoaconvert.py
+++ b/eoaconvert.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8; mode: python -*-
-# Time-stamp: <2018-01-26 09:53:29 (kthoden)>
+# Time-stamp: <2018-02-09 10:52:21 (kthoden)>
 
 # license?
 __version__= "1.0"
@@ -304,13 +304,13 @@ def pdf_burst(input_file, tmpDir):
     from PyPDF2 import PdfFileWriter, PdfFileReader
 
     input1 = PdfFileReader(open(tmpDir + input_file, "rb"))
-    print("Input is %s and has %d pages." % (input_file, input1.getNumPages()))
+    logging.debug("Input is %s and has %d pages." % (input_file, input1.getNumPages()))
 
     for pageno in range(input1.getNumPages()):
         output = PdfFileWriter()
         output.addPage(input1.getPage(pageno))
 
-        output_filename = tmpDir + "EOAineq_%d.pdf" % (pageno + 1)
+        output_filename = tmpDir + "EOAformulas_%d.pdf" % (pageno + 1)
         output_stream = open(output_filename, 'wb')
         output.write(output_stream)
         output_stream.close()
@@ -802,7 +802,7 @@ def cleanup():
     for intRunningOrder in dictEOAineqs.keys():
         # provide more status information here in output!
         progress(counter_dictEOAineqs, len(dictEOAineqs.keys()),"Splitting all inline equations, image %s of %s" % (counter_dictEOAineqs, len(dictEOAineqs.keys())))
-        Kommando = TL_PATH + "texmf-dist/scripts/pdfcrop/pdfcrop.pl " + formula_tmp_dir + "EOAineq_" + str(intRunningOrder) + ".pdf " + formula_tmp_dir + dictEOAineqs[intRunningOrder] + ".pdf"
+        Kommando = TL_PATH + "texmf-dist/scripts/pdfcrop/pdfcrop.pl " + formula_tmp_dir + "EOAformulas_" + str(intRunningOrder) + ".pdf " + formula_tmp_dir + dictEOAineqs[intRunningOrder] + ".pdf"
         Argumente = shlex.split(Kommando)
         subprocess.call(Argumente,cwd=formula_tmp_dir,stdout=Datei)
         Kommando = GM_PATH + " convert -density 144 " + formula_tmp_dir + dictEOAineqs[intRunningOrder] + ".pdf " + os.getenv("PWD") + "/items/" + dictEOAineqs[intRunningOrder] + ".png"
@@ -814,6 +814,90 @@ def cleanup():
     print("Found no EOAineq. Continuing")
 
 
+#####################
+# Chemical formulas #
+#####################
+# Typeset every EOAchem element with xelatex and convert each page to a PNG
+# in items/; each element is reduced to its "src" and "TeX" attributes.
+print("-----------------------------------------------------")
+print("Converting EOAchem")
+intChapterNumber = 1
+int_EOAchem_running_order = 1
+dictEOAchems = {}
+str_tex_chem = ""
+all_chem = xmlTree.findall(".//EOAchem")
+if all_chem:
+    print("Found " + str(len(all_chem)) + " chemical formulas")
+
+    for xmlChapter in xmlChapters:
+        print("Chapter " + str(intChapterNumber))
+        xmlEOAchems = xmlChapter.findall(".//EOAchem")
+        int_EOAchem_number = 1
+        for xml_EOAchem in xmlEOAchems:
+
+            # .text is None for an element without text; use "" instead
+            str_chem_text = xml_EOAchem.text or ""
+
+            progress(int_EOAchem_number, len(xmlEOAchems),"Processing EOAchem %s of %s." % (int_EOAchem_number, len(xmlEOAchems)))
+
+            # drop empty lines from the formula source
+            str_chem_text = os.linesep.join([s for s in str_chem_text.splitlines() if s])
+            # \newpage after every formula: pdf_burst writes one PDF per page
+            str_tex_chem = str_tex_chem + "\\ce{" + str_chem_text + "}\n\\newpage\n"
+            # Add int_EOAchem_running_order : Filename to dictionary
+            strFilename = "EOAchem_" + str(intChapterNumber) + "_" + str(int_EOAchem_number)
+            dictEOAchems[int_EOAchem_running_order] = strFilename
+            # Prepare XML; clear() also resets the tail, so save and restore it
+            tmpTail = xml_EOAchem.tail
+            xml_EOAchem.clear()
+            xml_EOAchem.tail = tmpTail
+            xml_EOAchem.set("src", strFilename + ".png")
+            xml_EOAchem.set("TeX", str_chem_text)
+            int_EOAchem_running_order += 1
+            int_EOAchem_number += 1
+        intChapterNumber += 1
+
+    with open(TEMPLATE_PATH + "formula.tex", "r") as tmp:
+        Template = tmp.read()
+    # use local tmpdir
+    formula_tmp_dir = os.getcwd() + "/tmp_files/formulas2png/"
+    os.makedirs(formula_tmp_dir, exist_ok=True)
+
+    # Make directory items if it doesn't already exist
+    os.makedirs(os.getcwd() + "/items", exist_ok=True)
+    s = string.Template(Template)
+    e = s.substitute(DERINHALT=str_tex_chem)
+    tmpFile = formula_tmp_dir + "EOAchem.tex"
+    with open(tmpFile, "w") as tmp:
+        tmp.write(e)
+    print("Typesetting all inline Chemical formulas")
+    Kommando = TEXBIN_PATH + "xelatex --halt-on-error " + tmpFile
+    Argumente = shlex.split(Kommando)
+    # stdout of xelatex, pdfcrop and gm goes to this log; closed when done
+    with open('tmp_files/xelatex-run.log', 'w') as Datei:
+        Ergebnis = subprocess.call(Argumente,cwd=formula_tmp_dir,stdout=Datei)
+        if Ergebnis != 0:
+            logging.error("xelatex exited with status %d, see tmp_files/xelatex-run.log", Ergebnis)
+        print("Splitting all Inline Chemical formulas")
+        pdf_burst("EOAchem.pdf", formula_tmp_dir)
+        print("Converting %s split pages into PNG-Images" % len(dictEOAchems.keys()))
+        total_chems = len(dictEOAchems)
+        for counter_dictEOAchems, intRunningOrder in enumerate(dictEOAchems, start=1):
+            progress(counter_dictEOAchems, total_chems, "Converting chemical formulas, image %s of %s" % (counter_dictEOAchems, total_chems))
+            Kommando = TL_PATH + "texmf-dist/scripts/pdfcrop/pdfcrop.pl " + formula_tmp_dir + "EOAformulas_" + str(intRunningOrder) + ".pdf " + formula_tmp_dir + dictEOAchems[intRunningOrder] + ".pdf"
+            Argumente = shlex.split(Kommando)
+            subprocess.call(Argumente,cwd=formula_tmp_dir,stdout=Datei)
+            Kommando = GM_PATH + " convert -density 144 " + formula_tmp_dir + dictEOAchems[intRunningOrder] + ".pdf " + os.getcwd() + "/items/" + dictEOAchems[intRunningOrder] + ".png"
+            Argumente = shlex.split(Kommando)
+            subprocess.call(Argumente,cwd=formula_tmp_dir,stdout=Datei)
+
+else:
+    print("Found no EOAchem. Continuing")
+
+#########################
+# Chemical formulas end #
+#########################
+
 print("-----------------------------------------------------")
 print("EOAFigure Numbering per Chapter")
 for xmlChapter in xmlChapters:
diff --git a/tralics2django.py b/tralics2django.py
index 0091c5b..ee2bb81 100755
--- a/tralics2django.py
+++ b/tralics2django.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8; mode: python -*-
-# Time-stamp: <2018-01-26 14:48:17 (kthoden)>
+# Time-stamp: <2018-02-09 11:16:22 (kthoden)>
 
 import pickle
 import os
@@ -1117,8 +1117,17 @@ def bring_footnote_down_django(footnote, fragment, footnote_number, object_numbe
     for xmlInlineEquation in xmlInlineEquations:
         xmlInlineEquation.tag = "img"
         xmlInlineEquation.set("class", "EOAineq")
-        xmlInlineEquation.set("alt", "")
+        xmlInlineEquation.set("alt", xmlInlineEquation.get("TeX"))
         shutil.copy(os.getcwd() + "/items/" + xmlInlineEquation.get("src"), os.getcwd() + "/CONVERT/django/images/" + xmlInlineEquation.get("src"))
+
+    # Convert EOAchem into appropriate IMG-Tags
+    xml_inline_chems = xmlEOAchapter.findall(".//EOAchem")
+    for xml_inline_chem in xml_inline_chems:
+        xml_inline_chem.tag = "img"
+        xml_inline_chem.set("class", "EOAineq")
+        xml_inline_chem.set("alt", xml_inline_chem.get("TeX"))
+        shutil.copy(os.getcwd() + "/items/" + xml_inline_chem.get("src"), os.getcwd() + "/CONVERT/django/images/" + xml_inline_chem.get("src"))
+
     # Convert EOAinline into appropriate IMG-Tags
     xmlInlineElements = xmlEOAchapter.findall(".//EOAinline")
     for xmlInlineElement in xmlInlineElements:
diff --git a/tralics2epub.py b/tralics2epub.py
index 8abf3eb..517e4fa 100755
--- a/tralics2epub.py
+++ b/tralics2epub.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8; mode: python -*-
-# Time-stamp: <2018-01-23 10:38:00 (kthoden)>
+# Time-stamp: <2018-02-09 11:19:54 (kthoden)>
 
 import os
 import sys
@@ -822,12 +822,24 @@ class FootnoteError(Exception):
     xmlInlineEquations = xmlChapter.findall(".//EOAineq")
     for xmlInlineEquation in xmlInlineEquations:
         xmlInlineEquation.tag = "img"
-        xmlInlineEquation.set("alt", "")
+        xmlInlineEquation.set("alt", xmlInlineEquation.get("TeX"))
         del xmlInlineEquation.attrib["TeX"]
         shutil.copy(os.getcwd() + "/items/" + xmlInlineEquation.get("src"), os.getcwd() + "/CONVERT/epub/OEBPS/images/" + xmlInlineEquation.get("src"))
         xmlInlineEquation.set("src", "images/" + xmlInlineEquation.get("src"))
         contentopf = addToContentopf(contentopf, xmlInlineEquation.get("src"), xmlInlineEquation.get("src"), "png")
 
+print("-----------------------------------------------------")
+print("Convert EOAchem into appropriate IMG-Tags")
+for xmlChapter in xmlChapters:
+    xml_inline_chems = xmlChapter.findall(".//EOAchem")
+    for xml_inline_chem in xml_inline_chems:
+        xml_inline_chem.tag = "img"
+        xml_inline_chem.set("alt", xml_inline_chem.get("TeX"))
+        del xml_inline_chem.attrib["TeX"]
+        shutil.copy(os.getcwd() + "/items/" + xml_inline_chem.get("src"), os.getcwd() + "/CONVERT/epub/OEBPS/images/" + xml_inline_chem.get("src"))
+        xml_inline_chem.set("src", "images/" + xml_inline_chem.get("src"))
+        contentopf = addToContentopf(contentopf, xml_inline_chem.get("src"), xml_inline_chem.get("src"), "png")
+
 print("-----------------------------------------------------")
 print("Convert EOAinline into appropriate IMG-Tags")
 for xmlChapter in xmlChapters: