From 5a2d9c8054b2fe3fda0a338f134516190dc84dbb Mon Sep 17 00:00:00 2001
From: kthoden
Date: Wed, 4 Mar 2020 12:20:45 +0100
Subject: [PATCH] Moved pdf_burst to library

---
Review notes (this section is ignored by git-am):
- pdf_burst as added to src/utils/libeoaconvert.py now opens the input
  PDF and every output file in "with" blocks, so the handles are closed
  even when writing a page fails. The redundant "pageno += 1" at the end
  of the loop is dropped and the page count is fetched only once.
- Line breaks and indentation were restored from a whitespace-collapsed
  copy of this patch. Whitespace-only context lines and indentation
  depths are a best guess; check them against the target files before
  applying.
- The "index" blob hash for libeoaconvert.py predates the review changes.

 src/eoatex2imxml.py        | 26 +++-----------------------
 src/utils/libeoaconvert.py | 30 ++++++++++++++++++++++++++++++
 2 files changed, 33 insertions(+), 23 deletions(-)

diff --git a/src/eoatex2imxml.py b/src/eoatex2imxml.py
index 2c8874b..710a3ff 100755
--- a/src/eoatex2imxml.py
+++ b/src/eoatex2imxml.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8; mode: python -*-
-# Time-stamp: <2020-03-04 11:54:38 (kthoden)>
+# Time-stamp: <2020-03-04 12:19:55 (kthoden)>
 
 """
 Converts Latex files into a customized DocBook XML file.
@@ -331,26 +331,6 @@ def sanitize_bibentry(bibEntry):
 
 # def sanitize_bibentry ends here
 
-def pdf_burst(input_file, tmpDir):
-    """Split PDF file into single pages"""
-    from PyPDF2 import PdfFileWriter, PdfFileReader
-
-    input1 = PdfFileReader(open(tmpDir / input_file, "rb"))
-    logging.debug("Input is %s and has %d pages." % (input_file, input1.getNumPages()))
-
-    for pageno in range(input1.getNumPages()):
-        output = PdfFileWriter()
-        output.addPage(input1.getPage(pageno))
-
-        output_filename = tmpDir / ("EOAformulas_%d.pdf" % (pageno + 1))
-        output_stream = open(output_filename, 'wb')
-        output.write(output_stream)
-        output_stream.close()
-        logging.debug("Wrote %s." % output_filename)
-
-        pageno += 1
-# def pdf_burst ends here
-
 def cleanup():
     """Remove support files"""
     try:
@@ -804,7 +784,7 @@ def process_inline_equations( xmlChapters ):
     Datei = open(TEMP_DIR / 'xelatex-run.log', 'w')
     Ergebnis = subprocess.check_call(Argumente,cwd=formula_tmp_dir,stdout=Datei)
     logging.info("Splitting all Inline Equations")
-    pdf_burst("EOAinline.pdf", formula_tmp_dir)
+    libeoaconvert.pdf_burst("EOAinline.pdf", formula_tmp_dir)
     logging.info("Converting %s split pages into PNG-Images" % len(dictEOAineqs.keys()))
     counter_dictEOAineqs = 1
     for intRunningOrder in dictEOAineqs.keys():
@@ -890,7 +870,7 @@ def process_eoachem( xmlChapters ):
     Datei = open(TEMP_DIR / 'xelatex-run.log', 'w')
     Ergebnis = subprocess.check_call(Argumente,cwd=formula_tmp_dir,stdout=Datei)
     logging.info("Splitting all Inline Chemical formulas")
-    pdf_burst("EOAchem.pdf", formula_tmp_dir)
+    libeoaconvert.pdf_burst("EOAchem.pdf", formula_tmp_dir)
     logging.info("Converting %s split pages into PNG-Images" % len(dictEOAchems.keys()))
     counter_dictEOAchems = 1
     for intRunningOrder in dictEOAchems.keys():
diff --git a/src/utils/libeoaconvert.py b/src/utils/libeoaconvert.py
index 0ed86bc..e2aee8c 100644
--- a/src/utils/libeoaconvert.py
+++ b/src/utils/libeoaconvert.py
@@ -560,3 +560,33 @@ def progress(count, total, status=''):
     sys.stdout.write('[%s] %s%s ... %s\r' % (bar, percents, '%', status))
     sys.stdout.flush()
 # def progress ends here
+
+
+def pdf_burst(input_file, tmpDir):
+    """Split a PDF file into single-page PDF files.
+
+    Page N (counting from 1) of ``tmpDir / input_file`` is written to
+    ``tmpDir / "EOAformulas_N.pdf"``.
+
+    :param input_file: name of the PDF file, relative to tmpDir
+    :param tmpDir: directory holding the input and receiving the output;
+        must support the ``/`` operator (callers presumably pass a Path)
+    """
+    from PyPDF2 import PdfFileWriter, PdfFileReader
+
+    # The reader object is still used while pages are copied, so the input
+    # file stays open until the last page has been written.
+    with open(tmpDir / input_file, "rb") as input_stream:
+        input1 = PdfFileReader(input_stream)
+        num_pages = input1.getNumPages()
+        logging.debug("Input is %s and has %d pages." % (input_file, num_pages))
+
+        for pageno in range(num_pages):
+            output = PdfFileWriter()
+            output.addPage(input1.getPage(pageno))
+
+            output_filename = tmpDir / ("EOAformulas_%d.pdf" % (pageno + 1))
+            with open(output_filename, 'wb') as output_stream:
+                output.write(output_stream)
+            logging.debug("Wrote %s." % output_filename)
+# def pdf_burst ends here