From fb6af94f52ba7f45a0718fd13213713ed760b38b Mon Sep 17 00:00:00 2001 From: kthoden Date: Wed, 6 Nov 2019 15:28:16 +0100 Subject: [PATCH] Inserting common functionalities --- imxml2tei.py | 87 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 85 insertions(+), 2 deletions(-) diff --git a/imxml2tei.py b/imxml2tei.py index c029395..b98aed8 100755 --- a/imxml2tei.py +++ b/imxml2tei.py @@ -7,9 +7,93 @@ """ +import argparse import sys import configparser +from pathlib import Path from lxml import etree +import utils.libeoaconvert as libeoaconvert + +BASE_DIR = Path( __file__ ).resolve().parent +SCRIPT_PATH = Path( __file__ ) +SCRIPT_NAME = SCRIPT_PATH.stem + +##################### +# Parsing arguments # +##################### + +parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter +) +parser.add_argument( + "-c", "--config", + default = BASE_DIR / "config" / "eoaconvert.cfg", + help="Name of config file" +) +parser.add_argument( + "-l", "--log-dir", + default = Path("output/logs"), + # default = Path("logs", SCRIPT_NAME).with_suffix(".log"), + help="logfile" +) +parser.add_argument( + "--log-level", + default = "INFO", + help="log level: choose between DEBUG, INFO, WARNING, ERROR, CRITICAL" +) +parser.add_argument( + "-f", "--filename", + default = "IntermediateXMLFile.xml", + help="Name of intermediate XML file (without suffix!)." +) +parser.add_argument( + "-o", "--output-dir", + default = "./output/tei", + help="where to dump all output files" +) +parser.add_argument( + "-i", "--input-dir", + default = "./output/imxml", + help="location of intermediate XML file" +) + +args = parser.parse_args() + +CONFIG_FILE = args.config + +print("The configfile is %s." % CONFIG_FILE) + +CONFIG = load_config( + CONFIG_FILE, + args.log_level, + (Path(args.log_dir) / SCRIPT_NAME) . with_suffix( ".log" ), + # args.log_file, +) + +############################ +# Paths: +############################ +INPUT_DIR = Path( args.input_dir ) +INPUT_PATH = Path( args.filename ) +OUTPUT_DIR = Path( args.output_dir ) +LOG_DIR = Path( args.log_dir ) + +TEMP_DIR = OUTPUT_DIR / "tmp_files" +DEBUG_DIR = OUTPUT_DIR / "debug" + +# where to output the xml file: +XML_FILE = (OUTPUT_DIR / INPUT_PATH.name) .with_suffix( ".xml" ) + +################################## +# Setting up various directories # +################################## + +if not os.path.exists(OUTPUT_DIR): + os.mkdir( OUTPUT_DIR ) +if not os.path.exists(TEMP_DIR): + os.mkdir( TEMP_DIR ) +if not os.path.exists( DEBUG_DIR ): + os.mkdir( DEBUG_DIR ) # citations need a little more work: especially citedRange # so do landscape figures, no way to distinguish them! @@ -417,10 +501,9 @@ def main(): back_part = etree.SubElement(tei_body, "back") tei_body.insert(1, tei_body_xml.getroot()) - outfile = 'CONVERT/TEI.xml' output_string = etree.tostring(tei_root, xml_declaration=True, pretty_print=True, encoding="UTF-8", doctype= '\n') - with open(outfile, 'w') as output_file: + with open(XML_FILE, 'w') as output_file: output_file.write(output_string.decode("utf-8")) # def main ends here