diff --git a/Dockerfile b/Dockerfile index a070e1c..f93e67f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -97,8 +97,8 @@ RUN pip install -r "requirements.txt" # install saxon # Debians version of saxon is too old. We need to download it "manually" :-P WORKDIR ${INSTALL_DIR} -RUN wget 'https://sourceforge.net/projects/saxon/files/Saxon-HE/9.6/SaxonHE9-6-0-10J.zip' -RUN unzip -d "$UTILS_BIN_DIR" "SaxonHE9-6-0-10J.zip" +RUN wget 'https://sourceforge.net/projects/saxon/files/Saxon-HE/9.8/SaxonHE9-8-0-10J.zip' +RUN unzip -d "$UTILS_BIN_DIR" "SaxonHE9-8-0-10J.zip" WORKDIR ${UTILS_BIN_DIR} RUN echo '#!/bin/bash' > $UTILS_BIN_DIR/saxon RUN echo 'java -jar $UTILS_BIN_DIR/saxon9he.jar "$@"' >> $UTILS_BIN_DIR/saxon diff --git a/src/tei2html.py b/src/tei2html.py index 8ce3bdd..10ce0f1 100755 --- a/src/tei2html.py +++ b/src/tei2html.py @@ -14,6 +14,8 @@ import logging import argparse from pathlib import Path +from os import environ, mkdir +from shutil import rmtree # things to be done # assign ids top to bottom for the following elements: @@ -22,6 +24,18 @@ BASE_DIR = Path( __file__ ).resolve().parent SCRIPT_NAME = Path( __file__).stem +DEFAULT_INPUT_DIR = \ + Path(environ['INPUT_DIR'] if 'INPUT_DIR' in environ else './input') + +DEFAULT_OUTPUT_DIR = \ + Path(environ['OUTPUT_DIR'] if 'OUTPUT_DIR' in environ else './output') + +DEFAULT_DEPENDENCIES_DIR = \ + Path(environ['DEPENDENCIES_DIR'] if 'DEPENDENCIES_DIR' in environ else './dependencies') + +EOA_SCRIPTS_DIR = \ + Path(environ['EOA_SCRIPTS_DIR']) + def check_executables(): check_executable( "saxon" ) @@ -35,8 +49,9 @@ def check_executables(): help="Name of config file" ) parser.add_argument( - "-l", "--log-file", - # default = Path("logs", SCRIPT_NAME).with_suffix(".log"), + "-l", "--log-dir", + default = DEFAULT_OUTPUT_DIR / "logs", + type = Path, help="logfile" ) parser.add_argument( @@ -46,8 +61,9 @@ def check_executables(): ) parser.add_argument( "--root-dir", - default = Path.cwd() / "output", - help="internal html links on the page will use this location as a prefix" + default = DEFAULT_OUTPUT_DIR / "html_from_tei", + type = Path, + help="" ) parser.add_argument( "-p", "--param", @@ -65,48 +81,73 @@ def check_executables(): parser.add_argument( "-f", "--filename", required = True, + type = Path, help="Name of main EOA-TEI file" ) parser.add_argument( - "-o", "--output-file", - help="make saxon redirect the templates output here" + "-o", "--output-dir", + type = Path, + default = DEFAULT_OUTPUT_DIR / "html_from_tei", + help="the main stylesheet 'tei2html.xsl' will write html files here" ) - - ''' - TODO: support output directory parser.add_argument( - "-o", "--output-dir", - default = ".", - help="where to dump all output files" + "-!", "--overwrite", + action = "store_true", + default = False, + help="overwrite files at OUTPUT_DIR" + ) + parser.add_argument( + "--output-file", + type = Path, + help="in case of calling a classical stylesheet with output, redirect here" ) - ''' args = parser.parse_args() CONFIG_FILE = args.config - print("The configfile is '%s'." % CONFIG_FILE) - log_file = args.log_file - if log_file is None: - log_file = (Path("logs") / args.xsl).with_suffix(".log") - print("The logfile is '%s'." % log_file) + log_dir = args.log_dir CONFIG = load_config( CONFIG_FILE, args.log_level, - log_file, + (log_dir / SCRIPT_NAME) . with_suffix( ".log" ), ) check_executables() - print( "params: " + str(args.param) ) + print( "xslt params: " + str(args.param) ) + + input_file = args.filename + output_dir = args.output_dir + + if not input_file.is_file(): + import errno + raise( Exception( + f"not a valid input file: {input_file}" + ) ) + + if output_dir.exists(): + if args.overwrite: + rmtree( output_dir ) + else: + raise( Exception( f"output directory already existing: '{output_dir}'!" ) ) + if not output_dir.exists(): + mkdir( output_dir ) run_xslt( - input_file = Path( args.filename ), + input_file = input_file, xslt_file = BASE_DIR / "tei2html" / args.xsl, - output_file = Path( args.output_file ), params = - [ f"domain={root_dir}" ] ++ args.param, + [ + f"domain={args.root_dir}", + f"output_dir={output_dir}", + ] + + args.param, + output_file = args.output_file, + exec_command_args = { + 'output_to' : ToFile( log_dir / SCRIPT_NAME / "saxon.log" ) + }, ) # finis