Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
repair tei->html script (updated saxon)
  • Loading branch information
EsGeh authored and EsGeh committed Dec 2, 2019
1 parent 253b661 commit 1146478
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 25 deletions.
4 changes: 2 additions & 2 deletions Dockerfile
Expand Up @@ -97,8 +97,8 @@ RUN pip install -r "requirements.txt"
# install saxon
# Debian's version of saxon is too old. We need to download it "manually" :-P
WORKDIR ${INSTALL_DIR}
RUN wget 'https://sourceforge.net/projects/saxon/files/Saxon-HE/9.6/SaxonHE9-6-0-10J.zip'
RUN unzip -d "$UTILS_BIN_DIR" "SaxonHE9-6-0-10J.zip"
RUN wget 'https://sourceforge.net/projects/saxon/files/Saxon-HE/9.8/SaxonHE9-8-0-10J.zip'
RUN unzip -d "$UTILS_BIN_DIR" "SaxonHE9-8-0-10J.zip"
WORKDIR ${UTILS_BIN_DIR}
RUN echo '#!/bin/bash' > $UTILS_BIN_DIR/saxon
RUN echo 'java -jar $UTILS_BIN_DIR/saxon9he.jar "$@"' >> $UTILS_BIN_DIR/saxon
Expand Down
87 changes: 64 additions & 23 deletions src/tei2html.py
Expand Up @@ -14,6 +14,8 @@
import logging
import argparse
from pathlib import Path
from os import environ, mkdir
from shutil import rmtree

# things to be done
# assign ids top to bottom for the following elements:
Expand All @@ -22,6 +24,18 @@
BASE_DIR = Path( __file__ ).resolve().parent
SCRIPT_NAME = Path( __file__).stem

DEFAULT_INPUT_DIR = \
Path(environ['INPUT_DIR'] if 'INPUT_DIR' in environ else './input')

DEFAULT_OUTPUT_DIR = \
Path(environ['OUTPUT_DIR'] if 'OUTPUT_DIR' in environ else './output')

DEFAULT_DEPENDENCIES_DIR = \
Path(environ['DEPENDENCIES_DIR'] if 'DEPENDENCIES_DIR' in environ else './dependencies')

EOA_SCRIPTS_DIR = \
Path(environ['EOA_SCRIPTS_DIR'])

def check_executables():
check_executable( "saxon" )

Expand All @@ -35,8 +49,9 @@ def check_executables():
help="Name of config file"
)
parser.add_argument(
"-l", "--log-file",
# default = Path("logs", SCRIPT_NAME).with_suffix(".log"),
"-l", "--log-dir",
default = DEFAULT_OUTPUT_DIR / "logs",
type = Path,
help="logfile"
)
parser.add_argument(
Expand All @@ -46,8 +61,9 @@ def check_executables():
)
parser.add_argument(
"--root-dir",
default = Path.cwd() / "output",
help="internal html links on the page will use this location as a prefix"
default = DEFAULT_OUTPUT_DIR / "html_from_tei",
type = Path,
help=""
)
parser.add_argument(
"-p", "--param",
Expand All @@ -65,48 +81,73 @@ def check_executables():
parser.add_argument(
"-f", "--filename",
required = True,
type = Path,
help="Name of main EOA-TEI file"
)
parser.add_argument(
"-o", "--output-file",
help="make saxon redirect the templates output here"
"-o", "--output-dir",
type = Path,
default = DEFAULT_OUTPUT_DIR / "html_from_tei",
help="the main stylesheet 'tei2html.xsl' will write html files here"
)

'''
TODO: support output directory
parser.add_argument(
"-o", "--output-dir",
default = ".",
help="where to dump all output files"
"-!", "--overwrite",
action = "store_true",
default = False,
help="overwrite files at OUTPUT_DIR"
)
parser.add_argument(
"--output-file",
type = Path,
help="in case of calling a classical stylesheet with output, redirect here"
)
'''

args = parser.parse_args()

CONFIG_FILE = args.config

print("The configfile is '%s'." % CONFIG_FILE)
log_file = args.log_file
if log_file is None:
log_file = (Path("logs") / args.xsl).with_suffix(".log")
print("The logfile is '%s'." % log_file)
log_dir = args.log_dir

CONFIG = load_config(
CONFIG_FILE,
args.log_level,
log_file,
(log_dir / SCRIPT_NAME) . with_suffix( ".log" ),
)

check_executables()

print( "params: " + str(args.param) )
print( "xslt params: " + str(args.param) )

input_file = args.filename
output_dir = args.output_dir

if not input_file.is_file():
import errno
raise( Exception(
f"not a valid input file: {input_file}"
) )

if output_dir.exists():
if args.overwrite:
rmtree( output_dir )
else:
raise( Exception( f"output directory already existing: '{output_dir}'!" ) )
if not output_dir.exists():
mkdir( output_dir )

run_xslt(
input_file = Path( args.filename ),
input_file = input_file,
xslt_file = BASE_DIR / "tei2html" / args.xsl,
output_file = Path( args.output_file ),
params =
[ f"domain={root_dir}" ] ++ args.param,
[
f"domain={args.root_dir}",
f"output_dir={output_dir}",
] +
args.param,
output_file = args.output_file,
exec_command_args = {
'output_to' : ToFile( log_dir / SCRIPT_NAME / "saxon.log" )
},
)

# finis

0 comments on commit 1146478

Please sign in to comment.