From 66c1b1f2c3d1a4a4db782e268f9b372b2630b536 Mon Sep 17 00:00:00 2001 From: EsGeh Date: Wed, 4 Dec 2019 16:15:30 +0100 Subject: [PATCH] tei->html: script creates "self containing" output. --- dependencies.conf | 8 +++++- docker-compose.yaml | 1 + src/tei2html.py | 63 +++++++++++++++++++++++++++++++++++++++------ 3 files changed, 63 insertions(+), 9 deletions(-) diff --git a/dependencies.conf b/dependencies.conf index 0f6a7fd..3b3e90c 100644 --- a/dependencies.conf +++ b/dependencies.conf @@ -13,5 +13,11 @@ hash = 2a01be46ee82fce5eba6074359b3d18db2222e0c [eoa-publication-model] uri = https://github.molgen.mpg.de/EditionOpenAccess/eoa-publication-model.git -hash = ea8a2eb99b22703f1ef92544352701f07c08d17e +hash = 5ff326580a6bc34756bce511a23146453fdb82b6 # init not needed, since only example publication is needed + +[webdesign_platform] + +uri = https://github.molgen.mpg.de/EditionOpenAccess/webdesign_platform.git +hash = 7a6c3f7c0db224fdcbb046df3f7edad8fefded3f +init = ./scripts/init.sh diff --git a/docker-compose.yaml b/docker-compose.yaml index 5d9b0a8..7b6319f 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -12,6 +12,7 @@ services: - ./${HOME_DIR}:${HOME_DIR_IN_CONTAINER} - ./${INPUT_DIR}:${INPUT_DIR_IN_CONTAINER} - ./${OUTPUT_DIR}:${OUTPUT_DIR_IN_CONTAINER} + - ./${DEPENDENCIES_DIR}:${DEPENDENCIES_DIR_IN_CONTAINER} environment: - HOME=${HOME_DIR_IN_CONTAINER} - INPUT_DIR=${INPUT_DIR_IN_CONTAINER} diff --git a/src/tei2html.py b/src/tei2html.py index 7f9c2ca..a57664a 100755 --- a/src/tei2html.py +++ b/src/tei2html.py @@ -15,7 +15,8 @@ import argparse from pathlib import Path from os import environ, mkdir -from shutil import rmtree +from shutil import rmtree, copytree, ignore_patterns +from lxml import etree # things to be done # assign ids top to bottom for the following elements: @@ -39,6 +40,22 @@ def check_executables(): check_executable( "saxon" ) +def copy_dir( + src, + dst, + **opts +): + print( "'{}' -> '{}'".format( src, dst ) ) + if Path(dst).exists(): + rmtree( + dst + ) + copytree( + src=src, + dst=dst, + **opts + ) + if __name__ == '__main__': parser = argparse.ArgumentParser( formatter_class=argparse.ArgumentDefaultsHelpFormatter @@ -80,9 +97,9 @@ def check_executables(): ) parser.add_argument( "-f", "--filename", - required = True, + default = Path("*.xml"), type = Path, - help="Name of main EOA-TEI file" + help = "xml file inside PUBLICATION_DIR, or absolute path. Patterns like '*.xml' are also acceptable" ) parser.add_argument( "-o", "--output-dir", @@ -94,7 +111,7 @@ def check_executables(): "-!", "--overwrite", action = "store_true", default = False, - help="overwrite files at OUTPUT_DIR" + help="overwrite OUTPUT_DIR, if existing" ) parser.add_argument( "--output-file", @@ -113,6 +130,13 @@ def check_executables(): type = Path, help="static files of publication (relative to OUTPUT_DIR, or absolute)" ) + parser.add_argument( + "PUBLICATION_DIR", + default = Path("input/example/tei"), + help = "directory containing the publication (including resources like pictures, etc.)", + nargs = '?', # (optional) + type = Path, + ) args = parser.parse_args() @@ -130,12 +154,19 @@ def check_executables(): print( "xslt params: " + str(args.param) ) - input_file = args.filename + publ_dir = args.PUBLICATION_DIR + publ_file = args.filename + + tei_filename = \ + publ_file if publ_file . is_absolute() else list(publ_dir . glob (str(publ_file)))[0] + + logging.info( f"tei_file: {tei_filename}, publ_dir: {publ_dir}" ) + output_dir = args.output_dir - if not input_file.is_file(): + if not tei_filename.is_file(): raise( Exception( - f"not a valid input file: {input_file}" + f"not a valid input file: {tei_filename}" ) ) if output_dir.exists(): @@ -145,9 +176,25 @@ def check_executables(): raise( Exception( f"output directory already existing: '{output_dir}'!" ) ) if not output_dir.exists(): mkdir( output_dir ) + ## copy webdesign: + copy_dir( + DEFAULT_DEPENDENCIES_DIR / "webdesign_platform/dist", + output_dir / "webdesign_platform", + ) + ## copy publication static files: + static_dest_dir = output_dir / "publication_static" + static_dest_dir.mkdir( + parents = True, + exist_ok = True + ) + copy_dir( + publ_dir, + static_dest_dir, + ignore = ignore_patterns( "*.xml" ) + ) run_xslt( - input_file = input_file, + input_file = tei_filename, xslt_file = BASE_DIR / "tei2html" / args.xsl, params = [