Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
EOASkripts/src/tei2html.py
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
executable file
212 lines (185 sloc)
6.02 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# -*- coding: utf-8; mode: python -*- | |
"A converter from TEI to HTML." | |
__version__ = "1.0" | |
__date__ = "20190408" | |
__author__ = "sgfroerer@mpiwg-berlin.mpg.de" | |
from utils.load_config import load_config, check_executable, exec_command, ToFile | |
from utils.run_xslt import run_xslt | |
import utils.libeoaconvert as libeoaconvert | |
import logging | |
import argparse | |
from pathlib import Path | |
from os import environ, mkdir | |
from shutil import rmtree, copytree, ignore_patterns | |
from lxml import etree | |
# TODO: assign ids, from top to bottom, to the following elements:
#   div1 div2 div3 note item table EOAfigure EOAequation formula theorem
# Directory containing this script; the config file and the 'tei2html'
# stylesheet directory are looked up relative to it further down.
BASE_DIR = Path(__file__).resolve().parent
# File name of this script without its extension; used to name the log files.
SCRIPT_NAME = Path(__file__).stem

# Default locations. Each one can be overridden by the environment variable
# of the same name; otherwise a path relative to the working directory is used.
DEFAULT_INPUT_DIR = Path(environ.get('INPUT_DIR', './input'))
DEFAULT_OUTPUT_DIR = Path(environ.get('OUTPUT_DIR', './output'))
DEFAULT_DEPENDENCIES_DIR = Path(environ.get('DEPENDENCIES_DIR', './dependencies'))

# No fallback here: a KeyError is raised at import time when the
# EOA_SCRIPTS_DIR environment variable is not set.
EOA_SCRIPTS_DIR = Path(environ['EOA_SCRIPTS_DIR'])
def check_executables():
    """Run ``check_executable`` for every external tool this script needs.

    Currently the only required tool is "saxon". What happens when a tool
    is missing is decided by ``utils.load_config.check_executable``.
    """
    required_tools = ("saxon",)
    for tool in required_tools:
        check_executable(tool)
def copy_dir(
    src,
    dst,
    **opts
):
    """Replace ``dst`` with a fresh recursive copy of the directory ``src``.

    Anything already present at ``dst`` is removed first, so the result
    never contains stale files from an earlier run. The copy operation is
    announced on stdout as ``'src' -> 'dst'``.

    Args:
        src: directory to copy from (str or path-like).
        dst: destination path (str or path-like); it is deleted if present.
        **opts: extra keyword arguments handed unchanged to
            ``shutil.copytree`` (for example ``ignore=...``).

    Raises:
        OSError: if removing ``dst`` or copying ``src`` fails.
    """
    print( "'{}' -> '{}'".format( src, dst ) )
    target = Path(dst)
    if target.is_symlink() or target.is_file():
        # rmtree() refuses symbolic links and plain files, and exists() is
        # False for a dangling link, so these cases are unlinked directly.
        # Only the link itself is removed, never the directory it points to.
        target.unlink()
    elif target.exists():
        rmtree(dst)
    copytree(
        src=src,
        dst=dst,
        **opts
    )
def _resolve_tei_file(publ_dir, publ_file):
    """Return the TEI input file selected by ``publ_file``.

    An absolute ``publ_file`` is returned unchanged. Otherwise it is treated
    as a glob pattern relative to ``publ_dir`` and the first match in sorted
    order is returned, so the choice is reproducible between runs.

    Raises:
        FileNotFoundError: if the pattern matches nothing in ``publ_dir``.
    """
    if publ_file.is_absolute():
        return publ_file
    matches = sorted(publ_dir.glob(str(publ_file)))
    if not matches:
        raise FileNotFoundError(
            f"no file matching '{publ_file}' found in '{publ_dir}'"
        )
    if len(matches) > 1:
        logging.warning(
            f"'{publ_file}' matches {len(matches)} files in '{publ_dir}', using '{matches[0]}'"
        )
    return matches[0]


# Command-line entry point: convert one TEI file to HTML.
# Steps: parse arguments, load the config, check for saxon, prepare the
# output directory, copy the webdesign and the publication's static files,
# and finally run the XSLT stylesheet.
if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument(
        "-c", "--config",
        default = BASE_DIR / "config" / "eoaconvert.cfg",
        help="Name of config file"
    )
    parser.add_argument(
        "-l", "--log-dir",
        default = DEFAULT_OUTPUT_DIR / "logs",
        type = Path,
        help="logfile"
    )
    parser.add_argument(
        "--log-level",
        default = "INFO",
        help="log level: choose between DEBUG, INFO, WARNING, ERROR, CRITICAL"
    )
    # NOTE(review): --root-dir is parsed but not used anywhere in this script.
    parser.add_argument(
        "--root-dir",
        default = DEFAULT_OUTPUT_DIR / "html_from_tei",
        type = Path,
        help=""
    )
    parser.add_argument(
        "-p", "--param",
        action = 'append',
        default = [],
        help="xslt params"
    )
    parser.add_argument(
        "-x", "--xsl",
        default = "tei2html.xsl",
        help="name of the xsl file in '{dir}/'".format(
            dir = BASE_DIR / "tei2html"
        )
    )
    parser.add_argument(
        "-f", "--filename",
        default = Path("*.xml"),
        type = Path,
        help = "xml file inside PUBLICATION_DIR, or absolute path. Patterns like '*.xml' are also acceptable"
    )
    parser.add_argument(
        "-o", "--output-dir",
        type = Path,
        default = DEFAULT_OUTPUT_DIR / "html_from_tei",
        help="the main stylesheet 'tei2html.xsl' will write html files here"
    )
    parser.add_argument(
        "-!", "--overwrite",
        action = "store_true",
        default = False,
        help="overwrite OUTPUT_DIR, if existing"
    )
    parser.add_argument(
        "--output-file",
        type = Path,
        help="in case of calling a classical stylesheet with output, redirect here"
    )
    parser.add_argument(
        "-w", "--webdesign",
        default = '../webdesign_platform',
        type = Path,
        help="location to the webdesign (relative to OUTPUT_DIR, or absolute)"
    )
    parser.add_argument(
        "-s", "--static",
        default = '../publication_static',
        type = Path,
        help="static files of publication (relative to OUTPUT_DIR, or absolute)"
    )
    parser.add_argument(
        "PUBLICATION_DIR",
        default = Path("input/example/tei"),
        help = "directory containing the publication (including resources like pictures, etc.)",
        nargs = '?', # (optional)
        type = Path,
    )

    args = parser.parse_args()

    CONFIG_FILE = args.config
    log_dir = args.log_dir

    # The main log file is '<log_dir>/<script name>.log'.
    CONFIG = load_config(
        CONFIG_FILE,
        args.log_level,
        (log_dir / SCRIPT_NAME).with_suffix(".log"),
    )

    check_executables()

    print( "xslt params: " + str(args.param) )

    publ_dir = args.PUBLICATION_DIR
    publ_file = args.filename
    # Fails with a descriptive error (not a bare IndexError) when the
    # pattern matches no file inside the publication directory.
    tei_filename = _resolve_tei_file(publ_dir, publ_file)
    logging.info( f"tei_file: {tei_filename}, publ_dir: {publ_dir}" )
    output_dir = args.output_dir

    if not tei_filename.is_file():
        raise Exception(
            f"not a valid input file: {tei_filename}"
        )

    # Never write into an existing output directory unless explicitly allowed.
    if output_dir.exists():
        if args.overwrite:
            rmtree( output_dir )
        else:
            raise Exception( f"output directory already existing: '{output_dir}'!" )

    # parents=True: also works when the parent of OUTPUT_DIR does not exist
    # yet (a plain os.mkdir would raise FileNotFoundError in that case).
    output_dir.mkdir(
        parents = True,
        exist_ok = True
    )

    ## copy webdesign:
    copy_dir(
        DEFAULT_DEPENDENCIES_DIR / "webdesign_platform/dist",
        output_dir / "webdesign_platform",
    )

    ## copy publication static files (everything except the xml sources):
    static_dest_dir = output_dir / "publication_static"
    static_dest_dir.mkdir(
        parents = True,
        exist_ok = True
    )
    copy_dir(
        publ_dir,
        static_dest_dir,
        ignore = ignore_patterns( "*.xml" )
    )

    # NOTE(review): ToFile is defined in utils.load_config and not visible
    # here; the directory for the saxon log is created up front in case
    # ToFile does not create it itself.
    saxon_log = log_dir / SCRIPT_NAME / "saxon.log"
    saxon_log.parent.mkdir(
        parents = True,
        exist_ok = True
    )

    # Parameters given with -p/--param are appended after the built-in ones.
    run_xslt(
        input_file = tei_filename,
        xslt_file = BASE_DIR / "tei2html" / args.xsl,
        params =
            [
                f"output_dir={output_dir}",
                f"webdesign_url={args.webdesign}",
                f"publ_static_url={args.static}",
            ] +
            args.param,
        output_file = args.output_file,
        exec_command_args = {
            'output_to' : ToFile( saxon_log )
        },
    )
# finis |