diff --git a/README.md b/README.md index 1123952..b98fb34 100644 --- a/README.md +++ b/README.md @@ -101,7 +101,7 @@ In order to apply the workflow to any other publication copy it into the `runtim This script calls several other scripts to compile the input into several different output formats. Every script can be run seperately if needed. Just check the contents of the script for details. -## The DocX workflow (docx -> TEI -> pdf, django, epub) (TODO: complete documentation, test and fix workflow) +## The TEI workflow (TEI -> pdf, django, epub) The following description uses the example publication in `input/example` (from the `eoa-publication-model` repository). In order to apply the workflow to any other publication copy it into the `input/` directory and adjust paths in the description accordingly. @@ -113,23 +113,25 @@ In order to apply the workflow to any other publication copy it into the `input/ 1. eoaTEI -> eoaTEI with bibliography - $ tei_add_bibl.py -o output/125/with_bibl input/example/125_tei_part + $ tei_add_bibl.py input/example/125_tei_part 1. eoaTEI -> eoaTEX - $ tei2eoatex.py -f input/example/125_tei_part/tei_part.xml -o output/125/eoatex + $ tei2eoatex.py input/example/125_tei_part 1. eoaTEX -> pdf - $ eoatex2pdf.py -f output/125/eoatex/main.tex -o output/125/pdf - - (adjust filename if necessary) + $ eoatex2pdf.py -o output/125_tei_part/pdf output/125_tei_part/eoatex 1. eoaTEI -> imxml (to intermediate xml) - $ gather_pickledata.py -o output/125/pickle input/example/125_tei_part/*.xml input/example/125_tei_part/texfiles/example.bib - $ tei2imxml.py -f output/125/with_bibl/tei_part.xml -d output/125/pickle/data.pickle -o output/125/imxml + $ gather_pickledata.py -o output/125_tei_part/pickle output/with_bibl/125_tei_part/{tei_part_with_bibl.xml,texfiles/example.bib} + $ tei2imxml.py -f tei_part.xml output/with_bibl/125_tei_part 1. 
eoaTEI -> html - $ tei2html.py -o output/125/html output/125/with_bibl + $ tei2html.py output/with_bibl/125_tei_part + +## The DocX workflow (DocX -> TEI -> ...) (TODO: describe how) + +Convert from DocX to eoaTEI, then continue with the tei workflow (adjust paths accordingly) as described above. diff --git a/src/eoatex2imxml.py b/src/eoatex2imxml.py index 7e4f303..e7603b1 100755 --- a/src/eoatex2imxml.py +++ b/src/eoatex2imxml.py @@ -67,40 +67,26 @@ default = BASE_DIR / "config" / "eoaconvert.cfg", help="Name of config file" ) -parser.add_argument( - "-l", "--log-dir", - default = DEFAULT_OUTPUT_DIR / "logs", - help="logfile" -) parser.add_argument( "--log-level", default = "INFO", help="log level: choose between DEBUG, INFO, WARNING, ERROR, CRITICAL" ) -parser.add_argument( - "--tei-guidelines", - default = DEFAULT_DEPENDENCIES_DIR / "TEI", - help="path to the https://github.com/TEIC/TEI" -) -parser.add_argument( - "--tei-stylesheets", - default = DEFAULT_DEPENDENCIES_DIR / "Stylesheets", - help="path to the https://github.com/TEIC/Stylesheets" -) parser.add_argument( "-f", "--filename", - required = True, - help="Name of main EOATeX file (without suffix!)." + default = Path("*.tex"), + type = Path, + help = "tex file inside INPUT_DIR, or absolute path. Patterns like '*.tex' are also acceptable" ) parser.add_argument( "--latex-dir", - default = DEFAULT_OUTPUT_DIR / "latex", - help="directory where to find the output generated by eoatex2pdf.py" + type = Path, + help=f"directory where to find the output generated by eoatex2pdf.py. default: {DEFAULT_OUTPUT_DIR}/INPUT_DIR/pdf" ) parser.add_argument( "-o", "--output-dir", - default = DEFAULT_OUTPUT_DIR / "imxml", - help="where to dump all output files" + type = Path, + help = f"output directory. 
default: {DEFAULT_OUTPUT_DIR}/INPUT_DIR/imxml" ) parser.add_argument( "-t", "--trash", @@ -116,6 +102,11 @@ action="store_true", help="Embed webdesign of EOA1.0 into XML" ) +parser.add_argument( + "INPUT_DIR", + help = "directory containing the publication (including resources like pictures, etc.)", + type = Path, +) args = parser.parse_args() @@ -129,17 +120,6 @@ # run biber_2.1 -O biber2-1n.bbl $INPUT to obtain this file BIBERFILE = "biber2-1.bbl" -################################## -# Reading the configuration file # -################################## - -CONFIG = load_config( - CONFIG_FILE, - args.log_level, - (Path(args.log_dir) / SCRIPT_NAME) . with_suffix( ".log" ), - # args.log_file, -) - ######################## # Paths to executables # ######################## @@ -150,25 +130,19 @@ # TL_PATH = CONFIG['Executables']['texlive'] # TEXBIN_PATH = CONFIG['Executables']['texbin'] -############################ -# Paths to auxiliary files # -############################ -TRALICS_PATH_LIB = BASE_DIR / CONFIG['Auxiliaries']['TRALICS_PATH_LIB'] -TEMPLATE_PATH = BASE_DIR / CONFIG['Auxiliaries']['template_path'] -SUPPORT_PATH = BASE_DIR / CONFIG['Auxiliaries']['support_path'] - ############################ # Paths: ############################ -INPUT_DIR = Path( args.filename ).resolve().parent -INPUT_PATH = Path( args.filename ) -if INPUT_PATH.suffix == '': - INPUT_PATH = INPUT_PATH.with_suffix( ".tex" ) -elif INPUT_PATH.suffix != ".tex": - raise( Exception( "input file matching '*.tex' expected" ) ) -OUTPUT_DIR = Path( args.output_dir ) -LATEX_DIR = Path ( args.latex_dir ) -LOG_DIR = Path( args.log_dir ) +INPUT_DIR = args.INPUT_DIR +INPUT_PATH = args.filename +INPUT_PATH = \ + args.filename if args.filename . is_absolute() else list(INPUT_DIR . 
glob( str(args.filename) ))[0] +OUTPUT_DIR = \ + args.output_dir if args.output_dir is not None else (DEFAULT_OUTPUT_DIR / INPUT_DIR.resolve().stem) / "imxml" +LATEX_DIR = \ + args.latex_dir if args.latex_dir is not None else (DEFAULT_OUTPUT_DIR / INPUT_DIR.resolve().stem) / "pdf" +LOG_DIR = OUTPUT_DIR / "log" +LOG_FILE = (LOG_DIR / SCRIPT_NAME) . with_suffix( ".log" ) TEMP_DIR = OUTPUT_DIR / "tmp_files" DEBUG_DIR = OUTPUT_DIR / "debug" @@ -178,6 +152,23 @@ BIB2HTML_FILENAME = "temp" +################################## +# Reading the configuration file # +################################## + +CONFIG = load_config( + CONFIG_FILE, + args.log_level, + LOG_FILE, +) + +############################ +# Paths to auxiliary files # +############################ +TRALICS_PATH_LIB = BASE_DIR / CONFIG['Auxiliaries']['TRALICS_PATH_LIB'] +TEMPLATE_PATH = BASE_DIR / CONFIG['Auxiliaries']['template_path'] +SUPPORT_PATH = BASE_DIR / CONFIG['Auxiliaries']['support_path'] + ################################################# # Checking for existance of tools and libraries # diff --git a/src/eoatex2pdf.py b/src/eoatex2pdf.py index a7be5a3..d86abc0 100755 --- a/src/eoatex2pdf.py +++ b/src/eoatex2pdf.py @@ -16,10 +16,10 @@ SCRIPT_PATH = Path( __file__ ) SCRIPT_NAME = SCRIPT_PATH.stem -INPUT_DIR = \ +DEFAULT_INPUT_DIR = \ Path(os.environ['INPUT_DIR'] if 'INPUT_DIR' in os.environ else './input') -OUTPUT_DIR = \ +DEFAULT_OUTPUT_DIR = \ Path(os.environ['OUTPUT_DIR'] if 'OUTPUT_DIR' in os.environ else './output') def main( @@ -110,24 +110,20 @@ def copy_bib_file(): ) parser.add_argument( "-f", "--filename", - required = True, - help="Name of main EOATeX file without .tex extension." + default = Path("*.tex"), + type = Path, + help = "tex file inside INPUT_DIR, or absolute path. Patterns like '*.tex' are also acceptable" ) parser.add_argument( "-o", "--output-dir", - default = OUTPUT_DIR / "latex", - help = "output directory" + type = Path, + help = f"output directory. 
default: {DEFAULT_OUTPUT_DIR}/INPUT_DIR/pdf" ) parser.add_argument( "-c", "--config", default = BASE_DIR / "config" / "eoaconvert.cfg", help="Name of config file" ) - parser.add_argument( - "-l", "--log-file", - default = (OUTPUT_DIR / "logs" / SCRIPT_NAME).with_suffix(".log"), - help="logfile" - ) parser.add_argument( "--log-level", default = "INFO", @@ -139,16 +135,30 @@ def copy_bib_file(): default = False, help="Run only two passes of XeLaTeX and no biber." ) + parser.add_argument( + "INPUT_DIR", + help = "directory containing the publication (including resources like pictures, etc.)", + type = Path, + ) + args = parser.parse_args() + input_dir = args.INPUT_DIR + input_file = \ + args.filename if args.filename . is_absolute() else list(input_dir . glob( str(args.filename) ))[0] + # output_dir = args.output_dir + output_dir = \ + args.output_dir if args.output_dir is not None else (DEFAULT_OUTPUT_DIR / input_dir.resolve().stem) / "pdf" + log_dir = output_dir / "log" + log_file = (log_dir / SCRIPT_NAME) . 
with_suffix( ".log" ) load_config( args.config, args.log_level, - args.log_file + log_file ) main( - input_file = args.filename, - output_dir = args.output_dir, + input_file = input_file, + output_dir = output_dir, nobiber = args.no_biber ) diff --git a/src/gather_pickledata.py b/src/gather_pickledata.py index 5fbcfd0..da43662 100755 --- a/src/gather_pickledata.py +++ b/src/gather_pickledata.py @@ -62,7 +62,7 @@ def main( ) parser.add_argument( "-o", "--output-dir", - default = DEFAULT_OUTPUT_DIR / "from_tei/pickle", + required = True, metavar = "OUTPUT_DIR", help="output directory" ) diff --git a/src/imxml2django.py b/src/imxml2django.py index dfe51d0..6b64bd8 100755 --- a/src/imxml2django.py +++ b/src/imxml2django.py @@ -51,12 +51,8 @@ default = BASE_DIR / "config" / "eoaconvert.cfg", dest="CONFIG_FILE", help="Name of configuration file", - metavar="CONFIGURATION" -) -parser.add_argument( - "-l", "--log-file", - default = (DEFAULT_OUTPUT_DIR / 'logs' / SCRIPT_NAME).with_suffix(".log"), - help="logfile" + metavar="CONFIGURATION", + type = Path, ) parser.add_argument( "--log-level", @@ -68,38 +64,24 @@ help="Check the publication.cfg for completeness.", action="store_true" ) -parser.add_argument( - "--publication-dir", - required = True, - # default = DEFAULT_INPUT_DIR, - help="directory containing publication.cfg and the Cover.jpg" -) parser.add_argument( "-i", "--input-dir", - default = DEFAULT_OUTPUT_DIR / "imxml", - help="directory containing the intermediate xml generated by eoatex2imxml.py" + help = f"directory containing some intermediate xml created by previous steps. default: {DEFAULT_OUTPUT_DIR}/PUBLICATION_NAME/imxml", + type = Path, ) parser.add_argument( "-o", "--output-dir", - default = DEFAULT_OUTPUT_DIR / "django", - help="where to dump all output files" + help = f"output directory. 
default: {DEFAULT_OUTPUT_DIR}/PUBLICATION_NAME/django", + type = Path, +) +parser.add_argument( + "PUBLICATION_DIR", + help = "directory containing the publication (including resources like pictures, etc.)", + type = Path, ) args = parser.parse_args() -config_file = args.CONFIG_FILE - -print("The configfile is %s." % config_file) - -################################## -# Reading the configuration file # -################################## -CONFIG = load_config( - config_file, - args.log_level, - args.log_file, -) - ######################## # Paths to executables # ######################## @@ -109,13 +91,29 @@ ############################ # Paths: ############################ -INPUT_DIR = Path( args.input_dir ) -OUTPUT_DIR = Path( args.output_dir ) -PUBLICATION_DIR = Path( args.publication_dir ) +PUBLICATION_DIR = args.PUBLICATION_DIR +INPUT_DIR = \ + args.input_dir if args.input_dir is not None else DEFAULT_OUTPUT_DIR / PUBLICATION_DIR.resolve().stem / "imxml" +OUTPUT_DIR = \ + args.output_dir if args.output_dir is not None else (DEFAULT_OUTPUT_DIR / PUBLICATION_DIR.resolve().stem) / "django" +LOG_DIR = OUTPUT_DIR / "log" +LOG_FILE = (LOG_DIR / SCRIPT_NAME) . with_suffix( ".log" ) TEMP_DIR = OUTPUT_DIR / "tmp_files" -# CONVERT_DIR = OUTPUT_DIR / "CONVERT" DEBUG_DIR = OUTPUT_DIR / "debug" +config_file = args.CONFIG_FILE + +print("The configfile is %s." % config_file) + +################################## +# Reading the configuration file # +################################## +CONFIG = load_config( + config_file, + args.log_level, + LOG_FILE, +) + ############################ # Paths to auxiliary files # ############################ @@ -128,16 +126,10 @@ if not TEMP_DIR.exists(): os.makedirs( TEMP_DIR ) -# if not CONVERT_DIR.exists(): -# os.makedirs( CONVERT_DIR ) if not DEBUG_DIR.exists(): os.makedirs( DEBUG_DIR ) # Check for folder and necessary files -# if not os.path.exists(CONVERT_DIR): -# logging.info(f"The directory {CONVERT_DIR} has not been created yet. 
Creating it for you") -# time.sleep(1) -# os.makedirs(CONVERT_DIR) logging.info(f"The publication.cfg file is missing in django directory.") if os.path.exists(INPUT_DIR / "publication.cfg"): shutil.copy(INPUT_DIR / "publication.cfg", OUTPUT_DIR) @@ -185,10 +177,6 @@ # Convert tralics-XML to Django Data Structure # ############################################################################ """) -# Create django File Structure -# if not os.path.exists(CONVERT_DIR / "django"): -# os.mkdir(CONVERT_DIR / "django") - # os.mkdir(CONVERT_DIR / "django" / "images") if not os.path.exists(OUTPUT_DIR / "images"): os.mkdir(OUTPUT_DIR / "images") if not os.path.exists(OUTPUT_DIR / "images" / "embedded"): diff --git a/src/imxml2epub.py b/src/imxml2epub.py index 268445c..72b4d21 100755 --- a/src/imxml2epub.py +++ b/src/imxml2epub.py @@ -51,33 +51,21 @@ help="Name of configuration file", metavar="CONFIGURATION" ) -parser.add_argument( - "-l", "--log-file", - default = (DEFAULT_OUTPUT_DIR / "logs" / SCRIPT_NAME) . with_suffix( ".log" ), - help="logfile" -) parser.add_argument( "--log-level", default = "INFO", help="log level: choose between DEBUG, INFO, WARNING, ERROR, CRITICAL" ) - -parser.add_argument( - "--publication-dir", - default = DEFAULT_INPUT_DIR, - help="directory containing publication.cfg and the Cover.jpg" -) parser.add_argument( "-i", "--input-dir", - default = DEFAULT_OUTPUT_DIR / "imxml", - help="directory containing the intermediate xml generated by eoatex2imxml.py" + help = f"directory containing some intermediate xml created by previous steps. default: {DEFAULT_OUTPUT_DIR}/PUBLICATION_NAME/imxml", + type = Path, ) parser.add_argument( "-o", "--output-dir", - default = DEFAULT_OUTPUT_DIR / "epub", - help="where to dump all output files" + help = f"output directory. 
default: {DEFAULT_OUTPUT_DIR}/PUBLICATION_NAME/epub", + type = Path, +) - parser.add_argument( "-f", "--font", help="Font to be used, default is TeX Gyre Termes", @@ -96,24 +84,14 @@ "--extra-font-files-directory", help="Specify the directory with files of the font (the font itself, License)", ) +parser.add_argument( + "PUBLICATION_DIR", + help = "directory containing the publication (including resources like pictures, etc.)", + type = Path, +) args = parser.parse_args() -config_file = args.CONFIG_FILE - -print(f"The config file is {config_file}") -logseparator = "-"*53 + "\n" - -################################## -# Reading the configuration file # -################################## - -CONFIG = load_config( - config_file, - args.log_level, - args.log_file, -) - ######################## # Paths to executables # ######################## @@ -126,12 +104,32 @@ ############################ # Paths: ############################ -INPUT_DIR = Path( args.input_dir ) -OUTPUT_DIR = Path( args.output_dir ) -PUBLICATION_DIR = Path( args.publication_dir ) +PUBLICATION_DIR = args.PUBLICATION_DIR +INPUT_DIR = \ + args.input_dir if args.input_dir is not None else DEFAULT_OUTPUT_DIR / PUBLICATION_DIR.resolve().stem / "imxml" +OUTPUT_DIR = \ + args.output_dir if args.output_dir is not None else (DEFAULT_OUTPUT_DIR / PUBLICATION_DIR.resolve().stem) / "epub" +LOG_DIR = OUTPUT_DIR / "log" +LOG_FILE = (LOG_DIR / SCRIPT_NAME) . 
with_suffix( ".log" ) TEMP_DIR = OUTPUT_DIR / "tmp_files" DEBUG_DIR = OUTPUT_DIR / "debug" +################################## +# Reading the configuration file # +################################## + +config_file = args.CONFIG_FILE + +print(f"The config file is {config_file}") +logseparator = "-"*53 + "\n" + +CONFIG = load_config( + config_file, + args.log_level, + LOG_FILE, + # args.log_file, +) + ############################ # Paths to auxiliary files # ############################ diff --git a/src/process_eoa_latex.py b/src/process_eoa_latex.py index cf2f9cd..c604831 100755 --- a/src/process_eoa_latex.py +++ b/src/process_eoa_latex.py @@ -25,19 +25,19 @@ Path(os.environ['DEPENDENCIES_DIR'] if 'DEPENDENCIES_DIR' in os.environ else './dependencies') def main( - input_file + publ_dir ): exec_command( - f"eoatex2pdf.py -f \"{input_file}\"" + f"eoatex2pdf.py \"{publ_dir}\"" ) exec_command( - f"eoatex2imxml.py -f \"{input_file}\"" + f"eoatex2imxml.py \"{publ_dir}\"" ) exec_command( - f"imxml2django.py --publication-dir \"{input_file.parent}\"" + f"imxml2django.py \"{publ_dir}\"" ) exec_command( - f"imxml2epub.py --publication-dir \"{input_file.parent}\"" + f"imxml2epub.py \"{publ_dir}\"" ) if __name__ == "__main__": @@ -49,11 +49,13 @@ def main( parser = argparse.ArgumentParser( formatter_class=argparse.ArgumentDefaultsHelpFormatter ) + ''' parser.add_argument( "-f", "--filename", required = True, help="Name of main EOATeX file (without suffix!)." 
) + ''' parser.add_argument( "-c", "--config", default = BASE_DIR / "config" / "eoaconvert.cfg", @@ -71,6 +73,11 @@ def main( default = "INFO", help="log level: choose between DEBUG, INFO, WARNING, ERROR, CRITICAL" ) + parser.add_argument( + "PUBLICATION_DIR", + help = "directory containing the publication (including resources like pictures, etc.)", + type = Path, +) args = parser.parse_args() @@ -81,5 +88,5 @@ def main( ) main( - input_file = Path( args.filename ) + publ_dir = Path( args.PUBLICATION_DIR ) ) diff --git a/src/tei2eoatex.py b/src/tei2eoatex.py index e2d46af..ed37a20 100755 --- a/src/tei2eoatex.py +++ b/src/tei2eoatex.py @@ -37,18 +37,11 @@ def copy_files( input_dir, output_dir, - overwrite = False ): - def copy_cmd(src, dst): - if overwrite: - copy_dir_overwrite( src, dst ) - else: - shutil.copytree( src, dst ) - for f in input_dir.iterdir(): logging.debug( f"copy dir: {f}" ) if f.is_dir(): - copy_cmd( + shutil.copytree( f, output_dir / f.name ) @@ -68,28 +61,21 @@ def copy_cmd(src, dst): default = BASE_DIR / "config" / "eoaconvert.cfg", help="Name of config file" ) - parser.add_argument( - "-l", "--log-dir", - default = DEFAULT_OUTPUT_DIR / "logs", - help="logfiles go into this directory" - ) parser.add_argument( "--log-level", default = "INFO", help="log level: choose between DEBUG, INFO, WARNING, ERROR, CRITICAL" ) parser.add_argument( - "-f", "--filename", - required = True, - # default = DEFAULT_INPUT_DIR / "tei", - help="the TEI file" + "-o", "--output-dir", + help = f"output directory. default: {DEFAULT_OUTPUT_DIR}/PUBLICATION_NAME/eoatex", + type = Path, ) parser.add_argument( - "-o", "--output-dir", - default = DEFAULT_OUTPUT_DIR / "from_tei/eoatex", - metavar = "OUTPUT_DIR", - help="output directory", - type = Path + "-f", "--filename", + default = Path("*.xml"), + type = Path, + help = "xml file inside PUBLICATION_DIR, or absolute path. 
Patterns like '*.xml' are also acceptable" ) parser.add_argument( "-p", "--param", @@ -103,33 +89,26 @@ def copy_cmd(src, dst): default = False, help="overwrite files at OUTPUT_DIR" ) - - args = parser.parse_args() - - CONFIG_FILE = args.config - - log_dir = Path(args.log_dir) - - CONFIG = load_config( - CONFIG_FILE, - args.log_level, - (log_dir / SCRIPT_NAME) . with_suffix( ".log" ), - # args.log_file, + parser.add_argument( + "PUBLICATION_DIR", + help = "directory containing the publication (including resources like pictures, etc.)", + type = Path, ) - OUTPUT_DIR = args.output_dir - - check_executable( "saxon" ) + args = parser.parse_args() - input_file = Path( args.filename ) - output_file = OUTPUT_DIR / "main.tex" + publ_dir = args.PUBLICATION_DIR + input_file = \ + args.filename if args.filename . is_absolute() else list(publ_dir . glob( str(args.filename) ))[0] + OUTPUT_DIR = \ + args.output_dir if args.output_dir is not None else (DEFAULT_OUTPUT_DIR / publ_dir.resolve().stem) / "eoatex" + log_dir = OUTPUT_DIR / "log" if not input_file.is_file(): import errno raise( Exception( f"not a valid input file: {input_file}" ) ) - if OUTPUT_DIR.exists(): if args.overwrite: shutil.rmtree( OUTPUT_DIR ) @@ -140,10 +119,20 @@ def copy_cmd(src, dst): parents=True ) + CONFIG_FILE = args.config + CONFIG = load_config( + CONFIG_FILE, + args.log_level, + (log_dir / SCRIPT_NAME) . 
with_suffix( ".log" ), + ) + + check_executable( "saxon" ) + + output_file = OUTPUT_DIR / "main.tex" + copy_files( input_dir = input_file.parent, output_dir = output_file.parent, - overwrite = args.overwrite, ) run_xslt( diff --git a/src/tei2html.py b/src/tei2html.py index 6da2eeb..a13a1d5 100755 --- a/src/tei2html.py +++ b/src/tei2html.py @@ -65,12 +65,6 @@ def copy_dir( default = BASE_DIR / "config" / "eoaconvert.cfg", help="Name of config file" ) - parser.add_argument( - "-l", "--log-dir", - default = DEFAULT_OUTPUT_DIR / "logs", - type = Path, - help="logfile" - ) parser.add_argument( "--log-level", default = "INFO", @@ -103,9 +97,8 @@ def copy_dir( ) parser.add_argument( "-o", "--output-dir", + help = f"output directory. default: {DEFAULT_OUTPUT_DIR}/PUBLICATION_NAME/html", type = Path, - default = DEFAULT_OUTPUT_DIR / "from_tei/html", - help="the main stylesheet 'tei2html.xsl' will write html files here" ) parser.add_argument( "-!", "--overwrite", @@ -138,24 +131,12 @@ def copy_dir( ) parser.add_argument( "PUBLICATION_DIR", - default = DEFAULT_OUTPUT_DIR / "from_tei/with_bibl", help = "directory containing the publication (including resources like pictures, etc.)", - nargs = '?', # (optional) type = Path, ) args = parser.parse_args() - CONFIG_FILE = args.config - - log_dir = args.log_dir - - CONFIG = load_config( - CONFIG_FILE, - args.log_level, - (log_dir / SCRIPT_NAME) . with_suffix( ".log" ), - ) - check_executables() print( "xslt params: " + str(args.param) ) @@ -166,9 +147,21 @@ def copy_dir( tei_filename = \ publ_file if publ_file . is_absolute() else list(publ_dir . glob (str(publ_file)))[0] - logging.info( f"tei_file: {tei_filename}, publ_dir: {publ_dir}" ) + output_dir = \ + args.output_dir if args.output_dir is not None else (DEFAULT_OUTPUT_DIR / publ_dir.resolve().stem) / "html" + + CONFIG_FILE = args.config + + log_dir = output_dir / "log" + log_file = (log_dir / SCRIPT_NAME) . 
with_suffix( ".log" ) + + CONFIG = load_config( + CONFIG_FILE, + args.log_level, + log_file, + ) - output_dir = args.output_dir + logging.info( f"tei_file: {tei_filename}, publ_dir: {publ_dir}" ) if not tei_filename.is_file(): raise( Exception( diff --git a/src/tei2imxml.py b/src/tei2imxml.py index 77dd514..c2f124b 100755 --- a/src/tei2imxml.py +++ b/src/tei2imxml.py @@ -1274,11 +1274,6 @@ def main(): parser = argparse.ArgumentParser( formatter_class=argparse.ArgumentDefaultsHelpFormatter ) - parser.add_argument( - "--log-dir", - default = DEFAULT_OUTPUT_DIR / "logs", - help="logfile" - ) parser.add_argument( "-c", "--config", dest="CONFIG_FILE", @@ -1286,11 +1281,6 @@ def main(): help="Name of configuration file", metavar="CONFIGURATION" ) - parser.add_argument( - "-l", "--log-file", - default = SCRIPT_NAME + ".log" , - help="logfile" - ) parser.add_argument( "--log-level", default = "INFO", @@ -1298,13 +1288,14 @@ def main(): ) parser.add_argument( "-f", "--filename", - required = True, - help="TEI XML file to convert into DocBook XML." + default = Path("*.xml"), + type = Path, + help = "xml file inside PUBLICATION_DIR, or absolute path. Patterns like '*.xml' are also acceptable" ) parser.add_argument( "-o", "--output-dir", - default = DEFAULT_OUTPUT_DIR / "from_tei/imxml", - help="output directory" + help = f"output directory. default: {DEFAULT_OUTPUT_DIR}/PUBLICATION_NAME/imxml", + type = Path, ) parser.add_argument( "-i", "--ignore-ref-errors", @@ -1318,8 +1309,8 @@ def main(): ) parser.add_argument( "-d", "--pickleddata", - default= DEFAULT_OUTPUT_DIR / "from_tei/pickle/data.pickle", - help="Pickled data file to be used." + help = f"directory containing pickled data file to be used. 
default {DEFAULT_OUTPUT_DIR}/PUBLICATION_NAME/pickle", + type = Path, ) parser.add_argument( "-him", "--hyperimage", @@ -1335,14 +1326,22 @@ def main(): action="store_true", help="Embed webdesign of EOA1.0 into XML" ) + parser.add_argument( + "PUBLICATION_DIR", + help = "directory containing the publication (including resources like pictures, etc.)", + type = Path, + ) args = parser.parse_args() - config_file = args.CONFIG_FILE - print("The config file is ", config_file) - INPUT_PATH = Path( args.filename ) - INPUT_DIR = INPUT_PATH.parent - OUTPUT_DIR = Path( args.output_dir ) - LOG_DIR = Path( args.log_dir ) + INPUT_DIR = args.PUBLICATION_DIR + INPUT_PATH = \ + args.filename if args.filename . is_absolute() else list( INPUT_DIR . glob( str(args.filename) ))[0] + OUTPUT_DIR = \ + args.output_dir if args.output_dir is not None else (DEFAULT_OUTPUT_DIR / INPUT_DIR.resolve().stem) / "imxml" + PICKLE_DIR = \ + args.pickleddata if args.pickleddata is not None else (DEFAULT_OUTPUT_DIR / INPUT_DIR.resolve().stem) / "pickle" + PICKLE_FILE = PICKLE_DIR / "data.pickle" + LOG_DIR = OUTPUT_DIR / "log" TEMP_DIR = OUTPUT_DIR / "tmp_files" DEBUG_DIR = OUTPUT_DIR / "debug" @@ -1352,11 +1351,13 @@ def main(): # where to output the xml file: XML_FILE = (OUTPUT_DIR / INPUT_PATH.name) .with_suffix( ".xml" ) + config_file = args.CONFIG_FILE + print("The config file is ", config_file) + CONFIG = load_config( - args.CONFIG_FILE, + config_file, args.log_level, (LOG_DIR / SCRIPT_NAME) . with_suffix( ".log" ), - args.log_file, ) logging.info( "checking executables 'utils.bib2html' needs...:" ) @@ -1376,7 +1377,7 @@ def main(): ) try: - with open(args.pickleddata, 'rb') as f: + with open(PICKLE_FILE, 'rb') as f: data = pickle.load(f) except FileNotFoundError: logging.error("File 'data.pickle' not found. You should run 'fix_tei.py' first. 
Exiting.") @@ -1385,7 +1386,7 @@ def main(): TRANSLATION_FILE = BASE_DIR / CONFIG['Auxiliaries']['TRANSLATIONS'] CSL_FILE = BASE_DIR / CONFIG['Auxiliaries']['CSL_FILE'] - xml_tree = etree.parse(args.filename) + xml_tree = etree.parse(str(INPUT_PATH)) publication_language = xml_tree.xpath("//t:teiHeader/t:profileDesc/t:langUsage/t:language/@ident", namespaces=NS_MAP)[0] diff --git a/src/tei_add_bibl.py b/src/tei_add_bibl.py index a9aa30d..caa0eb1 100755 --- a/src/tei_add_bibl.py +++ b/src/tei_add_bibl.py @@ -43,18 +43,6 @@ def copy_dir( **opts ) -def copy_file( - src, - dst, - **opts -): - print( "'{}' -> '{}'".format( src, dst ) ) - copy( - src=src, - dst=dst, - **opts - ) - def publication_info(xml_tree): """Check TEI header for bibliography data, return relevant data as dictionary.""" @@ -104,11 +92,6 @@ def create_bibl( parser = argparse.ArgumentParser( formatter_class=argparse.ArgumentDefaultsHelpFormatter ) - parser.add_argument( - "--log-dir", - default = DEFAULT_OUTPUT_DIR / "logs", - help="logfile" - ) parser.add_argument( "-c", "--config", dest="CONFIG_FILE", @@ -116,11 +99,6 @@ def create_bibl( help="Name of configuration file", metavar="CONFIGURATION" ) - parser.add_argument( - "-l", "--log-file", - default = SCRIPT_NAME + ".log" , - help="logfile" - ) parser.add_argument( "--log-level", default = "INFO", @@ -134,8 +112,8 @@ def create_bibl( ) parser.add_argument( "-o", "--output-dir", - default = DEFAULT_OUTPUT_DIR / "from_tei/with_bibl", - help="output directory", + metavar = "OUTPUT_DIR", + help = f"output directory. 
default: {DEFAULT_OUTPUT_DIR}/with_bibl/PUBLICATION_NAME", type = Path, ) parser.add_argument( @@ -146,31 +124,43 @@ def create_bibl( ) parser.add_argument( "PUBLICATION_DIR", - default = Path("input/example/tei"), help = "directory containing the publication (including resources like pictures, etc.)", - nargs = '?', # (optional) type = Path, ) + args = parser.parse_args() + publ_dir = args.PUBLICATION_DIR + output_dir = \ + args.output_dir if args.output_dir is not None else DEFAULT_OUTPUT_DIR / "with_bibl" / (publ_dir.resolve().stem) + tei_file_input = args.filename + + log_dir = output_dir / "log" + log_file = (log_dir / SCRIPT_NAME) . with_suffix( ".log" ) + print( f"log_dir: {log_dir}" ) + + if output_dir.exists(): + if args.overwrite: + rmtree( output_dir ) + else: + raise( Exception( f"output directory already existing: '{output_dir}'!" ) ) + if not output_dir.exists(): + copy_dir( + publ_dir, + output_dir, + ) + config_file = args.CONFIG_FILE print("The config file is ", config_file) - log_dir = args.log_dir - CONFIG = load_config( config_file, args.log_level, - (log_dir / SCRIPT_NAME) . with_suffix( ".log" ), + log_file ) - publ_dir = args.PUBLICATION_DIR - tei_file_input = args.filename - tei_file = \ tei_file_input if tei_file_input . is_absolute() else list(publ_dir . glob (str(tei_file_input)))[0] - output_dir = args.output_dir - tei_tree = etree.parse(str(tei_file)) publ_info = publication_info( tei_tree ) publ_info['bib_file'] = tei_file.parent / publ_info['bib_file'] @@ -181,19 +171,6 @@ def create_bibl( raise( Exception( f"not a valid input file: {tei_file}" ) ) - - if output_dir.exists(): - if args.overwrite: - rmtree( output_dir ) - else: - raise( Exception( f"output directory already existing: '{output_dir}'!" ) ) - if not output_dir.exists(): - # mkdir( output_dir ) - copy_dir( - publ_dir, - output_dir, - # ignore = ignore_patterns( tei_file_input ) if not(tei_file_input . 
is_absolute()) else None - ) tei_with_bibl_file = \ (output_dir / (tei_file.with_suffix("").name + "_with_bibl")) . with_suffix( ".xml" ) if publ_info["publ_type"] == "monograph": @@ -206,7 +183,6 @@ def create_bibl( temp_dir = output_dir / "temp/all", tei_bib_file = tei_bib_file, tei_file = tei_file, - # tei_with_bibl_file = tei_with_bibl_file ) # insert bibliography: run_xslt( diff --git a/src/utils/load_config.py b/src/utils/load_config.py index 29ee92c..5a50b66 100644 --- a/src/utils/load_config.py +++ b/src/utils/load_config.py @@ -129,8 +129,10 @@ def load_config( ###################### log_dir = Path(log_file).absolute().parent - if not (log_dir.exists() and log_dir.is_dir()): - os.makedirs( log_dir ) + log_dir.mkdir( + parents = True, + exist_ok = True + ) time.sleep( 1 ) # always log to file: