Merge branch 'master' of https://github.molgen.mpg.de/EditionOpenAccess/EOASkripts
EditionOpenAccess · Jan 14, 2020 · ad7de39 · ad7de39
2 parents 2a57834 + 56a39f7
commit ad7de39
Show file tree

Hide file tree

Showing 19 changed files with 601 additions and 487 deletions.
diff --git a/README.md b/README.md
@@ -93,15 +93,15 @@ In order to apply the workflow to any other publication copy it into the `runtim
 		$ ./scripts/run.py                 # run if not yet running
 		$ ./scripts/exec_in_container.py   # enter container
 
-1. eoatex -> pdf
+1. process eoatex:
 
-		$ process_eoa_latex.py -f input/example/eoatex/EOASample.tex
+		$ process_eoa_latex.py input/example/124_eoatex
 
 	(adjust filename if necessary)
 
 	This script calls several other scripts to compile the input into several different output formats. Every script can be run separately if needed. Just check the contents of the script for details.
 
-## The DocX workflow (docx -> TEI -> pdf, django, epub) (TODO: complete documentation, test and fix workflow)
+## The TEI workflow (TEI -> pdf, django, epub)
 
 The following description uses the example publication in `input/example` (from the `eoa-publication-model` repository).
 In order to apply the workflow to any other publication copy it into the `input/` directory and adjust paths in the description accordingly.
@@ -111,25 +111,10 @@ In order to apply the workflow to any other publication copy it into the `input/
 		$ ./scripts/run.py                 # run if not yet running
 		$ ./scripts/exec_in_container.py   # enter container
 
-1. eoaTEI -> eoaTEI with bibliography
+1. process tei
 
-		$ tei_add_bibl.py input/example/tei
+		$ process_tei.py input/example/125_tei_part
 
-1. eoaTEI -> eoaTEX
+## The DocX workflow (DocX -> TEI -> ...) (TODO: describe how)
 
-		$ tei2eoatex.py -f input/example/tei/exampleTEI.xml
-
-1. eoaTEX -> pdf
-
-		$ eoatex2pdf.py -f output/from_tei/eoatex/main.tex -o output/from_tei/pdf
-
-	(adjust filename if necessary)
-
-1. eoaTEI -> imxml (to intermediate xml)
-
-		$ gather_pickledata.py input/example/tei/exampleTEI.xml input/example/tei/example.bib
-		$ tei2imxml.py -f input/example/tei/exampleTEI.xml
-
-1. eoaTEI -> html
-
-		$ tei2html.py
+Convert from DocX to eoaTEI, then continue with the tei workflow (adjust paths accordingly) as described above.
diff --git a/dependencies.conf b/dependencies.conf
@@ -13,7 +13,7 @@ hash = 2a01be46ee82fce5eba6074359b3d18db2222e0c
 [eoa-publication-model]
 
 uri = https://github.molgen.mpg.de/EditionOpenAccess/eoa-publication-model.git
-hash = 5ff326580a6bc34756bce511a23146453fdb82b6
+hash = 62eb49dd05ebe3697e47acac31a1cff2f60c6f7a
 # init not needed, since only example publication is needed
 
 [webdesign_platform]

diff --git a/src/eoatex2imxml.py b/src/eoatex2imxml.py
@@ -67,40 +67,26 @@
         default = BASE_DIR / "config" / "eoaconvert.cfg",
         help="Name of config file"
 )
-parser.add_argument(
-        "-l", "--log-dir",
-        default = DEFAULT_OUTPUT_DIR / "logs",
-        help="logfile"
-)
 parser.add_argument(
         "--log-level",
         default = "INFO",
         help="log level: choose between DEBUG, INFO, WARNING, ERROR, CRITICAL"
 )
-parser.add_argument(
-        "--tei-guidelines",
-        default = DEFAULT_DEPENDENCIES_DIR / "TEI",
-        help="path to the https://github.com/TEIC/TEI"
-)
-parser.add_argument(
-        "--tei-stylesheets",
-        default = DEFAULT_DEPENDENCIES_DIR / "Stylesheets",
-        help="path to the https://github.com/TEIC/Stylesheets"
-)
 parser.add_argument(
         "-f", "--filename",
-        required = True,
-        help="Name of main EOATeX file (without suffix!)."
+        default = Path("*.tex"),
+        type = Path,
+        help = "xml file inside INPUT_DIR, or absolute path. Patterns like '*.xml' are also acceptable"
 )
 parser.add_argument(
         "--latex-dir",
-        default = DEFAULT_OUTPUT_DIR / "latex",
-        help="directory where to find the output generated by eoatex2pdf.py"
+        type = Path,
+        help="directory where to find the output generated by eoatex2pdf.py. default: {DEFAULT_OUTPUT_DIR}/INPUT_DIR/pdf"
 )
 parser.add_argument(
         "-o", "--output-dir",
-        default = DEFAULT_OUTPUT_DIR / "imxml",
-        help="where to dump all output files"
+        type = Path,
+        help = f"output directory. default: {DEFAULT_OUTPUT_DIR}/INPUT_DIR/imxml"
 )
 parser.add_argument(
         "-t", "--trash",
@@ -116,6 +102,11 @@
         action="store_true",
         help="Embed webdesign of EOA1.0 into XML"
 )
+parser.add_argument(
+        "INPUT_DIR",
+        help = "directory containing the publication (including resources like pictures, etc.)",
+        type = Path,
+)
 
 args = parser.parse_args()
 
@@ -129,17 +120,6 @@
 # run biber_2.1 -O biber2-1n.bbl $INPUT to obtain this file
 BIBERFILE = "biber2-1.bbl"
 
-##################################
-# Reading the configuration file #
-##################################
-
-CONFIG = load_config(
-        CONFIG_FILE,
-        args.log_level,
-        (Path(args.log_dir) / SCRIPT_NAME) . with_suffix( ".log" ),
-        # args.log_file,
-)
-
 ########################
 # Paths to executables #
 ########################
@@ -150,25 +130,19 @@
 # TL_PATH = CONFIG['Executables']['texlive']
 # TEXBIN_PATH = CONFIG['Executables']['texbin']
 
-############################
-# Paths to auxiliary files #
-############################
-TRALICS_PATH_LIB = BASE_DIR / CONFIG['Auxiliaries']['TRALICS_PATH_LIB']
-TEMPLATE_PATH = BASE_DIR / CONFIG['Auxiliaries']['template_path']
-SUPPORT_PATH = BASE_DIR / CONFIG['Auxiliaries']['support_path']
-
 ############################
 # Paths:
 ############################
-INPUT_DIR = Path( args.filename ).resolve().parent
-INPUT_PATH = Path( args.filename )
-if INPUT_PATH.suffix == '':
-    INPUT_PATH = INPUT_PATH.with_suffix( ".tex" )
-elif INPUT_PATH.suffix != ".tex":
-    raise( Exception( "input file matching '*.tex' expected" ) )
-OUTPUT_DIR = Path( args.output_dir )
-LATEX_DIR = Path ( args.latex_dir )
-LOG_DIR = Path( args.log_dir )
+INPUT_DIR = args.INPUT_DIR
+INPUT_PATH = args.filename
+INPUT_PATH = \
+    args.filename if args.filename . is_absolute() else list(INPUT_DIR . glob( str(args.filename) ))[0]
+OUTPUT_DIR = \
+    args.output_dir if args.output_dir is not None else (DEFAULT_OUTPUT_DIR / INPUT_DIR.resolve().stem) / "imxml"
+LATEX_DIR = \
+    args.latex_dir if args.latex_dir is not None else (DEFAULT_OUTPUT_DIR / INPUT_DIR.resolve().stem) / "pdf"
+LOG_DIR = OUTPUT_DIR / "log"
+LOG_FILE = (LOG_DIR / SCRIPT_NAME) . with_suffix( ".log" )
 
 TEMP_DIR = OUTPUT_DIR / "tmp_files"
 DEBUG_DIR = OUTPUT_DIR / "debug"
@@ -178,6 +152,23 @@
 
 BIB2HTML_FILENAME = "temp"
 
+##################################
+# Reading the configuration file #
+##################################
+
+CONFIG = load_config(
+        CONFIG_FILE,
+        args.log_level,
+        LOG_FILE,
+)
+
+############################
+# Paths to auxiliary files #
+############################
+TRALICS_PATH_LIB = BASE_DIR / CONFIG['Auxiliaries']['TRALICS_PATH_LIB']
+TEMPLATE_PATH = BASE_DIR / CONFIG['Auxiliaries']['template_path']
+SUPPORT_PATH = BASE_DIR / CONFIG['Auxiliaries']['support_path']
+
 
 #################################################
 # Checking for existence of tools and libraries #

diff --git a/src/eoatex2pdf.py b/src/eoatex2pdf.py
@@ -16,10 +16,10 @@
 SCRIPT_PATH = Path( __file__ )
 SCRIPT_NAME = SCRIPT_PATH.stem
 
-INPUT_DIR = \
+DEFAULT_INPUT_DIR = \
     Path(os.environ['INPUT_DIR'] if 'INPUT_DIR' in os.environ else './input')
 
-OUTPUT_DIR = \
+DEFAULT_OUTPUT_DIR = \
     Path(os.environ['OUTPUT_DIR'] if 'OUTPUT_DIR' in os.environ else './output')
 
 def main(
@@ -110,24 +110,20 @@ def copy_bib_file():
     )
     parser.add_argument(
             "-f", "--filename",
-            required = True,
-            help="Name of main EOATeX file without .tex extension."
+            default = Path("*.tex"),
+            type = Path,
+            help = "xml file inside INPUT_DIR, or absolute path. Patterns like '*.xml' are also acceptable"
     )
     parser.add_argument(
             "-o", "--output-dir",
-            default = OUTPUT_DIR / "latex",
-            help = "output directory"
+            type = Path,
+            help = f"output directory. default: {DEFAULT_OUTPUT_DIR}/INPUT_DIR/pdf"
     )
     parser.add_argument(
         "-c", "--config",
         default = BASE_DIR / "config" / "eoaconvert.cfg",
         help="Name of config file"
     )
-    parser.add_argument(
-            "-l", "--log-file",
-            default = (OUTPUT_DIR / "logs" / SCRIPT_NAME).with_suffix(".log"),
-            help="logfile"
-    )
     parser.add_argument(
             "--log-level",
             default = "INFO",
@@ -139,16 +135,30 @@ def copy_bib_file():
             default = False,
             help="Run only two passes of XeLaTeX and no biber."
     )
+    parser.add_argument(
+            "INPUT_DIR",
+            help = "directory containing the publication (including resources like pictures, etc.)",
+            type = Path,
+    )
+
     args = parser.parse_args()
+    input_dir = args.INPUT_DIR
+    input_file = \
+        args.filename if args.filename . is_absolute() else list(input_dir . glob( str(args.filename) ))[0]
+    # output_dir = args.output_dir
+    output_dir = \
+        args.output_dir if args.output_dir is not None else (DEFAULT_OUTPUT_DIR / input_dir.resolve().stem) / "pdf"
+    log_dir = output_dir / "log"
+    log_file = (log_dir / SCRIPT_NAME) . with_suffix( ".log" )
 
     load_config(
         args.config,
         args.log_level,
-        args.log_file
+        log_file
     )
 
     main(
-            input_file = args.filename,
-            output_dir = args.output_dir,
+            input_file = input_file,
+            output_dir = output_dir,
             nobiber = args.no_biber
     )
diff --git a/src/gather_pickledata.py b/src/gather_pickledata.py