diff --git a/README.md b/README.md
index 3abccaa..616a4cf 100644
--- a/README.md
+++ b/README.md
@@ -93,15 +93,15 @@ In order to apply the workflow to any other publication copy it into the `runtim
$ ./scripts/run.py # run if not yet running
$ ./scripts/exec_in_container.py # enter container
-1. eoatex -> pdf
+1. process eoatex:
- $ process_eoa_latex.py -f input/example/eoatex/EOASample.tex
+ $ process_eoa_latex.py input/example/124_eoatex
(adjust filename if necessary)
This script calls several other scripts to compile the input into several different output formats. Every script can be run seperately if needed. Just check the contents of the script for details.
-## The DocX workflow (docx -> TEI -> pdf, django, epub) (TODO: complete documentation, test and fix workflow)
+## The TEI workflow (TEI -> pdf, django, epub)
The following description uses the example publication in `input/example` (from the `eoa-publication-model` repository).
In order to apply the workflow to any other publication copy it into the `input/` directory and adjust paths in the description accordingly.
@@ -111,25 +111,10 @@ In order to apply the workflow to any other publication copy it into the `input/
$ ./scripts/run.py # run if not yet running
$ ./scripts/exec_in_container.py # enter container
-1. eoaTEI -> eoaTEI with bibliography
+1. process TEI:
- $ tei_add_bibl.py input/example/tei
+ $ process_tei.py input/example/125_tei_part
-1. eoaTEI -> eoaTEX
+## The DocX workflow (DocX -> TEI -> ...) (TODO: describe how)
- $ tei2eoatex.py -f input/example/tei/exampleTEI.xml
-
-1. eoaTEX -> pdf
-
- $ eoatex2pdf.py -f output/from_tei/eoatex/main.tex -o output/from_tei/pdf
-
- (adjust filename if necessary)
-
-1. eoaTEI -> imxml (to intermediate xml)
-
- $ gather_pickledata.py input/example/tei/exampleTEI.xml input/example/tei/example.bib
- $ tei2imxml.py -f input/example/tei/exampleTEI.xml
-
-1. eoaTEI -> html
-
- $ tei2html.py
+Convert from DocX to eoaTEI, then continue with the TEI workflow as described above (adjust paths accordingly).
diff --git a/dependencies.conf b/dependencies.conf
index 3b3e90c..1cd7964 100644
--- a/dependencies.conf
+++ b/dependencies.conf
@@ -13,7 +13,7 @@ hash = 2a01be46ee82fce5eba6074359b3d18db2222e0c
[eoa-publication-model]
uri = https://github.molgen.mpg.de/EditionOpenAccess/eoa-publication-model.git
-hash = 5ff326580a6bc34756bce511a23146453fdb82b6
+hash = 62eb49dd05ebe3697e47acac31a1cff2f60c6f7a
# init not needed, since only example publication is needed
[webdesign_platform]
diff --git a/src/eoatex2imxml.py b/src/eoatex2imxml.py
index 7e4f303..e7603b1 100755
--- a/src/eoatex2imxml.py
+++ b/src/eoatex2imxml.py
@@ -67,40 +67,26 @@
default = BASE_DIR / "config" / "eoaconvert.cfg",
help="Name of config file"
)
-parser.add_argument(
- "-l", "--log-dir",
- default = DEFAULT_OUTPUT_DIR / "logs",
- help="logfile"
-)
parser.add_argument(
"--log-level",
default = "INFO",
help="log level: choose between DEBUG, INFO, WARNING, ERROR, CRITICAL"
)
-parser.add_argument(
- "--tei-guidelines",
- default = DEFAULT_DEPENDENCIES_DIR / "TEI",
- help="path to the https://github.com/TEIC/TEI"
-)
-parser.add_argument(
- "--tei-stylesheets",
- default = DEFAULT_DEPENDENCIES_DIR / "Stylesheets",
- help="path to the https://github.com/TEIC/Stylesheets"
-)
parser.add_argument(
"-f", "--filename",
- required = True,
- help="Name of main EOATeX file (without suffix!)."
+ default = Path("*.tex"),
+ type = Path,
+ help = "tex file inside INPUT_DIR, or absolute path. Patterns like '*.tex' are also acceptable"
)
parser.add_argument(
"--latex-dir",
- default = DEFAULT_OUTPUT_DIR / "latex",
- help="directory where to find the output generated by eoatex2pdf.py"
+ type = Path,
+ help=f"directory where to find the output generated by eoatex2pdf.py. default: {DEFAULT_OUTPUT_DIR}/INPUT_DIR/pdf"
)
parser.add_argument(
"-o", "--output-dir",
- default = DEFAULT_OUTPUT_DIR / "imxml",
- help="where to dump all output files"
+ type = Path,
+ help = f"output directory. default: {DEFAULT_OUTPUT_DIR}/INPUT_DIR/imxml"
)
parser.add_argument(
"-t", "--trash",
@@ -116,6 +102,11 @@
action="store_true",
help="Embed webdesign of EOA1.0 into XML"
)
+parser.add_argument(
+ "INPUT_DIR",
+ help = "directory containing the publication (including resources like pictures, etc.)",
+ type = Path,
+)
args = parser.parse_args()
@@ -129,17 +120,6 @@
# run biber_2.1 -O biber2-1n.bbl $INPUT to obtain this file
BIBERFILE = "biber2-1.bbl"
-##################################
-# Reading the configuration file #
-##################################
-
-CONFIG = load_config(
- CONFIG_FILE,
- args.log_level,
- (Path(args.log_dir) / SCRIPT_NAME) . with_suffix( ".log" ),
- # args.log_file,
-)
-
########################
# Paths to executables #
########################
@@ -150,25 +130,19 @@
# TL_PATH = CONFIG['Executables']['texlive']
# TEXBIN_PATH = CONFIG['Executables']['texbin']
-############################
-# Paths to auxiliary files #
-############################
-TRALICS_PATH_LIB = BASE_DIR / CONFIG['Auxiliaries']['TRALICS_PATH_LIB']
-TEMPLATE_PATH = BASE_DIR / CONFIG['Auxiliaries']['template_path']
-SUPPORT_PATH = BASE_DIR / CONFIG['Auxiliaries']['support_path']
-
############################
# Paths:
############################
-INPUT_DIR = Path( args.filename ).resolve().parent
-INPUT_PATH = Path( args.filename )
-if INPUT_PATH.suffix == '':
- INPUT_PATH = INPUT_PATH.with_suffix( ".tex" )
-elif INPUT_PATH.suffix != ".tex":
- raise( Exception( "input file matching '*.tex' expected" ) )
-OUTPUT_DIR = Path( args.output_dir )
-LATEX_DIR = Path ( args.latex_dir )
-LOG_DIR = Path( args.log_dir )
+INPUT_DIR = args.INPUT_DIR
+INPUT_PATH = args.filename
+INPUT_PATH = \
+ args.filename if args.filename . is_absolute() else list(INPUT_DIR . glob( str(args.filename) ))[0]
+OUTPUT_DIR = \
+ args.output_dir if args.output_dir is not None else (DEFAULT_OUTPUT_DIR / INPUT_DIR.resolve().stem) / "imxml"
+LATEX_DIR = \
+ args.latex_dir if args.latex_dir is not None else (DEFAULT_OUTPUT_DIR / INPUT_DIR.resolve().stem) / "pdf"
+LOG_DIR = OUTPUT_DIR / "log"
+LOG_FILE = (LOG_DIR / SCRIPT_NAME) . with_suffix( ".log" )
TEMP_DIR = OUTPUT_DIR / "tmp_files"
DEBUG_DIR = OUTPUT_DIR / "debug"
@@ -178,6 +152,23 @@
BIB2HTML_FILENAME = "temp"
+##################################
+# Reading the configuration file #
+##################################
+
+CONFIG = load_config(
+ CONFIG_FILE,
+ args.log_level,
+ LOG_FILE,
+)
+
+############################
+# Paths to auxiliary files #
+############################
+TRALICS_PATH_LIB = BASE_DIR / CONFIG['Auxiliaries']['TRALICS_PATH_LIB']
+TEMPLATE_PATH = BASE_DIR / CONFIG['Auxiliaries']['template_path']
+SUPPORT_PATH = BASE_DIR / CONFIG['Auxiliaries']['support_path']
+
#################################################
# Checking for existance of tools and libraries #
diff --git a/src/eoatex2pdf.py b/src/eoatex2pdf.py
index a7be5a3..d86abc0 100755
--- a/src/eoatex2pdf.py
+++ b/src/eoatex2pdf.py
@@ -16,10 +16,10 @@
SCRIPT_PATH = Path( __file__ )
SCRIPT_NAME = SCRIPT_PATH.stem
-INPUT_DIR = \
+DEFAULT_INPUT_DIR = \
Path(os.environ['INPUT_DIR'] if 'INPUT_DIR' in os.environ else './input')
-OUTPUT_DIR = \
+DEFAULT_OUTPUT_DIR = \
Path(os.environ['OUTPUT_DIR'] if 'OUTPUT_DIR' in os.environ else './output')
def main(
@@ -110,24 +110,20 @@ def copy_bib_file():
)
parser.add_argument(
"-f", "--filename",
- required = True,
- help="Name of main EOATeX file without .tex extension."
+ default = Path("*.tex"),
+ type = Path,
+ help = "tex file inside INPUT_DIR, or absolute path. Patterns like '*.tex' are also acceptable"
)
parser.add_argument(
"-o", "--output-dir",
- default = OUTPUT_DIR / "latex",
- help = "output directory"
+ type = Path,
+ help = f"output directory. default: {DEFAULT_OUTPUT_DIR}/INPUT_DIR/pdf"
)
parser.add_argument(
"-c", "--config",
default = BASE_DIR / "config" / "eoaconvert.cfg",
help="Name of config file"
)
- parser.add_argument(
- "-l", "--log-file",
- default = (OUTPUT_DIR / "logs" / SCRIPT_NAME).with_suffix(".log"),
- help="logfile"
- )
parser.add_argument(
"--log-level",
default = "INFO",
@@ -139,16 +135,30 @@ def copy_bib_file():
default = False,
help="Run only two passes of XeLaTeX and no biber."
)
+ parser.add_argument(
+ "INPUT_DIR",
+ help = "directory containing the publication (including resources like pictures, etc.)",
+ type = Path,
+ )
+
args = parser.parse_args()
+ input_dir = args.INPUT_DIR
+ input_file = \
+ args.filename if args.filename . is_absolute() else list(input_dir . glob( str(args.filename) ))[0]
+ # output_dir = args.output_dir
+ output_dir = \
+ args.output_dir if args.output_dir is not None else (DEFAULT_OUTPUT_DIR / input_dir.resolve().stem) / "pdf"
+ log_dir = output_dir / "log"
+ log_file = (log_dir / SCRIPT_NAME) . with_suffix( ".log" )
load_config(
args.config,
args.log_level,
- args.log_file
+ log_file
)
main(
- input_file = args.filename,
- output_dir = args.output_dir,
+ input_file = input_file,
+ output_dir = output_dir,
nobiber = args.no_biber
)
diff --git a/src/gather_pickledata.py b/src/gather_pickledata.py
deleted file mode 100755
index 5fbcfd0..0000000
--- a/src/gather_pickledata.py
+++ /dev/null
@@ -1,93 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8; mode: python -*-
-
-"""
-Gather some data for further conversion steps. This is originally part of fix_tei.
-"""
-
-__version__ = "1.0"
-__date__ = "20190718"
-__author__ = "kthoden@mpiwg-berlin.mpg.de"
-
-from utils.load_config import load_config
-
-from pathlib import Path
-import os
-import shutil
-import argparse
-import logging
-import pickle
-import fix_tei
-from lxml import etree
-
-DEFAULT_INPUT_DIR = \
- Path(os.environ['INPUT_DIR'] if 'INPUT_DIR' in os.environ else './input')
-DEFAULT_OUTPUT_DIR = \
- Path(os.environ['OUTPUT_DIR'] if 'OUTPUT_DIR' in os.environ else './output')
-
-
-ns_tei = "http://www.tei-c.org/ns/1.0"
-NS_MAP = {"t" : ns_tei}
-
-logging.basicConfig(level=logging.DEBUG, format=' %(asctime)s - %(levelname)s - %(message)s')
-
-def main(
- teifile,
- bibfile,
- output,
-):
- """The main bit"""
- xml_tree = etree.parse(args.teifile)
-
- bibdata = fix_tei.parse_bibtex(args.bibfile)
-
- cited = xml_tree.xpath("//t:bibl/t:ref/@target", namespaces=NS_MAP)
- used_citekeys = [fix_tei.unescape(c[1:]) for c in cited]
- citekeys_not_in_bib = fix_tei.validate_citations(used_citekeys, bibdata)
-
- fix_tei.pickle_data(citekeys_not_in_bib, used_citekeys, output)
-# def main ends here
-
-if __name__ == '__main__':
- parser = argparse.ArgumentParser(
- formatter_class=argparse.ArgumentDefaultsHelpFormatter
- )
- parser.add_argument(
- "teifile",
- help="The XML file from which data is pickled."
- )
- parser.add_argument(
- "bibfile",
- help="The bibliography file for checking the references."
- )
- parser.add_argument(
- "-o", "--output-dir",
- default = DEFAULT_OUTPUT_DIR / "from_tei/pickle",
- metavar = "OUTPUT_DIR",
- help="output directory"
- )
- # picklefile = "output/imxml/tmp_files/data.pickle"
- parser.add_argument(
- "-!", "--overwrite",
- action = "store_true",
- default = False,
- help="overwrite files at OUTPUT_DIR"
- )
- args = parser.parse_args()
-
- output_dir = Path( args.output_dir )
-
- if output_dir.exists():
- if args.overwrite:
- shutil.rmtree( output_dir )
- else:
- raise( Exception( f"output directory already existing: '{output_dir}'!" ) )
- if not output_dir.exists():
- os.mkdir( output_dir )
-
- main(
- teifile = args.teifile,
- bibfile = args.bibfile,
- output = output_dir / "data.pickle",
- )
-# finis
diff --git a/src/imxml2django.py b/src/imxml2django.py
index dfe51d0..6b64bd8 100755
--- a/src/imxml2django.py
+++ b/src/imxml2django.py
@@ -51,12 +51,8 @@
default = BASE_DIR / "config" / "eoaconvert.cfg",
dest="CONFIG_FILE",
help="Name of configuration file",
- metavar="CONFIGURATION"
-)
-parser.add_argument(
- "-l", "--log-file",
- default = (DEFAULT_OUTPUT_DIR / 'logs' / SCRIPT_NAME).with_suffix(".log"),
- help="logfile"
+ metavar="CONFIGURATION",
+ type = Path,
)
parser.add_argument(
"--log-level",
@@ -68,38 +64,24 @@
help="Check the publication.cfg for completeness.",
action="store_true"
)
-parser.add_argument(
- "--publication-dir",
- required = True,
- # default = DEFAULT_INPUT_DIR,
- help="directory containing publication.cfg and the Cover.jpg"
-)
parser.add_argument(
"-i", "--input-dir",
- default = DEFAULT_OUTPUT_DIR / "imxml",
- help="directory containing the intermediate xml generated by eoatex2imxml.py"
+ help = f"directory containing some intermediate xml created by previous steps. default: {DEFAULT_OUTPUT_DIR}/PUBLICATION_NAME/imxml",
+ type = Path,
)
parser.add_argument(
"-o", "--output-dir",
- default = DEFAULT_OUTPUT_DIR / "django",
- help="where to dump all output files"
+ help = f"output directory. default: {DEFAULT_OUTPUT_DIR}/PUBLICATION_NAME/django",
+ type = Path,
+)
+parser.add_argument(
+ "PUBLICATION_DIR",
+ help = "directory containing the publication (including resources like pictures, etc.)",
+ type = Path,
)
args = parser.parse_args()
-config_file = args.CONFIG_FILE
-
-print("The configfile is %s." % config_file)
-
-##################################
-# Reading the configuration file #
-##################################
-CONFIG = load_config(
- config_file,
- args.log_level,
- args.log_file,
-)
-
########################
# Paths to executables #
########################
@@ -109,13 +91,29 @@
############################
# Paths:
############################
-INPUT_DIR = Path( args.input_dir )
-OUTPUT_DIR = Path( args.output_dir )
-PUBLICATION_DIR = Path( args.publication_dir )
+PUBLICATION_DIR = args.PUBLICATION_DIR
+INPUT_DIR = \
+ args.input_dir if args.input_dir is not None else DEFAULT_OUTPUT_DIR / PUBLICATION_DIR.resolve().stem / "imxml"
+OUTPUT_DIR = \
+ args.output_dir if args.output_dir is not None else (DEFAULT_OUTPUT_DIR / PUBLICATION_DIR.resolve().stem) / "django"
+LOG_DIR = OUTPUT_DIR / "log"
+LOG_FILE = (LOG_DIR / SCRIPT_NAME) . with_suffix( ".log" )
TEMP_DIR = OUTPUT_DIR / "tmp_files"
-# CONVERT_DIR = OUTPUT_DIR / "CONVERT"
DEBUG_DIR = OUTPUT_DIR / "debug"
+config_file = args.CONFIG_FILE
+
+print("The configfile is %s." % config_file)
+
+##################################
+# Reading the configuration file #
+##################################
+CONFIG = load_config(
+ config_file,
+ args.log_level,
+ LOG_FILE,
+)
+
############################
# Paths to auxiliary files #
############################
@@ -128,16 +126,10 @@
if not TEMP_DIR.exists():
os.makedirs( TEMP_DIR )
-# if not CONVERT_DIR.exists():
-# os.makedirs( CONVERT_DIR )
if not DEBUG_DIR.exists():
os.makedirs( DEBUG_DIR )
# Check for folder and necessary files
-# if not os.path.exists(CONVERT_DIR):
-# logging.info(f"The directory {CONVERT_DIR} has not been created yet. Creating it for you")
-# time.sleep(1)
-# os.makedirs(CONVERT_DIR)
logging.info(f"The publication.cfg file is missing in django directory.")
if os.path.exists(INPUT_DIR / "publication.cfg"):
shutil.copy(INPUT_DIR / "publication.cfg", OUTPUT_DIR)
@@ -185,10 +177,6 @@
# Convert tralics-XML to Django Data Structure #
############################################################################
""")
-# Create django File Structure
-# if not os.path.exists(CONVERT_DIR / "django"):
-# os.mkdir(CONVERT_DIR / "django")
- # os.mkdir(CONVERT_DIR / "django" / "images")
if not os.path.exists(OUTPUT_DIR / "images"):
os.mkdir(OUTPUT_DIR / "images")
if not os.path.exists(OUTPUT_DIR / "images" / "embedded"):
diff --git a/src/imxml2epub.py b/src/imxml2epub.py
index ebe65d1..4280e0a 100755
--- a/src/imxml2epub.py
+++ b/src/imxml2epub.py
@@ -52,33 +52,21 @@
help="Name of configuration file",
metavar="CONFIGURATION"
)
-parser.add_argument(
- "-l", "--log-file",
- default = (DEFAULT_OUTPUT_DIR / "logs" / SCRIPT_NAME) . with_suffix( ".log" ),
- help="logfile"
-)
parser.add_argument(
"--log-level",
default = "INFO",
help="log level: choose between DEBUG, INFO, WARNING, ERROR, CRITICAL"
)
-
-parser.add_argument(
- "--publication-dir",
- default = DEFAULT_INPUT_DIR,
- help="directory containing publication.cfg and the Cover.jpg"
-)
parser.add_argument(
"-i", "--input-dir",
- default = DEFAULT_OUTPUT_DIR / "imxml",
- help="directory containing the intermediate xml generated by eoatex2imxml.py"
+ help = f"directory containing some intermediate xml created by previous steps. default: {DEFAULT_OUTPUT_DIR}/PUBLICATION_NAME/imxml",
+ type = Path,
)
parser.add_argument(
"-o", "--output-dir",
- default = DEFAULT_OUTPUT_DIR / "epub",
- help="where to dump all output files"
+ help = f"output directory. default: {DEFAULT_OUTPUT_DIR}/PUBLICATION_NAME/epub",
+ type = Path,
)
-
parser.add_argument(
"-f", "--font",
help="Font to be used, default is TeX Gyre Termes",
@@ -97,6 +85,11 @@
"--extra-font-files-directory",
help="Specify the directory with files of the font (the font itself, License)",
)
+parser.add_argument(
+ "PUBLICATION_DIR",
+ help = "directory containing the publication (including resources like pictures, etc.)",
+ type = Path,
+)
parser.add_argument(
"-him", "--hyperimage",
@@ -107,21 +100,6 @@
args = parser.parse_args()
-config_file = args.CONFIG_FILE
-
-print(f"The config file is {config_file}")
-logseparator = "-"*53 + "\n"
-
-##################################
-# Reading the configuration file #
-##################################
-
-CONFIG = load_config(
- config_file,
- args.log_level,
- args.log_file,
-)
-
########################
# Paths to executables #
########################
@@ -134,12 +112,32 @@
############################
# Paths:
############################
-INPUT_DIR = Path( args.input_dir )
-OUTPUT_DIR = Path( args.output_dir )
-PUBLICATION_DIR = Path( args.publication_dir )
+PUBLICATION_DIR = args.PUBLICATION_DIR
+INPUT_DIR = \
+ args.input_dir if args.input_dir is not None else DEFAULT_OUTPUT_DIR / PUBLICATION_DIR.resolve().stem / "imxml"
+OUTPUT_DIR = \
+ args.output_dir if args.output_dir is not None else (DEFAULT_OUTPUT_DIR / PUBLICATION_DIR.resolve().stem) / "epub"  # epub output gets its own directory instead of sharing imxml2django's "django" one
+LOG_DIR = OUTPUT_DIR / "log"
+LOG_FILE = (LOG_DIR / SCRIPT_NAME) . with_suffix( ".log" )
TEMP_DIR = OUTPUT_DIR / "tmp_files"
DEBUG_DIR = OUTPUT_DIR / "debug"
+##################################
+# Reading the configuration file #
+##################################
+
+config_file = args.CONFIG_FILE
+
+print(f"The config file is {config_file}")
+logseparator = "-"*53 + "\n"
+
+CONFIG = load_config(
+ config_file,
+ args.log_level,
+ LOG_FILE,
+ # args.log_file,
+)
+
############################
# Paths to auxiliary files #
############################
diff --git a/src/process_eoa_latex.py b/src/process_eoa_latex.py
index cf2f9cd..809a1fd 100755
--- a/src/process_eoa_latex.py
+++ b/src/process_eoa_latex.py
@@ -25,19 +25,19 @@
Path(os.environ['DEPENDENCIES_DIR'] if 'DEPENDENCIES_DIR' in os.environ else './dependencies')
def main(
- input_file
+ publ_dir
):
exec_command(
- f"eoatex2pdf.py -f \"{input_file}\""
+ f"eoatex2pdf.py \"{publ_dir}\""
)
exec_command(
- f"eoatex2imxml.py -f \"{input_file}\""
+ f"eoatex2imxml.py \"{publ_dir}\""
)
exec_command(
- f"imxml2django.py --publication-dir \"{input_file.parent}\""
+ f"imxml2django.py \"{publ_dir}\""
)
exec_command(
- f"imxml2epub.py --publication-dir \"{input_file.parent}\""
+ f"imxml2epub.py \"{publ_dir}\""
)
if __name__ == "__main__":
@@ -49,11 +49,6 @@ def main(
parser = argparse.ArgumentParser(
formatter_class=argparse.ArgumentDefaultsHelpFormatter
)
- parser.add_argument(
- "-f", "--filename",
- required = True,
- help="Name of main EOATeX file (without suffix!)."
- )
parser.add_argument(
"-c", "--config",
default = BASE_DIR / "config" / "eoaconvert.cfg",
@@ -71,6 +66,11 @@ def main(
default = "INFO",
help="log level: choose between DEBUG, INFO, WARNING, ERROR, CRITICAL"
)
+ parser.add_argument(
+ "PUBLICATION_DIR",
+ help = "directory containing the publication (including resources like pictures, etc.)",
+ type = Path,
+)
args = parser.parse_args()
@@ -81,5 +81,5 @@ def main(
)
main(
- input_file = Path( args.filename )
+ publ_dir = Path( args.PUBLICATION_DIR )
)
diff --git a/src/process_tei.py b/src/process_tei.py
new file mode 100755
index 0000000..6b42cf5
--- /dev/null
+++ b/src/process_tei.py
@@ -0,0 +1,92 @@
+#!/usr/bin/env python3
+
+from utils.load_config import load_config, exec_command
+
+# imports
+import argparse
+from pathlib import Path
+import glob
+import os
+import subprocess
+import shutil
+import logging
+
+BASE_DIR = Path( __file__ ).resolve().parent
+SCRIPT_PATH = Path( __file__ )
+SCRIPT_NAME = SCRIPT_PATH.stem
+
+DEFAULT_INPUT_DIR = \
+ Path(os.environ['INPUT_DIR'] if 'INPUT_DIR' in os.environ else './input')
+
+DEFAULT_OUTPUT_DIR = \
+ Path(os.environ['OUTPUT_DIR'] if 'OUTPUT_DIR' in os.environ else './output')
+
+DEFAULT_DEPENDENCIES_DIR = \
+ Path(os.environ['DEPENDENCIES_DIR'] if 'DEPENDENCIES_DIR' in os.environ else './dependencies')
+
+def main(
+ publ_dir  # publication directory as a pathlib.Path (.resolve() is called on it below)
+):
+ PUBL_NAME = publ_dir.resolve().stem  # last path component without its suffix; used to build the output paths below
+ exec_command(  # the commands below are issued one after another in this fixed order
+ f"tei_add_bibl.py -! \"{publ_dir}\""
+ )
+ exec_command(
+ f"tei2eoatex.py -! \"{publ_dir}\""
+ )
+ exec_command(
+ f"eoatex2pdf.py --output-dir \"{DEFAULT_OUTPUT_DIR}/{PUBL_NAME}/pdf\" \"{DEFAULT_OUTPUT_DIR}/{PUBL_NAME}/eoatex\""
+ )
+ exec_command(  # NOTE(review): from here on the input is <OUTPUT>/with_bibl/<name>, not <OUTPUT>/<name>/... as above; confirm this is where tei_add_bibl.py writes
+ f"tei_pickle.py -! \"{DEFAULT_OUTPUT_DIR}/with_bibl/{PUBL_NAME}\""
+ )
+ exec_command(
+ f"tei2imxml.py --filename no_bibl.xml \"{DEFAULT_OUTPUT_DIR}/with_bibl/{PUBL_NAME}\""
+ )
+ exec_command(
+ f"tei2html.py -! --filename \"with_bibl.xml\" \"{DEFAULT_OUTPUT_DIR}/with_bibl/{PUBL_NAME}\""
+ )
+
+if __name__ == "__main__":
+
+ #####################
+ # Parsing arguments #
+ #####################
+
+ parser = argparse.ArgumentParser(
+ formatter_class=argparse.ArgumentDefaultsHelpFormatter
+ )
+ parser.add_argument(
+ "-c", "--config",
+ default = BASE_DIR / "config" / "eoaconvert.cfg",
+ dest="CONFIG_FILE",
+ help="Name of configuration file",
+ metavar="CONFIGURATION"
+ )
+ parser.add_argument(
+ "-l", "--log-file",
+ default = (DEFAULT_OUTPUT_DIR / 'logs' / SCRIPT_NAME).with_suffix(".log"),
+ help="logfile"
+ )
+ parser.add_argument(
+ "--log-level",
+ default = "INFO",
+ help="log level: choose between DEBUG, INFO, WARNING, ERROR, CRITICAL"
+ )
+ parser.add_argument(
+ "PUBLICATION_DIR",
+ help = "directory containing the publication (including resources like pictures, etc.)",
+ type = Path,
+)
+
+ args = parser.parse_args()
+
+ CONFIG = load_config(
+ args.CONFIG_FILE,
+ args.log_level,
+ args.log_file,
+ )
+
+ main(
+ publ_dir = Path( args.PUBLICATION_DIR )
+ )
diff --git a/src/stylesheets/insert_bibliography.xsl b/src/stylesheets/insert_bibliography.xsl
index 4ad9367..11a3fba 100644
--- a/src/stylesheets/insert_bibliography.xsl
+++ b/src/stylesheets/insert_bibliography.xsl
@@ -15,8 +15,17 @@
version="1.0"
/>
-
-
+
+
+
+
@@ -43,7 +52,20 @@
-
+
+ print bibliography in chapter
+
+
+
+
+ inserting from file
+
+
+
+
+
diff --git a/src/stylesheets/tei2eoatex.xsl b/src/stylesheets/tei2eoatex.xsl
index 3b5647b..e8d050e 100644
--- a/src/stylesheets/tei2eoatex.xsl
+++ b/src/stylesheets/tei2eoatex.xsl
@@ -24,7 +24,7 @@
-
+
@@ -141,6 +141,7 @@ contexts, a double replacement is performed.
\end{document}
+
diff --git a/src/stylesheets/tex4ht_2_tei.xsl b/src/stylesheets/tex4ht_2_tei.xsl
index 5d80102..addad6a 100644
--- a/src/stylesheets/tex4ht_2_tei.xsl
+++ b/src/stylesheets/tex4ht_2_tei.xsl
@@ -28,7 +28,7 @@
- Title
+ Bibliography for
Publication Information
@@ -116,7 +116,7 @@
rendering citations
-
+
diff --git a/src/tei2eoatex.py b/src/tei2eoatex.py
index 1010bc7..ed37a20 100755
--- a/src/tei2eoatex.py
+++ b/src/tei2eoatex.py
@@ -37,18 +37,19 @@
def copy_files(
input_dir,
output_dir,
- overwrite = False
):
- def copy_cmd(src, dst):
- if overwrite:
- copy_dir_overwrite( src, dst )
+ for f in input_dir.iterdir():
+ logging.debug( f"copy dir: {f}" )
+ if f.is_dir():
+ shutil.copytree(
+ f,
+ output_dir / f.name
+ )
else:
- shutil.copytree( src, dst )
- for dir in ("images", "inline", "facsim", "preambel"):
- copy_cmd(
- input_dir / dir,
- output_dir / dir
- )
+ shutil.copy(
+ f,
+ output_dir / f.name
+ )
if __name__ == '__main__':
@@ -60,27 +61,21 @@ def copy_cmd(src, dst):
default = BASE_DIR / "config" / "eoaconvert.cfg",
help="Name of config file"
)
- parser.add_argument(
- "-l", "--log-dir",
- default = DEFAULT_OUTPUT_DIR / "logs",
- help="logfiles go into this directory"
- )
parser.add_argument(
"--log-level",
default = "INFO",
help="log level: choose between DEBUG, INFO, WARNING, ERROR, CRITICAL"
)
parser.add_argument(
- "-f", "--filename",
- required = True,
- # default = DEFAULT_INPUT_DIR / "tei",
- help="the TEI file"
+ "-o", "--output-dir",
+ help = f"output directory. default: {DEFAULT_OUTPUT_DIR}/PUBLICATION_NAME/eoatex",
+ type = Path,
)
parser.add_argument(
- "-o", "--output-dir",
- default = DEFAULT_OUTPUT_DIR / "from_tei/eoatex",
- metavar = "OUTPUT_DIR",
- help="output directory"
+ "-f", "--filename",
+ default = Path("*.xml"),
+ type = Path,
+ help = "xml file inside PUBLICATION_DIR, or absolute path. Patterns like '*.xml' are also acceptable"
)
parser.add_argument(
"-p", "--param",
@@ -94,33 +89,26 @@ def copy_cmd(src, dst):
default = False,
help="overwrite files at OUTPUT_DIR"
)
-
- args = parser.parse_args()
-
- CONFIG_FILE = args.config
-
- log_dir = Path(args.log_dir)
-
- CONFIG = load_config(
- CONFIG_FILE,
- args.log_level,
- (log_dir / SCRIPT_NAME) . with_suffix( ".log" ),
- # args.log_file,
+ parser.add_argument(
+ "PUBLICATION_DIR",
+ help = "directory containing the publication (including resources like pictures, etc.)",
+ type = Path,
)
- OUTPUT_DIR = args.output_dir
-
- check_executable( "saxon" )
+ args = parser.parse_args()
- input_file = Path( args.filename )
- output_file = OUTPUT_DIR / "main.tex"
+ publ_dir = args.PUBLICATION_DIR
+ input_file = \
+ args.filename if args.filename . is_absolute() else list(publ_dir . glob( str(args.filename) ))[0]
+ OUTPUT_DIR = \
+ args.output_dir if args.output_dir is not None else (DEFAULT_OUTPUT_DIR / publ_dir.resolve().stem) / "eoatex"
+ log_dir = OUTPUT_DIR / "log"
if not input_file.is_file():
import errno
raise( Exception(
f"not a valid input file: {input_file}"
) )
-
if OUTPUT_DIR.exists():
if args.overwrite:
shutil.rmtree( OUTPUT_DIR )
@@ -131,46 +119,28 @@ def copy_cmd(src, dst):
parents=True
)
- run_xslt(
- input_file = input_file,
- xslt_file = EOA_SCRIPTS_DIR / "stylesheets/tei2eoatex.xsl",
- output_file = output_file,
- params = args.param,
- exec_command_args = {
- 'output_to' : ToFile( log_dir / SCRIPT_NAME / "saxon.log" )
- },
- )
-
- from lxml import etree
-
- ns = {"tei": "http://www.tei-c.org/ns/1.0"}
-
- tree = etree.parse(
- str(input_file)
+ CONFIG_FILE = args.config
+ CONFIG = load_config(
+ CONFIG_FILE,
+ args.log_level,
+ (log_dir / SCRIPT_NAME) . with_suffix( ".log" ),
)
- bib_file = tree.xpath(
- "//tei:sourceDesc/tei:ab[@type='bibdatabase']/tei:ref/@target",
- namespaces = ns
- )[0]
- bib_file = input_file.parent / bib_file
+ check_executable( "saxon" )
- logging.info( f"bib_file: {bib_file}" )
+ output_file = OUTPUT_DIR / "main.tex"
copy_files(
input_dir = input_file.parent,
-
output_dir = output_file.parent,
- overwrite = args.overwrite,
)
- if not bib_file.is_file():
- raise( Exception( f"bibfile not found: '{bib_file}'!" ) )
-
- src = bib_file
- dst = output_file.parent / bib_file.name
- logging.info( f"copying {src} -> {dst}")
- shutil.copy(
- bib_file,
- output_file.parent / bib_file.name
+ run_xslt(
+ input_file = input_file,
+ xslt_file = EOA_SCRIPTS_DIR / "stylesheets/tei2eoatex.xsl",
+ output_file = output_file,
+ params = args.param,
+ exec_command_args = {
+ 'output_to' : ToFile( log_dir / SCRIPT_NAME / "saxon.log" )
+ },
)
diff --git a/src/tei2html.py b/src/tei2html.py
index 6da2eeb..67d3458 100755
--- a/src/tei2html.py
+++ b/src/tei2html.py
@@ -65,23 +65,19 @@ def copy_dir(
default = BASE_DIR / "config" / "eoaconvert.cfg",
help="Name of config file"
)
- parser.add_argument(
- "-l", "--log-dir",
- default = DEFAULT_OUTPUT_DIR / "logs",
- type = Path,
- help="logfile"
- )
parser.add_argument(
"--log-level",
default = "INFO",
help="log level: choose between DEBUG, INFO, WARNING, ERROR, CRITICAL"
)
+ '''
parser.add_argument(
"--root-dir",
default = DEFAULT_OUTPUT_DIR / "html_from_tei",
type = Path,
help=""
)
+ '''
parser.add_argument(
"-p", "--param",
action = 'append',
@@ -97,15 +93,14 @@ def copy_dir(
)
parser.add_argument(
"-f", "--filename",
- default = Path("*_with_bibl.xml"),
+ default = Path("with_bibl.xml"),
type = Path,
help = "xml file inside PUBLICATION_DIR, or absolute path. Patterns like '*.xml' are also acceptable"
)
parser.add_argument(
"-o", "--output-dir",
+ help = f"output directory. default: {DEFAULT_OUTPUT_DIR}/PUBLICATION_NAME/html",
type = Path,
- default = DEFAULT_OUTPUT_DIR / "from_tei/html",
- help="the main stylesheet 'tei2html.xsl' will write html files here"
)
parser.add_argument(
"-!", "--overwrite",
@@ -138,24 +133,12 @@ def copy_dir(
)
parser.add_argument(
"PUBLICATION_DIR",
- default = DEFAULT_OUTPUT_DIR / "from_tei/with_bibl",
help = "directory containing the publication (including resources like pictures, etc.)",
- nargs = '?', # (optional)
type = Path,
)
args = parser.parse_args()
- CONFIG_FILE = args.config
-
- log_dir = args.log_dir
-
- CONFIG = load_config(
- CONFIG_FILE,
- args.log_level,
- (log_dir / SCRIPT_NAME) . with_suffix( ".log" ),
- )
-
check_executables()
print( "xslt params: " + str(args.param) )
@@ -166,9 +149,8 @@ def copy_dir(
tei_filename = \
publ_file if publ_file . is_absolute() else list(publ_dir . glob (str(publ_file)))[0]
- logging.info( f"tei_file: {tei_filename}, publ_dir: {publ_dir}" )
-
- output_dir = args.output_dir
+ output_dir = \
+ args.output_dir if args.output_dir is not None else (DEFAULT_OUTPUT_DIR / publ_dir.resolve().stem) / "html"
if not tei_filename.is_file():
raise( Exception(
@@ -180,6 +162,18 @@ def copy_dir(
rmtree( output_dir )
else:
raise( Exception( f"output directory already existing: '{output_dir}'!" ) )
+
+ CONFIG_FILE = args.config
+ log_dir = output_dir / "log"
+ log_file = (log_dir / SCRIPT_NAME) . with_suffix( ".log" )
+ CONFIG = load_config(
+ CONFIG_FILE,
+ args.log_level,
+ log_file,
+ )
+
+ logging.info( f"tei_file: {tei_filename}, publ_dir: {publ_dir}" )
+
if not output_dir.exists():
mkdir( output_dir )
## copy webdesign:
diff --git a/src/tei2imxml.py b/src/tei2imxml.py
index 90b484e..c2c608c 100755
--- a/src/tei2imxml.py
+++ b/src/tei2imxml.py
@@ -1277,11 +1277,6 @@ def main():
parser = argparse.ArgumentParser(
formatter_class=argparse.ArgumentDefaultsHelpFormatter
)
- parser.add_argument(
- "--log-dir",
- default = DEFAULT_OUTPUT_DIR / "logs",
- help="logfile"
- )
parser.add_argument(
"-c", "--config",
dest="CONFIG_FILE",
@@ -1289,11 +1284,6 @@ def main():
help="Name of configuration file",
metavar="CONFIGURATION"
)
- parser.add_argument(
- "-l", "--log-file",
- default = SCRIPT_NAME + ".log" ,
- help="logfile"
- )
parser.add_argument(
"--log-level",
default = "INFO",
@@ -1301,13 +1291,14 @@ def main():
)
parser.add_argument(
"-f", "--filename",
- required = True,
- help="TEI XML file to convert into DocBook XML."
+ default = Path("*.xml"),
+ type = Path,
+ help = "xml file inside PUBLICATION_DIR, or absolute path. Patterns like '*.xml' are also acceptable"
)
parser.add_argument(
"-o", "--output-dir",
- default = DEFAULT_OUTPUT_DIR / "from_tei/imxml",
- help="output directory"
+ help = f"output directory. default: {DEFAULT_OUTPUT_DIR}/PUBLICATION_NAME/imxml",
+ type = Path,
)
parser.add_argument(
"-i", "--ignore-ref-errors",
@@ -1321,8 +1312,8 @@ def main():
)
parser.add_argument(
"-d", "--pickleddata",
- default= DEFAULT_OUTPUT_DIR / "from_tei/pickle/data.pickle",
- help="Pickled data file to be used."
+ help = f"directory containing pickled data file to be used. default {DEFAULT_OUTPUT_DIR}/PUBLICATION_NAME/pickle",
+ type = Path,
)
parser.add_argument(
"-him", "--hyperimage",
@@ -1338,14 +1329,22 @@ def main():
action="store_true",
help="Embed webdesign of EOA1.0 into XML"
)
+ parser.add_argument(
+ "PUBLICATION_DIR",
+ help = "directory containing the publication (including resources like pictures, etc.)",
+ type = Path,
+ )
args = parser.parse_args()
- config_file = args.CONFIG_FILE
- print("The config file is ", config_file)
- INPUT_PATH = Path( args.filename )
- INPUT_DIR = INPUT_PATH.parent
- OUTPUT_DIR = Path( args.output_dir )
- LOG_DIR = Path( args.log_dir )
+ INPUT_DIR = args.PUBLICATION_DIR
+ INPUT_PATH = \
+ args.filename if args.filename . is_absolute() else list( INPUT_DIR . glob( str(args.filename) ))[0]
+ OUTPUT_DIR = \
+ args.output_dir if args.output_dir is not None else (DEFAULT_OUTPUT_DIR / INPUT_DIR.resolve().stem) / "imxml"  # same default as advertised in the --output-dir help
+ PICKLE_DIR = \
+ args.pickleddata if args.pickleddata is not None else (DEFAULT_OUTPUT_DIR / INPUT_DIR.resolve().stem) / "pickle"  # attribute name follows the "--pickleddata" option
+ PICKLE_FILE = PICKLE_DIR / "data.pickle"
+ LOG_DIR = OUTPUT_DIR / "log"
TEMP_DIR = OUTPUT_DIR / "tmp_files"
DEBUG_DIR = OUTPUT_DIR / "debug"
@@ -1355,25 +1354,33 @@ def main():
# where to output the xml file:
XML_FILE = (OUTPUT_DIR / INPUT_PATH.name) .with_suffix( ".xml" )
+ config_file = args.CONFIG_FILE
+ print("The config file is ", config_file)
+
CONFIG = load_config(
- args.CONFIG_FILE,
+ config_file,
args.log_level,
(LOG_DIR / SCRIPT_NAME) . with_suffix( ".log" ),
- args.log_file,
)
logging.info( "checking executables 'utils.bib2html' needs...:" )
bib2html.check_executables()
- if not os.path.exists(OUTPUT_DIR):
- os.mkdir( OUTPUT_DIR )
- if not os.path.exists(TEMP_DIR):
- os.mkdir( TEMP_DIR )
- if not os.path.exists( DEBUG_DIR ):
- os.mkdir( DEBUG_DIR )
+ OUTPUT_DIR.mkdir(
+ parents = True,
+ exist_ok = True
+ )
+ TEMP_DIR.mkdir(
+ parents = True,
+ exist_ok = True
+ )
+ DEBUG_DIR.mkdir(
+ parents = True,
+ exist_ok = True
+ )
try:
- with open(args.pickleddata, 'rb') as f:
+ with open(PICKLE_FILE, 'rb') as f:
data = pickle.load(f)
except FileNotFoundError:
logging.error("File 'data.pickle' not found. You should run 'fix_tei.py' first. Exiting.")
@@ -1382,7 +1389,7 @@ def main():
TRANSLATION_FILE = BASE_DIR / CONFIG['Auxiliaries']['TRANSLATIONS']
CSL_FILE = BASE_DIR / CONFIG['Auxiliaries']['CSL_FILE']
- xml_tree = etree.parse(args.filename)
+ xml_tree = etree.parse(str(INPUT_PATH))
publication_language = xml_tree.xpath("//t:teiHeader/t:profileDesc/t:langUsage/t:language/@ident", namespaces=NS_MAP)[0]
diff --git a/src/tei_add_bibl.py b/src/tei_add_bibl.py
index 98f630f..131af4a 100755
--- a/src/tei_add_bibl.py
+++ b/src/tei_add_bibl.py
@@ -43,22 +43,12 @@ def copy_dir(
**opts
)
-def copy_file(
- src,
- dst,
- **opts
-):
- print( "'{}' -> '{}'".format( src, dst ) )
- copy(
- src=src,
- dst=dst,
- **opts
- )
-
def publication_info(xml_tree):
"""Check TEI header for bibliography data, return relevant data as dictionary."""
- bib_file = xml_tree.xpath("//tei:teiHeader/tei:fileDesc/tei:sourceDesc/tei:ab[@type='bibdatabase']/tei:ref/@target", namespaces=NS_MAP)[0]
+ bib_file = Path(
+ xml_tree.xpath("//tei:teiHeader/tei:fileDesc/tei:sourceDesc/tei:ab[@type='bibdatabase']/tei:ref/@target", namespaces=NS_MAP)[0]
+ )
publ_type = xml_tree.xpath("//tei:teiHeader/tei:fileDesc/tei:sourceDesc/tei:ab[@type='bibdatabase']/tei:ref/@type", namespaces=NS_MAP)[0]
if publ_type not in ["monograph", "anthology", "monograph-numeric", "anthology-numeric"]:
logging.error(f"The bibliography type {publ_type} is not allowed. Exiting")
@@ -74,18 +64,19 @@ def publication_info(xml_tree):
}
-def create_bibl_and_insert(
- tei_tree,
+def create_bibl(
+ tei_node,
+ publ_info,
temp_dir,
tei_bib_file,
tei_file,
- tei_with_bibl_file
+ # tei_with_bibl_file
):
translations = {"de" : "german", "en" : "english", "it" : "italian", "fr" : "french"}
- tei_info = bib2html.get_bibl_info( tei_tree )
+ tei_info = bib2html.get_bibl_info( tei_node )
logging.debug( f"info from tei file: {tei_info}" )
bib2html.bib2tei(
- bib_file = bib_file,
+ bib_file = publ_info['bib_file'],
citekeys = tei_info['citekeys'],
keywords = tei_info['keywords'],
language = translations[publ_info['language']],
@@ -94,12 +85,6 @@ def create_bibl_and_insert(
output_file = tei_bib_file,
log_dir = temp_dir / "log"
)
- run_xslt(
- tei_file,
- BASE_DIR / "stylesheets/insert_bibliography.xsl",
- params = [ f"tei_bib_file={tei_bib_file}" ],
- output_file = tei_with_bibl_file
- )
if __name__ == '__main__':
@@ -107,11 +92,6 @@ def create_bibl_and_insert(
parser = argparse.ArgumentParser(
formatter_class=argparse.ArgumentDefaultsHelpFormatter
)
- parser.add_argument(
- "--log-dir",
- default = DEFAULT_OUTPUT_DIR / "logs",
- help="logfile"
- )
parser.add_argument(
"-c", "--config",
dest="CONFIG_FILE",
@@ -119,11 +99,6 @@ def create_bibl_and_insert(
help="Name of configuration file",
metavar="CONFIGURATION"
)
- parser.add_argument(
- "-l", "--log-file",
- default = SCRIPT_NAME + ".log" ,
- help="logfile"
- )
parser.add_argument(
"--log-level",
default = "INFO",
@@ -135,16 +110,11 @@ def create_bibl_and_insert(
type = Path,
help = "xml file inside PUBLICATION_DIR, or absolute path. Patterns like '*.xml' are also acceptable"
)
- parser.add_argument(
- "-b", "--bib-file",
- default = Path("*.bib"),
- type = Path,
- help = "bib file inside PUBLICATION_DIR, or absolute path. Patterns like '*.bib' are also acceptable"
- )
parser.add_argument(
"-o", "--output-dir",
- default = DEFAULT_OUTPUT_DIR / "from_tei/with_bibl",
- help="output directory"
+ metavar = "OUTPUT_DIR",
+ help = f"output directory. default: {DEFAULT_OUTPUT_DIR}/with_bibl/PUBLICATION_NAME",
+ type = Path,
)
parser.add_argument(
"-!", "--overwrite",
@@ -154,42 +124,20 @@ def create_bibl_and_insert(
)
parser.add_argument(
"PUBLICATION_DIR",
- default = Path("input/example/tei"),
help = "directory containing the publication (including resources like pictures, etc.)",
- nargs = '?', # (optional)
type = Path,
)
- args = parser.parse_args()
- config_file = args.CONFIG_FILE
- print("The config file is ", config_file)
- log_dir = args.log_dir
-
- CONFIG = load_config(
- config_file,
- args.log_level,
- (log_dir / SCRIPT_NAME) . with_suffix( ".log" ),
- )
+ args = parser.parse_args()
publ_dir = args.PUBLICATION_DIR
+ output_dir = \
+ args.output_dir if args.output_dir is not None else DEFAULT_OUTPUT_DIR / "with_bibl" / (publ_dir.resolve().stem)
tei_file_input = args.filename
- bib_file = args.bib_file
-
- tei_file = \
- tei_file_input if tei_file_input . is_absolute() else list(publ_dir . glob (str(tei_file_input)))[0]
-
- bib_file = \
- bib_file if bib_file . is_absolute() else list(publ_dir . glob (str(bib_file)))[0]
-
- logging.info( f"publ_dir: {publ_dir}, tei_file: {tei_file}, bib_file: {bib_file}" )
-
- output_dir = args.output_dir
-
- if not tei_file.is_file():
- raise( Exception(
- f"not a valid input file: {tei_file}"
- ) )
+ log_dir = output_dir / "log"
+ log_file = (log_dir / SCRIPT_NAME) . with_suffix( ".log" )
+ print( f"log_dir: {log_dir}" )
if output_dir.exists():
if args.overwrite:
@@ -197,31 +145,99 @@ def create_bibl_and_insert(
else:
raise( Exception( f"output directory already existing: '{output_dir}'!" ) )
if not output_dir.exists():
- # mkdir( output_dir )
copy_dir(
publ_dir,
output_dir,
- # ignore = ignore_patterns( tei_file_input ) if not(tei_file_input . is_absolute()) else None
)
+
+ config_file = args.CONFIG_FILE
+ print("The config file is ", config_file)
+ CONFIG = load_config(
+ config_file,
+ args.log_level,
+ log_file
+ )
+
+ tei_file = \
+ tei_file_input if tei_file_input . is_absolute() else list(publ_dir . glob (str(tei_file_input)))[0]
+
+ if not tei_file.is_file():
+ raise( Exception(
+ f"not a valid input file: {tei_file}"
+ ) )
+
tei_tree = etree.parse(str(tei_file))
publ_info = publication_info( tei_tree )
+ publ_info['bib_file'] = tei_file.parent / publ_info['bib_file']
+ logging.info( f"publ_dir: {publ_dir}, tei_file: {tei_file}" )
logging.info( f"The bibfile is '{publ_info['bib_file']}' and this publication type is '{publ_info['publ_type']}'. Language: '{publ_info['language']}'")
+ tei_with_bibl_file = \
+ (output_dir / (tei_file.with_suffix("").name + "_with_bibl")) . with_suffix( ".xml" )
if publ_info["publ_type"] == "monograph":
- create_bibl_and_insert(
+ # create bibliography in tei format:
+ tei_bib_file = \
+ (output_dir/ "bibliography/bibliography_all") . with_suffix(".tei")
+ create_bibl(
tei_tree,
+ publ_info = publ_info,
temp_dir = output_dir / "temp/all",
- tei_bib_file = (output_dir/ "bibliography/bibliography_all") . with_suffix(".tei"),
+ tei_bib_file = tei_bib_file,
tei_file = tei_file,
- tei_with_bibl_file = (output_dir / (tei_file.with_suffix("").name + "_with_bibl")) . with_suffix( ".xml" )
+ )
+ # insert bibliography:
+ run_xslt(
+ tei_file,
+ BASE_DIR / "stylesheets/insert_bibliography.xsl",
+ params = [ f"tei_bib_files={tei_bib_file.absolute()}" ],
+ output_file = tei_with_bibl_file
)
elif publ_info["publ_type"] == "anthology":
- for chap_node in tei_tree.xpath("//tei:body//tei:div[@type = 'chapter']"):
-
- chapter_id = chap_node.xpath("@xml:id", namespaces=NS_MAP)
- create_bibl_and_insert(
- tei_tree,
+ all_chapter_bibl_filepaths = []
+ # for every chapter:
+ for chap_node in tei_tree.xpath("//tei:body//tei:div[@type = 'chapter']", namespaces = NS_MAP):
+
+ chapter_id = chap_node.xpath("@xml:id", namespaces=NS_MAP)[0]
+ logging.info( f"creating bibliography for chapter: {chapter_id}" )
+ # create bibliography in tei format:
+ tei_bib_file = (output_dir/ f"bibliography/bibliography_chap_{chapter_id}") . with_suffix(".tei")
+ create_bibl(
+ chap_node,
+ publ_info = publ_info,
temp_dir = output_dir / f"temp/chap_{chapter_id}",
- tei_bib_file = (output_dir/ f"bibliography/bibliography_chap_{chapter_id}") . with_suffix(".tei")
+ tei_bib_file = tei_bib_file,
+ tei_file = tei_file,
)
+ all_chapter_bibl_filepaths += [tei_bib_file]
+ # insert bibliography:
+ all_teibibs_arg = ";".join(
+ [str(f.absolute()) for f in all_chapter_bibl_filepaths]
+ )
+ run_xslt(
+ tei_file,
+ BASE_DIR / "stylesheets/insert_bibliography.xsl",
+ params = [ f"tei_bib_files={all_teibibs_arg}" ],
+ output_file = tei_with_bibl_file
+ )
else:
raise( Exception("unknown publication type!"))
+
+ # create uniquely named links to
+ # original tei file and the one with added bibliography
+ orig_link = output_dir / "no_bibl.xml"
+ if orig_link.exists():
+ logging.error(
+ f"error while creating unique link: file already exists '{orig_link}'"
+ )
+ exit(1)
+ orig_link . symlink_to(
+ tei_file . name
+ )
+ with_bibl_link = output_dir / "with_bibl.xml"
+ if with_bibl_link.exists():
+ logging.error(
+ f"error while creating unique link: file already exists '{with_bibl_link}'"
+ )
+ exit(1)
+ with_bibl_link . symlink_to(
+ tei_with_bibl_file . name
+ )
diff --git a/src/tei_pickle.py b/src/tei_pickle.py
new file mode 100755
index 0000000..b162a59
--- /dev/null
+++ b/src/tei_pickle.py
@@ -0,0 +1,131 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8; mode: python -*-
+
+"""
+Gather some data for further conversion steps. This was originally part of fix_tei.
+"""
+
+__version__ = "1.0"
+__date__ = "20190718"
+__author__ = "kthoden@mpiwg-berlin.mpg.de"
+
+from utils.load_config import load_config
+
+from pathlib import Path
+import os
+import shutil
+import argparse
+import logging
+import pickle
+import fix_tei
+from lxml import etree
+
+
+BASE_DIR = Path( os.path.realpath(__file__) ).parent
+SCRIPT_NAME = Path( __file__).stem
+
+DEFAULT_INPUT_DIR = \
+ Path(os.environ['INPUT_DIR'] if 'INPUT_DIR' in os.environ else './input')
+DEFAULT_OUTPUT_DIR = \
+ Path(os.environ['OUTPUT_DIR'] if 'OUTPUT_DIR' in os.environ else './output')
+
+
+ns_tei = "http://www.tei-c.org/ns/1.0"
+NS_MAP = {"t" : ns_tei}
+
+def main(
+ tei_file,
+ bib_file,
+ output,
+):
+ """The main bit"""
+ xml_tree = etree.parse(str(tei_file))
+
+ bibdata = fix_tei.parse_bibtex(bib_file)
+
+ cited = xml_tree.xpath("//t:bibl/t:ref/@target", namespaces=NS_MAP)
+ used_citekeys = [fix_tei.unescape(c[1:]) for c in cited]
+ citekeys_not_in_bib = fix_tei.validate_citations(used_citekeys, bibdata)
+
+ fix_tei.pickle_data(citekeys_not_in_bib, used_citekeys, output)
+# def main ends here
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser(
+ formatter_class=argparse.ArgumentDefaultsHelpFormatter
+ )
+ parser.add_argument(
+ "-c", "--config",
+ dest="CONFIG_FILE",
+ default = BASE_DIR / "config" / "eoaconvert.cfg",
+ help="Name of configuration file",
+ metavar="CONFIGURATION"
+ )
+ parser.add_argument(
+ "--log-level",
+ default = "INFO",
+ help="log level: choose between DEBUG, INFO, WARNING, ERROR, CRITICAL"
+ )
+ parser.add_argument(
+ "-o", "--output-dir",
+ help = f"output directory. default: {DEFAULT_OUTPUT_DIR}/PUBLICATION_NAME/pickle",
+ type = Path,
+ )
+ parser.add_argument(
+ "-f", "--filename",
+ default = Path("*.xml"),
+ type = Path,
+ help = "eoaTEI file inside PUBLICATION_DIR, or absolute path. Patterns like '*.xml' are also acceptable"
+ )
+ parser.add_argument(
+ "-b", "--bib-file",
+ default = Path("**/*.bib"),
+ type = Path,
+ help = "bibtex file inside PUBLICATION_DIR, or absolute path. Patterns like '**/*.bib' are also acceptable"
+ )
+ parser.add_argument(
+ "PUBLICATION_DIR",
+ help = "directory containing the publication (including resources like pictures, etc.)",
+ type = Path,
+ )
+ parser.add_argument(
+ "-!", "--overwrite",
+ action = "store_true",
+ default = False,
+ help="overwrite files at OUTPUT_DIR"
+ )
+ args = parser.parse_args()
+
+ input_dir = args.PUBLICATION_DIR
+ tei_file = \
+ args.filename if args.filename . is_absolute() else list( input_dir . glob( str(args.filename) ))[0]
+ bib_file = \
+ args.bib_file if args.bib_file . is_absolute() else list( input_dir . glob( str(args.bib_file) ))[0]
+
+ output_dir = \
+ args.output_dir if args.output_dir is not None else (DEFAULT_OUTPUT_DIR / input_dir.resolve().stem) / "pickle"
+ log_dir = output_dir / "log"
+
+ config_file = args.CONFIG_FILE
+ print("The config file is ", config_file)
+
+ if output_dir.exists():
+ if args.overwrite:
+ shutil.rmtree( output_dir )
+ else:
+ raise( Exception( f"output directory already existing: '{output_dir}'!" ) )
+ if not output_dir.exists():  # the default OUTPUT_DIR/PUBLICATION_NAME/pickle may lack its parent directory
+ output_dir.mkdir( parents = True, exist_ok = True )
+
+ CONFIG = load_config(
+ config_file,
+ args.log_level,
+ (log_dir / SCRIPT_NAME) . with_suffix( ".log" ),
+ )
+
+ main(
+ tei_file = tei_file,
+ bib_file = bib_file,
+ output = output_dir / "data.pickle",
+ )
+# finis
diff --git a/src/utils/bib2html.py b/src/utils/bib2html.py
index 7c32c6e..9f028fa 100755
--- a/src/utils/bib2html.py
+++ b/src/utils/bib2html.py
@@ -382,7 +382,7 @@ def __imhtml_2_tei(
run_xslt(
imhtml_file,
BASE_DIR / "stylesheets/tex4ht_2_tei.xsl",
- params = [ f"dashed_file={imhtml_dashed_file}" ],
+ params = [ f"dashed_file={imhtml_dashed_file.absolute()}" ],
output_file = output_file
)
@@ -424,5 +424,5 @@ def teibib_to_eoa1(
language = translations[language],
temp_dir = args.temp_dir
)
- print( etree.tostring( references_in_html ) )
+ # print( etree.tostring( references_in_html ) )
# finis
diff --git a/src/utils/load_config.py b/src/utils/load_config.py
index 29ee92c..5a50b66 100644
--- a/src/utils/load_config.py
+++ b/src/utils/load_config.py
@@ -129,8 +129,10 @@ def load_config(
######################
log_dir = Path(log_file).absolute().parent
- if not (log_dir.exists() and log_dir.is_dir()):
- os.makedirs( log_dir )
+ log_dir.mkdir(
+ parents = True,
+ exist_ok = True
+ )
time.sleep( 1 )
# always log to file: