Skip to content
Permalink
380d7af310
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
executable file 246 lines (220 sloc) 8.02 KB
#!/usr/bin/env python3
import utils.bib2html as bib2html
from utils.load_config import load_config
from utils.run_xslt import run_xslt
import logging
import argparse
from lxml import etree
from pathlib import Path
from os import environ
from shutil import rmtree, copytree, ignore_patterns, copy
# Location and name of this script.
BASE_DIR = Path(__file__).parent
SCRIPT_NAME = Path(__file__).stem

# Default directories; each one can be overridden by the environment
# variable of the same name.
DEFAULT_INPUT_DIR = Path(environ.get('INPUT_DIR', './input'))
DEFAULT_OUTPUT_DIR = Path(environ.get('OUTPUT_DIR', './output'))
DEFAULT_DEPENDENCIES_DIR = Path(environ.get('DEPENDENCIES_DIR', './dependencies'))

# Namespace prefix mapping handed to the xpath queries in this file.
NS_MAP = {"tei": 'http://www.tei-c.org/ns/1.0'}
def copy_dir(src, dst, **opts):
    """Replace directory *dst* with a recursive copy of *src*.

    The operation is announced on stdout first. An already existing *dst*
    is removed before copying. Additional keyword arguments are forwarded
    to ``shutil.copytree``.
    """
    print(f"'{src}' -> '{dst}'")
    target = Path(dst)
    if target.exists():
        # copytree needs a fresh destination, so drop the old tree first.
        rmtree(dst)
    copytree(src=src, dst=dst, **opts)
def copy_file(src, dst, **opts):
    """Copy the file *src* to *dst*, announcing the operation on stdout.

    Additional keyword arguments are forwarded to ``shutil.copy``.
    """
    print(f"'{src}' -> '{dst}'")
    copy(src=src, dst=dst, **opts)
def publication_info(xml_tree):
    """Check TEI header for bibliography data, return relevant data as dictionary.

    Args:
        xml_tree: parsed TEI document offering an lxml-style
            ``xpath(expression, namespaces=...)`` method.

    Returns:
        A dict with the keys ``bib_file`` (a ``Path`` built from the
        ``target`` attribute of the bibdatabase reference), ``publ_type``
        (the ``type`` attribute of that reference) and ``language`` (the
        ``ident`` attribute of the first ``langUsage/language`` element).

    Raises:
        IndexError: if one of the queried attributes is not present.
        SystemExit: with status 1 if the publication type is not allowed.
    """
    allowed_types = (
        "monograph",
        "anthology",
        "monograph-numeric",
        "anthology-numeric",
    )
    # Both attributes are read from the same <ref> element.
    bibdatabase_ref = (
        "//tei:teiHeader/tei:fileDesc/tei:sourceDesc"
        "/tei:ab[@type='bibdatabase']/tei:ref"
    )
    bib_file = Path(
        xml_tree.xpath(f"{bibdatabase_ref}/@target", namespaces=NS_MAP)[0]
    )
    publ_type = xml_tree.xpath(f"{bibdatabase_ref}/@type", namespaces=NS_MAP)[0]
    if publ_type not in allowed_types:
        logging.error(f"The bibliography type {publ_type} is not allowed. Exiting")
        raise SystemExit(1)
    # Query the tree that was passed in. The language used to be read from
    # a module-level ``tei_tree``, which only exists when the file runs as
    # a script and is not necessarily the tree given to this function.
    language = xml_tree.xpath(
        "/tei:TEI/tei:teiHeader/tei:profileDesc/tei:langUsage/tei:language/@ident",
        namespaces=NS_MAP
    )[0]
    return {
        "bib_file": bib_file,
        "publ_type": publ_type,
        "language": language,
    }
def create_bibl(tei_node, publ_info, temp_dir, tei_bib_file, tei_file):
    """Create the bibliography for *tei_node* by delegating to ``bib2html``.

    Args:
        tei_node: node handed to ``bib2html.get_bibl_info``; its result
            supplies the ``citekeys`` and ``keywords`` for the conversion.
        publ_info: mapping providing ``bib_file`` and ``language`` (one of
            ``de``, ``en``, ``it``, ``fr``).
        temp_dir: path passed on as working directory; its ``log``
            subdirectory is passed as log directory.
        tei_bib_file: passed to ``bib2html.bib2tei`` as ``output_file``.
        tei_file: not used by this function.

    Raises:
        KeyError: if the language of *publ_info* has no entry in the
            language table below.
    """
    language_names = dict(de="german", en="english", it="italian", fr="french")
    bibl_info = bib2html.get_bibl_info(tei_node)
    logging.debug(f"info from tei file: {bibl_info}")
    conversion_args = {
        "bib_file": publ_info['bib_file'],
        "citekeys": bibl_info['citekeys'],
        "keywords": bibl_info['keywords'],
        "language": language_names[publ_info['language']],
        "tex_template": BASE_DIR / "data/aux/bibliography4ht.tex",
        "temp_dir": temp_dir,
        "output_file": tei_bib_file,
        "log_dir": temp_dir / "log",
    }
    bib2html.bib2tei(**conversion_args)
if __name__ == '__main__':
    # Command-line entry point: copy the publication directory to the
    # output directory, create the bibliography file(s) via create_bibl()
    # and write a copy of the TEI file with the bibliography inserted.

    # parse args:
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument(
        "--log-dir",
        default=DEFAULT_OUTPUT_DIR / "logs",
        # type=Path: a directory given on the command line must support the
        # "/" operator used below (a plain str would raise TypeError).
        type=Path,
        help="directory for the logfile"
    )
    parser.add_argument(
        "-c", "--config",
        dest="CONFIG_FILE",
        default=BASE_DIR / "config" / "eoaconvert.cfg",
        help="Name of configuration file",
        metavar="CONFIGURATION"
    )
    parser.add_argument(
        "-l", "--log-file",
        default=SCRIPT_NAME + ".log",
        help="logfile"
    )
    parser.add_argument(
        "--log-level",
        default="INFO",
        help="log level: choose between DEBUG, INFO, WARNING, ERROR, CRITICAL"
    )
    parser.add_argument(
        "-f", "--filename",
        default=Path("*.xml"),
        type=Path,
        help="xml file inside PUBLICATION_DIR, or absolute path. Patterns like '*.xml' are also acceptable"
    )
    parser.add_argument(
        "-o", "--output-dir",
        default=DEFAULT_OUTPUT_DIR / "from_tei/with_bibl",
        help="output directory",
        type=Path,
    )
    parser.add_argument(
        "-!", "--overwrite",
        action="store_true",
        default=False,
        help="overwrite OUTPUT_DIR, if existing"
    )
    parser.add_argument(
        "PUBLICATION_DIR",
        default=Path("input/example/tei"),
        help="directory containing the publication (including resources like pictures, etc.)",
        nargs='?',  # (optional)
        type=Path,
    )
    args = parser.parse_args()

    config_file = args.CONFIG_FILE
    print("The config file is ", config_file)

    log_dir = args.log_dir
    # Honour --log-file (it used to be parsed but ignored). With the
    # default value this is the same "<log_dir>/<script name>.log" as before.
    CONFIG = load_config(
        config_file,
        args.log_level,
        log_dir / args.log_file,
    )

    publ_dir = args.PUBLICATION_DIR
    output_dir = args.output_dir
    tei_file_input = args.filename

    # Resolve the input file: an absolute path is taken as is, anything
    # else is treated as a glob pattern relative to the publication dir.
    if tei_file_input.is_absolute():
        tei_file = tei_file_input
    else:
        # sorted() makes the choice deterministic if several files match.
        matches = sorted(publ_dir.glob(str(tei_file_input)))
        if not matches:
            raise FileNotFoundError(
                f"no file matching '{tei_file_input}' found in '{publ_dir}'"
            )
        if len(matches) > 1:
            logging.warning(
                f"'{tei_file_input}' matches {len(matches)} files in '{publ_dir}', using '{matches[0]}'"
            )
        tei_file = matches[0]
    # Validate before parsing, so a bad path gives this message instead of
    # a parser error.
    if not tei_file.is_file():
        raise Exception(f"not a valid input file: {tei_file}")

    # NOTE: tei_tree deliberately stays a module-level name.
    tei_tree = etree.parse(str(tei_file))
    publ_info = publication_info(tei_tree)
    # The bibliography file is given relative to the TEI file.
    publ_info['bib_file'] = tei_file.parent / publ_info['bib_file']
    logging.info(f"publ_dir: {publ_dir}, tei_file: {tei_file}")
    logging.info(f"The bibfile is '{publ_info['bib_file']}' and this publication type is '{publ_info['publ_type']}'. Language: '{publ_info['language']}'")

    if output_dir.exists():
        if not args.overwrite:
            raise Exception(f"output directory already existing: '{output_dir}'!")
        rmtree(output_dir)
    # At this point the output directory does not exist (any more).
    copy_dir(publ_dir, output_dir)

    # Build file names by appending the extension instead of using
    # with_suffix(), which would cut off everything after the last dot of
    # names such as "my.book" or of chapter ids such as "ch.1".
    tei_with_bibl_file = output_dir / f"{tei_file.stem}_with_bibl.xml"
    bibliography_dir = output_dir / "bibliography"

    publ_type = publ_info["publ_type"]
    if publ_type == "monograph":
        # create one bibliography in tei format for the whole document:
        tei_bib_file = bibliography_dir / "bibliography_all.tei"
        create_bibl(
            tei_tree,
            publ_info=publ_info,
            temp_dir=output_dir / "temp/all",
            tei_bib_file=tei_bib_file,
            tei_file=tei_file,
        )
        tei_bib_files = [tei_bib_file]
    elif publ_type == "anthology":
        tei_bib_files = []
        # create one bibliography in tei format for every chapter:
        for chap_node in tei_tree.xpath("//tei:body//tei:div[@type = 'chapter']", namespaces=NS_MAP):
            chapter_id = chap_node.xpath("@xml:id", namespaces=NS_MAP)[0]
            logging.info(f"creating bibliography for chapter: {chapter_id}")
            tei_bib_file = bibliography_dir / f"bibliography_chap_{chapter_id}.tei"
            create_bibl(
                chap_node,
                publ_info=publ_info,
                temp_dir=output_dir / f"temp/chap_{chapter_id}",
                tei_bib_file=tei_bib_file,
                tei_file=tei_file,
            )
            tei_bib_files.append(tei_bib_file)
    else:
        # publication_info() also accepts the "-numeric" types, but they
        # are not handled here.
        raise Exception("unknown publication type!")

    # insert bibliography (the stylesheet receives all bibliography files
    # as one ";"-separated parameter):
    all_teibibs_arg = ";".join(str(f.absolute()) for f in tei_bib_files)
    run_xslt(
        tei_file,
        BASE_DIR / "stylesheets/insert_bibliography.xsl",
        params=[f"tei_bib_files={all_teibibs_arg}"],
        output_file=tei_with_bibl_file
    )