Skip to content
Permalink
master
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
executable file 243 lines (217 sloc) 8.15 KB
#!/usr/bin/env python3
import utils.bib2html as bib2html
from utils.load_config import load_config
from utils.run_xslt import run_xslt
import logging
import argparse
from lxml import etree
from pathlib import Path
from os import environ
from shutil import rmtree, copytree, ignore_patterns, copy
# Directory that contains this script, and the script's file name without
# its extension.
BASE_DIR = Path(__file__).parent
SCRIPT_NAME = Path(__file__).stem

# Default working directories. Each one is taken from the environment
# variable of the same name when that variable is set, and falls back to a
# path relative to the current working directory otherwise.
DEFAULT_INPUT_DIR = Path(environ.get('INPUT_DIR', './input'))
DEFAULT_OUTPUT_DIR = Path(environ.get('OUTPUT_DIR', './output'))
DEFAULT_DEPENDENCIES_DIR = Path(
    environ.get('DEPENDENCIES_DIR', './dependencies')
)

# Prefix -> namespace URI mapping handed to every XPath query in this file.
NS_MAP = {"tei": 'http://www.tei-c.org/ns/1.0'}
def copy_dir(
    src,
    dst,
    **opts
):
    """Replace directory *dst* with a fresh recursive copy of *src*.

    The copy operation is announced on stdout first. If *dst* already
    exists it is removed completely before copying, so nothing of the old
    content survives.

    Args:
        src: directory to copy from.
        dst: directory to create.
        **opts: extra keyword arguments forwarded unchanged to
            ``shutil.copytree``.
    """
    print(f"'{src}' -> '{dst}'")
    target = Path(dst)
    if target.exists():
        # copytree is called without dirs_exist_ok here, so an existing
        # destination has to go first.
        rmtree(dst)
    copytree(src=src, dst=dst, **opts)
def publication_info(xml_tree):
    """Check TEI header for bibliography data, return relevant data as dictionary.

    All queries are evaluated against *xml_tree* itself, so the function
    does not depend on any module-level document.

    Args:
        xml_tree: parsed TEI document. Only its ``xpath(expr, namespaces=...)``
            method is used (the script passes an ``lxml`` element tree).

    Returns:
        A dictionary with three keys:
        ``bib_file`` - ``Path`` built from the ``target`` attribute of the
        bibliography reference in the source description,
        ``publ_type`` - the ``type`` attribute of that same reference,
        ``language`` - the ``ident`` attribute of the first language entry
        in the profile description.

    Raises:
        IndexError: if one of the three queries yields no result.
        SystemExit: with status 1 (after logging an error) if the
            bibliography type is not one of the allowed values.
    """
    # Both bibliography attributes live on the same tei:ref element.
    bibdatabase_ref = (
        "//tei:teiHeader/tei:fileDesc/tei:sourceDesc"
        "/tei:ab[@type='bibdatabase']/tei:ref"
    )
    allowed_types = (
        "monograph",
        "anthology",
        "monograph-numeric",
        "anthology-numeric",
    )

    bib_file = Path(
        xml_tree.xpath(bibdatabase_ref + "/@target", namespaces=NS_MAP)[0]
    )
    publ_type = xml_tree.xpath(
        bibdatabase_ref + "/@type", namespaces=NS_MAP
    )[0]
    if publ_type not in allowed_types:
        logging.error(f"The bibliography type {publ_type} is not allowed. Exiting")
        # Same effect as exit(1), without relying on the helper that the
        # site module installs for interactive use.
        raise SystemExit(1)

    # Query the tree that was passed in, not a global of the calling script.
    language = xml_tree.xpath(
        "/tei:TEI/tei:teiHeader/tei:profileDesc/tei:langUsage/tei:language/@ident",
        namespaces=NS_MAP
    )[0]
    return {
        "bib_file": bib_file,
        "publ_type": publ_type,
        "language": language,
    }
def create_bibl(
    tei_node,
    publ_info,
    temp_dir,
    tei_bib_file,
    tei_file,
):
    """Create the bibliography file for one TEI node via ``bib2html.bib2tei``.

    The citation data found in *tei_node* by ``bib2html.get_bibl_info`` is
    combined with the publication data and handed to ``bib2html.bib2tei``.

    Args:
        tei_node: TEI tree or element that is searched for citations.
        publ_info: dictionary providing ``bib_file`` and ``language``
            (a two-letter code: ``de``, ``en``, ``it`` or ``fr``).
        temp_dir: working directory passed on to ``bib2tei``; its ``log``
            child is passed as the log directory.
        tei_bib_file: output file passed on to ``bib2tei``.
        tei_file: accepted for the callers' sake, not used here.

    Raises:
        KeyError: if the language code of *publ_info* has no entry in the
            translation table.
    """
    # Language code as found in the TEI header -> language name that is
    # passed to bib2tei.
    language_names = {
        "de": "german",
        "en": "english",
        "it": "italian",
        "fr": "french",
    }
    bibl_info = bib2html.get_bibl_info(tei_node)
    logging.debug(f"info from tei file: {bibl_info}")

    bib2tei_args = {
        "bib_file": publ_info['bib_file'],
        "citekeys": bibl_info['citekeys'],
        "keywords": bibl_info['keywords'],
        "language": language_names[publ_info['language']],
        "tex_template": BASE_DIR / "data/aux/bibliography4ht.tex",
        "temp_dir": temp_dir,
        "output_file": tei_bib_file,
        "log_dir": temp_dir / "log",
    }
    bib2html.bib2tei(**bib2tei_args)
if __name__ == '__main__':
    # Script entry point: copy the publication directory to the output
    # directory, generate the bibliography file(s) and write a copy of the
    # TEI file that has them inserted.
    #
    # The variables below are deliberately kept at module level (no main()
    # function): other code in this file may refer to them as globals.

    # ---- command line -------------------------------------------------
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument(
        "-c", "--config",
        dest="CONFIG_FILE",
        default=BASE_DIR / "config" / "eoaconvert.cfg",
        help="Name of configuration file",
        metavar="CONFIGURATION"
    )
    parser.add_argument(
        "--log-level",
        default="INFO",
        help="log level: choose between DEBUG, INFO, WARNING, ERROR, CRITICAL"
    )
    parser.add_argument(
        "-f", "--filename",
        default=Path("*.xml"),
        type=Path,
        help="xml file inside PUBLICATION_DIR, or absolute path. Patterns like '*.xml' are also acceptable"
    )
    parser.add_argument(
        "-o", "--output-dir",
        metavar="OUTPUT_DIR",
        help=f"output directory. default: {DEFAULT_OUTPUT_DIR}/with_bibl/PUBLICATION_NAME",
        type=Path,
    )
    parser.add_argument(
        "-!", "--overwrite",
        action="store_true",
        default=False,
        help="overwrite OUTPUT_DIR, if existing"
    )
    parser.add_argument(
        "PUBLICATION_DIR",
        help="directory containing the publication (including resources like pictures, etc.)",
        type=Path,
    )
    args = parser.parse_args()

    # ---- paths --------------------------------------------------------
    publ_dir = args.PUBLICATION_DIR
    if args.output_dir is not None:
        output_dir = args.output_dir
    else:
        output_dir = DEFAULT_OUTPUT_DIR / "with_bibl" / (publ_dir.resolve().stem)
    tei_file_input = args.filename
    log_dir = output_dir / "log"
    log_file = (log_dir / SCRIPT_NAME).with_suffix(".log")
    print(f"log_dir: {log_dir}")

    # ---- prepare output directory -------------------------------------
    # An existing output directory is only replaced on explicit request.
    if output_dir.exists():
        if not args.overwrite:
            raise Exception(f"output directory already existing: '{output_dir}'!")
        rmtree(output_dir)
    # At this point the output directory is guaranteed not to exist.
    copy_dir(
        publ_dir,
        output_dir,
    )

    # ---- configuration ------------------------------------------------
    config_file = args.CONFIG_FILE
    print("The config file is ", config_file)
    # NOTE(review): load_config receives log level and log file, so it
    # presumably also configures logging - confirm in utils.load_config.
    CONFIG = load_config(
        config_file,
        args.log_level,
        log_file
    )

    # ---- locate and validate the TEI input file -----------------------
    if tei_file_input.is_absolute():
        tei_file = tei_file_input
    else:
        # Relative names may be glob patterns; the first match is used.
        tei_file = next(publ_dir.glob(str(tei_file_input)), None)
        if tei_file is None:
            raise FileNotFoundError(
                f"no file matching '{tei_file_input}' found in '{publ_dir}'"
            )
    # Validate before parsing, so that a wrong path is reported with this
    # message rather than with a parser error.
    if not tei_file.is_file():
        raise Exception(
            f"not a valid input file: {tei_file}"
        )

    tei_tree = etree.parse(str(tei_file))
    publ_info = publication_info(tei_tree)
    # The bibliography file is referenced relative to the TEI file.
    publ_info['bib_file'] = tei_file.parent / publ_info['bib_file']
    logging.info(f"publ_dir: {publ_dir}, tei_file: {tei_file}")
    logging.info(f"The bibfile is '{publ_info['bib_file']}' and this publication type is '{publ_info['publ_type']}'. Language: '{publ_info['language']}'")

    tei_with_bibl_file = \
        (output_dir / (tei_file.with_suffix("").name + "_with_bibl")).with_suffix(".xml")

    # ---- create bibliography file(s) in tei format --------------------
    if publ_info["publ_type"] == "monograph":
        # One bibliography for the whole document.
        tei_bib_file = \
            (output_dir / "bibliography/bibliography_all").with_suffix(".tei")
        create_bibl(
            tei_tree,
            publ_info=publ_info,
            temp_dir=output_dir / "temp/all",
            tei_bib_file=tei_bib_file,
            tei_file=tei_file,
        )
        all_bibl_filepaths = [tei_bib_file]
    elif publ_info["publ_type"] == "anthology":
        # One bibliography per chapter.
        all_bibl_filepaths = []
        for chap_node in tei_tree.xpath("//tei:body//tei:div[@type = 'chapter']", namespaces=NS_MAP):
            chapter_id = chap_node.xpath("@xml:id", namespaces=NS_MAP)[0]
            logging.info(f"creating bibliography for chapter: {chapter_id}")
            tei_bib_file = (output_dir / f"bibliography/bibliography_chap_{chapter_id}").with_suffix(".tei")
            create_bibl(
                chap_node,
                publ_info=publ_info,
                temp_dir=output_dir / f"temp/chap_{chapter_id}",
                tei_bib_file=tei_bib_file,
                tei_file=tei_file,
            )
            all_bibl_filepaths.append(tei_bib_file)
    else:
        # NOTE(review): publication_info() also lets 'monograph-numeric' and
        # 'anthology-numeric' through, which end up here - confirm that this
        # is intended.
        raise Exception("unknown publication type!")

    # ---- insert bibliography ------------------------------------------
    # The stylesheet receives the absolute paths of all bibliography files
    # as a single ';'-separated parameter.
    all_teibibs_arg = ";".join(
        [str(f.absolute()) for f in all_bibl_filepaths]
    )
    run_xslt(
        tei_file,
        BASE_DIR / "stylesheets/insert_bibliography.xsl",
        params=[f"tei_bib_files={all_teibibs_arg}"],
        output_file=tei_with_bibl_file
    )

    # ---- create uniquely named links ----------------------------------
    # to the original tei file and to the one with added bibliography.
    # The link targets are bare file names, i.e. relative to output_dir.
    for link_name, link_target in (
        ("no_bibl.xml", tei_file.name),
        ("with_bibl.xml", tei_with_bibl_file.name),
    ):
        link = output_dir / link_name
        if link.exists():
            logging.error(
                f"error while creating unique link: file already exists '{link}'"
            )
            raise SystemExit(1)
        link.symlink_to(link_target)