diff --git a/README.md b/README.md index 3abccaa..7a8aa19 100644 --- a/README.md +++ b/README.md @@ -95,7 +95,7 @@ In order to apply the workflow to any other publication copy it into the `runtim 1. eoatex -> pdf - $ process_eoa_latex.py -f input/example/eoatex/EOASample.tex + $ process_eoa_latex.py -f input/example/124_eoatex/EOASample.tex (adjust filename if necessary) @@ -113,23 +113,23 @@ In order to apply the workflow to any other publication copy it into the `input/ 1. eoaTEI -> eoaTEI with bibliography - $ tei_add_bibl.py input/example/tei + $ tei_add_bibl.py -o output/125/with_bibl input/example/125_tei_part 1. eoaTEI -> eoaTEX - $ tei2eoatex.py -f input/example/tei/exampleTEI.xml + $ tei2eoatex.py -f input/example/125_tei_part/tei_part.xml -o output/125/eoatex 1. eoaTEX -> pdf - $ eoatex2pdf.py -f output/from_tei/eoatex/main.tex -o output/from_tei/pdf + $ eoatex2pdf.py -f output/125/eoatex/main.tex -o output/125/pdf (adjust filename if necessary) 1. eoaTEI -> imxml (to intermediate xml) - $ gather_pickledata.py input/example/tei/exampleTEI.xml input/example/tei/example.bib - $ tei2imxml.py -f input/example/tei/exampleTEI.xml + $ gather_pickledata.py -o output/125/pickle input/example/125_tei_part/*.xml input/example/125_tei_part/texfiles/example.bib + $ tei2imxml.py -f output/125/with_bibl/tei_part.xml -d output/125/pickle/data.pickle -o output/125/imxml 1. eoaTEI -> html - $ tei2html.py + $ tei2html.py output/125/with_bibl -o output/125/html diff --git a/dependencies.conf b/dependencies.conf index 3b3e90c..0df1f0e 100644 --- a/dependencies.conf +++ b/dependencies.conf @@ -13,7 +13,7 @@ hash = 2a01be46ee82fce5eba6074359b3d18db2222e0c [eoa-publication-model] uri = https://github.molgen.mpg.de/EditionOpenAccess/eoa-publication-model.git -hash = 5ff326580a6bc34756bce511a23146453fdb82b6 +hash = 5237e12c6205cff45d46c7d5f62d5222040be7a1 # init not needed, since only example publication is needed [webdesign_platform] diff --git a/src/stylesheets/tei2eoatex.xsl b/src/stylesheets/tei2eoatex.xsl index 3b5647b..e8d050e 100644 --- a/src/stylesheets/tei2eoatex.xsl +++ b/src/stylesheets/tei2eoatex.xsl @@ -24,7 +24,7 @@ - + @@ -141,6 +141,7 @@ contexts, a double replacement is performed. \end{document} + diff --git a/src/tei2eoatex.py b/src/tei2eoatex.py index 1010bc7..e2d46af 100755 --- a/src/tei2eoatex.py +++ b/src/tei2eoatex.py @@ -44,11 +44,19 @@ def copy_cmd(src, dst): copy_dir_overwrite( src, dst ) else: shutil.copytree( src, dst ) - for dir in ("images", "inline", "facsim", "preambel"): - copy_cmd( - input_dir / dir, - output_dir / dir - ) + + for f in input_dir.iterdir(): + logging.debug( f"copy dir: {f}" ) + if f.is_dir(): + copy_cmd( + f, + output_dir / f.name + ) + else: + shutil.copy( + f, + output_dir / f.name + ) if __name__ == '__main__': @@ -80,7 +88,8 @@ def copy_cmd(src, dst): "-o", "--output-dir", default = DEFAULT_OUTPUT_DIR / "from_tei/eoatex", metavar = "OUTPUT_DIR", - help="output directory" + help="output directory", + type = Path ) parser.add_argument( "-p", "--param", @@ -131,6 +140,12 @@ def copy_cmd(src, dst): parents=True ) + copy_files( + input_dir = input_file.parent, + output_dir = output_file.parent, + overwrite = args.overwrite, + ) + run_xslt( input_file = input_file, xslt_file = EOA_SCRIPTS_DIR / "stylesheets/tei2eoatex.xsl", @@ -140,37 +155,3 @@ def copy_cmd(src, dst): 'output_to' : ToFile( log_dir / SCRIPT_NAME / "saxon.log" ) }, ) - - from lxml import etree - - ns = {"tei": "http://www.tei-c.org/ns/1.0"} - - tree = etree.parse( - str(input_file) - ) - - bib_file = tree.xpath( - "//tei:sourceDesc/tei:ab[@type='bibdatabase']/tei:ref/@target", - namespaces = ns - )[0] - bib_file = input_file.parent / bib_file - - logging.info( f"bib_file: {bib_file}" ) - - copy_files( - input_dir = input_file.parent, - - output_dir = output_file.parent, - overwrite = args.overwrite, - ) - - if not bib_file.is_file(): - raise( Exception( f"bibfile not found: '{bib_file}'!" ) ) - - src = bib_file - dst = output_file.parent / bib_file.name - logging.info( f"copying {src} -> {dst}") - shutil.copy( - bib_file, - output_file.parent / bib_file.name - ) diff --git a/src/tei2imxml.py b/src/tei2imxml.py index 97d8df5..77dd514 100755 --- a/src/tei2imxml.py +++ b/src/tei2imxml.py @@ -1362,12 +1362,18 @@ def main(): logging.info( "checking executables 'utils.bib2html' needs...:" ) bib2html.check_executables() - if not os.path.exists(OUTPUT_DIR): - os.mkdir( OUTPUT_DIR ) - if not os.path.exists(TEMP_DIR): - os.mkdir( TEMP_DIR ) - if not os.path.exists( DEBUG_DIR ): - os.mkdir( DEBUG_DIR ) + OUTPUT_DIR.mkdir( + parents = True, + exist_ok = True + ) + TEMP_DIR.mkdir( + parents = True, + exist_ok = True + ) + DEBUG_DIR.mkdir( + parents = True, + exist_ok = True + ) try: with open(args.pickleddata, 'rb') as f: diff --git a/src/tei_add_bibl.py b/src/tei_add_bibl.py index 98f630f..53662aa 100755 --- a/src/tei_add_bibl.py +++ b/src/tei_add_bibl.py @@ -58,7 +58,9 @@ def copy_file( def publication_info(xml_tree): """Check TEI header for bibliography data, return relevant data as dictionary.""" - bib_file = xml_tree.xpath("//tei:teiHeader/tei:fileDesc/tei:sourceDesc/tei:ab[@type='bibdatabase']/tei:ref/@target", namespaces=NS_MAP)[0] + bib_file = Path( + xml_tree.xpath("//tei:teiHeader/tei:fileDesc/tei:sourceDesc/tei:ab[@type='bibdatabase']/tei:ref/@target", namespaces=NS_MAP)[0] + ) publ_type = xml_tree.xpath("//tei:teiHeader/tei:fileDesc/tei:sourceDesc/tei:ab[@type='bibdatabase']/tei:ref/@type", namespaces=NS_MAP)[0] if publ_type not in ["monograph", "anthology", "monograph-numeric", "anthology-numeric"]: logging.error(f"The bibliography type {publ_type} is not allowed. Exiting") @@ -76,6 +78,7 @@ def publication_info(xml_tree): def create_bibl_and_insert( tei_tree, + publ_info, temp_dir, tei_bib_file, tei_file, @@ -85,7 +88,7 @@ def create_bibl_and_insert( tei_info = bib2html.get_bibl_info( tei_tree ) logging.debug( f"info from tei file: {tei_info}" ) bib2html.bib2tei( - bib_file = bib_file, + bib_file = publ_info['bib_file'], citekeys = tei_info['citekeys'], keywords = tei_info['keywords'], language = translations[publ_info['language']], @@ -97,7 +100,7 @@ def create_bibl_and_insert( run_xslt( tei_file, BASE_DIR / "stylesheets/insert_bibliography.xsl", - params = [ f"tei_bib_file={tei_bib_file}" ], + params = [ f"tei_bib_file={tei_bib_file.absolute()}" ], output_file = tei_with_bibl_file ) @@ -135,16 +138,11 @@ def create_bibl_and_insert( type = Path, help = "xml file inside PUBLICATION_DIR, or absolute path. Patterns like '*.xml' are also acceptable" ) - parser.add_argument( - "-b", "--bib-file", - default = Path("*.bib"), - type = Path, - help = "bib file inside PUBLICATION_DIR, or absolute path. Patterns like '*.bib' are also acceptable" - ) parser.add_argument( "-o", "--output-dir", default = DEFAULT_OUTPUT_DIR / "from_tei/with_bibl", - help="output directory" + help="output directory", + type = Path, ) parser.add_argument( "-!", "--overwrite", @@ -173,19 +171,18 @@ def create_bibl_and_insert( publ_dir = args.PUBLICATION_DIR tei_file_input = args.filename - bib_file = args.bib_file tei_file = \ tei_file_input if tei_file_input . is_absolute() else list(publ_dir . glob (str(tei_file_input)))[0] - bib_file = \ - bib_file if bib_file . is_absolute() else list(publ_dir . glob (str(bib_file)))[0] - - - logging.info( f"publ_dir: {publ_dir}, tei_file: {tei_file}, bib_file: {bib_file}" ) - output_dir = args.output_dir + tei_tree = etree.parse(str(tei_file)) + publ_info = publication_info( tei_tree ) + publ_info['bib_file'] = tei_file.parent / publ_info['bib_file'] + logging.info( f"publ_dir: {publ_dir}, tei_file: {tei_file}" ) + logging.info( f"The bibfile is '{publ_info['bib_file']}' and this publication type is '{publ_info['publ_type']}'. Language: '{publ_info['language']}'") + if not tei_file.is_file(): raise( Exception( f"not a valid input file: {tei_file}" @@ -203,12 +200,10 @@ def create_bibl_and_insert( output_dir, # ignore = ignore_patterns( tei_file_input ) if not(tei_file_input . is_absolute()) else None ) - tei_tree = etree.parse(str(tei_file)) - publ_info = publication_info( tei_tree ) - logging.info( f"The bibfile is '{publ_info['bib_file']}' and this publication type is '{publ_info['publ_type']}'. Language: '{publ_info['language']}'") if publ_info["publ_type"] == "monograph": create_bibl_and_insert( tei_tree, + publ_info = publ_info, temp_dir = output_dir / "temp/all", tei_bib_file = (output_dir/ "bibliography/bibliography_all") . with_suffix(".tei"), tei_file = tei_file, @@ -220,8 +215,11 @@ def create_bibl_and_insert( chapter_id = chap_node.xpath("@xml:id", namespaces=NS_MAP) create_bibl_and_insert( tei_tree, + publ_info = publ_info, temp_dir = output_dir / f"temp/chap_{chapter_id}", - tei_bib_file = (output_dir/ f"bibliography/bibliography_chap_{chapter_id}") . with_suffix(".tei") + tei_bib_file = (output_dir/ f"bibliography/bibliography_chap_{chapter_id}") . with_suffix(".tei"), + tei_file = tei_file, + tei_with_bibl_file = (output_dir / (tei_file.with_suffix("").name + "_with_bibl")) . with_suffix( ".xml" ), ) else: raise( Exception("unknown publication type!")) diff --git a/src/utils/bib2html.py b/src/utils/bib2html.py index 7c32c6e..ce0775b 100755 --- a/src/utils/bib2html.py +++ b/src/utils/bib2html.py @@ -382,7 +382,7 @@ def __imhtml_2_tei( run_xslt( imhtml_file, BASE_DIR / "stylesheets/tex4ht_2_tei.xsl", - params = [ f"dashed_file={imhtml_dashed_file}" ], + params = [ f"dashed_file={imhtml_dashed_file.absolute()}" ], output_file = output_file )