diff --git a/README.md b/README.md
index 2a6fba7..5194c82 100644
--- a/README.md
+++ b/README.md
@@ -111,17 +111,21 @@ In order to apply the workflow to any other publication copy it into the `input/
$ ./scripts/run.py # run if not yet running
$ ./scripts/exec_in_container.py # enter container
-1. eoatei -> eoatex
+1. eoaTEI -> eoaTEI with bibliography
+
+ $ tei_add_bibl.py input/example/tei
+
+1. eoaTEI -> eoaTEX
$ tei2eoatex.py -f input/example/tei/exampleTEI.xml
-1. eoatex -> pdf
+1. eoaTEX -> pdf
$ eoatex2pdf.py -f output/from_tei/eoatex/main.tex -o output/from_tei/pdf
(adjust filename if necessary)
-1. eoatex -> imxml (to intermediate xml)
+1. eoaTEI -> imxml (to intermediate xml)
$ gather_pickledata.py input/example/tei/exampleTEI.xml input/example/tei/example.bib
$ tei2imxml.py -f input/example/tei/exampleTEI.xml
diff --git a/src/stylesheets/teibib_to_eoa1.xsl b/src/stylesheets/teibib_to_eoa1.xsl
index 9fd7776..0b00692 100644
--- a/src/stylesheets/teibib_to_eoa1.xsl
+++ b/src/stylesheets/teibib_to_eoa1.xsl
@@ -32,6 +32,14 @@
+
citefull
diff --git a/src/stylesheets/tex4ht_2_tei.xsl b/src/stylesheets/tex4ht_2_tei.xsl
index a17c3e5..5d80102 100644
--- a/src/stylesheets/tex4ht_2_tei.xsl
+++ b/src/stylesheets/tex4ht_2_tei.xsl
@@ -120,10 +120,12 @@
+
+
diff --git a/src/tei2imxml.py b/src/tei2imxml.py
index 76472f6..97d8df5 100755
--- a/src/tei2imxml.py
+++ b/src/tei2imxml.py
@@ -26,7 +26,6 @@
import shlex
import argparse
import configparser
-import bibtexparser
from datetime import datetime
from bs4 import BeautifulSoup
from copy import deepcopy
@@ -316,7 +315,10 @@ def format_reference_list(used_citekeys, html_file):
return references
# def format_reference_list ends here
-def format_citations(used_citekeys, bibdata, html_file):
+def format_citations(
+ used_citekeys,
+ html_file
+):
"""Return a dictionary of the used citations as formatted entries.
citation_dict[citekey] = (authoryear_citation, year_citation, title)
@@ -329,24 +331,15 @@ def format_citations(used_citekeys, bibdata, html_file):
sys.exit(1)
citation_dict = {}
-
for entry in used_citekeys:
- if entry in bibdata:
- current_citation = entry
- logging.debug(f"""{html_file}: {entry}.""")
- try:
- strTitle = bibdata[entry]["title"]
- except KeyError:
- logging.warning("No title found for %s", entry)
-
- title = strTitle
- try:
- authoryear_citation = cites.xpath(f"//div[@class='authoryear']/p/span[@data-cites='{entry}']")[0].text
- year_citation = cites.xpath(f"//div[@class='year']/p/span[@data-cites='{entry}']")[0].text
- except IndexError:
- logging.error(f"Entry {entry} was not found in HTML file. Maybe you should run the tool again without -n option. Exiting.")
- sys.exit(1)
- citation_dict[entry] = (authoryear_citation, year_citation, title)
+ try:
+ authoryear_citation = cites.xpath(f"//div[@class='authoryear']/p/span[@data-cites='{entry}']")[0].text
+ year_citation = cites.xpath(f"//div[@class='year']/p/span[@data-cites='{entry}']")[0].text
+ title = cites.xpath(f"//div[@class='title']/p/span[@data-cites='{entry}']")[0].text
+ except IndexError:
+ logging.error(f"Entry {entry} was not found in HTML file. Maybe you should run the tool again without -n option. Exiting.")
+ sys.exit(1)
+ citation_dict[entry] = (authoryear_citation, year_citation, title)
return citation_dict
# def format_citations ends here
@@ -691,6 +684,7 @@ def handle_refs_default(ref):
eoa_citations = xml_tree.xpath("//t:bibl", namespaces=NS_MAP)
for citation in eoa_citations:
+ # logging.debug( f"handling citation: {etree.tostring(citation)}" )
pagerange = ""
cited_range = citation.xpath("t:citedRange", namespaces=NS_MAP)
citeref = citation.xpath("t:ref", namespaces=NS_MAP)
@@ -1177,108 +1171,6 @@ def update_ids(xml_tree, ignore_ref_errors):
return xml_tree
# def update_ids ends here
-
-def get_all_citations(xml_file):
- """Retrieve citations from file """
-
- all_citations = xml_file.xpath("//t:bibl/t:ref", namespaces=NS_MAP)
-
- all_citekeys = []
-
- for citation in all_citations:
- citekey = citation.get("target")[1:]
- if citekey not in all_citekeys:
- all_citekeys.append(citekey)
-
- return all_citekeys
-# def get_all_citations ends here
-
-
-def get_citations_per_chapter(xml_tree):
- """If publication is anthology, store which citations are mentioned in each chapter:
-
- 'chap18_schwartz': {'Blodget_1857', 'CliffordMarcus_1986',
- 'Hunter_2004', 'MarcusFischer_1986', 'Mitchell_1992', 'Nye_1994',
- 'Schlereth_1980', 'Schwartz_2003', 'Schwartz_2011'}}
-
- """
-
- refs_per_chapter = {}
-
- all_chapters = xml_tree.xpath("//t:div[@type='chapter']", namespaces=NS_MAP)
- logging.info(f"Found {libeoaconvert.plural(len(all_chapters), 'chapter')}.")
-
- for chapter in all_chapters:
- try:
- chapter_id = chapter.xpath("@xml:id", namespaces=NS_MAP)[0]
- except IndexError:
- logging.error(f"Found a chapter without identifier. Each chapter must have one. Exiting.")
- sys.exit(1)
- all_refs_with_hash = chapter.xpath(".//t:bibl/t:ref/@target", namespaces=NS_MAP)
- all_refs = [x[1:] for x in all_refs_with_hash]
- logging.info(f"Found {libeoaconvert.plural(len(all_refs), 'reference')} in this chapter.")
- refs_per_chapter[chapter_id] = set(all_refs)
-
- return refs_per_chapter
-# def get_citations_per_chapter ends here
-
-def convert_bibliography_to_dict(
- bib_file : Path
-):
- """Create a dictionary from bibliography data."""
-
- parser = bibtexparser.bparser.BibTexParser()
- # be a bit lax about nonstandard entry types
- parser.ignore_nonstandard_types = False
-
- bibliography_dict = {}
-
- with open(bib_file) as btf:
- btb = bibtexparser.load(btf, parser=parser)
- bibliography_dict = btb.entries_dict
-
- return bibliography_dict
-# def convert_bibliography_to_dict ends here
-
-def make_bibliography_tex4ht(
- used_citekeys,
- bib_data,
- output_file_root,
- publication_language,
- TEMP_DIR,
- log_dir,
- input_dir
- ):
- """Create the HTML version of the bibliography using tex4ht
-
- Return the filename of the HTML file
- """
-
- translations = {"de" : "german", "en" : "english", "it" : "italian", "fr" : "french"}
-
- citations_filename_tei = Path(output_file_root).with_suffix(".tei")
-
- bib2html.bib2tei(
- bib_file = input_dir / bib_data["source"],
- citekeys = used_citekeys,
- language = translations[publication_language],
- temp_dir = TEMP_DIR,
- output_file = citations_filename_tei,
- log_dir = log_dir,
- keywords = [""]
- )
-
- citations_filename_html = Path(output_file_root).with_suffix(".html")
- bib2html.teibib_to_eoa1(
- citations_filename_tei,
- output_file = citations_filename_html
- )
-
-
- return citations_filename_html
-# def make_bibliography_tex4ht ends here
-
-
def add_bibliography_monograph(xml_tree, refs_for_bib_chapter):
"""Add another chapter containing the bibliography."""
@@ -1493,37 +1385,32 @@ def main():
bib_data = check_bibliography(xml_tree)
- citations_dict = convert_bibliography_to_dict(
- INPUT_DIR / bib_data["source"]
- )
-
logging.debug("Creating bibliographies.")
+ cited_dict = {}
if bib_data["type"] == "monograph":
- used_citekeys = get_all_citations(xml_tree)
- citations_filename_root = Path(TEMP_DIR, "formatted_citations_monograph")
+ bibl_info = bib2html.get_bibl_info( xml_tree )
+ logging.debug( f"citekeys: {bibl_info['citekeys']}" )
- if args.no_bib4ht:
- citations_filename_html = citations_filename_root.with_suffix(".html")
- logging.info("Skipping creation of HTML bibliography files. Using the existing ones.")
- else:
- citations_filename_html = make_bibliography_tex4ht(
- used_citekeys,
- bib_data,
- citations_filename_root,
- publication_language,
- TEMP_DIR,
- LOG_DIR,
- INPUT_DIR
- )
+ citations_filename_tei = \
+ (INPUT_DIR / "bibliography/bibliography_all") . with_suffix(".tei")
+
+ citations_filename_html = (TEMP_DIR / "formatted_citations_monograph") . with_suffix(".html")
+ bib2html.teibib_to_eoa1(
+ citations_filename_tei,
+ output_file = citations_filename_html
+ )
logging.info("Formatting citations now.")
- cited_dict = format_citations(used_citekeys, citations_dict, citations_filename_html)
- refs_for_bib_chapter = format_reference_list(used_citekeys, citations_filename_html)
+ # citekey -> (authoryear, year, title)
+ cited_dict = format_citations(
+ bibl_info['citekeys'],
+ citations_filename_html
+ )
+ refs_for_bib_chapter = format_reference_list(bibl_info['citekeys'], citations_filename_html)
elif bib_data["type"] == "anthology":
- citations_per_chapter = get_citations_per_chapter(xml_tree)
+ citations_per_chapter = {chapter.get("{http://www.w3.org/XML/1998/namespace}id"): set(bib2html.get_bibl_info(chapter)["citekeys"]) for chapter in xml_tree.xpath("//t:div[@type='chapter']", namespaces=NS_MAP)}  # chapter xml:id -> citekeys cited in that chapter; replaces the removed get_citations_per_chapter()
formatted_references_dict = {}
all_chapter_ids = xml_tree.xpath("//t:div[@type='chapter']/@xml:id", namespaces=NS_MAP)
- cited_dict = {}
for chapter_id in all_chapter_ids:
used_citekeys_per_chapter = citations_per_chapter[chapter_id]
@@ -1534,21 +1421,31 @@ def main():
else:
citations_filename_root = Path(TEMP_DIR, f"formatted_citations_{chapter_id}")
if args.no_bib4ht:
- citations_filename_html_per_chapter = citations_filename_root.with_suffix(".html")
+citations_filename_html_per_chapter = citations_filename_root.with_suffix(".html")  # reuse the existing per-chapter HTML; format_citations() is called with this name afterwards
logging.info("Skipping creation of HTML bibliography files. Using the existing ones.")
else:
- citations_filename_html_per_chapter = make_bibliography_tex4ht(
- used_citekeys_per_chapter,
- bib_data,
- citations_filename_root,
- publication_language,
- TEMP_DIR,
- LOG_DIR,
- INPUT_DIR
+
+ citations_filename_tei_per_chapter = citations_filename_root . with_suffix(".tei")
+ if not citations_filename_tei_per_chapter.is_file():  # only build the chapter's TEI bibliography when it does not exist yet
+ translations = {"de" : "german", "en" : "english", "it" : "italian", "fr" : "french"}
+ bib2html.bib2tei(
+ bib_file = INPUT_DIR / bib_data["source"],
+ citekeys = used_citekeys_per_chapter,  # the citekeys of this chapter (see the loop head)
+ language = translations[publication_language],
+ temp_dir = TEMP_DIR,
+ output_file = citations_filename_tei_per_chapter,
+ log_dir = LOG_DIR,
+ keywords = [""]
+ )
+ citations_filename_html_per_chapter = citations_filename_root . with_suffix(".html")
+ bib2html.teibib_to_eoa1(
+ citations_filename_tei_per_chapter,
+ output_file = citations_filename_html_per_chapter
)
logging.info("Formatting citations now.")
- cited_dict_per_chapter = format_citations(used_citekeys_per_chapter, citations_dict, citations_filename_html_per_chapter)
+ # citekey -> (authoryear, year, title)
+ cited_dict_per_chapter = format_citations(used_citekeys_per_chapter, citations_filename_html_per_chapter)
# Merge dictionaries
cited_dict = {**cited_dict, **cited_dict_per_chapter}
@@ -1557,13 +1454,24 @@ def main():
# create a dictionary entry containing the formatted references
formatted_references_dict[tmp_dict_key] = refs_for_bib_chapter
logging.debug(f"cited_dict now has {libeoaconvert.plural(len(cited_dict), 'entry', plural='entries')}.")
+ else:
+ raise( Exception("unknown publication type!"))
+ logging.debug( cited_dict )
tei_body = xml_tree.xpath("//t:body", namespaces=NS_MAP)[0]
if args.hyperimage:
logging.info("Transforming body with Hyperimage support")
else:
pass
- body_transformed_tmp = transform_body(tei_body, cited_dict, TRANSLATION_FILE, HI_XML_FILE, args.eoa_classic, publang=publication_language, hyperimage=args.hyperimage)
+ body_transformed_tmp = transform_body(
+ tei_body,
+ cited_dict,
+ TRANSLATION_FILE,
+ HI_XML_FILE,
+ args.eoa_classic,
+ publang=publication_language,
+ hyperimage=args.hyperimage
+ )
libeoaconvert.debug_xml_here(body_transformed_tmp, "body_transformed", DEBUG_DIR)
body_transformed = etree.ElementTree(body_transformed_tmp)
diff --git a/src/tei_add_bibl.py b/src/tei_add_bibl.py
index c0502af..98f630f 100755
--- a/src/tei_add_bibl.py
+++ b/src/tei_add_bibl.py
@@ -12,10 +12,6 @@
from os import environ
from shutil import rmtree, copytree, ignore_patterns, copy
-def main(
-):
- logging.info("hallo")
-
BASE_DIR = Path( __file__ ).parent
SCRIPT_NAME = Path( __file__).stem
@@ -29,6 +25,8 @@ def main(
DEFAULT_DEPENDENCIES_DIR = \
Path(environ['DEPENDENCIES_DIR'] if 'DEPENDENCIES_DIR' in environ else './dependencies')
+NS_MAP = {"tei" : 'http://www.tei-c.org/ns/1.0'}
+
def copy_dir(
src,
dst,
@@ -57,41 +55,52 @@ def copy_file(
**opts
)
-def info_from_tei(
- tei_file
-):
- NS_MAP = {"tei" : 'http://www.tei-c.org/ns/1.0'}
- translations = {"de" : "german", "en" : "english", "it" : "italian", "fr" : "french"}
+def publication_info(xml_tree):
+ """Check TEI header for bibliography data, return relevant data as dictionary."""
- tei_tree = etree.parse(str(tei_file))
- citekeys = tei_tree.xpath(
- "/tei:TEI//tei:body//tei:bibl/tei:ref/@target",
- namespaces = NS_MAP
- )
- citekeys = [key.lstrip('#') for key in citekeys]
+ bib_file = xml_tree.xpath("//tei:teiHeader/tei:fileDesc/tei:sourceDesc/tei:ab[@type='bibdatabase']/tei:ref/@target", namespaces=NS_MAP)[0]
+ publ_type = xml_tree.xpath("//tei:teiHeader/tei:fileDesc/tei:sourceDesc/tei:ab[@type='bibdatabase']/tei:ref/@type", namespaces=NS_MAP)[0]
+ if publ_type not in ["monograph", "anthology", "monograph-numeric", "anthology-numeric"]:
+ logging.error(f"The bibliography type {publ_type} is not allowed. Exiting")
+ exit(1)
- language = tei_tree.xpath(
+ language = xml_tree.xpath(
"/tei:TEI/tei:teiHeader/tei:profileDesc/tei:langUsage/tei:language/@ident",
namespaces = NS_MAP
)[0]
- language = translations[language]
- processing_instructions = tei_tree.xpath(
- "//processing-instruction('eoa')"
- )
- keywords = []
- for k in processing_instructions:
- as_str = str(k).lstrip('').rstrip('?>').split(" ")
- if as_str[0:2] == ["eoa", "printbibliography"]:
- if len(as_str) > 2:
- keywords += [as_str[2].strip('"').strip("'")]
- else:
- keywords += [""]
-
return {
- "citekeys": citekeys,
+ "bib_file": bib_file,
+ "publ_type": publ_type,
"language": language,
- "keywords": keywords
+
}
+def create_bibl_and_insert(
+ tei_tree,  # parsed TEI tree; citekeys and keywords are read from it via bib2html.get_bibl_info
+ temp_dir,  # working directory handed to bib2html.bib2tei; logs go to temp_dir / "log"
+ tei_bib_file,  # output path of the bibliography generated by bib2html.bib2tei
+ tei_file,  # TEI input handed to run_xslt
+ tei_with_bibl_file  # output path of the XSLT run (presumably the TEI with the bibliography inserted)
+):
+ translations = {"de" : "german", "en" : "english", "it" : "italian", "fr" : "french"}  # language ident -> language name passed to bib2tei
+ tei_info = bib2html.get_bibl_info( tei_tree )
+ logging.debug( f"info from tei file: {tei_info}" )
+ bib2html.bib2tei(
+ bib_file = bib_file,  # NOTE(review): bib_file is neither a parameter nor assigned in this function; it must exist as a module-level name when this is called
+ citekeys = tei_info['citekeys'],
+ keywords = tei_info['keywords'],
+ language = translations[publ_info['language']],  # NOTE(review): publ_info is likewise not a parameter; an ident missing from translations raises KeyError
+ tex_template = BASE_DIR / "data/aux/bibliography4ht.tex",
+ temp_dir = temp_dir,
+ output_file = tei_bib_file,
+ log_dir = temp_dir / "log"
+ )
+ run_xslt(
+ tei_file,
+ BASE_DIR / "stylesheets/insert_bibliography.xsl",
+ params = [ f"tei_bib_file={tei_bib_file}" ],  # the stylesheet receives the bibliography path as a parameter
+ output_file = tei_with_bibl_file
+ )
+
if __name__ == '__main__':
# parse args:
@@ -192,34 +201,27 @@ def info_from_tei(
copy_dir(
publ_dir,
output_dir,
- ignore = ignore_patterns( tei_file_input ) if not(tei_file_input . is_absolute()) else None
+ # ignore = ignore_patterns( tei_file_input ) if not(tei_file_input . is_absolute()) else None
)
- copy_file(
- tei_file,
- (output_dir / (tei_file.with_suffix("").name + "_orig")) . with_suffix( tei_file . suffix )
+ tei_tree = etree.parse(str(tei_file))
+ publ_info = publication_info( tei_tree )
+ logging.info( f"The bibfile is '{publ_info['bib_file']}' and this publication type is '{publ_info['publ_type']}'. Language: '{publ_info['language']}'")
+ if publ_info["publ_type"] == "monograph":
+ create_bibl_and_insert(
+ tei_tree,
+ temp_dir = output_dir / "temp/all",
+ tei_bib_file = (output_dir/ "bibliography/bibliography_all") . with_suffix(".tei"),
+ tei_file = tei_file,
+ tei_with_bibl_file = (output_dir / (tei_file.with_suffix("").name + "_with_bibl")) . with_suffix( ".xml" )
)
- tei_info = info_from_tei( tei_file )
-
- logging.debug( f"info from tei file: {tei_info}" )
-
- # language = "german"
- temp_dir = output_dir / "temp"
- tei_bib_file = (output_dir/ "bibliography") . with_suffix(".tei")
-
- bib2html.bib2tei(
- bib_file = bib_file,
- citekeys = tei_info['citekeys'],
- keywords = tei_info['keywords'],
- language = tei_info['language'],
- tex_template = BASE_DIR / "data/aux/bibliography4ht.tex",
- temp_dir = temp_dir,
- output_file = tei_bib_file,
- log_dir = log_dir
- )
-
- run_xslt(
- tei_file,
- BASE_DIR / "stylesheets/insert_bibliography.xsl",
- params = [ f"tei_bib_file={tei_bib_file}" ],
- output_file = output_dir / tei_file.name
- )
+ elif publ_info["publ_type"] == "anthology":
+ for chap_node in tei_tree.xpath("//tei:body//tei:div[@type = 'chapter']", namespaces=NS_MAP):
+ chapter_id = chap_node.xpath("@xml:id", namespaces=NS_MAP)[0]  # xpath() returns a list; take the id string itself for the paths below
+ # NOTE(review): this call still omits the required tei_file / tei_with_bibl_file arguments and passes the whole tei_tree, not chap_node
+ create_bibl_and_insert(
+ tei_tree,
+ temp_dir = output_dir / f"temp/chap_{chapter_id}",
+ tei_bib_file = (output_dir/ f"bibliography/bibliography_chap_{chapter_id}") . with_suffix(".tei")
+ )
+ else:
+ raise( Exception("unknown publication type!"))
diff --git a/src/utils/bib2html.py b/src/utils/bib2html.py
index d936e11..7c32c6e 100755
--- a/src/utils/bib2html.py
+++ b/src/utils/bib2html.py
@@ -35,6 +35,32 @@
BIBLIOGRAPHY_CHAPTER_NO_KEYWORD = "BIBLIOGRAPHY"
BIBLIOGRAPHY_CHAPTER = "BIBLIOGRAPHY-{keyword}"
+def get_bibl_info(
+    tei_tree
+):
+    """Return {"citekeys": [...], "keywords": [...]} found below `tei_tree` (looked up with relative XPaths)."""
+    NS_MAP = {"tei" : 'http://www.tei-c.org/ns/1.0'}
+    citekeys = tei_tree.xpath(
+        ".//tei:bibl/tei:ref/@target",
+        namespaces = NS_MAP
+    )
+    citekeys = [key.lstrip('#') for key in citekeys]
+    processing_instructions = tei_tree.xpath(
+        ".//processing-instruction('eoa')"
+    )
+    keywords = []
+    for k in processing_instructions:
+        as_str = str(k).lstrip('<?').rstrip('?>').split(" ")  # drop the "<?" / "?>" delimiters so the first token can equal "eoa"
+        if as_str[0:2] == ["eoa", "printbibliography"]:
+            if len(as_str) > 2:
+                keywords += [as_str[2].strip('"').strip("'")]
+            else:
+                keywords += [""]  # printbibliography without a keyword
+    return {
+        "citekeys": citekeys,
+        "keywords": keywords
+    }
+
def latex_escape_non_ascii( input_str ):
output = ""
@@ -66,9 +92,9 @@ def write_dummy_latex(
tmp_dir = tmp_filename.parent
allcitekeys = ""
- allcitekeys += "\\begin{tabular}{l l l}\n"
+ allcitekeys += "\\begin{tabular}{l l l l}\n"
for (i,key) in enumerate(citekeys):
- allcitekeys += f"\\verb|{key}| &\\cite{{{key}}}&\\cite*{{{key}}}"
+ allcitekeys += f"\\verb|{key}| &\\cite{{{key}}}&\\cite*{{{key}}}&\\citefield{{{key}}}{{title}}"
if i < len(citekeys) - 1:
allcitekeys += "\\\\"
allcitekeys += "\n"