From 0c7f9d58097d066f71e76b867c5127919a6438aa Mon Sep 17 00:00:00 2001 From: Klaus Thoden Date: Wed, 14 Nov 2018 16:04:04 +0100 Subject: [PATCH] Updates --- tei2imxml.py | 58 ++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 45 insertions(+), 13 deletions(-) diff --git a/tei2imxml.py b/tei2imxml.py index 2ab1a31..d09a1d5 100644 --- a/tei2imxml.py +++ b/tei2imxml.py @@ -272,8 +272,10 @@ def write_citation_markdown(used_citekeys, citations_filename): def format_reference_list(used_citekeys, html_file): """Create an HTML formatted list of references""" + html_path = os.path.sep.join([TMP_DIR, html_file]) + logging.info("Opening %s", html_path) # second part of function - with open(TMP_DIR + os.path.sep + html_file, "r") as ding: + with open(html_path, "r") as ding: reference_list = soupparser.fromstring(ding, features="html.parser") references = reference_list.xpath("//div[@class='references']")[0] @@ -294,7 +296,10 @@ def format_citations(used_citekeys, bibdata, html_file): if entry_2["id"] == entry: current_citation = entry # logging.info("%s: The title %s" % (html_file, entry_2["title"])) - strTitle = entry_2["title"] + try: + strTitle = entry_2["title"] + except KeyError: + logging.error("No title found for %s", entry) title = strTitle authoryear_citation = cites.select("#citeauthoryear ~ p > span[data-cites='%s']" % entry)[0].text[1:-1] @@ -462,17 +467,9 @@ def transform_body(xml_tree, cited_data, publang): sys.exit() citation.text = formatted_citation - - # try: - # formatted_citation = cited_data[citekey][0] + pagerange - # except KeyError: - # print("Citekey %s was not found in the references. Exiting." % citekey) - # sys.exit() - citation.set("data-title", formatted_citation) citation.set("data-content", cited_data[citekey][2]) - ############# # Footnotes # ############# @@ -775,6 +772,35 @@ def update_ids(xml_tree): return xml_tree # def update_ids ends here +def get_citations_per_chapter(xml_tree): + """If publication is anthology, store which citations are mentioned in each chapter.""" + + """ + 'chap18_schwartz': {'Blodget_1857', + 'CliffordMarcus_1986', + 'Hunter_2004', + 'MarcusFischer_1986', + 'Mitchell_1992', + 'Nye_1994', + 'Schlereth_1980', + 'Schwartz_2003', + 'Schwartz_2011'}} + """ + + refs_per_chapter = {} + + all_chapters = xml_tree.xpath("//t:div[@type='chapter']", namespaces=NS_MAP) + + logging.info("Found %s chapters.", len(all_chapters)) + for chapter in all_chapters: + chapter_id = chapter.xpath("@xml:id", namespaces=NS_MAP)[0] + all_refs_with_hash = chapter.xpath("//t:bibl/t:ref/@target", namespaces=NS_MAP) + all_refs = [x[1:] for x in all_refs_with_hash] + refs_per_chapter[chapter_id] = set(all_refs) + + return refs_per_chapter +# def get_citations_per_chapter ends here + def prepare_bibliography(bib_data): """Create a JSON version of bibliography data, using pandoc-citeproc""" @@ -948,11 +974,15 @@ def main(): if bib_data["type"] == "monograph": refs_for_bib_chapter = format_reference_list(used_citekeys, citations_filename_html) elif bib_data["type"] == "anthology": + + citations_per_chapter = get_citations_per_chapter(xml_tree) + formatted_references_dict = {} all_chapter_ids = xml_tree.xpath("//t:div[@type='chapter']/@xml:id", namespaces=NS_MAP) for chapter_id in all_chapter_ids: - used_citekeys_per_chapter = data["citekeys_by_chapter"][chapter_id] + used_citekeys_per_chapter = citations_per_chapter[chapter_id] + # used_citekeys_per_chapter = data["citekeys_by_chapter"][chapter_id] citations_filename_markdown = chapter_id + ".md" citations_filename_html = "formatted_citations_" + chapter_id + ".html" @@ -991,8 +1021,10 @@ def main(): updated_xml_tree = update_ids(assigned_ids) # libeoaconvert.debug_xml_here(updated_xml_tree, "updated_tree") - nearly_final_tree = etree.ElementTree(updated_xml_tree) - xml_root = nearly_final_tree.getroot() + # nearly_final_tree = etree.ElementTree(updated_xml_tree) + # xml_root = nearly_final_tree.getroot() + + xml_root = updated_xml_tree.getroot() xml_root.tag = "Book"