From 0c7f9d58097d066f71e76b867c5127919a6438aa Mon Sep 17 00:00:00 2001
From: Klaus Thoden <kthoden@mpiwg-berlin.mpg.de>
Date: Wed, 14 Nov 2018 16:04:04 +0100
Subject: [PATCH] Updates

---
 tei2imxml.py | 58 ++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 45 insertions(+), 13 deletions(-)

diff --git a/tei2imxml.py b/tei2imxml.py
index 2ab1a31..d09a1d5 100644
--- a/tei2imxml.py
+++ b/tei2imxml.py
@@ -272,8 +272,10 @@ def write_citation_markdown(used_citekeys, citations_filename):
 def format_reference_list(used_citekeys, html_file):
     """Create an HTML formatted list of references"""
 
+    html_path = os.path.sep.join([TMP_DIR, html_file])
+    logging.info("Opening %s", html_path)
     # second part of function
-    with open(TMP_DIR + os.path.sep + html_file, "r") as ding:
+    with open(html_path, "r") as ding:
         reference_list = soupparser.fromstring(ding, features="html.parser")
 
     references = reference_list.xpath("//div[@class='references']")[0]
@@ -294,7 +296,10 @@ def format_citations(used_citekeys, bibdata, html_file):
             if entry_2["id"] == entry:
                 current_citation = entry
                 # logging.info("%s: The title %s" % (html_file, entry_2["title"]))
-                strTitle = entry_2["title"]
+                try:
+                    strTitle = entry_2["title"]
+                except KeyError:
+                    logging.error("No title found for %s", entry)
 
                 title = strTitle
                 authoryear_citation = cites.select("#citeauthoryear ~ p > span[data-cites='%s']" % entry)[0].text[1:-1]
@@ -462,17 +467,9 @@ def transform_body(xml_tree, cited_data, publang):
                 sys.exit()
 
         citation.text = formatted_citation
-
-        # try:
-        #     formatted_citation = cited_data[citekey][0] + pagerange
-        # except KeyError:
-        #     print("Citekey %s was not found in the references. Exiting." % citekey)
-        #     sys.exit()
-
         citation.set("data-title", formatted_citation)
         citation.set("data-content", cited_data[citekey][2])
 
-
     #############
     # Footnotes #
     #############
@@ -775,6 +772,35 @@ def update_ids(xml_tree):
     return xml_tree
 # def update_ids ends here
 
+def get_citations_per_chapter(xml_tree):
+    """Map each chapter's xml:id to the set of citekeys cited inside it.
+
+    Intended for anthologies, where references are handled per chapter.
+    A citekey is the ``target`` attribute of a ``bibl/ref`` element with
+    its first character (the leading ``#``) dropped. Example of one entry
+    in the returned dictionary:
+
+        'chap18_schwartz': {'Blodget_1857', 'CliffordMarcus_1986',
+                            'Hunter_2004', 'Schwartz_2011'}
+    """
+
+    refs_per_chapter = {}
+
+    all_chapters = xml_tree.xpath("//t:div[@type='chapter']", namespaces=NS_MAP)
+
+    logging.info("Found %s chapters.", len(all_chapters))
+    for chapter in all_chapters:
+        # Raises IndexError if a chapter div carries no xml:id.
+        chapter_id = chapter.xpath("@xml:id", namespaces=NS_MAP)[0]
+        # The leading "." restricts the search to this chapter. A bare "//"
+        # is evaluated from the document root and would give every chapter
+        # the references of the whole document.
+        targets = chapter.xpath(".//t:bibl/t:ref/@target", namespaces=NS_MAP)
+        refs_per_chapter[chapter_id] = {target[1:] for target in targets}
+
+    return refs_per_chapter
+# def get_citations_per_chapter ends here
+
 def prepare_bibliography(bib_data):
     """Create a JSON version of bibliography data, using pandoc-citeproc"""
 
@@ -948,11 +974,15 @@ def main():
     if bib_data["type"] == "monograph":
         refs_for_bib_chapter = format_reference_list(used_citekeys, citations_filename_html)
     elif bib_data["type"] == "anthology":
+
+        citations_per_chapter = get_citations_per_chapter(xml_tree)
+
         formatted_references_dict = {}
         all_chapter_ids = xml_tree.xpath("//t:div[@type='chapter']/@xml:id", namespaces=NS_MAP)
 
         for chapter_id in all_chapter_ids:
-            used_citekeys_per_chapter = data["citekeys_by_chapter"][chapter_id]
+            used_citekeys_per_chapter = citations_per_chapter[chapter_id]
+            # used_citekeys_per_chapter = data["citekeys_by_chapter"][chapter_id]
             citations_filename_markdown = chapter_id + ".md"
             citations_filename_html = "formatted_citations_" + chapter_id + ".html"
 
@@ -991,8 +1021,10 @@ def main():
 
     updated_xml_tree = update_ids(assigned_ids)
     # libeoaconvert.debug_xml_here(updated_xml_tree, "updated_tree")
-    nearly_final_tree = etree.ElementTree(updated_xml_tree)
-    xml_root = nearly_final_tree.getroot()
+    # nearly_final_tree = etree.ElementTree(updated_xml_tree)
+    # xml_root = nearly_final_tree.getroot()
+
+    xml_root = updated_xml_tree.getroot()
 
     xml_root.tag = "Book"