Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Updates
  • Loading branch information
Klaus Thoden committed Nov 14, 2018
1 parent 7435c76 commit 0c7f9d5
Showing 1 changed file with 45 additions and 13 deletions.
58 changes: 45 additions & 13 deletions tei2imxml.py
Expand Up @@ -272,8 +272,10 @@ def write_citation_markdown(used_citekeys, citations_filename):
def format_reference_list(used_citekeys, html_file):
"""Create an HTML formatted list of references"""

html_path = os.path.sep.join([TMP_DIR, html_file])
logging.info("Opening %s", html_path)
# second part of function
with open(TMP_DIR + os.path.sep + html_file, "r") as ding:
with open(html_path, "r") as ding:
reference_list = soupparser.fromstring(ding, features="html.parser")

references = reference_list.xpath("//div[@class='references']")[0]
Expand All @@ -294,7 +296,10 @@ def format_citations(used_citekeys, bibdata, html_file):
if entry_2["id"] == entry:
current_citation = entry
# logging.info("%s: The title %s" % (html_file, entry_2["title"]))
strTitle = entry_2["title"]
try:
strTitle = entry_2["title"]
except KeyError:
logging.error("No title found for %s", entry)

title = strTitle
authoryear_citation = cites.select("#citeauthoryear ~ p > span[data-cites='%s']" % entry)[0].text[1:-1]
Expand Down Expand Up @@ -462,17 +467,9 @@ def transform_body(xml_tree, cited_data, publang):
sys.exit()

citation.text = formatted_citation

# try:
# formatted_citation = cited_data[citekey][0] + pagerange
# except KeyError:
# print("Citekey %s was not found in the references. Exiting." % citekey)
# sys.exit()

citation.set("data-title", formatted_citation)
citation.set("data-content", cited_data[citekey][2])


#############
# Footnotes #
#############
Expand Down Expand Up @@ -775,6 +772,35 @@ def update_ids(xml_tree):
return xml_tree
# def update_ids ends here

def get_citations_per_chapter(xml_tree):
    """Collect the citekeys that are referenced inside each chapter.

    Used when the publication is an anthology, where each chapter gets
    its own list of references.

    Args:
        xml_tree: parsed XML tree (or element) offering ``xpath()``.

    Returns:
        A dict that maps the ``xml:id`` of every chapter ``div`` to the
        set of citekeys cited within that chapter, for example::

            {'chap18_schwartz': {'Blodget_1857',
                                 'CliffordMarcus_1986',
                                 'Schwartz_2003',
                                 'Schwartz_2011'}}

    Raises:
        IndexError: if a chapter ``div`` carries no ``xml:id`` attribute.
    """

    refs_per_chapter = {}

    all_chapters = xml_tree.xpath("//t:div[@type='chapter']", namespaces=NS_MAP)

    logging.info("Found %s chapters.", len(all_chapters))
    for chapter in all_chapters:
        chapter_id = chapter.xpath("@xml:id", namespaces=NS_MAP)[0]
        # The leading "." keeps the search inside the current chapter.
        # A bare "//" is an absolute path and is evaluated from the
        # document root, which would assign the citations of the whole
        # book to every single chapter.
        all_refs_with_hash = chapter.xpath(
            ".//t:bibl/t:ref/@target", namespaces=NS_MAP
        )
        # Drop the first character of each target; presumably the "#"
        # of a fragment reference such as "#Schwartz_2003".
        refs_per_chapter[chapter_id] = {x[1:] for x in all_refs_with_hash}

    return refs_per_chapter
# def get_citations_per_chapter ends here

def prepare_bibliography(bib_data):
"""Create a JSON version of bibliography data, using pandoc-citeproc"""

Expand Down Expand Up @@ -948,11 +974,15 @@ def main():
if bib_data["type"] == "monograph":
refs_for_bib_chapter = format_reference_list(used_citekeys, citations_filename_html)
elif bib_data["type"] == "anthology":

citations_per_chapter = get_citations_per_chapter(xml_tree)

formatted_references_dict = {}
all_chapter_ids = xml_tree.xpath("//t:div[@type='chapter']/@xml:id", namespaces=NS_MAP)

for chapter_id in all_chapter_ids:
used_citekeys_per_chapter = data["citekeys_by_chapter"][chapter_id]
used_citekeys_per_chapter = citations_per_chapter[chapter_id]
# used_citekeys_per_chapter = data["citekeys_by_chapter"][chapter_id]
citations_filename_markdown = chapter_id + ".md"
citations_filename_html = "formatted_citations_" + chapter_id + ".html"

Expand Down Expand Up @@ -991,8 +1021,10 @@ def main():

updated_xml_tree = update_ids(assigned_ids)
# libeoaconvert.debug_xml_here(updated_xml_tree, "updated_tree")
nearly_final_tree = etree.ElementTree(updated_xml_tree)
xml_root = nearly_final_tree.getroot()
# nearly_final_tree = etree.ElementTree(updated_xml_tree)
# xml_root = nearly_final_tree.getroot()

xml_root = updated_xml_tree.getroot()

xml_root.tag = "Book"

Expand Down

0 comments on commit 0c7f9d5

Please sign in to comment.