Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Citations and bibliography
  • Loading branch information
Klaus Thoden committed May 29, 2018
1 parent db5ddf5 commit dfe0b4d
Showing 1 changed file with 85 additions and 21 deletions.
106 changes: 85 additions & 21 deletions tei2imxml.py
Expand Up @@ -222,12 +222,12 @@ def render_reference(list_of_xml_elements, cited_data):
element.text = cited_data[citekey][2]
# def render_reference ends here

def write_citation_markdown(used_citekeys):
def write_citation_markdown(used_citekeys, citations_filename):
"""Write markdown file with citekeys for bibliography rendering"""

md_file_header = "---\nlang: en\ntitle: Citations\n...\n\n"

with open(TMP_DIR + os.path.sep + "used_citations.md", "w") as citation_formatter:
with open(TMP_DIR + os.path.sep + citations_filename, "w") as citation_formatter:
citation_formatter.write(md_file_header)
# citation_formatter.write("# Full parentheses\n")
citation_formatter.write("# citeauthoryear\n")
Expand All @@ -245,36 +245,41 @@ def write_citation_markdown(used_citekeys):
logging.info("Wrote citation formatter.")
# def write_citation_markdown ends here

def format_citations(used_citekeys, bibdata):
"""Return a formatted entry of the used citations"""
def format_reference_list(used_citekeys, html_file):
"""Create an HTML formatted list of references"""

with open(TMP_DIR + os.path.sep + "formatted_citations.html", "r") as ding:
cites = BeautifulSoup(ding, "html.parser")
with open(TMP_DIR + os.path.sep + "formatted_citations.html", "r") as ding:
# second part of function
with open(TMP_DIR + os.path.sep + html_file, "r") as ding:
reference_list = soupparser.fromstring(ding, features="html.parser")

# references = dd.xpath("//div[@class='references']")
# with open("tmp_files/formatted_citations.html", "r") as ding:

references = reference_list.xpath("//div[@class='references']")[0]

# full_paren_cites = cites.select("#full-parentheses ~ p > span")
# year_paren_cites = cites.select("#year-parentheses ~ p > span")
return references
# def format_reference_list ends here

def format_citations(used_citekeys, bibdata, html_file):
"""Return a formatted entry of the used citations"""

# print(used_citekeys)

with open(TMP_DIR + os.path.sep + html_file, "r") as ding:
cites = BeautifulSoup(ding, "html.parser")

citation_dict = {}

for entry in used_citekeys:
for entry_2 in bibdata:
if entry_2["id"] == entry:
current_citation = entry
# logging.info("%s: The title %s" % (html_file, entry_2["title"]))
strTitle = entry_2["title"]

title = strTitle
authoryear_citation = cites.select("#citeauthoryear ~ p > span[data-cites='%s']" % entry)[0].text
year_citation = cites.select("#citeyear ~ p > span[data-cites='%s']" % entry)[0].text
citation_dict[entry] = (authoryear_citation, year_citation, title)
title = strTitle
authoryear_citation = cites.select("#citeauthoryear ~ p > span[data-cites='%s']" % entry)[0].text[1:-1]
year_citation = cites.select("#citeyear ~ p > span[data-cites='%s']" % entry)[0].text[1:-1]
citation_dict[entry] = (authoryear_citation, year_citation, title)

return citation_dict, references
return citation_dict
# def format_citations ends here

def format_pagerange(pagerange_start, pagerange_end):
Expand Down Expand Up @@ -378,9 +383,18 @@ def transform_body(xml_tree, cited_data, authors, publang):
citation.set("data-placement", "bottom")

if len(cited_range) > 0:
pagerange_start = cited_range[0].get("from")
pagerange_end = cited_range[0].get("to")
pagerange = ", " + format_pagerange(pagerange_start, pagerange_end)
if cited_range[0].text is not None and cited_range[0].get("from") is not None:
print("You must not use 'from' attribute and text in citedRange at the same time. Exiting.")
sys.exit()
elif cited_range[0].text is not None:
# might contain markup!
pagerange = ", {}".format(cited_range[0].text)
# clear the text
cited_range[0].text = ""
elif cited_range[0].get("from") is not None:
pagerange_start = cited_range[0].get("from")
pagerange_end = cited_range[0].get("to")
pagerange = ", " + format_pagerange(pagerange_start, pagerange_end)
cited_range[0].tag = "tagtobestripped"

if cite_render == 'inline':
Expand Down Expand Up @@ -572,7 +586,57 @@ def assign_ids(xml_tree, data):
return xml_tree, data
# def assign_ids ends here

def add_bibliography(xml_tree, refs_for_bib_chapter):
def update_ids(xml_tree):
    """Update the references in EOAref to the id value assigned in assign_ids.

    For every ``EOAref`` element in ``xml_tree`` the text of its ``Label``
    child, minus the first character, is looked up as an ``xml:id`` anywhere
    in the tree.  The ``id`` attribute of the single matching element is
    then written to the ``target`` attribute of the ``EOAref``'s ``ref``
    child.

    Args:
        xml_tree: parsed document offering ``findall`` and ``xpath``
            (presumably an lxml tree -- confirm against the caller).

    Returns:
        The same ``xml_tree`` object; the ``ref`` elements are modified in
        place.

    Raises:
        SystemExit: if a label matches no ``xml:id`` at all, or matches
            more than one element.
    """

    for reference in xml_tree.findall(".//EOAref"):
        ref_element = reference.find("ref")

        # Drop the first character of the label (presumably a leading
        # marker such as '#' -- confirm against the input format).
        label_text = reference.find("Label").text[1:]
        logging.debug("label text is %s", label_text)

        matches = xml_tree.xpath("//*[@xml:id='{}']".format(label_text))
        if len(matches) == 0:
            print("There seems to be no corresponding xml:id for %s. Exiting." % label_text)
            sys.exit()
        if len(matches) > 1:
            # Report the offending label; the list of matched elements
            # would only print as opaque object representations.
            print("The xml:id %s has been assigned more than once. This is not allowed. Exiting." % label_text)
            sys.exit()

        # Exactly one match: copy its id onto the reference.
        ref_element.set("target", matches[0].get("id"))

    return xml_tree
# def update_ids ends here

def prepare_bibliography(bib_data):
    """Create a JSON version of bibliography data, using pandoc-citeproc.

    Runs ``pandoc-citeproc --bib2json`` on ``bib_data["source"]``, stores
    the tool's raw output as ``tmp-bib.json`` inside ``TMP_DIR`` and
    returns the parsed result.

    Args:
        bib_data: mapping whose ``"source"`` entry is handed to
            pandoc-citeproc (presumably the path of the bibliography
            file -- confirm against the caller).

    Returns:
        The object produced by ``json.loads`` from the standard output of
        pandoc-citeproc.
    """

    interim_bib_json_file = TMP_DIR + os.path.sep + "tmp-bib.json"

    # Hand the arguments over as a list: interpolating the source into a
    # command string and splitting it again breaks on paths that contain
    # spaces or quotes.
    citeproc_arguments = ["pandoc-citeproc", "--bib2json", str(bib_data["source"])]

    # run() waits for the process to finish and closes the pipe, so no
    # unreaped child or open file descriptor is left behind.
    citeproc_result = subprocess.run(citeproc_arguments, stdout=subprocess.PIPE)
    citeproc_json = citeproc_result.stdout

    # Parse before writing, so that no file is produced from unusable output.
    citations_json = json.loads(citeproc_json)

    # The payload is decoded as UTF-8, so write it as UTF-8 regardless of
    # the locale's default encoding.
    with open(interim_bib_json_file, "w", encoding="utf-8") as json_file:
        json_file.write(citeproc_json.decode("utf-8"))

    logging.info("Wrote json file")

    return citations_json
# def prepare_bibliography ends here

def add_bibliography_monograph(xml_tree, refs_for_bib_chapter):
"""Add another chapter containing the bibliography."""

root_element = xml_tree.getroot()
Expand Down

0 comments on commit dfe0b4d

Please sign in to comment.