From b74ec826d539cd6e90c680cda6af7c1d37dda7aa Mon Sep 17 00:00:00 2001 From: kthoden Date: Wed, 5 Feb 2020 15:05:30 +0100 Subject: [PATCH] Major update, prefix and suffix formatting --- src/parsezotero.py | 73 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 56 insertions(+), 17 deletions(-) diff --git a/src/parsezotero.py b/src/parsezotero.py index 6fafe96..e9ee899 100644 --- a/src/parsezotero.py +++ b/src/parsezotero.py @@ -53,7 +53,7 @@ CITATION_SEPARATOR = ";" TRANSLATOR_URL = "http://127.0.0.1:1969" BIBTEX_FILE = "bibliography.bib" - +BRACKET = "()" def find_citation_pis(xmltree): @@ -222,7 +222,30 @@ def get_citekey(bibtex_entry): # def get_citekey ends here -def create_citation_element(citation_dict, total_items, index_item): +def modify_preceding_text(bibl_element, prefix_text, BRACKET="[]"): + """Retrieve and modify preceding text with bibliographic prefix""" + + preceding_element = bibl_element.getprevious() + if preceding_element is None: + preceding_element = bibl_element.getparent() + print(preceding_element) + preceding_text = preceding_element.text + if preceding_text is None: + preceding_element.text = f"{prefix_text} {BRACKET[0]}" + else: + preceding_element.text = f"{preceding_text}{prefix_text} {BRACKET[0]}" + else: + preceding_text = preceding_element.tail + if preceding_text is None: + preceding_element.tail = f"{prefix_text} {BRACKET[0]}" + else: + preceding_element.tail = f"{preceding_text}{prefix_text} {BRACKET[0]}" + + return +# def modify_preceding_text ends here + + +def create_citation_element(citation_dict, total_items, index_item, print_formatted_citation): """Create an XML element with zotero data""" bibl = etree.Element("bibl") @@ -234,8 +257,12 @@ def create_citation_element(citation_dict, total_items, index_item): else: formatted_citation = citation_dict['formatted'] - bibl.text = formatted_citation + # bibl.text = formatted_citation ref = etree.Element("ref") + if print_formatted_citation: + ref.tail = formatted_citation + else: + pass bibl.insert(0, ref) # add year or authoryear, need a good heuristic here if citation_dict["formatted"] == f"({citation_dict['year']})": @@ -244,23 +271,19 @@ def create_citation_element(citation_dict, total_items, index_item): ref.set("type", "authoryear") if citation_dict["citekey"]: - ref.set("url", f"#{citation_dict['citekey']}") + ref.set("target", f"#{citation_dict['citekey']}") else: - ref.set("url", f"#{citation_dict['zotero_url']}") + ref.set("target", f"#{citation_dict['zotero_url']}") if citation_dict["pagerange"]: citedrange = etree.Element("citedRange") citedrange.text = citation_dict["pagerange"] - ref.insert(0, citedrange) + bibl.append(citedrange) - # if citation_dict["prefix"]: - # element_string = element_string.replace('', f'{citation_dict["prefix"]} ') - # if citation_dict["suffix"]: - # element_string = element_string.replace('', f' {citation_dict["suffix"]}') return bibl # def create_citation_element ends here -def citation_item_to_bibl(citation_item, parsed_json, citekey_list, number_of_items, item_position): +def citation_item_to_bibl(citation_item, parsed_json, citekey_list, number_of_items, item_position, print_formatted_citation): """Wrapper function for citation items. This function makes use of the Zotero translation server @@ -293,13 +316,13 @@ def citation_item_to_bibl(citation_item, parsed_json, citekey_list, number_of_it citekey_list.append(citekey) write_to_bibfile(bibtex_entry) - citation_element = create_citation_element(citation_dict, number_of_items, item_position) + citation_element = create_citation_element(citation_dict, number_of_items, item_position, print_formatted_citation) - return citation_element, citation_dict['formatted'] + return citation_element, citation_dict # def citation_item_to_bibl ends here -def turn_pi_into_bibl(pi, citekey_list): +def turn_pi_into_bibl(pi, citekey_list, print_formatted_citation): """Wrapper function for the conversion steps.""" # wrap a temporary element around citations @@ -312,15 +335,29 @@ def turn_pi_into_bibl(pi, citekey_list): logging.info(f"Found {libeoaconvert.plural(number_of_items, 'item')} in this zotero citation.") for citation_item in items: item_position = items.index(citation_item) - citation_element, formatted_citation = citation_item_to_bibl(citation_item, parsed_json, citekey_list, number_of_items, item_position) + citation_element, citation_dict = citation_item_to_bibl(citation_item, parsed_json, citekey_list, number_of_items, item_position, print_formatted_citation) tmp_element.append(citation_element) + formatted_citation = citation_dict['formatted'] # remove formatted citation from tail pi_tail = pi.tail - tmp_element.tail = pi_tail.replace(formatted_citation, "") + + if citation_dict["prefix"]: + prefix_text = citation_dict["prefix"] + else: + prefix_text = "" + + if citation_dict["suffix"]: + suffix_text = citation_dict["suffix"] + else: + suffix_text = "" + + tmp_element.tail = pi_tail.replace(formatted_citation, f"{BRACKET[1]}{suffix_text}") # replace processing instruction with bibl elements parent_element = pi.getparent() parent_element.replace(pi, tmp_element) + + modify_preceding_text(tmp_element, prefix_text, BRACKET) # def turn_pi_into_bibl ends here @@ -360,6 +397,8 @@ def main(): parser = argparse.ArgumentParser() parser.add_argument("xmlfile", help="XML file converted from Word, containing Zotero citations.") + parser.add_argument("-f", "--format-citations", help="Print formatted citation in XML.", action="store_true") + args = parser.parse_args() xmltree = etree.parse(args.xmlfile) @@ -367,7 +406,7 @@ def main(): citekey_list = [] for pi in citation_pis: - turn_pi_into_bibl(pi, citekey_list) + turn_pi_into_bibl(pi, citekey_list, args.format_citations) cleanup_xml(xmltree) add_bib_to_header(xmltree, BIBTEX_FILE)