# parsezotero.py

#!/usr/bin/env python3
# -*- coding: utf-8; mode: python -*-

"""When using a Zotero database for publications together with its Word plugin,
users don't need to put citekey shorthands into the Word
manuscripts. They can use the functionality of the plugin, which also
allows them to cite only a year, add page ranges and, if needed, add a
prefix or a suffix.

The TEI converter renders the Zotero data as a processing instruction
that contains JSON code. This module parses out the relevant data and
replaces the JSON with a TEI element.

One last step to overcome is the citekey. When exporting the Zotero
database to bibtex, a citekey is created (following the rules in
https://github.com/zotero/translators/blob/master/BibTeX.js, see also
https://tex.stackexchange.com/questions/398521/custom-citation-keys-in-programs-like-zotero-or-mendeley/398749).
While this is present in the exported bibtex data, it is not present
in the JSON file.

The solution is a Zotero translation server
(https://github.com/zotero/translation-server, also described in
https://forums.zotero.org/discussion/73694/is-there-a-zbib-api-that-returns-bibtex-entry-string).
This is a translation service (can be run locally) that can handle a
Zotero API JSON and is able to export into biblatex.

Although BetterBibTeX
(https://github.com/retorquere/zotero-better-bibtex) allows for the
creation of unique citekeys across the whole Zotero database, the JSON
export does not contain this citekey.

"""

__version__ = "1.0"
__date__ = "20190607"
__author__ = "kthoden@mpiwg-berlin.mpg.de"

import argparse
import logging
import json
import os
import sys
import requests
import bibtexparser
from lxml import etree
import utils.libeoaconvert as libeoaconvert

# Log everything from DEBUG upwards, with timestamp and level in front of the message.
logging.basicConfig(level=logging.DEBUG, format=' %(asctime)s - %(levelname)s - %(message)s')

# TEI namespace URI and the prefix map handed to the XPath query on the header.
ns_tei = "http://www.tei-c.org/ns/1.0"
NS_MAP = {"t" : ns_tei}
# Marker at the start of a 'biblio' processing instruction that holds a Zotero citation.
ZOTERO_CSL_STRING = "ADDIN ZOTERO_ITEM CSL_CITATION"
# Character at which the formatted citation of a multi-item citation is split.
CITATION_SEPARATOR = ";"
# Base URL of the Zotero translation server (used for the /import and /export calls).
TRANSLATOR_URL = "http://127.0.0.1:1969"
# File the exported bibliography entries are appended to.
BIBTEX_FILE = "bibliography.bib"
# Two characters: index 0 is written in front of a citation, index 1 after it.
BRACKET = "()"


def find_citation_pis(xmltree):
    """Collect the Zotero citation processing instructions of a document.

    All processing instructions with the target ``biblio`` are looked
    up; only those whose text starts with ``ZOTERO_CSL_STRING`` are
    returned. Both counts are logged.

    :param xmltree: parsed XML tree offering ``xpath()``
    :returns: list of the processing instructions that hold a citation
    """

    all_biblio = xmltree.xpath("//processing-instruction('biblio')")
    logging.info(f"Found {libeoaconvert.plural(len(all_biblio), 'processing instruction')} called 'biblio'.")

    # retain only citations
    zotero_only = []
    for candidate in all_biblio:
        if candidate.text.startswith(ZOTERO_CSL_STRING):
            zotero_only.append(candidate)
    logging.info(f"Found {libeoaconvert.plural(len(zotero_only), 'zotero citation')}.")

    return zotero_only
# def find_citation_pis ends here


def parse_citation_pi(pi_text):
    """Strip the Zotero marker from the text of a processing instruction.

    The text consists of the ``ZOTERO_CSL_STRING`` marker followed by
    JSON code, for example (shortened):

    ADDIN ZOTERO_ITEM CSL_CITATION {"citationID":"xcRF1gM9","properties":{"formattedCitation":"(Bulatovic et al. 2016)","plainCitation":"(Bulatovic et al. 2016)","noteIndex":0},"citationItems":[{"id":7569,"uris":["..."],"uri":["..."],"itemData":{"id":7569,"type":"report","issued":{"date-parts":[["2016"]]}}}],"schema":"..."}

    There can be more than one citation in ``citationItems``.

    :param pi_text: text content of the processing instruction
    :returns: the text with every occurrence of the marker removed and
        surrounding whitespace stripped
    """

    return pi_text.replace(ZOTERO_CSL_STRING, "").strip()
# def parse_citation_pi ends here


def parse_json(citation_json):
    """Decode the JSON string of a citation into Python objects.

    :param citation_json: JSON document as a string
    :returns: the result of ``json.loads`` for that string
    """

    return json.loads(citation_json)
# def parse_json ends here


def get_info_from_json(parsed_json):
    """Extract the relevant parts from the parsed citation JSON.

    Only the first entry of ``citationItems`` is read here, so for a
    citation with several items the locator, prefix, suffix, URI and
    year are those of the first item.

    :param parsed_json: dictionary parsed from the processing instruction
    :returns: dictionary with the keys ``formatted``, ``pagerange``,
        ``prefix``, ``suffix``, ``zotero_url`` and ``year``; ``year``
        is None if the item carries no usable date
    """

    properties = parsed_json.get("properties")
    first_item = parsed_json.get("citationItems")[0]
    # undated items have no 'issued' block at all
    issued = first_item.get('itemData').get('issued') or {}
    try:
        year = issued['date-parts'][0][0]
    except (KeyError, IndexError):
        # no (or empty) structured date: fall back to the free-text
        # forms of a CSL date
        year = issued.get('literal', issued.get('raw'))

    return {
        "formatted": properties.get("plainCitation"),
        "pagerange": first_item.get("locator"),
        "prefix": first_item.get("prefix"),
        "suffix": first_item.get("suffix"),
        "zotero_url": first_item.get("uri")[0],
        "year": year,
    }
# def get_info_from_json ends here


def convert_to_csl_json(items_dict):
    """Write the item data of one citation item to a CSL JSON file.

    The ``itemData`` block of the citation item is taken over as it
    is, except that its ``id`` is replaced by the item's Zotero URI.
    The result is written to a file in the current directory that is
    named after the last path segment of the URI. Removing the file
    is left to the caller.

    The written data looks like this (shortened):

    {"id": "http://zotero.org/users/915539/items/4W8TZXCQ", "type": "paper-conference", "title": "Discussion on Radiation", "author": [ { "literal": "Anonymous" } ], "issued": { "date-parts": [ [ "1914" ] ] }}

    :param items_dict: one entry of ``citationItems``
    :returns: name of the file that was written
    """

    zotero_id = items_dict.get("uri")[0]
    # work on a copy so that the caller's parsed JSON keeps its own id
    csl_item = dict(items_dict.get('itemData'), id=zotero_id)

    file_id = zotero_id.split("/")[-1]
    tmp_filename = f"{file_id}.json"

    # json.dump escapes non-ASCII characters by default; the explicit
    # encoding only makes the file independent of the locale
    with open(tmp_filename, "w", encoding="utf-8") as data_file:
        json.dump(csl_item, data_file, indent=2)

    logging.debug(f"Wrote {tmp_filename}.")

    return tmp_filename
# def convert_to_csl_json ends here


def import_csl_json(csl_json):
    """Have the translation server turn CSL JSON into Zotero API JSON.

    The content of the file is posted to the ``/import`` endpoint of
    the translation server, the equivalent of

    curl --data-binary @cslstylefile.json -H 'Content-Type: text/plain' 'http://127.0.0.1:1969/import'

    The answer looks like this (shortened):

    [ { "key": "PXKZK2WF", "version": 0, "itemType": "conferencePaper", "creators": [ { "name": "Anonymous", "creatorType": "author" } ], "tags": [], "title": "Discussion on Radiation", "date": "1914" }]

    :param csl_json: name of the file holding the CSL JSON
    :returns: body of the server's response as bytes
    :raises SystemExit: with status 1 if the server cannot be reached
        or answers with an HTTP error status
    """

    headers = {'Content-Type': 'text/plain'}
    with open(csl_json, 'rb') as csl_file:
        data = csl_file.read()
    logging.debug(f"Trying to communicate with {TRANSLATOR_URL}")

    try:
        response = requests.post(f'{TRANSLATOR_URL}/import', headers=headers, data=data)
    except requests.exceptions.RequestException:
        logging.error(f"No connection possible to {TRANSLATOR_URL}. Maybe the translation service is down? Exiting.")
        # a failure must not look like a successful run to the shell
        sys.exit(1)

    if not response.ok:
        # an error page is of no use to the following export step
        logging.error(f"{TRANSLATOR_URL}/import answered with HTTP status {response.status_code}. Exiting.")
        sys.exit(1)

    return response.content
# def import_csl_json ends here


def create_bibtex(zotero_api_json):
    """Get a bibliography entry from the Zotero translation server.

    The data is posted to the ``/export`` endpoint with
    ``format=biblatex``. The request was modelled on

    curl -d @items.json -H 'Content-Type: application/json' 'http://127.0.0.1:1969/export?format=bibtex'

    (translated by https://curl.trillworks.com/); note that the code
    asks for biblatex, not bibtex.

    :param zotero_api_json: Zotero API JSON to be exported
    :returns: the server's answer, decoded as UTF-8
    :raises SystemExit: with status 1 if the server cannot be reached
        or answers with an HTTP error status
    """

    headers = {'Content-Type': 'application/json'}
    params = (('format', 'biblatex'),)
    try:
        response = requests.post(f'{TRANSLATOR_URL}/export', headers=headers, params=params, data=zotero_api_json)
    except requests.exceptions.RequestException:
        logging.error(f"No connection possible to {TRANSLATOR_URL}. Maybe the translation service is down? Exiting.")
        # a failure must not look like a successful run to the shell
        sys.exit(1)

    if not response.ok:
        # an error page would only fail later, when parsed as bibtex
        logging.error(f"{TRANSLATOR_URL}/export answered with HTTP status {response.status_code}. Exiting.")
        sys.exit(1)

    return response.content.decode('utf-8')
# def create_bibtex ends here


def write_to_bibfile(bibtex_entry):
    """Append an entry to the file named by ``BIBTEX_FILE``.

    :param bibtex_entry: the entry as a string
    """

    # entries may contain non-ASCII characters; write UTF-8 regardless
    # of the locale's default encoding
    with open(BIBTEX_FILE, "a", encoding="utf-8") as bibfile:
        bibfile.write(bibtex_entry)
# def write_to_bibfile ends here


def get_citekey(bibtex_entry):
    """Read the citekey of a bibtex entry.

    :param bibtex_entry: string holding the bibtex record
    :returns: the ``ID`` of the first entry the parser finds
    """

    lenient_parser = bibtexparser.bparser.BibTexParser()
    # be a bit lax about nonstandard entry types
    lenient_parser.ignore_nonstandard_types = False

    database = bibtexparser.loads(bibtex_entry, parser=lenient_parser)
    first_entry = database.entries[0]

    return first_entry["ID"]
# def get_citekey ends here


def modify_preceding_text(bibl_element, prefix_text, BRACKET="[]"):
    """Append the citation prefix and the opening bracket to the preceding text.

    The text directly in front of ``bibl_element`` is the tail of its
    previous sibling or, if there is no previous sibling, the text of
    its parent. The prefix, a space and the opening bracket are
    appended to that text.

    :param bibl_element: element the citation starts with
    :param prefix_text: prefix of the citation, may be empty
    :param BRACKET: pair of characters; only index 0 is used here
    :returns: None
    """

    addition = f"{prefix_text} {BRACKET[0]}"

    preceding_element = bibl_element.getprevious()
    if preceding_element is None:
        parent_element = bibl_element.getparent()
        # diagnostic output goes to the log, not to stdout
        logging.debug(f"No preceding sibling, using text of {parent_element}.")
        parent_element.text = f"{parent_element.text or ''}{addition}"
    else:
        preceding_element.tail = f"{preceding_element.tail or ''}{addition}"
# def modify_preceding_text ends here


def create_citation_element(citation_dict, total_items, index_item, print_formatted_citation):
    """Create a ``bibl`` element for one citation item.

    The element holds a ``ref`` whose ``type`` is ``year`` if the
    formatted citation consists of the year in parentheses only, and
    ``authoryear`` otherwise. Its ``target`` is the citekey or, if
    there is none, the Zotero URL. A page range, if present, is added
    as ``citedRange``.

    :param citation_dict: citation data; the keys ``formatted``,
        ``year``, ``citekey``, ``zotero_url`` and ``pagerange`` are read
    :param total_items: number of items in the whole citation
    :param index_item: position of this item, counted from 0
    :param print_formatted_citation: if true, the item's part of the
        formatted citation is written behind the ``ref``
    :returns: the new ``bibl`` element
    """

    whole_citation = citation_dict['formatted']
    bibl = etree.Element("bibl")

    formatted_citation = whole_citation
    if total_items > 1:
        parts = whole_citation.split(CITATION_SEPARATOR)
        # a citation style may merge items, leaving fewer parts than
        # items; such an item gets no text of its own
        formatted_citation = parts[index_item] if index_item < len(parts) else ""
        if index_item + 1 != total_items:
            bibl.tail = CITATION_SEPARATOR

    ref = etree.SubElement(bibl, "ref")
    if print_formatted_citation:
        ref.tail = formatted_citation

    # add year or authoryear, need a good heuristic here
    if whole_citation == f"({citation_dict['year']})":
        ref.set("type", "year")
    else:
        ref.set("type", "authoryear")

    citekey = citation_dict.get("citekey")
    if citekey:
        ref.set("target", f"#{citekey}")
    else:
        ref.set("target", f"#{citation_dict['zotero_url']}")

    if citation_dict["pagerange"]:
        citedrange = etree.SubElement(bibl, "citedRange")
        citedrange.text = citation_dict["pagerange"]

    return bibl
# def create_citation_element ends here


def citation_item_to_bibl(citation_item, parsed_json, citekey_list, number_of_items, item_position, print_formatted_citation):
    """Wrapper function for citation items.

    This function makes use of the Zotero translation server
    (https://github.com/zotero/translation-server) for format
    conversion.

    The JSON found in each citationItem of the processing
    instruction is
    - modified to CSL JSON format
    - translated into Zotero API JSON format
    - exported to BibTeX

    The exported entry is appended to the bibliography file unless its
    citekey is already in ``citekey_list``.

    :param citation_item: the entry of ``citationItems`` to convert
    :param parsed_json: the whole parsed citation
    :param citekey_list: citekeys written so far; extended in place
    :param number_of_items: number of items in the citation
    :param item_position: position of this item, counted from 0
    :param print_formatted_citation: passed on to ``create_citation_element``
    :returns: tuple of the ``bibl`` element and the citation dictionary;
        prefix and suffix in the dictionary remain those of the first item
    """

    citation_dict = get_info_from_json(parsed_json)
    citation_dict["id"] = parsed_json.get("citationID")
    # get_info_from_json reads the first citation item only; take page
    # range and URI from the item at hand, so that further items do
    # not inherit those of the first one
    citation_dict["pagerange"] = citation_item.get("locator")
    citation_dict["zotero_url"] = citation_item.get("uri")[0]

    # convert csl json to zotero api json
    zotero_api_json_filename = convert_to_csl_json(citation_item)
    try:
        # first call to translation server
        zotero_api_json = import_csl_json(zotero_api_json_filename)
    finally:
        # remove the temporary file even if the server call bails out
        os.unlink(zotero_api_json_filename)

    # second call to translation server
    bibtex_entry = create_bibtex(zotero_api_json)
    citekey = get_citekey(bibtex_entry)
    citation_dict["citekey"] = citekey
    if citekey not in citekey_list:
        citekey_list.append(citekey)
        write_to_bibfile(bibtex_entry)

    citation_element = create_citation_element(citation_dict, number_of_items, item_position, print_formatted_citation)

    return citation_element, citation_dict
# def citation_item_to_bibl ends here


def turn_pi_into_bibl(pi, citekey_list, print_formatted_citation):
    """Replace a citation processing instruction by ``bibl`` elements.

    Every item of the citation is converted to a ``bibl`` element; the
    elements are collected in a temporary ``tmp_bib`` element that
    takes the place of the processing instruction. In the text behind
    the processing instruction, the first occurrence of the formatted
    citation is replaced by the closing bracket and the suffix; prefix
    and opening bracket are appended to the text in front of it.

    :param pi: the processing instruction
    :param citekey_list: citekeys written so far; extended in place
    :param print_formatted_citation: passed on to the item conversion
    :returns: None
    """

    # wrap a temporary element around citations
    tmp_element = etree.Element("tmp_bib")
    parsed_json = parse_json(parse_citation_pi(pi.text))
    # there can be more than one citation in one processing instruction
    items = parsed_json.get("citationItems") or []
    number_of_items = len(items)
    logging.info(f"Found {libeoaconvert.plural(number_of_items, 'item')} in this zotero citation.")
    if not items:
        logging.warning("Zotero citation without items, leaving the processing instruction untouched.")
        return

    # enumerate yields the true position even if two items are equal
    for item_position, citation_item in enumerate(items):
        citation_element, citation_dict = citation_item_to_bibl(citation_item, parsed_json, citekey_list, number_of_items, item_position, print_formatted_citation)
        tmp_element.append(citation_element)

    formatted_citation = citation_dict['formatted']
    prefix_text = citation_dict["prefix"] or ""
    suffix_text = citation_dict["suffix"] or ""

    # remove formatted citation from tail; the processing instruction
    # may be the last node of its parent and then has no tail at all
    pi_tail = pi.tail or ""
    # only the occurrence right behind the processing instruction is the
    # rendered citation, equal text further on must stay
    tmp_element.tail = pi_tail.replace(formatted_citation, f"{BRACKET[1]}{suffix_text}", 1)

    # replace processing instruction with bibl elements
    parent_element = pi.getparent()
    parent_element.replace(pi, tmp_element)

    modify_preceding_text(tmp_element, prefix_text, BRACKET)
# def turn_pi_into_bibl ends here


def add_bib_to_header(xmltree, BIBTEX_FILE):
    """Register the bibliography file in the TEI header.

    An ``ab`` element of type ``database`` holding a ``ref`` to the
    file is appended to the first ``sourceDesc`` of the header.

    :param xmltree: the document tree
    :param BIBTEX_FILE: name of the bibliography file
    """

    source_descriptions = xmltree.xpath("/t:TEI/t:teiHeader/t:fileDesc/t:sourceDesc", namespaces=NS_MAP)
    target_node = source_descriptions[0]

    database_block = etree.Element("ab", type="database")
    file_ref = etree.SubElement(database_block, "ref")
    file_ref.set("target", BIBTEX_FILE)
    # placeholder value, to be filled in by hand
    file_ref.set("type", "please-specify-anthology-or-monograph")

    target_node.append(database_block)
# def add_bib_to_header ends here


def cleanup_xml(xmltree):
    """Tidy up the tree after the conversion.

    The temporary ``tmp_bib`` wrappers are dissolved (their content
    stays) and the ``rend`` attribute is removed from all elements
    where it has the value ``Literaturverzeichnis1``.

    :param xmltree: the document tree, modified in place
    """

    etree.strip_tags(xmltree, "tmp_bib")
    for styled_element in xmltree.xpath("//*[@rend='Literaturverzeichnis1']"):
        del styled_element.attrib["rend"]
# def cleanup_xml ends here


def write_xml_output(tree, filename):
    """Serialise the modified tree to a file and log the file name.

    :param tree: the document tree
    :param filename: name of the file to write
    """

    serialisation = {"pretty_print": True, "xml_declaration": True, "encoding": "utf-8"}
    tree.write(filename, **serialisation)
    logging.info(f"Wrote {filename}.")
# def write_xml_output ends here


def main():
    """Convert the Zotero citations of the XML file given on the command line.

    The result is written next to the input file, with ``-biblrefs``
    inserted in front of the file extension.
    """

    parser = argparse.ArgumentParser()
    parser.add_argument("xmlfile", help="XML file converted from Word, containing Zotero citations.")
    parser.add_argument("-f", "--format-citations", help="Print formatted citation in XML.", action="store_true")

    args = parser.parse_args()

    xmltree = etree.parse(args.xmlfile)
    citation_pis = find_citation_pis(xmltree)
    citekey_list = []

    for pi in citation_pis:
        turn_pi_into_bibl(pi, citekey_list, args.format_citations)

    cleanup_xml(xmltree)
    add_bib_to_header(xmltree, BIBTEX_FILE)

    # build the output name from the file stem: a plain str.replace
    # returns names without ".xml" unchanged (the input would be
    # overwritten) and also touches ".xml" inside directory names
    stem, extension = os.path.splitext(args.xmlfile)
    write_xml_output(xmltree, f"{stem}-biblrefs{extension or '.xml'}")
# def main ends here


# run the conversion only when executed as a script, not on import
if __name__ == '__main__':
    main()
# finis

# Still to do:
# prefix and suffix
	#!/usr/bin/env python3
	# -- coding: utf-8; mode: python --

	"""When using a Zotero database for publications together with its Word plugin,
	users don't need to put citekey shorthands into the Word
	manuscripts. They can use the functionality of the plugin, which also
	allows them to cite only a year, add page ranges and, if needed, add a
	prefix or a suffix.

	The TEI converter renders the Zotero data as a processing instruction
	that contains JSON code. This module parses out the relevant data and
	replaces the JSON with a TEI element.

	One last step to overcome is the citekey. When exporting the Zotero
	database to bibtex, a citekey is created (following the rules in
	https://github.com/zotero/translators/blob/master/BibTeX.js, see also
	https://tex.stackexchange.com/questions/398521/custom-citation-keys-in-programs-like-zotero-or-mendeley/398749).
	While this is present in the exported bibtex data, it is not present
	in the JSON file.

	The solution is a Zotero translation server
	(https://github.com/zotero/translation-server, also described in
	https://forums.zotero.org/discussion/73694/is-there-a-zbib-api-that-returns-bibtex-entry-string).
	This is a translation service (can be run locally) that can handle a
	Zotero API JSON and is able to export into biblatex.

	Although BetterBibTeX
	(https://github.com/retorquere/zotero-better-bibtex) allows for the
	creation of unique citekeys across the whole Zotero database, the JSON
	export does not contain this citekey.

	"""

	__version__ = "1.0"
	__date__ = "20190607"
	__author__ = "kthoden@mpiwg-berlin.mpg.de"

	import argparse
	import logging
	import json
	import os
	import sys
	import requests
	import bibtexparser
	from lxml import etree
	import utils.libeoaconvert as libeoaconvert

	# Log everything from DEBUG upwards, with timestamp and level in front of the message.
	logging.basicConfig(level=logging.DEBUG, format=' %(asctime)s - %(levelname)s - %(message)s')

	# TEI namespace URI and the prefix map handed to the XPath query on the header.
	ns_tei = "http://www.tei-c.org/ns/1.0"
	NS_MAP = {"t" : ns_tei}
	# Marker at the start of a 'biblio' processing instruction that holds a Zotero citation.
	ZOTERO_CSL_STRING = "ADDIN ZOTERO_ITEM CSL_CITATION"
	# Character at which the formatted citation of a multi-item citation is split.
	CITATION_SEPARATOR = ";"
	# Base URL of the Zotero translation server (used for the /import and /export calls).
	TRANSLATOR_URL = "http://127.0.0.1:1969"
	# File the exported bibliography entries are appended to.
	BIBTEX_FILE = "bibliography.bib"
	# Two characters: index 0 is written in front of a citation, index 1 after it.
	BRACKET = "()"


	def find_citation_pis(xmltree):
	    """Collect the Zotero citation processing instructions of a document.

	    All processing instructions with the target ``biblio`` are looked
	    up; only those whose text starts with ``ZOTERO_CSL_STRING`` are
	    returned. Both counts are logged.

	    :param xmltree: parsed XML tree offering ``xpath()``
	    :returns: list of the processing instructions that hold a citation
	    """

	    all_biblio = xmltree.xpath("//processing-instruction('biblio')")
	    logging.info(f"Found {libeoaconvert.plural(len(all_biblio), 'processing instruction')} called 'biblio'.")

	    # retain only citations
	    zotero_only = []
	    for candidate in all_biblio:
	        if candidate.text.startswith(ZOTERO_CSL_STRING):
	            zotero_only.append(candidate)
	    logging.info(f"Found {libeoaconvert.plural(len(zotero_only), 'zotero citation')}.")

	    return zotero_only
	# def find_citation_pis ends here


	def parse_citation_pi(pi_text):
	    """Strip the Zotero marker from the text of a processing instruction.

	    The text consists of the ``ZOTERO_CSL_STRING`` marker followed by
	    JSON code, for example (shortened):

	    ADDIN ZOTERO_ITEM CSL_CITATION {"citationID":"xcRF1gM9","properties":{"formattedCitation":"(Bulatovic et al. 2016)","plainCitation":"(Bulatovic et al. 2016)","noteIndex":0},"citationItems":[{"id":7569,"uris":["..."],"uri":["..."],"itemData":{"id":7569,"type":"report","issued":{"date-parts":[["2016"]]}}}],"schema":"..."}

	    There can be more than one citation in ``citationItems``.

	    :param pi_text: text content of the processing instruction
	    :returns: the text with every occurrence of the marker removed and
	        surrounding whitespace stripped
	    """

	    return pi_text.replace(ZOTERO_CSL_STRING, "").strip()
	# def parse_citation_pi ends here


	def parse_json(citation_json):
	    """Decode the JSON string of a citation into Python objects.

	    :param citation_json: JSON document as a string
	    :returns: the result of ``json.loads`` for that string
	    """

	    return json.loads(citation_json)
	# def parse_json ends here


	def get_info_from_json(parsed_json):
	    """Extract the relevant parts from the parsed citation JSON.

	    Only the first entry of ``citationItems`` is read here, so for a
	    citation with several items the locator, prefix, suffix, URI and
	    year are those of the first item.

	    :param parsed_json: dictionary parsed from the processing instruction
	    :returns: dictionary with the keys ``formatted``, ``pagerange``,
	        ``prefix``, ``suffix``, ``zotero_url`` and ``year``; ``year``
	        is None if the item carries no usable date
	    """

	    properties = parsed_json.get("properties")
	    first_item = parsed_json.get("citationItems")[0]
	    # undated items have no 'issued' block at all
	    issued = first_item.get('itemData').get('issued') or {}
	    try:
	        year = issued['date-parts'][0][0]
	    except (KeyError, IndexError):
	        # no (or empty) structured date: fall back to the free-text
	        # forms of a CSL date
	        year = issued.get('literal', issued.get('raw'))

	    return {
	        "formatted": properties.get("plainCitation"),
	        "pagerange": first_item.get("locator"),
	        "prefix": first_item.get("prefix"),
	        "suffix": first_item.get("suffix"),
	        "zotero_url": first_item.get("uri")[0],
	        "year": year,
	    }
	# def get_info_from_json ends here


	def convert_to_csl_json(items_dict):
	    """Write the item data of one citation item to a CSL JSON file.

	    The ``itemData`` block of the citation item is taken over as it
	    is, except that its ``id`` is replaced by the item's Zotero URI.
	    The result is written to a file in the current directory that is
	    named after the last path segment of the URI. Removing the file
	    is left to the caller.

	    The written data looks like this (shortened):

	    {"id": "http://zotero.org/users/915539/items/4W8TZXCQ", "type": "paper-conference", "title": "Discussion on Radiation", "author": [ { "literal": "Anonymous" } ], "issued": { "date-parts": [ [ "1914" ] ] }}

	    :param items_dict: one entry of ``citationItems``
	    :returns: name of the file that was written
	    """

	    zotero_id = items_dict.get("uri")[0]
	    # work on a copy so that the caller's parsed JSON keeps its own id
	    csl_item = dict(items_dict.get('itemData'), id=zotero_id)

	    file_id = zotero_id.split("/")[-1]
	    tmp_filename = f"{file_id}.json"

	    # json.dump escapes non-ASCII characters by default; the explicit
	    # encoding only makes the file independent of the locale
	    with open(tmp_filename, "w", encoding="utf-8") as data_file:
	        json.dump(csl_item, data_file, indent=2)

	    logging.debug(f"Wrote {tmp_filename}.")

	    return tmp_filename
	# def convert_to_csl_json ends here


	def import_csl_json(csl_json):
	    """Have the translation server turn CSL JSON into Zotero API JSON.

	    The content of the file is posted to the ``/import`` endpoint of
	    the translation server, the equivalent of

	    curl --data-binary @cslstylefile.json -H 'Content-Type: text/plain' 'http://127.0.0.1:1969/import'

	    The answer looks like this (shortened):

	    [ { "key": "PXKZK2WF", "version": 0, "itemType": "conferencePaper", "creators": [ { "name": "Anonymous", "creatorType": "author" } ], "tags": [], "title": "Discussion on Radiation", "date": "1914" }]

	    :param csl_json: name of the file holding the CSL JSON
	    :returns: body of the server's response as bytes
	    :raises SystemExit: with status 1 if the server cannot be reached
	        or answers with an HTTP error status
	    """

	    headers = {'Content-Type': 'text/plain'}
	    with open(csl_json, 'rb') as csl_file:
	        data = csl_file.read()
	    logging.debug(f"Trying to communicate with {TRANSLATOR_URL}")

	    try:
	        response = requests.post(f'{TRANSLATOR_URL}/import', headers=headers, data=data)
	    except requests.exceptions.RequestException:
	        logging.error(f"No connection possible to {TRANSLATOR_URL}. Maybe the translation service is down? Exiting.")
	        # a failure must not look like a successful run to the shell
	        sys.exit(1)

	    if not response.ok:
	        # an error page is of no use to the following export step
	        logging.error(f"{TRANSLATOR_URL}/import answered with HTTP status {response.status_code}. Exiting.")
	        sys.exit(1)

	    return response.content
	# def import_csl_json ends here


	def create_bibtex(zotero_api_json):
	    """Get a bibliography entry from the Zotero translation server.

	    The data is posted to the ``/export`` endpoint with
	    ``format=biblatex``. The request was modelled on

	    curl -d @items.json -H 'Content-Type: application/json' 'http://127.0.0.1:1969/export?format=bibtex'

	    (translated by https://curl.trillworks.com/); note that the code
	    asks for biblatex, not bibtex.

	    :param zotero_api_json: Zotero API JSON to be exported
	    :returns: the server's answer, decoded as UTF-8
	    :raises SystemExit: with status 1 if the server cannot be reached
	        or answers with an HTTP error status
	    """

	    headers = {'Content-Type': 'application/json'}
	    params = (('format', 'biblatex'),)
	    try:
	        response = requests.post(f'{TRANSLATOR_URL}/export', headers=headers, params=params, data=zotero_api_json)
	    except requests.exceptions.RequestException:
	        logging.error(f"No connection possible to {TRANSLATOR_URL}. Maybe the translation service is down? Exiting.")
	        # a failure must not look like a successful run to the shell
	        sys.exit(1)

	    if not response.ok:
	        # an error page would only fail later, when parsed as bibtex
	        logging.error(f"{TRANSLATOR_URL}/export answered with HTTP status {response.status_code}. Exiting.")
	        sys.exit(1)

	    return response.content.decode('utf-8')
	# def create_bibtex ends here


	def write_to_bibfile(bibtex_entry):
	    """Append an entry to the file named by ``BIBTEX_FILE``.

	    :param bibtex_entry: the entry as a string
	    """

	    # entries may contain non-ASCII characters; write UTF-8 regardless
	    # of the locale's default encoding
	    with open(BIBTEX_FILE, "a", encoding="utf-8") as bibfile:
	        bibfile.write(bibtex_entry)
	# def write_to_bibfile ends here


	def get_citekey(bibtex_entry):
	    """Read the citekey of a bibtex entry.

	    :param bibtex_entry: string holding the bibtex record
	    :returns: the ``ID`` of the first entry the parser finds
	    """

	    lenient_parser = bibtexparser.bparser.BibTexParser()
	    # be a bit lax about nonstandard entry types
	    lenient_parser.ignore_nonstandard_types = False

	    database = bibtexparser.loads(bibtex_entry, parser=lenient_parser)
	    first_entry = database.entries[0]

	    return first_entry["ID"]
	# def get_citekey ends here


	def modify_preceding_text(bibl_element, prefix_text, BRACKET="[]"):
	    """Append the citation prefix and the opening bracket to the preceding text.

	    The text directly in front of ``bibl_element`` is the tail of its
	    previous sibling or, if there is no previous sibling, the text of
	    its parent. The prefix, a space and the opening bracket are
	    appended to that text.

	    :param bibl_element: element the citation starts with
	    :param prefix_text: prefix of the citation, may be empty
	    :param BRACKET: pair of characters; only index 0 is used here
	    :returns: None
	    """

	    addition = f"{prefix_text} {BRACKET[0]}"

	    preceding_element = bibl_element.getprevious()
	    if preceding_element is None:
	        parent_element = bibl_element.getparent()
	        # diagnostic output goes to the log, not to stdout
	        logging.debug(f"No preceding sibling, using text of {parent_element}.")
	        parent_element.text = f"{parent_element.text or ''}{addition}"
	    else:
	        preceding_element.tail = f"{preceding_element.tail or ''}{addition}"
	# def modify_preceding_text ends here


	def create_citation_element(citation_dict, total_items, index_item, print_formatted_citation):
	    """Create a ``bibl`` element for one citation item.

	    The element holds a ``ref`` whose ``type`` is ``year`` if the
	    formatted citation consists of the year in parentheses only, and
	    ``authoryear`` otherwise. Its ``target`` is the citekey or, if
	    there is none, the Zotero URL. A page range, if present, is added
	    as ``citedRange``.

	    :param citation_dict: citation data; the keys ``formatted``,
	        ``year``, ``citekey``, ``zotero_url`` and ``pagerange`` are read
	    :param total_items: number of items in the whole citation
	    :param index_item: position of this item, counted from 0
	    :param print_formatted_citation: if true, the item's part of the
	        formatted citation is written behind the ``ref``
	    :returns: the new ``bibl`` element
	    """

	    whole_citation = citation_dict['formatted']
	    bibl = etree.Element("bibl")

	    formatted_citation = whole_citation
	    if total_items > 1:
	        parts = whole_citation.split(CITATION_SEPARATOR)
	        # a citation style may merge items, leaving fewer parts than
	        # items; such an item gets no text of its own
	        formatted_citation = parts[index_item] if index_item < len(parts) else ""
	        if index_item + 1 != total_items:
	            bibl.tail = CITATION_SEPARATOR

	    ref = etree.SubElement(bibl, "ref")
	    if print_formatted_citation:
	        ref.tail = formatted_citation

	    # add year or authoryear, need a good heuristic here
	    if whole_citation == f"({citation_dict['year']})":
	        ref.set("type", "year")
	    else:
	        ref.set("type", "authoryear")

	    citekey = citation_dict.get("citekey")
	    if citekey:
	        ref.set("target", f"#{citekey}")
	    else:
	        ref.set("target", f"#{citation_dict['zotero_url']}")

	    if citation_dict["pagerange"]:
	        citedrange = etree.SubElement(bibl, "citedRange")
	        citedrange.text = citation_dict["pagerange"]

	    return bibl
	# def create_citation_element ends here


	def citation_item_to_bibl(citation_item, parsed_json, citekey_list, number_of_items, item_position, print_formatted_citation):
	    """Wrapper function for citation items.

	    This function makes use of the Zotero translation server
	    (https://github.com/zotero/translation-server) for format
	    conversion.

	    The JSON found in each citationItem of the processing
	    instruction is
	    - modified to CSL JSON format
	    - translated into Zotero API JSON format
	    - exported to BibTeX

	    The exported entry is appended to the bibliography file unless its
	    citekey is already in ``citekey_list``.

	    :param citation_item: the entry of ``citationItems`` to convert
	    :param parsed_json: the whole parsed citation
	    :param citekey_list: citekeys written so far; extended in place
	    :param number_of_items: number of items in the citation
	    :param item_position: position of this item, counted from 0
	    :param print_formatted_citation: passed on to ``create_citation_element``
	    :returns: tuple of the ``bibl`` element and the citation dictionary;
	        prefix and suffix in the dictionary remain those of the first item
	    """

	    citation_dict = get_info_from_json(parsed_json)
	    citation_dict["id"] = parsed_json.get("citationID")
	    # get_info_from_json reads the first citation item only; take page
	    # range and URI from the item at hand, so that further items do
	    # not inherit those of the first one
	    citation_dict["pagerange"] = citation_item.get("locator")
	    citation_dict["zotero_url"] = citation_item.get("uri")[0]

	    # convert csl json to zotero api json
	    zotero_api_json_filename = convert_to_csl_json(citation_item)
	    try:
	        # first call to translation server
	        zotero_api_json = import_csl_json(zotero_api_json_filename)
	    finally:
	        # remove the temporary file even if the server call bails out
	        os.unlink(zotero_api_json_filename)

	    # second call to translation server
	    bibtex_entry = create_bibtex(zotero_api_json)
	    citekey = get_citekey(bibtex_entry)
	    citation_dict["citekey"] = citekey
	    if citekey not in citekey_list:
	        citekey_list.append(citekey)
	        write_to_bibfile(bibtex_entry)

	    citation_element = create_citation_element(citation_dict, number_of_items, item_position, print_formatted_citation)

	    return citation_element, citation_dict
	# def citation_item_to_bibl ends here


	def turn_pi_into_bibl(pi, citekey_list, print_formatted_citation):
	    """Replace a citation processing instruction by ``bibl`` elements.

	    Every item of the citation is converted to a ``bibl`` element; the
	    elements are collected in a temporary ``tmp_bib`` element that
	    takes the place of the processing instruction. In the text behind
	    the processing instruction, the first occurrence of the formatted
	    citation is replaced by the closing bracket and the suffix; prefix
	    and opening bracket are appended to the text in front of it.

	    :param pi: the processing instruction
	    :param citekey_list: citekeys written so far; extended in place
	    :param print_formatted_citation: passed on to the item conversion
	    :returns: None
	    """

	    # wrap a temporary element around citations
	    tmp_element = etree.Element("tmp_bib")
	    parsed_json = parse_json(parse_citation_pi(pi.text))
	    # there can be more than one citation in one processing instruction
	    items = parsed_json.get("citationItems") or []
	    number_of_items = len(items)
	    logging.info(f"Found {libeoaconvert.plural(number_of_items, 'item')} in this zotero citation.")
	    if not items:
	        logging.warning("Zotero citation without items, leaving the processing instruction untouched.")
	        return

	    # enumerate yields the true position even if two items are equal
	    for item_position, citation_item in enumerate(items):
	        citation_element, citation_dict = citation_item_to_bibl(citation_item, parsed_json, citekey_list, number_of_items, item_position, print_formatted_citation)
	        tmp_element.append(citation_element)

	    formatted_citation = citation_dict['formatted']
	    prefix_text = citation_dict["prefix"] or ""
	    suffix_text = citation_dict["suffix"] or ""

	    # remove formatted citation from tail; the processing instruction
	    # may be the last node of its parent and then has no tail at all
	    pi_tail = pi.tail or ""
	    # only the occurrence right behind the processing instruction is the
	    # rendered citation, equal text further on must stay
	    tmp_element.tail = pi_tail.replace(formatted_citation, f"{BRACKET[1]}{suffix_text}", 1)

	    # replace processing instruction with bibl elements
	    parent_element = pi.getparent()
	    parent_element.replace(pi, tmp_element)

	    modify_preceding_text(tmp_element, prefix_text, BRACKET)
	# def turn_pi_into_bibl ends here


	def add_bib_to_header(xmltree, BIBTEX_FILE):
	    """Register the bibliography file in the TEI header.

	    An ``ab`` element of type ``database`` holding a ``ref`` to the
	    file is appended to the first ``sourceDesc`` of the header.

	    :param xmltree: the document tree
	    :param BIBTEX_FILE: name of the bibliography file
	    """

	    source_descriptions = xmltree.xpath("/t:TEI/t:teiHeader/t:fileDesc/t:sourceDesc", namespaces=NS_MAP)
	    target_node = source_descriptions[0]

	    database_block = etree.Element("ab", type="database")
	    file_ref = etree.SubElement(database_block, "ref")
	    file_ref.set("target", BIBTEX_FILE)
	    # placeholder value, to be filled in by hand
	    file_ref.set("type", "please-specify-anthology-or-monograph")

	    target_node.append(database_block)
	# def add_bib_to_header ends here


	def cleanup_xml(xmltree):
	    """Tidy up the tree after the conversion.

	    The temporary ``tmp_bib`` wrappers are dissolved (their content
	    stays) and the ``rend`` attribute is removed from all elements
	    where it has the value ``Literaturverzeichnis1``.

	    :param xmltree: the document tree, modified in place
	    """

	    etree.strip_tags(xmltree, "tmp_bib")
	    for styled_element in xmltree.xpath("//*[@rend='Literaturverzeichnis1']"):
	        del styled_element.attrib["rend"]
	# def cleanup_xml ends here


	def write_xml_output(tree, filename):
	    """Serialise the modified tree to a file and log the file name.

	    :param tree: the document tree
	    :param filename: name of the file to write
	    """

	    serialisation = {"pretty_print": True, "xml_declaration": True, "encoding": "utf-8"}
	    tree.write(filename, **serialisation)
	    logging.info(f"Wrote {filename}.")
	# def write_xml_output ends here


	def main():
	    """Convert the Zotero citations of the XML file given on the command line.

	    The result is written next to the input file, with ``-biblrefs``
	    inserted in front of the file extension.
	    """

	    parser = argparse.ArgumentParser()
	    parser.add_argument("xmlfile", help="XML file converted from Word, containing Zotero citations.")
	    parser.add_argument("-f", "--format-citations", help="Print formatted citation in XML.", action="store_true")

	    args = parser.parse_args()

	    xmltree = etree.parse(args.xmlfile)
	    citation_pis = find_citation_pis(xmltree)
	    citekey_list = []

	    for pi in citation_pis:
	        turn_pi_into_bibl(pi, citekey_list, args.format_citations)

	    cleanup_xml(xmltree)
	    add_bib_to_header(xmltree, BIBTEX_FILE)

	    # build the output name from the file stem: a plain str.replace
	    # returns names without ".xml" unchanged (the input would be
	    # overwritten) and also touches ".xml" inside directory names
	    stem, extension = os.path.splitext(args.xmlfile)
	    write_xml_output(xmltree, f"{stem}-biblrefs{extension or '.xml'}")
	# def main ends here


	# run the conversion only when executed as a script, not on import
	if __name__ == '__main__':
	    main()
	# finis

	# Still to do:
	# prefix and suffix