From 0ddbde5403c9acc700ed1ded0b487e512472d75f Mon Sep 17 00:00:00 2001 From: Klaus Thoden Date: Mon, 5 Mar 2018 16:22:47 +0100 Subject: [PATCH] Generate a report of errors --- prepare_tei.py | 60 ++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 46 insertions(+), 14 deletions(-) diff --git a/prepare_tei.py b/prepare_tei.py index 55b9024..170ee1f 100644 --- a/prepare_tei.py +++ b/prepare_tei.py @@ -105,6 +105,8 @@ def convert_citations(string): def parse_cited_range(list_of_xml_elements): """citedRange: split up parameters or remove element if attributes are empty""" + unsplittable_pageref = [] + for reference in list_of_xml_elements: cited_range = reference.find("t:citedRange", namespaces=NS_MAP) from_value = (cited_range.get("from")) @@ -123,22 +125,32 @@ def parse_cited_range(list_of_xml_elements): cited_range.set("to", split_values[2]) else: logging.info("Splitting the page range produced unexpected result. Tried to split %s" % from_value) + unsplittable_pageref.append(from_value) + + return unsplittable_pageref # def parse_cited_range ends here def validate_citations(used_citekeys, bibdata): - """Check if all found citekeys are in the database""" + """Check if all found citekeys are in the database + + Return a list of unavailable citekeys.""" available_citekeys = bibdata.keys() + no_citekey = [] + for citekey in used_citekeys: if citekey not in available_citekeys: + no_citekey.append(citekey) logging.info("%s is not in the bibliographic database" % citekey) + + return no_citekey # def validate_citations ends here def convert_figures(string): """Find figures shorthands""" - graphic_pattern = re.compile(r"\+(.*?)\+") + graphic_pattern = re.compile(r"(?