Skip to content

Commit

Permalink
Generate a report of errors
Browse files Browse the repository at this point in the history
  • Loading branch information
Klaus Thoden committed Mar 5, 2018
1 parent 029293a commit 0ddbde5
Showing 1 changed file with 46 additions and 14 deletions.
60 changes: 46 additions & 14 deletions prepare_tei.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,8 @@ def convert_citations(string):
def parse_cited_range(list_of_xml_elements):
"""citedRange: split up parameters or remove element if attributes are empty"""

unsplittable_pageref = []

for reference in list_of_xml_elements:
cited_range = reference.find("t:citedRange", namespaces=NS_MAP)
from_value = (cited_range.get("from"))
Expand All @@ -123,22 +125,32 @@ def parse_cited_range(list_of_xml_elements):
cited_range.set("to", split_values[2])
else:
logging.info("Splitting the page range produced unexpected result. Tried to split %s" % from_value)
unsplittable_pageref.append(from_value)

return unsplittable_pageref
# def parse_cited_range ends here

def validate_citations(used_citekeys, bibdata):
    """Check if all found citekeys are in the database.

    Every citekey from used_citekeys that is missing from the keys of
    bibdata is logged at INFO level and collected.

    Return a list of unavailable citekeys, in the order in which they
    were encountered (a citekey used several times is listed several
    times).
    """

    available_citekeys = bibdata.keys()

    no_citekey = []

    for citekey in used_citekeys:
        if citekey not in available_citekeys:
            no_citekey.append(citekey)
            # Lazy %-style arguments: same message, formatted only if emitted.
            logging.info("%s is not in the bibliographic database", citekey)

    return no_citekey
# def validate_citations ends here

def convert_figures(string):
"""Find figures shorthands"""

graphic_pattern = re.compile(r"\+(.*?)\+")
graphic_pattern = re.compile(r"(?<!\\)\+(.*?)\+")

# +Fig.1CarteDuCielPotsdam!Glass photographic plate from the Carte
# du Ciel survey, Potsdam Observatory, Plate 5, taken January 11,
Expand All @@ -158,6 +170,15 @@ def convert_figures(string):
def make_figure_elements(list_of_figures, figure_directory):
"""Construct the figure element."""

bad_images = []
available_images = []
available_images_long = os.listdir(figure_directory)

for img in available_images_long:
available_images.append(os.path.splitext(img)[0])

print(available_images)

for figure in list_of_figures:
parent_tag = figure.getparent()
parent_tag.tag = "figure"
Expand All @@ -167,23 +188,27 @@ def make_figure_elements(list_of_figures, figure_directory):

parts = original_string.split("!")

if len(parts) == 3:
logging.info("This figure contains hyperimage directions")
elif len(parts) == 2:
available_images = os.listdir(figure_directory)
if len(parts) in range(2,4):
if parts[0] in available_images or parts[0] in available_images_long:
selected_image = parts[0]
logging.info("Found %s in the text. Selected %s as corresponding image." % (parts[0], selected_image))
figure.set("url", "images/" + selected_image)
else:
bad_images.append(original_string)

for image in available_images:
if image.startswith(parts[0]):
selected_image = image

logging.info("Found %s in the text. Selected %s as corresponding image." % (parts[0], selected_image))

figure.set("url", "images/" + selected_image)
head_element = etree.Element("head")
parent_tag.insert(1, head_element)
head_element.text = parts[1]

if len(parts) == 3:
logging.info("This figure contains hyperimage directions")
yenda_command = etree.Comment("Hyperimage direction: %s" % parts[2])
parent_tag.append(yenda_command)

else:
logging.info("The figure string could be be split by '!': %s" % figure)
logging.info("The figure string could not be split by '!': %s" % figure)

return bad_images
# def make_figure_elements ends here

def cleanup_xml(xml_tree):
Expand Down Expand Up @@ -279,6 +304,12 @@ def fix_tei_header(xml_tree):
return xml_tree
# def fix_tei_header ends here

def evaluate_report(report):
    """Print collection of not found items.

    The report object is written to standard output unchanged via
    print(); nothing is returned.
    """

    print(report)
# def evaluate_report ends here

def main():
"""The main bit"""

Expand Down Expand Up @@ -374,6 +405,7 @@ def main():
tree.write(output, pretty_print=True, xml_declaration=True,encoding="utf-8")
logging.info("Wrote %s." % output)

evaluate_report(report)
# def main ends here

if __name__ == '__main__':
Expand Down

0 comments on commit 0ddbde5

Please sign in to comment.