Skip to content

Commit

Permalink
Preserve original graphic string as CDATA
Browse files Browse the repository at this point in the history
Captions might contain XML markup
  • Loading branch information
Klaus Thoden committed Mar 5, 2018
1 parent 035a952 commit e43eded
Showing 1 changed file with 7 additions and 8 deletions.
15 changes: 7 additions & 8 deletions prepare_tei.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@ def validate_citations(used_citekeys, bibdata):
def convert_figures(string):
"""Find figures shorthands"""

# Negative lookbehind assertion: a literal + character must be escaped with a backslash (\+).
graphic_pattern = re.compile(r"(?<!\\)\+(.*?)\+")

# +Fig.1CarteDuCielPotsdam!Glass photographic plate from the Carte
Expand All @@ -162,7 +163,7 @@ def convert_figures(string):
logging.info("Found %s figures" % len(figures))

for figure in figures:
string = re.sub(graphic_pattern, r"<graphic orig_string='\g<1>'/>", string)
string = re.sub(graphic_pattern, r"<graphic><![CDATA[\g<1>]]></graphic>", string)

return string
# def convert_figures ends here
Expand All @@ -177,14 +178,12 @@ def make_figure_elements(list_of_figures, figure_directory):
for img in available_images_long:
available_images.append(os.path.splitext(img)[0])

print(available_images)

for figure in list_of_figures:
parent_tag = figure.getparent()
parent_tag.tag = "figure"

original_string = figure.get("orig_string")
figure.attrib.pop("orig_string")
original_string = figure.text
figure.clear()

parts = original_string.split("!")

Expand All @@ -196,9 +195,9 @@ def make_figure_elements(list_of_figures, figure_directory):
else:
bad_images.append(original_string)

head_element = etree.Element("head")
caption = "<head>" + parts[1] + "</head>"
head_element = etree.fromstring(caption)
parent_tag.insert(1, head_element)
head_element.text = parts[1]

if len(parts) == 3:
logging.info("This figure contains hyperimage directions")
Expand Down Expand Up @@ -316,7 +315,7 @@ def plural(num, noun):
# def plural ends here

def evaluate_report(report):
"""Print collection of not found items"""
"""Print report of conversion."""

print("="*60)
print(' '*4, "Conversion report")
Expand Down

0 comments on commit e43eded

Please sign in to comment.