diff --git a/tei2imxml.py b/tei2imxml.py index 4000bf9..e7a7bf6 100644 --- a/tei2imxml.py +++ b/tei2imxml.py @@ -350,15 +350,17 @@ def transform_body(xml_tree, cited_data, publang): chapter_title = chapter.find("t:head", namespaces=NS_MAP) author_ids = chapter.get("resp") - list_author_id = author_ids.split(" ") - - if len(list_author_id) > 0: - author_string = format_authors(list_author_id, publang) - # print(author_string) - eoa_author = etree.Element("EOAauthor") - eoa_author.text = author_string - chapter_title.insert(0, eoa_author) - + if author_ids is not None: + list_author_id = author_ids.split(" ") + + if len(list_author_id) > 0: + author_string = format_authors(list_author_id, publang) + # print(author_string) + eoa_author = etree.Element("EOAauthor") + eoa_author.text = author_string + chapter_title.insert(0, eoa_author) + else: + logging.info("No chapter author.") chapter.insert(0, chapter_title) eoa_sections = xml_tree.xpath("//t:div[@type='section']", namespaces=NS_MAP) @@ -510,24 +512,32 @@ def transform_body(xml_tree, cited_data, publang): eoa_figures = xml_tree.xpath("//t:figure", namespaces=NS_MAP) for figure in eoa_figures: - figure.tag = "EOAfigure" - figure.set("id", "anotheruid") - - anchor_element = etree.SubElement(figure, "anchor") - # anchor_element.set("id-text", "id-text") - + # careful, caption can contain markup! + caption_element = figure.find("t:head", namespaces=NS_MAP) figure_type = figure.get("type") if figure_type == "hionly": pass else: - # careful, caption can contain markup! - caption_element = figure.xpath("t:head", namespaces=NS_MAP)[0] - caption_element.tag = "caption" + if caption_element is not None: + figure.tag = "EOAfigure" + figure.set("id", "anotheruid") + + anchor_element = etree.SubElement(figure, "anchor") + # anchor_element.set("id-text", "id-text") - fig_p_element = etree.SubElement(figure, "p") - figure_file = etree.SubElement(fig_p_element, "file").text = figure.xpath("t:graphic/@url", namespaces=NS_MAP)[0] - figure_width = etree.SubElement(fig_p_element, "width").text = "60" #whatever - fig_p_element.append(caption_element) + caption_element.tag = "caption" + + fig_p_element = etree.SubElement(figure, "p") + figure_file = etree.SubElement(fig_p_element, "file").text = figure.xpath("t:graphic/@url", namespaces=NS_MAP)[0] + figure_width = etree.SubElement(fig_p_element, "width").text = "60" #whatever + fig_p_element.append(caption_element) + else: + figure.tag = "EOAfigurenonumber" + fig_p_element = etree.SubElement(figure, "p") + figure_file = etree.SubElement(fig_p_element, "file").text = figure.xpath("t:graphic/@url", namespaces=NS_MAP)[0] + figure_width = etree.SubElement(fig_p_element, "width").text = "60" #whatever + + #

images/1.jpg33

etree.strip_elements(figure, "{%s}graphic" % ns_tei) @@ -550,6 +560,80 @@ def transform_body(xml_tree, cited_data, publang): else: logging.info("The rend attribute in hi has the value %s. This is not supported" % rend_attribute) + ########## + # Tables # + ########## + eoa_tables = xml_tree.xpath("//t:table", namespaces=NS_MAP) + +# turn + # + # This is a table + # + # Heading 1 + # Heading 2 + # Heading 3 + # Heading 4 + # + # + # Here + # you + # may + # find + # + # + # some + # data + # spread + # over + # + # + # the + # table + # in + # cells + # + #
+ +# into + +# +# sec3table1 +# This is a table +# L2.3cmL2.3cmL2.3cmL2.3cm +# +# +# TRUEHeading 1 +# Heading 2 +# Heading 3 +# Heading 4 +# +# +# Here +# you +# may +# find +# +# +# some +# data +# spread +# over +# +# +# the +# table +# in +# cells +# +#
+#
+ + for table in eoa_tables: + table.tag = "EOAtable" + table_id = (table.get("{http://www.w3.org/XML/1998/namespace}id")) + + table_label = etree.SubElement(table, "EOAtablelabel").text = table_id + ############## # References # ############## @@ -662,7 +746,7 @@ def update_ids(xml_tree): """Update the references in EOAref to the id value assigned in assign_ids""" xmlReferences = xml_tree.findall(".//EOAref") - + logging.debug("Found %d references", len(xmlReferences)) for xmlReference in xmlReferences: eoa_reference = xmlReference.find("ref") @@ -674,7 +758,9 @@ def update_ids(xml_tree): # pass # else: corresponding_eoa_id_element = xml_tree.xpath("//*[@xml:id='{}']".format(label_text)) - if len(corresponding_eoa_id_element) == 0: + logging.debug("The corresponding id element is %s", corresponding_eoa_id_element) + # if len(corresponding_eoa_id_element) == 0: + if corresponding_eoa_id_element is None: print("There seems to be no corresponding xml:id for %s. Exiting." % label_text) sys.exit() elif len(corresponding_eoa_id_element) > 1: @@ -815,8 +901,12 @@ def fix_bib_entries(div_snippet): if not os.path.exists(TMP_DIR): os.mkdir(os.path.expanduser(TMP_DIR)) - with open(TMP_DIR + os.path.sep + 'data.pickle', 'rb') as f: - data = pickle.load(f) + try: + with open(TMP_DIR + os.path.sep + 'data.pickle', 'rb') as f: + data = pickle.load(f) + except FileNotFoundError: + print("File 'data.pickle' not found. You should run 'fix_tei.py' first. Exiting.") + sys.exit() tei_document = sys.argv[-1] xml_tree = etree.parse(tei_document) @@ -824,8 +914,8 @@ def fix_bib_entries(div_snippet): publication_language = xml_tree.xpath("//t:teiHeader/t:profileDesc/t:langUsage/t:language/@ident", namespaces=NS_MAP)[0] bib_data = {} - bib_data["source"] = xml_tree.xpath("//t:teiHeader/t:fileDesc/t:sourceDesc/t:ab[@type='bibliography']/t:ref/@target", namespaces=NS_MAP)[0] - bib_data["type"] = xml_tree.xpath("//t:teiHeader/t:fileDesc/t:sourceDesc/t:ab[@type='bibliography']/t:ref/@type", namespaces=NS_MAP)[0] + bib_data["source"] = xml_tree.xpath("//t:teiHeader/t:fileDesc/t:sourceDesc/t:ab[@type='bibdatabase']/t:ref/@target", namespaces=NS_MAP)[0] + bib_data["type"] = xml_tree.xpath("//t:teiHeader/t:fileDesc/t:sourceDesc/t:ab[@type='bibdatabase']/t:ref/@type", namespaces=NS_MAP)[0] logging.info("The bibfile is %s and this publication type is %s." % (bib_data["source"], bib_data["type"])) if bib_data["type"] not in ["monograph", "anthology", "monograph-numeric", "anthology-numeric"]: print("The bibliography type %s is not allowed." % bib_data["type"])