From 1bcd4e8ad4fddfd23591aa2c77b1fd5972565669 Mon Sep 17 00:00:00 2001 From: Klaus Thoden Date: Fri, 31 Aug 2018 18:09:27 +0200 Subject: [PATCH] Adjusting script to schema --- tei2imxml.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tei2imxml.py b/tei2imxml.py index 75110e4..dc22f8c 100644 --- a/tei2imxml.py +++ b/tei2imxml.py @@ -88,8 +88,8 @@ def get_field(xml_tree, query_path, mandatory=False, findall=False): info_dict['eoa_publicationdate'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:publicationStmt/t:date/@when", mandatory=True) info_dict['eoa_language'] = get_field(xml_tree, "//t:teiHeader/t:profileDesc/t:langUsage/t:language/@ident", mandatory=True) info_dict['eoa_license'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:publicationStmt/t:availability/t:licence/text()", mandatory=True) - info_dict['eoa_number'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:title[@level='s']/@n", mandatory=True) - info_dict['eoa_series'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:title[@level='s']/text()", mandatory=True) + info_dict['eoa_number'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:seriesStmt/t:idno[@type='number']/text()", mandatory=True) + info_dict['eoa_series'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:seriesStmt/t:title/text()", mandatory=True) info_dict['eoa_title'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:title[@type='main']/text()", mandatory=True) # Optional (according to database schema) @@ -104,6 +104,7 @@ def get_field(xml_tree, query_path, mandatory=False, findall=False): info_dict['eoa_additional_info'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:sourceDesc/t:ab[@type='additionalinformation']/text()") info_dict['eoa_dedication'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:sourceDesc/t:ab[@type='dedication']/text()") + # these references here need to be resolved info_dict['eoa_submitters'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:editor[@role='submitter']", findall=True) info_dict['eoa_publicationmanagers'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:editor[@role='publicationmanager']", findall=True) info_dict['eoa_publicationassistants'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:editor[@role='publicationassistant']", findall=True) @@ -353,9 +354,8 @@ def transform_body(xml_tree, cited_data, publang): author_ids = chapter.get("resp") if author_ids is not None: list_author_id = author_ids.split(" ") - + logging.info("Found chapter author shortcuts: {}.".format(list_author_id)) if len(list_author_id) > 0: - print("hier", list_author_id, publang) author_string = format_authors(list_author_id, publang, xml_tree) # print(author_string) eoa_author = etree.Element("EOAauthor") @@ -553,12 +553,15 @@ def transform_body(xml_tree, cited_data, publang): if rend_attribute == "italic": hi.set("rend", "it") - elif rend_attribute == "sup": + elif rend_attribute == "superscript": hi.tag = "EOAup" del hi.attrib["rend"] - elif rend_attribute == "sub": + elif rend_attribute == "subscript": hi.tag = "EOAdown" del hi.attrib["rend"] + elif rend_attribute == "bold": + hi.tag = "EOAbold" + del hi.attrib["rend"] else: logging.info("The rend attribute in hi has the value %s. This is not supported" % rend_attribute)