Skip to content

Commit

Permalink
Browse files: browse the repository at this point in the history
Adjusting script to schema
  • Loading branch information
Klaus Thoden committed Aug 31, 2018
1 parent 12fdaf2 commit 1bcd4e8
Showing 1 changed file with 9 additions and 6 deletions.
15 changes: 9 additions & 6 deletions tei2imxml.py
Expand Up @@ -88,8 +88,8 @@ def get_field(xml_tree, query_path, mandatory=False, findall=False):
info_dict['eoa_publicationdate'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:publicationStmt/t:date/@when", mandatory=True)
info_dict['eoa_language'] = get_field(xml_tree, "//t:teiHeader/t:profileDesc/t:langUsage/t:language/@ident", mandatory=True)
info_dict['eoa_license'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:publicationStmt/t:availability/t:licence/text()", mandatory=True)
info_dict['eoa_number'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:title[@level='s']/@n", mandatory=True)
info_dict['eoa_series'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:title[@level='s']/text()", mandatory=True)
info_dict['eoa_number'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:seriesStmt/t:idno[@type='number']/text()", mandatory=True)
info_dict['eoa_series'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:seriesStmt/t:title/text()", mandatory=True)
info_dict['eoa_title'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:title[@type='main']/text()", mandatory=True)

# Optional (according to database schema)
Expand All @@ -104,6 +104,7 @@ def get_field(xml_tree, query_path, mandatory=False, findall=False):
info_dict['eoa_additional_info'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:sourceDesc/t:ab[@type='additionalinformation']/text()")
info_dict['eoa_dedication'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:sourceDesc/t:ab[@type='dedication']/text()")

# these references here need to be resolved
info_dict['eoa_submitters'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:editor[@role='submitter']", findall=True)
info_dict['eoa_publicationmanagers'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:editor[@role='publicationmanager']", findall=True)
info_dict['eoa_publicationassistants'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:editor[@role='publicationassistant']", findall=True)
Expand Down Expand Up @@ -353,9 +354,8 @@ def transform_body(xml_tree, cited_data, publang):
author_ids = chapter.get("resp")
if author_ids is not None:
list_author_id = author_ids.split(" ")

logging.info("Found chapter author shortcuts: {}.".format(list_author_id))
if len(list_author_id) > 0:
print("hier", list_author_id, publang)
author_string = format_authors(list_author_id, publang, xml_tree)
# print(author_string)
eoa_author = etree.Element("EOAauthor")
Expand Down Expand Up @@ -553,12 +553,15 @@ def transform_body(xml_tree, cited_data, publang):

if rend_attribute == "italic":
hi.set("rend", "it")
elif rend_attribute == "sup":
elif rend_attribute == "superscript":
hi.tag = "EOAup"
del hi.attrib["rend"]
elif rend_attribute == "sub":
elif rend_attribute == "subscript":
hi.tag = "EOAdown"
del hi.attrib["rend"]
elif rend_attribute == "bold":
hi.tag = "EOAbold"
del hi.attrib["rend"]
else:
logging.info("The rend attribute in hi has the value %s. This is not supported" % rend_attribute)

Expand Down

0 comments on commit 1bcd4e8

Please sign in to comment.