Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Fixed XPaths
  • Loading branch information
Klaus Thoden committed Sep 4, 2018
1 parent bb3dbc5 commit 017e257
Showing 1 changed file with 19 additions and 18 deletions.
37 changes: 19 additions & 18 deletions tei2imxml.py
Expand Up @@ -109,26 +109,26 @@ def get_field(xml_tree, query_path, mandatory=False, findall=False, noformat=Fal

# Optional (according to database schema)
info_dict['eoa_subtitle'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:title[@type='sub']/text()")
info_dict['eoa_isbn'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:publicationStmt/t:idno[@type='ISBN']/text()")
info_dict['eoa_price'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:extent/t:measure[@unit='EUR']/@quantity")
info_dict['eoa_shoplink_url'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:publicationStmt/t:distributor/@xml:base")
info_dict['eoa_shoplink_id'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:publicationStmt/t:distributor/@xml:id")
info_dict['eoa_shoplink_text'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:publicationStmt/t:distributor/text()")
info_dict['eoa_isbn'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:publicationStmt/t:idno[@type='isbn']/text()")
info_dict['eoa_price'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:extent/t:measure[@type='price']/@quantity")
info_dict['eoa_currency'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:extent/t:measure[@type='price']/@unit")
info_dict['eoa_shoplink_url'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:publicationStmt/t:idno[@type='shoplink']/text()")
info_dict['eoa_shoplink_text'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:publicationStmt/t:distributor/t:orgName/text()")
info_dict['eoa_brief_desc'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:sourceDesc/t:ab[@type='BriefDescription']/text()")
info_dict['eoa_detail_desc'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:sourceDesc/t:ab[@type='DetailedDescription']/text()")
info_dict['eoa_additional_info'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:sourceDesc/t:ab[@type='additionalinformation']/text()")
info_dict['eoa_dedication'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:sourceDesc/t:ab[@type='dedication']/text()")

# these references here need to be resolved
info_dict['eoa_submitters'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:editor[@role='submitter']", findall=True)
info_dict['eoa_publicationmanagers'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:editor[@role='publicationmanager']", findall=True)
info_dict['eoa_publicationassistants'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:editor[@role='publicationassistant']", findall=True)
info_dict['eoa_editorialcoordinators'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:editor[@role='editorialcoordinator']", findall=True)
info_dict['eoa_copyeditors'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:editor[@role='copyeditor']", findall=True)
info_dict['eoa_translators'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:editor[@role='translator']", findall=True)
info_dict['eoa_keywords'] = get_field(xml_tree, "//t:teiHeader/t:profileDesc/t:textClass/t:keywords/t:list/t:item", findall=True)
info_dict['eoa_authors'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:author", findall=True)
info_dict['eoa_editors'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:editor", findall=True)
info_dict['eoa_submitters'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:editor[@role='submitter']/@ref", findall=True)
info_dict['eoa_publicationmanagers'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:editor[@role='publicationmanager']/@ref", findall=True)
info_dict['eoa_publicationassistants'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:editor[@role='publicationassistant']/@ref", findall=True)
info_dict['eoa_editorialcoordinators'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:editor[@role='editorialcoordinator']/@ref", findall=True)
info_dict['eoa_copyeditors'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:editor[@role='copyeditor']/@ref", findall=True)
info_dict['eoa_translators'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:editor[@role='translator']/@ref", findall=True)
info_dict['eoa_keywords'] = get_field(xml_tree, "//t:teiHeader/t:profileDesc/t:textClass/t:keywords/t:list/t:item", findall=True, noformat=True)
info_dict['eoa_authors'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:author/@ref", findall=True)
info_dict['eoa_editors'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:editor[@role='volumeeditor']/@ref", findall=True)

return info_dict
# def get_publication_info ends here
Expand Down Expand Up @@ -158,13 +158,14 @@ def make_publication_cfg(info_dict):
technical_config['PublicationDate'] = info_dict['eoa_publicationdate'] #ok
technical_config['PublicationYear'] = datetime.strftime(date_object, "%Y")
technical_config['ISBN'] = info_dict['eoa_isbn'] #ok
technical_config['Price'] = info_dict['eoa_price'] #ok
technical_config['Shoplink'] = """<a href="{0}{1}">{2}</a>""".format(info_dict['eoa_shoplink_url'], info_dict['eoa_shoplink_id'].replace("id_", ""), info_dict['eoa_shoplink_text']) #ok
technical_config['Price'] = "{} {}".format(info_dict['eoa_price'], info_dict['eoa_currency'])
technical_config['Shoplink'] = """<a href="{0}">{1}</a>""".format(info_dict['eoa_shoplink_url'], info_dict['eoa_shoplink_text']) #ok
technical_config['Language'] = info_dict['eoa_language'] #ok
technical_config['License'] = info_dict['eoa_license'] #ok
technical_config['License'] = info_dict['eoa_license'].strip() #ok

general_config['BriefDescription'] = info_dict['eoa_brief_desc'] #ok
general_config['Submitter'] = ", ".join(info_dict['eoa_submitters']) #ok
if info_dict['eoa_submitters'] is not None:
general_config['Submitter'] = ", ".join(info_dict['eoa_submitters']) #ok
general_config['PublicationManagment'] = ", ".join(info_dict['eoa_publicationmanagers'])
general_config['PublicationAssistants'] = ", ".join(info_dict['eoa_publicationassistants'])

Expand Down

0 comments on commit 017e257

Please sign in to comment.