diff --git a/tei2imxml.py b/tei2imxml.py index c5f0721..390ad88 100644 --- a/tei2imxml.py +++ b/tei2imxml.py @@ -109,26 +109,26 @@ def get_field(xml_tree, query_path, mandatory=False, findall=False, noformat=Fal # Optional (according to database schema) info_dict['eoa_subtitle'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:title[@type='sub']/text()") - info_dict['eoa_isbn'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:publicationStmt/t:idno[@type='ISBN']/text()") - info_dict['eoa_price'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:extent/t:measure[@unit='EUR']/@quantity") - info_dict['eoa_shoplink_url'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:publicationStmt/t:distributor/@xml:base") - info_dict['eoa_shoplink_id'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:publicationStmt/t:distributor/@xml:id") - info_dict['eoa_shoplink_text'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:publicationStmt/t:distributor/text()") + info_dict['eoa_isbn'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:publicationStmt/t:idno[@type='isbn']/text()") + info_dict['eoa_price'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:extent/t:measure[@type='price']/@quantity") + info_dict['eoa_currency'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:extent/t:measure[@type='price']/@unit") + info_dict['eoa_shoplink_url'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:publicationStmt/t:idno[@type='shoplink']/text()") + info_dict['eoa_shoplink_text'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:publicationStmt/t:distributor/t:orgName/text()") info_dict['eoa_brief_desc'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:sourceDesc/t:ab[@type='BriefDescription']/text()") info_dict['eoa_detail_desc'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:sourceDesc/t:ab[@type='DetailedDescription']/text()") info_dict['eoa_additional_info'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:sourceDesc/t:ab[@type='additionalinformation']/text()") info_dict['eoa_dedication'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:sourceDesc/t:ab[@type='dedication']/text()") # these references here need to be resolved - info_dict['eoa_submitters'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:editor[@role='submitter']", findall=True) - info_dict['eoa_publicationmanagers'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:editor[@role='publicationmanager']", findall=True) - info_dict['eoa_publicationassistants'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:editor[@role='publicationassistant']", findall=True) - info_dict['eoa_editorialcoordinators'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:editor[@role='editorialcoordinator']", findall=True) - info_dict['eoa_copyeditors'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:editor[@role='copyeditor']", findall=True) - info_dict['eoa_translators'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:editor[@role='translator']", findall=True) - info_dict['eoa_keywords'] = get_field(xml_tree, "//t:teiHeader/t:profileDesc/t:textClass/t:keywords/t:list/t:item", findall=True) - info_dict['eoa_authors'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:author", findall=True) - info_dict['eoa_editors'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:editor", findall=True) + info_dict['eoa_submitters'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:editor[@role='submitter']/@ref", findall=True) + info_dict['eoa_publicationmanagers'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:editor[@role='publicationmanager']/@ref", findall=True) + info_dict['eoa_publicationassistants'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:editor[@role='publicationassistant']/@ref", findall=True) + info_dict['eoa_editorialcoordinators'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:editor[@role='editorialcoordinator']/@ref", findall=True) + info_dict['eoa_copyeditors'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:editor[@role='copyeditor']/@ref", findall=True) + info_dict['eoa_translators'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:editor[@role='translator']/@ref", findall=True) + info_dict['eoa_keywords'] = get_field(xml_tree, "//t:teiHeader/t:profileDesc/t:textClass/t:keywords/t:list/t:item", findall=True, noformat=True) + info_dict['eoa_authors'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:author/@ref", findall=True) + info_dict['eoa_editors'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:editor[@role='volumeeditor']/@ref", findall=True) return info_dict # def get_publication_info ends here @@ -158,13 +158,14 @@ def make_publication_cfg(info_dict): technical_config['PublicationDate'] = info_dict['eoa_publicationdate'] #ok technical_config['PublicationYear'] = datetime.strftime(date_object, "%Y") technical_config['ISBN'] = info_dict['eoa_isbn'] #ok - technical_config['Price'] = info_dict['eoa_price'] #ok - technical_config['Shoplink'] = """{2}""".format(info_dict['eoa_shoplink_url'], info_dict['eoa_shoplink_id'].replace("id_", ""), info_dict['eoa_shoplink_text']) #ok + technical_config['Price'] = "{} {}".format(info_dict['eoa_price'], info_dict['eoa_currency']) + technical_config['Shoplink'] = """{1}""".format(info_dict['eoa_shoplink_url'], info_dict['eoa_shoplink_text']) #ok technical_config['Language'] = info_dict['eoa_language'] #ok - technical_config['License'] = info_dict['eoa_license'] #ok + technical_config['License'] = info_dict['eoa_license'].strip() #ok general_config['BriefDescription'] = info_dict['eoa_brief_desc'] #ok - general_config['Submitter'] = ", ".join(info_dict['eoa_submitters']) #ok + if info_dict['eoa_submitters'] is not None: + general_config['Submitter'] = ", ".join(info_dict['eoa_submitters']) #ok general_config['PublicationManagment'] = ", ".join(info_dict['eoa_publicationmanagers']) general_config['PublicationAssistants'] = ", ".join(info_dict['eoa_publicationassistants'])