Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Writer for publication.cfg
  • Loading branch information
Klaus Thoden committed Mar 15, 2018
1 parent d109b3c commit 2676924
Show file tree
Hide file tree
Showing 2 changed files with 147 additions and 217 deletions.
217 changes: 0 additions & 217 deletions tei2django.py

This file was deleted.

147 changes: 147 additions & 0 deletions transform_xml.py
Expand Up @@ -12,6 +12,8 @@
import subprocess
import pickle
import shlex
import configparser
from datetime import datetime
from bs4 import BeautifulSoup
from lxml import etree, objectify
from lxml.html import soupparser
Expand All @@ -27,10 +29,149 @@

# Directory for intermediate files (e.g. IntermediateXMLFile.xml).
# expanduser() leaves the value unchanged here because it does not start
# with "~", so this is a path relative to the current working directory.
TMP_DIR = os.path.expanduser("tmp_files")
# NOTE(review): absolute path on a developer machine; presumably the CSL
# style used when formatting references -- confirm and make configurable.
CSL_FILE = "/Users/kthoden/EOAKram/dev/eoa-csl/eoa.csl"
# Directory that receives publication.cfg (relative path, see TMP_DIR note).
OUTPUT_DIR = os.path.expanduser("CONVERT")

# this is duplicated from libeoaconvert
# Maps a language code to the heading text for footnotes in that language.
dictLangFootnotes = {"it" : "Note a piè pagina", "fr" : "notes en bas de page", "de" : "Fußnoten", "en" : "Footnotes"}

def get_publication_info(xml_tree):
    """Query the TEI document for metadata fields.

    Args:
        xml_tree: parsed TEI document. It must provide
            ``xpath(path, namespaces=...)`` and
            ``findall(path, namespaces=...)``, as lxml trees do.

    Returns:
        A dictionary keyed by ``eoa_*`` names. Single-valued fields hold
        the first XPath match (``""`` when an optional field is absent);
        multi-valued fields hold a list with the text of every matching
        element that actually has text.

    Raises:
        SystemExit: if a mandatory field cannot be found.
    """

    ns_map = {
        "t": "http://www.tei-c.org/ns/1.0",
        "c": "http://web.resource.org/cc/",
        "r": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
    }

    def get_field(xml_tree, query_path, mandatory=False, findall=False):
        """Query XML for metadata fields.

        Default behaviour is to move on when nothing is found; if
        mandatory is set to True, exit the program instead.
        With findall=True a list of element texts is returned.
        """
        if findall:
            found = xml_tree.findall(query_path, namespaces=ns_map)
            # Elements without text (e.g. an empty <editor/>) have
            # ``text is None``; leaving them in would break the later
            # ", ".join() and configparser assignments.
            texts = [element.text for element in found
                     if element.text is not None]
            if mandatory and not texts:
                sys.exit("Field stored in %s is mandatory. Exiting." % query_path)
            return texts

        matches = xml_tree.xpath(query_path, namespaces=ns_map)
        if matches:
            return matches[0]
        if mandatory:
            sys.exit("Field stored in %s is mandatory. Exiting." % query_path)
        return ""
    # def get_field ends here

    # Shared XPath prefixes; the composed queries are the same strings
    # as the fully spelled-out paths.
    file_desc = "//t:teiHeader/t:fileDesc"
    profile_desc = "//t:teiHeader/t:profileDesc"
    title_stmt = file_desc + "/t:titleStmt"
    publication_stmt = file_desc + "/t:publicationStmt"
    source_desc = file_desc + "/t:sourceDesc"

    # Mandatory values (according to database schema)
    mandatory_fields = (
        ("eoa_publicationdate", publication_stmt + "/t:date/@when"),
        ("eoa_language", profile_desc + "/t:langUsage/t:language/@ident"),
        ("eoa_license", publication_stmt + "/t:availability/t:licence/text()"),
        ("eoa_number", title_stmt + "/t:title[@level='s']/@n"),
        ("eoa_series", title_stmt + "/t:title[@level='s']/text()"),
        ("eoa_title", title_stmt + "/t:title[@type='main']/text()"),
    )

    # Optional (according to database schema)
    optional_fields = (
        ("eoa_subtitle", title_stmt + "/t:title[@type='sub']/text()"),
        ("eoa_isbn", publication_stmt + "/t:idno[@type='ISBN']/text()"),
        ("eoa_price", file_desc + "/t:extent/t:measure[@unit='EUR']/@quantity"),
        ("eoa_shoplink_url", publication_stmt + "/t:distributor/@xml:base"),
        ("eoa_shoplink_id", publication_stmt + "/t:distributor/@xml:id"),
        ("eoa_shoplink_text", publication_stmt + "/t:distributor/text()"),
        ("eoa_brief_desc", source_desc + "/t:ab[@type='BriefDescription']/text()"),
        ("eoa_detail_desc", source_desc + "/t:ab[@type='DetailedDescription']/text()"),
        ("eoa_additional_info", source_desc + "/t:ab[@type='additionalinformation']/text()"),
        ("eoa_dedication", source_desc + "/t:ab[@type='dedication']/text()"),
    )

    # Fields that may occur several times; returned as lists.
    list_fields = (
        ("eoa_submitters", title_stmt + "/t:editor[@role='submitter']"),
        ("eoa_publicationmanagers", title_stmt + "/t:editor[@role='publicationmanager']"),
        ("eoa_publicationassistants", title_stmt + "/t:editor[@role='publicationassistant']"),
        ("eoa_editorialcoordinators", title_stmt + "/t:editor[@role='editorialcoordinator']"),
        ("eoa_copyeditors", title_stmt + "/t:editor[@role='copyeditor']"),
        ("eoa_translators", title_stmt + "/t:editor[@role='translator']"),
        ("eoa_keywords", profile_desc + "/t:textClass/t:keywords/t:list/t:item"),
        ("eoa_authors", title_stmt + "/t:author"),
    )

    info_dict = {}
    for key, path in mandatory_fields:
        info_dict[key] = get_field(xml_tree, path, mandatory=True)
    for key, path in optional_fields:
        info_dict[key] = get_field(xml_tree, path)
    for key, path in list_fields:
        info_dict[key] = get_field(xml_tree, path, findall=True)

    return info_dict
# def get_publication_info ends here

def make_publication_cfg(info_dict, output_dir=None):
    """Write ``publication.cfg`` from the collected publication metadata.

    The file has three sections (``Technical``, ``General``, ``Authors``)
    and uses ``:`` as the key/value delimiter.

    Args:
        info_dict: dictionary with the ``eoa_*`` keys produced by
            ``get_publication_info``. Single-valued entries are strings,
            the plural entries are lists of strings.
        output_dir: directory to write ``publication.cfg`` into.
            Defaults to the module-level ``OUTPUT_DIR``.

    Raises:
        SystemExit: if more than 8 keywords or more than 5 authors are
            given.
        ValueError: if ``eoa_publicationdate`` is not in ``YYYY-MM-DD``
            form.
    """
    max_keywords = 8
    max_authors = 5

    if output_dir is None:
        output_dir = OUTPUT_DIR

    # NOTE(review): the default BasicInterpolation rejects values that
    # contain a bare "%"; confirm whether descriptions may contain one.
    config = configparser.ConfigParser(delimiters=(":",))
    # Keep option names case-sensitive (configparser lower-cases them
    # by default).
    # https://stackoverflow.com/questions/1611799/preserve-case-in-configparser
    config.optionxform = str

    # set up three main bits
    config['Technical'] = {}
    technical_config = config['Technical']
    config['General'] = {}
    general_config = config['General']
    config['Authors'] = {}
    authors_config = config['Authors']

    date_object = datetime.strptime(info_dict['eoa_publicationdate'], "%Y-%m-%d")

    # fill in the fields
    technical_config['Serie'] = info_dict['eoa_series']
    technical_config['Number'] = info_dict['eoa_number']
    technical_config['Title'] = info_dict['eoa_title']
    technical_config['Subtitle'] = info_dict['eoa_subtitle']
    technical_config['PublicationDate'] = info_dict['eoa_publicationdate']
    technical_config['PublicationYear'] = date_object.strftime("%Y")
    technical_config['ISBN'] = info_dict['eoa_isbn']
    technical_config['Price'] = info_dict['eoa_price']
    # The link target is the distributor base URL followed by the
    # distributor id with its "id_" marker removed.
    technical_config['Shoplink'] = """<a href="{0}{1}">{2}</a>""".format(
        info_dict['eoa_shoplink_url'],
        info_dict['eoa_shoplink_id'].replace("id_", ""),
        info_dict['eoa_shoplink_text'],
    )
    technical_config['Language'] = info_dict['eoa_language']
    technical_config['License'] = info_dict['eoa_license']

    general_config['BriefDescription'] = info_dict['eoa_brief_desc']
    general_config['Submitter'] = ", ".join(info_dict['eoa_submitters'])
    # Option name spelling ("Managment") is kept as is; presumably
    # whatever reads this file expects it -- verify before renaming.
    general_config['PublicationManagment'] = ", ".join(info_dict['eoa_publicationmanagers'])
    general_config['PublicationAssistants'] = ", ".join(info_dict['eoa_publicationassistants'])

    keywords = info_dict['eoa_keywords']
    if len(keywords) > max_keywords:
        sys.exit("Too many Keywords. Up to 8 are allowed. Exiting.")
    # Number by position. Looking the keyword up with list.index() gave
    # repeated keywords the same number, so they overwrote each other.
    for position, keyword in enumerate(keywords, start=1):
        general_config["Keyword" + str(position)] = keyword

    general_config['DetailedDescription'] = info_dict['eoa_detail_desc']
    general_config['AdditionalInformation'] = info_dict['eoa_additional_info']
    general_config['EditorialCoordination'] = ", ".join(info_dict['eoa_editorialcoordinators'])
    general_config['Copyediting'] = ", ".join(info_dict['eoa_copyeditors'])
    general_config['Dedication'] = info_dict['eoa_dedication']
    general_config['Translator'] = ", ".join(info_dict['eoa_translators'])

    authors = info_dict['eoa_authors']
    if len(authors) > max_authors:
        sys.exit("Too many authors. Up to 5 are allowed. Exiting.")
    # Always emit Author1..Author5; unused slots stay empty.
    padded_authors = list(authors) + [""] * (max_authors - len(authors))
    for position, author in enumerate(padded_authors, start=1):
        authors_config["Author" + str(position)] = author

    authors_config['Zusatz'] = ""

    output_filename = os.path.join(output_dir, "publication.cfg")
    # Explicit encoding: values may contain non-ASCII characters and the
    # result should not depend on the platform's default encoding.
    with open(output_filename, 'w', encoding="utf-8") as configfile:
        config.write(configfile)

    print("Wrote", output_filename)
# def make_publication_cfg ends here

def render_reference(list_of_xml_elements, cited_data):
"""Provide an attribute for a formatted version of Reference.
Expand Down Expand Up @@ -521,6 +662,12 @@ def add_bibliography(xml_tree, refs_for_bib_chapter):
# if not os.path.exists("debug"):
# os.mkdir(os.path.expanduser("debug"))

if not os.path.exists(OUTPUT_DIR):
os.mkdir(os.path.expanduser(OUTPUT_DIR))

publication_info = get_publication_info(xml_tree)
make_publication_cfg(publication_info)

if not os.path.exists(TMP_DIR):
os.mkdir(os.path.expanduser(TMP_DIR))
output_filename = TMP_DIR + os.path.sep + "IntermediateXMLFile.xml"
Expand Down

0 comments on commit 2676924

Please sign in to comment.