diff --git a/tei2django.py b/tei2django.py
deleted file mode 100644
index c0c9215..0000000
--- a/tei2django.py
+++ /dev/null
@@ -1,217 +0,0 @@
-#!/usr/bin/python3
-# -*- coding: utf-8; mode: python -*-
-__version__ = "1.0"
-__date__ = "20170315"
-__author__ = "kthoden@mpiwg-berlin.mpg.de"
-__doc__ = """A converter from TEI to Django."""
-
-import sys
-import os
-import configparser
-from datetime import datetime
-from lxml import etree
-import mkimage
-
-OUTPUT_DIR = "./CONVERT"
-XSL_FILE = os.path.dirname(sys.argv[0]) + "/data/tei2django.xsl"
-FIGURE_DIR = "./data/images"
-
-def process_formulas(xml_tree):
- """Process formulas"""
-
- """
- Format of filenames: EOAineq_12_62.png chapter number
- """
-
- pass
-# def process formulas ends here
-
-def get_publication_info(xml_tree):
- """Query the TEI document for metadata fields.
-
- Return a dictionary"""
-
- info_dict = {}
-
- ns_tei = "http://www.tei-c.org/ns/1.0"
- ns_cc = "http://web.resource.org/cc/"
- ns_rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
- NS_MAP = {"t" : ns_tei, "c" : ns_cc, "r" : ns_rdf}
-
- def get_field(xml_tree, query_path, mandatory=False, findall=False):
- """Query XML for metadata fields.
-
- Default behaviour is if it fails, move on, if mandatory is set
- to True, exit the program
- """
-
- if findall is True:
- find_several = xml_tree.findall(query_path, namespaces=NS_MAP)
- if len(find_several) == 1:
- return_string = [find_several[0].text]
- else:
- return_string = [x.text for x in find_several]
- else:
- tmp_field = xml_tree.xpath(query_path, namespaces=NS_MAP)
- if len(tmp_field) > 0:
- return_string = tmp_field[0]
- else:
- if mandatory is True:
- sys.exit("Field stored in %s is mandatory. Exiting." % query_path)
- else:
- return_string = ""
-
- return return_string
- # def get_field ends here
-
- # Mandatory values (according to database schema)
- info_dict['eoa_publicationdate'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:publicationStmt/t:date/@when", mandatory=True)
- info_dict['eoa_language'] = get_field(xml_tree, "//t:teiHeader/t:profileDesc/t:langUsage/t:language/@ident", mandatory=True)
- info_dict['eoa_license'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:publicationStmt/t:availability/t:licence/text()", mandatory=True)
- info_dict['eoa_number'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:title[@level='s']/@n", mandatory=True)
- info_dict['eoa_series'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:title[@level='s']/text()", mandatory=True)
- info_dict['eoa_title'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:title[@type='main']/text()", mandatory=True)
-
- # Optional (according to database schema)
- info_dict['eoa_subtitle'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:title[@type='sub']/text()")
- info_dict['eoa_isbn'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:publicationStmt/t:idno[@type='ISBN']/text()")
- info_dict['eoa_price'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:extent/t:measure[@unit='EUR']/@quantity")
- info_dict['eoa_shoplink_url'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:publicationStmt/t:distributor/@xml:base")
- info_dict['eoa_shoplink_id'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:publicationStmt/t:distributor/@xml:id")
- info_dict['eoa_shoplink_text'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:publicationStmt/t:distributor/text()")
- info_dict['eoa_brief_desc'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:sourceDesc/t:ab[@type='BriefDescription']/text()")
- info_dict['eoa_detail_desc'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:sourceDesc/t:ab[@type='DetailedDescription']/text()")
- info_dict['eoa_additional_info'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:sourceDesc/t:ab[@type='additionalinformation']/text()")
- info_dict['eoa_dedication'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:sourceDesc/t:ab[@type='dedication']/text()")
-
- info_dict['eoa_submitters'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:editor[@role='submitter']", findall=True)
- info_dict['eoa_publicationmanagers'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:editor[@role='publicationmanager']", findall=True)
- info_dict['eoa_publicationassistants'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:editor[@role='publicationassistant']", findall=True)
- info_dict['eoa_editorialcoordinators'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:editor[@role='editorialcoordinator']", findall=True)
- info_dict['eoa_copyeditors'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:editor[@role='copyeditor']", findall=True)
- info_dict['eoa_translators'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:editor[@role='translator']", findall=True)
- info_dict['eoa_keywords'] = get_field(xml_tree, "//t:teiHeader/t:profileDesc/t:textClass/t:keywords/t:list/t:item", findall=True)
- info_dict['eoa_authors'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:author", findall=True)
-
- return info_dict
-# def get_publication_info ends here
-
-def populate_config_file(info_dict, config):
- """Parse the XML header and write it in config file."""
-
- # set up three main bits
- config['Technical'] = {}
- technical_config = config['Technical']
- config['General'] = {}
- general_config = config['General']
- config['Authors'] = {}
- authors_config = config['Authors']
-
- date_object = datetime.strptime(info_dict['eoa_publicationdate'], "%Y-%m-%d")
-
- # fill in the fields
- technical_config['Serie'] = info_dict['eoa_series'] #ok
- technical_config['Number'] = info_dict['eoa_number'] #ok
- technical_config['Title'] = info_dict['eoa_title'] #ok
- technical_config['Subtitle'] = info_dict['eoa_subtitle'] #ok
- technical_config['PublicationDate'] = info_dict['eoa_publicationdate'] #ok
- technical_config['PublicationYear'] = datetime.strftime(date_object, "%Y")
- technical_config['ISBN'] = info_dict['eoa_isbn'] #ok
- technical_config['Price'] = info_dict['eoa_price'] #ok
- technical_config['Shoplink'] = """{2}""".format(info_dict['eoa_shoplink_url'], info_dict['eoa_shoplink_id'].replace("id_", ""), info_dict['eoa_shoplink_text']) #ok
- technical_config['Language'] = info_dict['eoa_language'] #ok
- technical_config['License'] = info_dict['eoa_license'] #ok
-
- general_config['BriefDescription'] = info_dict['eoa_brief_desc'] #ok
- general_config['Submitter'] = ", ".join(info_dict['eoa_submitters']) #ok
- general_config['PublicationManagment'] = ", ".join(info_dict['eoa_publicationmanagers'])
- general_config['PublicationAssistants'] = ", ".join(info_dict['eoa_publicationassistants'])
-
- if len(info_dict['eoa_keywords']) > 8:
- sys.exit("Too many Keywords. Up to 8 are allowed. Exiting.")
- else:
- for keyword in info_dict['eoa_keywords']:
- keyword_label = "Keyword" + str(info_dict['eoa_keywords'].index(keyword) + 1)
- general_config[keyword_label] = keyword
-
- general_config['DetailedDescription'] = info_dict['eoa_detail_desc'] #ok
- general_config['AdditionalInformation'] = info_dict['eoa_additional_info'] #ok
- general_config['EditorialCoordination'] = ", ".join(info_dict['eoa_editorialcoordinators'])
- general_config['Copyediting'] = ", ".join(info_dict['eoa_copyeditors'])
- general_config['Dedication'] = info_dict['eoa_dedication'] #ok
- general_config['Translator'] = ", ".join(info_dict['eoa_translators'])
-
- if len(info_dict['eoa_authors']) > 5:
- sys.exit("Too many authors. Up to 5 are allowed. Exiting.")
- else:
- for entry in range(0, 5):
- author_label = "Author" + str(entry + 1)
- try:
- authors_config[author_label] = info_dict['eoa_authors'][entry]
- except IndexError:
- authors_config[author_label] = ""
-
- authors_config['Zusatz'] = ""
-
- return config
-# def populate_config_file ends here
-
-def write_publication_config(publication_dict):
- """Main function"""
-
- config = configparser.ConfigParser(delimiters=(':'))
- # https://stackoverflow.com/questions/1611799/preserve-case-in-configparser
- config.optionxform=str
- publication_config = populate_config_file(publication_dict, config)
-
- output_filename = OUTPUT_DIR + "/publication.cfg"
- with open(output_filename, 'w') as configfile:
- publication_config.write(configfile)
- print("Wrote", output_filename)
-# def write_publication_config ends here
-
-def write_django_xml(return_string):
- """Write the output of XSL transformation to file"""
-
- output_filename = OUTPUT_DIR + "/Django.xml"
-
- with open(output_filename, 'w') as djangofile:
- djangofile.write(str(return_string))
-
- print("Wrote", output_filename)
-# def write_django_xml ends here
-
-def xsl_for_body(xml_file, xsl_file):
- """Perform XSL transformation of body.
-
- Return XSLT result tree."""
-
- xml_tree = etree.parse(xml_file)
- parsed_xsl_file = etree.parse(xsl_file)
- transformer = etree.XSLT(parsed_xsl_file)
- result_tree = transformer(xml_tree)
-
- return result_tree
-# def xsl_for_body ends here
-
-if __name__ == '__main__':
- if len(sys.argv) == 1:
- print("You must specify an input file!")
- sys.exit()
- elif len(sys.argv) > 2:
- print("You can work with only one publication at a time!")
- sys.exit()
-
- tei_document = sys.argv[-1]
-
- xml_tree = etree.parse(tei_document)
- publication_dict = get_publication_info(xml_tree)
-
- if not os.path.exists(OUTPUT_DIR):
- os.mkdir(os.path.expanduser(OUTPUT_DIR))
-
- write_publication_config(publication_dict)
- # mkimage.create_cover(publication_dict, FIGURE_DIR, OUTPUT_DIR + "/Cover.jpg")
- body_transformed = xsl_for_body(tei_document, XSL_FILE)
- write_django_xml(body_transformed)
-# finis
diff --git a/transform_xml.py b/transform_xml.py
index b92aaaf..38d1c9e 100644
--- a/transform_xml.py
+++ b/transform_xml.py
@@ -12,6 +12,8 @@
import subprocess
import pickle
import shlex
+import configparser
+from datetime import datetime
from bs4 import BeautifulSoup
from lxml import etree, objectify
from lxml.html import soupparser
@@ -27,10 +29,149 @@
TMP_DIR = os.path.expanduser("tmp_files")
CSL_FILE = "/Users/kthoden/EOAKram/dev/eoa-csl/eoa.csl"
+OUTPUT_DIR = os.path.expanduser("CONVERT")
# this is duplicated from libeoaconvert
dictLangFootnotes = {"it" : "Note a piè pagina", "fr" : "notes en bas de page", "de" : "Fußnoten", "en" : "Footnotes"}
+def get_publication_info(xml_tree):
+    """Query the TEI document for metadata fields.
+
+    Args:
+        xml_tree: parsed TEI document. It must provide ``xpath`` and
+            ``findall`` methods that accept a ``namespaces`` mapping.
+
+    Returns:
+        A dictionary keyed by ``eoa_*`` names. Single-valued fields hold
+        the first XPath match, or "" when an optional field is absent.
+        Multi-valued fields (editors, keywords, authors) hold a list of
+        strings, which is empty when nothing matches.
+
+    Raises:
+        SystemExit: if one of the mandatory fields has no match.
+    """
+
+    info_dict = {}
+
+    ns_tei = "http://www.tei-c.org/ns/1.0"
+    ns_cc = "http://web.resource.org/cc/"
+    ns_rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+    NS_MAP = {"t" : ns_tei, "c" : ns_cc, "r" : ns_rdf}
+
+    def get_field(xml_tree, query_path, mandatory=False, findall=False):
+        """Query XML for metadata fields.
+
+        With ``findall`` set, return a list holding the text of every
+        matching element. Otherwise return the first XPath result; if
+        there is none, return "" or, when ``mandatory`` is set, exit
+        the program.
+        """
+
+        if findall:
+            matches = xml_tree.findall(query_path, namespaces=NS_MAP)
+            # An element without direct text has ``.text`` set to None.
+            # Callers join these values and store them in a config file,
+            # both of which need real strings, so fall back to "".
+            return [element.text or "" for element in matches]
+
+        tmp_field = xml_tree.xpath(query_path, namespaces=NS_MAP)
+        if len(tmp_field) > 0:
+            return tmp_field[0]
+        if mandatory:
+            sys.exit("Field stored in %s is mandatory. Exiting." % query_path)
+        return ""
+    # def get_field ends here
+
+    # Mandatory values (according to database schema)
+    info_dict['eoa_publicationdate'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:publicationStmt/t:date/@when", mandatory=True)
+    info_dict['eoa_language'] = get_field(xml_tree, "//t:teiHeader/t:profileDesc/t:langUsage/t:language/@ident", mandatory=True)
+    info_dict['eoa_license'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:publicationStmt/t:availability/t:licence/text()", mandatory=True)
+    info_dict['eoa_number'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:title[@level='s']/@n", mandatory=True)
+    info_dict['eoa_series'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:title[@level='s']/text()", mandatory=True)
+    info_dict['eoa_title'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:title[@type='main']/text()", mandatory=True)
+
+    # Optional (according to database schema)
+    info_dict['eoa_subtitle'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:title[@type='sub']/text()")
+    info_dict['eoa_isbn'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:publicationStmt/t:idno[@type='ISBN']/text()")
+    info_dict['eoa_price'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:extent/t:measure[@unit='EUR']/@quantity")
+    info_dict['eoa_shoplink_url'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:publicationStmt/t:distributor/@xml:base")
+    info_dict['eoa_shoplink_id'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:publicationStmt/t:distributor/@xml:id")
+    info_dict['eoa_shoplink_text'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:publicationStmt/t:distributor/text()")
+    info_dict['eoa_brief_desc'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:sourceDesc/t:ab[@type='BriefDescription']/text()")
+    info_dict['eoa_detail_desc'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:sourceDesc/t:ab[@type='DetailedDescription']/text()")
+    info_dict['eoa_additional_info'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:sourceDesc/t:ab[@type='additionalinformation']/text()")
+    info_dict['eoa_dedication'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:sourceDesc/t:ab[@type='dedication']/text()")
+
+    # Multi-valued fields: each of these is a (possibly empty) list.
+    info_dict['eoa_submitters'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:editor[@role='submitter']", findall=True)
+    info_dict['eoa_publicationmanagers'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:editor[@role='publicationmanager']", findall=True)
+    info_dict['eoa_publicationassistants'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:editor[@role='publicationassistant']", findall=True)
+    info_dict['eoa_editorialcoordinators'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:editor[@role='editorialcoordinator']", findall=True)
+    info_dict['eoa_copyeditors'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:editor[@role='copyeditor']", findall=True)
+    info_dict['eoa_translators'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:editor[@role='translator']", findall=True)
+    info_dict['eoa_keywords'] = get_field(xml_tree, "//t:teiHeader/t:profileDesc/t:textClass/t:keywords/t:list/t:item", findall=True)
+    info_dict['eoa_authors'] = get_field(xml_tree, "//t:teiHeader/t:fileDesc/t:titleStmt/t:author", findall=True)
+
+    return info_dict
+# def get_publication_info ends here
+
+def make_publication_cfg(info_dict):
+    """Write the publication metadata to ``publication.cfg`` in OUTPUT_DIR.
+
+    The file has three sections (``Technical``, ``General``, ``Authors``)
+    and uses ``:`` as the key/value delimiter.
+
+    Args:
+        info_dict: dictionary with the ``eoa_*`` keys read below.
+            Single-valued entries must be strings; the editor, keyword
+            and author entries must be lists of strings.
+
+    Raises:
+        SystemExit: if more than 8 keywords or more than 5 authors are given.
+        ValueError: if ``eoa_publicationdate`` is not in ``YYYY-MM-DD`` form.
+    """
+
+    config = configparser.ConfigParser(delimiters=(':',))
+    # https://stackoverflow.com/questions/1611799/preserve-case-in-configparser
+    config.optionxform=str
+
+    # set up three main bits
+    config['Technical'] = {}
+    technical_config = config['Technical']
+    config['General'] = {}
+    general_config = config['General']
+    config['Authors'] = {}
+    authors_config = config['Authors']
+
+    date_object = datetime.strptime(info_dict['eoa_publicationdate'], "%Y-%m-%d")
+
+    # fill in the fields
+    technical_config['Serie'] = info_dict['eoa_series'] #ok
+    technical_config['Number'] = info_dict['eoa_number'] #ok
+    technical_config['Title'] = info_dict['eoa_title'] #ok
+    technical_config['Subtitle'] = info_dict['eoa_subtitle'] #ok
+    technical_config['PublicationDate'] = info_dict['eoa_publicationdate'] #ok
+    technical_config['PublicationYear'] = date_object.strftime("%Y")
+    technical_config['ISBN'] = info_dict['eoa_isbn'] #ok
+    technical_config['Price'] = info_dict['eoa_price'] #ok
+    # NOTE(review): the template only references {2}, so the shop URL and
+    # id passed as arguments 0 and 1 never reach the output - confirm
+    # whether the template is meant to use them.
+    technical_config['Shoplink'] = """{2}""".format(info_dict['eoa_shoplink_url'], info_dict['eoa_shoplink_id'].replace("id_", ""), info_dict['eoa_shoplink_text']) #ok
+    technical_config['Language'] = info_dict['eoa_language'] #ok
+    technical_config['License'] = info_dict['eoa_license'] #ok
+
+    general_config['BriefDescription'] = info_dict['eoa_brief_desc'] #ok
+    general_config['Submitter'] = ", ".join(info_dict['eoa_submitters']) #ok
+    general_config['PublicationManagment'] = ", ".join(info_dict['eoa_publicationmanagers'])
+    general_config['PublicationAssistants'] = ", ".join(info_dict['eoa_publicationassistants'])
+
+    keywords = info_dict['eoa_keywords']
+    if len(keywords) > 8:
+        sys.exit("Too many Keywords. Up to 8 are allowed. Exiting.")
+    # Number by position. Looking the keyword up with list.index() would
+    # give every duplicate the index of its first occurrence, so repeated
+    # keywords would all land on the same label.
+    for number, keyword in enumerate(keywords, start=1):
+        general_config["Keyword" + str(number)] = keyword
+
+    general_config['DetailedDescription'] = info_dict['eoa_detail_desc'] #ok
+    general_config['AdditionalInformation'] = info_dict['eoa_additional_info'] #ok
+    general_config['EditorialCoordination'] = ", ".join(info_dict['eoa_editorialcoordinators'])
+    general_config['Copyediting'] = ", ".join(info_dict['eoa_copyeditors'])
+    general_config['Dedication'] = info_dict['eoa_dedication'] #ok
+    general_config['Translator'] = ", ".join(info_dict['eoa_translators'])
+
+    authors = info_dict['eoa_authors']
+    if len(authors) > 5:
+        sys.exit("Too many authors. Up to 5 are allowed. Exiting.")
+    # Always write all five author slots; unused ones stay empty.
+    for slot in range(5):
+        author_label = "Author" + str(slot + 1)
+        authors_config[author_label] = authors[slot] if slot < len(authors) else ""
+
+    authors_config['Zusatz'] = ""
+
+    output_filename = os.path.join(OUTPUT_DIR, "publication.cfg")
+    with open(output_filename, 'w') as configfile:
+        config.write(configfile)
+
+    print("Wrote", output_filename)
+# def make_publication_cfg ends here
+
def render_reference(list_of_xml_elements, cited_data):
"""Provide an attribute for a formatted version of Reference.
@@ -521,6 +662,12 @@ def add_bibliography(xml_tree, refs_for_bib_chapter):
# if not os.path.exists("debug"):
# os.mkdir(os.path.expanduser("debug"))
+ if not os.path.exists(OUTPUT_DIR):
+ os.mkdir(os.path.expanduser(OUTPUT_DIR))
+
+ publication_info = get_publication_info(xml_tree)
+ make_publication_cfg(publication_info)
+
if not os.path.exists(TMP_DIR):
os.mkdir(os.path.expanduser(TMP_DIR))
output_filename = TMP_DIR + os.path.sep + "IntermediateXMLFile.xml"