Skip to content
Permalink
f289774945
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
65 lines (48 sloc) 2.19 KB
# -*- coding: utf-8 -*-
from django.core.management.base import BaseCommand, CommandError
from eoa.models import *
import xmltodict, re, os, argparse, django
from collections import OrderedDict
from ._importer_class import *
############################
# Add arguments:
# Path to file and file_name
#
# Open file, substitute formatting rules,
# and transform into dict
############################
# Fallback directory and file name, used as the defaults of the command's
# -p/--import_path and -f/--file_name options.
default_path = 'eoa/teidoc/'
default_filename = 'minimalTEI.xml'
class Command(BaseCommand):
    """Management command that imports a TEI XML document into the Django DB.

    The selected file is parsed into a nested dict with ``xmltodict``, wrapped
    in a ``TEIimporter`` and passed through the ``create*`` import steps
    (presumably provided by the star imports at the top of this module).
    """

    help = 'Import TEI document to Django DB'

    def add_arguments(self, parser):
        """Register the ``-p/--import_path`` and ``-f/--file_name`` options.

        Both options are optional strings. They are stored under the option
        keys ``'p'`` and ``'f'`` and default to ``default_path`` and
        ``default_filename`` respectively.
        """
        parser.add_argument(
            '-p', '--import_path', dest='p', type=str, default=default_path,
            help='The path to the /tei/xml/file')
        parser.add_argument(
            '-f', '--file_name', dest='f', type=str, default=default_filename,
            help='Name of file to import.')

    def handle(self, *args, **options):
        """Parse the selected TEI file and run the import steps on it.

        Args:
            *args: Unused positional arguments.
            **options: Parsed command options; ``options['p']`` is the
                directory and ``options['f']`` the file name to import.

        Raises:
            OSError: If the file cannot be opened or read.
        """
        tei_path = os.path.join(options['p'], options['f'])

        # Read raw bytes instead of text: decoding then no longer depends on
        # the platform's default text encoding, and the bytes are handed to
        # the XML parser as they are on disk. The handle is closed as soon as
        # the content is in memory rather than staying open during the import.
        with open(tei_path, 'rb') as infile:
            raw_xml = infile.read()

        # Namespaces mapped to None are skipped, i.e. a key in the dict
        # "{namespace}key" will change to "key".
        namespaces = {
            'http://www.tei-c.org/ns/1.0': None,
            'http://www.w3.org/XML/1998/namespace': None,
        }

        # NOTE: <hi rend="..."> highlighting is parsed as-is; an earlier
        # pre-processing step that flattened it into inline text is disabled.
        tei_dict = xmltodict.parse(
            raw_xml, process_namespaces=True, namespaces=namespaces)

        ######################
        # Instantiate and add objects via models, using _importer_class
        ######################
        text = TEIimporter(tei_dict)
        pub = createPublication(text)
        # The remaining steps are called for their effect only; their return
        # values are not used here.
        createAuthors(text, pub)
        createChapters(text, pub)
        createSections(text, pub)
        # NOTE: the paragraph step (createParagraphs) is currently disabled.
        createCitations(text, pub)

        self.stdout.write(
            'Successfully read TEI file, by {0}'.format(text.authors())
        )