Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
django-tei-importer/publications/management/commands/tei_import.py
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
65 lines (48 sloc)
2.19 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
from django.core.management.base import BaseCommand, CommandError | |
from eoa.models import * | |
import xmltodict, re, os, argparse, django | |
from collections import OrderedDict | |
from ._importer_class import * | |
############################ | |
# Add arguments: | |
# Path to file and file_name | |
# | |
# Open file, substitute formatting rules,
# and transform into dict | |
############################ | |
# Fallback location of the TEI document, used when the command is invoked
# without -p/--import_path and -f/--file_name.
default_path = "eoa/teidoc/"
default_filename = "minimalTEI.xml"
class Command(BaseCommand):
    """Management command that imports a TEI XML document into the Django DB.

    The file is read, parsed into a nested dict with ``xmltodict`` and wrapped
    in a ``TEIimporter``. The ``create*`` helpers star-imported from
    ``_importer_class`` are then called in order with that importer (and the
    publication returned by ``createPublication``).
    """

    help = 'Import TEI document to Django DB'

    def add_arguments(self, parser):
        """Register the command-line options.

        ``-p/--import_path`` is stored under ``options['p']`` and
        ``-f/--file_name`` under ``options['f']``; both fall back to the
        module-level defaults.
        """
        parser.add_argument(
            '-p', '--import_path', dest='p', type=str, default=default_path,
            help='The path to the /tei/xml/file',
        )
        parser.add_argument(
            '-f', '--file_name', dest='f', type=str, default=default_filename,
            help='Name of file to import.',
        )

    def handle(self, *args, **options):
        """Read the TEI file, parse it and create the database objects.

        Raises:
            CommandError: if the file cannot be read or is not well-formed XML.
        """
        # Local import: xmltodict parses with expat, so malformed XML surfaces
        # as ExpatError; importing here keeps the file's import block untouched.
        from xml.parsers.expat import ExpatError

        tei_path = os.path.join(options['p'], options['f'])

        # Skip these namespaces, i.e. a key in the dict "{namespace}key"
        # will change to "key".
        namespaces = {
            'http://www.tei-c.org/ns/1.0': None,
            'http://www.w3.org/XML/1998/namespace': None,
        }

        # Decode explicitly as UTF-8 instead of relying on the platform's
        # locale encoding, and close the file before the (potentially slow)
        # database import starts.
        try:
            with open(tei_path, 'r', encoding='utf-8') as infile:
                readfile = infile.read()
        except (OSError, UnicodeDecodeError) as err:
            raise CommandError(
                'Could not read TEI file "{0}": {1}'.format(tei_path, err)
            ) from err

        # NOTE: reformatting highlighting from <hi> ... </hi> to
        # {rend: italic, text} (to keep the dictionary structure readable)
        # is currently disabled; the raw file content is parsed as-is.

        # Parsing to dict.
        try:
            tei_dict = xmltodict.parse(
                readfile, process_namespaces=True, namespaces=namespaces
            )
        except ExpatError as err:
            raise CommandError(
                'Could not parse TEI file "{0}": {1}'.format(tei_path, err)
            ) from err

        ######################
        # Instantiate and add object via models, using _importer_class
        ######################
        text = TEIimporter(tei_dict)
        pub = createPublication(text)
        # The helpers' return values are not used here; they are called for
        # their effect, in the same order as before.
        createAuthors(text, pub)
        createChapters(text, pub)
        createSections(text, pub)
        # NOTE: paragraph import (createParagraphs) is currently disabled.
        createCitations(text, pub)

        self.stdout.write(
            'Successfully read TEI file, by {0}'.format(text.authors())
        )