Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
django-eoapublications/eoapublications/management/commands/publicationimport.py
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
705 lines (658 sloc)
41.7 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
__author__ = 'kai' | |
from django.core.management.base import BaseCommand | |
from django.conf import settings | |
from eoapublications.models import * | |
import configparser | |
import sys | |
import os | |
import shutil | |
from lxml import etree | |
from PIL import Image | |
import shlex | |
import subprocess | |
class Command(BaseCommand): | |
help = "Import a publication into the Django database." | |
def add_arguments(self, parser):
    """Register the command-line arguments of this command.

    The only argument is the optional positional ``inputDir``: the
    directory to import from, which falls back to ``import`` when it is
    left out.
    """
    input_dir_options = {
        'default': 'import',
        'nargs': '?',
        'metavar': 'INPUT_DIR',
        'help': 'import publication from INPUT_DIR. default: %(default)s',
    }
    parser.add_argument('inputDir', **input_dir_options)
def input_dir_sanitycheck(self, inputDir):
    """Check that inputDir holds everything the import reads.

    The directory itself is checked first, then each required file or
    sub-directory in a fixed order.

    Raises:
        FileNotFoundError: carrying the first path that does not exist.
    """
    from os.path import exists, join

    required_entries = (
        "publication.cfg",
        "Django.xml",
        "Cover.jpg",
        "images",
        "images/embedded",
    )
    if not exists(inputDir):
        raise FileNotFoundError(inputDir)
    for entry in required_entries:
        candidate = join(inputDir, entry)
        if not exists(candidate):
            raise FileNotFoundError(candidate)
def dest_dir_sanitycheck(self, destDir):
    """Make sure the destination is still free.

    Raises:
        FileExistsError: carrying destDir when that path already exists.
    """
    from os.path import exists

    destination_taken = exists(destDir)
    if destination_taken:
        raise FileExistsError(destDir)
def dbg_print(self, x, level=1, **options):
    """Print x unless its level exceeds the requested verbosity.

    The verbosity is read from the ``verbosity`` keyword option and
    counts as 0 when that option is not passed.
    """
    verbosity = options.get('verbosity', 0)
    if verbosity < level:
        return
    print(x)
def dest_media_dir(self, inputDir):
    """Return the media directory for the publication found in inputDir.

    The path is settings.MEDIA_ROOT joined with the lower-cased ``Serie``
    and the ``Number`` taken from the ``Technical`` section of
    ``publication.cfg``.
    """
    from os.path import join

    parser = configparser.ConfigParser()
    parser.read(join(inputDir, 'publication.cfg'))
    technical_section = parser['Technical']
    series_name = technical_section['Serie'].lower()
    publication_number = technical_section['Number']
    relative_destination = join(series_name, publication_number)
    return join(settings.MEDIA_ROOT, relative_destination)
def resize_image(self, source_image, dest_image, max_size, dimension):
    """Resize an image while preserving its aspect ratio.

    Args:
        source_image: filename of the image to read.
        dest_image: filename the resized image is written to.
        max_size: target size in pixels along ``dimension``.
        dimension: ``"height"`` or ``"width"``; names the side that is
            set to ``max_size``. The other side is scaled by the same
            factor and truncated to an integer.

    Returns:
        The ``(width, height)`` of the image written to ``dest_image``.

    Any other ``dimension`` prints a message and terminates the process
    with exit status 1; this is checked before any file is opened.

    https://stackoverflow.com/questions/273946/how-do-i-resize-an-image-using-pil-and-maintain-its-aspect-ratio
    """
    if dimension not in ("height", "width"):
        print("You must either specify height or width as dimension. Exiting.")
        # A wrong argument is a failure, so do not report success to the shell.
        sys.exit(1)

    with Image.open(source_image) as image_object:
        width, height = image_object.size
        if dimension == "height":
            dest_height = max_size
            dest_width = int(float(width) * (max_size / float(height)))
        else:
            dest_width = max_size
            dest_height = int(float(height) * (max_size / float(width)))
        # LANCZOS is the filter formerly reachable as ANTIALIAS, an alias
        # that was removed in Pillow 10.
        resized_image = image_object.resize((dest_width, dest_height), Image.LANCZOS)
        # Write the resized copy; the source image was being saved here
        # before, so dest_image never had the returned dimensions.
        resized_image.save(dest_image)
    return dest_width, dest_height
# def resize_image ends here
def process_as_string(self, xmlElement):
    """Return the inner content of an XML element as an HTML string.

    The result is the element's leading text followed by the
    serialization of each child element; the element's own tag is not
    part of it.

    ``None`` (for instance the result of a ``find`` that matched
    nothing) yields an empty string, so callers do not have to guard
    every optional sub-element themselves.
    """
    if xmlElement is None:
        return ""
    parts = [xmlElement.text or ""]
    parts.extend(etree.tostring(xmlChild).decode() for xmlChild in xmlElement.iterchildren())
    return "".join(parts)
# def process_as_string ends here
def process_index_entries(self, xmlIndex, xmlElement, xmlChapter, Newelement, Newpublication, index_type):
    """Construct the popups for index entries next to paragraphs.

    For every index marker of the requested kind found below xmlElement
    one Indexelement attached to Newelement is saved. Its Html is the
    marker's label (``display`` if present, else ``main``) followed by
    the numbered occurrences that xmlIndex lists for the same ``main``
    value: the occurrence that points at this very chapter/element is
    rendered as a ``<span>``, every other one as a link to its place.

    Args:
        xmlIndex: the print-index element holding the EOAindexentry list.
        xmlElement: element whose descendants are searched for markers.
        xmlChapter: chapter containing xmlElement (its ``order`` is read).
        Newelement: database element the popups belong to.
        Newpublication: publication; ``Serie`` and ``Number`` form links.
        index_type: ``"keyword"``, ``"person"`` or ``"location"``. Any
            other value prints a message and saves nothing.
    """
    marker_tags = {
        "keyword": "EOAindex",
        "person": "EOAindexperson",
        "location": "EOAindexlocation",
    }
    marker_tag = marker_tags.get(index_type)
    if marker_tag is None:
        # Stop here: without a marker tag there is nothing to iterate
        # (falling through used to fail on an unbound variable).
        print("No suitable index found.")
        return

    current_chapter_order = xmlChapter.get('order')
    current_element_order = xmlElement.get('order')

    for xmlEntry in xmlElement.findall('.//' + marker_tag):
        # Markers that carry a "secondary" attribute get no popup.
        if xmlEntry.get("secondary") is not None:
            continue
        Newindexelement = Indexelement(Element=Newelement)
        strMainEntry = xmlEntry.get("main")
        label = xmlEntry.get("display")
        if label is None:
            label = strMainEntry
        html_parts = [label + ": "]
        # Find the corresponding entry in the print index.
        for xmlIndexEntry in xmlIndex.findall(".//EOAindexentry"):
            if xmlIndexEntry.get("main") != strMainEntry:
                continue
            xmlIndexLinks = xmlIndexEntry.findall("./EOAindexlink")
            for intLinkNumber, xmlIndexLink in enumerate(xmlIndexLinks, start=1):
                chapter_order = xmlIndexLink.get("chapterorder")
                element_order = xmlIndexLink.get('elementorder')
                if chapter_order == current_chapter_order and element_order == current_element_order:
                    # The occurrence the reader is looking at: no link.
                    html_parts.append("<span>" + str(intLinkNumber) + "</span> ")
                else:
                    html_parts.append(
                        "<a href='/" + Newpublication.Serie.lower() + "/"
                        + Newpublication.Number + "/" + chapter_order + "/index.html#"
                        + element_order + "'>" + str(intLinkNumber) + "</a> "
                    )
        html_parts.append("<br/>")
        Newindexelement.Html = "".join(html_parts)
        Newindexelement.save()
# def process_index_entries ends here
def process_indexsection(self, xmlIndexsection, intObjectOrder, intIndexEntry, publication_number, index_type, Newpublication, olddesign=False):
    """Create and save the HTML for one section of a printed index.

    One Indexsection is stored for xmlIndexsection. Every child entry
    contributes a heading (``display`` if present, else ``main``), the
    numbered links of the entry itself and, per sub-entry, a paragraph
    with the sub-entry label and its own numbered links.

    Args:
        xmlIndexsection: the EOAindexsection element.
        intObjectOrder: value stored as the section's Order.
        intIndexEntry: number of the first entry; only the old design
            uses it, to build the ids of the collapsible blocks.
        publication_number: publication number used in link targets.
        index_type: stored as the section's Kind.
        Newpublication: publication the section belongs to.
        olddesign: render entries as accordion blocks instead of the
            ``<h4>``/``<ul>`` markup.

    The counters are plain integers, so advancing them for the next
    section is up to the caller.
    """
    Newindexsection = Indexsection(Publication=Newpublication)
    Newindexsection.Kind = index_type
    Newindexsection.Title = xmlIndexsection.get("Character")
    Newindexsection.Order = intObjectOrder

    link_base = "/" + Newpublication.Serie.lower() + "/" + publication_number + "/"
    section_parts = []
    if olddesign:
        # The accordion id is numbered one past the section order.
        section_parts.append("<div class=\"accordion\" id=\"accordion" + str(intObjectOrder + 1) + "\">")

    for xmlIndexEntry in xmlIndexsection.iterchildren():
        entry_title = xmlIndexEntry.get('display')
        if entry_title is None:
            entry_title = str(xmlIndexEntry.get('main'))

        if olddesign:
            entry_number = str(intIndexEntry)
            intIndexEntry += 1
            opening = (
                "<div class=\"accordion-group\">\n"
                "<div class=\"accordion-heading\">\n"
                "<a class=\"accordion-toggle\" data-toggle=\"collapse\" data-parent=\"#accordion"
                + entry_number + "\" href=\"#" + "indexentry" + entry_number + "\">"
                + entry_title
                + "</a></div> <div id="
                + "\"indexentry" + entry_number + "\""
                + " class=\"accordion-body collapse\"><div class=\"accordion-inner\">"
            )
            closing = "</div></div></div>"
        else:
            opening = "<h4>" + entry_title + "</h4><ul><li>"
            closing = "</li></ul>"

        section_parts.append(opening)
        # Links of the main entry
        section_parts.append(
            self._index_links_html(xmlIndexEntry.findall("./EOAindexlink"), link_base, "strong")
        )
        # Sub-entries, if available
        for xmlIndexSubEntry in xmlIndexEntry.findall(".//EOAindexsubentry"):
            sub_title = xmlIndexSubEntry.get('display')
            if sub_title is None:
                sub_title = xmlIndexSubEntry.get('secondary')
            section_parts.append("<p>" + sub_title + "<br/>")
            section_parts.append(
                self._index_links_html(xmlIndexSubEntry.findall("EOAindexlink"), link_base, "b")
            )
            section_parts.append("</p>")
        # Close the wrappers opened for this entry
        section_parts.append(closing)

    if olddesign:
        section_parts.append("</div>")

    Newindexsection.Html = "".join(section_parts)
    Newindexsection.save()
# def process_indexsection ends here

def _index_links_html(self, xmlIndexlinks, link_base, bold_tag):
    """Render index links as consecutively numbered anchors.

    Each link points to ``<link_base><chapterorder>/index.html#<elementorder>``
    and is labelled with its 1-based position. Links whose ``bold``
    attribute is ``"True"`` are wrapped in ``<bold_tag>``.
    """
    rendered = []
    for link_number, xmlIndexlink in enumerate(xmlIndexlinks, start=1):
        anchor = (
            "<a href=\"" + link_base + xmlIndexlink.get('chapterorder')
            + "/index.html#" + xmlIndexlink.get('elementorder') + "\">"
            + str(link_number) + "</a> "
        )
        if xmlIndexlink.get('bold') == "True":
            anchor = "<" + bold_tag + ">" + anchor + "</" + bold_tag + ">"
        rendered.append(anchor)
    return "".join(rendered)
def progress(self, count, total, status=''):
    """Draw a one-line progress bar on standard output.

    The bar is 60 characters wide and is followed by the percentage of
    ``count`` in ``total`` and the ``status`` text. The line ends with a
    carriage return, so the next call overwrites it. Taken from
    https://gist.github.com/vladignatyev/06860ec2040cb497f0f3
    """
    bar_len = 60
    total_as_float = float(total)
    filled_len = int(round(bar_len * count / total_as_float))
    percents = round(100.0 * count / total_as_float, 1)
    bar = ''.join(('#' * filled_len, '-' * (bar_len - filled_len)))
    line = '[%s] %s%s ... %s\r' % (bar, percents, '%', status)
    sys.stdout.write(line)
    sys.stdout.flush()
# def progress ends here
def handle(self, **options):
    """Import the publication found in the input directory.

    Steps, in order:

    1. Check the input directory and read ``publication.cfg``.
    2. Create the media directory ``MEDIA_ROOT/<serie>/<number>`` (the
       import stops with FileExistsError if it is already there) and
       derive the cover images.
    3. Save the Publication, then walk ``Django.xml``: one Chapter per
       EOAchapter with one Element per child, one Chapter per
       EOAfacsimilepart with one Element per page, and the sections of
       the keyword, person and location indexes.
    4. Copy the whole input directory to ``import_files`` inside the
       media directory.

    Image scaling other than the medium cover is delegated to
    ``/usr/bin/gm``.
    """
    strDir = os.path.join(os.getcwd(), options['inputDir'])
    self.input_dir_sanitycheck(strDir)

    # Read .cfg-File
    cfgPublication = configparser.ConfigParser()
    cfgPublication.read(strDir + '/publication.cfg')
    Newpublication = self._publication_from_config(cfgPublication)

    # Create Media-Directory for this publication
    serie_path = cfgPublication.get('Technical', 'Serie').lower()
    publication_number = cfgPublication.get('Technical', 'Number')
    media_rel = serie_path + "/" + publication_number
    strMediaDir = str(settings.MEDIA_ROOT) + "/" + media_rel
    import_files_dir = strMediaDir + os.path.sep + "import_files"
    # Refuse to import on top of an existing publication before anything
    # is written.
    self.dest_dir_sanitycheck(strMediaDir)
    os.makedirs(strMediaDir + "/embedded")

    # Copy and prepare Cover Images
    cover_source = strDir + "/Cover.jpg"
    shutil.copy(cover_source, strMediaDir)
    Newpublication.Coverbig = media_rel + "/Cover.jpg"
    with Image.open(strMediaDir + "/Cover.jpg") as cover_image:
        Newpublication.Coverbigwidth, Newpublication.Coverbigheight = cover_image.size

    Newpublication.Covermedium = media_rel + "/Cover_medium.jpg"
    Newpublication.Covermediumwidth, Newpublication.Covermediumheight = self.resize_image(
        cover_source, strMediaDir + "/Cover_medium.jpg", 250, "height"
    )

    Newpublication.Coversmall = media_rel + "/Cover_small.jpg"
    Newpublication.Coversmallwidth, Newpublication.Coversmallheight = self._scaled_cover(
        cover_source, strMediaDir + "/Cover_small.jpg", "140x"
    )

    Newpublication.Coversmallright = media_rel + "/Cover_smallright.jpg"
    Newpublication.Coversmallrightwidth, Newpublication.Coversmallrightheight = self._scaled_cover(
        cover_source, strMediaDir + "/Cover_smallright.jpg", "180x"
    )

    # Publication is not featured by default
    Newpublication.Featured = False
    Newpublication.save()

    # Open Django.xml and process the XML
    xmlParser = etree.XMLParser(no_network=True, load_dtd=True)
    xmlTree = etree.parse(strDir + "/Django.xml", xmlParser)

    # The printed indexes are looked up once instead of once per element.
    printed_indexes = []
    for index_type, index_tag in (
        ("keyword", "EOAprintindex"),
        ("person", "EOAprintpersonindex"),
        ("location", "EOAprintlocationindex"),
    ):
        xmlIndex = xmlTree.find(".//" + index_tag)
        if xmlIndex is not None:
            printed_indexes.append((index_type, xmlIndex))

    for xmlChapter in xmlTree.findall("EOAchapter"):
        self._import_chapter(xmlChapter, Newpublication, strDir, strMediaDir, media_rel, printed_indexes)

    for xmlFacsimilepart in xmlTree.findall("EOAfacsimilepart"):
        self._import_facsimilepart(xmlFacsimilepart, Newpublication, strDir, strMediaDir, media_rel)

    for index_type, xmlIndex in printed_indexes:
        intObjectOrder = 1
        intIndexEntry = 1
        for xmlIndexsection in xmlIndex.findall(".//EOAindexsection"):
            self.process_indexsection(
                xmlIndexsection, intObjectOrder, intIndexEntry,
                publication_number, index_type, Newpublication
            )
            # The counters are passed by value, so they have to be
            # advanced here; otherwise every section is stored with
            # Order 1 and entry numbering restarts in each section.
            intObjectOrder += 1
            intIndexEntry += sum(1 for _ in xmlIndexsection.iterchildren())

    shutil.copytree(strDir, import_files_dir)
    print("Import finished")

def _publication_from_config(self, cfgPublication):
    """Return a new, unsaved Publication filled from publication.cfg."""
    plain_fields = (
        ('Number', 'Technical', 'Number'),
        ('Title', 'Technical', 'Title'),
        ('Subtitle', 'Technical', 'Subtitle'),
        ('Datepublished', 'Technical', 'PublicationDate'),
        ('Language', 'Technical', 'Language'),
        ('Publicationlicense', 'Technical', 'License'),
        ('Isbn', 'Technical', 'ISBN'),
        ('Price', 'Technical', 'Price'),
        ('Shoplink', 'Technical', 'Shoplink'),
        ('Descriptionlong', 'General', 'DetailedDescription'),
        ('Descriptionshort', 'General', 'BriefDescription'),
        ('Submitter', 'General', 'Submitter'),
        ('Editorialcoordination', 'General', 'EditorialCoordination'),
        ('Copyeditor', 'General', 'Copyediting'),
        ('Translator', 'General', 'Translator'),
        ('Dedication', 'General', 'Dedication'),
    )
    # These are only assigned when the option has a non-empty value.
    optional_author_fields = (
        ('Publicationauthor1', 'Author1'),
        ('Publicationauthor2', 'Author2'),
        ('Publicationauthor3', 'Author3'),
        ('Publicationauthor4', 'Author4'),
        ('Publicationauthor5', 'Author5'),
        ('Publicationauthorsuffix', 'Zusatz'),
    )
    Newpublication = Publication(Published=False)
    Newpublication.Serie = cfgPublication.get('Technical', 'Serie').lower()
    for attribute, section, option in plain_fields:
        setattr(Newpublication, attribute, cfgPublication.get(section, option))
    for attribute, option in optional_author_fields:
        value = cfgPublication.get('Authors', option)
        if value:
            setattr(Newpublication, attribute, value)
    return Newpublication

def _gm_resize(self, geometry, source, dest):
    """Scale source into dest with ``gm convert -resize geometry``.

    The argument list is passed as is, so file names containing spaces
    or quotes reach gm unchanged. A non-zero exit status is reported on
    standard output and returned; it does not abort the import.
    """
    returncode = subprocess.call(
        ["/usr/bin/gm", "convert", "-resize", geometry, source, dest], shell=False
    )
    if returncode != 0:
        print("Warning: gm convert exited with status %s for %s" % (returncode, source))
    return returncode

def _scaled_cover(self, cover_source, target, geometry):
    """Write a gm-scaled copy of the cover to target; return its (width, height)."""
    shutil.copy(cover_source, target)
    self._gm_resize(geometry, target, target)
    with Image.open(target) as scaled:
        return scaled.size

def _import_chapter(self, xmlChapter, Newpublication, strDir, strMediaDir, media_rel, printed_indexes):
    """Store one EOAchapter: the Chapter, its media files and its Elements."""
    chapter_order = xmlChapter.get('order')
    chapter_dir = strMediaDir + "/" + chapter_order
    chapter_rel = media_rel + "/" + chapter_order

    Newchapter = Chapter(Publication=Newpublication)
    Newchapter.Order = chapter_order
    if xmlChapter.get("language") is not None:
        Newchapter.Chapterlanguage = xmlChapter.get("language")
    os.mkdir(chapter_dir)
    if xmlChapter.get("number") != "":
        Newchapter.Number = xmlChapter.get("number")

    # Part title and authors are moved onto the chapter and taken out of
    # the tree so they are not imported again as elements.
    xmlEOAparthtml = xmlChapter.find(".//EOAparthtml")
    if xmlEOAparthtml is not None:
        Newchapter.Chapterpart = self.process_as_string(xmlEOAparthtml)
        xmlEOAparthtml.getparent().remove(xmlEOAparthtml)
    xmlAuthors = xmlChapter.findall(".//EOAauthor")
    for position, xmlAuthor in enumerate(xmlAuthors[:5], start=1):
        setattr(Newchapter, "Chapterauthor%d" % position, self.process_as_string(xmlAuthor))
    for xmlAuthor in xmlAuthors:
        xmlAuthor.getparent().remove(xmlAuthor)

    Newchapter.Title = self.process_as_string(xmlChapter.find("head"))
    for position in range(1, 6):
        setattr(Newchapter, "Chapterauthor%dprofile" % position, False)
    Newchapter.save()

    # Inline equations (EOAineq) need to be prepared first
    for xmlInlineEquation in xmlChapter.findall(".//img"):
        if xmlInlineEquation.get("class") == "EOAineq":
            strEquationImage = xmlInlineEquation.get('src')
            shutil.copy(strDir + "/images/" + strEquationImage, chapter_dir + "/")
            xmlInlineEquation.set('src', "/media/" + chapter_rel + "/" + strEquationImage)

    for xmlChild in xmlChapter.iterchildren():
        Newelement = Element(Publication=Newpublication, Chapter=Newchapter)
        Newelement.Indentation = False
        print(xmlChild.tag)
        self._relocate_embedded_images(xmlChild, strDir, strMediaDir, media_rel, chapter_dir, chapter_rel)
        self._fill_element(xmlChild, Newelement, strDir, chapter_dir, chapter_rel)
        # NOTE(review): this also runs for children that were not saved
        # above (no known tag, no list/indent attribute) - confirm that
        # such children never contain index markers.
        for index_type, xmlIndex in printed_indexes:
            self.process_index_entries(xmlIndex, xmlChild, xmlChapter, Newelement, Newpublication, index_type)

def _relocate_embedded_images(self, xmlChild, strDir, strMediaDir, media_rel, chapter_dir, chapter_rel):
    """Copy the images embedded in xmlChild and point their src to /media/.

    Images whose src contains ``EOAineq`` or ``EOAchem`` are skipped.
    Images directly inside a ``td`` are taken from ``images/`` into the
    chapter directory; all others from ``images/embedded/`` into the
    publication's ``embedded`` directory.
    """
    for xmlFigure in xmlChild.findall(".//img"):
        strFileName = xmlFigure.get("src")
        if "EOAineq" in strFileName or "EOAchem" in strFileName:
            continue
        if xmlFigure.getparent().tag == "td":
            shutil.copy(strDir + "/images/" + strFileName, chapter_dir + "/")
            xmlFigure.set('src', "/media/" + chapter_rel + "/" + strFileName)
            continue
        shutil.copy(strDir + "/images/embedded/" + strFileName, strMediaDir + "/embedded/" + strFileName)
        xmlFigure.set("src", "/media/" + media_rel + "/embedded/" + strFileName)

def _fill_element(self, xmlChild, Newelement, strDir, chapter_dir, chapter_rel):
    """Fill Newelement from xmlChild according to its tag and save it.

    Children with an unknown tag are only saved when they carry an
    ordered-list or indent attribute.
    """
    heading_kinds = {
        "EOAsection": 'eoasection',
        "EOAsubsection": 'eoasubsection',
        "EOAsubsubsection": 'eoasubsubsection',
    }
    tag = xmlChild.tag
    order = xmlChild.get('order')

    if tag == "EOAparagraph":
        quoted = xmlChild.get("rend") == "quoted"
        # A divider class takes precedence over the quoted/plain kinds.
        if xmlChild.get("class") == "divider":
            Newelement.Kind = 'eoadivider'
        elif quoted:
            Newelement.Kind = 'eoaquotedparagraph'
        else:
            Newelement.Kind = 'eoaparagraph'
        Newelement.Order = order
        Newelement.Fulltext = self.process_as_string(xmlChild)
        if not quoted and xmlChild.get("style") == "box":
            Newelement.Boxed = True
        Newelement.save()
    elif tag in heading_kinds:
        Newelement.Kind = heading_kinds[tag]
        Newelement.Order = order
        Newelement.Fulltext = self.process_as_string(xmlChild.find('head'))
        if xmlChild.get("number") is not None:
            Newelement.Number = xmlChild.get("number")
        Newelement.save()
    elif tag in ("EOAfigure", "EOAfigurenonumber"):
        self._fill_figure(xmlChild, Newelement, strDir, chapter_dir, chapter_rel)
    elif tag == "EOAequation":
        Newelement.Kind = 'eoaequation'
        Newelement.Order = order
        Newelement.Number = xmlChild.get('number')
        strFilename = xmlChild.get('filename')
        shutil.copy(strDir + "/images/" + strFilename, chapter_dir + "/")
        Newelement.Genericimage = chapter_rel + "/" + strFilename
        Newelement.Texcode = xmlChild.get('TeX')
        Newelement.save()
    elif tag == "EOAtranscription":
        Newelement.Kind = 'eoatranscription'
        Newelement.Order = order
        Newelement.Transcriptionleftheader = self.process_as_string(xmlChild.find('Leftheader'))
        Newelement.Transcriptionrightheader = self.process_as_string(xmlChild.find('Rightheader'))
        Newelement.Transcriptionleftpage = self.process_as_string(xmlChild.find('EOAtranscriptionleft'))
        Newelement.Transcriptionrightpage = self.process_as_string(xmlChild.find('EOAtranscriptionright'))
        Newelement.save()
    elif tag == "EOAtheorem":
        Newelement.Kind = 'eoatheorem'
        Newelement.Order = order
        Newelement.Number = xmlChild.get('number')
        Newelement.Theoremtitle = self.process_as_string(xmlChild.find('head'))
        Newelement.Theoremdescription = self.process_as_string(xmlChild.find('p'))
        Newelement.save()
    elif tag == "EOAdescription":
        Newelement.Kind = 'eoadescription'
        Newelement.Order = order
        xmlDescription = xmlChild.find('description')
        Newelement.Caption = self.process_as_string(xmlDescription)
        xmlDescription.getparent().remove(xmlDescription)
        Newelement.Fulltext = self.process_as_string(xmlChild.find('p'))
        Newelement.save()
    elif tag == "EOAletterhead":
        Newelement.Kind = 'eoaletterhead'
        Newelement.Order = order
        Newelement.Letterrecipient = self.process_as_string(xmlChild.find('Recipient'))
        Newelement.Letteradditional = self.process_as_string(xmlChild.find('Additional'))
        Newelement.Letterarchive = self.process_as_string(xmlChild.find('Archive'))
        Newelement.Letterpages = self.process_as_string(xmlChild.find('Pages'))
        Newelement.save()
    elif tag == "EOAtable":
        Newelement.Kind = 'eoatable'
        Newelement.Order = order
        if xmlChild.get('number'):
            Newelement.Number = xmlChild.get('number')
        xmlTablecaption = xmlChild.find('EOAtablecaption')
        if xmlTablecaption is not None:
            Newelement.Caption = self.process_as_string(xmlTablecaption)
        xmlTable = xmlChild.find('table')
        Newelement.Tablehtml = self.process_as_string(xmlTable)
        Newelement.Tablewidth = xmlTable.get('width')
        Newelement.save()
    elif tag == "EOAfootnote":
        Newelement.Kind = 'eoafootnote'
        Newelement.Order = order
        # Copy and adjust embedded EOAequationnonumber: each becomes a
        # <p> wrapping an <img> that carries the TeX source.
        for xmlEquation in xmlChild.findall(".//EOAequationnonumber"):
            strFilename = xmlEquation.get("filename")
            strTexcode = xmlEquation.get("TeX")
            xmlEquation.tag = "p"
            del xmlEquation.attrib["TeX"]
            del xmlEquation.attrib["filename"]
            xmlImg = etree.Element("img")
            shutil.copy(strDir + "/images/" + strFilename, chapter_dir + "/")
            xmlImg.set("src", "/media/" + chapter_rel + "/" + strFilename)
            xmlImg.set("TeX", strTexcode)
            xmlEquation.append(xmlImg)
        Newelement.Number = xmlChild.get('number')
        Newelement.Listcharacter = xmlChild.get('anchor')
        Newelement.Fulltext = self.process_as_string(xmlChild)
        Newelement.save()

    # List and indentation attributes apply to any kind of child.
    layout_changed = False
    if xmlChild.get("listtype") == "ordered":
        Newelement.Indentation = True
        Newelement.Listcharacter = xmlChild.get("listnumber")
        layout_changed = True
    if xmlChild.get("indent") is not None:
        Newelement.Indentation = True
        layout_changed = True
    if layout_changed:
        Newelement.save()

def _fill_figure(self, xmlChild, Newelement, strDir, chapter_dir, chapter_rel):
    """Store an EOAfigure / EOAfigurenonumber and create its image sizes.

    Both variants get a ``big`` (1500x900) copy and have the copy in the
    chapter directory scaled to 530x400. Only the numbered EOAfigure also
    records number, hielement and caption and keeps an ``orig`` copy.
    """
    numbered = xmlChild.tag == "EOAfigure"
    Newelement.Kind = 'eoafigure'
    Newelement.Order = xmlChild.get('order')
    strFigureImage = xmlChild.get('file')
    if numbered:
        Newelement.Number = xmlChild.get('number')
        hielement_string = xmlChild.get('hielement')
        Newelement.HIElement = hielement_string if hielement_string is not None else ""
        # A missing caption is stored as an empty string in either case.
        xmlCaption = xmlChild.find('caption')
        Newelement.Caption = self.process_as_string(xmlCaption) if xmlCaption is not None else ""
        print("Converting Image File:")
        print(strFigureImage)

    source = strDir + "/images/" + strFigureImage
    normal_path = chapter_dir + "/" + strFigureImage
    shutil.copy(source, chapter_dir + "/")
    if numbered:
        # Untouched copy (introduced for hyperimage).
        shutil.copy(source, chapter_dir + "/orig" + strFigureImage)
    self._gm_resize("1500x900", normal_path, chapter_dir + "/big" + strFigureImage)
    Newelement.Figureimagebig = chapter_rel + "/big" + strFigureImage
    # The normal size replaces the copy in place, so it must come last.
    self._gm_resize("530x400", normal_path, normal_path)
    Newelement.Figureimagenormal = chapter_rel + "/" + strFigureImage
    Newelement.save()

def _import_facsimilepart(self, xmlFacsimilepart, Newpublication, strDir, strMediaDir, media_rel):
    """Store one EOAfacsimilepart as a Chapter with one Element per page."""
    print("Process Facsimileparts")
    part_order = xmlFacsimilepart.get('order')
    part_dir = strMediaDir + "/" + part_order
    part_rel = media_rel + "/" + part_order

    Newchapter = Chapter(Publication=Newpublication)
    Newchapter.Order = part_order
    os.mkdir(part_dir)
    Newchapter.Title = self.process_as_string(xmlFacsimilepart.find("head"))
    Newchapter.Facsimile = True
    Newchapter.save()

    xmlFacsimilepages = xmlFacsimilepart.findall("EOAfacsimilepage")
    page_count = len(xmlFacsimilepages)
    for intfacs, xmlFacsimilepage in enumerate(xmlFacsimilepages, start=1):
        self.progress(intfacs, page_count, "Processing facsimile %s of %s." % (intfacs, page_count))
        Newelement = Element(Publication=Newpublication, Chapter=Newchapter)
        Newelement.Kind = 'eoafacsimilepage'
        Newelement.Order = xmlFacsimilepage.get('order')
        if xmlFacsimilepage.get('pagenumber') is not None:
            Newelement.Caption = xmlFacsimilepage.get('pagenumber')

        strFilename = xmlFacsimilepage.get('file')
        page_path = part_dir + "/" + strFilename
        shutil.copy(strDir + "/images/" + strFilename, part_dir + "/")
        Newelement.Figureimagebig = part_rel + "/" + strFilename
        # Reading size
        self._gm_resize("535x1700", page_path, part_dir + "/normal" + strFilename)
        Newelement.Figureimagenormal = part_rel + "/normal" + strFilename
        # Thumbnail
        self._gm_resize("170x300", page_path, part_dir + "/tn" + strFilename)
        Newelement.Genericimage = part_rel + "/tn" + strFilename

        # Process additional facsimile information if available
        for tmpElement in xmlFacsimilepage.findall("EOAfacsimileelement"):
            # Fix embedded img tags to produce the correct link, and
            # copy the embedded image
            for xmlDescendant in tmpElement.iterdescendants():
                if xmlDescendant.tag == 'img':
                    print("Image im Facsimile gefunden")
                    strEmbeddedImage = xmlDescendant.get("src")
                    print(strDir + "/" + strEmbeddedImage)
                    shutil.copy(strDir + "/images/" + strEmbeddedImage, part_dir + "/")
                    xmlDescendant.set('src', "/media/" + part_rel + "/" + strEmbeddedImage)
            element_type = tmpElement.get('type')
            if element_type == 'text':
                Newelement.Facsimiletext = self.process_as_string(tmpElement)
            elif element_type == 'textPollux':
                Newelement.Facsimilepollux = self.process_as_string(tmpElement)
            elif element_type == 'xml':
                Newelement.Facsimilexml = self.process_as_string(tmpElement)
        Newelement.save()
    # The leading newline ends the carriage-return progress line.
    print("\n Finished processing facsimiles.")
# class Command ends here |