Skip to content
Permalink
6dc31b9380
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
executable file 3603 lines (3405 sloc) 176 KB
#!/usr/bin/env python3
# -*- coding: utf-8; mode: python -*-
# Time-stamp: <2016-02-03 17:23:51 (kthoden)>
# CHANGES
# installed graphicsmagick: "brew install graphicsmagick"
# tralics needs to be set up
#
#
# some errors:
# [kthoden:~/EOAKram/Supersample/CONVERT] % ~/EOAKram/Probe/MPDL/Skripten/epub_test.sh EOASample.epub
# Epubcheck Version 1.0.5
# ERROR: EOASample.epub/OEBPS/chapter4.xhtml(65): unknown element "EOAindexlocation" from namespace "http://www.w3.org/1999/xhtml"
# ERROR: EOASample.epub/OEBPS/chapter5.xhtml(14): unknown element "EOAindexlocation" from namespace "http://www.w3.org/1999/xhtml"
# ERROR: EOASample.epub/OEBPS/chapter7.xhtml(17): unknown element "formula" from namespace "http://www.w3.org/1999/xhtml"
# ERROR: EOASample.epub/OEBPS/chapter7.xhtml(17): elements from namespace "http://www.w3.org/1998/Math/MathML" are not allowed
# ERROR: EOASample.epub/OEBPS/chapter8.xhtml(13): unknown element "EOAtocentry" from namespace "http://www.w3.org/1999/xhtml"
# ERROR: EOASample.epub/OEBPS/chapter11.xhtml(9): unknown element "EOAtocentry" from namespace "http://www.w3.org/1999/xhtml"
# ERROR: EOASample.epub/OEBPS/chapter11.xhtml(10): unknown element "EOAtocentry" from namespace "http://www.w3.org/1999/xhtml"
# ERROR: EOASample.epub/OEBPS/chapter11.xhtml(10): unknown element "EOAprintlocationindex" from namespace "http://www.w3.org/1999/xhtml"
# license?
#
# Also add EOAparagraph
from optparse import OptionParser
from lxml import etree
from lxml import objectify
from copy import deepcopy
from copy import copy
from EOAbibitem import Bibitem
import glob
import os
import re
import string
import shlex
import subprocess
import sys
import shutil
import time
import configparser
# TODO 2: Fix the handling of a plain URL for a web page, see the Manzer chapter [1] in Proceedings 2; more precisely: add "webpage" as an entry type
# Paths to executables
# NOTE(review): all of these are absolute, machine-specific paths; presumably
# they have to be adapted for every installation - confirm.
# GraphicsMagick binary, called below as "gm identify" and "gm convert"
GM_PATH = "/usr/local/bin/gm"
# TeX Live root; "texmf-dist/scripts/pdfcrop/pdfcrop.pl" is appended to it below
TL_PATH = "/usr/local/texlive/2013/"
# Tralics executable used to convert the TeX source to XML
TRALICS_PATH_EXEC = "/Users/kthoden/src/tralics-2.15.2/src/tralics"
# Directory that holds "tralics_conf" and "tralics.tcf"
TRALICS_PATH_LIB = "/Users/kthoden/EOAKram/Probe/MPDL/tralics"
# pdftk binary, used below to burst the inline-equation PDF into single pages
PDFTK_PATH = "/usr/local/bin/pdftk"
# Base directory of the "Templates/" and "Support/" folders read below
SUPPORT_TEMPLATE_PATH = "/Users/kthoden/EOAKram/Probe/MPDL/"
# Should curl be assumed to be available?
interimResult = ""
###############################################################
# Certain functions for specific tasks
###############################################################
# Maintain text and strip subchildren
def gettext(xmlElement):
    """Return the text content of xmlElement with all child markup stripped.

    The element's own text, the (recursively flattened) text of every child
    and every child's tail are concatenated in document order.
    """
    listParts = [xmlElement.text or ""]
    for xmlChild in xmlElement:
        listParts.append(gettext(xmlChild))
        # The tail is the text that follows the child's closing tag
        listParts.append(xmlChild.tail or "")
    return "".join(listParts)
# include all subelements
def getchildren(xmlElement):
    """Return xmlElement unchanged.

    Placeholder for "include all subelements": the element is handed back
    as is, so the caller keeps the complete subtree.
    """
    return xmlElement
# Adjust and convert image for epub standard
def sanitizeImageEpub(strImagepath):
    """Shrink and convert an image so that it fits the epub constraints.

    The image is resized in place to at most 1500 pixels width and at most
    2000 pixels height. PNG files are converted to JPEG and the PNG is
    removed afterwards.

    strImagepath -- path of the image file to work on
    Returns the path of the resulting image (differs from the input only
    when a PNG has been converted to ".jpg").
    Raises subprocess.CalledProcessError when GraphicsMagick fails.
    """
    def identify(strFormat):
        # Ask GraphicsMagick for a single property of the image.
        # The arguments are passed as a list, so a path containing spaces
        # or quotes reaches gm unchanged.
        return subprocess.check_output(
            [GM_PATH, "identify", "-format", strFormat, strImagepath],
            shell=False, universal_newlines=True).strip()

    print (strImagepath)
    # "1500x>" / "x2000>": only shrink, never enlarge
    if int(identify("%w")) > 1500:
        subprocess.check_output(
            [GM_PATH, "convert", strImagepath, "-resize", "1500x>", strImagepath],
            shell=False)
    if int(identify("%h")) > 2000:
        subprocess.check_output(
            [GM_PATH, "convert", strImagepath, "-resize", "x2000>", strImagepath],
            shell=False)
    if identify("%m") == "PNG":
        strNewImagepath = os.path.splitext(strImagepath)[0] + ".jpg"
        # check_call: the PNG must not be deleted when the conversion failed
        subprocess.check_call([GM_PATH, "convert", strImagepath, strNewImagepath])
        os.remove(strImagepath)
        strImagepath = strNewImagepath
    print ("Hier ein Pfad zu einem Bild:")
    print (strImagepath)
    return strImagepath
# Function to render LaTeX-Code into PNG-Files, returns PNG-Filename (epub & django)
def TeX2PNG(LaTeXCode, Type, Chapter, Number):
    """Render a piece of LaTeX math into a PNG file (epub & django).

    The code is wrapped in the environment that belongs to Type, inserted
    into the template "Templates/Formel.tex", typeset with XeLaTeX in the
    temporary directory, cropped with pdfcrop and converted with
    GraphicsMagick to "items/<Type>_<Chapter>_<Number>.png" below the
    current working directory.

    LaTeXCode -- raw LaTeX source of the formula
    Type      -- one of the keys of the Types dictionary below
    Chapter   -- chapter number, only used for the file name
    Number    -- formula number, only used for the file name
    Returns the wrapped LaTeX code with the rebound commands restored.
    Raises KeyError for an unknown Type.
    """
    import tempfile

    # Dictionary contains Type:begin/end
    Types = {
        "EOAineq" : ["$", "$"],
        "EOAequation" : ["\\begin{equation*}", "\\end{equation*}"],
        "EOAequationnonumber" : ["\\begin{equation*}", "\\end{equation*}"],
        "EOAequationarray" : ["\\begin{align*}", "\\end{align*}"],
        "EOAequationarraynonumber" : ["\\begin{align*}", "\\end{align*}"]
    }
    LaTeXCode = Types[Type][0] + LaTeXCode + Types[Type][1]
    # Raw strings: the patterns are identical to the former literals but no
    # longer rely on invalid escape sequences.
    # NOTE(review): r"\slashed\|" matches a whitespace character followed by
    # "lashed|" (\s is a regex class); r"\|slashed\|" may have been intended.
    # Left unchanged on purpose - confirm against the Tralics output.
    dictRebindedCommands = {
        r"\|ket\|" : r"\\ket",
        r"\|braket\|" : r"\\braket",
        r"\|bra\|" : r"\\bra",
        r"\|Bra\|" : r"\\Bra",
        r"\|Ket\|" : r"\\Ket",
        r"\slashed\|" : r"\\slashed"
    }
    for strCommand, strReplacement in dictRebindedCommands.items():
        LaTeXCode = re.sub(strCommand, strReplacement, LaTeXCode)
    # Open plain LaTeX-Template
    with open(SUPPORT_TEMPLATE_PATH + "Templates/Formel.tex", "r") as tmp:
        Template = tmp.read()
    # Get tmp-directory for this user account; fall back to the system
    # default when TMPDIR is not set
    tmpDir = os.getenv("TMPDIR") or tempfile.gettempdir()
    # Make directory items if it doesn't already exist
    strItemsDir = os.path.join(os.getcwd(), "items")
    os.makedirs(strItemsDir, exist_ok=True)
    strBasename = Type + "_" + str(Chapter) + "_" + str(Number)
    # os.path.join works with and without a trailing slash in tmpDir
    tmpFile = os.path.join(tmpDir, strBasename + ".tex")
    strCroppedPDF = os.path.join(tmpDir, strBasename + "a.pdf")
    with open(tmpFile, "w") as tmp:
        tmp.write(string.Template(Template).substitute(DERINHALT=LaTeXCode))
    # Redirecting stdout of the external tools to save the XeLaTeX output;
    # the log file is closed again when the block is left
    with open('Test.txt', 'w') as Datei:
        Ergebnis = subprocess.call(
            ["/usr/texbin/xelatex", "--halt-on-error", tmpFile],
            cwd=tmpDir, stdout=Datei)
        if Ergebnis == 0:
            print ("Konvertierung folgender Formel ist erfolgreich: " + Type + str(Chapter) + "_" + str(Number))
        else:
            # Every non-zero exit status is a failure, not only 1
            print ("Konvertierung folgender Formel ist fehlgeschlagen: " + Type + str(Chapter) + "_" + str(Number))
        subprocess.call(
            [TL_PATH + "texmf-dist/scripts/pdfcrop/pdfcrop.pl",
             os.path.join(tmpDir, strBasename + ".pdf"), strCroppedPDF],
            cwd=tmpDir, stdout=Datei)
        subprocess.call(
            [GM_PATH, "convert", "-density", "144", strCroppedPDF,
             os.path.join(strItemsDir, strBasename + ".png")],
            cwd=tmpDir, stdout=Datei)
    return LaTeXCode
# Function to create a complete Entry of a publication (epub & django) for author-year citation
def createBibEntryAuthorYear(bibEntry, boolSameAuthor):
    """Build the bibliography entry for author-year citation (epub & django).

    bibEntry       -- the publication; its accessor methods (entrytype(),
                      labelyear(), title(), ...) deliver the parts
    boolSameAuthor -- when true, the author is replaced by "-"
    Returns the formatted entry, or "" for an entry type that is not handled.
    """
    # Truthiness instead of "== True" / "== False": the author is defined
    # for every flag value
    strAuthor = "-" if boolSameAuthor else bibEntry.fullauthorlastfirst()
    strType = bibEntry.entrytype()

    def labelYear():
        # Only called in a matching branch; str() makes all entry types
        # accept non-string values in the same way
        return str(bibEntry.labelyear()) + str(bibEntry.labelyearsuffix())

    if strType in ("book", "booklet"):
        return strAuthor + " (" + labelYear() + "). <i>" + str(bibEntry.title()) + "</i>." + str(bibEntry.location()) + "."
    if strType in ("report", "misc", "newspaper"):
        return strAuthor + " (" + labelYear() + ") <i>" + str(bibEntry.title()) + "</i>."
    if strType == "thesis":
        return strAuthor + " (" + labelYear() + ") <i>" + str(bibEntry.title()) + "</i>." + str(bibEntry.thesistype()) + str(bibEntry.institution())
    if strType in ("incollection", "inproceedings"):
        return strAuthor + " (" + labelYear() + "). " + str(bibEntry.title()) + "." + str(bibEntry.booktitle()) + str(bibEntry.editor()) + str(bibEntry.series()) + str(bibEntry.location()) + str(bibEntry.pages())
    if strType == "article":
        return strAuthor + " (" + labelYear() + "). " + str(bibEntry.title()) + "." + str(bibEntry.journaltitle()) + str(bibEntry.volumenumberpages())
    return ""
# Function to create a complete Entry of a publication (epub & django) for numeric citation
def createBibEntryNumeric(bibEntry):
    """Build the bibliography entry for numeric citation (epub & django).

    bibEntry -- the publication; its accessor methods deliver the parts
    Returns the formatted entry, or "" for an entry type that is not handled.
    """
    strAuthor = bibEntry.fullauthorfirstlast()
    strType = bibEntry.entrytype()
    listParts = []
    if strType == "book":
        listParts = [strAuthor, ". <i>", bibEntry.title(), "</i>.", bibEntry.location(), ", ", bibEntry.year()]
    elif strType == "booklet":
        listParts = [strAuthor, ". <i>", bibEntry.title(), "</i>. ", bibEntry.howpublished(), ". ", bibEntry.location(), ", ", bibEntry.year()]
    elif strType in ("report", "newspaper"):
        # These two are formatted with label year in parentheses
        listParts = [strAuthor, " (", bibEntry.labelyear(), bibEntry.labelyearsuffix(), ") <i>", bibEntry.title(), "</i>."]
    elif strType == "thesis":
        listParts = [strAuthor, ". <i>", bibEntry.title(), "</i>. ", bibEntry.thesistype(), bibEntry.institution(), ", ", bibEntry.year()]
    elif strType == "misc":
        listParts = [strAuthor, ". ", bibEntry.title(), ". ", bibEntry.booktitle(), ". "]
    elif strType == "incollection":
        listParts = [strAuthor, ". ", bibEntry.title(), ". ", bibEntry.booktitle(), bibEntry.editor(), bibEntry.location(), ", ", bibEntry.year(), ". ", bibEntry.pages(), "."]
    elif strType == "inproceedings":
        listParts = [strAuthor, ". ", bibEntry.title(), ". ", bibEntry.booktitle(), ". ", bibEntry.volumenumeric(), bibEntry.year(), ". ", bibEntry.pages(), "."]
    elif strType == "article":
        listParts = [strAuthor, ". ", bibEntry.title(), "<i>", bibEntry.journaltitle(), "</i> ", bibEntry.volumenumberpages(), " (", bibEntry.year(), "):", bibEntry.pages(), "."]
    return "".join(listParts)
# Function to add Elements to Content-OPF (epub)
def addToContentopf(contentopf, Filename, FileID, Mediatype):
    """Register a file in the manifest of the Content-OPF (epub).

    contentopf -- parsed content.opf tree
    Filename   -- value of the href attribute of the new <item>
    FileID     -- raw id; it is sanitized before use
    Mediatype  -- "xml", "jpg" or "png"; "xml" files are additionally
                  appended to the <spine>
    Returns contentopf. An id that is already listed in the module-level
    listContentopf is not added a second time.
    Raises KeyError for an unknown Mediatype.
    """
    global listContentopf
    # Sanitizing FileID: the characters _ . and / are removed from the id
    FileID = re.sub(r"[_./]", "", FileID)
    # FileID may also not start with a number: up to three leading digits
    # are stripped.
    # NOTE(review): an id with four or more leading digits still starts
    # with a digit afterwards - confirm whether that can occur.
    FileID = re.sub(r"^[0-9]{1,3}", "", FileID)
    if FileID in listContentopf:
        return contentopf
    dictMediatypes = {
        "xml" : "application/xhtml+xml",
        "jpg" : "image/jpeg",
        "png" : "image/png"
    }
    contentopfns = "{http://www.idpf.org/2007/opf}"
    xmlManifest = contentopf.find(".//" + contentopfns + "manifest")
    xmlItem = etree.Element("item")
    xmlItem.set("id", FileID)
    xmlItem.set("media-type", dictMediatypes[Mediatype])
    xmlItem.set("href", Filename)
    xmlManifest.append(xmlItem)
    # if it's a XML-File also extend <spine>
    if Mediatype == "xml":
        xmlSpine = contentopf.find(".//" + contentopfns + "spine")
        xmlItemref = etree.Element("itemref")
        xmlItemref.set("idref", FileID)
        xmlSpine.append(xmlItemref)
    listContentopf.append(FileID)
    return contentopf
# Function to add Chapters to Table of Contents (epub)
def addToTocncx(tocncx, Label, intTechnicalChapterNumber):
    """Append a chapter to the navMap of the table of contents (epub).

    tocncx                    -- parsed toc.ncx tree
    Label                     -- text shown for the chapter
    intTechnicalChapterNumber -- running number; it determines the id, the
                                 target file name and (plus one) playOrder
    Returns tocncx.
    """
    strChapterID = "chapter" + str(intTechnicalChapterNumber)
    xmlNavMap = tocncx.find(".//{http://www.daisy.org/z3986/2005/ncx/}navMap")
    # The navPoint is assembled completely before it is attached
    xmlNavPoint = etree.Element("navPoint")
    xmlNavPoint.set("playOrder", str(intTechnicalChapterNumber + 1))
    xmlNavPoint.set("id", strChapterID)
    xmlNavLabel = etree.SubElement(xmlNavPoint, "navLabel")
    etree.SubElement(xmlNavLabel, "text").text = Label
    etree.SubElement(xmlNavPoint, "content").set("src", strChapterID + ".xhtml")
    xmlNavMap.append(xmlNavPoint)
    return tocncx
# Remove Support Files
def cleanup():
    """Remove the support files and directories from the working directory.

    Every entry is removed on its own, so a missing file does not keep the
    remaining ones from being deleted. A message is printed when nothing
    was there to remove.
    """
    strWorkingDir = os.getcwd()
    boolRemoved = False
    for strFile in ("classes.dtd", "mathml2-qname-1.mod", "mathml2.dtd"):
        try:
            os.remove(os.path.join(strWorkingDir, strFile))
            boolRemoved = True
        except FileNotFoundError:
            pass
    for strDirectory in ("html", "iso8879", "iso9573-13", "mathml", "mathml2"):
        try:
            shutil.rmtree(os.path.join(strWorkingDir, strDirectory))
            boolRemoved = True
        except FileNotFoundError:
            pass
    if not boolRemoved:
        print ("Keine Temporaeren Dateien")
###############################################################
# Preparation of certain files and some checks in advance
###############################################################
# Setup of various dictionaries for localization of various elements
dictLangFootnotes = {"english" : "Footnotes", "italian" : "Note a piè pagina", "french" : "notes en bas de page", "german" : "Fußnoten"}
# Options for the command line: filename / configfile
parser = OptionParser()
parser.add_option("-f", "--file", dest="filename",
                  help="Name of XML-File", metavar="FILE")
parser.add_option("-c", "--config", dest="configfile",
                  help="Name of Configuration-File", metavar="CONFIGURATION")
parser.add_option("-t", "--trash", dest="helpfiles",
                  help="Trash temporary files")
(options, args) = parser.parse_args()
# Check for folder and necessary files. A missing prerequisite is an error,
# so the script leaves with a non-zero exit status.
for strRequiredPath, strMessage in (
    ("/CONVERT", "Das notwendige Verzeichnis CONVERT wurde noch nicht erstellt."),
    ("/CONVERT/cover.jpg", "Die Datei cover.jpg im Verzeichnis CONVERT fehlt."),
    ("/CONVERT/publication.cfg", "Die Datei publication.cfg im Verzeichnis CONVERT fehlt."),
):
    if not os.path.exists(os.getcwd() + strRequiredPath):
        print (strMessage)
        sys.exit(1)
# Remove temporary files, necessary for troubleshooting
if options.helpfiles == "temp":
    cleanup()
    sys.exit()
# Copy Support-Files from the support directory to current directory
for strSupportFile in ("classes.dtd", "mathml2-qname-1.mod", "mathml2.dtd"):
    shutil.copy(SUPPORT_TEMPLATE_PATH + "Support/" + strSupportFile, os.getcwd())
# copytree refuses to overwrite: the directories must not exist yet
for strSupportDirectory in ("html", "iso8879", "iso9573-13", "mathml", "mathml2"):
    shutil.copytree(SUPPORT_TEMPLATE_PATH + "Support/" + strSupportDirectory, (os.getcwd() + "/" + strSupportDirectory))
##############################################################
# Preparing the main document #
##############################################################
"""
Der Aufruf von tralics lautet:
/Library/MPIWG/Skripten/tralics -confdir /Library/MPIWG/tralics/tralics_conf -config /Library/MPIWG/tralics/tralics.tcf -utf8 -utf8output Vorlage2012.tex
"""
# Without a file name there is nothing to convert; stop with a clear
# message instead of failing later with a TypeError
if options.filename is None:
    parser.error("Option -f/--file is required")
# Convert TeX to XML via Tralics. The arguments are handed over as a list,
# so paths containing spaces stay intact.
Argumente = [
    TRALICS_PATH_EXEC,
    "-confdir", "%s/tralics_conf" % TRALICS_PATH_LIB,
    "-config", "%s/tralics.tcf" % TRALICS_PATH_LIB,
    "-utf8", "-utf8output",
    "%s.tex" % options.filename,
]
Prozess = subprocess.call(Argumente)
# Fix underscore and fix EOAtranscripted.
# Tralics is called with -utf8output, so the file is read and written as
# UTF-8 regardless of the locale.
with open((options.filename) + ".xml", "r", encoding="utf-8") as tmpFile:
    tmpText = tmpFile.read()
tmpText = re.sub(r"<error n='_' l='(.*?)' c='Missing dollar'/>", "_", tmpText)
tmpText = re.sub(r"<error n='\\par' l='(.*?)' c='Invalid \\par command: paragraph not started'/>", "", tmpText)
with open((options.filename) + ".xml", "w", encoding="utf-8") as tmpFile:
    tmpFile.write(tmpText)
# Complete XML-Document in xmlTree
xmlParser = etree.XMLParser(no_network=False,load_dtd=True) #resolve_entities=False
xmlTree = etree.parse((options.filename + ".xml"), xmlParser)
xmlChapters = xmlTree.findall("//div1")
# Cleanup of not needed tags in advance. To be cleaned: <error>
etree.strip_elements(xmlTree, with_tail=False, *['error'])
print ("-----------------------------------------------------")
print ("Move EOAlanguage from <head> into attribute of EOAchapter")
for xmlChapter in xmlChapters:
    xmlLanguage = xmlChapter.find(".//EOAlanguage")
    if xmlLanguage is not None:
        strLanguage = xmlLanguage.text or "english"
        xmlChapter.set("language", strLanguage)
        # Empty the element so that stripping the tag leaves no text behind
        xmlLanguage.text = None
        print (strLanguage)
    # strip_tags works in place; its return value is not used
    etree.strip_tags(xmlChapter, "EOAlanguage")
##############################################################
# Numbering and Typesetting various Elements #
##############################################################
# Figure out how to number (like essay or regular).
# A document without <EOAseries> is treated like an empty one: regular.
xmlSeries = xmlTree.find(".//EOAseries")
strSerie = (xmlSeries.text if xmlSeries is not None else None) or "regular"
if strSerie == "Essay":
    strNumberingType = "essay"
else:
    strNumberingType = "regular"
# Dictionaries containing UIDs and Numbers
dictChapters = {}
dictFigures = {}
dictEquations = {}
dictSections = {}
dictFootnotes = {}
dictPagelabels = {}
dictTables = {}
dictLists = {}
dictTheorems = {}
print ("-----------------------------------------------------")
print ("Numbering Chapters")
# Only chapters that are not marked rend="nonumber" get a number and an
# entry in dictChapters
Chapternumber = 1
for xmlChapter in xmlChapters:
    if xmlChapter.get('rend') != "nonumber":
        Chapteruid = xmlChapter.get('id')
        dictChapters[Chapteruid] = str(Chapternumber)
        Chapternumber += 1
# EOAequation, EOAsubequation and EOAequationarray Numbering per Chapter
# For every chapter, each EOAequation / EOAequationarray / EOAsubequations
# element is rendered to a PNG via TeX2PNG and its number is stored in
# dictEquations.
# NOTE(review): the indentation of this section is missing in this copy of
# the file; the nesting referred to in the comments below is inferred from
# the statement order and has to be confirmed against the original.
# intChapterNumber counts the chapters and is only used for file names and
# log output; the printed equation numbers use dictChapters instead.
intChapterNumber = 1
for xmlChapter in xmlChapters:
intEquationnumber = 1
xmlDinge = xmlChapter.xpath(".//EOAequation | .//EOAequationarray | .//EOAsubequations")
print ("-----------------------------------------------------")
print ("Processing .//EOAequation | .//EOAequationarray | .//EOAsubequations")
print ("Working on Chapter " + str(intChapterNumber))
print ("Es wurden " + str(len(xmlDinge)) + " Formeln gefunden")
for xmlDing in xmlDinge:
if xmlDing.tag == "EOAequationarray":
# tmpNumberinArray is only being used for filename
tmpNumberinArray = intEquationnumber
# tmpDictNumberLabel used to insert the attribute value into <EOAequation>
tmpDictNumberLabel = {}
# Numbering is being done by <mtr>-Tags
xmlMathmlrows = xmlDing.findall(".//{http://www.w3.org/1998/Math/MathML}mtr")
for xmlMathmlrow in xmlMathmlrows:
if "Label" in xmlMathmlrow.attrib:
# Add the label to the dictionary of equations
if xmlChapter.get("rend") != "nonumber":
dictEquations[xmlMathmlrow.get("Label")] = str(dictChapters[xmlChapter.get('id')]) + "." + str(intEquationnumber)
tmpDictNumberLabel[str(dictChapters[xmlChapter.get('id')]) + "." + str(intEquationnumber)] = xmlMathmlrow.get("Label")
if xmlChapter.get("rend") == "nonumber":
dictEquations[xmlMathmlrow.get("Label")] = str(intEquationnumber)
tmpDictNumberLabel[str(intEquationnumber)] = xmlMathmlrow.get("Label")
# NOTE(review): presumably executed once per <mtr> row, not only for
# labelled rows - cannot be decided without the indentation; confirm.
intEquationnumber += 1
xmlRohTeX = xmlDing.find(".//texmath")
xmlNew = etree.Element('EOAequationarray')
# Blank lines need to be removed otherwise TeX won't work
textSourcecode = os.linesep.join([s for s in xmlRohTeX.text.splitlines() if s])
# \rowattributeunknown has to be deleted, its an artefact
textSourcecode = re.sub("\\\\rowattributeunknown", "", textSourcecode)
# Push Down loop to parse the raw code
# The source is scanned character by character; a double backslash ends
# the current row, which is then typeset as an equation of its own.
textFormel = ""
boolBackslash = False
for Buchstabe in textSourcecode:
if Buchstabe == "\n":
continue
if Buchstabe == "\\":
if boolBackslash == False:
textFormel += Buchstabe
boolBackslash = True
continue
if boolBackslash == True:
textFormel += Buchstabe
strLaTeXCode = TeX2PNG(textFormel, "EOAequationarray", str(intChapterNumber), str(tmpNumberinArray))
if xmlChapter.get("rend") != "nonumber":
tmpXML = etree.Element("EOAequation", filename=("EOAequationarray" + "_" + str(intChapterNumber) + "_" + str(tmpNumberinArray) + ".png"), number=(str(dictChapters[xmlChapter.get('id')]) + "." + str(tmpNumberinArray)))
if xmlChapter.get("rend") == "nonumber":
tmpXML = etree.Element("EOAequation", filename=("EOAequationarray" + "_" + str(intChapterNumber) + "_" + str(tmpNumberinArray) + ".png"), number=(str(tmpNumberinArray)))
tmpXML.set("TeX", strLaTeXCode)
# Put Label into EOAequation
if xmlChapter.get("rend") != "nonumber":
strTempKey = str(dictChapters[xmlChapter.get('id')]) + "." + str(tmpNumberinArray)
if xmlChapter.get("rend") == "nonumber":
strTempKey = str(tmpNumberinArray)
if strTempKey in tmpDictNumberLabel:
#tmpXML.set("label", tmpDictNumberLabel[(str(dictChapters[xmlChapter.get('id')]) + "." + str(tmpNumberinArray))])
tmpXML.set("label", tmpDictNumberLabel[strTempKey])
xmlNew.append(tmpXML)
textFormel = ""
boolBackslash = False
tmpNumberinArray += 1
continue
if Buchstabe != "\\":
textFormel += Buchstabe
boolBackslash = False
# Typeset last equation
strLaTeXCode = TeX2PNG(textFormel, "EOAequationarray", str(intChapterNumber), str(tmpNumberinArray))
if xmlChapter.get("rend") != "nonumber":
tmpXML = etree.Element("EOAequation", filename=("EOAequationarray" + "_" + str(intChapterNumber) + "_" + str(tmpNumberinArray) + ".png"), number=(dictChapters[xmlChapter.get('id')] + "." + str(tmpNumberinArray)))
if xmlChapter.get("rend") == "nonumber":
tmpXML = etree.Element("EOAequation", filename=("EOAequationarray" + "_" + str(intChapterNumber) + "_" + str(tmpNumberinArray) + ".png"), number=(str(tmpNumberinArray)))
tmpXML.set("TeX", strLaTeXCode)
# Put Label into EOAequation
if xmlChapter.get("rend") != "nonumber":
strTempKey = str(dictChapters[xmlChapter.get('id')]) + "." + str(tmpNumberinArray)
if xmlChapter.get("rend") == "nonumber":
strTempKey = str(tmpNumberinArray)
if strTempKey in tmpDictNumberLabel:
print (strTempKey)
print (tmpDictNumberLabel)
print (dictChapters)
tmpXML.set("label", tmpDictNumberLabel[strTempKey])
xmlNew.append(tmpXML)
# The original array is replaced by the container holding one
# EOAequation per row
xmlDing.getparent().replace(xmlDing, xmlNew)
# enclosing <p>-Tag of the Subequations is not wanted, transformed to <temp> to be deleted later on
#xmlNew.getparent().tag = "temp"
continue
if xmlDing.tag == "EOAsubequations":
# Enclosing <p>-Tag of the EOAsubequations needs to be removed
xmlDing.getparent().tag = "temp"
xmlSubequations = xmlDing.findall('.//EOAequation')
# NOTE(review): more than 26 subequations in one group would run past the
# end of this list (IndexError) - confirm that this cannot happen.
listCharacters = ['a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z']
tmpI = 0
# Insert Number of this Subequation into dictEquations
xmlAnchor = xmlDing.find(".//anchor")
print (xmlAnchor)
if xmlChapter.get("rend") != "nonumber":
dictEquations[xmlAnchor.get('id')] = dictChapters[xmlChapter.get('id')] + "." + str(intEquationnumber)
if xmlChapter.get("rend") == "nonumber":
dictEquations[xmlAnchor.get('id')] = str(intEquationnumber)
# Delete anchor
xmlAnchor.getparent().remove(xmlAnchor)
for xmlSubequation in xmlSubequations:
# Enclosing <p>-Tag of the EOAsubequation needs to be removed
#xmlSubequation.getparent().tag = "temp"
# Numbering Subequations with characters
strSubequationNumber = str(intEquationnumber) + listCharacters[tmpI]
tmpI += 1
textSourcecode = xmlSubequation.find('.//texmath').text
# Blank lines need to be removed otherwise TeX won't work
textSourcecode = os.linesep.join([s for s in textSourcecode.splitlines() if s])
strLaTeXCode = TeX2PNG(textSourcecode, "EOAequation", str(intChapterNumber), strSubequationNumber)
# The anchor has to be fetched before clear() removes the children
xmlAnchor = xmlSubequation.find(".//anchor")
# Clear Equation
xmlSubequation.clear()
if xmlChapter.get("rend") != "nonumber":
xmlSubequation.set("filename", "EOAequation" + "_" + str(intChapterNumber) + "_" + strSubequationNumber + ".png")
xmlSubequation.set("number", dictChapters[xmlChapter.get('id')] + "." + strSubequationNumber)
xmlSubequation.set("uid", xmlAnchor.get('id'))
if xmlChapter.get("rend") == "nonumber":
xmlSubequation.set("filename", "EOAequation" + "_" + str(intChapterNumber) + "_" + strSubequationNumber + ".png")
xmlSubequation.set("number", strSubequationNumber)
xmlSubequation.set("uid", xmlAnchor.get('id'))
xmlSubequation.set("id", xmlAnchor.get('id'))
xmlSubequation.set("TeX", strLaTeXCode)
# Insert Number of this Equation into dictEquations
# NOTE(review): this distinguishes by strNumberingType while the code
# above distinguishes by rend="nonumber"; dictChapters only contains
# numbered chapters, so a "nonumber" chapter in a regular series looks
# like it raises KeyError here - confirm.
if strNumberingType == "regular":
dictEquations[xmlAnchor.get('id')] = str(dictChapters[xmlChapter.get('id')]) + "." + strSubequationNumber
if strNumberingType == "essay":
dictEquations[xmlAnchor.get('id')] = strSubequationNumber
# TODO: Keep the anchor directly below the subequations and assign it to the first equation, so that 8.16 can be linked for 8.16a and 8.16b
xmlDing.tag = "temp"
# enclosing <p>-Tag of the Subequations is not wanted, transformed to <temp> to be deleted later on
#xmlDing.getparent().tag = "temp"
intEquationnumber += 1
continue
if xmlDing.tag == "EOAequation":
# Check, if Equation has already been found in a Subeqation
xmlAnchor = xmlDing.find("anchor")
# NOTE(review): "is None" is the usual identity test for this check
if xmlAnchor == None:
continue
if xmlAnchor.get('id') in dictEquations:
continue
if xmlDing.find('.//texmath') is not None:
textSourcecode = xmlDing.find('.//texmath').text
else:
textSourcecode = xmlDing.text
# Blank lines need to be removed otherwise TeX won't work
textSourcecode = os.linesep.join([s for s in textSourcecode.splitlines() if s])
strLaTeXCode = TeX2PNG(textSourcecode, "EOAequation", intChapterNumber, intEquationnumber)
#print ("Got:")
#print (strLaTeXCode)
if xmlChapter.get("rend") != "nonumber":
xmlDing.set("filename", "EOAequation" + "_" + str(intChapterNumber) + "_" + str(intEquationnumber) + ".png")
xmlDing.set("number", dictChapters[xmlChapter.get('id')] + "." + str(intEquationnumber))
xmlDing.set("uid", xmlAnchor.get('id'))
if xmlChapter.get("rend") == "nonumber":
xmlDing.set("filename", "EOAequation" + "_" + str(intChapterNumber) + "_" + str(intEquationnumber) + ".png")
xmlDing.set("number", str(intEquationnumber))
xmlDing.set("uid", xmlAnchor.get('id'))
xmlDing.set("id", xmlAnchor.get('id'))
xmlDing.set("TeX", strLaTeXCode)
#xmlDing.getparent().replace(xmlDing, xmlNew)
# Insert Number of this Equation into dictEquations
# NOTE(review): same strNumberingType / rend="nonumber" mismatch as in
# the subequation branch above - confirm.
if strNumberingType == "regular":
dictEquations[xmlAnchor.get('id')] = \
str(dictChapters[xmlChapter.get('id')]) + "." + str(intEquationnumber)
if strNumberingType == "essay":
dictEquations[xmlAnchor.get('id')] = str(intEquationnumber)
intEquationnumber += 1
continue
# Next chapter (presumably at the level of the chapter loop)
intChapterNumber += 1
# Unnumbered equations and equation arrays: every element is rendered into
# one PNG and replaced by an <EOAequationnonumber> that carries the file
# name and the LaTeX source. The images are counted per chapter.
intChapterNumber = 1
for xmlChapter in xmlChapters:
    tempImagenumber = 1
    xmlDinge = xmlChapter.xpath(".//EOAequationnonumber | .//EOAequationarraynonumber")
    print ("-----------------------------------------------------")
    print ("Processing .//EOAequationnonumber | .//EOAequationarraynonumber")
    print ("Working on Chapter " + str(intChapterNumber))
    print ("Es wurden " + str(len(xmlDinge)) + " Formeln gefunden")
    for xmlDing in xmlDinge:
        # The xpath above only delivers these two tags; the tag is also the
        # TeX2PNG type and the prefix of the image file name
        strType = xmlDing.tag
        if strType not in ("EOAequationarraynonumber", "EOAequationnonumber"):
            continue
        xmlTexmath = xmlDing.find(".//texmath")
        if xmlTexmath is not None:
            textSourcecode = xmlTexmath.text
        else:
            textSourcecode = xmlDing.text
        # Blank lines need to be removed otherwise TeX won't work
        textSourcecode = os.linesep.join([s for s in textSourcecode.splitlines() if s])
        if strType == "EOAequationarraynonumber":
            # \rowattributeunknown has to be deleted, its an artefact
            textSourcecode = re.sub("\\\\rowattributeunknown", "", textSourcecode)
            # TODO: check whether it is sufficient to put a complete
            # EOAequationarraynonumber into a single image. The former
            # row-by-row splitting was unreachable and has been dropped.
        # The result has to be stored under the name that is read below;
        # the array branch used to assign a misspelt variable here
        strLaTeXCode = TeX2PNG(textSourcecode, strType, str(intChapterNumber), str(tempImagenumber))
        xmlNew = etree.Element("EOAequationnonumber", filename=(strType + "_" + str(intChapterNumber) + "_" + str(tempImagenumber) + ".png"))
        xmlNew.set("TeX", strLaTeXCode)
        xmlDing.getparent().replace(xmlDing, xmlNew)
        tempImagenumber += 1
    intChapterNumber += 1
print ("-----------------------------------------------------")
print ("New Function to convert EOAineq")
# All inline equations are collected in one TeX document (one equation per
# page), typeset in a single XeLaTeX run, burst into single pages and
# converted to PNG images.
intChapterNumber = 1
intEOAineqRunningOrder = 1
dictEOAineqs = {}
strTeXEquations = ""
for xmlChapter in xmlChapters:
    print ("Chapter " + str(intChapterNumber))
    xmlEOAineqs = xmlChapter.findall(".//EOAineq")
    intEOAineqnumber = 1
    for xmlEOAineq in xmlEOAineqs:
        xmlTexmath = xmlEOAineq.find('.//texmath')
        if xmlTexmath is not None:
            strSourceCode = xmlTexmath.text
        else:
            strSourceCode = xmlEOAineq.text
        # Blank lines need to be removed otherwise TeX won't work
        strSourceCode = os.linesep.join([s for s in strSourceCode.splitlines() if s])
        strTeXEquations = strTeXEquations + "$" + strSourceCode + "$\n\\newpage\n"
        # Add intEOAineqRunningOrder : Filename to dictionary
        strFilename = "EOAineq_" + str(intChapterNumber) + "_" + str(intEOAineqnumber)
        dictEOAineqs[intEOAineqRunningOrder] = strFilename
        # Prepare XML: clear() also drops the tail, so it is put back
        tmpTail = xmlEOAineq.tail
        xmlEOAineq.clear()
        xmlEOAineq.tail = tmpTail
        xmlEOAineq.set("src", strFilename + ".png")
        xmlEOAineq.set("TeX", strSourceCode)
        # increment integers
        intEOAineqRunningOrder += 1
        intEOAineqnumber +=1
    intChapterNumber += 1
# Raw strings: the patterns are identical to the former literals but no
# longer rely on invalid escape sequences.
# NOTE(review): r"\slashed\|" matches a whitespace character followed by
# "lashed|" (\s is a regex class); r"\|slashed\|" may have been intended.
# Left unchanged on purpose - confirm against the Tralics output.
dictRebindedCommands = {
    r"\|ket\|" : r"\\ket",
    r"\|braket\|" : r"\\braket",
    r"\|bra\|" : r"\\bra",
    r"\|Bra\|" : r"\\Bra",
    r"\|Ket\|" : r"\\Ket",
    r"\slashed\|" : r"\\slashed"
}
for strCommand in dictRebindedCommands.keys():
    strTeXEquations = re.sub(strCommand, dictRebindedCommands[strCommand], strTeXEquations)
with open(SUPPORT_TEMPLATE_PATH + "Templates/Formel.tex", "r") as tmp:
    Template = tmp.read()
# Get tmp-directory for this user account
# NOTE(review): TMPDIR is expected to be set and to end with a path
# separator, because file names are appended directly.
tmpDir = os.getenv("TMPDIR")
# Make directory items if it doesn't already exist
os.makedirs(os.getcwd() + "/items", exist_ok=True)
s = string.Template(Template)
e = s.substitute(DERINHALT=strTeXEquations)
tmpFile = tmpDir + "EOAinline.tex"
with open(tmpFile, "w") as tmp:
    tmp.write(e)
print ("Typesetting all Inline Equations")
# The commands are handed over as argument lists, so paths containing
# spaces stay intact
Argumente = ["/usr/texbin/xelatex", "--halt-on-error", tmpFile]
# The log file is deliberately left open: it is a module-level name and
# code further down in the file may still write to it
Datei = open('Test.txt', 'w')
Ergebnis = subprocess.call(Argumente,cwd=tmpDir,stdout=Datei)
print ("Splitting all Inline Equations")
Argumente = [PDFTK_PATH, "EOAinline.pdf", "burst", "output", "EOAineq_%d.pdf"]
Ergebnis = subprocess.call(Argumente,cwd=tmpDir)
print ("Converting %s splitted pages into PNG-Images" % len(dictEOAineqs.keys()))
counter_dictEOAineqs = 1
for intRunningOrder in dictEOAineqs.keys():
    # provide more status information here in output!
    print("Image %s of %s" % (counter_dictEOAineqs, len(dictEOAineqs.keys())))
    # Page number n of the burst PDF belongs to the n-th inline equation
    strCroppedPDF = tmpDir + dictEOAineqs[intRunningOrder] + ".pdf"
    Argumente = [TL_PATH + "texmf-dist/scripts/pdfcrop/pdfcrop.pl", tmpDir + "EOAineq_" + str(intRunningOrder) + ".pdf", strCroppedPDF]
    subprocess.call(Argumente,cwd=tmpDir,stdout=Datei)
    # os.getcwd() is the directory in which "items" has been created above
    Argumente = [GM_PATH, "convert", "-density", "144", strCroppedPDF, os.getcwd() + "/items/" + dictEOAineqs[intRunningOrder] + ".png"]
    subprocess.call(Argumente,cwd=tmpDir,stdout=Datei)
    counter_dictEOAineqs += 1
print ("-----------------------------------------------------")
print ("EOAFigure Numbering per Chapter")
for xmlChapter in xmlChapters:
    Figurenumber = 1
    strChapterUID = xmlChapter.get('id')
    xmlFigures = xmlChapter.xpath(".//EOAfigure | .//EOAlsfigure")
    for xmlFigure in xmlFigures:
        xmlAnchor = xmlFigure.find("anchor")
        # Check if Figure is in a numbered Chapter
        # Otherwise just put the Number of the figure.
        # dictChapters only knows the numbered chapters; an unnumbered
        # chapter that carries an id must not be looked up there.
        if strChapterUID and strChapterUID in dictChapters:
            dictFigures[xmlAnchor.get('id')] = \
                str(dictChapters[strChapterUID]) + "." + str(Figurenumber)
        else:
            dictFigures[xmlAnchor.get('id')] = str(Figurenumber)
        xmlFigure.set("id", xmlAnchor.get("id"))
        Figurenumber += 1
print ("-----------------------------------------------------")
print ("Numbering Theorems")
# The number of a theorem is taken over from its id-text attribute
for xmlChapter in xmlChapters:
    xmlTheorems = xmlChapter.findall(".//theorem")
    for xmlTheorem in xmlTheorems:
        strUID = xmlTheorem.get("id")
        strNumber = xmlTheorem.get("id-text")
        dictTheorems[strUID] = strNumber
print ("-----------------------------------------------------")
print ("Section, Subsection,... Numbering per Chapter")
# Fill dictSections: id of every numbered div2/div3 -> dotted number string.
# Inside numbered chapters the chapter counter is used as prefix; inside
# chapters marked rend="nonumber" the prefix is omitted.
intChapterNumber = 1
for xmlChapter in xmlChapters:
    strUID = xmlChapter.get("id")
    #dictChapters[strUID] = str(intChapterNumber)
    # Only direct div2 children of the chapter are considered
    xmlSections = xmlChapter.findall("div2")
    intSectionNumber = 1
    for xmlSection in xmlSections:
        # Unnumbered sections are skipped and do not consume a number
        if xmlSection.get("rend") == "nonumber":
            continue
        strUID = xmlSection.get("id")
        if xmlChapter.get("rend") != "nonumber":
            dictSections[strUID] = str(intChapterNumber) + "." + str(intSectionNumber)
        if xmlChapter.get("rend") == "nonumber":
            dictSections[strUID] = str(intSectionNumber)
        # Same scheme one level deeper for the div3 children of this section
        xmlSubsections = xmlSection.findall("div3")
        intSubsectionNumber = 1
        for xmlSubsection in xmlSubsections:
            if xmlSubsection.get("rend") == "nonumber":
                continue
            strUID = xmlSubsection.get("id")
            if xmlChapter.get("rend") != "nonumber":
                dictSections[strUID] = str(intChapterNumber) + "." + str(intSectionNumber) + "." + str(intSubsectionNumber)
            if xmlChapter.get("rend") == "nonumber":
                dictSections[strUID] = str(intSectionNumber) + "." + str(intSubsectionNumber)
            intSubsectionNumber += 1
        intSectionNumber += 1
    # Only numbered chapters advance the chapter counter
    if xmlChapter.get("rend") != "nonumber":
        intChapterNumber += 1
print ("-----------------------------------------------------")
print ("Numbering of Footnotes per Chapter")
# Fill dictFootnotes: id of every <note> -> running number as string.
# The counter restarts at 1 for each chapter.
intChapterNumber = 1
for xmlChapter in xmlChapters:
    intNoteNumber = 1
    xmlFootnotes = xmlChapter.findall(".//note")
    for xmlFootnote in xmlFootnotes:
        strUID = xmlFootnote.get("id")
        dictFootnotes[strUID] = str(intNoteNumber)
        intNoteNumber += 1
# the new-style footnotes that use LaTeX bigfoot show up in the following order:
footnote_groups = ["decimal", "lower-latin"]
def get_bigfoot_data(chapter):
    """
    Group the bigfoot-style footnotes of one chapter by their numbering type.

    Footnotes are numbered per chapter, so callers pass one chapter at a time.
    The return value is an association list: one (type, notes) tuple for every
    entry of footnote_groups, in that order. "notes" holds the chapter's
    EOAbigfoot elements whose "list-style-type" attribute equals the type, in
    document order. Types without any footnote yield an empty list; footnotes
    of an unsupported type are left out.
    """
    notes_in_chapter = chapter.findall(".//EOAbigfoot")
    grouped = []
    for style in footnote_groups:
        matching = []
        for candidate in notes_in_chapter:
            if candidate.get("list-style-type") == style:
                matching.append(candidate)
        grouped.append((style, matching))
    return grouped
print ("-----------------------------------------------------")
print ("Numbering of Lists per Chapter")
# Fill dictLists: "id" attribute of every <item> -> its "id-text" attribute.
# The label is read from the XML; nothing is counted here.
for xmlChapter in xmlChapters:
    xmlListitems = xmlChapter.findall(".//item")
    for xmlListitem in xmlListitems:
        strUID = xmlListitem.get("id")
        strItemNumber = xmlListitem.get("id-text")
        dictLists[strUID] = strItemNumber
print ("-----------------------------------------------------")
print ("Working on Page Numbers for References")
# Fill dictPagelabels: label name -> page number (as string), read from the
# \newlabel{...} lines of every .aux file in the current working directory.
listAuxFiles = glob.glob(os.getcwd() + "/*.aux")
for strFile in listAuxFiles:
    # The context manager closes the file even if reading fails.
    with open(strFile, "r") as tmpFile:
        lines = tmpFile.readlines()
    for line in lines:
        # Group 1: the label name inside \newlabel{...}
        matchObjectLabel = re.match(r'\\newlabel\{(.*?)\}', line)
        if matchObjectLabel:
            # Group 2: the digits inside the last "{...}}" at the end of the line
            matchObjectPage = re.match(r'(.*?)\}\{(\d{1,})\}\}$', line)
            if matchObjectPage:
                dictPagelabels[matchObjectLabel.group(1)] = matchObjectPage.group(2)
print ("-----------------------------------------------------")
print ("Numbering of Tables per Chapter")
# Fill dictTables: table label -> printed table number.
# The table counter restarts at 1 for every chapter.
intChapterNumber = 1
for xmlChapter in xmlChapters:
    intTableNumber = 1
    xmlTables = xmlChapter.findall(".//EOAtable")
    for xmlTable in xmlTables:
        xmlTableLabel = xmlTable.find(".//EOAtablelabel")
        strTableCaption = xmlTable.find(".//EOAtablecaption").text
        # A caption with the literal text "nonumber" marks an unnumbered table
        if strTableCaption == "nonumber":
            continue
        # Tables without a label get a generated one: "table" + chapter counter + table counter
        if not xmlTableLabel.text or xmlTableLabel.text == "":
            xmlTableLabel.text = "table" + str(intChapterNumber) + str(intTableNumber)
        strUID = xmlTableLabel.text
        print (strUID)
        if xmlChapter.get("rend") != "nonumber":
            # Prefix with the chapter number stored in dictChapters
            dictTables[strUID] = dictChapters[xmlChapter.get('id')] + "." + str(intTableNumber)
        if xmlChapter.get("rend") == "nonumber":
            dictTables[strUID] = str(intTableNumber)
        intTableNumber += 1
        # NOTE(review): nesting level of this debug print is reconstructed -- confirm
        print (dictTables)
    # Unlike the section numbering, this counter advances for every chapter
    intChapterNumber += 1
##############################################################
#                Preparing the Bibliography                  #
##############################################################
# Pipeline of this section:
#   <filename>.bbl --copy--> <filename>bib.tex --tralics--> <filename>bib.xml
# followed by a textual clean-up of the XML so that it can be parsed.
# Copy interim .bbl-File to interim bib.tex file
shutil.copy((options.filename) + ".bbl", ((options.filename) + "bib.tex"))
# Read all lines of Bibliographic TeX
# (context managers make sure the file is closed even if reading/writing fails)
with open (((options.filename) + "bib.tex"), "r") as tmpFile:
    tmpLines = tmpFile.readlines()
# First line should link to Bibliographic Praeambel
tmpLines[0] = "\\include{/Library/MPIWG/TeX/pre_bib}\n"
# Remove unwanted lines
# (indices 18 down to 1; going backwards keeps the remaining indices valid)
for i in range (18,0,-1):
    del tmpLines[i]
# Save changes
with open (((options.filename) + "bib.tex"), "w") as tmpFile:
    tmpFile.writelines(tmpLines)
# TeX has been sanitized, now tralics to make it intermediate XML
Kommando = "%s -confdir %s/tralics_conf -config %s/tralics.tcf -utf8 -utf8output -entnames=false %sbib.tex" % (TRALICS_PATH_EXEC, TRALICS_PATH_LIB, TRALICS_PATH_LIB, options.filename)
Argumente = shlex.split(Kommando)
Prozess = subprocess.call(Argumente)
# Sanitaze XML to make it useable
with open((options.filename) + "bib.xml", "r") as tmpFile:
    tmpContent = tmpFile.read()
# Every pattern below is removed from the XML text (replaced by "").
# Some patterns are listed more than once; this is harmless.
listReplace = [ r"<math mode='display' xmlns='http://www.w3.org/1998/Math/MathML'>",
                r"<formula textype='displaymath' type='display'>",
                r"<mi>",
                r"</mi>",
                r"<mn>",
                r"<mn>",
                r"<mo>",
                r"</mo>",
                r"<mn>",
                r"</mn>",
                r"<mrow/>",
                r"<msup>",
                r"</msup>",
                r"</math>",
                r"</formula>",
                r"<formula type='inline'>",
                r"<math xmlns='http://www.w3.org/1998/Math/MathML'>",
                r"<formula textype='math' type='inline'>",
                r"<mrow>uniquename=(.*?),hash=(.*?)</mrow>",
                r"<mrow>hash=(.*?)</mrow>",
                ]
for strReplace in listReplace:
    tmpContent = re.sub(strReplace, "", tmpContent)
# Put Back Underscore _
tmpContent = re.sub(r"<error n='_' l='(.*?)' c='Missing dollar'/>", "_", tmpContent)
# Remove empty Lines
tmpContent = re.sub(r"\n\n", "\n", tmpContent)
# Put back Ampersand
tmpContent = re.sub(r"&", "&amp;", tmpContent)
with open((options.filename) + "bib.xml", "w") as tmpFile:
    tmpFile.write(tmpContent)
# TeXML has been sanitized, now load xml-Tree
xmlParser2 = etree.XMLParser(no_network=False,load_dtd=False)
xmlBibTree = etree.parse((options.filename + "bib.xml"), xmlParser2)
xmlEntries = xmlBibTree.findall(".//entry")
# If Bibliography-Type is monograph search for EOAbibliography and make it all
# One bibliography for the whole book: the first EOAprintbibliography element
# is emptied, turned into a <div> and filled with one <p class="bibliography">
# per entry of the bibliography XML.
if xmlTree.find(".//EOAbibliographytype").text == "monograph":
    if xmlTree.find(".//EOAprintbibliography") is not None:
        xmlBibliography = xmlTree.find(".//EOAprintbibliography")
        xmlBibliography.clear()
        xmlBibliography.tag = "div"
        # The enclosing element is renamed to <div> as well
        xmlBibliography.getparent().tag = "div"
        #xmlBibliography.addnext(xmlBibliographyDiv)
        xmlEntries = xmlBibTree.findall(".//entry")
        intNumberOfEntry = 0
        for xmlEntry in xmlEntries:
            if intNumberOfEntry == 0:
                # Don't check for previous author if first entry of the Bibliography
                bibEntry = Bibitem(xmlEntry)
                strNewentry = "<p class=\"bibliography\">" + createBibEntryAuthorYear(bibEntry, boolSameAuthor=False) + "</p>"
                xmlNew = etree.fromstring(strNewentry)
                xmlBibliography.append(xmlNew)
            else:
                bibEntry = Bibitem(xmlEntry)
                # Check if author of previous Entry is the same
                bibEntryPrevious = Bibitem(xmlEntries[intNumberOfEntry - 1])
                if bibEntry.fullauthorlastfirst() == bibEntryPrevious.fullauthorlastfirst():
                    # Same author as the preceding entry: pass boolSameAuthor=True
                    strNewentry = "<p class=\"bibliography\">" + createBibEntryAuthorYear(bibEntry, boolSameAuthor=True) + "</p>"
                    xmlNew = etree.fromstring(strNewentry)
                    xmlBibliography.append(xmlNew)
                else:
                    strNewentry = "<p class=\"bibliography\">" + createBibEntryAuthorYear(bibEntry, boolSameAuthor=False) + "</p>"
                    xmlNew = etree.fromstring(strNewentry)
                    xmlBibliography.append(xmlNew)
            intNumberOfEntry += 1
# If Bibliography-Type is anthology search for EOAbibliography and make one per chapter
# For every chapter that contains an EOAprintbibliography element, a new <div>
# is inserted right after it and filled with the entries of the refsection
# whose <number> equals the running chapter counter.
if xmlTree.find(".//EOAbibliographytype").text == "anthology":
    intChapterNumber = 1
    for xmlChapter in xmlChapters:
        if xmlChapter.find(".//EOAprintbibliography") is not None:
            xmlBibliography = xmlChapter.find(".//EOAprintbibliography")
            xmlBibliography.getparent().tag = "div"
            xmlBibliographyDiv = etree.Element("div")
            xmlBibliography.addnext(xmlBibliographyDiv)
            xmlRefsections = xmlBibTree.findall(".//refsection")
            # Stop at the refsection belonging to this chapter; if none matches,
            # xmlRefsection is left pointing at the last refsection.
            for xmlRefsection in xmlRefsections:
                if xmlRefsection.find(".//number").text == str(intChapterNumber):
                    break
            xmlEntries = xmlRefsection.findall(".//entry")
            intNumberOfEntry = 0
            for xmlEntry in xmlEntries:
                if intNumberOfEntry == 0:
                    # Don't check for previous author if first entry of the Bibliography
                    bibEntry = Bibitem(xmlEntry)
                    strNewentry = "<p class=\"bibliography\">" + createBibEntryAuthorYear(bibEntry, boolSameAuthor=False) + "</p>"
                    xmlNew = etree.fromstring(strNewentry)
                    xmlBibliographyDiv.append(xmlNew)
                else:
                    bibEntry = Bibitem(xmlEntry)
                    # Check if author of previous Entry is the same
                    bibEntryPrevious = Bibitem(xmlEntries[intNumberOfEntry - 1])
                    if bibEntry.fullauthorlastfirst() == bibEntryPrevious.fullauthorlastfirst():
                        strNewentry = "<p class=\"bibliography\">" + createBibEntryAuthorYear(bibEntry, boolSameAuthor=True) + "</p>"
                        xmlNew = etree.fromstring(strNewentry)
                        xmlBibliographyDiv.append(xmlNew)
                    else:
                        strNewentry = "<p class=\"bibliography\">" + createBibEntryAuthorYear(bibEntry, boolSameAuthor=False) + "</p>"
                        print (strNewentry)
                        xmlNew = etree.fromstring(strNewentry)
                        xmlBibliographyDiv.append(xmlNew)
                intNumberOfEntry += 1
        # The chapter counter advances for every chapter, with or without bibliography
        intChapterNumber += 1
# for the time being
strCitation = ""
# Bibliographies are done, now for the citations
# Every EOAciteauthoryear / EOAciteyear / EOAcitemanual element is rewritten
# into <span class="citation"> carrying the citation text plus the data-*
# attributes the website uses for a popover.
if xmlTree.find(".//EOAbibliographytype").text == "anthology" or xmlTree.find(".//EOAbibliographytype").text == "monograph":
    intChapterNumber = 1
    for xmlChapter in xmlChapters:
        print ("-----------------------------------------------------")
        print ("Processing References for Chapter " + str(intChapterNumber))
        xmlCitations = xmlChapter.xpath(".//EOAciteauthoryear | .//EOAciteyear | .//EOAcitemanual")
        for xmlCitation in xmlCitations:
            print (xmlCitation.find("./citekey").text)
            # If Bibliography-Type is anthology find Refsection for this Chapter
            if xmlTree.find(".//EOAbibliographytype").text == "anthology":
                xmlRefsections = xmlBibTree.findall(".//refsection")
                for xmlRefsection in xmlRefsections:
                    if xmlRefsection.find(".//number").text == str(intChapterNumber):
                        break
                xmlEntries = xmlRefsection.findall(".//entry")
            # If Bibliography-Type is monograph find all entries, forget about refsection
            if xmlTree.find(".//EOAbibliographytype").text == "monograph":
                xmlEntries = xmlBibTree.findall(".//entry")
            # Linear search for the entry whose citekey matches this citation
            for xmlEntry in xmlEntries:
                bibEntry = Bibitem(xmlEntry)
                if bibEntry.citekey() == xmlCitation.find("./citekey").text:
                    if xmlCitation.tag == "EOAciteauthoryear":
                        # "<short author> <year>[suffix]"
                        strCitation = bibEntry.shortauthor() + " " + bibEntry.labelyear()
                        if bibEntry.labelyearsuffix() is not None:
                            strCitation = strCitation + bibEntry.labelyearsuffix()
                        strTitle = bibEntry.title()
                    if xmlCitation.tag == "EOAciteyear":
                        # "<year>[suffix]"
                        strCitation = bibEntry.labelyear()
                        if bibEntry.labelyearsuffix() is not None:
                            strCitation = strCitation + bibEntry.labelyearsuffix()
                        strTitle = bibEntry.title()
                    if xmlCitation.tag == "EOAcitemanual":
                        # The citation text is supplied by the author in <citetext>
                        strCitation = xmlCitation.find("citetext").text
                        strTitle = bibEntry.title()
                    # Append ", <page>" when the citation carries a non-empty <page>
                    # NOTE(review): nesting level of this check is reconstructed -- confirm
                    if xmlCitation.find("./page") is not None and xmlCitation.find("./page").text is not None:
                        strCitation = strCitation + ", " + xmlCitation.find("./page").text
            # Replace the XML tag with the citation text here
            # clear() would drop the text following the element, so the tail
            # is saved first and restored afterwards.
            tmpTail = xmlCitation.tail
            xmlCitation.clear()
            xmlCitation.tag = "span"
            xmlCitation.set("rel","popover")
            xmlCitation.set("class","citation")
            xmlCitation.text = strCitation
            xmlCitation.tail = tmpTail
            # Create Link to be used for website in a popover
            xmlCitation.set("data-toggle", "popover")
            xmlCitation.set("html", "true")
            xmlCitation.set("data-placement", "bottom")
            xmlCitation.set("data-title", strCitation)
            # Falls back to "missing" if setting the title raises, for example
            # when strTitle has never been assigned because no entry matched.
            try:
                xmlCitation.set("data-content", strTitle)
            except:
                xmlCitation.set("data-content", "missing")
        intChapterNumber += 1
def _collapse_cite_numbers(listCitenumbers):
    """
    Render citation numbers as the bracketed label shown in the text.

    listCitenumbers is a non-empty sequence of numbers (ints or digit strings).
    The numbers are sorted; runs of three or more consecutive numbers are
    written as "first-last", shorter runs are written out separated by commas.

    >>> _collapse_cite_numbers(["1", "2", "3", "5"])
    '[1-3,5]'
    >>> _collapse_cite_numbers(["1", "2", "4"])
    '[1,2,4]'

    Raises IndexError for an empty sequence.
    """
    numbers = sorted(int(number) for number in listCitenumbers)
    # Each run is stored as [first, last]
    runs = [[numbers[0], numbers[0]]]
    for number in numbers[1:]:
        if number == runs[-1][1] + 1:
            runs[-1][1] = number
        else:
            runs.append([number, number])
    parts = []
    for first, last in runs:
        if first == last:
            parts.append(str(first))
        elif last == first + 1:
            # A pair is not worth a range: "4,5" rather than "4-5"
            parts.append("%s,%s" % (first, last))
        else:
            parts.append("%s-%s" % (first, last))
    return "[" + ",".join(parts) + "]"
# If Bibliography-Type is monograph-numeric search for EOAbibliography and make it all
# One numbered bibliography for the whole book; citations become "[n]" labels.
if xmlTree.find(".//EOAbibliographytype").text == "monograph-numeric":
    if xmlTree.find(".//EOAprintbibliography") is not None:
        dictCitekeysNumbers = {}
        dictCitekeysTitles = {}
        xmlBibliography = xmlTree.find(".//EOAprintbibliography")
        xmlBibliography.clear()
        xmlBibliography.tag = "div"
        xmlBibliography.getparent().tag = "div"
        xmlEntries = xmlBibTree.findall(".//entry")
        intNumberOfEntry = 1
        for xmlEntry in xmlEntries:
            # Go through all entries and assign a number to the citekey
            bibEntry = Bibitem(xmlEntry)
            strCitekey = bibEntry.citekey()
            dictCitekeysNumbers[strCitekey] = str(intNumberOfEntry)
            dictCitekeysTitles[strCitekey] = str(bibEntry.title())
            strNewentry = "<p class=\"bibliography\">[" + str(intNumberOfEntry) + "] " + createBibEntryNumeric(bibEntry) + "</p>"
            xmlNew = etree.fromstring(strNewentry)
            xmlBibliography.append(xmlNew)
            intNumberOfEntry += 1
    # Now for the references via EOAcitenumeric
    # NOTE(review): nesting level of this step is reconstructed -- confirm
    xmlCitenumerics = xmlTree.findall(".//EOAcitenumeric")
    for xmlCitenumeric in xmlCitenumerics:
        print (etree.tostring(xmlCitenumeric))
        strPopover = ""
        # <citekey> holds a comma separated list of keys; strip blanks and newlines
        tmpCitekeys = xmlCitenumeric.find(".//citekey").text
        tmpCitekeys = re.sub(" ", "", tmpCitekeys)
        tmpCitekeys = re.sub("\n", "", tmpCitekeys)
        listCitekeys = tmpCitekeys.split(",")
        listCitenumbers = []
        for strCitekey in listCitekeys:
            listCitenumbers.append(dictCitekeysNumbers[strCitekey])
            # Create Text to be used on the website in a popover
            strPopover = strPopover + "[" + dictCitekeysNumbers[strCitekey] + "] " + dictCitekeysTitles[strCitekey] + " "
        listCitenumbers = sorted(listCitenumbers, key=int)
        strResult = _collapse_cite_numbers(listCitenumbers)
        xmlCitenumeric.text = strResult
        # Create Link to be used for website
        xmlCitenumeric.set("data-toggle", "popover")
        xmlCitenumeric.set("html", "true")
        xmlCitenumeric.set("data-content", strPopover)
        xmlCitenumeric.set("class","citation")
        xmlCitenumeric.set("data-placement", "bottom")
        xmlCitenumeric.set("data-title", strResult)
# Numeric citations for the individual chapters
# Same scheme as above, but numbering restarts in every chapter and the
# entries come from the refsection matching the running chapter counter.
if xmlTree.find(".//EOAbibliographytype").text == "anthology-numeric":
    intChapterNumber = 1
    for xmlChapter in xmlChapters:
        print ("Processing Bibliography")
        if xmlChapter.find(".//EOAprintbibliography") is not None:
            dictCitekeysNumbers = {}
            dictCitekeysTitles = {}
            xmlBibliography = xmlChapter.find(".//EOAprintbibliography")
            #xmlBibliography.clear()
            xmlBibliography.tag = "div"
            xmlBibliography.getparent().tag = "div"
            xmlRefsections = xmlBibTree.findall(".//refsection")
            for xmlRefsection in xmlRefsections:
                if xmlRefsection.find(".//number").text == str(intChapterNumber):
                    break
            xmlEntries = xmlRefsection.findall(".//entry")
            intNumberOfEntry = 1
            for xmlEntry in xmlEntries:
                # Go through all entries and assign a number to the citekey
                bibEntry = Bibitem(xmlEntry)
                strCitekey = bibEntry.citekey()
                dictCitekeysNumbers[strCitekey] = str(intNumberOfEntry)
                dictCitekeysTitles[strCitekey] = str(bibEntry.title())
                strNewentry = "<p class=\"bibliography\">[" + str(intNumberOfEntry) + "] " + createBibEntryNumeric(bibEntry) + "</p>"
                xmlNew = etree.fromstring(strNewentry)
                xmlBibliography.append(xmlNew)
                intNumberOfEntry += 1
            # Now for the references via EOAcitenumeric
            # NOTE(review): nesting level of this step is reconstructed -- confirm
            xmlCitenumerics = xmlChapter.xpath(".//EOAcitenumeric | .//EOAciteauthoryear | .//EOAciteyear")
            print ("Numerische Citation gefunden in Kapitel " + str(intChapterNumber))
            for xmlCitenumeric in xmlCitenumerics:
                strPopover = ""
                tmpCitekeys = xmlCitenumeric.find(".//citekey").text
                tmpCitekeys = re.sub(" ", "", tmpCitekeys)
                tmpCitekeys = re.sub("\n", "", tmpCitekeys)
                print (tmpCitekeys)
                listCitekeys = tmpCitekeys.split(",")
                listCitenumbers = []
                for strCitekey in listCitekeys:
                    print (strCitekey)
                    listCitenumbers.append(dictCitekeysNumbers[strCitekey])
                    # Create Text to be used on the website in a popover
                    strPopover = strPopover + "[" + dictCitekeysNumbers[strCitekey] + "] " + dictCitekeysTitles[strCitekey] + " "
                listCitenumbers = sorted(listCitenumbers, key=int)
                strResult = _collapse_cite_numbers(listCitenumbers)
                xmlCitenumeric.text = strResult
                # Create Link to be used for website in a popover
                xmlCitenumeric.set("data-toggle", "popover")
                xmlCitenumeric.set("data-placement", "bottom")
                xmlCitenumeric.set("data-title", " " + strResult)
                xmlCitenumeric.set("data-content", strPopover)
                xmlCitenumeric.set("class","citation")
        intChapterNumber += 1
##############################################################
#               Create .epub basic structure                 #
##############################################################
# Create folder structure for ebook
# (all four directories are only created when CONVERT/epub does not exist yet)
if os.path.exists(os.getcwd() + "/CONVERT/epub") == False:
    os.mkdir(os.getcwd() + "/CONVERT/epub")
    os.mkdir(os.getcwd() + "/CONVERT/epub/META-INF")
    os.mkdir(os.getcwd() + "/CONVERT/epub/OEBPS")
    os.mkdir(os.getcwd() + "/CONVERT/epub/OEBPS/images")
# Copy containter.xml and mimetype
shutil.copy(SUPPORT_TEMPLATE_PATH + "Templates/epubcontainer.xml", os.getcwd() + "/CONVERT/epub/META-INF/container.xml")
shutil.copy(SUPPORT_TEMPLATE_PATH + "Templates/epubmimetype", os.getcwd() + "/CONVERT/epub/mimetype")
# Shortcut for namespace
htmlns = "{http://www.w3.org/1999/xhtml}"
# Load Template for Chapter HTML
xmlChapterParser = etree.XMLParser(no_network=False,load_dtd=False) #resolve_entities=False
# Preparing toc.ncx
xmlTocncxParser = etree.XMLParser(no_network=False,load_dtd=False)
tocncx = etree.parse(SUPPORT_TEMPLATE_PATH + "Templates/epubtocncx.xml", xmlTocncxParser)
# Preparing content.opf
xmlContentopfParser = etree.XMLParser(no_network=False,load_dtd=False)
contentopf = etree.parse(SUPPORT_TEMPLATE_PATH + "Templates/epubcontentopf.xml", xmlContentopfParser)
print ("-----------------------------------------------------")
print ("Preparing content.opf")
# Fill the <metadata> element of content.opf with Dublin Core elements whose
# values come from CONVERT/publication.cfg or are fixed strings.
xmlMetadata = contentopf.find(".//{http://www.idpf.org/2007/opf}metadata")
# Prepare Metadata based on Publication.cfg
cfgPublication = configparser.RawConfigParser()
cfgPublication.read(os.getcwd() + "/CONVERT/publication.cfg")
# Prepare Author String
# Up to four authors: "A", "A and B", "A, B and C", "A, B, C and D".
# Each later check overwrites the string built by the earlier one.
strAuthorString = cfgPublication.get("Authors", "Author1")
if cfgPublication.get("Authors", "Author2") != "":
    strAuthorString = cfgPublication.get("Authors", "Author1") + " and " + cfgPublication.get("Authors", "Author2")
if cfgPublication.get("Authors", "Author3") != "":
    strAuthorString = cfgPublication.get("Authors", "Author1") + ", " + cfgPublication.get("Authors", "Author2") + " and " + cfgPublication.get("Authors", "Author3")
if cfgPublication.get("Authors", "Author4") != "":
    strAuthorString = cfgPublication.get("Authors", "Author1") + ", " + cfgPublication.get("Authors", "Author2") + ", " + cfgPublication.get("Authors", "Author3") + " and " + cfgPublication.get("Authors", "Author4")
xmlAuthor = etree.Element("{http://purl.org/dc/elements/1.1/}creator")
xmlAuthor.text = strAuthorString
xmlMetadata.append(xmlAuthor)
# Prepare Title-String
strTitleString = cfgPublication.get("Technical", "Title")
xmlTitle = etree.Element("{http://purl.org/dc/elements/1.1/}title")
xmlTitle.text = strTitleString
xmlMetadata.append(xmlTitle)
# Prepare Description via Subtitle
# (dc:description is only written when a subtitle is configured)
strSubtitleString = cfgPublication.get("Technical", "Subtitle")
if strSubtitleString != "":
    xmlSubtitle = etree.Element("{http://purl.org/dc/elements/1.1/}description")
    xmlSubtitle.text = strSubtitleString
    xmlMetadata.append(xmlSubtitle)
# Prepare Identifier
# "MPIWG:" + series + number, with id="BookId"
strIdentifier = "MPIWG:" + cfgPublication.get("Technical", "Serie") + cfgPublication.get("Technical", "Number")
xmlIdentifier = etree.Element("{http://purl.org/dc/elements/1.1/}identifier")
xmlIdentifier.text = strIdentifier
xmlIdentifier.set("id", "BookId")
xmlMetadata.append(xmlIdentifier)
# Prepare Type
xmlType = etree.Element("{http://purl.org/dc/elements/1.1/}type")
xmlType.text = "Text"
xmlMetadata.append(xmlType)
#Prepare Date
strPublicationDate = cfgPublication.get("Technical", "PublicationDate")
xmlDate = etree.Element("{http://purl.org/dc/elements/1.1/}date")
xmlDate.text = strPublicationDate
xmlDate.set("{http://www.idpf.org/2007/opf}event", "creation")
xmlMetadata.append(xmlDate)
# Prepare Publisher
xmlPublisher = etree.Element("{http://purl.org/dc/elements/1.1/}publisher")
xmlPublisher.text = "Edition Open Access"
xmlMetadata.append(xmlPublisher)
# Prepare Rights
# (the variable name xmlPublisher is reused here for the dc:rights element)
xmlPublisher = etree.Element("{http://purl.org/dc/elements/1.1/}rights")
xmlPublisher.text = "Published under Creative Commons by-nc-sa 3.0 Germany Licence"
xmlMetadata.append(xmlPublisher)
# Prepare Source
xmlSource = etree.Element("{http://purl.org/dc/elements/1.1/}source")
xmlSource.text = "Max Planck Research Library for the History and Development of Knowledge"
xmlMetadata.append(xmlSource)
# Prepare Subject
# (only Keyword1 is used)
strSubject = cfgPublication.get("General", "Keyword1")
xmlSubject = etree.Element("{http://purl.org/dc/elements/1.1/}subject")
xmlSubject.text = strSubject
xmlMetadata.append(xmlSubject)
# Prepare Language
strLanguage = cfgPublication.get("Technical", "Language")
xmlLanguage = etree.Element("{http://purl.org/dc/elements/1.1/}language")
xmlLanguage.text = strLanguage
xmlMetadata.append(xmlLanguage)
#Prepare Cover
# <meta name="cover" content="cover_pic"> refers to the manifest item added below
xmlCover = etree.Element("meta")
xmlCover.set("content", "cover_pic")
xmlCover.set("name", "cover")
xmlMetadata.append(xmlCover)
xmlManifest = contentopf.find(".//{http://www.idpf.org/2007/opf}manifest")
# Manifest item for the cover image; the file is copied from CONVERT/cover.jpg
xmlItem = etree.Element("item")
xmlItem.set("id", "cover_pic")
xmlItem.set("href", "images/cover.jpg")
xmlItem.set("media-type", "image/jpeg")
xmlManifest.append(xmlItem)
shutil.copy(os.getcwd() + "/CONVERT/cover.jpg", os.getcwd() + "/CONVERT/epub/OEBPS/images/")
# Manifest item for the cover page; the page itself is a template copied unchanged
xmlItem = etree.Element("item")
xmlItem.set("id", "cover")
xmlItem.set("href", "cover.xhtml")
xmlItem.set("media-type", "application/xhtml+xml")
xmlManifest.append(xmlItem)
shutil.copy(SUPPORT_TEMPLATE_PATH + "Templates/epubcover.xhtml", os.getcwd() + "/CONVERT/epub/OEBPS/cover.xhtml")
print ("-------------------")
print ("Preparing intro.xhtml")
print ("-------------------")
# Fill the placeholders of the intro template with the publication metadata
# and write the result to CONVERT/epub/OEBPS/intro.xhtml.
tmpFilePath = SUPPORT_TEMPLATE_PATH + "Templates/epubintro.xhtml"
with open(tmpFilePath, "r") as tmpFile:
    strIntroHTML = tmpFile.read()
strAdditionalInformation = cfgPublication.get("General", "AdditionalInformation")
if strAdditionalInformation != "":
    strAdditionalInformation = "<p>" + strAdditionalInformation + "</p>"
# Placeholder word in the template -> text to insert
dictIntroValues = {
    "author": strAuthorString,
    "TITLE": strTitleString,
    "year": cfgPublication.get("Technical", "PublicationYear"),
    "series": cfgPublication.get("Technical", "Serie"),
    "number": cfgPublication.get("Technical", "Number"),
    "AdditionalInformation": strAdditionalInformation,
}
# All placeholders are replaced in ONE pass over the template. Replacing them
# one after the other would also rewrite placeholder words that occur inside
# an already inserted value (e.g. a title containing "number"). Using a
# function as replacement inserts the values literally, so backslashes in the
# metadata are not interpreted as regex escapes.
strIntroHTML = re.sub(
    r"author|TITLE|year|series|number|AdditionalInformation",
    lambda match: dictIntroValues[match.group(0)],
    strIntroHTML,
)
tmpFilePath = os.getcwd() + "/CONVERT/epub/OEBPS/intro.xhtml"
# The context manager closes (and thereby flushes) the file, so the page is
# completely on disk before later steps read it.
with open(tmpFilePath, "w") as tmpFile:
    tmpFile.write(strIntroHTML)
print ("-------------------")
print ("Preparing toc.ncx")
print ("-------------------")
# Add identifier, title and author to the toc.ncx template loaded earlier.
xmlHead = tocncx.find("//{http://www.daisy.org/z3986/2005/ncx/}head")
# <meta name="dtb:uid"> is built from the same parts as the dc:identifier in content.opf
xmlMeta = etree.Element("meta")
xmlMeta.set("name", "dtb:uid")
xmlMeta.set("content", "MPIWG:" + cfgPublication.get("Technical", "Serie") + cfgPublication.get("Technical", "Number"))
xmlHead.append(xmlMeta)
# <docTitle><text>title</text></docTitle>
xmlTitle = tocncx.find("//{http://www.daisy.org/z3986/2005/ncx/}docTitle")
xmlText = etree.Element("text")
xmlText.text = strTitleString
xmlTitle.append(xmlText)
# <docAuthor><text>authors</text></docAuthor>
xmlAuthor = tocncx.find("//{http://www.daisy.org/z3986/2005/ncx/}docAuthor")
xmlText = etree.Element("text")
xmlText.text = strAuthorString
xmlAuthor.append(xmlText)
# This list includes all files which have already been included to avoid duplicates
listContentopf = []
##############################################################
#               Convert Tralics-XML to Epub                  #
##############################################################
# Copy xmlTree to xmlEbookTree
# (all EPUB specific changes are made on this copy, xmlTree stays untouched)
xmlEbookTree = deepcopy(xmlTree)
# xmlChapters is a list containing all chapters
xmlChapters = xmlEbookTree.findall("//div1")
# Convert Chapters, Sections, Subsections and Subsubsections to h1, h2, h3, h4
# Insert Number from Dictionary where needed
print ("-----------------------------------------------------")
print ("Convert EOAChapter to H1")
for xmlChapter in xmlChapters:
    xmlChapter.find("head").tag = "h1"
    # Numbered chapters get "<number>. " in front of the headline
    if xmlChapter.get("rend") != "nonumber":
        idChapter = xmlChapter.get("id")
        print (idChapter + " konvertierung into h1")
        print (dictChapters[idChapter])
        strHeadline = xmlChapter.find("h1").text or ""
        xmlChapter.find("h1").text = str(dictChapters[idChapter]) + ". " + strHeadline
    if xmlChapter.find(".//EOAauthor") is not None:
        # Show the chapter author in an italic paragraph inserted at child index 1
        tmpXML = etree.Element("p")
        tmpXML.append(etree.Element("i"))
        tmpXML[0].text = xmlChapter.find(".//EOAauthor").text
        xmlChapter.insert(1, tmpXML)
        # Remove unwanted EOAauthor here
        xmlChapter.find(".//EOAauthor").text = ""
        # strip_tags() changes the tree in place and returns None, so its
        # result must not be assigned back to xmlChapter.
        etree.strip_tags(xmlChapter, "EOAauthor")
print (dictSections)
print ("-----------------------------------------------------")
print ("Convert EOAsection to H2")
# The <head> of every div2 becomes <h2>; numbered sections get their number
# from dictSections in front of the headline.
xmlSections = xmlEbookTree.findall(".//div2")
for xmlSection in xmlSections:
    xmlSection.find("head").tag = "h2"
    if xmlSection.get("rend") != "nonumber":
        idSection = xmlSection.get("id")
        strHeadline = xmlSection.find("h2").text or ""
        print (strHeadline)
        xmlSection.find("h2").text = str(dictSections[idSection]) + " " + strHeadline
print ("-----------------------------------------------------")
print ("Convert EOAsubsection to H3")
# Same for div3 -> <h3>
xmlSubsections = xmlEbookTree.findall(".//div3")
for xmlSubsection in xmlSubsections:
    xmlSubsection.find("head").tag = "h3"
    if xmlSubsection.get("rend") != "nonumber":
        idSection = xmlSubsection.get("id")
        strHeadline = xmlSubsection.find("h3").text or ""
        print (strHeadline)
        xmlSubsection.find("h3").text = str(dictSections[idSection]) + " " + strHeadline
print ("-----------------------------------------------------")
print ("Convert EOAsubsubsection to H4")
# div4 headlines are only renamed to <h4>; they are never numbered
xmlSubsubsections = xmlEbookTree.findall(".//div4")
for xmlSubsubsection in xmlSubsubsections:
    xmlSubsubsection.find("head").tag = "h4"
    #if xmlSubsubsection.get("rend") != "nonumber":
        #idSection = xmlSubsection.get("id")
        #strHeadline = xmlSubsection.find("h4").text
        #xmlSubsection.find("h3").text = str(dictSections[idSection]) + " " + strHeadline
print ("-----------------------------------------------------")
print ("Preparing Figures")
# Every numbered figure is replaced by <p><img .../></p> followed by a <p>
# holding "Figure <number>: <caption>"; the image file is copied into the
# EPUB image folder and registered in content.opf.
xmlFigures = xmlEbookTree.xpath(".//EOAfigure | .//EOAlsfigure")
for xmlFigure in xmlFigures:
    # Copy File of the Image
    # If it's in a subfolder, name of folder and name of image will be merged
    strImageFileString = xmlFigure.find(".//file").text
    strImageFileString = strImageFileString.rstrip("\n")
    strImageFileDir = os.path.dirname(strImageFileString)
    # Remove / from path
    strImageFileDir = re.sub("/", "", strImageFileDir)
    strImageFileName = os.path.basename(strImageFileString)
    strImageFileNamewoSuffix = os.path.splitext(strImageFileName)[0]
    shutil.copy(os.getcwd() + "/" + strImageFileString, os.getcwd() + "/CONVERT/epub/OEBPS/images/" + strImageFileDir + strImageFileName)
    # sanitizeImageEpub is defined elsewhere; presumably it converts the copy
    # to JPEG, since manifest entry and <img> below use the .jpg suffix -- TODO confirm
    strImageFilepath = sanitizeImageEpub(os.getcwd() + "/CONVERT/epub/OEBPS/images/" + strImageFileDir + strImageFileName)
    # Add copied file to contentopf
    contentopf = addToContentopf(contentopf, "images/" + strImageFileDir + strImageFileNamewoSuffix + ".jpg", strImageFileDir + strImageFileNamewoSuffix + "jpg", "jpg")
    # Everything still needed is read BEFORE the element is cleared below
    xmlFigureCaption = xmlFigure.find(".//caption")
    idFigure = xmlFigure.find(".//anchor").get("id")
    intFigureNumber = dictFigures[idFigure]
    if xmlFigure.tag == "EOAfigure":
        strImageWidth = xmlFigure.find(".//width").text
        strImageWidth = strImageWidth.rstrip("\n")
    # EOAlsfigure always uses the full width
    if xmlFigure.tag == "EOAlsfigure":
        strImageWidth = "100"
    xmlFigure.clear()
    xmlFigure.tag = "p"
    xmlFigureImage = etree.Element("img")
    xmlFigureImage.set("src", "images/" + strImageFileDir + strImageFileNamewoSuffix + ".jpg")
    xmlFigureImage.set("alt", "")
    # Width is given in percent
    xmlFigureImage.set("style", "width: " + strImageWidth + "%")
    xmlFigure.append(xmlFigureImage)
    # The caption element saved above becomes a <p> right after the figure
    xmlFigureCaption.tag = "p"
    strFigureCaption = xmlFigureCaption.text or ""
    xmlFigureCaption.text = "Figure " + str(intFigureNumber) + ": " + strFigureCaption
    xmlFigure.addnext(xmlFigureCaption)
    # Change the tag of the parent <p>-Tag to <div> so that it may be removed
    #xmlFigure.getparent().tag = "div"
print ("-----------------------------------------------------")
print ("Preparing not numbered Figures")
# Same treatment as for numbered figures, but without caption and number:
# every EOAfigurenonumber becomes <p><img .../></p>.
xmlFigures = xmlEbookTree.findall(".//EOAfigurenonumber")
for xmlFigure in xmlFigures:
    # Copy File of the Image
    # If it's in a subfolder, name of folder and name of image will be merged
    strImageFileString = xmlFigure.find(".//file").text
    strImageFileString = strImageFileString.rstrip("\n")
    strImageFileDir = os.path.dirname(strImageFileString)
    # Remove / from path
    strImageFileDir = re.sub("/", "", strImageFileDir)
    strImageFileName = os.path.basename(strImageFileString)
    strImageFileNamewoSuffix = os.path.splitext(strImageFileName)[0]
    shutil.copy(os.getcwd() + "/" + strImageFileString, os.getcwd() + "/CONVERT/epub/OEBPS/images/" + strImageFileDir + strImageFileName)
    # sanitizeImageEpub is defined elsewhere; its return value is not used here
    strImageFilepath = sanitizeImageEpub(os.getcwd() + "/CONVERT/epub/OEBPS/images/" + strImageFileDir + strImageFileName)
    # Add copied file to contentopf
    contentopf = addToContentopf(contentopf, "images/" + strImageFileDir + strImageFileNamewoSuffix + ".jpg", strImageFileDir + strImageFileNamewoSuffix + "jpg", "jpg")
    # Read the width before the element is cleared
    strImageWidth = xmlFigure.find(".//width").text
    strImageWidth = strImageWidth.rstrip("\n")
    xmlFigure.clear()
    xmlFigure.tag = "p"
    xmlFigureImage = etree.Element("img")
    xmlFigureImage.set("src", "images/" + strImageFileDir + strImageFileNamewoSuffix + ".jpg")
    xmlFigureImage.set("alt", "")
    xmlFigureImage.set("style", "width: " + strImageWidth + "%")
    xmlFigure.append(xmlFigureImage)
print ("-----------------------------------------------------")
print ("Preparing Footnotes")
def alph_footnote_index(fndex):
    """
    Return the lowercase Latin label for a zero-based footnote index.

    Lowercase Latin footnotes need to support more than 26 values, so after
    'z' the labels continue with 'aa', 'ab', ... (like spreadsheet columns).

    >>> alph_footnote_index(0)
    'a'
    >>> alph_footnote_index(1)
    'b'
    >>> alph_footnote_index(24)
    'y'
    >>> alph_footnote_index(25)
    'z'
    >>> alph_footnote_index(26)
    'aa'
    >>> alph_footnote_index(27)
    'ab'

    Raises ValueError for a negative index.
    """
    if fndex < 0:
        # A negative index has no label; fail early with a clear message.
        raise ValueError("footnote index must not be negative: %r" % (fndex,))
    alphabet = "abcdefghijklmnopqrstuvwxyz"
    letters = []
    # Letters are produced from the last one to the first one.
    while True:
        fndex, remainder = divmod(fndex, len(alphabet))
        letters.append(alphabet[remainder])
        if not fndex:
            break
        # There is no "zero" letter: the leading positions count from 'a' = 1.
        fndex -= 1
    return "".join(reversed(letters))
def replace_footnote_equations(footnote):
    """
    Replace every unnumbered equation inside a footnote by an image paragraph.

    Each EOAequationnonumber below `footnote` is turned into <p><img/></p>,
    its image is copied from items/ into the ePub image folder, and the image
    is registered via addToContentopf.

    usage: contentopf = replace_footnote_equations(my_footnote)
    The updated content.opf object is returned instead of mutating the global
    variable, so the caller has to store the result.
    """
    result = contentopf
    cwd = os.getcwd()
    for equation in footnote.findall(".//EOAequationnonumber"):
        filename = equation.get("filename")
        equation.clear()
        equation.tag = "p"
        img = etree.Element("img", src="images/%s" % filename, alt="")
        equation.append(img)
        # the ePub content folder is OEBPS, as used by every other copy in this script
        shutil.copy("%s/items/%s" % (cwd, filename), "%s/CONVERT/epub/OEBPS/images/%s" % (cwd, filename))
        result = addToContentopf(result, "images/" + filename, filename, "png")
    return result
def replace_footnote_with_sup(note):
    """
    Empty the given note element and turn it into a <sup> element.

    Text, attributes and children are dropped; only the tail (the text that
    follows the note in its parent) survives. The caller is expected to fill
    in the new text of the <sup>.
    """
    preserved_tail = note.tail
    # clear() also wipes the tail, hence it is saved above and restored here
    note.clear()
    note.tag, note.tail = "sup", preserved_tail
def bring_footnote_down_epub(footnote, footnote_name, destination):
    """
    Move the content of an inline footnote into `destination`.

    footnote: the footnote element inside the running text; it is replaced
        in place by a <sup> holding "[footnote_name] ".
    footnote_name: the label of the footnote (number or letters).
    destination: element that receives the footnote text (prefixed with
        "[footnote_name]") and all former children of the footnote.

    Returns the content.opf object as updated by replace_footnote_equations;
    the caller has to store it (see the usage note of that helper).
    """
    contentopf = replace_footnote_equations(footnote)  # see usage note
    kids = list(footnote)
    prefix = "[%s]" % footnote_name
    # we would like to prepend this footnote identifier to the footnote element
    if footnote.text is not None:
        # if the element starts with some text anyway, prepend it there
        footnote.text = "%s %s" % (prefix, footnote.text)
    elif kids:
        # if, however, the element begins with a child, prepend the text at the
        # beginning of the first child instead; separate them with a space,
        # unless the child had no text to begin with
        first_child = kids[0]
        child_suffix = first_child.text
        if child_suffix is None:
            first_child.text = prefix
        else:
            first_child.text = prefix + " " + child_suffix
    else:
        # a totally empty footnote is weird, but who am I to judge?
        footnote.text = prefix
    footnote_text = footnote.text or ""
    replace_footnote_with_sup(footnote)
    footnote.text = "[%s] " % footnote_name
    # append any text the footnote used to have to the destination
    destkids = list(destination)
    if destkids:
        # if the destination has children, append after the last one's tail
        last_kid = destkids[-1]
        existing = last_kid.tail
        existing = "" if existing is None else existing + " "
        last_kid.tail = existing + footnote_text
    else:
        # if the destination has no children, append to its text
        existing = destination.text
        existing = "" if existing is None else existing + " "
        destination.text = existing + footnote_text
    for kid in kids:
        destination.append(kid)
    return contentopf
class FootnoteError(Exception):
    """
    Raised when a chapter mixes footnote types.

    Only one type of footnote is supported per chapter,
    so don't try to mix-and-match.
    """
# Collect the footnotes of every chapter in a <div> at the end of the chapter
# and leave a <sup> marker at the place of each footnote.
for xmlChapter in xmlChapters:
    groupings = get_bigfoot_data(xmlChapter)
    xmlFootnotes = list(xmlChapter.findall(".//note"))
    has_old = bool(xmlFootnotes)
    # groupings is an association list whose values are lists of notes
    has_new = any(True for grouping, notes in groupings for note in notes)
    # both styles in one chapter is an error, neither of them means nothing to do
    if has_old and has_new:
        raise FootnoteError("Chapter %s contains both \\EOAfn and footnotes in the style of \\EOAfnalph" % xmlChapter.get("id-text"))
    if not has_old and not has_new:
        continue
    xmlNewFootnotes = etree.Element("div")
    xmlNewFootnotesHeader = etree.Element("h3")
    xmlNewFootnotesHeader.text = dictLangFootnotes[xmlChapter.get("language")]
    xmlNewFootnotes.append(xmlNewFootnotesHeader)
    for grouping, notes in groupings:
        # do for the new-style footnotes what was being done for the old
        for index, note in enumerate(notes):
            if "lower-latin" == grouping:
                footnote_name = alph_footnote_index(index)
            else:
                footnote_name = str(index + 1)
            para = etree.Element("p")
            # The paragraph is left empty on purpose: bring_footnote_down_epub
            # appends the labelled footnote text itself. Pre-filling it here
            # duplicated the text and printed "None" for notes without text.
            contentopf = bring_footnote_down_epub(note, footnote_name, para)
            xmlNewFootnotes.append(para)
    intFootnoteNumber = 1
    for xmlFootnote in xmlFootnotes:
        # Not numbered Equations may appear in a footnote, need to be treated differently
        xmlEquationsnonumber = xmlFootnote.findall(".//EOAequationnonumber")
        for xmlEquationnonumber in xmlEquationsnonumber:
            strFilename = xmlEquationnonumber.get("filename")
            xmlEquationnonumber.clear()
            xmlEquationnonumber.tag = "p"
            xmlIMG = etree.Element("img", src="images/" + strFilename, alt="")
            xmlEquationnonumber.append(xmlIMG)
            shutil.copy(os.getcwd() + "/items/" + strFilename, os.getcwd() + "/CONVERT/epub/OEBPS/images/" + strFilename)
            contentopf = addToContentopf(contentopf, "images/" + strFilename, strFilename, "png")
        # Prefix the first child of the note with the footnote number
        xmlFirstChild = xmlFootnote.getchildren()[0]
        xmlFirstChild.text = "[" + str(intFootnoteNumber) + "] " + (xmlFirstChild.text or "")
        # Preserve tail and children of current <note>-Tag
        xmlFootnoteContentsTail = xmlFootnote.tail
        xmlFootnoteChildren = xmlFootnote.getchildren()
        # Substitute current <note> with Number
        xmlFootnote.clear()
        xmlFootnote.text = "[" + str(intFootnoteNumber) + "]"
        xmlFootnote.tail = xmlFootnoteContentsTail
        xmlFootnote.tag = "sup"
        # NOTE: Anchor not being used for the time being
        for xmlFootnoteChild in xmlFootnoteChildren:
            xmlNewFootnotes.append(xmlFootnoteChild)
        intFootnoteNumber += 1
    xmlChapter.append(xmlNewFootnotes)
print("-----------------------------------------------------")
print("Preparing Lists")
# list type -> HTML tag; description lists are converted in a later step
dictListTags = {"ordered": "ol", "simple": "ul"}
for xmlChapter in xmlChapters:
    xmlLists = xmlChapter.findall(".//list")
    for xmlList in xmlLists:
        strListType = xmlList.get("type")
        if strListType == "description":
            continue
        if strListType in dictListTags:
            xmlList.tag = dictListTags[strListType]
        # every item below the list (at any depth) becomes a <li>
        xmlListItems = xmlList.findall(".//item")
        for xmlListItem in xmlListItems:
            xmlListItem.tag = "li"
print("-----------------------------------------------------")
print("Preparing Descriptions")
# Every element still named "list" at this point is turned into a <dl>
# (ordered and simple lists were renamed in the step before).
for xmlChapter in xmlChapters:
    xmlDescriptions = xmlChapter.findall(".//list")
    for xmlDescription in xmlDescriptions:
        xmlDescription.tag = "dl"
        # pop() with a default instead of del: a missing attribute must not
        # abort the whole conversion with a KeyError
        xmlDescription.attrib.pop("type", None)
        for xmlChild in xmlDescription.iterchildren():
            if xmlChild.tag == "label":
                xmlChild.tag = "dt"
            elif xmlChild.tag == "item":
                xmlChild.tag = "dd"
            xmlChild.attrib.pop("id", None)
            xmlChild.attrib.pop("id-text", None)
print("-----------------------------------------------------")
print("Preparing Blockquotes")
# <p rend="quoted">...</p> becomes <blockquote><p>...</p></blockquote>
xmlParagraphs = xmlEbookTree.findall(".//p")
for xmlParagraph in xmlParagraphs:
    if xmlParagraph.get("rend") != "quoted":
        continue
    # Remember the content, because clear() throws all of it away
    strParagraphText = xmlParagraph.text
    strParagraphTail = xmlParagraph.tail
    xmlParagraphChildren = list(xmlParagraph)
    xmlParagraph.clear()
    xmlParagraph.tag = "blockquote"
    # The inner paragraph takes over text, children and the former tail
    xmlNew = etree.Element("p")
    xmlNew.text = strParagraphText
    for xmlParagraphChild in xmlParagraphChildren:
        xmlNew.append(xmlParagraphChild)
    xmlNew.tail = strParagraphTail
    xmlParagraph.append(xmlNew)
print("-----------------------------------------------------")
print("Preparing Theorems")
# A theorem becomes a single <p> whose bold head carries title and number
for xmlChapter in xmlChapters:
    xmlTheorems = xmlChapter.findall(".//theorem")
    for xmlTheorem in xmlTheorems:
        xmlTheoremHead = xmlTheorem.find(".//head")
        strTheoremTitel = xmlTheoremHead.text
        xmlTheoremParagraph = xmlTheorem.find(".//p")
        strTheoremText = xmlTheoremParagraph.text
        xmlTheoremTextTail = xmlTheoremParagraph.tail
        strTheoremNumber = xmlTheorem.get("id-text")
        xmlTheorem.tag = "p"
        xmlTheoremHead.tag = "b"
        xmlTheoremHead.text = strTheoremTitel + " " + strTheoremNumber
        for strTheoremAttribute in ("style", "type", "id-text", "id"):
            del xmlTheorem.attrib[strTheoremAttribute]
        # Dissolve the paragraphs inside the theorem, keeping their content
        etree.strip_tags(xmlTheorem, "p")
print("-----------------------------------------------------")
print("Preparing Hyperlinks")
# <xref url="..."> becomes <a href="...">, with the URL itself as link text
for xmlChapter in xmlChapters:
    xmlHyperlinks = xmlChapter.findall(".//xref")
    for xmlHyperlink in xmlHyperlinks:
        strURL = xmlHyperlink.get('url')
        print(strURL)
        # Only add a scheme when the URL has none. Testing for "http://" alone
        # turned "https://..." into "http://https://...".
        if re.match(r"[A-Za-z][A-Za-z0-9+.-]*://", strURL) is None:
            strURL = "http://" + strURL
        xmlHyperlink.tag = "a"
        del xmlHyperlink.attrib["url"]
        xmlHyperlink.set("href", strURL)
        # Line-break hints inside the URL are of no use in the ePub
        etree.strip_elements(xmlHyperlink, with_tail=True, *['allowbreak'])
        xmlHyperlink.text = strURL
print("-----------------------------------------------------")
print("Convert emphasized text")
# <hi rend="it"> becomes <em>
for xmlChapter in xmlChapters:
    xmlItalics = xmlChapter.findall(".//hi")
    for xmlItalic in xmlItalics:
        if xmlItalic.get("rend") != "it":
            continue
        xmlItalic.tag = "em"
        xmlItalic.attrib.pop("rend")
print("-----------------------------------------------------")
print("Convert bold text")
# <hi rend="bold"> becomes <b>
for xmlChapter in xmlChapters:
    xmlBolds = xmlChapter.findall(".//hi")
    for xmlBold in xmlBolds:
        if xmlBold.get("rend") != "bold":
            continue
        xmlBold.tag = "b"
        xmlBold.attrib.pop("rend")
# Superscript and subscript: each entry is (headline, search path, new tag)
for strHeadline, strSearchPath, strNewTag in (
    ("Convert EOAup to <sup>", ".//EOAup", "sup"),
    ("Convert EOAdown to <sub>", ".//EOAdown", "sub"),
):
    print("-----------------------------------------------------")
    print(strHeadline)
    for xmlChapter in xmlChapters:
        for xmlShifted in xmlChapter.findall(strSearchPath):
            xmlShifted.tag = strNewTag
# Strike-out, letter-spacing and small caps all end up as a styled <span>:
# each entry is (headline, search path, CSS for the style attribute)
for strHeadline, strSearchPath, strSpanStyle in (
    ("Convert EOAst to <span>", ".//EOAst", "text-decoration: line-through;"),
    ("Convert EOAls to something nice", ".//EOAls", "letter-spacing: 0.5em;"),
    ("Convert EOAcaps to something nice", ".//EOAcaps", "font-variant:small-caps;"),
):
    print("-----------------------------------------------------")
    print(strHeadline)
    for xmlChapter in xmlChapters:
        for xmlStyled in xmlChapter.findall(strSearchPath):
            xmlStyled.tag = "span"
            xmlStyled.set("style", strSpanStyle)
print("-----------------------------------------------------")
print("Convert EOAineq into appropriate IMG-Tags")
for xmlChapter in xmlChapters:
    xmlInlineEquations = xmlChapter.findall(".//EOAineq")
    for xmlInlineEquation in xmlInlineEquations:
        xmlInlineEquation.tag = "img"
        xmlInlineEquation.set("alt", "")
        xmlInlineEquation.attrib.pop("TeX")
        # Copy the rendered equation into the ePub and point src to the copy
        strInlineEquationFile = xmlInlineEquation.get("src")
        shutil.copy(
            os.getcwd() + "/items/" + strInlineEquationFile,
            os.getcwd() + "/CONVERT/epub/OEBPS/images/" + strInlineEquationFile,
        )
        strInlineEquationSrc = "images/" + strInlineEquationFile
        xmlInlineEquation.set("src", strInlineEquationSrc)
        # NOTE(review): unlike the other equation handlers, the item id handed
        # over here includes the "images/" prefix - confirm this is intended
        contentopf = addToContentopf(contentopf, strInlineEquationSrc, strInlineEquationSrc, "png")
print("-----------------------------------------------------")
print("Convert EOAinline into appropriate IMG-Tags")
for xmlChapter in xmlChapters:
    xmlInlineElements = xmlChapter.findall(".//EOAinline")
    for xmlInlineElement in xmlInlineElements:
        xmlInlineElement.tag = "img"
        xmlInlineElement.set("alt", "Too late")
        # The text of the element is the path of the image. It has to be
        # removed in the end, because it would be visible in the ePub, but
        # it is still needed as id for content.opf.
        strInlineElementFilePath = xmlInlineElement.text
        strInlineElementFileName = os.path.basename(strInlineElementFilePath)
        strInlineElementDirName = os.path.dirname(strInlineElementFilePath)
        # Merge folder and file name the same way the figures do. A nested
        # path such as Images/png_300dpi/A.png otherwise points into a
        # subfolder of images/ that does not exist.
        strInlineElementFlatDirName = re.sub("/", "", strInlineElementDirName)
        strInlineElementTarget = strInlineElementFlatDirName + strInlineElementFileName
        strNewImagePath = os.getcwd() + "/CONVERT/epub/OEBPS/images/" + strInlineElementTarget
        shutil.copy(os.getcwd() + "/" + strInlineElementDirName + "/" + strInlineElementFileName, strNewImagePath)
        # Scale the copy down in place. The paths are passed as separate
        # arguments, so blanks in a path cannot break the command apart.
        listArguments = shlex.split(GM_PATH) + ["convert", strNewImagePath, "-resize", "20x20", strNewImagePath]
        subprocess.check_output(listArguments, shell=False)
        xmlInlineElement.set("src", "images/" + strInlineElementTarget)
        # contentopf, Filename, FileID, Mediatype
        # The media type is derived from the file extension
        extension = strInlineElementFileName.split(".")[-1]
        contentopf = addToContentopf(contentopf, "images/" + strInlineElementTarget, xmlInlineElement.text, extension)
        xmlInlineElement.text = ""
print("-----------------------------------------------------")
print("Preparing Equations")
# Numbered equations: <p><img/></p> followed by a paragraph with the number
for xmlChapter in xmlChapters:
    xmlEquations = xmlChapter.findall(".//EOAequation")
    for xmlEquation in xmlEquations:
        strNumber = strEquationNumber = xmlEquation.get("number")
        strFilename = xmlEquation.get("filename")
        # Copy image of Equation and register it
        shutil.copy(
            os.getcwd() + "/items/" + strFilename,
            os.getcwd() + "/CONVERT/epub/OEBPS/images/" + strFilename,
        )
        contentopf = addToContentopf(contentopf, "images/" + strFilename, strFilename, "png")
        # Rework XML
        xmlEquation.clear()
        xmlEquation.tag = "p"
        xmlEquationImage = etree.Element("img")
        xmlEquationImage.set("src", "images/" + strFilename)
        xmlEquationImage.set("alt", "")
        xmlEquation.append(xmlEquationImage)
        # The number goes into a paragraph of its own right after the image
        xmlNew = etree.Element('p')
        xmlNew.text = "(" + strEquationNumber + ")"
        xmlEquation.addnext(xmlNew)
# Equations without a number: only the image paragraph
for xmlChapter in xmlChapters:
    xmlEquations = xmlChapter.findall(".//EOAequationnonumber")
    for xmlEquation in xmlEquations:
        strFilename = xmlEquation.get("filename")
        # Copy image of Equation and register it
        shutil.copy(
            os.getcwd() + "/items/" + strFilename,
            os.getcwd() + "/CONVERT/epub/OEBPS/images/" + strFilename,
        )
        contentopf = addToContentopf(contentopf, "images/" + strFilename, strFilename, "png")
        # Rework XML
        xmlEquation.clear()
        xmlEquation.tag = "p"
        xmlEquationImage = etree.Element("img")
        xmlEquationImage.set("src", "images/" + strFilename)
        xmlEquationImage.set("alt", "")
        xmlEquation.append(xmlEquationImage)
# EOAequationarray is not really handled so far: it is merely renamed to
# <div>, which makes the element disappear later on and leaves its children
for xmlChapter in xmlChapters:
    xmlEquationarrays = xmlChapter.findall(".//EOAequationarray")
    for xmlEquationarray in xmlEquationarrays:
        xmlEquationarray.tag = "div"
print("-----------------------------------------------------")
print("Preparing Letterheads")
for xmlChapter in xmlChapters:
    xmlLetterheads = xmlChapter.xpath(".//EOAletterhead")
    print(len(xmlLetterheads))
    for xmlLetterhead in xmlLetterheads:
        xmlRecipient = xmlLetterhead.find(".//Recipient")
        print(etree.tostring(xmlRecipient))
        xmlArchive = xmlLetterhead.find(".//Archive")
        xmlAdditional = xmlLetterhead.find(".//Additional")
        xmlPages = xmlLetterhead.find(".//Pages")
        # Each line of the letterhead becomes a paragraph with an emphasized first child
        for xmlLetterheadLine in (xmlRecipient, xmlArchive, xmlAdditional, xmlPages):
            xmlLetterheadLine.tag = "p"
            xmlLetterheadLine.getchildren()[0].tag = "em"
        # Frame the letterhead with two horizontal rules
        xmlHR = etree.Element("hr")
        xmlHR2 = etree.Element("hr")
        xmlLetterhead.insert(0, xmlHR)
        xmlLetterhead.insert(5, xmlHR2)
print("-----------------------------------------------------")
print("Preparing Transcriptions")
# TODO: May need rework concerning the right Column
for xmlChapter in xmlChapters:
    # Remove <Facsimilelink>
    etree.strip_elements(xmlChapter, "Facsimilelink")
    xmlTranscriptions = xmlChapter.xpath(".//EOAtranscripted")
    for xmlTranscription in xmlTranscriptions:
        print("Processing Transcription")
        print(etree.tostring(xmlTranscription))
        # The transcription becomes a table: one header row, one text row
        xmlTranscription.tag = "table"
        xmlHeader = xmlTranscription.find(".//EOAtranscriptedheader")
        xmlHeader.tag = "tr"
        xmlLeftHeader = xmlTranscription.find(".//Leftheader")
        print(xmlLeftHeader.text)
        xmlLeftHeader.tag = "td"
        xmlLeftHeader.set("style", "width: 50%")
        xmlRightHeader = xmlTranscription.find(".//Rightheader")
        xmlRightHeader.tag = "td"
        xmlTranscriptedtext = xmlTranscription.find(".//EOAtranscriptedtext")
        # Work on a re-parsed copy of the text, so that its children can be
        # distributed to the columns before the original gets cleared
        strTranscriptedtext = etree.tostring(xmlTranscriptedtext, encoding="unicode")
        xmlLeftColumn = etree.Element("td")
        xmlRightColumn = etree.Element("td")
        boolRightColumn = False
        xmlTemp = etree.XML(str(strTranscriptedtext))
        for xmlElement in xmlTemp.iterchildren():
            if xmlElement.tag == "pagebreak":
                # Everything after the pagebreak belongs to the right column
                boolRightColumn = True
                print("Spaltenwechsel!")
                continue
            if boolRightColumn:
                xmlRightColumn.append(xmlElement)
            else:
                xmlLeftColumn.append(xmlElement)
        xmlTranscriptedtext.clear()
        xmlTranscriptedtext.tag = "tr"
        xmlTranscriptedtext.set("valign", "top")
        xmlTranscriptedtext.append(xmlLeftColumn)
        xmlTranscriptedtext.append(xmlRightColumn)
print ("-----------------------------------------------------")
print ("Preparing Tables")
# Converts every EOAtable into a plain HTML-like table: the caption becomes a
# paragraph after the table, the column definition is translated into cell
# alignment and width, rows and cells are renamed to tr/th/td, and finally
# the table replaces its parent element.
intChapterNumber = 1
for xmlChapter in xmlChapters:
    xmlTables = xmlChapter.findall(".//EOAtable")
    for xmlTable in xmlTables:
        xmlRawTable = xmlTable.find(".//table")
        strTableCaption = xmlTable.find(".//EOAtablecaption").text or ""
        print (strTableCaption)
        # A caption of "nonumber" marks a table without number and caption
        if strTableCaption != "nonumber":
            # dictTables is looked up by the text of the table label
            intTableNumber = dictTables[xmlTable.find(".//EOAtablelabel").text]
            xmlTableCaption = etree.Element("p")
            print (strTableCaption)
            print (intTableNumber)
            xmlTableCaption.text = str(intTableNumber) + " " + strTableCaption
            # NOTE(review): getchildren() returns a list, so this check looks always true - confirm
            if xmlTable.find(".//EOAtablecaption").getchildren() is not None:
                # Move the markup inside the caption over to the new paragraph
                for xmlChild in xmlTable.find(".//EOAtablecaption").iterchildren():
                    xmlTableCaption.append(xmlChild)
            xmlRawTable.addnext(xmlTableCaption)
        # Caption and label are no longer needed inside the table element
        xmlTable.find(".//EOAtablecaption").clear()
        xmlTable.remove(xmlTable.find(".//EOAtablecaption"))
        xmlTable.find(".//EOAtablelabel").clear()
        xmlTable.remove(xmlTable.find(".//EOAtablelabel"))
        # Analyze Width and Alignment of the Columns
        strColumnString = xmlTable.find(".//EOAtablecolumns").text
        strColumnString = re.sub(r"\|", "", strColumnString)
        xmlTable.remove(xmlTable.find(".//EOAtablecolumns"))
        # One match per column: alignment letter up to the next "cm"
        reMatchObjects = re.findall(r'([L|R|C].*?cm)', strColumnString)
        intTableWidth = 0
        # Index 0 is a placeholder, because columns are counted from 1 below
        listColumnAlignments = [None]
        listColumnWidths = [None]
        intNumberOfColumns = 0
        for strColumnDefinition in reMatchObjects:
            # rstrip() removes trailing characters, not a suffix: this drops the unit letters
            strColumnDefinition = strColumnDefinition.rstrip("cm")
            strColumnDefinition = strColumnDefinition.rstrip("mm")
            strColumnAlignment = strColumnDefinition[0]
            if strColumnAlignment == "L":
                strColumnAlignment = "left"
            if strColumnAlignment == "C":
                strColumnAlignment = "center"
            if strColumnAlignment == "R":
                strColumnAlignment = "right"
            listColumnAlignments.append(strColumnAlignment)
            # The width is scaled by a factor of 75 (presumably cm to pixels - TODO confirm)
            intColumnWidth = int(float(strColumnDefinition.lstrip("LRC")) * 75)
            listColumnWidths.append(intColumnWidth)
            intTableWidth += intColumnWidth
            intNumberOfColumns += 1
        xmlRawTable.set("width", str(intTableWidth))
        del xmlRawTable.attrib["rend"]
        del xmlRawTable.attrib["id-text"]
        del xmlRawTable.attrib["id"]
        del xmlRawTable.attrib["place"]
        # Figure out and deal with the Header
        xmlHeader = xmlRawTable.find(".//row/cell/tableheader")
        if xmlHeader is not None:
            # Drop the marker element, but keep the text following it in the cell
            xmlHeader.text = ""
            xmlHeader.getparent().text = xmlHeader.tail
            xmlHeader.getparent().remove(xmlHeader)
            # The first row of a table with header consists of <th> cells
            xmlFirstRow = xmlRawTable.find(".//row")
            xmlFirstRow.tag = "tr"
            xmlFirstRowCells = xmlFirstRow.findall(".//cell")
            for xmlFirstRowCell in xmlFirstRowCells:
                xmlFirstRowCell.tag = "th"
        # Now Deal with the rest of the rows
        xmlTableRows = xmlRawTable.findall(".//row")
        for xmlTableRow in xmlTableRows:
            xmlTableCells = xmlTableRow.findall(".//cell")
            intCurrentColumn = 1
            print (listColumnAlignments)
            for xmlTableCell in xmlTableCells:
                xmlTableCell.tag = "td"
                xmlTableCell.set("align",listColumnAlignments[intCurrentColumn])
                xmlTableCell.set("style","width: " + str(listColumnWidths[intCurrentColumn]) + ";")
                # Deal with multicolumn
                if xmlTableCell.get("cols") is not None:
                    xmlTableCell.set("colspan", xmlTableCell.get("cols"))
                if intCurrentColumn > len(xmlTableCells):
                    intCurrentColumn = 1
                # Deal with multicolumn again, increase intCurrentColumn by the columns being spanned
                elif xmlTableCell.get("cols") is not None:
                    intCurrentColumn = intCurrentColumn + int(xmlTableCell.get("cols"))
                    del xmlTableCell.attrib["cols"]
                else:
                    intCurrentColumn += 1
            xmlTableRow.tag = "tr"
            xmlTableRow.set("valign", "top")
        # Put the table in the place of its parent element
        xmlTableParent = xmlTable.getparent()
        xmlTableParent.addnext(xmlTable)
        xmlTableParent.getparent().remove(xmlTableParent)
    intChapterNumber += 1
print("-----------------------------------------------------")
print("Preparing Facsimiles")
xmlParts = xmlEbookTree.findall(".//div0")
for xmlPart in xmlParts:
    xmlFacsimiles = xmlPart.findall(".//EOAfacsimilepage")
    for xmlFacsimile in xmlFacsimiles:
        strImageFile = xmlFacsimile.find(".//file").text
        strFacsimileLabel = xmlFacsimile.find(".//label").text
        etree.strip_elements(xmlFacsimile, "file", "label")
        # TODO: Deal with a (missing) suffix of the file somehow here, and convert files if necessary
        strImageFile = strImageFile.rstrip("\n")
        # Folder and file name are merged into one flat file name
        strImageFileDir, strImageFileName = os.path.split(strImageFile)
        strImageFileDir = strImageFileDir.replace("/", "")
        shutil.copy(
            os.getcwd() + "/" + strImageFile,
            os.getcwd() + "/CONVERT/epub/OEBPS/images/" + strImageFileDir + strImageFileName,
        )
        # Add copied file to contentopf
        contentopf = addToContentopf(
            contentopf,
            "images/" + strImageFileDir + strImageFileName,
            strImageFileDir + strImageFileName,
            "jpg",
        )
        # The page is shown as an image inside an SVG wrapper, which takes the place of the facsimile element
        strSVGTemplate = """<svg version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="100%" height="100%" viewBox="0 0 573 800" preserveAspectRatio="xMidYMid meet"></svg>"""
        xmlSVGFacsimile = etree.fromstring(strSVGTemplate)
        xmlNew = etree.Element('image')
        xmlNew.set("width", "600")
        xmlNew.set("height", "800")
        xmlNew.set("{http://www.w3.org/1999/xlink}href", "images/" + strImageFileDir + strImageFileName)
        xmlSVGFacsimile.append(xmlNew)
        xmlFacsimile.getparent().replace(xmlFacsimile, xmlSVGFacsimile)
print("-----------------------------------------------------")
print("Preparing Cross-References")
# Every EOAref is replaced by the number of the object it refers to
for xmlChapter in xmlChapters:
    xmlReferences = xmlChapter.findall(".//EOAref")
    for xmlReference in xmlReferences:
        print("XXXXXXXX")
        strResult = "!!! Cross Reference !!!"
        xmlReferenceLabel = xmlReference.find("Label")
        xmlReferenceLabelText = xmlReferenceLabel.text
        xmlReferenceRef = xmlReference.find("ref")
        xmlReferenceRefTarget = xmlReferenceRef.get("target")
        # Checked in this order; when several dictionaries know the key, the
        # last hit wins. Entries: (dictionary, key, message, append key to message?)
        listReferenceLookups = (
            (dictEquations, xmlReferenceLabelText, "Verweis auf Array gefunden:", True),
            (dictEquations, xmlReferenceRefTarget, "Verweis auf Equation gefunden:", True),
            (dictLists, xmlReferenceRefTarget, "Verweis auf Liste gefunden", False),
            (dictChapters, xmlReferenceRefTarget, "Verweis auf Kapitel gefunden", False),
            (dictSections, xmlReferenceRefTarget, "Verweis auf Section gefunden", False),
            (dictFigures, xmlReferenceRefTarget, "Verweis auf Abbildung gefunden", False),
            (dictFootnotes, xmlReferenceRefTarget, "Verweis auf Fussnote gefunden", False),
            (dictTheorems, xmlReferenceRefTarget, "Verweis auf Theorem gefunden", False),
            (dictTables, xmlReferenceLabelText, "Verweis auf Tabelle gefunden", False),
        )
        for dictNumbers, strReferenceKey, strMessage, boolAppendKey in listReferenceLookups:
            if strReferenceKey in dictNumbers:
                print(strMessage + strReferenceKey if boolAppendKey else strMessage)
                strResult = dictNumbers[strReferenceKey]
        # clear() removes the tail as well, so it is put back afterwards
        tmpTail = xmlReference.tail or ""
        print("XXXXXXXX")
        xmlReference.clear()
        xmlReference.text = strResult
        xmlReference.tail = tmpTail
# Substitute Page-References with their targets
for xmlChapter in xmlChapters:
    xmlReferences = xmlChapter.findall(".//EOApageref")
    for xmlReference in xmlReferences:
        xmlReferenceLabel = xmlReference.find("Label")
        xmlReferenceLabelText = xmlReferenceLabel.text
        xmlReferenceRef = xmlReference.find("ref")
        xmlReferenceRefTarget = xmlReferenceRef.get("target")
        if xmlReferenceLabelText in dictPagelabels:
            print("Verweis auf Seite gefunden" + xmlReferenceLabelText)
            strResult = dictPagelabels[xmlReferenceLabelText]
        else:
            # Placeholder that stays visible when the label is unknown
            strResult = "!!! Page Reference !!!"
        # Only the resolved text and the former tail remain
        tmpTail = xmlReference.tail or ""
        xmlReference.clear()
        xmlReference.text = strResult
        xmlReference.tail = tmpTail
# Correcting References to Publications
# NOTE: This may be reworked in the future to enable popups in the ebook
# NOTE: For the time being, the span is renamed here so that it can be removed later
for xmlChapter in xmlChapters:
    xmlPublicationreferences = xmlChapter.findall(".//span")
    for xmlPublicationreference in xmlPublicationreferences:
        if xmlPublicationreference.get("rel") != "popover":
            continue
        xmlPublicationreference.tag = "EOAcitation"
##############################################################
#           Finish ePub Conversion, save File                #
##############################################################
print("-----------------------------------------------------")
print("Cleaning up XML")
# Index entries lose their content, only the text following them is kept
xmlIndexentries = xmlEbookTree.xpath(".//EOAindex | .//EOAindexperson | .//EOAindexlocation")
for xmlIndexentry in xmlIndexentries:
    tmpTail = xmlIndexentry.tail or ""
    xmlIndexentry.clear()
    xmlIndexentry.tail = tmpTail
# Tags that are dissolved; their text and children stay in the document
listObsoleteTags = (
    "EOAlabel", "EOAindex", "EOApageref", "EOAcitenumeric", "EOAtable",
    "EOAref", "note", "div", "div2", "div3", "div4", "citetext", "newpage",
    "EOAciteyear", "EOAtablelabel", "hi", "pagebreak", "page", "pagestyle",
    "EOAcitation", "EOAciteauthoryear", "EOAcitemanual",
    "EOAprintbibliography", "EOAindexperson", "EOAprintindex",
    "EOAprintpersonindex", "EOAindexlocation", "EOAprintlocationindex",
    "anchor", "temp", "EOAletterhead",
)
etree.strip_tags(xmlEbookTree, *listObsoleteTags)
# Attributes that must not show up in the ePub
listObsoleteAttributes = ("id-text", "id", "noindent", "type", "label", "spacebefore", "rend")
etree.strip_attributes(xmlEbookTree, *listObsoleteAttributes)
# citekey elements are removed together with their content, but not their tail
etree.strip_elements(xmlEbookTree, "citekey", with_tail=False)
# Write every Part and Chapter into one file
xmlChapters = xmlEbookTree.findall("//div1")
listParts = []
# The parts themselves are remembered, not their ids: the clean-up step has
# already stripped every "id" attribute, so all parts share the id None and
# only the first part would ever get its title page.
listWrittenParts = []
intTechnicalChapterNumber = 1
for xmlChapter in xmlChapters:
    # Load xmlHTMLTemplate
    htmlChapter = etree.parse(SUPPORT_TEMPLATE_PATH + "Templates/epubchapter.xml", xmlChapterParser)
    # Find out, if it's inside a part. If Part has not been worked on, then do it
    xmlChapterParent = xmlChapter.getparent()
    if xmlChapterParent.tag == "div0" and xmlChapterParent not in listWrittenParts:
        listWrittenParts.append(xmlChapterParent)
        listParts.append(xmlChapterParent.get("id"))
        strPartTitle = xmlChapterParent.find(".//head").text
        htmlChapter.find(".//" + htmlns + "title").text = strPartTitle
        xmlNew = etree.Element('h1')
        xmlNew.text = strPartTitle
        htmlChapter.find(".//" + htmlns + "body").append(xmlNew)
        # Save Part (the serialization carries no XML declaration, hence UTF-8)
        tmpFileName = os.getcwd() + "/CONVERT/epub/OEBPS/chapter" + (str(intTechnicalChapterNumber)) + ".xhtml"
        tmpResult = etree.tostring(htmlChapter, pretty_print=True, encoding="unicode")
        with open(tmpFileName, "w", encoding="utf-8") as tmpFile:
            tmpFile.write(tmpResult)
        # Add to TocNCX
        tocncx = addToTocncx(tocncx, htmlChapter.find(".//" + htmlns + "title").text, intTechnicalChapterNumber)
        contentopf = addToContentopf(contentopf, "chapter" + str(intTechnicalChapterNumber) + ".xhtml", "chapter" + str(intTechnicalChapterNumber), "xml")
        intTechnicalChapterNumber += 1
        # Reset htmlChapter
        htmlChapter = etree.parse(SUPPORT_TEMPLATE_PATH + "Templates/epubchapter.xml", xmlChapterParser)
    # Read all children of the div1 and attach them to htmlChapter
    xmlChildren = xmlChapter.getchildren()
    for xmlChild in xmlChildren:
        # Using Deepcopy, coz a simple append will delete the original
        htmlChapter.find(".//" + htmlns + "body").append(deepcopy(xmlChild))
    # Save Chapter
    tmpFileName = os.getcwd() + "/CONVERT/epub/OEBPS/chapter" + (str(intTechnicalChapterNumber)) + ".xhtml"
    tmpResult = etree.tostring(htmlChapter, pretty_print=True, encoding="unicode")
    with open(tmpFileName, "w", encoding="utf-8") as tmpFile:
        tmpFile.write(tmpResult)
    # Add to TocNCX and to content.opf
    tocncx = addToTocncx(tocncx, xmlChapter.find(".//h1").text, intTechnicalChapterNumber)
    contentopf = addToContentopf(contentopf, "chapter" + str(intTechnicalChapterNumber) + ".xhtml", "chapter" + str(intTechnicalChapterNumber), "xml")
    intTechnicalChapterNumber += 1
# Convert Facsimile-Parts
xmlParts = xmlEbookTree.findall("//div0")
for xmlPart in xmlParts:
    print("-------------")
    print("Working on Facsimile-Part")
    print("-------------")
    # Only parts with a child element EOAfacsimilepart are facsimile parts
    xmlHeadline = xmlPart.find(".//EOAfacsimilepart")
    if xmlHeadline is None:
        continue
    htmlChapter = etree.parse(SUPPORT_TEMPLATE_PATH + "Templates/epubchapter.xml", xmlChapterParser)
    # Change EOAfacsimilepart into H1
    xmlHeadline.tag = "h1"
    etree.strip_elements(xmlPart, "head")
    # Read all children of the div0 and attach them to htmlChapter
    xmlChildren = xmlPart.getchildren()
    for xmlChild in xmlChildren:
        # Using Deepcopy, coz a simple append will delete the original
        htmlChapter.find(".//" + htmlns + "body").append(deepcopy(xmlChild))
    # Save Chapter (once; the serialization carries no XML declaration, hence UTF-8)
    tmpFileName = os.getcwd() + "/CONVERT/epub/OEBPS/chapter" + (str(intTechnicalChapterNumber)) + ".xhtml"
    tmpResult = etree.tostring(htmlChapter, pretty_print=True, encoding="unicode")
    with open(tmpFileName, "w", encoding="utf-8") as tmpFile:
        tmpFile.write(tmpResult)
    # Add to TocNCX under the headline of this part. The title used to be
    # read from xmlChapter, a leftover of the chapter loop that has nothing
    # to do with the facsimile part.
    tocncx = addToTocncx(tocncx, xmlHeadline.text, intTechnicalChapterNumber)
    contentopf = addToContentopf(contentopf, "chapter" + str(intTechnicalChapterNumber) + ".xhtml", "chapter" + str(intTechnicalChapterNumber), "xml")
    intTechnicalChapterNumber += 1
# Both files are serialized without an XML declaration and therefore have to
# be written as UTF-8, independent of the locale of the machine. The with
# statement closes the file even if writing fails.
# Saving toc.ncx
tmpFileName = os.getcwd() + "/CONVERT/epub/OEBPS/toc.ncx"
tmpResult = etree.tostring(tocncx, pretty_print=True, encoding="unicode")
with open(tmpFileName, "w", encoding="utf-8") as tmpFile:
    tmpFile.write(tmpResult)
# Saving content.opf
tmpFileName = os.getcwd() + "/CONVERT/epub/OEBPS/content.opf"
tmpResult = etree.tostring(contentopf, pretty_print=True, encoding="unicode")
with open(tmpFileName, "w", encoding="utf-8") as tmpFile:
    tmpFile.write(tmpResult)
############################################################################
#           Convert tralics-XML to Django Data Structure                   #
############################################################################
# Create django File Structure. Every folder is checked on its own, so that a
# half-finished earlier run (django/ exists, a subfolder does not) gets repaired.
for strDjangoFolder in (
    "/CONVERT/django",
    "/CONVERT/django/images",
    "/CONVERT/django/images/embedded",
    "/CONVERT/django/files",
):
    os.makedirs(os.getcwd() + strDjangoFolder, exist_ok=True)
# Create empty xmlTree
xmlEOAdocument = etree.Element("EOAdocument")
xmlDjangoTree = etree.ElementTree(xmlEOAdocument)
etree.strip_attributes(xmlTree, "noindent")
# Remove temp-Tag
etree.strip_tags(xmlTree, "temp")
# Write Temporary XML-Maintree (no XML declaration is written, hence UTF-8)
ergebnis = etree.tostring(xmlTree, pretty_print=True, encoding="unicode")
with open("Devel_django.xml", "w", encoding="utf-8") as ergebnisdatei:
    ergebnisdatei.write(ergebnis)
# Find all Chapters from the original tralics XML
xmlChapters = xmlTree.findall("//div1")
def _djangoFlatImageName(strImagePath):
    """Return the flat file name used for an image in the django image folders.

    The directory part of *strImagePath* loses its slashes and is glued
    directly in front of the base name.
    """
    return re.sub("/", "", os.path.dirname(strImagePath)) + os.path.basename(strImagePath)


def _djangoBuildFigure(xmlElement, strTag):
    """Copy the image referenced below *xmlElement* and return a new *strTag* element.

    The returned element carries the ``file`` and ``width`` attributes; the
    caller adds ``order`` and any numbering.
    """
    strCwd = os.getcwd()
    strImageFileString = xmlElement.find(".//file").text.rstrip("\n")
    strFlatName = _djangoFlatImageName(strImageFileString)
    shutil.copy(strCwd + "/" + strImageFileString, strCwd + "/CONVERT/django/images/" + strFlatName)
    xmlEOAfigure = etree.Element(strTag)
    xmlEOAfigure.set("file", strFlatName)
    xmlEOAfigure.set("width", xmlElement.find(".//width").text)
    return xmlEOAfigure


def djangoParseObject(xmlElement, indent=False, listtype=None, listnumber=0, uid=None):
    """Convert one tralics element into its django representation.

    Depending on the tag (or on what the element contains) a transcription,
    letterhead, figure, table, list, theorem, equation or subsubsection is
    built; anything else is retagged in place as ``EOAparagraph``.  Every
    object produced receives the current value of the module-level counter
    ``intObjectNumber`` as its ``order`` attribute, and the counter is
    advanced.  Referenced images and equation files are copied (or resized
    with GraphicsMagick) into ``CONVERT/django/images`` below the current
    working directory.

    Args:
        xmlElement: the source element to convert.
        indent: when true, the result is marked with ``indent="True"``.
        listtype: optional value for the ``listtype`` attribute of the result.
        listnumber: value for the ``listnumber`` attribute; ``0`` means none.
        uid: optional value for the ``id`` attribute of the result.

    Returns:
        The converted element.  Results made of several objects are wrapped
        in a ``temp`` element.
    """
    # Only the running object counter is rebound here; dictFigures and
    # dictTables are merely read from module scope.
    global intObjectNumber
    strCwd = os.getcwd()
    strItemsDir = strCwd + "/items/"
    strImagesDir = strCwd + "/CONVERT/django/images/"
    # Check what kind of Element we have and change the data
    if xmlElement.tag == "EOAtranscripted":
        xmlResult = etree.Element("temp")
        xmlEOATranscription = etree.Element("EOAtranscription")
        xmlEOATranscription.set("order", str(intObjectNumber))
        intObjectNumber += 1
        for strHeaderTag in ("Leftheader", "Rightheader"):
            xmlHeader = xmlElement.find(".//" + strHeaderTag)
            etree.strip_tags(xmlHeader, "p")
            xmlEOATranscription.append(xmlHeader)
        xmlTranscriptedtext = xmlElement.find(".//EOAtranscriptedtext")
        # The text is serialised and parsed again, so the columns below are
        # filled from a fresh tree rather than from the source element.
        strTranscriptedtext = etree.tostring(xmlTranscriptedtext, encoding="unicode")
        xmlTemp = etree.XML(str(strTranscriptedtext))
        xmlLeftColumn = etree.Element("EOAtranscriptionleft")
        xmlRightColumn = etree.Element("EOAtranscriptionright")
        # Everything before the first <pagebreak/> goes left, the rest right
        boolRightColumn = False
        for xmlColumnChild in xmlTemp.iterchildren():
            if xmlColumnChild.tag == "pagebreak":
                boolRightColumn = True
                continue
            if boolRightColumn:
                xmlRightColumn.append(xmlColumnChild)
            else:
                xmlLeftColumn.append(xmlColumnChild)
        xmlEOATranscription.append(xmlLeftColumn)
        xmlEOATranscription.append(xmlRightColumn)
        # Convert Images within the transcription
        for xmlFigure in xmlEOATranscription.findall(".//EOAfigurenonumber"):
            strImageFileString = xmlFigure.find(".//file").text.rstrip("\n")
            strFlatName = _djangoFlatImageName(strImageFileString)
            # Argument list instead of a split command string, so paths
            # containing spaces survive; "250x250>" only shrinks larger images.
            listArguments = shlex.split(GM_PATH) + [
                "convert",
                strCwd + "/" + strImageFileString,
                "-resize",
                "250x250>",
                strImagesDir + "embedded/" + strFlatName,
            ]
            subprocess.check_output(listArguments, shell=False)
            # clear() also wipes the tail, i.e. the text following the
            # figure, so it is saved and put back.
            tmpStrTail = xmlFigure.tail
            xmlFigure.clear()
            xmlFigure.tail = tmpStrTail
            xmlFigure.tag = "img"
            xmlFigure.set("src", strFlatName)
            xmlFigure.set("alt", "")
        xmlResult.append(xmlEOATranscription)
    elif xmlElement.tag == "EOAletterhead":
        xmlResult = etree.Element("temp")
        xmlEOAletterhead = etree.Element("EOAletterhead")
        for strPartTag in ("Recipient", "Archive", "Additional", "Pages"):
            xmlEOAletterhead.append(xmlElement.find(".//" + strPartTag))
        xmlEOAletterhead.set("order", str(intObjectNumber))
        intObjectNumber += 1
        xmlResult.append(xmlEOAletterhead)
    elif xmlElement.findall(".//EOAfigurenonumber"):
        xmlResult = etree.Element("temp")
        xmlEOAfigure = _djangoBuildFigure(xmlElement, "EOAfigurenonumber")
        xmlEOAfigure.set("order", str(intObjectNumber))
        intObjectNumber += 1
        xmlResult.append(xmlEOAfigure)
    elif xmlElement.tag == "EOAfigure":
        xmlResult = etree.Element("temp")
        xmlEOAfigure = _djangoBuildFigure(xmlElement, "EOAfigure")
        xmlEOAfigure.set("order", str(intObjectNumber))
        intObjectNumber += 1
        # Insert visual Number and uid
        strFigureUID = xmlElement.find(".//anchor").get("id")
        xmlEOAfigure.set("number", dictFigures[strFigureUID])
        xmlEOAfigure.set("id", strFigureUID)
        # Insert Caption
        xmlEOAfigure.append(xmlElement.find(".//caption"))
        xmlResult.append(xmlEOAfigure)
    elif xmlElement.findall(".//EOAtable"):
        xmlResult = etree.Element("EOAtable")
        xmlRawTable = xmlElement.find(".//table")
        xmlResult.set("order", str(intObjectNumber))
        intObjectNumber += 1
        xmlResult.append(xmlRawTable)
        # Copy Number, Label and Caption
        xmlCaption = xmlElement.find(".//EOAtablecaption")
        if xmlCaption.text != "nonumber":
            xmlResult.append(xmlCaption)
            strTableLabel = xmlElement.find(".//EOAtablelabel").text
            xmlResult.set("label", strTableLabel)
            xmlResult.set("number", dictTables[strTableLabel])
            xmlResult.set("id", xmlRawTable.get("id"))
        else:
            # NOTE(review): this marks the source element, not xmlResult;
            # confirm whether the flag was meant for the returned table.
            xmlElement.set("numbering", "false")
        # Transform width of Columns
        strColumnString = xmlElement.find(".//EOAtablecolumns").text
        strColumnString = re.sub(r"\|", "", strColumnString)
        reMatchObjects = re.findall(r'([L|R|C].*?cm)', strColumnString)
        dictAlignments = {"L": "left", "C": "center", "R": "right"}
        intTableWidth = 0
        # Index 0 is a placeholder so that columns can be addressed from 1
        listColumnAlignments = [None]
        listColumnWidths = [None]
        for strColumnDefinition in reMatchObjects:
            strColumnDefinition = strColumnDefinition.rstrip("cm")
            strColumnLetter = strColumnDefinition[0]
            listColumnAlignments.append(dictAlignments.get(strColumnLetter, strColumnLetter))
            # The numeric part (given in cm) is scaled by 75
            intColumnWidth = int(float(strColumnDefinition.lstrip("LRC")) * 75)
            listColumnWidths.append(intColumnWidth)
            intTableWidth += intColumnWidth
        xmlRawTable.set("width", str(intTableWidth))
        # Figure out and deal with the Header
        xmlHeader = xmlRawTable.find(".//row/cell/tableheader")
        if xmlHeader is not None:
            xmlHeader.text = ""
            xmlHeader.getparent().text = xmlHeader.tail
            xmlHeader.getparent().remove(xmlHeader)
            # The first row becomes <tr> with <th> cells and is therefore no
            # longer matched by the generic row handling below.
            xmlFirstRow = xmlRawTable.find(".//row")
            xmlFirstRow.tag = "tr"
            for xmlFirstRowCell in xmlFirstRow.findall(".//cell"):
                xmlFirstRowCell.tag = "th"
        # Now Deal with the rest of the rows
        for xmlTableRow in xmlRawTable.findall(".//row"):
            xmlTableCells = xmlTableRow.findall(".//cell")
            intCurrentColumn = 1
            for xmlTableCell in xmlTableCells:
                xmlTableCell.tag = "td"
                xmlTableCell.set("align", listColumnAlignments[intCurrentColumn])
                xmlTableCell.set("style", "width: " + str(listColumnWidths[intCurrentColumn]) + ";")
                # Deal with multicolumn
                strColumnSpan = xmlTableCell.get("cols")
                if strColumnSpan is not None:
                    xmlTableCell.set("colspan", strColumnSpan)
                if intCurrentColumn > len(xmlTableCells):
                    intCurrentColumn = 1
                # Deal with multicolumn again, increase intCurrentColumn by the columns being spanned
                elif strColumnSpan is not None:
                    intCurrentColumn = intCurrentColumn + int(strColumnSpan)
                    del xmlTableCell.attrib["cols"]
                else:
                    intCurrentColumn += 1
            xmlTableRow.tag = "tr"
            xmlTableRow.set("valign", "top")
    elif xmlElement.tag == "list" and xmlElement.get('type') != 'description':
        xmlResult = etree.Element("temp")
        if xmlElement.get('type') == 'ordered':
            # The first item is handled separately and carries the number 1
            # NOTE(review): "..//item" searches from the parent of the list,
            # not from the list itself; confirm that this is intended.
            xmlFirstItem = xmlElement.find("..//item")
            xmlFirstItemElement = xmlFirstItem.getchildren()[0]
            xmlResult.append(djangoParseObject(xmlFirstItemElement, indent=True, listtype="ordered", listnumber="1", uid=xmlFirstItem.get("id")))
            # Process Child Elements which are Part of this item
            for xmlChild in xmlFirstItem.iterchildren():
                xmlResult.append(djangoParseObject(xmlChild, indent=True))
            xmlFirstItem.getparent().remove(xmlFirstItem)
            # Process remaining items in this list
            tmpIntNumber = 2
            for xmlItem in xmlElement.iterchildren():
                xmlItemElement = xmlItem.getchildren()[0]
                xmlResult.append(djangoParseObject(xmlItemElement, indent=True, listtype="ordered", listnumber=str(tmpIntNumber), uid=xmlItem.get("id")))
                tmpIntNumber += 1
                for xmlChild in xmlItem.iterchildren():
                    xmlResult.append(djangoParseObject(xmlChild, indent=True))
                xmlItem.getparent().remove(xmlItem)
        if xmlElement.get('type') == 'simple':
            # The first item is handled separately and carries the bullet marker
            xmlFirstItem = xmlElement.find("..//item")
            xmlFirstItemElement = xmlFirstItem.getchildren()[0]
            xmlResult.append(djangoParseObject(xmlFirstItemElement, indent=True, listtype="unordered", listnumber="-"))
            # Process Child Elements which are Part of this item
            for xmlChild in xmlFirstItem.iterchildren():
                xmlResult.append(djangoParseObject(xmlChild, indent=True))
            xmlFirstItem.getparent().remove(xmlFirstItem)
            for xmlItem in xmlElement.iterchildren():
                xmlItemElement = xmlItem.getchildren()[0]
                xmlResult.append(djangoParseObject(xmlItemElement, indent=True))
                for xmlChild in xmlItem.iterchildren():
                    xmlResult.append(djangoParseObject(xmlChild, indent=True))
                xmlItem.getparent().remove(xmlItem)
    elif xmlElement.tag == "list" and xmlElement.get('type') == 'description':
        xmlResult = etree.Element("temp")
        # Children are consumed pairwise (label, item) until the list is empty
        while len(xmlElement.getchildren()) != 0:
            xmlDescription = etree.Element("EOAdescription")
            xmlDescription.set("order", str(intObjectNumber))
            xmlLabel = xmlElement.getchildren()[0]
            print (etree.tostring(xmlLabel))
            xmlItem = xmlElement.getchildren()[1]
            if len(xmlItem.getchildren()) > 0:
                xmlContent = xmlItem.getchildren()[0]
            else:
                xmlContent = etree.Element("p")
            xmlLabel.tag = "description"
            xmlDescription.append(xmlLabel)
            xmlDescription.append(xmlContent)
            xmlResult.append(xmlDescription)
            intObjectNumber += 1
            for xmlChild in xmlItem.iterchildren():
                xmlResult.append(djangoParseObject(xmlChild, indent=True))
            xmlItem.getparent().remove(xmlItem)
    elif xmlElement.tag == "theorem":
        xmlTheoremHead = xmlElement.find(".//head")
        xmlTheoremText = xmlElement.find(".//p")
        xmlResult = etree.Element("EOAtheorem")
        xmlResult.append(xmlTheoremHead)
        xmlResult.append(xmlTheoremText)
        xmlResult.set("order", str(intObjectNumber))
        xmlResult.set("number", xmlElement.get("id-text"))
        xmlResult.set("uid", xmlElement.get("id"))
        intObjectNumber += 1
    elif xmlElement.findall(".//EOAequationarray"):
        xmlResult = etree.Element("temp")
        for xmlEquation in xmlElement.findall(".//EOAequation"):
            xmlEOAequation = etree.Element("EOAequation")
            xmlEOAequation.set("order", str(intObjectNumber))
            intObjectNumber += 1
            xmlEOAequation.set("number", xmlEquation.get("number"))
            xmlEOAequation.set("filename", xmlEquation.get("filename"))
            if xmlEquation.get("label") is not None:
                xmlEOAequation.set("label", xmlEquation.get("label"))
            shutil.copy(strItemsDir + xmlEquation.get("filename"), strImagesDir)
            xmlEOAequation.set("TeX", xmlEquation.get("TeX"))
            xmlResult.append(xmlEOAequation)
    elif xmlElement.findall(".//EOAequationarraynonumber"):
        xmlResult = etree.Element("temp")
        # NOTE(review): filename and TeX are read from the array element
        # itself; confirm the array (not its equations) carries them.
        for xmlEquation in xmlElement.findall(".//EOAequationarraynonumber"):
            xmlEOAequation = etree.Element("EOAequation")
            xmlEOAequation.set("order", str(intObjectNumber))
            intObjectNumber += 1
            xmlEOAequation.set("number", "")
            xmlEOAequation.set("filename", xmlEquation.get("filename"))
            shutil.copy(strItemsDir + xmlEquation.get("filename"), strImagesDir)
            xmlEOAequation.set("TeX", xmlEquation.get("TeX"))
            xmlResult.append(xmlEOAequation)
    elif xmlElement.tag == "EOAequationnonumber":
        # Process one unnumbered EOAequation which is not encapsulated
        xmlResult = etree.Element("EOAequation")
        xmlResult.set("order", str(intObjectNumber))
        intObjectNumber += 1
        xmlResult.set("filename", xmlElement.get("filename"))
        xmlResult.set("TeX", xmlElement.get("TeX"))
        shutil.copy(strItemsDir + xmlElement.get("filename"), strImagesDir)
        xmlResult.set("number", "")
    elif xmlElement.findall(".//EOAequation"):
        # Process various Equations which may be encapsulated within <p>
        xmlResult = etree.Element("temp")
        for xmlEquation in xmlElement.findall(".//EOAequation"):
            # Create basic Element EOAequation
            xmlEOAequation = etree.Element("EOAequation")
            xmlEOAequation.set("order", str(intObjectNumber))
            intObjectNumber += 1
            xmlEOAequation.set("number", xmlEquation.get("number"))
            xmlEOAequation.set("TeX", xmlEquation.get("TeX"))
            if xmlEquation.get("uid") is not None:
                xmlEOAequation.set("uid", xmlEquation.get("uid"))
            shutil.copy(strItemsDir + xmlEquation.get("filename"), strImagesDir)
            xmlEOAequation.set("filename", xmlEquation.get("filename"))
            xmlResult.append(xmlEOAequation)
    elif xmlElement.tag == "EOAequation":
        # Process one EOAequation which is not encapsulated
        xmlResult = etree.Element("EOAequation")
        xmlResult.set("order", str(intObjectNumber))
        intObjectNumber += 1
        xmlResult.set("number", xmlElement.get("number"))
        xmlResult.set("TeX", xmlElement.get("TeX"))
        if xmlElement.get("uid") is not None:
            xmlResult.set("uid", xmlElement.get("uid"))
        shutil.copy(strItemsDir + xmlElement.get("filename"), strImagesDir)
        xmlResult.set("filename", xmlElement.get("filename"))
    elif xmlElement.tag == "div4":
        xmlResult = etree.Element("EOAsubsubsection")
        xmlResult.set("order", str(intObjectNumber))
        intObjectNumber += 1
        xmlResult.append(xmlElement.find("head"))
        for xmlChild in xmlElement.iterchildren():
            xmlResult.append(djangoParseObject(xmlChild))
    else:
        # Anything else is treated as a paragraph and retagged in place
        xmlElement.tag = "EOAparagraph"
        xmlElement.set("order", str(intObjectNumber))
        intObjectNumber += 1
        xmlResult = xmlElement
    # Attach the list and indentation markers requested by the caller
    if indent:
        xmlResult.set("indent", "True")
    if listtype is not None:
        xmlResult.set("listtype", listtype)
    if listnumber != 0:
        xmlResult.set("listnumber", listnumber)
    if uid is not None:
        xmlResult.set("id", uid)
    return xmlResult
def djangoParseHeadline(xmlElement):
    """Split the author list of a headline into one ``EOAauthor`` child per author.

    The first ``EOAauthor`` element found below *xmlElement* is removed and
    its text is split at commas and at the conjunctions " and " / " und ".
    Every non-empty name is stripped of surrounding whitespace and appended
    to *xmlElement* as its own ``EOAauthor`` element.

    Args:
        xmlElement: the headline element; it is modified in place.

    Returns:
        The same *xmlElement*, with or without author children.
    """
    xmlAuthors = xmlElement.find(".//EOAauthor")
    if xmlAuthors is None:
        return xmlElement
    # An empty <EOAauthor/> has no text at all
    strAuthors = xmlAuthors.text or ""
    xmlElement.remove(xmlAuthors)
    # Normalise every separator to a plain comma before splitting
    strAuthors = re.sub("(, and | and | und )", ",", strAuthors)
    listAuthors = strAuthors.split(",")
    print (listAuthors)
    for strAuthor in listAuthors:
        # Remove Spaces before and after AuthorString
        strAuthor = strAuthor.strip()
        if not strAuthor:
            # Fragments left over by a trailing or doubled separator
            continue
        xmlAuthor = etree.Element("EOAauthor")
        xmlAuthor.text = strAuthor
        xmlElement.append(xmlAuthor)
    return xmlElement
# Iterate over Chapters, Sections, Subsections, and Subsubsections and
# Put all on one level: EOAchapter
intChapterNumber = 1
listPartIDs = []
for xmlChapter in xmlChapters:
    # The object counter restarts with every chapter
    intObjectNumber = 1
    # Process Chapter Title
    xmlEOAchapter = etree.Element("EOAchapter")
    xmlEOAchapter.set("type", "regular")
    xmlEOAchapter.set("language", xmlChapter.get("language"))
    xmlEOAchapter.set("order", str(intChapterNumber))
    if xmlChapter.get("rend") != "nonumber":
        xmlEOAchapter.set("id", xmlChapter.get("id"))
    xmlChapterHeadline = xmlChapter.find(".//head")
    if xmlChapter.get("id") in dictChapters:
        xmlEOAchapter.set("number", dictChapters[xmlChapter.get("id")])
    else:
        xmlEOAchapter.set("number", "")
    print ("-----------------------------------------------------")
    print (gettext(xmlChapterHeadline))
    xmlEOAchapter.append(djangoParseHeadline(xmlChapterHeadline))
    # Deal with EOAauthor
    if xmlChapter.find(".//EOAauthor") is not None:
        xmlEOAchapter.append(xmlChapter.find(".//EOAauthor"))
    # Attach the enclosing Part to the first Chapter that belongs to it
    if xmlChapter.getparent().tag == "div0":
        if xmlChapter.getparent().get("id") not in listPartIDs:
            listPartIDs.append(xmlChapter.getparent().get("id"))
            xmlPartHeadline = xmlChapter.getparent().find("head")
            xmlPartHeadline.tag = "EOAparthtml"
            xmlEOAchapter.append(xmlPartHeadline)
    # Append Chapter to xmlEOAdocument
    xmlEOAdocument.append(xmlEOAchapter)
    # iterate over children of Chapter
    for xmlChapterChild in xmlChapter.iterchildren():
        if xmlChapterChild.tag == "div2":
            # Process Section Title
            xmlEOAsection = etree.Element("EOAsection")
            xmlEOAsection.set("order", str(intObjectNumber))
            if xmlChapterChild.get("rend") != "nonumber":
                xmlEOAsection.set("id", xmlChapterChild.get("id"))
                xmlEOAsection.set("number", dictSections[xmlChapterChild.get("id")])
            intObjectNumber += 1
            # Look the head up on the section itself (as the subsection and
            # subsubsection branches do), not on the whole chapter.
            xmlHead = xmlChapterChild.find(".//head")
            xmlEOAsection.append(djangoParseHeadline(xmlHead))
            xmlEOAchapter.append(xmlEOAsection)
            # Iterate over Children of Section
            for xmlSectionChild in xmlChapterChild.iterchildren():
                if xmlSectionChild.tag == "div3":
                    # Process Subsection Title
                    xmlEOAsubsection = etree.Element("EOAsubsection")
                    xmlEOAsubsection.set("order", str(intObjectNumber))
                    if xmlSectionChild.get("rend") != "nonumber":
                        xmlEOAsubsection.set("id", xmlSectionChild.get("id"))
                        xmlEOAsubsection.set("number", dictSections[xmlSectionChild.get("id")])
                    intObjectNumber += 1
                    xmlHead = xmlSectionChild.find(".//head")
                    xmlEOAsubsection.append(djangoParseHeadline(xmlHead))
                    xmlEOAchapter.append(xmlEOAsubsection)
                    # Iterate over children of Subsection
                    for xmlSubsectionChild in xmlSectionChild.iterchildren():
                        if xmlSubsectionChild.tag == "div4":
                            # Process Subsubsection Title
                            xmlEOAsubsubsection = etree.Element("EOAsubsubsection")
                            xmlEOAsubsubsection.set("order", str(intObjectNumber))
                            intObjectNumber += 1
                            xmlHead = xmlSubsectionChild.find(".//head")
                            xmlEOAsubsubsection.append(djangoParseHeadline(xmlHead))
                            xmlEOAchapter.append(xmlEOAsubsubsection)
                            # Iterate over children of Subsubsection
                            for xmlSubsubsectionChild in xmlSubsectionChild.iterchildren():
                                xmlEOAchapter.append(djangoParseObject(xmlSubsubsectionChild))
                        else:
                            xmlEOAchapter.append(djangoParseObject(xmlSubsectionChild))
                elif xmlSectionChild.tag == "div4":
                    # Subsubsection placed directly inside a Section
                    xmlEOAsubsubsection = etree.Element("EOAsubsubsection")
                    xmlEOAsubsubsection.set("order", str(intObjectNumber))
                    intObjectNumber += 1
                    xmlHead = xmlSectionChild.find(".//head")
                    xmlEOAsubsubsection.append(djangoParseHeadline(xmlHead))
                    xmlEOAchapter.append(xmlEOAsubsubsection)
                    # Iterate over children of Subsubsection
                    for xmlSubsubsectionChild in xmlSectionChild.iterchildren():
                        xmlEOAchapter.append(djangoParseObject(xmlSubsubsectionChild))
                else:
                    xmlEOAchapter.append(djangoParseObject(xmlSectionChild))
        else:
            xmlEOAchapter.append(djangoParseObject(xmlChapterChild))
    intChapterNumber += 1
print ("----------------------------------------------")
print ("Processing Facsimile Parts")
listModes = ["text", "textPollux", "xml"]
strBasicURL = "http://mpdl-system.mpiwg-berlin.mpg.de/mpdl/interface/page-fragment.xql?document="
parserECHO = etree.XMLParser()
xmlParts = xmlTree.findall("//div0")
intFacNumber = 1
for xmlPart in xmlParts:
    intObjectNumber = 1
    intFacPartNumber = 1
    # Only parts that contain facsimile material are handled here
    if xmlPart.find(".//EOAfacsimilepart") is None:
        continue
    xmlEOAfacsimilepart = etree.Element("EOAfacsimilepart")
    xmlEOAfacsimilepart.set("order", str(intChapterNumber))
    xmlEOAfacsimileparthead = xmlPart.find(".//head")
    for xmlChild in xmlEOAfacsimileparthead:
        if xmlChild.tag == "hi":
            xmlChild.tag = "em"
            del xmlChild.attrib["rend"]
    xmlEOAfacsimilepart.append(xmlEOAfacsimileparthead)
    intChapterNumber += 1
    xmlEOAdocument.append(xmlEOAfacsimilepart)
    xmlFacsimilepages = xmlPart.findall(".//EOAfacsimilepage")
    intFacPageNumber = 1
    for xmlFacsimilepage in xmlFacsimilepages:
        strImageFile = xmlFacsimilepage.find(".//file").text
        strLabel = xmlFacsimilepage.find(".//label").text
        strPagenumber = xmlFacsimilepage.find(".//pagenumber").text or ""
        xmlEOAfacsimilepage = etree.Element("EOAfacsimilepage")
        xmlEOAfacsimilepage.set("order", str(intObjectNumber))
        # TODO: somehow cope with a (missing) file suffix here, and convert files if necessary
        strImageFile = strImageFile.rstrip("\n")
        strImageFileDir = os.path.dirname(strImageFile)
        strImageFileDir = re.sub("/", "", strImageFileDir)
        strImageFileName = os.path.basename(strImageFile)
        shutil.copy(os.getcwd() + "/" + strImageFile, os.getcwd() + "/CONVERT/django/images/" + strImageFileDir + strImageFileName)
        intObjectNumber += 1
        # Download transcription for this Page
        strFulltext = xmlFacsimilepage.find(".//fulltext").text
        if strFulltext is not None:
            print ("Ein Link zum Volltext wurde gefunden")
            # The fulltext entry has the form "<document>,<page number>"
            listFulltext = strFulltext.split(",")
            strFacsimileURL = listFulltext[0]
            strFacsimilePage = listFulltext[1]
            for strMode in listModes:
                strURL = strBasicURL + strFacsimileURL + "&pn=" + strFacsimilePage + "&mode=" + strMode
                print ("Processing Facsimile : " + strURL)
                xmlECHOtree = etree.parse(strURL, parserECHO)
                # Remove ECHO-namespaces
                objectify.deannotate(xmlECHOtree, xsi_nil=True)
                etree.cleanup_namespaces(xmlECHOtree)
                xmlDivs = xmlECHOtree.findall(".//div")
                for xmlDiv in xmlDivs:
                    if xmlDiv.get("class") == "pageContent":
                        # Create new EOA-Element
                        xmlEOAfacsimileelement = etree.Element("EOAfacsimileelement")
                        xmlEOAfacsimileelement.set("type", strMode)
                        # Fix Images in the <div>-Element
                        xmlImages = xmlDiv.findall(".//img")
                        intFacImgNumber = 1
                        for xmlImage in xmlImages:
                            strImageSrc = xmlImage.get("src")
                            strFacImgName = "facsupplements_" + str(intFacNumber) + "_" + str(intFacPageNumber) + "_" + str(intFacImgNumber) + ".jpg"
                            # The src comes from the downloaded page, so it is
                            # passed as one argument instead of being spliced
                            # into a command string.
                            listArguments = ["curl", strImageSrc, "-o", "CONVERT/django/images/" + strFacImgName]
                            try:
                                exeShell = subprocess.check_output(listArguments, shell=False, universal_newlines=True)
                            except (subprocess.CalledProcessError, OSError):
                                # Download failed: drop the image tag (temp
                                # elements are stripped afterwards).
                                xmlImage.tag = "temp"
                            else:
                                # Point the img element at the local copy
                                xmlImage.set("src", strFacImgName)
                            intFacImgNumber += 1
                        xmlEOAfacsimileelement.append(xmlDiv)
                        xmlEOAfacsimilepage.append(xmlEOAfacsimileelement)
        intFacPageNumber += 1
        xmlEOAfacsimilepage.set("file", strImageFileDir + strImageFileName)
        xmlEOAfacsimilepage.set("label", str(strLabel))
        xmlEOAfacsimilepage.set("pagenumber", str(strPagenumber))
        xmlEOAfacsimilepart.append(xmlEOAfacsimilepage)
    # Was "=+ 1", which reset the counter to 1 for every part and let the
    # downloaded file names of different parts collide.
    intFacNumber += 1
etree.strip_tags(xmlDjangoTree, "temp")
print ("----------------------------------------------")
print ("Processing and linking Footnotes for django")
def bring_footnote_down_django(footnote, fragment, footnote_number, object_number, unique_id, destination):
    """
    Turn an in-text footnote into a link and append its content to *destination*.

    The footnote element is handed to replace_footnote_with_sup(), marked with
    class "footnote" and given an <a> child that points to "#" + fragment and
    shows footnote_number.  The note's former text and children are moved into
    a new EOAfootnote element (order = object_number, anchor = the "order" of
    the nearest ancestor carrying one, id = unique_id), which is appended to
    destination.  Files of EOAequationnonumber children are copied into
    CONVERT/django/images.

    Returns object_number + 1, the next free running order.
    Raises StopIteration when no ancestor of the footnote has an "order".
    """
    # Take a snapshot of the content before the element is rewritten
    children = list(footnote.getchildren())
    note_text = footnote.text if footnote.text else ""

    replace_footnote_with_sup(footnote)
    footnote.set("class", "footnote")
    link = etree.Element("a")
    link.set("href", "#" + fragment)
    link.text = footnote_number
    footnote.append(link)

    foot = etree.Element("EOAfootnote")
    foot.set("order", str(object_number))
    foot.set("number", footnote_number)
    # First ancestor (walking outwards) that carries a running order
    nearest_order = next(
        ancestor.get("order")
        for ancestor in footnote.iterancestors()
        if ancestor.get("order") is not None
    )
    foot.set("anchor", nearest_order)
    foot.set("id", unique_id)
    foot.text = note_text

    working_dir = os.getcwd()
    for child in children:
        if child.tag == "EOAequationnonumber":
            source = "%s/items/%s" % (working_dir, child.get("filename"))
            target = "%s/CONVERT/django/images/" % working_dir
            shutil.copy(source, target)
        foot.append(child)

    destination.append(foot)
    return object_number + 1
xmlEOAchapters = xmlEOAdocument.findall(".//EOAchapter")
for xmlEOAchapter in xmlEOAchapters:
    groupings = get_bigfoot_data(xmlEOAchapter)
    has_old = len(xmlEOAchapter.findall(".//note")) != 0
    # Flatten all groupings to see whether any new-style note exists
    has_new = len([note for grouping, notes in groupings for note in notes]) != 0
    # Both styles at once is an error (that should already have been raised
    # during the epub phase); a chapter without any footnote is skipped.
    if has_old and has_new:
        raise FootnoteError("This chapter contains both old-style footnotes and new-style footnotes")
    if not has_old and not has_new:
        continue
    # Find out running order of the last item in the chapter
    # Per footnote, first correct the EOAequationnonumber inside <p>
    # Then process the child elements of each footnote and attach them to the new footnote
    # Possibly watch out for paragraphs marked with indent and wrap them in a blockquote
    xmlElement = xmlEOAchapter[-1]
    print (etree.tostring(xmlElement))
    intObjectNumber = int(xmlElement.get("order")) + 1
    intFootnoteNumber = 1
    xmlResult = etree.Element("temp")
    # The footnotes get a section of their own with a localised headline
    xmlEOAsection = etree.Element("EOAsection")
    xmlEOAsection.set("order", str(intObjectNumber))
    intObjectNumber += 1
    xmlHead = etree.Element("head")
    xmlHead.text = dictLangFootnotes[xmlEOAchapter.get("language")]
    xmlEOAsection.append(xmlHead)
    xmlResult.append(xmlEOAsection)
    # New-style notes: same treatment as the old-style ones below
    for grouping, notes in groupings:
        for index, note in enumerate(notes):
            if "lower-latin" == grouping:
                fntext = alph_footnote_index(index)
            else:
                fntext = str(index+1)
            unique_id = "fn%s" % fntext
            intObjectNumber = bring_footnote_down_django(note, unique_id, fntext, intObjectNumber, unique_id, xmlResult)
    # Old-style notes
    intFootnoteNumber = 1
    xmlFootnotes = xmlEOAchapter.findall(".//note")
    for xmlFootnote in xmlFootnotes:
        # Keep everything that clear() would throw away
        xmlFootnoteContent = xmlFootnote.getchildren()
        strFootnoteText = xmlFootnote.text or ""
        tmpTail = xmlFootnote.tail
        tmpStrUID = xmlFootnote.get("id")
        # Replace the note in the text by a superscript link
        xmlFootnote.clear()
        xmlFootnote.tail = tmpTail
        xmlFootnote.tag = "sup"
        xmlFootnote.set("class", "footnote")
        xmlFootnoteLink = etree.Element("a")
        xmlFootnoteLink.set("href", "#fn" + str(intFootnoteNumber))
        xmlFootnoteLink.text = str(intFootnoteNumber)
        xmlFootnote.append(xmlFootnoteLink)
        # Build the footnote object that is appended to the chapter
        xmlEOAfootnote = etree.Element("EOAfootnote")
        xmlEOAfootnote.set("order", str(intObjectNumber))
        intObjectNumber += 1
        xmlEOAfootnote.set("number", str(intFootnoteNumber))
        # The anchor is the order of the nearest ancestor that has one
        for xmlParent in xmlFootnote.iterancestors():
            if xmlParent.get("order") is not None:
                strFootnoteAnchorNumber = xmlParent.get("order")
                break
        xmlEOAfootnote.set("anchor", strFootnoteAnchorNumber)
        xmlEOAfootnote.set("id", tmpStrUID)
        xmlEOAfootnote.text = strFootnoteText
        for xmlElement in xmlFootnoteContent:
            if xmlElement.tag == "EOAequationnonumber":
                shutil.copy(os.getcwd() + "/items/" + xmlElement.get("filename"), os.getcwd() + "/CONVERT/django/images/")
            xmlEOAfootnote.append(xmlElement)
        xmlResult.append(xmlEOAfootnote)
        intFootnoteNumber += 1
    xmlEOAchapter.append(xmlResult)
# Remove temp-Tag
etree.strip_tags(xmlDjangoTree, "temp")
print ("----------------------------------------------")
print ("Processing various Elements")
for xmlEOAchapter in xmlEOAchapters:
    # Convert italic text
    xmlEmphasized = xmlEOAchapter.findall(".//hi")
    for xmlEmph in xmlEmphasized:
        if xmlEmph.get("rend") == "it":
            xmlEmph.tag = "em"
            del xmlEmph.attrib["rend"]
    # Convert hyperlinks
    xmlHyperlinks = xmlEOAchapter.findall(".//xref")
    for xmlHyperlink in xmlHyperlinks:
        strURL = xmlHyperlink.get('url')
        # Only add a scheme when none is present; https links used to be
        # turned into "http://https://...".
        if not strURL.startswith(("http://", "https://")):
            strURL = "http://" + strURL
        xmlHyperlink.tag = "a"
        del xmlHyperlink.attrib["url"]
        xmlHyperlink.set("href", strURL)
        etree.strip_elements(xmlHyperlink, with_tail=True, *['allowbreak'])
        xmlHyperlink.text = strURL
    # Convert bold text
    xmlBolds = xmlEOAchapter.findall(".//hi")
    for xmlBold in xmlBolds:
        if xmlBold.get("rend") == "bold":
            xmlBold.tag = "b"
            del xmlBold.attrib["rend"]
    # Convert EOAup to <sup>
    xmlUps = xmlEOAchapter.findall(".//EOAup")
    for xmlUp in xmlUps:
        xmlUp.tag = "sup"
    # Convert EOAdown to <sub>
    xmlDowns = xmlEOAchapter.findall(".//EOAdown")
    for xmlDown in xmlDowns:
        xmlDown.tag = "sub"
    # Convert EOAst to <span>
    xmlStrikeouts = xmlEOAchapter.findall(".//EOAst")
    for xmlStrikeout in xmlStrikeouts:
        xmlStrikeout.tag = "span"
        xmlStrikeout.set("style", "text-decoration: line-through;")
    # Convert letter-spacing into something nice
    xmlLetterspaceds = xmlEOAchapter.findall(".//EOAls")
    for xmlLetterspaced in xmlLetterspaceds:
        xmlLetterspaced.tag = "span"
        xmlLetterspaced.set("style", "letter-spacing: 0.5em;")
    # Convert small caps
    xmlCaps = xmlEOAchapter.findall(".//EOAcaps")
    for xmlCap in xmlCaps:
        xmlCap.tag = "span"
        xmlCap.set("style", "font-variant:small-caps;")
    # Convert EOAineq into appropriate IMG-Tags
    xmlInlineEquations = xmlEOAchapter.findall(".//EOAineq")
    for xmlInlineEquation in xmlInlineEquations:
        xmlInlineEquation.tag = "img"
        xmlInlineEquation.set("class", "EOAineq")
        xmlInlineEquation.set("alt", "")
        shutil.copy(os.getcwd() + "/items/" + xmlInlineEquation.get("src"), os.getcwd() + "/CONVERT/django/images/" + xmlInlineEquation.get("src"))
    # Convert EOAinline into appropriate IMG-Tags
    xmlInlineElements = xmlEOAchapter.findall(".//EOAinline")
    for xmlInlineElement in xmlInlineElements:
        xmlInlineElement.tag = "img"
        # The class used to be set twice; only the final value is kept
        xmlInlineElement.set("class", "eoainlineimage")
        xmlInlineElement.set("alt", "")
        strInlineElementFilePath = xmlInlineElement.text
        strInlineElementFileName = os.path.basename(strInlineElementFilePath)
        strInlineElementDirName = os.path.dirname(strInlineElementFilePath)
        xmlInlineElement.text = None
        xmlInlineElement.set("src", strInlineElementDirName + strInlineElementFileName)
        strNewImagePath = os.getcwd() + "/CONVERT/django/images/embedded/" + strInlineElementDirName + strInlineElementFileName
        shutil.copy(os.getcwd() + "/" + strInlineElementDirName + "/" + strInlineElementFileName, strNewImagePath)
        # Shrink the copy in place; an argument list (instead of a split
        # command string) keeps paths containing spaces intact.
        listArguments = shlex.split(GM_PATH) + ["convert", strNewImagePath, "-resize", "20x20", strNewImagePath]
        subprocess.check_output(listArguments, shell=False)
    # Change the citation elements into spans to create appropriate links
    for strCitationTag in ("EOAcitenumeric", "EOAciteauthoryear", "EOAciteyear", "EOAcitemanual"):
        for xmlCitation in xmlEOAchapter.findall(".//" + strCitationTag):
            xmlCitation.tag = "span"
            xmlCitation.set("class", "citation")
            xmlCitation.set("rel", "popover")
print ("----------------------------------------------")
print ("Processing Cross References")


def _reference_orders(xmlTarget):
    """Return (chapter order, object order) for a cross-reference target.

    The chapter order is the "order" attribute of the EOAchapter that
    contains xmlTarget, the object order is xmlTarget's own "order"
    attribute.  Returns None when xmlTarget is not inside an EOAchapter.
    """
    for xmlParent in xmlTarget.iterancestors("EOAchapter"):
        return xmlParent.get("order"), xmlTarget.get("order")
    return None


def _find_reference_target(strTag, strAttribute, strValue, strMessage):
    """Search xmlEOAdocument for strTag elements whose strAttribute is strValue.

    strMessage is printed for every hit.  Returns the
    (chapter order, object order) pair of the last hit that lies inside a
    chapter, or None if there is no such hit.
    """
    tupleOrders = None
    for xmlCandidate in xmlEOAdocument.findall(".//" + strTag):
        if xmlCandidate.get(strAttribute) == strValue:
            print (strMessage)
            tupleOrders = _reference_orders(xmlCandidate) or tupleOrders
    return tupleOrders


# Substitute references with their targets (with links).
# The checks below are deliberately independent (no elif): if a key is known
# to several dictionaries, the last matching kind wins.
for xmlEOAchapter in xmlEOAchapters:
    for xmlReference in xmlEOAchapter.findall(".//EOAref"):
        # Placeholder text that stays visible if the reference is unresolved
        strResult = "!!! Cross Reference !!!"
        tupleOrders = None
        strLabel = xmlReference.find("Label").text
        strTarget = xmlReference.find("ref").get("target")
        # Equations are registered both by label and by uid
        if strLabel in dictEquations:
            strResult = dictEquations[strLabel]
            tupleOrders = _find_reference_target(
                "EOAequation", "label", strLabel,
                "Erfolgreich Verweis auf Array-Formel gefunden:" + strResult) or tupleOrders
        if strTarget in dictEquations:
            strResult = dictEquations[strTarget]
            tupleOrders = _find_reference_target(
                "EOAequation", "uid", strTarget,
                "Erfolgreich Verweis auf normale Formel gefunden: " + strResult) or tupleOrders
        if strTarget in dictLists:
            print ("Verweis auf Liste gefunden")
            strResult = dictLists[strTarget]
            listItems = xmlEOAdocument.xpath("//EOAchapter/*[contains(@id, $targetuid)]", targetuid=strTarget)
            # An unmatched list target must not abort the whole conversion
            if listItems:
                tupleOrders = _reference_orders(listItems[0]) or tupleOrders
        if strTarget in dictChapters:
            print ("Verweis auf Kapitel gefunden")
            strResult = dictChapters[strTarget]
            # Use a dedicated name here; the outer loop variable is xmlEOAchapter
            for xmlTargetChapter in xmlEOAdocument.findall(".//EOAchapter"):
                if xmlTargetChapter.get("id") == strTarget:
                    print ("Erfolgreich Verweis auf ein Kapitel bearbeitet: " + strResult)
                    # Chapter links use the fixed anchor "top"
                    tupleOrders = (xmlTargetChapter.get("order"), "top")
        if strTarget in dictTheorems:
            print ("Verweis auf ein Theorem gefunden")
            strResult = dictTheorems[strTarget]
            tupleOrders = _find_reference_target(
                "EOAtheorem", "uid", strTarget,
                "Erfolgrech Verweis auf ein Theorem bearbeitet: " + strResult) or tupleOrders
        if strTarget in dictSections:
            print ("Verweis auf Section gefunden")
            strResult = dictSections[strTarget]
            # dictSections covers sections as well as subsections
            tupleOrders = _find_reference_target(
                "EOAsection", "id", strTarget,
                "Erfolgreich Verweis auf eine Section bearbeitet: " + strResult) or tupleOrders
            tupleOrders = _find_reference_target(
                "EOAsubsection", "id", strTarget,
                "Erfolgreich Verweis auf eine Sub-Section bearbeitet: " + strResult) or tupleOrders
        if strTarget in dictFigures:
            print ("Verweis auf Abbildung gefunden")
            strResult = dictFigures[strTarget]
            tupleOrders = _find_reference_target(
                "EOAfigure", "id", strTarget,
                "Erfolgreich Verweis auf eine Abbildung bearbeitet: " + strResult) or tupleOrders
        if strTarget in dictFootnotes:
            print ("Verweis auf Fussnote gefunden")
            strResult = dictFootnotes[strTarget]
            tupleOrders = _find_reference_target(
                "EOAfootnote", "id", strTarget,
                "Erfolgreich Verweis auf eine Fussnote bearbeitet: " + strResult) or tupleOrders
        if strLabel in dictTables:
            print ("Verweis auf Tabelle gefunden")
            strResult = dictTables[strLabel]
            tupleOrders = _find_reference_target(
                "EOAtable", "label", strLabel,
                "Erfolgreich Verweis auf eine Tabelle bearbeitet:" + strResult) or tupleOrders
        strChapterOrder, strObjectOrder = tupleOrders or ("", "")
        # clear() also drops the tail, so save and restore the text that
        # follows the reference
        tmpTail = xmlReference.tail or ""
        xmlReference.clear()
        xmlReference.text = strResult
        xmlReference.tail = tmpTail
        xmlReference.tag = "a"
        xmlReference.set("href", "../" + strChapterOrder + "/index.html#" + strObjectOrder)
print ("----------------------------------------------")
print ("Processing Page References")
# Replace every EOApageref by the page label registered in dictPagelabels.
# References that point to a facsimile page additionally become links.
# The set of facsimile pages is not changed by this pass, so collect it once.
xmlEOAfacsimilepages = xmlEOAdocument.findall(".//EOAfacsimilepage")
for xmlEOAchapter in xmlEOAchapters:
    for xmlReference in xmlEOAchapter.findall(".//EOApageref"):
        # Reset the placeholder for every single reference; otherwise an
        # unresolved reference would show the page of the previous one
        strResult = "!!! Page Reference !!!"
        xmlReferenceLabelText = xmlReference.find("Label").text
        if xmlReferenceLabelText in dictPagelabels:
            print ("Verweis auf Seite gefunden: " + xmlReferenceLabelText)
            strResult = dictPagelabels[xmlReferenceLabelText]
        xmlReference.text = strResult
        # Iterate over a copy because the children are removed while looping
        for xmlChild in list(xmlReference):
            xmlReference.remove(xmlChild)
        # Check if the EOApageref points to a facsimile page.
        # If yes, turn it into a link to that facsimile.
        for xmlEOAfacsimilepage in xmlEOAfacsimilepages:
            if xmlEOAfacsimilepage.get("label") == xmlReferenceLabelText:
                print ("Querverweis auf ein Facsimile gefunden")
                xmlReference.tag = "a"
                strPartOrder = xmlEOAfacsimilepage.getparent().get("order")
                strFacsimileOrder = xmlEOAfacsimilepage.get("order")
                print (strFacsimileOrder)
                xmlReference.set("href", "../" + strPartOrder + "/" + strFacsimileOrder + ".html")
print ("----------------------------------------------")
print ("Normalizing Index Entries")
# Move the textual content of every index entry into attributes.
# The text is read as  main[@display][!secondary[@display]][|addition]
# where addition is "textbf", "see..." or "seealso...".
for xmlEOAchapter in xmlEOAchapters:
    xmlEOAindexs = xmlEOAchapter.xpath(".//EOAindex | .//EOAindexperson | .//EOAindexlocation")
    for xmlEOAindex in xmlEOAindexs:
        strEOAindextext = xmlEOAindex.text
        xmlEOAindex.text = None
        # Plain str.split is used instead of re.split: the separators are
        # literal characters and '\|' / '\!' are invalid escape sequences
        listFirstPart = strEOAindextext.split("|")
        listSecondPart = listFirstPart[0].split("!")
        strMainEntry = listSecondPart[0]
        # Check if a sortkey is present via @
        listSortKey = strMainEntry.split("@")
        if len(listSortKey) == 2:
            xmlEOAindex.set("main", listSortKey[0])
            xmlEOAindex.set("display", listSortKey[1])
        else:
            xmlEOAindex.set("main", strMainEntry)
        if len(listSecondPart) > 1:
            strSecondPart = listSecondPart[1]
            listSecondarySortkey = strSecondPart.split("@")
            if len(listSecondarySortkey) == 2:
                xmlEOAindex.set("secondary", listSecondarySortkey[0])
                xmlEOAindex.set("secondarydisplay", listSecondarySortkey[1])
            else:
                xmlEOAindex.set("secondary", strSecondPart)
        if len(listFirstPart) > 1:
            strAddition = listFirstPart[1]
            if strAddition == "textbf":
                xmlEOAindex.set("bold", "true")
            # Only the leading keyword is cut off; the rest of the addition
            # may itself contain the letters "see"
            if strAddition.startswith("seealso"):
                xmlEOAindex.set("seealso", strAddition[len("seealso"):])
                # Entries containing seealso are omitted for the time being
                xmlEOAindex.tag = "temp"
            elif strAddition.startswith("see"):
                xmlEOAindex.set("see", strAddition[len("see"):])
                # Entries containing see are omitted for the time being
                xmlEOAindex.tag = "temp"
        # Figure out parent chapter number and parent element order.
        # Ancestors are visited from the inside out, so the outermost
        # non-chapter ancestor carrying an order ends up as elementorder.
        for xmlParent in xmlEOAindex.iterancestors():
            if xmlParent.get("order") is not None and xmlParent.tag != "EOAchapter":
                xmlEOAindex.set("elementorder", xmlParent.get("order"))
            if xmlParent.get("order") is not None and xmlParent.tag == "EOAchapter":
                xmlEOAindex.set("chapterorder", xmlParent.get("order"))
        print (etree.tostring(xmlEOAindex))
# Drop the entries that were renamed to "temp" above
etree.strip_tags(xmlDjangoTree, "temp")
print ("----------------------------------------------")
print ("Removing Duplicate Index Entries")
# Within each direct child of a chapter only the first index entry for a
# given main/secondary combination is kept; repeated entries are renamed to
# "temp".  An entry without secondary key counts as a duplicate as soon as
# its main key has been seen.
for xmlEOAchapter in xmlEOAchapters:
    for xmlChild in xmlEOAchapter.iterchildren():
        dictEntries = {}
        for xmlEOAindex in xmlChild.xpath(".//EOAindex | .//EOAindexperson | .//EOAindexlocation"):
            # The tag is part of the key so that the regular, person and
            # location indexes do not suppress each other's entries
            tupleEntry = (xmlEOAindex.tag, xmlEOAindex.get("main"))
            strSubentry = xmlEOAindex.get("secondary")
            if tupleEntry not in dictEntries:
                # Remember the secondary key of the first occurrence as well,
                # otherwise its first repetition would not be recognised
                dictEntries[tupleEntry] = [strSubentry]
                continue
            if strSubentry is None or strSubentry in dictEntries[tupleEntry]:
                # Cross-reference entries (see / seealso) are never removed
                if xmlEOAindex.get("see") is None and xmlEOAindex.get("seealso") is None:
                    xmlEOAindex.tag = "temp"
            else:
                dictEntries[tupleEntry].append(strSubentry)
print ("----------------------------------------------")
print ("Removing Index Entries in Footnotes")
# Index entries located inside an EOAfootnote are renamed to "temp", which
# keeps them out of the tag-based index lookups that follow.
for xmlEOAchapter in xmlEOAchapters:
    for xmlEOAindex in xmlEOAchapter.xpath(".//EOAindex | .//EOAindexperson | .//EOAindexlocation"):
        for xmlParent in xmlEOAindex.iterancestors("EOAfootnote"):
            xmlEOAindex.tag = "temp"
            print ("Ding Index in Footnote")
def _make_index_link(xmlEntry):
    """Return an EOAindexlink that points to the position of xmlEntry.

    The chapterorder and elementorder attributes are copied from xmlEntry;
    bold="True" is set when xmlEntry carries a bold attribute.
    """
    xmlEOAindexlink = etree.Element("EOAindexlink")
    xmlEOAindexlink.set("chapterorder", xmlEntry.get("chapterorder"))
    xmlEOAindexlink.set("elementorder", xmlEntry.get("elementorder"))
    if xmlEntry.get("bold") is not None:
        xmlEOAindexlink.set("bold", "True")
    return xmlEOAindexlink


def _build_print_index(strEntryTag, strPrintTag):
    """Build the sorted print index for one kind of index entry.

    All strEntryTag elements of xmlDjangoTree are grouped by their "main"
    attribute (and "secondary" attribute, if present).  The result is a new
    strPrintTag element that contains one EOAindexsection per first
    character, each holding EOAindexentry elements with their links and
    EOAindexsubentry children.
    """
    dictIndex = {}
    # Relative path: works on elements and trees alike
    xmlEOAindexs = xmlDjangoTree.findall(".//" + strEntryTag)
    print ("Sorting " + str(len(xmlEOAindexs)) + " Entries")
    for xmlEOAindex in xmlEOAindexs:
        dictEntry = dictIndex.setdefault(
            xmlEOAindex.get("main"), {"listMainentries": [], "dictSubentries": {}})
        strSubEntry = xmlEOAindex.get("secondary")
        if strSubEntry is None:
            # Entry without subentry
            dictEntry["listMainentries"].append(xmlEOAindex)
        else:
            # Entry with subentry is collected on the second level
            dictEntry["dictSubentries"].setdefault(strSubEntry, []).append(xmlEOAindex)
    xmlEOAprintindex = etree.Element(strPrintTag)
    xmlEOAindexsection = None
    listFirstChars = []
    # Main keys are sorted case-insensitively
    for strSortedKey in sorted(dictIndex, key=str.lower):
        strFirstChar = strSortedKey[0].upper()
        if strFirstChar not in listFirstChars:
            print (strFirstChar)
            listFirstChars.append(strFirstChar)
            # A new first character closes the current section
            if xmlEOAindexsection is not None:
                xmlEOAprintindex.append(xmlEOAindexsection)
            xmlEOAindexsection = etree.Element("EOAindexsection")
            xmlEOAindexsection.set("Character", strFirstChar)
        xmlEOAindexentry = etree.Element("EOAindexentry")
        xmlEOAindexentry.set("main", strSortedKey)
        for xmlMainelement in dictIndex[strSortedKey]["listMainentries"]:
            # Prefer the display form over the sort key
            strMainEntry = xmlMainelement.get("display")
            if strMainEntry is None:
                strMainEntry = xmlMainelement.get("main")
            xmlEOAindexentry.set("display", strMainEntry)
            print (strMainEntry)
            print (xmlMainelement.get("chapterorder") + ":" + xmlMainelement.get("elementorder"))
            xmlEOAindexentry.append(_make_index_link(xmlMainelement))
        # If there are any subentries, process them now
        dictSubentries = dictIndex[strSortedKey]["dictSubentries"]
        if dictSubentries:
            print ("Processing Subentries")
            for strSortedSubKey in sorted(dictSubentries):
                xmlEOAindexsubentry = etree.Element("EOAindexsubentry")
                xmlEOAindexsubentry.set("secondary", strSortedSubKey)
                for xmlSubElement in dictSubentries[strSortedSubKey]:
                    # Add the links to the subentry
                    xmlEOAindexsubentry.append(_make_index_link(xmlSubElement))
                    print (xmlSubElement.get("secondary"))
                xmlEOAindexentry.append(xmlEOAindexsubentry)
        xmlEOAindexsection.append(xmlEOAindexentry)
    if xmlEOAindexsection is not None:
        xmlEOAprintindex.append(xmlEOAindexsection)
    return xmlEOAprintindex


def _place_print_index(strPrintTag, xmlNewIndex):
    """Append xmlNewIndex to xmlEOAdocument if the document requests it.

    The index is requested by a strPrintTag placeholder element in
    xmlDjangoTree.  The placeholder and its parent (<p><.../></p>) are
    renamed to "temp".  Without a placeholder nothing is appended.
    """
    xmlPrintindex = xmlDjangoTree.find(".//" + strPrintTag)
    if xmlPrintindex is not None:
        xmlPrintindex.tag = "temp"
        xmlPrintindex.getparent().tag = "temp"
        xmlEOAdocument.append(xmlNewIndex)


print ("----------------------------------------------")
print ("Sorting and Creating Regular Index")
_place_print_index("EOAprintindex", _build_print_index("EOAindex", "EOAprintindex"))
# Each index is built and placed on its own, so the person index is no
# longer overwritten by the location index before it is assembled
print ("----------------------------------------------")
print ("Sorting and Creating Person Index")
_place_print_index("EOAprintpersonindex", _build_print_index("EOAindexperson", "EOAprintpersonindex"))
print ("----------------------------------------------")
print ("Sorting and Creating Location Index")
_place_print_index("EOAprintlocationindex", _build_print_index("EOAindexlocation", "EOAprintlocationindex"))
# TODO: Remove unnecessary attributes such as id
# TODO: Remove unnecessary tags such as EOAlabel
# Dissolve helper tags (their content stays in place), drop citekey elements
# but keep the text following them, and remove attributes that are not
# wanted in the output.
etree.strip_tags(xmlDjangoTree, "temp", "citetext", "EOAprintbibliography")
etree.strip_elements(xmlDjangoTree, "citekey", with_tail=False)
etree.strip_attributes(xmlDjangoTree, "id-text", "id", "noindent", "type", "label", "spacebefore", "rend")
############################################################################
#                           Save xmlDjangoTree                             #
############################################################################
tmpResult = etree.tostring(xmlDjangoTree, pretty_print=True, encoding="unicode")
# The serialisation carries no XML declaration, so the file has to be UTF-8
# regardless of the platform's default encoding
with open("CONVERT/django/Django.xml", "w", encoding="utf-8") as tmpFile:
    tmpFile.write(tmpResult)
############################################################################
#                        Finishing various Stuff                           #
############################################################################
# Write temporary XML tree of the ebook
ergebnis = etree.tostring(xmlEbookTree, pretty_print=True, encoding="unicode")
# The serialisation carries no XML declaration, so the file has to be UTF-8
# regardless of the platform's default encoding
with open("Devel_ebook.xml", "w", encoding="utf-8") as ergebnisdatei:
    ergebnisdatei.write(ergebnis)
cleanup()
print ("Done!")
sys.exit()