Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
EOASkripts/Skripten/EOAconvert.py
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
executable file
3603 lines (3405 sloc)
176 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# -*- coding: utf-8; mode: python -*- | |
# Time-stamp: <2016-02-03 17:23:51 (kthoden)> | |
# CHANGES | |
# installed graphicsmagick: "brew install graphicsmagick" | |
# tralics needs to be set up | |
# | |
# | |
# some errors: | |
# [kthoden:~/EOAKram/Supersample/CONVERT] % ~/EOAKram/Probe/MPDL/Skripten/epub_test.sh EOASample.epub | |
# Epubcheck Version 1.0.5 | |
# ERROR: EOASample.epub/OEBPS/chapter4.xhtml(65): unknown element "EOAindexlocation" from namespace "http://www.w3.org/1999/xhtml" | |
# ERROR: EOASample.epub/OEBPS/chapter5.xhtml(14): unknown element "EOAindexlocation" from namespace "http://www.w3.org/1999/xhtml" | |
# ERROR: EOASample.epub/OEBPS/chapter7.xhtml(17): unknown element "formula" from namespace "http://www.w3.org/1999/xhtml" | |
# ERROR: EOASample.epub/OEBPS/chapter7.xhtml(17): elements from namespace "http://www.w3.org/1998/Math/MathML" are not allowed | |
# ERROR: EOASample.epub/OEBPS/chapter8.xhtml(13): unknown element "EOAtocentry" from namespace "http://www.w3.org/1999/xhtml" | |
# ERROR: EOASample.epub/OEBPS/chapter11.xhtml(9): unknown element "EOAtocentry" from namespace "http://www.w3.org/1999/xhtml" | |
# ERROR: EOASample.epub/OEBPS/chapter11.xhtml(10): unknown element "EOAtocentry" from namespace "http://www.w3.org/1999/xhtml" | |
# ERROR: EOASample.epub/OEBPS/chapter11.xhtml(10): unknown element "EOAprintlocationindex" from namespace "http://www.w3.org/1999/xhtml" | |
# license? | |
# | |
# Also add EOAparagraph | |
from optparse import OptionParser | |
from lxml import etree | |
from lxml import objectify | |
from copy import deepcopy | |
from copy import copy | |
from EOAbibitem import Bibitem | |
import glob | |
import os | |
import re | |
import string | |
import shlex | |
import subprocess | |
import sys | |
import shutil | |
import time | |
import configparser | |
# TODO 2: Fix the handling of a plain URL for a web page, see the Manzer chapter [1] in Proceedings 2; more precisely: add "webpage" as an entry type
# Paths to executables | |
GM_PATH = "/usr/local/bin/gm" | |
TL_PATH = "/usr/local/texlive/2013/" | |
TRALICS_PATH_EXEC = "/Users/kthoden/src/tralics-2.15.2/src/tralics" | |
TRALICS_PATH_LIB = "/Users/kthoden/EOAKram/Probe/MPDL/tralics" | |
PDFTK_PATH = "/usr/local/bin/pdftk" | |
SUPPORT_TEMPLATE_PATH = "/Users/kthoden/EOAKram/Probe/MPDL/" | |
# Assume that curl is available?
interimResult = "" | |
############################################################### | |
# Certain functions for specific tasks | |
############################################################### | |
# Maintain text and strip subchildren | |
def gettext(xmlElement):
    """Return the text content of *xmlElement* with all markup stripped.

    The element's own text, the recursively flattened text of every child
    and the tail text following each child are concatenated in document
    order. Missing text or tail counts as the empty string.
    """
    listParts = [xmlElement.text or ""]
    for xmlChild in xmlElement:
        listParts.append(gettext(xmlChild))
        listParts.append(xmlChild.tail or "")
    return "".join(listParts)
# include all subelements | |
def getchildren(xmlElement):
    """Return *xmlElement* unchanged.

    The function is an identity placeholder: it hands back the very object
    it was given, including all of its subelements.
    """
    return xmlElement
# Adjust and convert image for epub standard | |
def sanitizeImageEpub(strImagepath):
    """Adjust and convert an image for the epub standard.

    Using GraphicsMagick (GM_PATH) the image is shrunk in place, first to a
    width of at most 1500 pixels and then to a height of at most 2000
    pixels. An image whose format is reported as PNG is converted to a JPEG
    file with the same base name, and the PNG is deleted afterwards.

    Parameters:
        strImagepath -- path of the image file

    Returns the path of the resulting image. It differs from strImagepath
    only when a PNG was converted (extension replaced by ".jpg").

    Raises subprocess.CalledProcessError when a GraphicsMagick call fails
    and ValueError when the reported width or height is not a number.
    """
    def gmIdentify(strFormat):
        # The argument list is built directly instead of splitting a
        # concatenated command string, so paths containing spaces or quotes
        # reach gm unmodified.
        return subprocess.check_output(
            [GM_PATH, "identify", "-format", strFormat, strImagepath],
            shell=False, universal_newlines=True)

    def gmResize(strGeometry):
        # The trailing ">" tells gm to only ever shrink, never enlarge
        subprocess.check_output(
            [GM_PATH, "convert", strImagepath, "-resize", strGeometry, strImagepath],
            shell=False)

    print(strImagepath)
    if int(gmIdentify("%w")) > 1500:
        gmResize("1500x>")
    # The height is measured after a possible width reduction
    if int(gmIdentify("%h")) > 2000:
        gmResize("x2000>")
    if gmIdentify("%m").strip() == "PNG":
        strNewImagepath = os.path.splitext(strImagepath)[0] + ".jpg"
        # check_call raises if the conversion fails, so the original PNG is
        # only removed once the JPEG has really been written.
        subprocess.check_call([GM_PATH, "convert", strImagepath, strNewImagepath])
        os.remove(strImagepath)
        strImagepath = strNewImagepath
    print("Hier ein Pfad zu einem Bild:")
    print(strImagepath)
    return strImagepath
# Function to render LaTeX-Code into PNG-Files, returns PNG-Filename (epub & django) | |
def TeX2PNG(LaTeXCode, Type, Chapter, Number):
    """Render LaTeX code into a PNG file (epub & django).

    The formula is wrapped in the math environment that belongs to *Type*,
    inserted into the template Templates/Formel.tex, compiled with XeLaTeX
    inside the temporary directory, cropped with pdfcrop and converted with
    GraphicsMagick to ``items/<Type>_<Chapter>_<Number>.png`` below the
    current working directory. The output of the external tools is written
    to Test.txt in the current working directory.

    Parameters:
        LaTeXCode -- formula source without the surrounding math delimiters
        Type      -- EOAineq, EOAequation, EOAequationnonumber,
                     EOAequationarray or EOAequationarraynonumber
        Chapter   -- chapter identifier, used via str() in the file name
        Number    -- formula identifier, used via str() in the file name

    Returns the wrapped LaTeX code after the rebound commands were restored.

    Raises KeyError for an unknown *Type*.
    """
    import tempfile

    # Dictionary contains Type:begin/end
    Types = {
        "EOAineq" : ["$", "$"],
        "EOAequation" : ["\\begin{equation*}", "\\end{equation*}"],
        "EOAequationnonumber" : ["\\begin{equation*}", "\\end{equation*}"],
        "EOAequationarray" : ["\\begin{align*}", "\\end{align*}"],
        "EOAequationarraynonumber" : ["\\begin{align*}", "\\end{align*}"]
    }
    LaTeXCode = Types[Type][0] + LaTeXCode + Types[Type][1]
    # Keys are regular expressions, hence raw strings.
    # NOTE(review): in the last key "\s" is the regex whitespace class, so
    # it matches "<whitespace>lashed|" rather than a literal "\slashed|" or
    # "|slashed|". Kept as it was; confirm the intended pattern.
    dictRebindedCommands = {
        r"\|ket\|" : r"\\ket",
        r"\|braket\|" : r"\\braket",
        r"\|bra\|" : r"\\bra",
        r"\|Bra\|" : r"\\Bra",
        r"\|Ket\|" : r"\\Ket",
        r"\slashed\|" : r"\\slashed"
    }
    for strPattern, strReplacement in dictRebindedCommands.items():
        LaTeXCode = re.sub(strPattern, strReplacement, LaTeXCode)
    # Open plain LaTeX-Template
    with open(SUPPORT_TEMPLATE_PATH + "Templates/Formel.tex", "r") as tmp:
        Template = tmp.read()
    # Temporary directory of this user account. gettempdir() honours TMPDIR
    # but also works when the variable is unset or lacks a trailing slash.
    tmpDir = tempfile.gettempdir()
    # Make directory items if it doesn't already exist
    strItemsDir = os.path.join(os.getcwd(), "items")
    if not os.path.exists(strItemsDir):
        os.mkdir(strItemsDir)
    strBasename = Type + "_" + str(Chapter) + "_" + str(Number)
    tmpFile = os.path.join(tmpDir, strBasename + ".tex")
    with open(tmpFile, "w") as tmp:
        tmp.write(string.Template(Template).substitute(DERINHALT=LaTeXCode))
    strPdf = os.path.join(tmpDir, strBasename + ".pdf")
    strCroppedPdf = os.path.join(tmpDir, strBasename + "a.pdf")
    # Written below os.getcwd(), the same place where "items" was created
    strPng = os.path.join(strItemsDir, strBasename + ".png")
    # Redirecting stdout of the external tools to save their output
    with open('Test.txt', 'w') as Datei:
        Ergebnis = subprocess.call(["/usr/texbin/xelatex", "--halt-on-error", tmpFile], cwd=tmpDir, stdout=Datei)
        if Ergebnis == 0:
            print("Konvertierung folgender Formel ist erfolgreich: " + Type + str(Chapter) + "_" + str(Number))
        else:
            # Every non-zero exit status is a failure, not only status 1
            print("Konvertierung folgender Formel ist fehlgeschlagen: " + Type + str(Chapter) + "_" + str(Number))
        # Cropping and conversion are still attempted (best effort)
        subprocess.call([TL_PATH + "texmf-dist/scripts/pdfcrop/pdfcrop.pl", strPdf, strCroppedPdf], cwd=tmpDir, stdout=Datei)
        subprocess.call([GM_PATH, "convert", "-density", "144", strCroppedPdf, strPng], cwd=tmpDir, stdout=Datei)
    return LaTeXCode
# Function to create a complete Entry of a publication (epub & django) for author-year citation | |
def createBibEntryAuthorYear(bibEntry, boolSameAuthor):
    """Create a complete entry of a publication for author-year citation.

    Parameters:
        bibEntry       -- object offering the accessor methods used below
                          (entrytype(), labelyear(), title(), ...)
        boolSameAuthor -- when true, "-" is printed instead of the author

    Returns the formatted entry (epub & django), or "" for an entry type
    that is not handled here. Every field is passed through str(), so the
    entry types agree on how non-string values are treated.
    """
    # Plain truthiness: the author is defined for every flag value, where the
    # former "== True" / "== False" tests left it unbound for e.g. None.
    if boolSameAuthor:
        strAuthor = "-"
    else:
        strAuthor = bibEntry.fullauthorlastfirst()
    strType = bibEntry.entrytype()
    if strType not in ("book", "booklet", "report", "thesis", "misc",
                       "incollection", "inproceedings", "article", "newspaper"):
        return ""
    # All handled types start with author, year and title
    strHead = strAuthor + " (" + str(bibEntry.labelyear()) + str(bibEntry.labelyearsuffix())
    strTitle = str(bibEntry.title())
    if strType in ("book", "booklet"):
        return strHead + "). <i>" + strTitle + "</i>." + str(bibEntry.location()) + "."
    if strType in ("report", "misc", "newspaper"):
        return strHead + ") <i>" + strTitle + "</i>."
    if strType == "thesis":
        return strHead + ") <i>" + strTitle + "</i>." + str(bibEntry.thesistype()) + str(bibEntry.institution())
    if strType in ("incollection", "inproceedings"):
        return (strHead + "). " + strTitle + "." + str(bibEntry.booktitle()) + str(bibEntry.editor())
                + str(bibEntry.series()) + str(bibEntry.location()) + str(bibEntry.pages()))
    # Only "article" is left
    return strHead + "). " + strTitle + "." + str(bibEntry.journaltitle()) + str(bibEntry.volumenumberpages())
# Function to create a complete Entry of a publication (epub & django) for numeric citation | |
def createBibEntryNumeric(bibEntry):
    """Create a complete entry of a publication for numeric citation.

    bibEntry has to offer the accessor methods used below (entrytype(),
    fullauthorfirstlast(), title(), year(), ...). Returns the formatted
    entry (epub & django), or "" for an entry type that is not handled.
    """
    strAuthor = bibEntry.fullauthorfirstlast()
    strType = bibEntry.entrytype()
    if strType == "book":
        listParts = [strAuthor, ". <i>", bibEntry.title(), "</i>.", bibEntry.location(), ", ", bibEntry.year()]
    elif strType == "booklet":
        listParts = [strAuthor, ". <i>", bibEntry.title(), "</i>. ", bibEntry.howpublished(), ". ",
                     bibEntry.location(), ", ", bibEntry.year()]
    elif strType in ("report", "newspaper"):
        listParts = [strAuthor, " (", bibEntry.labelyear(), bibEntry.labelyearsuffix(), ") <i>",
                     bibEntry.title(), "</i>."]
    elif strType == "thesis":
        listParts = [strAuthor, ". <i>", bibEntry.title(), "</i>. ", bibEntry.thesistype(),
                     bibEntry.institution(), ", ", bibEntry.year()]
    elif strType == "misc":
        listParts = [strAuthor, ". ", bibEntry.title(), ". ", bibEntry.booktitle(), ". "]
    elif strType == "incollection":
        listParts = [strAuthor, ". ", bibEntry.title(), ". ", bibEntry.booktitle(), bibEntry.editor(),
                     bibEntry.location(), ", ", bibEntry.year(), ". ", bibEntry.pages(), "."]
    elif strType == "inproceedings":
        listParts = [strAuthor, ". ", bibEntry.title(), ". ", bibEntry.booktitle(), ". ",
                     bibEntry.volumenumeric(), bibEntry.year(), ". ", bibEntry.pages(), "."]
    elif strType == "article":
        listParts = [strAuthor, ". ", bibEntry.title(), "<i>", bibEntry.journaltitle(), "</i> ",
                     bibEntry.volumenumberpages(), " (", bibEntry.year(), "):", bibEntry.pages(), "."]
    else:
        # Unknown entry types yield an empty entry
        listParts = []
    return "".join(listParts)
# Function to add Elements to Content-OPF (epub) | |
def addToContentopf(contentopf, Filename, FileID, Mediatype):
    """Add a file to the manifest of content.opf (epub).

    An <item> with the sanitized FileID, the media type and Filename as
    href is appended to <manifest>. For Mediatype "xml" a matching
    <itemref> is appended to <spine> as well. Every sanitized FileID is
    remembered in the global list listContentopf; a FileID that is already
    in that list is not added a second time.

    Parameters:
        contentopf -- parsed content.opf tree
        Filename   -- value of the href attribute
        FileID     -- raw identifier, sanitized before use
        Mediatype  -- "xml", "jpg" or "png"

    Returns contentopf. Raises KeyError for an unknown Mediatype.
    """
    global listContentopf
    # Sanitizing FileID: "_", "." and "/" are removed from the id attribute
    FileID = re.sub(r"[_./]", "", FileID)
    # The id may not start with a number either. All leading digits are
    # stripped (three single-digit passes used to leave a digit in front
    # whenever the name began with more than three of them).
    FileID = re.sub(r"^[0-9]+", "", FileID)
    if FileID in listContentopf:
        return contentopf
    dictMediatypes = {
        "xml" : "application/xhtml+xml",
        "jpg" : "image/jpeg",
        "png" : "image/png"
    }
    contentopfns = "{http://www.idpf.org/2007/opf}"
    xmlManifest = contentopf.find(".//" + contentopfns + "manifest")
    xmlItem = etree.Element("item")
    xmlItem.set("id", FileID)
    xmlItem.set("media-type", dictMediatypes[Mediatype])
    xmlItem.set("href", Filename)
    xmlManifest.append(xmlItem)
    # if it's an XML-File also extend <spine>
    if Mediatype == "xml":
        xmlSpine = contentopf.find(".//" + contentopfns + "spine")
        xmlItemref = etree.Element("itemref")
        xmlItemref.set("idref", FileID)
        xmlSpine.append(xmlItemref)
    listContentopf.append(FileID)
    return contentopf
# Function to add Chapters to Table of Contents (epub) | |
def addToTocncx(tocncx, Label, intTechnicalChapterNumber):
    """Add a chapter to the table of contents toc.ncx (epub).

    A <navPoint> holding a <navLabel>/<text> with Label and a <content>
    pointing to chapter<N>.xhtml is appended to <navMap>. The playOrder
    attribute is intTechnicalChapterNumber + 1, the id is "chapter<N>".

    Returns tocncx.
    """
    strNamespace = "{http://www.daisy.org/z3986/2005/ncx/}"
    xmlTarget = tocncx.find(".//" + strNamespace + "navMap")
    strChapter = "chapter" + str(intTechnicalChapterNumber)

    xmlPoint = etree.Element("navPoint")
    xmlPoint.set("playOrder", str(intTechnicalChapterNumber + 1))
    xmlPoint.set("id", strChapter)

    # <navLabel><text>Label</text></navLabel>
    xmlCaption = etree.Element("text")
    xmlCaption.text = Label
    xmlLabel = etree.Element("navLabel")
    xmlLabel.append(xmlCaption)
    xmlPoint.append(xmlLabel)

    # <content src="chapterN.xhtml"/>
    xmlLink = etree.Element("content")
    xmlLink.set("src", strChapter + ".xhtml")
    xmlPoint.append(xmlLink)

    xmlTarget.append(xmlPoint)
    return tocncx
# Remove Support Files | |
def cleanup():
    """Remove the support files and directories from the working directory.

    Every file and directory is removed independently, so one missing item
    no longer keeps the remaining ones from being deleted. If at least one
    item could not be removed, the notice "Keine Temporaeren Dateien" is
    printed once.
    """
    strBase = os.getcwd()
    boolIncomplete = False
    for strFile in ("classes.dtd", "mathml2-qname-1.mod", "mathml2.dtd"):
        try:
            os.remove(os.path.join(strBase, strFile))
        except OSError:
            boolIncomplete = True
    for strDirectory in ("html", "iso8879", "iso9573-13", "mathml", "mathml2"):
        try:
            shutil.rmtree(os.path.join(strBase, strDirectory))
        except OSError:
            boolIncomplete = True
    if boolIncomplete:
        print("Keine Temporaeren Dateien")
############################################################### | |
# Preparation of certain files and some checks in advance
############################################################### | |
# Setup of various dictionaries for localization of various elements:
# heading of the footnote section, keyed by language name
dictLangFootnotes = {
    "english" : "Footnotes",
    "italian" : "Note a piè pagina",
    "french" : "notes en bas de page",
    "german" : "Fußnoten",
}
# Options for the command line: filename / configfile / trash
parser = OptionParser()
parser.add_option(
    "-f", "--file",
    dest="filename",
    metavar="FILE",
    help="Name of XML-File",
)
parser.add_option(
    "-c", "--config",
    dest="configfile",
    metavar="CONFIGURATION",
    help="Name of Configuration-File",
)
parser.add_option(
    "-t", "--trash",
    dest="helpfiles",
    help="Trash temporary files",
)
options, args = parser.parse_args()
# Check for folder and necessary files. A missing prerequisite ends the
# script with exit status 1 so that calling scripts can detect the failure.
if not os.path.exists(os.getcwd() + "/CONVERT"):
    print("Das notwendige Verzeichnis CONVERT wurde noch nicht erstellt.")
    sys.exit(1)
if not os.path.exists(os.getcwd() + "/CONVERT/cover.jpg"):
    print("Die Datei cover.jpg im Verzeichnis CONVERT fehlt.")
    sys.exit(1)
if not os.path.exists(os.getcwd() + "/CONVERT/publication.cfg"):
    print("Die Datei publication.cfg im Verzeichnis CONVERT fehlt.")
    sys.exit(1)
# Remove temporary files, necessary for troubleshooting ("-t temp").
# This is a regular way to end the script, hence exit status 0.
if options.helpfiles == "temp":
    cleanup()
    sys.exit()
# Copy Support-Files from SUPPORT_TEMPLATE_PATH to current directory
for strSupportFile in ("classes.dtd", "mathml2-qname-1.mod", "mathml2.dtd"):
    shutil.copy(SUPPORT_TEMPLATE_PATH + "Support/" + strSupportFile, os.getcwd())
for strSupportDirectory in ("html", "iso8879", "iso9573-13", "mathml", "mathml2"):
    strTargetDirectory = os.getcwd() + "/" + strSupportDirectory
    # copytree() refuses to write into an existing directory. A copy left
    # behind by an aborted run is replaced, just like copy() above
    # overwrites the single files.
    if os.path.isdir(strTargetDirectory):
        shutil.rmtree(strTargetDirectory)
    shutil.copytree(SUPPORT_TEMPLATE_PATH + "Support/" + strSupportDirectory, strTargetDirectory)
############################################################## | |
# Preparing the main document # | |
############################################################## | |
# Tralics is called like this:
#   tralics -confdir <lib>/tralics_conf -config <lib>/tralics.tcf -utf8 -utf8output Vorlage2012.tex
# Without a file name there is nothing to convert; stop with a usage error
# instead of running Tralics on "None.tex".
if not options.filename:
    parser.error("Name of XML-File is missing (option -f/--file)")
# Convert TeX to XML via Tralics. The arguments are passed as a list, so a
# file name containing spaces is handed over as one argument.
Argumente = [
    TRALICS_PATH_EXEC,
    "-confdir", TRALICS_PATH_LIB + "/tralics_conf",
    "-config", TRALICS_PATH_LIB + "/tralics.tcf",
    "-utf8",
    "-utf8output",
    options.filename + ".tex",
]
# Readable form of the command line, kept under its previous name
Kommando = " ".join(Argumente)
Prozess = subprocess.call(Argumente)
# Fix underscore and fix EOAtranscripted: Tralics reports a bare "_" and a
# stray \par as <error> elements, which are replaced by "_" and by nothing.
# Tralics is run with -utf8output, so the file is read and written as UTF-8
# regardless of the locale. "with" closes the file even if an error occurs.
with open(options.filename + ".xml", "r", encoding="utf-8") as tmpFile:
    tmpText = tmpFile.read()
tmpText = re.sub(r"<error n='_' l='(.*?)' c='Missing dollar'/>", "_", tmpText)
tmpText = re.sub(r"<error n='\\par' l='(.*?)' c='Invalid \\par command: paragraph not started'/>", "", tmpText)
with open(options.filename + ".xml", "w", encoding="utf-8") as tmpFile:
    tmpFile.write(tmpText)
# Complete XML-Document in xmlTree
# NOTE(review): no_network=False permits network access while the DTD is
# loaded; confirm that this is wanted.
xmlParser = etree.XMLParser(no_network=False,load_dtd=True) #resolve_entities=False
xmlTree = etree.parse((options.filename + ".xml"), xmlParser)
xmlChapters = xmlTree.findall("//div1")
# Cleanup of not needed tags in advance. To be cleaned: <error>
etree.strip_elements(xmlTree, "error", with_tail=False)
print("-----------------------------------------------------")
print("Move EOAlanguage from <head> into attribute of EOAchapter")
for xmlChapter in xmlChapters:
    xmlLanguage = xmlChapter.find(".//EOAlanguage")
    if xmlLanguage is not None:
        strLanguage = xmlLanguage.text or "english"
        xmlChapter.set("language", strLanguage)
        xmlLanguage.text = None
        print(strLanguage)
    # strip_tags() works in place and returns None, so its result must not
    # be assigned back to the loop variable.
    etree.strip_tags(xmlChapter, "EOAlanguage")
############################################################## | |
# Numbering and Typesetting various Elements # | |
############################################################## | |
# Figure out how to number (like essay or regular). A document without an
# <EOAseries> element, or with an empty one, is numbered the regular way.
xmlSeries = xmlTree.find(".//EOAseries")
if xmlSeries is not None and xmlSeries.text:
    strSerie = xmlSeries.text
else:
    strSerie = "regular"
if strSerie == "Essay":
    strNumberingType = "essay"
else:
    strNumberingType = "regular"
# Dictionaries containing UIDs and Numbers
dictChapters = {}
dictFigures = {}
dictEquations = {}
dictSections = {}
dictFootnotes = {}
dictPagelabels = {}
dictTables = {}
dictLists = {}
dictTheorems = {}
print("-----------------------------------------------------")
print("Numbering Chapters")
# Chapters marked rend="nonumber" get no entry in dictChapters and do not
# use up a number; all others are counted from 1 in document order.
Chapternumber = 1
for xmlChapter in xmlChapters:
    if xmlChapter.get('rend') == "nonumber":
        continue
    Chapteruid = xmlChapter.get('id')
    dictChapters[Chapteruid] = str(Chapternumber)
    Chapternumber += 1
# EOAequation, EOAsubequation and EOAequationarray Numbering per Chapter
#
# For every chapter the numbered formulas are walked through in the order
# returned by xpath(). Each formula is rendered with TeX2PNG, receives the
# attributes filename/number/TeX (plus uid/id or label) and its number is
# stored in dictEquations under the id of its anchor or under its label.
# intChapterNumber counts all chapters and is only used for file names and
# progress messages; the printed number prefix comes from dictChapters.
intChapterNumber = 1
for xmlChapter in xmlChapters:
    # Equation numbers start at 1 again in every chapter
    intEquationnumber = 1
    xmlDinge = xmlChapter.xpath(".//EOAequation | .//EOAequationarray | .//EOAsubequations")
    print ("-----------------------------------------------------")
    print ("Processing .//EOAequation | .//EOAequationarray | .//EOAsubequations")
    print ("Working on Chapter " + str(intChapterNumber))
    print ("Es wurden " + str(len(xmlDinge)) + " Formeln gefunden")
    for xmlDing in xmlDinge:
        if xmlDing.tag == "EOAequationarray":
            # tmpNumberinArray is only being used for filename
            tmpNumberinArray = intEquationnumber
            # tmpDictNumberLabel used to insert the attribute value into <EOAequation>
            tmpDictNumberLabel = {}
            # Numbering is being done by <mtr>-Tags
            xmlMathmlrows = xmlDing.findall(".//{http://www.w3.org/1998/Math/MathML}mtr")
            for xmlMathmlrow in xmlMathmlrows:
                if "Label" in xmlMathmlrow.attrib:
                    # Add the label to the dictionary of equations
                    if xmlChapter.get("rend") != "nonumber":
                        dictEquations[xmlMathmlrow.get("Label")] = str(dictChapters[xmlChapter.get('id')]) + "." + str(intEquationnumber)
                        tmpDictNumberLabel[str(dictChapters[xmlChapter.get('id')]) + "." + str(intEquationnumber)] = xmlMathmlrow.get("Label")
                    if xmlChapter.get("rend") == "nonumber":
                        dictEquations[xmlMathmlrow.get("Label")] = str(intEquationnumber)
                        tmpDictNumberLabel[str(intEquationnumber)] = xmlMathmlrow.get("Label")
                # Every row uses up one equation number, labelled or not
                intEquationnumber += 1
            xmlRohTeX = xmlDing.find(".//texmath")
            # Container that replaces the original element further down
            xmlNew = etree.Element('EOAequationarray')
            # Blank lines need to be removed otherwise TeX won't work
            textSourcecode = os.linesep.join([s for s in xmlRohTeX.text.splitlines() if s])
            # \rowattributeunknown has to be deleted, its an artefact
            textSourcecode = re.sub("\\\\rowattributeunknown", "", textSourcecode)
            # Push Down loop to parse the raw code: the source is read
            # character by character and cut after every double backslash;
            # each piece becomes an <EOAequation> of its own.
            textFormel = ""
            boolBackslash = False
            for Buchstabe in textSourcecode:
                if Buchstabe == "\n":
                    continue
                if Buchstabe == "\\":
                    if boolBackslash == False:
                        # First backslash: remember it, it may start a command
                        textFormel += Buchstabe
                        boolBackslash = True
                        continue
                    if boolBackslash == True:
                        # Second backslash in a row: the row ends here
                        textFormel += Buchstabe
                        strLaTeXCode = TeX2PNG(textFormel, "EOAequationarray", str(intChapterNumber), str(tmpNumberinArray))
                        if xmlChapter.get("rend") != "nonumber":
                            tmpXML = etree.Element("EOAequation", filename=("EOAequationarray" + "_" + str(intChapterNumber) + "_" + str(tmpNumberinArray) + ".png"), number=(str(dictChapters[xmlChapter.get('id')]) + "." + str(tmpNumberinArray)))
                        if xmlChapter.get("rend") == "nonumber":
                            tmpXML = etree.Element("EOAequation", filename=("EOAequationarray" + "_" + str(intChapterNumber) + "_" + str(tmpNumberinArray) + ".png"), number=(str(tmpNumberinArray)))
                        tmpXML.set("TeX", strLaTeXCode)
                        # Put Label into EOAequation
                        if xmlChapter.get("rend") != "nonumber":
                            strTempKey = str(dictChapters[xmlChapter.get('id')]) + "." + str(tmpNumberinArray)
                        if xmlChapter.get("rend") == "nonumber":
                            strTempKey = str(tmpNumberinArray)
                        if strTempKey in tmpDictNumberLabel:
                            #tmpXML.set("label", tmpDictNumberLabel[(str(dictChapters[xmlChapter.get('id')]) + "." + str(tmpNumberinArray))])
                            tmpXML.set("label", tmpDictNumberLabel[strTempKey])
                        xmlNew.append(tmpXML)
                        # Start collecting the next row
                        textFormel = ""
                        boolBackslash = False
                        tmpNumberinArray += 1
                        continue
                if Buchstabe != "\\":
                    # Ordinary character; a single backslash before it
                    # belonged to a command and not to a row end
                    textFormel += Buchstabe
                    boolBackslash = False
            # Typeset last equation (the text after the last row end)
            strLaTeXCode = TeX2PNG(textFormel, "EOAequationarray", str(intChapterNumber), str(tmpNumberinArray))
            if xmlChapter.get("rend") != "nonumber":
                tmpXML = etree.Element("EOAequation", filename=("EOAequationarray" + "_" + str(intChapterNumber) + "_" + str(tmpNumberinArray) + ".png"), number=(dictChapters[xmlChapter.get('id')] + "." + str(tmpNumberinArray)))
            if xmlChapter.get("rend") == "nonumber":
                tmpXML = etree.Element("EOAequation", filename=("EOAequationarray" + "_" + str(intChapterNumber) + "_" + str(tmpNumberinArray) + ".png"), number=(str(tmpNumberinArray)))
            tmpXML.set("TeX", strLaTeXCode)
            # Put Label into EOAequation
            if xmlChapter.get("rend") != "nonumber":
                strTempKey = str(dictChapters[xmlChapter.get('id')]) + "." + str(tmpNumberinArray)
            if xmlChapter.get("rend") == "nonumber":
                strTempKey = str(tmpNumberinArray)
            if strTempKey in tmpDictNumberLabel:
                # Debugging output
                print (strTempKey)
                print (tmpDictNumberLabel)
                print (dictChapters)
                tmpXML.set("label", tmpDictNumberLabel[strTempKey])
            xmlNew.append(tmpXML)
            # Swap the original array for the list of single equations
            xmlDing.getparent().replace(xmlDing, xmlNew)
            # enclosing <p>-Tag of the Subequations is not wanted, transformed to <temp> to be deleted later on
            #xmlNew.getparent().tag = "temp"
            continue
        if xmlDing.tag == "EOAsubequations":
            # Enclosing <p>-Tag of the EOAsubequations needs to be removed
            xmlDing.getparent().tag = "temp"
            xmlSubequations = xmlDing.findall('.//EOAequation')
            # Suffixes for the subequations; more than 26 subequations in
            # one group would raise an IndexError
            listCharacters = ['a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z']
            tmpI = 0
            # Insert Number of this Subequation into dictEquations
            # NOTE(review): find() returns None when there is no <anchor>;
            # the .get() calls below would then fail.
            xmlAnchor = xmlDing.find(".//anchor")
            print (xmlAnchor)
            if xmlChapter.get("rend") != "nonumber":
                dictEquations[xmlAnchor.get('id')] = dictChapters[xmlChapter.get('id')] + "." + str(intEquationnumber)
            if xmlChapter.get("rend") == "nonumber":
                dictEquations[xmlAnchor.get('id')] = str(intEquationnumber)
            # Delete anchor
            xmlAnchor.getparent().remove(xmlAnchor)
            for xmlSubequation in xmlSubequations:
                # Enclosing <p>-Tag of the EOAsubequation needs to be removed
                #xmlSubequation.getparent().tag = "temp"
                # Numbering Subequations with characters
                strSubequationNumber = str(intEquationnumber) + listCharacters[tmpI]
                tmpI += 1
                textSourcecode = xmlSubequation.find('.//texmath').text
                # Blank lines need to be removed otherwise TeX won't work
                textSourcecode = os.linesep.join([s for s in textSourcecode.splitlines() if s])
                strLaTeXCode = TeX2PNG(textSourcecode, "EOAequation", str(intChapterNumber), strSubequationNumber)
                # The anchor is fetched before clear() removes all children
                xmlAnchor = xmlSubequation.find(".//anchor")
                # Clear Equation
                xmlSubequation.clear()
                if xmlChapter.get("rend") != "nonumber":
                    xmlSubequation.set("filename", "EOAequation" + "_" + str(intChapterNumber) + "_" + strSubequationNumber + ".png")
                    xmlSubequation.set("number", dictChapters[xmlChapter.get('id')] + "." + strSubequationNumber)
                    xmlSubequation.set("uid", xmlAnchor.get('id'))
                if xmlChapter.get("rend") == "nonumber":
                    xmlSubequation.set("filename", "EOAequation" + "_" + str(intChapterNumber) + "_" + strSubequationNumber + ".png")
                    xmlSubequation.set("number", strSubequationNumber)
                    xmlSubequation.set("uid", xmlAnchor.get('id'))
                xmlSubequation.set("id", xmlAnchor.get('id'))
                xmlSubequation.set("TeX", strLaTeXCode)
                # Insert Number of this Equation into dictEquations
                # NOTE(review): this depends on strNumberingType while the
                # attributes above depend on rend="nonumber". Chapters with
                # rend="nonumber" have no entry in dictChapters, so the
                # "regular" case raises a KeyError for them - confirm.
                if strNumberingType == "regular":
                    dictEquations[xmlAnchor.get('id')] = str(dictChapters[xmlChapter.get('id')]) + "." + strSubequationNumber
                if strNumberingType == "essay":
                    dictEquations[xmlAnchor.get('id')] = strSubequationNumber
            # TODO: Keep the anchor directly below the subequations and assign it to the first equation, so that 8.16 can be linked to at 8.16a and 8.16b
            xmlDing.tag = "temp"
            # enclosing <p>-Tag of the Subequations is not wanted, transformed to <temp> to be deleted later on
            #xmlDing.getparent().tag = "temp"
            # The whole group of subequations uses up one equation number
            intEquationnumber += 1
            continue
        if xmlDing.tag == "EOAequation":
            # Check, if Equation has already been found in a Subequation:
            # those have lost their anchor through clear() or their anchor
            # id is already registered in dictEquations
            xmlAnchor = xmlDing.find("anchor")
            if xmlAnchor == None:
                continue
            if xmlAnchor.get('id') in dictEquations:
                continue
            if xmlDing.find('.//texmath') is not None:
                textSourcecode = xmlDing.find('.//texmath').text
            else:
                textSourcecode = xmlDing.text
            # Blank lines need to be removed otherwise TeX won't work
            textSourcecode = os.linesep.join([s for s in textSourcecode.splitlines() if s])
            strLaTeXCode = TeX2PNG(textSourcecode, "EOAequation", intChapterNumber, intEquationnumber)
            #print ("Got:")
            #print (strLaTeXCode)
            if xmlChapter.get("rend") != "nonumber":
                xmlDing.set("filename", "EOAequation" + "_" + str(intChapterNumber) + "_" + str(intEquationnumber) + ".png")
                xmlDing.set("number", dictChapters[xmlChapter.get('id')] + "." + str(intEquationnumber))
                xmlDing.set("uid", xmlAnchor.get('id'))
            if xmlChapter.get("rend") == "nonumber":
                xmlDing.set("filename", "EOAequation" + "_" + str(intChapterNumber) + "_" + str(intEquationnumber) + ".png")
                xmlDing.set("number", str(intEquationnumber))
                xmlDing.set("uid", xmlAnchor.get('id'))
            xmlDing.set("id", xmlAnchor.get('id'))
            xmlDing.set("TeX", strLaTeXCode)
            #xmlDing.getparent().replace(xmlDing, xmlNew)
            # Insert Number of this Equation into dictEquations
            # NOTE(review): as for the subequations, the "regular" case looks
            # up dictChapters, which has no entry for chapters with
            # rend="nonumber" - confirm.
            if strNumberingType == "regular":
                dictEquations[xmlAnchor.get('id')] = \
                    str(dictChapters[xmlChapter.get('id')]) + "." + str(intEquationnumber)
            if strNumberingType == "essay":
                dictEquations[xmlAnchor.get('id')] = str(intEquationnumber)
            intEquationnumber += 1
            continue
    intChapterNumber += 1
# Render all unnumbered equations (EOAequationnonumber and
# EOAequationarraynonumber).  Each one is typeset into a PNG via TeX2PNG and
# replaced in the tree by an <EOAequationnonumber> element that carries the
# image file name and the TeX code returned by TeX2PNG.  Image files are
# numbered per chapter with tempImagenumber.
intChapterNumber = 1
for xmlChapter in xmlChapters:
    tempImagenumber = 1
    xmlDinge = xmlChapter.xpath(".//EOAequationnonumber | .//EOAequationarraynonumber")
    print ("-----------------------------------------------------")
    print ("Processing .//EOAequationnonumber | .//EOAequationarraynonumber")
    print ("Working on Chapter " + str(intChapterNumber))
    print ("Es wurden " + str(len(xmlDinge)) + " Formeln gefunden")
    for xmlDing in xmlDinge:
        # The TeX source is taken from a <texmath> descendant when there is
        # one, otherwise from the element's own text (same rule as for the
        # numbered and inline equations).
        xmlTexmath = xmlDing.find(".//texmath")
        if xmlTexmath is not None:
            textSourcecode = xmlTexmath.text
        else:
            textSourcecode = xmlDing.text
        # Blank lines need to be removed, otherwise TeX won't work
        textSourcecode = os.linesep.join([s for s in textSourcecode.splitlines() if s])
        if xmlDing.tag == "EOAequationarraynonumber":
            # \rowattributeunknown has to be deleted, it is an artefact
            textSourcecode = re.sub("\\\\rowattributeunknown", "", textSourcecode)
            # TODO: check whether, and to what extent, it is sufficient to
            # put a whole EOAequationarraynonumber into a single image.
            # (The former row-by-row splitting code followed an unconditional
            # `continue`, was never executed, and has been removed.)
            # The result was previously stored under the misspelled name
            # "strLateXCode", so the TeX attribute below received a stale
            # value from an earlier equation.
            strLaTeXCode = TeX2PNG(textSourcecode, "EOAequationarraynonumber", str(intChapterNumber), str(tempImagenumber))
            xmlNew = etree.Element("EOAequationnonumber", filename=("EOAequationarraynonumber" + "_" + str(intChapterNumber) + "_" + str(tempImagenumber) + ".png"))
            xmlNew.set("TeX", strLaTeXCode)
            xmlDing.getparent().replace(xmlDing, xmlNew)
            tempImagenumber += 1
            continue
        if xmlDing.tag == "EOAequationnonumber":
            strLaTeXCode = TeX2PNG(textSourcecode, "EOAequationnonumber", str(intChapterNumber), tempImagenumber)
            # TODO: insert HTML code for the finished image (is this TODO still current?)
            xmlNew = etree.Element("EOAequationnonumber", filename=("EOAequationnonumber" + "_" + str(intChapterNumber) + "_" + str(tempImagenumber) + ".png"))
            xmlNew.set("TeX", strLaTeXCode)
            xmlDing.getparent().replace(xmlDing, xmlNew)
            tempImagenumber += 1
            continue
    intChapterNumber += 1
print ("-----------------------------------------------------")
print ("New Function to convert EOAineq")
# Gather the TeX source of every inline equation (EOAineq) into one string,
# one equation per page, so that all of them can be typeset together later.
# dictEOAineqs maps the global running number of an equation to the image
# base name "EOAineq_<chapter>_<number within chapter>".
intChapterNumber = 1
intEOAineqRunningOrder = 1
dictEOAineqs = {}
strTeXEquations = ""
for xmlChapter in xmlChapters:
    print ("Chapter " + str(intChapterNumber))
    intEOAineqnumber = 1
    for xmlEOAineq in xmlChapter.findall(".//EOAineq"):
        # Source is the <texmath> descendant if present, else the element text
        xmlTexmath = xmlEOAineq.find('.//texmath')
        strSourceCode = xmlTexmath.text if xmlTexmath is not None else xmlEOAineq.text
        # Drop empty lines from the source
        listSourceLines = [s for s in strSourceCode.splitlines() if s]
        strSourceCode = os.linesep.join(listSourceLines)
        strTeXEquations += "$" + strSourceCode + "$\n\\newpage\n"
        # Add intEOAineqRunningOrder : Filename to dictionary
        strFilename = "EOAineq_%s_%s" % (intChapterNumber, intEOAineqnumber)
        dictEOAineqs[intEOAineqRunningOrder] = strFilename
        # Prepare XML: empty the element, keep its tail, point it at the image
        tmpTail = xmlEOAineq.tail
        xmlEOAineq.clear()
        xmlEOAineq.tail = tmpTail
        xmlEOAineq.set("src", strFilename + ".png")
        xmlEOAineq.set("TeX", strSourceCode)
        # increment integers
        intEOAineqRunningOrder += 1
        intEOAineqnumber += 1
    intChapterNumber += 1
# Regular expressions (keys) that are replaced in the collected inline
# equations by the LaTeX commands on the right.  In the replacement strings
# "\\" yields a single literal backslash.
# The keys are raw strings now: "\|" and "\s" are not valid Python string
# escapes, so the former plain literals triggered invalid-escape warnings.
# The resulting pattern text is unchanged.
# NOTE(review): in the last key "\s" is the regex whitespace class, so the
# pattern matches a whitespace character followed by "lashed|" rather than
# the word "slashed"; left unchanged here - confirm the intended pattern.
dictRebindedCommands = {
    r"\|ket\|" : r"\\ket",
    r"\|braket\|" : r"\\braket",
    r"\|bra\|" : r"\\bra",
    r"\|Bra\|" : r"\\Bra",
    r"\|Ket\|" : r"\\Ket",
    r"\slashed\|" : r"\\slashed",
}
for strCommand, strReplacement in dictRebindedCommands.items():
    strTeXEquations = re.sub(strCommand, strReplacement, strTeXEquations)
# Typeset all inline equations in one xelatex run, split the resulting PDF
# into single pages, crop each page and convert it to a PNG in ./items.
# Read the LaTeX template; the equations replace its DERINHALT placeholder.
with open(SUPPORT_TEMPLATE_PATH + "Templates/Formel.tex", "r") as tmp:
    Template = tmp.read()
# Get tmp-directory for this user account.  File names are appended by plain
# concatenation below, so guarantee a trailing path separator and fall back
# to /tmp when TMPDIR is not set (previously this raised a TypeError).
tmpDir = os.path.join(os.getenv("TMPDIR") or "/tmp", "")
# Make directory items if it doesn't already exist
strItemsDir = os.getcwd() + "/items"
if not os.path.exists(strItemsDir):
    os.mkdir(strItemsDir)
s = string.Template(Template)
e = s.substitute(DERINHALT=strTeXEquations)
tmpFile = tmpDir + "EOAinline.tex"
with open(tmpFile, "w") as tmp:
    tmp.write(e)
print ("Typesetting all Inline Equations")
# File names are passed as separate list items instead of being run through
# shlex.split, so directories containing spaces no longer break the commands.
Argumente = ["/usr/texbin/xelatex", "--halt-on-error", tmpFile]
# Sink for the output of the external tools.
# NOTE(review): this handle is not closed in the visible part of the script;
# it is left open here in case later code still writes to it.
Datei = open('Test.txt', 'w')
Ergebnis = subprocess.call(Argumente,cwd=tmpDir,stdout=Datei)
if Ergebnis != 0:
    print ("Warning: xelatex exited with status %s" % Ergebnis)
print ("Splitting all Inline Equations")
Argumente = shlex.split(PDFTK_PATH) + ["EOAinline.pdf", "burst", "output", "EOAineq_%d.pdf"]
Ergebnis = subprocess.call(Argumente,cwd=tmpDir)
if Ergebnis != 0:
    print ("Warning: pdftk exited with status %s" % Ergebnis)
print ("Converting %s splitted pages into PNG-Images" % len(dictEOAineqs))
counter_dictEOAineqs = 1
for intRunningOrder in dictEOAineqs:
    # provide more status information here in output!
    print("Image %s of %s" % (counter_dictEOAineqs, len(dictEOAineqs)))
    strPagePDF = tmpDir + "EOAineq_" + str(intRunningOrder) + ".pdf"
    strCroppedPDF = tmpDir + dictEOAineqs[intRunningOrder] + ".pdf"
    # Written below the same directory that was created above (os.getcwd());
    # the PWD environment variable used before may be unset or out of date.
    strPNG = strItemsDir + "/" + dictEOAineqs[intRunningOrder] + ".png"
    Argumente = shlex.split(TL_PATH + "texmf-dist/scripts/pdfcrop/pdfcrop.pl") + [strPagePDF, strCroppedPDF]
    subprocess.call(Argumente,cwd=tmpDir,stdout=Datei)
    Argumente = shlex.split(GM_PATH) + ["convert", "-density", "144", strCroppedPDF, strPNG]
    subprocess.call(Argumente,cwd=tmpDir,stdout=Datei)
    counter_dictEOAineqs += 1
print ("-----------------------------------------------------")
print ("EOAFigure Numbering per Chapter")
# Number the figures of every chapter starting at 1 and record the number in
# dictFigures under the id of the figure's <anchor> child.  That id is also
# copied onto the figure element itself.
for xmlChapter in xmlChapters:
    strFigureChapterUID = xmlChapter.get('id')
    Figurenumber = 1
    for xmlFigure in xmlChapter.xpath(".//EOAfigure | .//EOAlsfigure"):
        xmlAnchor = xmlFigure.find("anchor")
        strFigureUID = xmlAnchor.get('id')
        # A chapter with an id gets "<chapter number>.<figure number>",
        # otherwise only the figure number is stored.
        if strFigureChapterUID:
            dictFigures[strFigureUID] = "%s.%s" % (dictChapters[strFigureChapterUID], Figurenumber)
        else:
            dictFigures[strFigureUID] = str(Figurenumber)
        xmlFigure.set("id", strFigureUID)
        Figurenumber += 1
print ("-----------------------------------------------------")
print ("Numbering Theorems")
# Theorems already carry their number in the "id-text" attribute; store it in
# dictTheorems under the theorem's id.
for xmlChapter in xmlChapters:
    dictTheorems.update(
        (xmlTheorem.get("id"), xmlTheorem.get("id-text"))
        for xmlTheorem in xmlChapter.findall(".//theorem")
    )
print ("-----------------------------------------------------")
print ("Section, Subsection,... Numbering per Chapter")
# Fill dictSections with the numbers of all numbered sections (div2) and
# subsections (div3).  In a chapter marked rend="nonumber" the chapter number
# is left out of the label and the chapter counter is not advanced.
intChapterNumber = 1
for xmlChapter in xmlChapters:
    strUID = xmlChapter.get("id")
    boolNumberedChapter = xmlChapter.get("rend") != "nonumber"
    strChapterPrefix = (str(intChapterNumber) + ".") if boolNumberedChapter else ""
    intSectionNumber = 1
    for xmlSection in xmlChapter.findall("div2"):
        if xmlSection.get("rend") == "nonumber":
            continue
        strSectionLabel = strChapterPrefix + str(intSectionNumber)
        strUID = xmlSection.get("id")
        dictSections[strUID] = strSectionLabel
        intSubsectionNumber = 1
        for xmlSubsection in xmlSection.findall("div3"):
            if xmlSubsection.get("rend") == "nonumber":
                continue
            strUID = xmlSubsection.get("id")
            dictSections[strUID] = strSectionLabel + "." + str(intSubsectionNumber)
            intSubsectionNumber += 1
        intSectionNumber += 1
    if boolNumberedChapter:
        intChapterNumber += 1
print ("-----------------------------------------------------")
print ("Numbering of Footnotes per Chapter")
# Footnotes (<note>) are counted from 1 within every chapter; the number is
# stored as a string in dictFootnotes under the note's id.
intChapterNumber = 1
for xmlChapter in xmlChapters:
    intNoteNumber = 1
    for xmlFootnote in xmlChapter.findall(".//note"):
        dictFootnotes[xmlFootnote.get("id")] = str(intNoteNumber)
        intNoteNumber += 1
# the new-style footnotes that use LaTeX bigfoot show up in the following order:
footnote_groups = ["decimal", "lower-latin"]
def get_bigfoot_data(chapter):
    """
    Collect the new-style (LaTeX bigfoot) footnotes of one chapter.

    All EOAbigfoot descendants of `chapter` are looked up once and then
    sorted into the groups listed in `footnote_groups`, according to
    their "list-style-type" attribute.

    The result is an association list: one (group name, notes) tuple per
    entry of `footnote_groups`, in that order.  `notes` holds the
    footnotes of that group in the order in which findall returned them
    and is empty if the chapter has none of that type.  Footnotes whose
    "list-style-type" is not listed in `footnote_groups` are left out.
    """
    all_notes = list(chapter.findall(".//EOAbigfoot"))
    grouped = []
    for group_name in footnote_groups:
        members = []
        for note in all_notes:
            if group_name == note.get("list-style-type"):
                members.append(note)
        grouped.append((group_name, members))
    return grouped
print ("-----------------------------------------------------")
print ("Numbering of Lists per Chapter")
# List items already carry their number in the "id-text" attribute; store it
# in dictLists under the item's id.
for xmlChapter in xmlChapters:
    dictLists.update(
        (xmlListitem.get("id"), xmlListitem.get("id-text"))
        for xmlListitem in xmlChapter.findall(".//item")
    )
print ("-----------------------------------------------------")
print ("Working on Page Numbers for References")
# Scan every .aux file in the working directory for \newlabel lines and store
# label -> page number in dictPagelabels.  A line is only used when it starts
# with \newlabel{<label>} and ends with }{<digits>}}.
# The patterns are compiled once instead of being looked up for every line.
rePageLabelName = re.compile(r'\\newlabel\{(.*?)\}')
rePageLabelPage = re.compile(r'(.*?)\}\{(\d{1,})\}\}$')
listAuxFiles = glob.glob(os.getcwd() + "/*.aux")
for strFile in listAuxFiles:
    # "with" closes the file even if reading fails
    with open(strFile, "r") as tmpFile:
        lines = tmpFile.readlines()
    for line in lines:
        matchObjectLabel = rePageLabelName.match(line)
        if matchObjectLabel:
            matchObjectPage = rePageLabelPage.match(line)
            if matchObjectPage:
                dictPagelabels[matchObjectLabel.group(1)] = matchObjectPage.group(2)
print ("-----------------------------------------------------")
print ("Numbering of Tables per Chapter")
# Number the tables of every chapter starting at 1 and store the number in
# dictTables under the text of the table's label.  Tables whose caption text
# is "nonumber" are skipped; a table without label text gets a generated one.
intChapterNumber = 1
for xmlChapter in xmlChapters:
    intTableNumber = 1
    boolNumberedChapter = xmlChapter.get("rend") != "nonumber"
    for xmlTable in xmlChapter.findall(".//EOAtable"):
        xmlTableLabel = xmlTable.find(".//EOAtablelabel")
        strTableCaption = xmlTable.find(".//EOAtablecaption").text
        if strTableCaption == "nonumber":
            continue
        # Missing or empty label: generate "table<chapter><table>"
        if not xmlTableLabel.text:
            xmlTableLabel.text = "table%s%s" % (intChapterNumber, intTableNumber)
        strUID = xmlTableLabel.text
        print (strUID)
        if boolNumberedChapter:
            dictTables[strUID] = dictChapters[xmlChapter.get('id')] + "." + str(intTableNumber)
        else:
            dictTables[strUID] = str(intTableNumber)
        intTableNumber += 1
        print (dictTables)
    intChapterNumber += 1
##############################################################
# Preparing the Bibliography #
##############################################################
# Copy interim .bbl-File to interim bib.tex file
strBibTeXFile = options.filename + "bib.tex"
shutil.copy(options.filename + ".bbl", strBibTeXFile)
# Read all lines of Bibliographic TeX ("with" closes the file even on errors)
with open(strBibTeXFile, "r") as tmpFile:
    tmpLines = tmpFile.readlines()
# First line should link to Bibliographic Praeambel
tmpLines[0] = "\\include{/Library/MPIWG/TeX/pre_bib}\n"
# Remove unwanted lines: indices 18 down to 1, i.e. the 18 lines following the
# include line.  Deleting from the back keeps the remaining indices valid; a
# file shorter than that still raises IndexError as before.
for i in range (18,0,-1):
    del tmpLines[i]
# Save changes
with open(strBibTeXFile, "w") as tmpFile:
    tmpFile.writelines(tmpLines)
# TeX has been sanitized, now tralics to make it intermediate XML
Kommando = "%s -confdir %s/tralics_conf -config %s/tralics.tcf -utf8 -utf8output -entnames=false %sbib.tex" % (TRALICS_PATH_EXEC, TRALICS_PATH_LIB, TRALICS_PATH_LIB, options.filename)
Argumente = shlex.split(Kommando)
Prozess = subprocess.call(Argumente)
# Report a failing tralics run instead of silently ignoring the exit status
if Prozess != 0:
    print ("Warning: tralics exited with status %s" % Prozess)
# Sanitize the XML written by tralics to make it usable
strBibXMLFile = options.filename + "bib.xml"
with open(strBibXMLFile, "r") as tmpFile:
    tmpContent = tmpFile.read()
# Regular expressions whose matches are removed from the XML: the math and
# formula wrappers and the uniquename/hash artefacts.  The list is applied in
# this order; duplicate "<mn>" entries of the former list have been dropped.
listReplace = [ r"<math mode='display' xmlns='http://www.w3.org/1998/Math/MathML'>",
    r"<formula textype='displaymath' type='display'>",
    r"<mi>",
    r"</mi>",
    r"<mn>",
    r"<mo>",
    r"</mo>",
    r"</mn>",
    r"<mrow/>",
    r"<msup>",
    r"</msup>",
    r"</math>",
    r"</formula>",
    r"<formula type='inline'>",
    r"<math xmlns='http://www.w3.org/1998/Math/MathML'>",
    r"<formula textype='math' type='inline'>",
    r"<mrow>uniquename=(.*?),hash=(.*?)</mrow>",
    r"<mrow>hash=(.*?)</mrow>",
    ]
for strReplace in listReplace:
    tmpContent = re.sub(strReplace, "", tmpContent)
# Put Back Underscore _
tmpContent = re.sub(r"<error n='_' l='(.*?)' c='Missing dollar'/>", "_", tmpContent)
# Remove empty Lines
tmpContent = re.sub(r"\n\n", "\n", tmpContent)
# Put back Ampersand
# NOTE(review): as written this replaces "&" by "&" and changes nothing;
# an entity such as "&amp;" may have been lost from this line - verify.
tmpContent = re.sub(r"&", "&", tmpContent)
with open(strBibXMLFile, "w") as tmpFile:
    tmpFile.write(tmpContent)
# TeXML has been sanitized, now load xml-Tree
xmlParser2 = etree.XMLParser(no_network=False,load_dtd=False)
xmlBibTree = etree.parse((options.filename + "bib.xml"), xmlParser2)
xmlEntries = xmlBibTree.findall(".//entry")
# Bibliography type "monograph": one bibliography for the whole book.  The
# EOAprintbibliography element is emptied, turned into a <div> (as is its
# parent) and filled with one <p class="bibliography"> per entry.
if xmlTree.find(".//EOAbibliographytype").text == "monograph":
    xmlPrintBibliography = xmlTree.find(".//EOAprintbibliography")
    if xmlPrintBibliography is not None:
        xmlBibliography = xmlPrintBibliography
        xmlBibliography.clear()
        xmlBibliography.tag = "div"
        xmlBibliography.getparent().tag = "div"
        xmlEntries = xmlBibTree.findall(".//entry")
        intNumberOfEntry = 0
        for xmlEntry in xmlEntries:
            bibEntry = Bibitem(xmlEntry)
            # The first entry has no predecessor; every other entry is
            # compared with the author of the entry before it.
            boolRepeatedAuthor = False
            if intNumberOfEntry != 0:
                bibEntryPrevious = Bibitem(xmlEntries[intNumberOfEntry - 1])
                boolRepeatedAuthor = bibEntry.fullauthorlastfirst() == bibEntryPrevious.fullauthorlastfirst()
            strNewentry = "<p class=\"bibliography\">" + createBibEntryAuthorYear(bibEntry, boolSameAuthor=boolRepeatedAuthor) + "</p>"
            xmlNew = etree.fromstring(strNewentry)
            xmlBibliography.append(xmlNew)
            intNumberOfEntry += 1
# Bibliography type "anthology": one bibliography per chapter.  For every
# chapter with an EOAprintbibliography element a new <div> is inserted after
# it and filled with the entries of the refsection whose <number> equals the
# running chapter number.
if xmlTree.find(".//EOAbibliographytype").text == "anthology":
    intChapterNumber = 1
    for xmlChapter in xmlChapters:
        xmlPrintBibliography = xmlChapter.find(".//EOAprintbibliography")
        if xmlPrintBibliography is not None:
            xmlBibliography = xmlPrintBibliography
            xmlBibliography.getparent().tag = "div"
            xmlBibliographyDiv = etree.Element("div")
            xmlBibliography.addnext(xmlBibliographyDiv)
            xmlRefsections = xmlBibTree.findall(".//refsection")
            # Stop at the matching refsection; if none matches, the loop
            # variable is left on the last refsection.
            for xmlRefsection in xmlRefsections:
                if xmlRefsection.find(".//number").text == str(intChapterNumber):
                    break
            xmlEntries = xmlRefsection.findall(".//entry")
            intNumberOfEntry = 0
            for xmlEntry in xmlEntries:
                bibEntry = Bibitem(xmlEntry)
                # The first entry has no predecessor; every other entry is
                # compared with the author of the entry before it.
                boolFirstEntry = intNumberOfEntry == 0
                boolRepeatedAuthor = False
                if not boolFirstEntry:
                    bibEntryPrevious = Bibitem(xmlEntries[intNumberOfEntry - 1])
                    boolRepeatedAuthor = bibEntry.fullauthorlastfirst() == bibEntryPrevious.fullauthorlastfirst()
                strNewentry = "<p class=\"bibliography\">" + createBibEntryAuthorYear(bibEntry, boolSameAuthor=boolRepeatedAuthor) + "</p>"
                # Only later entries with a different author are echoed
                if not boolFirstEntry and not boolRepeatedAuthor:
                    print (strNewentry)
                xmlNew = etree.fromstring(strNewentry)
                xmlBibliographyDiv.append(xmlNew)
                intNumberOfEntry += 1
        intChapterNumber += 1
# for the time being
strCitation = ""
# Bibliographies are done, now for the citations.
# Every EOAciteauthoryear / EOAciteyear / EOAcitemanual element is turned
# into a <span class="citation"> that shows the citation text and carries
# the attributes for a popover with the title of the cited work.
strBibliographyType = xmlTree.find(".//EOAbibliographytype").text
if strBibliographyType == "anthology" or strBibliographyType == "monograph":
    intChapterNumber = 1
    for xmlChapter in xmlChapters:
        print ("-----------------------------------------------------")
        print ("Processing References for Chapter " + str(intChapterNumber))
        xmlCitations = xmlChapter.xpath(".//EOAciteauthoryear | .//EOAciteyear | .//EOAcitemanual")
        for xmlCitation in xmlCitations:
            strCitekey = xmlCitation.find("./citekey").text
            print (strCitekey)
            # Start from a clean state for every citation.  Previously the
            # text and title of the preceding citation were reused whenever
            # the citekey matched no bibliography entry.
            strCitation = ""
            strTitle = None
            # If Bibliography-Type is anthology find Refsection for this Chapter
            if strBibliographyType == "anthology":
                xmlRefsections = xmlBibTree.findall(".//refsection")
                for xmlRefsection in xmlRefsections:
                    if xmlRefsection.find(".//number").text == str(intChapterNumber):
                        break
                xmlEntries = xmlRefsection.findall(".//entry")
            # If Bibliography-Type is monograph find all entries, forget about refsection
            if strBibliographyType == "monograph":
                xmlEntries = xmlBibTree.findall(".//entry")
            for xmlEntry in xmlEntries:
                bibEntry = Bibitem(xmlEntry)
                if bibEntry.citekey() == strCitekey:
                    if xmlCitation.tag == "EOAciteauthoryear":
                        strCitation = bibEntry.shortauthor() + " " + bibEntry.labelyear()
                        if bibEntry.labelyearsuffix() is not None:
                            strCitation = strCitation + bibEntry.labelyearsuffix()
                        strTitle = bibEntry.title()
                    if xmlCitation.tag == "EOAciteyear":
                        strCitation = bibEntry.labelyear()
                        if bibEntry.labelyearsuffix() is not None:
                            strCitation = strCitation + bibEntry.labelyearsuffix()
                        strTitle = bibEntry.title()
                    if xmlCitation.tag == "EOAcitemanual":
                        strCitation = xmlCitation.find("citetext").text
                        strTitle = bibEntry.title()
            # Append the page reference, if one is given
            xmlPage = xmlCitation.find("./page")
            if xmlPage is not None and xmlPage.text is not None:
                strCitation = strCitation + ", " + xmlPage.text
            # Replace the XML tag with the citation text here
            tmpTail = xmlCitation.tail
            xmlCitation.clear()
            xmlCitation.tag = "span"
            xmlCitation.set("rel","popover")
            xmlCitation.set("class","citation")
            xmlCitation.text = strCitation
            xmlCitation.tail = tmpTail
            # Create Link to be used for website in a popover
            xmlCitation.set("data-toggle", "popover")
            xmlCitation.set("html", "true")
            xmlCitation.set("data-placement", "bottom")
            xmlCitation.set("data-title", strCitation)
            # A missing title (None) or one that cannot be stored as an
            # attribute value falls back to "missing".  Only these errors are
            # caught now instead of every exception.
            try:
                xmlCitation.set("data-content", strTitle)
            except (TypeError, ValueError):
                xmlCitation.set("data-content", "missing")
        intChapterNumber += 1
def _format_citation_numbers(listCitenumbers):
    """
    Render citation numbers as one bracketed string.

    `listCitenumbers` is a list of numbers given as strings (or ints).
    The numbers are sorted numerically; a run of three or more
    consecutive numbers is collapsed to "first-last", everything else is
    separated by commas.

    Examples: ["3", "1", "2", "5"] gives "[1-3,5]", ["1", "2"] gives
    "[1,2]", ["7"] gives "[7]".

    This replaces two identical copies of an index-based loop; the result
    is the same.  One of the statements of that loop was a comparison
    ("intNumberOfSequentialCite == 0") where an assignment was meant.
    """
    listRuns = []
    for intNumber in sorted(int(strNumber) for strNumber in listCitenumbers):
        if listRuns and intNumber == listRuns[-1][-1] + 1:
            listRuns[-1].append(intNumber)
        else:
            listRuns.append([intNumber])
    listParts = []
    for listRun in listRuns:
        if len(listRun) >= 3:
            listParts.append(str(listRun[0]) + "-" + str(listRun[-1]))
        else:
            listParts.extend(str(intNumber) for intNumber in listRun)
    return "[" + ",".join(listParts) + "]"

# If Bibliography-Type is monograph-numeric search for EOAbibliography and make it all
if xmlTree.find(".//EOAbibliographytype").text == "monograph-numeric":
    if xmlTree.find(".//EOAprintbibliography") is not None:
        dictCitekeysNumbers = {}
        dictCitekeysTitles = {}
        xmlBibliography = xmlTree.find(".//EOAprintbibliography")
        xmlBibliography.clear()
        xmlBibliography.tag = "div"
        xmlBibliography.getparent().tag = "div"
        xmlEntries = xmlBibTree.findall(".//entry")
        intNumberOfEntry = 1
        for xmlEntry in xmlEntries:
            # Go through all entries and assign a number to the citekey
            bibEntry = Bibitem(xmlEntry)
            strCitekey = bibEntry.citekey()
            dictCitekeysNumbers[strCitekey] = str(intNumberOfEntry)
            dictCitekeysTitles[strCitekey] = str(bibEntry.title())
            strNewentry = "<p class=\"bibliography\">[" + str(intNumberOfEntry) + "] " + createBibEntryNumeric(bibEntry) + "</p>"
            xmlNew = etree.fromstring(strNewentry)
            xmlBibliography.append(xmlNew)
            intNumberOfEntry += 1
        # Now for the references via EOAcitenumeric
        xmlCitenumerics = xmlTree.findall(".//EOAcitenumeric")
        for xmlCitenumeric in xmlCitenumerics:
            print (etree.tostring(xmlCitenumeric))
            strPopover = ""
            # The citekey element holds a comma separated list of keys;
            # blanks and line breaks are removed before splitting.
            tmpCitekeys = xmlCitenumeric.find(".//citekey").text
            tmpCitekeys = tmpCitekeys.replace(" ", "").replace("\n", "")
            listCitekeys = tmpCitekeys.split(",")
            listCitenumbers = []
            for strCitekey in listCitekeys:
                listCitenumbers.append(dictCitekeysNumbers[strCitekey])
                # Create Text to be used on the website in a popover
                strPopover = strPopover + "[" + dictCitekeysNumbers[strCitekey] + "] " + dictCitekeysTitles[strCitekey] + " "
            strResult = _format_citation_numbers(listCitenumbers)
            xmlCitenumeric.text = strResult
            # Create Link to be used for website
            xmlCitenumeric.set("data-toggle", "popover")
            xmlCitenumeric.set("html", "true")
            xmlCitenumeric.set("data-content", strPopover)
            xmlCitenumeric.set("class","citation")
            xmlCitenumeric.set("data-placement", "bottom")
            xmlCitenumeric.set("data-title", strResult)
# Numeric citations for the individual chapters
if xmlTree.find(".//EOAbibliographytype").text == "anthology-numeric":
    intChapterNumber = 1
    for xmlChapter in xmlChapters:
        print ("Processing Bibliography")
        if xmlChapter.find(".//EOAprintbibliography") is not None:
            dictCitekeysNumbers = {}
            dictCitekeysTitles = {}
            xmlBibliography = xmlChapter.find(".//EOAprintbibliography")
            # Unlike the monograph case the element is not cleared here
            xmlBibliography.tag = "div"
            xmlBibliography.getparent().tag = "div"
            xmlRefsections = xmlBibTree.findall(".//refsection")
            # Stop at the refsection of this chapter; if none matches, the
            # loop variable is left on the last refsection.
            for xmlRefsection in xmlRefsections:
                if xmlRefsection.find(".//number").text == str(intChapterNumber):
                    break
            xmlEntries = xmlRefsection.findall(".//entry")
            intNumberOfEntry = 1
            for xmlEntry in xmlEntries:
                # Go through all entries and assign a number to the citekey
                bibEntry = Bibitem(xmlEntry)
                strCitekey = bibEntry.citekey()
                dictCitekeysNumbers[strCitekey] = str(intNumberOfEntry)
                dictCitekeysTitles[strCitekey] = str(bibEntry.title())
                strNewentry = "<p class=\"bibliography\">[" + str(intNumberOfEntry) + "] " + createBibEntryNumeric(bibEntry) + "</p>"
                xmlNew = etree.fromstring(strNewentry)
                xmlBibliography.append(xmlNew)
                intNumberOfEntry += 1
            # Now for the references via EOAcitenumeric
            xmlCitenumerics = xmlChapter.xpath(".//EOAcitenumeric | .//EOAciteauthoryear | .//EOAciteyear")
            print ("Numerische Citation gefunden in Kapitel " + str(intChapterNumber))
            for xmlCitenumeric in xmlCitenumerics:
                strPopover = ""
                tmpCitekeys = xmlCitenumeric.find(".//citekey").text
                tmpCitekeys = tmpCitekeys.replace(" ", "").replace("\n", "")
                print (tmpCitekeys)
                listCitekeys = tmpCitekeys.split(",")
                listCitenumbers = []
                for strCitekey in listCitekeys:
                    print (strCitekey)
                    listCitenumbers.append(dictCitekeysNumbers[strCitekey])
                    # Create Text to be used on the website in a popover
                    strPopover = strPopover + "[" + dictCitekeysNumbers[strCitekey] + "] " + dictCitekeysTitles[strCitekey] + " "
                strResult = _format_citation_numbers(listCitenumbers)
                xmlCitenumeric.text = strResult
                # Create Link to be used for website in a popover
                xmlCitenumeric.set("data-toggle", "popover")
                xmlCitenumeric.set("data-placement", "bottom")
                xmlCitenumeric.set("data-title", " " + strResult)
                xmlCitenumeric.set("data-content", strPopover)
                xmlCitenumeric.set("class","citation")
        intChapterNumber += 1
##############################################################
# Create .epub basic structure #
##############################################################
# Create folder structure for ebook (only if the epub folder is missing)
strEpubDir = os.getcwd() + "/CONVERT/epub"
if not os.path.exists(strEpubDir):
    for strSubDir in ("", "/META-INF", "/OEBPS", "/OEBPS/images"):
        os.mkdir(strEpubDir + strSubDir)
# Copy container.xml and mimetype from the templates
shutil.copy(SUPPORT_TEMPLATE_PATH + "Templates/epubcontainer.xml", os.getcwd() + "/CONVERT/epub/META-INF/container.xml")
shutil.copy(SUPPORT_TEMPLATE_PATH + "Templates/epubmimetype", os.getcwd() + "/CONVERT/epub/mimetype")
# Shortcut for namespace
htmlns = "{http://www.w3.org/1999/xhtml}"
# Parser for the chapter HTML template
xmlChapterParser = etree.XMLParser(no_network=False, load_dtd=False) #resolve_entities=False
# Preparing toc.ncx
xmlTocncxParser = etree.XMLParser(no_network=False, load_dtd=False)
tocncx = etree.parse(SUPPORT_TEMPLATE_PATH + "Templates/epubtocncx.xml", xmlTocncxParser)
# Preparing content.opf
xmlContentopfParser = etree.XMLParser(no_network=False, load_dtd=False)
contentopf = etree.parse(SUPPORT_TEMPLATE_PATH + "Templates/epubcontentopf.xml", xmlContentopfParser)
print ("-----------------------------------------------------")
print ("Preparing content.opf")
def _new_dc_element(strName, strText):
    """Return a new Dublin Core element <strName> with the given text."""
    xmlElement = etree.Element("{http://purl.org/dc/elements/1.1/}" + strName)
    xmlElement.text = strText
    return xmlElement

xmlMetadata = contentopf.find(".//{http://www.idpf.org/2007/opf}metadata")
# Prepare Metadata based on Publication.cfg
cfgPublication = configparser.RawConfigParser()
cfgPublication.read(os.getcwd() + "/CONVERT/publication.cfg")
# Prepare Author String: the last non-empty author field decides how many
# names are joined ("A and B", "A, B and C", "A, B, C and D")
strAuthor1 = cfgPublication.get("Authors", "Author1")
strAuthor2 = cfgPublication.get("Authors", "Author2")
strAuthor3 = cfgPublication.get("Authors", "Author3")
strAuthor4 = cfgPublication.get("Authors", "Author4")
strAuthorString = strAuthor1
if strAuthor2 != "":
    strAuthorString = strAuthor1 + " and " + strAuthor2
if strAuthor3 != "":
    strAuthorString = strAuthor1 + ", " + strAuthor2 + " and " + strAuthor3
if strAuthor4 != "":
    strAuthorString = strAuthor1 + ", " + strAuthor2 + ", " + strAuthor3 + " and " + strAuthor4
xmlAuthor = _new_dc_element("creator", strAuthorString)
xmlMetadata.append(xmlAuthor)
# Prepare Title-String
strTitleString = cfgPublication.get("Technical", "Title")
xmlTitle = _new_dc_element("title", strTitleString)
xmlMetadata.append(xmlTitle)
# Prepare Description via Subtitle (left out when there is no subtitle)
strSubtitleString = cfgPublication.get("Technical", "Subtitle")
if strSubtitleString != "":
    xmlSubtitle = _new_dc_element("description", strSubtitleString)
    xmlMetadata.append(xmlSubtitle)
# Prepare Identifier
strIdentifier = "MPIWG:" + cfgPublication.get("Technical", "Serie") + cfgPublication.get("Technical", "Number")
xmlIdentifier = _new_dc_element("identifier", strIdentifier)
xmlIdentifier.set("id", "BookId")
xmlMetadata.append(xmlIdentifier)
# Prepare Type
xmlType = _new_dc_element("type", "Text")
xmlMetadata.append(xmlType)
# Prepare Date
strPublicationDate = cfgPublication.get("Technical", "PublicationDate")
xmlDate = _new_dc_element("date", strPublicationDate)
xmlDate.set("{http://www.idpf.org/2007/opf}event", "creation")
xmlMetadata.append(xmlDate)
# Prepare Publisher
xmlPublisher = _new_dc_element("publisher", "Edition Open Access")
xmlMetadata.append(xmlPublisher)
# Prepare Rights (the variable name xmlPublisher is reused for this element)
xmlPublisher = _new_dc_element("rights", "Published under Creative Commons by-nc-sa 3.0 Germany Licence")
xmlMetadata.append(xmlPublisher)
# Prepare Source
xmlSource = _new_dc_element("source", "Max Planck Research Library for the History and Development of Knowledge")
xmlMetadata.append(xmlSource)
# Prepare Subject
strSubject = cfgPublication.get("General", "Keyword1")
xmlSubject = _new_dc_element("subject", strSubject)
xmlMetadata.append(xmlSubject)
# Prepare Language
strLanguage = cfgPublication.get("Technical", "Language")
xmlLanguage = _new_dc_element("language", strLanguage)
xmlMetadata.append(xmlLanguage)
# Prepare Cover: a <meta name="cover"> entry pointing at the manifest item "cover_pic"
xmlCover = etree.Element("meta")
for strKey, strValue in (("content", "cover_pic"), ("name", "cover")):
    xmlCover.set(strKey, strValue)
xmlMetadata.append(xmlCover)
xmlManifest = contentopf.find(".//{http://www.idpf.org/2007/opf}manifest")
# Manifest entry for the cover image, then copy the image into the EPUB tree
xmlItem = etree.Element("item")
for strKey, strValue in (("id", "cover_pic"), ("href", "images/cover.jpg"), ("media-type", "image/jpeg")):
    xmlItem.set(strKey, strValue)
xmlManifest.append(xmlItem)
shutil.copy(os.getcwd() + "/CONVERT/cover.jpg", os.getcwd() + "/CONVERT/epub/OEBPS/images/")
# Manifest entry for the cover page, then copy the page template into place
xmlItem = etree.Element("item")
for strKey, strValue in (("id", "cover"), ("href", "cover.xhtml"), ("media-type", "application/xhtml+xml")):
    xmlItem.set(strKey, strValue)
xmlManifest.append(xmlItem)
shutil.copy(SUPPORT_TEMPLATE_PATH + "Templates/epubcover.xhtml", os.getcwd() + "/CONVERT/epub/OEBPS/cover.xhtml")
print("-------------------")
print("Preparing intro.xhtml")
print("-------------------")
# Read the intro template, fill in its placeholders and write the result
# to OEBPS/intro.xhtml.
tmpFilePath = SUPPORT_TEMPLATE_PATH + "Templates/epubintro.xhtml"
with open(tmpFilePath, "r") as tmpFile:
    strIntroHTML = tmpFile.read()
# The placeholders are literal words, so plain str.replace is used: unlike
# re.sub it does not interpret backslashes or group references that may
# occur in the configured values.
strIntroHTML = strIntroHTML.replace("author", strAuthorString)
strIntroHTML = strIntroHTML.replace("TITLE", strTitleString)
strIntroHTML = strIntroHTML.replace("year", cfgPublication.get("Technical", "PublicationYear"))
strIntroHTML = strIntroHTML.replace("series", cfgPublication.get("Technical", "Serie"))
strIntroHTML = strIntroHTML.replace("number", cfgPublication.get("Technical", "Number"))
# Optional additional information is wrapped in a paragraph; an empty
# setting simply removes the placeholder.
strAdditionalInformation = cfgPublication.get("General", "AdditionalInformation")
if strAdditionalInformation != "":
    strAdditionalInformation = "<p>" + strAdditionalInformation + "</p>"
strIntroHTML = strIntroHTML.replace("AdditionalInformation", strAdditionalInformation)
tmpFilePath = os.getcwd() + "/CONVERT/epub/OEBPS/intro.xhtml"
# The context manager closes (and thereby flushes) the file; it used to stay open.
with open(tmpFilePath, "w") as tmpFile:
    tmpFile.write(strIntroHTML)
print("-------------------")
print("Preparing toc.ncx")
print("-------------------")
# Fill the NCX skeleton with the unique id, the title and the author.
# The searches use ".//" instead of a leading "//": lxml rewrites absolute
# paths on an ElementTree to exactly this form (with a FutureWarning) and
# rejects them on a plain Element.
xmlHead = tocncx.find(".//{http://www.daisy.org/z3986/2005/ncx/}head")
xmlMeta = etree.Element("meta")
xmlMeta.set("name", "dtb:uid")
xmlMeta.set("content", "MPIWG:" + cfgPublication.get("Technical", "Serie") + cfgPublication.get("Technical", "Number"))
xmlHead.append(xmlMeta)
xmlTitle = tocncx.find(".//{http://www.daisy.org/z3986/2005/ncx/}docTitle")
xmlText = etree.Element("text")
xmlText.text = strTitleString
xmlTitle.append(xmlText)
xmlAuthor = tocncx.find(".//{http://www.daisy.org/z3986/2005/ncx/}docAuthor")
xmlText = etree.Element("text")
xmlText.text = strAuthorString
xmlAuthor.append(xmlText)
# This list includes all files which have already been included to avoid duplicates
listContentopf = []
##############################################################
#               Convert Tralics-XML to Epub                  #
##############################################################
# Work on a copy so that xmlTree itself stays untouched
xmlEbookTree = deepcopy(xmlTree)
# xmlChapters is a list containing all chapters.
# ".//" replaces the leading "//", which lxml only accepts on an ElementTree
# (and with a FutureWarning); the result is the same.
xmlChapters = xmlEbookTree.findall(".//div1")
# Convert Chapters, Sections, Subsections and Subsubsections to h1, h2, h3, h4
# Insert Number from Dictionary where needed
print("-----------------------------------------------------")
print("Convert EOAChapter to H1")
for xmlChapter in xmlChapters:
    xmlChapterHeadline = xmlChapter.find("head")
    xmlChapterHeadline.tag = "h1"
    # Numbered chapters get "<number>. " in front of their headline
    if xmlChapter.get("rend") != "nonumber":
        idChapter = xmlChapter.get("id")
        print(idChapter + " konvertierung into h1")
        print(dictChapters[idChapter])
        strHeadline = xmlChapterHeadline.text or ""
        xmlChapterHeadline.text = str(dictChapters[idChapter]) + ". " + strHeadline
    # A chapter author is repeated as an italic paragraph right after the headline
    xmlChapterAuthor = xmlChapter.find(".//EOAauthor")
    if xmlChapterAuthor is not None:
        tmpXML = etree.Element("p")
        tmpXML.append(etree.Element("i"))
        tmpXML[0].text = xmlChapterAuthor.text
        xmlChapter.insert(1, tmpXML)
        # Remove unwanted EOAauthor here
        xmlChapterAuthor.text = ""
    # strip_tags works in place and returns None, so its result must not be
    # assigned back to xmlChapter
    etree.strip_tags(xmlChapter, "EOAauthor")
print(dictSections)
print("-----------------------------------------------------")
print("Convert EOAsection to H2")
# Sections: rename <head> to <h2>; numbered ones get their number prepended
xmlSections = xmlEbookTree.findall(".//div2")
for xmlSection in xmlSections:
    xmlSectionHeadline = xmlSection.find("head")
    xmlSectionHeadline.tag = "h2"
    if xmlSection.get("rend") == "nonumber":
        continue
    idSection = xmlSection.get("id")
    strHeadline = xmlSectionHeadline.text or ""
    print(strHeadline)
    xmlSectionHeadline.text = " ".join((str(dictSections[idSection]), strHeadline))
print("-----------------------------------------------------")
print("Convert EOAsubsection to H3")
# Subsections: same treatment with <h3>; numbers also come from dictSections
xmlSubsections = xmlEbookTree.findall(".//div3")
for xmlSubsection in xmlSubsections:
    xmlSubsectionHeadline = xmlSubsection.find("head")
    xmlSubsectionHeadline.tag = "h3"
    if xmlSubsection.get("rend") == "nonumber":
        continue
    idSection = xmlSubsection.get("id")
    strHeadline = xmlSubsectionHeadline.text or ""
    print(strHeadline)
    xmlSubsectionHeadline.text = " ".join((str(dictSections[idSection]), strHeadline))
print("-----------------------------------------------------")
print("Convert EOAsubsubsection to H4")
# Subsubsections are only renamed to <h4>; they are not numbered
xmlSubsubsections = xmlEbookTree.findall(".//div4")
for xmlSubsubsection in xmlSubsubsections:
    xmlSubsubsection.find("head").tag = "h4"
print("-----------------------------------------------------")
print("Preparing Figures")
# Every numbered figure becomes <p><img/></p> followed by a caption paragraph
xmlFigures = xmlEbookTree.xpath(".//EOAfigure | .//EOAlsfigure")
for xmlFigure in xmlFigures:
    # Copy File of the Image
    # If it's in a subfolder, name of folder and name of image will be merged
    strImageFileString = xmlFigure.find(".//file").text.rstrip("\n")
    # Remove / from path
    strImageFileDir = os.path.dirname(strImageFileString).replace("/", "")
    strImageFileName = os.path.basename(strImageFileString)
    strImageFileNamewoSuffix = os.path.splitext(strImageFileName)[0]
    strImageTarget = os.getcwd() + "/CONVERT/epub/OEBPS/images/" + strImageFileDir + strImageFileName
    shutil.copy(os.getcwd() + "/" + strImageFileString, strImageTarget)
    strImageFilepath = sanitizeImageEpub(strImageTarget)
    # Add copied file to contentopf
    strImageHref = "images/" + strImageFileDir + strImageFileNamewoSuffix + ".jpg"
    contentopf = addToContentopf(contentopf, strImageHref, strImageFileDir + strImageFileNamewoSuffix + "jpg", "jpg")
    # Collect everything still needed from the figure before it is cleared
    xmlFigureCaption = xmlFigure.find(".//caption")
    idFigure = xmlFigure.find(".//anchor").get("id")
    intFigureNumber = dictFigures[idFigure]
    if xmlFigure.tag == "EOAlsfigure":
        # EOAlsfigure always uses the full width
        strImageWidth = "100"
    else:
        strImageWidth = xmlFigure.find(".//width").text.rstrip("\n")
    # Rebuild the figure element as a paragraph holding only the image
    xmlFigure.clear()
    xmlFigure.tag = "p"
    xmlFigureImage = etree.Element("img")
    xmlFigureImage.set("src", strImageHref)
    xmlFigureImage.set("alt", "")
    xmlFigureImage.set("style", "width: " + strImageWidth + "%")
    xmlFigure.append(xmlFigureImage)
    # The caption becomes a numbered paragraph right after the image paragraph
    xmlFigureCaption.tag = "p"
    strFigureCaption = xmlFigureCaption.text or ""
    xmlFigureCaption.text = "Figure " + str(intFigureNumber) + ": " + strFigureCaption
    xmlFigure.addnext(xmlFigureCaption)
print("-----------------------------------------------------")
print("Preparing not numbered Figures")
# Unnumbered figures become <p><img/></p>; they carry no caption or number
xmlFigures = xmlEbookTree.findall(".//EOAfigurenonumber")
for xmlFigure in xmlFigures:
    # Copy File of the Image
    # If it's in a subfolder, name of folder and name of image will be merged
    strImageFileString = xmlFigure.find(".//file").text.rstrip("\n")
    strImageFileDir = os.path.dirname(strImageFileString).replace("/", "")
    strImageFileName = os.path.basename(strImageFileString)
    strImageFileNamewoSuffix = os.path.splitext(strImageFileName)[0]
    strImageTarget = os.getcwd() + "/CONVERT/epub/OEBPS/images/" + strImageFileDir + strImageFileName
    shutil.copy(os.getcwd() + "/" + strImageFileString, strImageTarget)
    strImageFilepath = sanitizeImageEpub(strImageTarget)
    # Add copied file to contentopf
    strImageHref = "images/" + strImageFileDir + strImageFileNamewoSuffix + ".jpg"
    contentopf = addToContentopf(contentopf, strImageHref, strImageFileDir + strImageFileNamewoSuffix + "jpg", "jpg")
    # The width has to be read before the element is cleared
    strImageWidth = xmlFigure.find(".//width").text.rstrip("\n")
    xmlFigure.clear()
    xmlFigure.tag = "p"
    xmlFigureImage = etree.Element("img")
    xmlFigureImage.set("src", strImageHref)
    xmlFigureImage.set("alt", "")
    xmlFigureImage.set("style", "width: " + strImageWidth + "%")
    xmlFigure.append(xmlFigureImage)
print("-----------------------------------------------------")
print("Preparing Footnotes")
def alph_footnote_index(fndex):
    """
    Return the lowercase Latin label for a zero-based footnote index.

    More than 26 values are supported: after 'z' the labels continue with
    'aa', 'ab', ... and after 'zz' with 'aaa'.

    Raises ValueError for a negative index (this used to recurse until a
    RecursionError was raised).

    >>> alph_footnote_index(0)
    'a'
    >>> alph_footnote_index(1)
    'b'
    >>> alph_footnote_index(24)
    'y'
    >>> alph_footnote_index(25)
    'z'
    >>> alph_footnote_index(26)
    'aa'
    >>> alph_footnote_index(27)
    'ab'
    """
    if fndex < 0:
        raise ValueError("footnote index must not be negative: %r" % (fndex,))
    alphabet = "abcdefghijklmnopqrstuvwxyz"
    letters = []
    remaining = fndex
    while True:
        remaining, position = divmod(remaining, len(alphabet))
        letters.append(alphabet[position])
        if not remaining:
            break
        # the labels have no "zero" letter, so the carry is reduced by one
        remaining -= 1
    # the letters were collected from the last one to the first one
    return "".join(reversed(letters))
def replace_footnote_equations(footnote):
    """
    Turn every unnumbered equation inside a footnote into <p><img/></p>.

    For each EOAequationnonumber element below `footnote` the image named in
    its "filename" attribute is copied from items/ to
    CONVERT/epub/OEBPS/images/ and registered via addToContentopf.

    The global contentopf is only read here; the updated value is returned
    and has to be assigned by the caller:
    usage: contentopf = replace_footnote_equations(my_footnote)
    """
    result = contentopf
    for equation in footnote.findall(".//EOAequationnonumber"):
        filename = equation.get("filename")
        equation.clear()
        equation.tag = "p"
        img = etree.Element("img", src="images/%s" % filename, alt="")
        equation.append(img)
        cwd = os.getcwd()
        # the target folder is OEBPS (it was misspelled as "DEBPS", which made the copy fail)
        shutil.copy("%s/items/%s" % (cwd, filename), "%s/CONVERT/epub/OEBPS/images/%s" % (cwd, filename))
        result = addToContentopf(result, "images/" + filename, filename, "png")
    return result
def replace_footnote_with_sup(note):
    """
    Empty a footnote element in place and rename it to <sup>.

    clear() removes text, attributes and children and also resets the tail,
    so the tail is remembered beforehand and put back afterwards. The caller
    is expected to set the new text of the element.
    """
    preserved_tail = note.tail
    note.clear()
    note.tag = "sup"
    note.tail = preserved_tail
def bring_footnote_down_epub(footnote, footnote_name, destination):
    """
    Move the content of a footnote into `destination` and leave a marker behind.

    footnote      -- the footnote element; it is turned into <sup>[name] </sup>
    footnote_name -- the label of the footnote, used for the "[name]" marker
    destination   -- the element that receives the text and the children of
                     the footnote

    Unnumbered equations inside the footnote are converted first by
    replace_footnote_equations. Its result (the updated contentopf) is
    returned, see the usage note there.
    """
    contentopf = replace_footnote_equations(footnote) # see usage note
    kids = list(footnote)
    prefix = "[%s]" % footnote_name
    # we would like to prepend this footnote identifier to the footnote element
    if footnote.text is not None:
        # if the element starts with some text anyway, prepend it there
        # (the "%" operator was missing here, which raised a TypeError)
        footnote.text = "%s %s" % (prefix, footnote.text)
    elif kids:
        # if, however, the element begins with a child, prepend the text at the
        # beginning of the first child instead; separate both with a space,
        # unless the child had no text to begin with
        # (this branch used an undefined name and raised a NameError)
        first_child = kids[0]
        if first_child.text is None:
            first_child.text = prefix
        else:
            first_child.text = prefix + " " + first_child.text
    else:
        # a totally empty footnote is weird, but who am I to judge?
        footnote.text = prefix
    footnote_text = footnote.text or ""
    replace_footnote_with_sup(footnote)
    footnote.text = "[%s] " % footnote_name
    # append any text the footnote used to have to the destination
    destkids = list(destination)
    if destkids:
        # if the destination has children, append after the last one's tail
        last_kid = destkids[-1]
        existing = last_kid.tail
        last_kid.tail = ("" if existing is None else existing + " ") + footnote_text
    else:
        # if the destination has no children, append to its text
        existing = destination.text
        destination.text = ("" if existing is None else existing + " ") + footnote_text
    # finally move the former children of the footnote over
    for kid in kids:
        destination.append(kid)
    return contentopf
class FootnoteError(Exception):
    """
    Raised when a chapter mixes footnote types.

    Only one type of footnote is supported per chapter.
    """
# Collect the footnotes of every chapter in a <div> at the end of the chapter
# and replace each footnote in the running text by a <sup> marker.
for xmlChapter in xmlChapters:
    # groupings is iterated as (grouping, notes) pairs
    groupings = get_bigfoot_data(xmlChapter)
    xmlFootnotes = list(xmlChapter.findall(".//note"))
    has_old = 0 != len(xmlFootnotes)
    has_new = any(True for grouping, notes in groupings for note in notes)
    # both styles in one chapter is an error, none at all skips the chapter
    if has_old and has_new:
        raise FootnoteError("Chapter %s contains both \\EOAfn and footnotes in the style of \\EOAfnalph" % xmlChapter.get("id-text"))
    if not has_old and not has_new:
        continue
    xmlNewFootnotes = etree.Element("div")
    xmlNewFootnotesHeader = etree.Element("h3")
    xmlNewFootnotesHeader.text = dictLangFootnotes[xmlChapter.get("language")]
    xmlNewFootnotes.append(xmlNewFootnotesHeader)
    for grouping, notes in groupings:
        # do for the new-style footnotes what was being done for the old
        for index, note in enumerate(notes):
            footnote_name = str(index + 1)
            if "lower-latin" == grouping:
                footnote_name = alph_footnote_index(index)
            # The paragraph starts out empty: bring_footnote_down_epub adds
            # the "[name]" marker and the footnote text itself. Pre-filling
            # the text here duplicated the footnote (or produced "[n] None"
            # when the footnote started with a child element).
            para = etree.Element("p")
            contentopf = bring_footnote_down_epub(note, footnote_name, para)
            xmlNewFootnotes.append(para)
    intFootnoteNumber = 1
    for xmlFootnote in xmlFootnotes:
        # Not numbered Equations may appear in a footnote, need to be treated differently
        xmlEquationsnonumber = xmlFootnote.findall(".//EOAequationnonumber")
        for xmlEquationnonumber in xmlEquationsnonumber:
            strFilename = xmlEquationnonumber.get("filename")
            xmlEquationnonumber.clear()
            xmlEquationnonumber.tag = "p"
            xmlIMG = etree.Element("img", src="images/"+ strFilename, alt="")
            xmlEquationnonumber.append(xmlIMG)
            shutil.copy(os.getcwd() + "/items/" + strFilename, os.getcwd() + "/CONVERT/epub/OEBPS/images/" + strFilename)
            contentopf = addToContentopf(contentopf, "images/" + strFilename, strFilename, "png")
        # The first child of the footnote gets the "[n] " marker prepended
        xmlFirstChild = xmlFootnote.getchildren()[0]
        if xmlFirstChild.text is None:
            xmlFirstChild.text = "[" + str(intFootnoteNumber) + "] "
        else:
            xmlFirstChild.text = "[" + str(intFootnoteNumber) + "] " + xmlFirstChild.text
        # Preserve tail and children of current <note>-Tag
        xmlFootnoteContentsTail = xmlFootnote.tail
        xmlFootnoteChildren = xmlFootnote.getchildren()
        # Substitute current <note> with Number
        xmlFootnote.clear()
        xmlFootnote.text = "[" + str(intFootnoteNumber) + "]"
        xmlFootnote.tail = xmlFootnoteContentsTail
        xmlFootnote.tag = "sup"
        # NOTE: Anchor not being used for the time being
        for xmlFootnoteChild in xmlFootnoteChildren:
            xmlNewFootnotes.append(xmlFootnoteChild)
        intFootnoteNumber += 1
    xmlChapter.append(xmlNewFootnotes)
print("-----------------------------------------------------")
print("Preparing Lists")
# Ordered and simple lists become <ol>/<ul>, their items <li>.
# Description lists are left alone here.
dictListTags = {"ordered": "ol", "simple": "ul"}
for xmlChapter in xmlChapters:
    xmlLists = xmlChapter.findall(".//list")
    for xmlList in xmlLists:
        strListType = xmlList.get("type")
        if strListType == "description":
            continue
        if strListType in dictListTags:
            xmlList.tag = dictListTags[strListType]
        # all items below the list are renamed, whatever the list type
        xmlListItems = xmlList.findall(".//item")
        for xmlListItem in xmlListItems:
            xmlListItem.tag = "li"
print("-----------------------------------------------------")
print("Preparing Descriptions")
# Every element still called <list> at this point becomes a <dl>;
# its direct children are mapped label -> dt and item -> dd.
for xmlChapter in xmlChapters:
    xmlDescriptions = xmlChapter.findall(".//list")
    for xmlDescription in xmlDescriptions:
        xmlDescription.tag = "dl"
        del xmlDescription.attrib["type"]
        for xmlChild in xmlDescription.iterchildren():
            if xmlChild.tag == "label":
                xmlChild.tag = "dt"
            elif xmlChild.tag == "item":
                xmlChild.tag = "dd"
                # the former item loses its id attributes
                for strAttribute in ("id", "id-text"):
                    del xmlChild.attrib[strAttribute]
print("-----------------------------------------------------")
print("Preparing Blockquotes")
# A paragraph with rend="quoted" becomes <blockquote><p>...</p></blockquote>
xmlParagraphs = xmlEbookTree.findall(".//p")
for xmlParagraph in xmlParagraphs:
    if xmlParagraph.get("rend") != "quoted":
        continue
    strParagraphText = xmlParagraph.text
    strParagraphTail = xmlParagraph.tail
    xmlParagraphChildren = xmlParagraph.getchildren()
    # clear() drops text, attributes, children and the tail
    xmlParagraph.clear()
    xmlParagraph.tag = "blockquote"
    # The tail is the text following the quoted paragraph, so it has to stay
    # behind the blockquote; it used to be attached to the inner <p>, which
    # moved that text inside the blockquote.
    xmlParagraph.tail = strParagraphTail
    xmlNew = etree.Element("p")
    if strParagraphText is not None:
        xmlNew.text = strParagraphText
    for xmlParagraphChild in xmlParagraphChildren:
        xmlNew.append(xmlParagraphChild)
    xmlParagraph.append(xmlNew)
print("-----------------------------------------------------")
print("Preparing Theorems")
# A theorem becomes one paragraph: its head turns into <b>title number</b>
# and the tags of the inner paragraphs are stripped.
for xmlChapter in xmlChapters:
    xmlTheorems = xmlChapter.findall(".//theorem")
    for xmlTheorem in xmlTheorems:
        xmlTheoremHead = xmlTheorem.find(".//head")
        xmlTheoremBody = xmlTheorem.find(".//p")
        strTheoremTitel = xmlTheoremHead.text
        strTheoremText = xmlTheoremBody.text
        xmlTheoremTextTail = xmlTheoremBody.tail
        strTheoremNumber = xmlTheorem.get("id-text")
        xmlTheorem.tag = "p"
        xmlTheoremHead.tag = "b"
        xmlTheoremHead.text = strTheoremTitel + " " + strTheoremNumber
        # these attributes are dropped from the resulting paragraph
        for strAttribute in ("style", "type", "id-text", "id"):
            del xmlTheorem.attrib[strAttribute]
        etree.strip_tags(xmlTheorem, "p")
print("-----------------------------------------------------")
print("Preparing Hyperlinks")
# Every <xref url="..."> becomes <a href="...">, with the URL as link text
for xmlChapter in xmlChapters:
    xmlHyperlinks = xmlChapter.findall(".//xref")
    for xmlHyperlink in xmlHyperlinks:
        strURL = xmlHyperlink.get('url')
        print(strURL)
        # Only URLs without a scheme get "http://" prepended; checking for
        # "http://" alone turned https links into "http://https://...".
        if not strURL.startswith(("http://", "https://")):
            strURL = "http://" + strURL
        xmlHyperlink.tag = "a"
        del xmlHyperlink.attrib["url"]
        xmlHyperlink.set("href", strURL)
        # drop the <allowbreak/> children together with their tails
        etree.strip_elements(xmlHyperlink, "allowbreak", with_tail=True)
        xmlHyperlink.text = strURL
# <hi rend="it"> becomes <em>, <hi rend="bold"> becomes <b>; the rend
# attribute is removed from the converted elements. One pass per kind.
for strMessage, strRend, strNewTag in (
    ("Convert emphasized text", "it", "em"),
    ("Convert bold text", "bold", "b"),
):
    print("-----------------------------------------------------")
    print(strMessage)
    for xmlChapter in xmlChapters:
        for xmlHighlighted in xmlChapter.findall(".//hi"):
            if xmlHighlighted.get("rend") == strRend:
                xmlHighlighted.tag = strNewTag
                del xmlHighlighted.attrib["rend"]
# Simple renamings of inline markup: message to print, old tag, new tag and
# the inline style to set (None where no style is set).
listInlineConversions = (
    ("Convert EOAup to <sup>", "EOAup", "sup", None),
    ("Convert EOAdown to <sub>", "EOAdown", "sub", None),
    ("Convert EOAst to <span>", "EOAst", "span", "text-decoration: line-through;"),
    ("Convert EOAls to something nice", "EOAls", "span", "letter-spacing: 0.5em;"),
    ("Convert EOAcaps to something nice", "EOAcaps", "span", "font-variant:small-caps;"),
)
for strMessage, strOldTag, strNewTag, strStyle in listInlineConversions:
    print("-----------------------------------------------------")
    print(strMessage)
    for xmlChapter in xmlChapters:
        for xmlInlineMarkup in xmlChapter.findall(".//" + strOldTag):
            xmlInlineMarkup.tag = strNewTag
            if strStyle is not None:
                xmlInlineMarkup.set("style", strStyle)
print("-----------------------------------------------------")
print("Convert EOAineq into appropriate IMG-Tags")
# Inline equations become <img>; the image named in "src" is copied from
# items/ into the EPUB image folder and registered in contentopf.
for xmlChapter in xmlChapters:
    xmlInlineEquations = xmlChapter.findall(".//EOAineq")
    for xmlInlineEquation in xmlInlineEquations:
        xmlInlineEquation.tag = "img"
        xmlInlineEquation.set("alt", "")
        del xmlInlineEquation.attrib["TeX"]
        strEquationImage = xmlInlineEquation.get("src")
        shutil.copy(os.getcwd() + "/items/" + strEquationImage, os.getcwd() + "/CONVERT/epub/OEBPS/images/" + strEquationImage)
        strEquationHref = "images/" + strEquationImage
        xmlInlineEquation.set("src", strEquationHref)
        # the prefixed path serves as file name and as id alike
        contentopf = addToContentopf(contentopf, strEquationHref, strEquationHref, "png")
print("-----------------------------------------------------")
print("Convert EOAinline into appropriate IMG-Tags")
# Inline images become <img>; the file named in the element text is copied
# into the EPUB image folder, scaled with "gm convert -resize 20x20" and
# registered in contentopf.
for xmlChapter in xmlChapters:
    xmlInlineElements = xmlChapter.findall(".//EOAinline")
    for xmlInlineElement in xmlInlineElements:
        xmlInlineElement.tag = "img"
        xmlInlineElement.set("alt", "Too late")
        strInlineElementFilePath = xmlInlineElement.text
        strInlineElementFileName = os.path.basename(strInlineElementFilePath)
        # Folder and file name are merged into one flat name, as is done for
        # the figures. Nested folders (e.g. Images/png_300dpi/A.png) used to
        # keep a "/" in the target name, so the copy failed because that
        # subfolder does not exist below OEBPS/images.
        strInlineElementDirName = os.path.dirname(strInlineElementFilePath).replace("/", "")
        strNewImagePath = os.getcwd() + "/CONVERT/epub/OEBPS/images/" + strInlineElementDirName + strInlineElementFileName
        shutil.copy(os.getcwd() + "/" + strInlineElementFilePath, strNewImagePath)
        # The arguments are passed as a list, so paths containing spaces
        # survive; only GM_PATH itself is split as before.
        listArguments = shlex.split(GM_PATH) + ["convert", strNewImagePath, "-resize", "20x20", strNewImagePath]
        subprocess.check_output(listArguments, shell=False)
        xmlInlineElement.set("src", "images/" + strInlineElementDirName + strInlineElementFileName)
        # contentopf, Filename, FileID, Mediatype
        # The media type is derived from the file extension
        extension = strInlineElementFileName.split(".")[-1]
        # The element text is taken as id in content.opf, so it is only
        # emptied after addToContentopf (it would be visible in the epub otherwise)
        contentopf = addToContentopf(contentopf, "images/" + strInlineElementDirName + strInlineElementFileName, xmlInlineElement.text, extension)
        xmlInlineElement.text = ""
print("-----------------------------------------------------")
print("Preparing Equations")

def _make_equation_image(strImageName):
    """Return an <img> element that points at images/<strImageName> with an empty alt text."""
    xmlImage = etree.Element("img")
    xmlImage.set("src", "images/" + strImageName)
    xmlImage.set("alt", "")
    return xmlImage

# Numbered equations: <p><img/></p> followed by a paragraph "(number)"
for xmlChapter in xmlChapters:
    xmlEquations = xmlChapter.findall(".//EOAequation")
    for xmlEquation in xmlEquations:
        # The number has to be read before the element is cleared
        strNumber = xmlEquation.get("number")
        strEquationNumber = strNumber
        strFilename = xmlEquation.get("filename")
        # Copy image of Equation
        shutil.copy(os.getcwd() + "/items/" + strFilename, os.getcwd() + "/CONVERT/epub/OEBPS/images/" + strFilename)
        contentopf = addToContentopf(contentopf, "images/" + strFilename, strFilename, "png")
        # Rework XML
        xmlEquation.clear()
        xmlEquation.tag = "p"
        xmlEquationImage = _make_equation_image(strFilename)
        xmlEquation.append(xmlEquationImage)
        xmlNew = etree.Element('p')
        xmlNew.text = "(" + strEquationNumber + ")"
        xmlEquation.addnext(xmlNew)
# Unnumbered equations: the same, but without the number paragraph
for xmlChapter in xmlChapters:
    xmlEquations = xmlChapter.findall(".//EOAequationnonumber")
    for xmlEquation in xmlEquations:
        strFilename = xmlEquation.get("filename")
        # Copy image of Equation
        shutil.copy(os.getcwd() + "/items/" + strFilename, os.getcwd() + "/CONVERT/epub/OEBPS/images/" + strFilename)
        contentopf = addToContentopf(contentopf, "images/" + strFilename, strFilename, "png")
        # Rework XML
        xmlEquation.clear()
        xmlEquation.tag = "p"
        xmlEquationImage = _make_equation_image(strFilename)
        xmlEquation.append(xmlEquationImage)
# EOAequationarray is not handled beyond renaming it to <div>
for xmlChapter in xmlChapters:
    xmlEquationarrays = xmlChapter.findall(".//EOAequationarray")
    for xmlEquationarray in xmlEquationarrays:
        xmlEquationarray.tag = "div"
print("-----------------------------------------------------")
print("Preparing Letterheads")
# Each part of a letterhead becomes a paragraph whose first child is
# renamed to <em>; two <hr/> are inserted at positions 0 and 5.
for xmlChapter in xmlChapters:
    xmlLetterheads = xmlChapter.xpath(".//EOAletterhead")
    print(len(xmlLetterheads))
    for xmlLetterhead in xmlLetterheads:
        xmlRecipient = xmlLetterhead.find(".//Recipient")
        print(etree.tostring(xmlRecipient))
        xmlRecipient.tag = "p"
        xmlRecipient[0].tag = "em"
        # the remaining parts get the same treatment, in this order
        for strPartName in ("Archive", "Additional", "Pages"):
            xmlLetterheadPart = xmlLetterhead.find(".//" + strPartName)
            xmlLetterheadPart.tag = "p"
            xmlLetterheadPart[0].tag = "em"
        xmlHR = etree.Element("hr")
        xmlHR2 = etree.Element("hr")
        xmlLetterhead.insert(0, xmlHR)
        xmlLetterhead.insert(5, xmlHR2)
print("-----------------------------------------------------")
print("Preparing Transcriptions")
# TODO: May need rework concerning the right Column
# A transcription becomes a table: one row for the two headers and one row
# with a left and a right cell for the text.
for xmlChapter in xmlChapters:
    # Remove <Facsimilelink>
    etree.strip_elements(xmlChapter, "Facsimilelink")
    xmlTranscriptions = xmlChapter.xpath(".//EOAtranscripted")
    for xmlTranscription in xmlTranscriptions:
        print("Processing Transcription")
        print(etree.tostring(xmlTranscription))
        xmlTranscription.tag = "table"
        xmlHeader = xmlTranscription.find(".//EOAtranscriptedheader")
        xmlHeader.tag = "tr"
        xmlLeftHeader = xmlTranscription.find(".//Leftheader")
        print(xmlLeftHeader.text)
        xmlLeftHeader.tag = "td"
        xmlLeftHeader.set("style", "width: 50%")
        xmlRightHeader = xmlTranscription.find(".//Rightheader")
        xmlRightHeader.tag = "td"
        xmlTranscriptedtext = xmlTranscription.find(".//EOAtranscriptedtext")
        # The text is serialised and parsed again, so the children are
        # distributed from a separate copy while the original gets rebuilt
        strTranscriptedtext = etree.tostring(xmlTranscriptedtext, encoding="unicode")
        xmlLeftColumn = etree.Element("td")
        xmlRightColumn = etree.Element("td")
        boolRightColumn = False
        xmlTemp = etree.XML(strTranscriptedtext)
        for xmlElement in xmlTemp.iterchildren():
            if xmlElement.tag == "pagebreak":
                # everything after the first pagebreak goes to the right cell;
                # the pagebreak itself is dropped
                boolRightColumn = True
                print("Spaltenwechsel!")
                continue
            xmlTargetColumn = xmlRightColumn if boolRightColumn else xmlLeftColumn
            xmlTargetColumn.append(xmlElement)
        # Rebuild the text element as the table row holding both cells
        xmlTranscriptedtext.clear()
        xmlTranscriptedtext.tag = "tr"
        xmlTranscriptedtext.set("valign", "top")
        xmlTranscriptedtext.append(xmlLeftColumn)
        xmlTranscriptedtext.append(xmlRightColumn)
print ("-----------------------------------------------------") | |
print ("Preparing Tables") | |
# Rewrites every <EOAtable> of the ePub tree as plain HTML table markup:
# a caption paragraph is built for numbered tables, column widths and
# alignments are derived from <EOAtablecolumns>, and row/cell elements are
# renamed to tr/th/td.
# NOTE(review): indentation was lost in this copy of the file, so the nesting
# of the statements below has to be inferred from the logic.
intChapterNumber = 1 | |
for xmlChapter in xmlChapters: | |
xmlTables = xmlChapter.findall(".//EOAtable") | |
for xmlTable in xmlTables: | |
xmlRawTable = xmlTable.find(".//table") | |
strTableCaption = xmlTable.find(".//EOAtablecaption").text or "" | |
print (strTableCaption) | |
# The caption text "nonumber" marks a table without number and caption
if strTableCaption != "nonumber": | |
intTableNumber = dictTables[xmlTable.find(".//EOAtablelabel").text] | |
xmlTableCaption = etree.Element("p") | |
print (strTableCaption) | |
print (intTableNumber) | |
xmlTableCaption.text = str(intTableNumber) + " " + strTableCaption | |
# NOTE(review): getchildren() returns a list, so this test is always true
if xmlTable.find(".//EOAtablecaption").getchildren() is not None: | |
for xmlChild in xmlTable.find(".//EOAtablecaption").iterchildren(): | |
xmlTableCaption.append(xmlChild) | |
# The caption paragraph is placed directly after the raw table
xmlRawTable.addnext(xmlTableCaption) | |
xmlTable.find(".//EOAtablecaption").clear() | |
xmlTable.remove(xmlTable.find(".//EOAtablecaption")) | |
xmlTable.find(".//EOAtablelabel").clear() | |
xmlTable.remove(xmlTable.find(".//EOAtablelabel")) | |
# Analyze Width and Alignment of the Columns | |
strColumnString = xmlTable.find(".//EOAtablecolumns").text | |
strColumnString = re.sub(r"\|", "", strColumnString) | |
xmlTable.remove(xmlTable.find(".//EOAtablecolumns")) | |
# NOTE(review): inside a character class "|" is a literal, so [L|R|C] would
# also match "|"; the pipes have already been removed one statement earlier
reMatchObjects = re.findall(r'([L|R|C].*?cm)', strColumnString) | |
intTableWidth = 0 | |
# Index 0 is a placeholder: columns are addressed 1-based via intCurrentColumn
listColumnAlignments = [None] | |
listColumnWidths = [None] | |
intNumberOfColumns = 0 | |
for strColumnDefinition in reMatchObjects: | |
# rstrip() removes any trailing run of the given characters, not a suffix
strColumnDefinition = strColumnDefinition.rstrip("cm") | |
strColumnDefinition = strColumnDefinition.rstrip("mm") | |
strColumnAlignment = strColumnDefinition[0] | |
if strColumnAlignment == "L": | |
strColumnAlignment = "left" | |
if strColumnAlignment == "C": | |
strColumnAlignment = "center" | |
if strColumnAlignment == "R": | |
strColumnAlignment = "right" | |
listColumnAlignments.append(strColumnAlignment) | |
# presumably 75 is a pixels-per-centimetre factor -- TODO confirm
intColumnWidth = int(float(strColumnDefinition.lstrip("LRC")) * 75) | |
listColumnWidths.append(intColumnWidth) | |
intTableWidth += intColumnWidth | |
intNumberOfColumns += 1 | |
xmlRawTable.set("width", str(intTableWidth)) | |
# NOTE(review): each del raises KeyError when the attribute is absent
del xmlRawTable.attrib["rend"] | |
del xmlRawTable.attrib["id-text"] | |
del xmlRawTable.attrib["id"] | |
del xmlRawTable.attrib["place"] | |
# Figure out and deal with the Header | |
xmlHeader = xmlRawTable.find(".//row/cell/tableheader") | |
if xmlHeader is not None: | |
# Drop the <tableheader> marker but keep its tail as the text of the cell
xmlHeader.text = "" | |
xmlHeader.getparent().text = xmlHeader.tail | |
xmlHeader.getparent().remove(xmlHeader) | |
xmlFirstRow = xmlRawTable.find(".//row") | |
xmlFirstRow.tag = "tr" | |
xmlFirstRowCells = xmlFirstRow.findall(".//cell") | |
for xmlFirstRowCell in xmlFirstRowCells: | |
xmlFirstRowCell.tag = "th" | |
# Now Deal with the rest of the rows | |
xmlTableRows = xmlRawTable.findall(".//row") | |
for xmlTableRow in xmlTableRows: | |
xmlTableCells = xmlTableRow.findall(".//cell") | |
intCurrentColumn = 1 | |
print (listColumnAlignments) | |
for xmlTableCell in xmlTableCells: | |
xmlTableCell.tag = "td" | |
xmlTableCell.set("align",listColumnAlignments[intCurrentColumn]) | |
xmlTableCell.set("style","width: " + str(listColumnWidths[intCurrentColumn]) + ";") | |
# Deal with multicolumn | |
if xmlTableCell.get("cols") is not None: | |
xmlTableCell.set("colspan", xmlTableCell.get("cols")) | |
if intCurrentColumn > len(xmlTableCells): | |
intCurrentColumn = 1 | |
# Deal with multicolumn again, increase intCurrentColumn by the columns being spanned | |
elif xmlTableCell.get("cols") is not None: | |
intCurrentColumn = intCurrentColumn + int(xmlTableCell.get("cols")) | |
del xmlTableCell.attrib["cols"] | |
else: | |
intCurrentColumn += 1 | |
xmlTableRow.tag = "tr" | |
xmlTableRow.set("valign", "top") | |
# Move the <EOAtable> behind its parent element and then remove that parent
xmlTableParent = xmlTable.getparent() | |
xmlTableParent.addnext(xmlTable) | |
xmlTableParent.getparent().remove(xmlTableParent) | |
intChapterNumber += 1 | |
print ("-----------------------------------------------------")
print ("Preparing Facsimiles")
# Each <EOAfacsimilepage> is replaced by an <svg> wrapper that references a
# copy of the page image placed in the ePub image directory.
xmlParts = xmlEbookTree.findall(".//div0")
for xmlPart in xmlParts:
    xmlFacsimiles = xmlPart.findall(".//EOAfacsimilepage")
    for xmlFacsimile in xmlFacsimiles:
        strImageFile = xmlFacsimile.find(".//file").text
        strFacsimileLabel = xmlFacsimile.find(".//label").text
        etree.strip_elements(xmlFacsimile, "file")
        etree.strip_elements(xmlFacsimile, "label")
        # TODO: Handle a (missing) file suffix somehow and convert files if necessary
        strImageFile = strImageFile.rstrip("\n")
        # The directory part is flattened into the target file name
        strImageFileDir = os.path.dirname(strImageFile).replace("/", "")
        strImageFileName = os.path.basename(strImageFile)
        shutil.copy(
            os.getcwd() + "/" + strImageFile,
            os.getcwd() + "/CONVERT/epub/OEBPS/images/" + strImageFileDir + strImageFileName,
        )
        # Add copied file to contentopf
        contentopf = addToContentopf(
            contentopf,
            "images/" + strImageFileDir + strImageFileName,
            strImageFileDir + strImageFileName,
            "jpg",
        )
        strSVGTemplate = """<svg version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="100%" height="100%" viewBox="0 0 573 800" preserveAspectRatio="xMidYMid meet"></svg>"""
        xmlSVGFacsimile = etree.fromstring(strSVGTemplate)
        # <image> child pointing at the copied file
        xmlNew = etree.Element('image')
        xmlNew.set("width", "600")
        xmlNew.set("height", "800")
        xmlNew.set("{http://www.w3.org/1999/xlink}href", "images/" + strImageFileDir + strImageFileName)
        xmlSVGFacsimile.append(xmlNew)
        xmlFacsimile.getparent().replace(xmlFacsimile, xmlSVGFacsimile)
print ("-----------------------------------------------------")
print ("Preparing Cross-References")
# Replace every <EOAref> by the number of the object it points to.  The
# lookups run in a fixed order and a later hit overrides an earlier one.
for xmlChapter in xmlChapters:
    xmlReferences = xmlChapter.findall(".//EOAref")
    for xmlReference in xmlReferences:
        print ("XXXXXXXX")
        strResult = "!!! Cross Reference !!!"
        xmlReferenceLabel = xmlReference.find("Label")
        xmlReferenceLabelText = xmlReferenceLabel.text
        xmlReferenceRef = xmlReference.find("ref")
        xmlReferenceRefTarget = xmlReferenceRef.get("target")
        # (lookup key, number dictionary, log message, append key to message?)
        for strLookupKey, dictLookup, strLookupMessage, boolLookupShowKey in (
            (xmlReferenceLabelText, dictEquations, "Verweis auf Array gefunden:", True),
            (xmlReferenceRefTarget, dictEquations, "Verweis auf Equation gefunden:", True),
            (xmlReferenceRefTarget, dictLists, "Verweis auf Liste gefunden", False),
            (xmlReferenceRefTarget, dictChapters, "Verweis auf Kapitel gefunden", False),
            (xmlReferenceRefTarget, dictSections, "Verweis auf Section gefunden", False),
            (xmlReferenceRefTarget, dictFigures, "Verweis auf Abbildung gefunden", False),
            (xmlReferenceRefTarget, dictFootnotes, "Verweis auf Fussnote gefunden", False),
            (xmlReferenceRefTarget, dictTheorems, "Verweis auf Theorem gefunden", False),
            (xmlReferenceLabelText, dictTables, "Verweis auf Tabelle gefunden", False),
        ):
            if strLookupKey in dictLookup:
                if boolLookupShowKey:
                    print (strLookupMessage + strLookupKey)
                else:
                    print (strLookupMessage)
                strResult = dictLookup[strLookupKey]
        # clear() resets the tail as well, so it is saved and restored
        tmpTail = xmlReference.tail or ""
        print ("XXXXXXXX")
        xmlReference.clear()
        xmlReference.text = strResult
        xmlReference.tail = tmpTail
# Substitute Page-References with their targets
for xmlChapter in xmlChapters:
    xmlReferences = xmlChapter.findall(".//EOApageref")
    for xmlReference in xmlReferences:
        xmlReferenceLabel = xmlReference.find("Label")
        xmlReferenceLabelText = xmlReferenceLabel.text
        xmlReferenceRef = xmlReference.find("ref")
        xmlReferenceRefTarget = xmlReferenceRef.get("target")
        if xmlReferenceLabelText in dictPagelabels:
            print ("Verweis auf Seite gefunden" + xmlReferenceLabelText)
            strResult = dictPagelabels[xmlReferenceLabelText]
        else:
            strResult = "!!! Page Reference !!!"
        tmpTail = xmlReference.tail or ""
        xmlReference.clear()
        xmlReference.text = strResult
        xmlReference.tail = tmpTail
# Correcting References to Publications
# NOTE: This may be reworked in the future to enable popups in the ebook
# NOTE: For the time being, span is going to be removed
for xmlChapter in xmlChapters:
    xmlPublicationreferences = xmlChapter.findall(".//span")
    for xmlPublicationreference in xmlPublicationreferences:
        # Only popover spans are renamed
        if xmlPublicationreference.get("rel") != "popover":
            continue
        xmlPublicationreference.tag = "EOAcitation"
############################################################## | |
# Finish ePub Conversion, save File # | |
############################################################## | |
print ("-----------------------------------------------------")
print ("Cleaning up XML")
# Empty all index markers but keep the running text that follows them:
# clear() resets the tail too, hence the save/restore around it.
xmlIndexentries = xmlEbookTree.xpath(".//EOAindex | .//EOAindexperson | .//EOAindexlocation")
for xmlIndexentry in xmlIndexentries:
    tmpTail = xmlIndexentry.tail or ""
    xmlIndexentry.clear()
    xmlIndexentry.tail = tmpTail
# Unwrap helper elements (their text and children stay in place)
etree.strip_tags(
    xmlEbookTree,
    "EOAlabel", "EOAindex", "EOApageref", "EOAcitenumeric", "EOAtable",
    "EOAref", "note", "div", "div2", "div3", "div4", "citetext", "newpage",
    "EOAciteyear", "EOAtablelabel", "hi", "pagebreak", "page", "pagestyle",
    "EOAcitation", "EOAciteauthoryear", "EOAcitemanual",
    "EOAprintbibliography", "EOAindexperson", "EOAprintindex",
    "EOAprintpersonindex", "EOAindexlocation", "EOAprintlocationindex",
    "anchor", "temp", "EOAletterhead",
)
# Remove the listed attributes from every element of the tree
etree.strip_attributes(
    xmlEbookTree,
    "id-text", "id", "noindent", "type", "label", "spacebefore", "rend",
)
# Remove <citekey> elements entirely, but leave the text following them
etree.strip_elements(xmlEbookTree, "citekey", with_tail=False)
# Write every Part and Chapter into one file
#
# Each <div1> becomes chapterN.xhtml.  When a chapter is the first one seen
# inside a <div0> part, a separate title page for that part is written first.
# Every written file is registered in toc.ncx and content.opf, and
# intTechnicalChapterNumber numbers the files consecutively.
xmlChapters = xmlEbookTree.findall("//div1")
# Parts that already have a title page.  The part elements themselves are
# remembered: the "id" attribute has been stripped from the whole tree during
# clean-up, so get("id") is None for every part and comparing ids would
# silently skip all parts after the first one.
listParts = []
intTechnicalChapterNumber = 1
for xmlChapter in xmlChapters:
    # Load xmlHTMLTemplate
    htmlChapter = etree.parse(SUPPORT_TEMPLATE_PATH + "Templates/epubchapter.xml", xmlChapterParser)
    # Find out, if it's inside a part. If Part has not been worked on, then do it
    xmlChapterParent = xmlChapter.getparent()
    if xmlChapterParent.tag == "div0" and xmlChapterParent not in listParts:
        listParts.append(xmlChapterParent)
        strPartTitle = xmlChapterParent.find(".//head").text
        htmlChapter.find(".//" + htmlns + "title").text = strPartTitle
        xmlNew = etree.Element('h1')
        xmlNew.text = strPartTitle
        htmlChapter.find(".//" + htmlns + "body").append(xmlNew)
        # Save Part; the serialisation carries no XML declaration, so the
        # file has to be UTF-8 regardless of the platform default encoding
        tmpFileName = os.getcwd() + "/CONVERT/epub/OEBPS/chapter" + (str(intTechnicalChapterNumber)) + ".xhtml"
        tmpResult = etree.tostring(htmlChapter, pretty_print=True, encoding="unicode")
        with open(tmpFileName, "w", encoding="utf-8") as tmpFile:
            tmpFile.write(tmpResult)
        # Add to TocNCX
        tocncx = addToTocncx(tocncx, htmlChapter.find(".//" + htmlns + "title").text, intTechnicalChapterNumber)
        contentopf = addToContentopf(contentopf, "chapter" + str(intTechnicalChapterNumber) + ".xhtml", "chapter" + str(intTechnicalChapterNumber), "xml")
        intTechnicalChapterNumber += 1
        # Reset htmlChapter
        htmlChapter = etree.parse(SUPPORT_TEMPLATE_PATH + "Templates/epubchapter.xml", xmlChapterParser)
    # Read all children of div1 and append them to htmlChapter
    xmlChildren = xmlChapter.getchildren()
    for xmlChild in xmlChildren:
        # Using Deepcopy, coz a simple append will delete the original
        htmlChapter.find(".//" + htmlns + "body").append(deepcopy(xmlChild))
    # Save Chapter
    tmpFileName = os.getcwd() + "/CONVERT/epub/OEBPS/chapter" + (str(intTechnicalChapterNumber)) + ".xhtml"
    tmpResult = etree.tostring(htmlChapter, pretty_print=True, encoding="unicode")
    with open(tmpFileName, "w", encoding="utf-8") as tmpFile:
        tmpFile.write(tmpResult)
    # Add to TocNCX
    tocncx = addToTocncx(tocncx, xmlChapter.find(".//h1").text, intTechnicalChapterNumber)
    # Add to content.opf
    contentopf = addToContentopf(contentopf, "chapter" + str(intTechnicalChapterNumber) + ".xhtml", "chapter" + str(intTechnicalChapterNumber), "xml")
    intTechnicalChapterNumber += 1
# Convert Facsimile-Parts
#
# Every <div0> that contains an <EOAfacsimilepart> is written as one more
# chapterN.xhtml, continuing the numbering in intTechnicalChapterNumber, and
# is registered in toc.ncx and content.opf.
xmlParts = xmlEbookTree.findall("//div0")
for xmlPart in xmlParts:
    print ("-------------")
    print ("Working on Facsimile-Part")
    print ("-------------")
    # check if it has a child element EOAfacsimilepart
    xmlHeadline = xmlPart.find(".//EOAfacsimilepart")
    if xmlHeadline is not None:
        htmlChapter = etree.parse(SUPPORT_TEMPLATE_PATH + "Templates/epubchapter.xml", xmlChapterParser)
        # Change EOAfacsimilepart into H1
        xmlHeadline.tag = "h1"
        etree.strip_elements(xmlPart, "head")
        # Read all children of div0 and append them to htmlChapter
        xmlChildren = xmlPart.getchildren()
        for xmlChild in xmlChildren:
            # Using Deepcopy, coz a simple append will delete the original
            htmlChapter.find(".//" + htmlns + "body").append(deepcopy(xmlChild))
        # Save Chapter (written once; UTF-8 because the serialisation has no
        # XML declaration that could announce another encoding)
        tmpFileName = os.getcwd() + "/CONVERT/epub/OEBPS/chapter" + (str(intTechnicalChapterNumber)) + ".xhtml"
        tmpResult = etree.tostring(htmlChapter, pretty_print=True, encoding="unicode")
        with open(tmpFileName, "w", encoding="utf-8") as tmpFile:
            tmpFile.write(tmpResult)
        # Add to TocNCX.  The title is the headline of this part; the stale
        # loop variable xmlChapter of the chapter loop must not be used here.
        tocncx = addToTocncx(tocncx, xmlHeadline.text, intTechnicalChapterNumber)
        # Add to content.opf
        contentopf = addToContentopf(contentopf, "chapter" + str(intTechnicalChapterNumber) + ".xhtml", "chapter" + str(intTechnicalChapterNumber), "xml")
        intTechnicalChapterNumber += 1
# Saving toc.ncx
# Both files are serialised to str first and then written as UTF-8 inside a
# context manager, so the handle is closed even if writing fails and the
# result does not depend on the platform default encoding.
tmpFileName = os.getcwd() + "/CONVERT/epub/OEBPS/toc.ncx"
tmpResult = etree.tostring(tocncx, pretty_print=True, encoding="unicode")
with open(tmpFileName, "w", encoding="utf-8") as tmpFile:
    tmpFile.write(tmpResult)
# Saving content.opf
tmpFileName = os.getcwd() + "/CONVERT/epub/OEBPS/content.opf"
tmpResult = etree.tostring(contentopf, pretty_print=True, encoding="unicode")
with open(tmpFileName, "w", encoding="utf-8") as tmpFile:
    tmpFile.write(tmpResult)
############################################################################ | |
# Convert tralics-XML to Django Data Structure # | |
############################################################################ | |
# Create django File Structure
# makedirs(exist_ok=True) creates whatever is missing, so a partially created
# directory tree from an aborted earlier run is completed instead of skipped.
strDjangoDirectory = os.getcwd() + "/CONVERT/django"
for strDjangoSubdirectory in ("", "/images", "/images/embedded", "/files"):
    os.makedirs(strDjangoDirectory + strDjangoSubdirectory, exist_ok=True)
# Create empty xmlTree
xmlEOAdocument = etree.Element("EOAdocument")
xmlDjangoTree = etree.ElementTree(xmlEOAdocument)
etree.strip_attributes(xmlTree, "noindent")
# Remove temp-Tag
etree.strip_tags(xmlTree, "temp")
# Write Temporary XML-Maintree (serialise first, so a failing serialisation
# does not leave an empty file behind; UTF-8 independent of the locale)
ergebnis = etree.tostring(xmlTree, pretty_print=True, encoding="unicode")
with open("Devel_django.xml", "w", encoding="utf-8") as ergebnisdatei:
    ergebnisdatei.write(ergebnis)
# Find all Chapters from the original tralics XML
xmlChapters = xmlTree.findall("//div1")
def djangoParseObject(xmlElement, indent=False, listtype=None, listnumber=0, uid=None): | |
# Convert one element of the tralics XML into the element(s) used by the
# Django data structure and return the result.
#
# The element is dispatched on its tag or on the descendants it contains.
# Most branches build a new element (often a <temp> wrapper that collects
# several objects); the fallback branch renames xmlElement itself to
# <EOAparagraph>.  Objects receive an "order" attribute taken from the
# global counter intObjectNumber, which is incremented for each of them.
#
# Parameters:
#   xmlElement -- the element to convert; it is modified and children are
#                 moved out of it
#   indent     -- if True, the result gets indent="True"
#   listtype   -- if not None, stored in the "listtype" attribute
#   listnumber -- if not 0, stored in the "listnumber" attribute
#   uid        -- if not None, stored in the "id" attribute
# Returns: the element built for (or from) xmlElement.
#
# NOTE(review): indentation was lost in this copy of the file, so the nesting
# of the statements below has to be inferred from the logic.
# Get Dictionaries of Numbers via Global Variables | |
global dictChapters | |
global dictFigures | |
global dictEquations | |
global dictSections | |
global dictFootnotes | |
global dictPagelabels | |
global dictTables | |
global dictLists | |
global intObjectNumber | |
# Check what kind of Element we have and change the data | |
# --- Transcription: headers plus a left and a right text column ---
if xmlElement.tag == "EOAtranscripted": | |
xmlResult = etree.Element("temp") | |
xmlEOATranscription = etree.Element("EOAtranscription") | |
xmlEOATranscription.set("order", str(intObjectNumber)) | |
intObjectNumber += 1 | |
xmlLeftheader = xmlElement.find(".//Leftheader") | |
etree.strip_tags(xmlLeftheader, "p") | |
xmlEOATranscription.append(xmlLeftheader) | |
xmlRightheader = xmlElement.find(".//Rightheader") | |
etree.strip_tags(xmlRightheader, "p") | |
xmlEOATranscription.append(xmlRightheader) | |
xmlTranscriptedtext = xmlElement.find(".//EOAtranscriptedtext") | |
# change \n\n into </p><p> and pagebreak into </p><pagebreak><p> to create some valid markup
strTranscriptedtext = etree.tostring(xmlTranscriptedtext, encoding="unicode") | |
#strTranscriptedtext = re.sub (r"\n\n", "</p><p>", str(strTranscriptedtext)) | |
#strTranscriptedtext = re.sub (r"<p><pagebreak/></p>", "<pagebreak/>", strTranscriptedtext) | |
xmlLeftColumn = etree.Element("EOAtranscriptionleft") | |
xmlRightColumn = etree.Element("EOAtranscriptionright") | |
boolRightColumn = False | |
xmlTemp = etree.XML(str(strTranscriptedtext)) | |
# NOTE(review): the loop variable reuses the name of the parameter xmlElement
# Children before the first <pagebreak> go left, the following ones go right
for xmlElement in xmlTemp.iterchildren(): | |
if xmlElement.tag == "pagebreak": | |
boolRightColumn = True | |
continue | |
if boolRightColumn == False: | |
xmlLeftColumn.append(xmlElement) | |
if boolRightColumn == True: | |
xmlRightColumn.append(xmlElement) | |
xmlEOATranscription.append(xmlLeftColumn) | |
xmlEOATranscription.append(xmlRightColumn) | |
# Convert Images within the transcription | |
xmlFigures = xmlEOATranscription.findall(".//EOAfigurenonumber") | |
if xmlFigures is not None: | |
for xmlFigure in xmlFigures: | |
strImageFileString = xmlFigure.find(".//file").text | |
strImageFileString = strImageFileString.rstrip("\n") | |
strImageFileDir = os.path.dirname(strImageFileString) | |
strImageFileDir = re.sub("/", "", strImageFileDir) | |
strImageFileName = os.path.basename(strImageFileString) | |
strImageFileNamewoSuffix = os.path.splitext(strImageFileName)[0] | |
# Runs the GM_PATH program ("convert ... -resize 250x250>") and stores the
# result in the embedded image directory
strCommand = GM_PATH + " convert " + os.getcwd() + "/" + strImageFileString + " -resize 250x250\\> " + os.getcwd() + "/CONVERT/django/images/embedded/" + strImageFileDir + strImageFileName | |
listArguments = shlex.split(strCommand) | |
subprocess.check_output(listArguments, shell=False) | |
# NOTE(review): the tail is saved here but not written back after clear()
tmpStrTail = xmlFigure.tail | |
xmlFigure.clear() | |
xmlFigure.tag = "img" | |
xmlFigure.set("src", strImageFileDir + strImageFileName) | |
xmlFigure.set("alt", "") | |
xmlResult.append(xmlEOATranscription) | |
# --- Letterhead: recipient, archive, additional and pages ---
elif xmlElement.tag == "EOAletterhead": | |
xmlResult = etree.Element("temp") | |
xmlEOAletterhead = etree.Element("EOAletterhead") | |
xmlEOAletterrecipient = xmlElement.find(".//Recipient") | |
xmlEOAletterhead.append(xmlEOAletterrecipient) | |
xmlEOAletterarchive = xmlElement.find(".//Archive") | |
xmlEOAletterhead.append(xmlEOAletterarchive) | |
xmlEOAletteradditional = xmlElement.find(".//Additional") | |
xmlEOAletterhead.append(xmlEOAletteradditional) | |
xmlEOAletterpages = xmlElement.find(".//Pages") | |
xmlEOAletterhead.append(xmlEOAletterpages) | |
xmlEOAletterhead.set("order", str(intObjectNumber)) | |
intObjectNumber += 1 | |
xmlResult.append(xmlEOAletterhead) | |
# --- Unnumbered figure contained in the element ---
elif xmlElement.findall(".//EOAfigurenonumber"): | |
xmlResult = etree.Element("temp") | |
# Create basic Element EOAfigurenonumber | |
xmlEOAfigure = etree.Element("EOAfigurenonumber") | |
# Copy Image | |
strImageFileString = xmlElement.find(".//file").text | |
strImageFileString = strImageFileString.rstrip("\n") | |
strImageFileDir = os.path.dirname(strImageFileString) | |
strImageFileDir = re.sub("/", "", strImageFileDir) | |
strImageFileName = os.path.basename(strImageFileString) | |
strImageFileNamewoSuffix = os.path.splitext(strImageFileName)[0] | |
shutil.copy(os.getcwd() + "/" + strImageFileString, os.getcwd() + "/CONVERT/django/images/" + strImageFileDir + strImageFileName) | |
xmlEOAfigure.set("file", strImageFileDir + strImageFileName) | |
xmlEOAfigure.set("width", xmlElement.find(".//width").text) | |
xmlEOAfigure.set("order", str(intObjectNumber)) | |
intObjectNumber += 1 | |
xmlResult.append(xmlEOAfigure) | |
# --- Numbered figure with caption ---
elif xmlElement.tag == "EOAfigure": | |
xmlResult = etree.Element("temp") | |
# Create basic Element EOAfigure | |
xmlEOAfigure = etree.Element("EOAfigure") | |
# Copy Image | |
strImageFileString = xmlElement.find(".//file").text | |
strImageFileString = strImageFileString.rstrip("\n") | |
strImageFileDir = os.path.dirname(strImageFileString) | |
strImageFileDir = re.sub("/", "", strImageFileDir) | |
strImageFileName = os.path.basename(strImageFileString) | |
strImageFileNamewoSuffix = os.path.splitext(strImageFileName)[0] | |
shutil.copy(os.getcwd() + "/" + strImageFileString, os.getcwd() + "/CONVERT/django/images/" + strImageFileDir + strImageFileName) | |
xmlEOAfigure.set("file", strImageFileDir + strImageFileName) | |
xmlEOAfigure.set("width", xmlElement.find(".//width").text) | |
xmlEOAfigure.set("order", str(intObjectNumber)) | |
intObjectNumber += 1 | |
# Insert visual Number and uid | |
strFigureNumber = dictFigures[xmlElement.find(".//anchor").get("id")] | |
xmlEOAfigure.set("number", strFigureNumber) | |
strFigureUID = xmlElement.find(".//anchor").get("id") | |
xmlEOAfigure.set("id", strFigureUID) | |
# Insert Caption | |
xmlEOAfigure.append(xmlElement.find(".//caption")) | |
xmlResult.append(xmlEOAfigure) | |
# --- Table contained in the element ---
elif xmlElement.findall(".//EOAtable"): | |
xmlResult = etree.Element("EOAtable") | |
xmlRawTable = xmlElement.find(".//table") | |
xmlResult.set("order", str(intObjectNumber)) | |
intObjectNumber += 1 | |
xmlResult.append(xmlRawTable) | |
# Copy Number, Label and Caption | |
if xmlElement.find(".//EOAtablecaption").text != "nonumber": | |
xmlResult.append(xmlElement.find(".//EOAtablecaption")) | |
xmlResult.set("label", xmlElement.find(".//EOAtablelabel").text) | |
xmlResult.set("number", dictTables[xmlElement.find(".//EOAtablelabel").text]) | |
xmlResult.set("id", xmlRawTable.get("id")) | |
else: | |
# NOTE(review): the flag is set on xmlElement, not on the returned xmlResult;
# possibly xmlResult was intended -- confirm
xmlElement.set("numbering", "false") | |
#if xmlElement.find(".//EOAtablelabel").text is not None: | |
# Transform width of Columns | |
strColumnString = xmlElement.find(".//EOAtablecolumns").text | |
strColumnString = re.sub(r"\|", "", strColumnString) | |
# NOTE(review): inside a character class "|" is a literal, so [L|R|C] would
# also match "|"; the pipes have already been removed one statement earlier
reMatchObjects = re.findall(r'([L|R|C].*?cm)', strColumnString) | |
intTableWidth = 0 | |
# Index 0 is a placeholder: columns are addressed 1-based via intCurrentColumn
listColumnAlignments = [None] | |
listColumnWidths = [None] | |
intNumberOfColumns = 0 | |
for strColumnDefinition in reMatchObjects: | |
# rstrip() removes any trailing run of the characters "c" and "m"
strColumnDefinition = strColumnDefinition.rstrip("cm") | |
strColumnAlignment = strColumnDefinition[0] | |
if strColumnAlignment == "L": | |
strColumnAlignment = "left" | |
if strColumnAlignment == "C": | |
strColumnAlignment = "center" | |
if strColumnAlignment == "R": | |
strColumnAlignment = "right" | |
listColumnAlignments.append(strColumnAlignment) | |
# presumably 75 is a pixels-per-centimetre factor -- TODO confirm
intColumnWidth = int(float(strColumnDefinition.lstrip("LRC")) * 75) | |
listColumnWidths.append(intColumnWidth) | |
intTableWidth += intColumnWidth | |
intNumberOfColumns += 1 | |
xmlRawTable.set("width", str(intTableWidth)) | |
# Figure out and deal with the Header | |
xmlHeader = xmlRawTable.find(".//row/cell/tableheader") | |
if xmlHeader is not None: | |
# Drop the <tableheader> marker but keep its tail as the text of the cell
xmlHeader.text = "" | |
xmlHeader.getparent().text = xmlHeader.tail | |
xmlHeader.getparent().remove(xmlHeader) | |
xmlFirstRow = xmlRawTable.find(".//row") | |
xmlFirstRow.tag = "tr" | |
xmlFirstRowCells = xmlFirstRow.findall(".//cell") | |
for xmlFirstRowCell in xmlFirstRowCells: | |
xmlFirstRowCell.tag = "th" | |
# Now Deal with the rest of the rows | |
xmlTableRows = xmlRawTable.findall(".//row") | |
for xmlTableRow in xmlTableRows: | |
xmlTableCells = xmlTableRow.findall(".//cell") | |
intCurrentColumn = 1 | |
for xmlTableCell in xmlTableCells: | |
xmlTableCell.tag = "td" | |
xmlTableCell.set("align",listColumnAlignments[intCurrentColumn]) | |
xmlTableCell.set("style","width: " + str(listColumnWidths[intCurrentColumn]) + ";") | |
# Deal with multicolumn | |
if xmlTableCell.get("cols") is not None: | |
xmlTableCell.set("colspan", xmlTableCell.get("cols")) | |
if intCurrentColumn > len(xmlTableCells): | |
intCurrentColumn = 1 | |
# Deal with multicolumn again, increase intCurrentColumn by the columns being spanned | |
elif xmlTableCell.get("cols") is not None: | |
intCurrentColumn = intCurrentColumn + int(xmlTableCell.get("cols")) | |
del xmlTableCell.attrib["cols"] | |
else: | |
intCurrentColumn += 1 | |
xmlTableRow.tag = "tr" | |
xmlTableRow.set("valign", "top") | |
# --- Ordered and simple lists: every item is flattened into paragraphs ---
elif xmlElement.tag == "list" and xmlElement.get('type') != 'description': | |
xmlResult = etree.Element("temp") | |
if xmlElement.get('type') == 'ordered': | |
# Change first item into EOAlistfirstitem | |
# NOTE(review): the path "..//item" starts at the parent of the list -- confirm
xmlFirstItem = xmlElement.find("..//item") | |
xmlFirstItemElement = xmlFirstItem.getchildren()[0] | |
xmlResult.append(djangoParseObject(xmlFirstItemElement,indent=True, listtype="ordered", listnumber="1", uid=xmlFirstItem.get("id"))) | |
# Process Child Elements which are Part of this item | |
if len(xmlFirstItem.getchildren()) >= 1: | |
for xmlChild in xmlFirstItem.iterchildren(): | |
xmlResult.append(djangoParseObject(xmlChild,indent=True)) | |
xmlFirstItem.getparent().remove(xmlFirstItem) | |
# Process remaining items in this list | |
tmpIntNumber = 2 | |
for xmlItem in xmlElement.iterchildren(): | |
xmlItemElement = xmlItem.getchildren()[0] | |
xmlResult.append(djangoParseObject(xmlItemElement,indent=True,listtype="ordered",listnumber=str(tmpIntNumber), uid=xmlItem.get("id"))) | |
tmpIntNumber += 1 | |
if len(xmlItem.getchildren()) >= 1: | |
for xmlChild in xmlItem.iterchildren(): | |
xmlResult.append(djangoParseObject(xmlChild, indent=True)) | |
xmlItem.getparent().remove(xmlItem) | |
if xmlElement.get('type') == 'simple': | |
# Change first item into EOAlistfirstitem | |
xmlFirstItem = xmlElement.find("..//item") | |
xmlFirstItemElement = xmlFirstItem.getchildren()[0] | |
xmlResult.append(djangoParseObject(xmlFirstItemElement,indent=True,listtype="unordered", listnumber="-")) | |
# Process Child Elements which are Part of this item | |
if len(xmlFirstItem.getchildren()) >= 1: | |
for xmlChild in xmlFirstItem.iterchildren(): | |
xmlResult.append(djangoParseObject(xmlChild,indent=True)) | |
xmlFirstItem.getparent().remove(xmlFirstItem) | |
for xmlItem in xmlElement.iterchildren(): | |
xmlItemElement = xmlItem.getchildren()[0] | |
xmlResult.append(djangoParseObject(xmlItemElement,indent=True)) | |
if len(xmlItem.getchildren()) >= 1: | |
for xmlChild in xmlItem.iterchildren(): | |
xmlResult.append(djangoParseObject(xmlChild,indent=True)) | |
xmlItem.getparent().remove(xmlItem) | |
# --- Description list: children are consumed in pairs (label, item) ---
elif xmlElement.tag == "list" and xmlElement.get('type') == 'description': | |
xmlResult = etree.Element("temp") | |
while len(xmlElement.getchildren()) != 0: | |
xmlDescription = etree.Element("EOAdescription") | |
xmlDescription.set("order", str(intObjectNumber)) | |
xmlLabel = xmlElement.getchildren()[0] | |
print (etree.tostring(xmlLabel)) | |
xmlItem = xmlElement.getchildren()[1] | |
if len(xmlItem.getchildren()) > 0: | |
xmlContent = xmlItem.getchildren()[0] | |
else: | |
xmlContent = etree.Element("p") | |
xmlLabel.tag = "description" | |
# Appending moves the label out of the list, which shrinks the list
xmlDescription.append(xmlLabel) | |
xmlDescription.append(xmlContent) | |
xmlResult.append(xmlDescription) | |
intObjectNumber += 1 | |
if len(xmlItem.getchildren()) > 0: | |
for xmlChild in xmlItem.iterchildren(): | |
xmlResult.append(djangoParseObject(xmlChild,indent=True)) | |
xmlItem.getparent().remove(xmlItem) | |
# --- Theorem: head and first paragraph ---
elif xmlElement.tag == "theorem": | |
xmlTheoremHead = xmlElement.find(".//head") | |
xmlTheoremText = xmlElement.find(".//p") | |
strTheoremNumber = xmlElement.get("id-text") | |
strTheoremID = xmlElement.get("id") | |
xmlResult = etree.Element("EOAtheorem") | |
xmlResult.append(xmlTheoremHead) | |
xmlResult.append(xmlTheoremText) | |
xmlResult.set("order", str(intObjectNumber)) | |
xmlResult.set("number", strTheoremNumber) | |
xmlResult.set("uid", strTheoremID) | |
intObjectNumber += 1 | |
# --- Equation array: one EOAequation per contained equation ---
elif xmlElement.findall(".//EOAequationarray"): | |
xmlResult = etree.Element("temp") | |
for xmlEquation in xmlElement.findall(".//EOAequation"): | |
xmlEOAequation = etree.Element("EOAequation") | |
xmlEOAequation.set("order", str(intObjectNumber)) | |
intObjectNumber += 1 | |
xmlEOAequation.set("number", xmlEquation.get("number")) | |
xmlEOAequation.set("filename", xmlEquation.get("filename")) | |
if xmlEquation.get("label") is not None: | |
xmlEOAequation.set("label", xmlEquation.get("label")) | |
shutil.copy(os.getcwd() + "/items/" + xmlEquation.get("filename"), os.getcwd() + "/CONVERT/django/images/") | |
xmlEOAequation.set("TeX", xmlEquation.get("TeX")) | |
# NOTE(review): the label is set a second time here; redundant with the check above
if xmlEquation.get("label") is not None: | |
xmlEOAequation.set("label", xmlEquation.get("label")) | |
xmlResult.append(xmlEOAequation) | |
# --- Unnumbered equation array ---
elif xmlElement.findall(".//EOAequationarraynonumber"): | |
xmlResult = etree.Element("temp") | |
for xmlEquation in xmlElement.findall(".//EOAequationarraynonumber"): | |
xmlEOAequation = etree.Element("EOAequation") | |
xmlEOAequation.set("order", str(intObjectNumber)) | |
intObjectNumber += 1 | |
xmlEOAequation.set("number", "") | |
xmlEOAequation.set("filename", xmlEquation.get("filename")) | |
shutil.copy(os.getcwd() + "/items/" + xmlEquation.get("filename"), os.getcwd() + "/CONVERT/django/images/") | |
xmlEOAequation.set("TeX", xmlEquation.get("TeX")) | |
xmlResult.append(xmlEOAequation) | |
elif xmlElement.tag == "EOAequationnonumber": | |
# Process one EOAequation which is not encapsulated | |
xmlResult = etree.Element("EOAequation") | |
xmlResult.set("order", str(intObjectNumber)) | |
intObjectNumber += 1 | |
xmlResult.set("filename", xmlElement.get("filename")) | |
xmlResult.set("TeX", xmlElement.get("TeX")) | |
shutil.copy(os.getcwd() + "/items/" + xmlElement.get("filename"), os.getcwd() + "/CONVERT/django/images/") | |
xmlResult.set("number", "") | |
elif xmlElement.findall(".//EOAequation"): | |
# Process various Equations which may be encapsulated within <p> | |
xmlEquations = xmlElement.findall(".//EOAequation") | |
xmlResult = etree.Element("temp") | |
for xmlEquation in xmlEquations: | |
# Create basic Element EOAequation | |
xmlEOAequation = etree.Element("EOAequation") | |
xmlEOAequation.set("order", str(intObjectNumber)) | |
intObjectNumber += 1 | |
xmlEOAequation.set("number", xmlEquation.get("number")) | |
xmlEOAequation.set("TeX", xmlEquation.get("TeX")) | |
if xmlEquation.get("uid") is not None: | |
xmlEOAequation.set("uid", xmlEquation.get("uid")) | |
shutil.copy(os.getcwd() + "/items/" + xmlEquation.get("filename"), os.getcwd() + "/CONVERT/django/images/") | |
xmlEOAequation.set("filename", xmlEquation.get("filename")) | |
xmlResult.append(xmlEOAequation) | |
elif xmlElement.tag == "EOAequation": | |
# Process one EOAequation which is not encapsulated | |
xmlResult = etree.Element("EOAequation") | |
xmlResult.set("order", str(intObjectNumber)) | |
intObjectNumber += 1 | |
xmlResult.set("number", xmlElement.get("number")) | |
xmlResult.set("TeX", xmlElement.get("TeX")) | |
if xmlElement.get("uid") is not None: | |
xmlResult.set("uid", xmlElement.get("uid")) | |
shutil.copy(os.getcwd() + "/items/" + xmlElement.get("filename"), os.getcwd() + "/CONVERT/django/images/") | |
xmlResult.set("filename", xmlElement.get("filename")) | |
# --- Subsubsection: head first, then every child converted recursively ---
elif xmlElement.tag == "div4": | |
xmlResult = etree.Element("EOAsubsubsection") | |
xmlResult.set("order", str(intObjectNumber)) | |
intObjectNumber += 1 | |
xmlResult.append(xmlElement.find("head")) | |
for xmlChild in xmlElement.iterchildren(): | |
xmlResult.append(djangoParseObject(xmlChild)) | |
# --- Fallback: the element itself becomes an EOAparagraph ---
else: | |
xmlElement.tag = "EOAparagraph" | |
xmlElement.set("order", str(intObjectNumber)) | |
intObjectNumber += 1 | |
xmlResult = xmlElement | |
# Optional attributes requested by the caller (used for list items)
if indent==True: | |
xmlResult.set("indent", "True") | |
if listtype != None: | |
xmlResult.set("listtype", listtype) | |
if listnumber != 0: | |
xmlResult.set("listnumber", listnumber) | |
if uid != None: | |
xmlResult.set("id", uid) | |
return xmlResult | |
def djangoParseHeadline(xmlElement):
    """Split the author string of a headline into one EOAauthor per author.

    The first ``EOAauthor`` descendant of ``xmlElement`` is removed and its
    text is split at commas and at the conjunctions ", and ", " and " and
    " und ".  For every non-empty name a fresh ``EOAauthor`` child holding the
    whitespace-trimmed name is appended to ``xmlElement``.

    Args:
        xmlElement: the headline element; it is modified in place.

    Returns:
        The same ``xmlElement`` object.
    """
    xmlAuthors = xmlElement.find(".//EOAauthor")
    if xmlAuthors is None:
        return xmlElement
    # An empty <EOAauthor/> has no text at all; treat it like "no authors".
    strAuthors = xmlAuthors.text or ""
    # NOTE(review): remove() is called on xmlElement itself, so this assumes
    # the EOAauthor found above is a direct child of the headline.
    xmlElement.remove(xmlAuthors)
    # Normalise every separator to a plain comma before splitting.
    strAuthors = re.sub("(, and | and | und )", ",", strAuthors)
    listAuthors = strAuthors.split(",")
    print(listAuthors)
    for strAuthor in listAuthors:
        # Trim both ends; fragments that are empty (e.g. from "A,, B" or a
        # trailing comma) do not name an author and are skipped.
        strAuthor = strAuthor.strip()
        if not strAuthor:
            continue
        xmlAuthor = etree.Element("EOAauthor")
        xmlAuthor.text = strAuthor
        xmlElement.append(xmlAuthor)
    return xmlElement
# Iterate over chapters, sections, subsections and subsubsections and put
# everything on one level: every headline and every content object becomes a
# direct child of its EOAchapter, in document order.
#
# NOTE(review): intObjectNumber is only advanced here for headlines; the
# visible part of djangoParseObject also increments it, presumably through a
# global declaration outside this section.
intChapterNumber = 1
listPartIDs = []
for xmlChapter in xmlChapters:
    # The "order" counter restarts at 1 in every chapter.
    intObjectNumber = 1
    # Process Chapter Title
    xmlEOAchapter = etree.Element("EOAchapter")
    xmlEOAchapter.set("type", "regular")
    xmlEOAchapter.set("language", xmlChapter.get("language"))
    xmlEOAchapter.set("order", str(intChapterNumber))
    if xmlChapter.get("rend") != "nonumber":
        xmlEOAchapter.set("id", xmlChapter.get("id"))
    xmlChapterHeadline = xmlChapter.find(".//head")
    if xmlChapter.get("id") in dictChapters:
        xmlEOAchapter.set("number", dictChapters[xmlChapter.get("id")])
    else:
        xmlEOAchapter.set("number", "")
    print("-----------------------------------------------------")
    print(gettext(xmlChapterHeadline))
    xmlEOAchapter.append(djangoParseHeadline(xmlChapterHeadline))
    # Deal with EOAauthor
    if xmlChapter.find(".//EOAauthor") is not None:
        xmlEOAchapter.append(xmlChapter.find(".//EOAauthor"))
    # Attach the enclosing part to the first chapter that belongs to it
    xmlPart = xmlChapter.getparent()
    if xmlPart.tag == "div0":
        if xmlPart.get("id") not in listPartIDs:
            listPartIDs.append(xmlPart.get("id"))
            xmlPartHeadline = xmlPart.find("head")
            xmlPartHeadline.tag = "EOAparthtml"
            xmlEOAchapter.append(xmlPartHeadline)
    # Append Chapter to xmlEOAdocument
    xmlEOAdocument.append(xmlEOAchapter)
    # Iterate over children of Chapter
    for xmlChapterChild in xmlChapter.iterchildren():
        if xmlChapterChild.tag == "div2":
            # Process Section Title
            xmlEOAsection = etree.Element("EOAsection")
            xmlEOAsection.set("order", str(intObjectNumber))
            if xmlChapterChild.get("rend") != "nonumber":
                xmlEOAsection.set("id", xmlChapterChild.get("id"))
                xmlEOAsection.set("number", dictSections[xmlChapterChild.get("id")])
            intObjectNumber += 1
            # Take the headline from the section itself, like the subsection
            # and subsubsection branches below do.  Searching the whole
            # chapter returned whichever <head> happened to come first.
            xmlHead = xmlChapterChild.find(".//head")
            xmlEOAsection.append(djangoParseHeadline(xmlHead))
            xmlEOAchapter.append(xmlEOAsection)
            # Iterate over children of Section
            for xmlSectionChild in xmlChapterChild.iterchildren():
                if xmlSectionChild.tag == "div3":
                    # Process Subsection Title
                    xmlEOAsubsection = etree.Element("EOAsubsection")
                    xmlEOAsubsection.set("order", str(intObjectNumber))
                    if xmlSectionChild.get("rend") != "nonumber":
                        xmlEOAsubsection.set("id", xmlSectionChild.get("id"))
                        xmlEOAsubsection.set("number", dictSections[xmlSectionChild.get("id")])
                    intObjectNumber += 1
                    xmlHead = xmlSectionChild.find(".//head")
                    xmlEOAsubsection.append(djangoParseHeadline(xmlHead))
                    xmlEOAchapter.append(xmlEOAsubsection)
                    # Iterate over children of Subsection
                    for xmlSubsectionChild in xmlSectionChild.iterchildren():
                        if xmlSubsectionChild.tag == "div4":
                            # Process Subsubsection Title
                            xmlEOAsubsubsection = etree.Element("EOAsubsubsection")
                            xmlEOAsubsubsection.set("order", str(intObjectNumber))
                            intObjectNumber += 1
                            xmlHead = xmlSubsectionChild.find(".//head")
                            xmlEOAsubsubsection.append(djangoParseHeadline(xmlHead))
                            xmlEOAchapter.append(xmlEOAsubsubsection)
                            # Iterate over children of Subsubsection
                            for xmlSubsubsectionChild in xmlSubsectionChild.iterchildren():
                                xmlEOAchapter.append(djangoParseObject(xmlSubsubsectionChild))
                        else:
                            xmlEOAchapter.append(djangoParseObject(xmlSubsectionChild))
                elif xmlSectionChild.tag == "div4":
                    # A subsubsection directly below a section
                    xmlEOAsubsubsection = etree.Element("EOAsubsubsection")
                    xmlEOAsubsubsection.set("order", str(intObjectNumber))
                    intObjectNumber += 1
                    xmlHead = xmlSectionChild.find(".//head")
                    xmlEOAsubsubsection.append(djangoParseHeadline(xmlHead))
                    xmlEOAchapter.append(xmlEOAsubsubsection)
                    # Iterate over children of Subsubsection
                    for xmlSubsubsectionChild in xmlSectionChild.iterchildren():
                        xmlEOAchapter.append(djangoParseObject(xmlSubsubsectionChild))
                else:
                    xmlEOAchapter.append(djangoParseObject(xmlSectionChild))
        else:
            xmlEOAchapter.append(djangoParseObject(xmlChapterChild))
    intChapterNumber += 1
print("----------------------------------------------")
print("Processing Facsimile Parts")
# For every part (div0) that contains facsimiles, build an EOAfacsimilepart
# with one EOAfacsimilepage per page: the page image is copied into the django
# image folder and, when a fulltext link is given, the transcription is
# downloaded once per mode together with the images it embeds.
listModes = ["text", "textPollux", "xml"]
strBasicURL = "http://mpdl-system.mpiwg-berlin.mpg.de/mpdl/interface/page-fragment.xql?document="
parserECHO = etree.XMLParser()
xmlParts = xmlTree.findall("//div0")
intFacNumber = 1
for xmlPart in xmlParts:
    intObjectNumber = 1
    intFacPartNumber = 1
    if xmlPart.find(".//EOAfacsimilepart") is None:
        continue
    xmlEOAfacsimilepart = etree.Element("EOAfacsimilepart")
    # Facsimile parts continue the chapter numbering.
    xmlEOAfacsimilepart.set("order", str(intChapterNumber))
    xmlEOAfacsimileparthead = xmlPart.find(".//head")
    for xmlChild in xmlEOAfacsimileparthead:
        if xmlChild.tag == "hi":
            xmlChild.tag = "em"
            # pop() instead of del: a <hi> without rend must not abort the run
            xmlChild.attrib.pop("rend", None)
    xmlEOAfacsimilepart.append(xmlEOAfacsimileparthead)
    intChapterNumber += 1
    xmlEOAdocument.append(xmlEOAfacsimilepart)
    xmlFacsimilepages = xmlPart.findall(".//EOAfacsimilepage")
    intFacPageNumber = 1
    for xmlFacsimilepage in xmlFacsimilepages:
        strImageFile = xmlFacsimilepage.find(".//file").text
        strLabel = xmlFacsimilepage.find(".//label").text
        strPagenumber = xmlFacsimilepage.find(".//pagenumber").text or ""
        xmlEOAfacsimilepage = etree.Element("EOAfacsimilepage")
        xmlEOAfacsimilepage.set("order", str(intObjectNumber))
        # TODO: somehow deal with a (missing) file suffix here, and convert
        # files if necessary.
        strImageFile = strImageFile.rstrip("\n")
        # The directory name is glued to the file name (slashes removed) so
        # that all images end up flat in one folder.
        strImageFileDir = os.path.dirname(strImageFile)
        strImageFileDir = re.sub("/", "", strImageFileDir)
        strImageFileName = os.path.basename(strImageFile)
        shutil.copy(os.getcwd() + "/" + strImageFile, os.getcwd() + "/CONVERT/django/images/" + strImageFileDir + strImageFileName)
        intObjectNumber += 1
        # Download transcription for this Page
        strFulltext = xmlFacsimilepage.find(".//fulltext").text
        if strFulltext is not None:
            print("Ein Link zum Volltext wurde gefunden")
            # The fulltext entry has the form "<document>,<page number>"
            listFulltext = strFulltext.split(",")
            strFacsimileURL = listFulltext[0]
            strFacsimilePage = listFulltext[1]
            for strMode in listModes:
                strURL = strBasicURL + strFacsimileURL + "&pn=" + strFacsimilePage + "&mode=" + strMode
                print("Processing Facsimile : " + strURL)
                xmlECHOtree = etree.parse(strURL, parserECHO)
                # Remove ECHO-namespaces
                objectify.deannotate(xmlECHOtree, xsi_nil=True)
                etree.cleanup_namespaces(xmlECHOtree)
                xmlDivs = xmlECHOtree.findall(".//div")
                for xmlDiv in xmlDivs:
                    if xmlDiv.get("class") != "pageContent":
                        continue
                    # Create new EOA-Element
                    xmlEOAfacsimileelement = etree.Element("EOAfacsimileelement")
                    xmlEOAfacsimileelement.set("type", strMode)
                    # Download the images of the <div> and point src at the
                    # local copies
                    xmlImages = xmlDiv.findall(".//img")
                    intFacImgNumber = 1
                    for xmlImage in xmlImages:
                        strImageSrc = xmlImage.get("src")
                        strLocalName = "facsupplements_" + str(intFacNumber) + "_" + str(intFacPageNumber) + "_" + str(intFacImgNumber) + ".jpg"
                        # Pass the arguments as a list: src comes from a
                        # remote document and must not be split into
                        # additional command line arguments.
                        listArguments = ["curl", strImageSrc, "-o", "CONVERT/django/images/" + strLocalName]
                        try:
                            subprocess.check_output(listArguments, shell=False, universal_newlines=True)
                        except (subprocess.CalledProcessError, OSError):
                            # Best effort: an image that cannot be fetched is
                            # dropped (temp tags are stripped below).
                            xmlImage.tag = "temp"
                        else:
                            xmlImage.set("src", strLocalName)
                        intFacImgNumber += 1
                    xmlEOAfacsimileelement.append(xmlDiv)
                    xmlEOAfacsimilepage.append(xmlEOAfacsimileelement)
        intFacPageNumber += 1
        xmlEOAfacsimilepage.set("file", strImageFileDir + strImageFileName)
        xmlEOAfacsimilepage.set("label", str(strLabel))
        xmlEOAfacsimilepage.set("pagenumber", str(strPagenumber))
        xmlEOAfacsimilepart.append(xmlEOAfacsimilepage)
    # Was "=+ 1", which assigned +1 on every pass, so all parts shared the
    # prefix facsupplements_1_ and overwrote each other's images.
    intFacNumber += 1
etree.strip_tags(xmlDjangoTree, "temp")
print("----------------------------------------------")
print("Processing and linking Footnotes for django")
def bring_footnote_down_django(footnote, fragment, footnote_number, object_number, unique_id, destination):
    """
    Replace an inline footnote by a link and append its content to destination.

    The footnote's text and children are saved first.  The element is then
    handed to replace_footnote_with_sup(), receives class "footnote" and an
    <a> child that points to "#" + fragment and shows footnote_number.  The
    saved content is moved into a new EOAfootnote (attributes order, number,
    anchor, id) which is appended to destination; files of
    EOAequationnonumber children are copied into the django image folder.

    Returns object_number + 1, the order value for the next object.
    """
    saved_children = list(footnote.getchildren())
    saved_text = footnote.text or ""
    replace_footnote_with_sup(footnote)
    footnote.set("class", "footnote")

    link = etree.Element("a")
    link.set("href", "#" + fragment)
    link.text = footnote_number
    footnote.append(link)

    new_footnote = etree.Element("EOAfootnote")
    new_footnote.set("order", str(object_number))
    new_footnote.set("number", footnote_number)
    # The anchor is the "order" of the first ancestor that has one; next()
    # raises StopIteration when no ancestor carries the attribute.
    ancestor_orders = (
        ancestor.get("order")
        for ancestor in footnote.iterancestors()
        if ancestor.get("order") is not None
    )
    new_footnote.set("anchor", next(ancestor_orders))
    new_footnote.set("id", unique_id)
    new_footnote.text = saved_text

    for child in saved_children:
        if child.tag == "EOAequationnonumber":
            base_dir = os.getcwd()
            source_file = "%s/items/%s" % (base_dir, child.get("filename"))
            shutil.copy(source_file, "%s/CONVERT/django/images/" % base_dir)
        new_footnote.append(child)
    destination.append(new_footnote)
    return object_number + 1
# Collect the footnotes of every chapter in a trailing footnote section.
# A chapter may use either old-style <note> footnotes or the new-style
# footnotes reported by get_bigfoot_data(), but not both.
xmlEOAchapters = xmlEOAdocument.findall(".//EOAchapter")
for xmlEOAchapter in xmlEOAchapters:
    groupings = get_bigfoot_data(xmlEOAchapter)
    has_old = bool(xmlEOAchapter.findall(".//note"))
    has_new = any(True for grouping, notes in groupings for note in notes)
    # Both kinds: error (should already have been raised during the epub
    # phase).  Neither kind: nothing to do for this chapter.
    if has_old and has_new:
        raise FootnoteError("This chapter contains both old-style footnotes and new-style footnotes")
    if not (has_old or has_new):
        continue
    # Find out the running order of the last item of the chapter.
    # Per footnote, first correct the EOAequationnonumber in <p>, then work
    # through the child elements and attach them to the new footnote.
    # Watch out for paragraphs with indent; those need a blockquote around.
    xmlElement = xmlEOAchapter[-1]
    print(etree.tostring(xmlElement))
    intObjectNumber = int(xmlElement.get("order")) + 1
    intFootnoteNumber = 1
    # Everything is gathered below a temp element that is stripped afterwards
    xmlResult = etree.Element("temp")
    xmlEOAsection = etree.Element("EOAsection")
    xmlEOAsection.set("order", str(intObjectNumber))
    intObjectNumber += 1
    xmlHead = etree.Element("head")
    xmlHead.text = dictLangFootnotes[xmlEOAchapter.get("language")]
    xmlEOAsection.append(xmlHead)
    xmlResult.append(xmlEOAsection)
    # New-style notes: numbered per grouping, letters for "lower-latin"
    for grouping, notes in groupings:
        for index, note in enumerate(notes):
            if grouping == "lower-latin":
                fntext = alph_footnote_index(index)
            else:
                fntext = str(index + 1)
            unique_id = "fn%s" % fntext
            intObjectNumber = bring_footnote_down_django(note, unique_id, fntext, intObjectNumber, unique_id, xmlResult)
    # Old-style notes
    intFootnoteNumber = 1
    xmlFootnotes = xmlEOAchapter.findall(".//note")
    for xmlFootnote in xmlFootnotes:
        # Save what clear() is about to throw away
        xmlFootnoteContent = list(xmlFootnote)
        strFootnoteText = xmlFootnote.text or ""
        tmpTail = xmlFootnote.tail
        tmpStrUID = xmlFootnote.get("id")
        # Turn the note itself into <sup class="footnote"><a href="#fnN">N</a></sup>
        xmlFootnote.clear()
        xmlFootnote.tail = tmpTail
        xmlFootnote.tag = "sup"
        xmlFootnote.set("class", "footnote")
        xmlFootnoteLink = etree.Element("a")
        xmlFootnoteLink.set("href", "#fn%d" % intFootnoteNumber)
        xmlFootnoteLink.text = str(intFootnoteNumber)
        xmlFootnote.append(xmlFootnoteLink)
        # Build the footnote body that goes to the end of the chapter
        xmlEOAfootnote = etree.Element("EOAfootnote")
        xmlEOAfootnote.set("order", str(intObjectNumber))
        intObjectNumber += 1
        xmlEOAfootnote.set("number", str(intFootnoteNumber))
        # The anchor is the order of the first ancestor that has one
        for strOrder in (xmlParent.get("order") for xmlParent in xmlFootnote.iterancestors()):
            if strOrder is not None:
                strFootnoteAnchorNumber = strOrder
                break
        xmlEOAfootnote.set("anchor", strFootnoteAnchorNumber)
        xmlEOAfootnote.set("id", tmpStrUID)
        xmlEOAfootnote.text = strFootnoteText
        for xmlElement in xmlFootnoteContent:
            if xmlElement.tag == "EOAequationnonumber":
                strCwd = os.getcwd()
                shutil.copy(strCwd + "/items/" + xmlElement.get("filename"), strCwd + "/CONVERT/django/images/")
            xmlEOAfootnote.append(xmlElement)
        xmlResult.append(xmlEOAfootnote)
        intFootnoteNumber += 1
    xmlEOAchapter.append(xmlResult)
# Remove temp-Tag
etree.strip_tags(xmlDjangoTree, "temp")
print("----------------------------------------------")
print("Processing various Elements")
# Translate the remaining EOA/TeX specific inline elements of every chapter
# into plain HTML elements.
for xmlEOAchapter in xmlEOAchapters:
    # <hi rend="it"> becomes <em>, <hi rend="bold"> becomes <b>; any other
    # <hi> is left alone.
    for xmlEmph in xmlEOAchapter.findall(".//hi"):
        strRend = xmlEmph.get("rend")
        if strRend == "it":
            xmlEmph.tag = "em"
        elif strRend == "bold":
            xmlEmph.tag = "b"
        else:
            continue
        del xmlEmph.attrib["rend"]
    # <xref url="..."> becomes <a href="...">, showing the URL as link text
    for xmlHyperlink in xmlEOAchapter.findall(".//xref"):
        strURL = xmlHyperlink.get("url")
        if strURL is None:
            # Nothing to link to; leave the element as it is.
            continue
        # Only add a scheme when there is none.  The former test looked for
        # "http://" alone and turned https links into "http://https://...".
        if not strURL.startswith(("http://", "https://")):
            strURL = "http://" + strURL
        xmlHyperlink.tag = "a"
        del xmlHyperlink.attrib["url"]
        xmlHyperlink.set("href", strURL)
        etree.strip_elements(xmlHyperlink, "allowbreak", with_tail=True)
        xmlHyperlink.text = strURL
    # Convert EOAup to <sup> and EOAdown to <sub>
    for xmlUp in xmlEOAchapter.findall(".//EOAup"):
        xmlUp.tag = "sup"
    for xmlDown in xmlEOAchapter.findall(".//EOAdown"):
        xmlDown.tag = "sub"
    # Strike-out, letter-spacing and small caps become styled <span>s
    for strStyledTag, strStyle in (
        ("EOAst", "text-decoration: line-through;"),
        ("EOAls", "letter-spacing: 0.5em;"),
        ("EOAcaps", "font-variant:small-caps;"),
    ):
        for xmlStyled in xmlEOAchapter.findall(".//" + strStyledTag):
            xmlStyled.tag = "span"
            xmlStyled.set("style", strStyle)
    # Convert EOAineq into appropriate IMG-Tags
    for xmlInlineEquation in xmlEOAchapter.findall(".//EOAineq"):
        xmlInlineEquation.tag = "img"
        xmlInlineEquation.set("class", "EOAineq")
        xmlInlineEquation.set("alt", "")
        shutil.copy(os.getcwd() + "/items/" + xmlInlineEquation.get("src"), os.getcwd() + "/CONVERT/django/images/" + xmlInlineEquation.get("src"))
    # Convert EOAinline into appropriate IMG-Tags
    for xmlInlineElement in xmlEOAchapter.findall(".//EOAinline"):
        xmlInlineElement.tag = "img"
        # "EOAinline" used to be set first and was immediately overwritten;
        # only the effective class is set now.
        xmlInlineElement.set("class", "eoainlineimage")
        xmlInlineElement.set("alt", "")
        # The element text holds the path of the image file
        strInlineElementFilePath = xmlInlineElement.text
        strInlineElementFileName = os.path.basename(strInlineElementFilePath)
        strInlineElementDirName = os.path.dirname(strInlineElementFilePath)
        xmlInlineElement.text = None
        xmlInlineElement.set("src", strInlineElementDirName + strInlineElementFileName)
        strNewImagePath = os.getcwd() + "/CONVERT/django/images/embedded/" + strInlineElementDirName + strInlineElementFileName
        shutil.copy(os.getcwd() + "/" + strInlineElementDirName + "/" + strInlineElementFileName, strNewImagePath)
        # Scale the copy in place with GraphicsMagick
        strCommand = GM_PATH + " convert " + strNewImagePath + " -resize 20x20 " + strNewImagePath
        listArguments = shlex.split(strCommand)
        subprocess.check_output(listArguments, shell=False)
    # All citation flavours become spans that create the appropriate link
    for strCitationTag in ("EOAcitenumeric", "EOAciteauthoryear", "EOAciteyear", "EOAcitemanual"):
        for xmlCitation in xmlEOAchapter.findall(".//" + strCitationTag):
            xmlCitation.tag = "span"
            xmlCitation.set("class", "citation")
            xmlCitation.set("rel", "popover")
print("----------------------------------------------")
print("Processing Cross References")


def _find_reference_orders(xmlCandidates, strAttribute, strWanted, strMessage, strResult):
    """Locate the element a cross reference points to.

    Every candidate whose attribute ``strAttribute`` equals ``strWanted`` is
    reported with ``strMessage + strResult``.  Returns a tuple
    ``(order of the enclosing EOAchapter, order of the candidate)`` for the
    last such candidate that has an EOAchapter ancestor, or None.
    """
    tupleOrders = None
    for xmlCandidate in xmlCandidates:
        if xmlCandidate.get(strAttribute) != strWanted:
            continue
        print(strMessage + strResult)
        for xmlParent in xmlCandidate.iterancestors():
            if xmlParent.tag == "EOAchapter":
                tupleOrders = (xmlParent.get("order"), xmlCandidate.get("order"))
    return tupleOrders


# Substitute references with the number of their target and link to it.
# The lookups run in a fixed sequence; a later hit overrides an earlier one.
for xmlEOAchapter in xmlEOAchapters:
    xmlReferences = xmlEOAchapter.findall(".//EOAref")
    for xmlReference in xmlReferences:
        # Placeholder text that stays visible when nothing can be resolved
        strResult = "!!! Cross Reference !!!"
        strChapterOrder = ""
        strObjectOrder = ""
        tupleOrders = None
        # A reference without <Label> or <ref> simply resolves to nothing
        xmlReferenceLabel = xmlReference.find("Label")
        xmlReferenceLabelText = xmlReferenceLabel.text if xmlReferenceLabel is not None else None
        xmlReferenceRef = xmlReference.find("ref")
        xmlReferenceRefTarget = xmlReferenceRef.get("target") if xmlReferenceRef is not None else None
        if xmlReferenceLabelText in dictEquations:
            # Equation addressed by label: grab number from dictionary
            strResult = dictEquations[xmlReferenceLabelText]
            tupleOrders = _find_reference_orders(
                xmlEOAdocument.findall(".//EOAequation"), "label", xmlReferenceLabelText,
                "Erfolgreich Verweis auf Array-Formel gefunden:", strResult) or tupleOrders
        if xmlReferenceRefTarget in dictEquations:
            # Equation addressed by uid: grab number from dictionary
            strResult = dictEquations[xmlReferenceRefTarget]
            tupleOrders = _find_reference_orders(
                xmlEOAdocument.findall(".//EOAequation"), "uid", xmlReferenceRefTarget,
                "Erfolgreich Verweis auf normale Formel gefunden: ", strResult) or tupleOrders
        if xmlReferenceRefTarget in dictLists:
            print("Verweis auf Liste gefunden")
            strResult = dictLists[xmlReferenceRefTarget]
            listListitems = xmlEOAdocument.xpath("//EOAchapter/*[contains(@id, $targetuid)]", targetuid = xmlReferenceRefTarget)
            # Only the first hit is used; no hit leaves the link unresolved
            if listListitems:
                xmlEOAlistitem = listListitems[0]
                for xmlParent in xmlEOAlistitem.iterancestors():
                    if xmlParent.tag == "EOAchapter":
                        tupleOrders = (xmlParent.get("order"), xmlEOAlistitem.get("order"))
        if xmlReferenceRefTarget in dictChapters:
            print("Verweis auf Kapitel gefunden")
            strResult = dictChapters[xmlReferenceRefTarget]
            # Separate loop variable: the outer loop already uses xmlEOAchapter
            for xmlTargetChapter in xmlEOAdocument.findall(".//EOAchapter"):
                if xmlTargetChapter.get("id") == xmlReferenceRefTarget:
                    print("Erfolgreich Verweis auf ein Kapitel bearbeitet: " + strResult)
                    tupleOrders = (xmlTargetChapter.get("order"), "top")
        if xmlReferenceRefTarget in dictTheorems:
            print("Verweis auf ein Theorem gefunden")
            strResult = dictTheorems[xmlReferenceRefTarget]
            tupleOrders = _find_reference_orders(
                xmlEOAdocument.findall(".//EOAtheorem"), "uid", xmlReferenceRefTarget,
                "Erfolgrech Verweis auf ein Theorem bearbeitet: ", strResult) or tupleOrders
        if xmlReferenceRefTarget in dictSections:
            print("Verweis auf Section gefunden")
            strResult = dictSections[xmlReferenceRefTarget]
            tupleOrders = _find_reference_orders(
                xmlEOAdocument.findall(".//EOAsection"), "id", xmlReferenceRefTarget,
                "Erfolgreich Verweis auf eine Section bearbeitet: ", strResult) or tupleOrders
            tupleOrders = _find_reference_orders(
                xmlEOAdocument.findall(".//EOAsubsection"), "id", xmlReferenceRefTarget,
                "Erfolgreich Verweis auf eine Sub-Section bearbeitet: ", strResult) or tupleOrders
        if xmlReferenceRefTarget in dictFigures:
            print("Verweis auf Abbildung gefunden")
            strResult = dictFigures[xmlReferenceRefTarget]
            tupleOrders = _find_reference_orders(
                xmlEOAdocument.findall(".//EOAfigure"), "id", xmlReferenceRefTarget,
                "Erfolgreich Verweis auf eine Abbildung bearbeitet: ", strResult) or tupleOrders
        if xmlReferenceRefTarget in dictFootnotes:
            print("Verweis auf Fussnote gefunden")
            strResult = dictFootnotes[xmlReferenceRefTarget]
            tupleOrders = _find_reference_orders(
                xmlEOAdocument.findall(".//EOAfootnote"), "id", xmlReferenceRefTarget,
                "Erfolgreich Verweis auf eine Fussnote bearbeitet: ", strResult) or tupleOrders
        if xmlReferenceLabelText in dictTables:
            print("Verweis auf Tabelle gefunden")
            strResult = dictTables[xmlReferenceLabelText]
            tupleOrders = _find_reference_orders(
                xmlEOAdocument.findall(".//EOAtable"), "label", xmlReferenceLabelText,
                "Erfolgreich Verweis auf eine Tabelle bearbeitet:", strResult) or tupleOrders
        if tupleOrders is not None:
            strChapterOrder, strObjectOrder = tupleOrders
        # Replace the reference by a link; clear() also drops the tail, so it
        # is saved and restored.
        tmpTail = xmlReference.tail or ""
        xmlReference.clear()
        xmlReference.text = strResult
        xmlReference.tail = tmpTail
        xmlReference.tag = "a"
        xmlReference.set("href", "../" + strChapterOrder + "/index.html#" + strObjectOrder)
print("----------------------------------------------")
print("Processing Page References")
# Replace every EOApageref by the page number registered for its label.
# References to a facsimile page additionally become links to that page.
# The set of facsimile pages does not change here, so look it up only once.
xmlEOAfacsimilepages = xmlEOAdocument.findall(".//EOAfacsimilepage")
for xmlEOAchapter in xmlEOAchapters:
    xmlPageReferences = xmlEOAchapter.findall(".//EOApageref")
    for xmlReference in xmlPageReferences:
        # Reset the placeholder for every reference.  It used to be set once
        # per chapter, so an unknown label silently showed the page number of
        # the previous reference.
        strResult = "!!! Page Reference !!!"
        xmlReferenceLabel = xmlReference.find("Label")
        xmlReferenceLabelText = xmlReferenceLabel.text
        xmlReferenceRef = xmlReference.find("ref")
        xmlReferenceRefTarget = xmlReferenceRef.get("target")
        if xmlReferenceLabelText in dictPagelabels:
            print("Verweis auf Seite gefunden: " + xmlReferenceLabelText)
            strResult = dictPagelabels[xmlReferenceLabelText]
        xmlReference.text = strResult
        # Drop the <Label> and <ref> children; only the text remains
        for xmlChild in xmlReference.iterchildren():
            xmlReference.remove(xmlChild)
        # Check if EOApageref points to a facsimile page.
        # If yes, make a href to the facsimile.
        for xmlEOAfacsimilepage in xmlEOAfacsimilepages:
            if xmlEOAfacsimilepage.get("label") == xmlReferenceLabelText:
                print("Querverweis auf ein Facsimile gefunden")
                xmlReference.tag = "a"
                strPartOrder = xmlEOAfacsimilepage.getparent().get("order")
                strFacsimileOrder = xmlEOAfacsimilepage.get("order")
                print(strFacsimileOrder)
                xmlReference.set("href", "../" + strPartOrder + "/" + strFacsimileOrder + ".html")
print("----------------------------------------------")
print("Normalizing Index Entries")
# The text of an index element follows the pattern
#     main[@display][!secondary[@secondarydisplay]][|addition]
# It is taken apart and stored in attributes of the element; the text itself
# is removed.
for xmlEOAchapter in xmlEOAchapters:
    xmlEOAindexs = xmlEOAchapter.xpath(".//EOAindex | .//EOAindexperson | .//EOAindexlocation")
    for xmlEOAindex in xmlEOAindexs:
        strEOAindextext = xmlEOAindex.text
        xmlEOAindex.text = None
        listFirstPart = strEOAindextext.split("|")
        listSecondPart = listFirstPart[0].split("!")
        strMainEntry = listSecondPart[0]
        # Check if a sortkey is present via @
        listSortKey = strMainEntry.split("@")
        if len(listSortKey) == 2:
            xmlEOAindex.set("main", listSortKey[0])
            xmlEOAindex.set("display", listSortKey[1])
        else:
            xmlEOAindex.set("main", strMainEntry)
        if len(listSecondPart) > 1:
            strSecondPart = listSecondPart[1]
            listSecondarySortkey = strSecondPart.split("@")
            if len(listSecondarySortkey) == 2:
                xmlEOAindex.set("secondary", listSecondarySortkey[0])
                xmlEOAindex.set("secondarydisplay", listSecondarySortkey[1])
            else:
                xmlEOAindex.set("secondary", strSecondPart)
        if len(listFirstPart) > 1:
            strAddition = listFirstPart[1]
            if strAddition == "textbf":
                xmlEOAindex.set("bold", "true")
            # Strip only the leading keyword.  The former re.sub() removed
            # every occurrence, which also mangled targets that merely
            # contain "see".
            if strAddition.startswith("seealso"):
                xmlEOAindex.set("seealso", strAddition[len("seealso"):])
                # Entries containing seealso are omitted for the time being
                xmlEOAindex.tag = "temp"
            elif strAddition.startswith("see"):
                xmlEOAindex.set("see", strAddition[len("see"):])
                # Entries containing see are omitted for the time being
                xmlEOAindex.tag = "temp"
        # Figure out parent chapter number and parent element order.  When
        # several non-chapter ancestors carry an order, the last one visited
        # wins.
        for xmlParent in xmlEOAindex.iterancestors():
            strParentOrder = xmlParent.get("order")
            if strParentOrder is None:
                continue
            if xmlParent.tag == "EOAchapter":
                xmlEOAindex.set("chapterorder", strParentOrder)
            else:
                xmlEOAindex.set("elementorder", strParentOrder)
        print(etree.tostring(xmlEOAindex))
etree.strip_tags(xmlDjangoTree, "temp")
print("----------------------------------------------")
print("Removing Duplicate Index Entries")
# Within one child element of a chapter, the same index entry needs to be
# linked only once.  Repeated entries are retagged as "temp".
for xmlEOAchapter in xmlEOAchapters:
    for xmlChild in xmlEOAchapter.iterchildren():
        # main entry -> list of subentries already seen below xmlChild
        dictEntries = {}
        xmlEOAindexs = xmlChild.xpath(".//EOAindex | .//EOAindexperson | .//EOAindexlocation")
        for xmlEOAindex in xmlEOAindexs:
            strEntry = xmlEOAindex.get("main")
            strSubentry = xmlEOAindex.get("secondary")
            if strEntry not in dictEntries:
                # Remember the subentry of the first occurrence as well;
                # otherwise its first repetition is not recognised.
                dictEntries[strEntry] = [strSubentry]
                continue
            if strSubentry is None or strSubentry in dictEntries[strEntry]:
                # see/seealso are attributes of the index entry itself, not
                # of the enclosing element that was tested here before.
                if (xmlEOAindex.get("see") is None) and (xmlEOAindex.get("seealso") is None):
                    xmlEOAindex.tag = "temp"
            else:
                dictEntries[strEntry].append(strSubentry)
print("----------------------------------------------")
print("Removing Index Entries in Footnotes")
# Index entries located inside an EOAfootnote are retagged as "temp".
for xmlEOAchapter in xmlEOAchapters:
    for xmlChild in xmlEOAchapter.iterchildren():
        dictEntries = {}
        xmlEOAindexs = xmlChild.xpath(".//EOAindex | .//EOAindexperson | .//EOAindexlocation")
        for xmlEOAindex in xmlEOAindexs:
            # One pass (and one message) per enclosing EOAfootnote
            for xmlParent in xmlEOAindex.iterancestors("EOAfootnote"):
                xmlEOAindex.tag = "temp"
                print("Ding Index in Footnote")
print("----------------------------------------------")
print("Sorting and Creating Regular Index")
# Group all EOAindex elements by main entry and, below that, by subentry:
#   dictIndex[main]["listMainentries"]     -> elements without subentry
#   dictIndex[main]["dictSubentries"][sub] -> elements with that subentry
dictIndex = {}
xmlEOAindexs = xmlDjangoTree.findall("//EOAindex")
print("Sorting " + str(len(xmlEOAindexs)) + " Entries")
for xmlEOAindex in xmlEOAindexs:
    strMainEntry = xmlEOAindex.get("main")
    if not strMainEntry:
        # Without a main entry there is neither a sort key nor a first
        # character to file the entry under.
        continue
    if strMainEntry not in dictIndex:
        dictIndex[strMainEntry] = {"listMainentries": [], "dictSubentries": {}}
    strSubEntry = xmlEOAindex.get("secondary")
    if strSubEntry is None:
        dictIndex[strMainEntry]["listMainentries"].append(xmlEOAindex)
    else:
        dictIndex[strMainEntry]["dictSubentries"].setdefault(strSubEntry, []).append(xmlEOAindex)
# Sort the main index, ignoring case
listSortedKeys = sorted(dictIndex.keys(), key=str.lower)
# Create new and empty xmlTree for xmlEOAindex
xmlEOAprintindex = etree.Element("EOAprintindex")
xmlEOAindexsection = None
listFirstChars = []
for strSortedKey in listSortedKeys:
    strFirstChar = strSortedKey[0].upper()
    if strFirstChar not in listFirstChars:
        # A new initial letter closes the current section and opens the next
        print(strFirstChar)
        listFirstChars.append(strFirstChar)
        if xmlEOAindexsection is not None:
            xmlEOAprintindex.append(xmlEOAindexsection)
        xmlEOAindexsection = etree.Element("EOAindexsection")
        xmlEOAindexsection.set("Character", strFirstChar)
    xmlEOAindexentry = etree.Element("EOAindexentry")
    xmlEOAindexentry.set("main", strSortedKey)
    for xmlMainelement in dictIndex[strSortedKey]["listMainentries"]:
        # Prefer the display form over the sort key
        if xmlMainelement.get("display") is not None:
            strMainEntry = xmlMainelement.get("display")
        else:
            strMainEntry = xmlMainelement.get("main")
        xmlEOAindexentry.set("display", strMainEntry)
        print(strMainEntry)
        print(xmlMainelement.get("chapterorder") + ":" + xmlMainelement.get("elementorder"))
        xmlEOAindexlink = etree.Element("EOAindexlink")
        xmlEOAindexlink.set("chapterorder", xmlMainelement.get("chapterorder"))
        xmlEOAindexlink.set("elementorder", xmlMainelement.get("elementorder"))
        if xmlMainelement.get("bold") is not None:
            xmlEOAindexlink.set("bold", "True")
        xmlEOAindexentry.append(xmlEOAindexlink)
    # If there are any subentries, process them now
    if len(dictIndex[strSortedKey]["dictSubentries"]) > 0:
        print("Processing Subentries")
        listSortedSubKeys = sorted(dictIndex[strSortedKey]["dictSubentries"])
        for strSortedSubKey in listSortedSubKeys:
            xmlEOAindexsubentry = etree.Element("EOAindexsubentry")
            xmlEOAindexsubentry.set("secondary", strSortedSubKey)
            for xmlSubElement in dictIndex[strSortedKey]["dictSubentries"][strSortedSubKey]:
                strSubEntry = xmlSubElement.get("secondary")
                # Insert the links to the subentry
                xmlEOAindexlink = etree.Element("EOAindexlink")
                xmlEOAindexlink.set("chapterorder", xmlSubElement.get("chapterorder"))
                xmlEOAindexlink.set("elementorder", xmlSubElement.get("elementorder"))
                xmlEOAindexsubentry.append(xmlEOAindexlink)
                if xmlSubElement.get("bold") is not None:
                    xmlEOAindexlink.set("bold", "True")
                print(strSubEntry)
            xmlEOAindexentry.append(xmlEOAindexsubentry)
    xmlEOAindexsection.append(xmlEOAindexentry)
# The section of the last initial letter is still open
if xmlEOAindexsection is not None:
    xmlEOAprintindex.append(xmlEOAindexsection)
# If an EOAprintindex placeholder is found, append xmlEOAprintindex to
# xmlEOAdocument
xmlPrintindex = xmlDjangoTree.find(".//EOAprintindex")
if xmlPrintindex is not None:
    # Remove <p><EOAprintindex/></p> from xmlDjangoTree
    xmlPrintindex.tag = "temp"
    xmlPrintindex.getparent().tag = "temp"
    xmlEOAdocument.append(xmlEOAprintindex)
def _collect_index_hits(xmlHits):
    """Group index elements by their "main" and "secondary" attributes.

    Returns a dict mapping each main text to
    {"listMainentries": [elements without "secondary"],
     "dictSubentries": {secondary text: [elements]}}.
    """
    dictGrouped = {}
    for xmlHit in xmlHits:
        dictEntry = dictGrouped.setdefault(
            xmlHit.get("main"), {"listMainentries": [], "dictSubentries": {}})
        strSecondary = xmlHit.get("secondary")
        if strSecondary is None:
            # Entry has no subentry: it belongs to the main entry itself.
            dictEntry["listMainentries"].append(xmlHit)
        else:
            dictEntry["dictSubentries"].setdefault(strSecondary, []).append(xmlHit)
    return dictGrouped


def _make_index_link(xmlHit):
    """Return an EOAindexlink element pointing at the given index element."""
    xmlLink = etree.Element("EOAindexlink")
    xmlLink.set("chapterorder", xmlHit.get("chapterorder"))
    xmlLink.set("elementorder", xmlHit.get("elementorder"))
    if xmlHit.get("bold") is not None:
        xmlLink.set("bold", "True")
    return xmlLink


def _render_print_index(strRootTag, dictGrouped):
    """Build the printable index tree for one grouped index.

    strRootTag is the tag of the returned root element; dictGrouped is the
    structure produced by _collect_index_hits().  Main entries are sorted
    case-insensitively and filed into one EOAindexsection per initial letter.
    """
    # Keys that are None or empty cannot be filed under a letter and would
    # abort the run in str.lower() / the [0] lookup, so they are left out.
    listSortedKeys = []
    for strKey in dictGrouped:
        if strKey:
            listSortedKeys.append(strKey)
        else:
            print ("Skipping index entry without main text")
    listSortedKeys.sort(key=str.lower)
    xmlRoot = etree.Element(strRootTag)
    xmlSection = None
    listFirstChars = []
    for strSortedKey in listSortedKeys:
        strFirstChar = strSortedKey[0].upper()
        if strFirstChar not in listFirstChars:
            # First key with this initial: open a new letter section.
            print (strFirstChar)
            listFirstChars.append(strFirstChar)
            xmlSection = etree.Element("EOAindexsection")
            xmlSection.set("Character", strFirstChar)
            xmlRoot.append(xmlSection)
        xmlEntry = etree.Element("EOAindexentry")
        xmlEntry.set("main", strSortedKey)
        for xmlMainelement in dictGrouped[strSortedKey]["listMainentries"]:
            # Prefer the explicit display text; fall back to the main text.
            strMainEntry = xmlMainelement.get("display")
            if strMainEntry is None:
                strMainEntry = xmlMainelement.get("main")
            xmlEntry.set("display", strMainEntry)
            print (strMainEntry)
            print (xmlMainelement.get("chapterorder") + ":" + xmlMainelement.get("elementorder"))
            xmlEntry.append(_make_index_link(xmlMainelement))
        # If there are any subentries, process them now
        dictSubentries = dictGrouped[strSortedKey]["dictSubentries"]
        if dictSubentries:
            print ("Processing Subentries")
            for strSortedSubKey in sorted(dictSubentries):
                xmlSubentry = etree.Element("EOAindexsubentry")
                xmlSubentry.set("secondary", strSortedSubKey)
                for xmlSubElement in dictSubentries[strSortedSubKey]:
                    # Add the links pointing to this subentry
                    xmlSubentry.append(_make_index_link(xmlSubElement))
                    print (xmlSubElement.get("secondary"))
                xmlEntry.append(xmlSubentry)
        xmlSection.append(xmlEntry)
    return xmlRoot


# Build the person index and the location index one after the other.  Each
# index is grouped, rendered and attached before the next one is started, so
# the person index is no longer overwritten by the location data and each
# placeholder receives its own index tree.
for strIndexLabel, strHitTag, strPrintTag in (
        ("Person", "EOAindexperson", "EOAprintpersonindex"),
        ("Location", "EOAindexlocation", "EOAprintlocationindex")):
    print ("----------------------------------------------")
    print ("Sorting and Creating " + strIndexLabel + " Index")
    # ".//" is what lxml turns a leading "//" into on an ElementTree; spelling
    # it out matches the find() calls on the same tree.
    xmlEOAindexs = xmlDjangoTree.findall(".//" + strHitTag)
    print ("Sorting " + str(len(xmlEOAindexs)) + " Entries")
    dictIndex = _collect_index_hits(xmlEOAindexs)
    xmlEOAprintindex = _render_print_index(strPrintTag, dictIndex)
    # Only if the document contains the matching placeholder is the index
    # appended to xmlEOAdocument.
    xmlPrintindex = xmlDjangoTree.find(".//" + strPrintTag)
    if xmlPrintindex is not None:
        # Mark the placeholder and its parent (presumably a wrapping <p> --
        # verify against the input) as "temp"; "temp" tags are stripped from
        # the tree further down.
        xmlPrintindex.tag = "temp"
        xmlPrintindex.getparent().tag = "temp"
        xmlEOAdocument.append(xmlEOAprintindex)
# TODO: delete the attributes that are not needed, such as id
# TODO: delete the tags that are not needed, such as EOAlabel
# strip_tags unwraps the named elements (their text and children are merged
# into the parent); strip_elements removes citekey elements with their whole
# content but, because of with_tail=False, keeps the text following them;
# strip_attributes removes the named attributes wherever they occur.
tupUnwrapTags = ("temp", "citetext", "EOAprintbibliography")
tupDropAttributes = ("id-text", "id", "noindent", "type", "label", "spacebefore", "rend")
etree.strip_tags(xmlDjangoTree, *tupUnwrapTags)
etree.strip_elements(xmlDjangoTree, "citekey", with_tail=False)
etree.strip_attributes(xmlDjangoTree, *tupDropAttributes)
############################################################################
#                           Save xmlDjangoTree                             #
############################################################################
# encoding="unicode" makes tostring() return a str without an XML
# declaration, so the file is written explicitly as UTF-8 (the encoding XML
# parsers assume when no declaration is present) instead of relying on the
# locale default.  The with-block closes the file even if writing fails.
tmpResult = etree.tostring(xmlDjangoTree, pretty_print=True, encoding="unicode")
with open("CONVERT/django/Django.xml", "w", encoding="utf-8") as tmpFile:
    tmpFile.write(tmpResult)
############################################################################
#                         Finishing various Stuff                          #
############################################################################
# Write Temporary XML-Tree
# As above: tostring(..., encoding="unicode") yields a str without an XML
# declaration, so write it explicitly as UTF-8 and let the with-block close
# the file.
ergebnis = etree.tostring(xmlEbookTree, pretty_print=True, encoding="unicode")
with open("Devel_ebook.xml", "w", encoding="utf-8") as ergebnisdatei:
    ergebnisdatei.write(ergebnis)
cleanup()
print ("Done!")
sys.exit()