Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
EOASkripts/tralics2django.py
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
executable file
1440 lines (1333 sloc)
69.5 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# -*- coding: utf-8; mode: python -*- | |
# Time-stamp: <2017-10-30 13:13:43 (kthoden)> | |
import pickle | |
import os | |
import sys | |
import re | |
import shutil | |
import shlex | |
import subprocess | |
import argparse | |
import configparser | |
import libeoaconvert | |
from copy import deepcopy | |
from lxml import etree | |
#####################
# Parsing arguments #
#####################
parser = argparse.ArgumentParser()
parser.add_argument(
    "-c", "--config",
    dest="CONFIG_FILE",
    help="Name of configuration file",
    metavar="CONFIGURATION",
)
args = parser.parse_args()

if args.CONFIG_FILE is not None:
    CONFIG_FILE = os.path.abspath(args.CONFIG_FILE)
else:
    # Default: config/eoaconvert.cfg next to this script.  The script path is
    # made absolute first; otherwise an invocation without a directory part
    # (dirname == "") would yield the bogus absolute path "/config/...".
    CONFIG_FILE = os.path.join(
        os.path.dirname(os.path.abspath(sys.argv[0])),
        "config",
        "eoaconvert.cfg",
    )

print("The configfile is ", CONFIG_FILE)
##################################
# Reading the configuration file #
##################################
CONFIG = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the
# list of files actually parsed, so an empty result means "nothing was read".
if not CONFIG.read(CONFIG_FILE):
    sys.exit("Could not read configuration file %s" % CONFIG_FILE)

########################
# Paths to executables #
########################
GM_PATH = CONFIG['Executables']['graphicsmagic']
TL_PATH = CONFIG['Executables']['texlive']
###########################################
# Loading data from first conversion step #
###########################################
# NOTE(review): unpickling can execute arbitrary code; data.pickle is expected
# to be the file written by the first conversion step, never foreign input.
with open('tmp_files/data.pickle', 'rb') as f:
    data = pickle.load(f)

# Numbering dictionaries produced by the first step, looked up in the same
# order as before (a missing key still raises KeyError).
(
    dictChapters,
    dictEquations,
    dictLists,
    dictTheorems,
    dictSections,
    dictFigures,
    dictFootnotes,
    dictTables,
    dictPagelabels,
) = (
    data[strKey]
    for strKey in (
        "chapterdict",
        "eqdict",
        "listdict",
        "theoremdict",
        "secdict",
        "figdict",
        "fndict",
        "tabdict",
        "pagelabeldict",
    )
)

# Intermediate XML written by the first step
xmlTree = etree.parse("tmp_files/IntermediateXMLFile.xml")
print("""
############################################################################
# Convert tralics-XML to Django Data Structure #
############################################################################
""")

# Create django File Structure.  makedirs(exist_ok=True) also creates a
# missing CONVERT directory and completes a partially existing structure.
os.makedirs(os.getcwd() + "/CONVERT/django/images/embedded", exist_ok=True)
os.makedirs(os.getcwd() + "/CONVERT/django/files", exist_ok=True)

# Create empty xmlTree
xmlEOAdocument = etree.Element("EOAdocument")
xmlDjangoTree = etree.ElementTree(xmlEOAdocument)

etree.strip_attributes(xmlTree, "noindent")
# Remove temp-Tag
etree.strip_tags(xmlTree, "temp")

# Write Temporary XML-Maintree (explicit encoding: the text may contain
# characters the locale's default encoding cannot represent)
with open("tmp_files/Devel_django.xml", "w", encoding="utf-8") as ergebnisdatei:
    ergebnis = etree.tostring(xmlTree, pretty_print=True, encoding="unicode")
    ergebnisdatei.write(ergebnis)

# Find all Chapters from the original tralics XML.  The relative form
# ".//div1" is what lxml rewrites the absolute "//div1" to (with a warning).
xmlChapters = xmlTree.findall(".//div1")
def debug_chapters(xmlEOAchapters):
    """Write individual chapters to files.

    Each chapter element is pretty-printed to
    ``<cwd>/debug/debug-chapter-NN.xml``, numbered from 01 in iteration
    order.  The ``debug`` directory is created if it does not exist.

    Args:
        xmlEOAchapters: iterable of lxml elements, one per chapter.
    """
    debug_dir = os.path.join(os.getcwd(), "debug")
    os.makedirs(debug_dir, exist_ok=True)

    for chap_num, chapter in enumerate(xmlEOAchapters, start=1):
        tmp_filename = "%s/debug/debug-chapter-%02d.xml" % (os.getcwd(), chap_num)
        tmp_result = etree.tostring(chapter, pretty_print=True, encoding="unicode")
        # Context manager closes the file even if writing fails; explicit
        # UTF-8 keeps non-ASCII chapter text independent of the locale.
        with open(tmp_filename, "w", encoding="utf-8") as tmp_file:
            tmp_file.write(tmp_result)
# def debug_chapters ends here
def gettext(xmlElement):
    """Return the text of an element with all child markup stripped.

    The element's own text, the (recursively flattened) text of every child
    and each child's tail are concatenated in document order.
    """
    listParts = [xmlElement.text or ""]
    for xmlChild in xmlElement:
        listParts.append(gettext(xmlChild))
        listParts.append(xmlChild.tail or "")
    return "".join(listParts)
# def gettext ends here
def _eoa_split_image_path(strImageFileString):
    """Return (relative path, flattened directory, file name) of an image reference."""
    strImageFileString = strImageFileString.rstrip("\n")
    # The directory part is flattened (slashes removed) and later used as a
    # prefix of the file name in the target folder.
    strImageFileDir = re.sub("/", "", os.path.dirname(strImageFileString))
    strImageFileName = os.path.basename(strImageFileString)
    return strImageFileString, strImageFileDir, strImageFileName


def _eoa_copy_equation_image(strFilename):
    """Copy an equation image from ./items into ./CONVERT/django/images/."""
    shutil.copy(os.getcwd() + "/items/" + strFilename, os.getcwd() + "/CONVERT/django/images/")


def djangoParseObject(xmlElement, indent=False, listtype=None, listnumber=0, uid=None):
    """Convert one element of the intermediate XML into its Django form.

    The kind of element is decided by its tag or by the descendants it
    contains (transcription, letterhead, figures, tables, lists, theorems,
    equations, subsections, verse); everything else becomes an
    ``EOAparagraph``.  Each produced object receives an ``order`` attribute
    from the module-level counter ``intObjectNumber``, which is advanced here.
    Images are copied (or converted) into ``CONVERT/django/images`` as a side
    effect.  Many branches move nodes out of ``xmlElement`` or retag it.

    Args:
        xmlElement: lxml element to convert.
        indent: if true, the result is marked with ``indent="True"``.
        listtype: optional value for the ``listtype`` attribute.
        listnumber: value for the ``listnumber`` attribute; ``0`` or ``None``
            means "no list number".
        uid: optional value for the ``id`` attribute.

    Returns:
        The converted element.  Branches that may yield several objects wrap
        them in a ``temp`` element.
    """
    # The numbering dictionaries (dictFigures, dictTables) are only read;
    # the object counter is the only global that is rebound here.
    global intObjectNumber

    # Check what kind of Element we have and change the data
    if xmlElement.tag == "EOAtranscripted":
        xmlResult = etree.Element("temp")
        xmlEOATranscription = etree.Element("EOAtranscription")
        xmlEOATranscription.set("order", str(intObjectNumber))
        intObjectNumber += 1
        xmlLeftheader = xmlElement.find(".//Leftheader")
        etree.strip_tags(xmlLeftheader, "p")
        xmlEOATranscription.append(xmlLeftheader)
        xmlRightheader = xmlElement.find(".//Rightheader")
        etree.strip_tags(xmlRightheader, "p")
        xmlEOATranscription.append(xmlRightheader)
        # Work on a copy of the transcribed text; its children are distributed
        # over a left and a right column, split at the first <pagebreak/>.
        xmlTemp = deepcopy(xmlElement.find(".//EOAtranscriptedtext"))
        xmlLeftColumn = etree.Element("EOAtranscriptionleft")
        xmlRightColumn = etree.Element("EOAtranscriptionright")
        boolRightColumn = False
        # A separate loop variable keeps the xmlElement parameter intact.
        for xmlColumnChild in xmlTemp.iterchildren():
            if xmlColumnChild.tag == "pagebreak":
                boolRightColumn = True
                continue
            if boolRightColumn:
                xmlRightColumn.append(xmlColumnChild)
            else:
                xmlLeftColumn.append(xmlColumnChild)
        xmlEOATranscription.append(xmlLeftColumn)
        xmlEOATranscription.append(xmlRightColumn)
        # Convert Images within the transcription
        for xmlFigure in xmlEOATranscription.findall(".//EOAfigurenonumber"):
            strImageFileString, strImageFileDir, strImageFileName = _eoa_split_image_path(xmlFigure.find(".//file").text)
            # Pass the arguments as a list so that paths containing spaces
            # survive; only the configured executable string is shell-split.
            listArguments = shlex.split(GM_PATH) + [
                "convert",
                os.getcwd() + "/" + strImageFileString,
                "-resize",
                "250x250>",
                os.getcwd() + "/CONVERT/django/images/embedded/" + strImageFileDir + strImageFileName,
            ]
            subprocess.check_output(listArguments, shell=False)
            # clear() also drops the tail text, so put it back afterwards.
            tmpStrTail = xmlFigure.tail
            xmlFigure.clear()
            xmlFigure.tail = tmpStrTail
            xmlFigure.tag = "img"
            xmlFigure.set("src", strImageFileDir + strImageFileName)
            xmlFigure.set("alt", "")
        xmlResult.append(xmlEOATranscription)
    elif xmlElement.tag == "EOAletterhead":
        xmlResult = etree.Element("temp")
        xmlEOAletterhead = etree.Element("EOAletterhead")
        for strPart in ("Recipient", "Archive", "Additional", "Pages"):
            xmlEOAletterhead.append(xmlElement.find(".//" + strPart))
        xmlEOAletterhead.set("order", str(intObjectNumber))
        intObjectNumber += 1
        xmlResult.append(xmlEOAletterhead)
    elif xmlElement.findall(".//EOAfigurenonumber"):
        xmlResult = etree.Element("temp")
        # Create basic Element EOAfigurenonumber
        xmlEOAfigure = etree.Element("EOAfigurenonumber")
        # Copy Image
        strImageFileString, strImageFileDir, strImageFileName = _eoa_split_image_path(xmlElement.find(".//file").text)
        shutil.copy(os.getcwd() + "/" + strImageFileString, os.getcwd() + "/CONVERT/django/images/" + strImageFileDir + strImageFileName)
        xmlEOAfigure.set("file", strImageFileDir + strImageFileName)
        xmlEOAfigure.set("width", xmlElement.find(".//width").text + "px;")
        xmlEOAfigure.set("order", str(intObjectNumber))
        intObjectNumber += 1
        xmlResult.append(xmlEOAfigure)
    elif xmlElement.tag == "EOAfigure":
        xmlResult = etree.Element("temp")
        # Create basic Element EOAfigure
        xmlEOAfigure = etree.Element("EOAfigure")
        # Copy Image
        strImageFileString, strImageFileDir, strImageFileName = _eoa_split_image_path(xmlElement.find(".//file").text)
        shutil.copy(os.getcwd() + "/" + strImageFileString, os.getcwd() + "/CONVERT/django/images/" + strImageFileDir + strImageFileName)
        print("django figure", strImageFileName)
        if os.path.splitext(strImageFileName)[1].lower() == ".pdf":
            print("Found a PDF file")
            # presumably sanitizeImage converts the copied PDF to a JPEG next
            # to it -- TODO confirm against libeoaconvert
            libeoaconvert.sanitizeImage(os.getcwd() + "/CONVERT/django/images/" + strImageFileDir + strImageFileName, GM_PATH, TL_PATH)
            xmlEOAfigure.set("file", strImageFileDir + strImageFileName.replace(".pdf", ".jpg"))
        else:
            # Only non-PDF images keep their original name; previously this
            # assignment always ran and overwrote the .jpg name set above.
            xmlEOAfigure.set("file", strImageFileDir + strImageFileName)
        xmlEOAfigure.set("width", xmlElement.find(".//width").text + "px;")
        xmlEOAfigure.set("order", str(intObjectNumber))
        intObjectNumber += 1
        # Insert visual Number and uid
        strFigureUID = xmlElement.find(".//anchor").get("id")
        xmlEOAfigure.set("number", dictFigures[strFigureUID])
        xmlEOAfigure.set("id", strFigureUID)
        # Insert Caption
        xmlEOAfigure.append(xmlElement.find(".//caption"))
        xmlResult.append(xmlEOAfigure)
    elif xmlElement.findall(".//EOAtable"):
        xmlResult = etree.Element("EOAtable")
        xmlRawTable = xmlElement.find(".//table")
        xmlResult.set("order", str(intObjectNumber))
        intObjectNumber += 1
        xmlResult.append(xmlRawTable)
        # Copy Number, Label and Caption
        if xmlElement.find(".//EOAtablecaption").text != "nonumber":
            xmlResult.append(xmlElement.find(".//EOAtablecaption"))
            strTableLabel = xmlElement.find(".//EOAtablelabel").text
            xmlResult.set("label", strTableLabel)
            xmlResult.set("number", dictTables[strTableLabel])
            xmlResult.set("id", xmlRawTable.get("id"))
        else:
            # NOTE(review): this marks the input element, not xmlResult;
            # kept as is -- confirm which element consumers read.
            xmlElement.set("numbering", "false")
        # Transform width of Columns
        strColumnString = xmlElement.find(".//EOAtablecolumns").text
        strColumnString = re.sub(r"\|", "", strColumnString)
        dictAlignments = {"L": "left", "C": "center", "R": "right"}
        intTableWidth = 0
        # Index 0 is a placeholder so that column numbers start at 1.
        listColumnAlignments = [None]
        listColumnWidths = [None]
        for strColumnDefinition in re.findall(r'([L|R|C].*?cm)', strColumnString):
            strColumnDefinition = strColumnDefinition.rstrip("cm")
            strColumnAlignment = strColumnDefinition[0]
            listColumnAlignments.append(dictAlignments.get(strColumnAlignment, strColumnAlignment))
            # Widths are given in cm; 75 is the cm-to-pixel factor used here.
            intColumnWidth = int(float(strColumnDefinition.lstrip("LRC")) * 75)
            listColumnWidths.append(intColumnWidth)
            intTableWidth += intColumnWidth
        xmlRawTable.set("width", str(intTableWidth))
        # Figure out and deal with the Header
        xmlHeader = xmlRawTable.find(".//row/cell/tableheader")
        if xmlHeader is not None:
            xmlHeader.text = ""
            xmlHeader.getparent().text = xmlHeader.tail
            xmlHeader.getparent().remove(xmlHeader)
            xmlFirstRow = xmlRawTable.find(".//row")
            xmlFirstRow.tag = "tr"
            for xmlFirstRowCell in xmlFirstRow.findall(".//cell"):
                xmlFirstRowCell.tag = "th"
        # Now Deal with the rest of the rows
        for xmlTableRow in xmlRawTable.findall(".//row"):
            xmlTableCells = xmlTableRow.findall(".//cell")
            intCurrentColumn = 1
            for xmlTableCell in xmlTableCells:
                xmlTableCell.tag = "td"
                xmlTableCell.set("align", listColumnAlignments[intCurrentColumn])
                xmlTableCell.set("style", "width: " + str(listColumnWidths[intCurrentColumn]) + ";")
                # Deal with multicolumn
                strCols = xmlTableCell.get("cols")
                if strCols is not None:
                    xmlTableCell.set("colspan", strCols)
                if intCurrentColumn > len(xmlTableCells):
                    intCurrentColumn = 1
                # Deal with multicolumn again, increase intCurrentColumn by the columns being spanned
                elif strCols is not None:
                    intCurrentColumn = intCurrentColumn + int(strCols)
                    del xmlTableCell.attrib["cols"]
                else:
                    intCurrentColumn += 1
            xmlTableRow.tag = "tr"
            xmlTableRow.set("valign", "top")
    elif xmlElement.tag == "list" and xmlElement.get('type') != 'description':
        xmlResult = etree.Element("temp")
        # NOTE(review): the "..//item" / "..//label" lookups below start at
        # the parent of the list; kept unchanged -- confirm this is intended.
        if xmlElement.get('type') == 'ordered':
            # Change first item into EOAlistfirstitem
            xmlFirstItem = xmlElement.find("..//item")
            xmlFirstItemElement = xmlFirstItem[0]
            xmlResult.append(djangoParseObject(xmlFirstItemElement, indent=True, listtype="ordered", listnumber=xmlFirstItem.get("id-text"), uid=xmlFirstItem.get("id")))
            # Process Child Elements which are Part of this item
            for xmlChild in xmlFirstItem.iterchildren():
                xmlResult.append(djangoParseObject(xmlChild, indent=True))
            xmlFirstItem.getparent().remove(xmlFirstItem)
            # Process remaining items in this list
            for xmlItem in xmlElement.iterchildren():
                xmlItemElement = xmlItem[0]
                xmlResult.append(djangoParseObject(xmlItemElement, indent=True, listtype="ordered", listnumber=xmlItem.get("id-text"), uid=xmlItem.get("id")))
                for xmlChild in xmlItem.iterchildren():
                    xmlResult.append(djangoParseObject(xmlChild, indent=True))
                xmlItem.getparent().remove(xmlItem)
        if xmlElement.get('type') == 'simple':
            xml_first_child = xmlElement[0]
            if xml_first_child.tag == 'item':
                print("a simple list with no special items")
                # Change first item into EOAlistfirstitem
                xmlFirstItem = xmlElement.find("..//item")
                xmlFirstItemElement = xmlFirstItem[0]
                xmlResult.append(djangoParseObject(xmlFirstItemElement, indent=True, listtype="unordered", listnumber="-"))
                # Process Child Elements which are Part of this item
                if len(xmlFirstItem) >= 1:
                    print("len xmlFirstItem.getchildren is greater or equal 1")
                    for xmlChild in xmlFirstItem.iterchildren():
                        xmlResult.append(djangoParseObject(xmlChild, indent=True))
                xmlFirstItem.getparent().remove(xmlFirstItem)
                for xmlItem in xmlElement.iterchildren():
                    xmlItemElement = xmlItem[0]
                    xmlResult.append(djangoParseObject(xmlItemElement, indent=True))
                    for xmlChild in xmlItem.iterchildren():
                        xmlResult.append(djangoParseObject(xmlChild, indent=True))
                    xmlItem.getparent().remove(xmlItem)
            ####################################
            # Work in progress (construction) #
            ####################################
            elif xml_first_child.tag == 'label':
                print("a simple list with named items")
                # Change first item into EOAlistfirstitem
                xmlFirstItem = xmlElement.find("..//item")
                xmlFirstItemElement = xmlFirstItem[0]
                print(xmlFirstItemElement.text)
                # debugging
                print("debug zone")
                print(etree.tostring(xmlFirstItemElement))
                print("end of debugging")
                # end of debugging
                xml_first_label = xmlElement.find("..//label")
                listnumber_text = xml_first_label.text
                xmlResult.append(djangoParseObject(xmlFirstItemElement, indent=True, listtype="unordered custom", listnumber=listnumber_text))
                print("The length of the children of the first item:", len(xmlFirstItem))
                # Process Child Elements which are Part of this item
                if len(xmlFirstItem) >= 1:
                    print("len xmlFirstItem.getchildren is greater or equal 1")
                    for xmlChild in xmlFirstItem.iterchildren():
                        xmlResult.append(djangoParseObject(xmlChild, indent=True))
                xmlFirstItem.getparent().remove(xmlFirstItem)
                xml_first_label.getparent().remove(xml_first_label)
                all_the_labels = xmlElement.findall("label")
                all_the_items = xmlElement.findall("item")
                print("itemlength", len(all_the_items))
                print("labellength", len(all_the_labels))
                for listlabel, listitem in zip(all_the_labels, all_the_items):
                    print("listitem text", listitem.text)
                    print("listlabel text", listlabel.text)
                    xml_item_element = listitem[0]
                    xmlResult.append(djangoParseObject(xml_item_element, indent=True, listnumber=listlabel.text))
                    listlabel.getparent().remove(listlabel)
                    listitem.getparent().remove(listitem)
            ###########################
            # End of work in progress #
            ###########################
    elif xmlElement.tag == "list" and xmlElement.get('type') == 'description':
        print("A description")
        xmlResult = etree.Element("temp")
        # Children come in (label, item) pairs; each pass consumes one pair.
        while len(xmlElement) != 0:
            xmlDescription = etree.Element("EOAdescription")
            xmlDescription.set("order", str(intObjectNumber))
            xmlLabel = xmlElement[0]
            xmlItem = xmlElement[1]
            if len(xmlItem) > 0:
                xmlContent = xmlItem[0]
            else:
                xmlContent = etree.Element("p")
            xmlLabel.tag = "description"
            xmlDescription.append(xmlLabel)
            xmlDescription.append(xmlContent)
            xmlResult.append(xmlDescription)
            intObjectNumber += 1
            for xmlChild in xmlItem.iterchildren():
                xmlResult.append(djangoParseObject(xmlChild, indent=True))
            xmlItem.getparent().remove(xmlItem)
    elif xmlElement.tag == "theorem":
        xmlTheoremHead = xmlElement.find(".//head")
        xmlTheoremText = xmlElement.find(".//p")
        strTheoremNumber = xmlElement.get("id-text")
        strTheoremID = xmlElement.get("id")
        xmlResult = etree.Element("EOAtheorem")
        xmlResult.append(xmlTheoremHead)
        xmlResult.append(xmlTheoremText)
        xmlResult.set("order", str(intObjectNumber))
        xmlResult.set("number", strTheoremNumber)
        xmlResult.set("uid", strTheoremID)
        intObjectNumber += 1
    elif xmlElement.findall(".//EOAequationarray"):
        xmlResult = etree.Element("temp")
        for xmlEquation in xmlElement.findall(".//EOAequation"):
            xmlEOAequation = etree.Element("EOAequation")
            xmlEOAequation.set("order", str(intObjectNumber))
            intObjectNumber += 1
            xmlEOAequation.set("number", xmlEquation.get("number"))
            xmlEOAequation.set("filename", xmlEquation.get("filename"))
            if xmlEquation.get("label") is not None:
                xmlEOAequation.set("label", xmlEquation.get("label"))
            _eoa_copy_equation_image(xmlEquation.get("filename"))
            xmlEOAequation.set("TeX", xmlEquation.get("TeX"))
            xmlResult.append(xmlEOAequation)
    elif xmlElement.findall(".//EOAequationarraynonumber"):
        xmlResult = etree.Element("temp")
        for xmlEquation in xmlElement.findall(".//EOAequationarraynonumber"):
            xmlEOAequation = etree.Element("EOAequation")
            xmlEOAequation.set("order", str(intObjectNumber))
            intObjectNumber += 1
            xmlEOAequation.set("number", "")
            xmlEOAequation.set("filename", xmlEquation.get("filename"))
            _eoa_copy_equation_image(xmlEquation.get("filename"))
            xmlEOAequation.set("TeX", xmlEquation.get("TeX"))
            xmlResult.append(xmlEOAequation)
    elif xmlElement.tag == "EOAequationnonumber":
        # Process one EOAequation which is not encapsulated
        xmlResult = etree.Element("EOAequation")
        xmlResult.set("order", str(intObjectNumber))
        intObjectNumber += 1
        xmlResult.set("filename", xmlElement.get("filename"))
        xmlResult.set("TeX", xmlElement.get("TeX"))
        _eoa_copy_equation_image(xmlElement.get("filename"))
        xmlResult.set("number", "")
    elif xmlElement.findall(".//EOAequation"):
        # Process various Equations which may be encapsulated within <p>
        xmlResult = etree.Element("temp")
        for xmlEquation in xmlElement.findall(".//EOAequation"):
            # Create basic Element EOAequation
            xmlEOAequation = etree.Element("EOAequation")
            xmlEOAequation.set("order", str(intObjectNumber))
            intObjectNumber += 1
            xmlEOAequation.set("number", xmlEquation.get("number"))
            xmlEOAequation.set("TeX", xmlEquation.get("TeX"))
            if xmlEquation.get("uid") is not None:
                xmlEOAequation.set("uid", xmlEquation.get("uid"))
            _eoa_copy_equation_image(xmlEquation.get("filename"))
            xmlEOAequation.set("filename", xmlEquation.get("filename"))
            xmlResult.append(xmlEOAequation)
    elif xmlElement.tag == "EOAequation":
        # Process one EOAequation which is not encapsulated
        xmlResult = etree.Element("EOAequation")
        xmlResult.set("order", str(intObjectNumber))
        intObjectNumber += 1
        xmlResult.set("number", xmlElement.get("number"))
        xmlResult.set("TeX", xmlElement.get("TeX"))
        if xmlElement.get("uid") is not None:
            xmlResult.set("uid", xmlElement.get("uid"))
        _eoa_copy_equation_image(xmlElement.get("filename"))
        xmlResult.set("filename", xmlElement.get("filename"))
    elif xmlElement.tag == "div3":
        xmlResult = etree.Element("EOAsubsection")
        xmlResult.set("order", str(intObjectNumber))
        intObjectNumber += 1
        xmlResult.append(xmlElement.find("head"))
        for xmlChild in xmlElement.iterchildren():
            xmlResult.append(djangoParseObject(xmlChild))
    elif xmlElement.tag == "div4":
        xmlResult = etree.Element("EOAsubsubsection")
        xmlResult.set("order", str(intObjectNumber))
        intObjectNumber += 1
        xmlResult.append(xmlElement.find("head"))
        for xmlChild in xmlElement.iterchildren():
            xmlResult.append(djangoParseObject(xmlChild))
    elif xmlElement.tag == "EOAverse":
        xmlResult = etree.Element("EOAparagraph")
        xmlResult.set("style", "verse")
        xmlResult.set("order", str(intObjectNumber))
        intObjectNumber += 1
        # Verse lines are the <p> children; they are joined by <br/> and the
        # <p> wrappers are stripped at the end.
        xml_verselines = xmlElement.findall("p")
        xmlResult.append(deepcopy(xml_verselines[0]))
        for xml_verseline in xml_verselines[1:]:
            xmlResult.append(etree.Element("br"))
            xmlResult.append(deepcopy(xml_verseline))
        etree.strip_tags(xmlResult, "p")
    else:
        # Anything else is treated as a plain paragraph, retagged in place.
        xmlElement.tag = "EOAparagraph"
        xmlElement.set("order", str(intObjectNumber))
        intObjectNumber += 1
        xmlResult = xmlElement

    if indent:
        xmlResult.set("indent", "True")
    if listtype is not None:
        xmlResult.set("listtype", listtype)
    # Labels or id-text attributes may be missing (None); attribute values
    # must be strings, so skip None and stringify everything else.
    if listnumber is not None and listnumber != 0:
        xmlResult.set("listnumber", str(listnumber))
    if uid is not None:
        xmlResult.set("id", uid)
    return xmlResult
# def djangoParseObject ends here
def make_index(index_hits, index_type):
    """Make an index.

    Groups the index hits by their ``main`` attribute (and, below that, by
    ``secondary``), sorts the main entries case-insensitively and builds an
    XML tree of ``EOAindexsection`` / ``EOAindexentry`` / ``EOAindexsubentry``
    elements whose ``EOAindexlink`` children carry the ``chapterorder`` and
    ``elementorder`` of every hit.

    The display string of an entry is its ``main`` value unless a hit
    provides an explicit ``display`` attribute, which then takes precedence.

    Args:
        index_hits: iterable of elements with ``main``, ``chapterorder`` and
            ``elementorder`` attributes and optional ``display``,
            ``secondary`` and ``bold`` attributes.
        index_type: ``"regular"`` yields an ``EOAprintindex`` root; any other
            value yields ``EOAprint<index_type>index``.

    Returns:
        The root element of the new index; it has no children when there are
        no usable hits.
    """
    dictIndex = {}
    for xmlEOAindex in index_hits:
        strMainEntry = xmlEOAindex.get("main")
        if not strMainEntry:
            # Without a main entry the hit can neither be sorted nor filed
            # under a letter, so it is reported and left out.
            print("Skipping index hit without main entry")
            continue
        # If strMainEntry not in Index, then create new index element
        if strMainEntry not in dictIndex:
            dictIndex[strMainEntry] = {
                "display_string": strMainEntry,
                "listMainentries": [],
                "dictSubentries": {},
            }
        dictEntry = dictIndex[strMainEntry]
        # store the display string here.
        str_display_entry = xmlEOAindex.get("display")
        if str_display_entry is not None:
            dictEntry["display_string"] = str_display_entry
        strSubEntry = xmlEOAindex.get("secondary")
        if strSubEntry is None:
            # entry has no subentry: it belongs to the main entry itself
            dictEntry["listMainentries"].append(xmlEOAindex)
        else:
            # entry has a subentry: file it on the second level
            dictEntry["dictSubentries"].setdefault(strSubEntry, []).append(xmlEOAindex)

    if index_type == "regular":
        new_index_element = "EOAprintindex"
    else:
        new_index_element = "EOAprint%sindex" % index_type
    # Create new and empty xmlTree for xmlEOAindex
    xmlEOAprintindex = etree.Element(new_index_element)
    xmlEOAindexsection = None
    setFirstChars = set()
    # Sort the main index
    for strSortedKey in sorted(dictIndex, key=str.lower):
        strFirstChar = strSortedKey[0].upper()
        if strFirstChar not in setFirstChars:
            print("Beginning a new letter:", strFirstChar)
            setFirstChars.add(strFirstChar)
            xmlEOAindexsection = etree.Element("EOAindexsection")
            xmlEOAindexsection.set("Character", strFirstChar)
            # Attach the section right away; entries appended to it later are
            # still part of the tree, and an empty index needs no special case.
            xmlEOAprintindex.append(xmlEOAindexsection)
        # beginning a new entry
        xmlEOAindexentry = etree.Element("EOAindexentry")
        xmlEOAindexentry.set("main", strSortedKey)
        xmlEOAindexentry.set("display", dictIndex[strSortedKey]["display_string"])
        for xmlMainelement in dictIndex[strSortedKey]["listMainentries"]:
            print(xmlMainelement.get("chapterorder") + ":" + xmlMainelement.get("elementorder"))
            xmlEOAindexlink = etree.Element("EOAindexlink")
            xmlEOAindexlink.set("chapterorder", xmlMainelement.get("chapterorder"))
            xmlEOAindexlink.set("elementorder", xmlMainelement.get("elementorder"))
            if xmlMainelement.get("bold") is not None:
                xmlEOAindexlink.set("bold", "True")
            xmlEOAindexentry.append(xmlEOAindexlink)
        # If there are any subentries, process them now
        dictSubentries = dictIndex[strSortedKey]["dictSubentries"]
        if dictSubentries:
            print("Processing Subentries")
            for strSortedSubKey in sorted(dictSubentries):
                xmlEOAindexsubentry = etree.Element("EOAindexsubentry")
                xmlEOAindexsubentry.set("secondary", strSortedSubKey)
                for xmlSubElement in dictSubentries[strSortedSubKey]:
                    # Insert the links to the subentry here
                    xmlEOAindexlink = etree.Element("EOAindexlink")
                    xmlEOAindexlink.set("chapterorder", xmlSubElement.get("chapterorder"))
                    xmlEOAindexlink.set("elementorder", xmlSubElement.get("elementorder"))
                    xmlEOAindexsubentry.append(xmlEOAindexlink)
                    if xmlSubElement.get("bold") is not None:
                        xmlEOAindexlink.set("bold", "True")
                    print(xmlSubElement.get("secondary"))
                xmlEOAindexentry.append(xmlEOAindexsubentry)
        xmlEOAindexsection.append(xmlEOAindexentry)
    return xmlEOAprintindex
# def make_index ends here
def djangoParseHeadline(xmlElement):
    """Split the authors of a headline into one EOAauthor element each.

    If ``xmlElement`` contains an ``EOAauthor`` element, that element is
    removed and its text is split at commas and at the conjunctions
    ", and ", " and " and " und ".  One new ``EOAauthor`` child per author
    name is then appended to ``xmlElement``.

    Args:
        xmlElement: the headline element; it is modified in place.

    Returns:
        The same ``xmlElement`` object.
    """
    xmlAuthors = xmlElement.find(".//EOAauthor")
    if xmlAuthors is not None:
        # An empty <EOAauthor/> has text None; treat it as "no authors".
        strAuthors = xmlAuthors.text or ""
        xmlElement.remove(xmlAuthors)
        strAuthors = re.sub("(, and | and | und )", ",", strAuthors)
        listAuthors = strAuthors.split(",")
        print(listAuthors)
        for strAuthor in listAuthors:
            # Remove whitespace on BOTH sides of the name; names that are
            # empty after stripping (e.g. from "A,,B") are skipped.
            strAuthor = strAuthor.strip()
            if not strAuthor:
                continue
            xmlAuthor = etree.Element("EOAauthor")
            xmlAuthor.text = strAuthor
            xmlElement.append(xmlAuthor)
    return xmlElement
# def djangoParseHeadline ends here
# Iterate over chapters, sections, subsections and subsubsections and put
# everything on one level: every heading and every object of a chapter
# becomes a direct child of a single EOAchapter element.  The "order"
# attribute of the headings comes from the running counter
# intObjectNumber, which restarts at 1 for every chapter.
intChapterNumber = 1
listPartIDs = []
for xmlChapter in xmlChapters:
    intObjectNumber = 1
    # Process Chapter Title
    xmlEOAchapter = etree.Element("EOAchapter")
    xmlEOAchapter.set("type", "regular")
    xmlLanguage = xmlChapter.find(".//language")
    if xmlLanguage is not None:
        # KT changing this after separating the big script
        strLanguage = xmlLanguage.text  # or "english"
    else:
        strLanguage = "english"
    xmlEOAchapter.set("language", strLanguage)
    # xmlEOAchapter.set("language", xmlChapter.get("language"))
    xmlEOAchapter.set("order", str(intChapterNumber))
    if xmlChapter.get("rend") != "nonumber":
        xmlEOAchapter.set("id", xmlChapter.get("id"))
    xmlChapterHeadline = xmlChapter.find(".//head")
    if xmlChapter.get("id") in dictChapters:
        xmlEOAchapter.set("number", dictChapters[xmlChapter.get("id")])
    else:
        xmlEOAchapter.set("number", "")
    print("-----------------------------------------------------")
    print(gettext(xmlChapterHeadline))
    xmlEOAchapter.append(djangoParseHeadline(xmlChapterHeadline))
    # Deal with EOAauthor
    if xmlChapter.find(".//EOAauthor") is not None:
        xmlEOAchapter.append(xmlChapter.find(".//EOAauthor"))
    # Attach the enclosing part to the first chapter that belongs to it,
    # see django structure for this purpose
    if xmlChapter.getparent().tag == "div0":
        if xmlChapter.getparent().get("id") not in listPartIDs:
            listPartIDs.append(xmlChapter.getparent().get("id"))
            xmlPartHeadline = xmlChapter.getparent().find("head")
            xmlPartHeadline.tag = "EOAparthtml"
            xmlEOAchapter.append(xmlPartHeadline)
    # Append Chapter to xmlEOAdocument
    xmlEOAdocument.append(xmlEOAchapter)
    # Iterate over children of Chapter
    for xmlChapterChild in xmlChapter.iterchildren():
        if xmlChapterChild.tag == "div2":
            # Process Section Title
            xmlEOAsection = etree.Element("EOAsection")
            xmlEOAsection.set("order", str(intObjectNumber))
            if xmlChapterChild.get("rend") != "nonumber":
                xmlEOAsection.set("id", xmlChapterChild.get("id"))
                xmlEOAsection.set("number", dictSections[xmlChapterChild.get("id")])
            intObjectNumber += 1
            # Look the headline up inside this section, as the deeper
            # levels below do.  Searching the whole chapter instead would
            # return whichever <head> happens to come first in the chapter.
            xmlHead = xmlChapterChild.find(".//head")
            print(gettext(xmlHead))
            xmlEOAsection.append(djangoParseHeadline(xmlHead))
            xmlEOAchapter.append(xmlEOAsection)
            # Iterate over Children of Section
            for xmlSectionChild in xmlChapterChild.iterchildren():
                if xmlSectionChild.tag == "div3":
                    # Process Subsection Title
                    xmlEOAsubsection = etree.Element("EOAsubsection")
                    xmlEOAsubsection.set("order", str(intObjectNumber))
                    if xmlSectionChild.get("rend") != "nonumber":
                        xmlEOAsubsection.set("id", xmlSectionChild.get("id"))
                        xmlEOAsubsection.set("number", dictSections[xmlSectionChild.get("id")])
                    intObjectNumber += 1
                    xmlHead = xmlSectionChild.find(".//head")
                    print(gettext(xmlHead))
                    xmlEOAsubsection.append(djangoParseHeadline(xmlHead))
                    xmlEOAchapter.append(xmlEOAsubsection)
                    # Iterate over children of Subsection
                    for xmlSubsectionChild in xmlSectionChild.iterchildren():
                        if xmlSubsectionChild.tag == "div4":
                            # Process Subsubsection Title
                            xmlEOAsubsubsection = etree.Element("EOAsubsubsection")
                            xmlEOAsubsubsection.set("order", str(intObjectNumber))
                            intObjectNumber += 1
                            xmlHead = xmlSubsectionChild.find(".//head")
                            print(gettext(xmlHead))
                            xmlEOAsubsubsection.append(djangoParseHeadline(xmlHead))
                            xmlEOAchapter.append(xmlEOAsubsubsection)
                            # Iterate over children of Subsubsection
                            for xmlSubsubsectionChild in xmlSubsectionChild.iterchildren():
                                xmlEOAchapter.append(djangoParseObject(xmlSubsubsectionChild))
                        else:
                            xmlEOAchapter.append(djangoParseObject(xmlSubsectionChild))
                elif xmlSectionChild.tag == "div4":
                    # A subsubsection directly below a section
                    xmlEOAsubsubsection = etree.Element("EOAsubsubsection")
                    xmlEOAsubsubsection.set("order", str(intObjectNumber))
                    intObjectNumber += 1
                    xmlHead = xmlSectionChild.find(".//head")
                    xmlEOAsubsubsection.append(djangoParseHeadline(xmlHead))
                    xmlEOAchapter.append(xmlEOAsubsubsection)
                    # Iterate over children of Subsubsection
                    for xmlSubsubsectionChild in xmlSectionChild.iterchildren():
                        if xmlSubsubsectionChild.tag == "div5":
                            print("jubel")
                            # although it's div5, promote it to subsubsection
                            xmlEOAparasection = etree.Element("EOAsubsubsection")
                            # xmlEOAparasection = etree.Element("EOAparasection")
                            xmlEOAparasection.set("order", str(intObjectNumber))
                            intObjectNumber += 1
                            xmlHead = xmlSubsubsectionChild.find(".//head")
                            print(gettext(xmlHead))
                            xmlEOAparasection.append(djangoParseHeadline(xmlHead))
                            xmlEOAchapter.append(xmlEOAparasection)
                            for xmlParasectionChild in xmlSubsubsectionChild.iterchildren():
                                xmlEOAchapter.append(djangoParseObject(xmlParasectionChild))
                        else:
                            xmlEOAchapter.append(djangoParseObject(xmlSubsubsectionChild))
                else:
                    xmlEOAchapter.append(djangoParseObject(xmlSectionChild))
        else:
            xmlEOAchapter.append(djangoParseObject(xmlChapterChild))
    intChapterNumber += 1
print("----------------------------------------------")
print("Processing Facsimile Parts")
# objectify is needed below for deannotate(); it is imported here because
# it is not among the imports visible at the top of the file.
from lxml import objectify

listModes = ["text", "textPollux", "xml"]
strBasicURL = "http://mpdl-system.mpiwg-berlin.mpg.de/mpdl/interface/page-fragment.xql?document="
parserECHO = etree.XMLParser()
xmlParts = xmlTree.findall("//div0")
intFacNumber = 1
for xmlPart in xmlParts:
    intObjectNumber = 1
    intFacPartNumber = 1
    if xmlPart.find(".//EOAfacsimilepart") is None:
        continue
    # Facsimile parts are numbered like chapters and continue the chapter counter.
    xmlEOAfacsimilepart = etree.Element("EOAfacsimilepart")
    xmlEOAfacsimilepart.set("order", str(intChapterNumber))
    xmlEOAfacsimileparthead = xmlPart.find(".//head")
    for xmlChild in xmlEOAfacsimileparthead:
        if xmlChild.tag == "hi":
            xmlChild.tag = "em"
            # pop() instead of del: a <hi> without rend must not raise KeyError
            xmlChild.attrib.pop("rend", None)
    xmlEOAfacsimilepart.append(xmlEOAfacsimileparthead)
    intChapterNumber += 1
    xmlEOAdocument.append(xmlEOAfacsimilepart)
    xmlFacsimilepages = xmlPart.findall(".//EOAfacsimilepage")
    intFacPageNumber = 1
    for xmlFacsimilepage in xmlFacsimilepages:
        strImageFile = xmlFacsimilepage.find(".//file").text
        strLabel = xmlFacsimilepage.find(".//label").text
        strPagenumber = xmlFacsimilepage.find(".//pagenumber").text or ""
        xmlEOAfacsimilepage = etree.Element("EOAfacsimilepage")
        xmlEOAfacsimilepage.set("order", str(intObjectNumber))
        # TODO: somehow deal with a (missing) file suffix here, and convert files if necessary
        strImageFile = strImageFile.rstrip("\n")
        # The directory name is flattened into the file name of the copy.
        strImageFileDir = os.path.dirname(strImageFile)
        strImageFileDir = re.sub("/", "", strImageFileDir)
        strImageFileName = os.path.basename(strImageFile)
        shutil.copy(os.getcwd() + "/" + strImageFile, os.getcwd() + "/CONVERT/django/images/" + strImageFileDir + strImageFileName)
        intObjectNumber += 1
        # Download transcription for this Page
        strFulltext = xmlFacsimilepage.find(".//fulltext").text
        if strFulltext is not None:
            print("Ein Link zum Volltext wurde gefunden")
            # The text has the form "<document>,<page number>"
            listFulltext = re.split(",", strFulltext)
            strFacsimileURL = listFulltext[0]
            strFacsimilePage = listFulltext[1]
            for strMode in listModes:
                strURL = strBasicURL + strFacsimileURL + "&pn=" + strFacsimilePage + "&mode=" + strMode
                print("Processing Facsimile : " + strURL)
                xmlECHOtree = etree.parse(strURL, parserECHO)
                # Remove ECHO-namespaces
                objectify.deannotate(xmlECHOtree, xsi_nil=True)
                etree.cleanup_namespaces(xmlECHOtree)
                xmlDivs = xmlECHOtree.findall(".//div")
                for xmlDiv in xmlDivs:
                    if xmlDiv.get("class") == "pageContent":
                        # Create new EOA-Element
                        xmlEOAfacsimileelement = etree.Element("EOAfacsimileelement")
                        xmlEOAfacsimileelement.set("type", strMode)
                        # Download the images of the <div> and point their src to the local copy
                        xmlImages = xmlDiv.findall(".//img")
                        intFacImgNumber = 1
                        for xmlImage in xmlImages:
                            strImageSrc = xmlImage.get("src")
                            strSupplementName = "facsupplements_" + str(intFacNumber) + "_" + str(intFacPageNumber) + "_" + str(intFacImgNumber) + ".jpg"
                            # Pass the arguments as a list so that special
                            # characters in the URL cannot break the command line.
                            listArguments = ["curl", strImageSrc, "-o", "CONVERT/django/images/" + strSupplementName]
                            try:
                                subprocess.check_output(listArguments, shell=False, universal_newlines=True)
                            except (subprocess.CalledProcessError, OSError):
                                # Download failed: retag the image as "temp"
                                # so that strip_tags() below drops the tag.
                                xmlImage.tag = "temp"
                            else:
                                xmlImage.set("src", strSupplementName)
                            intFacImgNumber += 1
                        xmlEOAfacsimileelement.append(xmlDiv)
                        xmlEOAfacsimilepage.append(xmlEOAfacsimileelement)
        intFacPageNumber += 1
        xmlEOAfacsimilepage.set("file", strImageFileDir + strImageFileName)
        xmlEOAfacsimilepage.set("label", str(strLabel))
        xmlEOAfacsimilepage.set("pagenumber", str(strPagenumber))
        xmlEOAfacsimilepart.append(xmlEOAfacsimilepage)
    # Was "=+ 1", which assigned +1 on every pass instead of counting up, so
    # the supplement images of different parts overwrote each other.
    intFacNumber += 1
etree.strip_tags(xmlDjangoTree, "temp")
print("----------------------------------------------")
print("Processing and linking Footnotes for django")
def bring_footnote_down_django(footnote, fragment, footnote_number, object_number, unique_id, destination):
    """
    Replace an inline footnote by a link and append its content to destination.

    The footnote element is handed to replace_footnote_with_sup() (defined
    elsewhere in this file), gets class "footnote" and an <a> child pointing
    to "#" + fragment.  The former text and children of the footnote are
    moved into a new EOAfootnote element, which is appended to destination.
    Its "anchor" attribute is the "order" of the nearest ancestor of the
    footnote that has one; StopIteration is raised when there is none.

    usage: intObjectNumber = bring_footnote_down_django(xmlFootnote, "fn"+str(intFootnoteNumber), str(intFootnoteNumber), intObjectNumber, tmpStrUID, xmlResult)

    Returns object_number + 1, i.e. the next free order number; the caller
    has to store it, no global counter is changed here.
    """
    # Save the content first: it has to be read before the footnote is rewritten.
    children = list(footnote.getchildren())
    text = footnote.text or ""
    replace_footnote_with_sup(footnote)
    footnote.set("class", "footnote")
    link = etree.Element("a")
    link.set("href", "#" + fragment)
    link.text = footnote_number
    footnote.append(link)

    entry = etree.Element("EOAfootnote")
    entry.set("order", str(object_number))
    entry.set("number", footnote_number)
    ancestor_orders = (ancestor.get("order") for ancestor in footnote.iterancestors())
    entry.set("anchor", next(order for order in ancestor_orders if order is not None))
    entry.set("id", unique_id)
    entry.text = text
    for child in children:
        if child.tag == "EOAequationnonumber":
            # Equations are images that have to be available to django as well
            cwd = os.getcwd()
            source = "{}/items/{}".format(cwd, child.get("filename"))
            target = "{}/CONVERT/django/images/".format(cwd)
            shutil.copy(source, target)
        entry.append(child)
    destination.append(entry)
    return object_number + 1
# def bring_footnote_down_django ends here
# Move the footnotes of every chapter into a "Footnotes" section at the end
# of the chapter and leave a numbered link at the original position.
xmlEOAchapters = xmlEOAdocument.findall(".//EOAchapter")
debug_chapters(xmlEOAchapters)
for xmlEOAchapter in xmlEOAchapters:
    groupings = libeoaconvert.get_bigfoot_data(xmlEOAchapter)
    has_old = len(xmlEOAchapter.findall(".//note")) > 0
    has_new = sum(1 for grouping, notes in groupings for note in notes) > 0
    # Both styles at once is an error (that should have already been thrown
    # during the epub phase); none at all means there is nothing to do.
    if has_old and has_new:
        raise FootnoteError("This chapter contains both old-style footnotes and new-style footnotes")
    if not has_old and not has_new:
        continue
    # Find out the running order of the last item of the chapter
    # For each footnote, first correct the EOAequationnonumber in <p>
    # Then process the child elements of each footnote and attach them to the new footnote
    # Where needed, check whether a paragraph is indented and wrap it in a blockquote
    xmlElement = xmlEOAchapter[-1]
    print(etree.tostring(xmlElement))
    intObjectNumber = int(xmlElement.get("order")) + 1
    intFootnoteNumber = 1
    # Everything is collected below a "temp" element whose tag is stripped at the end.
    xmlResult = etree.Element("temp")
    xmlEOAsection = etree.Element("EOAsection")
    xmlEOAsection.set("order", str(intObjectNumber))
    intObjectNumber += 1
    xmlHead = etree.Element("head")
    xmlHead.text = libeoaconvert.dictLangFootnotes[xmlEOAchapter.get("language")]
    xmlEOAsection.append(xmlHead)
    xmlResult.append(xmlEOAsection)
    # New-style footnotes: numbered per grouping
    for grouping, notes in groupings:
        for index, note in enumerate(notes):
            # do for the new-style notes what the old code did for the other footnotes
            if "lower-latin" == grouping:
                fntext = alph_footnote_index(index)
            else:
                fntext = str(index + 1)
            unique_id = "fn%s" % fntext
            intObjectNumber = bring_footnote_down_django(note, unique_id, fntext, intObjectNumber, unique_id, xmlResult)
    # Old-style footnotes: numbered consecutively within the chapter
    intFootnoteNumber = 1
    xmlFootnotes = xmlEOAchapter.findall(".//note")
    for xmlFootnote in xmlFootnotes:
        # Save content, tail and id, because clear() wipes all of them
        xmlFootnoteContent = xmlFootnote.getchildren()
        strFootnoteText = xmlFootnote.text or ""
        tmpTail = xmlFootnote.tail
        tmpStrUID = xmlFootnote.get("id")
        xmlFootnote.clear()
        xmlFootnote.tail = tmpTail
        # The former <note> becomes <sup class="footnote"><a href="#fnN">N</a></sup>
        xmlFootnote.tag = "sup"
        xmlFootnote.set("class", "footnote")
        xmlFootnoteLink = etree.Element("a")
        xmlFootnoteLink.set("href", "#fn" + str(intFootnoteNumber))
        xmlFootnoteLink.text = str(intFootnoteNumber)
        xmlFootnote.append(xmlFootnoteLink)
        xmlEOAfootnote = etree.Element("EOAfootnote")
        xmlEOAfootnote.set("order", str(intObjectNumber))
        intObjectNumber += 1
        xmlEOAfootnote.set("number", str(intFootnoteNumber))
        # The anchor is the order of the nearest ancestor that has one
        for xmlParent in xmlFootnote.iterancestors():
            if xmlParent.get("order") is not None:
                strFootnoteAnchorNumber = xmlParent.get("order")
                break
        xmlEOAfootnote.set("anchor", strFootnoteAnchorNumber)
        xmlEOAfootnote.set("id", tmpStrUID)
        xmlEOAfootnote.text = strFootnoteText
        for xmlElement in xmlFootnoteContent:
            if xmlElement.tag == "EOAequationnonumber":
                strCurrentDir = os.getcwd()
                shutil.copy(strCurrentDir + "/items/" + xmlElement.get("filename"), strCurrentDir + "/CONVERT/django/images/")
            xmlEOAfootnote.append(xmlElement)
        xmlResult.append(xmlEOAfootnote)
        intFootnoteNumber += 1
    xmlEOAchapter.append(xmlResult)
# Remove temp-Tag
etree.strip_tags(xmlDjangoTree, "temp")
# print("----------------------------------------------") | |
# print("Processing Verses") | |
# for xmlEOAchapter in xmlEOAchapters: | |
# verses = xmlEOAchapter.findall(".//EOAverse") | |
# print("Found lotsa verses: ", len(verses)) | |
print("----------------------------------------------")
print("Processing various Elements")
# Elements that only get a new tag name and a fixed set of attributes:
# (EOA tag, HTML tag, attributes to set)
listInlineConversions = [
    # superscript and subscript
    ("EOAup", "sup", ()),
    ("EOAdown", "sub", ()),
    # strike-through
    ("EOAst", "span", (("style", "text-decoration: line-through;"),)),
    # letter-spacing
    ("EOAls", "span", (("style", "letter-spacing: 0.5em;"),)),
    # small caps
    ("EOAcaps", "span", (("style", "font-variant:small-caps;"),)),
]
# All citation variants become the same kind of span.
listCitationConversions = [
    (strCitationTag, "span", (("class", "citation"), ("rel", "popover")))
    for strCitationTag in ("EOAcitenumeric", "EOAciteauthoryear", "EOAciteyear", "EOAcitemanual")
]
strWorkingDir = os.getcwd()
for xmlEOAchapter in xmlEOAchapters:
    # <hi rend="it"> becomes <em>, <hi rend="bold"> becomes <b>
    for xmlHi in xmlEOAchapter.findall(".//hi"):
        if xmlHi.get("rend") == "it":
            xmlHi.tag = "em"
            del xmlHi.attrib["rend"]
        elif xmlHi.get("rend") == "bold":
            xmlHi.tag = "b"
            del xmlHi.attrib["rend"]
    # <xref url="..."> becomes <a href="...">; the URL is also used as link text
    for xmlHyperlink in xmlEOAchapter.findall(".//xref"):
        strURL = xmlHyperlink.get("url")
        if not strURL.startswith(("http://", "https://")):
            strURL = "http://" + strURL
        xmlHyperlink.tag = "a"
        del xmlHyperlink.attrib["url"]
        xmlHyperlink.set("href", strURL)
        etree.strip_elements(xmlHyperlink, "allowbreak", with_tail=True)
        xmlHyperlink.text = strURL
    for strOldTag, strNewTag, listAttributes in listInlineConversions:
        for xmlInline in xmlEOAchapter.findall(".//" + strOldTag):
            xmlInline.tag = strNewTag
            for strName, strValue in listAttributes:
                xmlInline.set(strName, strValue)
    # Convert EOAineq into appropriate IMG-Tags and copy the image
    for xmlInlineEquation in xmlEOAchapter.findall(".//EOAineq"):
        xmlInlineEquation.tag = "img"
        xmlInlineEquation.set("class", "EOAineq")
        xmlInlineEquation.set("alt", "")
        shutil.copy(strWorkingDir + "/items/" + xmlInlineEquation.get("src"), strWorkingDir + "/CONVERT/django/images/" + xmlInlineEquation.get("src"))
    # Convert EOAinline into appropriate IMG-Tags; the image is copied and
    # scaled down to fit into 20x20
    for xmlInlineElement in xmlEOAchapter.findall(".//EOAinline"):
        xmlInlineElement.tag = "img"
        xmlInlineElement.set("class", "eoainlineimage")
        xmlInlineElement.set("alt", "")
        strInlineElementFilePath = xmlInlineElement.text
        strInlineElementFileName = os.path.basename(strInlineElementFilePath)
        strInlineElementDirName = os.path.dirname(strInlineElementFilePath)
        xmlInlineElement.text = None
        # Directory and file name are joined without separator, so that all
        # embedded images end up in one flat directory.
        xmlInlineElement.set("src", strInlineElementDirName + strInlineElementFileName)
        strNewImagePath = strWorkingDir + "/CONVERT/django/images/embedded/" + strInlineElementDirName + strInlineElementFileName
        shutil.copy(strWorkingDir + "/" + strInlineElementDirName + "/" + strInlineElementFileName, strNewImagePath)
        # Only the configured executable is split shell-style; the paths are
        # passed as separate arguments so that blanks in them do no harm.
        listArguments = shlex.split(GM_PATH) + ["convert", strNewImagePath, "-resize", "20x20", strNewImagePath]
        subprocess.check_output(listArguments, shell=False)
    # Change the citation elements into spans to create appropriate links
    for strOldTag, strNewTag, listAttributes in listCitationConversions:
        for xmlCitation in xmlEOAchapter.findall(".//" + strOldTag):
            xmlCitation.tag = strNewTag
            for strName, strValue in listAttributes:
                xmlCitation.set(strName, strValue)
print("----------------------------------------------")
print("Processing Cross References")


def _orders_for_target(xmlTarget):
    """Return (chapter order, object order) of xmlTarget.

    Returns None when xmlTarget has no EOAchapter ancestor.
    """
    for xmlParent in xmlTarget.iterancestors():
        if xmlParent.tag == "EOAchapter":
            return xmlParent.get("order"), xmlTarget.get("order")
    return None


def _find_orders(strTag, strAttribute, strValue, strMessage):
    """Look for elements <strTag> in xmlEOAdocument whose strAttribute equals strValue.

    strMessage is printed for every matching element.  Returns the
    (chapter order, object order) pair of the last match that sits inside
    an EOAchapter, or None when there is no such match.
    """
    tplOrders = None
    for xmlCandidate in xmlEOAdocument.findall(".//" + strTag):
        if xmlCandidate.get(strAttribute) == strValue:
            print(strMessage)
            tplOrders = _orders_for_target(xmlCandidate) or tplOrders
    return tplOrders


# Substitute references with their targets (with links).  A reference is
# looked up in every dictionary in turn; when several of them know the
# key, the last lookup wins.
for xmlEOAchapter in xmlEOAchapters:
    xmlReferences = xmlEOAchapter.findall(".//EOAref")
    for xmlReference in xmlReferences:
        strResult = "!!! Cross Reference !!!"
        strChapterOrder = ""
        strObjectOrder = ""
        # A reference without Label or ref child simply cannot be resolved.
        xmlReferenceLabel = xmlReference.find("Label")
        xmlReferenceLabelText = xmlReferenceLabel.text if xmlReferenceLabel is not None else None
        xmlReferenceRef = xmlReference.find("ref")
        xmlReferenceRefTarget = xmlReferenceRef.get("target") if xmlReferenceRef is not None else None
        tplOrders = None
        if xmlReferenceLabelText in dictEquations:
            # Grab Number from Dictionary
            strResult = dictEquations[xmlReferenceLabelText]
            tplOrders = _find_orders("EOAequation", "label", xmlReferenceLabelText, "Erfolgreich Verweis auf Array-Formel gefunden:" + strResult) or tplOrders
        if xmlReferenceRefTarget in dictEquations:
            # Grab Number from Dictionary
            strResult = dictEquations[xmlReferenceRefTarget]
            tplOrders = _find_orders("EOAequation", "uid", xmlReferenceRefTarget, "Erfolgreich Verweis auf normale Formel gefunden: " + strResult) or tplOrders
        if xmlReferenceRefTarget in dictLists:
            print("Verweis auf Liste gefunden")
            strResult = dictLists[xmlReferenceRefTarget]
            listTargets = xmlEOAdocument.xpath("//EOAchapter/*[contains(@id, $targetuid)]", targetuid=xmlReferenceRefTarget)
            # Only the first matching element is used; no match leaves the link empty.
            if listTargets:
                tplOrders = _orders_for_target(listTargets[0]) or tplOrders
        if xmlReferenceRefTarget in dictChapters:
            print("Verweis auf Kapitel gefunden")
            strResult = dictChapters[xmlReferenceRefTarget]
            # A separate loop variable: xmlEOAchapter belongs to the outer loop.
            for xmlTargetChapter in xmlEOAdocument.findall(".//EOAchapter"):
                if xmlTargetChapter.get("id") == xmlReferenceRefTarget:
                    print("Erfolgreich Verweis auf ein Kapitel bearbeitet: " + strResult)
                    tplOrders = (xmlTargetChapter.get("order"), "top")
        if xmlReferenceRefTarget in dictTheorems:
            print("Verweis auf ein Theorem gefunden")
            strResult = dictTheorems[xmlReferenceRefTarget]
            tplOrders = _find_orders("EOAtheorem", "uid", xmlReferenceRefTarget, "Erfolgrech Verweis auf ein Theorem bearbeitet: " + strResult) or tplOrders
        if xmlReferenceRefTarget in dictSections:
            print("Verweis auf Section gefunden")
            strResult = dictSections[xmlReferenceRefTarget]
            tplOrders = _find_orders("EOAsection", "id", xmlReferenceRefTarget, "Erfolgreich Verweis auf eine Section bearbeitet: " + strResult) or tplOrders
            tplOrders = _find_orders("EOAsubsection", "id", xmlReferenceRefTarget, "Erfolgreich Verweis auf eine Sub-Section bearbeitet: " + strResult) or tplOrders
        if xmlReferenceRefTarget in dictFigures:
            print("Verweis auf Abbildung gefunden")
            strResult = dictFigures[xmlReferenceRefTarget]
            tplOrders = _find_orders("EOAfigure", "id", xmlReferenceRefTarget, "Erfolgreich Verweis auf eine Abbildung bearbeitet: " + strResult) or tplOrders
        if xmlReferenceRefTarget in dictFootnotes:
            print("Verweis auf Fussnote gefunden")
            strResult = dictFootnotes[xmlReferenceRefTarget]
            tplOrders = _find_orders("EOAfootnote", "id", xmlReferenceRefTarget, "Erfolgreich Verweis auf eine Fussnote bearbeitet: " + strResult) or tplOrders
        if xmlReferenceLabelText in dictTables:
            print("Verweis auf Tabelle gefunden")
            strResult = dictTables[xmlReferenceLabelText]
            tplOrders = _find_orders("EOAtable", "label", xmlReferenceLabelText, "Erfolgreich Verweis auf eine Tabelle bearbeitet:" + strResult) or tplOrders
        if tplOrders is not None:
            strChapterOrder, strObjectOrder = tplOrders
        # Replace the reference by a link; clear() also wipes the tail, so keep it
        tmpTail = xmlReference.tail or ""
        xmlReference.clear()
        xmlReference.text = strResult
        xmlReference.tail = tmpTail
        xmlReference.tag = "a"
        xmlReference.set("href", "../" + strChapterOrder + "/index.html#" + strObjectOrder)
print("----------------------------------------------")
print("Processing Page References")
# The facsimile pages are not changed in this pass, so collect them once.
xmlEOAfacsimilepages = xmlEOAdocument.findall(".//EOAfacsimilepage")
for xmlEOAchapter in xmlEOAchapters:
    xmlPageReferences = xmlEOAchapter.findall(".//EOApageref")
    for xmlReference in xmlPageReferences:
        # Reset the placeholder for every reference; otherwise an unresolved
        # reference would show the page of the reference before it.
        strResult = "!!! Page Reference !!!"
        xmlReferenceLabel = xmlReference.find("Label")
        xmlReferenceLabelText = xmlReferenceLabel.text if xmlReferenceLabel is not None else None
        if xmlReferenceLabelText in dictPagelabels:
            print("Verweis auf Seite gefunden: " + xmlReferenceLabelText)
            strResult = dictPagelabels[xmlReferenceLabelText]
        xmlReference.text = strResult
        # Iterate over a copy, because the children are removed on the way
        for xmlChild in list(xmlReference):
            xmlReference.remove(xmlChild)
        # Check, if EOApageref points to a Facsimile-Page
        # If yes, make a href to the facsimile
        for xmlEOAfacsimilepage in xmlEOAfacsimilepages:
            if xmlEOAfacsimilepage.get("label") == xmlReferenceLabelText:
                print("Querverweis auf ein Facsimile gefunden")
                xmlReference.tag = "a"
                strPartOrder = xmlEOAfacsimilepage.getparent().get("order")
                strFacsimileOrder = xmlEOAfacsimilepage.get("order")
                print(strFacsimileOrder)
                xmlReference.set("href", "../" + strPartOrder + "/" + strFacsimileOrder + ".html")
print("----------------------------------------------")
print("Normalizing Index Entries")
# The text of an index entry has the form
#     main[@display][!secondary[@display]][|addition]
# This pass moves the parts into attributes of the (then empty) element.
for xmlEOAchapter in xmlEOAchapters:
    xml_EOA_indices = xmlEOAchapter.xpath(".//EOAindex | .//EOAindexperson | .//EOAindexlocation")
    for xmlEOAindex in xml_EOA_indices:
        # Using the gettext function here, because of subelements
        strEOAindextext = gettext(xmlEOAindex)
        strEOAindextext = strEOAindextext.replace("\n", " ")
        # Iterate over a copy, because the children are removed on the way
        for sub_element in list(xmlEOAindex):
            xmlEOAindex.remove(sub_element)
        xmlEOAindex.text = None
        listFirstPart = strEOAindextext.split("|")
        listSecondPart = listFirstPart[0].split("!")
        strMainEntry = listSecondPart[0]
        # Check if a sortkey is present via @
        listSortKey = strMainEntry.split("@")
        if len(listSortKey) == 2:
            xmlEOAindex.set("main", listSortKey[0])
            xmlEOAindex.set("display", listSortKey[1])
        else:
            xmlEOAindex.set("main", strMainEntry)
        if len(listSecondPart) > 1:
            strSecondPart = listSecondPart[1]
            listSecondarySortkey = strSecondPart.split("@")
            if len(listSecondarySortkey) == 2:
                xmlEOAindex.set("secondary", listSecondarySortkey[0])
                xmlEOAindex.set("secondarydisplay", listSecondarySortkey[1])
            else:
                xmlEOAindex.set("secondary", strSecondPart)
        if len(listFirstPart) > 1:
            strAddition = listFirstPart[1]
            if strAddition == "textbf":
                xmlEOAindex.set("bold", "true")
            # Cut off only the leading keyword.  The target of the reference
            # may itself contain "see" (e.g. "seed") and must stay intact.
            if strAddition.startswith("seealso"):
                xmlEOAindex.set("seealso", strAddition[len("seealso"):])
                # Entries containing seealso are omitted for the time being
                xmlEOAindex.tag = "temp"
            elif strAddition.startswith("see"):
                xmlEOAindex.set("see", strAddition[len("see"):])
                # Entries containing see are omitted for the time being
                xmlEOAindex.tag = "temp"
        # Figure out parent chapter number and parent Element order
        for xmlParent in xmlEOAindex.iterancestors():
            if xmlParent.get("order") is None:
                continue
            if xmlParent.tag == "EOAchapter":
                xmlEOAindex.set("chapterorder", xmlParent.get("order"))
            else:
                xmlEOAindex.set("elementorder", xmlParent.get("order"))
        # print(etree.tostring(xmlEOAindex))
etree.strip_tags(xmlDjangoTree, "temp")
print("----------------------------------------------")
print("Removing Duplicate Index Entries")
# Within one child element of a chapter every index entry shall occur only
# once.  Duplicates are retagged as "temp"; nothing is stripped here.
for xmlEOAchapter in xmlEOAchapters:
    for xmlChild in xmlEOAchapter.iterchildren():
        # (tag, main entry) -> subentries already seen in this element.
        # The tag is part of the key, so that the regular, person and
        # location index do not remove each other's entries.
        dictEntries = {}
        xml_EOA_indices = xmlChild.xpath(".//EOAindex | .//EOAindexperson | .//EOAindexlocation")
        for xmlEOAindex in xml_EOA_indices:
            tplEntry = (xmlEOAindex.tag, xmlEOAindex.get("main"))
            strSubentry = xmlEOAindex.get("secondary")
            if tplEntry not in dictEntries:
                # Remember the subentry of the first occurrence as well;
                # otherwise its first repetition is not recognised.
                dictEntries[tplEntry] = [strSubentry]
                continue
            if strSubentry is None or strSubentry in dictEntries[tplEntry]:
                # Entries carrying a see/seealso reference are kept
                if (xmlEOAindex.get("see") is None) and (xmlEOAindex.get("seealso") is None):
                    xmlEOAindex.tag = "temp"
            else:
                dictEntries[tplEntry].append(strSubentry)
print("----------------------------------------------")
print("Removing Index Entries in Footnotes")
# Index entries that sit inside a footnote are retagged as "temp".
for xmlEOAchapter in xmlEOAchapters:
    for xmlChild in xmlEOAchapter.iterchildren():
        dictEntries = {}
        xml_EOA_indices = xmlChild.xpath(".//EOAindex | .//EOAindexperson | .//EOAindexlocation")
        for xmlEOAindex in xml_EOA_indices:
            # Only the ancestors that are footnotes are of interest
            for xmlParent in xmlEOAindex.iterancestors("EOAfootnote"):
                xmlEOAindex.tag = "temp"
                print("Ding Index in Footnote")
print("----------------------------------------------")
print("Sorting and Creating Regular Index")
xml_regular_EOAindices = xmlDjangoTree.findall("//EOAindex")
# Stays None when the document contains no regular index entries, so the
# placeholder handling below never touches an unbound name.
xml_eoa_print_regular_index = None
if xml_regular_EOAindices:
    print("Sorting " + str(len(xml_regular_EOAindices)) + " entries for regular index.")
    xml_eoa_print_regular_index = make_index(xml_regular_EOAindices, index_type="regular")
# If EOAprintindex is found, append xml_eoa_print_regular_index to xmlEOAdocument
xmlPrintindex = xmlDjangoTree.find(".//EOAprintindex")
# Compare against None explicitly: an lxml element without children is falsy.
if xmlPrintindex is not None:
    # Retag the placeholder and its parent (expected: <p><EOAprintindex/></p>)
    # as "temp" so the cleanup stage strips them from xmlDjangoTree.
    xmlPrintindex.tag = "temp"
    xmlPrintindex.getparent().tag = "temp"
    if xml_eoa_print_regular_index is not None:
        xmlEOAdocument.append(xml_eoa_print_regular_index)
print("----------------------------------------------")
print("Sorting and Creating Person Index")
xml_person_EOAindices = xmlDjangoTree.findall("//EOAindexperson")
# Stays None when the document contains no person index entries, so the
# placeholder handling below never touches an unbound name.
xml_eoa_print_person_index = None
if xml_person_EOAindices:
    xml_eoa_print_person_index = make_index(xml_person_EOAindices, index_type="person")
# If EOAprintpersonindex is found, append xml_eoa_print_person_index to xmlEOAdocument
xmlPrintindex = xmlDjangoTree.find(".//EOAprintpersonindex")
# Compare against None explicitly: an lxml element without children is falsy.
if xmlPrintindex is not None:
    # Retag the placeholder and its parent (expected:
    # <p><EOAprintpersonindex/></p>) as "temp" so the cleanup stage strips
    # them from xmlDjangoTree.
    xmlPrintindex.tag = "temp"
    xmlPrintindex.getparent().tag = "temp"
    if xml_eoa_print_person_index is not None:
        xmlEOAdocument.append(xml_eoa_print_person_index)
# Doing the same for the location index
print("----------------------------------------------")
print("Sorting and Creating Location Index")
xml_location_EOAindices = xmlDjangoTree.findall("//EOAindexlocation")
# Stays None when the document contains no location index entries, so the
# placeholder handling below never touches an unbound name.
xml_eoa_print_location_index = None
if xml_location_EOAindices:
    xml_eoa_print_location_index = make_index(xml_location_EOAindices, index_type="location")
# If EOAprintlocationindex is found, append xml_eoa_print_location_index to xmlEOAdocument
xmlPrintindex = xmlDjangoTree.find(".//EOAprintlocationindex")
# Compare against None explicitly: an lxml element without children is falsy.
if xmlPrintindex is not None:
    # Retag the placeholder and its parent as "temp" so the cleanup stage
    # strips them from xmlDjangoTree.
    xmlPrintindex.tag = "temp"
    xmlPrintindex.getparent().tag = "temp"
    # Append the location index built above (the previous code appended
    # xmlEOAprintindex, which is not the index created in this section).
    if xml_eoa_print_location_index is not None:
        xmlEOAdocument.append(xml_eoa_print_location_index)
############################################################################
#                               Cleaning up                                #
############################################################################
# TODO: Delete the unnecessary attributes such as id
# TODO: Delete the unnecessary tags such as EOAlabel
# Tags that are unwrapped: the element itself goes away, its text and
# children are merged into the parent.
tags_to_unwrap = ("temp", "citetext", "EOAprintbibliography")
etree.strip_tags(xmlDjangoTree, *tags_to_unwrap)
# citekey elements are removed together with their content; with_tail=False
# keeps the text that follows each removed element.
etree.strip_elements(xmlDjangoTree, "citekey", with_tail=False)
# Attributes that are dropped from every element of the tree.
attributes_to_drop = ("id-text", "id", "noindent", "type", "label", "spacebefore", "rend")
etree.strip_attributes(xmlDjangoTree, *attributes_to_drop)
############################################################################
#                            Save xmlDjangoTree                            #
############################################################################
# Serialize first, so an error during serialization cannot leave a truncated
# Django.xml behind.
tmpResult = etree.tostring(xmlDjangoTree, pretty_print=True, encoding="unicode")
# encoding="unicode" yields a str without an XML declaration, so the file has
# to be written as UTF-8 regardless of the locale's default encoding. The
# context manager closes the file even if the write fails.
with open("CONVERT/django/Django.xml", "w", encoding="utf-8") as tmpFile:
    tmpFile.write(tmpResult)
print("Wrote Django.xml")