Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
EOASkripts/imxml2django.py
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
executable file
1536 lines (1418 sloc)
74.8 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# -*- coding: utf-8; mode: python -*- | |
# Time-stamp: <2018-03-19 16:07:01 (kthoden)> | |
import pickle | |
import os | |
import sys | |
import re | |
import shutil | |
import shlex | |
import subprocess | |
import argparse | |
import configparser | |
import libeoaconvert | |
import logging | |
from copy import deepcopy | |
from lxml import etree | |
#####################
# Parsing arguments #
#####################
# The only command line option is the path to the configuration file.
parser = argparse.ArgumentParser()
parser.add_argument("-c", "--config", dest="CONFIG_FILE", help="Name of configuration file", metavar="CONFIGURATION")
args = parser.parse_args()

if args.CONFIG_FILE is not None:
    CONFIG_FILE = os.path.abspath(args.CONFIG_FILE)
else:
    # Default: config/eoaconvert.cfg next to this script.
    # The script path is made absolute first: os.path.dirname() of a bare
    # script name is "", and plain string concatenation would then point at
    # "/config/eoaconvert.cfg" in the file system root.
    CONFIG_FILE = os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])), "config", "eoaconvert.cfg")
##################################
# Reading the configuration file #
##################################
CONFIG = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the
# list of files it actually parsed.  Fail early with a clear message instead
# of an opaque KeyError on the first section lookup below.
if not CONFIG.read(CONFIG_FILE):
    raise FileNotFoundError("Configuration file %s could not be read." % CONFIG_FILE)

######################
# Setting up logging #
######################
LOGFILE = CONFIG['General']['logfile']
LOGLEVEL = CONFIG['General']['loglevel']
# NOTE(review): LOGFILE is read but not passed to basicConfig here, so log
# output goes to the default stream handler.
logging.basicConfig(level=LOGLEVEL, format='%(asctime)s - %(levelname)s - %(message)s')
logging.debug("The configfile is%s." % CONFIG_FILE)

########################
# Paths to executables #
########################
GM_PATH = CONFIG['Executables']['graphicsmagic']
TL_PATH = CONFIG['Executables']['texlive']
###########################################
# Loading data from first conversion step #
###########################################
# NOTE: unpickling executes arbitrary code embedded in the file, so
# tmp_files/data.pickle must only ever come from a trusted earlier run.
with open('tmp_files/data.pickle', 'rb') as f:
    data = pickle.load(f)

# Lookup tables stored in the pickle, one per kind of numbered object.
(
    dictChapters,
    dictEquations,
    dictLists,
    dictTheorems,
    dictSections,
    dictFigures,
    dictFootnotes,
    dictTables,
    dictPagelabels,
) = (
    data[key]
    for key in (
        "chapterdict",
        "eqdict",
        "listdict",
        "theoremdict",
        "secdict",
        "figdict",
        "fndict",
        "tabdict",
        "pagelabeldict",
    )
)

# The intermediate XML document that is converted below.
xmlTree = etree.parse("tmp_files/IntermediateXMLFile.xml")
print("""
############################################################################
# Convert tralics-XML to Django Data Structure #
############################################################################
""")

# Create django File Structure.
# makedirs(exist_ok=True) also repairs a partially created tree; checking
# only for CONVERT/django would leave missing subdirectories uncreated.
os.makedirs(os.getcwd() + "/CONVERT/django/images/embedded", exist_ok=True)
os.makedirs(os.getcwd() + "/CONVERT/django/files", exist_ok=True)

# Create empty xmlTree
xmlEOAdocument = etree.Element("EOAdocument")
xmlDjangoTree = etree.ElementTree(xmlEOAdocument)
etree.strip_attributes(xmlTree, "noindent")

# Remove temp-Tag
etree.strip_tags(xmlTree, "temp")

# Write Temporary XML-Maintree.
# The serialisation carries no XML declaration, so it is written as UTF-8
# explicitly rather than in the locale's default encoding; the context
# manager closes the file even if writing fails.
ergebnis = etree.tostring(xmlTree, pretty_print=True, encoding="unicode")
with open("tmp_files/Devel_django.xml", "w", encoding="utf-8") as ergebnisdatei:
    ergebnisdatei.write(ergebnis)

# Find all Chapters from the original tralics XML
xmlChapters = xmlTree.findall("//div1")
def replace_footnote_with_sup(note):
    """Turn a footnote element into an empty ``sup`` element in place.

    The element is cleared and renamed to ``sup``. The text following the
    element (its ``tail``) is saved before clearing and put back afterwards,
    so the surrounding running text is not affected.
    """
    preserved_tail = note.tail
    note.clear()
    note.tag, note.tail = "sup", preserved_tail
# def replace_footnote_with_sup ends here
def alph_footnote_index(fndex):
    """Return the lowercase Latin footnote label for a zero-based index.

    Labels continue past ``z`` like spreadsheet columns:
    ``a`` ... ``z``, ``aa`` ... ``az``, ``ba`` ... ``zz``, ``aaa`` and so on.

    Args:
        fndex: zero-based footnote index; must not be negative.

    Returns:
        The label as a string.

    Raises:
        ValueError: if ``fndex`` is negative (there is no label for it).

    >>> alph_footnote_index(0)
    'a'
    >>> alph_footnote_index(1)
    'b'
    >>> alph_footnote_index(24)
    'y'
    >>> alph_footnote_index(25)
    'z'
    >>> alph_footnote_index(26)
    'aa'
    >>> alph_footnote_index(27)
    'ab'
    """
    if fndex < 0:
        raise ValueError("footnote index must not be negative: %r" % (fndex,))
    alphabet = "abcdefghijklmnopqrstuvwxyz"
    letters = []
    while True:
        fndex, remainder = divmod(fndex, len(alphabet))
        letters.append(alphabet[remainder])
        if not fndex:
            break
        # There is no zero digit in this numbering, so every further
        # position is shifted down by one.
        fndex -= 1
    # Letters were collected from the last position to the first.
    return "".join(reversed(letters))
# def alph_footnote_index ends here
def debug_chapters(xmlEOAchapters):
    """Write each chapter to its own file for debugging.

    Chapter number ``n`` (counted from 1 in iteration order) is serialised
    to ``debug/debug-chapter-NN.xml`` below the current working directory.
    The ``debug`` directory is not created here and has to exist already.

    Args:
        xmlEOAchapters: iterable of chapter elements to serialise.
    """
    for chap_num, chapter in enumerate(xmlEOAchapters, start=1):
        tmp_filename = "%s/debug/debug-chapter-%02d.xml" % (os.getcwd(), chap_num)
        tmp_result = etree.tostring(chapter, pretty_print=True, encoding="unicode")
        # Explicit UTF-8: the output has no XML declaration, and the locale's
        # default encoding may not be able to represent the text.  The
        # context manager closes the file even when writing fails.
        with open(tmp_filename, "w", encoding="utf-8") as tmp_file:
            tmp_file.write(tmp_result)
# def debug_chapters ends here
def gettext(xmlElement):
    """Return the text of an element and all its descendants, without markup.

    The result is the element's own text, followed for every child (in
    document order) by the child's flattened text and the text trailing that
    child. The tail of ``xmlElement`` itself is not included.
    """
    pieces = [xmlElement.text or ""]
    for child in xmlElement:
        pieces.append(gettext(child))
        pieces.append(child.tail or "")
    return "".join(pieces)
# def gettext ends here
def djangoParseObject(xmlElement, indent=False, listtype=None, listnumber=0, uid=None):
    """Convert one element of the intermediate XML into its Django-import form.

    The function dispatches on the element's tag (for some branches: on the
    presence of certain descendants) and builds the matching ``EOA*`` element.
    Most branches stamp what they create with the current value of the global
    counter ``intObjectNumber`` as ``order`` attribute and then increment the
    counter.  Figure and equation branches additionally copy or convert image
    files into ``CONVERT/django/images`` below the current working directory.

    Args:
        xmlElement: the element to convert.  It is frequently modified in
            place: retagged, or emptied by moving its children into the result.
        indent: if True, the result gets the attribute ``indent="True"``.
        listtype: if not None, stored in the result's ``listtype`` attribute.
        listnumber: if different from 0, stored in the result's
            ``listnumber`` attribute.
        uid: if not None, stored in the result's ``id`` attribute.

    Returns:
        The converted element.  Branches that produce several sibling objects
        collect them in a wrapper element named ``temp`` (presumably removed
        again by the caller -- not visible in this function).
    """
    # Get Dictionaries of Numbers via Global Variables
    global dictChapters
    global dictFigures
    global dictEquations
    global dictSections
    global dictFootnotes
    global dictPagelabels
    global dictTables
    global dictLists
    global intObjectNumber
    # Check what kind of Element we have and change the data.
    # Only nodes whose tag is a string are dispatched; everything else
    # (presumably comments or processing instructions) goes to the final else.
    if isinstance(xmlElement.tag, str):
        if xmlElement.tag == "EOAtranscripted":
            # Transcription: two headers plus a text body that is split into
            # a left and a right column at the first pagebreak element.
            xmlResult = etree.Element("temp")
            xmlEOATranscription = etree.Element("EOAtranscription")
            xmlEOATranscription.set("order", str(intObjectNumber))
            intObjectNumber += 1
            xmlLeftheader = xmlElement.find(".//Leftheader")
            etree.strip_tags(xmlLeftheader, "p")
            xmlEOATranscription.append(xmlLeftheader)
            xmlRightheader = xmlElement.find(".//Rightheader")
            etree.strip_tags(xmlRightheader, "p")
            xmlEOATranscription.append(xmlRightheader)
            xmlTranscriptedtext = xmlElement.find(".//EOAtranscriptedtext")
            # The body is serialised and parsed again further down.  Earlier
            # versions rewrote blank lines and pagebreak paragraphs with two
            # regular-expression substitutions at this point; those are
            # disabled.
            strTranscriptedtext = etree.tostring(xmlTranscriptedtext, encoding="unicode")
            xmlLeftColumn = etree.Element("EOAtranscriptionleft")
            xmlRightColumn = etree.Element("EOAtranscriptionright")
            boolRightColumn = False
            xmlTemp = etree.XML(str(strTranscriptedtext))
            # NOTE(review): the loop variable rebinds the parameter name
            # xmlElement; the parameter is not used again in this branch.
            for xmlElement in xmlTemp.iterchildren():
                if xmlElement.tag == "pagebreak":
                    # Everything after the first pagebreak goes to the right
                    # column; the pagebreak itself is dropped.
                    boolRightColumn = True
                    continue
                if boolRightColumn == False:
                    xmlLeftColumn.append(xmlElement)
                if boolRightColumn == True:
                    xmlRightColumn.append(xmlElement)
            xmlEOATranscription.append(xmlLeftColumn)
            xmlEOATranscription.append(xmlRightColumn)
            # Convert Images within the transcription
            logging.debug("EOAfigurenonumber")
            xmlFigures = xmlEOATranscription.findall(".//EOAfigurenonumber")
            logging.debug(xmlFigures)
            if xmlFigures is not None:
                for xmlFigure in xmlFigures:
                    strImageFileString = xmlFigure.find(".//file").text
                    strImageFileString = strImageFileString.rstrip("\n")
                    # The directory part, with its slashes removed, becomes a
                    # prefix of the target file name.
                    strImageFileDir = os.path.dirname(strImageFileString)
                    strImageFileDir = re.sub("/", "", strImageFileDir)
                    strImageFileName = os.path.basename(strImageFileString)
                    # NOTE(review): strImageFileNamewoSuffix is not used afterwards.
                    strImageFileNamewoSuffix = os.path.splitext(strImageFileName)[0]
                    # Run the external converter configured as GM_PATH to write
                    # a copy resized with "250x250\>" into images/embedded.
                    strCommand = GM_PATH + " convert " + os.getcwd() + "/" + strImageFileString + " -resize 250x250\\> " + os.getcwd() + "/CONVERT/django/images/embedded/" + strImageFileDir + strImageFileName
                    listArguments = shlex.split(strCommand)
                    subprocess.check_output(listArguments, shell=False)
                    # NOTE(review): the tail is saved here but never written
                    # back after clear() -- confirm whether that is intended.
                    tmpStrTail = xmlFigure.tail
                    xmlFigure.clear()
                    xmlFigure.tag = "img"
                    xmlFigure.set("src", strImageFileDir + strImageFileName)
                    xmlFigure.set("alt", "")
            xmlResult.append(xmlEOATranscription)
        elif xmlElement.tag == "EOAletterhead":
            # Letter head: collect the four descriptive child elements.
            xmlResult = etree.Element("temp")
            xmlEOAletterhead = etree.Element("EOAletterhead")
            xmlEOAletterrecipient = xmlElement.find(".//Recipient")
            xmlEOAletterhead.append(xmlEOAletterrecipient)
            xmlEOAletterarchive = xmlElement.find(".//Archive")
            xmlEOAletterhead.append(xmlEOAletterarchive)
            xmlEOAletteradditional = xmlElement.find(".//Additional")
            xmlEOAletterhead.append(xmlEOAletteradditional)
            xmlEOAletterpages = xmlElement.find(".//Pages")
            xmlEOAletterhead.append(xmlEOAletterpages)
            xmlEOAletterhead.set("order", str(intObjectNumber))
            intObjectNumber += 1
            xmlResult.append(xmlEOAletterhead)
        elif xmlElement.tag == "EOAfigurenonumber":
            # elif xmlElement.findall(".//EOAfigurenonumber"):
            xmlResult = etree.Element("temp")
            # Create basic Element EOAfigurenonumber
            xmlEOAfigure = etree.Element("EOAfigurenonumber")
            # Copy Image (target name = directory without slashes + file name)
            strImageFileString = xmlElement.find(".//file").text
            strImageFileString = strImageFileString.rstrip("\n")
            strImageFileDir = os.path.dirname(strImageFileString)
            strImageFileDir = re.sub("/", "", strImageFileDir)
            strImageFileName = os.path.basename(strImageFileString)
            # NOTE(review): strImageFileNamewoSuffix is not used afterwards.
            strImageFileNamewoSuffix = os.path.splitext(strImageFileName)[0]
            shutil.copy(os.getcwd() + "/" + strImageFileString, os.getcwd() + "/CONVERT/django/images/" + strImageFileDir + strImageFileName)
            xmlEOAfigure.set("file", strImageFileDir + strImageFileName)
            xmlEOAfigure.set("width", xmlElement.find(".//width").text + "px;")
            xmlEOAfigure.set("order", str(intObjectNumber))
            intObjectNumber += 1
            xmlResult.append(xmlEOAfigure)
        elif xmlElement.tag == "EOAfigure":
            xmlResult = etree.Element("temp")
            # Create basic Element EOAfigure
            xmlEOAfigure = etree.Element("EOAfigure")
            # Copy Image (target name = directory without slashes + file name)
            strImageFileString = xmlElement.find(".//file").text
            strImageFileString = strImageFileString.rstrip("\n")
            strImageFileDir = os.path.dirname(strImageFileString)
            strImageFileDir = re.sub("/", "", strImageFileDir)
            strImageFileName = os.path.basename(strImageFileString)
            # NOTE(review): strImageFileNamewoSuffix is not used afterwards.
            strImageFileNamewoSuffix = os.path.splitext(strImageFileName)[0]
            shutil.copy(os.getcwd() + "/" + strImageFileString, os.getcwd() + "/CONVERT/django/images/" + strImageFileDir + strImageFileName)
            logging.debug("Django figure %s." % strImageFileName)
            # PDF figures are passed to libeoaconvert.sanitizeImage and are
            # then referenced with a .png name (presumably the helper
            # produces that file -- it is defined outside this file).
            if os.path.splitext(strImageFileName)[1].lower() == ".pdf":
                logging.debug("Found a PDF file")
                # NOTE(review): the return value is stored but not used below.
                strImageFilepath = libeoaconvert.sanitizeImage(os.getcwd() + "/CONVERT/django/images/" + strImageFileDir + strImageFileName, GM_PATH, TL_PATH)
                # NOTE(review): the suffix test above is case-insensitive, this
                # replace is not -- a ".PDF" name would stay unchanged.
                xmlEOAfigure.set("file", strImageFileDir + strImageFileName.replace(".pdf", ".png"))
                logging.debug("The filename is %s" % xmlEOAfigure.get("file"))
            else:
                xmlEOAfigure.set("file", strImageFileDir + strImageFileName)
            xmlEOAfigure.set("width", xmlElement.find(".//width").text + "px;")
            xmlEOAfigure.set("order", str(intObjectNumber))
            intObjectNumber += 1
            # Insert visual Number and uid (both keyed by the anchor's id)
            strFigureNumber = dictFigures[xmlElement.find(".//anchor").get("id")]
            xmlEOAfigure.set("number", strFigureNumber)
            strFigureUID = xmlElement.find(".//anchor").get("id")
            xmlEOAfigure.set("id", strFigureUID)
            # Insert Caption
            xmlEOAfigure.append(xmlElement.find(".//caption"))
            xmlResult.append(xmlEOAfigure)
        elif xmlElement.findall(".//EOAtable"):
            # Any element containing an EOAtable descendant: the raw table is
            # moved into a new EOAtable element and turned into tr/th/td rows.
            xmlResult = etree.Element("EOAtable")
            xmlRawTable = xmlElement.find(".//table")
            xmlResult.set("order", str(intObjectNumber))
            intObjectNumber += 1
            xmlResult.append(xmlRawTable)
            # Copy Number, Label and Caption; a caption whose text is exactly
            # "nonumber" marks a table without number.
            if xmlElement.find(".//EOAtablecaption").text != "nonumber":
                xmlResult.append(xmlElement.find(".//EOAtablecaption"))
                xmlResult.set("label", xmlElement.find(".//EOAtablelabel").text)
                xmlResult.set("number", dictTables[xmlElement.find(".//EOAtablelabel").text])
                xmlResult.set("id", xmlRawTable.get("id"))
            else:
                # NOTE(review): the attribute is set on the input element, not
                # on xmlResult -- confirm that this is intended.
                xmlElement.set("numbering", "false")
            #if xmlElement.find(".//EOAtablelabel").text is not None:
            # Transform width of Columns: the column specification consists of
            # an alignment letter (L, R or C) followed by a width ending in "cm".
            strColumnString = xmlElement.find(".//EOAtablecolumns").text
            strColumnString = re.sub(r"\|", "", strColumnString)
            reMatchObjects = re.findall(r'([L|R|C].*?cm)', strColumnString)
            intTableWidth = 0
            # Index 0 holds a None placeholder so that columns can be
            # addressed 1-based through intCurrentColumn below.
            listColumnAlignments = [None]
            listColumnWidths = [None]
            intNumberOfColumns = 0
            for strColumnDefinition in reMatchObjects:
                strColumnDefinition = strColumnDefinition.rstrip("cm")
                strColumnAlignment = strColumnDefinition[0]
                if strColumnAlignment == "L":
                    strColumnAlignment = "left"
                if strColumnAlignment == "C":
                    strColumnAlignment = "center"
                if strColumnAlignment == "R":
                    strColumnAlignment = "right"
                listColumnAlignments.append(strColumnAlignment)
                # Width in cm scaled by 75 (presumably to pixels -- TODO confirm).
                intColumnWidth = int(float(strColumnDefinition.lstrip("LRC")) * 75)
                listColumnWidths.append(intColumnWidth)
                intTableWidth += intColumnWidth
                intNumberOfColumns += 1
            xmlRawTable.set("width", str(intTableWidth))
            # Figure out and deal with the Header: a tableheader marker inside
            # a cell is removed and the cells of the first row become th.
            xmlHeader = xmlRawTable.find(".//row/cell/tableheader")
            if xmlHeader is not None:
                xmlHeader.text = ""
                xmlHeader.getparent().text = xmlHeader.tail
                xmlHeader.getparent().remove(xmlHeader)
                xmlFirstRow = xmlRawTable.find(".//row")
                xmlFirstRow.tag = "tr"
                xmlFirstRowCells = xmlFirstRow.findall(".//cell")
                for xmlFirstRowCell in xmlFirstRowCells:
                    xmlFirstRowCell.tag = "th"
            # Now Deal with the rest of the rows (rows and cells renamed above
            # are no longer matched by these searches).
            xmlTableRows = xmlRawTable.findall(".//row")
            for xmlTableRow in xmlTableRows:
                xmlTableCells = xmlTableRow.findall(".//cell")
                intCurrentColumn = 1
                for xmlTableCell in xmlTableCells:
                    xmlTableCell.tag = "td"
                    xmlTableCell.set("align",listColumnAlignments[intCurrentColumn])
                    xmlTableCell.set("style","width: " + str(listColumnWidths[intCurrentColumn]) + ";")
                    # Deal with multicolumn
                    if xmlTableCell.get("cols") is not None:
                        xmlTableCell.set("colspan", xmlTableCell.get("cols"))
                    if intCurrentColumn > len(xmlTableCells):
                        intCurrentColumn = 1
                    # Deal with multicolumn again, increase intCurrentColumn by the columns being spanned
                    elif xmlTableCell.get("cols") is not None:
                        intCurrentColumn = intCurrentColumn + int(xmlTableCell.get("cols"))
                        del xmlTableCell.attrib["cols"]
                    else:
                        intCurrentColumn += 1
                xmlTableRow.tag = "tr"
                xmlTableRow.set("valign", "top")
        elif xmlElement.tag == "list" and xmlElement.get('type') != 'description':
            # Ordered and simple lists are flattened: every item's content is
            # converted recursively and appended to one temp wrapper.
            xmlResult = etree.Element("temp")
            if xmlElement.get('type') == 'ordered':
                # The first item is handled separately from the remaining ones.
                # NOTE(review): the search path "..//item" starts at the
                # parent of this list element -- confirm that this is intended.
                xmlFirstItem = xmlElement.find("..//item")
                xmlFirstItemElement = xmlFirstItem.getchildren()[0]
                # The item's first child carries the list number and the id.
                xmlResult.append(djangoParseObject(xmlFirstItemElement,indent=True, listtype="ordered", listnumber=xmlFirstItem.get("id-text"), uid=xmlFirstItem.get("id")))
                # Process Child Elements which are Part of this item
                if len(xmlFirstItem.getchildren()) >= 1:
                    for xmlChild in xmlFirstItem.iterchildren():
                        xmlResult.append(djangoParseObject(xmlChild,indent=True))
                xmlFirstItem.getparent().remove(xmlFirstItem)
                # Process remaining items in this list
                # NOTE(review): tmpIntNumber is incremented but never read.
                tmpIntNumber = 2
                for xmlItem in xmlElement.iterchildren():
                    xmlItemElement = xmlItem.getchildren()[0]
                    xmlResult.append(djangoParseObject(xmlItemElement,indent=True,listtype="ordered",listnumber=xmlItem.get("id-text"), uid=xmlItem.get("id")))
                    tmpIntNumber += 1
                    if len(xmlItem.getchildren()) >= 1:
                        for xmlChild in xmlItem.iterchildren():
                            xmlResult.append(djangoParseObject(xmlChild, indent=True))
                    xmlItem.getparent().remove(xmlItem)
            if xmlElement.get('type') == 'simple':
                # The tag of the first child decides between a plain bullet
                # list (item) and a list with custom labels (label).
                xml_first_child = xmlElement.getchildren()[0]
                if xml_first_child.tag == 'item':
                    logging.debug("a simple list with no special items")
                    # Only the first item's first child is marked with
                    # listtype "unordered" and the list number "-".
                    xmlFirstItem = xmlElement.find("..//item")
                    xmlFirstItemElement = xmlFirstItem.getchildren()[0]
                    xmlResult.append(djangoParseObject(xmlFirstItemElement,indent=True,listtype="unordered", listnumber="-"))
                    # Process Child Elements which are Part of this item
                    if len(xmlFirstItem.getchildren()) >= 1:
                        logging.debug("len xmlFirstItem.getchildren is greater or equal 1")
                        for xmlChild in xmlFirstItem.iterchildren():
                            xmlResult.append(djangoParseObject(xmlChild,indent=True))
                    xmlFirstItem.getparent().remove(xmlFirstItem)
                    # Remaining items are converted without listtype/listnumber.
                    for xmlItem in xmlElement.iterchildren():
                        xmlItemElement = xmlItem.getchildren()[0]
                        xmlResult.append(djangoParseObject(xmlItemElement,indent=True))
                        if len(xmlItem.getchildren()) >= 1:
                            for xmlChild in xmlItem.iterchildren():
                                xmlResult.append(djangoParseObject(xmlChild,indent=True))
                        xmlItem.getparent().remove(xmlItem)
                ####################
                # Work in progress #
                ####################
                elif xml_first_child.tag == 'label':
                    logging.debug("a simple list with named items")
                    # The first label/item pair is handled separately; the
                    # label text is used as the list number.
                    xmlFirstItem = xmlElement.find("..//item")
                    xmlFirstItemElement = xmlFirstItem.getchildren()[0]
                    logging.debug(xmlFirstItemElement.text)
                    # debugging
                    logging.debug(etree.tostring(xmlFirstItemElement))
                    # end of debugging
                    xml_first_label = xmlElement.find("..//label")
                    listnumber_text = xml_first_label.text
                    xmlResult.append(djangoParseObject(xmlFirstItemElement,indent=True,listtype="unordered custom", listnumber=listnumber_text))
                    logging.debug("The length of the children of the first item: %s." % len(xmlFirstItem.getchildren()))
                    # Process Child Elements which are Part of this item
                    if len(xmlFirstItem.getchildren()) >= 1:
                        logging.debug("len xmlFirstItem.getchildren is greater or equal 1")
                        for xmlChild in xmlFirstItem.iterchildren():
                            xmlResult.append(djangoParseObject(xmlChild,indent=True))
                    xmlFirstItem.getparent().remove(xmlFirstItem)
                    xml_first_label.getparent().remove(xml_first_label)
                    # Remaining labels and items are paired by position.
                    all_the_labels = xmlElement.findall("label")
                    all_the_items = xmlElement.findall("item")
                    logging.debug("itemlength %s." % len(all_the_items))
                    logging.debug("labellength %s." % len(all_the_labels))
                    for listlabel, listitem in zip(all_the_labels, all_the_items):
                        logging.debug("listitem text %s." % listitem.text)
                        logging.debug("listlabel text %s." % listlabel.text)
                        # Only the first child of each remaining item is
                        # converted; no listtype is passed for these.
                        xml_item_element = listitem.getchildren()[0]
                        xmlResult.append(djangoParseObject(xml_item_element, indent=True, listnumber=listlabel.text))
                        listlabel.getparent().remove(listlabel)
                        listitem.getparent().remove(listitem)
                    # for xmlItem in xmlElement.iterchildren():
                    #     print("So many items have we: ", len(xmlItem))
                    #     xmlItemElement = xmlItem.getchildren()[0]
                    #     xmlResult.append(djangoParseObject(xmlItemElement,indent=True))
                    #     if len(xmlItem.getchildren()) >= 1:
                    #         for xmlChild in xmlItem.iterchildren():
                    #             xmlResult.append(djangoParseObject(xmlChild,indent=True))
                    #     xmlItem.getparent().remove(xmlItem)
                ###########################
                # End of work in progress #
                ###########################
        elif xmlElement.tag == "list" and xmlElement.get('type') == 'description':
            logging.debug("A description")
            xmlResult = etree.Element("temp")
            # The children are consumed pairwise (label, item) until the list
            # is empty; each pair becomes one EOAdescription element.
            while len(xmlElement.getchildren()) != 0:
                xmlDescription = etree.Element("EOAdescription")
                xmlDescription.set("order", str(intObjectNumber))
                xmlLabel = xmlElement.getchildren()[0]
                xmlItem = xmlElement.getchildren()[1]
                # An item without children gets an empty paragraph as content.
                if len(xmlItem.getchildren()) > 0:
                    xmlContent = xmlItem.getchildren()[0]
                else:
                    xmlContent = etree.Element("p")
                xmlLabel.tag = "description"
                xmlDescription.append(xmlLabel)
                xmlDescription.append(xmlContent)
                xmlResult.append(xmlDescription)
                intObjectNumber += 1
                # Whatever is still left in the item is converted as
                # indented follow-up objects.
                if len(xmlItem.getchildren()) > 0:
                    for xmlChild in xmlItem.iterchildren():
                        xmlResult.append(djangoParseObject(xmlChild,indent=True))
                xmlItem.getparent().remove(xmlItem)
        elif xmlElement.tag == "theorem":
            # Theorem: head and the first p found, plus number and uid.
            xmlTheoremHead = xmlElement.find(".//head")
            xmlTheoremText = xmlElement.find(".//p")
            strTheoremNumber = xmlElement.get("id-text")
            strTheoremID = xmlElement.get("id")
            xmlResult = etree.Element("EOAtheorem")
            xmlResult.append(xmlTheoremHead)
            xmlResult.append(xmlTheoremText)
            xmlResult.set("order", str(intObjectNumber))
            xmlResult.set("number", strTheoremNumber)
            xmlResult.set("uid", strTheoremID)
            intObjectNumber += 1
        elif xmlElement.findall(".//EOAequationarray"):
            # Equation array: one EOAequation per contained equation; the
            # equation image is copied from items/ to the Django image folder.
            xmlResult = etree.Element("temp")
            for xmlEquation in xmlElement.findall(".//EOAequation"):
                xmlEOAequation = etree.Element("EOAequation")
                xmlEOAequation.set("order", str(intObjectNumber))
                intObjectNumber += 1
                xmlEOAequation.set("number", xmlEquation.get("number"))
                xmlEOAequation.set("filename", xmlEquation.get("filename"))
                if xmlEquation.get("label") is not None:
                    xmlEOAequation.set("label", xmlEquation.get("label"))
                shutil.copy(os.getcwd() + "/items/" + xmlEquation.get("filename"), os.getcwd() + "/CONVERT/django/images/")
                xmlEOAequation.set("TeX", xmlEquation.get("TeX"))
                # NOTE(review): the label is set a second time here.
                if xmlEquation.get("label") is not None:
                    xmlEOAequation.set("label", xmlEquation.get("label"))
                xmlResult.append(xmlEOAequation)
        elif xmlElement.findall(".//EOAequationarraynonumber"):
            xmlResult = etree.Element("temp")
            # NOTE(review): unlike the numbered case, this iterates over the
            # EOAequationarraynonumber elements themselves and reads
            # filename/TeX from them.
            for xmlEquation in xmlElement.findall(".//EOAequationarraynonumber"):
                xmlEOAequation = etree.Element("EOAequation")
                xmlEOAequation.set("order", str(intObjectNumber))
                intObjectNumber += 1
                xmlEOAequation.set("number", "")
                xmlEOAequation.set("filename", xmlEquation.get("filename"))
                shutil.copy(os.getcwd() + "/items/" + xmlEquation.get("filename"), os.getcwd() + "/CONVERT/django/images/")
                xmlEOAequation.set("TeX", xmlEquation.get("TeX"))
                xmlResult.append(xmlEOAequation)
        elif xmlElement.tag == "EOAequationnonumber":
            # Process one unnumbered equation which is not encapsulated
            xmlResult = etree.Element("EOAequation")
            xmlResult.set("order", str(intObjectNumber))
            intObjectNumber += 1
            xmlResult.set("filename", xmlElement.get("filename"))
            xmlResult.set("TeX", xmlElement.get("TeX"))
            shutil.copy(os.getcwd() + "/items/" + xmlElement.get("filename"), os.getcwd() + "/CONVERT/django/images/")
            xmlResult.set("number", "")
        elif xmlElement.findall(".//EOAequation"):
            # Process various Equations which may be encapsulated within <p>
            xmlEquations = xmlElement.findall(".//EOAequation")
            xmlResult = etree.Element("temp")
            for xmlEquation in xmlEquations:
                # Create basic Element EOAequation
                xmlEOAequation = etree.Element("EOAequation")
                xmlEOAequation.set("order", str(intObjectNumber))
                intObjectNumber += 1
                xmlEOAequation.set("number", xmlEquation.get("number"))
                xmlEOAequation.set("TeX", xmlEquation.get("TeX"))
                if xmlEquation.get("uid") is not None:
                    xmlEOAequation.set("uid", xmlEquation.get("uid"))
                shutil.copy(os.getcwd() + "/items/" + xmlEquation.get("filename"), os.getcwd() + "/CONVERT/django/images/")
                xmlEOAequation.set("filename", xmlEquation.get("filename"))
                xmlResult.append(xmlEOAequation)
        elif xmlElement.tag == "EOAequation":
            # Process one EOAequation which is not encapsulated
            xmlResult = etree.Element("EOAequation")
            xmlResult.set("order", str(intObjectNumber))
            intObjectNumber += 1
            xmlResult.set("number", xmlElement.get("number"))
            xmlResult.set("TeX", xmlElement.get("TeX"))
            if xmlElement.get("uid") is not None:
                xmlResult.set("uid", xmlElement.get("uid"))
            shutil.copy(os.getcwd() + "/items/" + xmlElement.get("filename"), os.getcwd() + "/CONVERT/django/images/")
            xmlResult.set("filename", xmlElement.get("filename"))
        elif xmlElement.tag == "div3":
            # Subsection: the head first, then every remaining child converted
            # recursively.
            xmlResult = etree.Element("EOAsubsection")
            xmlResult.set("order", str(intObjectNumber))
            intObjectNumber += 1
            xmlResult.append(xmlElement.find("head"))
            for xmlChild in xmlElement.iterchildren():
                xmlResult.append(djangoParseObject(xmlChild))
        elif xmlElement.tag == "div4":
            # Subsubsection: same treatment as div3 with a different result tag.
            xmlResult = etree.Element("EOAsubsubsection")
            xmlResult.set("order", str(intObjectNumber))
            intObjectNumber += 1
            xmlResult.append(xmlElement.find("head"))
            for xmlChild in xmlElement.iterchildren():
                xmlResult.append(djangoParseObject(xmlChild))
        elif xmlElement.tag == "EOAverse":
            # Verse: copies of all direct p children are joined into a single
            # paragraph, separated by br elements; the p wrappers are then
            # stripped so that only their content remains.
            xmlResult = etree.Element("EOAparagraph")
            xmlResult.set("style", "verse")
            xmlResult.set("order", str(intObjectNumber))
            intObjectNumber += 1
            xml_verselines = xmlElement.findall("p")
            xmlResult.append(deepcopy(xml_verselines[0]))
            for xml_verseline in xml_verselines[1:]:
                linebreak = etree.Element("br")
                xmlResult.append(linebreak)
                copied_line = deepcopy(xml_verseline)
                xmlResult.append(copied_line)
            etree.strip_tags(xmlResult, "p")
        elif xmlElement.tag == "EOAbox":
            logging.debug("Found a box")
            xmlResult = etree.Element("temp")
            xmlResult.set("style", "box")
            # The box header becomes a paragraph of its own whose inner p is
            # retagged to b.
            box_header = xmlElement.find("head")
            box_header.tag = "EOAparagraph"
            box_header.set("style", "box")
            box_header.set("order", str(intObjectNumber))
            head_contents = box_header.find("p")
            head_contents.tag = "b"
            # etree.strip_tags(box_header, "p")
            xmlResult.append(box_header)
            intObjectNumber += 1
            # question: what to do about paragraph equivalent objects?
            # Only direct p children are taken over; other children are ignored.
            box_elements = xmlElement.getchildren()
            logging.debug(len(box_elements))
            for box_element in box_elements:
                if box_element.tag == "p":
                    box_element.tag = "EOAparagraph"
                    box_element.set("style", "box")
                    box_element.set("order", str(intObjectNumber))
                    xmlResult.append(box_element)
                    intObjectNumber += 1
        elif xmlElement.tag == "EOAtocentry":
            # throw them out for the time being
            xmlResult = etree.Element("temp")
        else:
            # Everything else is treated as a paragraph: the element itself is
            # retagged and returned.
            xmlElement.tag = "EOAparagraph"
            quoted_paragraph = xmlElement.get("rend")
            # NOTE(review): this sets rend to the value it already has.
            if quoted_paragraph is not None and quoted_paragraph == "quoted":
                xmlElement.set("rend", "quoted")
            xmlElement.set("order", str(intObjectNumber))
            intObjectNumber += 1
            xmlResult = xmlElement
    else:
        # Nodes without a string tag are reported and passed through unchanged.
        print("SPECIAL: %s - %s" % (xmlElement, xmlElement.text))
        xmlResult = xmlElement
    # Attributes requested by the caller are applied to whatever was built.
    if indent==True:
        xmlResult.set("indent", "True")
    if listtype != None:
        xmlResult.set("listtype", listtype)
    if listnumber != 0:
        xmlResult.set("listnumber", listnumber)
    if uid != None:
        xmlResult.set("id", uid)
    return xmlResult
# def djangoParseObject ends here
def make_index(index_hits, index_type):
    """Build the printable index tree from a collection of index hits.

    Hits are grouped by their ``main`` attribute and, where present, by their
    ``secondary`` attribute.  The main entries are sorted case-insensitively
    and distributed over one ``EOAindexsection`` per upper-cased first
    character.  Every hit becomes an ``EOAindexlink`` carrying the hit's
    ``chapterorder`` and ``elementorder``, plus ``bold="True"`` if the hit
    has a ``bold`` attribute.

    Args:
        index_hits: iterable of elements with the attributes ``main``,
            ``chapterorder`` and ``elementorder`` and optionally ``display``,
            ``secondary`` and ``bold``.
        index_type: ``"regular"`` yields the root element ``EOAprintindex``;
            any other value is inserted into the name
            (``EOAprint<index_type>index``).

    Returns:
        The root element of the index.  It has no children when
        ``index_hits`` is empty.
    """
    dictIndex = {}
    for xmlEOAindex in index_hits:
        strMainEntry = xmlEOAindex.get("main")
        str_display_entry = xmlEOAindex.get("display")
        # If strMainEntry not in Index, then create new index element
        if strMainEntry not in dictIndex:
            dictIndex[strMainEntry] = {
                "display_string": "",
                "listMainentries": [],
                "dictSubentries": {},
            }
        dictEntry = dictIndex[strMainEntry]
        # Store the display string; without an explicit one the main entry
        # itself is displayed.  The most recent hit of an entry decides.
        if str_display_entry is not None:
            dictEntry["display_string"] = str_display_entry
        else:
            dictEntry["display_string"] = strMainEntry
        strSubEntry = xmlEOAindex.get("secondary")
        if strSubEntry is None:
            # Hit refers to the main entry itself.
            dictEntry["listMainentries"].append(xmlEOAindex)
        else:
            # Hit refers to a subentry; collect it on the second level.
            dictEntry["dictSubentries"].setdefault(strSubEntry, []).append(xmlEOAindex)

    # Sort the main index
    listSortedKeys = sorted(dictIndex.keys(), key=str.lower)
    if index_type == "regular":
        new_index_element = "EOAprintindex"
    else:
        new_index_element = "EOAprint%sindex" % index_type
    # Create new and empty xmlTree for xmlEOAindex
    xmlEOAprintindex = etree.Element(new_index_element)
    xmlEOAindexsection = None
    listFirstChars = []
    for strSortedKey in listSortedKeys:
        strFirstChar = strSortedKey[0].upper()
        if strFirstChar not in listFirstChars:
            logging.debug("Beginning a new letter: %s." % strFirstChar)
            listFirstChars.append(strFirstChar)
            # The section is attached to the index as soon as it is created,
            # so no section can be lost and an empty index needs no special
            # handling at the end.
            xmlEOAindexsection = etree.Element("EOAindexsection")
            xmlEOAindexsection.set("Character", strFirstChar)
            xmlEOAprintindex.append(xmlEOAindexsection)
        # beginning a new entry
        dictEntry = dictIndex[strSortedKey]
        xmlEOAindexentry = etree.Element("EOAindexentry")
        xmlEOAindexentry.set("main", strSortedKey)
        xmlEOAindexentry.set("display", dictEntry["display_string"])
        for xmlMainelement in dictEntry["listMainentries"]:
            print(xmlMainelement.get("chapterorder") + ":" + xmlMainelement.get("elementorder"))
            xmlEOAindexlink = etree.Element("EOAindexlink")
            xmlEOAindexlink.set("chapterorder", xmlMainelement.get("chapterorder"))
            xmlEOAindexlink.set("elementorder", xmlMainelement.get("elementorder"))
            if xmlMainelement.get("bold") is not None:
                xmlEOAindexlink.set("bold", "True")
            xmlEOAindexentry.append(xmlEOAindexlink)
        # If there are any subentries, process them now
        if len(dictEntry["dictSubentries"]) > 0:
            logging.debug("Processing Subentries")
            for strSortedSubKey in sorted(dictEntry["dictSubentries"]):
                xmlEOAindexsubentry = etree.Element("EOAindexsubentry")
                xmlEOAindexsubentry.set("secondary", strSortedSubKey)
                for xmlSubElement in dictEntry["dictSubentries"][strSortedSubKey]:
                    # Insert the links to the subentry here
                    xmlEOAindexlink = etree.Element("EOAindexlink")
                    xmlEOAindexlink.set("chapterorder", xmlSubElement.get("chapterorder"))
                    xmlEOAindexlink.set("elementorder", xmlSubElement.get("elementorder"))
                    if xmlSubElement.get("bold") is not None:
                        xmlEOAindexlink.set("bold", "True")
                    xmlEOAindexsubentry.append(xmlEOAindexlink)
                logging.debug(strSortedSubKey)
                xmlEOAindexentry.append(xmlEOAindexsubentry)
        xmlEOAindexsection.append(xmlEOAindexentry)
    return xmlEOAprintindex
# def make_index ends here
def djangoParseHeadline(xmlElement):
    """Split the author string of a headline into one EOAauthor element each.

    The first ``EOAauthor`` descendant of *xmlElement* is removed, its text is
    split at commas and at the conjunctions ", and ", " and " and " und ",
    and one new ``EOAauthor`` child per author name is appended to
    *xmlElement*.  Names are stripped of surrounding whitespace; empty
    fragments (for instance from a trailing comma) are skipped instead of
    raising ``IndexError``.

    :param xmlElement: headline element, modified in place
    :returns: the same *xmlElement*
    """
    xmlAuthors = xmlElement.find(".//EOAauthor")
    if xmlAuthors is None:
        return xmlElement
    # An empty <EOAauthor/> has text None; treat it as "no authors".
    strAuthors = xmlAuthors.text or ""
    xmlElement.remove(xmlAuthors)
    # Normalise the conjunctions to commas so that a single split suffices.
    strAuthors = re.sub("(, and | and | und )", ",", strAuthors)
    listAuthors = strAuthors.split(",")
    logging.debug(listAuthors)
    for strAuthor in listAuthors:
        strAuthor = strAuthor.strip()
        if not strAuthor:
            continue
        xmlAuthor = etree.Element("EOAauthor")
        xmlAuthor.text = strAuthor
        xmlElement.append(xmlAuthor)
    return xmlElement
# def djangoParseHeadline ends here
# Iterate over chapters, sections, subsections and subsubsections and put
# everything on one level below a new EOAchapter element.  Every heading and
# every converted object is appended directly to xmlEOAchapter; headings get
# the running intObjectNumber as their "order" attribute.
intChapterNumber = 1
listPartIDs = []
for xmlChapter in xmlChapters:
    intObjectNumber = 1
    # Process Chapter Title
    xmlEOAchapter = etree.Element("EOAchapter")
    xmlEOAchapter.set("type", "regular")
    xmlLanguage = xmlChapter.find(".//language")
    if xmlLanguage is not None:
        # KT changing this after separating the big script
        strLanguage = xmlLanguage.text  # or "english"
    else:
        strLanguage = "english"
    xmlEOAchapter.set("language", strLanguage)
    xmlEOAchapter.set("order", str(intChapterNumber))
    if xmlChapter.get("rend") != "nonumber":
        xmlEOAchapter.set("id", xmlChapter.get("id"))
    xmlChapterHeadline = xmlChapter.find(".//head")
    if xmlChapter.get("id") in dictChapters:
        xmlEOAchapter.set("number", dictChapters[xmlChapter.get("id")])
    else:
        xmlEOAchapter.set("number", "")
    print("-----------------------------------------------------")
    print(gettext(xmlChapterHeadline))
    # Appending moves the headline out of xmlChapter into the new chapter.
    xmlEOAchapter.append(djangoParseHeadline(xmlChapterHeadline))
    # Deal with EOAauthor
    xmlChapterAuthor = xmlChapter.find(".//EOAauthor")
    if xmlChapterAuthor is not None:
        xmlEOAchapter.append(xmlChapterAuthor)
    # Attach enclosing Part to Chapter, see django structure for this purpose.
    # Only the first chapter of each part receives the part headline.
    xmlChapterParent = xmlChapter.getparent()
    if xmlChapterParent.tag == "div0" and xmlChapterParent.get("id") not in listPartIDs:
        listPartIDs.append(xmlChapterParent.get("id"))
        xmlPartHeadline = xmlChapterParent.find("head")
        xmlPartHeadline.tag = "EOAparthtml"
        xmlEOAchapter.append(xmlPartHeadline)
    # Append Chapter to xmlEOAdocument
    xmlEOAdocument.append(xmlEOAchapter)
    # Iterate over children of Chapter
    for xmlChapterChild in xmlChapter.iterchildren():
        if xmlChapterChild.tag == "div2":
            # Process Section Title
            xmlEOAsection = etree.Element("EOAsection")
            xmlEOAsection.set("order", str(intObjectNumber))
            if xmlChapterChild.get("rend") != "nonumber":
                xmlEOAsection.set("id", xmlChapterChild.get("id"))
                xmlEOAsection.set("number", dictSections[xmlChapterChild.get("id")])
            intObjectNumber += 1
            # Look the headline up inside the section itself (as the deeper
            # levels below do) rather than anywhere in the chapter, so a
            # stray <head> earlier in the chapter cannot be picked up.
            xmlHead = xmlChapterChild.find(".//head")
            logging.debug("Section '%s'" % gettext(xmlHead))
            xmlEOAsection.append(djangoParseHeadline(xmlHead))
            xmlEOAchapter.append(xmlEOAsection)
            # Iterate over Children of Section
            for xmlSectionChild in xmlChapterChild.iterchildren():
                if xmlSectionChild.tag == "div3":
                    # Process Subsection Title
                    xmlEOAsubsection = etree.Element("EOAsubsection")
                    xmlEOAsubsection.set("order", str(intObjectNumber))
                    if xmlSectionChild.get("rend") != "nonumber":
                        xmlEOAsubsection.set("id", xmlSectionChild.get("id"))
                        xmlEOAsubsection.set("number", dictSections[xmlSectionChild.get("id")])
                    intObjectNumber += 1
                    xmlHead = xmlSectionChild.find(".//head")
                    logging.debug("Subsection '%s'" % gettext(xmlHead))
                    xmlEOAsubsection.append(djangoParseHeadline(xmlHead))
                    xmlEOAchapter.append(xmlEOAsubsection)
                    # Iterate over children of Subsection
                    for xmlSubsectionChild in xmlSectionChild.iterchildren():
                        if xmlSubsectionChild.tag == "div4":
                            # Process Subsubsection Title
                            xmlEOAsubsubsection = etree.Element("EOAsubsubsection")
                            xmlEOAsubsubsection.set("order", str(intObjectNumber))
                            intObjectNumber += 1
                            xmlHead = xmlSubsectionChild.find(".//head")
                            logging.debug(gettext(xmlHead))
                            xmlEOAsubsubsection.append(djangoParseHeadline(xmlHead))
                            xmlEOAchapter.append(xmlEOAsubsubsection)
                            # Iterate over children of Subsubsection
                            for xmlSubsubsectionChild in xmlSubsectionChild.iterchildren():
                                xmlEOAchapter.append(djangoParseObject(xmlSubsubsectionChild))
                        else:
                            xmlEOAchapter.append(djangoParseObject(xmlSubsectionChild))
                elif xmlSectionChild.tag == "div4":
                    # Subsubsection directly below a section
                    xmlEOAsubsubsection = etree.Element("EOAsubsubsection")
                    xmlEOAsubsubsection.set("order", str(intObjectNumber))
                    intObjectNumber += 1
                    xmlHead = xmlSectionChild.find(".//head")
                    xmlEOAsubsubsection.append(djangoParseHeadline(xmlHead))
                    xmlEOAchapter.append(xmlEOAsubsubsection)
                    # Iterate over children of Subsubsection
                    for xmlSubsubsectionChild in xmlSectionChild.iterchildren():
                        if xmlSubsubsectionChild.tag == "div5":
                            logging.debug("jubel")
                            # although it's div5, promote it to subsubsection
                            xmlEOAparasection = etree.Element("EOAsubsubsection")
                            xmlEOAparasection.set("order", str(intObjectNumber))
                            intObjectNumber += 1
                            xmlHead = xmlSubsubsectionChild.find(".//head")
                            logging.debug(gettext(xmlHead))
                            xmlEOAparasection.append(djangoParseHeadline(xmlHead))
                            xmlEOAchapter.append(xmlEOAparasection)
                            for xmlParasectionChild in xmlSubsubsectionChild.iterchildren():
                                xmlEOAchapter.append(djangoParseObject(xmlParasectionChild))
                        else:
                            xmlEOAchapter.append(djangoParseObject(xmlSubsubsectionChild))
                else:
                    xmlEOAchapter.append(djangoParseObject(xmlSectionChild))
        else:
            xmlEOAchapter.append(djangoParseObject(xmlChapterChild))
    intChapterNumber += 1
print("----------------------------------------------")
print("Processing Facsimile Parts")

# objectify is needed below but is not among the imports visible at the top
# of this file, so it is imported here where it is used.
from lxml import objectify

listModes = ["text", "textPollux", "xml"]
strBasicURL = "http://mpdl-system.mpiwg-berlin.mpg.de/mpdl/interface/page-fragment.xql?document="
parserECHO = etree.XMLParser()
xmlParts = xmlTree.findall("//div0")
intFacNumber = 1
for xmlPart in xmlParts:
    intObjectNumber = 1
    intFacPartNumber = 1
    # Only parts that contain facsimiles are converted here.
    if xmlPart.find(".//EOAfacsimilepart") is None:
        continue
    xmlEOAfacsimilepart = etree.Element("EOAfacsimilepart")
    # Facsimile parts continue the chapter numbering.
    xmlEOAfacsimilepart.set("order", str(intChapterNumber))
    xmlEOAfacsimileparthead = xmlPart.find(".//head")
    for xmlChild in xmlEOAfacsimileparthead:
        if xmlChild.tag == "hi":
            xmlChild.tag = "em"
            # pop() instead of del: a <hi> without rend must not abort the run
            xmlChild.attrib.pop("rend", None)
    xmlEOAfacsimilepart.append(xmlEOAfacsimileparthead)
    intChapterNumber += 1
    xmlEOAdocument.append(xmlEOAfacsimilepart)
    xmlFacsimilepages = xmlPart.findall(".//EOAfacsimilepage")
    intFacPageNumber = 1
    for xmlFacsimilepage in xmlFacsimilepages:
        strImageFile = xmlFacsimilepage.find(".//file").text
        strLabel = xmlFacsimilepage.find(".//label").text
        strPagenumber = xmlFacsimilepage.find(".//pagenumber").text or ""
        xmlEOAfacsimilepage = etree.Element("EOAfacsimilepage")
        xmlEOAfacsimilepage.set("order", str(intObjectNumber))
        # TODO: somehow deal with a (missing) file suffix here, and convert
        # the files if necessary
        strImageFile = strImageFile.rstrip("\n")
        # The directory name is flattened into the file name of the copy.
        strImageFileDir = re.sub("/", "", os.path.dirname(strImageFile))
        strImageFileName = os.path.basename(strImageFile)
        shutil.copy(os.getcwd() + "/" + strImageFile, os.getcwd() + "/CONVERT/django/images/" + strImageFileDir + strImageFileName)
        intObjectNumber += 1
        # Download transcription for this Page
        strFulltext = xmlFacsimilepage.find(".//fulltext").text
        if strFulltext is not None:
            logging.debug("Found a link to full text.")
            # The text has the form "<document>,<page number>"
            listFulltext = strFulltext.split(",")
            strFacsimileURL = listFulltext[0]
            strFacsimilePage = listFulltext[1]
            for strMode in listModes:
                strURL = strBasicURL + strFacsimileURL + "&pn=" + strFacsimilePage + "&mode=" + strMode
                logging.debug("Processing Facsimile : " + strURL)
                xmlECHOtree = etree.parse(strURL, parserECHO)
                # Remove ECHO-namespaces
                objectify.deannotate(xmlECHOtree, xsi_nil=True)
                etree.cleanup_namespaces(xmlECHOtree)
                for xmlDiv in xmlECHOtree.findall(".//div"):
                    if xmlDiv.get("class") != "pageContent":
                        continue
                    # Create new EOA-Element
                    xmlEOAfacsimileelement = etree.Element("EOAfacsimileelement")
                    xmlEOAfacsimileelement.set("type", strMode)
                    # Fix Images in the <div>-Element: download each one and
                    # point its src to the local copy
                    intFacImgNumber = 1
                    for xmlImage in xmlDiv.findall(".//img"):
                        strImageSrc = xmlImage.get("src")
                        strSupplementName = "facsupplements_" + str(intFacNumber) + "_" + str(intFacPageNumber) + "_" + str(intFacImgNumber) + ".jpg"
                        # Argument list instead of a re-split command string,
                        # so unusual characters in the URL stay one argument.
                        listArguments = ["curl", strImageSrc, "-o", "CONVERT/django/images/" + strSupplementName]
                        try:
                            subprocess.check_output(listArguments, shell=False, universal_newlines=True)
                        except (subprocess.CalledProcessError, OSError):
                            # Download failed: mark the image so that the
                            # strip_tags() call below drops the tag.
                            xmlImage.tag = "temp"
                        else:
                            xmlImage.set("src", strSupplementName)
                        intFacImgNumber += 1
                    xmlEOAfacsimileelement.append(xmlDiv)
                    xmlEOAfacsimilepage.append(xmlEOAfacsimileelement)
        intFacPageNumber += 1
        xmlEOAfacsimilepage.set("file", strImageFileDir + strImageFileName)
        xmlEOAfacsimilepage.set("label", str(strLabel))
        xmlEOAfacsimilepage.set("pagenumber", str(strPagenumber))
        xmlEOAfacsimilepart.append(xmlEOAfacsimilepage)
    # Was "=+ 1", which assigned +1 on every pass and let the supplement
    # file names of different parts collide.
    intFacNumber += 1
etree.strip_tags(xmlDjangoTree, "temp")
print("----------------------------------------------")
print("Processing and linking Footnotes for django")
def bring_footnote_down_django(footnote, fragment, footnote_number, object_number, unique_id, destination):
    """Turn an inline footnote into a link and append its content to *destination*.

    The footnote element is handed to ``replace_footnote_with_sup`` (defined
    elsewhere in this file), gets ``class="footnote"`` and an ``<a>`` child
    pointing to ``#<fragment>`` with *footnote_number* as text.  The original
    text and children are moved into a new ``EOAfootnote`` element that is
    appended to *destination*.

    Usage:
        intObjectNumber = bring_footnote_down_django(
            xmlFootnote, "fn" + str(intFootnoteNumber), str(intFootnoteNumber),
            intObjectNumber, tmpStrUID, xmlResult)

    :param footnote: the footnote element inside the running text
    :param fragment: URL fragment (without ``#``) the link points to
    :param footnote_number: displayed footnote number, as a string
    :param object_number: running order number to give to the new element
    :param unique_id: value of the ``id`` attribute of the new element
    :param destination: element that receives the new ``EOAfootnote``
    :returns: the next free order number (``object_number + 1``); it is
        returned rather than written to a global counter
    :raises StopIteration: if no ancestor of *footnote* has an ``order``
        attribute
    """
    # Remember the content before the footnote element is rewritten.
    children = list(footnote)
    text = footnote.text or ""

    replace_footnote_with_sup(footnote)
    footnote.set("class", "footnote")
    link = etree.Element("a")
    link.set("href", "#" + fragment)
    link.text = footnote_number
    footnote.append(link)

    entry = etree.Element("EOAfootnote")
    entry.set("order", str(object_number))
    entry.set("number", footnote_number)
    # The anchor is the order of the nearest ancestor that has one.
    ancestor_orders = (
        ancestor.get("order")
        for ancestor in footnote.iterancestors()
        if ancestor.get("order") is not None
    )
    entry.set("anchor", next(ancestor_orders))
    entry.set("id", unique_id)
    entry.text = text

    working_dir = None
    for child in children:
        if child.tag == "EOAequationnonumber":
            # Equation images referenced from footnotes are copied as well.
            working_dir = os.getcwd()
            shutil.copy(
                "%s/items/%s" % (working_dir, child.get("filename")),
                "%s/CONVERT/django/images/" % working_dir,
            )
        entry.append(child)
    destination.append(entry)
    return object_number + 1
# def bring_footnote_down_django ends here
xmlEOAchapters = xmlEOAdocument.findall(".//EOAchapter")
debug_chapters(xmlEOAchapters)
for xmlEOAchapter in xmlEOAchapters:
    groupings = libeoaconvert.get_bigfoot_data(xmlEOAchapter)
    has_old = len(xmlEOAchapter.findall(".//note")) != 0
    has_new = any(len(notes) != 0 for grouping, notes in groupings)
    # Exactly one style falls through; both styles at once is an error (that
    # should have already been thrown during the epub phase); a chapter
    # without any footnotes is skipped.
    if has_old:
        if has_new:
            raise FootnoteError("This chapter contains both old-style footnotes and new-style footnotes")
    elif not has_new:
        continue
    # The footnote section continues the running order after the last
    # element of the chapter.
    xmlLastElement = xmlEOAchapter[-1]
    logging.debug(etree.tostring(xmlLastElement))
    intObjectNumber = int(xmlLastElement.get("order")) + 1
    # Everything is collected below a throw-away <temp> element whose tag is
    # stripped again after the loop.
    xmlResult = etree.Element("temp")
    xmlEOAsection = etree.Element("EOAsection")
    xmlEOAsection.set("order", str(intObjectNumber))
    intObjectNumber += 1
    xmlHead = etree.Element("head")
    xmlHead.text = libeoaconvert.dictLangFootnotes[libeoaconvert.two_letter_language(xmlEOAchapter.get("language"))]
    xmlEOAsection.append(xmlHead)
    xmlResult.append(xmlEOAsection)
    # New-style notes: do for them what the loop below does for the
    # old-style footnotes.
    for grouping, notes in groupings:
        for index, note in enumerate(notes):
            fntext = str(index + 1)
            if "lower-latin" == grouping:
                fntext = alph_footnote_index(index)
            unique_id = "fn%s" % fntext
            intObjectNumber = bring_footnote_down_django(note, unique_id, fntext, intObjectNumber, unique_id, xmlResult)
    # Old-style footnotes
    intFootnoteNumber = 1
    for xmlFootnote in xmlEOAchapter.findall(".//note"):
        # Remember content and position before the element is emptied.
        xmlFootnoteContent = list(xmlFootnote)
        strFootnoteText = xmlFootnote.text or ""
        tmpTail = xmlFootnote.tail
        tmpStrUID = xmlFootnote.get("id")
        # Turn the note itself into <sup class="footnote"><a href="#fnN">N</a></sup>
        xmlFootnote.clear()
        xmlFootnote.tail = tmpTail
        xmlFootnote.tag = "sup"
        xmlFootnote.set("class", "footnote")
        xmlFootnoteLink = etree.Element("a")
        xmlFootnoteLink.set("href", "#fn" + str(intFootnoteNumber))
        xmlFootnoteLink.text = str(intFootnoteNumber)
        xmlFootnote.append(xmlFootnoteLink)
        xmlEOAfootnote = etree.Element("EOAfootnote")
        xmlEOAfootnote.set("order", str(intObjectNumber))
        intObjectNumber += 1
        xmlEOAfootnote.set("number", str(intFootnoteNumber))
        # The anchor is the order of the nearest ancestor that has one.  It
        # is looked up afresh for every footnote, so a value left over from
        # the previous footnote can never be reused by accident.
        strFootnoteAnchorNumber = next(
            (
                xmlParent.get("order")
                for xmlParent in xmlFootnote.iterancestors()
                if xmlParent.get("order") is not None
            ),
            None,
        )
        if strFootnoteAnchorNumber is None:
            raise FootnoteError("Footnote %s has no enclosing element with an order attribute" % tmpStrUID)
        xmlEOAfootnote.set("anchor", strFootnoteAnchorNumber)
        xmlEOAfootnote.set("id", tmpStrUID)
        xmlEOAfootnote.text = strFootnoteText
        for xmlContentElement in xmlFootnoteContent:
            if xmlContentElement.tag == "EOAequationnonumber":
                shutil.copy(os.getcwd() + "/items/" + xmlContentElement.get("filename"), os.getcwd() + "/CONVERT/django/images/")
            xmlEOAfootnote.append(xmlContentElement)
        xmlResult.append(xmlEOAfootnote)
        intFootnoteNumber += 1
    xmlEOAchapter.append(xmlResult)
# Remove temp-Tag
etree.strip_tags(xmlDjangoTree, "temp")
# print("----------------------------------------------") | |
# print("Processing Verses") | |
# for xmlEOAchapter in xmlEOAchapters: | |
# verses = xmlEOAchapter.findall(".//EOAverse") | |
# print("Found lotsa verses: ", len(verses)) | |
print("----------------------------------------------")
print("Processing various Elements")
strWorkingDir = os.getcwd()
for xmlEOAchapter in xmlEOAchapters:
    # <hi rend="it"> becomes <em>, <hi rend="bold"> becomes <b>; any other
    # <hi> is left as it is.
    for xmlHighlight in xmlEOAchapter.findall(".//hi"):
        strHighlightTag = {"it": "em", "bold": "b"}.get(xmlHighlight.get("rend"))
        if strHighlightTag is not None:
            xmlHighlight.tag = strHighlightTag
            del xmlHighlight.attrib["rend"]
    # <xref url="..."> becomes <a href="...">; the URL is also the link text
    for xmlHyperlink in xmlEOAchapter.findall(".//xref"):
        strURL = xmlHyperlink.get("url")
        if not strURL.startswith(("http://", "https://")):
            strURL = "http://" + strURL
        xmlHyperlink.tag = "a"
        del xmlHyperlink.attrib["url"]
        xmlHyperlink.set("href", strURL)
        etree.strip_elements(xmlHyperlink, "allowbreak", with_tail=True)
        xmlHyperlink.text = strURL
    # Convert EOAup to <sup> and EOAdown to <sub>
    for strOldTag, strNewTag in (("EOAup", "sup"), ("EOAdown", "sub")):
        for xmlShifted in xmlEOAchapter.findall(".//" + strOldTag):
            xmlShifted.tag = strNewTag
    # Strike-out, letter-spacing and small caps become styled <span>s
    for strOldTag, strStyle in (
        ("EOAst", "text-decoration: line-through;"),
        ("EOAls", "letter-spacing: 0.5em;"),
        ("EOAcaps", "font-variant:small-caps;"),
    ):
        for xmlStyled in xmlEOAchapter.findall(".//" + strOldTag):
            xmlStyled.tag = "span"
            xmlStyled.set("style", strStyle)
    # Convert EOAineq and EOAchem into appropriate IMG-Tags and copy the
    # image they refer to
    for strOldTag in ("EOAineq", "EOAchem"):
        for xmlInlineEquation in xmlEOAchapter.findall(".//" + strOldTag):
            xmlInlineEquation.tag = "img"
            xmlInlineEquation.set("class", "EOAineq")
            xmlInlineEquation.set("alt", xmlInlineEquation.get("TeX"))
            shutil.copy(strWorkingDir + "/items/" + xmlInlineEquation.get("src"), strWorkingDir + "/CONVERT/django/images/" + xmlInlineEquation.get("src"))
    # Convert EOAinline into appropriate IMG-Tags
    for xmlInlineElement in xmlEOAchapter.findall(".//EOAinline"):
        xmlInlineElement.tag = "img"
        xmlInlineElement.set("class", "eoainlineimage")
        xmlInlineElement.set("alt", "")
        strInlineElementFilePath = xmlInlineElement.text
        strInlineElementFileName = os.path.basename(strInlineElementFilePath)
        strInlineElementDirName = os.path.dirname(strInlineElementFilePath)
        xmlInlineElement.text = None
        # Directory and file name are joined without a separator.
        # NOTE(review): unlike the facsimile stage, slashes inside the
        # directory name are not removed here - confirm nested directories
        # do not occur.
        strFlatName = strInlineElementDirName + strInlineElementFileName
        xmlInlineElement.set("src", strFlatName)
        strNewImagePath = strWorkingDir + "/CONVERT/django/images/embedded/" + strFlatName
        shutil.copy(strWorkingDir + "/" + strInlineElementDirName + "/" + strInlineElementFileName, strNewImagePath)
        # Shrink the copy in place.  Only GM_PATH is split shell-style; the
        # image path is passed as one argument so that spaces in it are safe.
        listArguments = shlex.split(GM_PATH) + ["convert", strNewImagePath, "-resize", "20x20", strNewImagePath]
        subprocess.check_output(listArguments, shell=False)
    # Change all citation elements into spans to create appropriate links
    for strOldTag in ("EOAcitenumeric", "EOAciteauthoryear", "EOAciteyear", "EOAcitemanual"):
        for xmlCitation in xmlEOAchapter.findall(".//" + strOldTag):
            xmlCitation.tag = "span"
            xmlCitation.set("class", "citation")
            xmlCitation.set("rel", "popover")
print("----------------------------------------------")
print("Processing Cross References")


def _orders_of_target(xmlTarget, tupOrders):
    """Return (chapter order, object order) for xmlTarget.

    If xmlTarget has no EOAchapter ancestor, tupOrders is returned unchanged.
    """
    for xmlParent in xmlTarget.iterancestors():
        if xmlParent.tag == "EOAchapter":
            tupOrders = (xmlParent.get("order"), xmlTarget.get("order"))
    return tupOrders


def _orders_of_matches(listCandidates, strAttribute, strWanted, strMessage, strResult, tupOrders):
    """Update tupOrders from every candidate whose strAttribute equals strWanted.

    strMessage is logged (with strResult filled in) for each match.  When
    several candidates match, the last one determines the result.
    """
    for xmlCandidate in listCandidates:
        if xmlCandidate.get(strAttribute) == strWanted:
            logging.debug(strMessage, strResult)
            tupOrders = _orders_of_target(xmlCandidate, tupOrders)
    return tupOrders


# The possible targets are neither added nor removed while the references
# are rewritten, so every kind is collected from the document only once.
dictTargetElements = {
    strTargetTag: xmlEOAdocument.findall(".//" + strTargetTag)
    for strTargetTag in (
        "EOAequation", "EOAchapter", "EOAtheorem", "EOAsection",
        "EOAsubsection", "EOAfigure", "EOAfootnote", "EOAtable",
    )
}

# Substitute References with their targets (with links).  The checks run in
# a fixed order and a later hit overrides an earlier one.
for xmlEOAchapter in xmlEOAchapters:
    for xmlReference in xmlEOAchapter.findall(".//EOAref"):
        strResult = "!!! Cross Reference !!!"
        tupOrders = ("", "")
        xmlReferenceLabelText = xmlReference.find("Label").text
        xmlReferenceRefTarget = xmlReference.find("ref").get("target")
        if xmlReferenceLabelText in dictEquations:
            # Equation referenced through its label
            strResult = dictEquations[xmlReferenceLabelText]
            tupOrders = _orders_of_matches(dictTargetElements["EOAequation"], "label", xmlReferenceLabelText, "Successfully found link to array formula: %s", strResult, tupOrders)
        if xmlReferenceRefTarget in dictEquations:
            # Equation referenced through its uid
            strResult = dictEquations[xmlReferenceRefTarget]
            tupOrders = _orders_of_matches(dictTargetElements["EOAequation"], "uid", xmlReferenceRefTarget, "Successfully found link to normal formula: %s", strResult, tupOrders)
        if xmlReferenceRefTarget in dictLists:
            logging.debug("Found link to list.")
            strResult = dictLists[xmlReferenceRefTarget]
            xmlEOAlistitem = xmlEOAdocument.xpath("//EOAchapter/*[contains(@id, $targetuid)]", targetuid=xmlReferenceRefTarget)[0]
            tupOrders = _orders_of_target(xmlEOAlistitem, tupOrders)
        if xmlReferenceRefTarget in dictChapters:
            logging.debug("Found link to chapter.")
            strResult = dictChapters[xmlReferenceRefTarget]
            # A separate loop variable keeps the outer xmlEOAchapter intact.
            for xmlTargetChapter in dictTargetElements["EOAchapter"]:
                if xmlTargetChapter.get("id") == xmlReferenceRefTarget:
                    logging.debug("Successfully handled link to a chapter: %s", strResult)
                    tupOrders = (xmlTargetChapter.get("order"), "top")
        if xmlReferenceRefTarget in dictTheorems:
            logging.debug("Found link to ein Theorem")
            strResult = dictTheorems[xmlReferenceRefTarget]
            tupOrders = _orders_of_matches(dictTargetElements["EOAtheorem"], "uid", xmlReferenceRefTarget, "Successfully handled link to a theorem: %s ", strResult, tupOrders)
        if xmlReferenceRefTarget in dictSections:
            logging.debug("Found link to section")
            strResult = dictSections[xmlReferenceRefTarget]
            tupOrders = _orders_of_matches(dictTargetElements["EOAsection"], "id", xmlReferenceRefTarget, "Successfully handled link to section: %s ", strResult, tupOrders)
            tupOrders = _orders_of_matches(dictTargetElements["EOAsubsection"], "id", xmlReferenceRefTarget, "Successfully handled link to subsection %s: ", strResult, tupOrders)
        if xmlReferenceRefTarget in dictFigures:
            logging.debug("Found link to figure")
            strResult = dictFigures[xmlReferenceRefTarget]
            tupOrders = _orders_of_matches(dictTargetElements["EOAfigure"], "id", xmlReferenceRefTarget, "Successfully handled link to figure: %s", strResult, tupOrders)
        if xmlReferenceRefTarget in dictFootnotes:
            logging.debug("Found link to footnote")
            strResult = dictFootnotes[xmlReferenceRefTarget]
            tupOrders = _orders_of_matches(dictTargetElements["EOAfootnote"], "id", xmlReferenceRefTarget, "Successfully handled link to footnote: %s", strResult, tupOrders)
        if xmlReferenceLabelText in dictTables:
            logging.debug("Found link to table")
            strResult = dictTables[xmlReferenceLabelText]
            tupOrders = _orders_of_matches(dictTargetElements["EOAtable"], "label", xmlReferenceLabelText, "Successfully handled link to table: %s", strResult, tupOrders)
        strChapterOrder, strObjectOrder = tupOrders
        # Replace the reference by a link; clear() also drops the tail, so it
        # is saved and restored.
        tmpTail = xmlReference.tail or ""
        xmlReference.clear()
        xmlReference.text = strResult
        xmlReference.tail = tmpTail
        xmlReference.tag = "a"
        xmlReference.set("href", "../" + strChapterOrder + "/index.html#" + strObjectOrder)
print("----------------------------------------------")
print("Processing Page References")
# The facsimile pages do not change in this stage; collect them once.
xmlEOAfacsimilepages = xmlEOAdocument.findall(".//EOAfacsimilepage")
for xmlEOAchapter in xmlEOAchapters:
    for xmlReference in xmlEOAchapter.findall(".//EOApageref"):
        # Start from the placeholder for every reference, so an unresolved
        # label never inherits the page label of the previous reference.
        strResult = "!!! Page Reference !!!"
        xmlReferenceLabelText = xmlReference.find("Label").text
        if xmlReferenceLabelText in dictPagelabels:
            logging.debug("Found link to page: %s" % xmlReferenceLabelText)
            strResult = dictPagelabels[xmlReferenceLabelText]
        xmlReference.text = strResult
        # Iterate over a copy, because the children are removed on the way.
        for xmlChild in list(xmlReference):
            xmlReference.remove(xmlChild)
        # Check, if EOApageref points to a Facsimile-Page
        # If yes, make a href to the facsimile
        for xmlEOAfacsimilepage in xmlEOAfacsimilepages:
            if xmlEOAfacsimilepage.get("label") == xmlReferenceLabelText:
                logging.debug("Found cross reference to facsimile.")
                xmlReference.tag = "a"
                strPartOrder = xmlEOAfacsimilepage.getparent().get("order")
                strFacsimileOrder = xmlEOAfacsimilepage.get("order")
                logging.debug(strFacsimileOrder)
                xmlReference.set("href", "../" + strPartOrder + "/" + strFacsimileOrder + ".html")
print("----------------------------------------------")
print("Normalizing Index Entries")
# An index entry has the form  main[@display][!secondary[@display]][|addition]
# Its parts are moved from the element text into attributes.
for xmlEOAchapter in xmlEOAchapters:
    xml_EOA_indices = xmlEOAchapter.xpath(".//EOAindex | .//EOAindexperson | .//EOAindexlocation")
    for xmlEOAindex in xml_EOA_indices:
        # Using the gettext function here, because of subelements
        strEOAindextext = gettext(xmlEOAindex).replace("\n", " ")
        # The element is emptied; iterate over a copy of the children
        for sub_element in list(xmlEOAindex):
            xmlEOAindex.remove(sub_element)
        xmlEOAindex.text = None
        listFirstPart = strEOAindextext.split("|")
        listSecondPart = listFirstPart[0].split("!")
        strMainEntry = listSecondPart[0]
        # Check if a sortkey is present via @
        listSortKey = strMainEntry.split("@")
        if len(listSortKey) == 2:
            xmlEOAindex.set("main", listSortKey[0])
            xmlEOAindex.set("display", listSortKey[1])
        else:
            xmlEOAindex.set("main", strMainEntry)
        if len(listSecondPart) > 1:
            strSecondPart = listSecondPart[1]
            listSecondarySortkey = strSecondPart.split("@")
            if len(listSecondarySortkey) == 2:
                xmlEOAindex.set("secondary", listSecondarySortkey[0])
                xmlEOAindex.set("secondarydisplay", listSecondarySortkey[1])
            else:
                xmlEOAindex.set("secondary", strSecondPart)
        if len(listFirstPart) > 1:
            strAddition = listFirstPart[1]
            if strAddition == "textbf":
                xmlEOAindex.set("bold", "true")
            # Only the leading keyword is cut off; a "see" occurring inside
            # the target text itself is kept.
            if strAddition.startswith("seealso"):
                xmlEOAindex.set("seealso", strAddition[len("seealso"):])
                # Entries containing seealso are omitted for the time being
                xmlEOAindex.tag = "temp"
            elif strAddition.startswith("see"):
                xmlEOAindex.set("see", strAddition[len("see"):])
                # Entries containing see are omitted for the time being
                xmlEOAindex.tag = "temp"
        # Figure out parent chapter number and parent Element order
        for xmlParent in xmlEOAindex.iterancestors():
            if xmlParent.get("order") is None:
                continue
            if xmlParent.tag == "EOAchapter":
                xmlEOAindex.set("chapterorder", xmlParent.get("order"))
            else:
                xmlEOAindex.set("elementorder", xmlParent.get("order"))
etree.strip_tags(xmlDjangoTree, "temp")
print("----------------------------------------------")
print("Removing Duplicate Index Entries")
# Within one child element of a chapter, an index entry that repeats an
# earlier main entry (with the same or with no subentry) is marked as "temp".
for xmlEOAchapter in xmlEOAchapters:
    for xmlChild in xmlEOAchapter.iterchildren():
        # main entry -> subentries already seen inside this child
        dictEntries = {}
        xml_EOA_indices = xmlChild.xpath(".//EOAindex | .//EOAindexperson | .//EOAindexlocation")
        for xmlEOAindex in xml_EOA_indices:
            strEntry = xmlEOAindex.get("main")
            strSubentry = xmlEOAindex.get("secondary")
            if strEntry not in dictEntries:
                # First occurrence: remember its subentry as well, so that
                # the very next repetition is already recognised.
                dictEntries[strEntry] = [] if strSubentry is None else [strSubentry]
                continue
            if strSubentry is None or strSubentry in dictEntries[strEntry]:
                # The see/seealso attributes live on the index element, not
                # on the enclosing child.
                if (xmlEOAindex.get("see") is None) and (xmlEOAindex.get("seealso") is None):
                    xmlEOAindex.tag = "temp"
            else:
                dictEntries[strEntry].append(strSubentry)
print("----------------------------------------------")
print("Removing Index Entries in Footnotes")
# Index entries somewhere below an EOAfootnote are marked as "temp".
for xmlEOAchapter in xmlEOAchapters:
    for xmlChild in xmlEOAchapter.iterchildren():
        xml_EOA_indices = xmlChild.xpath(".//EOAindex | .//EOAindexperson | .//EOAindexlocation")
        for xmlEOAindex in xml_EOA_indices:
            if any(xmlParent.tag == "EOAfootnote" for xmlParent in xmlEOAindex.iterancestors()):
                xmlEOAindex.tag = "temp"
                logging.debug("Found index in footnote")
print("----------------------------------------------")
print("Sorting and Creating Regular Index")
# Collect all EOAindex elements of the output tree.
xml_regular_EOAindices = xmlDjangoTree.findall("//EOAindex")
# Stays None when there is nothing to index, so that the append further down
# can be skipped instead of failing on an unbound name.
xml_eoa_print_regular_index = None
if xml_regular_EOAindices:
    logging.debug("Sorting %s entries for regular index.", len(xml_regular_EOAindices))
    # make_index is defined elsewhere; presumably it returns the element that
    # holds the sorted index -- confirm against its definition.
    xml_eoa_print_regular_index = make_index(xml_regular_EOAindices, index_type="regular")
# If EOAprintindex is found, append xml_eoa_print_regular_index to xmlEOAdocument
xmlPrintindex = xmlTree.find(".//EOAprintindex")
if xmlPrintindex is not None:
    # Mark <p><EOAprintindex/></p> for removal by retagging the placeholder
    # and its parent as "temp".
    xmlPrintindex.tag = "temp"
    xmlPrintindex.getparent().tag = "temp"
    if xml_eoa_print_regular_index is not None:
        xmlEOAdocument.append(xml_eoa_print_regular_index)
print("----------------------------------------------")
print("Sorting and Creating Person Index")
# Collect all EOAindexperson elements of the output tree.
xml_person_EOAindices = xmlDjangoTree.findall("//EOAindexperson")
# Stays None when there is nothing to index, so that the append further down
# can be skipped instead of failing on an unbound name.
xml_eoa_print_person_index = None
if xml_person_EOAindices:
    # make_index is defined elsewhere; presumably it returns the element that
    # holds the sorted index -- confirm against its definition.
    xml_eoa_print_person_index = make_index(xml_person_EOAindices, index_type="person")
# If EOAprintpersonindex is found, append xml_eoa_print_person_index to xmlEOAdocument.
# The placeholder is looked up in xmlTree; an earlier lookup in xmlDjangoTree
# had been disabled.
xmlPrintindex = xmlTree.find(".//EOAprintpersonindex")
if xmlPrintindex is not None:
    # Mark <p><EOAprintpersonindex/></p> for removal by retagging the
    # placeholder and its parent as "temp".
    xmlPrintindex.tag = "temp"
    xmlPrintindex.getparent().tag = "temp"
    if xml_eoa_print_person_index is not None:
        xmlEOAdocument.append(xml_eoa_print_person_index)
# Build the location index from the EOAindexlocation elements.
print("----------------------------------------------")
print("Sorting and Creating Location Index")
xml_location_EOAindices = xmlDjangoTree.findall("//EOAindexlocation")
# Stays None when there is nothing to index, so that the append further down
# can be skipped instead of failing on an unbound name.
xml_eoa_print_location_index = None
if xml_location_EOAindices:
    # make_index is defined elsewhere; presumably it returns the element that
    # holds the sorted index -- confirm against its definition.
    xml_eoa_print_location_index = make_index(xml_location_EOAindices, index_type="location")
# If EOAprintlocationindex is found, append xml_eoa_print_location_index to xmlEOAdocument
xmlPrintindex = xmlTree.find(".//EOAprintlocationindex")
if xmlPrintindex is not None:
    # Mark the placeholder and its parent for removal by retagging both as
    # "temp".
    xmlPrintindex.tag = "temp"
    xmlPrintindex.getparent().tag = "temp"
    if xml_eoa_print_location_index is not None:
        xmlEOAdocument.append(xml_eoa_print_location_index)
############################################################################
#                               Cleaning up                                #
############################################################################
# TODO: Delete unnecessary attributes such as id
# TODO: Delete unnecessary tags such as EOAlabel

# Tags that are unwrapped: the element disappears, its text and children
# stay in the parent.
unwrapped_tags = ("temp", "citetext", "EOAprintbibliography")
etree.strip_tags(xmlDjangoTree, *unwrapped_tags)

# citekey elements are removed together with their content; with_tail=False
# keeps the text that follows them.
etree.strip_elements(xmlDjangoTree, "citekey", with_tail=False)

# Attributes that are dropped from the output tree. "rend" is deliberately
# not part of this list (it had been commented out).
dropped_attributes = ("id-text", "id", "noindent", "type", "label", "spacebefore")
etree.strip_attributes(xmlDjangoTree, *dropped_attributes)
############################################################################
#                         Save xmlDjangoTree                               #
############################################################################
# Serialise first, so that a failure here does not leave a truncated file.
tmpResult = etree.tostring(xmlDjangoTree, pretty_print=True, encoding="unicode")
# The serialised text carries no XML declaration, so readers will assume
# UTF-8; write it as UTF-8 explicitly instead of relying on the locale.
# The with-statement closes the file even if the write fails.
with open("CONVERT/django/Django.xml", "w", encoding="utf-8") as tmpFile:
    tmpFile.write(tmpResult)
logging.debug("Wrote Django.xml")