Skip to content
Permalink
7d735ccb15
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
executable file 1560 lines (1436 sloc) 75.7 KB
#!/usr/bin/env python3
# -*- coding: utf-8; mode: python -*-
# Time-stamp: <2018-06-04 15:09:32 (kthoden)>
import pickle
import os
import sys
import re
import shutil
import shlex
import subprocess
import argparse
import configparser
import libeoaconvert
import logging
from copy import deepcopy
from lxml import etree
#####################
# Parsing arguments #
#####################
parser = argparse.ArgumentParser()
parser.add_argument(
    "-c", "--config",
    dest="CONFIG_FILE",
    help="Name of configuration file",
    metavar="CONFIGURATION",
)
args = parser.parse_args()

if args.CONFIG_FILE is not None:
    CONFIG_FILE = os.path.abspath(args.CONFIG_FILE)
else:
    # Default: config/eoaconvert.cfg next to this script. The script path is
    # made absolute first; when the script is started without a directory
    # component, dirname(sys.argv[0]) is "" and plain string concatenation
    # would point at "/config/eoaconvert.cfg" in the filesystem root.
    CONFIG_FILE = os.path.join(
        os.path.dirname(os.path.abspath(sys.argv[0])),
        "config",
        "eoaconvert.cfg",
    )

##################################
# Reading the configuration file #
##################################
CONFIG = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the
# list of files it parsed; fail with a clear message instead of a later
# KeyError on the missing 'General' section.
if not CONFIG.read(CONFIG_FILE):
    raise FileNotFoundError("Configuration file %s could not be read." % CONFIG_FILE)

######################
# Setting up logging #
######################
# LOGFILE is read from the configuration but is not passed to basicConfig
# below, so no file handler is installed at this point.
LOGFILE = CONFIG['General']['logfile']
LOGLEVEL = CONFIG['General']['loglevel']
logging.basicConfig(level=LOGLEVEL, format='%(asctime)s - %(levelname)s - %(message)s')
logging.debug("The configfile is %s.", CONFIG_FILE)

########################
# Paths to executables #
########################
GM_PATH = CONFIG['Executables']['graphicsmagic']
TL_PATH = CONFIG['Executables']['texlive']
###########################################
# Loading data from first conversion step #
###########################################
# NOTE: unpickling can execute arbitrary code. This file is read from the
# local tmp_files directory; never point this at a pickle from an untrusted
# source.
with open('tmp_files/data.pickle', 'rb') as f:
    data = pickle.load(f)

# Lookup tables read from the pickled data, one per kind of numbered object.
dictChapters = data["chapterdict"]
dictEquations = data["eqdict"]
dictLists = data["listdict"]
dictTheorems = data["theoremdict"]
dictSections = data["secdict"]
dictFigures = data["figdict"]
dictFootnotes = data["fndict"]
dictTables = data["tabdict"]
dictPagelabels = data["pagelabeldict"]

# makedirs(exist_ok=True) replaces the separate exists()/mkdir() pair.
os.makedirs(os.getcwd() + os.path.sep + "debug", exist_ok=True)

xmlTree = etree.parse("tmp_files/IntermediateXMLFile.xml")
libeoaconvert.debug_xml_here(xmlTree, "fresh")

print("""
############################################################################
# Convert tralics-XML to Django Data Structure #
############################################################################
""")

# Create django File Structure.
# Each leaf directory is created together with any missing parents, so a
# missing CONVERT directory or a partially created tree (django exists but
# images/files do not) no longer breaks the run.
os.makedirs(os.getcwd() + "/CONVERT/django/images/embedded", exist_ok=True)
os.makedirs(os.getcwd() + "/CONVERT/django/files", exist_ok=True)

# Create empty xmlTree
xmlEOAdocument = etree.Element("EOAdocument")
xmlDjangoTree = etree.ElementTree(xmlEOAdocument)

etree.strip_attributes(xmlTree, "noindent")
# Remove temp-Tag
etree.strip_tags(xmlTree, "temp")
libeoaconvert.debug_xml_here(xmlTree, "afterstriptags")

# Write Temporary XML-Maintree.
# The serialisation carries no XML declaration, so the file is written as
# UTF-8 explicitly instead of in the locale's default encoding; the context
# manager closes the file even if writing fails.
ergebnis = etree.tostring(xmlTree, pretty_print=True, encoding="unicode")
with open("tmp_files/Devel_django.xml", "w", encoding="utf-8") as ergebnisdatei:
    ergebnisdatei.write(ergebnis)

# Find all Chapters from the original tralics XML
xmlChapters = xmlTree.findall("//div1")
def replace_footnote_with_sup(note):
    """Turn a footnote element into an empty ``sup`` element, in place.

    Text, attributes and children of ``note`` are discarded; the text that
    follows the element inside its parent (the tail) is preserved.
    """
    trailing_text = note.tail
    # clear() wipes the tail as well, hence the save/restore around it
    note.clear()
    note.tag, note.tail = "sup", trailing_text
# def replace_footnote_with_sup ends here
def alph_footnote_index(fndex):
    """Return the lowercase Latin footnote mark for a zero-based index.

    Marks continue past ``z`` with two letters, then three, and so on
    (``a`` .. ``z``, ``aa`` .. ``zz``, ``aaa`` ...), so more than 26
    footnotes are supported.

    Args:
        fndex: zero-based, non-negative footnote index.

    Returns:
        The mark as a string of lowercase letters.

    Raises:
        ValueError: if ``fndex`` is negative (the previous recursive version
            recursed without bound for such input).

    >>> alph_footnote_index(0)
    'a'
    >>> alph_footnote_index(1)
    'b'
    >>> alph_footnote_index(24)
    'y'
    >>> alph_footnote_index(25)
    'z'
    >>> alph_footnote_index(26)
    'aa'
    >>> alph_footnote_index(27)
    'ab'
    >>> alph_footnote_index(702)
    'aaa'
    """
    if fndex < 0:
        raise ValueError("footnote index must not be negative: %r" % (fndex,))
    alphabet = "abcdefghijklmnopqrstuvwxyz"
    letters = []
    while True:
        fndex, remainder = divmod(fndex, len(alphabet))
        letters.append(alphabet[remainder])
        if not fndex:
            break
        # There is no zero digit in this numbering ('a' follows 'z' as 'aa'),
        # so the carried-over quotient is shifted down by one.
        fndex -= 1
    # Letters were collected from the least significant position upwards.
    return "".join(reversed(letters))
# def alph_footnote_index ends here
def debug_chapters(xmlEOAchapters):
    """Write every chapter to its own XML file for debugging.

    The files are named ``debug-chapter-NN.xml`` (NN counting from 01 in
    iteration order) and are written to the ``debug`` directory below the
    current working directory, which must already exist.

    Args:
        xmlEOAchapters: iterable of chapter elements that ``etree.tostring``
            can serialise.
    """
    for chap_num, chapter in enumerate(xmlEOAchapters, start=1):
        tmp_filename = "%s/debug/debug-chapter-%02d.xml" % (os.getcwd(), chap_num)
        # Serialise before opening the file so that a failing chapter does
        # not leave an empty file behind.
        tmp_result = etree.tostring(chapter, pretty_print=True, encoding="unicode")
        # The output has no XML declaration, so it is written as UTF-8
        # explicitly; the context manager closes the file even on errors.
        with open(tmp_filename, "w", encoding="utf-8") as tmp_file:
            tmp_file.write(tmp_result)
# def debug_chapters ends here
def gettext(xmlElement):
    """Return the text content of an element with all markup removed.

    The element's own text, the (recursively flattened) text of every child
    and the tail text following each child are concatenated in document
    order.
    """
    pieces = [xmlElement.text or ""]
    for child in xmlElement:
        pieces.append(gettext(child))
        pieces.append(child.tail or "")
    return "".join(pieces)
# def gettext ends here
def djangoParseObject(xmlElement, indent=False, listtype=None, listnumber=0, uid=None):
    """Convert one element of the intermediate XML into its Django XML form.

    The element's tag (or, for tables and equations, the presence of certain
    descendants) selects a branch that builds the result element. Most
    branches stamp what they create with an ``order`` attribute taken from
    the module-level counter ``intObjectNumber`` and then advance that
    counter. Several branches move children out of ``xmlElement`` or retag
    elements in place, and the figure and equation branches copy or convert
    image files below ``CONVERT/django/images``.

    Args:
        xmlElement: lxml element to convert (``getparent()`` and
            ``iterchildren()`` are used on it and its descendants).
        indent: when true, the result gets the attribute ``indent="True"``.
        listtype: when not None, stored in the result's ``listtype`` attribute.
        listnumber: when not 0, stored in the result's ``listnumber`` attribute.
        uid: when not None, stored in the result's ``id`` attribute.

    Returns:
        The converted element. Branches that can yield several sibling
        objects return them wrapped in a ``temp`` element.
    """
    # NOTE(review): the block nesting of this function was reconstructed from
    # a copy that had lost its indentation - confirm against version control.
    # Get Dictionaries of Numbers via Global Variables
    global dictChapters
    global dictFigures
    global dictEquations
    global dictSections
    global dictFootnotes
    global dictPagelabels
    global dictTables
    global dictLists
    global intObjectNumber
    # Check what kind of Element we have and change the data.
    # Nodes whose tag is not a string (in lxml: comments, processing
    # instructions, entities) are handled by the else branch at the bottom.
    if isinstance(xmlElement.tag, str):
        if xmlElement.tag == "EOAtranscripted":
            # Two-column transcription: headers plus a left and a right column
            xmlResult = etree.Element("temp")
            xmlEOATranscription = etree.Element("EOAtranscription")
            xmlEOATranscription.set("order", str(intObjectNumber))
            intObjectNumber += 1
            xmlLeftheader = xmlElement.find(".//Leftheader")
            etree.strip_tags(xmlLeftheader, "p")
            xmlEOATranscription.append(xmlLeftheader)
            xmlRightheader = xmlElement.find(".//Rightheader")
            etree.strip_tags(xmlRightheader, "p")
            xmlEOATranscription.append(xmlRightheader)
            xmlTranscriptedtext = xmlElement.find(".//EOAtranscriptedtext")
            # change \n\n into </p><p> and pagebreak into </p><pagebreak><p> to create some valid markup
            strTranscriptedtext = etree.tostring(xmlTranscriptedtext, encoding="unicode")
            #strTranscriptedtext = re.sub (r"\n\n", "</p><p>", str(strTranscriptedtext))
            #strTranscriptedtext = re.sub (r"<p><pagebreak/></p>", "<pagebreak/>", strTranscriptedtext)
            xmlLeftColumn = etree.Element("EOAtranscriptionleft")
            xmlRightColumn = etree.Element("EOAtranscriptionright")
            boolRightColumn = False
            # Re-parse the serialised text so the loop works on a detached copy
            xmlTemp = etree.XML(str(strTranscriptedtext))
            # Everything before the first <pagebreak/> goes to the left
            # column, everything after it to the right; the pagebreak itself
            # is dropped.
            # NOTE(review): the loop variable rebinds the parameter
            # xmlElement; the rest of this branch no longer uses the original.
            for xmlElement in xmlTemp.iterchildren():
                if xmlElement.tag == "pagebreak":
                    boolRightColumn = True
                    continue
                if boolRightColumn == False:
                    xmlLeftColumn.append(xmlElement)
                if boolRightColumn == True:
                    xmlRightColumn.append(xmlElement)
            xmlEOATranscription.append(xmlLeftColumn)
            xmlEOATranscription.append(xmlRightColumn)
            # Convert Images within the transcription
            logging.debug("EOAfigurenonumber")
            xmlFigures = xmlEOATranscription.findall(".//EOAfigurenonumber")
            logging.debug(xmlFigures)
            # findall() returns a list (possibly empty), so this check always passes
            if xmlFigures is not None:
                for xmlFigure in xmlFigures:
                    strImageFileString = xmlFigure.find(".//file").text
                    strImageFileString = strImageFileString.rstrip("\n")
                    strImageFileDir = os.path.dirname(strImageFileString)
                    # Drop the slashes so that directory + file name form one flat name
                    strImageFileDir = re.sub("/", "", strImageFileDir)
                    strImageFileName = os.path.basename(strImageFileString)
                    strImageFileNamewoSuffix = os.path.splitext(strImageFileName)[0]
                    # Scaled copy for the embedded view; with the "\>" suffix
                    # GraphicsMagick presumably only shrinks larger images - confirm.
                    strCommand = GM_PATH + " convert " + os.getcwd() + "/" + strImageFileString + " -resize 250x250\\> " + os.getcwd() + "/CONVERT/django/images/embedded/" + strImageFileDir + strImageFileName
                    listArguments = shlex.split(strCommand)
                    subprocess.check_output(listArguments, shell=False)
                    # NOTE(review): the tail is saved here but never written
                    # back after clear() in this branch.
                    tmpStrTail = xmlFigure.tail
                    xmlFigure.clear()
                    xmlFigure.tag = "img"
                    xmlFigure.set("src", strImageFileDir + strImageFileName)
                    xmlFigure.set("alt", "")
            xmlResult.append(xmlEOATranscription)
        elif xmlElement.tag == "EOAletterhead":
            # Letter head: the four parts are moved over in a fixed order
            xmlResult = etree.Element("temp")
            xmlEOAletterhead = etree.Element("EOAletterhead")
            xmlEOAletterrecipient = xmlElement.find(".//Recipient")
            xmlEOAletterhead.append(xmlEOAletterrecipient)
            xmlEOAletterarchive = xmlElement.find(".//Archive")
            xmlEOAletterhead.append(xmlEOAletterarchive)
            xmlEOAletteradditional = xmlElement.find(".//Additional")
            xmlEOAletterhead.append(xmlEOAletteradditional)
            xmlEOAletterpages = xmlElement.find(".//Pages")
            xmlEOAletterhead.append(xmlEOAletterpages)
            xmlEOAletterhead.set("order", str(intObjectNumber))
            intObjectNumber += 1
            xmlResult.append(xmlEOAletterhead)
        elif xmlElement.tag == "EOAfigurenonumber":
            # elif xmlElement.findall(".//EOAfigurenonumber"):
            xmlResult = etree.Element("temp")
            # Create basic Element EOAfigurenonumber
            xmlEOAfigure = etree.Element("EOAfigurenonumber")
            # Copy Image
            strImageFileString = xmlElement.find(".//file").text
            strImageFileString = strImageFileString.rstrip("\n")
            strImageFileDir = os.path.dirname(strImageFileString)
            # Drop the slashes so that directory + file name form one flat name
            strImageFileDir = re.sub("/", "", strImageFileDir)
            strImageFileName = os.path.basename(strImageFileString)
            strImageFileNamewoSuffix = os.path.splitext(strImageFileName)[0]
            shutil.copy(os.getcwd() + "/" + strImageFileString, os.getcwd() + "/CONVERT/django/images/" + strImageFileDir + strImageFileName)
            xmlEOAfigure.set("file", strImageFileDir + strImageFileName)
            xmlEOAfigure.set("width", xmlElement.find(".//width").text + "px;")
            xmlEOAfigure.set("order", str(intObjectNumber))
            intObjectNumber += 1
            xmlResult.append(xmlEOAfigure)
        elif xmlElement.tag == "EOAfigure":
            xmlResult = etree.Element("temp")
            # Create basic Element EOAfigure
            xmlEOAfigure = etree.Element("EOAfigure")
            # Copy Image
            if xmlElement.get("type") == "hionly":
                # Nothing is copied or appended here: the returned temp
                # element stays empty.
                logging.debug("Found hyperimage figure, continuing")
                pass
            else:
                # NOTE(review): everything down to the id assignment is
                # placed inside this else because it reads the file, width,
                # caption and anchor children - confirm the extent.
                strImageFileString = xmlElement.find(".//file").text
                strImageFileString = strImageFileString.rstrip("\n")
                strImageFileDir = os.path.dirname(strImageFileString)
                strImageFileDir = re.sub("/", "", strImageFileDir)
                strImageFileName = os.path.basename(strImageFileString)
                strImageFileNamewoSuffix = os.path.splitext(strImageFileName)[0]
                shutil.copy(os.getcwd() + "/" + strImageFileString, os.getcwd() + "/CONVERT/django/images/" + strImageFileDir + strImageFileName)
                logging.debug("Django figure %s." % strImageFileName)
                # yellow
                if os.path.splitext(strImageFileName)[1].lower() == ".pdf":
                    logging.debug("Found a PDF file")
                    # The figure then references a .png of the same name;
                    # presumably sanitizeImage creates it - confirm in libeoaconvert.
                    # The return value is not used further in this branch.
                    strImageFilepath = libeoaconvert.sanitizeImage(os.getcwd() + "/CONVERT/django/images/" + strImageFileDir + strImageFileName, GM_PATH, TL_PATH)
                    xmlEOAfigure.set("file", strImageFileDir + strImageFileName.replace(".pdf", ".png"))
                    logging.debug("The filename is %s" % xmlEOAfigure.get("file"))
                else:
                    xmlEOAfigure.set("file", strImageFileDir + strImageFileName)
                xmlEOAfigure.set("width", xmlElement.find(".//width").text + "px;")
                xmlEOAfigure.set("order", str(intObjectNumber))
                # Insert Caption
                xmlEOAfigure.append(xmlElement.find(".//caption"))
                xmlResult.append(xmlEOAfigure)
                intObjectNumber += 1
                # Insert visual Number and uid; the anchor id is the key into dictFigures
                strFigureNumber = dictFigures[xmlElement.find(".//anchor").get("id")]
                xmlEOAfigure.set("number", strFigureNumber)
                strFigureUID = xmlElement.find(".//anchor").get("id")
                xmlEOAfigure.set("id", strFigureUID)
        elif xmlElement.findall(".//EOAtable"):
            # Taken for any element that has an EOAtable descendant
            xmlResult = etree.Element("EOAtable")
            xmlRawTable = xmlElement.find(".//table")
            xmlResult.set("order", str(intObjectNumber))
            intObjectNumber += 1
            xmlResult.append(xmlRawTable)
            # Copy Number, Label and Caption
            if xmlElement.find(".//EOAtablecaption").text != "nonumber":
                xmlResult.append(xmlElement.find(".//EOAtablecaption"))
                xmlResult.set("label", xmlElement.find(".//EOAtablelabel").text)
                xmlResult.set("number", dictTables[xmlElement.find(".//EOAtablelabel").text])
                xmlResult.set("id", xmlRawTable.get("id"))
            else:
                # NOTE(review): the attribute is set on the input element,
                # not on xmlResult - confirm this is intended.
                xmlElement.set("numbering", "false")
            #if xmlElement.find(".//EOAtablelabel").text is not None:
            # Transform width of Columns
            strColumnString = xmlElement.find(".//EOAtablecolumns").text
            strColumnString = re.sub(r"\|", "", strColumnString)
            # One match per column: alignment letter followed by a width in cm
            reMatchObjects = re.findall(r'([L|R|C].*?cm)', strColumnString)
            intTableWidth = 0
            # Both lists start with a None placeholder so that the columns
            # can be addressed with a 1-based index further down
            listColumnAlignments = [None]
            listColumnWidths = [None]
            intNumberOfColumns = 0
            for strColumnDefinition in reMatchObjects:
                strColumnDefinition = strColumnDefinition.rstrip("cm")
                strColumnAlignment = strColumnDefinition[0]
                if strColumnAlignment == "L":
                    strColumnAlignment = "left"
                if strColumnAlignment == "C":
                    strColumnAlignment = "center"
                if strColumnAlignment == "R":
                    strColumnAlignment = "right"
                listColumnAlignments.append(strColumnAlignment)
                # Width in cm times 75 - presumably a cm-to-pixel factor, confirm
                intColumnWidth = int(float(strColumnDefinition.lstrip("LRC")) * 75)
                listColumnWidths.append(intColumnWidth)
                intTableWidth += intColumnWidth
                intNumberOfColumns += 1
            xmlRawTable.set("width", str(intTableWidth))
            # Figure out and deal with the Header
            xmlHeader = xmlRawTable.find(".//row/cell/tableheader")
            if xmlHeader is not None:
                # Remove the marker element but keep the text that followed it
                xmlHeader.text = ""
                xmlHeader.getparent().text = xmlHeader.tail
                xmlHeader.getparent().remove(xmlHeader)
                # The first row becomes the header row; its retagged cells
                # are no longer found by the "cell" search below
                xmlFirstRow = xmlRawTable.find(".//row")
                xmlFirstRow.tag = "tr"
                xmlFirstRowCells = xmlFirstRow.findall(".//cell")
                for xmlFirstRowCell in xmlFirstRowCells:
                    xmlFirstRowCell.tag = "th"
            # Now Deal with the rest of the rows
            xmlTableRows = xmlRawTable.findall(".//row")
            for xmlTableRow in xmlTableRows:
                xmlTableCells = xmlTableRow.findall(".//cell")
                intCurrentColumn = 1
                for xmlTableCell in xmlTableCells:
                    xmlTableCell.tag = "td"
                    xmlTableCell.set("align",listColumnAlignments[intCurrentColumn])
                    xmlTableCell.set("style","width: " + str(listColumnWidths[intCurrentColumn]) + ";")
                    # Deal with multicolumn
                    if xmlTableCell.get("cols") is not None:
                        xmlTableCell.set("colspan", xmlTableCell.get("cols"))
                    # Advance the column index: reset it once it has passed
                    # the number of cells in this row, ...
                    if intCurrentColumn > len(xmlTableCells):
                        intCurrentColumn = 1
                    # Deal with multicolumn again, increase intCurrentColumn by the columns being spanned
                    elif xmlTableCell.get("cols") is not None:
                        intCurrentColumn = intCurrentColumn + int(xmlTableCell.get("cols"))
                        del xmlTableCell.attrib["cols"]
                    else:
                        intCurrentColumn += 1
                xmlTableRow.tag = "tr"
                xmlTableRow.set("valign", "top")
        elif xmlElement.tag == "list" and xmlElement.get('type') != 'description':
            # Ordered and simple lists are flattened: every item's content
            # ends up as an indented sibling inside the temp element
            xmlResult = etree.Element("temp")
            if xmlElement.get('type') == 'ordered':
                # Change first item into EOAlistfirstitem
                # NOTE(review): the path "..//item" starts at the parent of
                # the list, not at the list itself - confirm it always
                # yields this list's first item.
                xmlFirstItem = xmlElement.find("..//item")
                xmlFirstItemElement = xmlFirstItem.getchildren()[0]
                xmlResult.append(djangoParseObject(xmlFirstItemElement,indent=True, listtype="ordered", listnumber=xmlFirstItem.get("id-text"), uid=xmlFirstItem.get("id")))
                # Process Child Elements which are Part of this item
                if len(xmlFirstItem.getchildren()) >= 1:
                    for xmlChild in xmlFirstItem.iterchildren():
                        xmlResult.append(djangoParseObject(xmlChild,indent=True))
                xmlFirstItem.getparent().remove(xmlFirstItem)
                # Process remaining items in this list
                # (tmpIntNumber is counted up but not read anywhere in this function)
                tmpIntNumber = 2
                # NOTE(review): items are removed from the list while its
                # children are being iterated - confirm no item is skipped.
                for xmlItem in xmlElement.iterchildren():
                    xmlItemElement = xmlItem.getchildren()[0]
                    xmlResult.append(djangoParseObject(xmlItemElement,indent=True,listtype="ordered",listnumber=xmlItem.get("id-text"), uid=xmlItem.get("id")))
                    tmpIntNumber += 1
                    if len(xmlItem.getchildren()) >= 1:
                        for xmlChild in xmlItem.iterchildren():
                            xmlResult.append(djangoParseObject(xmlChild, indent=True))
                    xmlItem.getparent().remove(xmlItem)
            if xmlElement.get('type') == 'simple':
                # The tag of the first child tells plain lists (item) from
                # lists with custom labels (label)
                xml_first_child = xmlElement.getchildren()[0]
                if xml_first_child.tag == 'item':
                    logging.debug("a simple list with no special items")
                    # Change first item into EOAlistfirstitem
                    xmlFirstItem = xmlElement.find("..//item")
                    xmlFirstItemElement = xmlFirstItem.getchildren()[0]
                    xmlResult.append(djangoParseObject(xmlFirstItemElement,indent=True,listtype="unordered", listnumber="-"))
                    # Process Child Elements which are Part of this item
                    if len(xmlFirstItem.getchildren()) >= 1:
                        logging.debug("len xmlFirstItem.getchildren is greater or equal 1")
                        for xmlChild in xmlFirstItem.iterchildren():
                            xmlResult.append(djangoParseObject(xmlChild,indent=True))
                    xmlFirstItem.getparent().remove(xmlFirstItem)
                    for xmlItem in xmlElement.iterchildren():
                        xmlItemElement = xmlItem.getchildren()[0]
                        xmlResult.append(djangoParseObject(xmlItemElement,indent=True))
                        if len(xmlItem.getchildren()) >= 1:
                            for xmlChild in xmlItem.iterchildren():
                                xmlResult.append(djangoParseObject(xmlChild,indent=True))
                        xmlItem.getparent().remove(xmlItem)
                ####################
                # Work in progress #
                ####################
                elif xml_first_child.tag == 'label':
                    logging.debug("a simple list with named items")
                    # Change first item into EOAlistfirstitem
                    xmlFirstItem = xmlElement.find("..//item")
                    xmlFirstItemElement = xmlFirstItem.getchildren()[0]
                    logging.debug(xmlFirstItemElement.text)
                    # debugging
                    logging.debug(etree.tostring(xmlFirstItemElement))
                    # end of debugging
                    xml_first_label = xmlElement.find("..//label")
                    # The label text is passed on as the list number of the item
                    listnumber_text = xml_first_label.text
                    xmlResult.append(djangoParseObject(xmlFirstItemElement,indent=True,listtype="unordered custom", listnumber=listnumber_text))
                    logging.debug("The length of the children of the first item: %s." % len(xmlFirstItem.getchildren()))
                    # Process Child Elements which are Part of this item
                    if len(xmlFirstItem.getchildren()) >= 1:
                        logging.debug("len xmlFirstItem.getchildren is greater or equal 1")
                        for xmlChild in xmlFirstItem.iterchildren():
                            xmlResult.append(djangoParseObject(xmlChild,indent=True))
                    xmlFirstItem.getparent().remove(xmlFirstItem)
                    xml_first_label.getparent().remove(xml_first_label)
                    # Remaining labels and items are paired up by position
                    all_the_labels = xmlElement.findall("label")
                    all_the_items = xmlElement.findall("item")
                    logging.debug("itemlength %s." % len(all_the_items))
                    logging.debug("labellength %s." % len(all_the_labels))
                    for listlabel, listitem in zip(all_the_labels, all_the_items):
                        logging.debug("listitem text %s." % listitem.text)
                        logging.debug("listlabel text %s." % listlabel.text)
                        # Only the first child of each further item is converted here
                        xml_item_element = listitem.getchildren()[0]
                        xmlResult.append(djangoParseObject(xml_item_element, indent=True, listnumber=listlabel.text))
                        listlabel.getparent().remove(listlabel)
                        listitem.getparent().remove(listitem)
                    # for xmlItem in xmlElement.iterchildren():
                    # print("So many items have we: ", len(xmlItem))
                    # xmlItemElement = xmlItem.getchildren()[0]
                    # xmlResult.append(djangoParseObject(xmlItemElement,indent=True))
                    # if len(xmlItem.getchildren()) >= 1:
                    # for xmlChild in xmlItem.iterchildren():
                    # xmlResult.append(djangoParseObject(xmlChild,indent=True))
                    # xmlItem.getparent().remove(xmlItem)
                ###########################
                # End of work in progress #
                ###########################
        elif xmlElement.tag == "list" and xmlElement.get('type') == 'description':
            logging.debug("A description")
            xmlResult = etree.Element("temp")
            # Children are consumed pairwise (label, item) until the list is
            # empty: the label is moved into the description, the item is
            # removed at the end of each round.
            while len(xmlElement.getchildren()) != 0:
                xmlDescription = etree.Element("EOAdescription")
                xmlDescription.set("order", str(intObjectNumber))
                xmlLabel = xmlElement.getchildren()[0]
                xmlItem = xmlElement.getchildren()[1]
                if len(xmlItem.getchildren()) > 0:
                    xmlContent = xmlItem.getchildren()[0]
                else:
                    # Item without content: use an empty paragraph instead
                    xmlContent = etree.Element("p")
                xmlLabel.tag = "description"
                xmlDescription.append(xmlLabel)
                xmlDescription.append(xmlContent)
                xmlResult.append(xmlDescription)
                intObjectNumber += 1
                # Whatever is still left in the item follows as indented objects
                if len(xmlItem.getchildren()) > 0:
                    for xmlChild in xmlItem.iterchildren():
                        xmlResult.append(djangoParseObject(xmlChild,indent=True))
                xmlItem.getparent().remove(xmlItem)
        elif xmlElement.tag == "theorem":
            # Only the head and the first p descendant are carried over
            xmlTheoremHead = xmlElement.find(".//head")
            xmlTheoremText = xmlElement.find(".//p")
            strTheoremNumber = xmlElement.get("id-text")
            strTheoremID = xmlElement.get("id")
            xmlResult = etree.Element("EOAtheorem")
            xmlResult.append(xmlTheoremHead)
            xmlResult.append(xmlTheoremText)
            xmlResult.set("order", str(intObjectNumber))
            xmlResult.set("number", strTheoremNumber)
            xmlResult.set("uid", strTheoremID)
            intObjectNumber += 1
        elif xmlElement.findall(".//EOAequationarray"):
            # One EOAequation per equation found below the element
            xmlResult = etree.Element("temp")
            for xmlEquation in xmlElement.findall(".//EOAequation"):
                xmlEOAequation = etree.Element("EOAequation")
                xmlEOAequation.set("order", str(intObjectNumber))
                intObjectNumber += 1
                xmlEOAequation.set("number", xmlEquation.get("number"))
                xmlEOAequation.set("filename", xmlEquation.get("filename"))
                if xmlEquation.get("label") is not None:
                    xmlEOAequation.set("label", xmlEquation.get("label"))
                # The rendered equation image is copied from items/ to the django images
                shutil.copy(os.getcwd() + "/items/" + xmlEquation.get("filename"), os.getcwd() + "/CONVERT/django/images/")
                xmlEOAequation.set("TeX", xmlEquation.get("TeX"))
                # NOTE(review): same label assignment as a few lines above
                if xmlEquation.get("label") is not None:
                    xmlEOAequation.set("label", xmlEquation.get("label"))
                xmlResult.append(xmlEOAequation)
        elif xmlElement.findall(".//EOAequationarraynonumber"):
            xmlResult = etree.Element("temp")
            # filename and TeX are read from the EOAequationarraynonumber
            # elements themselves; the number stays empty
            for xmlEquation in xmlElement.findall(".//EOAequationarraynonumber"):
                xmlEOAequation = etree.Element("EOAequation")
                xmlEOAequation.set("order", str(intObjectNumber))
                intObjectNumber += 1
                xmlEOAequation.set("number", "")
                xmlEOAequation.set("filename", xmlEquation.get("filename"))
                shutil.copy(os.getcwd() + "/items/" + xmlEquation.get("filename"), os.getcwd() + "/CONVERT/django/images/")
                xmlEOAequation.set("TeX", xmlEquation.get("TeX"))
                xmlResult.append(xmlEOAequation)
        elif xmlElement.tag == "EOAequationnonumber":
            # Process one unnumbered equation which is not encapsulated
            xmlResult = etree.Element("EOAequation")
            xmlResult.set("order", str(intObjectNumber))
            intObjectNumber += 1
            xmlResult.set("filename", xmlElement.get("filename"))
            xmlResult.set("TeX", xmlElement.get("TeX"))
            shutil.copy(os.getcwd() + "/items/" + xmlElement.get("filename"), os.getcwd() + "/CONVERT/django/images/")
            xmlResult.set("number", "")
        elif xmlElement.findall(".//EOAequation"):
            # Process various Equations which may be encapsulated within <p>
            xmlEquations = xmlElement.findall(".//EOAequation")
            xmlResult = etree.Element("temp")
            for xmlEquation in xmlEquations:
                # Create basic Element EOAequation
                xmlEOAequation = etree.Element("EOAequation")
                xmlEOAequation.set("order", str(intObjectNumber))
                intObjectNumber += 1
                xmlEOAequation.set("number", xmlEquation.get("number"))
                xmlEOAequation.set("TeX", xmlEquation.get("TeX"))
                if xmlEquation.get("uid") is not None:
                    xmlEOAequation.set("uid", xmlEquation.get("uid"))
                shutil.copy(os.getcwd() + "/items/" + xmlEquation.get("filename"), os.getcwd() + "/CONVERT/django/images/")
                xmlEOAequation.set("filename", xmlEquation.get("filename"))
                xmlResult.append(xmlEOAequation)
        elif xmlElement.tag == "EOAequation":
            # Process one EOAequation which is not encapsulated
            xmlResult = etree.Element("EOAequation")
            xmlResult.set("order", str(intObjectNumber))
            intObjectNumber += 1
            xmlResult.set("number", xmlElement.get("number"))
            xmlResult.set("TeX", xmlElement.get("TeX"))
            if xmlElement.get("uid") is not None:
                xmlResult.set("uid", xmlElement.get("uid"))
            shutil.copy(os.getcwd() + "/items/" + xmlElement.get("filename"), os.getcwd() + "/CONVERT/django/images/")
            xmlResult.set("filename", xmlElement.get("filename"))
        elif xmlElement.tag == "div3":
            # Subsection: the head is moved over first, then the remaining
            # children are converted recursively
            xmlResult = etree.Element("EOAsubsection")
            xmlResult.set("order", str(intObjectNumber))
            intObjectNumber += 1
            xmlResult.append(xmlElement.find("head"))
            for xmlChild in xmlElement.iterchildren():
                xmlResult.append(djangoParseObject(xmlChild))
        elif xmlElement.tag == "div4":
            # Subsubsection: same treatment as div3
            xmlResult = etree.Element("EOAsubsubsection")
            xmlResult.set("order", str(intObjectNumber))
            intObjectNumber += 1
            xmlResult.append(xmlElement.find("head"))
            for xmlChild in xmlElement.iterchildren():
                xmlResult.append(djangoParseObject(xmlChild))
        elif xmlElement.tag == "EOAverse":
            # A verse becomes one paragraph; the lines are copied (the source
            # stays untouched) and separated by <br/>
            xmlResult = etree.Element("EOAparagraph")
            xmlResult.set("style", "verse")
            xmlResult.set("order", str(intObjectNumber))
            intObjectNumber += 1
            xml_verselines = xmlElement.findall("p")
            xmlResult.append(deepcopy(xml_verselines[0]))
            for xml_verseline in xml_verselines[1:]:
                linebreak = etree.Element("br")
                xmlResult.append(linebreak)
                copied_line = deepcopy(xml_verseline)
                xmlResult.append(copied_line)
            # Dissolve the copied <p> wrappers, keeping their content
            etree.strip_tags(xmlResult, "p")
        elif xmlElement.tag == "EOAbox":
            logging.debug("Found a box")
            xmlResult = etree.Element("temp")
            xmlResult.set("style", "box")
            # The head is turned into a box paragraph whose <p> becomes <b>
            box_header = xmlElement.find("head")
            box_header.tag = "EOAparagraph"
            box_header.set("style", "box")
            box_header.set("order", str(intObjectNumber))
            head_contents = box_header.find("p")
            head_contents.tag = "b"
            # etree.strip_tags(box_header, "p")
            xmlResult.append(box_header)
            intObjectNumber += 1
            # question: what to do about paragraph equivalent objects?
            box_elements = xmlElement.getchildren()
            logging.debug(len(box_elements))
            # Only direct <p> children are carried over as box paragraphs
            for box_element in box_elements:
                if box_element.tag == "p":
                    box_element.tag = "EOAparagraph"
                    box_element.set("style", "box")
                    box_element.set("order", str(intObjectNumber))
                    xmlResult.append(box_element)
                    intObjectNumber += 1
        elif xmlElement.tag == "EOAtocentry":
            # throw them out for the time being
            xmlResult = etree.Element("temp")
        elif xmlElement.tag == "pagebreak":
            # throw them out for the time being
            xmlResult = etree.Element("temp")
        else:
            # Everything else is treated as a paragraph and retagged in place
            xmlElement.tag = "EOAparagraph"
            quoted_paragraph = xmlElement.get("rend")
            # Re-sets rend="quoted" to the value it already has
            if quoted_paragraph is not None and quoted_paragraph == "quoted":
                xmlElement.set("rend", "quoted")
            xmlElement.set("order", str(intObjectNumber))
            intObjectNumber += 1
            xmlResult = xmlElement
    else:
        # Node without a string tag: report it and pass it through unchanged
        print("SPECIAL: %s - %s" % (xmlElement, xmlElement.text))
        xmlResult = xmlElement
    # Optional attributes requested by the caller (used for list items)
    if indent==True:
        xmlResult.set("indent", "True")
    if listtype != None:
        xmlResult.set("listtype", listtype)
    if listnumber != 0:
        xmlResult.set("listnumber", listnumber)
    if uid != None:
        xmlResult.set("id", uid)
    return xmlResult
# def djangoParseObject ends here
def make_index(index_hits, index_type):
    """Build the printable index element from a collection of index hits.

    Hits are grouped by their ``main`` attribute and, below that, by their
    ``secondary`` attribute. Main entries are sorted case-insensitively and
    grouped into one ``EOAindexsection`` per upper-cased first character;
    subentries are sorted with the default string ordering.

    Args:
        index_hits: iterable of elements carrying the attributes ``main``,
            ``chapterorder`` and ``elementorder`` and optionally ``display``,
            ``secondary`` and ``bold``.
        index_type: ``"regular"`` produces an ``EOAprintindex`` element, any
            other value an ``EOAprint<index_type>index`` element.

    Returns:
        The index element. It has no children when ``index_hits`` is empty
        (the previous version tried to append ``None`` in that case).
    """
    dictIndex = {}
    for xmlEOAindex in index_hits:
        strMainEntry = xmlEOAindex.get("main")
        str_display_entry = xmlEOAindex.get("display")
        # Create the bookkeeping record on the first hit of a main entry
        entry = dictIndex.setdefault(
            strMainEntry,
            {"display_string": "", "listMainentries": [], "dictSubentries": {}},
        )
        # The display string of the most recent hit wins; hits without an
        # explicit display string fall back to the main entry itself.
        if str_display_entry is not None:
            entry["display_string"] = str_display_entry
        else:
            entry["display_string"] = strMainEntry
        # Hits without a subentry are linked from the main entry, all others
        # are collected per subentry.
        strSubEntry = xmlEOAindex.get("secondary")
        if strSubEntry is None:
            entry["listMainentries"].append(xmlEOAindex)
        else:
            entry["dictSubentries"].setdefault(strSubEntry, []).append(xmlEOAindex)

    if index_type == "regular":
        new_index_element = "EOAprintindex"
    else:
        new_index_element = "EOAprint%sindex" % index_type
    # Create new and empty tree for the printed index
    xmlEOAprintindex = etree.Element(new_index_element)
    xmlEOAindexsection = None
    seen_first_chars = set()
    # Sort the main index
    for strSortedKey in sorted(dictIndex, key=str.lower):
        entry = dictIndex[strSortedKey]
        strFirstChar = strSortedKey[0].upper()
        if strFirstChar not in seen_first_chars:
            logging.debug("Beginning a new letter: %s.", strFirstChar)
            seen_first_chars.add(strFirstChar)
            # Attach the section as soon as it is created; it is filled
            # afterwards, so no pending section has to be flushed at the end.
            xmlEOAindexsection = etree.Element("EOAindexsection")
            xmlEOAindexsection.set("Character", strFirstChar)
            xmlEOAprintindex.append(xmlEOAindexsection)
        # beginning a new entry
        xmlEOAindexentry = etree.Element("EOAindexentry")
        xmlEOAindexentry.set("main", strSortedKey)
        xmlEOAindexentry.set("display", entry["display_string"])
        for xmlMainelement in entry["listMainentries"]:
            print(xmlMainelement.get("chapterorder") + ":" + xmlMainelement.get("elementorder"))
            xmlEOAindexlink = etree.Element("EOAindexlink")
            xmlEOAindexlink.set("chapterorder", xmlMainelement.get("chapterorder"))
            xmlEOAindexlink.set("elementorder", xmlMainelement.get("elementorder"))
            if xmlMainelement.get("bold") is not None:
                xmlEOAindexlink.set("bold", "True")
            xmlEOAindexentry.append(xmlEOAindexlink)
        # If there are any subentries, process them now
        if entry["dictSubentries"]:
            logging.debug("Processing Subentries")
            for strSortedSubKey in sorted(entry["dictSubentries"]):
                xmlEOAindexsubentry = etree.Element("EOAindexsubentry")
                xmlEOAindexsubentry.set("secondary", strSortedSubKey)
                for xmlSubElement in entry["dictSubentries"][strSortedSubKey]:
                    # Add the links to the subentry
                    xmlEOAindexlink = etree.Element("EOAindexlink")
                    xmlEOAindexlink.set("chapterorder", xmlSubElement.get("chapterorder"))
                    xmlEOAindexlink.set("elementorder", xmlSubElement.get("elementorder"))
                    if xmlSubElement.get("bold") is not None:
                        xmlEOAindexlink.set("bold", "True")
                    xmlEOAindexsubentry.append(xmlEOAindexlink)
                    logging.debug(xmlSubElement.get("secondary"))
                xmlEOAindexentry.append(xmlEOAindexsubentry)
        xmlEOAindexsection.append(xmlEOAindexentry)
    return xmlEOAprintindex
# def make_index ends here
def djangoParseHeadline(xmlElement):
    """Split the author string of a headline into one element per author.

    The first ``EOAauthor`` descendant of ``xmlElement`` is removed and its
    text is split on commas and on the connectives ``", and "``, ``" and "``
    and ``" und "``.  For every non-empty name a new ``EOAauthor`` child
    holding that single name is appended to ``xmlElement``.

    :param xmlElement: headline element, modified in place
    :returns: the same ``xmlElement`` object
    """
    # Parse EOAauthor and append it to the Chapter Information
    xmlAuthors = xmlElement.find(".//EOAauthor")
    if xmlAuthors is not None:
        # An empty <EOAauthor/> has text None; treat it as "no authors"
        # instead of failing inside re.sub.
        strAuthors = xmlAuthors.text or ""
        xmlElement.remove(xmlAuthors)
        strAuthors = re.sub("(, and | and | und )", ",", strAuthors)
        listAuthors = strAuthors.split(",")
        logging.debug(listAuthors)
        for strRawAuthor in listAuthors:
            # Remove whitespace on BOTH sides of the name; fragments that
            # are empty afterwards (e.g. from a trailing comma) are skipped.
            strAuthor = strRawAuthor.strip()
            if not strAuthor:
                continue
            xmlAuthor = etree.Element("EOAauthor")
            xmlAuthor.text = strAuthor
            xmlElement.append(xmlAuthor)
    return xmlElement
# def djangoParseHeadline ends here
# Iterate over Chapters, Sections, Subsections, and Subsubsections and
# put all on one level: every heading and every content object is appended
# directly to its EOAchapter element.
intChapterNumber = 1
listPartIDs = []
for xmlChapter in xmlChapters:
    # Counter written into the "order" attribute of the headings below
    intObjectNumber = 1
    # Process Chapter Title
    xmlEOAchapter = etree.Element("EOAchapter")
    xmlEOAchapter.set("type","regular")
    xmlLanguage = xmlChapter.get("language")
    if xmlLanguage is not None:
        # KT changing this after separating the big script
        strLanguage = xmlLanguage #or "english"
    else:
        strLanguage = "english"
    xmlEOAchapter.set("language", strLanguage)
    xmlEOAchapter.set("order", str(intChapterNumber))
    if xmlChapter.get("rend") != "nonumber":
        xmlEOAchapter.set("id", xmlChapter.get("id"))
    xmlChapterHeadline = xmlChapter.find(".//head")
    if xmlChapter.get("id") in dictChapters:
        xmlEOAchapter.set("number", dictChapters[xmlChapter.get("id")])
    else:
        xmlEOAchapter.set("number", "")
    print("-----------------------------------------------------")
    print(gettext(xmlChapterHeadline))
    xmlEOAchapter.append(djangoParseHeadline(xmlChapterHeadline))
    # Deal with EOAauthor
    if xmlChapter.find(".//EOAauthor") is not None:
        xmlEOAchapter.append(xmlChapter.find(".//EOAauthor"))
    # Attach enclosing Part to Chapter, see django structure for this purpose.
    # The part headline is attached only to the first chapter seen for a part.
    if xmlChapter.getparent().tag == "div0":
        if xmlChapter.getparent().get("id") not in listPartIDs:
            listPartIDs.append(xmlChapter.getparent().get("id"))
            xmlPartHeadline = xmlChapter.getparent().find("head")
            xmlPartHeadline.tag = "EOAparthtml"
            xmlEOAchapter.append(xmlPartHeadline)
    # Append Chapter to xmlEOAdocument
    xmlEOAdocument.append(xmlEOAchapter)
    # iterate over children of Chapter
    for xmlChapterChild in xmlChapter.iterchildren():
        if xmlChapterChild.tag == "div2":
            # Process Section Title
            xmlEOAsection = etree.Element("EOAsection")
            xmlEOAsection.set("order", str(intObjectNumber))
            if xmlChapterChild.get("rend") != "nonumber":
                xmlEOAsection.set("id", xmlChapterChild.get("id"))
                xmlEOAsection.set("number", dictSections[xmlChapterChild.get("id")])
            intObjectNumber += 1
            # Take the heading from the section itself.  Searching the whole
            # chapter (as this line used to do) returns whatever <head> comes
            # first in the chapter, which is not necessarily this section's.
            xmlHead = xmlChapterChild.find(".//head")
            logging.debug("Section '%s'" % gettext(xmlHead))
            xmlEOAsection.append(djangoParseHeadline(xmlHead))
            xmlEOAchapter.append(xmlEOAsection)
            # Iterate over Children of Section
            for xmlSectionChild in xmlChapterChild.iterchildren():
                if xmlSectionChild.tag == "div3":
                    # Process Subsection Title
                    xmlEOAsubsection = etree.Element("EOAsubsection")
                    xmlEOAsubsection.set("order", str(intObjectNumber))
                    if xmlSectionChild.get("rend") != "nonumber":
                        xmlEOAsubsection.set("id", xmlSectionChild.get("id"))
                        xmlEOAsubsection.set("number", dictSections[xmlSectionChild.get("id")])
                    intObjectNumber += 1
                    xmlHead = xmlSectionChild.find(".//head")
                    logging.debug("Subsection '%s'" % gettext(xmlHead))
                    xmlEOAsubsection.append(djangoParseHeadline(xmlHead))
                    xmlEOAchapter.append(xmlEOAsubsection)
                    # Iterate over children of Subsection
                    for xmlSubsectionChild in xmlSectionChild.iterchildren():
                        if xmlSubsectionChild.tag == "div4":
                            # Process Subsubsection Title
                            xmlEOAsubsubsection = etree.Element("EOAsubsubsection")
                            xmlEOAsubsubsection.set("order", str(intObjectNumber))
                            intObjectNumber += 1
                            xmlHead = xmlSubsectionChild.find(".//head")
                            logging.debug(gettext(xmlHead))
                            xmlEOAsubsubsection.append(djangoParseHeadline(xmlHead))
                            xmlEOAchapter.append(xmlEOAsubsubsection)
                            # Iterate over children of Subsubsection
                            for xmlSubsubsectionChild in xmlSubsectionChild.iterchildren():
                                xmlEOAchapter.append(djangoParseObject(xmlSubsubsectionChild))
                        else:
                            xmlEOAchapter.append(djangoParseObject(xmlSubsectionChild))
                elif xmlSectionChild.tag == "div4":
                    # A subsubsection placed directly below a section
                    xmlEOAsubsubsection = etree.Element("EOAsubsubsection")
                    xmlEOAsubsubsection.set("order", str(intObjectNumber))
                    intObjectNumber += 1
                    xmlHead = xmlSectionChild.find(".//head")
                    xmlEOAsubsubsection.append(djangoParseHeadline(xmlHead))
                    xmlEOAchapter.append(xmlEOAsubsubsection)
                    # Iterate over children of Subsubsection
                    for xmlSubsubsectionChild in xmlSectionChild.iterchildren():
                        if xmlSubsubsectionChild.tag == "div5":
                            logging.debug("jubel")
                            # although it's div5, promote it to subsubsection
                            xmlEOAparasection = etree.Element("EOAsubsubsection")
                            xmlEOAparasection.set("order", str(intObjectNumber))
                            intObjectNumber += 1
                            xmlHead = xmlSubsubsectionChild.find(".//head")
                            logging.debug(gettext(xmlHead))
                            xmlEOAparasection.append(djangoParseHeadline(xmlHead))
                            xmlEOAchapter.append(xmlEOAparasection)
                            for xmlParasectionChild in xmlSubsubsectionChild.iterchildren():
                                xmlEOAchapter.append(djangoParseObject(xmlParasectionChild))
                        else:
                            xmlEOAchapter.append(djangoParseObject(xmlSubsubsectionChild))
                else:
                    xmlEOAchapter.append(djangoParseObject(xmlSectionChild))
        else:
            xmlEOAchapter.append(djangoParseObject(xmlChapterChild))
    intChapterNumber += 1
libeoaconvert.debug_xml_here(xmlTree, "afterchapter")
print("----------------------------------------------")
print("Processing Facsimile Parts")
# objectify.deannotate() is used below; the import block visible at the top
# of this file only brings in lxml.etree, so import it here explicitly.
from lxml import objectify
listModes = ["text", "textPollux", "xml"]
strBasicURL = "http://mpdl-system.mpiwg-berlin.mpg.de/mpdl/interface/page-fragment.xql?document="
parserECHO = etree.XMLParser()
xmlParts = xmlTree.findall("//div0")
# Running number of the facsimile part; used in downloaded image file names
intFacNumber = 1
for xmlPart in xmlParts:
    intObjectNumber = 1
    intFacPartNumber = 1
    if xmlPart.find(".//EOAfacsimilepart") is None:
        continue
    xmlEOAfacsimilepart = etree.Element("EOAfacsimilepart")
    # Facsimile parts continue the numbering of the regular chapters
    xmlEOAfacsimilepart.set("order", str(intChapterNumber))
    xmlEOAfacsimileparthead = xmlPart.find(".//head")
    for xmlChild in xmlEOAfacsimileparthead:
        if xmlChild.tag == "hi":
            xmlChild.tag = "em"
            # pop() instead of del: a <hi> without "rend" must not abort the run
            xmlChild.attrib.pop("rend", None)
    xmlEOAfacsimilepart.append(xmlEOAfacsimileparthead)
    intChapterNumber += 1
    xmlEOAdocument.append(xmlEOAfacsimilepart)
    xmlFacsimilepages = xmlPart.findall(".//EOAfacsimilepage")
    intFacPageNumber = 1
    for xmlFacsimilepage in xmlFacsimilepages:
        strImageFile = xmlFacsimilepage.find(".//file").text
        strLabel = xmlFacsimilepage.find(".//label").text
        strPagenumber = xmlFacsimilepage.find(".//pagenumber").text or ""
        xmlEOAfacsimilepage = etree.Element("EOAfacsimilepage")
        xmlEOAfacsimilepage.set("order", str(intObjectNumber))
        # TODO: somehow deal with a (missing) file suffix here, and convert
        # the files if necessary
        strImageFile = strImageFile.rstrip("\n")
        # The directory part is flattened into the target file name
        strImageFileDir = os.path.dirname(strImageFile).replace("/", "")
        strImageFileName = os.path.basename(strImageFile)
        shutil.copy(os.getcwd() + "/" + strImageFile, os.getcwd() + "/CONVERT/django/images/" + strImageFileDir + strImageFileName)
        intObjectNumber += 1
        # Download transcription for this Page
        strFulltext = xmlFacsimilepage.find(".//fulltext").text
        if strFulltext is not None:
            logging.debug("Found a link to full text.")
            # The text has the form "<document>,<page number>"
            listFulltext = strFulltext.split(",")
            strFacsimileURL = listFulltext[0]
            strFacsimilePage = listFulltext[1]
            for strMode in listModes:
                strURL = strBasicURL + strFacsimileURL + "&pn=" + strFacsimilePage + "&mode=" + strMode
                logging.debug("Processing Facsimile : " + strURL)
                xmlECHOtree = etree.parse(strURL, parserECHO)
                # Remove ECHO-namespaces
                objectify.deannotate(xmlECHOtree, xsi_nil=True)
                etree.cleanup_namespaces(xmlECHOtree)
                xmlDivs = xmlECHOtree.findall(".//div")
                for xmlDiv in xmlDivs:
                    if xmlDiv.get("class") == "pageContent":
                        # Create new EOA-Element
                        xmlEOAfacsimileelement = etree.Element("EOAfacsimileelement")
                        xmlEOAfacsimileelement.set("type", strMode)
                        # Fix Images in the <div>-Element
                        xmlImages = xmlDiv.findall(".//img")
                        intFacImgNumber = 1
                        for xmlImage in xmlImages:
                            strImageSrc = xmlImage.get("src")
                            strLocalImage = "facsupplements_" + str(intFacNumber) + "_" + str(intFacPageNumber) + "_" + str(intFacImgNumber) + ".jpg"
                            if strImageSrc is None:
                                # Nothing to download: drop the tag later on
                                xmlImage.tag = "temp"
                            else:
                                # Pass the arguments as a list so that the URL
                                # is never re-parsed by shlex
                                listArguments = ["curl", strImageSrc, "-o", "CONVERT/django/images/" + strLocalImage]
                                try:
                                    subprocess.check_output(listArguments, shell=False, universal_newlines=True)
                                except (subprocess.CalledProcessError, OSError) as err:
                                    logging.warning("Could not download %s: %s" % (strImageSrc, err))
                                    # Images that cannot be fetched are removed
                                    # by the strip_tags call below
                                    xmlImage.tag = "temp"
                                else:
                                    # Point the img element to the local copy
                                    xmlImage.set("src", strLocalImage)
                            intFacImgNumber += 1
                        xmlEOAfacsimileelement.append(xmlDiv)
                        xmlEOAfacsimilepage.append(xmlEOAfacsimileelement)
        intFacPageNumber += 1
        xmlEOAfacsimilepage.set("file", strImageFileDir + strImageFileName)
        xmlEOAfacsimilepage.set("label", str(strLabel))
        xmlEOAfacsimilepage.set("pagenumber", str(strPagenumber))
        xmlEOAfacsimilepart.append(xmlEOAfacsimilepage)
    # This used to read "=+ 1", which assigns +1 instead of incrementing, so
    # every part reused the same image file names.
    intFacNumber += 1
etree.strip_tags(xmlDjangoTree, "temp")
print("----------------------------------------------")
print("Processing and linking Footnotes for django")
def bring_footnote_down_django(footnote, fragment, footnote_number, object_number, unique_id, destination):
    """
    Replace an inline footnote by a link and append its content to ``destination``.

    The text and children of ``footnote`` are saved, the element is handed to
    ``replace_footnote_with_sup``, given the class "footnote" and an ``<a>``
    child pointing to ``#fragment``.  A new ``EOAfootnote`` carrying the saved
    text and children is appended to ``destination``.  Files of
    ``EOAequationnonumber`` children are copied from ``items/`` to
    ``CONVERT/django/images/`` below the current working directory.

    footnote         the footnote element inside the running text
    fragment         link target without the leading "#"
    footnote_number  label of the footnote, as a string
    object_number    value for the "order" attribute of the new EOAfootnote
    unique_id        value for the "id" attribute of the new EOAfootnote
    destination      element that receives the new EOAfootnote

    Returns object_number + 1, so that the caller can keep counting.
    Raises StopIteration if no ancestor of ``footnote`` has an "order" attribute.
    """
    # Save the content first: it is read before the element is rewritten.
    saved_children = list(footnote.getchildren())
    saved_text = footnote.text or ""

    replace_footnote_with_sup(footnote)
    footnote.set("class", "footnote")
    link = etree.Element("a")
    link.set("href", "#" + fragment)
    link.text = footnote_number
    footnote.append(link)

    note_entry = etree.Element("EOAfootnote")
    note_entry.set("order", str(object_number))
    note_entry.set("number", footnote_number)
    # The nearest ancestor that has an "order" attribute is the anchor
    ordered_ancestors = (
        ancestor.get("order")
        for ancestor in footnote.iterancestors()
        if ancestor.get("order") is not None
    )
    note_entry.set("anchor", next(ordered_ancestors))
    note_entry.set("id", unique_id)
    note_entry.text = saved_text

    for child in saved_children:
        if child.tag == "EOAequationnonumber":
            base_dir = os.getcwd()
            source_file = "{}/items/{}".format(base_dir, child.get("filename"))
            target_dir = "{}/CONVERT/django/images/".format(base_dir)
            shutil.copy(source_file, target_dir)
        note_entry.append(child)

    destination.append(note_entry)
    return object_number + 1
# def bring_footnote_down_django ends here
# Move the footnotes of every chapter to the end of that chapter: each
# footnote in the running text is replaced by a superscript link, and the
# footnote content is collected in EOAfootnote elements that follow a newly
# created EOAsection heading.
xmlEOAchapters = xmlEOAdocument.findall(".//EOAchapter")
debug_chapters(xmlEOAchapters)
for xmlEOAchapter in xmlEOAchapters:
# groupings is iterated below as pairs of (grouping name, list of notes)
groupings = libeoaconvert.get_bigfoot_data(xmlEOAchapter)
# old style: <note> elements; new style: notes returned by get_bigfoot_data
has_old = 0 != len(xmlEOAchapter.findall(".//note"))
has_new = 0 != len(
[ # flatten
note
for grouping, notes in groupings
for note in notes
]
)
# XOR falls through, AND is an error (that should have already been thrown during the epub phase), and NOR skips to the next chapter
if has_old:
if has_new:
raise FootnoteError("This chapter contains both old-style footnotes and new-style footnotes")
else:
if not has_new:
continue
# Find out running order of last item the chapter
# For each footnote, first correct the EOAequationnonumber in <p>
# Then process the child elements of each footnote and attach them to the new footnote
# If necessary, watch out for paragraphs marked with indent and wrap a blockquote around them
xmlElement = xmlEOAchapter[(len(xmlEOAchapter)-1)]
logging.debug(etree.tostring(xmlElement))
# The new elements continue the "order" numbering after the last child
intObjectNumber = (int(xmlElement.get("order")) + 1)
intFootnoteNumber = 1
# Temporary container; "temp" tags are stripped after the loop
xmlResult = etree.Element("temp")
# Section heading for the footnotes, in the language of the chapter
xmlEOAsection = etree.Element("EOAsection")
xmlEOAsection.set("order", str(intObjectNumber))
intObjectNumber += 1
xmlHead = etree.Element("head")
xmlHead.text = libeoaconvert.dictLangFootnotes[libeoaconvert.two_letter_language(xmlEOAchapter.get("language"))]
xmlEOAsection.append(xmlHead)
xmlResult.append(xmlEOAsection)
# New-style notes: numbered per grouping, starting at 1
for grouping, notes in groupings:
for index, note in enumerate(notes):
# do for the new-style notes what the old code did for the other footnotes
fntext = str(index+1)
if "lower-latin" == grouping:
fntext = alph_footnote_index(index)
unique_id = "fn%s" % fntext
intObjectNumber = bring_footnote_down_django(note, unique_id, fntext, intObjectNumber, unique_id, xmlResult)
# Old-style notes: numbered consecutively within the chapter
intFootnoteNumber = 1
xmlFootnotes = xmlEOAchapter.findall(".//note")
for xmlFootnote in xmlFootnotes:
# Save content, tail and id, because clear() removes all of them
xmlFootnoteContent = xmlFootnote.getchildren()
strFootnoteText = xmlFootnote.text or ""
tmpTail = xmlFootnote.tail
tmpStrUID = xmlFootnote.get("id")
xmlFootnote.clear()
xmlFootnote.tail = tmpTail
# The emptied <note> becomes <sup class="footnote"><a href="#fnN">N</a></sup>
xmlFootnote.tag = "sup"
xmlFootnote.set("class", "footnote")
xmlFootnoteLink = etree.Element("a")
xmlFootnoteLink.set("href", "#fn" + str(intFootnoteNumber))
xmlFootnoteLink.text = str(intFootnoteNumber)
xmlFootnote.append(xmlFootnoteLink)
xmlEOAfootnote = etree.Element("EOAfootnote")
xmlEOAfootnote.set("order", str(intObjectNumber))
intObjectNumber += 1
xmlEOAfootnote.set("number", str(intFootnoteNumber))
# "anchor" is the order of the nearest ancestor that has an "order" attribute
# NOTE(review): if no ancestor has one, strFootnoteAnchorNumber keeps the
# value of a previous footnote (or is undefined) - confirm this cannot happen
for xmlParent in xmlFootnote.iterancestors():
if xmlParent.get("order") is not None:
strFootnoteAnchorNumber = xmlParent.get("order")
break
xmlEOAfootnote.set("anchor", strFootnoteAnchorNumber)
xmlEOAfootnote.set("id", tmpStrUID)
xmlEOAfootnote.text = strFootnoteText
for xmlElement in xmlFootnoteContent:
# Equations inside footnotes need their file in the django image folder
if xmlElement.tag == "EOAequationnonumber":
shutil.copy(os.getcwd() + "/items/" + xmlElement.get("filename"), os.getcwd() + "/CONVERT/django/images/")
xmlEOAfootnote.append(xmlElement)
xmlResult.append(xmlEOAfootnote)
intFootnoteNumber += 1
xmlEOAchapter.append(xmlResult)
# Remove temp-Tag
etree.strip_tags(xmlDjangoTree, "temp")
# print("----------------------------------------------")
# print("Processing Verses")
# for xmlEOAchapter in xmlEOAchapters:
# verses = xmlEOAchapter.findall(".//EOAverse")
# print("Found lotsa verses: ", len(verses))
print("----------------------------------------------")
print("Processing various Elements")
# <hi rend="..."> values and the HTML tag that replaces them
dictHighlightTags = {"it": "em", "bold": "b"}
# Elements that only get a new tag name
listRenamedTags = [("EOAup", "sup"), ("EOAdown", "sub")]
# Elements that become a <span> with an inline style
listStyledSpans = [
    ("EOAst", "text-decoration: line-through;"),
    ("EOAls", "letter-spacing: 0.5em;"),
    ("EOAcaps", "font-variant:small-caps;"),
]
# Inline formulas (mathematical and chemical) that become images
listEquationTags = ["EOAineq", "EOAchem"]
# Citation elements that become <span class="citation" rel="popover">
listCitationTags = ["EOAcitenumeric", "EOAciteauthoryear", "EOAciteyear", "EOAcitemanual"]
for xmlEOAchapter in xmlEOAchapters:
    # Convert italic and bold text
    for xmlHighlight in xmlEOAchapter.findall(".//hi"):
        strNewTag = dictHighlightTags.get(xmlHighlight.get("rend"))
        if strNewTag is not None:
            xmlHighlight.tag = strNewTag
            del xmlHighlight.attrib["rend"]
    # Convert xref into <a href="...">, using the URL as link text
    xmlHyperlinks = xmlEOAchapter.findall(".//xref")
    for xmlHyperlink in xmlHyperlinks:
        strURL = xmlHyperlink.get('url')
        if strURL is None:
            # Without a target there is nothing to link to
            logging.warning("Found an xref without url attribute, leaving it untouched.")
            continue
        if not strURL.startswith(("http://", "https://")):
            strURL = "http://" + strURL
        xmlHyperlink.tag = "a"
        del xmlHyperlink.attrib["url"]
        xmlHyperlink.set("href", strURL)
        etree.strip_elements(xmlHyperlink, "allowbreak", with_tail=True)
        xmlHyperlink.text = strURL
    # Convert EOAup to <sup> and EOAdown to <sub>
    for strOldTag, strNewTag in listRenamedTags:
        for xmlRenamed in xmlEOAchapter.findall(".//" + strOldTag):
            xmlRenamed.tag = strNewTag
    # Convert strike-out, letter-spacing and small caps into styled spans
    for strOldTag, strStyle in listStyledSpans:
        for xmlStyled in xmlEOAchapter.findall(".//" + strOldTag):
            xmlStyled.tag = "span"
            xmlStyled.set("style", strStyle)
    # Convert EOAineq and EOAchem into appropriate IMG-Tags
    for strOldTag in listEquationTags:
        for xmlInlineEquation in xmlEOAchapter.findall(".//" + strOldTag):
            xmlInlineEquation.tag = "img"
            xmlInlineEquation.set("class", "EOAineq")
            xmlInlineEquation.set("alt", xmlInlineEquation.get("TeX"))
            shutil.copy(os.getcwd() + "/items/" + xmlInlineEquation.get("src"), os.getcwd() + "/CONVERT/django/images/" + xmlInlineEquation.get("src"))
    # Convert EOAinline into appropriate IMG-Tags
    xmlInlineElements = xmlEOAchapter.findall(".//EOAinline")
    for xmlInlineElement in xmlInlineElements:
        xmlInlineElement.tag = "img"
        xmlInlineElement.set("alt", "")
        # The class used to be set to "EOAinline" first and was then
        # overwritten; only the final value is kept here.
        xmlInlineElement.set("class", "eoainlineimage")
        # The element text holds the path of the image file
        strInlineElementFilePath = xmlInlineElement.text
        strInlineElementFileName = os.path.basename(strInlineElementFilePath)
        strInlineElementDirName = os.path.dirname(strInlineElementFilePath)
        xmlInlineElement.text = None
        xmlInlineElement.set("src", strInlineElementDirName + strInlineElementFileName)
        strNewImagePath = os.getcwd() + "/CONVERT/django/images/embedded/" + strInlineElementDirName + strInlineElementFileName
        shutil.copy(os.getcwd() + "/" + strInlineElementDirName + "/" + strInlineElementFileName, strNewImagePath)
        # Shrink the copy in place.  Only the configured executable is split
        # with shlex; the image path is passed as one argument so that
        # directories containing spaces keep working.
        listArguments = shlex.split(GM_PATH) + ["convert", strNewImagePath, "-resize", "20x20", strNewImagePath]
        subprocess.check_output(listArguments, shell=False)
    # Change all citation elements into spans to create appropriate links
    for strOldTag in listCitationTags:
        for xmlCitation in xmlEOAchapter.findall(".//" + strOldTag):
            xmlCitation.tag = "span"
            xmlCitation.set("class", "citation")
            xmlCitation.set("rel", "popover")
print("----------------------------------------------")
print("Processing Cross References")
# Substitute References with their targets (with links).
# Every EOAref is turned into <a href="../<chapter order>/index.html#<object order>">
# whose text is the number of the referenced object.  The number is looked up
# in the dictionaries from the first conversion step, the two order values are
# taken from the matching element in xmlEOAdocument.
for xmlEOAchapter in xmlEOAchapters:
xmlReferences = xmlEOAchapter.findall(".//EOAref")
for xmlReference in xmlReferences:
# Placeholder text that stays visible if no dictionary knows the target
strResult = "!!! Cross Reference !!!"
strChapterOrder = ""
strObjectOrder = ""
# A reference carries a label (child "Label") and a target id (child "ref")
xmlReferenceLabel = xmlReference.find("Label")
xmlReferenceLabelText = xmlReferenceLabel.text
xmlReferenceRef = xmlReference.find("ref")
xmlReferenceRefTarget = xmlReferenceRef.get("target")
# Equations can be found via their label ...
if xmlReferenceLabelText in dictEquations:
# Grab Number from Dictionary
strResult = dictEquations[xmlReferenceLabelText]
# Go through all equations and find the corresponding Equation
xmlEOAequations = xmlEOAdocument.findall(".//EOAequation")
for xmlEOAequation in xmlEOAequations:
tmpReferenceLabelText = xmlEOAequation.get("label")
if xmlReferenceLabelText == tmpReferenceLabelText:
logging.debug("Successfully found link to array formula: %s" % strResult)
for xmlParent in xmlEOAequation.iterancestors():
if xmlParent.tag == "EOAchapter":
strChapterOrder = xmlParent.get("order")
strObjectOrder = xmlEOAequation.get("order")
# ... or via their uid
if xmlReferenceRefTarget in dictEquations:
# Grab Number from Dictionary
strResult = dictEquations[xmlReferenceRefTarget]
# Go through all equations and find the corresponding Equation
xmlEOAequations = xmlEOAdocument.findall(".//EOAequation")
for xmlEOAequation in xmlEOAequations:
tmpReferenceRefTarget = xmlEOAequation.get("uid")
if xmlReferenceRefTarget == tmpReferenceRefTarget:
logging.debug("Successfully found link to normal formula: %s" % strResult)
for xmlParent in xmlEOAequation.iterancestors():
if xmlParent.tag == "EOAchapter":
strChapterOrder = xmlParent.get("order")
strObjectOrder = xmlEOAequation.get("order")
# List items: first chapter child whose id contains the target
if xmlReferenceRefTarget in dictLists:
logging.debug("Found link to list.")
strResult = dictLists[xmlReferenceRefTarget]
# NOTE(review): [0] raises IndexError if no such element exists
xmlEOAlistitem = xmlEOAdocument.xpath("//EOAchapter/*[contains(@id, $targetuid)]", targetuid = xmlReferenceRefTarget)[0]
for xmlParent in xmlEOAlistitem.iterancestors():
if xmlParent.tag == "EOAchapter":
strChapterOrder = xmlParent.get("order")
strObjectOrder = xmlEOAlistitem.get("order")
# Chapters: the link points to the top of the chapter page
if xmlReferenceRefTarget in dictChapters:
logging.debug("Found link to chapter.")
strResult = dictChapters[xmlReferenceRefTarget]
# NOTE(review): this loop reuses the name xmlEOAchapter of the outermost
# loop; the outer iteration itself is not affected, but the name no longer
# refers to the current chapter afterwards
for xmlEOAchapter in xmlEOAdocument.findall(".//EOAchapter"):
if xmlEOAchapter.get("id") == xmlReferenceRefTarget:
logging.debug("Successfully handled link to a chapter: %s" % strResult)
strObjectOrder = "top"
strChapterOrder = xmlEOAchapter.get("order")
# Theorems, matched by uid
if xmlReferenceRefTarget in dictTheorems:
logging.debug("Found link to ein Theorem")
strResult = dictTheorems[xmlReferenceRefTarget]
for xmlEOAtheorem in xmlEOAdocument.findall(".//EOAtheorem"):
if xmlEOAtheorem.get("uid") == xmlReferenceRefTarget:
logging.debug("Successfully handled link to a theorem: %s " % strResult)
for xmlParent in xmlEOAtheorem.iterancestors():
if xmlParent.tag == "EOAchapter":
strObjectOrder = xmlEOAtheorem.get("order")
strChapterOrder = xmlParent.get("order")
# Sections and subsections share one dictionary; both are matched by id
if xmlReferenceRefTarget in dictSections:
logging.debug("Found link to section")
strResult = dictSections[xmlReferenceRefTarget]
xmlEOAsections = xmlEOAdocument.findall(".//EOAsection")
for xmlEOAsection in xmlEOAsections:
tmpReferenceRefTarget = xmlEOAsection.get("id")
if xmlReferenceRefTarget == tmpReferenceRefTarget:
logging.debug("Successfully handled link to section: %s " % strResult)
for xmlParent in xmlEOAsection.iterancestors():
if xmlParent.tag == "EOAchapter":
strChapterOrder = xmlParent.get("order")
strObjectOrder = xmlEOAsection.get("order")
xmlEOAsubsections = xmlEOAdocument.findall(".//EOAsubsection")
for xmlEOAsubsection in xmlEOAsubsections:
tmpReferenceRefTarget = xmlEOAsubsection.get("id")
if xmlReferenceRefTarget == tmpReferenceRefTarget:
logging.debug("Successfully handled link to subsection %s: " % strResult)
for xmlParent in xmlEOAsubsection.iterancestors():
if xmlParent.tag == "EOAchapter":
strChapterOrder = xmlParent.get("order")
strObjectOrder = xmlEOAsubsection.get("order")
# Figures, matched by id
if xmlReferenceRefTarget in dictFigures:
logging.debug("Found link to figure")
strResult = dictFigures[xmlReferenceRefTarget]
xmlEOAfigures = xmlEOAdocument.findall(".//EOAfigure")
for xmlEOAfigure in xmlEOAfigures:
tmpReferenceRefTarget = xmlEOAfigure.get("id")
if xmlReferenceRefTarget == tmpReferenceRefTarget:
logging.debug("Successfully handled link to figure: %s" % strResult)
for xmlParent in xmlEOAfigure.iterancestors():
if xmlParent.tag == "EOAchapter":
strChapterOrder = xmlParent.get("order")
strObjectOrder = xmlEOAfigure.get("order")
# Footnotes, matched by id
if xmlReferenceRefTarget in dictFootnotes:
logging.debug("Found link to footnote")
strResult = dictFootnotes[xmlReferenceRefTarget]
xmlEOAfootnotes = xmlEOAdocument.findall(".//EOAfootnote")
for xmlEOAfootnote in xmlEOAfootnotes:
tmpReferenceRefTarget = xmlEOAfootnote.get("id")
if xmlReferenceRefTarget == tmpReferenceRefTarget:
logging.debug("Successfully handled link to footnote: %s" % strResult)
for xmlParent in xmlEOAfootnote.iterancestors():
if xmlParent.tag == "EOAchapter":
strChapterOrder = xmlParent.get("order")
strObjectOrder = xmlEOAfootnote.get("order")
# Tables are looked up by label, not by target id
if xmlReferenceLabelText in dictTables:
logging.debug("Found link to table")
strResult = dictTables[xmlReferenceLabelText]
xmlEOAtables = xmlEOAdocument.findall(".//EOAtable")
for xmlEOAtable in xmlEOAtables:
tmpReferenceRefTarget = xmlEOAtable.get("label")
if xmlReferenceLabelText == tmpReferenceRefTarget:
logging.debug("Successfully handled link to table: %s" % strResult)
for xmlParent in xmlEOAtable.iterancestors():
if xmlParent.tag == "EOAchapter":
strChapterOrder = xmlParent.get("order")
strObjectOrder = xmlEOAtable.get("order")
# Replace the reference by the link; clear() also drops the tail, so it is
# saved before and restored afterwards
tmpTail = xmlReference.tail or ""
xmlReference.clear()
xmlReference.text = strResult
xmlReference.tail = tmpTail
xmlReference.tag = "a"
xmlReference.set("href", "../" + strChapterOrder + "/index.html#" + strObjectOrder)
print("----------------------------------------------")
print("Processing Page References")
# Replace every EOApageref by the page number of the referenced label.  If
# the label belongs to a facsimile page, the reference additionally becomes
# a link to that page.
# The facsimile pages are not changed in this step, so they are collected once.
xmlEOAfacsimilepages = xmlEOAdocument.findall(".//EOAfacsimilepage")
for xmlEOAchapter in xmlEOAchapters:
    xmlPageReferences = xmlEOAchapter.findall(".//EOApageref")
    for xmlReference in xmlPageReferences:
        # Reset the placeholder for every reference.  It used to be set only
        # once per chapter, so a reference with an unknown label silently
        # showed the page number of the reference processed before it.
        strResult = "!!! Page Reference !!!"
        xmlReferenceLabel = xmlReference.find("Label")
        xmlReferenceLabelText = xmlReferenceLabel.text
        xmlReferenceRef = xmlReference.find("ref")
        xmlReferenceRefTarget = xmlReferenceRef.get("target")
        if xmlReferenceLabelText in dictPagelabels:
            logging.debug("Found link to page: %s" % xmlReferenceLabelText)
            strResult = dictPagelabels[xmlReferenceLabelText]
        xmlReference.text = strResult
        # Iterate over a copy, because the children are removed in the loop
        for xmlChild in list(xmlReference):
            xmlReference.remove(xmlChild)
        # Check, if EOApageref points to a Facsimile-Page
        # If yes, make a href to the facsimile
        for xmlEOAfacsimilepage in xmlEOAfacsimilepages:
            if xmlEOAfacsimilepage.get("label") == xmlReferenceLabelText:
                logging.debug("Found cross reference to facsimile.")
                xmlReference.tag = "a"
                strPartOrder = xmlEOAfacsimilepage.getparent().get("order")
                strFacsimileOrder = xmlEOAfacsimilepage.get("order")
                logging.debug(strFacsimileOrder)
                xmlReference.set("href", "../" + strPartOrder + "/" + strFacsimileOrder + ".html")
print("----------------------------------------------")
print("Normalizing Index Entries")
# The text of an index element has the form
#     sortkey@display!subsortkey@subdisplay|addition
# where everything except the first part is optional.  The text is taken
# apart here and stored in attributes of the (then empty) element.
for xmlEOAchapter in xmlEOAchapters:
    xml_EOA_indices = xmlEOAchapter.xpath(".//EOAindex | .//EOAindexperson | .//EOAindexlocation")
    for xmlEOAindex in xml_EOA_indices:
        # Using the gettext function here, because of subelements
        strEOAindextext = gettext(xmlEOAindex)
        strEOAindextext = strEOAindextext.replace("\n", " ")
        # Empty the element; iterate over a copy while removing children
        for sub_element in list(xmlEOAindex):
            xmlEOAindex.remove(sub_element)
        xmlEOAindex.text = None
        listFirstPart = strEOAindextext.split("|")
        tmpEntry = listFirstPart[0]
        listSecondPart = tmpEntry.split("!")
        strMainEntry = listSecondPart[0]
        # Check if a sortkey is present via @
        listSortKey = strMainEntry.split("@")
        if len(listSortKey) == 2:
            xmlEOAindex.set("main", listSortKey[0])
            xmlEOAindex.set("display", listSortKey[1])
        else:
            xmlEOAindex.set("main", strMainEntry)
        if len(listSecondPart) > 1:
            strSecondPart = listSecondPart[1]
            listSecondarySortkey = strSecondPart.split("@")
            if len(listSecondarySortkey) == 2:
                xmlEOAindex.set("secondary", listSecondarySortkey[0])
                xmlEOAindex.set("secondarydisplay", listSecondarySortkey[1])
            else:
                xmlEOAindex.set("secondary", strSecondPart)
        if len(listFirstPart) > 1:
            strAddition = listFirstPart[1]
            if strAddition == "textbf":
                xmlEOAindex.set("bold", "true")
            # Only the leading keyword is cut off.  Removing every "see" (as
            # re.sub did before) also damaged targets such as "overseer".
            if strAddition.startswith("seealso"):
                xmlEOAindex.set("seealso", strAddition[len("seealso"):])
                # Entries containing seealso are omitted for the time being
                xmlEOAindex.tag = "temp"
            elif strAddition.startswith("see"):
                xmlEOAindex.set("see", strAddition[len("see"):])
                # Entries containing see are omitted for the time being
                xmlEOAindex.tag = "temp"
        # Figure out parent chapter number and parent Element order
        for xmlParent in xmlEOAindex.iterancestors():
            strParentOrder = xmlParent.get("order")
            if strParentOrder is None:
                continue
            if xmlParent.tag == "EOAchapter":
                xmlEOAindex.set("chapterorder", strParentOrder)
            else:
                xmlEOAindex.set("elementorder", strParentOrder)
etree.strip_tags(xmlDjangoTree, "temp")
print("----------------------------------------------")
print("Removing Duplicate Index Entries")
# Within one child of a chapter every combination of main entry and
# sub-entry is kept only once; repetitions are tagged "temp" so that they
# get stripped later.
for xmlEOAchapter in xmlEOAchapters:
    for xmlChild in xmlEOAchapter.iterchildren():
        # main entry -> list of sub-entries already seen in this child
        dictEntries = {}
        xml_EOA_indices = xmlChild.xpath(".//EOAindex | .//EOAindexperson | .//EOAindexlocation")
        for xmlEOAindex in xml_EOA_indices:
            strEntry = xmlEOAindex.get("main")
            strSubentry = xmlEOAindex.get("secondary")
            if strEntry not in dictEntries:
                # Remember the sub-entry of the first occurrence as well.
                # It used to be dropped, so the first repetition of
                # "main!sub" was not recognised as a duplicate.
                dictEntries[strEntry] = [strSubentry]
                continue
            if strSubentry is None or strSubentry in dictEntries[strEntry]:
                # "see"/"seealso" are attributes of the index element, so
                # check them there and not on the enclosing chapter child.
                if (xmlEOAindex.get("see") is None) and (xmlEOAindex.get("seealso") is None):
                    xmlEOAindex.tag = "temp"
            else:
                dictEntries[strEntry].append(strSubentry)
print("----------------------------------------------")
print("Removing Index Entries in Footnotes")
# Index elements below an EOAfootnote are tagged "temp"; elements with that
# tag are stripped from the tree further down in the script.
for xmlEOAchapter in xmlEOAchapters:
    for xmlChild in xmlEOAchapter.iterchildren():
        dictEntries = {}
        xml_EOA_indices = xmlChild.xpath(".//EOAindex | .//EOAindexperson | .//EOAindexlocation")
        for xmlEOAindex in xml_EOA_indices:
            # Let lxml filter the ancestors by tag name
            for xmlParent in xmlEOAindex.iterancestors("EOAfootnote"):
                xmlEOAindex.tag = "temp"
                logging.debug("Found index in footnote")
print("----------------------------------------------")
print("Sorting and Creating Regular Index")
xml_regular_EOAindices = xmlDjangoTree.findall("//EOAindex")
# Stays None when the publication contains no regular index entries
xml_eoa_print_regular_index = None
if len(xml_regular_EOAindices) != 0:
    logging.debug("Sorting %s entries for regular index." % str(len(xml_regular_EOAindices)))
    xml_eoa_print_regular_index = make_index(xml_regular_EOAindices, index_type = "regular")
libeoaconvert.debug_xml_here(xmlDjangoTree, "djangotree")
libeoaconvert.debug_xml_here(xmlEOAdocument, "xmleoadocument")
libeoaconvert.debug_xml_here(xmlTree, "xmltree")
# If EOAprintindex is found, append xml_eoa_print_regular_index to xmlEOAdocument
xmlPrintindex = xmlTree.find(".//EOAprintindex")
# The condition used to be "is not None != 0", a chained comparison that
# means the same as the plain test below.
if xmlPrintindex is not None:
    # Remove <p><EOAprintindex/></p> from xmlDjangoTree
    print("found an index")
    xmlPrintindex.tag = "temp"
    xmlPrintindex.getparent().tag = "temp"
    if xml_eoa_print_regular_index is not None:
        xmlEOAdocument.append(xml_eoa_print_regular_index)
    else:
        # Without this check the script stopped with a NameError here
        logging.warning("EOAprintindex found, but there are no entries for the regular index.")
else:
    print("found no index")
print("----------------------------------------------")
print("Sorting and Creating Person Index")
xml_person_EOAindices = xmlDjangoTree.findall("//EOAindexperson")
# Stays None when the publication contains no person index entries
xml_eoa_print_person_index = None
if len(xml_person_EOAindices) != 0:
    xml_eoa_print_person_index = make_index(xml_person_EOAindices, index_type = "person")
# If EOAprintpersonindex is found, append xml_eoa_print_person_index to xmlEOAdocument
xmlPrintindex = xmlTree.find(".//EOAprintpersonindex")
# The condition used to be "is not None != 0", a chained comparison that
# means the same as the plain test below.
if xmlPrintindex is not None:
    # Remove <p><EOAprintpersonindex/></p> from xmlDjangoTree
    xmlPrintindex.tag = "temp"
    xmlPrintindex.getparent().tag = "temp"
    if xml_eoa_print_person_index is not None:
        xmlEOAdocument.append(xml_eoa_print_person_index)
    else:
        # Without this check the script stopped with a NameError here
        logging.warning("EOAprintpersonindex found, but there are no entries for the person index.")
# doing the same for location index
print("----------------------------------------------")
print("Sorting and Creating Location Index")
xml_location_EOAindices = xmlDjangoTree.findall("//EOAindexlocation")
# Stays None when the publication contains no location index entries
xml_eoa_print_location_index = None
if len(xml_location_EOAindices) != 0:
    xml_eoa_print_location_index = make_index(xml_location_EOAindices, index_type = "location")
# If EOAprintlocationindex is found, append xml_eoa_print_location_index to xmlEOAdocument
xmlPrintindex = xmlTree.find(".//EOAprintlocationindex")
# The condition used to be "is not None != 0", a chained comparison that
# means the same as the plain test below.
if xmlPrintindex is not None:
    # Remove <p><EOAprintlocationindex/></p> from xmlDjangoTree
    xmlPrintindex.tag = "temp"
    xmlPrintindex.getparent().tag = "temp"
    if xml_eoa_print_location_index is not None:
        xmlEOAdocument.append(xml_eoa_print_location_index)
    else:
        # Without this check the script stopped with a NameError here
        logging.warning("EOAprintlocationindex found, but there are no entries for the location index.")
############################################################################
# Cleaning up                                                              #
############################################################################
# TODO: delete unnecessary attributes such as id
# TODO: delete unnecessary tags such as EOAlabel
# strip_tags removes only the tags and keeps their content
etree.strip_tags(xmlDjangoTree, "temp", "citetext", "EOAprintbibliography")
# strip_elements removes the elements with their content; with_tail=False
# keeps the text that follows them
etree.strip_elements(xmlDjangoTree, "citekey", with_tail=False)
etree.strip_attributes(xmlDjangoTree, "id-text", "id", "noindent", "type", "label", "spacebefore")#, "rend")
############################################################################
# Save xmlDjangoTree                                                       #
############################################################################
tmpResult = etree.tostring(xmlDjangoTree, pretty_print=True, encoding="unicode")
# The serialised tree carries no XML declaration, so readers will assume
# UTF-8: write exactly that instead of the platform's default encoding.  The
# with-statement closes the file even if writing fails.
with open("CONVERT/django/Django.xml", "w", encoding="utf-8") as tmpFile:
    tmpFile.write(tmpResult)
logging.debug("Wrote Django.xml")