to create some valid markup
- strTranscriptedtext = etree.tostring(xmlTranscriptedtext, encoding="unicode")
- #strTranscriptedtext = re.sub (r"\n\n", "
to create some valid markup
+ strTranscriptedtext = etree.tostring(xmlTranscriptedtext, encoding="unicode")
+ #strTranscriptedtext = re.sub (r"\n\n", "
", str(strTranscriptedtext))
+ #strTranscriptedtext = re.sub (r"
", "", strTranscriptedtext)
+ xmlLeftColumn = etree.Element("EOAtranscriptionleft")
+ xmlRightColumn = etree.Element("EOAtranscriptionright")
+ boolRightColumn = False
+ xmlTemp = etree.XML(str(strTranscriptedtext))
+ for xmlElement in xmlTemp.iterchildren():
+ if xmlElement.tag == "pagebreak":
+ boolRightColumn = True
+ continue
+ if boolRightColumn == False:
+ xmlLeftColumn.append(xmlElement)
+ if boolRightColumn == True:
+ xmlRightColumn.append(xmlElement)
+ xmlEOATranscription.append(xmlLeftColumn)
+ xmlEOATranscription.append(xmlRightColumn)
+ # Convert Images within the transcription
+ logging.debug("EOAfigurenonumber")
+ xmlFigures = xmlEOATranscription.findall(".//EOAfigurenonumber")
+ logging.debug(xmlFigures)
+ if xmlFigures is not None:
+ for xmlFigure in xmlFigures:
+ strImageFileString = xmlFigure.find(".//file").text
+ strImageFileString = strImageFileString.rstrip("\n")
+ strImageFileDir = os.path.dirname(strImageFileString)
+ strImageFileDir = re.sub("/", "", strImageFileDir)
+ strImageFileName = os.path.basename(strImageFileString)
+ strImageFileNamewoSuffix = os.path.splitext(strImageFileName)[0]
+ strCommand = GM_PATH + " convert " + os.getcwd() + "/" + strImageFileString + " -resize 250x250\\> " + os.getcwd() + "/CONVERT/django/images/embedded/" + strImageFileDir + strImageFileName
+ listArguments = shlex.split(strCommand)
+ subprocess.check_output(listArguments, shell=False)
+ tmpStrTail = xmlFigure.tail
+ xmlFigure.clear()
+ xmlFigure.tag = "img"
+ xmlFigure.set("src", strImageFileDir + strImageFileName)
+ xmlFigure.set("alt", "")
+ xmlResult.append(xmlEOATranscription)
+ elif xmlElement.tag == "EOAletterhead":
+ xmlResult = etree.Element("temp")
+ xmlEOAletterhead = etree.Element("EOAletterhead")
+ xmlEOAletterrecipient = xmlElement.find(".//Recipient")
+ xmlEOAletterhead.append(xmlEOAletterrecipient)
+ xmlEOAletterarchive = xmlElement.find(".//Archive")
+ xmlEOAletterhead.append(xmlEOAletterarchive)
+ xmlEOAletteradditional = xmlElement.find(".//Additional")
+ xmlEOAletterhead.append(xmlEOAletteradditional)
+ xmlEOAletterpages = xmlElement.find(".//Pages")
+ xmlEOAletterhead.append(xmlEOAletterpages)
+ xmlEOAletterhead.set("order", str(intObjectNumber))
+ intObjectNumber += 1
+ xmlResult.append(xmlEOAletterhead)
+
+ elif xmlElement.tag == "EOAfigurenonumber":
+ # elif xmlElement.findall(".//EOAfigurenonumber"):
+ xmlResult = etree.Element("temp")
+ # Create basic Element EOAfigurenonumber
+ xmlEOAfigure = etree.Element("EOAfigurenonumber")
+ # Copy Image
+ strImageFileString = xmlElement.find(".//file").text
+ strImageFileString = strImageFileString.rstrip("\n")
+ strImageFileDir = os.path.dirname(strImageFileString)
+ strImageFileDir = re.sub("/", "", strImageFileDir)
+ strImageFileName = os.path.basename(strImageFileString)
+ strImageFileNamewoSuffix = os.path.splitext(strImageFileName)[0]
+ shutil.copy(os.getcwd() + "/" + strImageFileString, os.getcwd() + "/CONVERT/django/images/" + strImageFileDir + strImageFileName)
xmlEOAfigure.set("file", strImageFileDir + strImageFileName)
- xmlEOAfigure.set("width", xmlElement.find(".//width").text + "px;")
- xmlEOAfigure.set("order", str(intObjectNumber))
- intObjectNumber += 1
- # Insert visual Number and uid
- strFigureNumber = dictFigures[xmlElement.find(".//anchor").get("id")]
- xmlEOAfigure.set("number", strFigureNumber)
- strFigureUID = xmlElement.find(".//anchor").get("id")
- xmlEOAfigure.set("id", strFigureUID)
- # Insert Caption
- xmlEOAfigure.append(xmlElement.find(".//caption"))
- xmlResult.append(xmlEOAfigure)
- elif xmlElement.findall(".//EOAtable"):
- xmlResult = etree.Element("EOAtable")
- xmlRawTable = xmlElement.find(".//table")
- xmlResult.set("order", str(intObjectNumber))
- intObjectNumber += 1
- xmlResult.append(xmlRawTable)
- # Copy Number, Label and Caption
- if xmlElement.find(".//EOAtablecaption").text != "nonumber":
- xmlResult.append(xmlElement.find(".//EOAtablecaption"))
- xmlResult.set("label", xmlElement.find(".//EOAtablelabel").text)
- xmlResult.set("number", dictTables[xmlElement.find(".//EOAtablelabel").text])
- xmlResult.set("id", xmlRawTable.get("id"))
- else:
- xmlElement.set("numbering", "false")
- #if xmlElement.find(".//EOAtablelabel").text is not None:
- # Transform width of Columns
- strColumnString = xmlElement.find(".//EOAtablecolumns").text
- strColumnString = re.sub(r"\|", "", strColumnString)
- reMatchObjects = re.findall(r'([L|R|C].*?cm)', strColumnString)
- intTableWidth = 0
- listColumnAlignments = [None]
- listColumnWidths = [None]
- intNumberOfColumns = 0
- for strColumnDefinition in reMatchObjects:
- strColumnDefinition = strColumnDefinition.rstrip("cm")
- strColumnAlignment = strColumnDefinition[0]
- if strColumnAlignment == "L":
- strColumnAlignment = "left"
- if strColumnAlignment == "C":
- strColumnAlignment = "center"
- if strColumnAlignment == "R":
- strColumnAlignment = "right"
- listColumnAlignments.append(strColumnAlignment)
- intColumnWidth = int(float(strColumnDefinition.lstrip("LRC")) * 75)
- listColumnWidths.append(intColumnWidth)
- intTableWidth += intColumnWidth
- intNumberOfColumns += 1
- xmlRawTable.set("width", str(intTableWidth))
- # Figure out and deal with the Header
- xmlHeader = xmlRawTable.find(".//row/cell/tableheader")
- if xmlHeader is not None:
- xmlHeader.text = ""
- xmlHeader.getparent().text = xmlHeader.tail
- xmlHeader.getparent().remove(xmlHeader)
- xmlFirstRow = xmlRawTable.find(".//row")
- xmlFirstRow.tag = "tr"
- xmlFirstRowCells = xmlFirstRow.findall(".//cell")
- for xmlFirstRowCell in xmlFirstRowCells:
- xmlFirstRowCell.tag = "th"
- # Now Deal with the rest of the rows
- xmlTableRows = xmlRawTable.findall(".//row")
- for xmlTableRow in xmlTableRows:
- xmlTableCells = xmlTableRow.findall(".//cell")
- intCurrentColumn = 1
- for xmlTableCell in xmlTableCells:
- xmlTableCell.tag = "td"
- xmlTableCell.set("align",listColumnAlignments[intCurrentColumn])
- xmlTableCell.set("style","width: " + str(listColumnWidths[intCurrentColumn]) + ";")
- # Deal with multicolumn
- if xmlTableCell.get("cols") is not None:
- xmlTableCell.set("colspan", xmlTableCell.get("cols"))
- if intCurrentColumn > len(xmlTableCells):
- intCurrentColumn = 1
- # Deal with multicolumn again, increase intCurrentColumn by the columns being spanned
- elif xmlTableCell.get("cols") is not None:
- intCurrentColumn = intCurrentColumn + int(xmlTableCell.get("cols"))
- del xmlTableCell.attrib["cols"]
- else:
- intCurrentColumn += 1
- xmlTableRow.tag = "tr"
- xmlTableRow.set("valign", "top")
- elif xmlElement.tag == "list" and xmlElement.get('type') != 'description':
- xmlResult = etree.Element("temp")
- if xmlElement.get('type') == 'ordered':
-
- # Change first item into EOAlistfirstitem
- xmlFirstItem = xmlElement.find("..//item")
- xmlFirstItemElement = xmlFirstItem.getchildren()[0]
-
- xmlResult.append(djangoParseObject(xmlFirstItemElement,indent=True, listtype="ordered", listnumber=xmlFirstItem.get("id-text"), uid=xmlFirstItem.get("id")))
- # Process Child Elements which are Part of this item
- if len(xmlFirstItem.getchildren()) >= 1:
- for xmlChild in xmlFirstItem.iterchildren():
- xmlResult.append(djangoParseObject(xmlChild,indent=True))
- xmlFirstItem.getparent().remove(xmlFirstItem)
- # Process remaining items in this list
- tmpIntNumber = 2
- for xmlItem in xmlElement.iterchildren():
- xmlItemElement = xmlItem.getchildren()[0]
- xmlResult.append(djangoParseObject(xmlItemElement,indent=True,listtype="ordered",listnumber=xmlItem.get("id-text"), uid=xmlItem.get("id")))
- tmpIntNumber += 1
- if len(xmlItem.getchildren()) >= 1:
- for xmlChild in xmlItem.iterchildren():
- xmlResult.append(djangoParseObject(xmlChild, indent=True))
- xmlItem.getparent().remove(xmlItem)
- if xmlElement.get('type') == 'simple':
- xml_first_child = xmlElement.getchildren()[0]
+ xmlEOAfigure.set("width", xmlElement.find(".//width").text + "px;")
+ xmlEOAfigure.set("order", str(intObjectNumber))
+ intObjectNumber += 1
+ xmlResult.append(xmlEOAfigure)
+ elif xmlElement.tag == "EOAfigure":
+ xmlResult = etree.Element("temp")
+ # Create basic Element EOAfigure
+ xmlEOAfigure = etree.Element("EOAfigure")
+ # Copy Image
+ strImageFileString = xmlElement.find(".//file").text
+ strImageFileString = strImageFileString.rstrip("\n")
+ strImageFileDir = os.path.dirname(strImageFileString)
+ strImageFileDir = re.sub("/", "", strImageFileDir)
+ strImageFileName = os.path.basename(strImageFileString)
+ strImageFileNamewoSuffix = os.path.splitext(strImageFileName)[0]
+ shutil.copy(os.getcwd() + "/" + strImageFileString, os.getcwd() + "/CONVERT/django/images/" + strImageFileDir + strImageFileName)
+ logging.debug("Django figure %s." % strImageFileName)
+ # yellow
+ if os.path.splitext(strImageFileName)[1].lower() == ".pdf":
+ logging.debug("Found a PDF file")
+ strImageFilepath = libeoaconvert.sanitizeImage(os.getcwd() + "/CONVERT/django/images/" + strImageFileDir + strImageFileName, GM_PATH, TL_PATH)
+ xmlEOAfigure.set("file", strImageFileDir + strImageFileName.replace(".pdf", ".png"))
+ logging.debug("The filename is %s" % xmlEOAfigure.get("file"))
+ else:
+ xmlEOAfigure.set("file", strImageFileDir + strImageFileName)
+ xmlEOAfigure.set("width", xmlElement.find(".//width").text + "px;")
+ xmlEOAfigure.set("order", str(intObjectNumber))
+ intObjectNumber += 1
+ # Insert visual Number and uid
+ strFigureNumber = dictFigures[xmlElement.find(".//anchor").get("id")]
+ xmlEOAfigure.set("number", strFigureNumber)
+ strFigureUID = xmlElement.find(".//anchor").get("id")
+ xmlEOAfigure.set("id", strFigureUID)
+ # Insert Caption
+ xmlEOAfigure.append(xmlElement.find(".//caption"))
+ xmlResult.append(xmlEOAfigure)
+ elif xmlElement.findall(".//EOAtable"):
+ xmlResult = etree.Element("EOAtable")
+ xmlRawTable = xmlElement.find(".//table")
+ xmlResult.set("order", str(intObjectNumber))
+ intObjectNumber += 1
+ xmlResult.append(xmlRawTable)
+ # Copy Number, Label and Caption
+ if xmlElement.find(".//EOAtablecaption").text != "nonumber":
+ xmlResult.append(xmlElement.find(".//EOAtablecaption"))
+ xmlResult.set("label", xmlElement.find(".//EOAtablelabel").text)
+ xmlResult.set("number", dictTables[xmlElement.find(".//EOAtablelabel").text])
+ xmlResult.set("id", xmlRawTable.get("id"))
+ else:
+ xmlElement.set("numbering", "false")
+ #if xmlElement.find(".//EOAtablelabel").text is not None:
+ # Transform width of Columns
+ strColumnString = xmlElement.find(".//EOAtablecolumns").text
+ strColumnString = re.sub(r"\|", "", strColumnString)
+ reMatchObjects = re.findall(r'([L|R|C].*?cm)', strColumnString)
+ intTableWidth = 0
+ listColumnAlignments = [None]
+ listColumnWidths = [None]
+ intNumberOfColumns = 0
+ for strColumnDefinition in reMatchObjects:
+ strColumnDefinition = strColumnDefinition.rstrip("cm")
+ strColumnAlignment = strColumnDefinition[0]
+ if strColumnAlignment == "L":
+ strColumnAlignment = "left"
+ if strColumnAlignment == "C":
+ strColumnAlignment = "center"
+ if strColumnAlignment == "R":
+ strColumnAlignment = "right"
+ listColumnAlignments.append(strColumnAlignment)
+ intColumnWidth = int(float(strColumnDefinition.lstrip("LRC")) * 75)
+ listColumnWidths.append(intColumnWidth)
+ intTableWidth += intColumnWidth
+ intNumberOfColumns += 1
+ xmlRawTable.set("width", str(intTableWidth))
+ # Figure out and deal with the Header
+ xmlHeader = xmlRawTable.find(".//row/cell/tableheader")
+ if xmlHeader is not None:
+ xmlHeader.text = ""
+ xmlHeader.getparent().text = xmlHeader.tail
+ xmlHeader.getparent().remove(xmlHeader)
+ xmlFirstRow = xmlRawTable.find(".//row")
+ xmlFirstRow.tag = "tr"
+ xmlFirstRowCells = xmlFirstRow.findall(".//cell")
+ for xmlFirstRowCell in xmlFirstRowCells:
+ xmlFirstRowCell.tag = "th"
+ # Now Deal with the rest of the rows
+ xmlTableRows = xmlRawTable.findall(".//row")
+ for xmlTableRow in xmlTableRows:
+ xmlTableCells = xmlTableRow.findall(".//cell")
+ intCurrentColumn = 1
+ for xmlTableCell in xmlTableCells:
+ xmlTableCell.tag = "td"
+ xmlTableCell.set("align",listColumnAlignments[intCurrentColumn])
+ xmlTableCell.set("style","width: " + str(listColumnWidths[intCurrentColumn]) + ";")
+ # Deal with multicolumn
+ if xmlTableCell.get("cols") is not None:
+ xmlTableCell.set("colspan", xmlTableCell.get("cols"))
+ if intCurrentColumn > len(xmlTableCells):
+ intCurrentColumn = 1
+ # Deal with multicolumn again, increase intCurrentColumn by the columns being spanned
+ elif xmlTableCell.get("cols") is not None:
+ intCurrentColumn = intCurrentColumn + int(xmlTableCell.get("cols"))
+ del xmlTableCell.attrib["cols"]
+ else:
+ intCurrentColumn += 1
+ xmlTableRow.tag = "tr"
+ xmlTableRow.set("valign", "top")
+ elif xmlElement.tag == "list" and xmlElement.get('type') != 'description':
+ xmlResult = etree.Element("temp")
+ if xmlElement.get('type') == 'ordered':
- if xml_first_child.tag == 'item':
- logging.debug("a simple list with no special items")
# Change first item into EOAlistfirstitem
xmlFirstItem = xmlElement.find("..//item")
xmlFirstItemElement = xmlFirstItem.getchildren()[0]
- xmlResult.append(djangoParseObject(xmlFirstItemElement,indent=True,listtype="unordered", listnumber="-"))
+
+ xmlResult.append(djangoParseObject(xmlFirstItemElement,indent=True, listtype="ordered", listnumber=xmlFirstItem.get("id-text"), uid=xmlFirstItem.get("id")))
# Process Child Elements which are Part of this item
if len(xmlFirstItem.getchildren()) >= 1:
- logging.debug("len xmlFirstItem.getchildren is greater or equal 1")
for xmlChild in xmlFirstItem.iterchildren():
xmlResult.append(djangoParseObject(xmlChild,indent=True))
xmlFirstItem.getparent().remove(xmlFirstItem)
+ # Process remaining items in this list
+ tmpIntNumber = 2
for xmlItem in xmlElement.iterchildren():
xmlItemElement = xmlItem.getchildren()[0]
- xmlResult.append(djangoParseObject(xmlItemElement,indent=True))
+ xmlResult.append(djangoParseObject(xmlItemElement,indent=True,listtype="ordered",listnumber=xmlItem.get("id-text"), uid=xmlItem.get("id")))
+ tmpIntNumber += 1
if len(xmlItem.getchildren()) >= 1:
for xmlChild in xmlItem.iterchildren():
- xmlResult.append(djangoParseObject(xmlChild,indent=True))
+ xmlResult.append(djangoParseObject(xmlChild, indent=True))
xmlItem.getparent().remove(xmlItem)
+ if xmlElement.get('type') == 'simple':
+ xml_first_child = xmlElement.getchildren()[0]
+
+ if xml_first_child.tag == 'item':
+ logging.debug("a simple list with no special items")
+ # Change first item into EOAlistfirstitem
+ xmlFirstItem = xmlElement.find("..//item")
+ xmlFirstItemElement = xmlFirstItem.getchildren()[0]
+ xmlResult.append(djangoParseObject(xmlFirstItemElement,indent=True,listtype="unordered", listnumber="-"))
+ # Process Child Elements which are Part of this item
+ if len(xmlFirstItem.getchildren()) >= 1:
+ logging.debug("len xmlFirstItem.getchildren is greater or equal 1")
+ for xmlChild in xmlFirstItem.iterchildren():
+ xmlResult.append(djangoParseObject(xmlChild,indent=True))
+ xmlFirstItem.getparent().remove(xmlFirstItem)
+ for xmlItem in xmlElement.iterchildren():
+ xmlItemElement = xmlItem.getchildren()[0]
+ xmlResult.append(djangoParseObject(xmlItemElement,indent=True))
+ if len(xmlItem.getchildren()) >= 1:
+ for xmlChild in xmlItem.iterchildren():
+ xmlResult.append(djangoParseObject(xmlChild,indent=True))
+ xmlItem.getparent().remove(xmlItem)
+
+ #############
+ # Baustelle #
+ #############
+ elif xml_first_child.tag == 'label':
+ logging.debug("a simple list with named items")
+
+ # Change first item into EOAlistfirstitem
+ xmlFirstItem = xmlElement.find("..//item")
+ xmlFirstItemElement = xmlFirstItem.getchildren()[0]
+ logging.debug(xmlFirstItemElement.text)
+
+ # debugging
+ logging.debug(etree.tostring(xmlFirstItemElement))
+ # end of debugging
+
+ xml_first_label = xmlElement.find("..//label")
+ listnumber_text = xml_first_label.text
+
+ xmlResult.append(djangoParseObject(xmlFirstItemElement,indent=True,listtype="unordered custom", listnumber=listnumber_text))
+
+ logging.debug("The length of the children of the first item: %s." % len(xmlFirstItem.getchildren()))
+
+ # Process Child Elements which are Part of this item
+ if len(xmlFirstItem.getchildren()) >= 1:
+ logging.debug("len xmlFirstItem.getchildren is greater or equal 1")
+ for xmlChild in xmlFirstItem.iterchildren():
+ xmlResult.append(djangoParseObject(xmlChild,indent=True))
- #############
- # Baustelle #
- #############
- elif xml_first_child.tag == 'label':
- logging.debug("a simple list with named items")
-
- # Change first item into EOAlistfirstitem
- xmlFirstItem = xmlElement.find("..//item")
- xmlFirstItemElement = xmlFirstItem.getchildren()[0]
- logging.debug(xmlFirstItemElement.text)
-
- # debugging
- logging.debug(etree.tostring(xmlFirstItemElement))
- # end of debugging
-
- xml_first_label = xmlElement.find("..//label")
- listnumber_text = xml_first_label.text
-
- xmlResult.append(djangoParseObject(xmlFirstItemElement,indent=True,listtype="unordered custom", listnumber=listnumber_text))
-
- logging.debug("The length of the children of the first item: %s." % len(xmlFirstItem.getchildren()))
-
- # Process Child Elements which are Part of this item
- if len(xmlFirstItem.getchildren()) >= 1:
- logging.debug("len xmlFirstItem.getchildren is greater or equal 1")
- for xmlChild in xmlFirstItem.iterchildren():
+ xmlFirstItem.getparent().remove(xmlFirstItem)
+ xml_first_label.getparent().remove(xml_first_label)
+
+ all_the_labels = xmlElement.findall("label")
+ all_the_items = xmlElement.findall("item")
+
+ logging.debug("itemlength %s." % len(all_the_items))
+ logging.debug("labellength %s." % len(all_the_labels))
+
+ for listlabel, listitem in zip(all_the_labels, all_the_items):
+ logging.debug("listitem text %s." % listitem.text)
+ logging.debug("listlabel text %s." % listlabel.text)
+ xml_item_element = listitem.getchildren()[0]
+ xmlResult.append(djangoParseObject(xml_item_element, indent=True, listnumber=listlabel.text))
+
+ listlabel.getparent().remove(listlabel)
+ listitem.getparent().remove(listitem)
+
+ # for xmlItem in xmlElement.iterchildren():
+ # print("So many items have we: ", len(xmlItem))
+ # xmlItemElement = xmlItem.getchildren()[0]
+ # xmlResult.append(djangoParseObject(xmlItemElement,indent=True))
+ # if len(xmlItem.getchildren()) >= 1:
+ # for xmlChild in xmlItem.iterchildren():
+ # xmlResult.append(djangoParseObject(xmlChild,indent=True))
+ # xmlItem.getparent().remove(xmlItem)
+ ##################
+ # Ende Baustelle #
+ ##################
+
+ elif xmlElement.tag == "list" and xmlElement.get('type') == 'description':
+ logging.debug("A description")
+ xmlResult = etree.Element("temp")
+ while len(xmlElement.getchildren()) != 0:
+ xmlDescription = etree.Element("EOAdescription")
+ xmlDescription.set("order", str(intObjectNumber))
+ xmlLabel = xmlElement.getchildren()[0]
+ xmlItem = xmlElement.getchildren()[1]
+ if len(xmlItem.getchildren()) > 0:
+ xmlContent = xmlItem.getchildren()[0]
+ else:
+ xmlContent = etree.Element("p")
+ xmlLabel.tag = "description"
+ xmlDescription.append(xmlLabel)
+ xmlDescription.append(xmlContent)
+ xmlResult.append(xmlDescription)
+ intObjectNumber += 1
+ if len(xmlItem.getchildren()) > 0:
+ for xmlChild in xmlItem.iterchildren():
xmlResult.append(djangoParseObject(xmlChild,indent=True))
-
- xmlFirstItem.getparent().remove(xmlFirstItem)
- xml_first_label.getparent().remove(xml_first_label)
-
- all_the_labels = xmlElement.findall("label")
- all_the_items = xmlElement.findall("item")
-
- logging.debug("itemlength %s." % len(all_the_items))
- logging.debug("labellength %s." % len(all_the_labels))
-
- for listlabel, listitem in zip(all_the_labels, all_the_items):
- logging.debug("listitem text %s." % listitem.text)
- logging.debug("listlabel text %s." % listlabel.text)
- xml_item_element = listitem.getchildren()[0]
- xmlResult.append(djangoParseObject(xml_item_element, indent=True, listnumber=listlabel.text))
-
- listlabel.getparent().remove(listlabel)
- listitem.getparent().remove(listitem)
-
- # for xmlItem in xmlElement.iterchildren():
- # print("So many items have we: ", len(xmlItem))
- # xmlItemElement = xmlItem.getchildren()[0]
- # xmlResult.append(djangoParseObject(xmlItemElement,indent=True))
- # if len(xmlItem.getchildren()) >= 1:
- # for xmlChild in xmlItem.iterchildren():
- # xmlResult.append(djangoParseObject(xmlChild,indent=True))
- # xmlItem.getparent().remove(xmlItem)
- ##################
- # Ende Baustelle #
- ##################
-
- elif xmlElement.tag == "list" and xmlElement.get('type') == 'description':
- logging.debug("A description")
- xmlResult = etree.Element("temp")
- while len(xmlElement.getchildren()) != 0:
- xmlDescription = etree.Element("EOAdescription")
- xmlDescription.set("order", str(intObjectNumber))
- xmlLabel = xmlElement.getchildren()[0]
- xmlItem = xmlElement.getchildren()[1]
- if len(xmlItem.getchildren()) > 0:
- xmlContent = xmlItem.getchildren()[0]
- else:
- xmlContent = etree.Element("p")
- xmlLabel.tag = "description"
- xmlDescription.append(xmlLabel)
- xmlDescription.append(xmlContent)
- xmlResult.append(xmlDescription)
+ xmlItem.getparent().remove(xmlItem)
+ elif xmlElement.tag == "theorem":
+ xmlTheoremHead = xmlElement.find(".//head")
+ xmlTheoremText = xmlElement.find(".//p")
+ strTheoremNumber = xmlElement.get("id-text")
+ strTheoremID = xmlElement.get("id")
+ xmlResult = etree.Element("EOAtheorem")
+ xmlResult.append(xmlTheoremHead)
+ xmlResult.append(xmlTheoremText)
+ xmlResult.set("order", str(intObjectNumber))
+ xmlResult.set("number", strTheoremNumber)
+ xmlResult.set("uid", strTheoremID)
intObjectNumber += 1
- if len(xmlItem.getchildren()) > 0:
- for xmlChild in xmlItem.iterchildren():
- xmlResult.append(djangoParseObject(xmlChild,indent=True))
- xmlItem.getparent().remove(xmlItem)
- elif xmlElement.tag == "theorem":
- xmlTheoremHead = xmlElement.find(".//head")
- xmlTheoremText = xmlElement.find(".//p")
- strTheoremNumber = xmlElement.get("id-text")
- strTheoremID = xmlElement.get("id")
- xmlResult = etree.Element("EOAtheorem")
- xmlResult.append(xmlTheoremHead)
- xmlResult.append(xmlTheoremText)
- xmlResult.set("order", str(intObjectNumber))
- xmlResult.set("number", strTheoremNumber)
- xmlResult.set("uid", strTheoremID)
- intObjectNumber += 1
- elif xmlElement.findall(".//EOAequationarray"):
- xmlResult = etree.Element("temp")
- for xmlEquation in xmlElement.findall(".//EOAequation"):
- xmlEOAequation = etree.Element("EOAequation")
- xmlEOAequation.set("order", str(intObjectNumber))
+ elif xmlElement.findall(".//EOAequationarray"):
+ xmlResult = etree.Element("temp")
+ for xmlEquation in xmlElement.findall(".//EOAequation"):
+ xmlEOAequation = etree.Element("EOAequation")
+ xmlEOAequation.set("order", str(intObjectNumber))
+ intObjectNumber += 1
+ xmlEOAequation.set("number", xmlEquation.get("number"))
+ xmlEOAequation.set("filename", xmlEquation.get("filename"))
+ if xmlEquation.get("label") is not None:
+ xmlEOAequation.set("label", xmlEquation.get("label"))
+ shutil.copy(os.getcwd() + "/items/" + xmlEquation.get("filename"), os.getcwd() + "/CONVERT/django/images/")
+ xmlEOAequation.set("TeX", xmlEquation.get("TeX"))
+ if xmlEquation.get("label") is not None:
+ xmlEOAequation.set("label", xmlEquation.get("label"))
+ xmlResult.append(xmlEOAequation)
+ elif xmlElement.findall(".//EOAequationarraynonumber"):
+ xmlResult = etree.Element("temp")
+ for xmlEquation in xmlElement.findall(".//EOAequationarraynonumber"):
+ xmlEOAequation = etree.Element("EOAequation")
+ xmlEOAequation.set("order", str(intObjectNumber))
+ intObjectNumber += 1
+ xmlEOAequation.set("number", "")
+ xmlEOAequation.set("filename", xmlEquation.get("filename"))
+ shutil.copy(os.getcwd() + "/items/" + xmlEquation.get("filename"), os.getcwd() + "/CONVERT/django/images/")
+ xmlEOAequation.set("TeX", xmlEquation.get("TeX"))
+ xmlResult.append(xmlEOAequation)
+ elif xmlElement.tag == "EOAequationnonumber":
+ # Process one EOAequation which is not encapsulated
+ xmlResult = etree.Element("EOAequation")
+ xmlResult.set("order", str(intObjectNumber))
intObjectNumber += 1
- xmlEOAequation.set("number", xmlEquation.get("number"))
- xmlEOAequation.set("filename", xmlEquation.get("filename"))
- if xmlEquation.get("label") is not None:
- xmlEOAequation.set("label", xmlEquation.get("label"))
- shutil.copy(os.getcwd() + "/items/" + xmlEquation.get("filename"), os.getcwd() + "/CONVERT/django/images/")
- xmlEOAequation.set("TeX", xmlEquation.get("TeX"))
- if xmlEquation.get("label") is not None:
- xmlEOAequation.set("label", xmlEquation.get("label"))
- xmlResult.append(xmlEOAequation)
- elif xmlElement.findall(".//EOAequationarraynonumber"):
- xmlResult = etree.Element("temp")
- for xmlEquation in xmlElement.findall(".//EOAequationarraynonumber"):
- xmlEOAequation = etree.Element("EOAequation")
- xmlEOAequation.set("order", str(intObjectNumber))
+ xmlResult.set("filename", xmlElement.get("filename"))
+ xmlResult.set("TeX", xmlElement.get("TeX"))
+ shutil.copy(os.getcwd() + "/items/" + xmlElement.get("filename"), os.getcwd() + "/CONVERT/django/images/")
+ xmlResult.set("number", "")
+ elif xmlElement.findall(".//EOAequation"):
+ # Process various Equations which may be encapsulated within
+ xmlEquations = xmlElement.findall(".//EOAequation")
+ xmlResult = etree.Element("temp")
+ for xmlEquation in xmlEquations:
+ # Create basic Element EOAequation
+ xmlEOAequation = etree.Element("EOAequation")
+ xmlEOAequation.set("order", str(intObjectNumber))
+ intObjectNumber += 1
+ xmlEOAequation.set("number", xmlEquation.get("number"))
+ xmlEOAequation.set("TeX", xmlEquation.get("TeX"))
+ if xmlEquation.get("uid") is not None:
+ xmlEOAequation.set("uid", xmlEquation.get("uid"))
+ shutil.copy(os.getcwd() + "/items/" + xmlEquation.get("filename"), os.getcwd() + "/CONVERT/django/images/")
+ xmlEOAequation.set("filename", xmlEquation.get("filename"))
+ xmlResult.append(xmlEOAequation)
+ elif xmlElement.tag == "EOAequation":
+ # Process one EOAequation which is not encapsulated
+ xmlResult = etree.Element("EOAequation")
+ xmlResult.set("order", str(intObjectNumber))
intObjectNumber += 1
- xmlEOAequation.set("number", "")
- xmlEOAequation.set("filename", xmlEquation.get("filename"))
- shutil.copy(os.getcwd() + "/items/" + xmlEquation.get("filename"), os.getcwd() + "/CONVERT/django/images/")
- xmlEOAequation.set("TeX", xmlEquation.get("TeX"))
- xmlResult.append(xmlEOAequation)
- elif xmlElement.tag == "EOAequationnonumber":
- # Process one EOAequation which is not encapsulated
- xmlResult = etree.Element("EOAequation")
- xmlResult.set("order", str(intObjectNumber))
- intObjectNumber += 1
- xmlResult.set("filename", xmlElement.get("filename"))
- xmlResult.set("TeX", xmlElement.get("TeX"))
- shutil.copy(os.getcwd() + "/items/" + xmlElement.get("filename"), os.getcwd() + "/CONVERT/django/images/")
- xmlResult.set("number", "")
- elif xmlElement.findall(".//EOAequation"):
- # Process various Equations which may be encapsulated within
- xmlEquations = xmlElement.findall(".//EOAequation")
- xmlResult = etree.Element("temp")
- for xmlEquation in xmlEquations:
- # Create basic Element EOAequation
- xmlEOAequation = etree.Element("EOAequation")
- xmlEOAequation.set("order", str(intObjectNumber))
+ xmlResult.set("number", xmlElement.get("number"))
+ xmlResult.set("TeX", xmlElement.get("TeX"))
+ if xmlElement.get("uid") is not None:
+ xmlResult.set("uid", xmlElement.get("uid"))
+ shutil.copy(os.getcwd() + "/items/" + xmlElement.get("filename"), os.getcwd() + "/CONVERT/django/images/")
+ xmlResult.set("filename", xmlElement.get("filename"))
+ elif xmlElement.tag == "div3":
+ xmlResult = etree.Element("EOAsubsection")
+ xmlResult.set("order", str(intObjectNumber))
intObjectNumber += 1
- xmlEOAequation.set("number", xmlEquation.get("number"))
- xmlEOAequation.set("TeX", xmlEquation.get("TeX"))
- if xmlEquation.get("uid") is not None:
- xmlEOAequation.set("uid", xmlEquation.get("uid"))
- shutil.copy(os.getcwd() + "/items/" + xmlEquation.get("filename"), os.getcwd() + "/CONVERT/django/images/")
- xmlEOAequation.set("filename", xmlEquation.get("filename"))
- xmlResult.append(xmlEOAequation)
- elif xmlElement.tag == "EOAequation":
- # Process one EOAequation which is not encapsulated
- xmlResult = etree.Element("EOAequation")
- xmlResult.set("order", str(intObjectNumber))
- intObjectNumber += 1
- xmlResult.set("number", xmlElement.get("number"))
- xmlResult.set("TeX", xmlElement.get("TeX"))
- if xmlElement.get("uid") is not None:
- xmlResult.set("uid", xmlElement.get("uid"))
- shutil.copy(os.getcwd() + "/items/" + xmlElement.get("filename"), os.getcwd() + "/CONVERT/django/images/")
- xmlResult.set("filename", xmlElement.get("filename"))
- elif xmlElement.tag == "div3":
- xmlResult = etree.Element("EOAsubsection")
- xmlResult.set("order", str(intObjectNumber))
- intObjectNumber += 1
- xmlResult.append(xmlElement.find("head"))
- for xmlChild in xmlElement.iterchildren():
- xmlResult.append(djangoParseObject(xmlChild))
- elif xmlElement.tag == "div4":
- xmlResult = etree.Element("EOAsubsubsection")
- xmlResult.set("order", str(intObjectNumber))
- intObjectNumber += 1
- xmlResult.append(xmlElement.find("head"))
- for xmlChild in xmlElement.iterchildren():
- xmlResult.append(djangoParseObject(xmlChild))
- elif xmlElement.tag == "EOAverse":
- xmlResult = etree.Element("EOAparagraph")
- xmlResult.set("style", "verse")
- xmlResult.set("order", str(intObjectNumber))
- intObjectNumber += 1
-
- xml_verselines = xmlElement.findall("p")
- xmlResult.append(deepcopy(xml_verselines[0]))
- for xml_verseline in xml_verselines[1:]:
- linebreak = etree.Element("br")
- xmlResult.append(linebreak)
- copied_line = deepcopy(xml_verseline)
- xmlResult.append(copied_line)
- etree.strip_tags(xmlResult, "p")
- elif xmlElement.tag == "EOAbox":
- logging.debug("Found a box")
- xmlResult = etree.Element("temp")
- xmlResult.set("style", "box")
-
- box_header = xmlElement.find("head")
- box_header.tag = "EOAparagraph"
- box_header.set("style", "box")
- box_header.set("order", str(intObjectNumber))
- head_contents = box_header.find("p")
- head_contents.tag = "b"
- # etree.strip_tags(box_header, "p")
- xmlResult.append(box_header)
- intObjectNumber += 1
- # question: what to do about paragraph equivalent objects?
- box_elements = xmlElement.getchildren()
- logging.debug(len(box_elements))
- for box_element in box_elements:
- if box_element.tag == "p":
- box_element.tag = "EOAparagraph"
- box_element.set("style", "box")
- box_element.set("order", str(intObjectNumber))
- xmlResult.append(box_element)
+ xmlResult.append(xmlElement.find("head"))
+ for xmlChild in xmlElement.iterchildren():
+ xmlResult.append(djangoParseObject(xmlChild))
+ elif xmlElement.tag == "div4":
+ xmlResult = etree.Element("EOAsubsubsection")
+ xmlResult.set("order", str(intObjectNumber))
+ intObjectNumber += 1
+ xmlResult.append(xmlElement.find("head"))
+ for xmlChild in xmlElement.iterchildren():
+ xmlResult.append(djangoParseObject(xmlChild))
+ elif xmlElement.tag == "EOAverse":
+ xmlResult = etree.Element("EOAparagraph")
+ xmlResult.set("style", "verse")
+ xmlResult.set("order", str(intObjectNumber))
intObjectNumber += 1
- elif xmlElement.tag == "EOAtocentry":
- # throw them out for the time being
- xmlResult = etree.Element("temp")
+ xml_verselines = xmlElement.findall("p")
+ xmlResult.append(deepcopy(xml_verselines[0]))
+ for xml_verseline in xml_verselines[1:]:
+ linebreak = etree.Element("br")
+ xmlResult.append(linebreak)
+ copied_line = deepcopy(xml_verseline)
+ xmlResult.append(copied_line)
+ etree.strip_tags(xmlResult, "p")
+ elif xmlElement.tag == "EOAbox":
+ logging.debug("Found a box")
+ xmlResult = etree.Element("temp")
+ xmlResult.set("style", "box")
+
+ box_header = xmlElement.find("head")
+ box_header.tag = "EOAparagraph"
+ box_header.set("style", "box")
+ box_header.set("order", str(intObjectNumber))
+ head_contents = box_header.find("p")
+ head_contents.tag = "b"
+ # etree.strip_tags(box_header, "p")
+ xmlResult.append(box_header)
+ intObjectNumber += 1
+ # question: what to do about paragraph equivalent objects?
+ box_elements = xmlElement.getchildren()
+ logging.debug(len(box_elements))
+ for box_element in box_elements:
+ if box_element.tag == "p":
+ box_element.tag = "EOAparagraph"
+ box_element.set("style", "box")
+ box_element.set("order", str(intObjectNumber))
+ xmlResult.append(box_element)
+ intObjectNumber += 1
+ elif xmlElement.tag == "EOAtocentry":
+ # throw them out for the time being
+ xmlResult = etree.Element("temp")
+ else:
+ xmlElement.tag = "EOAparagraph"
+ quoted_paragraph = xmlElement.get("rend")
+ if quoted_paragraph is not None and quoted_paragraph == "quoted":
+ xmlElement.set("rend", "quoted")
+ xmlElement.set("order", str(intObjectNumber))
+ intObjectNumber += 1
+ xmlResult = xmlElement
else:
- xmlElement.tag = "EOAparagraph"
- quoted_paragraph = xmlElement.get("rend")
- if quoted_paragraph is not None and quoted_paragraph == "quoted":
- xmlElement.set("rend", "quoted")
- xmlElement.set("order", str(intObjectNumber))
- intObjectNumber += 1
+ print("SPECIAL: %s - %s" % (xmlElement, xmlElement.text))
xmlResult = xmlElement
-
if indent==True:
xmlResult.set("indent", "True")
if listtype != None: