diff --git a/tralics2django.py b/tralics2django.py index b91c46d..c1ea06b 100755 --- a/tralics2django.py +++ b/tralics2django.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -*- coding: utf-8; mode: python -*- -# Time-stamp: <2018-02-09 14:21:09 (kthoden)> +# Time-stamp: <2018-03-07 11:20:17 (kthoden)> import pickle import os @@ -171,459 +171,462 @@ def djangoParseObject(xmlElement, indent=False, listtype=None, listnumber=0, uid global dictLists global intObjectNumber # Check what kind of Element we have and change the data - if xmlElement.tag == "EOAtranscripted": - xmlResult = etree.Element("temp") - xmlEOATranscription = etree.Element("EOAtranscription") - xmlEOATranscription.set("order", str(intObjectNumber)) - intObjectNumber += 1 - xmlLeftheader = xmlElement.find(".//Leftheader") - etree.strip_tags(xmlLeftheader, "p") - xmlEOATranscription.append(xmlLeftheader) - xmlRightheader = xmlElement.find(".//Rightheader") - etree.strip_tags(xmlRightheader, "p") - xmlEOATranscription.append(xmlRightheader) - xmlTranscriptedtext = xmlElement.find(".//EOAtranscriptedtext") - # change \n\n into

and pagebreak intto

to create some valid markup - strTranscriptedtext = etree.tostring(xmlTranscriptedtext, encoding="unicode") - #strTranscriptedtext = re.sub (r"\n\n", "

", str(strTranscriptedtext)) - #strTranscriptedtext = re.sub (r"

", "", strTranscriptedtext) - xmlLeftColumn = etree.Element("EOAtranscriptionleft") - xmlRightColumn = etree.Element("EOAtranscriptionright") - boolRightColumn = False - xmlTemp = etree.XML(str(strTranscriptedtext)) - for xmlElement in xmlTemp.iterchildren(): - if xmlElement.tag == "pagebreak": - boolRightColumn = True - continue - if boolRightColumn == False: - xmlLeftColumn.append(xmlElement) - if boolRightColumn == True: - xmlRightColumn.append(xmlElement) - xmlEOATranscription.append(xmlLeftColumn) - xmlEOATranscription.append(xmlRightColumn) - # Convert Images within the transcription - logging.debug("EOAfigurenonumber") - xmlFigures = xmlEOATranscription.findall(".//EOAfigurenonumber") - logging.debug(xmlFigures) - if xmlFigures is not None: - for xmlFigure in xmlFigures: - strImageFileString = xmlFigure.find(".//file").text - strImageFileString = strImageFileString.rstrip("\n") - strImageFileDir = os.path.dirname(strImageFileString) - strImageFileDir = re.sub("/", "", strImageFileDir) - strImageFileName = os.path.basename(strImageFileString) - strImageFileNamewoSuffix = os.path.splitext(strImageFileName)[0] - strCommand = GM_PATH + " convert " + os.getcwd() + "/" + strImageFileString + " -resize 250x250\\> " + os.getcwd() + "/CONVERT/django/images/embedded/" + strImageFileDir + strImageFileName - listArguments = shlex.split(strCommand) - subprocess.check_output(listArguments, shell=False) - tmpStrTail = xmlFigure.tail - xmlFigure.clear() - xmlFigure.tag = "img" - xmlFigure.set("src", strImageFileDir + strImageFileName) - xmlFigure.set("alt", "") - xmlResult.append(xmlEOATranscription) - elif xmlElement.tag == "EOAletterhead": - xmlResult = etree.Element("temp") - xmlEOAletterhead = etree.Element("EOAletterhead") - xmlEOAletterrecipient = xmlElement.find(".//Recipient") - xmlEOAletterhead.append(xmlEOAletterrecipient) - xmlEOAletterarchive = xmlElement.find(".//Archive") - xmlEOAletterhead.append(xmlEOAletterarchive) - xmlEOAletteradditional = 
xmlElement.find(".//Additional") - xmlEOAletterhead.append(xmlEOAletteradditional) - xmlEOAletterpages = xmlElement.find(".//Pages") - xmlEOAletterhead.append(xmlEOAletterpages) - xmlEOAletterhead.set("order", str(intObjectNumber)) - intObjectNumber += 1 - xmlResult.append(xmlEOAletterhead) - - elif xmlElement.tag == "EOAfigurenonumber": - # elif xmlElement.findall(".//EOAfigurenonumber"): - xmlResult = etree.Element("temp") - # Create basic Element EOAfigurenonumber - xmlEOAfigure = etree.Element("EOAfigurenonumber") - # Copy Image - strImageFileString = xmlElement.find(".//file").text - strImageFileString = strImageFileString.rstrip("\n") - strImageFileDir = os.path.dirname(strImageFileString) - strImageFileDir = re.sub("/", "", strImageFileDir) - strImageFileName = os.path.basename(strImageFileString) - strImageFileNamewoSuffix = os.path.splitext(strImageFileName)[0] - shutil.copy(os.getcwd() + "/" + strImageFileString, os.getcwd() + "/CONVERT/django/images/" + strImageFileDir + strImageFileName) - xmlEOAfigure.set("file", strImageFileDir + strImageFileName) - xmlEOAfigure.set("width", xmlElement.find(".//width").text + "px;") - xmlEOAfigure.set("order", str(intObjectNumber)) - intObjectNumber += 1 - xmlResult.append(xmlEOAfigure) - elif xmlElement.tag == "EOAfigure": - xmlResult = etree.Element("temp") - # Create basic Element EOAfigure - xmlEOAfigure = etree.Element("EOAfigure") - # Copy Image - strImageFileString = xmlElement.find(".//file").text - strImageFileString = strImageFileString.rstrip("\n") - strImageFileDir = os.path.dirname(strImageFileString) - strImageFileDir = re.sub("/", "", strImageFileDir) - strImageFileName = os.path.basename(strImageFileString) - strImageFileNamewoSuffix = os.path.splitext(strImageFileName)[0] - shutil.copy(os.getcwd() + "/" + strImageFileString, os.getcwd() + "/CONVERT/django/images/" + strImageFileDir + strImageFileName) - logging.debug("Django figure %s." 
% strImageFileName) - # yellow - if os.path.splitext(strImageFileName)[1].lower() == ".pdf": - logging.debug("Found a PDF file") - strImageFilepath = libeoaconvert.sanitizeImage(os.getcwd() + "/CONVERT/django/images/" + strImageFileDir + strImageFileName, GM_PATH, TL_PATH) - xmlEOAfigure.set("file", strImageFileDir + strImageFileName.replace(".pdf", ".png")) - logging.debug("The filename is %s" % xmlEOAfigure.get("file")) - else: + + if isinstance(xmlElement.tag, str): + if xmlElement.tag == "EOAtranscripted": + xmlResult = etree.Element("temp") + xmlEOATranscription = etree.Element("EOAtranscription") + xmlEOATranscription.set("order", str(intObjectNumber)) + intObjectNumber += 1 + xmlLeftheader = xmlElement.find(".//Leftheader") + etree.strip_tags(xmlLeftheader, "p") + xmlEOATranscription.append(xmlLeftheader) + xmlRightheader = xmlElement.find(".//Rightheader") + etree.strip_tags(xmlRightheader, "p") + xmlEOATranscription.append(xmlRightheader) + xmlTranscriptedtext = xmlElement.find(".//EOAtranscriptedtext") + # change \n\n into

and pagebreak into

to create some valid markup + strTranscriptedtext = etree.tostring(xmlTranscriptedtext, encoding="unicode") + #strTranscriptedtext = re.sub (r"\n\n", "

", str(strTranscriptedtext)) + #strTranscriptedtext = re.sub (r"

", "", strTranscriptedtext) + xmlLeftColumn = etree.Element("EOAtranscriptionleft") + xmlRightColumn = etree.Element("EOAtranscriptionright") + boolRightColumn = False + xmlTemp = etree.XML(str(strTranscriptedtext)) + for xmlElement in xmlTemp.iterchildren(): + if xmlElement.tag == "pagebreak": + boolRightColumn = True + continue + if boolRightColumn == False: + xmlLeftColumn.append(xmlElement) + if boolRightColumn == True: + xmlRightColumn.append(xmlElement) + xmlEOATranscription.append(xmlLeftColumn) + xmlEOATranscription.append(xmlRightColumn) + # Convert Images within the transcription + logging.debug("EOAfigurenonumber") + xmlFigures = xmlEOATranscription.findall(".//EOAfigurenonumber") + logging.debug(xmlFigures) + if xmlFigures is not None: + for xmlFigure in xmlFigures: + strImageFileString = xmlFigure.find(".//file").text + strImageFileString = strImageFileString.rstrip("\n") + strImageFileDir = os.path.dirname(strImageFileString) + strImageFileDir = re.sub("/", "", strImageFileDir) + strImageFileName = os.path.basename(strImageFileString) + strImageFileNamewoSuffix = os.path.splitext(strImageFileName)[0] + strCommand = GM_PATH + " convert " + os.getcwd() + "/" + strImageFileString + " -resize 250x250\\> " + os.getcwd() + "/CONVERT/django/images/embedded/" + strImageFileDir + strImageFileName + listArguments = shlex.split(strCommand) + subprocess.check_output(listArguments, shell=False) + tmpStrTail = xmlFigure.tail + xmlFigure.clear() + xmlFigure.tag = "img" + xmlFigure.set("src", strImageFileDir + strImageFileName) + xmlFigure.set("alt", "") + xmlResult.append(xmlEOATranscription) + elif xmlElement.tag == "EOAletterhead": + xmlResult = etree.Element("temp") + xmlEOAletterhead = etree.Element("EOAletterhead") + xmlEOAletterrecipient = xmlElement.find(".//Recipient") + xmlEOAletterhead.append(xmlEOAletterrecipient) + xmlEOAletterarchive = xmlElement.find(".//Archive") + xmlEOAletterhead.append(xmlEOAletterarchive) + xmlEOAletteradditional = 
xmlElement.find(".//Additional") + xmlEOAletterhead.append(xmlEOAletteradditional) + xmlEOAletterpages = xmlElement.find(".//Pages") + xmlEOAletterhead.append(xmlEOAletterpages) + xmlEOAletterhead.set("order", str(intObjectNumber)) + intObjectNumber += 1 + xmlResult.append(xmlEOAletterhead) + + elif xmlElement.tag == "EOAfigurenonumber": + # elif xmlElement.findall(".//EOAfigurenonumber"): + xmlResult = etree.Element("temp") + # Create basic Element EOAfigurenonumber + xmlEOAfigure = etree.Element("EOAfigurenonumber") + # Copy Image + strImageFileString = xmlElement.find(".//file").text + strImageFileString = strImageFileString.rstrip("\n") + strImageFileDir = os.path.dirname(strImageFileString) + strImageFileDir = re.sub("/", "", strImageFileDir) + strImageFileName = os.path.basename(strImageFileString) + strImageFileNamewoSuffix = os.path.splitext(strImageFileName)[0] + shutil.copy(os.getcwd() + "/" + strImageFileString, os.getcwd() + "/CONVERT/django/images/" + strImageFileDir + strImageFileName) xmlEOAfigure.set("file", strImageFileDir + strImageFileName) - xmlEOAfigure.set("width", xmlElement.find(".//width").text + "px;") - xmlEOAfigure.set("order", str(intObjectNumber)) - intObjectNumber += 1 - # Insert visual Number and uid - strFigureNumber = dictFigures[xmlElement.find(".//anchor").get("id")] - xmlEOAfigure.set("number", strFigureNumber) - strFigureUID = xmlElement.find(".//anchor").get("id") - xmlEOAfigure.set("id", strFigureUID) - # Insert Caption - xmlEOAfigure.append(xmlElement.find(".//caption")) - xmlResult.append(xmlEOAfigure) - elif xmlElement.findall(".//EOAtable"): - xmlResult = etree.Element("EOAtable") - xmlRawTable = xmlElement.find(".//table") - xmlResult.set("order", str(intObjectNumber)) - intObjectNumber += 1 - xmlResult.append(xmlRawTable) - # Copy Number, Label and Caption - if xmlElement.find(".//EOAtablecaption").text != "nonumber": - xmlResult.append(xmlElement.find(".//EOAtablecaption")) - xmlResult.set("label", 
xmlElement.find(".//EOAtablelabel").text) - xmlResult.set("number", dictTables[xmlElement.find(".//EOAtablelabel").text]) - xmlResult.set("id", xmlRawTable.get("id")) - else: - xmlElement.set("numbering", "false") - #if xmlElement.find(".//EOAtablelabel").text is not None: - # Transform width of Columns - strColumnString = xmlElement.find(".//EOAtablecolumns").text - strColumnString = re.sub(r"\|", "", strColumnString) - reMatchObjects = re.findall(r'([L|R|C].*?cm)', strColumnString) - intTableWidth = 0 - listColumnAlignments = [None] - listColumnWidths = [None] - intNumberOfColumns = 0 - for strColumnDefinition in reMatchObjects: - strColumnDefinition = strColumnDefinition.rstrip("cm") - strColumnAlignment = strColumnDefinition[0] - if strColumnAlignment == "L": - strColumnAlignment = "left" - if strColumnAlignment == "C": - strColumnAlignment = "center" - if strColumnAlignment == "R": - strColumnAlignment = "right" - listColumnAlignments.append(strColumnAlignment) - intColumnWidth = int(float(strColumnDefinition.lstrip("LRC")) * 75) - listColumnWidths.append(intColumnWidth) - intTableWidth += intColumnWidth - intNumberOfColumns += 1 - xmlRawTable.set("width", str(intTableWidth)) - # Figure out and deal with the Header - xmlHeader = xmlRawTable.find(".//row/cell/tableheader") - if xmlHeader is not None: - xmlHeader.text = "" - xmlHeader.getparent().text = xmlHeader.tail - xmlHeader.getparent().remove(xmlHeader) - xmlFirstRow = xmlRawTable.find(".//row") - xmlFirstRow.tag = "tr" - xmlFirstRowCells = xmlFirstRow.findall(".//cell") - for xmlFirstRowCell in xmlFirstRowCells: - xmlFirstRowCell.tag = "th" - # Now Deal with the rest of the rows - xmlTableRows = xmlRawTable.findall(".//row") - for xmlTableRow in xmlTableRows: - xmlTableCells = xmlTableRow.findall(".//cell") - intCurrentColumn = 1 - for xmlTableCell in xmlTableCells: - xmlTableCell.tag = "td" - xmlTableCell.set("align",listColumnAlignments[intCurrentColumn]) - xmlTableCell.set("style","width: " + 
str(listColumnWidths[intCurrentColumn]) + ";") - # Deal with multicolumn - if xmlTableCell.get("cols") is not None: - xmlTableCell.set("colspan", xmlTableCell.get("cols")) - if intCurrentColumn > len(xmlTableCells): - intCurrentColumn = 1 - # Deal with multicolumn again, increase intCurrentColumn by the columns being spanned - elif xmlTableCell.get("cols") is not None: - intCurrentColumn = intCurrentColumn + int(xmlTableCell.get("cols")) - del xmlTableCell.attrib["cols"] - else: - intCurrentColumn += 1 - xmlTableRow.tag = "tr" - xmlTableRow.set("valign", "top") - elif xmlElement.tag == "list" and xmlElement.get('type') != 'description': - xmlResult = etree.Element("temp") - if xmlElement.get('type') == 'ordered': - - # Change first item into EOAlistfirstitem - xmlFirstItem = xmlElement.find("..//item") - xmlFirstItemElement = xmlFirstItem.getchildren()[0] - - xmlResult.append(djangoParseObject(xmlFirstItemElement,indent=True, listtype="ordered", listnumber=xmlFirstItem.get("id-text"), uid=xmlFirstItem.get("id"))) - # Process Child Elements which are Part of this item - if len(xmlFirstItem.getchildren()) >= 1: - for xmlChild in xmlFirstItem.iterchildren(): - xmlResult.append(djangoParseObject(xmlChild,indent=True)) - xmlFirstItem.getparent().remove(xmlFirstItem) - # Process remaining items in this list - tmpIntNumber = 2 - for xmlItem in xmlElement.iterchildren(): - xmlItemElement = xmlItem.getchildren()[0] - xmlResult.append(djangoParseObject(xmlItemElement,indent=True,listtype="ordered",listnumber=xmlItem.get("id-text"), uid=xmlItem.get("id"))) - tmpIntNumber += 1 - if len(xmlItem.getchildren()) >= 1: - for xmlChild in xmlItem.iterchildren(): - xmlResult.append(djangoParseObject(xmlChild, indent=True)) - xmlItem.getparent().remove(xmlItem) - if xmlElement.get('type') == 'simple': - xml_first_child = xmlElement.getchildren()[0] + xmlEOAfigure.set("width", xmlElement.find(".//width").text + "px;") + xmlEOAfigure.set("order", str(intObjectNumber)) + intObjectNumber 
+= 1 + xmlResult.append(xmlEOAfigure) + elif xmlElement.tag == "EOAfigure": + xmlResult = etree.Element("temp") + # Create basic Element EOAfigure + xmlEOAfigure = etree.Element("EOAfigure") + # Copy Image + strImageFileString = xmlElement.find(".//file").text + strImageFileString = strImageFileString.rstrip("\n") + strImageFileDir = os.path.dirname(strImageFileString) + strImageFileDir = re.sub("/", "", strImageFileDir) + strImageFileName = os.path.basename(strImageFileString) + strImageFileNamewoSuffix = os.path.splitext(strImageFileName)[0] + shutil.copy(os.getcwd() + "/" + strImageFileString, os.getcwd() + "/CONVERT/django/images/" + strImageFileDir + strImageFileName) + logging.debug("Django figure %s." % strImageFileName) + # yellow + if os.path.splitext(strImageFileName)[1].lower() == ".pdf": + logging.debug("Found a PDF file") + strImageFilepath = libeoaconvert.sanitizeImage(os.getcwd() + "/CONVERT/django/images/" + strImageFileDir + strImageFileName, GM_PATH, TL_PATH) + xmlEOAfigure.set("file", strImageFileDir + strImageFileName.replace(".pdf", ".png")) + logging.debug("The filename is %s" % xmlEOAfigure.get("file")) + else: + xmlEOAfigure.set("file", strImageFileDir + strImageFileName) + xmlEOAfigure.set("width", xmlElement.find(".//width").text + "px;") + xmlEOAfigure.set("order", str(intObjectNumber)) + intObjectNumber += 1 + # Insert visual Number and uid + strFigureNumber = dictFigures[xmlElement.find(".//anchor").get("id")] + xmlEOAfigure.set("number", strFigureNumber) + strFigureUID = xmlElement.find(".//anchor").get("id") + xmlEOAfigure.set("id", strFigureUID) + # Insert Caption + xmlEOAfigure.append(xmlElement.find(".//caption")) + xmlResult.append(xmlEOAfigure) + elif xmlElement.findall(".//EOAtable"): + xmlResult = etree.Element("EOAtable") + xmlRawTable = xmlElement.find(".//table") + xmlResult.set("order", str(intObjectNumber)) + intObjectNumber += 1 + xmlResult.append(xmlRawTable) + # Copy Number, Label and Caption + if 
xmlElement.find(".//EOAtablecaption").text != "nonumber": + xmlResult.append(xmlElement.find(".//EOAtablecaption")) + xmlResult.set("label", xmlElement.find(".//EOAtablelabel").text) + xmlResult.set("number", dictTables[xmlElement.find(".//EOAtablelabel").text]) + xmlResult.set("id", xmlRawTable.get("id")) + else: + xmlElement.set("numbering", "false") + #if xmlElement.find(".//EOAtablelabel").text is not None: + # Transform width of Columns + strColumnString = xmlElement.find(".//EOAtablecolumns").text + strColumnString = re.sub(r"\|", "", strColumnString) + reMatchObjects = re.findall(r'([L|R|C].*?cm)', strColumnString) + intTableWidth = 0 + listColumnAlignments = [None] + listColumnWidths = [None] + intNumberOfColumns = 0 + for strColumnDefinition in reMatchObjects: + strColumnDefinition = strColumnDefinition.rstrip("cm") + strColumnAlignment = strColumnDefinition[0] + if strColumnAlignment == "L": + strColumnAlignment = "left" + if strColumnAlignment == "C": + strColumnAlignment = "center" + if strColumnAlignment == "R": + strColumnAlignment = "right" + listColumnAlignments.append(strColumnAlignment) + intColumnWidth = int(float(strColumnDefinition.lstrip("LRC")) * 75) + listColumnWidths.append(intColumnWidth) + intTableWidth += intColumnWidth + intNumberOfColumns += 1 + xmlRawTable.set("width", str(intTableWidth)) + # Figure out and deal with the Header + xmlHeader = xmlRawTable.find(".//row/cell/tableheader") + if xmlHeader is not None: + xmlHeader.text = "" + xmlHeader.getparent().text = xmlHeader.tail + xmlHeader.getparent().remove(xmlHeader) + xmlFirstRow = xmlRawTable.find(".//row") + xmlFirstRow.tag = "tr" + xmlFirstRowCells = xmlFirstRow.findall(".//cell") + for xmlFirstRowCell in xmlFirstRowCells: + xmlFirstRowCell.tag = "th" + # Now Deal with the rest of the rows + xmlTableRows = xmlRawTable.findall(".//row") + for xmlTableRow in xmlTableRows: + xmlTableCells = xmlTableRow.findall(".//cell") + intCurrentColumn = 1 + for xmlTableCell in xmlTableCells: 
+ xmlTableCell.tag = "td" + xmlTableCell.set("align",listColumnAlignments[intCurrentColumn]) + xmlTableCell.set("style","width: " + str(listColumnWidths[intCurrentColumn]) + ";") + # Deal with multicolumn + if xmlTableCell.get("cols") is not None: + xmlTableCell.set("colspan", xmlTableCell.get("cols")) + if intCurrentColumn > len(xmlTableCells): + intCurrentColumn = 1 + # Deal with multicolumn again, increase intCurrentColumn by the columns being spanned + elif xmlTableCell.get("cols") is not None: + intCurrentColumn = intCurrentColumn + int(xmlTableCell.get("cols")) + del xmlTableCell.attrib["cols"] + else: + intCurrentColumn += 1 + xmlTableRow.tag = "tr" + xmlTableRow.set("valign", "top") + elif xmlElement.tag == "list" and xmlElement.get('type') != 'description': + xmlResult = etree.Element("temp") + if xmlElement.get('type') == 'ordered': - if xml_first_child.tag == 'item': - logging.debug("a simple list with no special items") # Change first item into EOAlistfirstitem xmlFirstItem = xmlElement.find("..//item") xmlFirstItemElement = xmlFirstItem.getchildren()[0] - xmlResult.append(djangoParseObject(xmlFirstItemElement,indent=True,listtype="unordered", listnumber="-")) + + xmlResult.append(djangoParseObject(xmlFirstItemElement,indent=True, listtype="ordered", listnumber=xmlFirstItem.get("id-text"), uid=xmlFirstItem.get("id"))) # Process Child Elements which are Part of this item if len(xmlFirstItem.getchildren()) >= 1: - logging.debug("len xmlFirstItem.getchildren is greater or equal 1") for xmlChild in xmlFirstItem.iterchildren(): xmlResult.append(djangoParseObject(xmlChild,indent=True)) xmlFirstItem.getparent().remove(xmlFirstItem) + # Process remaining items in this list + tmpIntNumber = 2 for xmlItem in xmlElement.iterchildren(): xmlItemElement = xmlItem.getchildren()[0] - xmlResult.append(djangoParseObject(xmlItemElement,indent=True)) + xmlResult.append(djangoParseObject(xmlItemElement,indent=True,listtype="ordered",listnumber=xmlItem.get("id-text"), 
uid=xmlItem.get("id"))) + tmpIntNumber += 1 if len(xmlItem.getchildren()) >= 1: for xmlChild in xmlItem.iterchildren(): - xmlResult.append(djangoParseObject(xmlChild,indent=True)) + xmlResult.append(djangoParseObject(xmlChild, indent=True)) xmlItem.getparent().remove(xmlItem) + if xmlElement.get('type') == 'simple': + xml_first_child = xmlElement.getchildren()[0] + + if xml_first_child.tag == 'item': + logging.debug("a simple list with no special items") + # Change first item into EOAlistfirstitem + xmlFirstItem = xmlElement.find("..//item") + xmlFirstItemElement = xmlFirstItem.getchildren()[0] + xmlResult.append(djangoParseObject(xmlFirstItemElement,indent=True,listtype="unordered", listnumber="-")) + # Process Child Elements which are Part of this item + if len(xmlFirstItem.getchildren()) >= 1: + logging.debug("len xmlFirstItem.getchildren is greater or equal 1") + for xmlChild in xmlFirstItem.iterchildren(): + xmlResult.append(djangoParseObject(xmlChild,indent=True)) + xmlFirstItem.getparent().remove(xmlFirstItem) + for xmlItem in xmlElement.iterchildren(): + xmlItemElement = xmlItem.getchildren()[0] + xmlResult.append(djangoParseObject(xmlItemElement,indent=True)) + if len(xmlItem.getchildren()) >= 1: + for xmlChild in xmlItem.iterchildren(): + xmlResult.append(djangoParseObject(xmlChild,indent=True)) + xmlItem.getparent().remove(xmlItem) + + ############# + # Baustelle # + ############# + elif xml_first_child.tag == 'label': + logging.debug("a simple list with named items") + + # Change first item into EOAlistfirstitem + xmlFirstItem = xmlElement.find("..//item") + xmlFirstItemElement = xmlFirstItem.getchildren()[0] + logging.debug(xmlFirstItemElement.text) + + # debugging + logging.debug(etree.tostring(xmlFirstItemElement)) + # end of debugging + + xml_first_label = xmlElement.find("..//label") + listnumber_text = xml_first_label.text + + xmlResult.append(djangoParseObject(xmlFirstItemElement,indent=True,listtype="unordered custom", 
listnumber=listnumber_text)) + + logging.debug("The length of the children of the first item: %s." % len(xmlFirstItem.getchildren())) + + # Process Child Elements which are Part of this item + if len(xmlFirstItem.getchildren()) >= 1: + logging.debug("len xmlFirstItem.getchildren is greater or equal 1") + for xmlChild in xmlFirstItem.iterchildren(): + xmlResult.append(djangoParseObject(xmlChild,indent=True)) - ############# - # Baustelle # - ############# - elif xml_first_child.tag == 'label': - logging.debug("a simple list with named items") - - # Change first item into EOAlistfirstitem - xmlFirstItem = xmlElement.find("..//item") - xmlFirstItemElement = xmlFirstItem.getchildren()[0] - logging.debug(xmlFirstItemElement.text) - - # debugging - logging.debug(etree.tostring(xmlFirstItemElement)) - # end of debugging - - xml_first_label = xmlElement.find("..//label") - listnumber_text = xml_first_label.text - - xmlResult.append(djangoParseObject(xmlFirstItemElement,indent=True,listtype="unordered custom", listnumber=listnumber_text)) - - logging.debug("The length of the children of the first item: %s." % len(xmlFirstItem.getchildren())) - - # Process Child Elements which are Part of this item - if len(xmlFirstItem.getchildren()) >= 1: - logging.debug("len xmlFirstItem.getchildren is greater or equal 1") - for xmlChild in xmlFirstItem.iterchildren(): + xmlFirstItem.getparent().remove(xmlFirstItem) + xml_first_label.getparent().remove(xml_first_label) + + all_the_labels = xmlElement.findall("label") + all_the_items = xmlElement.findall("item") + + logging.debug("itemlength %s." % len(all_the_items)) + logging.debug("labellength %s." % len(all_the_labels)) + + for listlabel, listitem in zip(all_the_labels, all_the_items): + logging.debug("listitem text %s." % listitem.text) + logging.debug("listlabel text %s." 
% listlabel.text) + xml_item_element = listitem.getchildren()[0] + xmlResult.append(djangoParseObject(xml_item_element, indent=True, listnumber=listlabel.text)) + + listlabel.getparent().remove(listlabel) + listitem.getparent().remove(listitem) + + # for xmlItem in xmlElement.iterchildren(): + # print("So many items have we: ", len(xmlItem)) + # xmlItemElement = xmlItem.getchildren()[0] + # xmlResult.append(djangoParseObject(xmlItemElement,indent=True)) + # if len(xmlItem.getchildren()) >= 1: + # for xmlChild in xmlItem.iterchildren(): + # xmlResult.append(djangoParseObject(xmlChild,indent=True)) + # xmlItem.getparent().remove(xmlItem) + ################## + # Ende Baustelle # + ################## + + elif xmlElement.tag == "list" and xmlElement.get('type') == 'description': + logging.debug("A description") + xmlResult = etree.Element("temp") + while len(xmlElement.getchildren()) != 0: + xmlDescription = etree.Element("EOAdescription") + xmlDescription.set("order", str(intObjectNumber)) + xmlLabel = xmlElement.getchildren()[0] + xmlItem = xmlElement.getchildren()[1] + if len(xmlItem.getchildren()) > 0: + xmlContent = xmlItem.getchildren()[0] + else: + xmlContent = etree.Element("p") + xmlLabel.tag = "description" + xmlDescription.append(xmlLabel) + xmlDescription.append(xmlContent) + xmlResult.append(xmlDescription) + intObjectNumber += 1 + if len(xmlItem.getchildren()) > 0: + for xmlChild in xmlItem.iterchildren(): xmlResult.append(djangoParseObject(xmlChild,indent=True)) - - xmlFirstItem.getparent().remove(xmlFirstItem) - xml_first_label.getparent().remove(xml_first_label) - - all_the_labels = xmlElement.findall("label") - all_the_items = xmlElement.findall("item") - - logging.debug("itemlength %s." % len(all_the_items)) - logging.debug("labellength %s." % len(all_the_labels)) - - for listlabel, listitem in zip(all_the_labels, all_the_items): - logging.debug("listitem text %s." % listitem.text) - logging.debug("listlabel text %s." 
% listlabel.text) - xml_item_element = listitem.getchildren()[0] - xmlResult.append(djangoParseObject(xml_item_element, indent=True, listnumber=listlabel.text)) - - listlabel.getparent().remove(listlabel) - listitem.getparent().remove(listitem) - - # for xmlItem in xmlElement.iterchildren(): - # print("So many items have we: ", len(xmlItem)) - # xmlItemElement = xmlItem.getchildren()[0] - # xmlResult.append(djangoParseObject(xmlItemElement,indent=True)) - # if len(xmlItem.getchildren()) >= 1: - # for xmlChild in xmlItem.iterchildren(): - # xmlResult.append(djangoParseObject(xmlChild,indent=True)) - # xmlItem.getparent().remove(xmlItem) - ################## - # Ende Baustelle # - ################## - - elif xmlElement.tag == "list" and xmlElement.get('type') == 'description': - logging.debug("A description") - xmlResult = etree.Element("temp") - while len(xmlElement.getchildren()) != 0: - xmlDescription = etree.Element("EOAdescription") - xmlDescription.set("order", str(intObjectNumber)) - xmlLabel = xmlElement.getchildren()[0] - xmlItem = xmlElement.getchildren()[1] - if len(xmlItem.getchildren()) > 0: - xmlContent = xmlItem.getchildren()[0] - else: - xmlContent = etree.Element("p") - xmlLabel.tag = "description" - xmlDescription.append(xmlLabel) - xmlDescription.append(xmlContent) - xmlResult.append(xmlDescription) + xmlItem.getparent().remove(xmlItem) + elif xmlElement.tag == "theorem": + xmlTheoremHead = xmlElement.find(".//head") + xmlTheoremText = xmlElement.find(".//p") + strTheoremNumber = xmlElement.get("id-text") + strTheoremID = xmlElement.get("id") + xmlResult = etree.Element("EOAtheorem") + xmlResult.append(xmlTheoremHead) + xmlResult.append(xmlTheoremText) + xmlResult.set("order", str(intObjectNumber)) + xmlResult.set("number", strTheoremNumber) + xmlResult.set("uid", strTheoremID) intObjectNumber += 1 - if len(xmlItem.getchildren()) > 0: - for xmlChild in xmlItem.iterchildren(): - xmlResult.append(djangoParseObject(xmlChild,indent=True)) - 
xmlItem.getparent().remove(xmlItem) - elif xmlElement.tag == "theorem": - xmlTheoremHead = xmlElement.find(".//head") - xmlTheoremText = xmlElement.find(".//p") - strTheoremNumber = xmlElement.get("id-text") - strTheoremID = xmlElement.get("id") - xmlResult = etree.Element("EOAtheorem") - xmlResult.append(xmlTheoremHead) - xmlResult.append(xmlTheoremText) - xmlResult.set("order", str(intObjectNumber)) - xmlResult.set("number", strTheoremNumber) - xmlResult.set("uid", strTheoremID) - intObjectNumber += 1 - elif xmlElement.findall(".//EOAequationarray"): - xmlResult = etree.Element("temp") - for xmlEquation in xmlElement.findall(".//EOAequation"): - xmlEOAequation = etree.Element("EOAequation") - xmlEOAequation.set("order", str(intObjectNumber)) + elif xmlElement.findall(".//EOAequationarray"): + xmlResult = etree.Element("temp") + for xmlEquation in xmlElement.findall(".//EOAequation"): + xmlEOAequation = etree.Element("EOAequation") + xmlEOAequation.set("order", str(intObjectNumber)) + intObjectNumber += 1 + xmlEOAequation.set("number", xmlEquation.get("number")) + xmlEOAequation.set("filename", xmlEquation.get("filename")) + if xmlEquation.get("label") is not None: + xmlEOAequation.set("label", xmlEquation.get("label")) + shutil.copy(os.getcwd() + "/items/" + xmlEquation.get("filename"), os.getcwd() + "/CONVERT/django/images/") + xmlEOAequation.set("TeX", xmlEquation.get("TeX")) + if xmlEquation.get("label") is not None: + xmlEOAequation.set("label", xmlEquation.get("label")) + xmlResult.append(xmlEOAequation) + elif xmlElement.findall(".//EOAequationarraynonumber"): + xmlResult = etree.Element("temp") + for xmlEquation in xmlElement.findall(".//EOAequationarraynonumber"): + xmlEOAequation = etree.Element("EOAequation") + xmlEOAequation.set("order", str(intObjectNumber)) + intObjectNumber += 1 + xmlEOAequation.set("number", "") + xmlEOAequation.set("filename", xmlEquation.get("filename")) + shutil.copy(os.getcwd() + "/items/" + xmlEquation.get("filename"), 
os.getcwd() + "/CONVERT/django/images/") + xmlEOAequation.set("TeX", xmlEquation.get("TeX")) + xmlResult.append(xmlEOAequation) + elif xmlElement.tag == "EOAequationnonumber": + # Process one EOAequation which is not encapsulated + xmlResult = etree.Element("EOAequation") + xmlResult.set("order", str(intObjectNumber)) intObjectNumber += 1 - xmlEOAequation.set("number", xmlEquation.get("number")) - xmlEOAequation.set("filename", xmlEquation.get("filename")) - if xmlEquation.get("label") is not None: - xmlEOAequation.set("label", xmlEquation.get("label")) - shutil.copy(os.getcwd() + "/items/" + xmlEquation.get("filename"), os.getcwd() + "/CONVERT/django/images/") - xmlEOAequation.set("TeX", xmlEquation.get("TeX")) - if xmlEquation.get("label") is not None: - xmlEOAequation.set("label", xmlEquation.get("label")) - xmlResult.append(xmlEOAequation) - elif xmlElement.findall(".//EOAequationarraynonumber"): - xmlResult = etree.Element("temp") - for xmlEquation in xmlElement.findall(".//EOAequationarraynonumber"): - xmlEOAequation = etree.Element("EOAequation") - xmlEOAequation.set("order", str(intObjectNumber)) + xmlResult.set("filename", xmlElement.get("filename")) + xmlResult.set("TeX", xmlElement.get("TeX")) + shutil.copy(os.getcwd() + "/items/" + xmlElement.get("filename"), os.getcwd() + "/CONVERT/django/images/") + xmlResult.set("number", "") + elif xmlElement.findall(".//EOAequation"): + # Process various Equations which may be encapsulated within

+ xmlEquations = xmlElement.findall(".//EOAequation") + xmlResult = etree.Element("temp") + for xmlEquation in xmlEquations: + # Create basic Element EOAequation + xmlEOAequation = etree.Element("EOAequation") + xmlEOAequation.set("order", str(intObjectNumber)) + intObjectNumber += 1 + xmlEOAequation.set("number", xmlEquation.get("number")) + xmlEOAequation.set("TeX", xmlEquation.get("TeX")) + if xmlEquation.get("uid") is not None: + xmlEOAequation.set("uid", xmlEquation.get("uid")) + shutil.copy(os.getcwd() + "/items/" + xmlEquation.get("filename"), os.getcwd() + "/CONVERT/django/images/") + xmlEOAequation.set("filename", xmlEquation.get("filename")) + xmlResult.append(xmlEOAequation) + elif xmlElement.tag == "EOAequation": + # Process one EOAequation which is not encapsulated + xmlResult = etree.Element("EOAequation") + xmlResult.set("order", str(intObjectNumber)) intObjectNumber += 1 - xmlEOAequation.set("number", "") - xmlEOAequation.set("filename", xmlEquation.get("filename")) - shutil.copy(os.getcwd() + "/items/" + xmlEquation.get("filename"), os.getcwd() + "/CONVERT/django/images/") - xmlEOAequation.set("TeX", xmlEquation.get("TeX")) - xmlResult.append(xmlEOAequation) - elif xmlElement.tag == "EOAequationnonumber": - # Process one EOAequation which is not encapsulated - xmlResult = etree.Element("EOAequation") - xmlResult.set("order", str(intObjectNumber)) - intObjectNumber += 1 - xmlResult.set("filename", xmlElement.get("filename")) - xmlResult.set("TeX", xmlElement.get("TeX")) - shutil.copy(os.getcwd() + "/items/" + xmlElement.get("filename"), os.getcwd() + "/CONVERT/django/images/") - xmlResult.set("number", "") - elif xmlElement.findall(".//EOAequation"): - # Process various Equations which may be encapsulated within

- xmlEquations = xmlElement.findall(".//EOAequation") - xmlResult = etree.Element("temp") - for xmlEquation in xmlEquations: - # Create basic Element EOAequation - xmlEOAequation = etree.Element("EOAequation") - xmlEOAequation.set("order", str(intObjectNumber)) + xmlResult.set("number", xmlElement.get("number")) + xmlResult.set("TeX", xmlElement.get("TeX")) + if xmlElement.get("uid") is not None: + xmlResult.set("uid", xmlElement.get("uid")) + shutil.copy(os.getcwd() + "/items/" + xmlElement.get("filename"), os.getcwd() + "/CONVERT/django/images/") + xmlResult.set("filename", xmlElement.get("filename")) + elif xmlElement.tag == "div3": + xmlResult = etree.Element("EOAsubsection") + xmlResult.set("order", str(intObjectNumber)) intObjectNumber += 1 - xmlEOAequation.set("number", xmlEquation.get("number")) - xmlEOAequation.set("TeX", xmlEquation.get("TeX")) - if xmlEquation.get("uid") is not None: - xmlEOAequation.set("uid", xmlEquation.get("uid")) - shutil.copy(os.getcwd() + "/items/" + xmlEquation.get("filename"), os.getcwd() + "/CONVERT/django/images/") - xmlEOAequation.set("filename", xmlEquation.get("filename")) - xmlResult.append(xmlEOAequation) - elif xmlElement.tag == "EOAequation": - # Process one EOAequation which is not encapsulated - xmlResult = etree.Element("EOAequation") - xmlResult.set("order", str(intObjectNumber)) - intObjectNumber += 1 - xmlResult.set("number", xmlElement.get("number")) - xmlResult.set("TeX", xmlElement.get("TeX")) - if xmlElement.get("uid") is not None: - xmlResult.set("uid", xmlElement.get("uid")) - shutil.copy(os.getcwd() + "/items/" + xmlElement.get("filename"), os.getcwd() + "/CONVERT/django/images/") - xmlResult.set("filename", xmlElement.get("filename")) - elif xmlElement.tag == "div3": - xmlResult = etree.Element("EOAsubsection") - xmlResult.set("order", str(intObjectNumber)) - intObjectNumber += 1 - xmlResult.append(xmlElement.find("head")) - for xmlChild in xmlElement.iterchildren(): - 
xmlResult.append(djangoParseObject(xmlChild)) - elif xmlElement.tag == "div4": - xmlResult = etree.Element("EOAsubsubsection") - xmlResult.set("order", str(intObjectNumber)) - intObjectNumber += 1 - xmlResult.append(xmlElement.find("head")) - for xmlChild in xmlElement.iterchildren(): - xmlResult.append(djangoParseObject(xmlChild)) - elif xmlElement.tag == "EOAverse": - xmlResult = etree.Element("EOAparagraph") - xmlResult.set("style", "verse") - xmlResult.set("order", str(intObjectNumber)) - intObjectNumber += 1 - - xml_verselines = xmlElement.findall("p") - xmlResult.append(deepcopy(xml_verselines[0])) - for xml_verseline in xml_verselines[1:]: - linebreak = etree.Element("br") - xmlResult.append(linebreak) - copied_line = deepcopy(xml_verseline) - xmlResult.append(copied_line) - etree.strip_tags(xmlResult, "p") - elif xmlElement.tag == "EOAbox": - logging.debug("Found a box") - xmlResult = etree.Element("temp") - xmlResult.set("style", "box") - - box_header = xmlElement.find("head") - box_header.tag = "EOAparagraph" - box_header.set("style", "box") - box_header.set("order", str(intObjectNumber)) - head_contents = box_header.find("p") - head_contents.tag = "b" - # etree.strip_tags(box_header, "p") - xmlResult.append(box_header) - intObjectNumber += 1 - # question: what to do about paragraph equivalent objects? 
- box_elements = xmlElement.getchildren() - logging.debug(len(box_elements)) - for box_element in box_elements: - if box_element.tag == "p": - box_element.tag = "EOAparagraph" - box_element.set("style", "box") - box_element.set("order", str(intObjectNumber)) - xmlResult.append(box_element) + xmlResult.append(xmlElement.find("head")) + for xmlChild in xmlElement.iterchildren(): + xmlResult.append(djangoParseObject(xmlChild)) + elif xmlElement.tag == "div4": + xmlResult = etree.Element("EOAsubsubsection") + xmlResult.set("order", str(intObjectNumber)) + intObjectNumber += 1 + xmlResult.append(xmlElement.find("head")) + for xmlChild in xmlElement.iterchildren(): + xmlResult.append(djangoParseObject(xmlChild)) + elif xmlElement.tag == "EOAverse": + xmlResult = etree.Element("EOAparagraph") + xmlResult.set("style", "verse") + xmlResult.set("order", str(intObjectNumber)) intObjectNumber += 1 - elif xmlElement.tag == "EOAtocentry": - # throw them out for the time being - xmlResult = etree.Element("temp") + xml_verselines = xmlElement.findall("p") + xmlResult.append(deepcopy(xml_verselines[0])) + for xml_verseline in xml_verselines[1:]: + linebreak = etree.Element("br") + xmlResult.append(linebreak) + copied_line = deepcopy(xml_verseline) + xmlResult.append(copied_line) + etree.strip_tags(xmlResult, "p") + elif xmlElement.tag == "EOAbox": + logging.debug("Found a box") + xmlResult = etree.Element("temp") + xmlResult.set("style", "box") + + box_header = xmlElement.find("head") + box_header.tag = "EOAparagraph" + box_header.set("style", "box") + box_header.set("order", str(intObjectNumber)) + head_contents = box_header.find("p") + head_contents.tag = "b" + # etree.strip_tags(box_header, "p") + xmlResult.append(box_header) + intObjectNumber += 1 + # question: what to do about paragraph equivalent objects? 
+ box_elements = xmlElement.getchildren() + logging.debug(len(box_elements)) + for box_element in box_elements: + if box_element.tag == "p": + box_element.tag = "EOAparagraph" + box_element.set("style", "box") + box_element.set("order", str(intObjectNumber)) + xmlResult.append(box_element) + intObjectNumber += 1 + elif xmlElement.tag == "EOAtocentry": + # throw them out for the time being + xmlResult = etree.Element("temp") + else: + xmlElement.tag = "EOAparagraph" + quoted_paragraph = xmlElement.get("rend") + if quoted_paragraph is not None and quoted_paragraph == "quoted": + xmlElement.set("rend", "quoted") + xmlElement.set("order", str(intObjectNumber)) + intObjectNumber += 1 + xmlResult = xmlElement else: - xmlElement.tag = "EOAparagraph" - quoted_paragraph = xmlElement.get("rend") - if quoted_paragraph is not None and quoted_paragraph == "quoted": - xmlElement.set("rend", "quoted") - xmlElement.set("order", str(intObjectNumber)) - intObjectNumber += 1 + print("SPECIAL: %s - %s" % (xmlElement, xmlElement.text)) xmlResult = xmlElement - if indent==True: xmlResult.set("indent", "True") if listtype != None: