diff --git a/imxml2epub.py b/imxml2epub.py
index 32b0669..f6e197c 100755
--- a/imxml2epub.py
+++ b/imxml2epub.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8; mode: python -*-
-# Time-stamp: <2018-03-19 13:13:47 (kthoden)>
+# Time-stamp: <2018-04-24 17:07:01 (kthoden)>
 
 import os
 import sys
@@ -11,6 +11,7 @@ import pickle
 import shlex
 import subprocess
+import logging
 from copy import deepcopy
 from lxml import etree
 import libeoaconvert
 
@@ -20,7 +21,7 @@
 #####################
 parser = argparse.ArgumentParser()
 parser.add_argument("-c", "--config", dest="CONFIG_FILE", help="Name of configuration file", metavar="CONFIGURATION")
-
+parser.add_argument("-f", "--font", help="Font to be used, default is TeX Gyre Termes", default="termes")
 args = parser.parse_args()
 
 if args.CONFIG_FILE is not None:
@@ -76,9 +77,50 @@ dictTables = data["tabdict"]
 dictPagelabels = data["pagelabeldict"]
 
 
+def remove_processinginstruction(xml_tree, pi_name):
+    """Remove all processing instructions named pi_name from xml_tree.
+
+    The tree is modified in place and returned.  Text that follows a removed
+    instruction (its tail) is moved to the preceding sibling or, if there is
+    none, to the text of the parent element.
+    """
+
+    proc_insts = xml_tree.xpath("//processing-instruction('{}')".format(pi_name))
+    removed = 0
+
+    # etree.strip_tags() is not used here: the tag of a processing instruction
+    # is the etree.ProcessingInstruction factory, so stripping by
+    # instruction.tag would drop processing instructions of every name below
+    # the parent, not only those called pi_name.
+    for instruction in proc_insts:
+        parent = instruction.getparent()
+        if parent is None:
+            # Instruction outside the root element: there is no parent to
+            # detach it from, so it is left in place.
+            continue
+
+        # Removing a node also removes its tail, so hand the tail over first.
+        tail = instruction.tail
+        if tail:
+            previous = instruction.getprevious()
+            if previous is not None:
+                previous.tail = (previous.tail or "") + tail
+            else:
+                parent.text = (parent.text or "") + tail
+
+        parent.remove(instruction)
+        removed += 1
+
+    logging.debug("Removed %s processing instructions of type %s.", removed, pi_name)
+
+    return xml_tree
+# def remove_processinginstruction ends here
+
 def addToContentopf(contentopf, Filename, FileID, Mediatype):
     """Function to add Elements to Content-OPF (epub)"""
 
+    # logging.debug("considering adding %s with FileID %s to content.opf" % (Filename, FileID))
+
     global listContentopf
     # Sanitizing FileID, id-attribute may not contain _ : or /
     # FileID may also not start with a number
@@ -89,6 +131,7 @@ def addToContentopf(contentopf, Filename, FileID, Mediatype):
     FileID = re.sub("^[0-9]", "", FileID)
     FileID = re.sub("^[0-9]", "", FileID)
     if FileID in listContentopf:
+        # logging.debug("Not adding %s, because something with a FileID %s is already there" % (Filename, FileID))
         return contentopf
     else:
         # Sanitizing FileID, id-attribute may not contain _ : or /
@@ -100,6 +143,8 @@ def addToContentopf(contentopf, Filename, FileID, Mediatype):
         FileID = re.sub("^[0-9]", "", FileID)
         FileID = re.sub("^[0-9]", "", FileID)
         dictMediatypes = {
+            "txt" : "text/plain",
+            "otf" : "application/vnd.ms-opentype",
             "xml" : "application/xhtml+xml",
             "jpg" : "image/jpeg",
             "png" : "image/png"
@@ -111,6 +156,8 @@ def addToContentopf(contentopf, Filename, FileID, Mediatype):
         xmlItem.set("media-type", dictMediatypes[Mediatype])
         xmlItem.set("href", Filename)
         xmlManifest.append(xmlItem)
+        # logging.debug("Added %s, with FileID %s" % (Filename, FileID))
+
         # if it's a XML-File also extent
         if Mediatype == "xml":
             xmlSpine = contentopf.find(".//" + contentopfns + "spine")
@@ -158,11 +205,52 @@ def addToTocncx(tocncx, Label, intTechnicalChapterNumber):
 # Copy containter.xml and mimetype
 shutil.copy(EPUB_FILES + "epubcontainer.xml", os.getcwd() + "/CONVERT/epub/META-INF/container.xml")
 shutil.copy(EPUB_FILES + "epubmimetype", os.getcwd() + "/CONVERT/epub/mimetype")
-shutil.copy(EPUB_FILES + "eoa-epub.css", os.getcwd() + "/CONVERT/epub/OEBPS/")
-shutil.copy(EPUB_FILES + "texgyretermes-bold.otf", os.getcwd() + "/CONVERT/epub/OEBPS/fonts/")
-shutil.copy(EPUB_FILES + "texgyretermes-bolditalic.otf", os.getcwd() + "/CONVERT/epub/OEBPS/fonts/")
-shutil.copy(EPUB_FILES + "texgyretermes-italic.otf", os.getcwd() + "/CONVERT/epub/OEBPS/fonts/")
-shutil.copy(EPUB_FILES + "texgyretermes-regular.otf", os.getcwd() + "/CONVERT/epub/OEBPS/fonts/")
+
+# Preparing content.opf
+xmlContentopfParser = etree.XMLParser(no_network=False,load_dtd=False)
+contentopf = etree.parse(EPUB_FILES + "epubcontentopf.xml", xmlContentopfParser)
+
+# This list includes all files which have already been included to avoid duplicates
+listContentopf = []
+
+#########
+# Fonts #
+#########
+libertine_fonts = ["GPL.txt", "LICENCE.txt", "LinLibertine_R.otf", "LinLibertine_RI.otf", "LinLibertine_RZ.otf", "LinLibertine_RZI.otf", "OFL-1.1.txt"]
+termes_fonts = ["texgyretermes-bold.otf", "texgyretermes-bolditalic.otf", "texgyretermes-italic.otf", "texgyretermes-regular.otf"]
+
+if args.font == "termes":
+    font_files = termes_fonts
+    shutil.copy(EPUB_FILES + "eoa-epub-termes.css", os.getcwd() + "/CONVERT/epub/OEBPS/eoa-epub.css")
+elif args.font == "libertine":
+    shutil.copy(EPUB_FILES + "eoa-epub-libertine.css", os.getcwd() + "/CONVERT/epub/OEBPS/eoa-epub.css")
+    font_files = libertine_fonts
+else:
+    logging.info("Font not recognized, falling back to default.")
+    # The fallback has to select the font files as well; without this the
+    # loop below fails with a NameError for any unrecognized --font value.
+    font_files = termes_fonts
+    shutil.copy(EPUB_FILES + "eoa-epub-termes.css", os.getcwd() + "/CONVERT/epub/OEBPS/eoa-epub.css")
+
+otf_id_counter = 1
+txt_id_counter = 1
+
+# NOTE(review): addToContentopf() reads module-level names such as
+# contentopfns; confirm they are already defined when this loop runs.
+for fontfile in font_files:
+    shutil.copy(EPUB_FILES + fontfile, os.getcwd() + "/CONVERT/epub/OEBPS/fonts/")
+
+    base_file_name, file_extension = os.path.splitext(fontfile)
+
+    if file_extension == ".otf":
+        contentopf = addToContentopf(contentopf, "fonts/" + fontfile, "otf-font" + str(otf_id_counter), file_extension[1:])
+        otf_id_counter += 1
+    elif file_extension == ".txt":
+        contentopf = addToContentopf(contentopf, "fonts/" + fontfile, "font-txt" + str(txt_id_counter), file_extension[1:])
+        txt_id_counter += 1
+    else:
+        print("Other file found. Exiting")
+        sys.exit(1)
 
 # Shortcut for namespace
 htmlns = "{http://www.w3.org/1999/xhtml}"
@@ -174,10 +262,6 @@ def addToTocncx(tocncx, Label, intTechnicalChapterNumber):
 xmlTocncxParser = etree.XMLParser(no_network=False,load_dtd=False)
 tocncx = etree.parse(EPUB_FILES + "epubtocncx.xml", xmlTocncxParser)
 
-# Preparing content.opf
-xmlContentopfParser = etree.XMLParser(no_network=False,load_dtd=False)
-contentopf = etree.parse(EPUB_FILES + "epubcontentopf.xml", xmlContentopfParser)
-
 print("-----------------------------------------------------")
 print("Preparing content.opf")
 xmlMetadata = contentopf.find(".//{http://www.idpf.org/2007/opf}metadata")
@@ -308,13 +392,13 @@ def addToTocncx(tocncx, Label, intTechnicalChapterNumber):
     xmlText.text = strAuthorString
     xmlAuthor.append(xmlText)
 
-# This list includes all files which have already been included to avoid duplicates
-listContentopf = []
-
 ##############################################################
 # Convert Tralics-XML to Epub #
 ##############################################################
 
+#xmlTree = remove_processinginstruction(xmlTree, 'hyperimage')
+
+
 # Copy xmlTree to xmlEbookTree
 xmlEbookTree = deepcopy(xmlTree)
 # xmlChapters is a list containing all chapters
@@ -329,8 +413,8 @@ def addToTocncx(tocncx, Label, intTechnicalChapterNumber):
     xmlChapter.find("head").tag = "h1"
     if xmlChapter.get("rend") != "nonumber":
         idChapter = xmlChapter.get("id")
-        print(idChapter + " konvertierung into h1")
-        print(dictChapters[idChapter])
+        # print(idChapter + " konvertierung into h1")
+        # print(dictChapters[idChapter])
         strHeadline = xmlChapter.find("h1").text or ""
         xmlChapter.find("h1").text = str(dictChapters[idChapter]) + ". " + strHeadline
     if xmlChapter.find(".//EOAauthor") is not None:
@@ -342,7 +426,7 @@ def addToTocncx(tocncx, Label, intTechnicalChapterNumber):
         xmlChapter.find(".//EOAauthor").text = ""
         xmlChapter = etree.strip_tags(xmlChapter, "EOAauthor")
 
-print(dictSections)
+# print(dictSections)
 
 print("-----------------------------------------------------")
 print("Convert EOAsection to H2")