From 53f737f5a2d4f1f4752b365fb1cd871c16301a72 Mon Sep 17 00:00:00 2001 From: kthoden Date: Fri, 25 Sep 2020 15:05:23 +0200 Subject: [PATCH] Enable paragraph linking in EPUB --- src/data/epub_files/icon-translation.svg | 89 ++++++++++++++++++++++++ src/imxml2epub.py | 53 +++++++++++++- 2 files changed, 140 insertions(+), 2 deletions(-) create mode 100644 src/data/epub_files/icon-translation.svg diff --git a/src/data/epub_files/icon-translation.svg b/src/data/epub_files/icon-translation.svg new file mode 100644 index 0000000..2cf59c4 --- /dev/null +++ b/src/data/epub_files/icon-translation.svg @@ -0,0 +1,89 @@ + +image/svg+xml \ No newline at end of file diff --git a/src/imxml2epub.py b/src/imxml2epub.py index 3c35199..cb255ba 100755 --- a/src/imxml2epub.py +++ b/src/imxml2epub.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -*- coding: utf-8; mode: python -*- -# Time-stamp: <2020-09-25 08:08:41 (kthoden)> +# Time-stamp: <2020-09-25 14:10:21 (kthoden)> """ Convert a customized DocBook XML file into a set of files that constitute the contents of an EPUB file. 
@@ -292,6 +292,7 @@ def addToContentopf(contentopf, Filename, FileID, Mediatype):
         "xml" : "application/xhtml+xml",
         "jpg" : "image/jpeg",
         "png" : "image/png",
+        "svg" : "image/svg+xml",
         "bitstream" : "application/octet-stream"
     }
     contentopfns = "{http://www.idpf.org/2007/opf}"
@@ -1835,6 +1836,78 @@ class FootnoteError(Exception):
     if xmlPublicationreference.get("rel") == "popover":
         xmlPublicationreference.tag = "EOAcitation"
 
+
+logging.info(f"{logseparator}Creating paragraph links")
+libeoaconvert.debug_xml_here(
+    xmlEbookTree,
+    "epubtree_beforeparagraphlinks",
+    DEBUG_DIR
+)
+
+# Paragraphs with a corresp attribute reference a parallel paragraph by
+# "#<xml:id>". Each of them gets a leading icon link to that paragraph.
+XML_ID_ATTRIBUTE = "{http://www.w3.org/XML/1998/namespace}id"
+
+# for correct linking, get all parts and chapters
+xml_parts_and_chapters = xmlEbookTree.xpath("//div0|//div1")
+paragraphs_with_corresp = xmlEbookTree.xpath("//p[@corresp]")
+if paragraphs_with_corresp:
+    # the icon is only copied and listed in content.opf when it is used
+    shutil.copy(
+        EPUB_FILES / "icon-translation.svg",
+        OUTPUT_DIR / "OEBPS/images/icon-translation.svg"
+    )
+
+    contentopf = addToContentopf(
+        contentopf,
+        "images/icon-translation.svg",
+        "icontranslation",
+        "svg"
+    )
+
+# Index every identified paragraph once, so that the loop below neither
+# re-scans the tree per link nor builds XPath expressions from attribute values.
+paragraphs_by_id = {}
+for identified_paragraph in xmlEbookTree.xpath("//p[@xml:id]"):
+    paragraphs_by_id.setdefault(
+        identified_paragraph.get(XML_ID_ATTRIBUTE), []
+    ).append(identified_paragraph)
+
+for pc in paragraphs_with_corresp:
+    # corresp is expected to look like "#target"; drop the leading character
+    corresponding_attribute = pc.get("corresp")[1:]
+    corresponding_paragraph = paragraphs_by_id.get(corresponding_attribute, [])
+    if not corresponding_paragraph:
+        logging.error("There seems to be no corresponding xml:id for %s. Exiting.", corresponding_attribute)
+        sys.exit(1)
+    if len(corresponding_paragraph) > 1:
+        logging.error("The xml:id %s has been assigned more than once. This is not allowed. Exiting.", corresponding_attribute)
+        sys.exit(1)
+
+    eoa_id_element = corresponding_paragraph[0]
+    # Without this check a target outside of any chapter would raise a
+    # NameError or silently reuse the chapter of the previous paragraph.
+    chapter_element = next(eoa_id_element.iterancestors("div1"), None)
+    if chapter_element is None:
+        logging.error("The paragraph with xml:id %s is not part of a chapter. Exiting.", corresponding_attribute)
+        sys.exit(1)
+    chapter_element_index = xml_parts_and_chapters.index(chapter_element) + 1
+
+    paratext_link = etree.Element("a")
+    own_id = pc.get(XML_ID_ATTRIBUTE)
+    if own_id is not None:
+        # a paragraph without xml:id still gets its link, just no anchor id
+        paratext_link.set("id", own_id)
+    paratext_link.set("href", f"chapter{chapter_element_index}.xhtml#{corresponding_attribute}")
+    paratext_link.set("class", "paralleltext")
+    etree.SubElement(paratext_link, "img", src="images/icon-translation.svg", alt="link to parallel text", height="14px")
+
+    # An element's text always precedes its children, so the paragraph's
+    # leading text has to move into the link's tail for the icon to come first.
+    paratext_link.tail = " " + (pc.text or "")
+    pc.text = None
+    pc.insert(0, paratext_link)
+
 ##############################################################
 # Finish ePub Conversion, save File #
 ##############################################################
@@ -1846,7 +1919,7 @@ class FootnoteError(Exception):
     xmlIndexentry.clear()
     xmlIndexentry.tail = tmpTail
 etree.strip_tags(xmlEbookTree, "EOAlabel", "EOAindex", "EOApageref", "EOAcitenumeric", "EOAtable", "EOAref", "note", "div", "div2", "div3", "div4", "div5", "citetext", "newpage", "EOAciteyear", "EOAtablelabel" , "hi", "pagebreak", "page", "pagestyle", "EOAcitation", "EOAciteauthoryear", "EOAcitemanual", "EOAprintbibliography", "EOAindexperson", "EOAprintindex", "EOAindexlocation", "EOAprintpersonindex", "EOAprintlocationindex","anchor", "temp", "EOAletterhead", "EOAhifigure", "EOAtocentry","tagtobestripped")
-etree.strip_attributes(xmlEbookTree, "id-text", "noindent", "type", "label", "spacebefore", "rend", "hielement") # also contained "id"
+etree.strip_attributes(xmlEbookTree, "id-text", "noindent", "type", "label", "spacebefore", "rend", "hielement", "corresp") # also contained "id"
 etree.strip_elements(xmlEbookTree, "citekey", "originalcontents", "elementtoberemoved", with_tail=False)
 
 logging.info("Write every Part and Chapter into one file")