diff --git a/src/imxml2django.py b/src/imxml2django.py
index b1995f6..dfe51d0 100755
--- a/src/imxml2django.py
+++ b/src/imxml2django.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8; mode: python -*-
-# Time-stamp: <2019-12-17 16:35:51 (kthoden)>
+# Time-stamp: <2019-12-18 10:27:58 (kthoden)>
 
 """
 Create an XML file that can be inserted into the Django database
@@ -24,7 +24,6 @@
 import argparse
 import configparser
 import logging
-from datetime import datetime
 from copy import deepcopy
 from lxml import etree
 from pathlib import Path
@@ -1022,22 +1021,6 @@ def check_publication_cfg(configuration_file):
     return
 # def check_publication_cfg ends here
 
-def format_date(accessed_date, language):
-    """Format date string"""
-
-    parsed_date = datetime.strptime(accessed_date, "%Y-%m-%d")
-
-    if language == "en":
-        accessed_string = f"accessed {parsed_date:%B} {parsed_date.day}, {parsed_date:%Y}"
-    elif language == "de":
-        accessed_string = f"besucht am {parsed_date.day}.{parsed_date:%m}.{parsed_date:%Y}"
-    else:
-        logging.error("Got an unrecognized language: %s. Exiting.", language)
-        sys.exit(1)
-
-    return accessed_string
-# def format_date ends here
-
 # Iterate over Chapters, Sections, Subsections, and Subsubsections and
 # Put all on one level: EOAchapter
 
@@ -1439,23 +1422,7 @@ def bring_footnote_down_django(footnote, fragment, footnote_number, object_numbe
         del xmlEmph.attrib["rend"]
     xmlHyperlinks = xmlEOAchapter.findall(".//xref")
     for xmlHyperlink in xmlHyperlinks:
-        strURL = xmlHyperlink.get('url')
-        if strURL.startswith("http://") == False:
-            if strURL.startswith("https://") == False:
-                strURL = "http://" + strURL
-        xmlHyperlink.tag = "a"
-        del xmlHyperlink.attrib["url"]
-        xmlHyperlink.set("href", strURL)
-        etree.strip_elements(xmlHyperlink, with_tail=True, *['allowbreak'])
-        accessed_date_element = xmlHyperlink.find("./date")
-        accessed_date = accessed_date_element.get("when")
-        formatted_date = format_date(accessed_date, libeoaconvert.two_letter_language(strLanguage))
-        # etree.strip_elements(accessed_date_element, with_tail=True)
-        accessed_date_element.tag = "elementtobestripped"
-        accessed_date_element.tail = ""
-        url_tail = xmlHyperlink.tail
-        xmlHyperlink.tail = f", {formatted_date}{url_tail}"
-        xmlHyperlink.text = strURL
+        libeoaconvert.format_hyperlinks_django_epub(xmlHyperlink, strLanguage)
     # Convert bold text
     xmlBolds = xmlEOAchapter.findall(".//EOAbold")
     for xmlBold in xmlBolds:
diff --git a/src/imxml2epub.py b/src/imxml2epub.py
index ed7d6dd..82a86e6 100755
--- a/src/imxml2epub.py
+++ b/src/imxml2epub.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8; mode: python -*-
-# Time-stamp: <2019-12-18 09:44:30 (kthoden)>
+# Time-stamp: <2019-12-18 10:13:44 (kthoden)>
 
 """
 Convert a customized DocBook XML file into a set of files that constitute the contents of an EPUB file.
@@ -1204,17 +1204,16 @@ class FootnoteError(Exception):
 
 logging.info(f"{logseparator}Preparing Hyperlinks")
 for xmlChapter in xmlChapters:
+    xmlLanguage = xmlChapter.get("language")
+    if xmlLanguage is not None:
+        # KT changing this after separating the big script
+        strLanguage = xmlLanguage #or "english"
+    else:
+        strLanguage = "english"
+
     xmlHyperlinks = xmlChapter.findall(".//xref")
     for xmlHyperlink in xmlHyperlinks:
-        strURL = xmlHyperlink.get('url')
-        if strURL.startswith("http://") == False:
-            if strURL.startswith("https://") == False:
-                strURL = "http://" + strURL
-        xmlHyperlink.tag = "a"
-        del xmlHyperlink.attrib["url"]
-        xmlHyperlink.set("href", strURL)
-        etree.strip_elements(xmlHyperlink, with_tail=True, *['allowbreak'])
-        xmlHyperlink.text = strURL
+        libeoaconvert.format_hyperlinks_django_epub(xmlHyperlink, strLanguage)
 
 logging.info(f"{logseparator}Convert emphasized text")
 for xmlChapter in xmlChapters:
diff --git a/src/utils/libeoaconvert.py b/src/utils/libeoaconvert.py
index 2bd1be5..2d01e61 100644
--- a/src/utils/libeoaconvert.py
+++ b/src/utils/libeoaconvert.py
@@ -11,6 +11,7 @@
 import shlex
 import logging
 import configparser
+from datetime import datetime
 from lxml import etree
 from lxml.html import soupparser
 from pathlib import Path
@@ -485,3 +485,53 @@ def escape_xml(text_bytes):
 
     return text
 # def escape_xml ends here
+
+
+def format_hyperlinks_django_epub(xmlHyperlink, strLanguage):
+    """Convert IMXML xref element to an HTML link and append localized accessed date
+
+    The element is modified in place: it is renamed to "a", its url
+    attribute becomes href, and the formatted date of its date child is
+    put into the tail. The program exits if there is no date child.
+    """
+
+    strURL = xmlHyperlink.get('url')
+    if not strURL.startswith(("http://", "https://")):
+        strURL = "http://" + strURL
+    xmlHyperlink.tag = "a"
+    del xmlHyperlink.attrib["url"]
+    xmlHyperlink.set("href", strURL)
+    etree.strip_elements(xmlHyperlink, with_tail=True, *['allowbreak'])
+    accessed_date_element = xmlHyperlink.find("./date")
+    if accessed_date_element is not None:
+        accessed_date = accessed_date_element.get("when")
+        formatted_date = format_date(accessed_date, two_letter_language(strLanguage))
+        # etree.strip_elements(accessed_date_element, with_tail=True)
+        accessed_date_element.tag = "elementtobestripped"
+        accessed_date_element.tail = ""
+        # a missing tail is None in lxml; keep the text "None" out of the output
+        url_tail = xmlHyperlink.tail or ""
+        xmlHyperlink.tail = f", {formatted_date}{url_tail}"
+        xmlHyperlink.text = strURL
+    else:
+        logging.error(f"Found no accessed date at url {strURL}. Exiting.")
+        sys.exit(1)
+
+    return
+# def format_hyperlinks_django_epub ends here
+
+
+def format_date(accessed_date, language):
+    """Format date string"""
+
+    parsed_date = datetime.strptime(accessed_date, "%Y-%m-%d")
+
+    if language == "en":
+        accessed_string = f"accessed {parsed_date:%B} {parsed_date.day}, {parsed_date:%Y}"
+    elif language == "de":
+        accessed_string = f"besucht am {parsed_date:%d}.{parsed_date:%m}.{parsed_date:%Y}"
+    else:
+        logging.error("Got an unrecognized language: %s. Exiting.", language)
+        sys.exit(1)
+
+    return accessed_string
+# def format_date ends here