Skip to content

Commit

Permalink
Move common functions for hyperlink formatting to library
Browse files Browse the repository at this point in the history
  • Loading branch information
kthoden committed Dec 18, 2019
1 parent b212f4a commit 48e666e
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 45 deletions.
37 changes: 2 additions & 35 deletions src/imxml2django.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python3
# -*- coding: utf-8; mode: python -*-
# Time-stamp: <2019-12-17 16:35:51 (kthoden)>
# Time-stamp: <2019-12-18 10:27:58 (kthoden)>

"""
Create an XML file that can be inserted into the Django database
Expand All @@ -24,7 +24,6 @@
import argparse
import configparser
import logging
from datetime import datetime
from copy import deepcopy
from lxml import etree
from pathlib import Path
Expand Down Expand Up @@ -1022,22 +1021,6 @@ def check_publication_cfg(configuration_file):
return
# def check_publication_cfg ends here

def format_date(accessed_date, language):
    """Return a localized "accessed on" phrase for an ISO date string.

    Parameters:
        accessed_date: date string in the form ``YYYY-MM-DD``.
        language: two-letter language code; ``"en"`` and ``"de"`` are
            supported.

    Returns:
        ``"accessed <Month> <day>, <year>"`` for English or
        ``"besucht am <dd>.<mm>.<yyyy>"`` for German.

    Raises:
        ValueError: if ``accessed_date`` does not match ``%Y-%m-%d``
            (raised by ``datetime.strptime``).
        SystemExit: with status 1 if ``language`` is not supported.
    """

    parsed_date = datetime.strptime(accessed_date, "%Y-%m-%d")

    if language == "en":
        # English style keeps the day unpadded: "March 5, 2019".
        accessed_string = f"accessed {parsed_date:%B} {parsed_date.day}, {parsed_date:%Y}"
    elif language == "de":
        # Zero-pad the day as well as the month so that the result reads
        # "05.03.2019" rather than the inconsistent "5.03.2019".
        accessed_string = f"besucht am {parsed_date:%d}.{parsed_date:%m}.{parsed_date:%Y}"
    else:
        logging.error("Got an unrecognized language: %s. Exiting.", language)
        sys.exit(1)

    return accessed_string
# def format_date ends here


# Iterate over Chapters, Sections, Subsections, and Subsubsections and
# Put all on one level: EOAchapter
Expand Down Expand Up @@ -1439,23 +1422,7 @@ def bring_footnote_down_django(footnote, fragment, footnote_number, object_numbe
del xmlEmph.attrib["rend"]
xmlHyperlinks = xmlEOAchapter.findall(".//xref")
for xmlHyperlink in xmlHyperlinks:
strURL = xmlHyperlink.get('url')
if strURL.startswith("http://") == False:
if strURL.startswith("https://") == False:
strURL = "http://" + strURL
xmlHyperlink.tag = "a"
del xmlHyperlink.attrib["url"]
xmlHyperlink.set("href", strURL)
etree.strip_elements(xmlHyperlink, with_tail=True, *['allowbreak'])
accessed_date_element = xmlHyperlink.find("./date")
accessed_date = accessed_date_element.get("when")
formatted_date = format_date(accessed_date, libeoaconvert.two_letter_language(strLanguage))
# etree.strip_elements(accessed_date_element, with_tail=True)
accessed_date_element.tag = "elementtobestripped"
accessed_date_element.tail = ""
url_tail = xmlHyperlink.tail
xmlHyperlink.tail = f", {formatted_date}{url_tail}"
xmlHyperlink.text = strURL
libeoaconvert.format_hyperlinks_django_epub(xmlHyperlink, strLanguage)
# Convert bold text
xmlBolds = xmlEOAchapter.findall(".//EOAbold")
for xmlBold in xmlBolds:
Expand Down
19 changes: 9 additions & 10 deletions src/imxml2epub.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python3
# -*- coding: utf-8; mode: python -*-
# Time-stamp: <2019-12-18 09:44:30 (kthoden)>
# Time-stamp: <2019-12-18 10:13:44 (kthoden)>

""" Convert a customized DocBook XML file into a set of files that
constitute the contents of an EPUB file.
Expand Down Expand Up @@ -1204,17 +1204,16 @@ class FootnoteError(Exception):

logging.info(f"{logseparator}Preparing Hyperlinks")
for xmlChapter in xmlChapters:
xmlLanguage = xmlChapter.get("language")
if xmlLanguage is not None:
# KT changing this after separating the big script
strLanguage = xmlLanguage #or "english"
else:
strLanguage = "english"

xmlHyperlinks = xmlChapter.findall(".//xref")
for xmlHyperlink in xmlHyperlinks:
strURL = xmlHyperlink.get('url')
if strURL.startswith("http://") == False:
if strURL.startswith("https://") == False:
strURL = "http://" + strURL
xmlHyperlink.tag = "a"
del xmlHyperlink.attrib["url"]
xmlHyperlink.set("href", strURL)
etree.strip_elements(xmlHyperlink, with_tail=True, *['allowbreak'])
xmlHyperlink.text = strURL
libeoaconvert.format_hyperlinks_django_epub(xmlHyperlink, strLanguage)

logging.info(f"{logseparator}Convert emphasized text")
for xmlChapter in xmlChapters:
Expand Down
47 changes: 47 additions & 0 deletions src/utils/libeoaconvert.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import shlex
import logging
import configparser
from datetime import datetime
from lxml import etree
from lxml.html import soupparser
from pathlib import Path
Expand Down Expand Up @@ -484,3 +485,49 @@ def escape_xml(text_bytes):

return text
# def escape_xml ends here


def format_hyperlinks_django_epub(xmlHyperlink, strLanguage):
    """Convert an IMXML hyperlink element to an ``a`` element in place.

    The element's ``url`` attribute is moved to ``href`` (prefixed with
    ``http://`` when it carries neither an ``http://`` nor an
    ``https://`` scheme), ``allowbreak`` children are stripped, the URL
    becomes the link text, and a localized "accessed" date taken from
    the ``date`` child's ``when`` attribute is prepended to the
    element's tail.

    Parameters:
        xmlHyperlink: the element to convert; it is modified in place.
        strLanguage: language name that is passed to
            ``two_letter_language`` to select the date wording.

    Returns:
        None.

    Raises:
        SystemExit: with status 1 if the element has no ``url``
            attribute or no ``date`` child with a ``when`` attribute.
    """

    strURL = xmlHyperlink.get('url')
    if strURL is None:
        # Without this guard the startswith() call below would fail
        # with an AttributeError that does not name the problem.
        logging.error("Found a hyperlink without url attribute. Exiting.")
        sys.exit(1)

    if not strURL.startswith(("http://", "https://")):
        strURL = "http://" + strURL

    xmlHyperlink.tag = "a"
    del xmlHyperlink.attrib["url"]
    xmlHyperlink.set("href", strURL)
    etree.strip_elements(xmlHyperlink, 'allowbreak', with_tail=True)

    accessed_date_element = xmlHyperlink.find("./date")
    accessed_date = None
    if accessed_date_element is not None:
        accessed_date = accessed_date_element.get("when")

    if accessed_date is None:
        logging.error(f"Found no accessed date at url {strURL}. Exiting.")
        # Exit with a non-zero status: this is an error, not a regular end.
        sys.exit(1)

    formatted_date = format_date(accessed_date, two_letter_language(strLanguage))

    # The date element is only renamed here, not removed; the placeholder
    # tag name suggests it is stripped in a later step outside this function.
    accessed_date_element.tag = "elementtobestripped"
    accessed_date_element.tail = ""

    # The tail is None when nothing follows the link; fall back to an
    # empty string so the literal text "None" is not written out.
    url_tail = xmlHyperlink.tail or ""
    xmlHyperlink.tail = f", {formatted_date}{url_tail}"
    xmlHyperlink.text = strURL

    return
# def format_hyperlinks_django_epub ends here


def format_date(accessed_date, language):
    """Build the localized "accessed" phrase for a ``YYYY-MM-DD`` date.

    ``language`` is a two-letter code. ``"en"`` yields
    ``"accessed <Month> <day>, <year>"`` and ``"de"`` yields
    ``"besucht am <dd>.<mm>.<yyyy>"``. Any other code is logged as an
    error and the program exits with status 1.
    """

    # Parse first, so that a malformed date is reported before the
    # language is looked at.
    when = datetime.strptime(accessed_date, "%Y-%m-%d")

    if language == "en":
        return f"accessed {when:%B} {when.day}, {when:%Y}"

    if language == "de":
        return f"besucht am {when:%d}.{when:%m}.{when:%Y}"

    logging.error("Got an unrecognized language: %s. Exiting.", language)
    sys.exit(1)
# def format_date ends here

0 comments on commit 48e666e

Please sign in to comment.