Skip to content
Permalink
master
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
#!/usr/bin/env python3
# -*- coding: utf-8; mode: python -*-
"""
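A wrapper for creating a static version using wget.

As far as this file shows, the script itself only post-processes HTML
files that already exist on disk: for the Hyperimage-enabled chapters
listed in a JSON configuration file it repairs versioned file references
and Hyperimage links in each chapter's index.html, after writing a
backup copy of that file.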
"""
__version__ = "1.0"
__date__ = "20200305"
__author__ = "kthoden@mpiwg-berlin.mpg.de"
import argparse
import logging
import re
import shutil
import json
from bs4 import BeautifulSoup
from pathlib import Path
# Directory containing this script; used to locate the default config file.
BASE_DIR = Path(__file__).resolve().parent

# Configure the root logger once at import time: show everything from DEBUG
# upwards, prefixed with timestamp and level.
logging.basicConfig(
    format=' %(asctime)s - %(levelname)s - %(message)s',
    level=logging.DEBUG,
)
def get_hi_publications(json_config):
    """Load the Hyperimage-enabled publications from a JSON config file.

    The value stored under the top-level ``hi_enabled`` key is returned
    unchanged.  It is expected to be a list of dictionaries with the keys
    ``series``, ``number`` and ``chapters``, for example::

        [{'series': 'studies', 'number': 12, 'chapters': [2, 3, 7, 8, 11, 14, 15, 18]}]

    Args:
        json_config: Path of the JSON file (``str`` or ``pathlib.Path``).

    Returns:
        The object found under the ``hi_enabled`` key.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the file is not valid UTF-8 or not valid JSON
            (``json.JSONDecodeError`` and ``UnicodeDecodeError`` are both
            subclasses of ``ValueError``).
        KeyError: If the ``hi_enabled`` key is missing.
    """
    # JSON text is UTF-8 by specification; do not depend on the locale's
    # default encoding, which differs between platforms.
    with open(json_config, "r", encoding="utf-8") as json_data:
        json_object = json.load(json_data)
    return json_object["hi_enabled"]
# def get_hi_publications ends here
def replace_header(html_data):
    """Turn the escaped ``%3F`` in ``href`` attributes back into ``?``.

    This repairs versioned file references such as ``min.css?v=1.0.css``.
    Every element whose ``href`` contains ``%3F`` is updated in place.

    Args:
        html_data: Parsed document offering ``find_all``; in this file it
            is the BeautifulSoup object built in ``modify_publications``.

    Returns:
        The same ``html_data`` object, after modification.
    """
    escaped_question_mark = re.compile('%3F')
    for element in html_data.find_all(href=escaped_question_mark):
        element["href"] = element["href"].replace('%3F', '?')
    return html_data
# def replace_header ends here
def fix_hi_link(html_data):
    """Remove ``index.html`` from the ``href`` of every Hyperimage link.

    All elements carrying the class ``HILink`` are looked up and every
    occurrence of ``index.html`` in their ``href`` value is deleted.  The
    attribute is read by direct indexing, so each matched element is
    expected to have an ``href``.

    Args:
        html_data: Parsed document offering ``find_all``; in this file it
            is the BeautifulSoup object built in ``modify_publications``.

    Returns:
        The same ``html_data`` object, after modification.
    """
    for hi_link in html_data.find_all(class_="HILink"):
        target = hi_link["href"]
        hi_link["href"] = target.replace('index.html', '')
    return html_data
# def fix_hi_link ends here
def modify_publications(hi_publications, staticfilepath):
    """Based on JSON config, perform necessary modifications.

    For every chapter of every publication the file
    ``<staticfilepath>/<series, lower-cased>/<number>/<chapter>/index.html``
    is copied to ``index-bak.html`` in the same directory, parsed, passed
    through ``replace_header`` and ``fix_hi_link`` and written back in
    place.

    Args:
        hi_publications: Iterable of dictionaries with the keys ``series``,
            ``number`` and ``chapters`` (as returned by
            ``get_hi_publications``).
        staticfilepath: Root directory of the static files
            (``str`` or ``pathlib.Path``).

    Raises:
        OSError: If a chapter file cannot be copied, read or written.
        KeyError: If a publication lacks one of the expected keys.
    """
    static_root = Path(staticfilepath)
    for publication in hi_publications:
        chapters = publication["chapters"]
        logging.info(f"Found {len(chapters)} chapters in this publication.")
        publication_dir = (
            static_root / publication["series"].lower() / str(publication["number"])
        )
        for chapter in chapters:
            htmlfile = publication_dir / str(chapter) / "index.html"
            # Build the backup name from the file name only.  Replacing
            # ".html" in the whole path string would also rewrite any
            # directory component that happens to contain ".html".
            backup_file = htmlfile.with_name(f"{htmlfile.stem}-bak{htmlfile.suffix}")
            # NOTE(review): shutil.copy overwrites an existing backup, so a
            # second run replaces the pristine copy with the already
            # modified file -- confirm this is intended.
            shutil.copy(htmlfile, backup_file)
            logging.info(f"Opening {htmlfile}.")
            # Read and write with one explicit encoding instead of the
            # platform default.  Assumes the static pages are UTF-8 --
            # TODO confirm.
            with open(htmlfile, "r", encoding="utf-8") as html_file:
                htmldata = BeautifulSoup(html_file, 'html.parser')
            replaced_header = replace_header(htmldata)
            fixed_hi_link = fix_hi_link(replaced_header)
            with open(htmlfile, "w", encoding="utf-8") as outputfile:
                outputfile.write(str(fixed_hi_link))
# def modify_publications ends here
def main():
    """Parse the command line and patch the configured publications.

    Command line:
        staticfilepath: Root directory of the static files.  Chapter files
            are looked up below it as
            ``<series>/<number>/<chapter>/index.html``.
        -c / --config: JSON configuration file; defaults to ``ms.json``
            next to this script.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "staticfilepath",
        # The value is used as a directory root, not as a single HTML file.
        help="Root directory of the static files to modify",
    )
    parser.add_argument(
        "-c", "--config",
        default=BASE_DIR / "ms.json",
        dest="CONFIG_FILE",
        help="Name of configuration file",
        metavar="CONFIGURATION",
    )
    args = parser.parse_args()
    config_file = args.CONFIG_FILE
    hi_publications = get_hi_publications(config_file)
    logging.info(f"Found {len(hi_publications)} publication(s).")
    modify_publications(hi_publications, args.staticfilepath)
# def main ends here
# Run the command line interface only when executed as a script,
# not when the module is imported.
if __name__ == "__main__":
    main()
# finis