Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
eoa_makestatic/makestatic.py
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
114 lines (81 sloc)
3.18 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# -*- coding: utf-8; mode: python -*- | |
""" | |
A wrapper for creating a static version using wget. | |
""" | |
__version__ = "1.0" | |
__date__ = "20200305" | |
__author__ = "kthoden@mpiwg-berlin.mpg.de" | |
import argparse | |
import logging | |
import re | |
import shutil | |
import json | |
from bs4 import BeautifulSoup | |
from pathlib import Path | |
# Absolute path of the directory that contains this script; main() uses it
# to locate the default configuration file "ms.json".
BASE_DIR = Path( __file__ ).resolve().parent | |
# Configure the root logger once at import time: DEBUG level and above,
# each record formatted as "<timestamp> - <level name> - <message>".
logging.basicConfig(level=logging.DEBUG, format=' %(asctime)s - %(levelname)s - %(message)s') | |
def get_hi_publications(json_config):
    """Load the Hyperimage-enabled publications from a JSON config file.

    Args:
        json_config: Path of the JSON configuration file (anything that
            ``open`` accepts).

    Returns:
        The value stored under the top-level ``"hi_enabled"`` key. It is
        expected to be a list of dictionaries with the keys ``series``,
        ``number`` and ``chapters``, for example::

            [{'series': 'studies', 'number': 12,
              'chapters': [2, 3, 7, 8, 11, 14, 15, 18]}]

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
        KeyError: If the ``"hi_enabled"`` key is missing.
    """
    # JSON is UTF-8 by specification; do not depend on the platform's
    # locale encoding, which would misread non-ASCII text on some systems.
    with open(json_config, "r", encoding="utf-8") as json_data:
        json_object = json.load(json_data)
    return json_object["hi_enabled"]
# def get_hi_publications ends here | |
def replace_header(html_data):
    """Turn URL-escaped question marks in ``href`` attributes back into ``?``.

    This repairs versioned file references such as ``min.css?v=1.0.css``
    whose ``?`` appears as ``%3F`` in the href. Matching elements are
    modified in place.

    Returns the same ``html_data`` object that was passed in.
    """
    escaped_question_mark = re.compile('%3F')
    for element in html_data.find_all(href=escaped_question_mark):
        element["href"] = element["href"].replace('%3F', '?')
    return html_data
# def replace_header ends here | |
def fix_hi_link(html_data):
    """Strip ``index.html`` from the ``href`` of Hyperimage links.

    Every element found via ``find_all(class_="HILink")`` gets its ``href``
    rewritten in place with all occurrences of ``index.html`` removed.
    Elements of that class that carry no ``href`` attribute are left
    untouched instead of aborting the run with a ``KeyError``.

    Args:
        html_data: Parsed document offering ``find_all`` (a BeautifulSoup
            object in this script).

    Returns:
        The same ``html_data`` object that was passed in.
    """
    for link in html_data.find_all(class_="HILink"):
        href = link.get("href")
        if href is None:
            # The class may sit on an element without a link target;
            # there is nothing to repair in that case.
            continue
        link["href"] = href.replace('index.html', '')
    return html_data
# def fix_hi_link ends here | |
def modify_publications(hi_publications, staticfilepath):
    """Based on the JSON config, patch the chapter pages of each publication.

    For every chapter listed in ``hi_publications`` the file
    ``<staticfilepath>/<series, lower-cased>/<number>/<chapter>/index.html``
    is copied to ``index-bak.html`` next to it, parsed, passed through
    ``replace_header`` and ``fix_hi_link``, and written back in place.

    Args:
        hi_publications: Iterable of dictionaries with the keys ``series``,
            ``number`` and ``chapters`` (see ``get_hi_publications``).
        staticfilepath: Root directory of the static site.

    Raises:
        OSError: If a chapter file cannot be copied, read or written.
        KeyError: If a publication entry lacks one of the expected keys.

    Note:
        ``shutil.copy`` overwrites an existing backup, so running the
        script twice replaces the backup with the already patched page.
    """
    static_root = Path(staticfilepath)
    for publication in hi_publications:
        chapters = publication["chapters"]
        logging.info(f"Found {len(chapters)} chapters in this publication.")
        publication_dir = (
            static_root / publication["series"].lower() / str(publication["number"])
        )
        for chapter in chapters:
            htmlfile = publication_dir / str(chapter) / "index.html"
            # Derive the backup name from the file name only; a plain
            # str.replace(".html", ...) on the whole path would also rewrite
            # any directory component that happens to contain ".html".
            backup_file = htmlfile.with_name(f"{htmlfile.stem}-bak{htmlfile.suffix}")
            shutil.copy(htmlfile, backup_file)
            logging.info(f"Opening {htmlfile}.")
            # Explicit UTF-8 on both ends keeps non-ASCII text intact
            # regardless of the locale the script runs under.
            with open(htmlfile, "r", encoding="utf-8") as html_file:
                htmldata = BeautifulSoup(html_file, 'html.parser')
            # Both helpers modify the document in place and return it.
            fixed_hi_link = fix_hi_link(replace_header(htmldata))
            with open(htmlfile, "w", encoding="utf-8") as outputfile:
                outputfile.write(str(fixed_hi_link))
# def modify_publications ends here | |
def main():
    """Parse the command line and patch the static site accordingly.

    Positional argument ``staticfilepath`` is the root directory of the
    static site; ``-c/--config`` names the JSON configuration file and
    defaults to ``ms.json`` in the directory of this script.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "staticfilepath",
        # The value is used as the root of the
        # <series>/<number>/<chapter>/index.html tree, not as a single file.
        help="Root directory of the static site whose chapter pages are modified",
    )
    parser.add_argument(
        "-c", "--config",
        default=BASE_DIR / "ms.json",
        dest="CONFIG_FILE",
        help="Name of configuration file",
        metavar="CONFIGURATION",
    )
    args = parser.parse_args()

    hi_publications = get_hi_publications(args.CONFIG_FILE)
    logging.info(f"Found {len(hi_publications)} publication(s).")
    modify_publications(hi_publications, args.staticfilepath)
# def main ends here | |
# Run the command-line interface only when executed as a script,
# not when the module is imported.
if __name__ == "__main__":
    main()
# finis |