Skip to content

Commit

Permalink
Inserting common functionalities
Browse files Browse the repository at this point in the history
  • Loading branch information
kthoden committed Nov 6, 2019
1 parent 5181386 commit fb6af94
Showing 1 changed file with 85 additions and 2 deletions.
87 changes: 85 additions & 2 deletions imxml2tei.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,93 @@
"""

import argparse
import sys
import configparser
from pathlib import Path
from lxml import etree
import utils.libeoaconvert as libeoaconvert

# Location of this script on disk. The directory is used for the default
# config file location; the stem names the log file.
SCRIPT_PATH = Path(__file__)
BASE_DIR = SCRIPT_PATH.resolve().parent
SCRIPT_NAME = SCRIPT_PATH.stem

#####################
# Parsing arguments #
#####################

# Command-line interface of the script. ArgumentDefaultsHelpFormatter appends
# each option's default value to its help text.
parser = argparse.ArgumentParser(
    formatter_class=argparse.ArgumentDefaultsHelpFormatter
)
parser.add_argument(
    "-c", "--config",
    default=BASE_DIR / "config" / "eoaconvert.cfg",
    help="Name of config file"
)
parser.add_argument(
    "-l", "--log-dir",
    default=Path("output/logs"),
    # This option is a directory, not a file: the log file path is built
    # from it by appending the script name with a ".log" suffix.
    help="directory in which the log file is written"
)
parser.add_argument(
    "--log-level",
    default="INFO",
    help="log level: choose between DEBUG, INFO, WARNING, ERROR, CRITICAL"
)
parser.add_argument(
    "-f", "--filename",
    default="IntermediateXMLFile.xml",
    # NOTE(review): the help text says "without suffix" but the default
    # carries ".xml" - confirm which form the reading code expects.
    help="Name of intermediate XML file (without suffix!)."
)
parser.add_argument(
    "-o", "--output-dir",
    default="./output/tei",
    help="where to dump all output files"
)
parser.add_argument(
    "-i", "--input-dir",
    default="./output/imxml",
    help="location of intermediate XML file"
)

# Parsed once at import time; the module-level constants below read from it.
args = parser.parse_args()

# Load the configuration; the log file is named after this script and placed
# in the directory given by --log-dir.
# NOTE(review): load_config is not imported in the visible part of this
# file - confirm where it is expected to come from.
CONFIG_FILE = args.config
print("The configfile is %s." % CONFIG_FILE)

_log_file = (Path(args.log_dir) / SCRIPT_NAME).with_suffix(".log")
CONFIG = load_config(CONFIG_FILE, args.log_level, _log_file)

############################
# Paths:
############################
# Directories and file names taken from the command line, as Path objects.
INPUT_DIR = Path(args.input_dir)
INPUT_PATH = Path(args.filename)
OUTPUT_DIR = Path(args.output_dir)
LOG_DIR = Path(args.log_dir)

# Working directories located inside the output directory.
TEMP_DIR = OUTPUT_DIR.joinpath("tmp_files")
DEBUG_DIR = OUTPUT_DIR.joinpath("debug")

# Where the resulting XML file is written: the input file's name, placed in
# the output directory, with its suffix replaced by ".xml".
XML_FILE = OUTPUT_DIR.joinpath(INPUT_PATH.name).with_suffix(".xml")

##################################
# Setting up various directories #
##################################

# Create the output directory and its working subdirectories.
# parents=True also creates missing ancestors (e.g. "./output" for the
# default "./output/tei"); exist_ok=True makes repeated runs a no-op and
# avoids the race between checking for and creating a directory.
for _directory in (OUTPUT_DIR, TEMP_DIR, DEBUG_DIR):
    _directory.mkdir(parents=True, exist_ok=True)

# TODO: citations need a little more work, especially citedRange.
# TODO: landscape figures need work too; there is currently no way to distinguish them.
Expand Down Expand Up @@ -417,10 +501,9 @@ def main():
back_part = etree.SubElement(tei_body, "back")
tei_body.insert(1, tei_body_xml.getroot())

outfile = 'CONVERT/TEI.xml'
output_string = etree.tostring(tei_root, xml_declaration=True, pretty_print=True, encoding="UTF-8", doctype= '<?xml-model href="eoa_tei.rnc" type="application/relax-ng-compact-syntax"?>\n<?xml-stylesheet type="text/css" href="tei.css" ?>')

with open(outfile, 'w') as output_file:
with open(XML_FILE, 'w') as output_file:
output_file.write(output_string.decode("utf-8"))
# def main ends here

Expand Down

0 comments on commit fb6af94

Please sign in to comment.