From b8aa5f5d9eee81b5f7a481f8779ae63f67878e4d Mon Sep 17 00:00:00 2001 From: Klaus Thoden Date: Mon, 3 Dec 2018 16:15:07 +0100 Subject: [PATCH] Hyperimage integration --- tei2imxml.py | 99 +++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 83 insertions(+), 16 deletions(-) diff --git a/tei2imxml.py b/tei2imxml.py index d09a1d5..29dfd6b 100644 --- a/tei2imxml.py +++ b/tei2imxml.py @@ -46,6 +46,8 @@ CSL_FILE = CONFIG['Auxiliaries']['CSL_FILE'] +CSV_FILE = os.path.expanduser("hi_figures.csv") + def get_publication_info(xml_tree): """Query the TEI document for metadata fields. @@ -349,9 +351,64 @@ def format_authors(list_author_id, publang, xml_tree): return author_string # def format_authors ends here -def transform_body(xml_tree, cited_data, publang): +def hi_lookup(hi_id): + """Get hyperimage code from CSV file""" + + import csv + + nd = {} + + logging.debug("Opening %s", CSV_FILE) + + with open(CSV_FILE, newline='') as f: + reader = csv.DictReader(f, fieldnames = ("checked","id","number","elementstring")) + jsonStr = json.dumps(list(reader)[1:]) + jsonObj = json.loads(jsonStr) + + for xx in jsonObj[1:]: + nd[xx["id"]] = {"number": xx["number"], "checked" : xx["checked"], "elementstring" : xx["elementstring"]} + + if nd[hi_id]: + hi_code = nd[hi_id]["elementstring"] + else: + logging.error("Could not find hi code %s", hi_id) + + return hi_code +# def hi_lookup ends here + +def transform_body(xml_tree, cited_data, publang, hyperimage=False): """Transform the body of XML document into IntermediateXML file""" + def handle_refs_default(ref): + """Handle refs the normal way""" + + target_attribute = ref.get("target") + if not target_attribute: + logging.error("Found a ref element without target. Exiting.") + sys.exit() + else: + url_attribute = ref.get("type") + if url_attribute == "url": + del ref.attrib["type"] + del ref.attrib["target"] + ref.tag = "xref" + ref.set("url", target_attribute) + else: + ref.tag = "EOAref" + del ref.attrib["target"] + etree.SubElement(ref, "ref", teitarget=target_attribute) + etree.SubElement(ref, "Label").text = target_attribute + return + # def handle_refs_default ends here + + def handle_refs_hyperimage(ref): + """Treat also the special cases of hyperimage refs""" + + logging.info("Found a ref without target, what else have we got?") + + return + # def handle_refs_hyperimage ends here + logging.info("Performing XML transformations of the body.") ###################### # Document structure # @@ -516,6 +573,13 @@ def transform_body(xml_tree, cited_data, publang):

+hyperimage +
+ + +The town hall in Prague’s Academy of Sciences + + """ figure_counter = 1 @@ -549,6 +613,15 @@ def transform_body(xml_tree, cited_data, publang): figure_width = etree.SubElement(fig_p_element, "width").text = "60" #whatever #

images/1.jpg33

+ if figure_type == "hiviewer": + # display image in hyperimage viewer, not in lightbox + # hi_id needs to be looked up in hi_figures.csv + logging.debug("Found figure for hiviewer.") + hi_id = figure.get("corresp")[1:] + hi_code = hi_lookup(hi_id) + figure.set("hielement", hi_code) + else: + pass etree.strip_elements(figure, "{%s}graphic" % ns_tei) @@ -631,22 +704,11 @@ def transform_body(xml_tree, cited_data, publang): ref_parent = ref.getparent() if ref_parent == "bibl": continue - target_attribute = ref.get("target") - if len(target_attribute) == 0: - print("Found a ref element without target. Exiting.") - sys.exit() else: - url_attribute = ref.get("type") - if url_attribute == "url": - del ref.attrib["type"] - del ref.attrib["target"] - ref.tag = "xref" - ref.set("url", target_attribute) + if hyperimage: + handle_refs_hyperimage(ref) else: - ref.tag = "EOAref" - del ref.attrib["target"] - etree.SubElement(ref, "ref", teitarget=target_attribute) - etree.SubElement(ref, "Label").text = target_attribute + handle_refs_default(ref) return xml_tree # def transform_body ends here @@ -923,6 +985,7 @@ def main(): parser = argparse.ArgumentParser() parser.add_argument("-d", "--pickleddata", default=data_pickle, help="Pickled data file to be used.") + parser.add_argument("-him", "--hyperimage", action="store_true") parser.add_argument("teifile", help="TEI XML file to convert into DocBook XML.") args = parser.parse_args() @@ -999,7 +1062,11 @@ def main(): formatted_references_dict[tmp_dict_key] = refs_for_bib_chapter tei_body = xml_tree.xpath("//t:body", namespaces=NS_MAP)[0] - body_transformed_tmp = transform_body(tei_body, cited_dict, publang=publication_language) + if args.hyperimage: + logging.info("Transforming body with Hyperimage support") + else: + pass + body_transformed_tmp = transform_body(tei_body, cited_dict, publang=publication_language, hyperimage=args.hyperimage) libeoaconvert.debug_xml_here(body_transformed_tmp, "body_transformed") body_transformed = etree.ElementTree(body_transformed_tmp)