From 4369492710f0c7c0fb931182a5aed49ed3f9f16b Mon Sep 17 00:00:00 2001
From: kthoden <kthoden@gwdg.de>
Date: Fri, 2 Aug 2019 16:26:33 +0200
Subject: [PATCH 1/9] Exception handling

---
 tei2imxml.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tei2imxml.py b/tei2imxml.py
index 778ccb5..42783c8 100755
--- a/tei2imxml.py
+++ b/tei2imxml.py
@@ -449,10 +449,11 @@ def hi_lookup_code(nd, hitrue_xml_id):
 def get_hitarget(nd, teitarget):
     """Find out corresponding hyperimage id for hyperimage link"""
 
-    if nd[teitarget]:
+    try:
         hi_target = nd[teitarget]["hiid"]
-    else:
-        logging.error("Could not find hi code %s", teitarget)
+    except KeyError:
+        logging.error("Could not find hi code %s. Exiting", teitarget)
+        sys.exit(1)
 
     return hi_target
 # def get_hitarget ends here

From 17d29ad181d4177bf7a4120a876114c703ce1203 Mon Sep 17 00:00:00 2001
From: kthoden <kthoden@gwdg.de>
Date: Fri, 2 Aug 2019 16:26:56 +0200
Subject: [PATCH 2/9] collage handling

---
 imxml2django.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/imxml2django.py b/imxml2django.py
index 0b82359..29258b7 100755
--- a/imxml2django.py
+++ b/imxml2django.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8; mode: python -*-
-# Time-stamp: <2019-07-31 14:46:18 (kthoden)>
+# Time-stamp: <2019-08-02 16:25:56 (kthoden)>
 
 """
 Create an XML file that can be inserted into the Django database
@@ -416,7 +416,7 @@ def djangoParseObject(xmlElement, indent=False, listtype=None, listnumber=0, uid
                 else:
                     xmlEOAfigure.set("file", strImageFileDir + strImageFileName)
 
-            if figure_type == "hionly" or figure_type == "hionlycollage":
+            if figure_type == "hionly":# or figure_type == "hionlycollage":
                 logging.debug(f"Found hyperimage figure ({figure_type}), no need for caption and size information.")
                 pass
             else:
@@ -426,10 +426,11 @@ def djangoParseObject(xmlElement, indent=False, listtype=None, listnumber=0, uid
                 xmlResult.append(xmlEOAfigure)
                 intObjectNumber += 1
                 # Insert visual Number and uid
-                strFigureNumber = dictFigures[xmlElement.find(".//anchor").get("id")]
-                xmlEOAfigure.set("number", strFigureNumber)
-                strFigureUID = xmlElement.find(".//anchor").get("id")
-                xmlEOAfigure.set("id", strFigureUID)
+                if figure_type != "hionlycollage":
+                    strFigureNumber = dictFigures[xmlElement.find(".//anchor").get("id")]
+                    xmlEOAfigure.set("number", strFigureNumber)
+                    strFigureUID = xmlElement.find(".//anchor").get("id")
+                    xmlEOAfigure.set("id", strFigureUID)
 
             hi_figure_types = ["hitrue", "hionly", "hionlycollage"]
 

From c28dc8d183db81cb8175c2acbb8cd1439d52d0cf Mon Sep 17 00:00:00 2001
From: kthoden <kthoden@gwdg.de>
Date: Mon, 2 Sep 2019 16:28:50 +0200
Subject: [PATCH 3/9] Additional information

---
 doc/datapickle.md | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/doc/datapickle.md b/doc/datapickle.md
index d03ed0a..119c644 100644
--- a/doc/datapickle.md
+++ b/doc/datapickle.md
@@ -2,6 +2,17 @@
 
 The file data.pickle is created during a run of `eoatex2imxml.py` or `fix_tei.py` and primarily assigns numbers to elements. For example, the thirteenth figure in the first (numbered) chapter, that carries the id `uid17` is assigned the human readable reference `1.13`.
 
+The original list of stored items is:
+- chapterdict
+- figdict
+- eqdict
+- fndict
+- listdict
+- pagelabeldict
+- secdict
+- tabdict
+- theoremdict
+
 ## eoatex2imxml.py
 In the classic variant, the file contains these fields:
 

From def54087c9bfab222f2648bb3ffcf6f2df888752 Mon Sep 17 00:00:00 2001
From: kthoden <kthoden@gwdg.de>
Date: Wed, 18 Sep 2019 10:46:05 +0200
Subject: [PATCH 4/9] Corrected rendering thanks to Martin Sievers

---
 data/aux/bibliography4ht.tex | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/data/aux/bibliography4ht.tex b/data/aux/bibliography4ht.tex
index 5ba07b3..91f7607 100644
--- a/data/aux/bibliography4ht.tex
+++ b/data/aux/bibliography4ht.tex
@@ -42,7 +42,7 @@
   \printfield{volume}%
   \iffieldundef{number}
      {}
-      {\mkbibparens{\printfield{number}}}%
+     {\printfield[parens]{number}}
   \setunit{\addcomma\space}%
   \printfield{eid}%
   \setunit{\addspace}%

From e28da2c6edd94861c988cd1d778108b0678815bb Mon Sep 17 00:00:00 2001
From: kthoden <kthoden@gwdg.de>
Date: Wed, 18 Sep 2019 10:47:13 +0200
Subject: [PATCH 5/9] Lists

---
 tei2imxml.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/tei2imxml.py b/tei2imxml.py
index 42783c8..72e9eef 100755
--- a/tei2imxml.py
+++ b/tei2imxml.py
@@ -901,16 +901,19 @@ def handle_refs_default(ref):
     #########
     eoa_lists = xml_tree.xpath("//t:body//t:list", namespaces=NS_MAP)
     for eoalist in eoa_lists:
+        items = eoalist.findall("t:item", namespaces=NS_MAP)
+        for listitem in items:
+            listitem.tag = "p"
+            libeoaconvert.wrap_into_element(etree.Element("item"), listitem)
         if eoalist.get("type") == "ordered":
-            pass
+            for number, listitem in enumerate(items, start=1):
+                new_item_element = listitem.getparent()
+                new_item_element.set("id-text", f"{number}")
+                new_item_element.set("label", f"{number}.")
         if eoalist.get("type") == "unordered":
-            pass
+            eoalist.set("type", "simple")
         if eoalist.get("type") == "gloss":
             eoalist.set("type", "description")
-            items = eoalist.findall("t:item", namespaces=NS_MAP)
-            for listitem in items:
-                listitem.tag = "p"
-                libeoaconvert.wrap_into_element(etree.Element("item"), listitem)
 
     ##############
     # References #

From 3ca1343596920bcef199d8ee1ecbe022fce5c716 Mon Sep 17 00:00:00 2001
From: kthoden <kthoden@gwdg.de>
Date: Wed, 18 Sep 2019 10:47:26 +0200
Subject: [PATCH 6/9] Hyperimage exceptions

---
 tei2imxml.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tei2imxml.py b/tei2imxml.py
index 72e9eef..c1b6334 100755
--- a/tei2imxml.py
+++ b/tei2imxml.py
@@ -787,7 +787,7 @@ def handle_refs_default(ref):
             figure.set("id", "anotheruid")
 
             # the anchor element is used to determine whether a figure gets an id and can be numbered
-            if figure_type == "hionlycollage":
+            if figure_type == "hionlycollage" or figure_type == "hionlysub":
                 logging.debug("No anchor element for collages.")
 
             else:

From 51813866055dcc17f14fa587935172a8fb394e5e Mon Sep 17 00:00:00 2001
From: kthoden <kthoden@gwdg.de>
Date: Wed, 18 Sep 2019 10:47:58 +0200
Subject: [PATCH 7/9] Some more Hyperimage rules

---
 imxml2django.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/imxml2django.py b/imxml2django.py
index 29258b7..84e3323 100755
--- a/imxml2django.py
+++ b/imxml2django.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8; mode: python -*-
-# Time-stamp: <2019-08-02 16:25:56 (kthoden)>
+# Time-stamp: <2019-08-06 15:05:17 (kthoden)>
 
 """
 Create an XML file that can be inserted into the Django database
@@ -426,13 +426,15 @@ def djangoParseObject(xmlElement, indent=False, listtype=None, listnumber=0, uid
                 xmlResult.append(xmlEOAfigure)
                 intObjectNumber += 1
                 # Insert visual Number and uid
-                if figure_type != "hionlycollage":
+                if figure_type == "hionlycollage" or figure_type == "hionlysub":
+                    pass
+                else:
                     strFigureNumber = dictFigures[xmlElement.find(".//anchor").get("id")]
                     xmlEOAfigure.set("number", strFigureNumber)
                     strFigureUID = xmlElement.find(".//anchor").get("id")
                     xmlEOAfigure.set("id", strFigureUID)
 
-            hi_figure_types = ["hitrue", "hionly", "hionlycollage"]
+            hi_figure_types = ["hitrue", "hionly", "hionlycollage"]#, "hionlysub"]
 
             if figure_type in hi_figure_types:
                 xmlEOAfigure.set("hielement", xmlElement.get("hielement"))

From fb6af94f52ba7f45a0718fd13213713ed760b38b Mon Sep 17 00:00:00 2001
From: kthoden <kthoden@gwdg.de>
Date: Wed, 6 Nov 2019 15:28:16 +0100
Subject: [PATCH 8/9] Inserting common functionalities

---
 imxml2tei.py | 87 ++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 85 insertions(+), 2 deletions(-)

diff --git a/imxml2tei.py b/imxml2tei.py
index c029395..b98aed8 100755
--- a/imxml2tei.py
+++ b/imxml2tei.py
@@ -7,9 +7,93 @@
 
 """
 
+import argparse
 import sys
 import configparser
+from pathlib import Path
 from lxml import etree
+import utils.libeoaconvert as libeoaconvert
+
+BASE_DIR = Path( __file__ ).resolve().parent
+SCRIPT_PATH = Path( __file__ )
+SCRIPT_NAME = SCRIPT_PATH.stem
+
+#####################
+# Parsing arguments #
+#####################
+
+parser = argparse.ArgumentParser(
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter
+)
+parser.add_argument(
+        "-c", "--config",
+        default = BASE_DIR / "config" / "eoaconvert.cfg",
+        help="Name of config file"
+)
+parser.add_argument(
+        "-l", "--log-dir",
+        default = Path("output/logs"),
+        # default = Path("logs", SCRIPT_NAME).with_suffix(".log"),
+        help="logfile"
+)
+parser.add_argument(
+        "--log-level",
+        default = "INFO",
+        help="log level: choose between DEBUG, INFO, WARNING, ERROR, CRITICAL"
+)
+parser.add_argument(
+        "-f", "--filename",
+        default = "IntermediateXMLFile.xml",
+        help="Name of intermediate XML file (without suffix!)."
+)
+parser.add_argument(
+        "-o", "--output-dir",
+        default = "./output/tei",
+        help="where to dump all output files"
+)
+parser.add_argument(
+        "-i", "--input-dir",
+        default = "./output/imxml",
+        help="location of intermediate XML file"
+)
+
+args = parser.parse_args()
+
+CONFIG_FILE = args.config
+
+print("The configfile is %s." % CONFIG_FILE)
+
+# NOTE(review): load_config is not imported in the lines visible here - confirm its module and import it.
+CONFIG = load_config(
+        CONFIG_FILE,
+        args.log_level,
+        (Path(args.log_dir) / SCRIPT_NAME) . with_suffix( ".log" ),
+)
+
+############################
+# Paths:
+############################
+INPUT_DIR = Path( args.input_dir )
+INPUT_PATH = Path( args.filename )
+OUTPUT_DIR = Path( args.output_dir )
+LOG_DIR = Path( args.log_dir )
+
+TEMP_DIR = OUTPUT_DIR / "tmp_files"
+DEBUG_DIR = OUTPUT_DIR / "debug"
+
+# where to output the xml file:
+XML_FILE = (OUTPUT_DIR / INPUT_PATH.name) .with_suffix( ".xml" )
+
+##################################
+# Setting up various directories #
+##################################
+
+# Path.mkdir is used so that no additional module (os is not among the
+# imports above) is needed; parents=True also creates a missing parent
+# of OUTPUT_DIR, exist_ok=True keeps directories from earlier runs.
+OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
+TEMP_DIR.mkdir(exist_ok=True)
+DEBUG_DIR.mkdir(exist_ok=True)
 
 # citations need a little more work: especially citedRange
 # so do landscape figures, no way to distinguish them!
@@ -417,10 +501,9 @@ def main():
     back_part = etree.SubElement(tei_body, "back")
     tei_body.insert(1, tei_body_xml.getroot())
 
-    outfile = 'CONVERT/TEI.xml'
     output_string = etree.tostring(tei_root, xml_declaration=True, pretty_print=True, encoding="UTF-8", doctype= '<?xml-model href="eoa_tei.rnc" type="application/relax-ng-compact-syntax"?>\n<?xml-stylesheet type="text/css" href="tei.css" ?>')
 
-    with open(outfile, 'w') as output_file:
+    with open(XML_FILE, 'w', encoding="utf-8") as output_file:
         output_file.write(output_string.decode("utf-8"))
 # def main ends here
 

From c02135d027dba92bc6dca9f3dff743345b49ecea Mon Sep 17 00:00:00 2001
From: kthoden <kthoden@gwdg.de>
Date: Wed, 6 Nov 2019 15:28:59 +0100
Subject: [PATCH 9/9] Insert id, irrespective of nonumber status

---
 idassigner.py | 32 ++++++++++++--------------------
 1 file changed, 12 insertions(+), 20 deletions(-)

diff --git a/idassigner.py b/idassigner.py
index 9dec660..2dea957 100644
--- a/idassigner.py
+++ b/idassigner.py
@@ -46,29 +46,21 @@ def assign_ids(chapter_tree, elements):
         sections = chapter_tree.xpath("//t:div[@type='section']", namespaces=NS_MAP)
         section_id_counter = 1
         for section in sections:
-            if section.get("n") == "nonumber":
-                logging.info("Leaving out unnumbered section.")
-                pass
-            else:
-                section_id = "{}_sec{:02d}".format(chapter_id, section_id_counter)
-                libeoaconvert.assign_xml_id(section, section_id)
-                section_id_counter += 1
+            section_id = "{}_sec{:02d}".format(chapter_id, section_id_counter)
+            libeoaconvert.assign_xml_id(section, section_id)
+            section_id_counter += 1
 
     if "sections" in elements:
         subsections = chapter_tree.xpath("//t:div[@type='subsection']", namespaces=NS_MAP)
         subsection_id_counter = 1
         for subsection in subsections:
-            if subsection.get("n") == "nonumber":
-                logging.info("Leaving out unnumbered subsection.")
-                pass
-            else:
-                section_element = subsection.getparent()
-                section_id = section_element.attrib["{http://www.w3.org/XML/1998/namespace}id"]
-                logging.debug("Found a subsection in section %s", section_id)
-                rest, section_number = section_id.split("_sec")
-                subsection_id = "{}_subsec{}-{:02d}".format(chapter_id, section_number, subsection_id_counter)
-                libeoaconvert.assign_xml_id(subsection, subsection_id)
-                subsection_id_counter += 1
+            section_element = subsection.getparent()
+            section_id = section_element.attrib["{http://www.w3.org/XML/1998/namespace}id"]
+            logging.debug("Found a subsection in section %s", section_id)
+            rest, section_number = section_id.split("_sec")
+            subsection_id = "{}_subsec{}-{:02d}".format(chapter_id, section_number, subsection_id_counter)
+            libeoaconvert.assign_xml_id(subsection, subsection_id)
+            subsection_id_counter += 1
 
     if "figures" in elements:
         figures = chapter_tree.xpath("//t:figure", namespaces=NS_MAP)
@@ -119,13 +111,13 @@ def main():
         print(selected_chapters)
         chapters = []
         for xml_chapter in selected_chapters:
-            chapter = xml_tree.xpath(f"//t:div[@xml:id='{xml_chapter}' and not(@n='nonumber')]", namespaces=NS_MAP)[0]
+            chapter = xml_tree.xpath(f"//t:div[@xml:id='{xml_chapter}']", namespaces=NS_MAP)[0]
             copied_chapter = deepcopy(chapter)
             assign_ids(copied_chapter, elements=list_of_elements)
             chapter.addprevious(copied_chapter)
             chapter.tag = "elementtobestripped"
     else:
-        chapters = xml_tree.xpath("//t:div[@type='chapter' and not(@n='nonumber')]", namespaces=NS_MAP)
+        chapters = xml_tree.xpath("//t:div[@type='chapter']", namespaces=NS_MAP)
         logging.debug("Found %s chapters.", len(chapters))
         # in this iteration, a copy is made of each chapter and fitted
         # with ids, the original chapter is being discarded