Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Ongoing work
  • Loading branch information
Klaus Thoden committed Aug 30, 2018
1 parent 4fbaa0b commit e89dc78
Showing 1 changed file with 118 additions and 28 deletions.
146 changes: 118 additions & 28 deletions tei2imxml.py
Expand Up @@ -350,15 +350,17 @@ def transform_body(xml_tree, cited_data, publang):
chapter_title = chapter.find("t:head", namespaces=NS_MAP)

author_ids = chapter.get("resp")
list_author_id = author_ids.split(" ")

if len(list_author_id) > 0:
author_string = format_authors(list_author_id, publang)
# print(author_string)
eoa_author = etree.Element("EOAauthor")
eoa_author.text = author_string
chapter_title.insert(0, eoa_author)

if author_ids is not None:
list_author_id = author_ids.split(" ")

if len(list_author_id) > 0:
author_string = format_authors(list_author_id, publang)
# print(author_string)
eoa_author = etree.Element("EOAauthor")
eoa_author.text = author_string
chapter_title.insert(0, eoa_author)
else:
logging.info("No chapter author.")
chapter.insert(0, chapter_title)

eoa_sections = xml_tree.xpath("//t:div[@type='section']", namespaces=NS_MAP)
Expand Down Expand Up @@ -510,24 +512,32 @@ def transform_body(xml_tree, cited_data, publang):
eoa_figures = xml_tree.xpath("//t:figure", namespaces=NS_MAP)

for figure in eoa_figures:
figure.tag = "EOAfigure"
figure.set("id", "anotheruid")

anchor_element = etree.SubElement(figure, "anchor")
# anchor_element.set("id-text", "id-text")

# careful, caption can contain markup!
caption_element = figure.find("t:head", namespaces=NS_MAP)
figure_type = figure.get("type")
if figure_type == "hionly":
pass
else:
# careful, caption can contain markup!
caption_element = figure.xpath("t:head", namespaces=NS_MAP)[0]
caption_element.tag = "caption"
if caption_element is not None:
figure.tag = "EOAfigure"
figure.set("id", "anotheruid")

anchor_element = etree.SubElement(figure, "anchor")
# anchor_element.set("id-text", "id-text")

fig_p_element = etree.SubElement(figure, "p")
figure_file = etree.SubElement(fig_p_element, "file").text = figure.xpath("t:graphic/@url", namespaces=NS_MAP)[0]
figure_width = etree.SubElement(fig_p_element, "width").text = "60" #whatever
fig_p_element.append(caption_element)
caption_element.tag = "caption"

fig_p_element = etree.SubElement(figure, "p")
figure_file = etree.SubElement(fig_p_element, "file").text = figure.xpath("t:graphic/@url", namespaces=NS_MAP)[0]
figure_width = etree.SubElement(fig_p_element, "width").text = "60" #whatever
fig_p_element.append(caption_element)
else:
figure.tag = "EOAfigurenonumber"
fig_p_element = etree.SubElement(figure, "p")
figure_file = etree.SubElement(fig_p_element, "file").text = figure.xpath("t:graphic/@url", namespaces=NS_MAP)[0]
figure_width = etree.SubElement(fig_p_element, "width").text = "60" #whatever

# Target structure, e.g.: <EOAfigurenonumber><p><file>images/1.jpg</file><width>33</width></p></EOAfigurenonumber>

etree.strip_elements(figure, "{%s}graphic" % ns_tei)

Expand All @@ -550,6 +560,80 @@ def transform_body(xml_tree, cited_data, publang):
else:
logging.info("The rend attribute in hi has the value %s. This is not supported" % rend_attribute)

##########
# Tables #
##########
eoa_tables = xml_tree.xpath("//t:table", namespaces=NS_MAP)

# turn
# <table rows="4" cols="4" rend="blank" xml:id="sec3table1">
# <head>This is a table</head>
# <row role="label">
# <cell role="label">Heading 1</cell>
# <cell role="label">Heading <hi rend="bold">2</hi></cell>
# <cell role="label">Heading 3</cell>
# <cell role="label">Heading 4</cell>
# </row>
# <row role="data">
# <cell role="data">Here</cell>
# <cell role="data">you</cell>
# <cell role="data">may</cell>
# <cell role="data">find</cell>
# </row>
# <row role="data">
# <cell role="data">some</cell>
# <cell role="data">data</cell>
# <cell role="data">spread</cell>
# <cell role="data">over</cell>
# </row>
# <row role="data">
# <cell role="data">the</cell>
# <cell role="data">table</cell>
# <cell role="data">in</cell>
# <cell role="data">cells</cell>
# </row>
# </table>

# into

# <EOAtable>
# <EOAtablelabel>sec3table1</EOAtablelabel>
# <EOAtablecaption>This is a table</EOAtablecaption>
# <EOAtablecolumns>L2.3cmL2.3cmL2.3cmL2.3cm</EOAtablecolumns>
# <table id-text="1" id="uid38" place="sec3table1" rend="display">
# <row>
# <cell><tableheader>TRUE</tableheader>Heading 1</cell>
# <cell>Heading <hi rend="bold">2</hi></cell>
# <cell>Heading 3</cell>
# <cell>Heading 4</cell>
# </row>
# <row>
# <cell>Here</cell>
# <cell>you</cell>
# <cell>may</cell>
# <cell>find</cell>
# </row>
# <row>
# <cell>some</cell>
# <cell>data</cell>
# <cell>spread</cell>
# <cell>over</cell>
# </row>
# <row>
# <cell>the</cell>
# <cell>table</cell>
# <cell>in</cell>
# <cell>cells</cell>
# </row>
# </table>
# </EOAtable>

for table in eoa_tables:
table.tag = "EOAtable"
table_id = (table.get("{http://www.w3.org/XML/1998/namespace}id"))

table_label = etree.SubElement(table, "EOAtablelabel").text = table_id

##############
# References #
##############
Expand Down Expand Up @@ -662,7 +746,7 @@ def update_ids(xml_tree):
"""Update the references in EOAref to the id value assigned in assign_ids"""

xmlReferences = xml_tree.findall(".//EOAref")

logging.debug("Found %d references", len(xmlReferences))
for xmlReference in xmlReferences:
eoa_reference = xmlReference.find("ref")

Expand All @@ -674,7 +758,9 @@ def update_ids(xml_tree):
# pass
# else:
corresponding_eoa_id_element = xml_tree.xpath("//*[@xml:id='{}']".format(label_text))
if len(corresponding_eoa_id_element) == 0:
logging.debug("The corresponding id element is %s", corresponding_eoa_id_element)
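# NOTE(review): xpath() returns a list here (its len() is taken a few lines below),
# so the `is None` test that follows can never be true and an empty result
# (no matching xml:id) is no longer caught by it. The previous check was:
# if len(corresponding_eoa_id_element) == 0: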
if corresponding_eoa_id_element is None:
print("There seems to be no corresponding xml:id for %s. Exiting." % label_text)
sys.exit()
elif len(corresponding_eoa_id_element) > 1:
Expand Down Expand Up @@ -815,17 +901,21 @@ def fix_bib_entries(div_snippet):
if not os.path.exists(TMP_DIR):
os.mkdir(os.path.expanduser(TMP_DIR))

with open(TMP_DIR + os.path.sep + 'data.pickle', 'rb') as f:
data = pickle.load(f)
try:
with open(TMP_DIR + os.path.sep + 'data.pickle', 'rb') as f:
data = pickle.load(f)
except FileNotFoundError:
print("File 'data.pickle' not found. You should run 'fix_tei.py' first. Exiting.")
sys.exit()

tei_document = sys.argv[-1]
xml_tree = etree.parse(tei_document)

publication_language = xml_tree.xpath("//t:teiHeader/t:profileDesc/t:langUsage/t:language/@ident", namespaces=NS_MAP)[0]

bib_data = {}
bib_data["source"] = xml_tree.xpath("//t:teiHeader/t:fileDesc/t:sourceDesc/t:ab[@type='bibliography']/t:ref/@target", namespaces=NS_MAP)[0]
bib_data["type"] = xml_tree.xpath("//t:teiHeader/t:fileDesc/t:sourceDesc/t:ab[@type='bibliography']/t:ref/@type", namespaces=NS_MAP)[0]
bib_data["source"] = xml_tree.xpath("//t:teiHeader/t:fileDesc/t:sourceDesc/t:ab[@type='bibdatabase']/t:ref/@target", namespaces=NS_MAP)[0]
bib_data["type"] = xml_tree.xpath("//t:teiHeader/t:fileDesc/t:sourceDesc/t:ab[@type='bibdatabase']/t:ref/@type", namespaces=NS_MAP)[0]
logging.info("The bibfile is %s and this publication type is %s." % (bib_data["source"], bib_data["type"]))
if bib_data["type"] not in ["monograph", "anthology", "monograph-numeric", "anthology-numeric"]:
print("The bibliography type %s is not allowed." % bib_data["type"])
Expand Down

0 comments on commit e89dc78

Please sign in to comment.