Permalink
Browse files

Code cleanup

  • Loading branch information...
kthoden committed Nov 22, 2018
1 parent 2b439ea commit 254a81b43e97e99d90c5969fcbba0a5d770364fe
Showing with 29 additions and 24 deletions.
  1. +29 −24 fix_tei.py
@@ -51,7 +51,7 @@
RUNNING_DIRECTORY = os.path.dirname(os.path.realpath(__file__))
logging.debug("The script is run from {}".format(RUNNING_DIRECTORY))
TEI_BOILERPLATE = os.path.sep.join([RUNNING_DIRECTORY, "data", "tei_boilerplate.cfg"])
print(TEI_BOILERPLATE)

if not os.path.exists(TEI_BOILERPLATE):
logging.error("Could not find TEI boilerplate config. Exiting.")
sys.exit()
@@ -70,8 +70,8 @@ def parse_bibtex(bibfile):

all_references.update(tmp_dict)

return tmp_dict
# return all_references
return tmp_dict
# def parse_bibtex ends here

def restore_xml_tags(text):
@@ -119,7 +119,8 @@ def fixup(m):
text = chr(html.entities.name2codepoint[text[1:-1]])
except KeyError:
pass
return text # leave as is
# leave as is
return text
return re.sub(r"&#?\w+;", fixup, text)
# def unescape ends here

@@ -295,7 +296,7 @@ def cleanup_xml(xml_tree):
logging.info("Found %s colour attributes." % len(color_attrib))

for attribute in metypeset_attrib:
logging.info("number of attributes: %s" % len(attribute.attrib))
logging.info("Number of attributes: %s" % len(attribute.attrib))
attribute.attrib.pop("meTypesetSize")

for attribute in color_attrib:
@@ -385,7 +386,7 @@ def fix_tei_header(xml_tree, bibfile_string, bibtype):
respname = etree.SubElement(example_resp, "persName")
surname = etree.SubElement(respname, "surname").text = "N"
forename = etree.SubElement(respname, "forename").text = "N"
libeoaconvert.insert_after(example_resp, author_element)
author_element.addnext(example_resp)

edition = xml_tree.xpath("//t:editionStmt/t:edition", namespaces=NS_MAP)[0]
edition_date = edition.find("t:date", namespaces=NS_MAP)
@@ -402,7 +403,8 @@ def fix_tei_header(xml_tree, bibfile_string, bibtype):
extent_element = etree.Element("extent")
pages = etree.SubElement(extent_element, "measure", commodity="pages", quantity="0")
price = etree.SubElement(extent_element, "measure", type="price", unit="EUR", quantity="0")
libeoaconvert.insert_after(extent_element, publication_statement, before=True)

publication_statement.addprevious(extent_element)

publisher_element = etree.SubElement(publication_statement, "publisher")
overall_org = etree.SubElement(publisher_element, "orgName", n="EOA", ref=BOILERPLATES.get("Header","eoa_url"))
@@ -423,9 +425,8 @@ def fix_tei_header(xml_tree, bibfile_string, bibtype):
resp_names = etree.SubElement(resp_stmt, "name", type="serieseditors")
resp_names.text = BOILERPLATES.get("Header","mprl_series_editors")
series_number = etree.SubElement(series_stmt, "idno", type="number").text = "number"
publication_stmt_parent = publication_statement.getparent()
series_stmt_insertion_point = libeoaconvert.get_place_in_xml_tree(publication_statement, publication_stmt_parent) + 1
publication_stmt_parent.insert(series_stmt_insertion_point, series_stmt)

publication_statement.addnext(series_stmt)

source_desc = xml_tree.xpath("//t:sourceDesc", namespaces=NS_MAP)[0]
suggested_citation = etree.SubElement(source_desc, "ab", type="suggestedcitation").text = "Suggested Citation"
@@ -495,29 +496,33 @@ def add_tei_frontpart():
def evaluate_report(report):
    """Print a human-readable report of the conversion.

    The report has three sections (figures, citekeys, page references).
    Each section either lists the problematic items, indented by four
    spaces, or states that everything was fine.

    Args:
        report: mapping that provides the keys ``"bad_figures"``,
            ``"citekeys_not_in_bib"`` and ``"bad_pageref"`` (sized
            iterables of items to list) and ``"len_citekeys"`` (the value
            shown as the total number of citekeys).

    Returns:
        None. The report is written to standard output.
    """
    indent = " " * 4
    bad_figures = report["bad_figures"]
    missing_citekeys = report["citekeys_not_in_bib"]
    bad_pagerefs = report["bad_pageref"]

    # Collect the report line by line and join once at the end; this keeps
    # every heading and item on its own line by construction.
    lines = ["=" * 60, indent + "Conversion report", "-" * 60]

    if bad_figures:
        lines.append(
            "{} {} could not be linked to a file in the image directory:".format(
                len(bad_figures),
                libeoaconvert.plural(len(bad_figures), "figure"),
            )
        )
        # format() instead of "+" so that non-string items do not raise.
        lines.extend("{}{}".format(indent, item) for item in bad_figures)
    else:
        lines.append("All figures were linked.")

    if missing_citekeys:
        lines.append(
            "{} of {} {} could not be found in the bibliography database:".format(
                len(missing_citekeys),
                report["len_citekeys"],
                libeoaconvert.plural(len(missing_citekeys), "citation"),
            )
        )
        lines.extend("{}{}".format(indent, item) for item in missing_citekeys)
        # The hint is surrounded by one empty line on each side.
        lines.append("")
        lines.append(
            "The missing citations were also stored in the pickle file "
            "and can be re-used by the create_tmpbib tool."
        )
        lines.append("")
    else:
        lines.append("All citekeys were found in the bibliography database.")

    if bad_pagerefs:
        # This heading previously lacked its line break, which glued the
        # first listed item onto the heading line.
        lines.append(
            "{} page {} could not be parsed into start and end value:".format(
                len(bad_pagerefs),
                libeoaconvert.plural(len(bad_pagerefs), "reference"),
            )
        )
        lines.extend("{}{}".format(indent, item) for item in bad_pagerefs)
    else:
        lines.append("All page references could be parsed into discrete values.")

    lines.append("=" * 60)

    # The trailing newline plus print()'s own newline leaves one blank line
    # after the closing rule, as before.
    report_string = "\n".join(lines) + "\n"
    print(report_string)

    return
# def evaluate_report ends here

def main():
@@ -581,7 +586,7 @@ def main():
try:
xml_tree2 = etree.fromstring(mod_string3)
except etree.XMLSyntaxError:
print("\nXML syntax error when trying to parse modified tree. Dumped it to %s." % debug_output)
logging.error("\nXML syntax error when trying to parse modified tree. Dumped it to %s." % debug_output)
print("-"*60)
traceback.print_exc(file=sys.stdout)
print("-"*60)

0 comments on commit 254a81b

Please sign in to comment.