Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Code cleanup
  • Loading branch information
Klaus Thoden committed Nov 22, 2018
1 parent 2b439ea commit 254a81b
Showing 1 changed file with 29 additions and 24 deletions.
53 changes: 29 additions & 24 deletions fix_tei.py
Expand Up @@ -51,7 +51,7 @@
RUNNING_DIRECTORY = os.path.dirname(os.path.realpath(__file__)) RUNNING_DIRECTORY = os.path.dirname(os.path.realpath(__file__))
logging.debug("The script is run from {}".format(RUNNING_DIRECTORY)) logging.debug("The script is run from {}".format(RUNNING_DIRECTORY))
TEI_BOILERPLATE = os.path.sep.join([RUNNING_DIRECTORY, "data", "tei_boilerplate.cfg"]) TEI_BOILERPLATE = os.path.sep.join([RUNNING_DIRECTORY, "data", "tei_boilerplate.cfg"])
print(TEI_BOILERPLATE)
if not os.path.exists(TEI_BOILERPLATE): if not os.path.exists(TEI_BOILERPLATE):
logging.error("Could not find TEI boilerplate config. Exiting.") logging.error("Could not find TEI boilerplate config. Exiting.")
sys.exit() sys.exit()
Expand All @@ -70,8 +70,8 @@ def parse_bibtex(bibfile):


all_references.update(tmp_dict) all_references.update(tmp_dict)


return tmp_dict
# return all_references # return all_references
return tmp_dict
# def parse_bibtex ends here # def parse_bibtex ends here


def restore_xml_tags(text): def restore_xml_tags(text):
Expand Down Expand Up @@ -119,7 +119,8 @@ def fixup(m):
text = chr(html.entities.name2codepoint[text[1:-1]]) text = chr(html.entities.name2codepoint[text[1:-1]])
except KeyError: except KeyError:
pass pass
return text # leave as is # leave as is
return text
return re.sub(r"&#?\w+;", fixup, text) return re.sub(r"&#?\w+;", fixup, text)
# def unescape ends here # def unescape ends here


Expand Down Expand Up @@ -295,7 +296,7 @@ def cleanup_xml(xml_tree):
logging.info("Found %s colour attributes." % len(color_attrib)) logging.info("Found %s colour attributes." % len(color_attrib))


for attribute in metypeset_attrib: for attribute in metypeset_attrib:
logging.info("number of attributes: %s" % len(attribute.attrib)) logging.info("Number of attributes: %s" % len(attribute.attrib))
attribute.attrib.pop("meTypesetSize") attribute.attrib.pop("meTypesetSize")


for attribute in color_attrib: for attribute in color_attrib:
Expand Down Expand Up @@ -385,7 +386,7 @@ def fix_tei_header(xml_tree, bibfile_string, bibtype):
respname = etree.SubElement(example_resp, "persName") respname = etree.SubElement(example_resp, "persName")
surname = etree.SubElement(respname, "surname").text = "N" surname = etree.SubElement(respname, "surname").text = "N"
forename = etree.SubElement(respname, "forename").text = "N" forename = etree.SubElement(respname, "forename").text = "N"
libeoaconvert.insert_after(example_resp, author_element) author_element.addnext(example_resp)


edition = xml_tree.xpath("//t:editionStmt/t:edition", namespaces=NS_MAP)[0] edition = xml_tree.xpath("//t:editionStmt/t:edition", namespaces=NS_MAP)[0]
edition_date = edition.find("t:date", namespaces=NS_MAP) edition_date = edition.find("t:date", namespaces=NS_MAP)
Expand All @@ -402,7 +403,8 @@ def fix_tei_header(xml_tree, bibfile_string, bibtype):
extent_element = etree.Element("extent") extent_element = etree.Element("extent")
pages = etree.SubElement(extent_element, "measure", commodity="pages", quantity="0") pages = etree.SubElement(extent_element, "measure", commodity="pages", quantity="0")
price = etree.SubElement(extent_element, "measure", type="price", unit="EUR", quantity="0") price = etree.SubElement(extent_element, "measure", type="price", unit="EUR", quantity="0")
libeoaconvert.insert_after(extent_element, publication_statement, before=True)
publication_statement.addprevious(extent_element)


publisher_element = etree.SubElement(publication_statement, "publisher") publisher_element = etree.SubElement(publication_statement, "publisher")
overall_org = etree.SubElement(publisher_element, "orgName", n="EOA", ref=BOILERPLATES.get("Header","eoa_url")) overall_org = etree.SubElement(publisher_element, "orgName", n="EOA", ref=BOILERPLATES.get("Header","eoa_url"))
Expand All @@ -423,9 +425,8 @@ def fix_tei_header(xml_tree, bibfile_string, bibtype):
resp_names = etree.SubElement(resp_stmt, "name", type="serieseditors") resp_names = etree.SubElement(resp_stmt, "name", type="serieseditors")
resp_names.text = BOILERPLATES.get("Header","mprl_series_editors") resp_names.text = BOILERPLATES.get("Header","mprl_series_editors")
series_number = etree.SubElement(series_stmt, "idno", type="number").text = "number" series_number = etree.SubElement(series_stmt, "idno", type="number").text = "number"
publication_stmt_parent = publication_statement.getparent()
series_stmt_insertion_point = libeoaconvert.get_place_in_xml_tree(publication_statement, publication_stmt_parent) + 1 publication_statement.addnext(series_stmt)
publication_stmt_parent.insert(series_stmt_insertion_point, series_stmt)


source_desc = xml_tree.xpath("//t:sourceDesc", namespaces=NS_MAP)[0] source_desc = xml_tree.xpath("//t:sourceDesc", namespaces=NS_MAP)[0]
suggested_citation = etree.SubElement(source_desc, "ab", type="suggestedcitation").text = "Suggested Citation" suggested_citation = etree.SubElement(source_desc, "ab", type="suggestedcitation").text = "Suggested Citation"
Expand Down Expand Up @@ -495,29 +496,33 @@ def add_tei_frontpart():
def evaluate_report(report): def evaluate_report(report):
"""Print report of conversion.""" """Print report of conversion."""


print("="*60) report_string = "="*60 + "\n"
print(' '*4, "Conversion report") report_string += ' '*4 + "Conversion report\n"
print("-"*60) report_string += "-"*60 + "\n"
if len(report["bad_figures"]) > 0: if len(report["bad_figures"]) > 0:
print("{} {} could not be linked to a file in the image directory:".format(len(report["bad_figures"]), libeoaconvert.plural(len(report["bad_figures"]), "figure"))) report_string += "{} {} could not be linked to a file in the image directory:\n".format(len(report["bad_figures"]), libeoaconvert.plural(len(report["bad_figures"]), "figure"))
for item in report["bad_figures"]: for item in report["bad_figures"]:
print(' '*4, item) report_string += ' '*4 + item + "\n"
else: else:
print("All figures were linked.") report_string += "All figures were linked.\n"
if len(report["citekeys_not_in_bib"]) > 0: if len(report["citekeys_not_in_bib"]) > 0:
print("{} of {} {} could not be found in the bibliography database:".format(len(report["citekeys_not_in_bib"]), report["len_citekeys"], libeoaconvert.plural(len(report["citekeys_not_in_bib"]), "citation"))) report_string += "{} of {} {} could not be found in the bibliography database:\n".format(len(report["citekeys_not_in_bib"]), report["len_citekeys"], libeoaconvert.plural(len(report["citekeys_not_in_bib"]), "citation"))
for item in report["citekeys_not_in_bib"]: for item in report["citekeys_not_in_bib"]:
print(' '*4, item) report_string += ' '*4 + item + "\n"
print("\nThe missing citations were also stored in the pickle file and can be re-used by the create_tmpbib tool.\n") report_string += "\nThe missing citations were also stored in the pickle file and can be re-used by the create_tmpbib tool.\n"
else: else:
print("All citekeys were found in the bibliography database.") report_string += "All citekeys were found in the bibliography database.\n"
if len(report["bad_pageref"]) > 0: if len(report["bad_pageref"]) > 0:
print("{} page {} could not be parsed into start and end value:".format(len(report["bad_pageref"]), libeoaconvert.plural(len(report["bad_pageref"]), "reference"))) report_string += "{} page {} could not be parsed into start and end value:".format(len(report["bad_pageref"]), libeoaconvert.plural(len(report["bad_pageref"]), "reference"))
for item in report["bad_pageref"]: for item in report["bad_pageref"]:
print(' '*4, item) report_string += ' '*4 + item + "\n"
else: else:
print("All page references could be parsed into discrete values.") report_string += ("All page references could be parsed into discrete values.\n")
print("="*60) report_string += "="*60 + "\n"

print(report_string)

return
# def evaluate_report ends here # def evaluate_report ends here


def main(): def main():
Expand Down Expand Up @@ -581,7 +586,7 @@ def main():
try: try:
xml_tree2 = etree.fromstring(mod_string3) xml_tree2 = etree.fromstring(mod_string3)
except etree.XMLSyntaxError: except etree.XMLSyntaxError:
print("\nXML syntax error when trying to parse modified tree. Dumped it to %s." % debug_output) logging.error("\nXML syntax error when trying to parse modified tree. Dumped it to %s." % debug_output)
print("-"*60) print("-"*60)
traceback.print_exc(file=sys.stdout) traceback.print_exc(file=sys.stdout)
print("-"*60) print("-"*60)
Expand Down

0 comments on commit 254a81b

Please sign in to comment.