From 254a81b43e97e99d90c5969fcbba0a5d770364fe Mon Sep 17 00:00:00 2001 From: Klaus Thoden Date: Thu, 22 Nov 2018 11:43:30 +0100 Subject: [PATCH] Code cleanup --- fix_tei.py | 53 +++++++++++++++++++++++++++++------------------------ 1 file changed, 29 insertions(+), 24 deletions(-) diff --git a/fix_tei.py b/fix_tei.py index 3da69c1..060d019 100644 --- a/fix_tei.py +++ b/fix_tei.py @@ -51,7 +51,7 @@ RUNNING_DIRECTORY = os.path.dirname(os.path.realpath(__file__)) logging.debug("The script is run from {}".format(RUNNING_DIRECTORY)) TEI_BOILERPLATE = os.path.sep.join([RUNNING_DIRECTORY, "data", "tei_boilerplate.cfg"]) -print(TEI_BOILERPLATE) + if not os.path.exists(TEI_BOILERPLATE): logging.error("Could not find TEI boilerplate config. Exiting.") sys.exit() @@ -70,8 +70,8 @@ def parse_bibtex(bibfile): all_references.update(tmp_dict) - return tmp_dict # return all_references + return tmp_dict # def parse_bibtex ends here def restore_xml_tags(text): @@ -119,7 +119,8 @@ def fixup(m): text = chr(html.entities.name2codepoint[text[1:-1]]) except KeyError: pass - return text # leave as is + # leave as is + return text return re.sub(r"&#?\w+;", fixup, text) # def unescape ends here @@ -295,7 +296,7 @@ def cleanup_xml(xml_tree): logging.info("Found %s colour attributes." 
% len(color_attrib)) for attribute in metypeset_attrib: - logging.info("number of attributes: %s" % len(attribute.attrib)) + logging.info("Number of attributes: %s" % len(attribute.attrib)) attribute.attrib.pop("meTypesetSize") for attribute in color_attrib: @@ -385,7 +386,7 @@ def fix_tei_header(xml_tree, bibfile_string, bibtype): respname = etree.SubElement(example_resp, "persName") surname = etree.SubElement(respname, "surname").text = "N" forename = etree.SubElement(respname, "forename").text = "N" - libeoaconvert.insert_after(example_resp, author_element) + author_element.addnext(example_resp) edition = xml_tree.xpath("//t:editionStmt/t:edition", namespaces=NS_MAP)[0] edition_date = edition.find("t:date", namespaces=NS_MAP) @@ -402,7 +403,8 @@ def fix_tei_header(xml_tree, bibfile_string, bibtype): extent_element = etree.Element("extent") pages = etree.SubElement(extent_element, "measure", commodity="pages", quantity="0") price = etree.SubElement(extent_element, "measure", type="price", unit="EUR", quantity="0") - libeoaconvert.insert_after(extent_element, publication_statement, before=True) + + publication_statement.addprevious(extent_element) publisher_element = etree.SubElement(publication_statement, "publisher") overall_org = etree.SubElement(publisher_element, "orgName", n="EOA", ref=BOILERPLATES.get("Header","eoa_url")) @@ -423,9 +425,8 @@ def fix_tei_header(xml_tree, bibfile_string, bibtype): resp_names = etree.SubElement(resp_stmt, "name", type="serieseditors") resp_names.text = BOILERPLATES.get("Header","mprl_series_editors") series_number = etree.SubElement(series_stmt, "idno", type="number").text = "number" - publication_stmt_parent = publication_statement.getparent() - series_stmt_insertion_point = libeoaconvert.get_place_in_xml_tree(publication_statement, publication_stmt_parent) + 1 - publication_stmt_parent.insert(series_stmt_insertion_point, series_stmt) + + publication_statement.addnext(series_stmt) source_desc = 
xml_tree.xpath("//t:sourceDesc", namespaces=NS_MAP)[0] suggested_citation = etree.SubElement(source_desc, "ab", type="suggestedcitation").text = "Suggested Citation" @@ -495,29 +496,33 @@ def add_tei_frontpart(): def evaluate_report(report): """Print report of conversion.""" - print("="*60) - print(' '*4, "Conversion report") - print("-"*60) + report_string = "="*60 + "\n" + report_string += ' '*4 + "Conversion report\n" + report_string += "-"*60 + "\n" if len(report["bad_figures"]) > 0: - print("{} {} could not be linked to a file in the image directory:".format(len(report["bad_figures"]), libeoaconvert.plural(len(report["bad_figures"]), "figure"))) + report_string += "{} {} could not be linked to a file in the image directory:\n".format(len(report["bad_figures"]), libeoaconvert.plural(len(report["bad_figures"]), "figure")) for item in report["bad_figures"]: - print(' '*4, item) + report_string += ' '*4 + item + "\n" else: - print("All figures were linked.") + report_string += "All figures were linked.\n" if len(report["citekeys_not_in_bib"]) > 0: - print("{} of {} {} could not be found in the bibliography database:".format(len(report["citekeys_not_in_bib"]), report["len_citekeys"], libeoaconvert.plural(len(report["citekeys_not_in_bib"]), "citation"))) + report_string += "{} of {} {} could not be found in the bibliography database:\n".format(len(report["citekeys_not_in_bib"]), report["len_citekeys"], libeoaconvert.plural(len(report["citekeys_not_in_bib"]), "citation")) for item in report["citekeys_not_in_bib"]: - print(' '*4, item) - print("\nThe missing citations were also stored in the pickle file and can be re-used by the create_tmpbib tool.\n") + report_string += ' '*4 + item + "\n" + report_string += "\nThe missing citations were also stored in the pickle file and can be re-used by the create_tmpbib tool.\n" else: - print("All citekeys were found in the bibliography database.") + report_string += "All citekeys were found in the bibliography database.\n" if 
len(report["bad_pageref"]) > 0: - print("{} page {} could not be parsed into start and end value:".format(len(report["bad_pageref"]), libeoaconvert.plural(len(report["bad_pageref"]), "reference"))) + report_string += "{} page {} could not be parsed into start and end value:\n".format(len(report["bad_pageref"]), libeoaconvert.plural(len(report["bad_pageref"]), "reference")) for item in report["bad_pageref"]: - print(' '*4, item) + report_string += ' '*4 + item + "\n" else: - print("All page references could be parsed into discrete values.") - print("="*60) + report_string += ("All page references could be parsed into discrete values.\n") + report_string += "="*60 + "\n" + + print(report_string) + + return # def evaluate_report ends here def main(): @@ -581,7 +586,7 @@ def main(): try: xml_tree2 = etree.fromstring(mod_string3) except etree.XMLSyntaxError: - print("\nXML syntax error when trying to parse modified tree. Dumped it to %s." % debug_output) + logging.error("\nXML syntax error when trying to parse modified tree. Dumped it to %s." % debug_output) print("-"*60) traceback.print_exc(file=sys.stdout) print("-"*60)