def sanitize_data_string(text_string):
    """Return text_string on a single line with normalized whitespace.

    Every run of whitespace (spaces, tabs, carriage returns, line feeds)
    is collapsed into one space, and leading/trailing whitespace is
    removed. The result is used by handle_refs_hyperimage for the text and
    the ``data-title`` attribute of a citation element.

    Line breaks are replaced by a space instead of being deleted, so that
    words separated only by a line break are not glued together.

    Args:
        text_string: the string to clean up, e.g. a formatted citation.

    Returns:
        The cleaned string; empty if the input holds only whitespace.
    """

    # str.split() without arguments splits on any whitespace run and drops
    # empty leading/trailing fields, so joining with a single space both
    # collapses and strips in one pass.
    return " ".join(text_string.split())
# def sanitize_data_string ends here