From 5730c4861e3150582ba2f4eb9adc3e4b78bd0885 Mon Sep 17 00:00:00 2001 From: EsGeh Date: Thu, 4 Apr 2019 17:28:36 +0200 Subject: [PATCH] continued refactoring and adding comments, removed obsolete commented code. --- eoatex2imxml.py | 853 ++++++++++++++++++----------------------- utils/libeoaconvert.py | 1 - 2 files changed, 381 insertions(+), 473 deletions(-) diff --git a/eoatex2imxml.py b/eoatex2imxml.py index 5a01ad1..b5007a9 100755 --- a/eoatex2imxml.py +++ b/eoatex2imxml.py @@ -271,40 +271,29 @@ def TeX2PNG(LaTeXCode, Type, Chapter, Number): return LaTeXCode # def TeX2PNG ends here -def make_bibchecker(bib_database, set_citations): +def make_latex_bibl_file( + bib_database, + set_citations, + files +): """Construct a separate latex file with bibliography. The HTML bibliography is still not perfectly formatted like the LaTeX version. To check both files, a separate file is made that and which is then also converted in the various formats. """ - - tmp_latex = open(TEMPLATE_PATH / "largebib.tex", "r") - largebib_template = tmp_latex.read() - tmp_latex.close() - - tmp_xml = open(TEMPLATE_PATH / "largebib-xml.tex", "r") - largebib_xml_template = tmp_xml.read() - tmp_xml.close() - string_citations = ", ".join(set_citations) - - largebib_template_string = string.Template(largebib_template) - largebib_replacement = largebib_template_string.substitute(INSERT_BIB_DATABASE = bib_database, INSERT_CITEKEYS = string_citations) - - largebib_template_string_xml = string.Template(largebib_xml_template) - largebib_replacement_xml = largebib_template_string_xml.substitute(INSERT_BIB_DATABASE = bib_database, INSERT_CITEKEYS = string_citations) - - tmp_latex_file = DEBUG_DIR / "debug_onlybib.tex" - tmp_latex = open(tmp_latex_file, "w") - tmp_latex.write(largebib_replacement) - tmp_latex.close() - - tmp_xml_file = DEBUG_DIR / "debug_onlybib-xml.tex" - tmp_xml = open(tmp_xml_file, "w") - tmp_xml.write(largebib_replacement_xml) - tmp_xml.close() -# def make_bibchecker ends here + for (input_path, output_path) in files: + with open(input_path, "r") as tmp_latex: + largebib_template = tmp_latex.read() + largebib_template_string = string.Template( largebib_template ) + largebib_replacement = largebib_template_string.substitute( + INSERT_BIB_DATABASE = bib_database, + INSERT_CITEKEYS = string_citations + ) + with open(output_path, "w") as tmp_latex: + tmp_latex.write(largebib_replacement) +# def make_latex_bibl_file ends here def sanitize_bibentry(bibEntry): """Some additional cleanup actions""" @@ -315,6 +304,7 @@ def sanitize_bibentry(bibEntry): return(bibEntry.strip()) # def sanitize_bibentry ends here +''' def createBibEntryAuthorYear(bibEntry, boolSameAuthor): """Function to create a complete Entry of a publication (epub & django) for author-year citation""" strBibEntry = "" @@ -373,6 +363,7 @@ def createBibEntryAuthorYear(bibEntry, boolSameAuthor): return sanitize_bibentry(strBibEntry) # def createBibEntryAuthorYear ends here +''' def createBibEntryNumeric(bibEntry): """Function to create a complete Entry of a publication (epub & django) for numeric citation""" @@ -492,6 +483,7 @@ def run_tralics( ignore_fail = True # :-D ) +# .tex -> .xml run_tralics( input_file = INPUT_PATH_NO_EXT + '.tex', TRALICS_PATH_LIB = TRALICS_PATH_LIB, @@ -527,8 +519,7 @@ def fix_underscore_and_eoatranscripted( logging.info("-----------------------------------------------------") logging.info("Move EOAlanguage from into attribute of EOAchapter") -intChapterNumber = 1 -for xmlChapter in xmlChapters: +for intChapterNumber, xmlChapter in enumerate(xmlChapters, start=1): xmlLanguage = xmlChapter.find(".//EOAlanguage") if xmlLanguage is not None: strLanguage = xmlLanguage.text or "english" @@ -536,7 +527,6 @@ def fix_underscore_and_eoatranscripted( xmlLanguage.text = None logging.info("The language of Chapter %d is %s." % (intChapterNumber, strLanguage)) xmlChapter = etree.strip_tags(xmlChapter, "EOAlanguage") - intChapterNumber += 1 ############################################################## # Numbering and Typesetting various Elements # @@ -1137,7 +1127,9 @@ def fix_underscore_and_eoatranscripted( if matched_citation is not None: set_citations.add(matched_citation.group(1)) +logging.info("page labels:") logging.info(dictPagelabels) +logging.info("citations:") logging.info(set_citations) logging.info("-----------------------------------------------------") @@ -1166,118 +1158,49 @@ def fix_underscore_and_eoatranscripted( ############################################################## # Preparing the Bibliography # ############################################################## -if xmlTree.find(".//EOAbibliographydatabase") is not None: - bib_database = xmlTree.find(".//EOAbibliographydatabase").text - HAS_BIBLIOGRAPHY = True -else: - logging.waruning("No database found.") - input("Do you want to continue? Press enter.") - HAS_BIBLIOGRAPHY = False -bib_type = xmlTree.find(".//EOAbibliographytype").text -if bib_type not in ["monograph", "anthology", "monograph-numeric", "anthology-numeric"]: - logging.error(f"The bibtype must be one of {','.join[bib_type]}. Exiting") - sys.exit() +def bibl_info_from_xml( + xmlTree +): + if xmlTree.find(".//EOAbibliographydatabase") is not None: + bib_database = xmlTree.find(".//EOAbibliographydatabase").text + else: + return None + + bib_type = xmlTree.find(".//EOAbibliographytype").text + if bib_type not in ["monograph", "anthology", "monograph-numeric", "anthology-numeric"]: + raise( Exception(f"The bibtype must be one of {','.join[bib_type]}. Exiting") ) -# the new solution: pandoc-citeproc -interim_bib_json_file = INPUT_PATH_NO_EXT + "-bib.json" -citeproc_command = "pandoc-citeproc --bib2json %s" % bib_database + ".bib" -logging.debug(f"Running citeproc with the following command: {citeproc_command}") -citeproc_arguments = shlex.split(citeproc_command) -citeproc_process = subprocess.Popen(citeproc_arguments, stdout=subprocess.PIPE) -citeproc_json = citeproc_process.stdout.read() -citations_json = json.loads(citeproc_json) -# for x in citations_json: -# print(x["title"]) - -logging.debug(f"Dumping bib json file: {interim_bib_json_file}.") -with open(interim_bib_json_file, 'w') as ibjf: - json.dump(citeproc_json.decode('utf-8'), ibjf) - - - -#################### -# the old solution # -#################### -# # Copy interim .bbl-File to interim bib.tex file -# interim_bibtex_file = INPUT_PATH_NO_EXT + "bib.tex" -# try: -# shutil.copy(BIBERFILE, interim_bibtex_file) -# except FileNotFoundError: -# print("%s has not been created yet. Switch TeX distribution to TeXLive2016, run biber_2.1 -O biber2-1.bbl %s to obtain this file" % (BIBERFILE, INPUT_PATH_NO_EXT)) -# # Read all lines of Bibliographic TeX -# tmpFile = open(interim_bibtex_file, "r") -# tmpLines = tmpFile.readlines() -# tmpFile.close() - -# # First line should link to Bibliographic Praeambel -# tmpLines[0] = "\\include{%spre_bib}\n" % TEMPLATE_PATH -# # Remove unwanted lines -# for i in range(18,0,-1): -# del tmpLines[i] -# # Save changes -# tmpFile = open(interim_bibtex_file, "w") -# tmpFile.writelines(tmpLines) -# tmpFile.close() -# # TeX has been sanitized, now tralics to make it intermediate XML -# print("TeX has been sanitized, now tralics to make it intermediate XML") -# Kommando = "%s -log_file %s -confdir %s/tralics_conf -config %s/tralics.tcf -utf8 -utf8output -entnames=false %sbib.tex" % (TRALICS_PATH_EXEC, INPUT_PATH_NO_EXT + "-bib-tralics.log", TRALICS_PATH_LIB, TRALICS_PATH_LIB, INPUT_PATH_NO_EXT) -# Argumente = shlex.split(Kommando) -# Prozess = subprocess.call(Argumente) -# # Sanitize XML to make it useable -# tmpFile = open((INPUT_PATH_NO_EXT) + "bib.xml", "r") -# tmpContent = tmpFile.read() -# tmpFile.close() -# listReplace = [ r"", -# r"", -# r"", -# r"", -# r"", -# r"", -# r"", -# r"", -# r"", -# r"", -# r"", -# r"", -# r"", -# r"", -# r"", -# r"", -# r"", -# r"", -# r"uniquename=(.*?),hash=(.*?)", -# r"hash=(.*?)", -# ] -# for strReplace in listReplace: -# tmpContent = re.sub(strReplace, "", tmpContent) - -# # Put Back Underscore _ -# tmpContent = re.sub(r"", "_", tmpContent) - -# # Remove empty Lines -# tmpContent = re.sub(r"\n\n", "\n", tmpContent) - -# # Put back Ampersand -# tmpContent = re.sub(r"&", "&", tmpContent) -# tmpFile = open((INPUT_PATH_NO_EXT) + "bib.xml", "w") -# tmpFile.write(tmpContent) -# tmpFile.close() - -# # TeXML has been sanitized, now load xml-Tree -# xmlParser2 = etree.XMLParser(no_network=False,load_dtd=False) -# xmlBibTree = etree.parse((INPUT_PATH_NO_EXT + "bib.xml"), xmlParser2) -# xml_bib_entries = xmlBibTree.findall(".//entry") - -########################### -# end of the old solution # -########################### - -make_bibchecker(bib_database, set_citations) - -def print_bibliography( + return (bib_type, bib_database) + +# .bib -> .json +# (return json data as python dict) +def write_json_bibl( + bibl_info, + output_file, +): + (bib_type, bib_database) = bibl_info + # the new solution: pandoc-citeproc + # interim_bib_json_file = INPUT_PATH_NO_EXT + "-bib.json" + citeproc_command = "pandoc-citeproc --bib2json %s" % bib_database + ".bib" + logging.debug(f"Running citeproc with the following command: {citeproc_command}") + citeproc_arguments = shlex.split(citeproc_command) + citeproc_process = subprocess.Popen(citeproc_arguments, stdout=subprocess.PIPE) + citeproc_json = citeproc_process.stdout.read() + citations_json = json.loads(citeproc_json) + # for x in citations_json: + # print(x["title"]) + + logging.debug(f"Dumping bib json file: {output_file}.") + with open(output_file, 'w') as ibjf: + json.dump(citeproc_json.decode('utf-8'), ibjf) + return citations_json + +def add_bibliography_to_xml( print_bibl_element, - xml_context, + chapter_element, + bib_database, + citations_json, tmp_citation_filename ): bibliography_keyword = print_bibl_element.get("keyword") @@ -1290,11 +1213,8 @@ def print_bibliography( xmlBibliographyDiv = etree.Element("div") xmlBibliography.addnext(xmlBibliographyDiv) - citekeys = xml_context.xpath(".//citekey/text()") - nocite_elements = xml_context.xpath(".//nocite") - - # citekeys = xmlChapter.xpath(".//citekey/text()") - # nocite_elements = xmlChapter.xpath(".//nocite") + citekeys = chapter_element.xpath(".//citekey/text()") + nocite_elements = chapter_element.xpath(".//nocite") if nocite_elements: logging.debug(f"Found {libeoaconvert.plural(len(nocite_elements), 'nocite command')}.") @@ -1329,345 +1249,247 @@ def print_bibliography( for entry in fixed_entries: xmlBibliographyDiv.append(entry) -# If Bibliography-Type is monograph search for EOAbibliography and make it all -if bib_type == "monograph": - - tmp_citation_filename = TEMP_DIR / "used_citations-monograph" - if xmlTree.find(".//EOAprintbibliography") is not None: - # to insert here: with keywords we can have multiple bibliographies - xmlBibliography = xmlTree.find(".//EOAprintbibliography") - print_bibliography( - xmlBibliography, - xmlTree, - tmp_citation_filename - ) +bibl_info = bibl_info_from_xml( + xmlTree +) +if bibl_info is None: + logging.warning("No bibliography database found.") -# If Bibliography-Type is anthology search for EOAbibliography and make one per chapter -if bib_type == "anthology": +if bibl_info is not None: + logging.info( ".bib -> .json") + citations_json = write_json_bibl( + bibl_info, + output_file = TEMP_DIR / (INPUT_PATH_NO_EXT + "-bib.json") + ) + + (bib_type, bib_database) = bibl_info logging.debug(f"bib type is {bib_type}") - intChapterNumber = 1 - for xmlChapter in xmlChapters: - logging.debug(f"Looking at chapter {intChapterNumber}.") - tmp_citation_filename = TEMP_DIR / ("used_citations-anthology-chapter_{:02d}".format(intChapterNumber)) - if xmlChapter.find(".//EOAprintbibliography") is not None: - xmlBibliography = xmlChapter.find(".//EOAprintbibliography") + ## only for debugging (?) + make_latex_bibl_file( + bib_database = bib_database, + set_citations = set_citations, + files = [ + (TEMPLATE_PATH / "largebib.tex", DEBUG_DIR / "debug_onlybib.tex"), + (TEMPLATE_PATH / "largebib-xml.tex", DEBUG_DIR / "debug_onlybib-xml.tex"), + ] + ) - print_bibliography( + # If Bibliography-Type is monograph search for EOAbibliography and make it all + if bib_type == "monograph": + tmp_citation_filename = TEMP_DIR / "used_citations-monograph" + if xmlTree.find(".//EOAprintbibliography") is not None: + # to insert here: with keywords we can have multiple bibliographies + xmlBibliography = xmlTree.find(".//EOAprintbibliography") + + add_bibliography_to_xml( xmlBibliography, - xmlChapter, - tmp_citation_filename + xmlTree, + bib_database = bibl_info[1], + citations_json = citations_json, + tmp_citation_filename = tmp_citation_filename ) - else: - # create an empty file - logging.debug("No bibliography found.") - open(TEMP_DIR / (tmp_citation_filename + "_nocitations"), 'a').close() - - """ - - References -
- -
-

Abril Castelló, Vidal (1987). Las Casas contra Vitoria, 1550–1552: La revolución de la duodécima réplica. Causas y consecuencias. Revista de Indias 47(179):83–101.

-

Agrawal, Arun (1995). Dismantling the Divide Between Indigenous and Scientific Knowledge. Development and Change 26:413–439.

-
-
- - """ - - - ############### - # old version # - ############### - # xmlRefsections = xmlBibTree.findall(".//refsection") - # for xmlRefsection in xmlRefsections: - # if xmlRefsection.find(".//number").text == str(intChapterNumber): - # break - # xml_bib_entries = xmlRefsection.findall(".//entry") - # intNumberOfEntry = 0 - # for xmlEntry in xml_bib_entries: - # if intNumberOfEntry == 0: - # # Don't check for previous author if first entry of the Bibliography - # bibEntry = Bibitem(xmlEntry) - # strNewentry = "

" + createBibEntryAuthorYear(bibEntry, boolSameAuthor=False) + "

" - # xmlNew = etree.fromstring(strNewentry) - # xmlBibliographyDiv.append(xmlNew) - # else: - # bibEntry = Bibitem(xmlEntry) - # # Check if author of previous Entry is the same - # bibEntryPrevious = Bibitem(xml_bib_entries[intNumberOfEntry - 1]) - # if bibEntry.fullauthorlastfirst() == bibEntryPrevious.fullauthorlastfirst(): - # print(bibEntry.citekey()) - # strNewentry = "

" + createBibEntryAuthorYear(bibEntry, boolSameAuthor=True) + "

" - # xmlNew = etree.fromstring(strNewentry) - # xmlBibliographyDiv.append(xmlNew) - # else: - # print(bibEntry.citekey()) - # strNewentry = "

" + createBibEntryAuthorYear(bibEntry, boolSameAuthor=False) + "

" - # xmlNew = etree.fromstring(strNewentry) - # xmlBibliographyDiv.append(xmlNew) - # intNumberOfEntry += 1 - - ###################### - # end of old version # - ###################### + # If Bibliography-Type is anthology search for EOAbibliography and make one per chapter + elif bib_type == "anthology": + for intChapterNumber, xmlChapter in enumerate(xmlChapters, start = 1): + logging.debug(f"Looking at chapter {intChapterNumber}.") + tmp_citation_filename = TEMP_DIR / ("used_citations-anthology-chapter_{:02d}".format(intChapterNumber)) + if xmlChapter.find(".//EOAprintbibliography") is not None: + xmlBibliography = xmlChapter.find(".//EOAprintbibliography") + + add_bibliography_to_xml( + xmlBibliography, + xmlChapter, + bib_database = bibl_info[1], + citations_json = citations_json, + tmp_citation_filename = tmp_citation_filename + ) - intChapterNumber += 1 -# for the time being -strCitation = "" + else: + # create an empty file + logging.debug("No bibliography found.") + open(TEMP_DIR / (tmp_citation_filename + "_nocitations"), 'a').close() -# Bibliographies are done, now for the citations -if bib_type == "anthology" or bib_type == "monograph": - intChapterNumber = 1 + """ + + References +
+ +
+

Abril Castelló, Vidal (1987). Las Casas contra Vitoria, 1550–1552: La revolución de la duodécima réplica. Causas y consecuencias. Revista de Indias 47(179):83–101.

+

Agrawal, Arun (1995). Dismantling the Divide Between Indigenous and Scientific Knowledge. Development and Change 26:413–439.

+
+
+
+ """ - if bib_type == "monograph": - tmp_citation_filename = "used_citations-monograph" - tmp_path_html = TEMP_DIR / (tmp_citation_filename + ".html") - with open(tmp_path_html, "r") as formatted_citations: - form_cit = BeautifulSoup(formatted_citations, "html.parser") + # for the time being + strCitation = "" - for xmlChapter in xmlChapters: - logging.info("-----------------------------------------------------") - logging.info("Processing References for Chapter " + str(intChapterNumber)) - xmlCitations = xmlChapter.xpath(".//EOAciteauthoryear | .//EOAciteyear | .//EOAcitemanual") + # Bibliographies are done, now for the citations + if bib_type == "anthology" or bib_type == "monograph": + intChapterNumber = 1 - if bib_type == "anthology": - tmp_citation_filename = "used_citations-anthology-chapter_{:02d}".format(intChapterNumber) + if bib_type == "monograph": + tmp_citation_filename = "used_citations-monograph" tmp_path_html = TEMP_DIR / (tmp_citation_filename + ".html") - no_cite_path = TEMP_DIR / (tmp_citation_filename + "_nocitations") - if os.path.exists(tmp_path_html): - with open(tmp_path_html, "r") as formatted_citations: - form_cit = BeautifulSoup(formatted_citations, "html.parser") - elif os.path.exists(no_cite_path): - logging.debug("no citations in this chapter") - intChapterNumber += 1 - continue - - counter_citations = 1 - - for xmlCitation in xmlCitations: - string_citekey = xmlCitation.find("./citekey").text - progress(counter_citations, len(xmlCitations),"Processing reference %s of %s: %s" % (counter_citations, len(xmlCitations), string_citekey)) - # If Bibliography-Type is anthology find Refsection for this Chapter - ############### - # old version # - ############### - # if bib_type == "anthology": - # print("Yes, it's anthology time!") - # xmlRefsections = xmlBibTree.findall(".//refsection") - # for xmlRefsection in xmlRefsections: - # if xmlRefsection.find(".//number").text == str(intChapterNumber): - # break - # xml_bib_entries = xmlRefsection.findall(".//entry") - - ################### - # end old version # - ################### - # If Bibliography-Type is monograph find all entries, forget about refsection - - ############### - # old version # - ############### - """ - if bib_type == "monograph": - xml_bib_entries = xmlBibTree.findall(".//entry") - for xmlEntry in xml_bib_entries: - bibEntry = Bibitem(xmlEntry) - if bibEntry.citekey() == xmlCitation.find("./citekey").text: - if xmlCitation.tag == "EOAciteauthoryear": - strCitation = bibEntry.shortauthor() + " " + bibEntry.labelyear() - if bibEntry.labelyearsuffix() is not None: - strCitation = strCitation + bibEntry.labelyearsuffix() - strTitle = bibEntry.title() - if xmlCitation.tag == "EOAciteyear": - strCitation = bibEntry.labelyear() - if bibEntry.labelyearsuffix() is not None: - strCitation = strCitation + bibEntry.labelyearsuffix() - strTitle = bibEntry.title() - if xmlCitation.tag == "EOAcitemanual": - cite_text = xmlCitation.find("citetext") - if cite_text.getchildren(): - tmp_string = xmlCitation.find("citetext") - tmp_string = cite_text.getchildren()[0] - strCitation = etree.tostring(tmp_string) - # BAUSTELLE!!!!! - # tmp_string2 = etree.tostring(tmp_string) - # tmp_string3 = tmp_string2.decode() - # strCitation = tmp_string3.replace("<", "<") - else: - strCitation = xmlCitation.find("citetext").text - strTitle = bibEntry.title() - if xmlCitation.find("./page") is not None and xmlCitation.find("./page").text is not None: - strCitation = strCitation + ", " + xmlCitation.find("./page").text - """ - ###################### - # end of old version # - ###################### - - ############### - # new version # - ############### - - # string_citekey = xmlCitation.find("./citekey").text - for entry in citations_json: - if entry["id"] == string_citekey: - current_citation = entry - strTitle = current_citation["title"] - - # [1:-1] to remove parentheses around citations - try: - citeauthoryear_value = form_cit.select("#citeauthoryear ~ p > span[data-cites='%s']" % string_citekey)[0].text[1:-1] - except IndexError: - logging.error("Could not find {}. Exiting.".format(string_citekey)) - sys.exit() - data_title_value = citeauthoryear_value - if xmlCitation.tag == "EOAciteauthoryear": - strCitation = citeauthoryear_value - elif xmlCitation.tag == "EOAciteyear": - strCitation = form_cit.select("#citeyear ~ p > span[data-cites='%s']" % string_citekey)[0].text[1:-1] - elif xmlCitation.tag == "EOAcitemanual": - cite_text = xmlCitation.find("citetext") - if cite_text.getchildren(): - tmp_string = xmlCitation.find("citetext") - tmp_string = cite_text.getchildren()[0] - strCitation = etree.tostring(tmp_string) - # BAUSTELLE!!!!! - # tmp_string2 = etree.tostring(tmp_string) - # tmp_string3 = tmp_string2.decode() - # strCitation = tmp_string3.replace("<", "<") - else: - strCitation = xmlCitation.find("citetext").text - - if xmlCitation.find("./page") is not None and xmlCitation.find("./page").text is not None: - pages_text = libeoaconvert.gettext(xmlCitation.find("./page")) - strCitation = strCitation + ", " + pages_text - data_title_value = data_title_value + ", " + pages_text - # strCitation = strCitation + ", " + xmlCitation.find("./page").text - - ###################### - # end of new version # - ###################### - - # Hier den XML-Tag durch die Quellenangabe ersetzen - tmpTail = xmlCitation.tail - xmlCitation.clear() - xmlCitation.tag = "span" - xmlCitation.set("rel", "popover") - xmlCitation.set("class", "citation") - xmlCitation.set("citekey", string_citekey) - xmlCitation.text = strCitation - xmlCitation.tail = tmpTail - # Create Link to be used for website in a popover - xmlCitation.set("data-toggle", "popover") - xmlCitation.set("html", "true") - xmlCitation.set("data-placement", "bottom") - xmlCitation.set("data-title", data_title_value) - try: - xmlCitation.set("data-content", strTitle) - except: - xmlCitation.set("data-content", "missing") - counter_citations += 1 - intChapterNumber += 1 + with open(tmp_path_html, "r") as formatted_citations: + form_cit = BeautifulSoup(formatted_citations, "html.parser") + + for xmlChapter in xmlChapters: + logging.info("-----------------------------------------------------") + logging.info("Processing References for Chapter " + str(intChapterNumber)) + xmlCitations = xmlChapter.xpath(".//EOAciteauthoryear | .//EOAciteyear | .//EOAcitemanual") + + if bib_type == "anthology": + tmp_citation_filename = "used_citations-anthology-chapter_{:02d}".format(intChapterNumber) + tmp_path_html = TEMP_DIR / (tmp_citation_filename + ".html") + no_cite_path = TEMP_DIR / (tmp_citation_filename + "_nocitations") + if os.path.exists(tmp_path_html): + with open(tmp_path_html, "r") as formatted_citations: + form_cit = BeautifulSoup(formatted_citations, "html.parser") + elif os.path.exists(no_cite_path): + logging.debug("no citations in this chapter") + intChapterNumber += 1 + continue -# If Bibliography-Type is monograph-numeric search for EOAbibliography and make it all -if bib_type == "monograph-numeric": - if xmlTree.find(".//EOAprintbibliography") is not None: - dictCitekeysNumbers = {} - dictCitekeysTitles = {} - xmlBibliography = xmlTree.find(".//EOAprintbibliography") - xmlBibliography.clear() - xmlBibliography.tag = "div" - xmlBibliography.getparent().tag = "div" - xml_bib_entries = xmlBibTree.findall(".//entry") - intNumberOfEntry = 1 - for xmlEntry in xml_bib_entries: - # Go through all entries and assign a number to the citekey - bibEntry = Bibitem(xmlEntry) - strCitekey = bibEntry.citekey() - dictCitekeysNumbers[strCitekey] = str(intNumberOfEntry) - dictCitekeysTitles[strCitekey] = str(bibEntry.title()) - strNewentry = "

[" + str(intNumberOfEntry) + "] " + createBibEntryNumeric(bibEntry) + "

" - xmlNew = etree.fromstring(strNewentry) - xmlBibliography.append(xmlNew) - intNumberOfEntry += 1 - # Now for the references via EOAcitenumeric - xmlCitenumerics = xmlTree.findall(".//EOAcitenumeric") - for xmlCitenumeric in xmlCitenumerics: - logging.info(etree.tostring(xmlCitenumeric)) - strPopover = "" - tmpCitekeys = xmlCitenumeric.find(".//citekey").text - tmpCitekeys = re.sub(" ", "", tmpCitekeys) - tmpCitekeys = re.sub("\n", "", tmpCitekeys) - listCitekeys = re.split("\,", tmpCitekeys) - listCitenumbers = [] - for strCitekey in listCitekeys: - listCitenumbers.append(dictCitekeysNumbers[strCitekey]) - # Create Text to be used on the website in a popover - strPopover = strPopover + "[" + dictCitekeysNumbers[strCitekey] + "] " + dictCitekeysTitles[strCitekey] + " " - listCitenumbers = sorted(listCitenumbers, key=int) - strResult = "[" + listCitenumbers[0] - intNumberOfSequentialCite = 0 - for i in range(1,len(listCitenumbers)): - intPreviousCitenumber = int(listCitenumbers[i-1]) - intCurrentCitenumber = int(listCitenumbers[i]) - if i == (len(listCitenumbers)-1): - if (intPreviousCitenumber + 1) == intCurrentCitenumber: - if intNumberOfSequentialCite == 0: - strResult = strResult + "," + str(listCitenumbers[i]) - else: - strResult = strResult + "-" + str(listCitenumbers[i]) - intNumberOfSequentialCite == 0 - else: - strResult = strResult + "," + str(listCitenumbers[i]) - break - intNextCitenumber = int(listCitenumbers[i+1]) - if (intCurrentCitenumber + 1) != intNextCitenumber: - if intNumberOfSequentialCite != 0: - strResult = strResult + "-" + str(intCurrentCitenumber) - intNumberOfSequentialCite = 0 + counter_citations = 1 + + for xmlCitation in xmlCitations: + string_citekey = xmlCitation.find("./citekey").text + progress(counter_citations, len(xmlCitations),"Processing reference %s of %s: %s" % (counter_citations, len(xmlCitations), string_citekey)) + # If Bibliography-Type is anthology find Refsection for this Chapter + ############### + # old version # + ############### + # if bib_type == "anthology": + # print("Yes, it's anthology time!") + # xmlRefsections = xmlBibTree.findall(".//refsection") + # for xmlRefsection in xmlRefsections: + # if xmlRefsection.find(".//number").text == str(intChapterNumber): + # break + # xml_bib_entries = xmlRefsection.findall(".//entry") + + ################### + # end old version # + ################### + # If Bibliography-Type is monograph find all entries, forget about refsection + + ############### + # old version # + ############### + """ + if bib_type == "monograph": + xml_bib_entries = xmlBibTree.findall(".//entry") + for xmlEntry in xml_bib_entries: + bibEntry = Bibitem(xmlEntry) + if bibEntry.citekey() == xmlCitation.find("./citekey").text: + if xmlCitation.tag == "EOAciteauthoryear": + strCitation = bibEntry.shortauthor() + " " + bibEntry.labelyear() + if bibEntry.labelyearsuffix() is not None: + strCitation = strCitation + bibEntry.labelyearsuffix() + strTitle = bibEntry.title() + if xmlCitation.tag == "EOAciteyear": + strCitation = bibEntry.labelyear() + if bibEntry.labelyearsuffix() is not None: + strCitation = strCitation + bibEntry.labelyearsuffix() + strTitle = bibEntry.title() + if xmlCitation.tag == "EOAcitemanual": + cite_text = xmlCitation.find("citetext") + if cite_text.getchildren(): + tmp_string = xmlCitation.find("citetext") + tmp_string = cite_text.getchildren()[0] + strCitation = etree.tostring(tmp_string) + # BAUSTELLE!!!!! + # tmp_string2 = etree.tostring(tmp_string) + # tmp_string3 = tmp_string2.decode() + # strCitation = tmp_string3.replace("<", "<") + else: + strCitation = xmlCitation.find("citetext").text + strTitle = bibEntry.title() + if xmlCitation.find("./page") is not None and xmlCitation.find("./page").text is not None: + strCitation = strCitation + ", " + xmlCitation.find("./page").text + """ + ###################### + # end of old version # + ###################### + + ############### + # new version # + ############### + + # string_citekey = xmlCitation.find("./citekey").text + for entry in citations_json: + if entry["id"] == string_citekey: + current_citation = entry + strTitle = current_citation["title"] + + # [1:-1] to remove parentheses around citations + try: + citeauthoryear_value = form_cit.select("#citeauthoryear ~ p > span[data-cites='%s']" % string_citekey)[0].text[1:-1] + except IndexError: + logging.error("Could not find {}. Exiting.".format(string_citekey)) + sys.exit() + data_title_value = citeauthoryear_value + if xmlCitation.tag == "EOAciteauthoryear": + strCitation = citeauthoryear_value + elif xmlCitation.tag == "EOAciteyear": + strCitation = form_cit.select("#citeyear ~ p > span[data-cites='%s']" % string_citekey)[0].text[1:-1] + elif xmlCitation.tag == "EOAcitemanual": + cite_text = xmlCitation.find("citetext") + if cite_text.getchildren(): + tmp_string = xmlCitation.find("citetext") + tmp_string = cite_text.getchildren()[0] + strCitation = etree.tostring(tmp_string) + # BAUSTELLE!!!!! + # tmp_string2 = etree.tostring(tmp_string) + # tmp_string3 = tmp_string2.decode() + # strCitation = tmp_string3.replace("<", "<") else: - strResult = strResult + "," + str(intCurrentCitenumber) - continue - if (intPreviousCitenumber + 1) == intCurrentCitenumber: - intNumberOfSequentialCite += 1 - continue - else: - strResult = strResult + "," + str(intCurrentCitenumber) - intNumberOfSequentialCite = 0 - strResult = strResult + "]" - xmlCitenumeric.text = strResult - # Create Link to be used for website - xmlCitenumeric.set("data-toggle", "popover") - xmlCitenumeric.set("html", "true") - xmlCitenumeric.set("data-content", strPopover) - xmlCitenumeric.set("class","citation") - xmlCitenumeric.set("data-placement", "bottom") - xmlCitenumeric.set("data-title", strResult) - -# author is missing! -# print("xmlBibliography") -# print(etree.tostring(xmlBibliography)) -# input() - -# Numeric citations for the individual chapters -if bib_type == "anthology-numeric": - intChapterNumber = 1 - for xmlChapter in xmlChapters: - logging.info("Processing Bibliography") - if xmlChapter.find(".//EOAprintbibliography") is not None: + strCitation = xmlCitation.find("citetext").text + + if xmlCitation.find("./page") is not None and xmlCitation.find("./page").text is not None: + pages_text = libeoaconvert.gettext(xmlCitation.find("./page")) + strCitation = strCitation + ", " + pages_text + data_title_value = data_title_value + ", " + pages_text + # strCitation = strCitation + ", " + xmlCitation.find("./page").text + + ###################### + # end of new version # + ###################### + + # Hier den XML-Tag durch die Quellenangabe ersetzen + tmpTail = xmlCitation.tail + xmlCitation.clear() + xmlCitation.tag = "span" + xmlCitation.set("rel", "popover") + xmlCitation.set("class", "citation") + xmlCitation.set("citekey", string_citekey) + xmlCitation.text = strCitation + xmlCitation.tail = tmpTail + # Create Link to be used for website in a popover + xmlCitation.set("data-toggle", "popover") + xmlCitation.set("html", "true") + xmlCitation.set("data-placement", "bottom") + xmlCitation.set("data-title", data_title_value) + try: + xmlCitation.set("data-content", strTitle) + except: + xmlCitation.set("data-content", "missing") + counter_citations += 1 + intChapterNumber += 1 + + # If Bibliography-Type is monograph-numeric search for EOAbibliography and make it all + if bib_type == "monograph-numeric": + if xmlTree.find(".//EOAprintbibliography") is not None: dictCitekeysNumbers = {} dictCitekeysTitles = {} - xmlBibliography = xmlChapter.find(".//EOAprintbibliography") - #xmlBibliography.clear() + xmlBibliography = xmlTree.find(".//EOAprintbibliography") + xmlBibliography.clear() xmlBibliography.tag = "div" xmlBibliography.getparent().tag = "div" - xmlRefsections = xmlBibTree.findall(".//refsection") - for xmlRefsection in xmlRefsections: - if xmlRefsection.find(".//number").text == str(intChapterNumber): - break - xml_bib_entries = xmlRefsection.findall(".//entry") + xml_bib_entries = xmlBibTree.findall(".//entry") intNumberOfEntry = 1 for xmlEntry in xml_bib_entries: # Go through all entries and assign a number to the citekey @@ -1680,18 +1502,16 @@ def print_bibliography( xmlBibliography.append(xmlNew) intNumberOfEntry += 1 # Now for the references via EOAcitenumeric - xmlCitenumerics = xmlChapter.xpath(".//EOAcitenumeric | .//EOAciteauthoryear | .//EOAciteyear") - logging.info("Found numeric citation in chapter " + str(intChapterNumber)) + xmlCitenumerics = xmlTree.findall(".//EOAcitenumeric") for xmlCitenumeric in xmlCitenumerics: + logging.info(etree.tostring(xmlCitenumeric)) strPopover = "" tmpCitekeys = xmlCitenumeric.find(".//citekey").text tmpCitekeys = re.sub(" ", "", tmpCitekeys) tmpCitekeys = re.sub("\n", "", tmpCitekeys) - logging.info(tmpCitekeys) listCitekeys = re.split("\,", tmpCitekeys) listCitenumbers = [] for strCitekey in listCitekeys: - logging.info(strCitekey) listCitenumbers.append(dictCitekeysNumbers[strCitekey]) # Create Text to be used on the website in a popover strPopover = strPopover + "[" + dictCitekeysNumbers[strCitekey] + "] " + dictCitekeysTitles[strCitekey] + " " @@ -1727,13 +1547,102 @@ def print_bibliography( intNumberOfSequentialCite = 0 strResult = strResult + "]" xmlCitenumeric.text = strResult - # Create Link to be used for website in a popover + # Create Link to be used for website xmlCitenumeric.set("data-toggle", "popover") - xmlCitenumeric.set("data-placement", "bottom") - xmlCitenumeric.set("data-title", " " + strResult) + xmlCitenumeric.set("html", "true") xmlCitenumeric.set("data-content", strPopover) xmlCitenumeric.set("class","citation") - intChapterNumber += 1 + xmlCitenumeric.set("data-placement", "bottom") + xmlCitenumeric.set("data-title", strResult) + + # author is missing! + # print("xmlBibliography") + # print(etree.tostring(xmlBibliography)) + # input() + + # Numeric citations for the individual chapters + if bib_type == "anthology-numeric": + intChapterNumber = 1 + for xmlChapter in xmlChapters: + logging.info("Processing Bibliography") + if xmlChapter.find(".//EOAprintbibliography") is not None: + dictCitekeysNumbers = {} + dictCitekeysTitles = {} + xmlBibliography = xmlChapter.find(".//EOAprintbibliography") + #xmlBibliography.clear() + xmlBibliography.tag = "div" + xmlBibliography.getparent().tag = "div" + xmlRefsections = xmlBibTree.findall(".//refsection") + for xmlRefsection in xmlRefsections: + if xmlRefsection.find(".//number").text == str(intChapterNumber): + break + xml_bib_entries = xmlRefsection.findall(".//entry") + intNumberOfEntry = 1 + for xmlEntry in xml_bib_entries: + # Go through all entries and assign a number to the citekey + bibEntry = Bibitem(xmlEntry) + strCitekey = bibEntry.citekey() + dictCitekeysNumbers[strCitekey] = str(intNumberOfEntry) + dictCitekeysTitles[strCitekey] = str(bibEntry.title()) + strNewentry = "

[" + str(intNumberOfEntry) + "] " + createBibEntryNumeric(bibEntry) + "

" + xmlNew = etree.fromstring(strNewentry) + xmlBibliography.append(xmlNew) + intNumberOfEntry += 1 + # Now for the references via EOAcitenumeric + xmlCitenumerics = xmlChapter.xpath(".//EOAcitenumeric | .//EOAciteauthoryear | .//EOAciteyear") + logging.info("Found numeric citation in chapter " + str(intChapterNumber)) + for xmlCitenumeric in xmlCitenumerics: + strPopover = "" + tmpCitekeys = xmlCitenumeric.find(".//citekey").text + tmpCitekeys = re.sub(" ", "", tmpCitekeys) + tmpCitekeys = re.sub("\n", "", tmpCitekeys) + logging.info(tmpCitekeys) + listCitekeys = re.split("\,", tmpCitekeys) + listCitenumbers = [] + for strCitekey in listCitekeys: + logging.info(strCitekey) + listCitenumbers.append(dictCitekeysNumbers[strCitekey]) + # Create Text to be used on the website in a popover + strPopover = strPopover + "[" + dictCitekeysNumbers[strCitekey] + "] " + dictCitekeysTitles[strCitekey] + " " + listCitenumbers = sorted(listCitenumbers, key=int) + strResult = "[" + listCitenumbers[0] + intNumberOfSequentialCite = 0 + for i in range(1,len(listCitenumbers)): + intPreviousCitenumber = int(listCitenumbers[i-1]) + intCurrentCitenumber = int(listCitenumbers[i]) + if i == (len(listCitenumbers)-1): + if (intPreviousCitenumber + 1) == intCurrentCitenumber: + if intNumberOfSequentialCite == 0: + strResult = strResult + "," + str(listCitenumbers[i]) + else: + strResult = strResult + "-" + str(listCitenumbers[i]) + intNumberOfSequentialCite == 0 + else: + strResult = strResult + "," + str(listCitenumbers[i]) + break + intNextCitenumber = int(listCitenumbers[i+1]) + if (intCurrentCitenumber + 1) != intNextCitenumber: + if intNumberOfSequentialCite != 0: + strResult = strResult + "-" + str(intCurrentCitenumber) + intNumberOfSequentialCite = 0 + else: + strResult = strResult + "," + str(intCurrentCitenumber) + continue + if (intPreviousCitenumber + 1) == intCurrentCitenumber: + intNumberOfSequentialCite += 1 + continue + else: + strResult = strResult + "," + str(intCurrentCitenumber) + intNumberOfSequentialCite = 0 + strResult = strResult + "]" + xmlCitenumeric.text = strResult + # Create Link to be used for website in a popover + xmlCitenumeric.set("data-toggle", "popover") + xmlCitenumeric.set("data-placement", "bottom") + xmlCitenumeric.set("data-title", " " + strResult) + xmlCitenumeric.set("data-content", strPopover) + xmlCitenumeric.set("class","citation") + intChapterNumber += 1 # this is somewhat luzzini-specific bib_parent_element = xmlBibliography.getparent() diff --git a/utils/libeoaconvert.py b/utils/libeoaconvert.py index 6c5c117..5b12bd5 100644 --- a/utils/libeoaconvert.py +++ b/utils/libeoaconvert.py @@ -36,7 +36,6 @@ def enable_preamble( o.write( "\input{preambel/pre_xml}\n" ) o.write( i.read() ) - def get_bigfoot_data(chapter): """ footnotes are per-chapter