diff --git a/eoaconvert.py b/eoaconvert.py index 6720d50..0c1e30e 100755 --- a/eoaconvert.py +++ b/eoaconvert.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -*- coding: utf-8; mode: python -*- -# Time-stamp: <2018-01-23 10:49:15 (kthoden)> +# Time-stamp: <2018-01-23 16:03:41 (kthoden)> # license? __version__= "1.0" @@ -15,6 +15,7 @@ from copy import deepcopy from copy import copy from libeoabibitem import Bibitem +from bs4 import BeautifulSoup import libeoaconvert import glob import os @@ -262,7 +263,7 @@ def createBibEntryAuthorYear(bibEntry, boolSameAuthor): if bibEntry.entrytype() == "newspaper": strBibEntry = strAuthor + " (" + bibEntry.labelyear() + bibEntry.labelyearsuffix() + "). " + bibEntry.title() + "" - print(strBibEntry) + # print(strBibEntry) return sanitize_bibentry(strBibEntry) # def createBibEntryAuthorYear ends here @@ -956,6 +957,9 @@ def cleanup(): HAS_BIBLIOGRAPHY = False input() +bib_type = xmlTree.find(".//EOAbibliographytype").text +assert(bib_type in ["monograph", "anthology", "monograph-numeric", "anthology-numeric"]) + # the new solution: pandoc-citeproc interim_bib_json_file = (options.filename) + "-bib.json" citeproc_command = "pandoc-citeproc --bib2json %s" % bib_database + ".bib" @@ -964,6 +968,9 @@ def cleanup(): citeproc_process = subprocess.Popen(citeproc_arguments, stdout=subprocess.PIPE) citeproc_json = citeproc_process.stdout.read() +# with open(interim_bib_json_file, 'w') as ibjf: +# json.dump(citeproc_json.decode('utf-8'), ibjf) + citations_json = json.loads(citeproc_json) # for x in citations_json: @@ -1040,7 +1047,7 @@ def cleanup(): # TeXML has been sanitized, now load xml-Tree xmlParser2 = etree.XMLParser(no_network=False,load_dtd=False) xmlBibTree = etree.parse((options.filename + "bib.xml"), xmlParser2) -xmlEntries = xmlBibTree.findall(".//entry") +xml_bib_entries = xmlBibTree.findall(".//entry") ########################### # end of the old solution # @@ -1049,7 +1056,7 @@ def cleanup(): make_bibchecker(bib_database, set_citations) # If Bibliography-Type is monograph search for EOAbibliography and make it all -if xmlTree.find(".//EOAbibliographytype").text == "monograph": +if bib_type == "monograph": if xmlTree.find(".//EOAprintbibliography") is not None: xmlBibliography = xmlTree.find(".//EOAprintbibliography") xmlBibliography.clear() @@ -1081,12 +1088,13 @@ def cleanup(): # end of new version # ###################### + ############### # old version # ############### - xmlEntries = xmlBibTree.findall(".//entry") + xml_bib_entries = xmlBibTree.findall(".//entry") intNumberOfEntry = 0 - for xmlEntry in xmlEntries: + for xmlEntry in xml_bib_entries: if intNumberOfEntry == 0: # Don't check for previous author if first entry of the Bibliography bibEntry = Bibitem(xmlEntry) @@ -1094,7 +1102,7 @@ def cleanup(): else: bibEntry = Bibitem(xmlEntry) # Check if author of previous Entry is the same - bibEntryPrevious = Bibitem(xmlEntries[intNumberOfEntry - 1]) + bibEntryPrevious = Bibitem(xml_bib_entries[intNumberOfEntry - 1]) if bibEntry.fullauthorlastfirst()[0] == bibEntryPrevious.fullauthorlastfirst()[0]: strNewentry = "

" + createBibEntryAuthorYear(bibEntry, boolSameAuthor=True) + "

" elif bibEntryPrevious.fullauthorlastfirst()[0] == bibEntry.fullauthorlastfirst()[0]: @@ -1115,7 +1123,7 @@ def cleanup(): # If Bibliography-Type is anthology search for EOAbibliography and make one per chapter -if xmlTree.find(".//EOAbibliographytype").text == "anthology": +if bib_type == "anthology": intChapterNumber = 1 for xmlChapter in xmlChapters: if xmlChapter.find(".//EOAprintbibliography") is not None: @@ -1127,9 +1135,9 @@ def cleanup(): for xmlRefsection in xmlRefsections: if xmlRefsection.find(".//number").text == str(intChapterNumber): break - xmlEntries = xmlRefsection.findall(".//entry") + xml_bib_entries = xmlRefsection.findall(".//entry") intNumberOfEntry = 0 - for xmlEntry in xmlEntries: + for xmlEntry in xml_bib_entries: if intNumberOfEntry == 0: # Don't check for previous author if first entry of the Bibliography bibEntry = Bibitem(xmlEntry) @@ -1139,7 +1147,7 @@ def cleanup(): else: bibEntry = Bibitem(xmlEntry) # Check if author of previous Entry is the same - bibEntryPrevious = Bibitem(xmlEntries[intNumberOfEntry - 1]) + bibEntryPrevious = Bibitem(xml_bib_entries[intNumberOfEntry - 1]) if bibEntry.fullauthorlastfirst() == bibEntryPrevious.fullauthorlastfirst(): print(bibEntry.citekey()) strNewentry = "

" + createBibEntryAuthorYear(bibEntry, boolSameAuthor=True) + "

" @@ -1157,8 +1165,12 @@ def cleanup(): strCitation = "" # Bibliographies are done, now for the citations -if xmlTree.find(".//EOAbibliographytype").text == "anthology" or xmlTree.find(".//EOAbibliographytype").text == "monograph": +if bib_type == "anthology" or bib_type == "monograph": intChapterNumber = 1 + + with open("tmp_files/formatted_citations.html", "r") as formatted_citations: + form_cit = BeautifulSoup(formatted_citations, "html.parser") + for xmlChapter in xmlChapters: print ("-----------------------------------------------------") print ("Processing References for Chapter " + str(intChapterNumber)) @@ -1170,16 +1182,21 @@ def cleanup(): string_citekey = xmlCitation.find("./citekey").text progress(counter_citations, len(xmlCitations),"Processing reference %s of %s: %s" % (counter_citations, len(xmlCitations), string_citekey)) # If Bibliography-Type is anthology find Refsection for this Chapter - if xmlTree.find(".//EOAbibliographytype").text == "anthology": + if bib_type == "anthology": xmlRefsections = xmlBibTree.findall(".//refsection") for xmlRefsection in xmlRefsections: if xmlRefsection.find(".//number").text == str(intChapterNumber): break - xmlEntries = xmlRefsection.findall(".//entry") + xml_bib_entries = xmlRefsection.findall(".//entry") # If Bibliography-Type is monograph find all entries, forget about refsection - if xmlTree.find(".//EOAbibliographytype").text == "monograph": - xmlEntries = xmlBibTree.findall(".//entry") - for xmlEntry in xmlEntries: + + ############### + # old version # + ############### + """ + if bib_type == "monograph": + xml_bib_entries = xmlBibTree.findall(".//entry") + for xmlEntry in xml_bib_entries: bibEntry = Bibitem(xmlEntry) if bibEntry.citekey() == xmlCitation.find("./citekey").text: if xmlCitation.tag == "EOAciteauthoryear": @@ -1207,12 +1224,51 @@ def cleanup(): strTitle = bibEntry.title() if xmlCitation.find("./page") is not None and xmlCitation.find("./page").text is not None: strCitation = strCitation + ", " + xmlCitation.find("./page").text + """ + ###################### + # end of old version # + ###################### + + ############### + # new version # + ############### + + # string_citekey = xmlCitation.find("./citekey").text + for entry in citations_json: + if entry["id"] == string_citekey: + current_citation = entry + strTitle = current_citation["title"] + + if xmlCitation.tag == "EOAciteauthoryear": + strCitation = form_cit.select("#citeauthoryear ~ p > span[data-cites='%s']" % string_citekey)[0].text + elif xmlCitation.tag == "EOAciteyear": + strCitation = form_cit.select("#citeyear ~ p > span[data-cites='%s']" % string_citekey)[0].text + elif xmlCitation.tag == "EOAcitemanual": + cite_text = xmlCitation.find("citetext") + if cite_text.getchildren(): + tmp_string = xmlCitation.find("citetext") + tmp_string = cite_text.getchildren()[0] + strCitation = etree.tostring(tmp_string) + # BAUSTELLE!!!!! + # tmp_string2 = etree.tostring(tmp_string) + # tmp_string3 = tmp_string2.decode() + # strCitation = tmp_string3.replace("<", "<") + else: + strCitation = xmlCitation.find("citetext").text + + if xmlCitation.find("./page") is not None and xmlCitation.find("./page").text is not None: + strCitation = strCitation + ", " + xmlCitation.find("./page").text + + ###################### + # end of new version # + ###################### + # Hier den XML-Tag durch die Quellenangabe ersetzen tmpTail = xmlCitation.tail xmlCitation.clear() xmlCitation.tag = "span" - xmlCitation.set("rel","popover") - xmlCitation.set("class","citation") + xmlCitation.set("rel", "popover") + xmlCitation.set("class", "citation") xmlCitation.set("citekey", string_citekey) xmlCitation.text = strCitation xmlCitation.tail = tmpTail @@ -1229,7 +1285,7 @@ def cleanup(): intChapterNumber += 1 # If Bibliography-Type is monograph-numeric search for EOAbibliography and make it all -if xmlTree.find(".//EOAbibliographytype").text == "monograph-numeric": +if bib_type == "monograph-numeric": if xmlTree.find(".//EOAprintbibliography") is not None: dictCitekeysNumbers = {} dictCitekeysTitles = {} @@ -1237,9 +1293,9 @@ def cleanup(): xmlBibliography.clear() xmlBibliography.tag = "div" xmlBibliography.getparent().tag = "div" - xmlEntries = xmlBibTree.findall(".//entry") + xml_bib_entries = xmlBibTree.findall(".//entry") intNumberOfEntry = 1 - for xmlEntry in xmlEntries: + for xmlEntry in xml_bib_entries: # Go through all entries and assign a number to the citekey bibEntry = Bibitem(xmlEntry) strCitekey = bibEntry.citekey() @@ -1309,7 +1365,7 @@ def cleanup(): # input() # Numeric citations for the individual chapters -if xmlTree.find(".//EOAbibliographytype").text == "anthology-numeric": +if bib_type == "anthology-numeric": intChapterNumber = 1 for xmlChapter in xmlChapters: print("Processing Bibliography") @@ -1324,9 +1380,9 @@ def cleanup(): for xmlRefsection in xmlRefsections: if xmlRefsection.find(".//number").text == str(intChapterNumber): break - xmlEntries = xmlRefsection.findall(".//entry") + xml_bib_entries = xmlRefsection.findall(".//entry") intNumberOfEntry = 1 - for xmlEntry in xmlEntries: + for xmlEntry in xml_bib_entries: # Go through all entries and assign a number to the citekey bibEntry = Bibitem(xmlEntry) strCitekey = bibEntry.citekey()