Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Implementing new version
  • Loading branch information
Klaus Thoden committed Jan 23, 2018
1 parent c5148d6 commit 98dbe21
Showing 1 changed file with 81 additions and 25 deletions.
106 changes: 81 additions & 25 deletions eoaconvert.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python3
# -*- coding: utf-8; mode: python -*-
# Time-stamp: <2018-01-23 10:49:15 (kthoden)>
# Time-stamp: <2018-01-23 16:03:41 (kthoden)>

# license?
__version__= "1.0"
Expand All @@ -15,6 +15,7 @@
from copy import deepcopy
from copy import copy
from libeoabibitem import Bibitem
from bs4 import BeautifulSoup
import libeoaconvert
import glob
import os
Expand Down Expand Up @@ -262,7 +263,7 @@ def createBibEntryAuthorYear(bibEntry, boolSameAuthor):
if bibEntry.entrytype() == "newspaper":
strBibEntry = strAuthor + " (" + bibEntry.labelyear() + bibEntry.labelyearsuffix() + "). <i>" + bibEntry.title() + "</i>"

print(strBibEntry)
# print(strBibEntry)

return sanitize_bibentry(strBibEntry)
# def createBibEntryAuthorYear ends here
Expand Down Expand Up @@ -956,6 +957,9 @@ def cleanup():
HAS_BIBLIOGRAPHY = False
input()

bib_type = xmlTree.find(".//EOAbibliographytype").text
assert(bib_type in ["monograph", "anthology", "monograph-numeric", "anthology-numeric"])

# the new solution: pandoc-citeproc
interim_bib_json_file = (options.filename) + "-bib.json"
citeproc_command = "pandoc-citeproc --bib2json %s" % bib_database + ".bib"
Expand All @@ -964,6 +968,9 @@ def cleanup():
citeproc_process = subprocess.Popen(citeproc_arguments, stdout=subprocess.PIPE)
citeproc_json = citeproc_process.stdout.read()

# with open(interim_bib_json_file, 'w') as ibjf:
# json.dump(citeproc_json.decode('utf-8'), ibjf)

citations_json = json.loads(citeproc_json)

# for x in citations_json:
Expand Down Expand Up @@ -1040,7 +1047,7 @@ def cleanup():
# TeXML has been sanitized, now load xml-Tree
xmlParser2 = etree.XMLParser(no_network=False,load_dtd=False)
xmlBibTree = etree.parse((options.filename + "bib.xml"), xmlParser2)
xmlEntries = xmlBibTree.findall(".//entry")
xml_bib_entries = xmlBibTree.findall(".//entry")

###########################
# end of the old solution #
Expand All @@ -1049,7 +1056,7 @@ def cleanup():
make_bibchecker(bib_database, set_citations)

# If the bibliography type is "monograph", look for EOAprintbibliography and build the whole bibliography in one go
if xmlTree.find(".//EOAbibliographytype").text == "monograph":
if bib_type == "monograph":
if xmlTree.find(".//EOAprintbibliography") is not None:
xmlBibliography = xmlTree.find(".//EOAprintbibliography")
xmlBibliography.clear()
Expand Down Expand Up @@ -1081,20 +1088,21 @@ def cleanup():
# end of new version #
######################


###############
# old version #
###############
xmlEntries = xmlBibTree.findall(".//entry")
xml_bib_entries = xmlBibTree.findall(".//entry")
intNumberOfEntry = 0
for xmlEntry in xmlEntries:
for xmlEntry in xml_bib_entries:
if intNumberOfEntry == 0:
# Don't check for previous author if first entry of the Bibliography
bibEntry = Bibitem(xmlEntry)
strNewentry = "<p class=\"bibliography\">" + createBibEntryAuthorYear(bibEntry, boolSameAuthor=False) + "</p>"
else:
bibEntry = Bibitem(xmlEntry)
# Check if author of previous Entry is the same
bibEntryPrevious = Bibitem(xmlEntries[intNumberOfEntry - 1])
bibEntryPrevious = Bibitem(xml_bib_entries[intNumberOfEntry - 1])
if bibEntry.fullauthorlastfirst()[0] == bibEntryPrevious.fullauthorlastfirst()[0]:
strNewentry = "<p class=\"bibliography\">" + createBibEntryAuthorYear(bibEntry, boolSameAuthor=True) + "</p>"
elif bibEntryPrevious.fullauthorlastfirst()[0] == bibEntry.fullauthorlastfirst()[0]:
Expand All @@ -1115,7 +1123,7 @@ def cleanup():


# If Bibliography-Type is anthology search for EOAbibliography and make one per chapter
if xmlTree.find(".//EOAbibliographytype").text == "anthology":
if bib_type == "anthology":
intChapterNumber = 1
for xmlChapter in xmlChapters:
if xmlChapter.find(".//EOAprintbibliography") is not None:
Expand All @@ -1127,9 +1135,9 @@ def cleanup():
for xmlRefsection in xmlRefsections:
if xmlRefsection.find(".//number").text == str(intChapterNumber):
break
xmlEntries = xmlRefsection.findall(".//entry")
xml_bib_entries = xmlRefsection.findall(".//entry")
intNumberOfEntry = 0
for xmlEntry in xmlEntries:
for xmlEntry in xml_bib_entries:
if intNumberOfEntry == 0:
# Don't check for previous author if first entry of the Bibliography
bibEntry = Bibitem(xmlEntry)
Expand All @@ -1139,7 +1147,7 @@ def cleanup():
else:
bibEntry = Bibitem(xmlEntry)
# Check if author of previous Entry is the same
bibEntryPrevious = Bibitem(xmlEntries[intNumberOfEntry - 1])
bibEntryPrevious = Bibitem(xml_bib_entries[intNumberOfEntry - 1])
if bibEntry.fullauthorlastfirst() == bibEntryPrevious.fullauthorlastfirst():
print(bibEntry.citekey())
strNewentry = "<p class=\"bibliography\">" + createBibEntryAuthorYear(bibEntry, boolSameAuthor=True) + "</p>"
Expand All @@ -1157,8 +1165,12 @@ def cleanup():
strCitation = ""

# Bibliographies are done, now for the citations
if xmlTree.find(".//EOAbibliographytype").text == "anthology" or xmlTree.find(".//EOAbibliographytype").text == "monograph":
if bib_type == "anthology" or bib_type == "monograph":
intChapterNumber = 1

with open("tmp_files/formatted_citations.html", "r") as formatted_citations:
form_cit = BeautifulSoup(formatted_citations, "html.parser")

for xmlChapter in xmlChapters:
print ("-----------------------------------------------------")
print ("Processing References for Chapter " + str(intChapterNumber))
Expand All @@ -1170,16 +1182,21 @@ def cleanup():
string_citekey = xmlCitation.find("./citekey").text
progress(counter_citations, len(xmlCitations),"Processing reference %s of %s: %s" % (counter_citations, len(xmlCitations), string_citekey))
# If Bibliography-Type is anthology find Refsection for this Chapter
if xmlTree.find(".//EOAbibliographytype").text == "anthology":
if bib_type == "anthology":
xmlRefsections = xmlBibTree.findall(".//refsection")
for xmlRefsection in xmlRefsections:
if xmlRefsection.find(".//number").text == str(intChapterNumber):
break
xmlEntries = xmlRefsection.findall(".//entry")
xml_bib_entries = xmlRefsection.findall(".//entry")
# If Bibliography-Type is monograph find all entries, forget about refsection
if xmlTree.find(".//EOAbibliographytype").text == "monograph":
xmlEntries = xmlBibTree.findall(".//entry")
for xmlEntry in xmlEntries:

###############
# old version #
###############
"""
if bib_type == "monograph":
xml_bib_entries = xmlBibTree.findall(".//entry")
for xmlEntry in xml_bib_entries:
bibEntry = Bibitem(xmlEntry)
if bibEntry.citekey() == xmlCitation.find("./citekey").text:
if xmlCitation.tag == "EOAciteauthoryear":
Expand Down Expand Up @@ -1207,12 +1224,51 @@ def cleanup():
strTitle = bibEntry.title()
if xmlCitation.find("./page") is not None and xmlCitation.find("./page").text is not None:
strCitation = strCitation + ", " + xmlCitation.find("./page").text
"""
######################
# end of old version #
######################

###############
# new version #
###############

# string_citekey = xmlCitation.find("./citekey").text
for entry in citations_json:
if entry["id"] == string_citekey:
current_citation = entry
strTitle = current_citation["title"]

if xmlCitation.tag == "EOAciteauthoryear":
strCitation = form_cit.select("#citeauthoryear ~ p > span[data-cites='%s']" % string_citekey)[0].text
elif xmlCitation.tag == "EOAciteyear":
strCitation = form_cit.select("#citeyear ~ p > span[data-cites='%s']" % string_citekey)[0].text
elif xmlCitation.tag == "EOAcitemanual":
cite_text = xmlCitation.find("citetext")
if cite_text.getchildren():
tmp_string = xmlCitation.find("citetext")
tmp_string = cite_text.getchildren()[0]
strCitation = etree.tostring(tmp_string)
# TODO: work in progress -- the conversion sketched in the commented-out lines below is unfinished
# tmp_string2 = etree.tostring(tmp_string)
# tmp_string3 = tmp_string2.decode()
# strCitation = tmp_string3.replace("&lt;", "<")
else:
strCitation = xmlCitation.find("citetext").text

if xmlCitation.find("./page") is not None and xmlCitation.find("./page").text is not None:
strCitation = strCitation + ", " + xmlCitation.find("./page").text

######################
# end of new version #
######################

# Replace the XML tag with the source reference here
tmpTail = xmlCitation.tail
xmlCitation.clear()
xmlCitation.tag = "span"
xmlCitation.set("rel","popover")
xmlCitation.set("class","citation")
xmlCitation.set("rel", "popover")
xmlCitation.set("class", "citation")
xmlCitation.set("citekey", string_citekey)
xmlCitation.text = strCitation
xmlCitation.tail = tmpTail
Expand All @@ -1229,17 +1285,17 @@ def cleanup():
intChapterNumber += 1

# If the bibliography type is "monograph-numeric", look for EOAprintbibliography and build the whole bibliography in one go
if xmlTree.find(".//EOAbibliographytype").text == "monograph-numeric":
if bib_type == "monograph-numeric":
if xmlTree.find(".//EOAprintbibliography") is not None:
dictCitekeysNumbers = {}
dictCitekeysTitles = {}
xmlBibliography = xmlTree.find(".//EOAprintbibliography")
xmlBibliography.clear()
xmlBibliography.tag = "div"
xmlBibliography.getparent().tag = "div"
xmlEntries = xmlBibTree.findall(".//entry")
xml_bib_entries = xmlBibTree.findall(".//entry")
intNumberOfEntry = 1
for xmlEntry in xmlEntries:
for xmlEntry in xml_bib_entries:
# Go through all entries and assign a number to the citekey
bibEntry = Bibitem(xmlEntry)
strCitekey = bibEntry.citekey()
Expand Down Expand Up @@ -1309,7 +1365,7 @@ def cleanup():
# input()

# Numeric citations for the individual chapters
if xmlTree.find(".//EOAbibliographytype").text == "anthology-numeric":
if bib_type == "anthology-numeric":
intChapterNumber = 1
for xmlChapter in xmlChapters:
print("Processing Bibliography")
Expand All @@ -1324,9 +1380,9 @@ def cleanup():
for xmlRefsection in xmlRefsections:
if xmlRefsection.find(".//number").text == str(intChapterNumber):
break
xmlEntries = xmlRefsection.findall(".//entry")
xml_bib_entries = xmlRefsection.findall(".//entry")
intNumberOfEntry = 1
for xmlEntry in xmlEntries:
for xmlEntry in xml_bib_entries:
# Go through all entries and assign a number to the citekey
bibEntry = Bibitem(xmlEntry)
strCitekey = bibEntry.citekey()
Expand Down

0 comments on commit 98dbe21

Please sign in to comment.