From 4cb48dd0c0b5cb1b6b86dd48a203a466232d9f6c Mon Sep 17 00:00:00 2001
From: Klaus Thoden
Date: Mon, 22 Jan 2018 18:59:01 +0100
Subject: [PATCH] A library function for formatting the bibliography

---
 libeoaconvert.py | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 64 insertions(+)

diff --git a/libeoaconvert.py b/libeoaconvert.py
index c321d1e..7341718 100644
--- a/libeoaconvert.py
+++ b/libeoaconvert.py
@@ -7,6 +7,8 @@ import shlex
 import logging
 import configparser
 
+from lxml.html import soupparser
+
 
 ##################################
 # Reading the configuration file #
@@ -126,3 +128,65 @@ def deb_var(obj):
     name = [name for name in globals() if globals()[name] is obj][0]
     print("DEBUG: %s: %s" % (name, obj))
 # def deb_var ends here
+
+# next function adapted from TEI2EOADjango
+def format_citations(used_citekeys, bibdata,
+                     csl_file="/Users/kthoden/EOAKram/dev/eoa-csl/eoa.csl"):
+    """Format the used citations with pandoc and return the reference list.
+
+    A Markdown file citing every key in used_citekeys, once in full
+    parentheses ([@key]) and once in year parentheses (@key), is written
+    to tmp_files/used_citations.md. pandoc with the pandoc-citeproc
+    filter converts it to tmp_files/formatted_citations.html, which is
+    then parsed. The directory tmp_files must already exist.
+
+    used_citekeys -- citation keys; iterated twice, so pass a sequence
+    bibdata -- path of the bibliography file handed to pandoc
+    csl_file -- path of the CSL style file handed to pandoc
+
+    Returns the list of div elements carrying the class "references"
+    found in the generated HTML; the list is empty if there is none.
+    """
+
+    md_path = "tmp_files/used_citations.md"
+    html_path = "tmp_files/formatted_citations.html"
+
+    md_lines = ["---\nlang: en\ntitle: Citations\n...\n\n"]
+    md_lines.append("# Full parentheses\n")
+    md_lines.extend("[@%s]\n" % entry for entry in used_citekeys)
+    md_lines.append("\n# Year parentheses\n")
+    md_lines.extend("@%s\n" % entry for entry in used_citekeys)
+    md_lines.append("\n# References\n")
+
+    # pandoc reads UTF-8, so do not depend on the locale's default encoding
+    with open(md_path, "w", encoding="utf-8") as citation_formatter:
+        citation_formatter.writelines(md_lines)
+
+    # Pass the arguments as a list rather than splitting a formatted
+    # string, so that paths containing whitespace stay in one piece.
+    arguments = [
+        "pandoc",
+        "-o", html_path,
+        "-t", "html",
+        "--filter=pandoc-citeproc",
+        "--bibliography=%s" % bibdata,
+        "--csl=%s" % csl_file,
+        md_path,
+    ]
+    command = " ".join(shlex.quote(argument) for argument in arguments)
+    logging.info("Using external command pandoc with command %s", command)
+    return_code = subprocess.call(arguments)
+    if return_code != 0:
+        # A failed run leaves a stale or missing HTML file behind
+        logging.error("pandoc exited with status %s", return_code)
+
+    with open(html_path, "r", encoding="utf-8") as html_file:
+        dd = soupparser.fromstring(html_file.read(), features="html.parser")
+
+    # Test for "references" as a class token so that the div is still
+    # found when it carries additional classes.
+    references = dd.xpath(
+        "//div[contains(concat(' ', normalize-space(@class), ' '),"
+        " ' references ')]")
+    return references
+# def format_citations ends here