Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
bibtex_search/bibtex_search.py
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
93 lines (68 sloc)
2.42 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
# -*- coding: utf-8; mode: python -*-
# Module metadata: version string, date stamp and author contact address.
__version__ = "1.0"
__date__ = "20171006"
__author__ = "kthoden@mpiwg-berlin.mpg.de"
import difflib
import logging
import re
import sys

import bibtexparser
# BibTeX database that main() parses; the relative path is resolved
# against the current working directory when the file is opened.
bibtexfile = "data/15_maennig.bib"

# Workflow notes:
# find references in XML file
# look up reference in database (Author and Year)
def parse_bibtex(bibfile):
    """Parse the BibTeX file and return its entries as a dict.

    Args:
        bibfile: Path of the BibTeX file to read.

    Returns:
        The ``entries_dict`` attribute of the database object returned by
        ``bibtexparser.load``. ``search_bibtex`` treats the keys of this
        dict as citekeys.
    """
    # The file only needs to stay open while bibtexparser reads it.
    with open(bibfile) as btf:
        bib_database = bibtexparser.load(btf)
    return bib_database.entries_dict
# def parse_bibtex ends here
def search_bibtex(search_string, bibdict):
    """Find the citekey in *bibdict* that matches *search_string*.

    Try different methods. A standard way is to encode author and year
    in the citekey, so the cheap first try is to strip all whitespace
    from the search string ("Daston 2005" -> "Daston2005") and look for
    a citekey that is exactly equal to the result.

    As diagnostics, every citekey and the list of close matches found by
    ``difflib.get_close_matches`` are printed to stdout. The close
    matches are only printed; they do not influence the return value.

    Args:
        search_string: The reference to look up, e.g. "Daston 2005".
        bibdict: Mapping whose keys are the citekeys of the database.

    Returns:
        The matching citekey, or None when no citekey equals the
        whitespace-stripped search string.
    """
    normalized_string = re.sub(r'\s+', '', search_string)
    citekeys = list(bibdict)

    for citekey in citekeys:
        print(citekey)

    # or use a similarity measure
    candidates = difflib.get_close_matches(search_string, citekeys)
    print(candidates)

    # Return None explicitly on a miss: the result variable used to be
    # bound only inside the match branch, so a miss raised
    # UnboundLocalError at the return statement.
    if normalized_string in bibdict:
        return normalized_string
    return None
# def search_bibtex ends here
def get_local_lyrics_fuzzy(cached_lyrics, song):
    """Look for similar titles.

    Collects the close matches (``difflib.get_close_matches``) for
    ``song.title + SUFFIX`` among the names that ``get_all_files`` returns
    for ``ROOT_DIR/<song.artist>``, prints them as a numbered list and
    asks the user to pick one by number.

    NOTE(review): ``get_all_files``, ``ROOT_DIR``, ``SUFFIX`` and the
    gettext-style ``_`` are not defined in the visible part of this
    module, so calling the function as-is raises NameError. It looks like
    a template pasted from another project -- confirm before relying on it.

    Args:
        cached_lyrics: Not used by this function.
        song: Object providing ``artist``, ``title`` and ``filename``.

    Returns:
        An open text-mode file object for the chosen file (the caller is
        responsible for closing it), or the empty string when there are
        no candidates, the user just presses Enter, or the answer is not
        a number between 1 and the number of candidates.
    """
    # Do not name the throwaway value "_": that would shadow the
    # translation function used below.
    other_songs, _ignored = get_all_files(ROOT_DIR + "/" + song.artist)
    candidates = difflib.get_close_matches(song.title + SUFFIX, other_songs)
    if not candidates:
        return ""

    sys.stdout.write(_("Found \n"))
    for number, candidate in enumerate(candidates, start=1):
        sys.stdout.write(_("%s. %s\n") % (number, candidate.replace(SUFFIX, "")))

    answer = input(_("""Does one of those (1 - %s) fit?\nTo abort, just press Enter: """) % (len(candidates)))
    if not answer:
        return ""

    # Validate the selection: unchecked, "0" or a negative number would
    # silently index from the end of the list, and anything else invalid
    # would raise ValueError or IndexError.
    try:
        choice = int(answer)
    except ValueError:
        choice = 0
    if not 1 <= choice <= len(candidates):
        sys.stdout.write(_("Invalid selection: %s\n") % (answer,))
        return ""

    alternate_lyrics = "%s/%s/%s" % (ROOT_DIR, song.artist, candidates[choice - 1])
    logging.info(_("Getting lyrics for %s (%s by %s) from %s\n") %
                 (song.filename, song.title, song.artist, alternate_lyrics))
    return open(alternate_lyrics, "r")
# def get_local_lyrics_fuzzy ends here
def main():
    """The main bit.

    Parses the BibTeX file named by the module-level ``bibtexfile``,
    looks up the sample reference "Daston 2005" in it and prints whatever
    ``search_bibtex`` returns.
    """
    bibdict = parse_bibtex(bibtexfile)
    result = search_bibtex("Daston 2005", bibdict)
    print(result)
# def main ends here
# Run the lookup only when the file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()
# finis