Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
Browse the repository at this point in the history
New script
- Loading branch information
kthoden
committed
Jul 18, 2019
1 parent
f537623
commit 6ab7319
Showing
1 changed file
with
45 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
#!/usr/bin/env python3 | ||
# -*- coding: utf-8; mode: python -*- | ||
|
||
""" | ||
Gather data needed by later conversion steps. This was originally part of fix_tei. | ||
""" | ||
|
||
__version__ = "1.0" | ||
__date__ = "20190718" | ||
__author__ = "kthoden@mpiwg-berlin.mpg.de" | ||
|
||
import argparse | ||
import logging | ||
import pickle | ||
import fix_tei | ||
from lxml import etree | ||
|
||
# Namespace URI used for TEI elements in the XPath queries below.
ns_tei = "http://www.tei-c.org/ns/1.0"

# Prefix-to-URI mapping handed to lxml's xpath() so queries can write "t:".
NS_MAP = dict(t=ns_tei)

# Configure the root logger once at import time: DEBUG level, timestamped lines.
logging.basicConfig(
    format=' %(asctime)s - %(levelname)s - %(message)s',
    level=logging.DEBUG,
)
|
||
def main():
    """Collect the citation keys used in a TEI file and pickle them.

    Command-line arguments:
        teifile: the XML file whose ``t:bibl/t:ref`` targets are read.
        bibfile: the bibliography file passed to ``fix_tei.parse_bibtex``.

    The list of used citekeys and the result of
    ``fix_tei.validate_citations`` are handed to ``fix_tei.pickle_data``
    together with the fixed path ``output/imxml/tmp_files/data.pickle``.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("teifile", help="The XML file from which data is pickled.")
    cli.add_argument("bibfile", help="The bibliography file for checking the references.")
    options = cli.parse_args()

    # Parse the TEI document first, then the bibliography.
    tei_document = etree.parse(options.teifile)
    bibliography = fix_tei.parse_bibtex(options.bibfile)

    # Every @target attribute of a <ref> directly inside a <bibl>.
    ref_targets = tei_document.xpath("//t:bibl/t:ref/@target", namespaces=NS_MAP)

    used_citekeys = []
    for target in ref_targets:
        # The first character is dropped (presumably a leading "#" -- confirm
        # against the TEI input) before the key is unescaped.
        used_citekeys.append(fix_tei.unescape(target[1:]))

    # NOTE(review): judging by the original variable name, this holds the
    # cited keys that are missing from the bibliography -- verify in fix_tei.
    missing_from_bib = fix_tei.validate_citations(used_citekeys, bibliography)

    fix_tei.pickle_data(
        missing_from_bib,
        used_citekeys,
        "output/imxml/tmp_files/data.pickle",
    )
# def main ends here
|
||
# Run the data gathering only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
# finis |