From b45b6cb2db31334f0df177a9da8c12d512d4af0e Mon Sep 17 00:00:00 2001 From: kthoden Date: Thu, 18 Jul 2019 14:57:58 +0200 Subject: [PATCH] Fix paths --- fix_tei.py | 63 ++++++++++++++++++++++++++++++++---------------------- 1 file changed, 38 insertions(+), 25 deletions(-) diff --git a/fix_tei.py b/fix_tei.py index 56dae92..b53093f 100644 --- a/fix_tei.py +++ b/fix_tei.py @@ -48,7 +48,7 @@ ns_tei = "http://www.tei-c.org/ns/1.0" NS_MAP = {"t" : ns_tei} -TMP_DIR = os.path.expanduser("tmp_files") +TMP_DIR = os.path.expanduser("output/imxml/tmp_files") RUNNING_DIRECTORY = os.path.dirname(os.path.realpath(__file__)) logging.debug("The script is run from {}".format(RUNNING_DIRECTORY)) @@ -618,6 +618,41 @@ def evaluate_report(report, printlog, filename): return # def evaluate_report ends here + +def pickle_data(citekeys_not_in_bib, used_citekeys, picklefile): + """Create a structure for pickling data""" + + dictChapters = {} + dictEquations = {} + dictLists = {} + dictTheorems = {} + dictFigures = {} + dictSections = {} + dictFootnotes = {} + dictTables = {} + dictPagelabels = {} + + data_to_pickle = {'citekey_not_in_bib' : citekeys_not_in_bib, + 'citekeys' : used_citekeys, + 'chapterdict' : dictChapters, + 'eqdict' : dictEquations, + 'listdict' : dictLists, + 'theoremdict' : dictTheorems, + 'figdict' : dictFigures, + 'secdict' : dictSections, + 'fndict' : dictFootnotes, + 'tabdict' : dictTables, + 'pagelabeldict' : dictPagelabels} + + + + with open(picklefile, 'wb') as f: + pickle.dump(data_to_pickle, f, pickle.HIGHEST_PROTOCOL) + + logging.info(f"Wrote {picklefile}.") +# def pickle_data ends here + + def main(): """The main bit""" @@ -746,30 +781,8 @@ def main(): etree.strip_tags(xml_tree2, "tagtobestripped") - dictChapters = {} - dictEquations = {} - dictLists = {} - dictTheorems = {} - dictFigures = {} - dictSections = {} - dictFootnotes = {} - dictTables = {} - dictPagelabels = {} - - data_to_pickle = {'citekey_not_in_bib' : citekeys_not_in_bib, - 'citekeys' : used_citekeys, - 'chapterdict' : dictChapters, - 'eqdict' : dictEquations, - 'listdict' : dictLists, - 'theoremdict' : dictTheorems, - 'figdict' : dictFigures, - 'secdict' : dictSections, - 'fndict' : dictFootnotes, - 'tabdict' : dictTables, - 'pagelabeldict' : dictPagelabels} - - with open('tmp_files/data.pickle', 'wb') as f: - pickle.dump(data_to_pickle, f, pickle.HIGHEST_PROTOCOL) + picklefile = "output/imxml/tmp_files/data.pickle" + pickle_data(citekeys_not_in_bib, used_citekeys, picklefile) fix_document_structure(xml_tree2, highest_level) # output