Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Fix paths
  • Loading branch information
kthoden committed Jul 18, 2019
1 parent 6ab7319 commit b45b6cb
Showing 1 changed file with 38 additions and 25 deletions.
63 changes: 38 additions & 25 deletions fix_tei.py
Expand Up @@ -48,7 +48,7 @@
# TEI namespace URI and the prefix map (prefix "t") built from it.
ns_tei = "http://www.tei-c.org/ns/1.0"
NS_MAP = {"t" : ns_tei}

# Directory for temporary files. TMP_DIR is assigned twice in a row; the
# second assignment wins, so the effective value is "output/imxml/tmp_files".
# NOTE(review): neither path starts with "~", so expanduser() presumably
# returns them unchanged -- confirm whether the call is still needed.
TMP_DIR = os.path.expanduser("tmp_files")
TMP_DIR = os.path.expanduser("output/imxml/tmp_files")

# Directory that contains this script file (path canonicalised by realpath()).
RUNNING_DIRECTORY = os.path.dirname(os.path.realpath(__file__))
logging.debug("The script is run from {}".format(RUNNING_DIRECTORY))
Expand Down Expand Up @@ -618,6 +618,41 @@ def evaluate_report(report, printlog, filename):
return
# def evaluate_report ends here


def pickle_data(citekeys_not_in_bib, used_citekeys, picklefile):
    """Write the citation keys and empty lookup tables to a pickle file.

    The pickled object is a dict. Its first two entries hold the citekey
    collections passed in; the remaining nine entries ('chapterdict',
    'eqdict', 'listdict', 'theoremdict', 'figdict', 'secdict', 'fndict',
    'tabdict', 'pagelabeldict') are each a separate, empty dict.

    Args:
        citekeys_not_in_bib: stored unchanged under 'citekey_not_in_bib'.
        used_citekeys: stored unchanged under 'citekeys'.
        picklefile: path of the file to write. It is opened in binary
            write mode, so an existing file is overwritten.

    Returns:
        None.

    Raises:
        OSError: if picklefile cannot be opened for writing, for example
            because its parent directory does not exist.
    """

    # Keys whose values are written as empty dicts. The order is kept as
    # it was so the layout of the pickled dict does not change.
    empty_table_keys = (
        'chapterdict',
        'eqdict',
        'listdict',
        'theoremdict',
        'figdict',
        'secdict',
        'fndict',
        'tabdict',
        'pagelabeldict',
    )

    data_to_pickle = {
        'citekey_not_in_bib': citekeys_not_in_bib,
        'citekeys': used_citekeys,
    }
    # One fresh dict per key: the entries must not share a single object.
    data_to_pickle.update({key: {} for key in empty_table_keys})

    with open(picklefile, 'wb') as f:
        pickle.dump(data_to_pickle, f, pickle.HIGHEST_PROTOCOL)

    logging.info(f"Wrote {picklefile}.")
# def pickle_data ends here


def main():
"""The main bit"""

Expand Down Expand Up @@ -746,30 +781,8 @@ def main():

etree.strip_tags(xml_tree2, "tagtobestripped")

dictChapters = {}
dictEquations = {}
dictLists = {}
dictTheorems = {}
dictFigures = {}
dictSections = {}
dictFootnotes = {}
dictTables = {}
dictPagelabels = {}

data_to_pickle = {'citekey_not_in_bib' : citekeys_not_in_bib,
'citekeys' : used_citekeys,
'chapterdict' : dictChapters,
'eqdict' : dictEquations,
'listdict' : dictLists,
'theoremdict' : dictTheorems,
'figdict' : dictFigures,
'secdict' : dictSections,
'fndict' : dictFootnotes,
'tabdict' : dictTables,
'pagelabeldict' : dictPagelabels}

with open('tmp_files/data.pickle', 'wb') as f:
pickle.dump(data_to_pickle, f, pickle.HIGHEST_PROTOCOL)
picklefile = "output/imxml/tmp_files/data.pickle"
pickle_data(citekeys_not_in_bib, used_citekeys, picklefile)

fix_document_structure(xml_tree2, highest_level)
# output
Expand Down

0 comments on commit b45b6cb

Please sign in to comment.