# general_predict.py

'''
Created on Jul 1, 2019

@author: cxchu
'''
'''
Created on Jan 15, 2019

@author: cxchu
'''
from sklearn.externals import joblib
import sys, os

from create_dataset import create_raw_dataset
from src.batcher import Batcher
from src.hook import acc_hook, save_predictions, evaluate_perclass
from src.model.nn_model import Model
import tensorflow as tf

import optparse

# ---------------------------------------------------------------------------
# Setup: parse the command line, load the pickled lookup dictionaries, build
# the raw test dataset and wrap it in a Batcher.
# ---------------------------------------------------------------------------
optparser = optparse.OptionParser()
optparser.add_option(
    "-b", "--basedir", default="/var/tmp/wikia/entity-typing/deep-learning/",
    help="directory to model of top class prediction"
)
opts = optparser.parse_args()[0]

basedir = opts.basedir

# Build the path with os.path.join instead of string concatenation so that a
# --basedir given without a trailing slash still resolves to
# <basedir>/general-model/dicts_gillick.pkl. The name `dicts_path` also avoids
# shadowing the builtin `dict`.
dicts_path = os.path.join(basedir, "general-model", "dicts_gillick.pkl")

# NOTE(review): joblib.load unpickles the file; only point --basedir at
# trusted data.
dicts = joblib.load(dicts_path)
label2id = dicts["label2id"]
id2label = dicts["id2label"]
word2id = dicts["word2id"]
feature2id = dicts["feature2id"]

# create_raw_dataset receives only the lookup tables; where it reads the raw
# mentions from is not visible in this file.
storage, data, sentences, mentions = create_raw_dataset(label2id, word2id, feature2id)
test_dataset = {"storage": storage, "data": data}

print("Loading the dataset")

# A bare `print` is a no-op expression under Python 3; printing an explicit
# empty string emits the intended blank line on both Python 2 and Python 3.
print("")
print("test_size: ", test_dataset["data"].shape[0])

print("Creating batchers")
# The original comment labels the 3rd and 4th arguments as batch_size and
# context_length. The batch size passed here is the full dataset size (not
# 1000), presumably so that one next() call yields every example.
# NOTE(review): the inference code further down reads 2*8+1 context
# placeholders while 10 is passed here - confirm the two values agree.
test_batcher = Batcher(
    test_dataset["storage"],
    test_dataset["data"],
    test_dataset["data"].shape[0],
    10,
    dicts["id2vec"],
)


# ---------------------------------------------------------------------------
# Inference: restore the saved TensorFlow graph, run the whole test batch
# through it once and print every label whose score reaches the threshold.
# ---------------------------------------------------------------------------
print('Loading the model..............')
# NOTE(review): the checkpoint directory is relative to the current working
# directory, whereas the dictionaries are loaded from under --basedir.
# Confirm this is intended.
save_dir = './general-model'
model_name = 'model'

# Minimum score for a label to be reported.
score_threshold = 0.5

checkpoint_file = os.path.join(save_dir, model_name)
graph = tf.Graph()
# Using the session as a context manager guarantees it is closed (and its
# resources released) even if restoring or running the graph raises.
with graph.as_default(), tf.Session(graph=graph) as sess:
    saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
    saver.restore(sess, checkpoint_file)

    # Look up the input placeholders and the output tensor by operation name.
    keep_prob = graph.get_operation_by_name("keep_prob").outputs[0]
    mention_representation = graph.get_operation_by_name("mention_representation").outputs[0]

    # The graph is queried for 2*8+1 = 17 placeholders named
    # context0 .. context16 (presumably 8 tokens on each side plus the
    # mention position - TODO confirm against the training code).
    context_length = 8
    context = [graph.get_operation_by_name("context" + str(i)).outputs[0] for i in range(context_length*2+1)]

    distribution = graph.get_operation_by_name("distribution").outputs[0]

    # One batch only: the batcher is built with the batch size set to the
    # dataset size.
    context_data, mention_representation_data, target_data, feature_data = test_batcher.next()

    # keep_prob of 1.0 presumably disables dropout at prediction time.
    feed = {mention_representation: mention_representation_data,
            keep_prob: [1.0]}
    for i in range(context_length*2+1):
        # Slice i along the second axis of context_data feeds placeholder i.
        feed[context[i]] = context_data[:, i, :]
    scores = sess.run(distribution, feed_dict=feed)

# The 'results' / 'end' marker lines delimit the prediction output; stdout is
# flushed after each line so a consumer reading the stream sees it promptly.
print('results')
sys.stdout.flush()
for sent, score in zip(mentions, scores):
    # `label_id` rather than `id` so the builtin is not shadowed.
    res = [
        id2label[label_id] + "\t" + str(s)
        for label_id, s in enumerate(score)
        if s >= score_threshold
    ]
    if res:
        print(sent + "=====[" + ", ".join(res) + "]")
        sys.stdout.flush()
print('end')
sys.stdout.flush()
	'''
	Created on Jul 1, 2019

	@author: cxchu
	'''
	'''
	Created on Jan 15, 2019

	@author: cxchu
	'''
	from sklearn.externals import joblib
	import sys, os

	from create_dataset import create_raw_dataset
	from src.batcher import Batcher
	from src.hook import acc_hook, save_predictions, evaluate_perclass
	from src.model.nn_model import Model
	import tensorflow as tf

	import optparse

	# NOTE(review): this tab-indented section repeats the setup code found
	# near the top of the file; confirm whether the duplicate should be kept.
	#
	# Setup: parse the command line, load the pickled lookup dictionaries,
	# build the raw test dataset and wrap it in a Batcher.
	optparser = optparse.OptionParser()
	optparser.add_option(
		"-b", "--basedir", default="/var/tmp/wikia/entity-typing/deep-learning/",
		help="directory to model of top class prediction"
	)
	opts = optparser.parse_args()[0]

	basedir = opts.basedir

	# Build the path with os.path.join instead of string concatenation so
	# that a --basedir given without a trailing slash still resolves to
	# <basedir>/general-model/dicts_gillick.pkl. The name `dicts_path` also
	# avoids shadowing the builtin `dict`.
	dicts_path = os.path.join(basedir, "general-model", "dicts_gillick.pkl")

	# NOTE(review): joblib.load unpickles the file; only point --basedir at
	# trusted data.
	dicts = joblib.load(dicts_path)
	label2id = dicts["label2id"]
	id2label = dicts["id2label"]
	word2id = dicts["word2id"]
	feature2id = dicts["feature2id"]

	# create_raw_dataset receives only the lookup tables; where it reads the
	# raw mentions from is not visible in this file.
	storage, data, sentences, mentions = create_raw_dataset(label2id, word2id, feature2id)
	test_dataset = {"storage": storage, "data": data}

	print("Loading the dataset")

	# A bare `print` is a no-op expression under Python 3; printing an
	# explicit empty string emits the intended blank line on both Python 2
	# and Python 3.
	print("")
	print("test_size: ", test_dataset["data"].shape[0])

	print("Creating batchers")
	# The original comment labels the 3rd and 4th arguments as batch_size and
	# context_length. The batch size passed here is the full dataset size
	# (not 1000), presumably so that one next() call yields every example.
	# NOTE(review): the inference code further down reads 2*8+1 context
	# placeholders while 10 is passed here - confirm the two values agree.
	test_batcher = Batcher(
		test_dataset["storage"],
		test_dataset["data"],
		test_dataset["data"].shape[0],
		10,
		dicts["id2vec"],
	)


	# NOTE(review): this tab-indented section repeats the inference code found
	# earlier in the file; confirm whether the duplicate should be kept. The
	# nested indentation of the `with`, `for` and `if` bodies had been
	# flattened and is restored here.
	#
	# Inference: restore the saved TensorFlow graph, run the whole test batch
	# through it once and print every label whose score reaches the threshold.
	print('Loading the model..............')
	# NOTE(review): the checkpoint directory is relative to the current
	# working directory, whereas the dictionaries are loaded from under
	# --basedir. Confirm this is intended.
	save_dir = './general-model'
	model_name = 'model'

	# Minimum score for a label to be reported.
	score_threshold = 0.5

	checkpoint_file = os.path.join(save_dir, model_name)
	graph = tf.Graph()
	# Using the session as a context manager guarantees it is closed (and its
	# resources released) even if restoring or running the graph raises.
	with graph.as_default(), tf.Session(graph=graph) as sess:
		saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
		saver.restore(sess, checkpoint_file)

		# Look up the input placeholders and the output tensor by operation
		# name.
		keep_prob = graph.get_operation_by_name("keep_prob").outputs[0]
		mention_representation = graph.get_operation_by_name("mention_representation").outputs[0]

		# The graph is queried for 2*8+1 = 17 placeholders named
		# context0 .. context16 (presumably 8 tokens on each side plus the
		# mention position - TODO confirm against the training code).
		context_length = 8
		context = [graph.get_operation_by_name("context" + str(i)).outputs[0] for i in range(context_length*2+1)]

		distribution = graph.get_operation_by_name("distribution").outputs[0]

		# One batch only: the batcher is built with the batch size set to
		# the dataset size.
		context_data, mention_representation_data, target_data, feature_data = test_batcher.next()

		# keep_prob of 1.0 presumably disables dropout at prediction time.
		feed = {mention_representation: mention_representation_data,
				keep_prob: [1.0]}
		for i in range(context_length*2+1):
			# Slice i along the second axis of context_data feeds
			# placeholder i.
			feed[context[i]] = context_data[:, i, :]
		scores = sess.run(distribution, feed_dict=feed)

	# The 'results' / 'end' marker lines delimit the prediction output; stdout
	# is flushed after each line so a consumer reading the stream sees it
	# promptly.
	print('results')
	sys.stdout.flush()
	for sent, score in zip(mentions, scores):
		# `label_id` rather than `id` so the builtin is not shadowed.
		res = [
			id2label[label_id] + "\t" + str(s)
			for label_id, s in enumerate(score)
			if s >= score_threshold
		]
		if res:
			print(sent + "=====[" + ", ".join(res) + "]")
			sys.stdout.flush()
	print('end')
	sys.stdout.flush()