Skip to content
Permalink
master
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
import extractScriptEntities
import extractSummaryEntities
import spacy
import gensim
from nltk import ngrams
import processAlignment
#from nltk.sentiment.vader import SentimentIntensityAnalyzer
# Result of the alignment: summary-sentence index -> set of scene indices.
# Filled in by the alignment loop further down in this file.
summarySceneMap = dict()
# Word-vector model in word2vec binary format, loaded from a path relative to
# the current working directory. countWords() queries it via model.similarity.
model = gensim.models.KeyedVectors.load_word2vec_format('GoogleNews-vectors-negative300.bin', binary = True)
# Parsed inputs. The alignment loop reads `.Ngrams` and `.bagOfWords` from the
# elements of both collections.
scenes = extractScriptEntities.extractScriptEntities('script.xml', model)
summarySentences = extractSummaryEntities.extractSummaryEntities('wikiplot.txt', model)
# Minimum model.similarity score for a summary word to count as matched by a
# scene word (used in countWords).
similarity_threshold = 0.54
# Minimum fraction of a summary sentence's words that must be matched before
# the sentence is considered covered (1.0 means every word).
word_inclusion_threshold = 1.0
# Minimum fraction of a summary sentence's n-grams that must be matched.
entity_inclusion_threshold = 0.43
def removeOne(lemma, dictionary):
    """Decrement the count stored for ``lemma`` in ``dictionary``, in place.

    An entry whose count is exactly 1 is deleted instead of being left at
    zero. Nothing happens when ``lemma`` is not a key. Always returns None.
    """
    if lemma in dictionary:
        if dictionary[lemma] == 1:
            # Last occurrence: drop the key rather than store a zero count.
            del dictionary[lemma]
        else:
            dictionary[lemma] -= 1
def getLength(collection):
    """Return ``len(collection)``, treating ``None`` as an empty collection."""
    if collection is not None:
        return len(collection)
    return 0
def countEntities(sceneEntities, summaryEntities):
    """Count scene entities that also occur in ``summaryEntities``.

    Each match is removed from ``summaryEntities`` (the caller's object is
    mutated), so a summary entity can be matched at most once, including
    across repeated calls that pass the same collection.

    Args:
        sceneEntities: iterable of entities from one scene, or None.
        summaryEntities: collection supporting ``in`` and ``.remove()``
            (e.g. a list or set), or None.

    Returns:
        The number of entities matched by this call; 0 if either argument
        is None.
    """
    if sceneEntities is None or summaryEntities is None:
        # Previously returned the not-yet-assigned local `matched`, which
        # raised UnboundLocalError.
        return 0
    matched = 0
    for sceneEntity in sceneEntities:
        if sceneEntity in summaryEntities:
            summaryEntities.remove(sceneEntity)
            matched += 1
    return matched
def countWords(sceneBOW, summarySentenceBOW):
    """Count summary words that have a sufficiently similar word in the scene.

    A summary word counts as matched when ``model.similarity`` between it and
    any scene word is at least ``similarity_threshold`` (both are module-level
    names). Matched words are then removed from ``summarySentenceBOW`` with an
    in-place ``-=``, so they are not counted again on a later call with the
    same collection.

    Args:
        sceneBOW: iterable of words from one scene, or None.
        summarySentenceBOW: words of one summary sentence, or None.
            Presumably a ``set``, since ``-=`` with a set is applied to it;
            verify against the extractor.

    Returns:
        The number of summary words matched by this call; 0 if either
        argument is None.
    """
    if sceneBOW is None or summarySentenceBOW is None:
        return 0
    matched = 0
    matched_set = set()
    for summaryWord in summarySentenceBOW:
        for sceneWord in sceneBOW:
            try:
                similar = model.similarity(summaryWord, sceneWord) >= similarity_threshold
            except KeyError:
                # A word missing from the model's vocabulary: report the pair
                # and try the next scene word. This was a bare `except:`,
                # which also swallowed KeyboardInterrupt/SystemExit and hid
                # genuine bugs.
                print("Exception: ", summaryWord, ", ", sceneWord)
                continue
            if similar:
                matched += 1
                matched_set.add(summaryWord)
                break  # one matching scene word is enough for this summary word
    # Removal is deferred until iteration is over so the collection is not
    # modified while it is being iterated.
    summarySentenceBOW -= matched_set
    return matched
def checkInclusion(matched, total, threshold):
    """Return True when ``matched / total`` reaches ``threshold``.

    A ``total`` of zero is treated as trivially satisfied (there was nothing
    to match), which also avoids dividing by zero.
    """
    return bool(total == 0 or matched / total >= threshold)
def getF1Score(groundTruthMap, summarySceneMap):
    """Print precision, recall and F1 of ``summarySceneMap`` against the truth.

    Both arguments map a key to a collection of items. An item listed under
    the same key in both maps is a true positive; one found only in the
    ground truth is a false negative; one found only in the generated map is
    a false positive. A key absent from the other map contributes all of its
    items to the corresponding error count.

    Both maps and the three scores are printed. Nothing is returned.
    """
    print("Ground Truth: ", groundTruthMap, '\n', "Generated: ", summarySceneMap)

    hits = 0      # true positives
    misses = 0    # false negatives
    spurious = 0  # false positives

    # Walk the ground truth to collect true positives and false negatives.
    for key in groundTruthMap:
        expected = groundTruthMap[key]
        if key in summarySceneMap:
            predicted = summarySceneMap[key]
            found = sum(1 for item in expected if item in predicted)
            hits += found
            misses += len(expected) - found
        else:
            misses += len(expected)

    # Walk the generated map to collect false positives.
    for key in summarySceneMap:
        predicted = summarySceneMap[key]
        if key in groundTruthMap:
            expected = groundTruthMap[key]
            spurious += sum(1 for item in predicted if item not in expected)
        else:
            spurious += len(predicted)

    predicted_total = hits + spurious
    actual_total = hits + misses
    # An empty denominator yields a score of 0 rather than a division error.
    precision = hits / predicted_total if predicted_total != 0 else 0
    recall = hits / actual_total if actual_total != 0 else 0
    if precision == 0 or recall == 0:
        F1 = 0
    else:
        F1 = 2 * precision * recall / (precision + recall)
    print("Precision: ", precision, "Recall: ", recall, "F1: ", F1)
# Align every summary sentence with a contiguous run of scenes.
#
# Scenes are consumed in order. For each sentence, scenes are added one at a
# time, accumulating matched words and n-grams, until both inclusion
# thresholds are met; the sentence is then mapped to the scene indices it
# consumed. Matched words/n-grams are removed from the sentence's collections
# by countWords/countEntities, so the totals are captured before scanning.
sceneID = 0
# Initialised up front so the rewind below cannot hit an unbound name when
# `scenes` is empty (sceneID >= len(scenes) is then true on the first pass).
startSceneID = 0
for summarySentenceID, summarySentence in enumerate(summarySentences):
    summarySentenceBOW = summarySentence.bagOfWords
    summaryNgrams = summarySentence.Ngrams
    total_Ngrams = getLength(summaryNgrams)
    total_summaryBOW = getLength(summarySentenceBOW)
    matched_summaryWords = 0
    matched_Ngrams = 0
    if sceneID >= len(scenes):
        # The previous sentence ran off the end without aligning; rewind to
        # where its scan started so this sentence gets the same scenes.
        sceneID = startSceneID
    startSceneID = sceneID
    while sceneID < len(scenes):
        matched_Ngrams += countEntities(scenes[sceneID].Ngrams, summaryNgrams)
        matched_summaryWords += countWords(scenes[sceneID].bagOfWords, summarySentenceBOW)
        sceneID += 1
        if (checkInclusion(matched_summaryWords, total_summaryBOW, word_inclusion_threshold)
                and checkInclusion(matched_Ngrams, total_Ngrams, entity_inclusion_threshold)):
            # sceneID was already advanced, so the range covers every scene
            # scanned for this sentence, including the one just processed.
            summarySceneMap[summarySentenceID] = set(range(startSceneID, sceneID))
            # Step back so the next sentence starts at the last matched scene.
            sceneID -= 1
            break

# Score the generated alignment against the reference alignment.
groundTruthMap = processAlignment.processAlignment()
getF1Score(groundTruthMap, summarySceneMap)
"""
summarySentences[0].output()
print("\n\n")
scenes[0].output()
nlp = spacy.load('en')
sid = SentimentIntensityAnalyzer()
summarySentenceNum = 0
for summarySentence in summarySentences:
sentence = summarySentence.sentence
scriptSentenceNum = 0
for scene in scenes:
sceneSentences = scene.sentences
for sceneSentence in sceneSentences:
similarity = nlp(sentence).similarity(nlp(sceneSentence))
if similarity >= 0.8:
print (sentence, sid.polarity_scores(sentence), sceneSentence, sid.polarity_scores(sceneSentence), '\n', similarity, '\n')
#print (summarySentenceNum, scriptSentenceNum, '\n')
scriptSentenceNum = scriptSentenceNum + 1
summarySentenceNum = summarySentenceNum + 1
"""