Skip to content
Permalink
master
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
import re
from collections import Counter

import numpy
import spacy
# Load the spaCy 'en_core_web_sm' pipeline once at module level; `nlp` is
# called on the summary and scene text further down.
nlp = spacy.load('en_core_web_sm')
# Characters treated as formatting noise in a map line: list brackets, tabs,
# spaces and the trailing newline. Deleted in one pass by str.translate.
_MAP_NOISE = str.maketrans("", "", "[\t ]\n")


def _parse_scene_map_line(line):
    """Split one line of the scene/sentence map into a label and its entries.

    The line is split on commas. The scene label is whatever precedes the
    first tab in the first comma-separated field; that label is cut off the
    first field, and every field then has brackets, tabs, spaces and newlines
    removed.

    Example (format inferred from the parsing logic, not from a sample file):
        ``"3\\t[10, 11, 12]\\n"`` -> ``("3", ["10", "11", "12"])``

    Args:
        line: One raw line read from the map file.

    Returns:
        A ``(scene, entries)`` tuple where ``entries`` is a list of cleaned
        strings, one per comma-separated field.

    NOTE(review): a line with an empty list (``"x\\t[]"``) still yields one
    entry (the empty string), so code that counts entries with ``len()`` sees
    one sentence for it -- confirm this matches the data.
    """
    fields = line.split(",")
    scene = fields[0].partition("\t")[0]
    # Drop the label so only the list part of the first field remains.
    fields[0] = fields[0][len(scene):]
    return scene, [field.translate(_MAP_NOISE) for field in fields]


# Parallel lists: scenes[k] is the label of scene k, sceneMap[k] its entries.
scenes = []
sceneMap = []
with open("sceneSentenceMap.txt", "r") as scene_Sentence_Map:
    for line in scene_Sentence_Map:
        scene, mapLine = _parse_scene_map_line(line)
        scenes.append(scene)
        sceneMap.append(mapLine)

# Left open on purpose: the scene-text step further down consumes this file
# line by line with readline().
script = open("script.txt", "r")
# Rebuild the text of every scene: scene k owns as many consecutive lines of
# the script file as it has entries in sceneMap[k]. Lines are consumed strictly
# in order, so the script file must list sentences in scene order.
# `with script:` closes the already-open handle once all scenes are read
# (readline() returns "" past end of file, so short scripts do not raise).
with script:
    scenesText = [
        "".join(script.readline() for _ in sentence_ids)
        for _, sentence_ids in zip(scenes, sceneMap)
    ]
# Debug output: text of the first scene (raises IndexError if no scenes exist).
print(scenesText[0])
# Read the plot summary into memory, one list entry per line (trailing
# newlines are kept). The context manager closes the file as soon as the
# lines are loaded instead of leaving the handle open for the process lifetime.
with open("wikiplot.txt", "r") as summary:
    summaryLines = summary.readlines()
# Raw string: "\W" in a plain literal is an invalid escape sequence that newer
# Python versions warn about. The pattern itself is unchanged: split on every
# single non-word character.
_NON_WORD = re.compile(r"\W")


def _term_frequencies(text):
    """Count how often each word token occurs in *text*.

    Tokens are the non-empty pieces left after splitting on non-word
    characters. Counting is case-sensitive ("The" and "the" are separate
    keys). Keys appear in order of first occurrence.

    Args:
        text: The text to tokenize and count.

    Returns:
        A plain ``dict`` mapping each token to its number of occurrences.
    """
    # Splitting on single characters produces empty strings between adjacent
    # separators; those are filtered out before counting.
    return dict(Counter(token for token in _NON_WORD.split(text) if token))


# One term-frequency dict per scene, index-aligned with `scenesText`.
sceneTermFreq = [_term_frequencies(text) for text in scenesText]
# Run the loaded spaCy pipeline on the first summary line and on the text of
# the first scene. Either lookup raises IndexError if its list is empty.
# Nothing active below uses t1/t2; only the disabled similarity experiment
# in the next two comment lines refers to them.
t1 = nlp(summaryLines[0])
t2 = nlp(scenesText[0])
#similarity = t1.similarity(t2)
#print (t1, "\n", t2, "\n", similarity)
# The two triple-quoted blocks below are bare string literals, not comments:
# Python evaluates and discards them, so the code inside never runs.
# Disabled experiment 1: apply each pipeline component by hand to the first
# scene's text and print the entities (text and label) found in it.
"""
doc = nlp.make_doc(scenesText[0])
nlp.pipeline
for name, proc in nlp.pipeline:
doc = proc(doc)
print([(ent.text, ent.label_) for ent in doc.ents])
#for entity in t2.ents:
# print(entity.text, entity.label_)
"""
# Disabled experiment 2: run the pipeline on every distinct term of the first
# scene and print any entities recognized in the individual terms.
"""
for s in sceneTermFreq[0]:
term = nlp(s)
for entity in term.ents:
print(entity.text, entity.label_)
"""