# NOTE: web-page navigation text captured when this file was copied from its
# hosting site has been dropped here; it was not part of the script.
import extractScriptEntities
import extractSummaryEntities
import spacy
import numpy as np
import gensim
from sklearn import preprocessing
from gensim.models import Word2Vec
"""
import gensim
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.stem.snowball import SnowballStemmer
"""
#nlp = spacy.load('en')
"""
scenes = extractScriptEntities.extractScriptEntities('script.xml')
summarySentences = extractSummaryEntities.extractSummaryEntities('wikiplot.txt')
lemmatizer = WordNetLemmatizer()
stemmer = SnowballStemmer("english")
words = word_tokenize('He is incarcerated. He is in incarceration')
lemmas = [lemmatizer.lemmatize(word, pos = 'v') for word in words]
stems = [stemmer.stem(word) for word in words]
print (lemmas)
print(stems)
"""
"""
doc1 = nlp('dog')
doc2 = nlp('murder')
print(doc1, doc2)
print(doc1.similarity(doc2))
#print(doc1.vector)
print((np.dot(doc1.vector/np.linalg.norm(doc1.vector), doc2.vector/np.linalg.norm(doc2.vector))))
"""
# Word-similarity smoke test: read the script text, load pretrained word2vec
# vectors, and print the similarity score for a few hand-picked word pairs.

# Each line of script.txt is treated as one "sentence". The context manager
# guarantees the file handle is closed once the lines have been read.
with open('script.txt') as script:
    sentences = list(script)

# Whitespace-tokenised lines; only consumed by the disabled training path below.
splitSentences = [sentence.split() for sentence in sentences]

# Load pretrained vectors stored in the binary word2vec format.
model = gensim.models.KeyedVectors.load_word2vec_format(
    'GoogleNews-vectors-negative300.bin', binary=True)

# Disabled alternative: train a model on the script itself instead of
# loading pretrained vectors.
#model = Word2Vec(splitSentences, min_count = 1)
#word_vectors = model.wv
#print('American' in word_vectors.vocab)
#model.train(splitSentences,total_examples = len(splitSentences), epochs = 1)

print(2)  # progress marker: model loading has finished

# Word pairs to compare, in the order their scores are printed.
word_pairs = (
    ('need', 'want'),
    ('cat', 'dog'),
    ('murder', 'kill'),
    ('gun', 'kill'),
    ('interview', 'interviewing'),
    ('cat', 'murder'),
)

try:
    for first_word, second_word in word_pairs:
        print(model.similarity(first_word, second_word))
except KeyError:
    # gensim raises KeyError when a word is missing from the vocabulary.
    # As before, the first failure stops the remaining comparisons and the
    # marker 3 is printed; unlike a bare `except:`, this no longer hides
    # KeyboardInterrupt, SystemExit, or unrelated programming errors.
    print(3)