Skip to content
This repository has been archived by the owner. It is now read-only.
Permalink
5b864a2edb
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
74 lines (52 sloc) 2.44 KB
library(e1017)
library(Seurat)
load("161105_turtle_all_.Robj") # R object with all sequenced cells (unfiltered)
data.info<-turtle@data.info[,c(1:2, 8:12)] # create a data frame containing metadata
colnames(data.info)
# [1] "nGene" "nUMI" "percent.mito" "pCODING" "pUTR"
# [6] "pINTRONIC" "pINTERGENIC" "nReads"
names <- rownames(turtle@data.info)
bad.names<-read.table("161105_bad_names.txt") # names of the cells in the bad quality cluster (cluster 6)
good.names<-names[!names %in% bad.names]
# select cells and data to train the classifier
train.bad.names<-sample(bad.names,100) # selects randomly 100 bad cells for training set
train.good.names<-sample(good.names,100) # selects randomly 100 good cells for training set
train.bad<-data.info[train.bad.names,]
train.good<-data.info[train.good.names,]
train<-rbind(train.good,train.bad)
ytrain <- matrix(c(rep(1,100),rep(-1,100)))
train.final = data.frame(train, ytrain=as.factor(ytrain))
# train the classifier
svmfit=svm(ytrain~., data=train.final, kernel="linear", cost=0.1, scale=F)
set.seed (1)
tune.out=tune(svm,ytrain~.,data=train.final,kernel="linear", ranges=list(cost=c(0.1,1,10,100, 500)))
summary(tune.out)
# this is choosing which is the best cost for the svm
bestmod=tune.out$best.model
summary(bestmod)
# build the test dataset
test.bad.names<-bad.names[!bad.names %in% train.bad.names]
test.good.names<-good.names[!good.names %in% train.good.names]
test.bad<-data.info[test.bad.names,]
test.good<-data.info[test.good.names,]
test<-rbind(test.good,test.bad)
test<-data.info[!rownames(data.info)%in% rownames(train),]
test<-as.matrix(test)
ytest<-matrix(c(rep(1,length(test.good.names)),rep(-1,length(test.bad.names))))
test.final = data.frame(test, ytest=as.factor(ytest))
ypred=predict(bestmod,test.final)
table(predict=ypred, truth=test.final$ytest)
ypred<-as.data.frame(ypred)
# add SVM classification to the seurat object
turtle = AddMetaData(turtle, ypred)
write.table(ypred, "161106_ypred_final.txt")
pred<-as.data.frame(turtle@data.info$ypred)
rownames(pred)<-rownames(turtle@data.info)
pred[train.bad.names,]<-"-1"
pred[train.good.names,]<-"1"
colnames(pred)<-c("svm_class")
turtle = AddMetaData(turtle, pred)
# now the turtle_all object has a svm_class column in the data.info slot, where the +1 and -1 classes are assigned also to the cells used for training.
pdf("SVM_classification.pdf")
DimPlot(turtle, reduction.use="tsne", group.by="svm_class", pt.size=0.7)
dev.off()