This repository has been archived by the owner. It is now read-only.
Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
ReptilePallium/Data_filtering/SVM_data_filtering.R
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
74 lines (52 sloc)
2.44 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(e1017) | |
library(Seurat) | |
load("161105_turtle_all_.Robj") # R object with all sequenced cells (unfiltered) | |
data.info<-turtle@data.info[,c(1:2, 8:12)] # create a data frame containing metadata | |
colnames(data.info) | |
# [1] "nGene" "nUMI" "percent.mito" "pCODING" "pUTR" | |
# [6] "pINTRONIC" "pINTERGENIC" "nReads" | |
names <- rownames(turtle@data.info) | |
bad.names<-read.table("161105_bad_names.txt") # names of the cells in the bad quality cluster (cluster 6) | |
good.names<-names[!names %in% bad.names] | |
# select cells and data to train the classifier | |
train.bad.names<-sample(bad.names,100) # selects randomly 100 bad cells for training set | |
train.good.names<-sample(good.names,100) # selects randomly 100 good cells for training set | |
train.bad<-data.info[train.bad.names,] | |
train.good<-data.info[train.good.names,] | |
train<-rbind(train.good,train.bad) | |
ytrain <- matrix(c(rep(1,100),rep(-1,100))) | |
train.final = data.frame(train, ytrain=as.factor(ytrain)) | |
# train the classifier | |
svmfit=svm(ytrain~., data=train.final, kernel="linear", cost=0.1, scale=F) | |
set.seed (1) | |
tune.out=tune(svm,ytrain~.,data=train.final,kernel="linear", ranges=list(cost=c(0.1,1,10,100, 500))) | |
summary(tune.out) | |
# this is choosing which is the best cost for the svm | |
bestmod=tune.out$best.model | |
summary(bestmod) | |
# build the test dataset | |
test.bad.names<-bad.names[!bad.names %in% train.bad.names] | |
test.good.names<-good.names[!good.names %in% train.good.names] | |
test.bad<-data.info[test.bad.names,] | |
test.good<-data.info[test.good.names,] | |
test<-rbind(test.good,test.bad) | |
test<-data.info[!rownames(data.info)%in% rownames(train),] | |
test<-as.matrix(test) | |
ytest<-matrix(c(rep(1,length(test.good.names)),rep(-1,length(test.bad.names)))) | |
test.final = data.frame(test, ytest=as.factor(ytest)) | |
ypred=predict(bestmod,test.final) | |
table(predict=ypred, truth=test.final$ytest) | |
ypred<-as.data.frame(ypred) | |
# add SVM classification to the seurat object | |
turtle = AddMetaData(turtle, ypred) | |
write.table(ypred, "161106_ypred_final.txt") | |
pred<-as.data.frame(turtle@data.info$ypred) | |
rownames(pred)<-rownames(turtle@data.info) | |
pred[train.bad.names,]<-"-1" | |
pred[train.good.names,]<-"1" | |
colnames(pred)<-c("svm_class") | |
turtle = AddMetaData(turtle, pred) | |
# now the turtle_all object has a svm_class column in the data.info slot, where the +1 and -1 classes are assigned also to the cells used for training. | |
pdf("SVM_classification.pdf") | |
DimPlot(turtle, reduction.use="tsne", group.by="svm_class", pt.size=0.7) | |
dev.off() |