This repository has been archived by the owner. It is now read-only.
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
Browse the repository at this point in the history
update file
- Loading branch information
1 parent
652f6c8
commit 50f1afc
Showing
1 changed file
with
74 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
|
||
library(e1017) | ||
library(Seurat) | ||
|
||
load("161105_turtle_all_.Robj") # R object with all sequenced cells (unfiltered) | ||
|
||
data.info<-turtle@data.info[,c(1:2, 8:12)] # create a data frame containing metadata | ||
colnames(data.info) | ||
# [1] "nGene" "nUMI" "percent.mito" "pCODING" "pUTR" | ||
# [6] "pINTRONIC" "pINTERGENIC" "nReads" | ||
|
||
names <- rownames(turtle@data.info) | ||
bad.names<-read.table("161105_bad_names.txt") # names of the cells in the bad quality cluster (cluster 6) | ||
good.names<-names[!names %in% bad.names] | ||
|
||
# select cells and data to train the classifier | ||
|
||
train.bad.names<-sample(bad.names,100) # selects randomly 100 bad cells for training set | ||
train.good.names<-sample(good.names,100) # selects randomly 100 good cells for training set | ||
|
||
train.bad<-data.info[train.bad.names,] | ||
train.good<-data.info[train.good.names,] | ||
train<-rbind(train.good,train.bad) | ||
ytrain <- matrix(c(rep(1,100),rep(-1,100))) | ||
|
||
train.final = data.frame(train, ytrain=as.factor(ytrain)) | ||
|
||
# train the classifier | ||
|
||
svmfit=svm(ytrain~., data=train.final, kernel="linear", cost=0.1, scale=F) | ||
|
||
set.seed (1) | ||
tune.out=tune(svm,ytrain~.,data=train.final,kernel="linear", ranges=list(cost=c(0.1,1,10,100, 500))) | ||
summary(tune.out) | ||
# this is choosing which is the best cost for the svm | ||
|
||
bestmod=tune.out$best.model | ||
summary(bestmod) | ||
|
||
# build the test dataset | ||
|
||
test.bad.names<-bad.names[!bad.names %in% train.bad.names] | ||
test.good.names<-good.names[!good.names %in% train.good.names] | ||
test.bad<-data.info[test.bad.names,] | ||
test.good<-data.info[test.good.names,] | ||
test<-rbind(test.good,test.bad) | ||
test<-data.info[!rownames(data.info)%in% rownames(train),] | ||
|
||
test<-as.matrix(test) | ||
ytest<-matrix(c(rep(1,length(test.good.names)),rep(-1,length(test.bad.names)))) | ||
|
||
test.final = data.frame(test, ytest=as.factor(ytest)) | ||
|
||
ypred=predict(bestmod,test.final) | ||
table(predict=ypred, truth=test.final$ytest) | ||
|
||
ypred<-as.data.frame(ypred) | ||
|
||
# add SVM classification to the seurat object | ||
turtle = AddMetaData(turtle, ypred) | ||
|
||
write.table(ypred, "161106_ypred_final.txt") | ||
|
||
pred<-as.data.frame(turtle@data.info$ypred) | ||
rownames(pred)<-rownames(turtle@data.info) | ||
pred[train.bad.names,]<-"-1" | ||
pred[train.good.names,]<-"1" | ||
colnames(pred)<-c("svm_class") | ||
turtle = AddMetaData(turtle, pred) | ||
# now the turtle_all object has a svm_class column in the data.info slot, where the +1 and -1 classes are assigned also to the cells used for training. | ||
|
||
pdf("SVM_classification.pdf") | ||
DimPlot(turtle, reduction.use="tsne", group.by="svm_class", pt.size=0.7) | ||
dev.off() |