Skip to content
This repository has been archived by the owner. It is now read-only.

Commit

Permalink
Browse files Browse the repository at this point in the history
update file
  • Loading branch information
MPIBR-toschesm committed May 1, 2018
1 parent 652f6c8 commit 50f1afc
Showing 1 changed file with 74 additions and 0 deletions.
74 changes: 74 additions & 0 deletions Preprocessing_quality_control/SVM_data_filtering.R
@@ -0,0 +1,74 @@

library(e1017)
library(Seurat)

load("161105_turtle_all_.Robj") # R object with all sequenced cells (unfiltered)

data.info<-turtle@data.info[,c(1:2, 8:12)] # create a data frame containing metadata
colnames(data.info)
# [1] "nGene" "nUMI" "percent.mito" "pCODING" "pUTR"
# [6] "pINTRONIC" "pINTERGENIC" "nReads"

names <- rownames(turtle@data.info)
bad.names<-read.table("161105_bad_names.txt") # names of the cells in the bad quality cluster (cluster 6)
good.names<-names[!names %in% bad.names]

# select cells and data to train the classifier

train.bad.names<-sample(bad.names,100) # selects randomly 100 bad cells for training set
train.good.names<-sample(good.names,100) # selects randomly 100 good cells for training set

train.bad<-data.info[train.bad.names,]
train.good<-data.info[train.good.names,]
train<-rbind(train.good,train.bad)
ytrain <- matrix(c(rep(1,100),rep(-1,100)))

train.final = data.frame(train, ytrain=as.factor(ytrain))

# train the classifier

svmfit=svm(ytrain~., data=train.final, kernel="linear", cost=0.1, scale=F)

set.seed (1)
tune.out=tune(svm,ytrain~.,data=train.final,kernel="linear", ranges=list(cost=c(0.1,1,10,100, 500)))
summary(tune.out)
# this is choosing which is the best cost for the svm

bestmod=tune.out$best.model
summary(bestmod)

# build the test dataset

test.bad.names<-bad.names[!bad.names %in% train.bad.names]
test.good.names<-good.names[!good.names %in% train.good.names]
test.bad<-data.info[test.bad.names,]
test.good<-data.info[test.good.names,]
test<-rbind(test.good,test.bad)
test<-data.info[!rownames(data.info)%in% rownames(train),]

test<-as.matrix(test)
ytest<-matrix(c(rep(1,length(test.good.names)),rep(-1,length(test.bad.names))))

test.final = data.frame(test, ytest=as.factor(ytest))

ypred=predict(bestmod,test.final)
table(predict=ypred, truth=test.final$ytest)

ypred<-as.data.frame(ypred)

# add SVM classification to the seurat object
turtle = AddMetaData(turtle, ypred)

write.table(ypred, "161106_ypred_final.txt")

pred<-as.data.frame(turtle@data.info$ypred)
rownames(pred)<-rownames(turtle@data.info)
pred[train.bad.names,]<-"-1"
pred[train.good.names,]<-"1"
colnames(pred)<-c("svm_class")
turtle = AddMetaData(turtle, pred)
# now the turtle_all object has a svm_class column in the data.info slot, where the +1 and -1 classes are assigned also to the cells used for training.

pdf("SVM_classification.pdf")
DimPlot(turtle, reduction.use="tsne", group.by="svm_class", pt.size=0.7)
dev.off()

0 comments on commit 50f1afc

Please sign in to comment.