Skip to content

Commit

Permalink
outliers filtering, ruv, some bugs fixed
Browse files Browse the repository at this point in the history
  • Loading branch information
fdabbagh committed Mar 7, 2018
1 parent 3285396 commit 128fe62
Show file tree
Hide file tree
Showing 13 changed files with 331 additions and 90 deletions.
87 changes: 54 additions & 33 deletions app/server.r
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#2 when login again remove previous/next from second page (DONE! I think, will test for several cases later)
#3 remove the matrix and the output should be null again (Maybe use shinyjs::hide() then show again) (not really important now)
#4 Why correlation plot doesn't work (Haven't worked yet) maybe use do.call (Working now :-) DONE! )
#5 Do the batch effect with SVA and Supervised SVA (Almost Done!)
#5 Do the batch effect with SVA and Supervised SVA (Done!)
#6 maybe start splitting the server into chuncks inside the server folder and source the chuncks (almost DONE!)
#7 Check for shiny as a R package (it's doable, need to understand how packages are organized first)
#8 Having some global variables so I won't need to fetch from DeepBlue many times (DONE!)
Expand All @@ -20,22 +20,24 @@
#18 Fix the corrplot download (done!!)
#19 Score matrices and tables other than the first, lines don't need to be selected (Done!)
#20 Filter coverage files from the beginning with a check box (Done!)
#21 Make batch effect analysis matrices as a list
#21 Make batch effect analysis matrices as a list (Done!!)
#22 Notification to choose a chromosome (Done!)
#23 Error for PC selection (Done!)


# TODOs -------------------------------------------------------------------
#7 Check for shiny as a R package
#12 RUV for batch effect (from Markus' scripts to understand how it works)
#13 select expression, for gene expression data
#21 Make batch effect analysis matrices as a list
#22 I can't retrieve chromosomes anymore?
#13 select expression, for gene expression data (almost done!)
#24 Gene expression function (Same function for one gene then all genes)
#25 Gene expression ihec, map sample IDs and experiments name to get the final matrix
#27 Supervised SVA (use the method with the std of ranks)
#28 Add a place to choose outliers that can be used then in the RUV test and other tests



#Problems need to be solved
#1 Can't find chromosomes for each genome anymore (solved!!)
#2 I can't get the rna-seq for most of the projects
#3
#Problems
# Can't use the std of ranks for the control on score matrix because they have some negative values

#New UI with shiny dashboard
library(foreach)
Expand All @@ -48,7 +50,7 @@ library(dplyr)
library(ggplot2)
library(RColorBrewer)
library(ggrepel)
library(stats)
library(stats)
library(plotly)
library(shiny)
library(shinyBS)
Expand Down Expand Up @@ -177,8 +179,8 @@ function(input, output, session) {
updateSelectInput(session, inputId = "interest_var_sva",
choices = c("",colnames(attr(filtered_score_matrix(), "meta"))))

updateSelectInput(session, inputId = "batch_supervised_sva",
choices = colnames(attr(filtered_score_matrix(), "meta")))
# updateSelectInput(session, inputId = "batch_supervised_sva",
# choices = colnames(attr(filtered_score_matrix(), "meta")))

updateSelectInput(session, inputId = "adj_var_supervised_sva",
choices = c("",colnames(attr(filtered_score_matrix(), "meta"))))
Expand All @@ -204,47 +206,64 @@ function(input, output, session) {
if(input$batch_effect_choice == "combat"){
temp_matrices <- batch_adjusted_matrices()
temp_matrices[[paste(input$batch_effect_choice, input$batch_combat,
input$adj_var_combat, input$interest_var_combat, collapse = "-")]]<-
input$adj_var_combat, input$interest_var_combat, sep = "-")]]<-
combat_batch_effect(filtered_score_matrix = filtered_score_matrix(),
batch = input$batch_combat,
outliers = input$outliers,
project = input$project,
adjustment_var = input$adj_var_combat,
interest_var = input$interest_var_combat)

batch_adjusted_matrices(temp_matrices)
# batch_adjusted_matrices[[paste(input$batch_effect_choice, input$batch_combat,
# input$adj_var_combat,
# input$interest_var_combat, collapse = "-")]] <-
#
# combat_batch_effect(filtered_score_matrix = filtered_score_matrix(),
# batch = input$batch_combat,
# adjustment_var = input$adj_var_combat,
# interest_var = input$interest_var_combat)


}else if(input$batch_effect_choice == "sva"){
temp_matrices <- batch_adjusted_matrices()

temp_matrices[[paste(input$batch_effect_choice, input$batch_sva,
input$adj_var_sva, input$interest_var_sva, collapse = "-")]]<-
input$adj_var_sva, input$interest_var_sva, sep = "-")]]<-

sva_batch_effect(filtered_score_matrix = filtered_score_matrix(),
adjustment_var = input$adj_var_sva,
outliers = input$outliers,
project = input$project,
interest_var = input$interest_var_sva)

batch_adjusted_matrices(temp_matrices)

# batch_adjusted_matrices[[paste(input$batch_effect_choice, input$batch_sva,
# input$adj_var_sva,
# input$interest_var_sva, collapse = "-")]] <-
#
# sva_batch_effect(filtered_score_matrix = filtered_score_matrix(),
# adjustment_var = input$adj_var_sva,
# interest_var = input$interest_var_sva)
}else if(input$batch_effect_choice == "supervised_sva"){
temp_matrices <- batch_adjusted_matrices()

temp_matrices[[paste(input$batch_effect_choice, input$batch_sva,
input$adj_var_supervised_sva, input$interest_var_supervised_sva, sep = "-")]]<-

supervised_sva_batch_effect(filtered_score_matrix = filtered_score_matrix(),
adjustment_var = input$adj_var_supervised_sva,
interest_var = input$interest_var_supervised_sva,
project = input$project,
outliers = input$outliers,
project = input$project)
}else{

temp_matrices <- batch_adjusted_matrices()

temp_matrices[["some_name_here"]] <-

ruv_batch_effect(filtered_score_matrix = filtered_score_matrix(),
outliers = input$outliers,
project = input$project,
regularization_par = input$regularization_par,
quantile_prob = input$quantile_prob,
k_rank = input$k_rank,
estimate_hkg = input$house_keeping_genes)
}
updateSelectInput(session, "corrected_matrices", choices = names(temp_matrices))
return(batch_adjusted_matrices())
})

})

#Matrix summary output
observeEvent(input$corrected_matrices,{
if(is.null(input$corrected_matrices)){
output$batch_matrix_summary <- NULL
Expand Down Expand Up @@ -292,11 +311,11 @@ function(input, output, session) {
tempdir = tempdir()
setwd(tempdir)

write.table(batch_adjusted_matrix(),
write.table(batch_adjusted_matrix()[[input$corrected_matrices]],
"adjusted_data_matrix.tsv", row.names = FALSE, col.names = TRUE,
sep = "\t")

write.table(attr(batch_adjusted_matrix(), "meta"),
write.table(attr(batch_adjusted_matrix()[[input$corrected_matrices]], "meta"),
"matrix_metadata.tsv",
sep = "\t", row.names = FALSE, col.names = TRUE)

Expand All @@ -315,6 +334,7 @@ function(input, output, session) {

plotly_pca <- plotly_pca(experiments_info_meta = experiments_info_meta(),
filtered_score_matrix = filtered_score_matrix(),
name = "Raw Matrix",
project = input$project,
type_of_score = input$type_of_score,
color_by = input$color_by,
Expand Down Expand Up @@ -381,7 +401,8 @@ function(input, output, session) {
#get plot

plot_pca_batch <- plotly_pca <- plotly_pca(experiments_info_meta = experiments_info_meta(),
filtered_score_matrix = batch_adjusted_matrix(),
filtered_score_matrix = batch_adjusted_matrix()[[input$corrected_matrices]],
name = input$corrected_matrices,
project = input$project,
type_of_score = input$type_of_score,
color_by = input$color_by_batch,
Expand Down Expand Up @@ -442,7 +463,7 @@ function(input, output, session) {
show_legend = TRUE)

p2 <- plotly_pca(experiments_info_meta = experiments_info_meta(),
filtered_score_matrix = batch_adjusted_matrix(),
filtered_score_matrix = batch_adjusted_matrix()[[input$corrected_matrices]],
project = input$project,
type_of_score = input$type_of_score,
color_by = input$color_by_batch,
Expand Down
44 changes: 39 additions & 5 deletions app/ui.r
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,6 @@ dashboardPage(title = "Batch Effect Analysis and Visualization",
DT::dataTableOutput("summary"),

size = "large")

),

# score matrix tab------------------------------------------------------------
Expand Down Expand Up @@ -118,7 +117,7 @@ dashboardPage(title = "Batch Effect Analysis and Visualization",
choices = c("1000 base pairs" = "1000",
"5000 base pairs" = "5000"),
selected = "5000")),
selectInput('chr', 'Select a Chromosome, or leave empty for the whole genome', c("")),
selectInput('chr', 'Select a Chromosome, or leave empty for the whole genome', c(""), multiple = TRUE),
# tags$div(id = 'addChromosomes'),
actionButton("show_input_check", "Calculate Score matrix")

Expand All @@ -130,7 +129,15 @@ dashboardPage(title = "Batch Effect Analysis and Visualization",
c("min", "max", "sum", "mean", "var", "sd", "median", "count", "boolean"),
selected = "mean"),

numericInput("variance","row variation cutoff", value = "0.05"),
radioButtons("filter_rows", "Choose filtering method",
choices = c("Filter out non-complete rows" = "filter_non_complete",
"Filter using Row Variation Cutoff" = "row_variation")),
conditionalPanel(
condition = "input.filter_rows == 'row_variation'",
numericInput("variance","row variation cutoff", value = "0.05")

),

downloadButton("downloadMatrix", "Download Data")

),
Expand Down Expand Up @@ -222,7 +229,9 @@ dashboardPage(title = "Batch Effect Analysis and Visualization",
radioButtons("batch_effect_choice", "Choose method to calculate scoring matrix",
choices = c("ComBat" = "combat",
"SVA" = "sva",
"Supervised SVA" = "supervised_sva")),
"Supervised SVA" = "supervised_sva",
"RUV" = "ruv")),
selectInput("outliers", "Select Outliers (Optional)", choices = c(""),multiple = TRUE),
bsButton("batch_pie",label = "", icon = icon("pie-chart"), style = "default", size = "default"),
bsModal("batch_pie_chart", "Values for Key chosen", "batch_pie",size = "large",
fluidPage(
Expand All @@ -249,10 +258,35 @@ dashboardPage(title = "Batch Effect Analysis and Visualization",
conditionalPanel(
condition = "input.batch_effect_choice == 'supervised_sva'",

selectInput('batch_supervised_sva', "Select a batch to adjust for",c("")),
# selectInput('batch_supervised_sva', "Select a batch to adjust for",c("")),
selectInput("adj_var_supervised_sva", "Adjustment Variable",c("")),
selectInput("interest_var_supervised_sva", "Variable of interest", c(""))
),
conditionalPanel(
condition = "input.batch_effect_choice == 'ruv'",

numericInput("regularization_par",
"Regularization parameter for the unwanted variation (nu.coeff)", value = 0.00001),

radioButtons("house_keeping_genes", "Method to estimate House Keeping Genes",
choices = c("Estimate using rank analysis" = "estimate_hkg",
"Use a predefined list" = "use_hkg_list")),

conditionalPanel(
condition = "input.house_keeping_genes == 'estimate_hkg'",

numericInput("regularization_par",
"Regularization parameter for the unwanted variation",
value = 0.00001),

numericInput("quantile_prob",
"Quantile probability used in the rank analysis estimation [0,1]",
value = 0.005, min = 0, max = 1),

numericInput("k_rank", "Desired rank for the estimated unwanted variation term",
value = 5)
)
),
downloadButton("downloadAdjustedMatrix", "Download Data")
)
),
Expand Down
31 changes: 25 additions & 6 deletions functions/combat_batch_effect.r
Original file line number Diff line number Diff line change
@@ -1,26 +1,45 @@
combat_batch_effect <- function(filtered_score_matrix,
batch,
adjustment_var,
outliers,
project,
interest_var){


if(project == "TEPIC reprocessed IHEC data"){
outliers <- unique(c(outliers,"R_ENCBS150HBC_ENCBS376RZJ",
"R_ENCBS559QNR_ENCBS568FYY_ENCBS945MCY",
"R_ENCBS853LFM","E_520VFV"))
}
# filtered_matrix <- filtered_score_matrix$data
metadata <- attr(filtered_score_matrix, "meta")

validate(
need(!anyNA(metadata[,batch]), message = paste(batch, "has NAs and cannot be used in combat")),

need(nlevels(as.factor(metadata[,batch])) > 1, message = paste(batch,"has less than 2 level",
"check levels using the pie chart"))
)

row_sum<-apply(filtered_score_matrix,1,sum)
if(!length(which(row_sum == 0)) == 0){
filtered_score_matrix<-filtered_score_matrix[-which(row_sum==0),]
}
if(outliers != ""){
filtered_score_matrix <- filtered_score_matrix[,-match(outliers,colnames(filtered_score_matrix))]
metadata <- metadata[-match(outliers, metadata[,"experiment"]),]

}

#validation of the inptus
#The variable selected should have more than 1 level
if(adjustment_var == ""){
adjustment_var = NULL
}else{
for (adj_var in adjustment_var){
validate(
need(!anyNA(metadata[,adj_var]), message = paste(adj_var, "has NAs and cannot be used to make the model"))
)
need(!anyNA(metadata[,adj_var]), message = paste(adj_var, "has NAs and cannot be used to make the model")),

validate(
need(nlevels(as.factor(metadata[,adj_var])) > 1, message = paste(adj_var,"has less than 2 level",
"check levels using the pie chart"))
)
Expand All @@ -33,9 +52,8 @@ combat_batch_effect <- function(filtered_score_matrix,
for (inter_var in interest_var){

validate(
need(!anyNA(metadata[,inter_var]), message = paste(inter_var, "has NAs and cannot be used to make the model"))
)
validate(
need(!anyNA(metadata[,inter_var]), message = paste(inter_var, "has NAs and cannot be used to make the model")),

need(nlevels(as.factor(metadata[,inter_var])) > 1, message = paste(inter_var,"has less than 2 level",
"check levels using the pie chart"))
)
Expand Down Expand Up @@ -64,6 +82,7 @@ combat_batch_effect <- function(filtered_score_matrix,
}
}


batch_adjusted_matrix <- ComBat(dat=filtered_score_matrix,
batch=as.double(as.factor(metadata[,batch])),
mod=modcombat, par.prior=TRUE, prior.plots=FALSE)
Expand Down
2 changes: 1 addition & 1 deletion functions/inserted_ui.r
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ inserted_ui <- function(user_key,user_name,echo, id, genomes) {
selectInput('project', 'Select Project',choices = ""),
selectInput('epigenetic_mark', 'Select a Data Type',
c("Gene Expression",deepblue_list_epigenetic_marks(user_key = user_key)$name),
multiple = TRUE),
multiple = F),

actionButton('list_exper_btn', "List Experiments"),

Expand Down
Loading

0 comments on commit 128fe62

Please sign in to comment.