From 128fe6284c6bcc83408c5d45cd5a49c3eabc70d8 Mon Sep 17 00:00:00 2001
From: Fawaz Dabbaghieh <fdabbagh@mpi-inf.mpg.de>
Date: Wed, 7 Mar 2018 02:14:25 +0100
Subject: [PATCH] outliers filtering, ruv, some bugs fixed

---
 app/server.r                                  | 87 ++++++++++++-------
 app/ui.r                                      | 44 ++++++++--
 functions/combat_batch_effect.r               | 31 +++++--
 functions/inserted_ui.r                       |  2 +-
 functions/plotly_pca.r                        | 21 ++++-
 functions/plotting_rna_seq_ontology_compare.r | 16 ++++
 functions/ruv_batch_effect.r                  | 78 +++++++++++++++++
 functions/score_matrix_annotations.r          | 25 ++++--
 functions/score_matrix_tiling_regions.r       | 31 ++++---
 functions/supervised_sva_batch_effect.r       | 48 +++++++---
 functions/sva_batch_effect.r                  | 27 ++++--
 server/calculate_score_matrix.r               |  5 ++
 server/listing_genomes.r                      |  6 +-
 13 files changed, 331 insertions(+), 90 deletions(-)
 create mode 100644 functions/plotting_rna_seq_ontology_compare.r
 create mode 100644 functions/ruv_batch_effect.r

diff --git a/app/server.r b/app/server.r
index c8ddb2f..fb5ae86 100644
--- a/app/server.r
+++ b/app/server.r
@@ -4,7 +4,7 @@
 #2 when login again remove previous/next from second page (DONE! I think, will test for several cases later)
 #3 remove the matrix and the output should be null again (Maybe use shinyjs::hide() then show again) (not really important now)
 #4 Why correlation plot doesn't work (Haven't worked yet) maybe use do.call (Working now :-) DONE! )
-#5 Do the batch effect with SVA and Supervised SVA (Almost Done!)
+#5 Do the batch effect with SVA and Supervised SVA (Done!)
 #6 maybe start splitting the server into chuncks inside the server folder and source the chuncks (almost DONE!)
 #7 Check for shiny as a R package (it's doable, need to understand how packages are organized first)
 #8 Having some global variables so I won't need to fetch from DeepBlue many times (DONE!)
@@ -20,22 +20,24 @@
 #18 Fix the corrplot download (done!!)
 #19 Score matrices and tables other than the first, lines don't need to be selected (Done!)
 #20 Filter coverage files from the beginning with a check box (Done!)
-#21 Make batch effect analysis matrices as a list
+#21 Make batch effect analysis matrices as a list (Done!!)
 #22 Notification to choose a chromosome (Done!)
+#23 Error for PC selection (Done!)
 
 
 # TODOs -------------------------------------------------------------------
 #7 Check for shiny as a R package 
 #12 RUV for batch effect (from Markus' scripts to understand how it works)
-#13 select expression, for gene expression data
-#21 Make batch effect analysis matrices as a list
-#22 I can't retrieve chromosomes anymore?
+#13 select expression, for gene expression data (almost done!)
+#24 Gene expression function (Same function for one gene then all genes)
+#25 Gene expression ihec, map sample IDs and experiments name to get the final matrix
+#27 Supervised SVA (use the method with the std of ranks)
+#28 Add a place to choose outliers that can be used then in the RUV test and other tests
+
 
 
-#Problems need to be solved
-#1 Can't find chromosomes for each genome anymore (solved!!)
-#2 I can't get the rna-seq for most of the projects
-#3
+#Problems
+# Can't use the std of ranks for the control on score matrix because they have some negative values
 
 #New UI with shiny dashboard
 library(foreach)
@@ -48,7 +50,7 @@ library(dplyr)
 library(ggplot2)
 library(RColorBrewer)
 library(ggrepel)
-library(stats)
+library(stats) 
 library(plotly)
 library(shiny)
 library(shinyBS)
@@ -177,8 +179,8 @@ function(input, output, session) {
     updateSelectInput(session, inputId = "interest_var_sva",
                       choices = c("",colnames(attr(filtered_score_matrix(), "meta"))))
     
-    updateSelectInput(session, inputId = "batch_supervised_sva",
-                      choices = colnames(attr(filtered_score_matrix(), "meta")))
+    # updateSelectInput(session, inputId = "batch_supervised_sva",
+    #                   choices = colnames(attr(filtered_score_matrix(), "meta")))
     
     updateSelectInput(session, inputId = "adj_var_supervised_sva",
                       choices = c("",colnames(attr(filtered_score_matrix(), "meta"))))
@@ -204,47 +206,64 @@ function(input, output, session) {
       if(input$batch_effect_choice == "combat"){
         temp_matrices <- batch_adjusted_matrices()
         temp_matrices[[paste(input$batch_effect_choice, input$batch_combat,
-                           input$adj_var_combat, input$interest_var_combat, collapse = "-")]]<-
+                           input$adj_var_combat, input$interest_var_combat, sep = "-")]]<-
             combat_batch_effect(filtered_score_matrix = filtered_score_matrix(),
                                 batch = input$batch_combat,
+                                outliers = input$outliers,
+                                project = input$project,
                                 adjustment_var = input$adj_var_combat,
                                 interest_var = input$interest_var_combat)
         
         batch_adjusted_matrices(temp_matrices)
-        # batch_adjusted_matrices[[paste(input$batch_effect_choice, input$batch_combat,
-        #                                  input$adj_var_combat,
-        #                                  input$interest_var_combat, collapse = "-")]] <- 
-        #   
-        #   combat_batch_effect(filtered_score_matrix = filtered_score_matrix(),
-        #                       batch = input$batch_combat,
-        #                       adjustment_var = input$adj_var_combat,
-        #                       interest_var = input$interest_var_combat)
+
         
       }else if(input$batch_effect_choice == "sva"){
         temp_matrices <- batch_adjusted_matrices()
         
         temp_matrices[[paste(input$batch_effect_choice, input$batch_sva,
-                             input$adj_var_sva, input$interest_var_sva, collapse = "-")]]<-
+                             input$adj_var_sva, input$interest_var_sva, sep = "-")]]<-
           
           sva_batch_effect(filtered_score_matrix = filtered_score_matrix(),
                            adjustment_var = input$adj_var_sva,
+                           outliers = input$outliers,
+                           project = input$project,
                            interest_var = input$interest_var_sva)
         
         batch_adjusted_matrices(temp_matrices)
         
-        # batch_adjusted_matrices[[paste(input$batch_effect_choice, input$batch_sva,
-        #                                input$adj_var_sva,
-        #                                input$interest_var_sva, collapse = "-")]] <-
-        #   
-        #   sva_batch_effect(filtered_score_matrix = filtered_score_matrix(),
-        #                    adjustment_var = input$adj_var_sva,
-        #                    interest_var = input$interest_var_sva)
+      }else if(input$batch_effect_choice == "supervised_sva"){
+        temp_matrices <- batch_adjusted_matrices()
+        
+        temp_matrices[[paste(input$batch_effect_choice, input$batch_sva,
+                             input$adj_var_supervised_sva, input$interest_var_supervised_sva, sep = "-")]]<-
+          
+          supervised_sva_batch_effect(filtered_score_matrix = filtered_score_matrix(),
+                                      adjustment_var = input$adj_var_supervised_sva,
+                                      interest_var = input$interest_var_supervised_sva,
+                                      project = input$project,
+                                      outliers = input$outliers,
+                                      project = input$project)
+      }else{
+        
+        temp_matrices <- batch_adjusted_matrices()
+        
+        temp_matrices[["some_name_here"]] <-
+          
+          ruv_batch_effect(filtered_score_matrix = filtered_score_matrix(),
+                           outliers = input$outliers,
+                           project = input$project,
+                           regularization_par = input$regularization_par,
+                           quantile_prob = input$quantile_prob,
+                           k_rank = input$k_rank,
+                           estimate_hkg = input$house_keeping_genes)
       }
       updateSelectInput(session, "corrected_matrices", choices = names(temp_matrices))
       return(batch_adjusted_matrices())
     })
 
   })
+  
+  #Matrix summary output
   observeEvent(input$corrected_matrices,{
     if(is.null(input$corrected_matrices)){
       output$batch_matrix_summary <- NULL
@@ -292,11 +311,11 @@ function(input, output, session) {
       tempdir = tempdir()
       setwd(tempdir)
       
-      write.table(batch_adjusted_matrix(),
+      write.table(batch_adjusted_matrix()[[input$corrected_matrices]],
                   "adjusted_data_matrix.tsv", row.names = FALSE, col.names = TRUE,
                   sep = "\t")
       
-      write.table(attr(batch_adjusted_matrix(), "meta"),
+      write.table(attr(batch_adjusted_matrix()[[input$corrected_matrices]], "meta"),
                   "matrix_metadata.tsv",
                   sep = "\t", row.names = FALSE, col.names = TRUE)
       
@@ -315,6 +334,7 @@ function(input, output, session) {
     
     plotly_pca <- plotly_pca(experiments_info_meta = experiments_info_meta(),
                              filtered_score_matrix = filtered_score_matrix(),
+                             name = "Raw Matrix",
                              project = input$project,
                              type_of_score = input$type_of_score,
                              color_by = input$color_by,
@@ -381,7 +401,8 @@ function(input, output, session) {
     #get plot
     
     plot_pca_batch <- plotly_pca <- plotly_pca(experiments_info_meta = experiments_info_meta(),
-                                               filtered_score_matrix = batch_adjusted_matrix(),
+                                               filtered_score_matrix = batch_adjusted_matrix()[[input$corrected_matrices]],
+                                               name = input$corrected_matrices,
                                                project = input$project,
                                                type_of_score = input$type_of_score,
                                                color_by = input$color_by_batch,
@@ -442,7 +463,7 @@ function(input, output, session) {
                      show_legend = TRUE)
     
     p2 <- plotly_pca(experiments_info_meta = experiments_info_meta(),
-                     filtered_score_matrix = batch_adjusted_matrix(),
+                     filtered_score_matrix = batch_adjusted_matrix()[[input$corrected_matrices]],
                      project = input$project,
                      type_of_score = input$type_of_score,
                      color_by = input$color_by_batch,
diff --git a/app/ui.r b/app/ui.r
index 98c6dea..8eadc2d 100644
--- a/app/ui.r
+++ b/app/ui.r
@@ -87,7 +87,6 @@ dashboardPage(title = "Batch Effect Analysis and Visualization",
                       DT::dataTableOutput("summary"),
                       
                       size = "large")
-              
               ),
 
 # score matrix tab------------------------------------------------------------
@@ -118,7 +117,7 @@ dashboardPage(title = "Batch Effect Analysis and Visualization",
                                                  choices = c("1000 base pairs" = "1000",
                                                              "5000 base pairs" = "5000"),
                                                  selected = "5000")),
-                                  selectInput('chr', 'Select a Chromosome, or leave empty for the whole genome', c("")),
+                                  selectInput('chr', 'Select a Chromosome, or leave empty for the whole genome', c(""), multiple = TRUE),
                                   # tags$div(id = 'addChromosomes'),
                                   actionButton("show_input_check", "Calculate Score matrix")
                                   
@@ -130,7 +129,15 @@ dashboardPage(title = "Batch Effect Analysis and Visualization",
                                               c("min", "max", "sum", "mean", "var", "sd", "median", "count", "boolean"),
                                               selected = "mean"),
                                   
-                                  numericInput("variance","row variation cutoff", value = "0.05"),
+                                  radioButtons("filter_rows", "Choose filtering method",
+                                               choices = c("Filter out non-complete rows" = "filter_non_complete",
+                                                           "Filter using Row Variation Cutoff" = "row_variation")),
+                                  conditionalPanel(
+                                    condition = "input.filter_rows == 'row_variation'",
+                                    numericInput("variance","row variation cutoff", value = "0.05")
+                                    
+                                  ),
+                                  
                                   downloadButton("downloadMatrix", "Download Data")
                                   
                                   ),
@@ -222,7 +229,9 @@ dashboardPage(title = "Batch Effect Analysis and Visualization",
                                     radioButtons("batch_effect_choice", "Choose method to calculate scoring matrix",
                                                  choices = c("ComBat" = "combat",
                                                              "SVA" = "sva",
-                                                             "Supervised SVA" = "supervised_sva")),
+                                                             "Supervised SVA" = "supervised_sva",
+                                                             "RUV" = "ruv")),
+                                    selectInput("outliers", "Select Outliers (Optional)", choices = c(""),multiple = TRUE),
                                     bsButton("batch_pie",label = "", icon = icon("pie-chart"), style = "default", size = "default"),
                                     bsModal("batch_pie_chart", "Values for Key chosen", "batch_pie",size = "large",
                                             fluidPage(
@@ -249,10 +258,35 @@ dashboardPage(title = "Batch Effect Analysis and Visualization",
                                     conditionalPanel(
                                       condition = "input.batch_effect_choice == 'supervised_sva'",
                                       
-                                      selectInput('batch_supervised_sva', "Select a batch to adjust for",c("")),
+                                      # selectInput('batch_supervised_sva', "Select a batch to adjust for",c("")),
                                       selectInput("adj_var_supervised_sva", "Adjustment Variable",c("")),
                                       selectInput("interest_var_supervised_sva", "Variable of interest", c(""))
                                     ),
+                                    conditionalPanel(
+                                      condition = "input.batch_effect_choice == 'ruv'",
+                                      
+                                      numericInput("regularization_par",
+                                                   "Regularization parameter for the unwanted variation (nu.coeff)", value = 0.00001),
+                                      
+                                      radioButtons("house_keeping_genes", "Method to estimate House Keeping Genes",
+                                                   choices = c("Estimate using rank analysis" = "estimate_hkg",
+                                                               "Use a predefined list" = "use_hkg_list")),
+                                      
+                                      conditionalPanel(
+                                        condition = "input.house_keeping_genes == 'estimate_hkg'",
+                                        
+                                        numericInput("regularization_par",
+                                                     "Regularization parameter for the unwanted variation",
+                                                     value = 0.00001),
+                                        
+                                        numericInput("quantile_prob",
+                                                     "Quantile probability used in the rank analysis estimation [0,1]",
+                                                     value = 0.005, min = 0, max = 1),
+                                        
+                                        numericInput("k_rank", "Desired rank for the estimated unwanted variation term",
+                                                     value = 5)
+                                      )
+                                    ),
                                     downloadButton("downloadAdjustedMatrix", "Download Data")
                 )
               ),
diff --git a/functions/combat_batch_effect.r b/functions/combat_batch_effect.r
index 89f2d2f..56d37b6 100644
--- a/functions/combat_batch_effect.r
+++ b/functions/combat_batch_effect.r
@@ -1,15 +1,36 @@
 combat_batch_effect <- function(filtered_score_matrix,
                                 batch,
                                 adjustment_var,
+                                outliers,
+                                project,
                                 interest_var){
   
   
+  if(project == "TEPIC reprocessed IHEC data"){
+    outliers <- unique(c(outliers,"R_ENCBS150HBC_ENCBS376RZJ",
+                         "R_ENCBS559QNR_ENCBS568FYY_ENCBS945MCY",
+                         "R_ENCBS853LFM","E_520VFV"))
+  }
   # filtered_matrix <- filtered_score_matrix$data
   metadata <- attr(filtered_score_matrix, "meta")
+  
   validate(
+    need(!anyNA(metadata[,batch]), message = paste(batch, "has NAs and cannot be used in combat")),
+    
     need(nlevels(as.factor(metadata[,batch])) > 1, message = paste(batch,"has less than 2 level",
                                                         "check levels using the pie chart"))
   )
+  
+  row_sum<-apply(filtered_score_matrix,1,sum)
+  if(!length(which(row_sum == 0)) == 0){
+    filtered_score_matrix<-filtered_score_matrix[-which(row_sum==0),]
+  }
+  if(outliers != ""){
+    filtered_score_matrix <- filtered_score_matrix[,-match(outliers,colnames(filtered_score_matrix))]
+    metadata <- metadata[-match(outliers, metadata[,"experiment"]),]
+    
+  }
+  
   #validation of the inptus
   #The variable selected should have more than 1 level
   if(adjustment_var == ""){
@@ -17,10 +38,8 @@ combat_batch_effect <- function(filtered_score_matrix,
   }else{
     for (adj_var in adjustment_var){
       validate(
-        need(!anyNA(metadata[,adj_var]), message = paste(adj_var, "has NAs and cannot be used to make the model"))
-      )
+        need(!anyNA(metadata[,adj_var]), message = paste(adj_var, "has NAs and cannot be used to make the model")),
       
-      validate(
         need(nlevels(as.factor(metadata[,adj_var])) > 1, message = paste(adj_var,"has less than 2 level",
                                                         "check levels using the pie chart"))
       )
@@ -33,9 +52,8 @@ combat_batch_effect <- function(filtered_score_matrix,
     for (inter_var in interest_var){
       
       validate(
-        need(!anyNA(metadata[,inter_var]), message = paste(inter_var, "has NAs and cannot be used to make the model"))
-      )
-      validate(
+        need(!anyNA(metadata[,inter_var]), message = paste(inter_var, "has NAs and cannot be used to make the model")),
+
         need(nlevels(as.factor(metadata[,inter_var])) > 1, message = paste(inter_var,"has less than 2 level",
                                                         "check levels using the pie chart"))
       )
@@ -64,6 +82,7 @@ combat_batch_effect <- function(filtered_score_matrix,
     }
   }
   
+  
   batch_adjusted_matrix <- ComBat(dat=filtered_score_matrix,
                                   batch=as.double(as.factor(metadata[,batch])),
                                   mod=modcombat, par.prior=TRUE, prior.plots=FALSE)
diff --git a/functions/inserted_ui.r b/functions/inserted_ui.r
index ba15a5c..1caf0c1 100644
--- a/functions/inserted_ui.r
+++ b/functions/inserted_ui.r
@@ -34,7 +34,7 @@ inserted_ui <- function(user_key,user_name,echo, id, genomes) {
                               selectInput('project', 'Select Project',choices = ""),
                               selectInput('epigenetic_mark', 'Select a Data Type',
                                           c("Gene Expression",deepblue_list_epigenetic_marks(user_key = user_key)$name),
-                                          multiple = TRUE),
+                                          multiple = F),
                               
                               actionButton('list_exper_btn', "List Experiments"),
                               
diff --git a/functions/plotly_pca.r b/functions/plotly_pca.r
index bb6ccaa..5df31a2 100644
--- a/functions/plotly_pca.r
+++ b/functions/plotly_pca.r
@@ -1,13 +1,28 @@
-plotly_pca <- function(experiments_info_meta,filtered_score_matrix, project,type_of_score,
+plotly_pca <- function(experiments_info_meta,
+                       filtered_score_matrix,
+                       name = NULL,
+                       project,
+                       type_of_score,
                        color_by = "biosource_name",
                        epigenetic_mark = "No Epigenetic mark selected",
                        first_pc="1",
                        second_pc="2",
-                       show_legend = T){
+                       show_legend = TRUE){
   
   #calculating PCA
   pca <- prcomp(filtered_score_matrix, center = TRUE, scale. = TRUE)
   
+  #Checking if PC chosen is available or not
+  validate(
+    need(paste0("PC", first_pc) %in% colnames(pca[['rotation']]),
+         message = paste("The PC you chose is out of bound","upper bound is",
+                         colnames(pca[['rotation']])[length(colnames(pca[['rotation']]))])),
+    
+    need(paste0("PC", second_pc) %in% colnames(pca[['rotation']]),
+         message = paste("The PC you chose is out of bound","upper bound is",
+                         colnames(pca[['rotation']])[length(colnames(pca[['rotation']]))]))
+  )
+  
   #preparing the plot data by taking the PCAs and adding metadata
   plot.data <- as.data.frame(pca$rotation) %>%
     tibble::rownames_to_column(var = "experiment") %>%
@@ -43,7 +58,7 @@ plotly_pca <- function(experiments_info_meta,filtered_score_matrix, project,type
             colors = brewer.pal(9, "Set1"),
             marker = list(size = 17.5)) %>%
     add_markers(y = as.formula(paste0("~","PC", second_pc)), showlegend = show_legend) %>%
-      layout(title = paste("2 PCs plot", type_of_score, epigenetic_mark),
+      layout(title = paste("PC plot", name, type_of_score, epigenetic_mark),
              yaxis = list(title = y_lab, zeroline = FALSE),
              xaxis = list(title = x_lab, zeroline = FALSE))
   
diff --git a/functions/plotting_rna_seq_ontology_compare.r b/functions/plotting_rna_seq_ontology_compare.r
new file mode 100644
index 0000000..7ea4d17
--- /dev/null
+++ b/functions/plotting_rna_seq_ontology_compare.r
@@ -0,0 +1,16 @@
+plotting_rna_seq_ontology_compare <- function(filtered_score_matrix,
+                                              batch_adjusted_matrices){
+  
+  #Loading distance matrices
+  ###Read Cosine Distance Matrix###
+  cosineDistance<-read.table("../distance_matrices/Cosine-Similiarity-Based-Distance-Matrix.txt", header= T, stringsAsFactors = F)
+  ###Generate Similiarity Matrix###
+  cosineSimilarity<-(1-cosineDistance)
+  
+  ###Read Jaccard Distance Matrix###
+  jaccardDistance<-read.table("../distance_matrices/Jaccard-Based-Distance-Matrix.txt", header = T, stringsAsFactors = F)
+  ###Generate Similiarity Matrix###
+  jaccardSimilarity<-(1-jaccardDistance)
+  
+  
+}
\ No newline at end of file
diff --git a/functions/ruv_batch_effect.r b/functions/ruv_batch_effect.r
new file mode 100644
index 0000000..fd5a4d9
--- /dev/null
+++ b/functions/ruv_batch_effect.r
@@ -0,0 +1,78 @@
+ruv_batch_effect <- function(filtered_score_matrix,
+                             outliers,
+                             project,
+                             regularization_par = 0.00001,
+                             quantile_prob,
+                             k_rank = 5,
+                             estimate_hkg){
+  
+  #hkg: House Keeping Genes
+  #regularization_par nu.coeff for unwanted variation for the naiveRandRUV()
+  #k_rank Desired rank for the estimated unwanted variation term for naiveRandRUV()
+  #estimate_hkg if TRUE, hkg will be defined by a rank analysis
+    #if FALSE a list of hkg will be used
+    #quantile_prob used as probability for the quantile() to estimate hkg
+  
+  metadata <- attr(filtered_score_matrix, "meta")
+  
+  if(project == "TEPIC reprocessed IHEC data"){
+    outliers <- unique(c(outliers,"R_ENCBS150HBC_ENCBS376RZJ",
+                         "R_ENCBS559QNR_ENCBS568FYY_ENCBS945MCY",
+                         "R_ENCBS853LFM","E_520VFV"))
+  }
+  
+  if(estimate_hkg == "use_hkg_list"){
+    housekeepingGenes<-c("ENSG00000204574","ENSG00000075624","ENSG00000023330",
+                         "ENSG00000166710","ENSG00000141367","ENSG00000160211",
+                         "ENSG00000111640","ENSG00000169919","ENSG00000165704",
+                         "ENSG00000134333","ENSG00000102144", "ENSG00000125630",
+                         "ENSG00000181222","ENSG00000108298","ENSG00000089157",
+                         "ENSG00000073578", "ENSG00000112592","ENSG00000196230")
+    
+    rowsum<-apply(filtered_score_data,1,sum)
+    if(!length(which(row_sum == 0)) == 0){
+      filtered_score_matrix<-filtered_score_matrix[-which(row_sum==0),]
+    }
+    
+    if(outliers != ""){
+      filtered_score_matrix <- filtered_score_matrix[,-match(outliers,colnames(filtered_score_matrix))]
+      metadata <- metadata[-match(outliers, metadata["experiment"]),]
+      
+    }   
+    
+    inRUV<-t(as.matrix(log2(filtered_score_matrix+1)))
+    
+    ruv_adjusted_matrix <- naiveRandRUV(inRUV,match(housekeepingGenes,colnames(inRUV)),
+                                        nu.coeff=regularization_par,k=k_rank)
+    
+    #esetimate houskeeping genes
+  }
+  else if(estimate_hkg == "estimate_hkg"){
+    
+    rowsum<-apply(filtered_score_data,1,sum)
+    if(!length(which(row_sum == 0)) == 0){
+      filtered_score_matrix<-filtered_score_matrix[-which(row_sum==0),]
+    }
+    
+    if(outliers != ""){
+      filtered_score_matrix <- filtered_score_matrix[,-match(outliers,colnames(filtered_score_matrix))]
+      metadata <- metadata[-match(outliers, metadata["experiment"]),]
+      
+    }   
+    
+    rankedData<-apply(filtered_score_matrix,2,rank)
+    stdRanks<-apply(rankedData,1,sd)
+    
+    meanRanks<-apply(rankedData,1,mean)
+    
+    housekeepingGenes <-names(meanRanks[which(stdRanks <= quantile(stdRanks,quantile_prob))])
+  }
+  
+  inRUV<-t(as.matrix(log2(filtered_score_matrix+1)))
+  ruv_adjusted_matrix <-naiveRandRUV(inRUV,match(housekeepingGenes,colnames(inRUV)),
+                                     nu.coeff=regularization_par , k=k_rank)
+  
+  attr(ruv_adjusted_matrix, "meta") <- metadata
+  
+  return(ruv_adjusted_matrix)
+}
\ No newline at end of file
diff --git a/functions/score_matrix_annotations.r b/functions/score_matrix_annotations.r
index e80a20e..c6b8818 100644
--- a/functions/score_matrix_annotations.r
+++ b/functions/score_matrix_annotations.r
@@ -1,6 +1,11 @@
-score_matrix_annotations <- function(experiments, experiments_info_meta, genome, chr,
-                                     annotation, aggregation_function = "mean",
-                                     variation = "0.05", user_key ){
+score_matrix_annotations <- function(experiments,
+                                     experiments_info_meta,
+                                     genome, chr,
+                                     annotation,
+                                     aggregation_function = "mean",
+                                     filtering,
+                                     variation = "0.05",
+                                     user_key ){
   
   if(chr == ""){chr <- NULL}
   #selecting annotations
@@ -37,14 +42,18 @@ score_matrix_annotations <- function(experiments, experiments_info_meta, genome,
   score_matrix <- deepblue_download_request_data(request_id = request_id, user_key = user_key)
   
   #removing the chromosome, start and end columns
-  filtered_score_matrix <- scale(as.matrix(score_matrix[,4:ncol(score_matrix)]))
+  filtered_score_matrix <- as.matrix(score_matrix[,4:ncol(score_matrix)])
   
   #keep only complete cases without NAs
-  filtered_score_matrix <- filtered_score_matrix[which(complete.cases(filtered_score_matrix)),]
-  
+  if(filtering == "filter_non_complete"){
+    filtered_score_matrix <- filtered_score_matrix[which(complete.cases(filtered_score_matrix)),]
+    
+  }
   #optional filtering for variable regions
-  filtered_score_matrix <- filtered_score_matrix[which(rowSums(is.na(filtered_score_matrix)) /
-                                                          ncol(filtered_score_matrix) < variation),]
+  else if(filtering == "row_variation"){
+    filtered_score_matrix <- filtered_score_matrix[which(rowSums(is.na(filtered_score_matrix)) /
+                                                           ncol(filtered_score_matrix) < variation),]
+  }
   
   #keep only regions with more than 0.05 variance
   filtered_score_matrix_rowVars <- rowVars(filtered_score_matrix, na.rm = T)
diff --git a/functions/score_matrix_tiling_regions.r b/functions/score_matrix_tiling_regions.r
index 785e2e2..bc6909c 100644
--- a/functions/score_matrix_tiling_regions.r
+++ b/functions/score_matrix_tiling_regions.r
@@ -1,9 +1,11 @@
 #Function to get the scoring matrix with tiling regions of the genome
 
-score_matrix_tiling_regions <- function(experiments, experiments_info_meta,
+score_matrix_tiling_regions <- function(experiments,
+                                        experiments_info_meta,
                                         genome, chr,
                                         tiling_size = "5000",
                                         aggregation_function = "mean",
+                                        filtering,
                                         variation = "0.05",
                                         user_key){
   
@@ -16,13 +18,10 @@ score_matrix_tiling_regions <- function(experiments, experiments_info_meta,
   }
   
   #Getting the tiling regions
-  tiling_regions <- tryCatch({deepblue_tiling_regions(size=tiling_size,
+  tiling_regions <- deepblue_tiling_regions(size=tiling_size,
                                             chromosome = chr,
                                             genome= genome,
-                                            user_key = user_key)},
-                             error = function(e){
-                               showNotification()
-                             })
+                                            user_key = user_key)
   
   
   #requesting the score matrix
@@ -40,20 +39,24 @@ score_matrix_tiling_regions <- function(experiments, experiments_info_meta,
   
   #score matrix
   #In case I wanted to print the messages that are shown on the console
-  #I can user withCallingHandlers like done in this link
+  #I can use withCallingHandlers like done in this link
   #https://stackoverflow.com/questions/30474538/possible-to-show-console-messages-written-with-message-in-a-shiny-ui
   score_matrix <- deepblue_download_request_data(request_id = request_id, user_key = user_key)
-  
+
   #removing the chromosome, start and end columns
-  filtered_score_matrix <- scale(as.matrix(score_matrix[,4:ncol(score_matrix)]))
+  filtered_score_matrix <- as.matrix(score_matrix[,4:ncol(score_matrix)])
   
   #keep only complete cases without NAs
-  filtered_score_matrix <- filtered_score_matrix[which(complete.cases(filtered_score_matrix)),]
-  
+  if(filtering == "filter_non_complete"){
+    filtered_score_matrix <- filtered_score_matrix[which(complete.cases(filtered_score_matrix)),]
+    
+  }
   #optional filtering for variable regions
-  filtered_score_matrix <- filtered_score_matrix[ which(rowSums(is.na(filtered_score_matrix)) /
-                                                          ncol(filtered_score_matrix) < variation),]
-  
+  else if(filtering == "row_variation"){
+    filtered_score_matrix <- filtered_score_matrix[which(rowSums(is.na(filtered_score_matrix)) /
+                                                           ncol(filtered_score_matrix) < variation),]
+  }
+
   #keep only regions with more than 0.05 variance
   filtered_score_matrix_rowVars <- rowVars(filtered_score_matrix, na.rm = T)
   filtered_score_matrix <- filtered_score_matrix[which(filtered_score_matrix_rowVars > variation),]
diff --git a/functions/supervised_sva_batch_effect.r b/functions/supervised_sva_batch_effect.r
index 2ada953..57e0cc8 100644
--- a/functions/supervised_sva_batch_effect.r
+++ b/functions/supervised_sva_batch_effect.r
@@ -1,8 +1,16 @@
 supervised_sva_batch_effect <- function(filtered_score_matrix,
                                         adjustment_var,
-                                        interest_var){
+                                        interest_var,
+                                        project,
+                                        outliers = NULL){
   
   
+  if(outliers == ""){outliers = NULL}
+  if(project == "TEPIC reprocessed IHEC data"){
+    outliers <- unique(c(outliers,"R_ENCBS150HBC_ENCBS376RZJ",
+                         "R_ENCBS559QNR_ENCBS568FYY_ENCBS945MCY",
+                         "R_ENCBS853LFM","E_520VFV"))
+  }
   # filtered_matrix <- filtered_score_matrix$data
   metadata <- attr(filtered_score_matrix, "meta")
   
@@ -14,10 +22,8 @@ supervised_sva_batch_effect <- function(filtered_score_matrix,
     for (adj_var in adjustment_var){
       
       validate(
-        need(!anyNA(metadata[,adj_var]), message = paste(adj_var, "has NAs and cannot be used to make the model"))
-      )
-      validate(
-        need(nlevels(metadata[,adj_var]) > 1, message = paste(adj_var,"has less than 2 level",
+        need(!anyNA(metadata[,adj_var]), message = paste(adj_var, "has NAs and cannot be used to make the model")),
+        need(nlevels(as.factor(metadata[,adj_var])) > 1, message = paste(adj_var,"has less than 2 level",
                                                               "check levels using the pie chart"))
       )
     }
@@ -31,15 +37,24 @@ supervised_sva_batch_effect <- function(filtered_score_matrix,
     for (inter_var in interest_var){
       
       validate(
-        need(!anyNA(metadata[,inter_var]), message = paste(inter_var, "has NAs and cannot be used for the model"))
-      )
-      validate(
-        need(nlevels(metadata[,inter_var]) > 1, message = paste(inter_var,"has less than 2 level",
+        need(!anyNA(metadata[,inter_var]), message = paste(inter_var, "has NAs and cannot be used for the model")),
+        need(nlevels(as.factor(metadata[,inter_var])) > 1, message = paste(inter_var,"has less than 2 level",
                                                                 "check levels using the pie chart"))
       )
     }
   }
   
+  #calculating controls
+  row_sum<-apply(filtered_score_matrix,1,sum)
+  if(!length(which(row_sum == 0)) == 0){
+    filtered_score_matrix<-filtered_score_matrix[-which(row_sum==0),]
+  }
+  if(outliers != ""){
+    filtered_score_matrix <- filtered_score_matrix[,-match(outliers,colnames(filtered_score_matrix))]
+    metadata <- metadata[-match(outliers, metadata[,"experiment"]),]
+    
+  }
+  
   if(is.null(adjustment_var)){
     #No interest variable, mod0 is the intercept, full mod is the interest_var
     mod0 <- model.matrix(~1, data = metadata)
@@ -55,10 +70,19 @@ supervised_sva_batch_effect <- function(filtered_score_matrix,
     )
     )),data = metadata)
   }
-  n.sv <- num.sv(filtered_score_matrix, mod, method = "leek")
-  showNotification(paste("The number of latent factors estimated is", n.sv), duration = 3)
   
-  sva_object <- sva(filtered_score_matrix, mod, mod0, n.sv = n.sv)
+  
+  rankedData<-apply(filtered_score_matrix,2,rank)
+  meanRanks<-apply(rankedData,1,mean)
+  stdRanks<-apply(rankedData,1,sd)
+  rankesOfStd<-rank(stdRanks)
+  controlProb<-1-(rankesOfStd/max(rankesOfStd))
+  
+  # n.sv <- num.sv(filtered_score_matrix, mod, method = "leek")
+  
+  sva_object <- svaseq(filtered_score_matrix, mod, mod0,
+                       # n.sv = n.sv,
+                       controls = controlProb)
   
   batch_adjusted_matrix <- sva_object$sv
   
diff --git a/functions/sva_batch_effect.r b/functions/sva_batch_effect.r
index fe3d4a3..1f5322b 100644
--- a/functions/sva_batch_effect.r
+++ b/functions/sva_batch_effect.r
@@ -1,8 +1,16 @@
 sva_batch_effect <- function(filtered_score_matrix,
                              adjustment_var,
+                             outliers,
+                             project,
                              interest_var){
   
   
+  # if(outliers == ""){outliers = NULL}
+  if(project == "TEPIC reprocessed IHEC data"){
+    outliers <- unique(c(outliers,"R_ENCBS150HBC_ENCBS376RZJ",
+                         "R_ENCBS559QNR_ENCBS568FYY_ENCBS945MCY",
+                         "R_ENCBS853LFM","E_520VFV"))
+  }
   # filtered_matrix <- filtered_score_matrix$data
   metadata <- attr(filtered_score_matrix, "meta")
 
@@ -17,7 +25,7 @@ sva_batch_effect <- function(filtered_score_matrix,
         need(!anyNA(metadata[,adj_var]), message = paste(adj_var, "has NAs and cannot be used to make the model"))
       )
       validate(
-        need(nlevels(metadata[,adj_var]) > 1, message = paste(adj_var,"has less than 2 level",
+        need(nlevels(as.factor(metadata[,adj_var])) > 1, message = paste(adj_var,"has less than 2 level",
                                                               "check levels using the pie chart"))
       )
     }
@@ -31,15 +39,23 @@ sva_batch_effect <- function(filtered_score_matrix,
     for (inter_var in interest_var){
       
       validate(
-        need(!anyNA(metadata[,inter_var]), message = paste(inter_var, "has NAs and cannot be used for the model"))
-      )
-      validate(
-        need(nlevels(metadata[,inter_var]) > 1, message = paste(inter_var,"has less than 2 level",
+        need(!anyNA(metadata[,inter_var]), message = paste(inter_var, "has NAs and cannot be used for the model")),
+        need(nlevels(as.factor(metadata[,inter_var])) > 1, message = paste(inter_var,"has less than 2 level",
                                                                 "check levels using the pie chart"))
       )
     }
   }
   
+  row_sum<-apply(filtered_score_matrix,1,sum)
+  if(!length(which(row_sum == 0)) == 0){
+    filtered_score_matrix<-filtered_score_matrix[-which(row_sum==0),]
+  }
+  if(outliers != ""){
+    filtered_score_matrix <- filtered_score_matrix[,-match(outliers,colnames(filtered_score_matrix))]
+    metadata <- metadata[-match(outliers, metadata[,"experiment"]),]
+  }
+  
+  
   if(is.null(adjustment_var)){
     #No interest variable, mod0 is the intercept, full mod is the interest_var
     mod0 <- model.matrix(~1, data = metadata)
@@ -58,6 +74,7 @@ sva_batch_effect <- function(filtered_score_matrix,
   n.sv <- num.sv(filtered_score_matrix, mod, method = "leek")
   showNotification(paste("The number of latent factors estimated is", n.sv), duration = 3)
   
+  
   sva_object <- sva(filtered_score_matrix, mod, mod0, n.sv = n.sv)
   
   batch_adjusted_matrix <- sva_object$sv
diff --git a/server/calculate_score_matrix.r b/server/calculate_score_matrix.r
index c1466ae..622ece1 100644
--- a/server/calculate_score_matrix.r
+++ b/server/calculate_score_matrix.r
@@ -30,6 +30,7 @@ filtered_score_matrix <- eventReactive(input$calculate_matrix, {
     filtered_score_matrix<- score_matrix_tiling_regions(experiments = all_experiments()[[1]][input$table_rows_all],
                                                         experiments_info_meta =  experiments_info_meta(),
                                                         variation = input$variance,
+                                                        filtering = input$filter_rows,
                                                         tiling_size = input$tiling_size,
                                                         aggregation_function = input$aggregation,
                                                         genome = input$genome,
@@ -39,17 +40,21 @@ filtered_score_matrix <- eventReactive(input$calculate_matrix, {
     filtered_score_matrix<- score_matrix_annotations(experiments = all_experiments()[[1]][input$table_rows_all],
                                                      experiments_info_meta =  experiments_info_meta(),
                                                      variation = input$variance,
+                                                     filtering = input$filter_rows,
                                                      aggregation_function = input$aggregation,
                                                      annotation = input$annotations_list,
                                                      genome = input$genome,
                                                      chr = input$chr,
                                                      user_key = user_key)
+    
+    
   }
   
   # #This is to check if the matrix has been calculated or not in order to show
   # #the NEXT button on this page, it's switched to false here in case
   # #the user chose different inputs and made another matrix, the NEXT button won't be added twice
   # calculated_matrix(FALSE)
+  updateSelectInput(session, "outliers", choices = c("", colnames(filtered_score_matrix)))
   
   return(filtered_score_matrix)
   
diff --git a/server/listing_genomes.r b/server/listing_genomes.r
index a64dd24..92c91d1 100644
--- a/server/listing_genomes.r
+++ b/server/listing_genomes.r
@@ -11,9 +11,9 @@ observeEvent(input$genome, {
                                                         user_key = user_key)$name)
   
   
-  updateSelectInput(session, inputId = 'chr', choices = c("",deepblue_info(id = genomes$id[grep(input$genome, genomes$name)],
-                                                                           user_key = user_key)$chromosomes$name))
+  updateSelectInput(session, inputId = 'chr', choices = c("",
+                                                          deepblue_chromosomes(genome = input$genome,
+                                                                               user_key = input$user_key)$id))
   
-
   
 })
\ No newline at end of file