From 128fe6284c6bcc83408c5d45cd5a49c3eabc70d8 Mon Sep 17 00:00:00 2001 From: Fawaz Dabbaghieh Date: Wed, 7 Mar 2018 02:14:25 +0100 Subject: [PATCH] outliers filtering, ruv, some bugs fixed --- app/server.r | 87 ++++++++++++------- app/ui.r | 44 ++++++++-- functions/combat_batch_effect.r | 31 +++++-- functions/inserted_ui.r | 2 +- functions/plotly_pca.r | 21 ++++- functions/plotting_rna_seq_ontology_compare.r | 16 ++++ functions/ruv_batch_effect.r | 78 +++++++++++++++++ functions/score_matrix_annotations.r | 25 ++++-- functions/score_matrix_tiling_regions.r | 31 ++++--- functions/supervised_sva_batch_effect.r | 48 +++++++--- functions/sva_batch_effect.r | 27 ++++-- server/calculate_score_matrix.r | 5 ++ server/listing_genomes.r | 6 +- 13 files changed, 331 insertions(+), 90 deletions(-) create mode 100644 functions/plotting_rna_seq_ontology_compare.r create mode 100644 functions/ruv_batch_effect.r diff --git a/app/server.r b/app/server.r index c8ddb2f..fb5ae86 100644 --- a/app/server.r +++ b/app/server.r @@ -4,7 +4,7 @@ #2 when login again remove previous/next from second page (DONE! I think, will test for several cases later) #3 remove the matrix and the output should be null again (Maybe use shinyjs::hide() then show again) (not really important now) #4 Why correlation plot doesn't work (Haven't worked yet) maybe use do.call (Working now :-) DONE! ) -#5 Do the batch effect with SVA and Supervised SVA (Almost Done!) +#5 Do the batch effect with SVA and Supervised SVA (Done!) #6 maybe start splitting the server into chuncks inside the server folder and source the chuncks (almost DONE!) #7 Check for shiny as a R package (it's doable, need to understand how packages are organized first) #8 Having some global variables so I won't need to fetch from DeepBlue many times (DONE!) @@ -20,22 +20,24 @@ #18 Fix the corrplot download (done!!) #19 Score matrices and tables other than the first, lines don't need to be selected (Done!) #20 Filter coverage files from the beginning with a check box (Done!) -#21 Make batch effect analysis matrices as a list +#21 Make batch effect analysis matrices as a list (Done!!) #22 Notification to choose a chromosome (Done!) +#23 Error for PC selection (Done!) # TODOs ------------------------------------------------------------------- #7 Check for shiny as a R package #12 RUV for batch effect (from Markus' scripts to understand how it works) -#13 select expression, for gene expression data -#21 Make batch effect analysis matrices as a list -#22 I can't retrieve chromosomes anymore? +#13 select expression, for gene expression data (almost done!) +#24 Gene expression function (Same function for one gene then all genes) +#25 Gene expression ihec, map sample IDs and experiments name to get the final matrix +#27 Supervised SVA (use the method with the std of ranks) +#28 Add a place to choose outliers that can be used then in the RUV test and other tests + -#Problems need to be solved -#1 Can't find chromosomes for each genome anymore (solved!!) -#2 I can't get the rna-seq for most of the projects -#3 +#Problems +# Can't use the std of ranks for the control on score matrix because they have some negative values #New UI with shiny dashboard library(foreach) @@ -48,7 +50,7 @@ library(dplyr) library(ggplot2) library(RColorBrewer) library(ggrepel) -library(stats) +library(stats) library(plotly) library(shiny) library(shinyBS) @@ -177,8 +179,8 @@ function(input, output, session) { updateSelectInput(session, inputId = "interest_var_sva", choices = c("",colnames(attr(filtered_score_matrix(), "meta")))) - updateSelectInput(session, inputId = "batch_supervised_sva", - choices = colnames(attr(filtered_score_matrix(), "meta"))) + # updateSelectInput(session, inputId = "batch_supervised_sva", + # choices = colnames(attr(filtered_score_matrix(), "meta"))) updateSelectInput(session, inputId = "adj_var_supervised_sva", choices = c("",colnames(attr(filtered_score_matrix(), "meta")))) @@ -204,47 +206,64 @@ function(input, output, session) { if(input$batch_effect_choice == "combat"){ temp_matrices <- batch_adjusted_matrices() temp_matrices[[paste(input$batch_effect_choice, input$batch_combat, - input$adj_var_combat, input$interest_var_combat, collapse = "-")]]<- + input$adj_var_combat, input$interest_var_combat, sep = "-")]]<- combat_batch_effect(filtered_score_matrix = filtered_score_matrix(), batch = input$batch_combat, + outliers = input$outliers, + project = input$project, adjustment_var = input$adj_var_combat, interest_var = input$interest_var_combat) batch_adjusted_matrices(temp_matrices) - # batch_adjusted_matrices[[paste(input$batch_effect_choice, input$batch_combat, - # input$adj_var_combat, - # input$interest_var_combat, collapse = "-")]] <- - # - # combat_batch_effect(filtered_score_matrix = filtered_score_matrix(), - # batch = input$batch_combat, - # adjustment_var = input$adj_var_combat, - # interest_var = input$interest_var_combat) + }else if(input$batch_effect_choice == "sva"){ temp_matrices <- batch_adjusted_matrices() temp_matrices[[paste(input$batch_effect_choice, input$batch_sva, - input$adj_var_sva, input$interest_var_sva, collapse = "-")]]<- + input$adj_var_sva, input$interest_var_sva, sep = "-")]]<- sva_batch_effect(filtered_score_matrix = filtered_score_matrix(), adjustment_var = input$adj_var_sva, + outliers = input$outliers, + project = input$project, interest_var = input$interest_var_sva) batch_adjusted_matrices(temp_matrices) - # batch_adjusted_matrices[[paste(input$batch_effect_choice, input$batch_sva, - # input$adj_var_sva, - # input$interest_var_sva, collapse = "-")]] <- - # - # sva_batch_effect(filtered_score_matrix = filtered_score_matrix(), - # adjustment_var = input$adj_var_sva, - # interest_var = input$interest_var_sva) + }else if(input$batch_effect_choice == "supervised_sva"){ + temp_matrices <- batch_adjusted_matrices() + + temp_matrices[[paste(input$batch_effect_choice, input$batch_sva, + input$adj_var_supervised_sva, input$interest_var_supervised_sva, sep = "-")]]<- + + supervised_sva_batch_effect(filtered_score_matrix = filtered_score_matrix(), + adjustment_var = input$adj_var_supervised_sva, + interest_var = input$interest_var_supervised_sva, + project = input$project, + outliers = input$outliers, + project = input$project) + }else{ + + temp_matrices <- batch_adjusted_matrices() + + temp_matrices[["some_name_here"]] <- + + ruv_batch_effect(filtered_score_matrix = filtered_score_matrix(), + outliers = input$outliers, + project = input$project, + regularization_par = input$regularization_par, + quantile_prob = input$quantile_prob, + k_rank = input$k_rank, + estimate_hkg = input$house_keeping_genes) } updateSelectInput(session, "corrected_matrices", choices = names(temp_matrices)) return(batch_adjusted_matrices()) }) }) + + #Matrix summary output observeEvent(input$corrected_matrices,{ if(is.null(input$corrected_matrices)){ output$batch_matrix_summary <- NULL @@ -292,11 +311,11 @@ function(input, output, session) { tempdir = tempdir() setwd(tempdir) - write.table(batch_adjusted_matrix(), + write.table(batch_adjusted_matrix()[[input$corrected_matrices]], "adjusted_data_matrix.tsv", row.names = FALSE, col.names = TRUE, sep = "\t") - write.table(attr(batch_adjusted_matrix(), "meta"), + write.table(attr(batch_adjusted_matrix()[[input$corrected_matrices]], "meta"), "matrix_metadata.tsv", sep = "\t", row.names = FALSE, col.names = TRUE) @@ -315,6 +334,7 @@ function(input, output, session) { plotly_pca <- plotly_pca(experiments_info_meta = experiments_info_meta(), filtered_score_matrix = filtered_score_matrix(), + name = "Raw Matrix", project = input$project, type_of_score = input$type_of_score, color_by = input$color_by, @@ -381,7 +401,8 @@ function(input, output, session) { #get plot plot_pca_batch <- plotly_pca <- plotly_pca(experiments_info_meta = experiments_info_meta(), - filtered_score_matrix = batch_adjusted_matrix(), + filtered_score_matrix = batch_adjusted_matrix()[[input$corrected_matrices]], + name = input$corrected_matrices, project = input$project, type_of_score = input$type_of_score, color_by = input$color_by_batch, @@ -442,7 +463,7 @@ function(input, output, session) { show_legend = TRUE) p2 <- plotly_pca(experiments_info_meta = experiments_info_meta(), - filtered_score_matrix = batch_adjusted_matrix(), + filtered_score_matrix = batch_adjusted_matrix()[[input$corrected_matrices]], project = input$project, type_of_score = input$type_of_score, color_by = input$color_by_batch, diff --git a/app/ui.r b/app/ui.r index 98c6dea..8eadc2d 100644 --- a/app/ui.r +++ b/app/ui.r @@ -87,7 +87,6 @@ dashboardPage(title = "Batch Effect Analysis and Visualization", DT::dataTableOutput("summary"), size = "large") - ), # score matrix tab------------------------------------------------------------ @@ -118,7 +117,7 @@ dashboardPage(title = "Batch Effect Analysis and Visualization", choices = c("1000 base pairs" = "1000", "5000 base pairs" = "5000"), selected = "5000")), - selectInput('chr', 'Select a Chromosome, or leave empty for the whole genome', c("")), + selectInput('chr', 'Select a Chromosome, or leave empty for the whole genome', c(""), multiple = TRUE), # tags$div(id = 'addChromosomes'), actionButton("show_input_check", "Calculate Score matrix") @@ -130,7 +129,15 @@ dashboardPage(title = "Batch Effect Analysis and Visualization", c("min", "max", "sum", "mean", "var", "sd", "median", "count", "boolean"), selected = "mean"), - numericInput("variance","row variation cutoff", value = "0.05"), + radioButtons("filter_rows", "Choose filtering method", + choices = c("Filter out non-complete rows" = "filter_non_complete", + "Filter using Row Variation Cutoff" = "row_variation")), + conditionalPanel( + condition = "input.filter_rows == 'row_variation'", + numericInput("variance","row variation cutoff", value = "0.05") + + ), + downloadButton("downloadMatrix", "Download Data") ), @@ -222,7 +229,9 @@ dashboardPage(title = "Batch Effect Analysis and Visualization", radioButtons("batch_effect_choice", "Choose method to calculate scoring matrix", choices = c("ComBat" = "combat", "SVA" = "sva", - "Supervised SVA" = "supervised_sva")), + "Supervised SVA" = "supervised_sva", + "RUV" = "ruv")), + selectInput("outliers", "Select Outliers (Optional)", choices = c(""),multiple = TRUE), bsButton("batch_pie",label = "", icon = icon("pie-chart"), style = "default", size = "default"), bsModal("batch_pie_chart", "Values for Key chosen", "batch_pie",size = "large", fluidPage( @@ -249,10 +258,35 @@ dashboardPage(title = "Batch Effect Analysis and Visualization", conditionalPanel( condition = "input.batch_effect_choice == 'supervised_sva'", - selectInput('batch_supervised_sva', "Select a batch to adjust for",c("")), + # selectInput('batch_supervised_sva', "Select a batch to adjust for",c("")), selectInput("adj_var_supervised_sva", "Adjustment Variable",c("")), selectInput("interest_var_supervised_sva", "Variable of interest", c("")) ), + conditionalPanel( + condition = "input.batch_effect_choice == 'ruv'", + + numericInput("regularization_par", + "Regularization parameter for the unwanted variation (nu.coeff)", value = 0.00001), + + radioButtons("house_keeping_genes", "Method to estimate House Keeping Genes", + choices = c("Estimate using rank analysis" = "estimate_hkg", + "Use a predefined list" = "use_hkg_list")), + + conditionalPanel( + condition = "input.house_keeping_genes == 'estimate_hkg'", + + numericInput("regularization_par", + "Regularization parameter for the unwanted variation", + value = 0.00001), + + numericInput("quantile_prob", + "Quantile probability used in the rank analysis estimation [0,1]", + value = 0.005, min = 0, max = 1), + + numericInput("k_rank", "Desired rank for the estimated unwanted variation term", + value = 5) + ) + ), downloadButton("downloadAdjustedMatrix", "Download Data") ) ), diff --git a/functions/combat_batch_effect.r b/functions/combat_batch_effect.r index 89f2d2f..56d37b6 100644 --- a/functions/combat_batch_effect.r +++ b/functions/combat_batch_effect.r @@ -1,15 +1,36 @@ combat_batch_effect <- function(filtered_score_matrix, batch, adjustment_var, + outliers, + project, interest_var){ + if(project == "TEPIC reprocessed IHEC data"){ + outliers <- unique(c(outliers,"R_ENCBS150HBC_ENCBS376RZJ", + "R_ENCBS559QNR_ENCBS568FYY_ENCBS945MCY", + "R_ENCBS853LFM","E_520VFV")) + } # filtered_matrix <- filtered_score_matrix$data metadata <- attr(filtered_score_matrix, "meta") + validate( + need(!anyNA(metadata[,batch]), message = paste(batch, "has NAs and cannot be used in combat")), + need(nlevels(as.factor(metadata[,batch])) > 1, message = paste(batch,"has less than 2 level", "check levels using the pie chart")) ) + + row_sum<-apply(filtered_score_matrix,1,sum) + if(!length(which(row_sum == 0)) == 0){ + filtered_score_matrix<-filtered_score_matrix[-which(row_sum==0),] + } + if(outliers != ""){ + filtered_score_matrix <- filtered_score_matrix[,-match(outliers,colnames(filtered_score_matrix))] + metadata <- metadata[-match(outliers, metadata[,"experiment"]),] + + } + #validation of the inptus #The variable selected should have more than 1 level if(adjustment_var == ""){ @@ -17,10 +38,8 @@ combat_batch_effect <- function(filtered_score_matrix, }else{ for (adj_var in adjustment_var){ validate( - need(!anyNA(metadata[,adj_var]), message = paste(adj_var, "has NAs and cannot be used to make the model")) - ) + need(!anyNA(metadata[,adj_var]), message = paste(adj_var, "has NAs and cannot be used to make the model")), - validate( need(nlevels(as.factor(metadata[,adj_var])) > 1, message = paste(adj_var,"has less than 2 level", "check levels using the pie chart")) ) @@ -33,9 +52,8 @@ combat_batch_effect <- function(filtered_score_matrix, for (inter_var in interest_var){ validate( - need(!anyNA(metadata[,inter_var]), message = paste(inter_var, "has NAs and cannot be used to make the model")) - ) - validate( + need(!anyNA(metadata[,inter_var]), message = paste(inter_var, "has NAs and cannot be used to make the model")), + need(nlevels(as.factor(metadata[,inter_var])) > 1, message = paste(inter_var,"has less than 2 level", "check levels using the pie chart")) ) @@ -64,6 +82,7 @@ combat_batch_effect <- function(filtered_score_matrix, } } + batch_adjusted_matrix <- ComBat(dat=filtered_score_matrix, batch=as.double(as.factor(metadata[,batch])), mod=modcombat, par.prior=TRUE, prior.plots=FALSE) diff --git a/functions/inserted_ui.r b/functions/inserted_ui.r index ba15a5c..1caf0c1 100644 --- a/functions/inserted_ui.r +++ b/functions/inserted_ui.r @@ -34,7 +34,7 @@ inserted_ui <- function(user_key,user_name,echo, id, genomes) { selectInput('project', 'Select Project',choices = ""), selectInput('epigenetic_mark', 'Select a Data Type', c("Gene Expression",deepblue_list_epigenetic_marks(user_key = user_key)$name), - multiple = TRUE), + multiple = F), actionButton('list_exper_btn', "List Experiments"), diff --git a/functions/plotly_pca.r b/functions/plotly_pca.r index bb6ccaa..5df31a2 100644 --- a/functions/plotly_pca.r +++ b/functions/plotly_pca.r @@ -1,13 +1,28 @@ -plotly_pca <- function(experiments_info_meta,filtered_score_matrix, project,type_of_score, +plotly_pca <- function(experiments_info_meta, + filtered_score_matrix, + name = NULL, + project, + type_of_score, color_by = "biosource_name", epigenetic_mark = "No Epigenetic mark selected", first_pc="1", second_pc="2", - show_legend = T){ + show_legend = TRUE){ #calculating PCA pca <- prcomp(filtered_score_matrix, center = TRUE, scale. = TRUE) + #Checking if PC chosen is available or not + validate( + need(paste0("PC", first_pc) %in% colnames(pca[['rotation']]), + message = paste("The PC you chose is out of bound","upper bound is", + colnames(pca[['rotation']])[length(colnames(pca[['rotation']]))])), + + need(paste0("PC", second_pc) %in% colnames(pca[['rotation']]), + message = paste("The PC you chose is out of bound","upper bound is", + colnames(pca[['rotation']])[length(colnames(pca[['rotation']]))])) + ) + #preparing the plot data by taking the PCAs and adding metadata plot.data <- as.data.frame(pca$rotation) %>% tibble::rownames_to_column(var = "experiment") %>% @@ -43,7 +58,7 @@ plotly_pca <- function(experiments_info_meta,filtered_score_matrix, project,type colors = brewer.pal(9, "Set1"), marker = list(size = 17.5)) %>% add_markers(y = as.formula(paste0("~","PC", second_pc)), showlegend = show_legend) %>% - layout(title = paste("2 PCs plot", type_of_score, epigenetic_mark), + layout(title = paste("PC plot", name, type_of_score, epigenetic_mark), yaxis = list(title = y_lab, zeroline = FALSE), xaxis = list(title = x_lab, zeroline = FALSE)) diff --git a/functions/plotting_rna_seq_ontology_compare.r b/functions/plotting_rna_seq_ontology_compare.r new file mode 100644 index 0000000..7ea4d17 --- /dev/null +++ b/functions/plotting_rna_seq_ontology_compare.r @@ -0,0 +1,16 @@ +plotting_rna_seq_ontology_compare <- function(filtered_score_matrix, + batch_adjusted_matrices){ + + #Loading distance matrices + ###Read Cosine Distance Matrix### + cosineDistance<-read.table("../distance_matrices/Cosine-Similiarity-Based-Distance-Matrix.txt", header= T, stringsAsFactors = F) + ###Generate Similiarity Matrix### + cosineSimilarity<-(1-cosineDistance) + + ###Read Jaccard Distance Matrix### + jaccardDistance<-read.table("../distance_matrices/Jaccard-Based-Distance-Matrix.txt", header = T, stringsAsFactors = F) + ###Generate Similiarity Matrix### + jaccardSimilarity<-(1-jaccardDistance) + + +} \ No newline at end of file diff --git a/functions/ruv_batch_effect.r b/functions/ruv_batch_effect.r new file mode 100644 index 0000000..fd5a4d9 --- /dev/null +++ b/functions/ruv_batch_effect.r @@ -0,0 +1,78 @@ +ruv_batch_effect <- function(filtered_score_matrix, + outliers, + project, + regularization_par = 0.00001, + quantile_prob, + k_rank = 5, + estimate_hkg){ + + #hkg: House Keeping Genes + #regularization_par nu.coeff for unwanted variation for the naiveRandRUV() + #k_rank Desired rank for the estimated unwanted variation term for naiveRandRUV() + #estimate_hkg if TRUE, hkg will be defined by a rank analysis + #if FALSE a list of hkg will be used + #quantile_prob used as probability for the quantile() to estimate hkg + + metadata <- attr(filtered_score_matrix, "meta") + + if(project == "TEPIC reprocessed IHEC data"){ + outliers <- unique(c(outliers,"R_ENCBS150HBC_ENCBS376RZJ", + "R_ENCBS559QNR_ENCBS568FYY_ENCBS945MCY", + "R_ENCBS853LFM","E_520VFV")) + } + + if(estimate_hkg == "use_hkg_list"){ + housekeepingGenes<-c("ENSG00000204574","ENSG00000075624","ENSG00000023330", + "ENSG00000166710","ENSG00000141367","ENSG00000160211", + "ENSG00000111640","ENSG00000169919","ENSG00000165704", + "ENSG00000134333","ENSG00000102144", "ENSG00000125630", + "ENSG00000181222","ENSG00000108298","ENSG00000089157", + "ENSG00000073578", "ENSG00000112592","ENSG00000196230") + + rowsum<-apply(filtered_score_data,1,sum) + if(!length(which(row_sum == 0)) == 0){ + filtered_score_matrix<-filtered_score_matrix[-which(row_sum==0),] + } + + if(outliers != ""){ + filtered_score_matrix <- filtered_score_matrix[,-match(outliers,colnames(filtered_score_matrix))] + metadata <- metadata[-match(outliers, metadata["experiment"]),] + + } + + inRUV<-t(as.matrix(log2(filtered_score_matrix+1))) + + ruv_adjusted_matrix <- naiveRandRUV(inRUV,match(housekeepingGenes,colnames(inRUV)), + nu.coeff=regularization_par,k=k_rank) + + #esetimate houskeeping genes + } + else if(estimate_hkg == "estimate_hkg"){ + + rowsum<-apply(filtered_score_data,1,sum) + if(!length(which(row_sum == 0)) == 0){ + filtered_score_matrix<-filtered_score_matrix[-which(row_sum==0),] + } + + if(outliers != ""){ + filtered_score_matrix <- filtered_score_matrix[,-match(outliers,colnames(filtered_score_matrix))] + metadata <- metadata[-match(outliers, metadata["experiment"]),] + + } + + rankedData<-apply(filtered_score_matrix,2,rank) + stdRanks<-apply(rankedData,1,sd) + + meanRanks<-apply(rankedData,1,mean) + + housekeepingGenes <-names(meanRanks[which(stdRanks <= quantile(stdRanks,quantile_prob))]) + } + + inRUV<-t(as.matrix(log2(filtered_score_matrix+1))) + ruv_adjusted_matrix <-naiveRandRUV(inRUV,match(housekeepingGenes,colnames(inRUV)), + nu.coeff=regularization_par , k=k_rank) + + attr(ruv_adjusted_matrix, "meta") <- metadata + + return(ruv_adjusted_matrix) +} \ No newline at end of file diff --git a/functions/score_matrix_annotations.r b/functions/score_matrix_annotations.r index e80a20e..c6b8818 100644 --- a/functions/score_matrix_annotations.r +++ b/functions/score_matrix_annotations.r @@ -1,6 +1,11 @@ -score_matrix_annotations <- function(experiments, experiments_info_meta, genome, chr, - annotation, aggregation_function = "mean", - variation = "0.05", user_key ){ +score_matrix_annotations <- function(experiments, + experiments_info_meta, + genome, chr, + annotation, + aggregation_function = "mean", + filtering, + variation = "0.05", + user_key ){ if(chr == ""){chr <- NULL} #selecting annotations @@ -37,14 +42,18 @@ score_matrix_annotations <- function(experiments, experiments_info_meta, genome, score_matrix <- deepblue_download_request_data(request_id = request_id, user_key = user_key) #removing the chromosome, start and end columns - filtered_score_matrix <- scale(as.matrix(score_matrix[,4:ncol(score_matrix)])) + filtered_score_matrix <- as.matrix(score_matrix[,4:ncol(score_matrix)]) #keep only complete cases without NAs - filtered_score_matrix <- filtered_score_matrix[which(complete.cases(filtered_score_matrix)),] - + if(filtering == "filter_non_complete"){ + filtered_score_matrix <- filtered_score_matrix[which(complete.cases(filtered_score_matrix)),] + + } #optional filtering for variable regions - filtered_score_matrix <- filtered_score_matrix[which(rowSums(is.na(filtered_score_matrix)) / - ncol(filtered_score_matrix) < variation),] + else if(filtering == "row_variation"){ + filtered_score_matrix <- filtered_score_matrix[which(rowSums(is.na(filtered_score_matrix)) / + ncol(filtered_score_matrix) < variation),] + } #keep only regions with more than 0.05 variance filtered_score_matrix_rowVars <- rowVars(filtered_score_matrix, na.rm = T) diff --git a/functions/score_matrix_tiling_regions.r b/functions/score_matrix_tiling_regions.r index 785e2e2..bc6909c 100644 --- a/functions/score_matrix_tiling_regions.r +++ b/functions/score_matrix_tiling_regions.r @@ -1,9 +1,11 @@ #Function to get the scoring matrix with tiling regions of the genome -score_matrix_tiling_regions <- function(experiments, experiments_info_meta, +score_matrix_tiling_regions <- function(experiments, + experiments_info_meta, genome, chr, tiling_size = "5000", aggregation_function = "mean", + filtering, variation = "0.05", user_key){ @@ -16,13 +18,10 @@ score_matrix_tiling_regions <- function(experiments, experiments_info_meta, } #Getting the tiling regions - tiling_regions <- tryCatch({deepblue_tiling_regions(size=tiling_size, + tiling_regions <- deepblue_tiling_regions(size=tiling_size, chromosome = chr, genome= genome, - user_key = user_key)}, - error = function(e){ - showNotification() - }) + user_key = user_key) #requesting the score matrix @@ -40,20 +39,24 @@ score_matrix_tiling_regions <- function(experiments, experiments_info_meta, #score matrix #In case I wanted to print the messages that are shown on the console - #I can user withCallingHandlers like done in this link + #I can use withCallingHandlers like done in this link #https://stackoverflow.com/questions/30474538/possible-to-show-console-messages-written-with-message-in-a-shiny-ui score_matrix <- deepblue_download_request_data(request_id = request_id, user_key = user_key) - + #removing the chromosome, start and end columns - filtered_score_matrix <- scale(as.matrix(score_matrix[,4:ncol(score_matrix)])) + filtered_score_matrix <- as.matrix(score_matrix[,4:ncol(score_matrix)]) #keep only complete cases without NAs - filtered_score_matrix <- filtered_score_matrix[which(complete.cases(filtered_score_matrix)),] - + if(filtering == "filter_non_complete"){ + filtered_score_matrix <- filtered_score_matrix[which(complete.cases(filtered_score_matrix)),] + + } #optional filtering for variable regions - filtered_score_matrix <- filtered_score_matrix[ which(rowSums(is.na(filtered_score_matrix)) / - ncol(filtered_score_matrix) < variation),] - + else if(filtering == "row_variation"){ + filtered_score_matrix <- filtered_score_matrix[which(rowSums(is.na(filtered_score_matrix)) / + ncol(filtered_score_matrix) < variation),] + } + #keep only regions with more than 0.05 variance filtered_score_matrix_rowVars <- rowVars(filtered_score_matrix, na.rm = T) filtered_score_matrix <- filtered_score_matrix[which(filtered_score_matrix_rowVars > variation),] diff --git a/functions/supervised_sva_batch_effect.r b/functions/supervised_sva_batch_effect.r index 2ada953..57e0cc8 100644 --- a/functions/supervised_sva_batch_effect.r +++ b/functions/supervised_sva_batch_effect.r @@ -1,8 +1,16 @@ supervised_sva_batch_effect <- function(filtered_score_matrix, adjustment_var, - interest_var){ + interest_var, + project, + outliers = NULL){ + if(outliers == ""){outliers = NULL} + if(project == "TEPIC reprocessed IHEC data"){ + outliers <- unique(c(outliers,"R_ENCBS150HBC_ENCBS376RZJ", + "R_ENCBS559QNR_ENCBS568FYY_ENCBS945MCY", + "R_ENCBS853LFM","E_520VFV")) + } # filtered_matrix <- filtered_score_matrix$data metadata <- attr(filtered_score_matrix, "meta") @@ -14,10 +22,8 @@ supervised_sva_batch_effect <- function(filtered_score_matrix, for (adj_var in adjustment_var){ validate( - need(!anyNA(metadata[,adj_var]), message = paste(adj_var, "has NAs and cannot be used to make the model")) - ) - validate( - need(nlevels(metadata[,adj_var]) > 1, message = paste(adj_var,"has less than 2 level", + need(!anyNA(metadata[,adj_var]), message = paste(adj_var, "has NAs and cannot be used to make the model")), + need(nlevels(as.factor(metadata[,adj_var])) > 1, message = paste(adj_var,"has less than 2 level", "check levels using the pie chart")) ) } @@ -31,15 +37,24 @@ supervised_sva_batch_effect <- function(filtered_score_matrix, for (inter_var in interest_var){ validate( - need(!anyNA(metadata[,inter_var]), message = paste(inter_var, "has NAs and cannot be used for the model")) - ) - validate( - need(nlevels(metadata[,inter_var]) > 1, message = paste(inter_var,"has less than 2 level", + need(!anyNA(metadata[,inter_var]), message = paste(inter_var, "has NAs and cannot be used for the model")), + need(nlevels(as.factor(metadata[,inter_var])) > 1, message = paste(inter_var,"has less than 2 level", "check levels using the pie chart")) ) } } + #calculating controls + row_sum<-apply(filtered_score_matrix,1,sum) + if(!length(which(row_sum == 0)) == 0){ + filtered_score_matrix<-filtered_score_matrix[-which(row_sum==0),] + } + if(outliers != ""){ + filtered_score_matrix <- filtered_score_matrix[,-match(outliers,colnames(filtered_score_matrix))] + metadata <- metadata[-match(outliers, metadata[,"experiment"]),] + + } + if(is.null(adjustment_var)){ #No interest variable, mod0 is the intercept, full mod is the interest_var mod0 <- model.matrix(~1, data = metadata) @@ -55,10 +70,19 @@ supervised_sva_batch_effect <- function(filtered_score_matrix, ) )),data = metadata) } - n.sv <- num.sv(filtered_score_matrix, mod, method = "leek") - showNotification(paste("The number of latent factors estimated is", n.sv), duration = 3) - sva_object <- sva(filtered_score_matrix, mod, mod0, n.sv = n.sv) + + rankedData<-apply(filtered_score_matrix,2,rank) + meanRanks<-apply(rankedData,1,mean) + stdRanks<-apply(rankedData,1,sd) + rankesOfStd<-rank(stdRanks) + controlProb<-1-(rankesOfStd/max(rankesOfStd)) + + # n.sv <- num.sv(filtered_score_matrix, mod, method = "leek") + + sva_object <- svaseq(filtered_score_matrix, mod, mod0, + # n.sv = n.sv, + controls = controlProb) batch_adjusted_matrix <- sva_object$sv diff --git a/functions/sva_batch_effect.r b/functions/sva_batch_effect.r index fe3d4a3..1f5322b 100644 --- a/functions/sva_batch_effect.r +++ b/functions/sva_batch_effect.r @@ -1,8 +1,16 @@ sva_batch_effect <- function(filtered_score_matrix, adjustment_var, + outliers, + project, interest_var){ + # if(outliers == ""){outliers = NULL} + if(project == "TEPIC reprocessed IHEC data"){ + outliers <- unique(c(outliers,"R_ENCBS150HBC_ENCBS376RZJ", + "R_ENCBS559QNR_ENCBS568FYY_ENCBS945MCY", + "R_ENCBS853LFM","E_520VFV")) + } # filtered_matrix <- filtered_score_matrix$data metadata <- attr(filtered_score_matrix, "meta") @@ -17,7 +25,7 @@ sva_batch_effect <- function(filtered_score_matrix, need(!anyNA(metadata[,adj_var]), message = paste(adj_var, "has NAs and cannot be used to make the model")) ) validate( - need(nlevels(metadata[,adj_var]) > 1, message = paste(adj_var,"has less than 2 level", + need(nlevels(as.factor(metadata[,adj_var])) > 1, message = paste(adj_var,"has less than 2 level", "check levels using the pie chart")) ) } @@ -31,15 +39,23 @@ sva_batch_effect <- function(filtered_score_matrix, for (inter_var in interest_var){ validate( - need(!anyNA(metadata[,inter_var]), message = paste(inter_var, "has NAs and cannot be used for the model")) - ) - validate( - need(nlevels(metadata[,inter_var]) > 1, message = paste(inter_var,"has less than 2 level", + need(!anyNA(metadata[,inter_var]), message = paste(inter_var, "has NAs and cannot be used for the model")), + need(nlevels(as.factor(metadata[,inter_var])) > 1, message = paste(inter_var,"has less than 2 level", "check levels using the pie chart")) ) } } + row_sum<-apply(filtered_score_matrix,1,sum) + if(!length(which(row_sum == 0)) == 0){ + filtered_score_matrix<-filtered_score_matrix[-which(row_sum==0),] + } + if(outliers != ""){ + filtered_score_matrix <- filtered_score_matrix[,-match(outliers,colnames(filtered_score_matrix))] + metadata <- metadata[-match(outliers, metadata[,"experiment"]),] + } + + if(is.null(adjustment_var)){ #No interest variable, mod0 is the intercept, full mod is the interest_var mod0 <- model.matrix(~1, data = metadata) @@ -58,6 +74,7 @@ sva_batch_effect <- function(filtered_score_matrix, n.sv <- num.sv(filtered_score_matrix, mod, method = "leek") showNotification(paste("The number of latent factors estimated is", n.sv), duration = 3) + sva_object <- sva(filtered_score_matrix, mod, mod0, n.sv = n.sv) batch_adjusted_matrix <- sva_object$sv diff --git a/server/calculate_score_matrix.r b/server/calculate_score_matrix.r index c1466ae..622ece1 100644 --- a/server/calculate_score_matrix.r +++ b/server/calculate_score_matrix.r @@ -30,6 +30,7 @@ filtered_score_matrix <- eventReactive(input$calculate_matrix, { filtered_score_matrix<- score_matrix_tiling_regions(experiments = all_experiments()[[1]][input$table_rows_all], experiments_info_meta = experiments_info_meta(), variation = input$variance, + filtering = input$filter_rows, tiling_size = input$tiling_size, aggregation_function = input$aggregation, genome = input$genome, @@ -39,17 +40,21 @@ filtered_score_matrix <- eventReactive(input$calculate_matrix, { filtered_score_matrix<- score_matrix_annotations(experiments = all_experiments()[[1]][input$table_rows_all], experiments_info_meta = experiments_info_meta(), variation = input$variance, + filtering = input$filter_rows, aggregation_function = input$aggregation, annotation = input$annotations_list, genome = input$genome, chr = input$chr, user_key = user_key) + + } # #This is to check if the matrix has been calculated or not in order to show # #the NEXT button on this page, it's switched to false here in case # #the user chose different inputs and made another matrix, the NEXT button won't be added twice # calculated_matrix(FALSE) + updateSelectInput(session, "outliers", choices = c("", colnames(filtered_score_matrix))) return(filtered_score_matrix) diff --git a/server/listing_genomes.r b/server/listing_genomes.r index a64dd24..92c91d1 100644 --- a/server/listing_genomes.r +++ b/server/listing_genomes.r @@ -11,9 +11,9 @@ observeEvent(input$genome, { user_key = user_key)$name) - updateSelectInput(session, inputId = 'chr', choices = c("",deepblue_info(id = genomes$id[grep(input$genome, genomes$name)], - user_key = user_key)$chromosomes$name)) + updateSelectInput(session, inputId = 'chr', choices = c("", + deepblue_chromosomes(genome = input$genome, + user_key = input$user_key)$id)) - }) \ No newline at end of file