diff --git a/app/server.r b/app/server.r index e867b71..6ec6dd0 100644 --- a/app/server.r +++ b/app/server.r @@ -34,7 +34,10 @@ #27 Supervised SVA (use the method with the std of ranks) (Done I think) #28 Add a place to choose outliers that can be used then in the RUV test and other tests (Done I think) #29 Hide next buttong for Batch Effect tab until there's a batch effect matrix - +#30 Show user the original number of rows of the raw matrix then the filtered one so they can change the cutt-off +#31 validate for the variation cuttoff not more than 1 +#32 add a validate that user can't used houskeeping genes list for not gene expression +#Documentation in html using https://html-online.com/editor/ or https://html5-editor.net/ #Problems # Can't use the std of ranks for the control on score matrix because they have some negative values @@ -93,9 +96,15 @@ function(input, output, session) { help_md <- markdown_pages[grep(input$tabs, markdown_pages)] output$help_output <- renderUI({ - includeMarkdown(file.path("..","help",help_md)) + # includeMarkdown(file.path("..","help",help_md)) + includeHTML(file.path("..","help",help_md)) }) }) + # output$myImage <- renderImage({ + # + # list(src = "../help/ezgif.com-gif-to-mp4.gif", + # contentType = 'image/gif') + # }) # logging in -------------------------------------------------------------- source(file.path("..","server","logging_in.r"), local = TRUE) @@ -243,6 +252,7 @@ function(input, output, session) { input$k_rank, sep = "-")]] <- ruv_batch_effect(filtered_score_matrix = filtered_score_matrix(), + experiments = all_experiments()[[1]], outliers = input$outliers, project = input$project, regularization_par = input$regularization_par, @@ -283,15 +293,15 @@ function(input, output, session) { observeEvent(input$batch_effect_previous_tab,{ updateTabItems(session, "tabs", "corr_plot_tab") - header <- paste("Correlation Plot") - shinyjs::html("pageHeader", header) + # header <- paste("Correlation Plot") + # shinyjs::html("pageHeader", header) }) observeEvent(input$batch_effect_next_tab,{ updateTabItems(session, "tabs", "batch_effect_plot_tab") - header <- paste("Batch Effect Plot") - shinyjs::html("pageHeader", header) + # header <- paste("Batch Effect Plot") + # shinyjs::html("pageHeader", header) }) @@ -330,7 +340,8 @@ function(input, output, session) { plotly_pca <- plotly_pca(experiments_info_meta = experiments_info_meta(), filtered_score_matrix = filtered_score_matrix(), - name = "Raw Matrix", + name = paste("Raw Matrix", input$epigenetic_mark, + input$project), project = input$project, type_of_score = input$type_of_score, color_by = input$color_by, @@ -360,16 +371,16 @@ function(input, output, session) { # }else{ updateTabItems(session, "tabs", "score_matrix_tab") - header <- paste("Calculate Score Matrix") - shinyjs::html("pageHeader", header) + # header <- paste("Calculate Score Matrix") + # shinyjs::html("pageHeader", header) # } }) observeEvent(input$plot_matrix_next_tab,{ updateTabItems(session, "tabs", "corr_plot_tab") - header <- paste("Correlation Plot") - shinyjs::html("pageHeader", header) + # header <- paste("Correlation Plot") + # shinyjs::html("pageHeader", header) }) # Download first plot ----------------------------------------------------------- @@ -461,6 +472,7 @@ function(input, output, session) { p1 <- plotly_pca(experiments_info_meta = experiments_info_meta(), filtered_score_matrix = filtered_score_matrix(), project = input$project, + name = "Raw Matrix", type_of_score = input$type_of_score, color_by = input$color_by, epigenetic_mark = input$epigenetic_mark, @@ -471,6 +483,7 @@ function(input, output, session) { p2 <- plotly_pca(experiments_info_meta = experiments_info_meta(), filtered_score_matrix = batch_adjusted_matrix()[[input$corrected_matrices]], project = input$project, + name = input$corrected_matrices, type_of_score = input$type_of_score, color_by = input$color_by_batch, epigenetic_mark = input$epigenetic_mark, @@ -501,5 +514,14 @@ function(input, output, session) { updateTabItems(session, "tabs", "batch_effect_tab") }) + +# Extra Stuff ------------------------------------------------------ + #Updating the pageHeader according to which tab we're on + #tab_panel_header just returns the tab name using the tab i + observeEvent(input$tabs,{ + # browser() + header <- paste(tab_panel_header(tab_panel = input$tabs)) + shinyjs::html("pageHeader", header) + }) } diff --git a/app/ui.r b/app/ui.r index b21b4a0..d6f8e1e 100644 --- a/app/ui.r +++ b/app/ui.r @@ -21,6 +21,7 @@ dashboardPage(title = "Batch Effect Analysis and Visualization", ) ), + # Sidebar items ----------------------------------------------------------- dashboardSidebar( @@ -40,7 +41,21 @@ dashboardPage(title = "Batch Effect Analysis and Visualization", dashboardBody( # Log-in popup ------------------------------------------------------------ useShinyjs(), +# +# #help modal style +# tags$style(".modal-content {background-color:#ECEFF4;} +# .modal-header {padding: 10px 16px; +# background-color: #3C8DBC; +# color: white;}"), + + # #Login modal style + # tags$style("#login_popup .modal-header {padding: 10px 16px; + # background-colo: #3C8DBC;}"), + #help modal style + tags$style(".modal-header {padding: 12px 16px; + background-color: #3C8DBC; + color: white;}"), tags$script(HTML(' $(document).ready(function() { $("header").find("nav").append(\'\'); @@ -62,7 +77,7 @@ dashboardPage(title = "Batch Effect Analysis and Visualization", style="display:inline-block; vertical-align: middle;", #The ? buttong and the popover that will come up from clicking bsButton("user_key_info",label = "", icon = icon("question"), style = "default", size = "extra-small"), - bsPopover("user_key_info", title = "info", content = "You can login with your user key, or stay anonymous", + bsPopover("user_key_info", title = "info", content = "You can login with your user key, or stay anonymous using anonymous_key", placement = "right", trigger = "focus") ) ), @@ -74,6 +89,10 @@ dashboardPage(title = "Batch Effect Analysis and Visualization", bsModal(id="help_page", title = "Help!", trigger = "info", #In Server I will check which tab is it and output the html #of the help part of that tab + # htmlOutput("help_output"), + # #Cats Cat + # imageOutput("myImage"), + # uiOutput("help_output"), size = "large" @@ -87,7 +106,13 @@ dashboardPage(title = "Batch Effect Analysis and Visualization", tags$div(id = 'login_warning', - HTML('

Please Log-in first, so you can list experiments

') + HTML('
+
+

+ Please Log-in first, you can use the key "anonymous_key" + or your own key in case you were registered.
You can also register + here +

') ), tags$div(id = 'addOptions'), @@ -114,7 +139,7 @@ dashboardPage(title = "Batch Effect Analysis and Visualization", )), br(), - shinydashboard::box(collapsible = TRUE, + shinydashboard::box(collapsible = TRUE,width = 8, radioButtons("type_of_score", "Choose method to calculate scoring matrix", choices = c("Tiling Regions" = "tiling", "Annotations" = "annotation")), @@ -131,11 +156,13 @@ dashboardPage(title = "Batch Effect Analysis and Visualization", selected = "5000")), selectInput('chr', 'Select a Chromosome, or leave empty for the whole genome', c(""), multiple = TRUE), # tags$div(id = 'addChromosomes'), - actionButton("show_input_check", "Calculate Score matrix") + fluidRow(column(width = 8, offset = 9, + actionButton("show_input_check", "Calculate Score matrix"), + style = "default")) ), - shinydashboard::box(collapsible = TRUE, + shinydashboard::box(collapsible = TRUE,width = 4, selectInput('aggregation', "Select Aggregation function", c("min", "max", "sum", "mean", "var", "sd", "median", "count", "boolean"), @@ -149,12 +176,14 @@ dashboardPage(title = "Batch Effect Analysis and Visualization", numericInput("variance","row variation cutoff", value = "0.05") ), - - downloadButton("downloadMatrix", "Download Data") + fluidRow(column(width = 4,offset = 6, + downloadButton("downloadMatrix", "Download Data"), + style = "default")) ), bsModal(id ="input_check", title = "Input Check", trigger = "show_input_check", textOutput(outputId = "warning_message"), + # uiOutput(outputID = " warning_message"), actionButton("calculate_matrix", "Continue!") ), @@ -261,9 +290,12 @@ dashboardPage(title = "Batch Effect Analysis and Visualization", fluidPage( selectInput("key_pie", "Select Key to check the value distribution", c("")), plotlyOutput("batch_pie_plot", width = "auto", height = "auto"))), - br(),br(), + # br(),br(), selectInput("corrected_matrices", "Batch Corrected Matrices", choices = NULL), - actionButton("calculate_batch_matrix", "Adjust matrix") + fluidRow(column(width = 6, offset = 6), + actionButton("calculate_batch_matrix", "Adjust matrix"), + style = "default") + # actionButton("calculate_batch_matrix", "Adjust matrix") ), shinydashboard::box(width = 6, conditionalPanel( @@ -292,9 +324,31 @@ dashboardPage(title = "Batch Effect Analysis and Visualization", numericInput("regularization_par", "Regularization parameter for the unwanted variation (nu.coeff)", value = 0.00001), - radioButtons("house_keeping_genes", "Method to estimate House Keeping Genes", - choices = c("Estimate using rank analysis" = "estimate_hkg", - "Use a predefined list" = "use_hkg_list")), + # radioButtons("house_keeping_genes", "Method to estimate House Keeping Genes", + # choices = c("Estimate using rank analysis" = "estimate_hkg", + # "Use a predefined list" = "use_hkg_list")), + #This html is generated from the previous radiobuttons function + #I just wanted to have a href there and I tweaked the raw html and re-introduced it + div( + HTML('
+ +
+
+ +
+
+ +
+
+
') + ), + conditionalPanel( condition = "input.house_keeping_genes == 'estimate_hkg'", @@ -305,13 +359,16 @@ dashboardPage(title = "Batch Effect Analysis and Visualization", numericInput("quantile_prob", "Quantile probability used in the rank analysis estimation [0,1]", - value = 0.005, min = 0, max = 1), - - numericInput("k_rank", "Desired rank for the estimated unwanted variation term", - value = 5) - ) + value = 0.005, min = 0, max = 1) + + ), + + numericInput("k_rank", "Desired rank for the estimated unwanted variation term", + value = 5) ), - downloadButton("downloadAdjustedMatrix", "Download Data") + fluidRow(column(width = 6, offset = 6), + downloadButton("downloadAdjustedMatrix", "Download Data"), + style = "default") ) ), diff --git a/functions/combat_batch_effect.r b/functions/combat_batch_effect.r index 9413a93..5bb7f36 100644 --- a/functions/combat_batch_effect.r +++ b/functions/combat_batch_effect.r @@ -5,14 +5,13 @@ combat_batch_effect <- function(filtered_score_matrix, project, interest_var){ - if(project == "TEPIC reprocessed IHEC data"){ outliers <- unique(c(outliers,"R_ENCBS150HBC_ENCBS376RZJ", "R_ENCBS559QNR_ENCBS568FYY_ENCBS945MCY", "R_ENCBS853LFM","E_520VFV")) } # filtered_matrix <- filtered_score_matrix$data - metadata <- attr(filtered_score_matrix, "meta") + metadata <- as.data.frame(attr(filtered_score_matrix, "meta")) validate( need(!anyNA(metadata[,batch]), message = paste(batch, "has NAs and cannot be used in combat")), @@ -23,12 +22,11 @@ combat_batch_effect <- function(filtered_score_matrix, row_sum<-apply(filtered_score_matrix,1,sum) if(!length(which(row_sum == 0)) == 0){ - filtered_score_matrix<-filtered_score_matrix[-which(row_sum==0),] + filtered_score_matrix<-filtered_score_matrix[!(row_sum == 0),] } - if(outliers != ""){ - filtered_score_matrix <- filtered_score_matrix[,-match(outliers,colnames(filtered_score_matrix))] - metadata <- metadata[-match(outliers, metadata[,"experiment"]),] - + if(length(outliers) != 0){ + filtered_score_matrix <- filtered_score_matrix[,!(colnames(filtered_score_matrix) %in% outliers)] + metadata <- metadata[!(colnames(filtered_score_matrix) %in% outliers),] } #validation of the inptus @@ -94,7 +92,6 @@ combat_batch_effect <- function(filtered_score_matrix, ) }) - attr(batch_adjusted_matrix, "meta") <- metadata return(batch_adjusted_matrix) diff --git a/functions/inserted_ui.r b/functions/inserted_ui.r index f71b860..1e8cc40 100644 --- a/functions/inserted_ui.r +++ b/functions/inserted_ui.r @@ -26,8 +26,8 @@ inserted_ui <- function(user_key,user_name,echo, id, genomes) { ui = tags$div( id = id, - - shinydashboard::box(collapsible = TRUE, + + shinydashboard::box(collapsible = TRUE,width = 9, selectInput('genome', "Select Genome", c(genomes$name)), @@ -35,10 +35,10 @@ inserted_ui <- function(user_key,user_name,echo, id, genomes) { selectInput('epigenetic_mark', 'Select a Data Type', c("","Gene Expression",deepblue_list_epigenetic_marks(user_key = user_key)$name), multiple = F), - - actionButton('list_exper_btn', "List Experiments"), - - checkboxInput("filter_coverage", "Filter coverage files", TRUE) + fluidRow(column(width = 9, offset = 9, + checkboxInput("filter_coverage", "Filter coverage files", TRUE), + actionButton('list_exper_btn', "List Experiments"), + style = "default")) ) ) ) diff --git a/functions/list_rna_seq_data.r b/functions/list_rna_seq_data.r index 6605f0b..cc3aebd 100644 --- a/functions/list_rna_seq_data.r +++ b/functions/list_rna_seq_data.r @@ -3,8 +3,6 @@ list_rna_seq_data <- function(project, protein_coding_genes_id <- readLines(file.path("..","gene_ids","protein_coding_HAVANA_genes_ids.txt")) - output_format <- "CHROMOSOME,START,END,@STRAND(gencode v23),@GENE_NAME(gencode v23),@SAMPLE_ID,TPM" - gene_expr_query <- deepblue_select_expressions( expression_type = "gene", gene_model = "gencode v23", diff --git a/functions/pie_chart.r b/functions/pie_chart.r index 392c7f1..085418e 100644 --- a/functions/pie_chart.r +++ b/functions/pie_chart.r @@ -1,10 +1,18 @@ pie_chart <- function(metadata, key){ + metadata <- as.data.frame(metadata) + + if(startsWith(key, "@")){ + validate( + need(FALSE, message = "Please choose another key that doesn't have @ at the beginning!") + ) + } values <- list() - values["unique_values"] <- list(levels(factor(metadata[,key]))) - values["percentages"] <- NA + + values[["unique_values"]] <- levels(factor(metadata[[key]])) + values[["percentages"]] <- NA for(i in 1:length(values[[1]])){ - values[[2]][i] <- length(which(metadata[,key] == values[[1]][i]))/nrow(metadata)*100 + values[[2]][i] <- length(which(metadata[[key]] == values[[1]][i]))/nrow(metadata)*100 } values <- as.data.frame(values) diff --git a/functions/plotly_pca.r b/functions/plotly_pca.r index cceef89..6c269ac 100644 --- a/functions/plotly_pca.r +++ b/functions/plotly_pca.r @@ -70,7 +70,6 @@ plotly_pca <- function(experiments_info_meta, yaxis = list(title = y_lab, zeroline = FALSE), xaxis = list(title = x_lab, zeroline = FALSE)) - browser() return(p) } \ No newline at end of file diff --git a/functions/ruv_batch_effect.r b/functions/ruv_batch_effect.r index 97aa5fa..38f8281 100644 --- a/functions/ruv_batch_effect.r +++ b/functions/ruv_batch_effect.r @@ -1,4 +1,5 @@ ruv_batch_effect <- function(filtered_score_matrix, + experiments, outliers, project, regularization_par = 0.00001, @@ -6,14 +7,13 @@ ruv_batch_effect <- function(filtered_score_matrix, k_rank = 5, estimate_hkg){ - #hkg: House Keeping Genes + #hkg: House Keeping Genes, estimate_hkg = T or F #regularization_par nu.coeff for unwanted variation for the naiveRandRUV() #k_rank Desired rank for the estimated unwanted variation term for naiveRandRUV() #estimate_hkg if TRUE, hkg will be defined by a rank analysis #if FALSE a list of hkg will be used #quantile_prob used as probability for the quantile() to estimate hkg - - metadata <- attr(filtered_score_matrix, "meta") + metadata <- as.data.frame(attr(filtered_score_matrix, "meta")) if(project == "TEPIC reprocessed IHEC data"){ outliers <- unique(c(outliers,"R_ENCBS150HBC_ENCBS376RZJ", @@ -21,44 +21,55 @@ ruv_batch_effect <- function(filtered_score_matrix, "R_ENCBS853LFM","E_520VFV")) } + filtered_score_matrix <- filtered_score_matrix[,!(colnames(filtered_score_matrix) %in% outliers)] + metadata <- metadata[!(colnames(filtered_score_matrix) %in% outliers),] + if(estimate_hkg == "use_hkg_list"){ - housekeepingGenes<-c("ENSG00000204574","ENSG00000075624","ENSG00000023330", - "ENSG00000166710","ENSG00000141367","ENSG00000160211", - "ENSG00000111640","ENSG00000169919","ENSG00000165704", - "ENSG00000134333","ENSG00000102144", "ENSG00000125630", - "ENSG00000181222","ENSG00000108298","ENSG00000089157", - "ENSG00000073578", "ENSG00000112592","ENSG00000196230") + # housekeepingGenes<-c("ENSG00000204574","ENSG00000075624","ENSG00000023330", + # "ENSG00000166710","ENSG00000141367","ENSG00000160211", + # "ENSG00000111640","ENSG00000169919","ENSG00000165704", + # "ENSG00000134333","ENSG00000102144","ENSG00000125630", + # "ENSG00000181222","ENSG00000108298","ENSG00000089157", + # "ENSG00000073578","ENSG00000112592","ENSG00000196230") + + housekeepingGenes <- c("ABCF1", "ACTB", "ALAS1", "B2M","CLTC","G6PD","GAPDH", + "GUSB","HPRT1","LDHA","PGK1","POLR1B","POLR2A", + "RPL19", "RPLP0","SDHA","TBP","TUBB") rowsum<-apply(filtered_score_matrix,1,sum) - if(!length(which(rowsum == 0)) == 0){ - filtered_score_matrix<-filtered_score_matrix[-which(rowsum==0),] - } + filtered_score_matrix <- filtered_score_matrix[!(rowsum == 0),] - if(outliers != ""){ - filtered_score_matrix <- filtered_score_matrix[,-match(outliers,colnames(filtered_score_matrix))] - metadata <- metadata[-match(outliers, metadata["experiment"]),] - - } + # if(!length(which(rowsum == 0)) == 0){ + # filtered_score_matrix<-filtered_score_matrix[-which(rowsum==0),] + # } + + # if(outliers != ""){ + # filtered_score_matrix <- filtered_score_matrix[,-match(outliers,colnames(filtered_score_matrix))] + # metadata <- metadata[-match(outliers, metadata["experiment"]),] + # + # } inRUV<-t(as.matrix(log2(filtered_score_matrix+1))) - ruv_adjusted_matrix <- naiveRandRUV(inRUV,match(housekeepingGenes,colnames(inRUV)), + ruv_adjusted_matrix <- naiveRandRUV(inRUV,which(housekeepingGenes %in% colnames(inRUV)), nu.coeff=regularization_par,k=k_rank) #esetimate houskeeping genes - } - else if(estimate_hkg == "estimate_hkg"){ + + }else if(estimate_hkg == "estimate_hkg"){ rowsum<-apply(filtered_score_matrix,1,sum) - if(!length(which(rowsum == 0)) == 0){ - filtered_score_matrix<-filtered_score_matrix[-which(rowsum==0),] - } + filtered_score_matrix <- filtered_score_matrix[!(rowsum == 0),] - if(outliers != ""){ - filtered_score_matrix <- filtered_score_matrix[,-match(outliers,colnames(filtered_score_matrix))] - metadata <- metadata[-match(outliers, metadata["experiment"]),] - - } + # if(!length(which(rowsum == 0)) == 0){ + # filtered_score_matrix<-filtered_score_matrix[-which(rowsum==0),] + # } + + # if(outliers != ""){ + # filtered_score_matrix <- filtered_score_matrix[,-match(outliers,colnames(filtered_score_matrix))] + # metadata <- metadata[-match(outliers, metadata["experiment"]),] + # + # } rankedData<-apply(filtered_score_matrix,2,rank) stdRanks<-apply(rankedData,1,sd) @@ -66,14 +77,15 @@ ruv_batch_effect <- function(filtered_score_matrix, meanRanks<-apply(rankedData,1,mean) if(project != "TEPIC reprocessed IHEC data"){ - housekeepingGenes <- match(meanRanks[which(stdRanks <= quantile(stdRanks,quantile_prob))], + housekeepingGenes <- match(meanRanks[stdRanks <= quantile(stdRanks,quantile_prob)], meanRanks) inRUV<-t(as.matrix(log2(filtered_score_matrix+1))) }else{ inRUV<-t(as.matrix(log2(filtered_score_matrix+1))) - housekeepingGenes <-names(meanRanks[which(stdRanks <= quantile(stdRanks,quantile_prob))]) + # housekeepingGenes <- names(meanRanks[which(stdRanks <= quantile(stdRanks,quantile_prob))]) + housekeepingGenes <- names(meanRanks[stdRanks <= quantile(stdRanks, quantile_prob)]) housekeepingGenes <- match(housekeepingGenes, colnames(inRUV)) } @@ -81,6 +93,7 @@ ruv_batch_effect <- function(filtered_score_matrix, nu.coeff=regularization_par , k=k_rank) } + ruv_adjusted_matrix <- t(ruv_adjusted_matrix) attr(ruv_adjusted_matrix, "meta") <- metadata diff --git a/functions/score_matrix_tiling_regions.r b/functions/score_matrix_tiling_regions.r index bc6909c..f723c53 100644 --- a/functions/score_matrix_tiling_regions.r +++ b/functions/score_matrix_tiling_regions.r @@ -9,7 +9,7 @@ score_matrix_tiling_regions <- function(experiments, variation = "0.05", user_key){ - if(chr == ""){chr <- NULL} + # if(chr == ""){chr <- NULL} #Making a list with to call the VALUE column for each of our experiments experiments_columns <- list() diff --git a/functions/supervised_sva_batch_effect.r b/functions/supervised_sva_batch_effect.r index b4deea7..4368b91 100644 --- a/functions/supervised_sva_batch_effect.r +++ b/functions/supervised_sva_batch_effect.r @@ -47,12 +47,11 @@ supervised_sva_batch_effect <- function(filtered_score_matrix, #calculating controls row_sum<-apply(filtered_score_matrix,1,sum) if(!length(which(row_sum == 0)) == 0){ - filtered_score_matrix<-filtered_score_matrix[-which(row_sum==0),] + filtered_score_matrix<-filtered_score_matrix[!(row_sum == 0),] } if(outliers != ""){ - filtered_score_matrix <- filtered_score_matrix[,-match(outliers,colnames(filtered_score_matrix))] - metadata <- metadata[-match(outliers, metadata[,"experiment"]),] - + filtered_score_matrix <- filtered_score_matrix[,!(colnames(filtered_score_matrix) %in% outliers)] + metadata <- metadata[!(colnames(filtered_score_matrix) %in% outliers),] } if(is.null(adjustment_var)){ diff --git a/functions/sva_batch_effect.r b/functions/sva_batch_effect.r index 8f29b5a..bb06086 100644 --- a/functions/sva_batch_effect.r +++ b/functions/sva_batch_effect.r @@ -48,12 +48,12 @@ sva_batch_effect <- function(filtered_score_matrix, row_sum<-apply(filtered_score_matrix,1,sum) if(!length(which(row_sum == 0)) == 0){ - filtered_score_matrix<-filtered_score_matrix[-which(row_sum==0),] + filtered_score_matrix<-filtered_score_matrix[!(row_sum == 0),] } - if(length(outliers) != 0){ - filtered_score_matrix <- filtered_score_matrix[,-match(outliers,colnames(filtered_score_matrix))] - metadata <- metadata[-match(outliers, metadata[,"experiment"]),] + if(!is.null(outliers)){ + filtered_score_matrix <- filtered_score_matrix[,!(colnames(filtered_score_matrix) %in% outliers)] + metadata <- metadata[!(colnames(filtered_score_matrix) %in% outliers),] } diff --git a/functions/tab_panel_header.r b/functions/tab_panel_header.r new file mode 100644 index 0000000..69861f7 --- /dev/null +++ b/functions/tab_panel_header.r @@ -0,0 +1,22 @@ +tab_panel_header <- function(tab_panel){ + + if(tab_panel == "list_experiments_tab"){ + return("List Experiments") + }else if(tab_panel == "score_matrix_tab"){ + return("Score Matrix") + }else if(tab_panel == "rna_seq_matrix"){ + return("RNA seq Matrix") + }else if(tab_panel == "score_matrix_dnase1_tab"){ + return("Score Matrix DNase1") + }else if(tab_panel == "plot_matrix_tab"){ + return("Plot Matrix") + }else if(tab_panel == "corr_plot_tab"){ + return("Correlation Plot") + }else if(tab_panel == "batch_effect_tab"){ + return("Batch Effects") + }else if(tab_panel == "batch_effect_plot_tab"){ + return("Batch Effect Plot") + }else{ + return(NULL) + } +} \ No newline at end of file diff --git a/functions/tepic_sample_id_mapping.r b/functions/tepic_sample_id_mapping.r new file mode 100644 index 0000000..cd51529 --- /dev/null +++ b/functions/tepic_sample_id_mapping.r @@ -0,0 +1,32 @@ +#mapping sample names to experiments names reprocessed data +tepic_sample_id_mapping <- function(sample_cols, + user_key){ + + experiments <- deepblue_list_experiments(project = "TEPIC reprocessed IHEC data", + user_key = user_key) + + metadata_exp <- deepblue_info(experiments$id, user_key = user_key) + + sample_name <- list() + + for (i in 1:length(metadata_exp)){ + sample_name[["sample"]][i] <- metadata_exp[[i]]$sample_id + sample_name[["name"]][i] <- strsplit(metadata_exp[[i]]$name, + split = "[.]")[[1]][1] + } + + all_samples <- list() + + for (i in 1:length(sample_cols)){ + if(sample_cols[i] %in% sample_name$sample){ + all_samples[["id"]][i] <- sample_cols[i] + all_samples[["name"]][i] <- sample_name$name[match(sample_cols[i], + sample_name$sample)[1]] + + }else{ + all_samples[["id"]][i] <- sample_cols[i] + all_samples[["name"]][i] <- NA + } + } + return(as.data.frame(all_samples)) +} \ No newline at end of file diff --git a/help/batch_effect_tab.html b/help/batch_effect_tab.html new file mode 100644 index 0000000..c78970c --- /dev/null +++ b/help/batch_effect_tab.html @@ -0,0 +1,53 @@ + + +

Batch Effect

+
+

This is the most important panel in this app, which allows the user to run several batch analysis functions and models on the matrix retrieved from before.

+

Left Panel:

+ +

Let's discuss a bit about the different batch effect analysis methods that can be used here.

+
    +
  1. ComBat: Is a method for adjusting for known batches and uses an empirical Bayesian framework [1]. The basic idea is that the user needs to choose one of the features from the metadata as a batch (e.g. BATCH, BIOMATERIAL_PROVIDER...) anything that might cause non-biological variety in the data, ComBat then needs a model matrix with (optionally) the adjustment variable and variables of interest.
    +
        batch_adjusted_matrix <- ComBat(dat=data_matrix,
    +                                    batch=as.factor(metadata[,batch]),
    +                                    mod=model_matrix,
    +                                    par.prior=TRUE, prior.plots=FALSE)
    +
  2. +
  3. SVA: This method has two steps. First, it calculates the number of latent variables that need to be estimated, then it estimates the surrogate variables. This method needs two model matrices, the null model that has only the adjustment variable (optional) otherwise has only the intercept, the full model containing both adjustment and interest variables.
    the SVA function will return a list with 4 components, we use the first component which is a matrix with columns as the estimated surrogate variables and can be used -with some math magic- to remove the batch effect from the raw matrix. +
    mod <- model.matrix(~as.factor(variable_of_interest), data=pheno)
    +mod0 <- model.matrix(~1,data=pheno)
    +n.sv <- num.sv(edata,mod,method="leek")
    +svobj < sva(edata,mod,mod0,n.sv=n.sv)
    +
    +
  4. +
  5. Supervised SVA: In this method, SVA uses a control probe to estimate the surrogate variables, the controls here are a vector of probabilities (between 0 and 1) indicating that a gene is a control (a value of 1 means the gene is certainly a control and a value of 0 means the gene is certainly not a control), these are calculated mathematically from the matrix: +
    ranked_data <- apply(data_matrix, 2,rank)
    +std_ranks <- apply(ranked_data,1,sd)
    +ranks_of_std <- rank(std_ranks)
    +control_probe <- 1-(rankes_of_std / max(rank_of_std))
    +Just live SVA, this method needs a null model matrix and a full model matrix along with the control probe, and it will return similar list with 4 components and we use the first component to remove the batch effect from the raw matrix.
  6. +
  7. RUV: This method is used to normalize expression data and is meant to remove unwanted variation from the data when the interest variable is not known. Once the user chooses this method, option related to RUV appear on the right panel: +
      +
    • nu.coeff: which is the regularization parameter of the unwanted variation, and should be inversely proportional to the (expected) magnitude of the unwanted variation.
    • +
    • Desired rank (K): quoting from the RUV manual "the rank k can be thought of as the number of independent sources of unwanted variation in the data (i.e., if one source is a linear combination of other sources, it does not increase the rank)".
    • +
    • House Keeping Genes: The predefined list can only be used with gene expression data, as DeepBlue server gives back the matrix with gene names in columns that can be matched to the predefined list and given to the RUV function. But can be estimated mathematically by a rank analysis: +
      ranked_data <- apply(data_matrix, 2,rank)
      +std_ranks <- apply(ranked_data,1,sd)
      +mean_ranks <- apply(ranked_data,1,mean)
      +house_keeping_genes <- names(mean_ranks[std_ranks <= quantile(std_ranks,quantile_prob)])
      +
    • +
    +More references on the RUV package can be found on the original paper and the package manual.
  8. +
+Each batch corrected matrix can be downloaded along with its metadata with the download button that appears on the right panel after running one of the methods successfully, the selected matrix from the "Batch Corrected Matrices" list is the one that will be downloaded and can be read the same way described in "Score Matrix" tab help page.
\ No newline at end of file diff --git a/help/corr_plot_tab.html b/help/corr_plot_tab.html new file mode 100644 index 0000000..24da1f8 --- /dev/null +++ b/help/corr_plot_tab.html @@ -0,0 +1,21 @@ + + +

Correlation Plot

+
+

User can make a correlation plot of the data matrix using + +corrplot + function from the corrplot CRAN package in R. Depending on how many experiments chosen, and how long the experiment names are, +the plots might not show up, but the "Download Plot" tab should be visible for the user to download the plot. + The Width and Height are in inches and used as parameters for the + +pdf() +function to download the plot as a PDF.

+
\ No newline at end of file diff --git a/help/example.jpg b/help/example.jpg deleted file mode 100644 index bccb7b4..0000000 Binary files a/help/example.jpg and /dev/null differ diff --git a/help/list_experiments_tab.html b/help/list_experiments_tab.html new file mode 100644 index 0000000..62fec15 --- /dev/null +++ b/help/list_experiments_tab.html @@ -0,0 +1,27 @@ + + +

Listing Experiments

+
+

First, users need to log in from the top right corner

+ +

After logging in, users can choose a genome from the dropdown list, the projects dropdown list reacts to the genome the users chose, then only shows you projects that have data related to the chosen genome. The "Data Type" dropdown list has "gene expression" and epigenetic marks, and users can choose which kind of data type they would like to list experiments for. The "Filter Coverage files" checkbox is for the DEEP project, as some of the experiments retrieved are the coverage files which are used as control and users can filter them out.

+

Once the user lists the experiments, on the right bottom corner, notifications appear telling the user which step of the listing process is being performed, this step might take a while depending on the number of experiments that need to be retrieved.

+

The output is a table with IDs and Names of experiments, the IDs can be clicked on to see the metadata. Both the experiments table and metadata table are searchable using case-sensitive strings or regular expression, e.g. 51_Hf0[34] will show the user experiments starting with 51_Hf03 and 51_Hf04.

+

Some of the functions used in this panel:

+
    +
  1. list_experiments
  2. +
  3. list_genomes
  4. +
  5. list_projects
  6. +
  7. info
  8. +
+
\ No newline at end of file diff --git a/help/plot_matrix_tab.html b/help/plot_matrix_tab.html new file mode 100644 index 0000000..d07ca99 --- /dev/null +++ b/help/plot_matrix_tab.html @@ -0,0 +1,27 @@ + + + +

Plot Matrix

+
+

User can Plot the raw data matrix generated from the previous tab, this is a simple PCA plot.

+ +
\ No newline at end of file diff --git a/help/score_matrix_tab.html b/help/score_matrix_tab.html new file mode 100644 index 0000000..1c7dba2 --- /dev/null +++ b/help/score_matrix_tab.html @@ -0,0 +1,29 @@ + + +

Score Matrix

+
+

After selecting the experiments from the previous tab, the user can build a score matrix using one of two methods from the left box:

+
    +
  1. Tiling Regions: User can then select the size of the tiling regions, whether 1 kb or 5 kb.
  2. +
  3. List of annotation: User can then select an annotation related to the genome selected from the annotations list that appears if "Annotations" was chosen.
  4. +
+

The user can select one or multiple chromosomes which are parsed to the function to calculate the matrix. If the user is trying to test some functionalities of the app, we advise choosing one chromosome, because the time for calculating the whole genome is proportional to the number of regions to be calculated and the size of chromosome or genome.

+

On the right panel, there are two advanced options:

+
    +
  1. Aggregation function: which is the method used to aggregate the score for a region on the chromosome, this is needed for the deepblue_score_matrix function that will build the matrix on the DeepBlue server.
  2. +
  3. Filtering method: Once the matrix is retrieved, the user can filter either by removing all non-complete rows (i.e. rows that have NA for one or more of the experiments), or removing rows with user-defined cutoff, the cutoff is calculated as +
    (Number of NAsrow ∕ Number of columns) < variation cutoff
    +and only those rows are kept. Example: +
    number of columns is 50, Number of NAsrow15 is 2, and cutoff is 0.5
    2/50 = 0.4 < 0.5
    and this row is not filtered-out
    +
  4. +
+

After the matrix been calculated and downloaded from the server, a "Download Data" button will appear on the right panel, and the user can download the raw matrix and metadata as .tsv files, and the user can then use them for any further analysis with other tools. Both files can be read with R easily using the following code:

+
metadata <- read.table("filepath/matrix_metadata.tsv", header = T, stringsAsFactors = F)
+data_matrix <- as.matrix(read.table("filepath/data_matrix.tsv", header = T))
+
\ No newline at end of file diff --git a/help/template.md b/help/template.md new file mode 100644 index 0000000..e8a43fc --- /dev/null +++ b/help/template.md @@ -0,0 +1,157 @@ +Score Matrix Tab Help Page +============ + +Paragraphs are separated by a blank line. + +2nd paragraph. *Italic*, **bold**, and `monospace`. Itemized lists +look like: + + * this one + * that one + * the other one + +Note that --- not considering the asterisk --- the actual text +content starts at 4-columns in. + +> Block quotes are +> written like so. +> +> They can span multiple paragraphs, +> if you like. + +Use 3 dashes for an em-dash. Use 2 dashes for ranges (ex., "it's all +in chapters 12--14"). Three dots ... will be converted to an ellipsis. +Unicode is supported. ☺ + + + +An h2 header +------------ + +Here's a numbered list: + + 1. first item + 2. second item + 3. third item + +Note again how the actual text starts at 4 columns in (4 characters +from the left side). Here's a code sample: + + # Let me re-iterate ... + for i in 1 .. 10 { do-something(i) } + +As you probably guessed, indented 4 spaces. By the way, instead of +indenting the block, you can use delimited blocks, if you like: + +~~~ +define foobar() { + print "Welcome to flavor country!"; +} +~~~ + +(which makes copying & pasting easier). You can optionally mark the +delimited block for Pandoc to syntax highlight it: + +~~~python +import time +# Quick, count to ten! +for i in range(10): + # (but not *too* quick) + time.sleep(0.5) + print i +~~~ + + + +### An h3 header ### + +Now a nested list: + + 1. First, get these ingredients: + + * carrots + * celery + * lentils + + 2. Boil some water. + + 3. Dump everything in the pot and follow + this algorithm: + + find wooden spoon + uncover pot + stir + cover pot + balance wooden spoon precariously on pot handle + wait 10 minutes + goto first step (or shut off burner when done) + + Do not bump wooden spoon or it will fall. + +Notice again how text always lines up on 4-space indents (including +that last line which continues item 3 above). + +Here's a link to [a website](http://foo.bar), to a [local +doc](local-doc.html), and to a [section heading in the current +doc](#an-h2-header). Here's a footnote [^1]. + +[^1]: Footnote text goes here. + +Tables can look like this: + +size material color +---- ------------ ------------ +9 leather brown +10 hemp canvas natural +11 glass transparent + +Table: Shoes, their sizes, and what they're made of + +(The above is the caption for the table.) Pandoc also supports +multi-line tables: + +-------- ----------------------- +keyword text +-------- ----------------------- +red Sunsets, apples, and + other red or reddish + things. + +green Leaves, grass, frogs + and other things it's + not easy being. +-------- ----------------------- + +A horizontal rule follows. + +*** + +Here's a definition list: + +apples + : Good for making applesauce. +oranges + : Citrus! +tomatoes + : There's no "e" in tomatoe. + +Again, text is indented 4 spaces. (Put a blank line between each +term/definition pair to spread things out more.) + +Here's a "line block": + +| Line one +| Line too +| Line tree + +and images can be specified like so: + +![example image](example.jpg "An exemplary image") + +Inline math equations go in like so: $\omega = d\phi / dt$. Display +math should get its own line and be put in in double-dollarsigns: + +$$I = \int \rho R^{2} dV$$ + +And note that you can backslash-escape any punctuation characters +which you wish to be displayed literally, ex.: \`foo\`, \*bar\*, etc. \ No newline at end of file diff --git a/server/calculate_score_matrix.r b/server/calculate_score_matrix.r index 1607bd6..c41853f 100644 --- a/server/calculate_score_matrix.r +++ b/server/calculate_score_matrix.r @@ -5,6 +5,7 @@ observeEvent(input$show_input_check,{ which means the whole genome is selected and the calculation might take sometime. Are you sure?") + # output$warning_message <- includeHTML(tags$html("

lskdjfkdjf

")) }else{ output$warning_message <- renderText("Just making sure you have the right input!") } @@ -20,6 +21,11 @@ filtered_score_matrix <- eventReactive(input$calculate_matrix, { need(!is.null(all_experiments()), "There were no experiments found. Please, try again") ) + + validate( + need(input$variance <= 1 & input$variance >= 0, + "The row variation has to be between 0 and 1") + ) # if(input$project == "TEPIC reprocessed IHEC data"){ # filtered_score_matrix <- dnase1_score_matrix(dnase1_experiments = all_experiments()[[1]][input$table_rows_all], # experiments_info_meta = experiments_info_meta(), @@ -95,15 +101,15 @@ observeEvent(filtered_score_matrix(),{ observeEvent(input$score_matrix_previous_tab,{ updateTabItems(session, "tabs", "list_experiments_tab") - header <- paste("List Experiments") - shinyjs::html("pageHeader", header) + # header <- paste("List Experiments") + # shinyjs::html("pageHeader", header) }) observeEvent(input$score_matrix_next_tab,{ updateTabItems(session, "tabs", "plot_matrix_tab") - header <- paste("Plotting PCA") - shinyjs::html("pageHeader", header) + # header <- paste("Plotting PCA") + # shinyjs::html("pageHeader", header) }) diff --git a/server/correlation_plot.r b/server/correlation_plot.r index bc89241..ca96ae3 100644 --- a/server/correlation_plot.r +++ b/server/correlation_plot.r @@ -43,14 +43,14 @@ output$corr_plot_down <- downloadHandler( observeEvent(input$corr_plot_previous_tab,{ updateTabItems(session, "tabs", "plot_matrix_tab") - header <- paste("Plotting PCA") - shinyjs::html("pageHeader", header) + # header <- paste("Plotting PCA") + # shinyjs::html("pageHeader", header) }) observeEvent(input$corr_plot_next_tab,{ updateTabItems(session, "tabs", "batch_effect_tab") - header <- paste("Calculating Batch Effect") - shinyjs::html("pageHeader", header) + # header <- paste("Calculating Batch Effect") + # shinyjs::html("pageHeader", header) }) diff --git a/server/listing_experiments.r b/server/listing_experiments.r index 8eddfae..6afb421 100644 --- a/server/listing_experiments.r +++ b/server/listing_experiments.r @@ -2,24 +2,56 @@ all_experiments <- eventReactive(input$list_exper_btn, { #getting experments - withProgress(message = "Fetching experiments",min = 0, - max = 3, value = 0, - { - if(input$epigenetic_mark != "Gene Expression"){ - all_experiments <- list_experiments(genome = input$genome, - epigenetic_mark = input$epigenetic_mark, - project = input$project, - user_key = user_key, - filter_coverage = input$filter_coverage) - }else{ - all_experiments <- list_rna_seq_data(project = input$project, - user_key = user_key) - } + isolate({ + withProgress(message = "Fetching experiments",min = 0, + max = 3, value = 0, + { + all_experiments <- c() + if(input$epigenetic_mark != "Gene Expression"){ + all_experiments <- list_experiments(genome = input$genome, + epigenetic_mark = input$epigenetic_mark, + project = input$project, + user_key = user_key, + filter_coverage = input$filter_coverage) + }else{ + all_experiments <- list_rna_seq_data(project = input$project, + user_key = user_key) + } + + }) - }) + #rendering table + output$table <- DT::renderDataTable({ + + #all_experiments reutrns "\n" if no experiments were listed + validate( + need(!is.null(all_experiments), + message = "We couldn't find experiments according to the inputs you chose") + ) + + if(input$epigenetic_mark == "Gene Expression"){ + experiments_list <- all_experiments()[[1]] + experiments_list <- as.data.frame(experiments_list) + experiments_list$id <- sapply(experiments_list$id, convert_to_link) + }else{ + #converting experiments ids to interactive elements to show + #metadata in a popup window + experiments_list <- all_experiments()[[1]] + experiments_list$id <- sapply(experiments_list$id, convert_to_link) + } + + #all_experiments()[[1]] + DT::datatable(experiments_list, filter = list(position = 'top', clear = FALSE), + escape = FALSE,selection = 'none', + options = list( + search = list(regex = TRUE, caseInsensitive = FALSE), + pageLength = 10) + ) + }) + + return(all_experiments) + }) - - return(all_experiments) }) #Adding the next buttong if the experiments were successfully listed @@ -40,34 +72,6 @@ observeEvent(all_experiments(),{ } }) -#rendering table -output$table <- DT::renderDataTable({ - #all_experiments reutrns "\n" if no experiments were listed - validate( - need(!is.null(all_experiments()), - message = "We couldn't find experiments according to the inputs you chose") - ) - - if(input$epigenetic_mark == "Gene Expression"){ - experiments_list <- all_experiments()[[1]] - experiments_list <- as.data.frame(experiments_list) - experiments_list$id <- sapply(experiments_list$id, convert_to_link) - }else{ - #converting experiments ids to interactive elements to show - #metadata in a popup window - experiments_list <- all_experiments()[[1]] - experiments_list$id <- sapply(experiments_list$id, convert_to_link) - } - - #all_experiments()[[1]] - DT::datatable(experiments_list, filter = list(position = 'top', clear = FALSE), - escape = FALSE,selection = 'none', - options = list( - search = list(regex = TRUE, caseInsensitive = FALSE), - pageLength = 10) - ) -}) - #Controlling the next button observeEvent(input$list_experiments_next_tab,{ # if((input$project == "TEPIC reprocessed IHEC data") & @@ -82,8 +86,8 @@ observeEvent(input$list_experiments_next_tab,{ # }else{ updateTabItems(session, "tabs", "score_matrix_tab") - header <- paste("Calculate Score Matrix") - shinyjs::html("pageHeader", header) + # header <- paste("Calculate Score Matrix") + # shinyjs::html("pageHeader", header) # } }) diff --git a/server/logging_in.r b/server/logging_in.r index 4a12577..a296939 100644 --- a/server/logging_in.r +++ b/server/logging_in.r @@ -54,8 +54,8 @@ observeEvent(input$log_in_btn, { updateTabItems(session, "tabs", "list_experiments_tab") - header <- paste("Listing Experiments") - shinyjs::html("pageHeader", header) + # header <- paste("Listing Experiments") + # shinyjs::html("pageHeader", header) #Used to check if the experiments and matrix has been listed in order to add the nav buttons