diff --git a/DESCRIPTION b/DESCRIPTION index b4aa4e6..76a72bb 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: wilson Type: Package Title: Web-Based Interactive Omics Visualization -Version: 2.1.0 +Version: 2.1.1 Authors@R: c( person("Hendrik", "Schultheis", email = "hendrik.schultheis@mpi-bn.mpg.de", role = c("aut", "cre")), person("Jens", "Preussner", email = "jens.preussner@mpi-bn.mpg.de", role = "aut"), diff --git a/NEWS.md b/NEWS.md index b73fa15..4b8d2f4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,5 @@ +# wilson 2.1.1 +- fixed multiple unique_id bug in tobias_parser # wilson 2.1.0 - implemented tobias_parser # wilson 2.0.3 diff --git a/R/clarion.R b/R/clarion.R index 15e3e1a..3c6e1e3 100644 --- a/R/clarion.R +++ b/R/clarion.R @@ -56,7 +56,7 @@ Clarion <- R6::R6Class("Clarion", # return unique_id # if no type return first feature if (is.element("type", names(self$metadata))) { - return(self$metadata[type == "unique_id"][["key"]]) + return(self$metadata[type == "unique_id"][["key"]][1]) } else { return(self$metadata[level == "feature"][["key"]][1]) } @@ -201,6 +201,10 @@ Clarion <- R6::R6Class("Clarion", if (!is.element("unique_id", self$metadata[["type"]])) { stop("Metadata: No unique_id defined in type! Please define a unique_id.") } + # case: multiple unique_ids + if (sum(is.element(self$metadata[["type"]], "unique_id")) > 1) { + warning("Metadata: Found multiple unique_ids! Only first will be used.") + } # case: type = array but no delimiter if (is.element("array", self$metadata[["type"]]) && !is.element("delimiter", names(self$header))) { stop("Found type=array but no delimiter! 
Columns with multi-value fields require delimiter (in header) and type=array (in metadata).") diff --git a/R/parser.R b/R/parser.R index 9e38855..c2ca756 100644 --- a/R/parser.R +++ b/R/parser.R @@ -393,14 +393,14 @@ parser <- function(file, dec = ".") { #' @param condition_pattern Used to identify condition names by matching und removing given pattern with \code{\link[base]{grep}}. Ignored when condition_names is set. #' @param in_field_delimiter Delimiter for multi value fields. Default = ','. #' @param dec Decimal separator. Used in file reading and writing. -#' @param unique_id Whether the table contains an unique id column. If FALSE (default) will create one at first position. #' @param ... Used as header information. #' #' @details During conversion the parser will try to use the given config (if provided) to create the \href{https://github.molgen.mpg.de/loosolab/wilson-apps/wiki/CLARION-Format}{Clarion} metadata. In the case of insufficient config information it will try to approximate by referencing condition names issuing warnings in the process. +#' @details As the format requires a unique id the parser will create one if necessary. #' @details Factor grouping (metadata factor columns) is currently not implemented! #' #' @export -tobias_parser <- function(input, output, filter_columns = NULL, filter_pattern = NULL, config = system.file("extdata", "tobias_config.json", package = "wilson"), omit_NA = FALSE, condition_names = NULL, condition_pattern = "_bound$", in_field_delimiter = ",", dec = ".", unique_id = FALSE, ...) { +tobias_parser <- function(input, output, filter_columns = NULL, filter_pattern = NULL, config = system.file("extdata", "tobias_config.json", package = "wilson"), omit_NA = FALSE, condition_names = NULL, condition_pattern = "_bound$", in_field_delimiter = ",", dec = ".", ...) 
{ ## filter data columns # check if filter columns is a file or a vector if (!is.null(filter_columns) && file.exists(filter_columns)) { @@ -432,14 +432,6 @@ tobias_parser <- function(input, output, filter_columns = NULL, filter_pattern = data <- stats::na.omit(data) } - # create id column - if (!unique_id) { - data[, "id" := seq_len(nrow(data))] - # move id column to first position - new_order <- c("id", names(data)[ names(data) != "id"]) - data <- data[, new_order, with = FALSE] - } - ##### metadata metadata <- data.table::data.table(names(data)) @@ -558,7 +550,22 @@ tobias_parser <- function(input, output, filter_columns = NULL, filter_pattern = # set unique_id fallback if (!any(metadata[["type"]] == "unique_id")) { - metadata[key == unique_id_fallback, "type"] <- "unique_id" + if (!is.null(unique_id_fallback)) { + metadata[key == unique_id_fallback, "type"] <- "unique_id" + } else { + # setup unique_id column if there is neither a defined column nor a fallback + + # create id column + data[, "id" := seq_len(nrow(data))] + # move id column to first position + new_order <- c("id", names(data)[ names(data) != "id"]) + data <- data[, new_order, with = FALSE] + + id_row <- data.table::data.table("id", level = "feature", type = "unique_id", label = "id", sub_label = "") + names(id_row)[1] <- "key" + # add meta entry + metadata <- rbind(id_row, metadata) + } } ##### header diff --git a/inst/extdata/tobias_config.json b/inst/extdata/tobias_config.json index f8e050a..9a2c5dc 100644 --- a/inst/extdata/tobias_config.json +++ b/inst/extdata/tobias_config.json @@ -1,12 +1,5 @@ { "meta": [ - { - "col_name": "id", - "level": "feature", - "type": "unique_id", - "label": "id", - "sublabel": "" - }, { "col_name": "TFBS_chr", "level": "feature", @@ -140,146 +133,6 @@ "label": "name", "sublabel": "gene" }, - { - "col_name": "2C_score", - "level": "condition", - "type": "score", - "label": "2C", - "sublabel": "score" - }, - { - "col_name": "8C_score", - "level": "condition", - 
"type": "score", - "label": "8C", - "sublabel": "score" - }, - { - "col_name": "ICM_score", - "level": "condition", - "type": "score", - "label": "ICM", - "sublabel": "score" - }, - { - "col_name": "naive_hESC_score", - "level": "condition", - "type": "score", - "label": "naive hESC", - "sublabel": "score" - }, - { - "col_name": "hESC_score", - "level": "condition", - "type": "score", - "label": "hESC", - "sublabel": "score" - }, - { - "col_name": "2C_bound", - "level": "condition", - "type": "score", - "label": "2C", - "sublabel": "bound" - }, - { - "col_name": "8C_bound", - "level": "condition", - "type": "score", - "label": "8C", - "sublabel": "bound" - }, - { - "col_name": "ICM_bound", - "level": "condition", - "type": "score", - "label": "ICM", - "sublabel": "bound" - }, - { - "col_name": "naive_hESC_bound", - "level": "condition", - "type": "score", - "label": "naive hESC", - "sublabel": "bound" - }, - { - "col_name": "hESC_bound", - "level": "condition", - "type": "score", - "label": "hESC", - "sublabel": "bound" - }, - { - "col_name": "2C_8C_log2fc", - "level": "contrast", - "type": "ratio", - "label": "2C|8C", - "sublabel": "log2fc" - }, - { - "col_name": "2C_ICM_log2fc", - "level": "contrast", - "type": "ratio", - "label": "2C|ICM", - "sublabel": "log2fc" - }, - { - "col_name": "2C_naive_hESC_log2fc", - "level": "contrast", - "type": "ratio", - "label": "2C|naive hESC", - "sublabel": "log2fc" - }, - { - "col_name": "2C_hESC_log2fc", - "level": "contrast", - "type": "ratio", - "label": "2C|hESC", - "sublabel": "log2fc" - }, - { - "col_name": "8C_ICM_log2fc", - "level": "contrast", - "type": "ratio", - "label": "8C|ICM", - "sublabel": "log2fc" - }, - { - "col_name": "8C_naive_hESC_log2fc", - "level": "contrast", - "type": "ratio", - "label": "8C|naive hESC", - "sublabel": "log2fc" - }, - { - "col_name": "8C_hESC_log2fc", - "level": "contrast", - "type": "ratio", - "label": "8C|hESC", - "sublabel": "log2fc" - }, - { - "col_name": "ICM_naive_hESC_log2fc", - 
"level": "contrast", - "type": "ratio", - "label": "ICM|naive hESC", - "sublabel": "log2fc" - }, - { - "col_name": "ICM_hESC_log2fc", - "level": "contrast", - "type": "ratio", - "label": "ICM|hESC", - "sublabel": "log2fc" - }, - { - "col_name": "naive_hESC_hESC_log2fc", - "level": "contrast", - "type": "ratio", - "label": "naive hESC|hESC", - "sublabel": "log2fc" - }, // overview columns { "col_name": "TF_name", @@ -301,216 +154,6 @@ "type": "category", "label": "total TFBS", "sublabel": "" - }, - { - "col_name": "2C_mean_score", - "level": "condition", - "type": "score", - "label": "2C", - "sublabel": "mean" - }, - { - "col_name": "2C_bound", - "level": "condition", - "type": "score", - "label": "2C", - "sublabel": "bound" - }, - { - "col_name": "8C_mean_score", - "level": "condition", - "type": "score", - "label": "8C", - "sublabel": "mean" - }, - { - "col_name": "8C_bound", - "level": "condition", - "type": "score", - "label": "8C", - "sublabel": "bound" - }, - { - "col_name": "ICM_mean_score", - "level": "condition", - "type": "score", - "label": "ICM", - "sublabel": "mean" - }, - { - "col_name": "ICM_bound", - "level": "condition", - "type": "score", - "label": "ICM", - "sublabel": "bound" - }, - { - "col_name": "naive_hESC_mean_score", - "level": "condition", - "type": "score", - "label": "naive_hESC", - "sublabel": "mean" - }, - { - "col_name": "naive_hESC_bound", - "level": "condition", - "type": "score", - "label": "naive_hESC", - "sublabel": "bound" - }, - { - "col_name": "hESC_mean_score", - "level": "condition", - "type": "score", - "label": "hESC", - "sublabel": "mean" - }, - { - "col_name": "hESC_bound", - "level": "condition", - "type": "score", - "label": "hESC", - "sublabel": "bound" - }, - { - "col_name": "2C_8C_change", - "level": "contrast", - "type": "ratio", - "label": "2C|8C", - "sublabel": "change" - }, - { - "col_name": "2C_8C_pvalue", - "level": "contrast", - "type": "probability", - "label": "2C|8C", - "sublabel": "p-value" - }, - { - 
"col_name": "2C_ICM_change", - "level": "contrast", - "type": "ratio", - "label": "2C|ICM", - "sublabel": "change" - }, - { - "col_name": "2C_ICM_pvalue", - "level": "contrast", - "type": "probability", - "label": "2C|ICM", - "sublabel": "p-value" - }, - { - "col_name": "2C_naive_hESC_change", - "level": "contrast", - "type": "ratio", - "label": "2C|naive hESC", - "sublabel": "change" - }, - { - "col_name": "2C_naive_hESC_pvalue", - "level": "contrast", - "type": "probability", - "label": "2C|naive hESC", - "sublabel": "p-value" - }, - { - "col_name": "2C_hESC_change", - "level": "contrast", - "type": "ratio", - "label": "2C|hESC", - "sublabel": "change" - }, - { - "col_name": "2C_hESC_pvalue", - "level": "contrast", - "type": "probability", - "label": "2C|hESC", - "sublabel": "p-value" - }, - { - "col_name": "8C_ICM_change", - "level": "contrast", - "type": "ratio", - "label": "8C|ICM", - "sublabel": "change" - }, - { - "col_name": "8C_ICM_pvalue", - "level": "contrast", - "type": "probability", - "label": "8C|ICM", - "sublabel": "p-value" - }, - { - "col_name": "8C_naive_hESC_change", - "level": "contrast", - "type": "ratio", - "label": "8C|naive hESC", - "sublabel": "change" - }, - { - "col_name": "8C_naive_hESC_pvalue", - "level": "contrast", - "type": "probability", - "label": "8C|naive hESC", - "sublabel": "p-value" - }, - { - "col_name": "8C_hESC_change", - "level": "contrast", - "type": "ratio", - "label": "8C|hESC", - "sublabel": "change" - }, - { - "col_name": "8C_hESC_pvalue", - "level": "contrast", - "type": "probability", - "label": "8C|hESC", - "sublabel": "p-value" - }, - { - "col_name": "ICM_naive_hESC_change", - "level": "contrast", - "type": "ratio", - "label": "ICM|naive hESC", - "sublabel": "change" - }, - { - "col_name": "ICM_naive_hESC_pvalue", - "level": "contrast", - "type": "probability", - "label": "ICM|naive hESC", - "sublabel": "p-value" - }, - { - "col_name": "ICM_hESC_change", - "level": "contrast", - "type": "ratio", - "label": 
"ICM|hESC", - "sublabel": "change" - }, - { - "col_name": "ICM_hESC_pvalue", - "level": "contrast", - "type": "probability", - "label": "ICM|hESC", - "sublabel": "p-value" - }, - { - "col_name": "naive_hESC_hESC_change", - "level": "contrast", - "type": "ratio", - "label": "naive hESC|hESC", - "sublabel": "p-value" - }, - { - "col_name": "naive_hESC_hESC_pvalue", - "level": "contrast", - "type": "probability", - "label": "naive hESC|hESC", - "sublabel": "change" } ] } \ No newline at end of file diff --git a/man/tobias_parser.Rd b/man/tobias_parser.Rd index c7cafdc..66ce2d6 100644 --- a/man/tobias_parser.Rd +++ b/man/tobias_parser.Rd @@ -8,7 +8,7 @@ tobias_parser(input, output, filter_columns = NULL, filter_pattern = NULL, config = system.file("extdata", "tobias_config.json", package = "wilson"), omit_NA = FALSE, condition_names = NULL, condition_pattern = "_bound$", - in_field_delimiter = ",", dec = ".", unique_id = FALSE, ...) + in_field_delimiter = ",", dec = ".", ...) } \arguments{ \item{input}{Path to input table} @@ -31,8 +31,6 @@ tobias_parser(input, output, filter_columns = NULL, \item{dec}{Decimal separator. Used in file reading and writing.} -\item{unique_id}{Whether the table contains an unique id column. If FALSE (default) will create one at first position.} - \item{...}{Used as header information.} } \description{ @@ -41,5 +39,7 @@ Click \href{https://github.molgen.mpg.de/loosolab/TOBIAS}{here} for more informa \details{ During conversion the parser will try to use the given config (if provided) to create the \href{https://github.molgen.mpg.de/loosolab/wilson-apps/wiki/CLARION-Format}{Clarion} metadata. In the case of insufficient config information it will try to approximate by referencing condition names issuing warnings in the process. +As the format requires an unqiue id the parser will create one if necessary. + Factor grouping (metadata factor columns) is currently not implemented! }