Skip to content
This repository has been archived by the owner. It is now read-only.
Permalink
b1f94385b0
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
260 lines (254 sloc) 14.8 KB
#' Clarion R6-class definition
#'
#' Use this to create a clarion object.
#' This object is used by all top-level wilson modules.
#'
#' @section Methods:
#' \describe{
#' \item{\code{get_id()}}{
#' Returns name of unique identifier column. Assumes first feature to be unique if not specified.
#' }
#' \item{\code{get_name()}}{
#' Returns name of name column. If not specified return unique Id.
#' }
#' \item{\code{get_delimiter()}}{
#' Return delimiter used within multi-value fields (no delimiter = NULL).
#' }
#' \item{\code{is_delimited(x)}}{
#' Logical whether the given column name is delimited.
#' }
#' \item{\code{get_factors()}}{
#' Returns a data.table columns: key and factor(s) if any. Named factors (e.g. factor1="name") will be cropped to their name.
#' }
#' \item{\code{validate(solve = TRUE)}}{
#' Check the object for inconsistencies. For solve = TRUE try to resolve some warnings.
#' }
#' }
#'
#' @param header A named list. Defaults to NULL.
#' @param metadata Clarion metadata in form of a data.table.
#' @param data Data.table according to metadata.
#' @param validate Logical value to validate on initialization. Defaults to TRUE.
#'
#' @field header List of global information regarding the whole experiment.
#' @field metadata Data.table with additional information for each column.
#' @field data Data.table containing experiment result data.
#'
#' @examples
#' \dontrun{
#' # initializing a new object
#' object <- Clarion$new(header, metadata, data, validate = TRUE)
#'
#' # create a deep copy
#' object_copy <- object$clone(deep = TRUE)
#' }
#'
#' @format NULL
#' @usage NULL
#' @importFrom R6 R6Class
#' @export
Clarion <- R6::R6Class("Clarion",
public = list(
header = NULL,
metadata = NULL,
data = NULL,
get_id = function() {
# return unique_id
# if no type return first feature
if (is.element("type", names(self$metadata))) {
return(self$metadata[type == "unique_id"][["key"]][1])
} else {
return(self$metadata[level == "feature"][["key"]][1])
}
},
get_name = function() {
# return name
# if not existing fall back to unqiue_id
if (is.element("type", names(self$metadata)) && is.element("name", self$metadata[["type"]])) {
return(self$metadata[type == "name"][["key"]])
}
return(self$get_id())
},
get_delimiter = function() {
self$header$delimiter
},
is_delimited = function(x) {
if (is.element("type", names(self$metadata))) {
return(self$metadata[key == x, type] == "array")
} else {
return(FALSE)
}
},
get_factors = function() {
# returns data.table key(, factor columns)
# only name for named factors (e.g. factor1="name")
# get factor columns
columns <- grep("^factor\\d+", names(self$metadata), perl = TRUE, value = TRUE)
# on no factors return key column
if (length(columns) == 0) return(self$metadata[, "key"])
# extract names
ext_names <- sub("^factor\\d+=\"(.*)\"", replacement = "\\1", columns, perl = TRUE)
# get factor table
factor_table <- self$metadata[, c("key", columns), with = FALSE]
# rename columns
names(factor_table)[-1] <- ext_names
return(factor_table)
},
validate = function(solve = TRUE) {
# validate header
private$check_delimiter()
# validate metadata
private$check_metadata_header()
private$check_key()
private$check_level()
private$check_type()
private$check_label()
# validate data
private$check_data_header(solve)
private$check_data_min()
private$check_data_column_types()
},
initialize = function(header = NULL, metadata, data, validate = TRUE) {
self$header <- header
self$metadata <- metadata
self$data <- data
if (validate) self$validate()
# coerce unique_id and name to character
if (self$get_id() == self$get_name()) {
cols <- self$get_id()
} else {
cols <- c(self$get_id(), self$get_name())
}
self$data[, (cols) := lapply(.SD, as.character), .SDcols = cols]
}
),
private = list(
# deep clone to force data.table copy
deep_clone = function(name, value) {
# invoke a deep copy for metadata and data field
if (name %in% c("metadata", "data")) {
data.table::copy(value)
} else {
value
}
},
## header checks
check_delimiter = function() {
if (is.element("delimiter", names(self$header))) {
# case: no type column/ no type = array
if (!is.element("type", names(self$metadata)) || !is.element("array", self$metadata[["type"]])) {
warning("Found in-field-delimiter '", self$header$delimiter, "' but no type=array columns (in metadata) to apply to.")
}
}
},
## metadata checks
check_metadata_header = function() {
# case: invalid column names
valid_names <- c("key", "factor\\d+(=\".*\")?", "level", "type", "label", "sub_label")
regex <- paste0("^", valid_names, "$", collapse = "|")
invalid_names <- grep(regex, names(self$metadata), invert = TRUE, value = TRUE, perl = TRUE)
if (length(invalid_names) > 0) {
warning("Metadata: Unexpected column names detected: ", paste0(invalid_names, collapse = ", "))
}
# case: missing mandatory column
requires <- c("key", "level")
missing <- !is.element(requires, names(self$metadata))
if (any(missing)) {
stop("Metadata: Mandatory column(s) missing! ", paste0(requires[missing], collapse = ", "))
}
},
check_key = function() {
# case: duplicated keys
if (anyDuplicated(self$metadata[["key"]])) {
stop("Metadata: Duplicate(s) in key detected! The following key(s) are duplicated: ", paste0(unique(self$metadata[["key"]][duplicated(self$metadata[["key"]])]), collapse = ", "))
}
# case: key not in data
missing <- setdiff(self$metadata[["key"]], names(self$data))
if (length(missing) > 0) {
warning("Metadata rows and data columns differ! Following rows are not defined in data: ", paste0(missing, collapse = ", "))
}
},
check_level = function() {
# case: invalid level
valid <- c("feature", "sample", "condition", "contrast")
unknown <- grep(pattern = paste0(valid, collapse = "|"), x = self$metadata[["level"]], perl = TRUE, invert = TRUE, value = TRUE)
if (length(unknown) > 0) {
warning("Metadata: Unknown level(s) found: ", paste0(unknown, collapse = ", "))
}
# case: minimal level requirements (feature + sample|condition|contrast)
if (!is.element("feature", self$metadata[["level"]]) && !any(is.element(c("sample", "condition", "contrast"), self$metadata[["level"]]))) {
stop("Metadata: Minimum level requirements not met! At least one feature (unique_id) and one sample, condition or contrast needed.")
}
},
check_type = function() {
if (is.element("type", names(self$metadata))) {
feature_types <- c("unique_id", "name", "category", "array")
remaining_types <- c("score", "ratio", "probability", "array")
# case: type doesn't fit level
# select and return keys with unknown type
unknown <- self$metadata[level == "feature"][!type %in% feature_types][["key"]]
unknown <- append(unknown, self$metadata[level %in% c("sample", "condition", "contrast")][!type %in% remaining_types][["key"]])
if (length(unknown) > 0) {
warning("Metadata: Level doesn't match type:", paste0(unknown, collapse = ", "))
}
# case: no unique_id defined
if (!is.element("unique_id", self$metadata[["type"]])) {
stop("Metadata: No unique_id defined in type! Please define a unique_id.")
}
# case: multiple unique_ids
if (sum(is.element(self$metadata[["type"]], "unique_id")) > 1) {
warning("Metadata: Found multiple unique_ids! Only first will be used.")
}
# case: type = array but no delimiter
if (is.element("array", self$metadata[["type"]]) && !is.element("delimiter", names(self$header))) {
stop("Found type=array but no delimiter! Columns with multi-value fields require delimiter (in header) and type=array (in metadata).")
}
}
},
check_label = function() {
if (is.element("label", names(self$metadata))) {
# case: contrast label not delimited by '|'
contrast_labels <- grep(pattern = "\\|", x = self$metadata[level == "contrast"][["label"]], perl = TRUE, invert = TRUE, value = TRUE)
if (length(contrast_labels) > 0) {
warning("Metadata: Missing '|' delimiter in contrast label(s): ", paste0(contrast_labels, collapse = ", "))
}
}
},
## data checks
check_data_header = function(solve = TRUE) {
# case: column not defined in metadata
missing <- setdiff(names(self$data), self$metadata[["key"]])
if (length(missing) > 0) {
if (solve) {
# omit undefined columns
self$data[, (missing) := NULL]
}
warning("Metadata rows and data columns differ! Following rows are missing in metadata: ", paste0(missing, collapse = ", "), if (solve) "\nOmitting data column(s)!")
}
# case: duplicated column names
if (anyDuplicated(names(self$data))) {
stop("Data: Column names not unique! Following names occur more than once: ", paste0(unique(names(self$data)[duplicated(names(self$data))]), collapse = ", "))
}
},
check_data_min = function() {
# case: minimum requirements not met (two columns: feature(unique_id) + sample|condition|contrast)
if (ncol(self$data) < 2) {
stop("Data: Minimum requirements not met! At least two columns needed, one with unique identifier and one with numeric values.")
}
},
check_data_column_types = function() {
# case: level = sample, condition, contrast not numeric
# except type=array because of delimiter
if (is.element("type", names(self$metadata))) {
expected_numeric_cols <- self$metadata[level %in% c("sample", "condition", "contrast")][type != "array"][["key"]]
} else {
expected_numeric_cols <- self$metadata[level %in% c("sample", "condition", "contrast")][["key"]]
}
not_numeric <- names(self$data[, expected_numeric_cols, with = FALSE][, which(!vapply(self$data[, expected_numeric_cols, with = FALSE], is.numeric, FUN.VALUE = logical(1)))])
if (length(not_numeric) > 0) {
stop("Data: Column(s): ", paste0(not_numeric, collapse = ", "), " not numeric! Probably wrong decimal separator.")
}
}
),
lock_class = TRUE # prevent class modification
)