From 63b8bc88ffe949fc86a47974fa6d38341ba5b33e Mon Sep 17 00:00:00 2001
From: Schultheis <hschult@kerckhoff.mpg.de>
Date: Thu, 7 Mar 2019 09:10:48 +0100
Subject: [PATCH 01/17] parser: implemented first half of metadata

---
 R/parser.R | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 80 insertions(+)

diff --git a/R/parser.R b/R/parser.R
index 475ecb1..626ac51 100644
--- a/R/parser.R
+++ b/R/parser.R
@@ -378,3 +378,83 @@ parser <- function(file, dec = ".") {
 
   return(Clarion$new(header = header, metadata = metadata, data = data))
 }
+
+#' TOBIAS single TFBS table to clarion converter
+#'
+#' @param input Path to input table
+#' @param output Output path.
+#' @param omit_NA Logical whether all rows containing NA should be removed.
+#' @param groups Keep columns related to given groups. Default = c("TFBS", "peak", "feature", "condition").
+#' @param in_field_delimiter Only applied on non numeric columns (?)
+#' @param dec Decimal separator.
+#'
+tobias_converter <- function(input, output, omit_NA = FALSE, groups = c("TFBS", "peak", "feature", "condition"), in_field_delimiter = NULL, dec = ".") {
+  ##### data
+  data <- data.table::fread(input, dec = dec, header = TRUE)
+  columns <- names(data)
+
+  # filter groups
+  if (is.element("feature", groups)) {
+    groups <- c(groups[which(groups != "feature")], "feat", "distance", "gene_biotype", "gene_id", "gene_name")
+  }
+
+  conditions <- gsub(pattern = "_bound", replacement = "", x = grep(pattern = "_bound$", x = columns, value = TRUE))
+  if (is.element("condition", groups)) {
+    groups <- c(groups[which(groups != "condition")], conditions)
+  }
+
+  delete_columns <- grep(pattern = paste0(groups, collapse = "|"), x = columns, value = TRUE, invert = TRUE)
+  data[, (delete_columns) := NULL]
+
+  # omit na
+  if (omit_NA) {
+    data <- na.omit(data)
+  }
+
+  # create id column
+  data[, id := seq_len(nrow(data))]
+
+  ##### metadata
+  metadata <- data.table::data.table(names(data))
+  names(metadata) <- "key"
+
+  meta_columns <- c("level", "type", "label", "sub_label")
+
+  # create metadata row by row
+  condition_pattern <- paste0(conditions, collape = "|")
+  meta_rows <- vapply(metadata[["key"]], FUN.VALUE = character(4), function(x) {
+    # level
+    if (grepl(pattern = condition_pattern, x = x)) {
+      if (grepl(pattern = "score|bound", x = x)) {
+        level <- "condition"
+      } else {
+        level <- "contrast"
+      }
+    } else {
+      level <- "feature"
+    }
+
+    # type
+    if (level == "feature") {
+      if (x == "id") {
+        type <- "unique_id"
+      } else if (any(grepl(pattern = in_field_delimiter, x = data[[x]], fixed = TRUE))) {
+        type <- "array"
+      } else {
+        type <- "category"
+      }
+    } else {
+      if (!is.numeric(data[[x]])) {
+        type <- "array"
+      } else if (grepl(pattern = "score|bound", x = x)) {
+        type <- "score"
+      }
+    }
+
+    return(c(level, type, label, sub_label))
+  })
+
+
+
+  return(data)
+}

From 69e3913d47f3d007666f46f40396a0311261bc29 Mon Sep 17 00:00:00 2001
From: Schultheis <hschult@kerckhoff.mpg.de>
Date: Thu, 7 Mar 2019 13:51:50 +0100
Subject: [PATCH 02/17] implemented tobias_converter (needs refactoring)

---
 R/parser.R | 85 +++++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 72 insertions(+), 13 deletions(-)

diff --git a/R/parser.R b/R/parser.R
index 626ac51..bc5a0b3 100644
--- a/R/parser.R
+++ b/R/parser.R
@@ -387,8 +387,9 @@ parser <- function(file, dec = ".") {
 #' @param groups Keep columns related to given groups. Default = c("TFBS", "peak", "feature", "condition").
 #' @param in_field_delimiter Only applied on non numeric columns (?)
 #' @param dec Decimal separator.
+#' @param ... Used as header information
 #'
-tobias_converter <- function(input, output, omit_NA = FALSE, groups = c("TFBS", "peak", "feature", "condition"), in_field_delimiter = NULL, dec = ".") {
+tobias_converter <- function(input, output, omit_NA = FALSE, groups = c("TFBS", "peak", "feature", "condition"), in_field_delimiter = ",", dec = ".", ...) {
   ##### data
   data <- data.table::fread(input, dec = dec, header = TRUE)
   columns <- names(data)
@@ -404,7 +405,10 @@ tobias_converter <- function(input, output, omit_NA = FALSE, groups = c("TFBS",
   }
 
   delete_columns <- grep(pattern = paste0(groups, collapse = "|"), x = columns, value = TRUE, invert = TRUE)
-  data[, (delete_columns) := NULL]
+  if (length(delete_columns) > 0) {
+    data[, (delete_columns) := NULL]
+  }
+
 
   # omit na
   if (omit_NA) {
@@ -413,19 +417,19 @@ tobias_converter <- function(input, output, omit_NA = FALSE, groups = c("TFBS",
 
   # create id column
   data[, id := seq_len(nrow(data))]
+  # move id column to first position
+  new_order <- c("id", names(data)[ names(data) != "id"])
+  data <- data[, new_order, with = FALSE]
 
   ##### metadata
   metadata <- data.table::data.table(names(data))
-  names(metadata) <- "key"
-
-  meta_columns <- c("level", "type", "label", "sub_label")
 
   # create metadata row by row
-  condition_pattern <- paste0(conditions, collape = "|")
-  meta_rows <- vapply(metadata[["key"]], FUN.VALUE = character(4), function(x) {
+  condition_pattern <- paste0(conditions, collapse = "|")
+  meta_rows <- lapply(metadata[[1]], function(x) {
     # level
-    if (grepl(pattern = condition_pattern, x = x)) {
-      if (grepl(pattern = "score|bound", x = x)) {
+    if (grepl(pattern = condition_pattern, x = x, perl = TRUE)) {
+      if (grepl(pattern = "score|bound", x = x, perl = TRUE)) {
         level <- "condition"
       } else {
         level <- "contrast"
@@ -446,15 +450,70 @@ tobias_converter <- function(input, output, omit_NA = FALSE, groups = c("TFBS",
     } else {
       if (!is.numeric(data[[x]])) {
         type <- "array"
-      } else if (grepl(pattern = "score|bound", x = x)) {
+      } else if (grepl(pattern = "score|bound", x = x, perl = TRUE)) {
         type <- "score"
+      } else if (grepl(pattern = "log2fc$", x = x, perl = TRUE)) {
+        type <- "ratio"
       }
     }
 
+    # label/ sub_label
+    label <- sub_label <- ""
+    label_parts <- unlist(strsplit(x = x, split = "_"))
+
+    if (length(label_parts) == 1) {
+      label <- label_parts
+    } else if (length(label_parts) == 2) {
+      label <- label_parts[1]
+      sub_label <- label_parts[2]
+    } else if (length(label_parts) >= 3 && level == "contrast") {
+      # replace '_' with whitespace
+      condition_pattern <- gsub(pattern = "_", replacement = " ", x = condition_pattern, fixed = TRUE)
+      x <- gsub(pattern = "_", replacement = " ", x = x, fixed = TRUE)
+      # get first condition using all identified conditions as pattern
+      first_condition <- gsub(pattern = paste0("(^", condition_pattern, ").*"), replacement = "\\1", x = x)
+      # strip first condition
+      stripped_condition <- substring(x, first = nchar(first_condition) + 2) # + 1 because parameter is inclusive and + 1 because of whitespace
+      # get new first condition
+      second_condition <- gsub(pattern = paste0("(^", condition_pattern, ").*"), replacement = "\\1", x = stripped_condition)
+
+      label <- paste0(first_condition, "|", second_condition)
+      sub_label <- substring(stripped_condition, first = nchar(second_condition) + 1)
+    } else {
+      label <- paste0(label_parts[-length(label_parts)], collapse = " ")
+      sub_label <- label_parts[length(label_parts)]
+    }
+
     return(c(level, type, label, sub_label))
   })
 
-
-
-  return(data)
+  # list of vectors (rows) to matrix
+  meta_matrix <- do.call(rbind, meta_rows)
+  metadata <- cbind(metadata, meta_matrix)
+  names(metadata) <- c("key", "level", "type", "label", "sub_label")
+
+  ##### header
+
+  header <- c(
+    format = "Clarion",
+    version = "1.0",
+    delimiter = in_field_delimiter,
+    list(...)
+  )
+
+  # create clarion object for validation
+  clarion <- Clarion$new(header = header, metadata = metadata, data = data)
+
+  # TODO implement and use clarion write function
+  # write clarion
+  # header
+  flat_header <- data.table::data.table(paste0("!", names(clarion$header), "=", clarion$header))
+  data.table::fwrite(x = flat_header, file = output, col.names = FALSE, sep = "\t")
+  # metadata
+  # add '#'
+  names(clarion$metadata)[1] <- paste0("#", names(clarion$metadata)[1])
+  clarion$metadata[, names(clarion$metadata)[1] := paste0("#", clarion$metadata[[1]])]
+  data.table::fwrite(x = clarion$metadata, file = output, col.names = TRUE, sep = "\t", append = TRUE, quote = FALSE)
+  # data
+  data.table::fwrite(x = clarion$data, file = output, col.names = TRUE, sep = "\t", append = TRUE)
 }

From 9baee2790e09b7ce3ecd6cdfaa7957a4f60d723f Mon Sep 17 00:00:00 2001
From: Schultheis <hschult@kerckhoff.mpg.de>
Date: Thu, 7 Mar 2019 13:58:01 +0100
Subject: [PATCH 03/17] omit leading whitespace for contrast sub_labels

---
 R/parser.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/parser.R b/R/parser.R
index bc5a0b3..46996b8 100644
--- a/R/parser.R
+++ b/R/parser.R
@@ -478,7 +478,7 @@ tobias_converter <- function(input, output, omit_NA = FALSE, groups = c("TFBS",
       second_condition <- gsub(pattern = paste0("(^", condition_pattern, ").*"), replacement = "\\1", x = stripped_condition)
 
       label <- paste0(first_condition, "|", second_condition)
-      sub_label <- substring(stripped_condition, first = nchar(second_condition) + 1)
+      sub_label <- substring(stripped_condition, first = nchar(second_condition) + 2) # + 1 because parameter is inclusive and + 1 because of whitespace
     } else {
       label <- paste0(label_parts[-length(label_parts)], collapse = " ")
       sub_label <- label_parts[length(label_parts)]

From cfddb18ffcd7b7a623d99b364a6fe051f320f986 Mon Sep 17 00:00:00 2001
From: Schultheis <hschult@kerckhoff.mpg.de>
Date: Fri, 8 Mar 2019 15:09:59 +0100
Subject: [PATCH 04/17] clarion: fixed bug caused by to late object validation

---
 R/clarion.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/clarion.R b/R/clarion.R
index 235c95f..15e3e1a 100644
--- a/R/clarion.R
+++ b/R/clarion.R
@@ -117,6 +117,8 @@ Clarion <- R6::R6Class("Clarion",
                            self$metadata <- metadata
                            self$data <- data
 
+                           if (validate) self$validate()
+
                            # coerce unique_id and name to character
                            if (self$get_id() == self$get_name()) {
                              cols <- self$get_id()
@@ -124,8 +126,6 @@ Clarion <- R6::R6Class("Clarion",
                              cols <- c(self$get_id(), self$get_name())
                            }
                            self$data[, (cols) := lapply(.SD, as.character), .SDcols = cols]
-
-                           if (validate) self$validate()
                          }
                        ),
                        private = list(

From cb49e556f9badb49ca0bdeb77d94d13c41f9e036 Mon Sep 17 00:00:00 2001
From: Schultheis <hschult@kerckhoff.mpg.de>
Date: Fri, 8 Mar 2019 15:12:25 +0100
Subject: [PATCH 05/17] added tobias config file

---
 inst/extdata/tobias_config.json | 284 ++++++++++++++++++++++++++++++++
 1 file changed, 284 insertions(+)
 create mode 100644 inst/extdata/tobias_config.json

diff --git a/inst/extdata/tobias_config.json b/inst/extdata/tobias_config.json
new file mode 100644
index 0000000..c739b94
--- /dev/null
+++ b/inst/extdata/tobias_config.json
@@ -0,0 +1,284 @@
+{
+  "meta": [
+    {
+      "col_name": "id",
+      "level": "feature",
+      "type": "unique_id",
+      "label": "id",
+      "sublabel": ""
+    },
+    {
+      "col_name": "TFBS_chr",
+      "level": "feature",
+      "type": "category",
+      "label": "chr",
+      "sublabel": "TBFS"
+    },
+    {
+      "col_name": "TFBS_start",
+      "level": "feature",
+      "type": "category",
+      "label": "start",
+      "sublabel": "TFBS"
+    },
+    {
+      "col_name": "TFBS_end",
+      "level": "feature",
+      "type": "category",
+      "label": "end",
+      "sublabel": "TFBS"
+    },
+    {
+      "col_name": "TFBS_name",
+      "level": "feature",
+      "type": "category",
+      "label": "name",
+      "sublabel": "TFBS"
+    },
+    {
+      "col_name": "TFBS_score",
+      "level": "feature",
+      "type": "category",
+      "label": "score",
+      "sublabel": "TFBS"
+    },
+    {
+      "col_name": "TFBS_strand",
+      "level": "feature",
+      "type": "category",
+      "label": "strand",
+      "sublabel": "TFBS"
+    },
+    {
+      "col_name": "peak_chr",
+      "level": "feature",
+      "type": "category",
+      "label": "chr",
+      "sublabel": "peak"
+    },
+    {
+      "col_name": "peak_start",
+      "level": "feature",
+      "type": "category",
+      "label": "start",
+      "sublabel": "peak"
+    },
+    {
+      "col_name": "peak_end",
+      "level": "feature",
+      "type": "category",
+      "label": "end",
+      "sublabel": "peak"
+    },
+    {
+      "col_name": "peak_id",
+      "level": "feature",
+      "type": "array",
+      "label": "id",
+      "sublabel": "peak"
+    },
+    {
+      "col_name": "feature",
+      "level": "feature",
+      "type": "category",
+      "label": "feature",
+      "sublabel": ""
+    },
+    {
+      "col_name": "feat_start",
+      "level": "feature",
+      "type": "category",
+      "label": "start",
+      "sublabel": "feature"
+    },
+    {
+      "col_name": "feat_end",
+      "level": "feature",
+      "type": "category",
+      "label": "end",
+      "sublabel": "feature"
+    },
+    {
+      "col_name": "feat_strand",
+      "level": "feature",
+      "type": "category",
+      "label": "strand",
+      "sublabel": "feature"
+    },
+    {
+      "col_name": "feat_anchor",
+      "level": "feature",
+      "type": "category",
+      "label": "anchor",
+      "sublabel": "feature"
+    },
+    {
+      "col_name": "distance",
+      "level": "feature",
+      "type": "category",
+      "label": "distance",
+      "sublabel": ""
+    },
+    {
+      "col_name": "gene_biotype",
+      "level": "feature",
+      "type": "category",
+      "label": "biotype",
+      "sublabel": "gene"
+    },
+    {
+      "col_name": "gene_id",
+      "level": "feature",
+      "type": "category",
+      "label": "id",
+      "sublabel": "gene"
+    },
+    {
+      "col_name": "gene_name",
+      "level": "feature",
+      "type": "category",
+      "label": "name",
+      "sublabel": "gene"
+    },
+    {
+      "col_name": "2C_score",
+      "level": "condition",
+      "type": "score",
+      "label": "2C",
+      "sublabel": "score"
+    },
+    {
+      "col_name": "8C_score",
+      "level": "condition",
+      "type": "score",
+      "label": "8C",
+      "sublabel": "score"
+    },
+    {
+      "col_name": "ICM_score",
+      "level": "condition",
+      "type": "score",
+      "label": "ICM",
+      "sublabel": "score"
+    },
+    {
+      "col_name": "naive_hESC_score",
+      "level": "condition",
+      "type": "score",
+      "label": "naive hESC",
+      "sublabel": "score"
+    },
+    {
+      "col_name": "hESC_score",
+      "level": "condition",
+      "type": "score",
+      "label": "hESC",
+      "sublabel": "score"
+    },
+    {
+      "col_name": "2C_bound",
+      "level": "condition",
+      "type": "score",
+      "label": "2C",
+      "sublabel": "bound"
+    },
+    {
+      "col_name": "8C_bound",
+      "level": "condition",
+      "type": "score",
+      "label": "8C",
+      "sublabel": "bound"
+    },
+    {
+      "col_name": "ICM_bound",
+      "level": "condition",
+      "type": "score",
+      "label": "ICM",
+      "sublabel": "bound"
+    },
+    {
+      "col_name": "naive_hESC_bound",
+      "level": "condition",
+      "type": "score",
+      "label": "naive hESC",
+      "sublabel": "bound"
+    },
+    {
+      "col_name": "hESC_bound",
+      "level": "condition",
+      "type": "score",
+      "label": "hESC",
+      "sublabel": "bound"
+    },
+    {
+      "col_name": "2C_8C_log2fc",
+      "level": "contrast",
+      "type": "ratio",
+      "label": "2C|8C",
+      "sublabel": "log2fc"
+    },
+    {
+      "col_name": "2C_ICM_log2fc",
+      "level": "contrast",
+      "type": "ratio",
+      "label": "2C|ICM",
+      "sublabel": "log2fc"
+    },
+    {
+      "col_name": "2C_naive_hESC_log2fc",
+      "level": "contrast",
+      "type": "ratio",
+      "label": "2C|naive hESC",
+      "sublabel": "log2fc"
+    },
+    {
+      "col_name": "2C_hESC_log2fc",
+      "level": "contrast",
+      "type": "ratio",
+      "label": "2C|hESC",
+      "sublabel": "log2fc"
+    },
+    {
+      "col_name": "8C_ICM_log2fc",
+      "level": "contrast",
+      "type": "ratio",
+      "label": "8C|ICM",
+      "sublabel": "log2fc"
+    },
+    {
+      "col_name": "8C_naive_hESC_log2fc",
+      "level": "contrast",
+      "type": "ratio",
+      "label": "8C|naive hESC",
+      "sublabel": "log2fc"
+    },
+    {
+      "col_name": "8C_hESC_log2fc",
+      "level": "contrast",
+      "type": "ratio",
+      "label": "8C|hESC",
+      "sublabel": "log2fc"
+    },
+    {
+      "col_name": "ICM_naive_hESC_log2fc",
+      "level": "contrast",
+      "type": "ratio",
+      "label": "ICM|naive hESC",
+      "sublabel": "log2fc"
+    },
+    {
+      "col_name": "ICM_hESC_log2fc",
+      "level": "contrast",
+      "type": "ratio",
+      "label": "ICM|hESC",
+      "sublabel": "log2fc"
+    },
+    {
+      "col_name": "naive_hESC_hESC_log2fc",
+      "level": "contrast",
+      "type": "ratio",
+      "label": "naive hESC|hESC",
+      "sublabel": "log2fc"
+    }
+  ]
+}
\ No newline at end of file

From 2d8148386c4dc04d0916b30d32d8b8594f2e608e Mon Sep 17 00:00:00 2001
From: Schultheis <hschult@kerckhoff.mpg.de>
Date: Fri, 8 Mar 2019 15:15:03 +0100
Subject: [PATCH 06/17] tobias_parser: added column specific filtering; use
 condition names as fallback to config file; code refactoring and cleanup

---
 R/parser.R           | 147 +++++++++++++++++++++++++++++--------------
 man/tobias_parser.Rd |  38 +++++++++++
 2 files changed, 138 insertions(+), 47 deletions(-)
 create mode 100644 man/tobias_parser.Rd

diff --git a/R/parser.R b/R/parser.R
index 46996b8..195b11c 100644
--- a/R/parser.R
+++ b/R/parser.R
@@ -379,38 +379,45 @@ parser <- function(file, dec = ".") {
   return(Clarion$new(header = header, metadata = metadata, data = data))
 }
 
-#' TOBIAS single TFBS table to clarion converter
+#' TOBIAS TFBS table to clarion parser
 #'
 #' @param input Path to input table
 #' @param output Output path.
+#' @param filter_columns Either a vector of columnnames or a file containing one columnname per row.
+#' @param filter_pattern Keep columns matching the given pattern. Parameter filter_columns will be combined with the actual columnnames from the table before pattern is matched. In the case of no matches a warning will be issued and all columns will be used.
+#' @param config Json file containing metadata information for all columns.
 #' @param omit_NA Logical whether all rows containing NA should be removed.
-#' @param groups Keep columns related to given groups. Default = c("TFBS", "peak", "feature", "condition").
-#' @param in_field_delimiter Only applied on non numeric columns (?)
-#' @param dec Decimal separator.
-#' @param ... Used as header information
+#' @param condition_names Vector of condition names. Default = NULL. Used to classify columns not provided in config.
+#' @param condition_pattern Used to identify condition names by matching und removing given pattern with \code{\link[base]{grep}}. Ignored when condition_names is set.
+#' @param in_field_delimiter Delimiter for multi value fields. Default = ','.
+#' @param dec Decimal separator. Used in file reading and writing.
+#' @param ... Used as header information.
 #'
-tobias_converter <- function(input, output, omit_NA = FALSE, groups = c("TFBS", "peak", "feature", "condition"), in_field_delimiter = ",", dec = ".", ...) {
-  ##### data
-  data <- data.table::fread(input, dec = dec, header = TRUE)
-  columns <- names(data)
-
-  # filter groups
-  if (is.element("feature", groups)) {
-    groups <- c(groups[which(groups != "feature")], "feat", "distance", "gene_biotype", "gene_id", "gene_name")
+#' @export
+tobias_parser <- function(input, output, filter_columns = NULL, filter_pattern = NULL, config = system.file("extdata", "tobias_config.json", package = "wilson"), omit_NA = FALSE, condition_names = NULL, condition_pattern = "_bound$", in_field_delimiter = ",", dec = ".", ...) {
+  ## filter data columns
+  # check if filter columns is a file or a vector
+  if (!is.null(filter_columns) && file.exists(filter_columns)) {
+    select_columns <- scan(file = filter_columns, what = character())
+  } else {
+    select_columns <- filter_columns
   }
 
-  conditions <- gsub(pattern = "_bound", replacement = "", x = grep(pattern = "_bound$", x = columns, value = TRUE))
-  if (is.element("condition", groups)) {
-    groups <- c(groups[which(groups != "condition")], conditions)
-  }
+  # filter pattern
+  if (!is.null(filter_pattern)) {
+    # only read header
+    columns <- names(data.table::fread(input = input, header = TRUE, nrows = 0))
 
-  delete_columns <- grep(pattern = paste0(groups, collapse = "|"), x = columns, value = TRUE, invert = TRUE)
-  if (length(delete_columns) > 0) {
-    data[, (delete_columns) := NULL]
-  }
+    select_columns <- grep(pattern = filter_pattern, x = unique(c(select_columns, columns)), value = TRUE)
 
+    if (identical(select_columns, character(0))) {
+      warning("No column matches for given filter pattern! Proceeding with all columns")
+    }
+  }
+  ##### data
+  data <- data.table::fread(input, dec = dec, select = select_columns, header = TRUE)
 
-  # omit na
+  # omit na rows
   if (omit_NA) {
     data <- na.omit(data)
   }
@@ -424,40 +431,75 @@ tobias_converter <- function(input, output, omit_NA = FALSE, groups = c("TFBS",
   ##### metadata
   metadata <- data.table::data.table(names(data))
 
-  # create metadata row by row
+  # load config
+  if (!is.null(config)) {
+    config_file <- RJSONIO::fromJSON(config)
+    # get column names from config file
+    col_names <- vapply(X = config_file$meta, FUN.VALUE = character(1), FUN = function(x) {
+      x[["col_name"]]
+    })
+  } else {
+    config_file <- NULL
+  }
+
+  # identify conditions
+  if (is.null(condition_names)) {
+    conditions <- gsub(pattern = condition_pattern, replacement = "", x = grep(pattern = condition_pattern, x = metadata[[1]], value = TRUE))
+  } else {
+    conditions <- condition_names
+  }
+
+
+  ## create metadata row by row
+  unique_id_fallback <- NULL
   condition_pattern <- paste0(conditions, collapse = "|")
+
   meta_rows <- lapply(metadata[[1]], function(x) {
-    # level
-    if (grepl(pattern = condition_pattern, x = x, perl = TRUE)) {
-      if (grepl(pattern = "score|bound", x = x, perl = TRUE)) {
-        level <- "condition"
-      } else {
-        level <- "contrast"
-      }
+    # is column information provided in config?
+    if (!is.null(config_file) && is.element(x, col_names)) {
+      return(config_file$meta[[which(col_names == x)]][-1])
+    }
+
+    # if no information is provided via config try to guess meta-information
+    # utilize condition names to do so
+    warning("No information for ", x, " in config! Trying to guess meta-information.")
+
+    ## level
+    # get distance of all conditions matched to x
+    # count number of exact substring matches
+    match_dist <- adist(conditions, x) - nchar(x) + nchar(conditions)
+    count_matches <- sum(match_dist == 0)
+
+    if (count_matches == 1) {
+      level <- "condition"
+    } else if (count_matches > 1) {
+      level <- "contrast"
     } else {
       level <- "feature"
     }
 
-    # type
+    ## type
     if (level == "feature") {
-      if (x == "id") {
-        type <- "unique_id"
-      } else if (any(grepl(pattern = in_field_delimiter, x = data[[x]], fixed = TRUE))) {
+      if (any(grepl(pattern = in_field_delimiter, x = data[[x]], fixed = TRUE))) {
         type <- "array"
       } else {
+        # define fallback unique_id in case none is defined through config
+        if (is.null(unique_id_fallback) && anyDuplicated(data[[x]]) == 0) {
+          unique_id_fallback <<- x
+        }
         type <- "category"
       }
     } else {
       if (!is.numeric(data[[x]])) {
         type <- "array"
-      } else if (grepl(pattern = "score|bound", x = x, perl = TRUE)) {
-        type <- "score"
-      } else if (grepl(pattern = "log2fc$", x = x, perl = TRUE)) {
+      } else if (grepl(pattern = "fc|foldchange", x = x, perl = TRUE, ignore.case = TRUE)) {
         type <- "ratio"
+      } else {
+        type <- "score"
       }
     }
 
-    # label/ sub_label
+    ## label/ sub_label
     label <- sub_label <- ""
     label_parts <- unlist(strsplit(x = x, split = "_"))
 
@@ -473,12 +515,12 @@ tobias_converter <- function(input, output, omit_NA = FALSE, groups = c("TFBS",
       # get first condition using all identified conditions as pattern
       first_condition <- gsub(pattern = paste0("(^", condition_pattern, ").*"), replacement = "\\1", x = x)
       # strip first condition
-      stripped_condition <- substring(x, first = nchar(first_condition) + 2) # + 1 because parameter is inclusive and + 1 because of whitespace
+      stripped_condition <- substring(x, first = nchar(first_condition) + 2) # + 1 because parameter is inclusive and + 1 for whitespace
       # get new first condition
       second_condition <- gsub(pattern = paste0("(^", condition_pattern, ").*"), replacement = "\\1", x = stripped_condition)
 
       label <- paste0(first_condition, "|", second_condition)
-      sub_label <- substring(stripped_condition, first = nchar(second_condition) + 2) # + 1 because parameter is inclusive and + 1 because of whitespace
+      sub_label <- substring(stripped_condition, first = nchar(second_condition) + 2) # + 1 because parameter is inclusive and + 1 for whitespace
     } else {
       label <- paste0(label_parts[-length(label_parts)], collapse = " ")
       sub_label <- label_parts[length(label_parts)]
@@ -492,28 +534,39 @@ tobias_converter <- function(input, output, omit_NA = FALSE, groups = c("TFBS",
   metadata <- cbind(metadata, meta_matrix)
   names(metadata) <- c("key", "level", "type", "label", "sub_label")
 
-  ##### header
+  # set unique_id fallback
+  if (!any(metadata[["type"]] == "unique_id")) {
+    metadata[key == unique_id_fallback, "type"] <- "unique_id"
+  }
 
+  ##### header
   header <- c(
-    format = "Clarion",
-    version = "1.0",
-    delimiter = in_field_delimiter,
+    list(
+      format = "Clarion",
+      version = "1.0"),
     list(...)
   )
 
+  # add delimiter if necessary
+  if (any(metadata[["type"]] == "array")) {
+    header <- append(x = header, values = list(delimiter = in_field_delimiter), after = 2)
+  }
+
+  ##### validate
   # create clarion object for validation
   clarion <- Clarion$new(header = header, metadata = metadata, data = data)
 
+  ##### write
   # TODO implement and use clarion write function
   # write clarion
   # header
   flat_header <- data.table::data.table(paste0("!", names(clarion$header), "=", clarion$header))
-  data.table::fwrite(x = flat_header, file = output, col.names = FALSE, sep = "\t")
+  data.table::fwrite(x = flat_header, file = output, col.names = FALSE, sep = "\t", dec = dec)
   # metadata
   # add '#'
   names(clarion$metadata)[1] <- paste0("#", names(clarion$metadata)[1])
   clarion$metadata[, names(clarion$metadata)[1] := paste0("#", clarion$metadata[[1]])]
-  data.table::fwrite(x = clarion$metadata, file = output, col.names = TRUE, sep = "\t", append = TRUE, quote = FALSE)
+  data.table::fwrite(x = clarion$metadata, file = output, col.names = TRUE, sep = "\t", append = TRUE, quote = FALSE, dec = dec)
   # data
-  data.table::fwrite(x = clarion$data, file = output, col.names = TRUE, sep = "\t", append = TRUE)
+  data.table::fwrite(x = clarion$data, file = output, col.names = TRUE, sep = "\t", append = TRUE, dec = dec)
 }
diff --git a/man/tobias_parser.Rd b/man/tobias_parser.Rd
new file mode 100644
index 0000000..576be38
--- /dev/null
+++ b/man/tobias_parser.Rd
@@ -0,0 +1,38 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/parser.R
+\name{tobias_parser}
+\alias{tobias_parser}
+\title{TOBIAS TFBS table to clarion parser}
+\usage{
+tobias_parser(input, output, filter_columns = NULL,
+  filter_pattern = NULL, config = system.file("extdata",
+  "tobias_config.json", package = "wilson"), omit_NA = FALSE,
+  condition_names = NULL, condition_pattern = "_bound$",
+  in_field_delimiter = ",", dec = ".", ...)
+}
+\arguments{
+\item{input}{Path to input table}
+
+\item{output}{Output path.}
+
+\item{filter_columns}{Either a vector of columnnames or a file containing one columnname per row.}
+
+\item{filter_pattern}{Keep columns matching the given pattern. Parameter filter_columns will be combined with the actual columnnames from the table before pattern is matched. In the case of no matches a warning will be issued and all columns will be used.}
+
+\item{config}{Json file containing metadata information for all columns.}
+
+\item{omit_NA}{Logical whether all rows containing NA should be removed.}
+
+\item{condition_names}{Vector of condition names. Default = NULL. Used to classify columns not provided in config.}
+
+\item{condition_pattern}{Used to identify condition names by matching und removing given pattern with \code{\link[base]{grep}}. Ignored when condition_names is set.}
+
+\item{in_field_delimiter}{Delimiter for multi value fields. Default = ','.}
+
+\item{dec}{Decimal separator. Used in file reading and writing.}
+
+\item{...}{Used as header information.}
+}
+\description{
+TOBIAS TFBS table to clarion parser
+}

From 7badae96a5406997f756c7d0024d512c33218951 Mon Sep 17 00:00:00 2001
From: Schultheis <hschult@kerckhoff.mpg.de>
Date: Fri, 8 Mar 2019 15:35:06 +0100
Subject: [PATCH 07/17] tobias_parser: added details

---
 R/parser.R           | 5 +++++
 man/tobias_parser.Rd | 7 ++++++-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/R/parser.R b/R/parser.R
index 195b11c..13591b8 100644
--- a/R/parser.R
+++ b/R/parser.R
@@ -381,6 +381,8 @@ parser <- function(file, dec = ".") {
 
 #' TOBIAS TFBS table to clarion parser
 #'
+#' Click \href{https://github.molgen.mpg.de/loosolab/TOBIAS}{here} for more information about TOBIAS.
+#'
 #' @param input Path to input table
 #' @param output Output path.
 #' @param filter_columns Either a vector of columnnames or a file containing one columnname per row.
@@ -393,6 +395,9 @@ parser <- function(file, dec = ".") {
 #' @param dec Decimal separator. Used in file reading and writing.
 #' @param ... Used as header information.
 #'
+#' @details During conversion the parser will try to use the given config (if provided) to create the \href{https://github.molgen.mpg.de/loosolab/wilson-apps/wiki/CLARION-Format}{Clarion} metadata. In the case of insufficient config information it will try to approximate by referencing condition names issuing warnings in the process.
+#' @details Factor grouping (metadata factor columns) is currently not implemented!
+#'
 #' @export
 tobias_parser <- function(input, output, filter_columns = NULL, filter_pattern = NULL, config = system.file("extdata", "tobias_config.json", package = "wilson"), omit_NA = FALSE, condition_names = NULL, condition_pattern = "_bound$", in_field_delimiter = ",", dec = ".", ...) {
   ## filter data columns
diff --git a/man/tobias_parser.Rd b/man/tobias_parser.Rd
index 576be38..0606214 100644
--- a/man/tobias_parser.Rd
+++ b/man/tobias_parser.Rd
@@ -34,5 +34,10 @@ tobias_parser(input, output, filter_columns = NULL,
 \item{...}{Used as header information.}
 }
 \description{
-TOBIAS TFBS table to clarion parser
+Click \href{https://github.molgen.mpg.de/loosolab/TOBIAS}{here} for more information about TOBIAS.
+}
+\details{
+During conversion the parser will try to use the given config (if provided) to create the \href{https://github.molgen.mpg.de/loosolab/wilson-apps/wiki/CLARION-Format}{Clarion} metadata. In the case of insufficient config information it will try to approximate by referencing condition names issuing warnings in the process.
+
+Factor grouping (metadata factor columns) is currently not implemented!
 }

From 7dd7d3a22c35ee83867a9be21a52fa22c88a6c8f Mon Sep 17 00:00:00 2001
From: Schultheis <hschult@kerckhoff.mpg.de>
Date: Fri, 8 Mar 2019 15:46:39 +0100
Subject: [PATCH 08/17] updated DESCRIPTION, NAMESPACE, NEWS.md

---
 DESCRIPTION | 4 ++--
 NAMESPACE   | 1 +
 NEWS.md     | 2 ++
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index b05b5fa..27b5b46 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: wilson
 Type: Package
 Title: Web-Based Interactive Omics Visualization
-Version: 2.0.2
+Version: 2.0.3
 Authors@R: c(
     person("Hendrik", "Schultheis", email = "hendrik.schultheis@mpi-bn.mpg.de", role = c("aut", "cre")),
     person("Jens", "Preussner", email = "jens.preussner@mpi-bn.mpg.de", role = "aut"),
@@ -46,7 +46,7 @@ Imports: shiny,
     openssl,
     methods,
     R6
-RoxygenNote: 6.1.0
+RoxygenNote: 6.1.1
 biocViews: 
 Suggests: knitr,
     rmarkdown,
diff --git a/NAMESPACE b/NAMESPACE
index df0d7dd..355b8a2 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -32,6 +32,7 @@ export(pcaUI)
 export(scatterPlot)
 export(scatterPlotUI)
 export(set_logger)
+export(tobias_parser)
 export(transformation)
 export(transformationUI)
 import(data.table)
diff --git a/NEWS.md b/NEWS.md
index 2f513b2..fb02d69 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,5 @@
+# wilson 2.0.3
+- implemented tobias_parser
 # wilson 2.0.2
 - fixed CRAN check Note/ Error
 # wilson 2.0.1

From e5fc83c920371e13ccc6b1315a3424b87ddf3eb8 Mon Sep 17 00:00:00 2001
From: Schultheis <hschult@kerckhoff.mpg.de>
Date: Fri, 8 Mar 2019 15:51:07 +0100
Subject: [PATCH 09/17] tobias_parser: fixed devtools::check() warnings/notes

---
 R/parser.R | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/R/parser.R b/R/parser.R
index 13591b8..92eecf8 100644
--- a/R/parser.R
+++ b/R/parser.R
@@ -424,11 +424,11 @@ tobias_parser <- function(input, output, filter_columns = NULL, filter_pattern =
 
   # omit na rows
   if (omit_NA) {
-    data <- na.omit(data)
+    data <- stats::na.omit(data)
   }
 
   # create id column
-  data[, id := seq_len(nrow(data))]
+  data[, "id" := seq_len(nrow(data))]
   # move id column to first position
   new_order <- c("id", names(data)[ names(data) != "id"])
   data <- data[, new_order, with = FALSE]
@@ -472,7 +472,7 @@ tobias_parser <- function(input, output, filter_columns = NULL, filter_pattern =
     ## level
     # get distance of all conditions matched to x
     # count number of exact substring matches
-    match_dist <- adist(conditions, x) - nchar(x) + nchar(conditions)
+    match_dist <- utils::adist(conditions, x) - nchar(x) + nchar(conditions)
     count_matches <- sum(match_dist == 0)
 
     if (count_matches == 1) {

From 5db0610c240785219285bd932121896653b2765b Mon Sep 17 00:00:00 2001
From: Schultheis <hschult@kerckhoff.mpg.de>
Date: Wed, 13 Mar 2019 10:23:33 +0100
Subject: [PATCH 10/17] tobias_parser: do not combine filter_columns with table
 columns for pattern matching

---
 R/parser.R           | 13 ++++++++-----
 man/tobias_parser.Rd |  2 +-
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/R/parser.R b/R/parser.R
index 92eecf8..1103316 100644
--- a/R/parser.R
+++ b/R/parser.R
@@ -386,7 +386,7 @@ parser <- function(file, dec = ".") {
 #' @param input Path to input table
 #' @param output Output path.
 #' @param filter_columns Either a vector of columnnames or a file containing one columnname per row.
-#' @param filter_pattern Keep columns matching the given pattern. Parameter filter_columns will be combined with the actual columnnames from the table before pattern is matched. In the case of no matches a warning will be issued and all columns will be used.
+#' @param filter_pattern Keep columns matching the given pattern. Uses parameter filter_columns for matching if set. In the case of no matches a warning will be issued and all columns will be used.
 #' @param config Json file containing metadata information for all columns.
 #' @param omit_NA Logical whether all rows containing NA should be removed.
 #' @param condition_names Vector of condition names. Default = NULL. Used to classify columns not provided in config.
@@ -410,13 +410,16 @@ tobias_parser <- function(input, output, filter_columns = NULL, filter_pattern =
 
   # filter pattern
   if (!is.null(filter_pattern)) {
-    # only read header
-    columns <- names(data.table::fread(input = input, header = TRUE, nrows = 0))
+    # use file header if filter_columns is empty
+    if (is.null(select_columns)) {
+      # only read header
+      select_columns <- names(data.table::fread(input = input, header = TRUE, nrows = 0))
+    }
 
-    select_columns <- grep(pattern = filter_pattern, x = unique(c(select_columns, columns)), value = TRUE)
+    select_columns <- grep(pattern = filter_pattern, x = select_columns, value = TRUE)
 
     if (identical(select_columns, character(0))) {
-      warning("No column matches for given filter pattern! Proceeding with all columns")
+      warning("No column matches for given filter pattern! Proceeding with all columns.")
     }
   }
   ##### data
diff --git a/man/tobias_parser.Rd b/man/tobias_parser.Rd
index 0606214..f1d0a48 100644
--- a/man/tobias_parser.Rd
+++ b/man/tobias_parser.Rd
@@ -17,7 +17,7 @@ tobias_parser(input, output, filter_columns = NULL,
 
 \item{filter_columns}{Either a vector of columnnames or a file containing one columnname per row.}
 
-\item{filter_pattern}{Keep columns matching the given pattern. Parameter filter_columns will be combined with the actual columnnames from the table before pattern is matched. In the case of no matches a warning will be issued and all columns will be used.}
+\item{filter_pattern}{Keep columns matching the given pattern. Uses parameter filter_columns for matching if set. In the case of no matches a warning will be issued and all columns will be used.}
 
 \item{config}{Json file containing metadata information for all columns.}
 

From af0c3d91f27503c68d99bd79f4bb065a90ef6aeb Mon Sep 17 00:00:00 2001
From: Schultheis <hschult@kerckhoff.mpg.de>
Date: Wed, 13 Mar 2019 10:46:56 +0100
Subject: [PATCH 11/17] tobias_parser: added missing probability type

---
 R/parser.R | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/R/parser.R b/R/parser.R
index 1103316..3c5041f 100644
--- a/R/parser.R
+++ b/R/parser.R
@@ -492,6 +492,7 @@ tobias_parser <- function(input, output, filter_columns = NULL, filter_pattern =
         type <- "array"
       } else {
         # define fallback unique_id in case none is defined through config
+        # this will define first unique feature category as unqiue_id
         if (is.null(unique_id_fallback) && anyDuplicated(data[[x]]) == 0) {
           unique_id_fallback <<- x
         }
@@ -500,8 +501,10 @@ tobias_parser <- function(input, output, filter_columns = NULL, filter_pattern =
     } else {
       if (!is.numeric(data[[x]])) {
         type <- "array"
-      } else if (grepl(pattern = "fc|foldchange", x = x, perl = TRUE, ignore.case = TRUE)) {
+      } else if (grepl(pattern = "fc|fold[\\._\\- ]?change", x = x, perl = TRUE, ignore.case = TRUE)) {
         type <- "ratio"
+      } else if (grepl(pattern = "p[\\._\\- ]?val|p[\\._\\- ]?adj", x = x, perl = TRUE, ignore.case = TRUE)) {
+        type <- "probability"
       } else {
         type <- "score"
       }

From 9178e7f09284aba04d63d6a3169eedb045008ac8 Mon Sep 17 00:00:00 2001
From: Schultheis <hschult@kerckhoff.mpg.de>
Date: Wed, 13 Mar 2019 11:03:56 +0100
Subject: [PATCH 12/17] tobias_parser: implemented unique_id parameter; print
 single warning for columns missing in config

---
 R/parser.R | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/R/parser.R b/R/parser.R
index 3c5041f..cd8856d 100644
--- a/R/parser.R
+++ b/R/parser.R
@@ -393,13 +393,14 @@ parser <- function(file, dec = ".") {
 #' @param condition_pattern Used to identify condition names by matching und removing given pattern with \code{\link[base]{grep}}. Ignored when condition_names is set.
 #' @param in_field_delimiter Delimiter for multi value fields. Default = ','.
 #' @param dec Decimal separator. Used in file reading and writing.
+#' @param unique_id Whether the table contains an unique id column. If FALSE (default) will create one at first position.
 #' @param ... Used as header information.
 #'
 #' @details During conversion the parser will try to use the given config (if provided) to create the \href{https://github.molgen.mpg.de/loosolab/wilson-apps/wiki/CLARION-Format}{Clarion} metadata. In the case of insufficient config information it will try to approximate by referencing condition names issuing warnings in the process.
 #' @details Factor grouping (metadata factor columns) is currently not implemented!
 #'
 #' @export
-tobias_parser <- function(input, output, filter_columns = NULL, filter_pattern = NULL, config = system.file("extdata", "tobias_config.json", package = "wilson"), omit_NA = FALSE, condition_names = NULL, condition_pattern = "_bound$", in_field_delimiter = ",", dec = ".", ...) {
+tobias_parser <- function(input, output, filter_columns = NULL, filter_pattern = NULL, config = system.file("extdata", "tobias_config.json", package = "wilson"), omit_NA = FALSE, condition_names = NULL, condition_pattern = "_bound$", in_field_delimiter = ",", dec = ".", unique_id = FALSE, ...) {
   ## filter data columns
   # check if filter columns is a file or a vector
   if (!is.null(filter_columns) && file.exists(filter_columns)) {
@@ -422,6 +423,7 @@ tobias_parser <- function(input, output, filter_columns = NULL, filter_pattern =
       warning("No column matches for given filter pattern! Proceeding with all columns.")
     }
   }
+
   ##### data
   data <- data.table::fread(input, dec = dec, select = select_columns, header = TRUE)
 
@@ -431,10 +433,12 @@ tobias_parser <- function(input, output, filter_columns = NULL, filter_pattern =
   }
 
   # create id column
-  data[, "id" := seq_len(nrow(data))]
-  # move id column to first position
-  new_order <- c("id", names(data)[ names(data) != "id"])
-  data <- data[, new_order, with = FALSE]
+  if (!unique_id) {
+    data[, "id" := seq_len(nrow(data))]
+    # move id column to first position
+    new_order <- c("id", names(data)[ names(data) != "id"])
+    data <- data[, new_order, with = FALSE]
+  }
 
   ##### metadata
   metadata <- data.table::data.table(names(data))
@@ -461,6 +465,7 @@ tobias_parser <- function(input, output, filter_columns = NULL, filter_pattern =
   ## create metadata row by row
   unique_id_fallback <- NULL
   condition_pattern <- paste0(conditions, collapse = "|")
+  approx_columns <- NULL
 
   meta_rows <- lapply(metadata[[1]], function(x) {
     # is column information provided in config?
@@ -470,7 +475,7 @@ tobias_parser <- function(input, output, filter_columns = NULL, filter_pattern =
 
     # if no information is provided via config try to guess meta-information
     # utilize condition names to do so
-    warning("No information for ", x, " in config! Trying to guess meta-information.")
+    approx_columns <<- c(approx_columns, x)
 
     ## level
     # get distance of all conditions matched to x
@@ -540,6 +545,11 @@ tobias_parser <- function(input, output, filter_columns = NULL, filter_pattern =
     return(c(level, type, label, sub_label))
   })
 
+  # meta approximation warning
+  if (!is.null(approx_columns)) {
+    warning("Missing information in config! Tried guessing meta-information for ", paste0(approx_columns, collapse = ", "))
+  }
+
   # list of vectors (rows) to matrix
   meta_matrix <- do.call(rbind, meta_rows)
   metadata <- cbind(metadata, meta_matrix)

From 75d0252448eb6e1650e92f84aa25c359abb0b4c8 Mon Sep 17 00:00:00 2001
From: Schultheis <hschult@kerckhoff.mpg.de>
Date: Wed, 13 Mar 2019 11:07:12 +0100
Subject: [PATCH 13/17] tobias_parser: docu update

---
 R/parser.R           | 2 +-
 man/tobias_parser.Rd | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/R/parser.R b/R/parser.R
index cd8856d..c5a57c2 100644
--- a/R/parser.R
+++ b/R/parser.R
@@ -497,7 +497,7 @@ tobias_parser <- function(input, output, filter_columns = NULL, filter_pattern =
         type <- "array"
       } else {
         # define fallback unique_id in case none is defined through config
-        # this will define first unique feature category as unqiue_id
+        # this will redefine first unique feature with type category as unqiue_id
         if (is.null(unique_id_fallback) && anyDuplicated(data[[x]]) == 0) {
           unique_id_fallback <<- x
         }
diff --git a/man/tobias_parser.Rd b/man/tobias_parser.Rd
index f1d0a48..de9fcd4 100644
--- a/man/tobias_parser.Rd
+++ b/man/tobias_parser.Rd
@@ -8,7 +8,7 @@ tobias_parser(input, output, filter_columns = NULL,
   filter_pattern = NULL, config = system.file("extdata",
   "tobias_config.json", package = "wilson"), omit_NA = FALSE,
   condition_names = NULL, condition_pattern = "_bound$",
-  in_field_delimiter = ",", dec = ".", ...)
+  in_field_delimiter = ",", dec = ".", unique_id = FALSE, ...)
 }
 \arguments{
 \item{input}{Path to input table}
@@ -31,6 +31,8 @@ tobias_parser(input, output, filter_columns = NULL,
 
 \item{dec}{Decimal separator. Used in file reading and writing.}
 
+\item{unique_id}{Whether the table contains an unique id column. If FALSE (default) will create one at first position.}
+
 \item{...}{Used as header information.}
 }
 \description{

From 30ecb5eca80e363f3b200201c95ee49720016edc Mon Sep 17 00:00:00 2001
From: Schultheis <hschult@kerckhoff.mpg.de>
Date: Wed, 13 Mar 2019 11:38:04 +0100
Subject: [PATCH 14/17] tobias_parser: which.min -> which.max (get first TRUE)

---
 R/parser.R | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/R/parser.R b/R/parser.R
index c5a57c2..9e38855 100644
--- a/R/parser.R
+++ b/R/parser.R
@@ -387,7 +387,7 @@ parser <- function(file, dec = ".") {
 #' @param output Output path.
 #' @param filter_columns Either a vector of columnnames or a file containing one columnname per row.
 #' @param filter_pattern Keep columns matching the given pattern. Uses parameter filter_columns for matching if set. In the case of no matches a warning will be issued and all columns will be used.
-#' @param config Json file containing metadata information for all columns.
+#' @param config Json file containing metadata information for all columns. Will use first occurence for duplicate column names.
 #' @param omit_NA Logical whether all rows containing NA should be removed.
 #' @param condition_names Vector of condition names. Default = NULL. Used to classify columns not provided in config.
 #' @param condition_pattern Used to identify condition names by matching und removing given pattern with \code{\link[base]{grep}}. Ignored when condition_names is set.
@@ -470,7 +470,8 @@ tobias_parser <- function(input, output, filter_columns = NULL, filter_pattern =
   meta_rows <- lapply(metadata[[1]], function(x) {
     # is column information provided in config?
     if (!is.null(config_file) && is.element(x, col_names)) {
-      return(config_file$meta[[which(col_names == x)]][-1])
+      # uses first appearance in config file in case of duplicates
+      return(config_file$meta[[which.max(col_names == x)]][-1])
     }
 
     # if no information is provided via config try to guess meta-information

From 48840c88fd3ff5441b4bc68d00012ebd79a98dda Mon Sep 17 00:00:00 2001
From: Schultheis <hschult@kerckhoff.mpg.de>
Date: Wed, 13 Mar 2019 13:51:17 +0100
Subject: [PATCH 15/17] add overview columns to config

---
 inst/extdata/tobias_config.json | 232 ++++++++++++++++++++++++++++++++
 1 file changed, 232 insertions(+)

diff --git a/inst/extdata/tobias_config.json b/inst/extdata/tobias_config.json
index c739b94..f8e050a 100644
--- a/inst/extdata/tobias_config.json
+++ b/inst/extdata/tobias_config.json
@@ -279,6 +279,238 @@
       "type": "ratio",
       "label": "naive hESC|hESC",
       "sublabel": "log2fc"
+    },
+    // overview columns
+    {
+      "col_name": "TF_name",
+      "level": "feature",
+      "type": "unique_id",
+      "label": "TF name",
+      "sublabel": ""
+    },
+    {
+      "col_name": "cluster",
+      "level": "feature",
+      "type": "category",
+      "label": "cluster",
+      "sublabel": ""
+    },
+    {
+      "col_name": "total_tfbs",
+      "level": "feature",
+      "type": "category",
+      "label": "total TFBS",
+      "sublabel": ""
+    },
+    {
+      "col_name": "2C_mean_score",
+      "level": "condition",
+      "type": "score",
+      "label": "2C",
+      "sublabel": "mean"
+    },
+    {
+      "col_name": "2C_bound",
+      "level": "condition",
+      "type": "score",
+      "label": "2C",
+      "sublabel": "bound"
+    },
+    {
+      "col_name": "8C_mean_score",
+      "level": "condition",
+      "type": "score",
+      "label": "8C",
+      "sublabel": "mean"
+    },
+    {
+      "col_name": "8C_bound",
+      "level": "condition",
+      "type": "score",
+      "label": "8C",
+      "sublabel": "bound"
+    },
+    {
+      "col_name": "ICM_mean_score",
+      "level": "condition",
+      "type": "score",
+      "label": "ICM",
+      "sublabel": "mean"
+    },
+    {
+      "col_name": "ICM_bound",
+      "level": "condition",
+      "type": "score",
+      "label": "ICM",
+      "sublabel": "bound"
+    },
+    {
+      "col_name": "naive_hESC_mean_score",
+      "level": "condition",
+      "type": "score",
+      "label": "naive_hESC",
+      "sublabel": "mean"
+    },
+    {
+      "col_name": "naive_hESC_bound",
+      "level": "condition",
+      "type": "score",
+      "label": "naive_hESC",
+      "sublabel": "bound"
+    },
+    {
+      "col_name": "hESC_mean_score",
+      "level": "condition",
+      "type": "score",
+      "label": "hESC",
+      "sublabel": "mean"
+    },
+    {
+      "col_name": "hESC_bound",
+      "level": "condition",
+      "type": "score",
+      "label": "hESC",
+      "sublabel": "bound"
+    },
+    {
+      "col_name": "2C_8C_change",
+      "level": "contrast",
+      "type": "ratio",
+      "label": "2C|8C",
+      "sublabel": "change"
+    },
+    {
+      "col_name": "2C_8C_pvalue",
+      "level": "contrast",
+      "type": "probability",
+      "label": "2C|8C",
+      "sublabel": "p-value"
+    },
+    {
+      "col_name": "2C_ICM_change",
+      "level": "contrast",
+      "type": "ratio",
+      "label": "2C|ICM",
+      "sublabel": "change"
+    },
+    {
+      "col_name": "2C_ICM_pvalue",
+      "level": "contrast",
+      "type": "probability",
+      "label": "2C|ICM",
+      "sublabel": "p-value"
+    },
+    {
+      "col_name": "2C_naive_hESC_change",
+      "level": "contrast",
+      "type": "ratio",
+      "label": "2C|naive hESC",
+      "sublabel": "change"
+    },
+    {
+      "col_name": "2C_naive_hESC_pvalue",
+      "level": "contrast",
+      "type": "probability",
+      "label": "2C|naive hESC",
+      "sublabel": "p-value"
+    },
+    {
+      "col_name": "2C_hESC_change",
+      "level": "contrast",
+      "type": "ratio",
+      "label": "2C|hESC",
+      "sublabel": "change"
+    },
+    {
+      "col_name": "2C_hESC_pvalue",
+      "level": "contrast",
+      "type": "probability",
+      "label": "2C|hESC",
+      "sublabel": "p-value"
+    },
+    {
+      "col_name": "8C_ICM_change",
+      "level": "contrast",
+      "type": "ratio",
+      "label": "8C|ICM",
+      "sublabel": "change"
+    },
+    {
+      "col_name": "8C_ICM_pvalue",
+      "level": "contrast",
+      "type": "probability",
+      "label": "8C|ICM",
+      "sublabel": "p-value"
+    },
+    {
+      "col_name": "8C_naive_hESC_change",
+      "level": "contrast",
+      "type": "ratio",
+      "label": "8C|naive hESC",
+      "sublabel": "change"
+    },
+    {
+      "col_name": "8C_naive_hESC_pvalue",
+      "level": "contrast",
+      "type": "probability",
+      "label": "8C|naive hESC",
+      "sublabel": "p-value"
+    },
+    {
+      "col_name": "8C_hESC_change",
+      "level": "contrast",
+      "type": "ratio",
+      "label": "8C|hESC",
+      "sublabel": "change"
+    },
+    {
+      "col_name": "8C_hESC_pvalue",
+      "level": "contrast",
+      "type": "probability",
+      "label": "8C|hESC",
+      "sublabel": "p-value"
+    },
+    {
+      "col_name": "ICM_naive_hESC_change",
+      "level": "contrast",
+      "type": "ratio",
+      "label": "ICM|naive hESC",
+      "sublabel": "change"
+    },
+    {
+      "col_name": "ICM_naive_hESC_pvalue",
+      "level": "contrast",
+      "type": "probability",
+      "label": "ICM|naive hESC",
+      "sublabel": "p-value"
+    },
+    {
+      "col_name": "ICM_hESC_change",
+      "level": "contrast",
+      "type": "ratio",
+      "label": "ICM|hESC",
+      "sublabel": "change"
+    },
+    {
+      "col_name": "ICM_hESC_pvalue",
+      "level": "contrast",
+      "type": "probability",
+      "label": "ICM|hESC",
+      "sublabel": "p-value"
+    },
+    {
+      "col_name": "naive_hESC_hESC_change",
+      "level": "contrast",
+      "type": "ratio",
+      "label": "naive hESC|hESC",
+      "sublabel": "p-value"
+    },
+    {
+      "col_name": "naive_hESC_hESC_pvalue",
+      "level": "contrast",
+      "type": "probability",
+      "label": "naive hESC|hESC",
+      "sublabel": "change"
     }
   ]
 }
\ No newline at end of file

From fe303364264b69e0ef499bfd95f5d2c551ba1529 Mon Sep 17 00:00:00 2001
From: Schultheis <hschult@kerckhoff.mpg.de>
Date: Wed, 13 Mar 2019 15:28:52 +0100
Subject: [PATCH 16/17] updated tobias_parser.md

---
 man/tobias_parser.Rd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/man/tobias_parser.Rd b/man/tobias_parser.Rd
index de9fcd4..c7cafdc 100644
--- a/man/tobias_parser.Rd
+++ b/man/tobias_parser.Rd
@@ -19,7 +19,7 @@ tobias_parser(input, output, filter_columns = NULL,
 
 \item{filter_pattern}{Keep columns matching the given pattern. Uses parameter filter_columns for matching if set. In the case of no matches a warning will be issued and all columns will be used.}
 
-\item{config}{Json file containing metadata information for all columns.}
+\item{config}{Json file containing metadata information for all columns. Will use first occurence for duplicate column names.}
 
 \item{omit_NA}{Logical whether all rows containing NA should be removed.}
 

From 27c9eff7cafdf8da052ba06ba953b51d18cff78f Mon Sep 17 00:00:00 2001
From: Schultheis <hschult@kerckhoff.mpg.de>
Date: Thu, 14 Mar 2019 15:22:51 +0100
Subject: [PATCH 17/17] fixed version, new feature = minor upgrade (2.1.0)

---
 DESCRIPTION | 2 +-
 NEWS.md     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 27b5b46..b4aa4e6 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: wilson
 Type: Package
 Title: Web-Based Interactive Omics Visualization
-Version: 2.0.3
+Version: 2.1.0
 Authors@R: c(
     person("Hendrik", "Schultheis", email = "hendrik.schultheis@mpi-bn.mpg.de", role = c("aut", "cre")),
     person("Jens", "Preussner", email = "jens.preussner@mpi-bn.mpg.de", role = "aut"),
diff --git a/NEWS.md b/NEWS.md
index fb02d69..f61bbd9 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,4 +1,4 @@
-# wilson 2.0.3
+# wilson 2.1.0
 - implemented tobias_parser
 # wilson 2.0.2
 - fixed CRAN check Note/ Error