From c55e8ff1c5fb1525eb52e3e15c6cf25423e8ba05 Mon Sep 17 00:00:00 2001 From: JannikHamp Date: Tue, 8 Jan 2019 16:03:57 +0100 Subject: [PATCH] added documentation and parameter for .stats output file --- bin/1.2_filter_motifs/compareBed_runinfo.R | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/bin/1.2_filter_motifs/compareBed_runinfo.R b/bin/1.2_filter_motifs/compareBed_runinfo.R index 4a7af6f..4c5686d 100644 --- a/bin/1.2_filter_motifs/compareBed_runinfo.R +++ b/bin/1.2_filter_motifs/compareBed_runinfo.R @@ -8,7 +8,13 @@ # adds a column with a flag "contains_maxpos" # and creates a file with information of the bedtool comparison -# TODO: check number of columns, implement make.unique in a better way +# parameters: Parameters are not named. Respect the parameter order. +# min: minimum footprint size threshold +# max: maximum footprint size threshold +# input_raw: unfiltered BED-file +# input_filtered: filtered BED-file (after bedtools subtract) +# output: output path/file +# output_stats: output file with general info of the comparison # parsing parameters library(data.table) @@ -18,8 +24,10 @@ max = as.numeric(args[2]) input_raw = args[3] input_filtered = args[4] output = args[5] +output_stats = args[6] + +data_filtered = fread(input_filtered, sep='\t') -data_filtered = fread(input_filtered) # check if data has less than 9 columns if (ncol(data_filtered) < 9) { stop("footprint file has less than 9 columns. 
exiting.") @@ -51,7 +59,7 @@ data_filtered[[8]] = data_filtered[[8]] - data_filtered[[2]] fwrite(data_filtered, output, col.names=FALSE, quote = FALSE, sep = '\t') # data is the initial data before any comparisons have been done (-d parameter of compareBed.sh) -data = fread(input_raw) +data = fread(input_raw, sep='\t') # some statistics about the bedtool comparisons are stored in FilterMotifs.stats sum_data = sum(data[[3]]-data[[2]]) @@ -62,4 +70,4 @@ length_data = formatC(mean(data[[3]]-data[[2]]), digits = 4) length_filtered = formatC(mean(data_filtered[[7]]), digits = 4) stats = data.frame(sum_nt_input = sum_data, sum_nt_filtered = sum_filtered, quotient_of_nt = difference_nt, loss_of_nt = loss_nt, mean_length_input = length_data, mean_length_filtered = length_filtered, flag_1_ratio = length(which(data_filtered$containsMaxpos == 1))/dim(data_filtered)[1]) stats = t(stats) -write.table(stats, "./FilterMotifs.stats", col.names = FALSE, quote = FALSE, sep = '\t') +write.table(stats, output_stats, col.names = FALSE, quote = FALSE, sep = '\t')