diff --git a/bin/1.2_filter_motifs/compareBed_runinfo.R b/bin/1.2_filter_motifs/compareBed_runinfo.R index 4a7af6f..4c5686d 100644 --- a/bin/1.2_filter_motifs/compareBed_runinfo.R +++ b/bin/1.2_filter_motifs/compareBed_runinfo.R @@ -8,7 +8,13 @@ # adds a column with a flag "contains_maxpos" # and creates a file with information of the bedtool comparison -# TODO: check number of columns, implement make.unique in a better way +# parameters: Parameters are not named. Respect the parameter order. +# min: minimum footprint size threshold +# max: maximum footprint size threshold +# input_raw: unfiltered BED-file +# input_filtered: filtered BED-file (after bedtools subtract) +# output: output path/file +# output_stats: output file with general info of the comparison # parsing parameters library(data.table) @@ -18,8 +24,10 @@ max = as.numeric(args[2]) input_raw = args[3] input_filtered = args[4] output = args[5] +output_stats = args[6] + +data_filtered = fread(input_filtered, sep='\t') -data_filtered = fread(input_filtered) # check if data has less than 9 columns if (ncol(data_filtered) < 9) { stop("footprint file has less than 9 columns. exiting.") @@ -51,7 +59,7 @@ data_filtered[[8]] = data_filtered[[8]] - data_filtered[[2]] fwrite(data_filtered, output, col.names=FALSE, quote = FALSE, sep = '\t') # data is the initial data before any comparisons have been done (-d parameter of compareBed.sh) -data = fread(input_raw) +data = fread(input_raw, sep='\t') # some statistics about the bedtool comparisons are stored in FilterMotifs.stats sum_data = sum(data[[3]]-data[[2]]) @@ -62,4 +70,4 @@ length_data = formatC(mean(data[[3]]-data[[2]]), digits = 4) length_filtered = formatC(mean(data_filtered[[7]]), digits = 4) stats = data.frame(sum_nt_input = sum_data, sum_nt_filtered = sum_filtered, quotient_of_nt = difference_nt, loss_of_nt = loss_nt, mean_length_input = length_data, mean_length_filtered = length_filtered, flag_1_ratio = length(which(data_filtered$containsMaxpos == 1))/dim(data_filtered)[1]) stats = t(stats) -write.table(stats, "./FilterMotifs.stats", col.names = FALSE, quote = FALSE, sep = '\t') +write.table(stats, output_stats, col.names = FALSE, quote = FALSE, sep = '\t')