Skip to content

Commit

Permalink
added documentation and parameter for .stats output file
Browse files Browse the repository at this point in the history
  • Loading branch information
JannikHamp authored Jan 8, 2019
1 parent becfeae commit c55e8ff
Showing 1 changed file with 12 additions and 4 deletions.
16 changes: 12 additions & 4 deletions bin/1.2_filter_motifs/compareBed_runinfo.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,13 @@
# adds a column with a flag "contains_maxpos"
# and creates a file with information of the bedtool comparison

# TODO: check number of columns, implement make.unique in a better way
# parameters: Parameters are not named. Respect the parameter order.
# min: minimum footprint size threshold
# max: maximum footprint size threshold
# input_raw: unfiltered BED-file
# input_filtered: filtered BED-file (after bedtools subtract)
# output: output path/file
# output_stats: output file with general info of the comparison

# parsing parameters
library(data.table)
Expand All @@ -18,8 +24,10 @@ max = as.numeric(args[2])
input_raw = args[3]
input_filtered = args[4]
output = args[5]
output_stats = args[6]

data_filtered = fread(input_filtered, sep='\t')

data_filtered = fread(input_filtered)
# check if data has less than 9 columns
if (ncol(data_filtered) < 9) {
stop("footprint file has less than 9 columns. exiting.")
Expand Down Expand Up @@ -51,7 +59,7 @@ data_filtered[[8]] = data_filtered[[8]] - data_filtered[[2]]
fwrite(data_filtered, output, col.names=FALSE, quote = FALSE, sep = '\t')

# data is the initial data before any comparisons have been done (-d parameter of compareBed.sh)
data = fread(input_raw)
data = fread(input_raw, sep='\t')

# some statistics about the bedtool comparisons are stored in the output_stats file (previously hard-coded as ./FilterMotifs.stats)
sum_data = sum(data[[3]]-data[[2]])
Expand All @@ -62,4 +70,4 @@ length_data = formatC(mean(data[[3]]-data[[2]]), digits = 4)
length_filtered = formatC(mean(data_filtered[[7]]), digits = 4)
stats = data.frame(sum_nt_input = sum_data, sum_nt_filtered = sum_filtered, quotient_of_nt = difference_nt, loss_of_nt = loss_nt, mean_length_input = length_data, mean_length_filtered = length_filtered, flag_1_ratio = length(which(data_filtered$containsMaxpos == 1))/dim(data_filtered)[1])
stats = t(stats)
write.table(stats, "./FilterMotifs.stats", col.names = FALSE, quote = FALSE, sep = '\t')
write.table(stats, output_stats, col.names = FALSE, quote = FALSE, sep = '\t')

0 comments on commit c55e8ff

Please sign in to comment.