Skip to content

Jannik hamp patch 1 #54

Merged
merged 15 commits into from
Jan 11, 2019
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions bin/1.2_filter_motifs/abs_max_score.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#!/bin/Rscript

# author: Jannik Hamp
# email: jannik.hamp@googlemail.com

# description: This script gets called by compareBed.sh

# It calculates the absolute position of maximum score of the footprints.
# This information is later used for an additional flag column in the bed file.

library(data.table)
# Usage: Rscript abs_max_score.R <footprint.bed>
# The file given as first argument is read, modified and overwritten in place.
args <- commandArgs(trailingOnly = TRUE)

# Fail early with a readable message instead of handing NA or a missing path
# to fread().
if (length(args) < 1 || is.na(args[1]) || !file.exists(args[1])) {
  stop("usage: Rscript abs_max_score.R <footprint.bed> (file must exist)",
       call. = FALSE)
}
file <- args[1]

# Use the explicit `file =` argument so the path is always treated as a file
# name and never as literal text or a shell command.
tab <- fread(file = file, sep = "\t")

# check if data has fewer than 9 columns
if (ncol(tab) < 9) {
  stop("footprint file has less than 9 columns. exiting.")
}

# Add column 2 to column 8 and store the sum back in column 8.
# NOTE(review): presumably column 2 is the footprint start and column 8 the
# max-score offset relative to it, which makes the sum the absolute position
# -- confirm against the BED layout produced upstream.
tab[[8]] <- tab[[2]] + tab[[8]]

# Overwrite the input file: tab-separated, no header line, no quoting.
fwrite(tab, file, row.names = FALSE, col.names = FALSE, quote = FALSE,
       sep = "\t")
12 changes: 10 additions & 2 deletions bin/1.2_filter_motifs/compareBed.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
# For parameter description, run the script without parameters or -h.
# The output is a file with the filtered footprints and the log file FilterMotivs.stats

# One R script is used, compareBed_runinfo.R, stored in the same directory.
# Two R scripts are used, compareBed_runinfo.R and abs_max_score.R, stored in the same directory.

# default parameters
workdir=$PWD
Expand Down Expand Up @@ -181,7 +181,7 @@ cat $data | sed 's/[ \t]*$//' > "$workdir"/filtered.bed
if [ -d "$motifs" ]
then
# creates an array of all files with bed in its name in the directory $motifs
declare -a motiffiles=(`ls $motifs | grep bed | sed "s|^|$motifs\/|g" | tr '\n' ' ' | sed "s|//|/|g"`)
declare -a motiffiles=(`ls $motifs | grep -i '.bed$' | sed "s|^|$motifs\/|g" | tr '\n' ' ' | sed "s|//|/|g"`)

# the else case means, that the motiffiles were passed comma separated with no whitespace.
else
Expand Down Expand Up @@ -219,6 +219,14 @@ if [ $all_empty == true ]
exit 1
fi

# calculate absolute max_score position
# abs_max_score.R is stored in the same directory as this script, so resolve
# it relative to $0 rather than the caller's current working directory.
# Abort if Rscript exits with a non-zero status.
if ! Rscript "$(dirname "$0")"/abs_max_score.R "$workdir"/filtered.bed
then
exit 1
fi

# comparing unknown footprints with regions of known motifs
# comparison is done iteratively
# remove overlapping regions in unknown footprints
Expand Down
2 changes: 1 addition & 1 deletion bin/1.2_filter_motifs/compareBed_runinfo.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
# adds a column with a flag "contains_maxpos"
# and creates a file with information of the bedtool comparison

# parameters: Parameters are not named. Respect the parameter order.
# parameters: Parameters are not named, respect the parameter order.
# min: minimum footprint size threshold
# max: maximum footprint size threshold
# input_raw: unfiltered BED-file
Expand Down