Skip to content

Commit

Permalink
added information for logfile
Browse files Browse the repository at this point in the history
  • Loading branch information
JannikHamp authored Jan 8, 2019
1 parent b787d14 commit becfeae
Showing 1 changed file with 61 additions and 85 deletions.
146 changes: 61 additions & 85 deletions bin/1.2_filter_motifs/compareBed.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,23 +18,19 @@ min=10
max=200
path=`echo $0 | sed 's/\/[^\/]*$/\//g'`
help=false
# required parameters given
da=false
mo=false
fa=false

# display help when no parameters chosen
if [ $# -eq 0 ]
then
he=true
help=true
fi

# parsing parameters
wrong=()
while [[ $# -gt 0 ]]
do
key="$1"
if [[ "^-" =~ $2 ]]
if [[ ${2:0:1} == "-" ]]
then
echo "Each parameter needs a value (except the help parameter), values must not start with a '-'!"
exit 1
Expand All @@ -43,13 +39,11 @@ do
case $key in
-d|--data)
data="$2"
da=true
shift
shift
;;
-m|--motifs)
motifs="$2"
mo=true
shift
shift
;;
Expand All @@ -60,7 +54,6 @@ do
;;
-f|--fasta)
fasta="$2"
fa=true
shift
shift
;;
Expand Down Expand Up @@ -102,10 +95,10 @@ then
for i in ${wrong[@]}
do
echo wrong parameter $i
echo call script without parameters for help or call --help
echo exit
done
exit 1

echo call script without parameters for help or call --help
exit 1
fi

# the help message
Expand Down Expand Up @@ -151,105 +144,78 @@ echo path of scripts: $path
# check required parameters
if [ -z $data ] || [ -z $motifs ] || [ -z $fasta ]
then
echo ERROR
echo required parameters not given.
echo required are: --data \<path/data.bed\> --motifs \<path/motifs.bed\> --fasta \<path/file.fasta\>
exit 1
fi

# comparing unknown footprints with regions of known motifs
# comparison is done iteratively
# remove overlapping regions in unknown footprints

# remove trailing tabs in footprints
cat $data | sed 's/[ \t]*$//' > "$workdir"/filtered.bed
temp_switch=true
all_empty=true

# motiffiles either from a directory OR comma separated list
if [ -d "$motifs" ]
then
# check if all motiffiles are empty/only consist of header. exit if all are empty
for i in "$motifs"/*.bed
do
# creates an array of all files in the directory $motifs that have "bed" in their names
declare -a motiffiles=(`ls $motifs | grep bed | sed "s|^|$motifs\/|g" | tr '\n' ' ' | sed "s|//|/|g"`)

# the else case means that the motif files were passed as a comma-separated list without whitespace.
else
declare -a motiffiles=(`echo $motifs | sed 's/,/ /g'`)
fi

# check if files exist and if they are all empty (exiting if all empty)
for i in ${motiffiles[@]}
do
if [ -f $i ]
then
if [ $all_empty == true ]
then
lines=`cat $i | wc -l`
if [ $lines -gt 1 ]
then
all_empty=false
break
fi
fi
done
if [ $all_empty == true ]
then
echo All motiffiles were empty!
echo Fix motiffiles and try again.
echo exiting
else
echo file $i does not exist
echo please use correct paths. exiting.
exit 1
fi
done

# bedtools comparisons
for i in "$motifs"/*.bed
do
# remove trailing tabs in motiffile
sed -i 's/[ \t]*$//' $i

if [ $temp_switch == true ]
then
temp_switch=false
bedtools subtract -a "$workdir"/filtered.bed -b $i > "$workdir"/filtered_temp.bed
else
temp_switch=true
bedtools subtract -a "$workdir"/filtered_temp.bed -b $i > "$workdir"/filtered.bed
fi
echo $i
done

# the else case means that the motif files were passed as a comma-separated list without whitespace.
else
declare -a motiffiles=(`echo $motifs | sed 's/,/ /g'`)
# check if files exist and if they are all empty (exiting if all empty)
for i in ${motiffiles[@]}
do
if [ -f $i ]
then
if [ $all_empty == true ]
then
lines=`cat $i | wc -l`
if [ $lines -gt 1 ]
then
all_empty=false
fi
fi
else
echo file $i does not exist
echo please use correct paths. exiting.
exit 1
fi
done
if [ $all_empty == true ]
# error report of rare case of only empty motiffiles
if [ $all_empty == true ]
then
echo ERROR
echo All motiffiles were empty!
echo Fix motiffiles and try again.
echo exiting
exit 1
fi
fi

# bedtools comparisons
for i in ${motiffiles[@]}
do
# remove trailing tabs in motiffile
sed -i 's/[ \t]*$//' $i
# comparing unknown footprints with regions of known motifs
# comparison is done iteratively
# remove overlapping regions in unknown footprints
temp_switch=true
counter=1
for i in ${motiffiles[@]}
do
# remove trailing tabs in motiffile
sed -i 's/[ \t]*$//' $i

if [ $temp_switch == true ]
then
help=false
bedtools subtract -a "$workdir"/filtered.bed -b $i > "$workdir"/filtered_temp.bed
else
temp_switch=true
bedtools subtract -a "$workdir"/filtered_temp.bed -b $i > "$workdir"/filtered.bed
fi
echo $i
done
fi
if [ $temp_switch == true ]
then
temp_switch=false
bedtools subtract -a "$workdir"/filtered.bed -b $i > "$workdir"/filtered_temp.bed
else
temp_switch=true
bedtools subtract -a "$workdir"/filtered_temp.bed -b $i > "$workdir"/filtered.bed
fi
echo "$i --- $counter of ${#motiffiles[@]}"
counter=`expr $counter + 1`
done

# get the file of the last iteration and write its content into filtered.bed
if [ $temp_switch == false ]
Expand All @@ -259,7 +225,7 @@ fi

# remove short/long motifs, make ids unique (relevant for TFBS that were split by subtract) and handle maxScorePosition
# also creates a small output file with information about the comparison
Rscript $path/compareBed_runinfo.R $min $max $data "$workdir"/filtered.bed "$workdir"/filtered_flagged.bed
Rscript $path/compareBed_runinfo.R $min $max $data "$workdir"/filtered.bed "$workdir"/filtered_flagged.bed "$workdir"/FilterMotifs.stats
# check if Rscript executed without errors
if [ $? -gt 0 ]
then
Expand All @@ -271,9 +237,19 @@ first_line=`sed -n 1p $data | sed "s/$/\tcontains_maxpos\tsequence/"`
if [[ ${first_line:0:1} == "#" ]]
then
echo "$first_line" > $output
# add some final values to the log file
fp_initial=`cat $data | wc -l`
fp_initial=`expr $fp_initial - 1`
fp_final=`cat "$workdir"/filtered.bed | wc -l`
fp_final=`expr $fp_final - 1`
echo $fp_initial | sed 's/^/initial number of footprints: /g' >> "$workdir"/FilterMotifs.stats
echo $fp_final | sed 's/^/number of footprints after subtract: /g' >> "$workdir"/FilterMotifs.stats
else
# output will be overwritten if it exists
rm -f $output
# add some final values to the log file
cat $data | wc -l | sed 's/^/initial number of footprints: /g' >> "$workdir"/FilterMotifs.stats
cat "$workdir"/filtered.bed | wc -l | sed 's/^/number of footprints after subtract: /g' >> "$workdir"/FilterMotifs.stats
fi

# add fasta sequences to bed and create fasta file
Expand Down

0 comments on commit becfeae

Please sign in to comment.