diff --git a/bin/1.2_filter_motifs/compareBed.sh b/bin/1.2_filter_motifs/compareBed.sh index 1571065..1840ba8 100644 --- a/bin/1.2_filter_motifs/compareBed.sh +++ b/bin/1.2_filter_motifs/compareBed.sh @@ -18,15 +18,11 @@ min=10 max=200 path=`echo $0 | sed 's/\/[^\/]*$/\//g'` help=false -# required parameters given -da=false -mo=false -fa=false # display help when no parameters chosen if [ $# -eq 0 ] then - he=true + help=true fi # parsing parameters @@ -34,7 +30,7 @@ wrong=() while [[ $# -gt 0 ]] do key="$1" - if [[ "^-" =~ $2 ]] + if [[ ${2:0:1} == "-" ]] then echo "Each parameter needs a value (except the help parameter), values must not start with a '-'!" exit 1 @@ -43,13 +39,11 @@ do case $key in -d|--data) data="$2" - da=true shift shift ;; -m|--motifs) motifs="$2" - mo=true shift shift ;; @@ -60,7 +54,6 @@ do ;; -f|--fasta) fasta="$2" - fa=true shift shift ;; @@ -102,10 +95,10 @@ then for i in ${wrong[@]} do echo wrong parameter $i - echo call script without parameters for help or call --help - echo exit done -exit 1 + + echo call script without parameters for help or call --help + exit 1 fi # the help message @@ -151,105 +144,78 @@ echo path of scripts: $path # check required parameters if [ -z $data ] || [ -z $motifs ] || [ -z $fasta ] then + echo ERROR echo required parameters not given. echo required are: --data \ --motifs \ --fasta \ exit 1 fi -# comparing unknown footprints with regions of known motifs -# comparison is done iteratively -# remove overlapping regions in unknown footprints - # remove trailing tabs in footprints cat $data | sed 's/[ \t]*$//' > "$workdir"/filtered.bed -temp_switch=true all_empty=true + +# motiffiles either from a directory OR comma separated list if [ -d "$motifs" ] then - # check if all motiffiles are empty/only consist of header. exit if all are empty - for i in "$motifs"/*.bed - do + # creates an array of all files with bed in its name in the directory $motifs + declare -a motiffiles=(`ls $motifs | grep bed | sed "s|^|$motifs\/|g" | tr '\n' ' ' | sed "s|//|/|g"`) + +# the else case means, that the motiffiles were passed comma separated with no whitespace. +else + declare -a motiffiles=(`echo $motifs | sed 's/,/ /g'`) +fi + +# check if files exist and if they are all empty (exiting if all empty) +for i in ${motiffiles[@]} +do + if [ -f $i ] + then if [ $all_empty == true ] then lines=`cat $i | wc -l` if [ $lines -gt 1 ] then all_empty=false + break fi fi - done - if [ $all_empty == true ] - then - echo All motiffiles were empty! - echo Fix motiffiles and try again. - echo exiting + else + echo file $i does not exist + echo please use correct paths. exiting. exit 1 fi +done - # bedtools comparisons - for i in "$motifs"/*.bed - do - # remove trailing tabs in motiffile - sed -i 's/[ \t]*$//' $i - - if [ $temp_switch == true ] - then - temp_switch=false - bedtools subtract -a "$workdir"/filtered.bed -b $i > "$workdir"/filtered_temp.bed - else - temp_switch=true - bedtools subtract -a "$workdir"/filtered_temp.bed -b $i > "$workdir"/filtered.bed - fi - echo $i - done - -# the else case means, that the motiffiles were passed comma separated with no whitespace. -else - declare -a motiffiles=(`echo $motifs | sed 's/,/ /g'`) - # check if files exist and if they are all empty (exiting if all empty) - for i in ${motiffiles[@]} - do - if [ -f $i ] - then - if [ $all_empty == true ] - then - lines=`cat $i | wc -l` - if [ $lines -gt 1 ] - then - all_empty=false - fi - fi - else - echo file $i does not exist - echo please use correct paths. exiting. - exit 1 - fi - done - if [ $all_empty == true ] +# error report of rare case of only empty motiffiles +if [ $all_empty == true ] then + echo ERROR echo All motiffiles were empty! echo Fix motiffiles and try again. - echo exiting exit 1 - fi +fi - # bedtools comparisons - for i in ${motiffiles[@]} - do - # remove trailing tabs in motiffile - sed -i 's/[ \t]*$//' $i +# comparing unknown footprints with regions of known motifs +# comparison is done iteratively +# remove overlapping regions in unknown footprints +temp_switch=true +counter=1 +for i in ${motiffiles[@]} +do + # remove trailing tabs in motiffile + sed -i 's/[ \t]*$//' $i - if [ $temp_switch == true ] - then - help=false - bedtools subtract -a "$workdir"/filtered.bed -b $i > "$workdir"/filtered_temp.bed - else - temp_switch=true - bedtools subtract -a "$workdir"/filtered_temp.bed -b $i > "$workdir"/filtered.bed - fi - echo $i - done -fi + if [ $temp_switch == true ] + then + temp_switch=false + bedtools subtract -a "$workdir"/filtered.bed -b $i > "$workdir"/filtered_temp.bed + else + temp_switch=true + bedtools subtract -a "$workdir"/filtered_temp.bed -b $i > "$workdir"/filtered.bed + fi + echo "$i --- $counter of ${#motiffiles[@]}" + counter=`expr $counter + 1` +done # get file of last iteration an write its content into filtered.bed if [ $temp_switch == false ] @@ -259,7 +225,7 @@ fi # remove short/long motivs, make unique ids (relevant for some splitted tfbs from subtract) and handle maxScorePosition # also creates a small output file with information about the comparison -Rscript $path/compareBed_runinfo.R $min $max $data "$workdir"/filtered.bed "$workdir"/filtered_flagged.bed +Rscript $path/compareBed_runinfo.R $min $max $data "$workdir"/filtered.bed "$workdir"/filtered_flagged.bed "$workdir"/FilterMotifs.stats # check if Rscript executed without errors if [ $? -gt 0 ] then @@ -271,9 +237,19 @@ first_line=`sed -n 1p $data | sed "s/$/\tcontains_maxpos\tsequence/"` if [[ ${first_line:0:1} == "#" ]] then echo "$first_line" > $output + # add some final values to the log file + fp_initial=`cat $data | wc -l` + fp_initial=`expr $fp_initial - 1` + fp_final=`cat "$workdir"/filtered.bed | wc -l` + fp_final=`expr $fp_final - 1` + echo $fp_initial | sed 's/^/initial number of footprints: /g' >> "$workdir"/FilterMotifs.stats + echo $fp_final | sed 's/^/number of footprints after subtract: /g' >> "$workdir"/FilterMotifs.stats else # output will be overwritten if it exists rm -f $output + # add some final values to the log file + cat $data | wc -l | sed 's/^/initial number of footprints: /g' >> "$workdir"/FilterMotifs.stats + cat "$workdir"/filtered.bed | wc -l | sed 's/^/number of footprints after subtract: /g' >> "$workdir"/FilterMotifs.stats fi # add fasta sequences to bed and create fasta file