Skip to content

Commit

Permalink
updated check for trailing tabs in motiffiles
Browse files Browse the repository at this point in the history
  • Loading branch information
JannikHamp authored Jan 9, 2019
1 parent 4acc20f commit 90c8c05
Showing 1 changed file with 55 additions and 22 deletions.
77 changes: 55 additions & 22 deletions bin/1.2_filter_motifs/compareBed.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,19 @@
# BED-format.

# For parameter description, run the script without parameters or -h.
# The output is a file with the filtered footprints and the log file FilterMotivs.stats

# One R script is used, called compareBed_runinfo.R, stored in the same directory
# One R script is used, compareBed_runinfo.R, stored in the same directory.

# default parameters
# workdir: directory for intermediate files, defaults to the current directory
workdir=$PWD
# output: name of the resulting BED file
output="newMotifs.bed"
# min / max: allowed footprint size range
min=10
max=200
# path: $0 with its last "/name" component replaced by "/", i.e. the directory
# part of the call path including the trailing slash. When $0 contains no "/"
# at all, the sed expression does not match and path is just $0 unchanged.
# presumably used to locate compareBed_runinfo.R next to this script - verify
# against the part of the script that calls it.
path=`echo $0 | sed 's/\/[^\/]*$/\//g'`
# help: switched to true by -h / --help
help=false

# NOTE(review): same assignment as a few lines above; this listing seems to show
# the line at both its old and its new position - confirm only one is intended.
path=`echo $0 | sed 's/\/[^\/]*$/\//g'`

# display help when no parameters chosen
if [ $# -eq 0 ]
then
Expand All @@ -32,7 +34,7 @@ do
key="$1"
if [[ ${2:0:1} == "-" ]]
then
echo "Each parameter needs a value (except the help parameter), values must not start with a '-'!"
echo "ERROR: Each parameter needs a value (except the help parameter), values must not start with a '-'!"
exit 1
fi

Expand Down Expand Up @@ -72,11 +74,6 @@ do
shift
shift
;;
-p|--path)
path="$2"
shift
shift
;;
-h|--help)
help=true
shift
Expand All @@ -94,7 +91,7 @@ if [ $count -gt 0 ]
then
for i in ${wrong[@]}
do
echo wrong parameter $i
echo ERROR: wrong parameter $i
done

echo call script without parameters for help or call --help
Expand All @@ -104,11 +101,11 @@ fi
# the help message
if [ $help == true ]
then
echo "This script utilies bedtools to select new footprints from data."
echo "This script utilizes bedtools to select new footprints from data."
echo "Therefore the data is compared with known footprint motifs."
echo "The output is a new .bed file with added sequence information and a column with flags for better sequences (1)"
echo "Required arguments are data and motifs, both in bed-format, and the fasta genome sequences."
echo "If a parameter is chosen, a value must be provided or an error will occur."
echo "The output is a new BED-file with added sequence information and a flag contains_maxpos (1/0)"
echo "Required arguments are data, motifs, both in bed-format and the fasta genome sequences."
echo "If a parameter is chosen, a value must be provided aswell. (exception -h)"
echo "--------------------"
echo "usage: compareBed.sh --data <path/to/file.bed> --motifs <path/to/known/motifs.bed> --fasta <path/to/genome.fasta> [optional_parameter <value>] ..."
echo "--------------------"
Expand All @@ -125,8 +122,7 @@ then
# the documented default has to match max=200 from the default parameters
echo " -max --max maximum size of footprints; default is 200"
echo " -o --output output path/file ; default dir current directory and filename is newMotifs.bed and newMotifs.bed.fasta"
echo " -h --help shows this help message"
echo " -p --path the path where the required script compareBed_runinfo.R is stored. Default: same path as this scripts path"
exit 0
exit 0
fi

# summary of parameters
Expand All @@ -139,7 +135,6 @@ echo fasta: $fasta \(required\)
echo min: $min
echo max: $max
echo output: $output
echo relative path of subscript: $path

# check required parameters
if [ -z $data ] || [ -z $motifs ] || [ -z $fasta ]
Expand All @@ -149,10 +144,38 @@ then
echo required are: --data \<path/data.bed\> --motifs \<path/motifs.bed\> --fasta \<path/file.fasta\>
exit 1
fi
# Validate the input files: data and fasta must exist as regular files.
# All expansions are quoted so that a path containing spaces or glob
# characters is tested and reported as one word; unquoted, such a path makes
# the test itself fail and the check is silently skipped.
if [ ! -f "$data" ]
then
    echo ERROR
    echo "$data does not exist. Please check input parameter -d / --data"
    exit 1
fi
if [ ! -f "$fasta" ]
then
    echo ERROR
    echo "$fasta does not exist. Please check input parameter -f / --fasta"
    exit 1
fi
#check other parameters
# a negative minimum is not fatal: fall back to the default and continue
if [ "$min" -lt 0 ]
then
    min=10
    echo "min can't be negative. Default value of 10 is choosen"
fi
# max smaller than min is not fatal either: fall back to the default.
# NOTE(review): when min is above 200 the fallback is still smaller than min;
# decide whether that case should abort instead.
if [ "$max" -lt "$min" ]
then
    max=200
    echo "max must be greater than min. Default value of 200 is choosen"
fi
# the working directory receives the intermediate files, so it must exist
if [ ! -d "$workdir" ]
then
    echo ERROR
    echo "directory $workdir does not exist. Please check parameter -w / --workdir"
    exit 1
fi

# remove trailing whitespace (spaces and tabs) from every footprint line and
# store the cleaned copy in the working directory; the input file itself is
# not modified. "$data" is quoted so that paths with spaces are read as one file.
sed 's/[ \t]*$//' "$data" > "$workdir"/filtered.bed
all_empty=true

# motiffiles either from a directory OR comma separated list
if [ -d "$motifs" ]
Expand All @@ -166,6 +189,7 @@ else
fi

# check if files exist and if they are all empty (exiting if all empty)
all_empty=true
for i in ${motiffiles[@]}
do
if [ -f $i ]
Expand All @@ -180,7 +204,7 @@ do
fi
fi
else
echo file $i does not exist
echo ERROR: file $i does not exist
echo please use correct paths. exiting.
exit 1
fi
Expand All @@ -202,18 +226,26 @@ temp_switch=true
# Subtract every motif file from the footprints, one file per iteration.
# counter is the 1-based position of the current file; it is only used in the
# progress message "<file> --- <counter> of <number of files>".
# NOTE(review): this listing shows some statements twice (the unconditional and
# the conditional sed -i, the progress echo, the subtract call in the else
# branch). This looks like the old and new side of a change rendered together -
# confirm against the actual script which of them are live.
counter=1
for i in ${motiffiles[@]}
do
# remove trailing tabs in motiffile
# (unconditional variant: edits the motif file in place; the pattern strips
# trailing spaces as well as tabs)
sed -i 's/[ \t]*$//' $i
echo "$i --- $counter of ${#motiffiles[@]}"
# remove trailing tabs in motiffile, but only if the second line in the file ends with a tab.
# checking all lines for trailing tabs would be time consuming.
# Only line 2 is sampled. Its tabs are turned into '#' so that a trailing tab
# shows up as a '#' in the last character; a line that really ends in '#'
# is treated the same way.
secnd_line=`sed -n 2p $i | tr '\t' '#'`
# prints the sampled line (with '#' in place of tabs) to stdout
echo $secnd_line
# ${secnd_line: -1} is the last character; the space before -1 is required
if [[ ${secnd_line: -1} == "#" ]]
then
echo trailing tabs have been found. removing trailing tabs.
# in-place edit of the motif file; strips trailing spaces and tabs on all lines
sed -i 's/[ \t]*$//' $i
fi

# bedtools comparisons
# The result alternates between two files: temp_switch == true means the
# latest result is in filtered.bed, false means it is in filtered_temp.bed.
# Each subtract reads the latest result as -a and writes to the other file,
# so no file is read and overwritten by the same command.
if [ $temp_switch == true ]
then
temp_switch=false
bedtools subtract -a "$workdir"/filtered.bed -b $i > "$workdir"/filtered_temp.bed
else
temp_switch=true
bedtools subtract -a "$workdir"/filtered_temp.bed -b $i > "$workdir"/filtered.bed
bedtools subtract -a "$workdir"/filtered_temp.bed -b $i > "$workdir"/filtered.bed
fi
echo "$i --- $counter of ${#motiffiles[@]}"
# advance the position shown in the progress message
counter=`expr $counter + 1`
done

Expand Down Expand Up @@ -253,5 +285,6 @@ else
fi

# add fasta sequences to bed and create fasta file
echo getting sequences from fasta-file
# First call: -bedOut makes getfasta print the BED lines with the sequence
# added instead of FASTA records; the result is appended (>>) to the output
# file. "$fasta" and "$output" are quoted so that paths containing spaces stay
# one word - an unquoted redirect target with spaces aborts with
# "ambiguous redirect".
bedtools getfasta -fi "$fasta" -bed "$workdir"/filtered_flagged.bed -bedOut >> "$output"
# Second call: reads the BED file written above and writes FASTA records to
# <output>.fasta (-fo), with the header taken from the name column (-name).
bedtools getfasta -name -fi "$fasta" -bed "$output" -fo "$output".fasta

0 comments on commit 90c8c05

Please sign in to comment.