loosolab · HendrikSchultheis · Jan 21, 2019 · Jan 16, 2019 · Jan 18, 2019 · Jan 18, 2019
diff --git a/README.md b/README.md
@@ -29,22 +29,28 @@ conda activate masterenv
 4. Set the wd parameter in the nextflow.config file as path where the repository is saved. For example: '~/masterJLU2018/'.
 
 
-**Important Note:** For conda the channel bioconda needs to be set as highest priority! This is required due to two different packages with the same name in different channels. For the pipeline the package jellyfish from the channel bioconda is needed and **NOT** the jellyfish package from the channel conda-forge!
-
+**Important Notes:**
+1. For conda the channel bioconda needs to be set as highest priority! This is required due to two different packages with the same name in different channels. For the pipeline the package jellyfish from the channel bioconda is needed and **NOT** the jellyfish package from the channel conda-forge!
+2. The parameters --create_known_tfbs_path and --tfbs_path require an absolute path. A relative path will not work because nextflow changes the working directory. This will soon be updated.
 
 
 ## Quick Start
 ```console
-nextflow run pipeline.nf --bigwig [BigWig-file] --bed [BED-file] --genome_fasta [FASTA-file] --motif_db [MEME-file] --organism [mm10|mm9|hg19|hg38]
+nextflow run pipeline.nf --bigwig [BigWig-file] --bed [BED-file] --genome_fasta [FASTA-file] --motif_db [MEME-file] --organism [mm10|mm9|hg19|hg38] (--tfbs_path [absolute PATH] || --create_known_tfbs_path [absolute PATH])
 ```
 
 ### Demo run
 There are files provided inside  ./demo/ for a demo run.
 Go to the main directory and run following command:
 ```
-nextflow run pipeline.nf --bigwig ./demo/buenrostro50k_chr1_fp.bw --bed ./demo/buenrostro50k_chr1_peaks.bed --genome_fasta ./demo/hg38/hg38_chr1.fa --motif_db ./demo/motif_database/jaspar_vertebrates.meme --out ./demo/buenrostro50k_chr1_out/ --create_known_tfbs_path ./demo/known_tfbs_hg38_chr1/ --organism hg38
+nextflow run pipeline.nf --bigwig ./demo/buenrostro50k_chr1_fp.bw --bed ./demo/buenrostro50k_chr1_peaks.bed --genome_fasta ./demo/hg38/hg38_chr1.fa --motif_db ./demo/motif_database/jaspar_vertebrates.meme --out ./demo/buenrostro50k_chr1_out/ --create_known_tfbs_path [absolute path]/demo/known_tfbs/ --organism hg38
+```
+**Important Note:** tfbsscan may fail due to an unknown bug. If this happens, you will see the error message shown in "Known Issues". If the error occurs, first try changing the tfbsscan_method to fimo. If it persists, try the following command:
+```
+nextflow run pipeline.nf --bigwig ./demo/buenrostro50k_chr1_fp.bw --bed ./demo/buenrostro50k_chr1_peaks.bed --genome_fasta ./demo/hg38/hg38_chr1.fa --motif_db ./demo/motif_database/jaspar_vertebrates.meme --out ./demo/buenrostro50k_chr1_out/ --tfbs_path [absolute path]/demo/known_tfbs_chr1_hg38/ --organism hg38
 ```
 
+
 ## Parameters
 For a detailed overview for all parameters follow this [link](https://github.molgen.mpg.de/loosolab/masterJLU2018/wiki/Configuration).
 ```
@@ -56,14 +62,17 @@ Required arguments:
 	--config		 Path to UROPA configuration file
  	--organism 		 Input organism [hg38 | hg19 | mm9 | mm10]
 	--out			 Output Directory (Default: './out/')
+	[
+	--create_known_tfbs_path Path to directory where output from tfbsscan (known motifs) is stored.
+				 Path can be set as tfbs_path in next run. (Needs absolute path)
+	or
+	--tfbs_path 		Path to directory with output from tfbsscan. If given tfbsscan will be skipped. (Needs absolute path)
+	]
 
 Optional arguments:
 
 	--help [0|1]		1 to show this help message. (Default: 0)
-	--tfbs_path 		Path to directory with output from tfbsscan. If given tfbsscan will not be run.
-	--create_known_tfbs_path Path to directory where output from tfbsscan (known motifs) are stored.
-				 Path can be set as tfbs_path in next run. (Default: './')
-	--gtf_path			Path to gtf-file. If path is set the process which creats a gtf-file is skipped.
+	--gtf_path		Path to gtf-file. If path is set the process which creates a gtf-file is skipped.
 
 	Footprint extraction:
 	--window_length INT	This parameter sets the length of a sliding window. (Default: 200)
@@ -97,6 +106,7 @@ Optional arguments:
 	--iteration INT		Number of iterations done by GLAM2. More Iterations: better results, higher runtime. (Default: 10000)
 	--tomtom_treshold FLOAT	Threshold for similarity score. (Default: 0.01)
 	--best_motif INT	Get the best X motifs per cluster. (Default: 3)
+	--gap_penalty INT	Set penalty for gaps in GLAM2 (Default: 1000)
 	Moitf clustering:
 	--cluster_motif	Boolean	If 1 pipeline clusters motifs. If its 0 it does not. (Default: 0)
 	--edge_weight INT	Minimum weight of edges in motif-cluster-graph (Default: 5)

diff --git a/bin/1.2_filter_motifs/compareBed.sh b/bin/1.2_filter_motifs/compareBed.sh
@@ -276,20 +276,18 @@ first_line=`sed -n 1p $data | sed "s/$/\tcontains_maxpos\tsequence/"`
 if [[ ${first_line:0:1} == "#" ]]
 then
 	echo "$first_line" > $output
-	# add some final values to the log file
+	# add initial number of footprints to the log file
 	fp_initial=`cat $data | wc -l`
 	fp_initial=`expr $fp_initial - 1`
-	fp_final=`cat "$workdir"/filtered.bed | wc -l`
-	fp_final=`expr $fp_final - 1`
 	echo $fp_initial | sed 's/^/initial number of footprints: /g' >> "$workdir"/compareBed.stats
-	echo $fp_final | sed 's/^/number of footprints after subtract: /g' >> "$workdir"/compareBed.stats
 else
 	# output will be overwritten if it exists
 	rm -f $output
-	# add some final values to the log file
+	# add initial number of footprints to the log file
 	cat $data | wc -l | sed 's/^/initial number of footprints: /g' >> "$workdir"/compareBed.stats
-	cat "$workdir"/filtered.bed | wc -l | sed 's/^/number of footprints after subtract: /g' >> "$workdir"/compareBed.stats
 fi
+# add number of footprints after filtering to the log file
+cat "$workdir"/filtered_flagged.bed | wc -l | sed 's/^/number of footprints after subtract: /g' >> "$workdir"/compareBed.stats
 
 # add fasta sequences to bed and create fasta file
 out_fasta=`echo $output | sed "s|.bed$|.fasta|g"`

diff --git a/bin/2.2_motif_estimation/get_motif_seq.R b/bin/2.2_motif_estimation/get_motif_seq.R
@@ -51,12 +51,12 @@ create_seq_json <- function(input, output, num, tmp_path, cluster_id) {
     stop(paste0("CLUSTER ID could not be found. Please make sure that your file path contains _[cluster_id] at the end. Found: ", cluster_id,"\n For example: /test_cluster_1/glam.txt"))
   }
 
-  dir.create(tmp_path, showWarnings = FALSE)
+  dir.create(tmp_path, showWarnings = FALSE, recursive = TRUE)
 
   file_dir <- tmp_path
 
   # Split glam.txt file on lines that start with Score:
-  system(paste0("csplit ", input, " '/^Score:.*/' '{*}' -f ", file_dir, "/f_id_test.pholder"))
+  system(paste0("csplit ", input, " '/^Score:.*/' '{*}' -f ", file_dir, "/f_id.pholder"))
   # Only keep the lines that start with 'f' to get the lines with the sequence ids
   system(paste0("for i in ", file_dir, "/*.pholder0[1-", num, "];do grep \"^f\" $i > \"${i}.done\";done"))
 

diff --git a/demo/known_tfbs_chr1_hg38/ALX3_MA0634.1.bed b/demo/known_tfbs_chr1_hg38/ALX3_MA0634.1.bed
diff --git a/demo/known_tfbs_chr1_hg38/ARNT_HIF1A_MA0259.1.bed b/demo/known_tfbs_chr1_hg38/ARNT_HIF1A_MA0259.1.bed