From 5d9fb6c15c3844b9c553ac2abf0f72ec519f0389 Mon Sep 17 00:00:00 2001
From: renewiegandt <rene.wiegandt@mpi-bn.mpg.de>
Date: Tue, 26 Mar 2019 17:30:23 +0100
Subject: [PATCH] Fix motif_occurrence spelling, require --gtf2, disable merge_meme

---
 pipeline.nf | 33 +++++++++++++++++++--------------
 1 file changed, 19 insertions(+), 14 deletions(-)

diff --git a/pipeline.nf b/pipeline.nf
index f7f6295..bb338ce 100644
--- a/pipeline.nf
+++ b/pipeline.nf
@@ -1,5 +1,7 @@
 #!/usr/bin/env nextflow
 
+disable_mo_clu = 1
+
 //setting default values
 	params.bigwig=""
 	params.bed=""
@@ -9,6 +11,7 @@
 	params.tfbs_path=""
 	params.help = 0
 	params.gtf_path=""
+	params.gtf2=""
 	params.out = "./out/"
 
 //footprint_extraction
@@ -26,7 +29,7 @@
   //reduce_sequence
 	params.kmer=10
 	params.aprox_motif_len=10
-	params.motif_occurence=1
+	params.motif_occurrence=1
 	params.min_seq_length=10
 
 	//cdhit_wrapper
@@ -65,7 +68,7 @@
 //evaluation
 	params.max_uropa_runs = 10
 
-if (params.bigwig == "" || params.bed == "" || params.organism == "" || params.genome_fasta == "" || params.motif_db == "" || params.config == "" || "${params.help}" != "0" ) {
+if (params.bigwig == "" || params.bed == "" || params.organism == "" || params.genome_fasta == "" || params.motif_db == "" || params.config == "" || params.gtf2 == "" || "${params.help}" != "0" ) {
 	log.info """
 	Usage: nextflow run pipeline.nf --bigwig [BigWig-file] --bed [BED-file] --genome_fasta [FASTA-file] --motif_db [MEME-file] --config [UROPA-config-file]
 
@@ -82,7 +85,7 @@ if (params.bigwig == "" || params.bed == "" || params.organism == "" || params.g
 
 		--help [0|1]		1 to show this help message. (Default: 0)
 		--gtf_path		Path to gtf-file. If path is set the process which creates a gtf-file is skipped.
-		--tfbs_path 		Path to directory with output from tfbsscan. If given tfbsscan will be skipped.
+		--tfbs_path 		Path to directory with tfbsscan output. If given tfbsscan will be skipped.
 
 		Footprint extraction:
 		--window_length INT	This parameter sets the length of a sliding window. (Default: 200)
@@ -99,7 +102,7 @@ if (params.bigwig == "" || params.bed == "" || params.organism == "" || params.g
 		Sequence preparation/ reduction:
 		--kmer INT		K-mer length (Default: 10)
 		--aprox_motif_len INT	Motif length (Default: 10)
-		--motif_occurence FLOAT	Percentage of motifs over all sequences. Use 1 (Default) to assume every sequence contains a motif.
+		--motif_occurrence FLOAT	Percentage of motifs over all sequences. Use 1 (Default) to assume every sequence contains a motif.
 		--min_seq_length Interations	Remove all sequences below this value. (Default: 10)
 		Clustering:
 		--global INT		Global (=1) or local (=0) alignment. (Default: 0)
@@ -127,7 +130,7 @@ if (params.bigwig == "" || params.bed == "" || params.organism == "" || params.g
 		--tissues List/String 	List of one or more keywords for tissue-/category-activity, categories must be specified as in JSON
 					config
 		Evaluation:
-		--max_uropa_runs INT	Number of UROPA runs started in parralel (Default: 10)
+		--max_uropa_runs INT	Maximum number of UROPA runs executed in parallel (Default: 10)
 	All arguments can be set in the configuration files
 	 ```
 	"""
@@ -144,11 +147,13 @@ if (params.bigwig == "" || params.bed == "" || params.organism == "" || params.g
 	}
 }
 
+
+
 /*
 Checking for parameter input!
 */
 int_params = ["window_length", "step", "min_size_fp", "max_size_fp", "kmer",
-             "aprox_motif_len", "motif_occurence", "min_seq_length", "global",
+             "aprox_motif_len", "motif_occurrence", "min_seq_length", "global",
              "sequence_coverage", "memory", "throw_away_seq", "strand",
              "min_seq", "motif_min_key", "motif_max_key", "iteration",
              "edge_weight", "best_motif", "min_gap", "gap_penalty", "edge_weight",
@@ -242,7 +247,7 @@ Find postitions of known tfbs with tfbsscan and discard the overlaps with compar
 */
 process overlap_with_known_TFBS {
 	//conda "${path_env}"
-	publishDir "${params.out}/1.2_filter_motifs", mode :'copy'
+	publishDir "${params.out}/1.2_filter_motifs", mode :'copy', pattern: '*_unknown.bed'
 	publishDir "${params.out}/log", mode: 'copy', pattern: '*.log'
 	tag{name}
 	errorStrategy 'finish'
@@ -299,7 +304,7 @@ process reduce_sequence {
 
 	script:
 	"""
-	Rscript ${path_bin}/2.1_clustering/reduce_sequence.R -i ${bed} -k ${params.kmer} -m ${params.aprox_motif_len} -o ${name}_reduced.bed -t ${params.threads} -f ${params.motif_occurence} -s ${params.min_seq_length} --summary reduce_sequence.log
+	Rscript ${path_bin}/2.1_clustering/reduce_sequence.R -i ${bed} -k ${params.kmer} -m ${params.aprox_motif_len} -o ${name}_reduced.bed -t ${params.threads} -f ${params.motif_occurrence} -s ${params.min_seq_length} --summary reduce_sequence.log
 	"""
 }
 
@@ -402,6 +407,7 @@ process merge_meme {
 
 	when:
-	params.cluster_motif == 1
+	// The when: guard is one boolean expression; conditions on separate lines are not AND-ed, so join them explicitly.
+	params.cluster_motif == 1 && disable_mo_clu == 0
 
 	script:
 	//sorting
@@ -687,7 +693,6 @@ process get_best_motif_seq_id {
 
 best_motif.combine(fa_scan).combine(bed_for_rescan).set {files_for_re_scan}
 
-
 /*
 */
 process re_scan {
@@ -706,8 +711,8 @@ process re_scan {
 	"""
 	mkdir -p ./tmp/re_scan/${name}
 	python ${path_bin}/1.2_filter_motifs/tfbsscan.py -m ${meme} -g ${fasta} -b ${peaks} -o . --cores 4 --use moods --resolve_overlaps
-	mkdir -p ${workflow.workDir}/tmp/re_scan_final/
-	cp ./*.bed ${workflow.workDir}/tmp/re_scan_final/
+	mkdir -p ${workflow.workDir}/tmp/re_scan_final
+	cp ./Cluster*.bed ${workflow.workDir}/tmp/re_scan_final
 	"""
 }
 
@@ -722,7 +727,7 @@ process venn {
 	publishDir "${params.out}/3.2_evaluation/venn", mode: 'copy'
 
 	input:
-	set name, file(seq_ids), link,file (clustered_bed) from for_venn
+	set name, file(seq_ids), link, file (clustered_bed) from for_venn
 
 	output:
 	file ('*.pdf')
@@ -730,7 +735,7 @@ process venn {
 	script:
 	cluster_id = name.split('_')[-1]
 	"""
-	Rscript ${path_bin}/3.2_evaluation/venn.R -i ${workflow.workDir}/tmp/re_scan_final -p Cluster_${cluster_id} -l ${seq_ids} -c ${clustered_bed} -o .
+	Rscript ${path_bin}/3.2_evaluation/venn.R -i ${workflow.workDir}/tmp/re_scan_final/ -p Cluster_${cluster_id}_ -l ${seq_ids} -c ${clustered_bed} -o .
 	"""
 }
 
@@ -813,7 +818,7 @@ process UROPA {
 	maxForks params.max_uropa_runs
 	publishDir "${params.out}/3.2_evaluation/uropa", mode: 'copy', pattern: "*.txt"
 	publishDir "${params.out}/3.2_evaluation/uropa/summary", mode: 'copy', pattern: "*.pdf"
-	publishDir "${params.out}/log", mode: 'copy', pattern: "*.log"
+	publishDir "${params.out}/log/uropa/${name}", mode: 'copy', pattern: "*.log"
 
 	input:
 	set name, file (config) from uropa_config