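Minor fixes: clean up whitespace, call footprints_extraction.py instead of call_peaks.py, publish reduced BED files, and change the tomtom output directory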

renewiegandt · renewiegandt · commit 62add26c4748 · 2018-12-15T13:44:40.000-05:00
diff --git a/bin/get_best_motif.py b/bin/get_best_motif.py
@@ -8,7 +8,7 @@ def parse_arguments():
     args = parser.parse_args()
     return args
 
-
+# Write the lines of the file up to a certain line (MOTIF + [num])
 def main():
     args = parse_arguments()
     out = open(args.output, "w+")
diff --git a/pipeline.nf b/pipeline.nf
@@ -59,7 +59,7 @@
 if (params.bigwig == "" || params.bed == "" || params.genome_fasta == "" || params.motif_db == "" || params.config == ""){
 log.info """
 Usage: nextflow run pipeline.nf --bigwig [BigWig-file] --bed [BED-file] --genome_fasta [FASTA-file] --motif_db [MEME-file]
- 
+
 Required arguments:
 	--bigwig		 Path to BigWig-file
 	--bed			 Path to BED-file
@@ -71,44 +71,44 @@ Required arguments:
 Optional arguments:
 
 	--tfbs_path Path to directory with output from tfbsscan. If given tfbsscan will not be run.
-	
+
 	Footprint extraction:
 	--window_length INT	(Default: 200)
 	--step INT		(Default: 100)
 	--percentage INT	(Default: 0)
-	
+
 	Filter unknown motifs:
 	--min_size_fp INT	(Default: 10)
 	--max_size_fp INT	(Default: 100)
-	
+
 	Clustering:
 	Sequence preparation/ reduction:
 	--kmer INT		Kmer length (Default: 10)
 	--aprox_motif_len INT	Motif length (Default: 10)
 	--motif_occurence FLOAT	Percentage of motifs over all sequences. Use 1 (Default) to assume every sequence contains a motif.
 	--min_seq_length Interations	Remove all sequences below this value. (Default: 10)
-	
+
 	Clustering:
 	--global INT		Global (=1) or local (=0) alignment. (Default: 0)
 	--identity FLOAT	Identity threshold. (Default: 0.8)
 	--sequence_coverage INT	Minimum aligned nucleotides on both sequences. (Default: 8)
 	--memory INT		Memory limit in MB. 0 for unlimited. (Default: 800)
 	--throw_away_seq INT	Remove all sequences equal or below this length before clustering. (Default: 9)
 	--strand INT		Align +/+ & +/- (= 1). Or align only +/+ (= 0). (Default: 0)
-	
+
 	Motif estimation:
 	--min_seq INT 		Sets the minimum number of sequences required for the FASTA-files given to GLAM2. (Default: 100)
 	--motif_min_key INT	Minimum number of key positions (aligned columns) in the alignment done by GLAM2. (Default: 8)
 	--motif_max_key INT	Maximum number of key positions (aligned columns) in the alignment done by GLAM2.f (Default: 20)
 	--iteration INT		Number of iterations done by glam2. More Iterations: better results, higher runtime. (Default: 10000)
 	--tomtom_treshold float	Threshold for similarity score. (Default: 0.01)
 	--best_motif INT	Get the best X motifs per cluster. (Default: 3)
-	
+
 	Moitf clustering:
 	--cluster_motif	Boolean	If 1 pipeline clusters motifs. If its 0 it does not. (Defaul: 0)
 	--edge_weight INT	Minimum weight of edges in motif-cluster-graph (Default: 5)
 	--motif_similarity_thresh FLOAT	Threshold for motif similarity score (Default: 0.00001)
-	
+
 	Creating GTF:
 	--organism [hg38 | hg19 | mm9 | mm10]	Input organism
 	--tissues List/String 	List of one or more keywords for tissue-/category-activity, categories must be specified as in JSON
@@ -195,7 +195,7 @@ process footprint_extraction {
 
 	script:
 	"""
-	python ${path_bin}/call_peaks.py --bigwig ${bigWig} --bed ${bed} --output_file ${name}_called_peaks.bed --window_length ${params.window_length} --step ${params.step} --percentage ${params.percentage}
+	python ${path_bin}/footprints_extraction.py --bigwig ${bigWig} --bed ${bed} --output_file ${name}_called_peaks.bed --window_length ${params.window_length} --step ${params.step} --percentage ${params.percentage}
 	"""
 }
 
@@ -265,6 +265,7 @@ process overlap_with_known_TFBS {
 process reduce_bed {
 	conda "${path_env}"
 	echo true
+	publishDir "${params.out}/cluster/reduced_bed/", mode: 'copy'
 
 	input:
 	set name, file (bed) from bed_for_reducing
@@ -404,7 +405,6 @@ process find_similar_motifs {
 files_for_merge_fasta = motif_similarity.combine(fasta_for_motif_cluster)
 
 
-
 /*
 Merging FASTA-files of similar clusters
 */
@@ -443,8 +443,8 @@ process clustered_glam2 {
 
 	output:
 	set name, file ('*.meme') into clustered_meme_for_tomtom
-  set name, file ('*.meme') into clustered_meme_for_filter
-  file('*')
+  	set name, file ('*.meme') into clustered_meme_for_filter
+  	file('*')
 
 	when:
 	params.cluster_motif == 1
@@ -472,7 +472,7 @@ Tomtom searches motifs in databases.
 process tomtom {
 
     tag{name}
-	publishDir "${params.out}/final_esimated_motifs/tomtom/", mode: 'copy'
+	publishDir "${params.out}/esimated_motifs/tomtom/", mode: 'copy'
 
     input:
     set name, file (meme) from for_tomtom