Merge remote-tracking branch 'origin/dev' into dev

# Conflicts: # masterenv.yml
loosolab · Dec 10, 2018 · f0ad6b3 · f0ad6b3
2 parents 30c17cc + 15aa68a
commit f0ad6b3
Show file tree

Hide file tree

Showing 3 changed files with 46 additions and 47 deletions.
diff --git a/README.md b/README.md
@@ -20,7 +20,7 @@ export PATH=[meme-suite instalation path]/bin:$PATH
 
 Download all files from the [GitHub repository](https://github.molgen.mpg.de/loosolab/masterJLU2018). 
 The Nextflow-script needs a conda environment to run. Nextflow can create the needed environment from the given yaml-file.
-On some systems Nrxtflow exits the run with following error:
+On some systems Nextflow exits the run with following error:
 ```
 Caused by:
   Failed to create Conda environment
@@ -42,20 +42,21 @@ When the enviroment is created, set the variable 'path_env' in the configuration
 nextflow run pipeline.nf --input [BigWig-file] --bed [BED-file] --genome_fasta [FASTA-file] --jaspar_db [MEME-file]
 ```
 ## Parameters
+For a detailed overview of all parameters, follow this [link](https://github.molgen.mpg.de/loosolab/masterJLU2018/wiki/Configuration).
 ```
 Required arguments:
-	--input Path to BigWig-file
-	--bed Path to BED-file
+	--input Path to BigWig-file with scores on the peaks of interest
+	--bed Path to BED-file with peaks of interest corresponding to the BigWig file
 	--genome_fasta Path to genome in FASTA-format
 	--jaspar_db Path to motif-database in MEME-format
 	--organism STRING Source organism: [ hg19 | hg38 or mm9 | mm10 ]
 
 
 Optional arguments:
 	Footprint extraction:
-	--window_length INT (Default: 200)
-	--step INT (Default: 100)
-	--percentage INT(Default: 0)
+	--window_length INT (Default: 200) length of the sliding window
+	--step INT (Default: 100) interval by which the window is slid
+	--percentage INT (Default: 0) percentage to be added to the background while searching for footprints
 
 	Filter unknown motifs:
 	--min_size_fp INT (Default: 10)
@@ -82,7 +83,7 @@ Optional arguments:
 	--interation INT	Number of iterations done by glam2. More iterations: better results, higher runtime. (Default: 10000)
 	--tomtom_treshold float	Threshold for similarity score. (Default: 0.01)
 
-	Moitf clustering:
+	Motif clustering:
 	--edge_weight INT Minimum weight of edges in motif-cluster-graph (Default: 5)
 	--motif_similarity_thresh FLOAT threshold for motif similarity score (Default: 0.00001)
 

diff --git a/masterenv.yml b/masterenv.yml
@@ -4,7 +4,6 @@ channels:
   - bioconda
   - conda-forge
 dependencies:
-  - bedtools
   - r-seqinr
   - numpy
   - pybigWig
@@ -25,4 +24,4 @@ dependencies:
   - pybedtools
   - matplotlib
   - seaborn
-  - crossmap
+  - crossmap
diff --git a/pipeline.nf b/pipeline.nf
@@ -1,10 +1,4 @@
-//!/usr/bin/env nextflow
-
-Channel.fromPath(params.input).map {it -> [it.simpleName, it]}.set {bigwig_input}
-Channel.fromPath(params.bed).set {bed_input}
-Channel.fromPath(params.genome_fasta).into {fa_overlap; fa_scan; fa_overlap_2}
-Channel.fromPath(params.jaspar_db).into {db_for_motivscan; db_for_tomtom}
-Channel.fromPath(params.config).set {config}
+#!/usr/bin/env nextflow
 
 //setting default values
 params.input=""
@@ -65,36 +59,36 @@ log.info """
 Usage: nextflow run pipeline.nf --input [BigWig-file] --bed [BED-file] --genome_fasta [FASTA-file] --jaspar_db [MEME-file]
 
 Required arguments:
-	--input Path to BigWig-file
-	--bed Path to BED-file
-	--genome_fasta Path to genome in FASTA-format
-	--jaspar_db Path to motif-database in MEME-format
+	--input	Path to BigWig-file
+	--bed	Path to BED-file
+	--genome_fasta	Path to genome in FASTA-format
+	--jaspar_db	Path to motif-database in MEME-format
 
 
 Optional arguments:
 	Footprint extraction:
-	--window_length INT (Default: 200)
-	--step INT (Default: 100)
-	--percentage INT(Default: 0)
+	--window_length INT	(Default: 200)
+	--step INT	(Default: 100)
+	--percentage INT	(Default: 0)
 
 	Filter unknown motifs:
-	--min_size_fp INT (Default: 10)
-	--max_size_fp INT (Default: 100)
+	--min_size_fp INT	(Default: 10)
+	--max_size_fp INT	(Default: 100)
+
+	Clustering:
+	Sequence preparation/ reduction:
+	--kmer INT	Kmer length (Default: 10)
+	--aprox_motif_len INT	Motif length (Default: 10)
+	--motif_occurence FLOAT	Percentage of motifs over all sequences. Use 1 (Default) to assume every sequence contains a motif.
+	--min_seq_length INT	Remove all sequences below this value. (Default: 10)
 
 	Clustering:
-	  Sequence preparation/ reduction:
-	  --kmer INT Kmer length (Default: 10)
-	  --aprox_motif_len INT Motif length (Default: 10)
-	  --motif_occurence FLOAT Percentage of motifs over all sequences. Use 1 (Default) to assume every sequence contains a motif.
-	  --min_seq_length INT Remove all sequences below this value. (Default: 10)
-
-	  Clustering:
-	  --global INT Global (=1) or local (=0) alignment. (Default: 0)
-	  --identity FLOAT Identity threshold. (Default: 0.8)
-	  --sequence_coverage INT Minimum aligned nucleotides on both sequences. (Default: 8)
-	  --memory INT Memory limit in MB. 0 for unlimited. (Default: 800)
-	  --throw_away_seq INT Remove all sequences equal or below this length before clustering. (Default: 9)
-	  --strand INT Align +/+ & +/- (= 1). Or align only +/+ (= 0). (Default: 0)
+	--global INT	Global (=1) or local (=0) alignment. (Default: 0)
+	--identity FLOAT	Identity threshold. (Default: 0.8)
+	--sequence_coverage INT	Minimum aligned nucleotides on both sequences. (Default: 8)
+	--memory INT	Memory limit in MB. 0 for unlimited. (Default: 800)
+	--throw_away_seq INT	Remove all sequences equal or below this length before clustering. (Default: 9)
+	--strand INT	Align +/+ & +/- (= 1). Or align only +/+ (= 0). (Default: 0)
 
 	Motif estimation:
 	--motif_min_len INT	Minimum length of Motif (Default: 8)
@@ -103,21 +97,27 @@ Optional arguments:
 	--tomtom_treshold float	Threshold for similarity score. (Default: 0.01)
 
 	Motif clustering:
-	--edge_weight INT Minimum weight of edges in motif-cluster-graph (Default: 5)
-	--motif_similarity_thresh FLOAT threshold for motif similarity score (Default: 0.00001)
+	--edge_weight INT	Minimum weight of edges in motif-cluster-graph (Default: 5)
+	--motif_similarity_thresh FLOAT	Threshold for motif similarity score (Default: 0.00001)
 
 	Creating GTF:
-	--organism [homo_sapiens | mus_musculus]
-	--tissues
+	--organism [homo_sapiens | mus_musculus]	
+	--tissues	
 
 All arguments can be set in the configuration files.
 """
+} else {
+	Channel.fromPath(params.input).map {it -> [it.simpleName, it]}.set {bigwig_input}
+	Channel.fromPath(params.bed).set {bed_input}
+	Channel.fromPath(params.genome_fasta).into {fa_overlap; fa_scan; fa_overlap_2}
+	Channel.fromPath(params.jaspar_db).into {db_for_motivscan; db_for_tomtom}
+	Channel.fromPath(params.config).set {config}
 }
 
 
-bigwig_input.combine(bed_input).into {footprint_in}
+bigwig_input.combine(bed_input).set{footprint_in}
 /*
-
+This process uses the discontinuous score from a bigWig file to estimate footprints within peaks of interest.
 */
 process footprint_extraction {
 	conda "${path_env}"
@@ -154,7 +154,7 @@ process extract_known_TFBS {
 
 	script:
 	"""
-	python ${path_bin}/tfbsscan.py --use moods --core ${params.threads} -m ${db} -g ${fasta} -o ./
+	python ${path_bin}/tfbsscan.py --use moods --core ${params.threads} -m ${db} -g ${fasta} -o .
 	"""
 }
 
@@ -225,7 +225,7 @@ process bed_to_clustered_fasta {
     tag{name}
 
     input:
-    set name, file (bed) from clustered_bed
+    set name, file (bed) from bed_for_motif_esitmation
 
     when:
     params.fasta == false
@@ -349,7 +349,6 @@ process clustered_glam2 {
 	"""
 }
 
-*/
 
 /*
 Running Tomtom on meme-files generated by GLAM2.