From 9c3f325f196a702183ae5a36caf7a9f182b0160d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Wiegandt?= Date: Thu, 6 Dec 2018 10:08:16 -0500 Subject: [PATCH 1/9] minor fix --- pipeline.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipeline.nf b/pipeline.nf index 00c23a1..000230f 100644 --- a/pipeline.nf +++ b/pipeline.nf @@ -105,7 +105,7 @@ All arguments can be set in the configuration files. } -bigwig_input.combine(bed_input).into {footprint_in} +bigwig_input.combine(bed_input).set{footprint_in} /* */ From e861fdcdb584816acdb7c3be24fc63f86be09811 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Wiegandt?= Date: Mon, 10 Dec 2018 05:42:40 -0500 Subject: [PATCH 2/9] minor changes --- masterenv.yml | 6 +----- pipeline.nf | 2 +- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/masterenv.yml b/masterenv.yml index a977fe1..f776235 100644 --- a/masterenv.yml +++ b/masterenv.yml @@ -1,10 +1,6 @@ name: masterenv -channels: - - bioconda - - conda-forge dependencies: - - bedtools - r-seqinr - numpy - pybigWig @@ -24,4 +20,4 @@ dependencies: - biopython - pybedtools - matplotlib - - seaborn \ No newline at end of file + - seaborn diff --git a/pipeline.nf b/pipeline.nf index 000230f..3becaf6 100644 --- a/pipeline.nf +++ b/pipeline.nf @@ -144,7 +144,7 @@ process extract_known_TFBS { script: """ - python ${path_bin}/tfbsscan.py --use moods --core ${params.threads} -m ${db} -g ${fasta} -o ./ + python ${path_bin}/tfbsscan.py --use moods --core ${params.threads} -m ${db} -g ${fasta} -o . 
""" } From 0a0e9b8cf35c27ab95b7693e247a60d10a772a36 Mon Sep 17 00:00:00 2001 From: anastasiia Date: Mon, 10 Dec 2018 13:56:53 +0100 Subject: [PATCH 3/9] correcting some errors --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 02f4ec4..5c93985 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ export PATH=[meme-suite instalation path]/bin:$PATH Download all files from the [GitHub repository](https://github.molgen.mpg.de/loosolab/masterJLU2018). The Nextflow-script needs a conda enviroment to run. Nextflow can create the needed enviroment from the given yaml-file. -On some systems Nrxtflow exits the run with following error: +On some systems Nextflow exits the run with following error: ``` Caused by: Failed to create Conda environment @@ -81,7 +81,7 @@ Optional arguments: --interation INT Number of iterations done by glam2. More Interations: better results, higher runtime. (Default: 10000) --tomtom_treshold float Threshold for similarity score. (Default: 0.01) - Moitf clustering: + Motif clustering: --edge_weight INT Minimum weight of edges in motif-cluster-graph (Default: 5) --motif_similarity_thresh FLOAT threshold for motif similarity score (Default: 0.00001) From c1cb8ca922c1b5a439745cd22da17765e4ba8879 Mon Sep 17 00:00:00 2001 From: renewiegandt Date: Mon, 10 Dec 2018 14:38:02 +0100 Subject: [PATCH 4/9] Update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 5c93985..2e16064 100644 --- a/README.md +++ b/README.md @@ -42,6 +42,7 @@ When the enviroment is created, set the variable 'path_env' in the configuration nextflow run pipeline.nf --input [BigWig-file] --bed [BED-file] --genome_fasta [FASTA-file] --jaspar_db [MEME-file] ``` ## Parameters +For a detailed overview of all Parameters follow this [link](https://github.molgen.mpg.de/loosolab/masterJLU2018/wiki/Configuration). 
``` Required arguments: --input Path to BigWig-file From 42d32387b14945a46b53e234cac0dff80d63d825 Mon Sep 17 00:00:00 2001 From: anastasiia Date: Mon, 10 Dec 2018 14:43:14 +0100 Subject: [PATCH 5/9] adding comment about the footprints_extraction --- pipeline.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipeline.nf b/pipeline.nf index b1084df..866def8 100644 --- a/pipeline.nf +++ b/pipeline.nf @@ -117,7 +117,7 @@ All arguments can be set in the configuration files. bigwig_input.combine(bed_input).into {footprint_in} /* - +this process uses the uncontinuous score from a bigWig file to estimate footpints within peaks of interest */ process footprint_extraction { conda "${path_env}" From 198334ab0c44ec2dc65431e2a13e351948785d38 Mon Sep 17 00:00:00 2001 From: renewiegandt Date: Mon, 10 Dec 2018 14:45:15 +0100 Subject: [PATCH 6/9] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 2e16064..f53d9df 100644 --- a/README.md +++ b/README.md @@ -42,7 +42,7 @@ When the enviroment is created, set the variable 'path_env' in the configuration nextflow run pipeline.nf --input [BigWig-file] --bed [BED-file] --genome_fasta [FASTA-file] --jaspar_db [MEME-file] ``` ## Parameters -For a detailed overview of all Parameters follow this [link](https://github.molgen.mpg.de/loosolab/masterJLU2018/wiki/Configuration). +For a detailed overview for all parameters follow this [link](https://github.molgen.mpg.de/loosolab/masterJLU2018/wiki/Configuration). 
``` Required arguments: --input Path to BigWig-file From 5a7a52683b15b85946779d8e04abc792866fa586 Mon Sep 17 00:00:00 2001 From: anastasiia Date: Mon, 10 Dec 2018 14:52:53 +0100 Subject: [PATCH 7/9] improvement of parameters for my part --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index f53d9df..09b7ee3 100644 --- a/README.md +++ b/README.md @@ -45,8 +45,8 @@ nextflow run pipeline.nf --input [BigWig-file] --bed [BED-file] --genome_fasta [ For a detailed overview for all parameters follow this [link](https://github.molgen.mpg.de/loosolab/masterJLU2018/wiki/Configuration). ``` Required arguments: - --input Path to BigWig-file - --bed Path to BED-file + --input Path to BigWig-file with scores on the peaks of interest + --bed Path to BED-file with peaks of interest corresponding to the BigWig file --genome_fasta Path to genome in FASTA-format --jaspar_db Path to motif-database in MEME-format From ffed012ffbdfbbccf27464576a77ec4eb083c118 Mon Sep 17 00:00:00 2001 From: anastasiia Date: Mon, 10 Dec 2018 14:54:19 +0100 Subject: [PATCH 8/9] improve parameters for my part once more --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 09b7ee3..66b2b1f 100644 --- a/README.md +++ b/README.md @@ -53,9 +53,9 @@ Required arguments: Optional arguments: Footprint extraction: - --window_length INT (Default: 200) - --step INT (Default: 100) - --percentage INT(Default: 0) + --window_length INT (Default: 200) a length of a window + --step INT (Default: 100) an interval to slide the window + --percentage INT(Default: 0) a percentage to be added to background while searching for footprints Filter unknown motifs: --min_size_fp INT (Default: 10) From 15aa68a214d05e7581714623db1a215f2d3a21e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Wiegandt?= Date: Mon, 10 Dec 2018 09:13:39 -0500 Subject: [PATCH 9/9] help message fix --- pipeline.nf | 71 
++++++++++++++++++++++++++--------------------------- 1 file changed, 35 insertions(+), 36 deletions(-) diff --git a/pipeline.nf b/pipeline.nf index f458f83..516b7da 100644 --- a/pipeline.nf +++ b/pipeline.nf @@ -1,10 +1,4 @@ -//!/usr/bin/env nextflow - -Channel.fromPath(params.input).map {it -> [it.simpleName, it]}.set {bigwig_input} -Channel.fromPath(params.bed).set {bed_input} -Channel.fromPath(params.genome_fasta).into {fa_overlap; fa_scan; fa_overlap_2} -Channel.fromPath(params.jaspar_db).into {db_for_motivscan; db_for_tomtom} -Channel.fromPath(params.config).set {config} +#!/usr/bin/env nextflow //setting default values params.input="" @@ -65,36 +59,36 @@ log.info """ Usage: nextflow run pipeline.nf --input [BigWig-file] --bed [BED-file] --genome_fasta [FASTA-file] --jaspar_db [MEME-file] Required arguments: - --input Path to BigWig-file - --bed Path to BED-file - --genome_fasta Path to genome in FASTA-format - --jaspar_db Path to motif-database in MEME-format + --input Path to BigWig-file + --bed Path to BED-file + --genome_fasta Path to genome in FASTA-format + --jaspar_db Path to motif-database in MEME-format Optional arguments: Footprint extraction: - --window_length INT (Default: 200) - --step INT (Default: 100) - --percentage INT(Default: 0) + --window_length INT (Default: 200) + --step INT (Default: 100) + --percentage INT (Default: 0) Filter unknown motifs: - --min_size_fp INT (Default: 10) - --max_size_fp INT (Default: 100) + --min_size_fp INT (Default: 10) + --max_size_fp INT (Default: 100) + + Clustering: + Sequence preparation/ reduction: + --kmer INT Kmer length (Default: 10) + --aprox_motif_len INT Motif length (Default: 10) + --motif_occurence FLOAT Percentage of motifs over all sequences. Use 1 (Default) to assume every sequence contains a motif. + --min_seq_length INT Remove all sequences below this value. 
(Default: 10) Clustering: - Sequence preparation/ reduction: - --kmer INT Kmer length (Default: 10) - --aprox_motif_len INT Motif length (Default: 10) - --motif_occurence FLOAT Percentage of motifs over all sequences. Use 1 (Default) to assume every sequence contains a motif. - --min_seq_length INT Remove all sequences below this value. (Default: 10) - - Clustering: - --global INT Global (=1) or local (=0) alignment. (Default: 0) - --identity FLOAT Identity threshold. (Default: 0.8) - --sequence_coverage INT Minimum aligned nucleotides on both sequences. (Default: 8) - --memory INT Memory limit in MB. 0 for unlimited. (Default: 800) - --throw_away_seq INT Remove all sequences equal or below this length before clustering. (Default: 9) - --strand INT Align +/+ & +/- (= 1). Or align only +/+ (= 0). (Default: 0) + --global INT Global (=1) or local (=0) alignment. (Default: 0) + --identity FLOAT Identity threshold. (Default: 0.8) + --sequence_coverage INT Minimum aligned nucleotides on both sequences. (Default: 8) + --memory INT Memory limit in MB. 0 for unlimited. (Default: 800) + --throw_away_seq INT Remove all sequences equal or below this length before clustering. (Default: 9) + --strand INT Align +/+ & +/- (= 1). Or align only +/+ (= 0). (Default: 0) Motif estimation: --motif_min_len INT Minimum length of Motif (Default: 8) @@ -103,21 +97,27 @@ Optional arguments: --tomtom_treshold float Threshold for similarity score. (Default: 0.01) Moitf clustering: - --edge_weight INT Minimum weight of edges in motif-cluster-graph (Default: 5) - --motif_similarity_thresh FLOAT threshold for motif similarity score (Default: 0.00001) + --edge_weight INT Minimum weight of edges in motif-cluster-graph (Default: 5) + --motif_similarity_thresh FLOAT Threshold for motif similarity score (Default: 0.00001) Creating GTF: - --organism [homo_sapiens | mus_musculus] - --tissues + --organism [homo_sapiens | mus_musculus] + --tissues All arguments can be set in the configuration files. 
""" +} else { + Channel.fromPath(params.input).map {it -> [it.simpleName, it]}.set {bigwig_input} + Channel.fromPath(params.bed).set {bed_input} + Channel.fromPath(params.genome_fasta).into {fa_overlap; fa_scan; fa_overlap_2} + Channel.fromPath(params.jaspar_db).into {db_for_motivscan; db_for_tomtom} + Channel.fromPath(params.config).set {config} } bigwig_input.combine(bed_input).set{footprint_in} /* -this process uses the uncontinuous score from a bigWig file to estimate footpints within peaks of interest +This process uses the discontinuous score from a bigWig file to estimate footprints within peaks of interest */ process footprint_extraction { conda "${path_env}" @@ -225,7 +225,7 @@ process bed_to_clustered_fasta { tag{name} input: - set name, file (bed) from clustered_bed + set name, file (bed) from bed_for_motif_esitmation when: params.fasta == false @@ -349,7 +349,6 @@ process clustered_glam2 { """ } -*/ /* Running Tomtom on meme-files generated by GLAM2.