diff --git a/pipeline.nf b/pipeline.nf index f458f83..516b7da 100644 --- a/pipeline.nf +++ b/pipeline.nf @@ -1,10 +1,4 @@ -//!/usr/bin/env nextflow - -Channel.fromPath(params.input).map {it -> [it.simpleName, it]}.set {bigwig_input} -Channel.fromPath(params.bed).set {bed_input} -Channel.fromPath(params.genome_fasta).into {fa_overlap; fa_scan; fa_overlap_2} -Channel.fromPath(params.jaspar_db).into {db_for_motivscan; db_for_tomtom} -Channel.fromPath(params.config).set {config} +#!/usr/bin/env nextflow //setting default values params.input="" @@ -65,36 +59,36 @@ log.info """ Usage: nextflow run pipeline.nf --input [BigWig-file] --bed [BED-file] --genome_fasta [FASTA-file] --jaspar_db [MEME-file] Required arguments: - --input Path to BigWig-file - --bed Path to BED-file - --genome_fasta Path to genome in FASTA-format - --jaspar_db Path to motif-database in MEME-format + --input Path to BigWig-file + --bed Path to BED-file + --genome_fasta Path to genome in FASTA-format + --jaspar_db Path to motif-database in MEME-format Optional arguments: Footprint extraction: - --window_length INT (Default: 200) - --step INT (Default: 100) - --percentage INT(Default: 0) + --window_length INT (Default: 200) + --step INT (Default: 100) + --percentage INT (Default: 0) Filter unknown motifs: - --min_size_fp INT (Default: 10) - --max_size_fp INT (Default: 100) + --min_size_fp INT (Default: 10) + --max_size_fp INT (Default: 100) + + Clustering: + Sequence preparation/ reduction: + --kmer INT Kmer length (Default: 10) + --aprox_motif_len INT Motif length (Default: 10) + --motif_occurence FLOAT Percentage of motifs over all sequences. Use 1 (Default) to assume every sequence contains a motif. + --min_seq_length INT Remove all sequences below this value. (Default: 10) Clustering: - Sequence preparation/ reduction: - --kmer INT Kmer length (Default: 10) - --aprox_motif_len INT Motif length (Default: 10) - --motif_occurence FLOAT Percentage of motifs over all sequences. 
Use 1 (Default) to assume every sequence contains a motif. - --min_seq_length INT Remove all sequences below this value. (Default: 10) - - Clustering: - --global INT Global (=1) or local (=0) alignment. (Default: 0) - --identity FLOAT Identity threshold. (Default: 0.8) - --sequence_coverage INT Minimum aligned nucleotides on both sequences. (Default: 8) - --memory INT Memory limit in MB. 0 for unlimited. (Default: 800) - --throw_away_seq INT Remove all sequences equal or below this length before clustering. (Default: 9) - --strand INT Align +/+ & +/- (= 1). Or align only +/+ (= 0). (Default: 0) + --global INT Global (=1) or local (=0) alignment. (Default: 0) + --identity FLOAT Identity threshold. (Default: 0.8) + --sequence_coverage INT Minimum aligned nucleotides on both sequences. (Default: 8) + --memory INT Memory limit in MB. 0 for unlimited. (Default: 800) + --throw_away_seq INT Remove all sequences equal or below this length before clustering. (Default: 9) + --strand INT Align +/+ & +/- (= 1). Or align only +/+ (= 0). (Default: 0) Motif estimation: --motif_min_len INT Minimum length of Motif (Default: 8) @@ -103,21 +97,27 @@ Optional arguments: --tomtom_treshold float Threshold for similarity score. (Default: 0.01) Moitf clustering: - --edge_weight INT Minimum weight of edges in motif-cluster-graph (Default: 5) - --motif_similarity_thresh FLOAT threshold for motif similarity score (Default: 0.00001) + --edge_weight INT Minimum weight of edges in motif-cluster-graph (Default: 5) + --motif_similarity_thresh FLOAT Threshold for motif similarity score (Default: 0.00001) Creating GTF: - --organism [homo_sapiens | mus_musculus] - --tissues + --organism [homo_sapiens | mus_musculus] + --tissues All arguments can be set in the configuration files. 
""" +} else { + Channel.fromPath(params.input).map {it -> [it.simpleName, it]}.set {bigwig_input} + Channel.fromPath(params.bed).set {bed_input} + Channel.fromPath(params.genome_fasta).into {fa_overlap; fa_scan; fa_overlap_2} + Channel.fromPath(params.jaspar_db).into {db_for_motivscan; db_for_tomtom} + Channel.fromPath(params.config).set {config} } bigwig_input.combine(bed_input).set{footprint_in} /* -this process uses the uncontinuous score from a bigWig file to estimate footpints within peaks of interest +This process uses the uncontinuous score from a bigWig file to estimate footprints within peaks of interest */ process footprint_extraction { conda "${path_env}" @@ -225,7 +225,7 @@ process bed_to_clustered_fasta { tag{name} input: - set name, file (bed) from clustered_bed + set name, file (bed) from bed_for_motif_esitmation when: params.fasta == false @@ -349,7 +349,6 @@ process clustered_glam2 { """ } -*/ /* Running Tomtom on meme-files generated by GLAM2.