From 5d9fb6c15c3844b9c553ac2abf0f72ec519f0389 Mon Sep 17 00:00:00 2001 From: renewiegandt Date: Tue, 26 Mar 2019 17:30:23 +0100 Subject: [PATCH] Minor changes --- pipeline.nf | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/pipeline.nf b/pipeline.nf index f7f6295..bb338ce 100644 --- a/pipeline.nf +++ b/pipeline.nf @@ -1,5 +1,7 @@ #!/usr/bin/env nextflow +disable_mo_clu = 1 + //setting default values params.bigwig="" params.bed="" @@ -9,6 +11,7 @@ params.tfbs_path="" params.help = 0 params.gtf_path="" + params.gtf2="" params.out = "./out/" //footprint_extraction @@ -26,7 +29,7 @@ //reduce_sequence params.kmer=10 params.aprox_motif_len=10 - params.motif_occurence=1 + params.motif_occurrence=1 params.min_seq_length=10 //cdhit_wrapper @@ -65,7 +68,7 @@ //evaluation params.max_uropa_runs = 10 -if (params.bigwig == "" || params.bed == "" || params.organism == "" || params.genome_fasta == "" || params.motif_db == "" || params.config == "" || "${params.help}" != "0" ) { +if (params.bigwig == "" || params.bed == "" || params.organism == "" || params.genome_fasta == "" || params.motif_db == "" || params.config == "" || params.gtf2 == "" || "${params.help}" != "0" ) { log.info """ Usage: nextflow run pipeline.nf --bigwig [BigWig-file] --bed [BED-file] --genome_fasta [FASTA-file] --motif_db [MEME-file] --config [UROPA-config-file] @@ -82,7 +85,7 @@ if (params.bigwig == "" || params.bed == "" || params.organism == "" || params.g --help [0|1] 1 to show this help message. (Default: 0) --gtf_path Path to gtf-file. If path is set the process which creates a gtf-file is skipped. - --tfbs_path Path to directory with output from tfbsscan. If given tfbsscan will be skipped. + --tfbs_path Path to directory with tfbsscan output. If given tfbsscan will be skipped. Footprint extraction: --window_length INT This parameter sets the length of a sliding window. 
(Default: 200) @@ -99,7 +102,7 @@ if (params.bigwig == "" || params.bed == "" || params.organism == "" || params.g Sequence preparation/ reduction: --kmer INT K-mer length (Default: 10) --aprox_motif_len INT Motif length (Default: 10) - --motif_occurence FLOAT Percentage of motifs over all sequences. Use 1 (Default) to assume every sequence contains a motif. + --motif_occurrence FLOAT Percentage of motifs over all sequences. Use 1 (Default) to assume every sequence contains a motif. --min_seq_length Interations Remove all sequences below this value. (Default: 10) Clustering: --global INT Global (=1) or local (=0) alignment. (Default: 0) @@ -127,7 +130,7 @@ if (params.bigwig == "" || params.bed == "" || params.organism == "" || params.g --tissues List/String List of one or more keywords for tissue-/category-activity, categories must be specified as in JSON config Evaluation: - --max_uropa_runs INT Number of UROPA runs started in parralel (Default: 10) + --max_uropa_runs INT Maximum number of UROPA runs executed in parallel (Default: 10) All arguments can be set in the configuration files ``` """ @@ -144,11 +147,13 @@ if (params.bigwig == "" || params.bed == "" || params.organism == "" || params.g } } + + /* Checking for parameter input! 
*/ int_params = ["window_length", "step", "min_size_fp", "max_size_fp", "kmer", - "aprox_motif_len", "motif_occurence", "min_seq_length", "global", + "aprox_motif_len", "motif_occurrence", "min_seq_length", "global", "sequence_coverage", "memory", "throw_away_seq", "strand", "min_seq", "motif_min_key", "motif_max_key", "iteration", "edge_weight", "best_motif", "min_gap", "gap_penalty", "edge_weight", @@ -242,7 +247,7 @@ Find postitions of known tfbs with tfbsscan and discard the overlaps with compar */ process overlap_with_known_TFBS { //conda "${path_env}" - publishDir "${params.out}/1.2_filter_motifs", mode :'copy' + publishDir "${params.out}/1.2_filter_motifs", mode :'copy', pattern: '*_unknown.bed' publishDir "${params.out}/log", mode: 'copy', pattern: '*.log' tag{name} errorStrategy 'finish' @@ -299,7 +304,7 @@ process reduce_sequence { script: """ - Rscript ${path_bin}/2.1_clustering/reduce_sequence.R -i ${bed} -k ${params.kmer} -m ${params.aprox_motif_len} -o ${name}_reduced.bed -t ${params.threads} -f ${params.motif_occurence} -s ${params.min_seq_length} --summary reduce_sequence.log + Rscript ${path_bin}/2.1_clustering/reduce_sequence.R -i ${bed} -k ${params.kmer} -m ${params.aprox_motif_len} -o ${name}_reduced.bed -t ${params.threads} -f ${params.motif_occurrence} -s ${params.min_seq_length} --summary reduce_sequence.log """ } @@ -402,6 +407,7 @@ process merge_meme { when: params.cluster_motif == 1 + disable_mo_clu == 0 script: //sorting @@ -687,7 +693,6 @@ process get_best_motif_seq_id { best_motif.combine(fa_scan).combine(bed_for_rescan).set {files_for_re_scan} - /* */ process re_scan { @@ -706,8 +711,8 @@ process re_scan { """ mkdir -p ./tmp/re_scan/${name} python ${path_bin}/1.2_filter_motifs/tfbsscan.py -m ${meme} -g ${fasta} -b ${peaks} -o . 
--cores 4 --use moods --resolve_overlaps - mkdir -p ${workflow.workDir}/tmp/re_scan_final/ - cp ./*.bed ${workflow.workDir}/tmp/re_scan_final/ + mkdir -p ${workflow.workDir}/tmp/re_scan_final + cp ./Cluster*.bed ${workflow.workDir}/tmp/re_scan_final """ } @@ -722,7 +727,7 @@ process venn { publishDir "${params.out}/3.2_evaluation/venn", mode: 'copy' input: - set name, file(seq_ids), link,file (clustered_bed) from for_venn + set name, file(seq_ids), link, file (clustered_bed) from for_venn output: file ('*.pdf') @@ -730,7 +735,7 @@ process venn { script: cluster_id = name.split('_')[-1] """ - Rscript ${path_bin}/3.2_evaluation/venn.R -i ${workflow.workDir}/tmp/re_scan_final -p Cluster_${cluster_id} -l ${seq_ids} -c ${clustered_bed} -o . + Rscript ${path_bin}/3.2_evaluation/venn.R -i ${workflow.workDir}/tmp/re_scan_final/ -p Cluster_${cluster_id}_ -l ${seq_ids} -c ${clustered_bed} -o . """ } @@ -813,7 +818,7 @@ process UROPA { maxForks params.max_uropa_runs publishDir "${params.out}/3.2_evaluation/uropa", mode: 'copy', pattern: "*.txt" publishDir "${params.out}/3.2_evaluation/uropa/summary", mode: 'copy', pattern: "*.pdf" - publishDir "${params.out}/log", mode: 'copy', pattern: "*.log" + publishDir "${params.out}/log/uropa/${name}", mode: 'copy', pattern: "*.log" input: set name, file (config) from uropa_config