diff --git a/bin/get_best_motif.py b/bin/get_best_motif.py index 0e7334e..cc24949 100644 --- a/bin/get_best_motif.py +++ b/bin/get_best_motif.py @@ -8,7 +8,7 @@ def parse_arguments(): args = parser.parse_args() return args - +# write lines of file till certain line (MOTIF + [num]) def main(): args = parse_arguments() out = open(args.output, "w+") diff --git a/pipeline.nf b/pipeline.nf index d8d5ad5..e1977d2 100644 --- a/pipeline.nf +++ b/pipeline.nf @@ -59,7 +59,7 @@ if (params.bigwig == "" || params.bed == "" || params.genome_fasta == "" || params.motif_db == "" || params.config == ""){ log.info """ Usage: nextflow run pipeline.nf --bigwig [BigWig-file] --bed [BED-file] --genome_fasta [FASTA-file] --motif_db [MEME-file] - + Required arguments: --bigwig Path to BigWig-file --bed Path to BED-file @@ -71,23 +71,23 @@ Required arguments: Optional arguments: --tfbs_path Path to directory with output from tfbsscan. If given tfbsscan will not be run. - + Footprint extraction: --window_length INT (Default: 200) --step INT (Default: 100) --percentage INT (Default: 0) - + Filter unknown motifs: --min_size_fp INT (Default: 10) --max_size_fp INT (Default: 100) - + Clustering: Sequence preparation/ reduction: --kmer INT Kmer length (Default: 10) --aprox_motif_len INT Motif length (Default: 10) --motif_occurence FLOAT Percentage of motifs over all sequences. Use 1 (Default) to assume every sequence contains a motif. --min_seq_length Interations Remove all sequences below this value. (Default: 10) - + Clustering: --global INT Global (=1) or local (=0) alignment. (Default: 0) --identity FLOAT Identity threshold. (Default: 0.8) @@ -95,7 +95,7 @@ Optional arguments: --memory INT Memory limit in MB. 0 for unlimited. (Default: 800) --throw_away_seq INT Remove all sequences equal or below this length before clustering. (Default: 9) --strand INT Align +/+ & +/- (= 1). Or align only +/+ (= 0). (Default: 0) - + Motif estimation: --min_seq INT Sets the minimum number of sequences required for the FASTA-files given to GLAM2. (Default: 100) --motif_min_key INT Minimum number of key positions (aligned columns) in the alignment done by GLAM2. (Default: 8) @@ -103,12 +103,12 @@ Optional arguments: --iteration INT Number of iterations done by glam2. More Iterations: better results, higher runtime. (Default: 10000) --tomtom_treshold float Threshold for similarity score. (Default: 0.01) --best_motif INT Get the best X motifs per cluster. (Default: 3) - + Moitf clustering: --cluster_motif Boolean If 1 pipeline clusters motifs. If its 0 it does not. (Defaul: 0) --edge_weight INT Minimum weight of edges in motif-cluster-graph (Default: 5) --motif_similarity_thresh FLOAT Threshold for motif similarity score (Default: 0.00001) - + Creating GTF: --organism [hg38 | hg19 | mm9 | mm10] Input organism --tissues List/String List of one or more keywords for tissue-/category-activity, categories must be specified as in JSON @@ -195,7 +195,7 @@ process footprint_extraction { script: """ - python ${path_bin}/call_peaks.py --bigwig ${bigWig} --bed ${bed} --output_file ${name}_called_peaks.bed --window_length ${params.window_length} --step ${params.step} --percentage ${params.percentage} + python ${path_bin}/footprints_extraction.py --bigwig ${bigWig} --bed ${bed} --output_file ${name}_called_peaks.bed --window_length ${params.window_length} --step ${params.step} --percentage ${params.percentage} """ } @@ -265,6 +265,7 @@ process overlap_with_known_TFBS { process reduce_bed { conda "${path_env}" echo true + publishDir "${params.out}/cluster/reduced_bed/", mode: 'copy' input: set name, file (bed) from bed_for_reducing @@ -404,7 +405,6 @@ process find_similar_motifs { files_for_merge_fasta = motif_similarity.combine(fasta_for_motif_cluster) - /* Merging FASTA-files of similar clusters */ @@ -443,8 +443,8 @@ process clustered_glam2 { output: set name, file ('*.meme') into clustered_meme_for_tomtom - set name, file ('*.meme') into clustered_meme_for_filter - file('*') + set name, file ('*.meme') into clustered_meme_for_filter + file('*') when: params.cluster_motif == 1 @@ -472,7 +472,7 @@ Tomtom searches motifs in databases. process tomtom { tag{name} - publishDir "${params.out}/final_esimated_motifs/tomtom/", mode: 'copy' + publishDir "${params.out}/esimated_motifs/tomtom/", mode: 'copy' input: set name, file (meme) from for_tomtom