Skip to content
Merged
merged 2 commits into from
Dec 15, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion bin/get_best_motif.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ def parse_arguments():
args = parser.parse_args()
return args


# write lines of file till certain line (MOTIF + [num])
def main():
args = parse_arguments()
out = open(args.output, "w+")
Expand Down
26 changes: 13 additions & 13 deletions pipeline.nf
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@
if (params.bigwig == "" || params.bed == "" || params.genome_fasta == "" || params.motif_db == "" || params.config == ""){
log.info """
Usage: nextflow run pipeline.nf --bigwig [BigWig-file] --bed [BED-file] --genome_fasta [FASTA-file] --motif_db [MEME-file]

Required arguments:
--bigwig Path to BigWig-file
--bed Path to BED-file
Expand All @@ -71,44 +71,44 @@ Required arguments:
Optional arguments:

--tfbs_path Path to directory with output from tfbsscan. If given tfbsscan will not be run.

Footprint extraction:
--window_length INT (Default: 200)
--step INT (Default: 100)
--percentage INT (Default: 0)

Filter unknown motifs:
--min_size_fp INT (Default: 10)
--max_size_fp INT (Default: 100)

Clustering:
Sequence preparation/ reduction:
--kmer INT Kmer length (Default: 10)
--aprox_motif_len INT Motif length (Default: 10)
--motif_occurence FLOAT Percentage of motifs over all sequences. Use 1 (Default) to assume every sequence contains a motif.
--min_seq_length INT Remove all sequences below this value. (Default: 10)

Clustering:
--global INT Global (=1) or local (=0) alignment. (Default: 0)
--identity FLOAT Identity threshold. (Default: 0.8)
--sequence_coverage INT Minimum aligned nucleotides on both sequences. (Default: 8)
--memory INT Memory limit in MB. 0 for unlimited. (Default: 800)
--throw_away_seq INT Remove all sequences equal or below this length before clustering. (Default: 9)
--strand INT Align +/+ & +/- (= 1). Or align only +/+ (= 0). (Default: 0)

Motif estimation:
--min_seq INT Sets the minimum number of sequences required for the FASTA-files given to GLAM2. (Default: 100)
--motif_min_key INT Minimum number of key positions (aligned columns) in the alignment done by GLAM2. (Default: 8)
--motif_max_key INT Maximum number of key positions (aligned columns) in the alignment done by GLAM2. (Default: 20)
--iteration INT Number of iterations done by glam2. More Iterations: better results, higher runtime. (Default: 10000)
--tomtom_treshold FLOAT Threshold for similarity score. (Default: 0.01)
--best_motif INT Get the best X motifs per cluster. (Default: 3)

Motif clustering:
--cluster_motif Boolean If 1, the pipeline clusters motifs. If it is 0, it does not. (Default: 0)
--edge_weight INT Minimum weight of edges in motif-cluster-graph (Default: 5)
--motif_similarity_thresh FLOAT Threshold for motif similarity score (Default: 0.00001)

Creating GTF:
--organism [hg38 | hg19 | mm9 | mm10] Input organism
--tissues List/String List of one or more keywords for tissue-/category-activity, categories must be specified as in JSON
Expand Down Expand Up @@ -195,7 +195,7 @@ process footprint_extraction {

script:
"""
python ${path_bin}/call_peaks.py --bigwig ${bigWig} --bed ${bed} --output_file ${name}_called_peaks.bed --window_length ${params.window_length} --step ${params.step} --percentage ${params.percentage}
python ${path_bin}/footprints_extraction.py --bigwig ${bigWig} --bed ${bed} --output_file ${name}_called_peaks.bed --window_length ${params.window_length} --step ${params.step} --percentage ${params.percentage}
"""
}

Expand Down Expand Up @@ -265,6 +265,7 @@ process overlap_with_known_TFBS {
process reduce_bed {
conda "${path_env}"
echo true
publishDir "${params.out}/cluster/reduced_bed/", mode: 'copy'

input:
set name, file (bed) from bed_for_reducing
Expand Down Expand Up @@ -404,7 +405,6 @@ process find_similar_motifs {
files_for_merge_fasta = motif_similarity.combine(fasta_for_motif_cluster)



/*
Merging FASTA-files of similar clusters
*/
Expand Down Expand Up @@ -443,8 +443,8 @@ process clustered_glam2 {

output:
set name, file ('*.meme') into clustered_meme_for_tomtom
set name, file ('*.meme') into clustered_meme_for_filter
file('*')
set name, file ('*.meme') into clustered_meme_for_filter
file('*')

when:
params.cluster_motif == 1
Expand Down Expand Up @@ -472,7 +472,7 @@ Tomtom searches motifs in databases.
process tomtom {

tag{name}
publishDir "${params.out}/final_esimated_motifs/tomtom/", mode: 'copy'
publishDir "${params.out}/esimated_motifs/tomtom/", mode: 'copy'

input:
set name, file (meme) from for_tomtom
Expand Down