Skip to content
Permalink
a53aa722fe
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
286 lines (194 sloc) 5.91 KB
//!/usr/bin/env nextflow
// ---------------------------------------------------------------------------
// Input channels
// ---------------------------------------------------------------------------

// bigWig input(s), each emitted as a pair [simpleName, file].
Channel
    .fromPath(params.input)
    .map { bigwig -> [bigwig.simpleName, bigwig] }
    .set { bigwig_input }

// BED file(s) given by params.bed.
Channel
    .fromPath(params.bed)
    .set { bed_input }

// Genome FASTA, split into three channels because it is consumed by three
// different steps.
Channel
    .fromPath(params.genome_fasta)
    .into { fa_overlap; fa_scan; fa_overlap_2 }

// JASPAR database, split into one channel per consumer.
Channel
    .fromPath(params.jaspar_db)
    .into { db_for_motivscan; db_for_tomtom }

// Configuration template consumed by create_uropa_config.
Channel
    .fromPath(params.config)
    .set { config }

// Every bigWig is paired with every BED file: [name, bigWig, bed].
bigwig_input
    .combine(bed_input)
    .set { footprint_in }
/*
 * Runs call_peaks.py on one bigWig/BED pair.
 *
 * Input : [name, bigWig, bed] from footprint_in
 * Output: [name, '*.bed'] into bed_for_overlap_with_TFBS
 *
 * NOTE(review): path_env, path_bin and out are not defined in the visible
 * part of this script - confirm they are declared before this process.
 */
process footprint_extraction {
    conda "${path_env}"

    tag { name }

    // Double quotes are required: Groovy does not interpolate single-quoted
    // strings, so '${out}' would publish into a directory literally named
    // "${out}" instead of the configured output directory.
    publishDir "${out}", mode: 'copy', pattern: '*.bed'
    publishDir '/mnt/agnerds/Rene.Wiegandt/log', mode: 'copy', pattern: '*.log'

    input:
    set name, file(bigWig), file(bed) from footprint_in

    output:
    set name, file('*.bed') into bed_for_overlap_with_TFBS

    script:
    """
    python ${path_bin}/call_peaks.py --bigwig ${bigWig} --bed ${bed} --output_file ${name}_called_peaks.bed --window_length ${params.window_length} --step ${params.step} --percentage ${params.percentage}
    """
}
// TODO: check whether this step has to be executed at all.
/*
 * Stub process: takes the genome FASTA and the JASPAR database and is
 * declared to emit '*.bed' files into known_TFBS_for_overlap.
 * The script body is still empty, so nothing is produced yet.
 */
process extract_known_TFBS {
    conda "${path_env}"

    input:
    file(fasta) from fa_overlap
    file(db) from db_for_motivscan

    output:
    file('*.bed') into known_TFBS_for_overlap

    script:
    """
    """
}
// Build the input tuples for the overlap step:
// [name, footprint BED] x known-TFBS BED(s) x genome FASTA.
bed_for_overlap_with_TFBS
    .combine(known_TFBS_for_overlap)
    .combine(fa_overlap_2)
    .set { for_overlap }

/*
 * Runs compareBed.sh on the called footprints, the known-motif BED file(s)
 * and the genome FASTA.
 *
 * Input : [name, bed_footprints, bed_motifs, fasta] from for_overlap
 * Output: '*.bed' into bed_for_clustering
 *
 * The tuples emitted by footprint_extraction start with the sample name, so
 * it has to be bound here as well; otherwise every following element is
 * shifted by one position (bed_footprints would receive the name, and so on).
 *
 * NOTE(review): this assumes known_TFBS_for_overlap contributes exactly one
 * element to the combined tuple - confirm once extract_known_TFBS is
 * implemented.
 */
process overlap_with_known_TFBS {
    conda "${path_env}"

    input:
    set name, file(bed_footprints), val(bed_motifs), file(fasta) from for_overlap

    output:
    file('*.bed') into bed_for_clustering

    script:
    // Turn a Groovy list representation such as "[a.bed, b.bed]" into
    // "a.bed,b.bed" by removing whitespace and square brackets.
    motif_list = bed_motifs.toString().replaceAll(/\s|\[|\]/,"")
    // The output name is derived from the sample name (the former
    // ${name_placeholder} was never defined) and matches the '*.bed' pattern.
    """
    ${path_bin}/compareBed.sh --data ${bed_footprints} --motifs ${motif_list} --fasta ${fasta} -o ${name}_unknown.bed -min ${params.min_size_fp} -max ${params.max_size_fp}
    """
}
/*
 * Stub process: receives a BED file from bed_for_clustering and is declared
 * to emit [name, '*.bed'] into bed_for_motif_esitmation.
 * The script body is still empty.
 *
 * NOTE(review): `name` is used in the output declaration but is not bound by
 * any input of this process - confirm where it should come from.
 */
process clustering {
    conda "${path_env}"

    input:
    file(bed) from bed_for_clustering

    output:
    set name, file('*.bed') into bed_for_motif_esitmation

    script:
    """
    """
}
/*
 * Converts a BED file into one FASTA file per cluster by calling
 * bed_to_fasta.R with the BED file, the name and params.min_seq.
 *
 * Input : [name, bed] from bed_for_motif_esitmation
 * Output: '*.FASTA' into fasta_for_glam2 (also copied to the publishDir)
 */
process bed_to_clustered_fasta {
    conda "${path_env}"

    tag { name }
    publishDir '/mnt/agnerds/Rene.Wiegandt/10_Master/tmp/', mode: 'copy'

    input:
    set name, file(bed) from bed_for_motif_esitmation

    output:
    file('*.FASTA') into fasta_for_glam2

    script:
    """
    Rscript ${path_bin}/bed_to_fasta.R ${bed} ${name} ${params.min_seq}
    """
}
// Flatten the emitted FASTA lists into single files and pair every file with
// its base name: [simpleName, file].
fasta_for_glam2 = fasta_for_glam2
    .flatten()
    .map { fasta -> [fasta.simpleName, fasta] }
/*
 * Runs GLAM2 on each FASTA file.
 * Generates motifs through alignment and scoring of the best local matches.
 *
 * params.motif_min_len and params.motif_max_len are passed as -a and -b;
 * results are written to the task directory (-O .).
 *
 * Input : [name, fasta] from fasta_for_glam2
 * Output: [name, '*.meme'] into meme_for_tomtom and meme_for_filter
 */
process glam2 {
    conda "${path_env}"

    tag { name }

    input:
    set name, file(fasta) from fasta_for_glam2

    output:
    set name, file('*.meme') into meme_for_tomtom, meme_for_filter

    script:
    """
    glam2 n ${fasta} -O . -a ${params.motif_min_len} -b ${params.motif_max_len} -z 5
    """
}
/*
 * Runs Tomtom on the MEME files generated by GLAM2.
 * Tomtom searches the motifs in the given database.
 *
 * The text output is post-processed with sed: lines starting with '#' and
 * empty lines are removed before it is written to <name>_known_motif.tsv.
 *
 * Input : [name, meme, jaspar_db] (every MEME file combined with the database)
 * Output: [name, '*.tsv'] into tsv_for_filter
 */
process tomtom {
    conda "${path_env}"

    tag { name }
    publishDir '/mnt/agnerds/Rene.Wiegandt/10_Master/tmp/', mode: 'copy'

    input:
    set name, file(meme), file(jaspar_db) from meme_for_tomtom.combine(db_for_tomtom)

    output:
    set name, file('*.tsv') into tsv_for_filter

    script:
    """
    tomtom ${meme} ${jaspar_db} -thresh ${params.tomtom_treshold} -text --norc | sed '/^#/ d' | sed '/^\$/d' > ${name}_known_motif.tsv
    """
}
// Join the MEME and TSV channels on their shared name, giving
// [name, meme, tsv] items.
for_filter = meme_for_filter.join(tsv_for_filter)

// Keep only those items whose TSV file has at most one line; the surviving
// items are written to both meme_for_scan and check.
for_filter
    .filter { id, memeFile, tsvFile -> tsvFile.readLines().size() <= 1 }
    .into { meme_for_scan; check }
/*
 * Prints a message when the channel 'check' is empty.
 *
 * An empty channel is replaced by the single value 'EMPTY'; the `when` guard
 * lets the task run only for that value, so nothing is printed if at least
 * one item passed the filter.
 */
process check_for_unknown_motifs {
    echo true

    input:
    val x from check.ifEmpty('EMPTY')

    when:
    x == 'EMPTY'

    script:
    """
    echo '>>> STOPPED: No unknown Motifs were found.'
    """
}
/*
 * Extracts the best (first) motif from each MEME file by calling
 * get_best_motif.py, which writes <name>_best.meme.
 *
 * Input : [name, meme, tsv] from meme_for_scan (the TSV is staged but not
 *         passed to the script)
 * Output: [name, '*_best.meme'] into best_motif
 */
process get_best_motif {
    conda "${path_env}"

    input:
    set name, file(meme), file(tsv) from meme_for_scan

    output:
    set name, file('*_best.meme') into best_motif

    script:
    """
    python ${path_bin}/get_best_motif.py ${meme} ${name}_best.meme
    """
}
// Pair every best motif with the genome FASTA: [name, meme, fasta].
best_motif
    .combine(fa_scan)
    .set { files_for_genome_scan }

/*
 * Stub process: takes a best-motif MEME file together with the genome FASTA
 * and is declared to emit BED files into bed_for_uropa and
 * bed_for_cluster_quality. The script body is still empty.
 *
 * The output pattern is '*.bed', as in the other processes of this script;
 * the former '.bed' only matched a file literally named ".bed".
 */
process genome_scan {
    conda "${path_env}"

    input:
    set name, file(meme), file(fasta) from files_for_genome_scan

    output:
    file('*.bed') into bed_for_uropa, bed_for_cluster_quality

    script:
    """
    """
}
/*
 * Stub process: receives a BED file from bed_for_cluster_quality and is
 * declared to emit '*.bed' into bed_for_final_filter.
 * The script body is still empty.
 */
process cluster_quality {

    input:
    file(bed) from bed_for_cluster_quality

    output:
    file('*.bed') into bed_for_final_filter

    script:
    """
    """
}
/*
 * Calls RegGTFExtractor.py with params.organism and params.tissues and
 * collects the resulting '*.gtf' file(s) into gtf_for_uropa.
 *
 * NOTE(review): the publishDir target 'Path' looks like a placeholder -
 * confirm the intended directory.
 */
process create_GTF {
    conda "${path_env}"

    publishDir 'Path', mode: 'copy'

    output:
    file('*.gtf') into gtf_for_uropa

    script:
    """
    python ${path_bin}/RegGTFExtractor.py ${params.organism} --tissue ${params.tissues}
    """
}
// Pair the filtered BED file with the GTF file.
bed_for_final_filter
    .combine(gtf_for_uropa)
    .set { uropa_in }

/*
 * Creates the configuration file for UROPA.
 * Takes the template and replaces the bed and gtf placeholders with the
 * actual paths.
 *
 * Input : bed and gtf values from uropa_in, the template from config
 * Output: 'uropa.config.final' into uropa_config
 *
 * NOTE(review): uropa_in.toList() emits one list holding all [bed, gtf]
 * items; confirm that `set val(bed), val(gtf)` receives the intended values.
 */
process create_uropa_config {
    publishDir '/mnt/agnerds/Rene.Wiegandt/10_Master/', mode: 'copy'

    input:
    set val(bed), val(gtf) from uropa_in.toList()
    file(conf) from config

    output:
    // Must match the file the script writes. The script's name is kept
    // (rather than writing 'uropa.config') so a template that is itself
    // named uropa.config cannot be overwritten while sed is reading it.
    file('uropa.config.final') into uropa_config

    script:
    // '|' is the sed delimiter because the substituted values are file
    // paths: any '/' inside them would end a '/'-delimited expression early.
    """
    sed -- 's|placeholder_gtf|${gtf}|g; s|placeholder_bed|${bed}|g' ${conf} > uropa.config.final
    """
}
/*
 * Stub process: receives the generated configuration from uropa_config and
 * is declared to emit the pair ['*_allhits.txt', '*_finalhits.txt'] into
 * uropa_for_filter. The script body is still empty.
 */
process UROPA {

    input:
    file(config) from uropa_config

    output:
    set file("*_allhits.txt"), file("*_finalhits.txt") into uropa_for_filter

    script:
    """
    """
}
/*
 * Stub process: inputs, outputs and script are not implemented yet.
 */
process filter {

    input:

    output:

    script:
    """
    """
}