pipeline.nf

//!/usr/bin/env nextflow

// Input channels, one per pipeline parameter.
// bigWig inputs are emitted as [simpleName, path] pairs so the name travels with the file.
Channel
	.fromPath(params.input)
	.map { bigwig -> [bigwig.simpleName, bigwig] }
	.set { bigwig_input }

Channel
	.fromPath(params.bed)
	.set { bed_input }

// The genome FASTA is split into three named channels, the motif database into two.
Channel
	.fromPath(params.genome_fasta)
	.into { fa_overlap; fa_scan; fa_overlap_2 }

Channel
	.fromPath(params.jaspar_db)
	.into { db_for_motivscan; db_for_tomtom }

Channel
	.fromPath(params.config)
	.set { config }

// Pair every bigWig tuple with every BED file: [name, bigWig, bed].
bigwig_input
	.combine(bed_input)
	.set { footprint_in }

// Calls footprint peaks for one bigWig/BED pair by running call_peaks.py.
// Input : [name, bigWig, bed] tuples from 'footprint_in'.
// Output: [name, BED file(s)] on 'bed_for_overlap_with_TFBS'.
process footprint_extraction {
	conda "${path_env}"

	tag{name}
	// Double quotes are required: a single-quoted Groovy string is never interpolated,
	// so '${out}' would publish into a directory literally called "${out}".
	publishDir "${out}", mode: 'copy', pattern: '*.bed'
	publishDir '/mnt/agnerds/Rene.Wiegandt/log', mode: 'copy', pattern: '*.log'

	input:
	set name, file (bigWig), file (bed) from footprint_in

	output:
	set name, file ('*.bed') into bed_for_overlap_with_TFBS

	script:
	// Window length, step size and percentage are taken from the pipeline parameters.
	"""
	python ${path_bin}/call_peaks.py --bigwig ${bigWig} --bed ${bed} --output_file ${name}_called_peaks.bed --window_length ${params.window_length} --step ${params.step} --percentage ${params.percentage}
	"""
}

// Check whether this step has to be executed.
// Placeholder process: receives the genome FASTA ('fa_overlap') and the motif database
// ('db_for_motivscan') and is declared to emit BED file(s) on 'known_TFBS_for_overlap'.
process extract_known_TFBS {
	conda "${path_env}"

	input:
	file (fasta) from fa_overlap
	file (db) from db_for_motivscan

	output:
	file ('*.bed') into known_TFBS_for_overlap

	script:
	// NOTE(review): the script body is still empty, so nothing here creates the
	// declared '*.bed' output yet.
	"""
	"""
}


// Attach each known-TFBS item and the genome FASTA to every [name, bed] footprint tuple.
bed_for_overlap_with_TFBS
	.combine(known_TFBS_for_overlap)
	.combine(fa_overlap_2)
	.set { for_overlap }


// Compares the called footprints against the known TFBS motifs with compareBed.sh.
// Input : [name, footprint BED, known-motif BED(s), genome FASTA] from 'for_overlap'.
// Output: BED file(s) on 'bed_for_clustering'.
process overlap_with_known_TFBS {
	conda "${path_env}"

	input:
	// 'for_overlap' is built from [name, bed] tuples, so the name is the first element
	// and has to be declared here; otherwise every following element is shifted by one.
	set name, file (bed_footprints), val (bed_motifs), file (fasta) from for_overlap

	output:
	file ('*.bed') into bed_for_clustering

	script:
	// 'def' keeps the variable local to the task instead of putting it into the
	// script-wide binding that is shared between parallel tasks.
	// The replaceAll strips whitespace and list brackets from the stringified value.
	def motif_list = bed_motifs.toString().replaceAll(/\s|\[|\]/,"")
	// NOTE(review): the output name replaces the previously undefined
	// 'name_placeholder'; adjust the suffix if compareBed.sh expects something else.
	"""
	${path_bin}/compareBed.sh --data ${bed_footprints} --motifs ${motif_list} --fasta ${fasta} -o ${name}_unknown.bed -min ${params.min_size_fp} -max ${params.max_size_fp}
	"""
}


// Placeholder for the clustering step: receives BED file(s) from 'bed_for_clustering'
// and is declared to emit [name, BED file(s)] on 'bed_for_motif_esitmation'.
// NOTE(review): 'name' is part of the output tuple but is not declared as an input of
// this process, and the script body is still empty.
process clustering {
	conda "${path_env}"

	input:
	file (bed) from bed_for_clustering

	output:
	set name, file ('*.bed') into bed_for_motif_esitmation

	script:
	"""
	"""
}


// Convert the clustered BED file into FASTA files by running bed_to_fasta.R.
// Input : [name, bed] tuples from 'bed_for_motif_esitmation'.
// Output: the generated '*.FASTA' file(s) on 'fasta_for_glam2'.
process bed_to_clustered_fasta {
    publishDir '/mnt/agnerds/Rene.Wiegandt/10_Master/tmp/', mode: 'copy'
    conda "${path_env}"
    tag { name }

    input:
    set name, file(bed) from bed_for_motif_esitmation

    output:
    file('*.FASTA') into fasta_for_glam2

    script:
    // Arguments passed to the R script: BED file, name, params.min_seq.
    """
    Rscript ${path_bin}/bed_to_fasta.R ${bed} ${name} ${params.min_seq}
    """
}


// Flatten the emitted file lists and key each FASTA file by its simple name: [name, fasta].
fasta_for_glam2 = fasta_for_glam2
	.flatten()
	.map { fasta -> [fasta.simpleName, fasta] }


// Run GLAM2 on each FASTA file.
// GLAM2 generates motifs through alignment and scoring of the best local matches.
// Input : [name, fasta] tuples from 'fasta_for_glam2'.
// Output: [name, '*.meme' file(s)] on 'meme_for_tomtom' and 'meme_for_filter'.
process glam2 {
    conda "${path_env}"
    tag { name }

    input:
    set name, file(fasta) from fasta_for_glam2

    output:
    set name, file('*.meme') into meme_for_tomtom, meme_for_filter

    script:
    // -O . writes into the task directory; -a / -b are taken from the motif length params.
    """
    glam2 n ${fasta} -O . -a ${params.motif_min_len} -b ${params.motif_max_len} -z 5
    """
}


// Run Tomtom on the MEME files generated by GLAM2.
// Tomtom searches the motifs in the given motif database.
// Input : [name, meme, jaspar_db] tuples (every MEME tuple combined with the database).
// Output: [name, '*.tsv' file(s)] on 'tsv_for_filter'.
process tomtom {
    publishDir '/mnt/agnerds/Rene.Wiegandt/10_Master/tmp/', mode: 'copy'
    conda "${path_env}"
    tag { name }

    input:
    set name, file(meme), file(jaspar_db) from meme_for_tomtom.combine(db_for_tomtom)

    output:
    set name, file('*.tsv') into tsv_for_filter

    script:
    // The two sed calls delete lines starting with '#' and empty lines from the text output.
    """
    tomtom ${meme} ${jaspar_db} -thresh ${params.tomtom_treshold} -text --norc | sed '/^#/ d' | sed '/^\$/d' > ${name}_known_motif.tsv
    """
}


// Join the MEME and TSV channels on their shared name, then filter on the TSV line count.
// Only tuples whose TSV file has at most one line are written to the next channels
// ('meme_for_scan' and 'check').
for_filter = meme_for_filter.join(tsv_for_filter)
for_filter
    .filter { name, meme, tsv -> tsv.readLines().size() <= 1 }
    .into { meme_for_scan; check }


// Print an error message if the channel 'check' is empty.
// ifEmpty() injects the marker value 'EMPTY' in that case, and the 'when' guard makes
// sure the message is only echoed for that marker.
process check_for_unknown_motifs {
    echo true

    input:
    val x from check.ifEmpty('EMPTY')

    when:
    x == 'EMPTY'

    script:
    """
    echo '>>> STOPPED: No unknown Motifs were found.'
    """
}


// Extract the best (first) motif from each MEME file with get_best_motif.py.
// Input : [name, meme, tsv] tuples from 'meme_for_scan'.
// Output: [name, '*_best.meme'] on 'best_motif'.
process get_best_motif {
	conda "${path_env}"

	input:
	set name, file (meme), file (tsv) from meme_for_scan

	output:
	set name, file ('*_best.meme') into best_motif

	script:
	// Arguments: input MEME file, then the name of the file to write.
	"""
	python ${path_bin}/get_best_motif.py ${meme} ${name}_best.meme
	"""
}


// Attach the genome FASTA to every [name, best motif] tuple.
best_motif
	.combine(fa_scan)
	.set { files_for_genome_scan }


// Placeholder for the genome scan step.
// Input : [name, meme, fasta] tuples from 'files_for_genome_scan'.
// Output: BED file(s) on 'bed_for_uropa' and 'bed_for_cluster_quality'.
process genome_scan {
	conda "${path_env}"

	input:
	set name, file(meme), file(fasta) from files_for_genome_scan

	output:
	// Glob pattern, as in the other processes; the literal '.bed' would only match a
	// file that is named exactly ".bed".
	file ('*.bed') into bed_for_uropa, bed_for_cluster_quality

	script:
	// The scan command has not been implemented yet.
	"""
	"""
}


// Placeholder for the cluster quality step: receives BED file(s) from
// 'bed_for_cluster_quality' and is declared to emit BED file(s) on 'bed_for_final_filter'.
// NOTE(review): the script body is still empty.
process cluster_quality {

	input:
	file (bed) from bed_for_cluster_quality

	output:
	file ('*.bed') into bed_for_final_filter

	script:
	"""
	"""
}


// Runs RegGTFExtractor.py and emits the resulting '*.gtf' file(s) on 'gtf_for_uropa'.
// The process has no channel input; organism and tissues are read from params.
process create_GTF {
	publishDir 'Path', mode: 'copy'
	conda "${path_env}"

	output:
	file('*.gtf') into gtf_for_uropa

	script:
	"""
	python ${path_bin}/RegGTFExtractor.py ${params.organism} --tissue ${params.tissues}
	"""
}


// Pair every BED item from the cluster quality step with the GTF file(s).
bed_for_final_filter
	.combine(gtf_for_uropa)
	.set { uropa_in }


// Create the configuration file for UROPA.
// Takes the template and replaces the bed- and gtf-placeholders with the actual paths.
// Output: the filled-in configuration on 'uropa_config'.
process create_uropa_config {

	publishDir '/mnt/agnerds/Rene.Wiegandt/10_Master/', mode: 'copy'

	input:
	// NOTE(review): toList() collects all [bed, gtf] pairs into one list before the
	// tuple is destructured; confirm that 'bed' and 'gtf' receive the intended values.
	set val(bed), val(gtf) from uropa_in.toList()
	file (conf) from config

	output:
	// Must match the file name the script writes; the script redirects into
	// 'uropa.config.final', which also avoids overwriting a template that is itself
	// called 'uropa.config'.
	file ('uropa.config.final') into uropa_config

	script:
	// '|' is used as the sed delimiter because the substituted values are file paths
	// that contain '/'.
	"""
	sed -- 's|placeholder_gtf|${gtf}|g; s|placeholder_bed|${bed}|g' ${conf} > uropa.config.final
	"""
}


// Placeholder for the UROPA run: receives the configuration file from 'uropa_config'
// and is declared to emit the '*_allhits.txt' / '*_finalhits.txt' pair on 'uropa_for_filter'.
// NOTE(review): the script body is still empty.
process UROPA {

	input:
	file (config) from uropa_config

	output:
	set file ("*_allhits.txt"), file ("*_finalhits.txt") into uropa_for_filter

	script:
	"""
	"""
}


// Placeholder for the final filter step.
// NOTE(review): neither inputs, outputs nor a script have been defined yet.
process filter {

	input:

	output:

	script:
	"""
	"""
}
	//!/usr/bin/env nextflow

	// Input channels, one per pipeline parameter.
	// bigWig inputs are emitted as [simpleName, path] pairs so the name travels with the file.
	Channel
		.fromPath(params.input)
		.map { bigwig -> [bigwig.simpleName, bigwig] }
		.set { bigwig_input }

	Channel
		.fromPath(params.bed)
		.set { bed_input }

	// The genome FASTA is split into three named channels, the motif database into two.
	Channel
		.fromPath(params.genome_fasta)
		.into { fa_overlap; fa_scan; fa_overlap_2 }

	Channel
		.fromPath(params.jaspar_db)
		.into { db_for_motivscan; db_for_tomtom }

	Channel
		.fromPath(params.config)
		.set { config }

	// Pair every bigWig tuple with every BED file: [name, bigWig, bed].
	bigwig_input
		.combine(bed_input)
		.set { footprint_in }

	// Calls footprint peaks for one bigWig/BED pair by running call_peaks.py.
	// Input : [name, bigWig, bed] tuples from 'footprint_in'.
	// Output: [name, BED file(s)] on 'bed_for_overlap_with_TFBS'.
	process footprint_extraction {
		conda "${path_env}"

		tag{name}
		// Double quotes are required: a single-quoted Groovy string is never interpolated,
		// so '${out}' would publish into a directory literally called "${out}".
		publishDir "${out}", mode: 'copy', pattern: '*.bed'
		publishDir '/mnt/agnerds/Rene.Wiegandt/log', mode: 'copy', pattern: '*.log'

		input:
		set name, file (bigWig), file (bed) from footprint_in

		output:
		set name, file ('*.bed') into bed_for_overlap_with_TFBS

		script:
		// Window length, step size and percentage are taken from the pipeline parameters.
		"""
		python ${path_bin}/call_peaks.py --bigwig ${bigWig} --bed ${bed} --output_file ${name}_called_peaks.bed --window_length ${params.window_length} --step ${params.step} --percentage ${params.percentage}
		"""
	}

	// Check whether this step has to be executed.
	// Placeholder process: receives the genome FASTA ('fa_overlap') and the motif database
	// ('db_for_motivscan') and is declared to emit BED file(s) on 'known_TFBS_for_overlap'.
	process extract_known_TFBS {
	conda "${path_env}"

	input:
	file (fasta) from fa_overlap
	file (db) from db_for_motivscan

	output:
	file ('*.bed') into known_TFBS_for_overlap

	script:
	// NOTE(review): the script body is still empty, so nothing here creates the
	// declared '*.bed' output yet.
	"""
	"""
	}


	// Attach each known-TFBS item and the genome FASTA to every [name, bed] footprint tuple.
	bed_for_overlap_with_TFBS
		.combine(known_TFBS_for_overlap)
		.combine(fa_overlap_2)
		.set { for_overlap }


	// Compares the called footprints against the known TFBS motifs with compareBed.sh.
	// Input : [name, footprint BED, known-motif BED(s), genome FASTA] from 'for_overlap'.
	// Output: BED file(s) on 'bed_for_clustering'.
	process overlap_with_known_TFBS {
		conda "${path_env}"

		input:
		// 'for_overlap' is built from [name, bed] tuples, so the name is the first element
		// and has to be declared here; otherwise every following element is shifted by one.
		set name, file (bed_footprints), val (bed_motifs), file (fasta) from for_overlap

		output:
		file ('*.bed') into bed_for_clustering

		script:
		// The pattern is an alternation (whitespace OR '[' OR ']'); the pipes must not be
		// escaped, otherwise only the literal sequence "<space>|[|]" would be removed.
		// 'def' keeps the variable local to the task instead of putting it into the
		// script-wide binding that is shared between parallel tasks.
		def motif_list = bed_motifs.toString().replaceAll(/\s|\[|\]/,"")
		// NOTE(review): the output name replaces the previously undefined
		// 'name_placeholder'; adjust the suffix if compareBed.sh expects something else.
		"""
		${path_bin}/compareBed.sh --data ${bed_footprints} --motifs ${motif_list} --fasta ${fasta} -o ${name}_unknown.bed -min ${params.min_size_fp} -max ${params.max_size_fp}
		"""
	}


	// Placeholder for the clustering step: receives BED file(s) from 'bed_for_clustering'
	// and is declared to emit [name, BED file(s)] on 'bed_for_motif_esitmation'.
	// NOTE(review): 'name' is part of the output tuple but is not declared as an input of
	// this process, and the script body is still empty.
	process clustering {
	conda "${path_env}"

	input:
	file (bed) from bed_for_clustering

	output:
	set name, file ('*.bed') into bed_for_motif_esitmation

	script:
	"""
	"""
	}


	// Convert the clustered BED file into FASTA files by running bed_to_fasta.R.
	// Input : [name, bed] tuples from 'bed_for_motif_esitmation'.
	// Output: the generated '*.FASTA' file(s) on 'fasta_for_glam2'.
	process bed_to_clustered_fasta {
		publishDir '/mnt/agnerds/Rene.Wiegandt/10_Master/tmp/', mode: 'copy'
		conda "${path_env}"
		tag { name }

		input:
		set name, file(bed) from bed_for_motif_esitmation

		output:
		file('*.FASTA') into fasta_for_glam2

		script:
		// Arguments passed to the R script: BED file, name, params.min_seq.
		"""
		Rscript ${path_bin}/bed_to_fasta.R ${bed} ${name} ${params.min_seq}
		"""
	}


	// Flatten the emitted file lists and key each FASTA file by its simple name: [name, fasta].
	fasta_for_glam2 = fasta_for_glam2
		.flatten()
		.map { fasta -> [fasta.simpleName, fasta] }


	// Run GLAM2 on each FASTA file.
	// GLAM2 generates motifs through alignment and scoring of the best local matches.
	// Input : [name, fasta] tuples from 'fasta_for_glam2'.
	// Output: [name, '*.meme' file(s)] on 'meme_for_tomtom' and 'meme_for_filter'.
	process glam2 {
		conda "${path_env}"
		tag { name }

		input:
		set name, file(fasta) from fasta_for_glam2

		output:
		set name, file('*.meme') into meme_for_tomtom, meme_for_filter

		script:
		// -O . writes into the task directory; -a / -b are taken from the motif length params.
		"""
		glam2 n ${fasta} -O . -a ${params.motif_min_len} -b ${params.motif_max_len} -z 5
		"""
	}


	// Run Tomtom on the MEME files generated by GLAM2.
	// Tomtom searches the motifs in the given motif database.
	// Input : [name, meme, jaspar_db] tuples (every MEME tuple combined with the database).
	// Output: [name, '*.tsv' file(s)] on 'tsv_for_filter'.
	process tomtom {
		conda "${path_env}"

		tag{name}

		publishDir '/mnt/agnerds/Rene.Wiegandt/10_Master/tmp/', mode: 'copy'

		input:
		set name, file (meme), file (jaspar_db) from meme_for_tomtom.combine(db_for_tomtom)

		output:
		set name, file ('*.tsv') into tsv_for_filter

		script:
		// The pipes must be real shell pipes: an escaped '\|' is not a pipe, so the sed
		// filters would never run. The two sed calls delete lines starting with '#' and
		// empty lines ('\$' keeps the dollar sign from being read as a Groovy variable).
		"""
		tomtom ${meme} ${jaspar_db} -thresh ${params.tomtom_treshold} -text --norc | sed '/^#/ d' | sed '/^\$/d' > ${name}_known_motif.tsv
		"""
	}


	// Join the MEME and TSV channels on their shared name, then filter on the TSV line count.
	// Only tuples whose TSV file has at most one line are written to the next channels
	// ('meme_for_scan' and 'check').
	for_filter = meme_for_filter.join(tsv_for_filter)
	for_filter
		.filter { name, meme, tsv -> tsv.readLines().size() <= 1 }
		.into { meme_for_scan; check }


	// Print an error message if the channel 'check' is empty.
	// ifEmpty() injects the marker value 'EMPTY' in that case, and the 'when' guard makes
	// sure the message is only echoed for that marker.
	process check_for_unknown_motifs {
		echo true

		input:
		val x from check.ifEmpty('EMPTY')

		when:
		x == 'EMPTY'

		script:
		"""
		echo '>>> STOPPED: No unknown Motifs were found.'
		"""
	}


	// Extract the best (first) motif from each MEME file with get_best_motif.py.
	// Input : [name, meme, tsv] tuples from 'meme_for_scan'.
	// Output: [name, '*_best.meme'] on 'best_motif'.
	process get_best_motif {
		conda "${path_env}"

		input:
		set name, file (meme), file (tsv) from meme_for_scan

		output:
		set name, file ('*_best.meme') into best_motif

		script:
		// Arguments: input MEME file, then the name of the file to write.
		"""
		python ${path_bin}/get_best_motif.py ${meme} ${name}_best.meme
		"""
	}


	// Attach the genome FASTA to every [name, best motif] tuple.
	best_motif
		.combine(fa_scan)
		.set { files_for_genome_scan }


	// Placeholder for the genome scan step.
	// Input : [name, meme, fasta] tuples from 'files_for_genome_scan'.
	// Output: BED file(s) on 'bed_for_uropa' and 'bed_for_cluster_quality'.
	process genome_scan {
		conda "${path_env}"

		input:
		set name, file(meme), file(fasta) from files_for_genome_scan

		output:
		// Glob pattern, as in the other processes; the literal '.bed' would only match a
		// file that is named exactly ".bed".
		file ('*.bed') into bed_for_uropa, bed_for_cluster_quality

		script:
		// The scan command has not been implemented yet.
		"""
		"""
	}


	// Placeholder for the cluster quality step: receives BED file(s) from
	// 'bed_for_cluster_quality' and is declared to emit BED file(s) on 'bed_for_final_filter'.
	// NOTE(review): the script body is still empty.
	process cluster_quality {

	input:
	file (bed) from bed_for_cluster_quality

	output:
	file ('*.bed') into bed_for_final_filter

	script:
	"""
	"""
	}


	// Runs RegGTFExtractor.py and emits the resulting '*.gtf' file(s) on 'gtf_for_uropa'.
	// The process has no channel input; organism and tissues are read from params.
	process create_GTF {
		publishDir 'Path', mode: 'copy'
		conda "${path_env}"

		output:
		file('*.gtf') into gtf_for_uropa

		script:
		"""
		python ${path_bin}/RegGTFExtractor.py ${params.organism} --tissue ${params.tissues}
		"""
	}


	// Pair every BED item from the cluster quality step with the GTF file(s).
	bed_for_final_filter
		.combine(gtf_for_uropa)
		.set { uropa_in }


	// Create the configuration file for UROPA.
	// Takes the template and replaces the bed- and gtf-placeholders with the actual paths.
	// Output: the filled-in configuration on 'uropa_config'.
	process create_uropa_config {

		publishDir '/mnt/agnerds/Rene.Wiegandt/10_Master/', mode: 'copy'

		input:
		// NOTE(review): toList() collects all [bed, gtf] pairs into one list before the
		// tuple is destructured; confirm that 'bed' and 'gtf' receive the intended values.
		set val(bed), val(gtf) from uropa_in.toList()
		file (conf) from config

		output:
		// Must match the file name the script writes; the script redirects into
		// 'uropa.config.final', which also avoids overwriting a template that is itself
		// called 'uropa.config'.
		file ('uropa.config.final') into uropa_config

		script:
		// '|' is used as the sed delimiter because the substituted values are file paths
		// that contain '/'.
		"""
		sed -- 's|placeholder_gtf|${gtf}|g; s|placeholder_bed|${bed}|g' ${conf} > uropa.config.final
		"""
	}


	// Placeholder for the UROPA run: receives the configuration file from 'uropa_config'
	// and emits the all-hits / final-hits file pair on 'uropa_for_filter'.
	process UROPA {

		input:
		file (config) from uropa_config

		output:
		// Glob patterns: without the leading '*' only files named exactly
		// "_allhits.txt" / "_finalhits.txt" would be matched.
		set file ("*_allhits.txt"), file ("*_finalhits.txt") into uropa_for_filter

		script:
		// The UROPA command has not been implemented yet.
		"""
		"""
	}


	// Placeholder for the final filter step.
	// NOTE(review): neither inputs, outputs nor a script have been defined yet.
	process filter {

	input:

	output:

	script:
	"""
	"""
	}