#!/usr/bin/env nextflow

// TOuCAN.nf
//
// Default values of all command line parameters.
// Every value can be overridden on the command line with --<name> <value>.
// (The shebang has to be the very first line of the script; the bare file name
// that used to precede it is kept as a comment.)

params.mode = "help"
params.in = ""
params.out = ""
params.path_matrix = "" //only for --mode plot
params.bam = ""
params.safe_all_files = 0
params.check_res_maps = 0
params.organism = "mm10"
params.pn = "Project"

// parameter for T2C plots
params.chr = ""
params.start = 0
params.end = 0
params.score_min = 0
params.score_max = 0

params.int_score = 50

// parameter for hic

params.aln = "bwa"
params.bin = 100000

// Help mode: print the usage message and leave without running any process.
// The documented --bin default matches the actual default of params.bin (100000).
if(params.mode == "help" || params.mode == "h"){
log.info """
==========================================

                                  `| |
                      -"- __       |  '
                    /  ,-|  ```'-, ||   `
                   /  /@)|_       `-,  |
                  /  ( ~ \\_```""-- ,_',  :	.___________.  ______    __    __    ______     ___      .__   __.
               _ '    \\   /```''''""`^		|           | /  __  \\  |  |  |  |  /      |   /   \\     |  \\ |  |
              /        | /        `|   :	`---|  |----`|  |  |  | |  |  |  | |  ,----'  /  ^  \\    |   \\|  |
             /         \\/          | |		    |  |     |  |  |  | |  |  |  | |  |      /  /_\\  \\   |  . `  |
            /      __/  |          |  |		    |  |     |  `--'  | |  `--'  | |  `----./  _____  \\  |  |\\   |
           |     __/  / |          |     |	    |__|      \\______/   \\______/   \\______/__/     \\__\\ |__| \\__|
 "- _      |    _/ _/=_//          ||  :
 '   `~~`~^|   /=/-_/_/`~~`~~~`~~`~^~`
   `> '     \\  ~/_/_ /"   `     ` '     |			Targeted chrOmatin Capture ANalysis
   ' ,'~^~`^|   |~^`^~~`~~~^~~~^~`;       '				Version 0.1
   -'       | | |                  \\ ` : `
            |  :|                  | : |
      jgs   |:  |                  ||    '
            | |/                   |  |   :
            |_/                    |    '

==========================================

Usage: nextflow run TOuCAN.nf --in [Input Path] --out [Output Path] --mode [Modi] [options]


--mode help, h					- For showing this help message

--mode plot					- Plot data [currently only T2C plots]
	parameters:
		--path_matrix [PATH]		- Path to directory with *.normalized.bed files.
		--chr [chr1,chr2,...,chrY]	- On which chromosome is the target region.
		--start [INT]			- Start of target region.
		--end [INT]			- End of target region.
		--score_min [INT]		- Score range: minimum. [default: 0]
		--score_max [INT]		- Score range: maximium. [default: autoscale]
                --pn [STRING]                  - Name of the Project [default: 'Project']

--mode T2C					- Full T2C analysis
	parameters:
		--in [PATH]			- Path to directory with fastq / fastq.gz files.
                --bam [PATH]                   - Path to directory with bam files. [if given --in [PATH] is ignored]
		--out [PATH]			- Path to output directory.
		--safe_all_files [0|1]		- If 1 safes all temporary files into "OUTPUT/02_analysis/". [default: 0]
		--check_res_maps [0|1]		- If 1 prints first 5 lines of every file from restriction maps. [default: 0]
		--chr [chr1,chr2,...,chrY]	- On which chromosome is the target region.
		--start [INT]			- Start of target region.
		--end [INT]			- End of target region.
		--score_min [INT]		- Score range: minimum. [default: 0]
		--score_max [INT]		- Score range: maximium. [default: autoscale]
                --pn [STRING]                  - Name of the Project [default: 'Project']

--mode uropa			   	       - Uropa annoation [T2C]
	parameters:
		--in [PATH]			- Path to directory with *.normalized.bed
		--out [PATH]			- Path to output directory.
		--chr [chr1,chr2,...,chrY]	- On which chromosome is the target region.
		--start [INT]			- Start of target region.
		--end [INT]			- End of target region.
                --pn [STRING]                  - Name of the Project [default: 'Project']

--mode multiplot			       - creating a plot with interaction map, TAD graph and gene annotation
	parameters:
		--in [PATH]			- Path to directory with *.normalized.bed
		--out [PATH]			- Path to output directory.
		--chr [chr1,chr2,...,chrY]	- On which chromosome is the target region.
		--start [INT]			- Start of target region.
		--end [INT]			- End of target region.
                --score_min [INT]		- Score range: minimum. [default: 0]
                --score_max [INT]		- Score range: maximium. [default: autoscale]
                --pn [STRING]                  - Name of the Project [default: 'Project']

--mode HiC 					- Full HiC analysis
	parameters:
		--in [PATH]			- Path to directory with fastq / fastq.gz files.
		--out [PATH]			- Path to output directory.
		--aln [bwa|bowtie2]		- Choose alignment tool. [default: bwa]
		--bin [INT]			- Binsize [default: 100000]

Skip Aligment -> BAM files as Input:
The BAM files need to be from this Pipeline with follwing
file extension: "[NAME].(normalized|matrix).bam" !

Skip creating restritction maps:
After creating the restriction maps write their path into the config file to skip
creating the restrction maps again. [path_T2C_restriction_maps]

"""
System.exit(0)
}


// Startup banner: logs the selected mode, project name, enzymes, target region
// and the tool/genome paths that the run is going to use.
log.info """
=======================================================================================================================

                                  `| |
                      -"- __       |  '
                    /  ,-|  ```'-, ||   `
                   /  /@)|_       `-,  |
                  /  ( ~ \\_```""-- ,_',  :	.___________.  ______    __    __    ______     ___      .__   __.
               _ '    \\   /```''''""`^		|           | /  __  \\  |  |  |  |  /      |   /   \\     |  \\ |  |
              /        | /        `|   :	`---|  |----`|  |  |  | |  |  |  | |  ,----'  /  ^  \\    |   \\|  |
             /         \\/          | |		    |  |     |  |  |  | |  |  |  | |  |      /  /_\\  \\   |  . `  |
            /      __/  |          |  |		    |  |     |  `--'  | |  `--'  | |  `----./  _____  \\  |  |\\   |
           |     __/  / |          |     |	    |__|      \\______/   \\______/   \\______/__/     \\__\\ |__| \\__|
 "- _      |    _/ _/=_//          ||  :
 '   `~~`~^|   /=/-_/_/`~~`~~~`~~`~^~`
   `> '     \\  ~/_/_ /"   `     ` '     |			Targeted chrOmatin Capture ANalysis
   ' ,'~^~`^|   |~^`^~~`~~~^~~~^~`;       '				Version 0.1
   -'       | | |                  \\ ` : `
            |  :|                  | : |
      jgs   |:  |                  ||    '
            | |/                   |  |   :
            |_/
             |    '

=======================================================================================================================

Mode: ${params.mode}
Project: ${params.pn}

Enzyme A: ${params.enzyme_a_name} -> ${params.enzyme_a_sequence}
Enzyme B: ${params.enzyme_b_name} -> ${params.enzyme_b_sequence}

Target Region
Chromosome: ${params.chr}
Start: ${params.start}
End: ${params.end}

Dependencies
Python: ${path_python}
BWA: ${path_bwa}
samtools: ${path_samtools}
bedtools: ${path_bedtools}

Path to bin: ${path_bin}
Path to genome: ${path_genome}
Path to restriction maps: ${path_T2C_restriction_maps}
"""
// Report the BAM directory only when one was actually supplied.
// `! params.bam == ""` negated params.bam first and then compared the resulting
// boolean with "", which is never true; compare the value itself instead.
if ( params.bam != "" ){
    log.info """
    Path to bam files: ${params.bam}
    """
}

// Logs the BWA / samtools options and the read group labels taken from params.
log.info """
Minor fixed parameters for BWA and SAMtools
BWA options: ${params.bwa_T2C_options}
samtools sort options: ${params.sort_options}
Library Label: ${params.library_label}
Platform Label: ${params.platform_label}
Center Label: ${params.center_label}

=======================================================================================================================
"""

// Output directory without a trailing slash, used as prefix of every publishDir.
outpath = params.out?.endsWith('/') ? params.out.substring( 0, params.out.length() -1 ) : params.out

// All mode specific input channels start empty and are only filled for the
// mode that needs them, so every process can be declared unconditionally.
filtered_interaction_bed_for_multiplot_1 = Channel.empty()

multiplot_bed_1 = Channel.empty()
matrix_to_plot = Channel.empty()
res_map = Channel.empty()
bam_f = Channel.empty()
interaction_uropa = Channel.empty()
fastqGzFiles = Channel.empty()

// These flags are read by `when:` guards of several processes. Give them a
// defined value up front: in plot mode the block further down is skipped and
// the flags would otherwise not exist at all.
create_res_map = false
create_bam = false

// plot mode: existing matrices are plotted directly
if("${params.path_matrix}" != "" && params.mode == "plot"){
	matrix_to_plot = Channel
		.fromPath("${params.path_matrix}/*.{normalized,matrix}.bed")
		.map {it -> [it.simpleName, it]}
}


// uropa mode: normalized matrices are the entry point of the annotation
if(params.mode == "uropa"){
	interaction_uropa = Channel
		.fromPath("${params.in}/*.normalized.bed")
		.map {it -> [it.simpleName, it]}
}

// multiplot mode: the same normalized matrices feed the plot itself and the
// interaction table / TAD branch
if(params.mode == "multiplot"){
	multiplot_bed_1 = Channel
		.fromPath("${params.in}/*.normalized.bed")
		.map {it -> [it.simpleName, it]}

	filtered_interaction_bed_for_multiplot_1 = Channel
		.fromPath("${params.in}/*.normalized.bed")
		.map {it -> [it.simpleName, it]}
}

if (params.mode != "plot"){

	// Reuse existing restriction maps when their path is configured
	if ( "${path_T2C_restriction_maps}" != "" ){
		res_map = Channel.fromPath("${path_T2C_restriction_maps}/*.bed")
		create_res_map = false
	} else {
		create_res_map = true
	}

	// Reuse existing BAM files when --bam is given, otherwise start from fastq
	if ( "${params.bam}" != "" ){
		bam_f = Channel
			.fromPath("${params.bam}/*.bam")
			.map {it -> [it.simpleName, it]}
		create_bam = false
	} else {
		create_bam = true
		fastqGzFiles = Channel.fromPath("${params.in}/*.fastq*")
	}
}

// Accepted values for --mode, --aln and --organism (used by check_parameters)
modeList = ["T2C", "HiC", "help", "h", "plot", "uropa", "multiplot"]
alnList = ["bowtie2", "bwa", "histat"]
organismList = ["mm10", "mm9", "hg19"]


/*
* Validates the command line parameters.
* Every failed check prints an error message and terminates the run with a
* non-zero exit status so that calling scripts can detect the failure.
*/
process check_parameters {

	script:
	// --mode has to be one of the known modes
	if (!(params.mode in modeList )){
		println("ERROR: Mode not existing.\n\nList of all modes:\n\t-" + modeList.join("\n\t-") + "\n\nYour Input: '${params.mode}'")
		System.exit(1)
	}

	// --organism has to be one of the supported organisms
	if (!(params.organism in organismList )){
		println("ERROR: Organism not supported.\n\nList of all supported organisms:\n\t-" + organismList.join("\n\t-") + "\nYour Input: '${params.organism}'")
		System.exit(1)
	}

	// --aln is only relevant for HiC
	if (!(params.aln in alnList ) && params.mode == "HiC"){
		println("ERROR: aln not existing.\n\nList of all supported alignment tools:\n\t-" + alnList.join("\n\t-") + "\n\nYour Input: '${params.aln}'")
		System.exit(1)
	}

	if (params.mode == "T2C"){

		// chr1-chr22 (hg19 has 22 autosomes), chrX and chrY are accepted
		if (!("${params.chr}" ==~ /chr([1-9]|1[0-9]|2[0-2]|X|Y)/ )){
			println("ERROR: No or false chromosome parameter given.\nCorrect example: 'chr4'.\nYour Input: '${params.chr}'")
			System.exit(1)
		}

		if ((params.start == null) || (params.end == null)){
			println("ERROR: --start/--end not given")
			System.exit(1)
		}

		if (!("${params.start}" ==~ /\d+/ )){
			println("ERROR: --start needs to be a number. Your Input: '${params.start}'")
			System.exit(1)
		}

		if (!("${params.end}" ==~ /\d+/ )){
			println("ERROR: --end needs to be a number. Your Input: '${params.end}'")
			System.exit(1)
		}

		if(params.start.toInteger() >= params.end.toInteger()){
			println("ERROR: start cannot be bigger/equal than end ")
			System.exit(1)
		}

		// score_min may be negative, score_max may not
		if (!("${params.score_min}" ==~ /[-]?\d+/ )){
			println("ERROR: --score_min needs to be a number. Your Input: '${params.score_min}'")
			System.exit(1)
		}

		if (!("${params.score_max}" ==~ /\d+/ )){
			println("ERROR: --score_max needs to be a number. Your Input: '${params.score_max}'")
			System.exit(1)
		}

		if (("${params.check_res_maps}" != "0") && ("${params.check_res_maps}" != "1")){
			println("ERROR: --check_res_maps is a boolean parameter. Enter 0 or 1!")
			System.exit(1)
		}
	}

	if ( "${params.safe_all_files}" != "0" && "${params.safe_all_files}" != "1"){
		println("ERROR: --safe_all_files is a boolean parameter. Enter 0 or 1!")
		System.exit(1)
	}

	if (params.mode == "HiC") {
		if ( ! params.bin.toString().isInteger() ) {
			println("ERROR: --bin has to be an Integer! Your input: " + params.bin )
			System.exit(1)
		}
	}

	// Every mode except plot reads its data from --in or --bam
	if (params.mode != "plot"){
		if(params.in == "" && params.bam == ""){
			println("ERROR: Input/BAM path is empty")
			System.exit(1)
		}
		if(params.in != "" && !(file(params.in).exists())){
			println("ERROR: Input Path does not exist!")
			System.exit(1)
		}
	}

	// Every publishDir is built from outpath; an empty value would publish
	// relative to the file system root, so it is rejected for all modes.
	if(outpath == ""){
		println("ERROR: Missing output path")
		System.exit(1)
	}
	"""
	echo check
	"""
}

/*
* Checks if the given output directory exists.
* If it exists the user is asked whether it may be deleted; if not, the
* pipeline stops. (HiC mode only)
* NOTE: the process is currently disabled by its `when:` guard.
*/
process check_output_directory {

	when:
	params.mode == 'not_used'

	script:
	valIn = false
	// The directory that is checked is the (slash-trimmed) output path
	outfile = file(outpath)
	if(outfile.exists()){
		while(!valIn){
			// Without an interactive console (or on end of input) no answer can be
			// obtained; this is treated like a refusal instead of looping forever.
			answer = System.console()?.readLine('Output directory "' + outfile + '" already exists: delete directory? (Y/n) \n')
			answer = answer?.trim()?.toLowerCase()
			if(answer == null || answer == 'n'){
				println("WARNING: It is required to delete given directory or choose a differnt directory!\nExiting...")
				System.exit(0)
			} else if (answer == 'y' || answer == '') {
				// An empty answer selects the default (Y)
				valIn = true
				println("Deleting directory...")
			} else {
				println("Error: Invalid input!")
			}
		}
	}

	if(valIn){
		"""
		if [ -d $outpath ]; then
			rm -r ${outpath}
		fi
		echo 'Check of output directory done'
		"""
	} else {
		"""
		echo 'Check of output directory done'
		"""
	}
}

/*
* T2C Step 1
* Creates the restriction maps needed for the T2C analysis:
*  - restriction sites / fragments of enzyme A alone
*  - restriction sites / fragments of enzyme A together with enzyme B (if given)
*  - restriction_targets.bed: fragments whose line contains the enzyme A sequence
*/
process create_restriction_maps_for_T2C {

	// 'copy' instead of 'move': the bed files are still read by downstream
	// processes, moving them would leave dangling inputs behind.
	publishDir "${outpath}/${params.pn}_restriction_maps/", mode: 'copy'

	output:
	file ("*.bed") into restriction_maps

	// Both conditions have to be combined in ONE expression: a `when:` block
	// only uses the value of its last statement.
	when:
	params.mode == 'T2C' && create_res_map == true

	script:
	// Enzyme B is optional
	if(params.enzyme_b_sequence == ""){
		enzyme_b_option = ""
	} else {
		enzyme_b_option = "--sequence " + params.enzyme_b_sequence
	}

	"""
	${path_python}/python ${path_bin}/bin/match_sequences.py \
		--fasta ${path_genome} \
		--bed "restriction_sites_${params.enzyme_a_name}.bed" \
		--sequence ${params.enzyme_a_sequence}

	${path_python}/python ${path_bin}/bin/target_fragments.py \
		--input "restriction_sites_${params.enzyme_a_name}.bed" \
		--output "restriction_fragments_${params.enzyme_a_name}.bed"

	${path_python}/python ${path_bin}/bin/match_sequences.py \
		--fasta ${path_genome} \
		--bed restriction_sites.bed \
		--sequence ${params.enzyme_a_sequence} \
		${enzyme_b_option}

	${path_python}/python ${path_bin}/bin/target_fragments.py \
		--input restriction_sites.bed \
		--output restriction_fragments.bed

	grep ${params.enzyme_a_sequence} restriction_fragments.bed > restriction_targets.bed
	"""
}

// Pre-existing restriction maps followed by freshly created ones, split into
// three channels: the optional check, the target overlap and the fragment intersection.
res_map.concat(restriction_maps).into {final_res_map_for_check; final_res_map; final_res_map_for_intersection}


/*
* Optional check of the restriction maps: prints the first five lines of
* every restriction map file (only with --check_res_maps 1).
*/
process check_restriction_maps {

	tag{params.enzyme_a_name}

	input:
	file (bed) from final_res_map_for_check

	// The task's stdout has to be declared as an output channel; without this
	// declaration the `result` channel used below does not exist.
	output:
	stdout result

	when:
	params.check_res_maps == 1

	script:
	"""
	head -5 $bed
	"""
}
// Print the captured lines of every checked file
result.subscribe {println it}


/*
* Decompresses gzipped fastq files. Files that already end in .fastq are
* passed on unchanged; every other file ending fails the task.
* Emits [sample name without sample extension, file name without ending, fastq].
*/
process decompress {

	tag{fastqGz}

	// 'copy' instead of 'move': the fastq files are still read by the aligner
	if(params.safe_all_files == 1){
		publishDir "${outpath}/${params.pn}_fastq/", mode: 'copy', overwrite: 'false'
	}

	input:
	file (fastqGz) from fastqGzFiles

	output:
	set basisname, fastqName ,file ("${fastqName}.fastq") into decompressedfastq, decompressedfastq_for_HiC_bwa,
                                                               decompressedfastq_for_HiC_bowtie2, decompressedfastq_for_HiC_histat

	script:
	fastqName = fastqGz.simpleName
	basisname = fastqName.replaceFirst("${params.sample_extension}","")
	// Text after the last '.' of the file name ('' when there is no '.')
	fastqBasic = fastqGz.name.lastIndexOf('.').with {it != -1 ? fastqGz.name.substring(it+1):''}
	if(fastqBasic == 'gz')
		"""
		zcat -d $fastqGz > ${fastqName}.fastq
		"""
	else if (fastqBasic == 'fastq')
		"""
		echo $fastqGz
		"""
	else
		// Fail explicitly instead of ending "successfully" without an output file
		"""
		echo "ERROR: Invalid datatype / file ending"
		exit 1
		"""
}

/*
* Makes sure that the BWA index of the genome exists.
* `bwa index` is only run when at least one of the index files
* (.amb, .ann, .bwt, .pac, .sa) is missing next to the genome file.
* The marker file 'f' signals downstream processes that the index is ready.
*/
process create_bwa_index {

	output:
	file ('f') into index
	file ('f') into index_hic_bwa

	when:
	(params.mode == "T2C" || (params.mode == "HiC" && params.aln == "bwa")) && create_bam == true

	script:
	// true only if every index file is already present
	index_complete = ["amb", "ann", "bwt", "pac", "sa"].every { ext ->
		file("${path_genome}.${ext}").exists()
	}
	if(!index_complete)
		"""
		${path_bwa}/bwa index ${path_genome}
		echo done > f
		"""
	else
		"""
		echo done > f
		"""

}


/*
* T2C Step 2
* Aligns every fastq file with `bwa aln` and emits
* [sample name, .sai file, fastq file].
*/
process bwa_aln {

	tag{fastq}

	executor 'local'

	// 'copy' instead of 'move': the .sai files are still read by bwa sampe
	if(params.safe_all_files == 1){
		publishDir "${outpath}/${params.pn}_additional_files/sai/", mode: 'copy', pattern: '*.sai'
	}

	input:
	set basisname, fastqName ,file (fastq) from decompressedfastq
	// The index marker is emitted exactly once. Read as a plain queue channel it
	// would limit this process to a single task; first() turns it into a value
	// that is reused for every fastq file.
	file f from index.first()

	when:
	params.mode == "T2C" && create_bam == true

	output:
	set basisname, file ('*.sai'), file (fastq) into sai_files

	script:
	"""
	${path_bwa}/bwa aln \
		${params.bwa_T2C_options} \
		${path_genome} \
		$fastq > ${fastqName}.sai
	"""
}

/*
* T2C Step 3
* Combines the two mates of a sample (grouped by sample name) into one
* paired-end SAM file with `bwa sampe`.
*/
process bwa_sampe_to_create_sam {

	// 'copy' instead of 'move': the SAM file is still read by the next step
	if(params.safe_all_files == 1){
		publishDir "${outpath}/${params.pn}_additional_files/sam/", mode: 'copy'
	}

	tag{name}

	input:
	set name, sai, fastq from sai_files.groupTuple()

	output:
	set name, file ("${name}.sam") into samfiles

	// One combined expression: a `when:` block only uses its last statement
	when:
	params.mode == "T2C" && create_bam == true

	script:
	// The grouped lists are in arrival order; find out which entry is mate 1
	fullname = sai[0].simpleName
	if(fullname.contains("_R1")){
		sai_R1 = sai[0]
		fastq_R1 = fastq[0]
		sai_R2 = sai[1]
		fastq_R2 = fastq[1]
	} else {
		sai_R1 = sai[1]
		fastq_R1 = fastq[1]
		sai_R2 = sai[0]
		fastq_R2 = fastq[0]
	}

	// bwa sampe expects <in1.sai> <in2.sai> <in1.fq> <in2.fq>: the fastq files
	// have to be given in the same mate order as the .sai files.
 	"""
	${path_bwa}/bwa sampe \
		-A \
		${path_genome} \
		${sai_R1} ${sai_R2} \
		${fastq_R1} ${fastq_R2} > ${name}.sam
	"""
}


/*
* T2C Step 4
* Adds a read group (ID, CN, LB, SM, PL, DS) to every SAM file.
*/
process samtools_addreplacerg {

	tag{sam}

	// 'copy' instead of 'move': the SAM file is still read by samtools sort
	if(params.safe_all_files == 1){
		publishDir "${outpath}/${params.pn}_additional_files/sam/", mode: 'copy'
	}

	input:
	set name, file (sam) from samfiles

	output:
	set name, file ("${name}_rg.sam") into samfiles_read_groups

	// One combined expression: a `when:` block only uses its last statement
	when:
	params.mode == "T2C" && create_bam == true

	script:
	// Value of the DS (description) field of the read group
	ds = "${path_working}/*.srt.bam"
	"""
	${path_samtools}/samtools addreplacerg \
				-r "ID:${name}" \
				-r "CN:${params.center_label}" \
				-r "LB:${params.library_label}" \
				-r "SM:${name}" \
				-r "PL:${params.platform_label}" \
				-r "DS:${ds}" \
				-o ${name}_rg.sam \
				${sam}
	"""
}


/*
* T2C Step 5
* Sorts the SAM file with read groups and writes it as <name>.bam.
*/
process samtools_sort {

	executor 'local'

	// 'copy' instead of 'move': the BAM file is still read by flagstat and
	// the indexing step
	publishDir "${outpath}/${params.pn}_bam/", mode: 'copy'

	tag{rgsam}

	input:
	set name, file (rgsam) from samfiles_read_groups

	output:
	set name, file ("${name}.bam") into bamfiles

	// One combined expression: a `when:` block only uses its last statement
	when:
	params.mode == "T2C" && create_bam == true

	script:
	"""
	${path_samtools}/samtools sort \
					   ${params.sort_options} \
					   -T ${name}_tmp_sort \
					   -o ${name}.bam \
					   ${rgsam}
	"""
}

// BAM files given with --bam followed by freshly aligned ones, duplicated for
// the indexing branch (finalbam) and the flagstat branch.
bam_f.concat(bamfiles).into {finalbam; bamfiles_for_flagstats;}


// T2C Step 6
// Writes the `samtools flagstat` output of every BAM file to <name>.flagstat.txt
// and publishes it to the "<project>_stats" directory.
process samtools_flagstat {

	publishDir "${outpath}/${params.pn}_stats/", mode: 'move'

	tag{bam}

	input:
	set name, file (bam) from bamfiles_for_flagstats

	output:
	file ("${name}.flagstat.txt") into flagstats

	when:
	params.mode == "T2C"

	script:
	"""
	${path_samtools}/samtools flagstat ${bam} > ${name}.flagstat.txt
	"""
}


/*
* T2C Step 7
* Indexes the BAM files and emits [sample name, index file, BAM file].
*/
process index_bamfiles {

	tag{bam}

	// 'copy' instead of 'move': index and BAM are still read by the next step
	if(params.safe_all_files == 1){
		publishDir "${outpath}/${params.pn}_additional_files/bai/", mode: 'copy'
	}

	input:
	set name, file (bam) from finalbam

	output:
	set name, file ("${name}.bam.bai"), file ("${bam}") into bam_bai_files

	when:
	params.mode == "T2C"

	script:
	// `samtools index` does not print the index to stdout. Redirecting stdout
	// left <name>.bam.bai empty whenever the BAM file was not called <name>.bam
	// (e.g. BAM files given with --bam); the index file name is therefore
	// passed as the explicit output argument.
	"""
	${path_samtools}/samtools index ${bam} ${name}.bam.bai
	"""
}


/*
* T2C Step 8
* Gets the reads overlapping the targets: every BAM file is combined with the
* "restriction_targets" map and processed by add_fragment_to_bam.py.
*/
process overlapping_reads_targets {

	tag{bam}

	executor 'local'

	// 'copy' instead of 'move': the target BAM is still read by the next step
	if(params.safe_all_files == 1){
		publishDir "${outpath}/${params.pn}_additional_files/bam/", mode: 'copy'
	}

	input:
	set name, file (bai), file (bam), file (res_target) from bam_bai_files.combine(final_res_map.filter{ it.simpleName == "restriction_targets"})

	output:
	set name, file ("${name}.target.bam") into target_bam_files

	when:
	params.mode == "T2C"

	script:
	"""
	${path_python}/python ${path_bin}/bin/add_fragment_to_bam.py \
		--input ${bam} \
		--output ${name}.target.bam \
		--bed ${res_target}
	"""
}


/*
* T2C Step 9
* Sorts the target BAM file by read name (samtools sort -n) in preparation
* for the matrix.
*/
process namesorting_bam_files {

	tag{target_bam}

	// 'copy' instead of 'move': the sorted BAM is still read by the next step
	if(params.safe_all_files == 1){
		publishDir "${outpath}/${params.pn}_additional_files/bam/", mode: 'copy'
	}

	input:
	set name, file (target_bam) from target_bam_files

	output:
	set name, file ("${name}.namesort.bam") into namesort_bam_files

	when:
	params.mode == "T2C"

	script:
	"""
	${path_samtools}/samtools sort \
		-n \
		-o ${name}.namesort.bam \
		${target_bam}
	"""
}


/*
* T2C step 10
* SAMtools flags to filter alignments
* 0x0001	p	the read is paired in sequencing
* 0x0004	u	the query sequence itself is unmapped
* 0x0008	U	the mate is unmapped
* 0x0100	s	the alignment is not primary
* 0x0800	S	the alignment is supplementary
*
* Reads are required to have 0x0001 set (-f) and none of the other flags (-F).
* The result is written with header (-h) through `samtools view` without -b.
*/
process filter_aligments {

	tag{namesort_bam}

	// 'copy' instead of 'move': the filtered file is still read by the next step
	if(params.safe_all_files == 1){
		publishDir "${outpath}/${params.pn}_additional_files/bam/", mode: 'copy'
	}

	input:
	set name, file (namesort_bam) from namesort_bam_files

	output:
	set name , file ("${name}.filtered.bam") into filtered_bam_files

	when:
	params.mode == "T2C"

	script:
	"""
	${path_samtools}/samtools view -h \
		-f 0x0001 \
		-F 0x0004 \
		-F 0x0008 \
		-F 0x0100 \
		-F 0x0800 \
		${namesort_bam} > ${name}.filtered.bam
	"""
}


/*
* T2C step 11
* Creates the first raw matrix: the filtered alignments are streamed into
* create_matrix.py which writes <name>.reads.bed.
*/
process create_matrix {

	tag{filtered_bam}

	// 'copy' instead of 'move': the bed file is still read by two later steps
	if(params.safe_all_files == 1){
		publishDir "${outpath}/${params.pn}_additional_files/bed/", mode: 'copy'
	}

	input:
	set name , file (filtered_bam) from filtered_bam_files

	output:
	set name, file ("${name}.reads.bed") into reads_bed_files, reads_bed_files_for_stats

	when:
	params.mode == "T2C"

	script:
	"""
	cat ${filtered_bam} | \
	${path_python}/python ${path_bin}/bin/create_matrix.py -o ${name}.reads.bed
	"""
}

/*
* T2C step 12
* Finishes the matrix file by creating a paired-end bed file on single
* base-pair level: the reads are sorted and aggregated by aggregate_matrix.py.
*/
process finish_raw_matrix {

	tag{reads_bed}

	// 'copy' instead of 'move': the matrix is still read by the next step
	if(params.safe_all_files == 1){
		publishDir "${outpath}/${params.pn}_additional_files/bed/", mode: 'copy'
	}

	input:
	set name, file (reads_bed) from reads_bed_files

	output:
	set name, file ("${name}.matrix.bed") into raw_matrix_bed_to_reassign

	when:
	params.mode == "T2C"

	script:
	"""
	cat ${reads_bed} | \
		sort -k 1,1 -k 2,2n -k 4,4 -k 5,5n | \
			${path_python}/python ${path_bin}/bin/aggregate_matrix.py -o ${name}.matrix.bed
	"""
}


/*
* T2C step 13
* Splits the matrix into its two locations: columns 1-3 go to <name>.loca.bed,
* columns 4-6 to <name>.locb.bed. The matrix itself is passed on as well.
*/
process	split_matrix_for_re_evaluation {

	tag{matrix_bed}

	// 'copy' instead of 'move': all files are still read by the next step
	if(params.safe_all_files == 1){
		publishDir "${outpath}/${params.pn}_additional_files/bed/", mode: 'copy'
	}

	input:
 	set name, file (matrix_bed) from raw_matrix_bed_to_reassign

	output:
	set name, file ("${name}.loca.bed"), file ("${name}.locb.bed"), file (matrix_bed) into split_matrix

	when:
	params.mode == "T2C"

	script:
	"""
	awk -F "\t" '//{print \$1 "\t" \$2 "\t" \$3}' ${matrix_bed} > ${name}.loca.bed
	awk -F "\t" '//{print \$4 "\t" \$5 "\t" \$6}' ${matrix_bed} > ${name}.locb.bed
	"""
}


/*
* T2C step 14
* Intersects both location bed files with the restriction fragments of
* enzyme A (bedtools intersect -wao).
*/
process intersect_beds_to_fragments {

	tag{name}

	// 'copy' instead of 'move': all files are still read by the next step
	if(params.safe_all_files == 1){
		publishDir "${outpath}/${params.pn}_additional_files/bed/", mode: 'copy'
	}

	input:
	set name, file (a_bed), file (b_bed), file (matrix), file (res_frag) from split_matrix.combine(final_res_map_for_intersection.filter{ it.simpleName == "restriction_fragments_${params.enzyme_a_name}"})

	output:
	set name, file ("${name}.frga.bed"), file ("${name}.frgb.bed"), file (matrix) into fragments_bed

	when:
	params.mode == "T2C"

	script:
	// sed strips trailing blanks from every line. bedtools has to read the
	// cleaned lines from stdin: with `-a <file>` the piped sed output was
	// never used (and the trailing `p` would have printed changed lines twice).
	"""
	sed 's/[[:blank:]]*\$//' ${a_bed} | ${path_bedtools}/bedtools intersect -a stdin -b ${res_frag} -wao > ${name}.frga.bed
	sed 's/[[:blank:]]*\$//' ${b_bed} | ${path_bedtools}/bedtools intersect -a stdin -b ${res_frag} -wao > ${name}.frgb.bed
	"""
}

/*
* T2C step 15
* Creates the pre-normalized matrix: reassign_matrix.py gets the matrix and
* both fragment intersections, its output is stored as <name>.temp.bed.
*/
process reassign_matrix {

	tag{matrix}

	// 'copy' instead of 'move': the bed file is still read by the next step
	if(params.safe_all_files == 1){
		publishDir "${outpath}/${params.pn}_additional_files/bed/", mode: 'copy'
	}

	input:
	set name, file (frga), file (frgb), file (matrix) from fragments_bed

	output:
	set name, file ("${name}.temp.bed") into temp_bed_file

	when:
	params.mode == "T2C"

	script:
	"""
	${path_python}/python ${path_bin}/bin/reassign_matrix.py \
		-m ${matrix} \
		-a ${frga} \
		-b ${frgb} > ${name}.temp.bed
	"""
}

/*
* T2C step 16
* Aggregates the reassigned matrix. The result <name>_raw.reassigned.bed is
* emitted twice: keyed by the sample name (for normalization) and keyed by
* "<name>_raw" (for plotting the raw matrix).
*/
process finalize_matrix {

	tag{temp_bed}

	// 'copy' instead of 'move': the matrix is still read by normalization and plotting
	publishDir "${outpath}/${params.pn}_Interactions/01_all_interactions_bed_raw/", mode: 'copy', pattern: "${name_raw}.reassigned.bed"

	input:
	set name, file (temp_bed) from temp_bed_file

	output:
	set name, file ("${name_raw}.reassigned.bed") into reassigned_matrix
	set name_raw, file ("${name_raw}.reassigned.bed") into reassigned_matrix_raw

	when:
	params.mode == "T2C"

	script:
	name_raw =  "${name}_raw"
	"""
	cat ${temp_bed} | \
		sort -k 1,1 -k 2,2n -k 4,4 -k 5,5n |\
			${path_python}/python ${path_bin}/bin/aggregate_matrix.py -o ${name_raw}.reassigned.bed
	"""
}

/*
* T2C step 17
* Normalizes the matrix with normalize_paired_bed.R using params.norm_method.
* The normalized matrix feeds the plot, the multiplot and the interaction table.
*/
process normalize_matrix {

	tag{matrix}

	// 'copy' instead of 'move': the matrix is still read by three later steps
	publishDir "${outpath}/${params.pn}_Interactions/02_all_interactions_bed_norm/", mode: 'copy'

	input:
	set name, file (matrix) from reassigned_matrix

	output:
	set name, file ("${name}.normalized.bed") into normalized_matrix, multiplot_bed_2, filtered_interaction_bed_for_multiplot_2

	when:
	params.mode == "T2C"

	script:
	"""
	Rscript ${path_bin}/bin/normalize_paired_bed.R --bedfile ${matrix} \
    	--outfile ${name}.normalized.bed \
    	--library ${path_bin}/RLIB/ \
    	--method ${params.norm_method}
	"""
}
// Everything that gets an interaction plot: matrices given in plot mode,
// normalized matrices and raw reassigned matrices.
to_plot = matrix_to_plot.concat(normalized_matrix).concat(reassigned_matrix_raw)


/*
* T2C step 18
* Creating Interaction matrix plot
* Runs plot_cis_matrix.R for every matrix with the target region and score
* range given on the command line; the PDF is published to "plot/" in plot
* mode and to "<project>_plots/interaction_plot/" otherwise.
*/
process plotting {

	tag{name}

	if(params.mode == "plot" ){
		publishDir "${outpath}/plot/", mode: 'move'
	} else {
		publishDir "${outpath}/${params.pn}_plots/interaction_plot/", mode: 'move'
	}

	input:
	set name, file (matrix) from to_plot

	output:
	file ("*.pdf")

	when:
	params.mode == "T2C" || params.mode == "plot"

	script:
	"""
	Rscript  ${path_bin}/bin/plot_cis_matrix.R \
	    --bedfile ${matrix} \
	    --outfile ${name}.cis.pdf \
	    --library ${path_bin}/RLIB/ \
	    --chromosome ${params.chr} \
	    --start ${params.start} \
	    --end ${params.end} \
	    --colour-minimum ${params.score_min} \
	    --colour-maximum ${params.score_max} \
		${params.plot_options_T2C}
	"""
}

// Input of the interaction table: normalized matrices from uropa mode,
// multiplot mode and the T2C analysis itself, concatenated into one channel.
filtered_interaction_bed_merged = filtered_interaction_bed_for_multiplot_1.concat(filtered_interaction_bed_for_multiplot_2)
filtered_interaction_bed_final_merge = interaction_uropa.concat(filtered_interaction_bed_merged)

/*
* Creates the interaction table of the target region from a normalized matrix
* (create_interaction_table.R). The last argument is an additional path in
* the workflow work directory; the UROPA configs refer to that path.
*/
process create_interaction_table {

	tag{name}

	// 'copy' instead of 'move': the table is still read by several later steps
	publishDir "${outpath}/${params.pn}_Interactions/03_target_region_interactions_bed/", mode: 'copy'

	input:
	set name, file (norm) from filtered_interaction_bed_final_merge

	output:
	set name, file ("${name}.filtered.interactions.bed") into interaction_bed_for_merge, to_ifmatrix, for_uropa_1, for_uropa_2

	when:
	params.mode == "T2C" || params.mode == "uropa" || params.mode == "multiplot"

	script:
	"""
	Rscript ${path_bin}/bin/create_interaction_table.R \
		${norm} ${name}.filtered.interactions.bed \
		${params.chr} ${params.start} ${params.end} ${params.int_score} \
		$workflow.workDir/${name}.filtered.interactions.bed
	"""
}
//interaction_empty.concat(interaction_check).into {for_uropa1; for_uropa2}

// Creates the statistics PDF of a sample from its reads bed file
// (create_T2C_stats.R) and publishes it to the "<project>_stats" directory.
process create_stats {

	tag{name}

	publishDir "${outpath}/${params.pn}_stats/", mode: 'move'

	input:
	set name, file (reads) from reads_bed_files_for_stats

	output:
	file ("${name}_stats.pdf")

	when:
	params.mode == "T2C"

	script:
	"""
	Rscript ${path_bin}/bin/create_T2C_stats.R \
		${reads} ${name}_stats.pdf \
		${params.chr} ${params.start} ${params.end} ${name}
	"""
}


/*
* Writes the UROPA configuration (JSON) for the filtered interactions of a
* sample. The bed entry points to the copy of the interaction table in the
* workflow work directory.
*/
process create_uropa_config_1 {

	tag{name}

	input:
	set name, file (int_bed) from for_uropa_1

	output:
	set name, file ("${name}_uropa_config") into uropa_config_1

	when:
	params.mode == "T2C" || params.mode == "uropa"


	script:
	// Every comma separated value of a setting is wrapped in escaped quotes;
	// settings with more than one value additionally become a JSON list.
	conf_list_final = [uropa_feature,uropa_anchor,uropa_strand,uropa_direction,
	                   uropa_filter_attr,uropa_attr_value,uropa_show_attr].collect { setting ->
		def quoted = "\\\"" + setting.replace(",","\\\",\\\"") + "\\\""
		quoted.split(",").size() > 1 ? "[" + quoted + "]" : quoted
	}

	"""
    echo "{ \
        \\"queries\\" : [ \
            { \
                \\"feature\\" : ${conf_list_final[0]}, \
                \\"feature.anchor\\" : ${conf_list_final[1]},  \
                \\"distance\\" : [ \
                    ${uropa_dist_1}, \
                    ${uropa_dist_2} \
                ], \
                \\"strand\\" : ${conf_list_final[2]}, \
                \\"direction\\" : ${conf_list_final[3]}, \
                \\"internals\\" : \\"True\\", \
                \\"filter.attribute\\" : ${conf_list_final[4]}, \
                \\"attribute.value\\" : ${conf_list_final[5]}, \
                \\"show.attributes\\" : ${conf_list_final[6]} \
            } \
        ], \
        \\"priority\\" : \\"FALSE\\", \
        \\"gtf\\" : \\"${path_gtf}\\", \
		\\"bed\\" : \\"$workflow.workDir/${name}.filtered.interactions.bed\\" \
	}" > ${name}_uropa_config
	"""
}

/*
* Runs UROPA with the first configuration; the output prefix is the sample name.
*/
process run_uropa_1 {

	tag{name}

	// 'copy' instead of 'move': the hit tables are still read by uropa_merge
	if(params.safe_all_files == 1){
		publishDir "${outpath}/${params.pn}_uropa/", mode: 'copy'
	}

	input:
	set name, file (config) from uropa_config_1

	output:
	set name, file ("${name}_allhits.txt"), file ("${name}_finalhits.txt") into uropa_1

	when:
	params.mode == "T2C" || params.mode == "uropa"

	script:
	"""
	uropa -i ${config} -d -p ${name} -t ${params.uropa_threads}
	"""
}

// Runs swap_fragments.R on the interaction table of a sample. The script gets
// the input table, the output name <name>.swap.interaction.bed and the same
// file name inside the workflow work directory.
process swap_fragments {

	tag{name}

	input:
	set name, file (int_bed) from for_uropa_2

	output:
	set name, file ("${name}.swap.interaction.bed") into swaped_interactions_bed

	when:
	params.mode == "T2C" || params.mode == "uropa"

	script:
	"""
	Rscript ${path_bin}/bin/swap_fragments.R ${int_bed} ${name}.swap.interaction.bed $workflow.workDir/${name}.swap.interaction.bed
	"""
}


/*
* Writes the UROPA configuration (JSON) for the SWAPPED interactions of a
* sample, i.e. for the second UROPA run.
*/
process create_uropa_config_2 {

	tag{name}

	input:
	set name, file (int_bed) from swaped_interactions_bed

	output:
	set name, file ("${name}_uropa_config_2") into uropa_config_2

	when:
	params.mode == "T2C" || params.mode == "uropa"

	script:

	config_parameters = [uropa_feature,uropa_anchor,uropa_strand,uropa_direction,
	                    uropa_filter_attr,uropa_attr_value,uropa_show_attr]

	// Wrap every comma separated value of a setting in escaped quotes
	conf_list = config_parameters.collect { par ->
		return "\\\"" + par.replace(",","\\\",\\\"") + "\\\""
	}

	// Settings with more than one value become a JSON list
	conf_list_final = conf_list.collect {p ->
		if(p.split(",").size() > 1) {
			return "[" + p + "]"
		}
		return p
	}

	// The bed entry has to name the swapped table that swap_fragments wrote into
	// the work directory. It used to name the un-swapped
	// <name>.filtered.interactions.bed, which made the second UROPA run a
	// repetition of the first one.
	"""
    echo "{ \
        \\"queries\\" : [ \
            { \
                \\"feature\\" : ${conf_list_final[0]}, \
                \\"feature.anchor\\" : ${conf_list_final[1]},  \
                \\"distance\\" : [ \
                    ${uropa_dist_1}, \
                    ${uropa_dist_2} \
                ], \
                \\"strand\\" : ${conf_list_final[2]}, \
                \\"direction\\" : ${conf_list_final[3]}, \
                \\"internals\\" : \\"True\\", \
                \\"filter.attribute\\" : ${conf_list_final[4]}, \
                \\"attribute.value\\" : ${conf_list_final[5]}, \
                \\"show.attributes\\" : ${conf_list_final[6]} \
            } \
        ], \
        \\"priority\\" : \\"FALSE\\", \
        \\"gtf\\" : \\"${path_gtf}\\", \
        \\"bed\\" : \\"$workflow.workDir/${name}.swap.interaction.bed\\" \
    }" > ${name}_uropa_config_2
	"""
}


/*
* Runs UROPA with the second configuration; the output prefix is "<name>_2".
*/
process run_uropa_2 {

	tag{name}

	// 'copy' instead of 'move': the hit tables are still read by uropa_merge
	if(params.safe_all_files == 1){
		publishDir "${outpath}/${params.pn}_uropa/", mode: 'copy'
	}

	input:
	set name, file (config) from uropa_config_2

	output:
	set name, file ("${name}_2_allhits.txt"), file ("${name}_2_finalhits.txt") into uropa_2

	when:
	params.mode == "T2C" || params.mode == "uropa"

	script:
	"""
	uropa -i ${config} -d -p ${name}_2 -t ${params.uropa_threads}
	"""
}
// Bring both UROPA results and the interaction table of the same sample
// together (matched on the sample name, element 0 of every tuple).
tmp = uropa_1.combine(uropa_2, by: 0)
final_uropa_set = tmp.combine(interaction_bed_for_merge, by: 0)


/*
* Merges the final hits of both UROPA runs with the interaction table of the
* sample (merge_uropa_interactions.R).
*/
process uropa_merge {

	tag{name}

	// 'copy' instead of 'move': the merged table is still read by the xlsx
	// writer and the viewpoint step
	publishDir "${outpath}/${params.pn}_Interactions/04_final_interactions_annotated/", mode: 'copy'

	input:
	set name, file (allhits_1), file (finalhits_1), file (allhits_2), file (finalhits_2), file (int_bed) from final_uropa_set

	output:
	set name, file ("${name}.finalhits.merged.txt") into merged_uropa
	file ("${name}.finalhits.merged.txt") into for_xlsx

	when:
	params.mode == "T2C" || params.mode == "uropa"

	script:
	"""
	Rscript ${path_bin}/bin/merge_uropa_interactions.R ${int_bed} ${finalhits_1} ${finalhits_2} ${name}.finalhits.merged.txt
	"""

}
// Collect all merged tables into one sorted list, emitted as a single item
xlsx_list = for_xlsx.toSortedList()


/*
* Writes one xlsx workbook "<project>.xlsx" from all merged UROPA tables.
* The tables are handed to the generator script as a comma separated list,
* sorted by their base name.
*/
process xlsx_writer {


    publishDir "${outpath}/", mode: 'move'

    input:
    val f from xlsx_list

    output:
    file ("${params.pn}.xlsx")

    when:
    params.mode == "T2C" || params.mode == "uropa"

    script:

    file_list = f.sort {it.getBaseName()}
    // join() builds the list directly from the paths. Cutting "[", "]" and ", "
    // out of the list's string form damaged paths that contain these characters.
    file_list_str = file_list.join(',')

    """
    ${path_python}/python ${path_bin}/bin/julianos_xlsxgen_vTOuCAN.py ${path_bin}/bin/julianos_config.json --out ${params.pn} --flist ${file_list_str}
    """

}


// Runs create_interaction_viewpoint.R on the merged UROPA table of a sample
// and publishes the resulting <name>.interactions.viewpoint.txt.
process create_viewpoint {

	tag{name}

	publishDir "${outpath}/${params.pn}_Interactions/05_interactions_viewpoint_annotated/", mode: 'move'

	input:
	set name, file (merged_finalhits) from merged_uropa

	output:
	file ("${name}.interactions.viewpoint.txt")

	when:
	params.mode == "T2C" || params.mode == "uropa"

	script:
	"""
	Rscript ${path_bin}/bin/create_interaction_viewpoint.R ${merged_finalhits} ${name}.interactions.viewpoint.txt
	"""
}

//to_ifmatrix = interaction_multiplot.concat(interaction_bed_TAD)

/*
 * Converts an interaction BED file into an interaction-frequency matrix
 * (<name>.ifm.txt) by calling bin/convert_to_IFMatrix.R.
 * The matrix is handed on through the ifmatrix channel.
 */
process convert_to_IFMatrix {

	tag{name}

	// The matrix is only published when the user asked to keep all files.
	// It must be copied, not moved: the file is read by the process that
	// consumes the ifmatrix channel and therefore has to remain in the task
	// work directory.
	if(params.safe_all_files == 1){
		publishDir "${outpath}/${params.pn}_additional_files/IFMatrix/", mode: 'copy'
	}

	input:
	set name, file (int_bed) from to_ifmatrix

	output:
	set name, file ("${name}.ifm.txt") into ifmatrix

	when:
	params.mode == "T2C" || params.mode == "multiplot"

	script:
	"""
	Rscript ${path_bin}/bin/convert_to_IFMatrix.R ${int_bed} ${name}.ifm.txt
	"""
}


/*
 * Runs bin/RobusTAD.R on the interaction-frequency matrix of each sample.
 * Every file matching TADBoundaryCalls_*.txt in the task directory is emitted,
 * keyed by sample name, into both tadbc and multiplot_TAD.
 */
process run_robusTAD {

	tag{name}

	// 'copy' instead of 'move': the boundary calls are read again by the
	// consumers of tadbc and multiplot_TAD, so they must stay in the task work
	// directory. 'move' is only safe for outputs nobody consumes afterwards.
	publishDir "${outpath}/${params.pn}_TAD/", mode: 'copy'

	input:
	set name, file (tfm) from ifmatrix

	output:
	set name, file ("TADBoundaryCalls_*.txt") into tadbc, multiplot_TAD

	when:
	params.mode == "T2C" || params.mode == "multiplot"

	script:
	"""
	Rscript ${path_bin}/bin/RobusTAD.R -i ${tfm} --header FALSE -n norm
	"""
}


/*
 * Renders the TAD boundary calls of each sample to <name>.TAD.pdf with
 * bin/plotTAD.R. The PDF is not passed to any channel and is moved into the
 * TAD plot directory.
 */
process plot_TAD_graph {

	publishDir "${outpath}/${params.pn}_plots/TAD/", mode: 'move'

	tag { name }

	input:
	set name, file(tad) from tadbc

	output:
	file("${name}.TAD.pdf")

	when:
	params.mode == "T2C" || params.mode == "multiplot"

	script:
	// Arguments: <TAD boundary calls> <output PDF>
	"""
	Rscript ${path_bin}/bin/plotTAD.R ${tad} ${name}.TAD.pdf
	"""

}

// Both BED sources are emitted one after the other on a single channel.
multiplot_bed_merged = multiplot_bed_1.concat(multiplot_bed_2)

/*
 * Draws the multi-track plot of one sample with bin/plot_multi.R, using the
 * plotting window and colour limits given on the command line (params.chr,
 * params.start, params.end, params.score_min, params.score_max) together with
 * the TAD boundary calls. All PNG files produced by the task are emitted into
 * for_pptx.
 *
 * NOTE(review): both input tuples bind the variable `name`, and the two
 * channels are consumed independently of each other. Confirm that their
 * emission order guarantees that the TAD calls and the BED file of one task
 * belong to the same sample.
 */
process multiplot {

	tag{name}

	// 'copy' instead of 'move': the PNG files are consumed again through the
	// for_pptx channel, so they have to remain in the task work directory.
	publishDir "${outpath}/${params.pn}_plots/multiplot/", mode: 'copy'

	input:
	set name, file (tad) from multiplot_TAD
	set name, file (bed) from multiplot_bed_merged

	output:
	file ("*.png") into for_pptx

	when:
	params.mode == "T2C" || params.mode == "multiplot"

	script:
	"""
	Rscript  ${path_bin}/bin/plot_multi.R \
	    --bedfile ${bed} \
	    --outfile ${name}.multi.png \
	    --library ${path_bin}/RLIB/ \
	    --chromosome ${params.chr} \
	    --start ${params.start} \
	    --end ${params.end} \
	    --colour-minimum ${params.score_min} \
	    --colour-maximum ${params.score_max} \
		--tad ${tad} \
        --organism ${params.organism} \
		${params.plot_options_T2C}
	"""
}
// Collect all plot images into one sorted list so that a single task builds
// the presentation.
for_pptx_list = for_pptx.toSortedList()


/*
 * Builds a PowerPoint file from all multiplot images by calling
 * bin/pptx_writer_vTOuCAN.py with a comma-separated file list. Every *.pptx
 * file written by the task is published to the output directory.
 *
 * The images arrive as a plain value (val), i.e. they are not staged into the
 * task directory; the script receives the paths exactly as emitted upstream.
 */
process pptx_writer {


    publishDir "${outpath}/", mode: 'move'

    input:
    val pptxList from for_pptx_list

    output:
    file ("*.pptx")

    when:
    params.mode == "T2C" || params.mode == "multiplot"

    script:
    // Order the images by base name (the incoming list is sorted by full path).
    def file_list = pptxList.sort { it.getBaseName() }
    // Build the comma-separated list with an explicit join. Stripping "[" and
    // "]" from the list's string form would damage any path that itself
    // contains a bracket or the sequence ", ".
    def file_list_str = file_list.collect { it.toString() }.join(',')
    """
    ${path_python}/python ${path_bin}/bin/pptx_writer_vTOuCAN.py ${path_bin}/bin/pptx_config.json --output ${params.pn} --flist ${file_list_str}
    """
}


//================================= HiC ========================================

/*
process create_restriction_maps_for_HiC {


	output:
	file ("rest_site_${params.enzyme_a_name}.bed") into restriction_map_for_hic

	when:
	params.mode == "HiC"

	script:
	"""
	findRestSite --fasta ${path_genome} --searchPattern ${params.enzyme_a_sequence} -o rest_site_${params.enzyme_a_name}.bed
	"""
} */

/*
if(params.aln == "bwa"){
	bowtie2_alignment = Channel.empty()
	//histat_alignment = Channel.empty()
} else if (params.aln == "bowtie2") {
	//histat_alignment = Channel.empty()
	bwa_alignment = Channel.empty()
} else if (params.mode == "histat") {
	bowtie2_alignment = Channel.empty()
	bwa_alignment = Channel.empty()
} */


/*
 * Aligns one decompressed FASTQ file against the reference genome with
 * `bwa mem` (options from params.bwa_HiC_options) and converts the alignment
 * stream to BAM. Emits (basisname, <fastqName>.bam) into bwa_alignment.
 */
process alignment_with_bwa {

	tag{fastq}

	input:
	set basisname, fastqName ,file (fastq) from decompressedfastq_for_HiC_bwa
	// `f` is not referenced by the script.
	// NOTE(review): presumably taken only so that the task waits for the BWA index; confirm.
	file f from index_hic_bwa

	output:
	set basisname, file ("${fastqName}.bam") into bwa_alignment

	when:
	params.mode == "HiC" && params.aln == "bwa"

	script:
	// samtools is told explicitly to read from stdin ("-"), as the bowtie2
	// alignment process does; without an input argument older samtools
	// releases only print their usage text.
	"""
	${path_bwa}/bwa mem \
		${params.bwa_HiC_options} \
		${path_genome} \
		$fastq | ${path_samtools}/samtools view -Shb - > ${fastqName}.bam
	"""
}


/*
 * Builds the bowtie2 index of the reference genome. The index base name is
 * the file name of the genome FASTA, and the index files are written into the
 * task directory.
 *
 * The index files are emitted so that the alignment process can stage them;
 * without an output declaration they would stay unreachable in this task's
 * work directory and the alignment would not wait for the build to finish.
 */
process bowtie2_index {

	output:
	// Matches both small (*.bt2) and large (*.bt2l) index files.
	file ("*.bt2*") into bowtie2_index_for_hic_alignment

	when:
	params.mode == "HiC" && params.aln == "bowtie2"

	script:
	def genome_file = file (path_genome)
	def genome_name = genome_file.name
	"""
	${path_bowtie2}/bowtie2-build ${path_genome} ${genome_name}
	"""
}


/*
 * Aligns one decompressed FASTQ file with bowtie2 against the index produced
 * by bowtie2_index and converts the alignment stream to BAM.
 * Emits (basisname, <fastqName>.bam) into bowtie2_alignment.
 */
process alignment_with_bowtie2 {

	tag{fastq}

	input:
	set basisname, fastqName ,file (fastq) from decompressedfastq_for_HiC_bowtie2
	// All index files are collected into one value so that every alignment
	// task gets the complete index staged into its work directory.
	file index_files from bowtie2_index_for_hic_alignment.collect()

	output:
	set basisname, file ("${fastqName}.bam") into bowtie2_alignment

	when:
	params.mode == "HiC" && params.aln == "bowtie2"

	script:
	// Same base name that bowtie2_index passes to bowtie2-build.
	def index_base = file (path_genome).name
	"""
	${path_bowtie2}/bowtie2 -x ${index_base} -U ${fastq} --very-sensitive -L 30 --score-min L,-0.6,-0.2 --end-to-end --reorder -p 12 \
	| ${path_samtools}/samtools view -Shb - > ${fastqName}.bam
	"""
}

// Alignments of whichever aligner was selected end up on one channel.
hic_alignment = bwa_alignment.concat(bowtie2_alignment) //.concat(histat_alignment)

/*
 * Builds the Hi-C contact matrix of one sample with hicBuildMatrix from the
 * two BAM files grouped under the same sample name.
 *
 * Emits (name, <name>_matrix.h5) into hic_matrix and
 * hic_matrix_for_diagnostic, and <name>.bam into hic_bam. The QC folder is
 * written directly below the output path, not into the task directory.
 */
process create_HiC_matrix {

	tag{name}

	input:
	// `bams` is received as a plain value: the BAM files are referenced by the
	// paths emitted upstream and are not staged into the task directory.
	set name, bams from hic_alignment.groupTuple()

	output:
	set name, file("${name}_matrix.h5") into hic_matrix, hic_matrix_for_diagnostic
	file ("${name}.bam") into hic_bam

	when:
	params.mode == "HiC"

	script:
	// Exactly two alignments are required; a missing mate would otherwise be
	// rendered as the literal text "null" in the command line.
	assert bams.size() == 2 : "create_HiC_matrix: expected 2 BAM files for '${name}', got ${bams.size()}"
	// groupTuple delivers the files in arrival order, which differs between
	// runs. Sorting a copy by file name makes the mate order reproducible.
	def mates = new ArrayList(bams).sort { it.name }
	def bam_r1 = mates[0]
	def bam_r2 = mates[1]
	"""
	hicBuildMatrix --samFiles ${bam_r1} ${bam_r2} \
				 --QCfolder ${outpath}/QC_${name}/  \
                 --binSize ${params.bin} \
				 -b ${name}.bam \
                 --restrictionSequence ${params.enzyme_a_sequence} \
                 -o ${name}_matrix.h5

	"""
}


/*
 * Produces the diagnostic plot of an uncorrected Hi-C matrix with
 * `hicCorrectMatrix diagnostic_plot` and moves <name>.png into the
 * diagnostic_plots directory. The output is not bound to any channel.
 */
process diagnostic_plot_of_HiC_Matrix {

	publishDir "${outpath}/diagnostic_plots/", mode: 'move'

	input:
	set name, file(matrix) from hic_matrix_for_diagnostic

	output:
	set name, file("${name}.png")

	when:
	params.mode == "HiC"

	script:
	"""
	hicCorrectMatrix diagnostic_plot -m ${matrix} -o ${name}.png
	"""
}

/*
 * Corrects the Hi-C matrix of one sample with `hicCorrectMatrix correct`
 * using the filter thresholds -1.5 and 5. Emits (name, <name>_corrected.h5)
 * into hic_matrix_corrected; the file is copied (not moved) to the publish
 * directory because it is consumed downstream.
 */
process correct_matrix {

    publishDir "${outpath}/matrix/", mode: 'copy'

    input:
    set name, file (matrix) from hic_matrix

    output:
    set name, file("${name}_corrected.h5") into hic_matrix_corrected

    // Same mode guard as every other HiC process in this section.
    when:
    params.mode == "HiC"

    script:
    """
    hicCorrectMatrix correct -m ${matrix} --filterThreshold -1.5 5 -o ${name}_corrected.h5
    """
}


/*
 * Plots the corrected Hi-C matrix of one sample with hicPlotMatrix and moves
 * <name>.hic.matrix.png into the plot directory.
 *
 * NOTE(review): this is published below params.out, whereas the neighbouring
 * processes use outpath; confirm that both point to the same location.
 * NOTE(review): the chromosome order is a fixed list (chr1..chr19, chrX, chrY)
 * and does not follow params.organism; confirm that this is intended.
 */
process plot_matrix {

	publishDir "${params.out}/plot/", mode: 'move'

	input:
	set name, file(corrected_matrix) from hic_matrix_corrected

	output:
	file("${name}.hic.matrix.png")

	when:
	params.mode == "HiC"

	script:
	"""
	hicPlotMatrix -m ${corrected_matrix} -o ${name}.hic.matrix.png \
            --chromosomeOrder chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chr10 chr11 chr12 chr13 chr14 chr15 chr16 chr17 chr18 chr19 chrX chrY
	"""
}