Skip to content
Permalink
master
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
#!/usr/bin/env nextflow
params.mode = "help"
params.in = ""
params.out = ""
params.path_matrix = "" //only for --mode plot
params.bam = ""
params.safe_all_files = 0
params.check_res_maps = 0
params.organism = "mm10"
params.pn = "Project"
params.reassin = 1
// parameter for T2C plots
params.chr = ""
params.start = 0
params.end = 0
params.score_min = 0
params.score_max = 0
params.int_score = 50
// parameter for hic
params.aln = "bwa"
params.bin = 100000
params.hic_thresh_min = -1.5
params.hic_thresh_max = 5
params.inputBufferSize = 400000
params.bt2_index_threads = 4
if(params.mode == "help" || params.mode == "h"){
log.info """
==========================================
`| |
-"- __ | '
/ ,-| ```'-, || `
/ /@)|_ `-, |
/ ( ~ \\_```""-- ,_', : .___________. ______ __ __ ______ ___ .__ __.
_ ' \\ /```''''""`^ | | / __ \\ | | | | / | / \\ | \\ | |
/ | / `| : `---| |----`| | | | | | | | | ,----' / ^ \\ | \\| |
/ \\/ | | | | | | | | | | | | | | / /_\\ \\ | . ` |
/ __/ | | | | | | `--' | | `--' | | `----./ _____ \\ | |\\ |
| __/ / | | | |__| \\______/ \\______/ \\______/__/ \\__\\ |__| \\__|
"- _ | _/ _/=_// || :
' `~~`~^| /=/-_/_/`~~`~~~`~~`~^~`
`> ' \\ ~/_/_ /" ` ` ' | Targeted chrOmatin Capture ANalysis
' ,'~^~`^| |~^`^~~`~~~^~~~^~`; ' Version 0.1
-' | | | \\ ` : `
| :| | : |
jgs |: | || '
| |/ | | :
|_/ | '
==========================================
Usage: nextflow run TOuCAN.nf --in [Input Path] --out [Output Path] --mode [Modi] [options]
--mode help, h - For showing this help message
--mode plot - Plot data [currently only T2C plots]
parameters:
--path_matrix [PATH] - Path to directory with *.normalized.bed files.
--chr [chr1,chr2,...,chrY] - On which chromosome is the target region.
--start [INT] - Start of target region.
--end [INT] - End of target region.
--score_min [INT] - Score range: minimum. [default: 0]
--score_max [INT] - Score range: maximium. [default: autoscale]
--pn [STRING] - Name of the Project [default: 'Project']
--mode T2C - Full T2C analysis
parameters:
--in [PATH] - Path to directory with fastq / fastq.gz files.
--bam [PATH] - Path to directory with bam files. [if given --in [PATH] is ignored]
--out [PATH] - Path to output directory.
--safe_all_files [0|1] - If 1 safes all temporary files into "OUTPUT/02_analysis/". [default: 0]
--check_res_maps [0|1] - If 1 prints first 5 lines of every file from restriction maps. [default: 0]
--chr [chr1,chr2,...,chrY] - On which chromosome is the target region.
--start [INT] - Start of target region.
--end [INT] - End of target region.
--score_min [INT] - Score range: minimum. [default: 0]
--score_max [INT] - Score range: maximium. [default: autoscale]
--pn [STRING] - Name of the project [default: 'Project']
--organsim [mm10,mm9,hg19] - Type of the genome
--mode uropa - Uropa annoation [T2C]
parameters:
--in [PATH] - Path to directory with *.normalized.bed
--out [PATH] - Path to output directory.
--chr [chr1,chr2,...,chrY] - On which chromosome is the target region.
--start [INT] - Start of target region.
--end [INT] - End of target region.
--pn [STRING] - Name of the Project [default: 'Project']
--mode multiplot - Creating a plot with interaction map, TAD graph and gene annotation
parameters:
--in [PATH] - Path to directory with *.normalized.bed
--out [PATH] - Path to output directory.
--chr [chr1,chr2,...,chrY] - On which chromosome is the target region.
--start [INT] - Start of target region.
--end [INT] - End of target region.
--score_min [INT] - Score range: minimum. [default: 0]
--score_max [INT] - Score range: maximium. [default: autoscale]
--pn [STRING] - Name of the Project [default: 'Project']
--organsim [mm10,mm9,hg19] - Type of the genome
--mode HiC - Full HiC analysis
parameters:
--in [PATH] - Path to directory with fastq / fastq.gz files.
--bam [PATH] - Path to directory with bam files. Two bam files (forward + reversed) are required per sample. [if given --in [PATH] is ignored]
--out [PATH] - Path to output directory.
--aln [bwa|bowtie2] - Choose alignment tool. [default: bwa]
--bin [INT] - Binsize [default: 10000]
--hic_thresh_min [(-)INT] - Threshold filter miniumum [default: -1.5]
--hic_thresh_max [INT] - Threshold filter maximum [default: 5]
--inputBufferSize [INT] - Buffersize for creating hic matrix [default: 400000]
Skip Aligment -> BAM files as Input:
The BAM files need to be from this Pipeline with follwing
file extension: "[NAME].(normalized|matrix).bam" !
Skip creating restritction maps:
After creating the restriction maps write their path into the config file to skip
creating the restrction maps again. [path_T2C_restriction_maps]
"""
System.exit(0)
}
log.info """
=======================================================================================================================
`| |
-"- __ | '
/ ,-| ```'-, || `
/ /@)|_ `-, |
/ ( ~ \\_```""-- ,_', : .___________. ______ __ __ ______ ___ .__ __.
_ ' \\ /```''''""`^ | | / __ \\ | | | | / | / \\ | \\ | |
/ | / `| : `---| |----`| | | | | | | | | ,----' / ^ \\ | \\| |
/ \\/ | | | | | | | | | | | | | | / /_\\ \\ | . ` |
/ __/ | | | | | | `--' | | `--' | | `----./ _____ \\ | |\\ |
| __/ / | | | |__| \\______/ \\______/ \\______/__/ \\__\\ |__| \\__|
"- _ | _/ _/=_// || :
' `~~`~^| /=/-_/_/`~~`~~~`~~`~^~`
`> ' \\ ~/_/_ /" ` ` ' | Targeted chrOmatin Capture ANalysis
' ,'~^~`^| |~^`^~~`~~~^~~~^~`; ' Version 0.1
-' | | | \\ ` : `
| :| | : |
jgs |: | || '
| |/ | | :
|_/
| '
=======================================================================================================================
Mode: ${params.mode}
Project: ${params.pn}
Enzyme A: ${params.enzyme_a_name} -> ${params.enzyme_a_sequence}
Enzyme B: ${params.enzyme_b_name} -> ${params.enzyme_b_sequence}
Target Region
Chromosome: ${params.chr}
Start: ${params.start}
End: ${params.end}
Path to bin: ${path_bin}
Path to genome: ${path_genome}
Path to restriction maps: ${path_T2C_restriction_maps}
"""
if ( "${params.bam}" != "" ){
log.info"""
Path to bam files: ${params.bam}
"""
}
log.info """
Minor fixed parameters for BWA and SAMtools
BWA options: ${params.bwa_T2C_options}
samtools sort options: ${params.sort_options}
Library Label: ${params.library_label}
Platform Label: ${params.platform_label}
Center Label: ${params.center_label}
=======================================================================================================================
"""
outpath = params.out?.endsWith('/') ? params.out.substring( 0, params.out.length() -1 ) : params.out
filtered_interaction_bed_for_multiplot_1 = Channel.empty()
multiplot_bed_1 = Channel.empty()
matrix_to_plot = Channel.empty()
res_map = Channel.empty()
bam_f = Channel.empty()
bam2 = Channel.empty()
interaction_uropa = Channel.empty()
fastqGzFiles = Channel.empty()
hic_matrix_f = Channel.empty()
if("${params.path_matrix}" != "" && params.mode == "plot"){
matrix_to_plot = Channel
.fromPath("${params.path_matrix}/*.{normalized,matrix}.bed")
.map {it -> [it.simpleName, it]}
}
if(params.mode == "uropa"){
interaction_uropa = Channel
.fromPath("${params.in}/*.normalized.bed")
.map {it -> [it.simpleName, it]}
}
if(params.mode == "multiplot"){
multiplot_bed_1 = Channel
.fromPath("${params.in}/*.normalized.bed")
.map {it -> [it.simpleName, it]}
filtered_interaction_bed_for_multiplot_1 = Channel
.fromPath("${params.in}/*.normalized.bed")
.map {it -> [it.simpleName, it]}
}
if (params.mode != "plot"){
if ( "${path_T2C_restriction_maps}" != "none"){
if (! file ("${path_T2C_restriction_maps}/restriction_targets.bed").exists()){
println("Warning: restriction_target.bed does not exists on given path (path_T2C_restriction_maps in configuration file) -> starting to create restriction maps.")
create_res_map = true
} else {
res_map = Channel.fromPath("${path_T2C_restriction_maps}/*.bed")
create_res_map = false
}
} else {
create_res_map = true
}
if ( "${params.bam}" != "" ){
Channel
.fromPath("${params.bam}/*.bam")
.map {it -> [it.simpleName, it]}
.set {bam_f}
create_bam = false
if("${params.mode}" == "HiC"){
Channel
.fromPath("${params.bam}/*.bam")
.map {it -> [it.simpleName.replaceAll(params.sample_extension, ""), it]}
.set {bam2}
create_bam = false
}
} else if ("${params.mode}" == "HiC" & "${params.mode}" == "h5") {
hic_matrix_f = Channel
.fromPath("${params.in}/*.h5")
.map {it -> [it.simpleName, it]}
}else {
create_bam = true
fastqGzFiles = Channel.fromPath("${params.in}/*.fastq*")
}
}
modeList = ["T2C", "HiC", "help", "h", "plot", "uropa", "multiplot", "h5"]
alnList = ["bowtie2", "bwa"]
organismList = ["mm10", "mm9", "hg19"]
process check_parameters {
script:
// check if chr input is correct
if (!(params.mode in modeList )){
println("ERROR: Mode not existing.\n\nList of all modes:\n\t-T2C\n\t-HiC\n\t-help\n\t-h\n\t-plot\n\nYour Input: '${params.mode}'")
System.exit(0)
}
if (!(params.organism in organismList )){
println("ERROR: Organism not supported.\n\nList of all supported organisms:\n\t-mm10\n\t-mm9\n\t-hg19\nYour Input: '${params.mode}'")
System.exit(0)
}
if (!(params.aln in alnList ) && params.mode == "HiC"){
println("ERROR: aln not existing.\n\nList of all supported alignment tools:\n\t-bwa\n\t-bowtie2\n\t-histat\n\nYour Input: '${params.mode}'")
System.exit(0)
}
if (params.mode == "T2C"){
if (!("${params.chr}" ==~ /chr([1-9]|1[0-9]|X|Y)/ ) && (params.mode != "multiplot") && (params.mode != "uropa")){
println("ERROR: No or false chromosome parameter given.\nCorrect example: 'chr4'.\nYour Input: '${params.chr}'")
System.exit(0)
}
if ((params.start == null) || (params.end == null)){
println("ERROR: --start/--end not given")
System.exit(0)
}
if (!("${params.start}" ==~ /\d+/ )){
println("ERROR: --start needs to be a number. Your Input: '${params.start}'")
System.exit(0)
}
if (!("${params.end}" ==~ /\d+/ )){
println("ERROR: --end needs to be a number. Your Input: '${params.end}'")
System.exit(0)
}
if(params.start.toInteger() >= params.end.toInteger()){
println("ERROR: start cannot be bigger/equal than end ")
System.exit(0)
}
if (!("${params.score_min}" ==~ /[-]?\d+/ )){
println("ERROR: --score_min needs to be a number. Your Input: '${params.score_min}'")
System.exit(0)
}
if (!("${params.score_max}" ==~ /\d+/ )){
println("ERROR: --score_max needs to be a number. Your Input: '${params.score_max}'")
System.exit(0)
}
if (("${params.check_res_maps}" != "0") && ("${params.check_res_maps}" != "1")){
println("ERROR: --check_res_maps is a boolean parameter. Enter 0 or 1!")
System.exit(0)
}
}
if ( "${params.safe_all_files}" != "0" && "${params.safe_all_files}" != "1"){
println("ERROR: --safe_all_files is a boolean parameter. Enter 0 or 1!")
System.exit(0)
}
if (params.mode == "HiC") {
if ( ! params.bin.toString().isInteger() ) {
println("ERROR: --bin has to be an Integer! Your input: " + params.bin )
System.exit(0)
}
}
if (params.mode != "plot"){
if(params.in == "" && params.bam == ""){
println("ERROR: Input/BAM path is empty")
System.exit(0)
} else {
if(params.in != ""){
if (!(file(params.in).exists())){
println("ERROR: Input Path does not exist!")
System.exit(0)
}
}
}
} else {
if(outpath == ""){
println("ERROR: Missing output path")
System.exit(0)
}
}
"""
echo check
"""
}
/*
* T2C Step 1
*Creates restriction maps needed for T2C analysis
*/
process create_restriction_maps_for_T2C {
publishDir "${outpath}/${params.pn}_restriction_maps/", mode: 'copy'
output:
file ("restriction_sites_${params.enzyme_a_name}.bed") into restriction_maps_1
file ("restriction_fragments_${params.enzyme_a_name}.bed") into restriction_maps_2
file ("restriction_sites.bed") into restriction_maps_3
file ("restriction_fragments.bed") into restriction_maps_4
file ("restriction_targets.bed") into restriction_maps_5
when:
params.mode == 'T2C' || params.mode == 'multiplot'
create_res_map == true
script:
if(params.enzyme_b_sequence == ""){
enzyme_b_option = ""
} else {
enzyme_b_option = "--sequence " + params.enzyme_b_sequence
}
"""
${path_python}/python ${path_bin}/bin/match_sequences.py \
--fasta ${path_genome} \
--bed "restriction_sites_${params.enzyme_a_name}.bed" \
--sequence ${params.enzyme_a_sequence}
${path_python}/python ${path_bin}/bin/target_fragments.py \
--input "restriction_sites_${params.enzyme_a_name}.bed" \
--output "restriction_fragments_${params.enzyme_a_name}.bed"
${path_python}/python ${path_bin}/bin/match_sequences.py \
--fasta ${path_genome} \
--bed restriction_sites.bed \
--sequence ${params.enzyme_a_sequence} \
${enzyme_b_option}
${path_python}/python ${path_bin}/bin/target_fragments.py \
--input restriction_sites.bed \
--output restriction_fragments.bed
grep ${params.enzyme_a_sequence} restriction_fragments.bed > restriction_targets.bed
"""
}
restriction_maps = restriction_maps_1.concat(restriction_maps_2, restriction_maps_3, restriction_maps_4, restriction_maps_5)
res_map.concat(restriction_maps).into {final_res_map_for_check; final_res_map; final_res_map_for_intersection; res_map_for_ifm}
process check_restriction_maps {
tag{params.enzyme_a_name}
input:
file (bed) from final_res_map_for_check
when:
params.check_res_maps == 1
script:
"""
head -5 $bed
"""
}
result.subscribe {println it}
// Decompresses fastq files
process decompress {
tag{fastqGz}
if(params.safe_all_files == 1){
publishDir "${outpath}/${params.pn}_fastq/", mode: 'copy', overwrite: 'false'
}
input:
file (fastqGz) from fastqGzFiles
output:
set basisname, fastqName ,file ("${fastqName}.fastq") into decompressedfastq, decompressedfastq_for_HiC_bwa,
decompressedfastq_for_HiC_bowtie2
script:
fastqName = fastqGz.simpleName
basisname = fastqName.replaceFirst("${params.sample_extension}","")
fastqBasic = fastqGz.name.lastIndexOf('.').with {it != -1 ? fastqGz.name.substring(it+1):''}
if(fastqBasic == 'gz')
"""
zcat -d $fastqGz > ${fastqName}.fastq
"""
else if (fastqBasic == 'fastq')
"""
echo $fastqGz
"""
else
"""
echo "ERROR: Invalid datatype / file ending"
"""
}
process create_bwa_index {
conda "$workflow.projectDir/envtoucan.yaml"
output:
file ('f') into index
file ('f') into index_hic_bwa
when:
(params.mode == "T2C" || (params.mode == "HiC" && params.aln == "bwa")) && create_bam == true
script:
genome_file = file (path_genome)
genome_name = genome_file.simpleName
amb = file("${path_genome}.amb")
ann = file("${path_genome}.ann")
bwt = file("${path_genome}.bwt")
pac = file("${path_genome}.pac")
sa = file("${path_genome}.sa")
if(amb.exists() && ann.exists() && bwt.exists() && pac.exists() && sa.exists())
"""
echo done > f
"""
else
"""
bwa index ${path_genome}
echo done > f
"""
}
// T2C Step 2
process bwa_aln {
//conda 'bioconda::bwa=0.7.15'
conda "$workflow.projectDir/envtoucan.yaml"
tag{fastq}
executor 'local'
if(params.safe_all_files == 1){
publishDir "${outpath}/${params.pn}_additional_files/sai/", mode: 'copy', pattern: '*.sai'
}
input:
set basisname, fastqName ,file (fastq) from decompressedfastq
file f from index
when:
params.mode == "T2C" && create_bam == true
output:
set basisname, file ('*.sai'), file (fastq) into sai_files
script:
"""
bwa aln \
${params.bwa_T2C_options} \
${path_genome} \
$fastq > ${fastqName}.sai
"""
}
// T2C Step 3
process bwa_sampe_to_create_sam {
if(params.safe_all_files == 1){
publishDir "${outpath}/${params.pn}_additional_files/sam/", mode: 'copy'
}
conda "$workflow.projectDir/envtoucan.yaml"
tag{name}
input:
set name, sai, fastq from sai_files.groupTuple()
output:
set name, file ("${name}.sam") into samfiles
when:
params.mode == "T2C"
create_bam == true
script:
sortbasename = [sai[0].getBaseName(), sai[1].getBaseName()].sort()
if(sai[0].contains(sortbasename[0])){
sai_R1 = sai[0]
fastq_R1 = fastq[0]
sai_R2 = sai[1]
fastq_R2 = fastq[1]
} else {
sai_R1 = sai[1]
fastq_R1 = fastq[1]
sai_R2 = sai[0]
fastq_R2 = fastq[0]
}
"""
bwa sampe \
-A \
${path_genome} \
${sai_R1} ${sai_R2} \
${fastq_R2} ${fastq_R1} > ${name}.sam
"""
}
// T2C Step 4
process samtools_addreplacerg {
tag{sam}
conda "$workflow.projectDir/envtoucan.yaml"
if(params.safe_all_files == 1){
publishDir "${outpath}/${params.pn}_additional_files/sam/", mode: 'copy'
}
input:
set name, file (sam) from samfiles
output:
set name, file ("${name}_rg.sam") into samfiles_read_groups
when:
params.mode == "T2C"
create_bam == true
script:
ds = "${path_working}/*.srt.bam"
"""
samtools addreplacerg \
-r "ID:${name}" \
-r "CN:${params.center_label}" \
-r "LB:${params.library_label}" \
-r "SM:${name}" \
-r "PL:${params.platform_label}" \
-r "DS:${ds}" \
-o ${name}_rg.sam \
${sam}
"""
}
// T2C Step 5
process samtools_sort {
executor 'local'
conda "$workflow.projectDir/envtoucan.yaml"
publishDir "${outpath}/${params.pn}_bam/", mode: 'copy'
tag{rgsam}
input:
set name, file (rgsam) from samfiles_read_groups
output:
set name, file ("${name}.bam") into bamfiles
when:
params.mode == "T2C"
create_bam == true
script:
"""
samtools sort \
${params.sort_options} \
-T ${name}_tmp_sort \
-o ${name}.bam \
${rgsam}
"""
}
bam_f.concat(bamfiles).into {finalbam; bamfiles_for_flagstats; workaround}
// T2C Step 6
process samtools_flagstat {
publishDir "${outpath}/${params.pn}_stats/", mode: 'copy'
conda "$workflow.projectDir/envtoucan.yaml"
tag{bam}
input:
set name, file (bam) from bamfiles_for_flagstats
output:
file ("${name}.flagstat.txt") into flagstats
when:
params.mode == "T2C"
script:
"""
samtools flagstat ${bam} > ${name}.flagstat.txt
"""
}
/*
* T2C Step 7
* index the BAM files
*/
process index_bamfiles {
tag{bam}
conda "$workflow.projectDir/envtoucan.yaml"
if(params.safe_all_files == 1){
publishDir "${outpath}/${params.pn}_additional_files/bai/", mode: 'copy'
}
input:
set name, file (bam) from finalbam
output:
set name, file ("${name}.bam.bai") into bam_bai_files
when:
params.mode == "T2C"
script:
"""
samtools index ${bam} > ${name}.bam.bai
"""
}
bam_bai_files.combine(workaround, by: 0).set {bam_bai_files_final}
/*
* T2C Step 8
* get the reads overlapping the targets
*/
process overlapping_reads_targets {
tag{bam}
executor 'local'
if(params.safe_all_files == 1){
publishDir "${outpath}/${params.pn}_additional_files/bam/", mode: 'copy'
}
input:
set name, file (bai), file (bam), file (res_target) from bam_bai_files_final.combine(final_res_map.filter{ it.simpleName == "restriction_targets"})
output:
set name, file ("${name}.target.bam") into target_bam_files
when:
params.mode == "T2C"
script:
"""
${path_python}/python ${path_bin}/bin/add_fragment_to_bam.py \
--input ${bam} \
--output ${name}.target.bam \
--bed ${res_target}
"""
}
/*
* T2C Step 9
* namesort the bam file in preparation for the matrix
*/
process namesorting_bam_files {
tag{target_bam}
conda "$workflow.projectDir/envtoucan.yaml"
if(params.safe_all_files == 1){
publishDir "${outpath}/${params.pn}_additional_files/bam/", mode: 'copy'
}
input:
set name, file (target_bam) from target_bam_files
output:
set name, file ("${name}.namesort.bam") into namesort_bam_files
when:
params.mode == "T2C"
script:
"""
samtools sort \
-n \
-o ${name}.namesort.bam \
${target_bam}
"""
}
/*
* T2C step 10
* SAMtools flags to filter aligments
* 0x0001 p the read is paired in sequencing
* 0x0004 u the query sequence itself is unmapped
* 0x0008 U the mate is unmapped
* 0x0100 s the alignment is not primary
* 0x0800 S the alignment is supplementary
*/
process filter_aligments {
tag{namesort_bam}
conda "$workflow.projectDir/envtoucan.yaml"
if(params.safe_all_files == 1){
publishDir "${outpath}/${params.pn}_additional_files/bam/", mode: 'copy'
}
input:
set name, file (namesort_bam) from namesort_bam_files
output:
set name , file ("${name}.filtered.bam") into filtered_bam_files
when:
params.mode == "T2C"
script:
"""
samtools view -h \
-f 0x0001 \
-F 0x0004 \
-F 0x0008 \
-F 0x0100 \
-F 0x0800 \
${namesort_bam} > ${name}.filtered.bam
"""
}
/*
* T2C step 11
* creating first ray matrix
*/
process create_matrix {
tag{filtered_bam}
if(params.safe_all_files == 1){
publishDir "${outpath}/${params.pn}_additional_files/bed/", mode: 'copy'
}
input:
set name , file (filtered_bam) from filtered_bam_files
output:
set name, file ("${name}.reads.bed") into reads_bed_files, reads_bed_files_for_stats
when:
params.mode == "T2C"
script:
"""
cat ${filtered_bam} | \
${path_python}/python ${path_bin}/bin/create_matrix.py -o ${name}.reads.bed
"""
}
/*
* T2C step 12
* finish matrix file by
* creating a paired-end bed file on single base-pair level
*/
process finish_raw_matrix {
tag{reads_bed}
if(params.safe_all_files == 1){
publishDir "${outpath}/${params.pn}_additional_files/bed/", mode: 'copy'
}
input:
set name, file (reads_bed) from reads_bed_files
output:
//set name_raw, file ("${name}.matrix.bed") into raw_matrix_bed_for_plots
set name, file ("${name}.matrix.bed") into raw_matrix_bed_to_reassign
set name, file ("${name}.matrix.bed") into raw_matrix_for_norm
when:
params.mode == "T2C"
script:
//name_raw = "${name}_raw"
"""
cat ${reads_bed} | \
sort -k 1,1 -k 2,2n -k 4,4 -k 5,5n | \
${path_python}/python ${path_bin}/bin/aggregate_matrix.py -o ${name}.matrix.bed
"""
}
/*
* T2C step 13
*
*/
process split_matrix_for_re_evaluation {
tag{matrix_bed}
if(params.safe_all_files == 1){
publishDir "${outpath}/${params.pn}_additional_files/bed/", mode: 'copy'
}
input:
set name, file (matrix_bed) from raw_matrix_bed_to_reassign
output:
set name, file ("${name}.loca.bed"), file ("${name}.locb.bed"), file (matrix_bed) into split_matrix
when:
params.mode == "T2C"
params.reassin == 1
script:
"""
awk -F "\t" '//{print \$1 "\t" \$2 "\t" \$3}' ${matrix_bed} > ${name}.loca.bed
awk -F "\t" '//{print \$4 "\t" \$5 "\t" \$6}' ${matrix_bed} > ${name}.locb.bed
"""
}
/*
* T2C step 14
* Intersect bed file with fragments
*/
process intersect_beds_to_fragments {
tag{name}
conda "$workflow.projectDir/envtoucan.yaml"
if(params.safe_all_files == 1){
publishDir "${outpath}/${params.pn}_additional_files/bed/", mode: 'copy'
}
input:
set name, file (a_bed), file (b_bed), file (matrix), file (res_frag) from split_matrix.combine(final_res_map_for_intersection.filter{ it.simpleName == "restriction_fragments_${params.enzyme_a_name}"})
output:
set name, file ("${name}.frga.bed"), file ("${name}.frgb.bed"), file (matrix) into fragments_bed
when:
params.mode == "T2C"
params.reassin == 1
script:
"""
sed 's/[[:blank:]]*\$\$//p' ${a_bed} | bedtools intersect -a ${a_bed} -b ${res_frag} -wao > ${name}.frga.bed
sed 's/[[:blank:]]*\$\$//p' ${b_bed} | bedtools intersect -a ${b_bed} -b ${res_frag} -wao > ${name}.frgb.bed
"""
}
/*
* T2C step 15
* creating pre normalized matrix
*/
process reassign_matrix {
tag{matrix}
if(params.safe_all_files == 1){
publishDir "${outpath}/${params.pn}_additional_files/bed/", mode: 'copy'
}
input:
set name, file (frga), file (frgb), file (matrix) from fragments_bed
output:
set name, file ("${name}.temp.bed") into temp_bed_file
when:
params.mode == "T2C"
params.reassin == 1
script:
"""
${path_python}/python ${path_bin}/bin/reassign_matrix.py \
-m ${matrix} \
-a ${frga} \
-b ${frgb} > ${name}.temp.bed
"""
}
/*
* T2C step 16
* Aggregate Matrix
*/
process finalize_matrix {
tag{temp_bed}
publishDir "${outpath}/${params.pn}_Interactions/01_all_interactions_bed_raw/", mode: 'copy', pattern: "${name_raw}.reassigned.bed"
input:
set name, file (temp_bed) from temp_bed_file
output:
set name, file ("${name}.reassigned.bed") into reassigned_matrix
when:
params.mode == "T2C"
params.reassin == 1
script:
"""
cat ${temp_bed} | \
sort -k 1,1 -k 2,2n -k 4,4 -k 5,5n |\
${path_python}/python ${path_bin}/bin/aggregate_matrix.py -o ${name}.reassigned.bed
"""
}
if (params.reassin == 1){
matrix = reassigned_matrix
} else {
matrix = raw_matrix_for_norm
}
/*
* T2C step 17
* Normalize Matrix
*/
process normalize_matrix {
tag{matrix}
publishDir "${outpath}/${params.pn}_Interactions/02_all_interactions_bed_norm/", mode: 'copy'
input:
set name, file (matrix) from matrix
output:
set name, file ("${name}.normalized.bed") into normalized_matrix, multiplot_bed_2, filtered_interaction_bed_for_multiplot_2
when:
params.mode == "T2C"
script:
"""
${path_R}/Rscript ${path_bin}/bin/normalize_paired_bed.R --bedfile ${matrix} \
--outfile ${name}.normalized.bed \
--library ${path_bin}/RLIB/ \
--method ${params.norm_method}
"""
}
to_plot = matrix_to_plot.concat(normalized_matrix)
/*
* T2C step 18
* Creating Interaction matrix plot
*/
process plotting {
tag{name}
if(params.mode == "plot" ){
publishDir "${outpath}/plot/", mode: 'copy'
} else {
publishDir "${outpath}/${params.pn}_plots/interaction_plot/", mode: 'copy'
}
input:
set name, file (matrix) from to_plot
output:
file ("*.pdf")
when:
params.mode == "T2C" || params.mode == "plot"
script:
"""
${path_R}/Rscript ${path_bin}/bin/plot_cis_matrix.R \
--bedfile ${matrix} \
--outfile ${name}.cis.pdf \
--library ${path_bin}/RLIB/ \
--chromosome ${params.chr} \
--start ${params.start} \
--end ${params.end} \
--colour-minimum ${params.score_min} \
--colour-maximum ${params.score_max} \
${params.plot_options_T2C}
"""
}
filtered_interaction_bed_merged = filtered_interaction_bed_for_multiplot_1.concat(filtered_interaction_bed_for_multiplot_2)
filtered_interaction_bed_final_merge = interaction_uropa.concat(filtered_interaction_bed_merged)
process create_interaction_table {
tag{name}
publishDir "${outpath}/${params.pn}_Interactions/03_target_region_interactions_bed/", mode: 'copy'
input:
set name, file (norm) from filtered_interaction_bed_final_merge
output:
set name, file ("${name}.filtered.interactions.bed") into interaction_bed_for_merge, to_ifmatrix, for_uropa_1, for_uropa_2
when:
params.mode == "T2C" || params.mode == "uropa" || params.mode == "multiplot"
script:
"""
${path_R}/Rscript ${path_bin}/bin/create_interaction_table.R \
${norm} ${name}.filtered.interactions.bed \
${params.chr} ${params.start} ${params.end} ${params.int_score} \
$workflow.workDir/${name}.filtered.interactions.bed
"""
}
//interaction_empty.concat(interaction_check).into {for_uropa1; for_uropa2}
process create_stats {
tag{name}
publishDir "${outpath}/${params.pn}_stats/", mode: 'copy'
input:
set name, file (reads) from reads_bed_files_for_stats
output:
file ("${name}_stats.pdf")
when:
params.mode == "T2C"
script:
"""
${path_R}/Rscript ${path_bin}/bin/create_T2C_stats.R \
${reads} ${name}_stats.pdf \
${params.chr} ${params.start} ${params.end} ${name}
"""
}
process create_uropa_config_1 {
tag{name}
input:
set name, file (int_bed) from for_uropa_1
output:
set name, file ("${name}_uropa_config") into uropa_config_1
when:
params.mode == "T2C" || params.mode == "uropa"
script:
config_parameters = [params.uropa_feature,params.uropa_anchor,params.uropa_strand,params.uropa_direction,
params.uropa_filter_attr,params.uropa_attr_value,params.uropa_show_attr]
conf_list = config_parameters.collect { par ->
return "\\\"" + par.replace(",","\\\",\\\"") + "\\\""
}
conf_list_final = conf_list.collect {p ->
if(p.split(",").size() > 1) {
return "[" + p + "]"
}
return p
}
"""
echo "{ \
\\"queries\\" : [ \
{ \
\\"feature\\" : ${conf_list_final[0]}, \
\\"feature.anchor\\" : ${conf_list_final[1]}, \
\\"distance\\" : [ \
${params.uropa_dist_1}, \
${params.uropa_dist_2} \
], \
\\"strand\\" : ${conf_list_final[2]}, \
\\"direction\\" : ${conf_list_final[3]}, \
\\"internals\\" : \\"True\\", \
\\"filter.attribute\\" : ${conf_list_final[4]}, \
\\"attribute.value\\" : ${conf_list_final[5]}, \
\\"show.attributes\\" : ${conf_list_final[6]} \
} \
], \
\\"priority\\" : \\"FALSE\\", \
\\"gtf\\" : \\"${path_gtf}\\", \
\\"bed\\" : \\"$workflow.workDir/${name}.filtered.interactions.bed\\" \
}" > ${name}_uropa_config
"""
}
process run_uropa_1 {
tag{name}
//conda "$workflow.projectDir/envtoucan.yaml"
if(params.safe_all_files == 1){
publishDir "${outpath}/${params.pn}_uropa/", mode: 'copy'
}
input:
set name, file (config) from uropa_config_1
output:
set name, file ("${name}_allhits.txt"), file ("${name}_finalhits.txt") into uropa_1
when:
params.mode == "T2C" || params.mode == "uropa"
script:
"""
uropa -i ${config} -d -p ${name} -t ${params.uropa_threads}
"""
}
process swap_fragments {
tag{name}
input:
set name, file (int_bed) from for_uropa_2
output:
set name, file ("${name}.swap.interaction.bed") into swaped_interactions_bed
when:
params.mode == "T2C" || params.mode == "uropa"
script:
"""
${path_R}/Rscript ${path_bin}/bin/swap_fragments.R ${int_bed} ${name}.swap.interaction.bed $workflow.workDir/${name}.swap.interaction.bed
"""
}
process create_uropa_config_2 {
tag{name}
input:
set name, file (int_bed) from swaped_interactions_bed
output:
set name, file ("${name}_uropa_config_2") into uropa_config_2
when:
params.mode == "T2C" || params.mode == "uropa"
script:
config_parameters = [params.uropa_feature,params.uropa_anchor,params.uropa_strand,params.uropa_direction,
params.uropa_filter_attr,params.uropa_attr_value,params.uropa_show_attr]
conf_list = config_parameters.collect { par ->
return "\\\"" + par.replace(",","\\\",\\\"") + "\\\""
}
conf_list_final = conf_list.collect {p ->
if(p.split(",").size() > 1) {
return "[" + p + "]"
}
return p
}
"""
echo "{ \
\\"queries\\" : [ \
{ \
\\"feature\\" : ${conf_list_final[0]}, \
\\"feature.anchor\\" : ${conf_list_final[1]}, \
\\"distance\\" : [ \
${params.uropa_dist_1}, \
${params.uropa_dist_2} \
], \
\\"strand\\" : ${conf_list_final[2]}, \
\\"direction\\" : ${conf_list_final[3]}, \
\\"internals\\" : \\"True\\", \
\\"filter.attribute\\" : ${conf_list_final[4]}, \
\\"attribute.value\\" : ${conf_list_final[5]}, \
\\"show.attributes\\" : ${conf_list_final[6]} \
} \
], \
\\"priority\\" : \\"FALSE\\", \
\\"gtf\\" : \\"${path_gtf}\\", \
\\"bed\\" : \\"$workflow.workDir/${name}.filtered.interactions.bed\\" \
}" > ${name}_uropa_config_2
"""
}
process run_uropa_2 {
tag{name}
//conda "$workflow.projectDir/envtoucan.yaml"
if(params.safe_all_files == 1){
publishDir "${outpath}/${params.pn}_uropa/", mode: 'copy'
}
input:
set name, file (config) from uropa_config_2
output:
set name, file ("${name}_2_allhits.txt"), file ("${name}_2_finalhits.txt") into uropa_2
when:
params.mode == "T2C" || params.mode == "uropa"
script:
"""
uropa -i ${config} -d -p ${name}_2 -t ${params.uropa_threads}
"""
}
tmp = uropa_1.combine(uropa_2, by: 0)
final_uropa_set = tmp.combine(interaction_bed_for_merge, by: 0)
process uropa_merge {
tag{name}
publishDir "${outpath}/${params.pn}_Interactions/04_final_interactions_annotated/", mode: 'copy'
input:
set name, file (allhits_1), file (finalhits_1), file (allhits_2), file (finalhits_2), file (int_bed) from final_uropa_set
output:
set name, file ("${name}.finalhits.merged.txt") into merged_uropa
file ("${name}.finalhits.merged.txt") into for_xlsx
when:
params.mode == "T2C" || params.mode == "uropa"
script:
"""
${path_R}/Rscript ${path_bin}/bin/merge_uropa_interactions.R ${int_bed} ${finalhits_1} ${finalhits_2} ${name}.finalhits.merged.txt
"""
}
xlsx_list = for_xlsx.toSortedList()
process xlsx_writer {
publishDir "${outpath}/", mode: 'copy'
input:
val f from xlsx_list
output:
file ("${params.pn}.xlsx")
when:
params.mode == "T2C" || params.mode == "uropa"
script:
file_list = f.sort {it.getBaseName()}
file_list_str = file_list.toListString().replace(', ',',') - "[" - "]"
"""
${path_python}/python ${path_bin}/bin/julianos_xlsxgen_vTOuCAN.py ${path_bin}/bin/julianos_config.json --out ${params.pn} --flist ${file_list_str}
"""
}
process create_viewpoint {
tag{name}
publishDir "${outpath}/${params.pn}_Interactions/05_interactions_viewpoint_annotated/", mode: 'copy'
input:
set name, file (merged_finalhits) from merged_uropa
output:
file ("${name}.interactions.viewpoint.txt")
when:
params.mode == "T2C" || params.mode == "uropa"
script:
"""
${path_R}/Rscript ${path_bin}/bin/create_interaction_viewpoint.R ${merged_finalhits} ${name}.interactions.viewpoint.txt
"""
}
//to_ifmatrix = interaction_multiplot.concat(interaction_bed_TAD)
process convert_to_IFMatrix {
tag{name}
echo true
publishDir "${outpath}/${params.pn}_additional_files/IFMatrix/", mode: 'copy'
input:
set name, file (int_bed), file (res) from to_ifmatrix.combine(res_map_for_ifm.filter{ it.simpleName == "restriction_targets"})
output:
set name, file ("${name}.ifm.txt") into ifmatrix
when:
params.mode == "T2C" || params.mode == "multiplot"
script:
"""
${path_R}/Rscript ${path_bin}/bin/convert_to_IFMatrix.R ${int_bed} ${res} ${name}.ifm.txt ${params.chr} ${params.start} ${params.end}
"""
}
process run_robusTAD {
tag{name}
publishDir "${outpath}/${params.pn}_TAD/", mode: 'copy'
input:
set name, file (tfm) from ifmatrix
output:
set name, file ("TADBoundaryCalls_*.txt") into tadbc
when:
params.mode == "T2C" || params.mode == "multiplot"
script:
"""
${path_R}/Rscript ${path_bin}/bin/RobusTAD.R -i ${tfm} --header FALSE -n norm --minWin 1 --maxWin 500
"""
}
//${path_R}/Rscript ${path_bin}/bin/RobusTAD.R -i ${tfm} --header FALSE -n norm --minWin 1 --maxWin 500
process x_axis_scaling {
tag{name}
publishDir "${outpath}/${params.pn}_TAD/scaled/", mode: 'copy'
input:
set name, file (tad) from tadbc
output:
set name, file ("${bn}_scaled.txt") into tadx, multiplot_TAD
when:
params.mode == "T2C" || params.mode == "multiplot"
script:
bn = tad.baseName
"""
${path_R}/Rscript ${path_bin}/bin/xaxis_scaling.R ${tad} ${params.start} ${params.end} ${bn}_scaled.txt
"""
}
process plot_TAD_graph {
tag{name}
publishDir "${outpath}/${params.pn}_plots/TAD/", mode: 'copy'
input:
set name, file (tad) from tadx
output:
file ("${name}.TAD.pdf")
when:
params.mode == "T2C" || params.mode == "multiplot"
script:
"""
${path_R}/Rscript ${path_bin}/bin/plotTAD.R ${tad} ${name}.TAD.pdf
"""
}
multiplot_bed_1.concat(multiplot_bed_2).set {multiplot_bed_merged}
multiplot_TAD.join(multiplot_bed_merged).set {multi}
process multiplot {
tag{name}
publishDir "${outpath}/${params.pn}_plots/multiplot/", mode: 'copy'
input:
set name, file (tad), file (bed) from multi
output:
file ("*.png") into for_pptx
when:
params.mode == "T2C" || params.mode == "multiplot"
script:
"""
${path_R}/Rscript ${path_bin}/bin/plot_multi.R \
--bedfile ${bed} \
--outfile ${name}.multi.png \
--library ${path_bin}/RLIB/ \
--chromosome ${params.chr} \
--start ${params.start} \
--end ${params.end} \
--colour-minimum ${params.score_min} \
--colour-maximum ${params.score_max} \
--tad ${tad} \
--organism ${params.organism} \
${params.plot_options_T2C}
"""
}
for_pptx_list = for_pptx.toSortedList()
process pptx_writer {
publishDir "${outpath}/", mode: 'copy'
input:
val pptxList from for_pptx_list
output:
file ("*.pptx")
when:
params.mode == "T2C" || params.mode == "multiplot"
script:
file_list = pptxList.sort {it.getBaseName()}
file_list_str = file_list.toListString().replace(', ',',') - "[" - "]"
"""
${path_python}/python ${path_bin}/bin/pptx_writer_vTOuCAN.py ${path_bin}/bin/pptx_config.json --output ${params.pn} --flist ${file_list_str}
"""
}
//================================= HiC ========================================
process alignment_with_bwa {
tag{fastq}
publishDir "${outpath}/${params.pn}_bam/", mode: 'copy'
conda "$workflow.projectDir/envtoucan.yaml"
input:
set basisname, fastqName ,file (fastq) from decompressedfastq_for_HiC_bwa
file f from index_hic_bwa
output:
set basisname, file ("${fastqName}.bam") into bwa_alignment
when:
params.mode == "HiC" && params.aln == "bwa" && create_bam == true
script:
"""
bwa mem \
${params.bwa_HiC_options} \
${path_genome} \
$fastq | samtools view -Shb > ${fastqName}.bam
"""
}
process bowtie2_index {
publishDir "${path_genome}", mode: 'copy'
conda "$workflow.projectDir/envtoucan.yaml"
when:
params.mode == "HiC" && params.aln == "bowtie2" && create_bam == true
output:
val ("${genome_name}") into index_bt2
script:
genome_file = file (path_genome)
genome_name = genome_file.name
indx = file ("${path_genome}").getBaseName()
path = file ("${path_genome}").getParent()
i = "${path}/bowtie2/${indx}"
bt1 = file("${i}.1.bt2")
bt2 = file("${i}.2.bt2")
bt3 = file("${i}.3.bt2")
bt4 = file("${i}.4.bt2")
rev1 = file("${i}.rev.1.bt2")
rev2 = file("${i}.rev.2.bt2")
if(bt1.exists() && bt2.exists() && bt3.exists() && bt4.exists() && rev1.exists() && rev2.exists())
"""
echo "Skiped creating bt2 index"
"""
else
"""
bowtie2-build ${path_genome} ${genome_name} --threads ${params.bt2_index_threads}
"""
}
process alignment_with_bowtie2 {
tag{fastq}
publishDir "${outpath}/${params.pn}_bam/", mode: 'copy'
conda "$workflow.projectDir/envtoucan.yaml"
input:
set basisname, fastqName ,file (fastq), bt2_idx from decompressedfastq_for_HiC_bowtie2.combine(index_bt2)
output:
set basisname, file ("${fastqName}.sam") into bowtie2_alignment
when:
params.mode == "HiC" && params.aln == "bowtie2" && create_bam == true
script:
indx = file ("${path_genome}").getBaseName()
path = file ("${path_genome}").getParent()
i = "${path}/bowtie2/${indx}"
"""
bowtie2 -x ${i} -U ${fastq} --very-sensitive --reorder -p ${params.threads_bowtie2} > ${fastqName}.sam
"""
}
bam2.concat(bwa_alignment, bowtie2_alignment).set {hic_alignment}
process create_HiC_matrix {
tag{name}
publishDir "${outpath}/${params.pn}_h5_matrix/", mode: 'copy'
input:
set name, bams from hic_alignment.groupTuple()
output:
set name, file("${name}_matrix.h5") into hic_matrix, hic_matrix_for_diagnostic
file ("${name}.bam")
when:
params.mode == "HiC"
script:
sortbasename = [bams[0].getBaseName(), bams[1].getBaseName()].sort()
if(bams[0].contains(sortbasename[0])){
bam_r1 = bams[0]
bam_r2 = bams[1]
} else {
bam_r1 = bams[1]
bam_r2 = bams[0]
}
"""
hicBuildMatrix --samFiles ${bam_r1} ${bam_r2} \
--QCfolder ${outpath}/${params.pn}_QC_${name}/ \
--binSize ${params.bin} \
-b ${name}.bam \
--inputBufferSize ${params.inputBufferSize} \
--restrictionSequence ${params.enzyme_a_sequence} \
--threads ${params.hicBuildMatrix_threads}\
-o ${name}_matrix.h5
"""
}
process diagnostic_plot_of_HiC_Matrix {
publishDir "${outpath}/${params.pn}_diagnostic_plots/", mode: 'copy'
tag{name}
input:
set name, file (matrix) from hic_matrix_for_diagnostic
output:
set name, file ("${name}.png")
when:
params.mode == "HiC"
script:
"""
hicCorrectMatrix diagnostic_plot -m ${matrix} -o ${name}.png
"""
}
hic_matrix_final = hic_matrix_f.concat(hic_matrix)
process correct_matrix {
tag{name}
publishDir "${outpath}/${params.pn}_h5_matrix/corrected/", mode: 'copy'
input:
set name, file (matrix) from hic_matrix_final
output:
set name, file("${name}_corrected.h5") into hic_matrix_corrected
when:
params.mode == "HiC" || params.mode == "h5"
script:
"""
hicCorrectMatrix correct -m ${matrix} --filterThreshold \
${params.hic_thresh_min} ${params.hic_thresh_max} -o ${name}_corrected.h5
"""
}
process plot_matrix {
tag{name}
publishDir "${outpath}/${params.pn}_plot/", mode: 'copy'
input:
set name, file (corrected_matrix) from hic_matrix_corrected
output:
file ("${name}.hic.matrix.png")
when:
params.mode == "HiC"
script:
if ((params.chr != "") && (params.start != 0) && p(arams.end != 0))
"""
hicPlotMatrix -m ${corrected_matrix} -o ${name}.hic.matrix.png \
--dpi 400 --region ${params.chr}:${params.start}-${params.end}
"""
else
"""
hicPlotMatrix -m ${corrected_matrix} -o ${name}.hic.matrix.png \
--dpi 400
"""
}