#!/usr/bin/env nextflow | |
params.mode = "help" | |
params.in = "" | |
params.out = "" | |
params.path_matrix = "" //only for --mode plot | |
params.bam = "" | |
params.safe_all_files = 0 | |
params.check_res_maps = 0 | |
params.organism = "mm10" | |
params.pn = "Project" | |
params.reassin = 1 | |
// parameter for T2C plots | |
params.chr = "" | |
params.start = 0 | |
params.end = 0 | |
params.score_min = 0 | |
params.score_max = 0 | |
params.int_score = 50 | |
// parameter for hic | |
params.aln = "bwa" | |
params.bin = 100000 | |
params.hic_thresh_min = -1.5 | |
params.hic_thresh_max = 5 | |
params.inputBufferSize = 400000 | |
params.bt2_index_threads = 4 | |
if(params.mode == "help" || params.mode == "h"){ | |
log.info """ | |
========================================== | |
`| | | |
-"- __ | ' | |
/ ,-| ```'-, || ` | |
/ /@)|_ `-, | | |
/ ( ~ \\_```""-- ,_', : .___________. ______ __ __ ______ ___ .__ __. | |
_ ' \\ /```''''""`^ | | / __ \\ | | | | / | / \\ | \\ | | | |
/ | / `| : `---| |----`| | | | | | | | | ,----' / ^ \\ | \\| | | |
/ \\/ | | | | | | | | | | | | | | / /_\\ \\ | . ` | | |
/ __/ | | | | | | `--' | | `--' | | `----./ _____ \\ | |\\ | | |
| __/ / | | | |__| \\______/ \\______/ \\______/__/ \\__\\ |__| \\__| | |
"- _ | _/ _/=_// || : | |
' `~~`~^| /=/-_/_/`~~`~~~`~~`~^~` | |
`> ' \\ ~/_/_ /" ` ` ' | Targeted chrOmatin Capture ANalysis | |
' ,'~^~`^| |~^`^~~`~~~^~~~^~`; ' Version 0.1 | |
-' | | | \\ ` : ` | |
| :| | : | | |
jgs |: | || ' | |
| |/ | | : | |
|_/ | ' | |
========================================== | |
Usage: nextflow run TOuCAN.nf --in [Input Path] --out [Output Path] --mode [MODE] [options]
--mode help, h - For showing this help message | |
--mode plot - Plot data [currently only T2C plots] | |
parameters: | |
--path_matrix [PATH] - Path to directory with *.normalized.bed files. | |
--chr [chr1,chr2,...,chrY] - Chromosome of the target region.
--start [INT] - Start of target region. | |
--end [INT] - End of target region. | |
--score_min [INT] - Score range: minimum. [default: 0] | |
--score_max [INT] - Score range: maximum. [default: autoscale]
--pn [STRING] - Name of the Project [default: 'Project'] | |
--mode T2C - Full T2C analysis | |
parameters: | |
--in [PATH] - Path to directory with fastq / fastq.gz files. | |
--bam [PATH] - Path to directory with bam files. [if given --in [PATH] is ignored] | |
--out [PATH] - Path to output directory. | |
--safe_all_files [0|1] - If 1, saves all temporary files to "OUTPUT/02_analysis/". [default: 0]
--check_res_maps [0|1] - If 1, prints the first 5 lines of every restriction map file. [default: 0]
--chr [chr1,chr2,...,chrY] - Chromosome of the target region.
--start [INT] - Start of target region. | |
--end [INT] - End of target region. | |
--score_min [INT] - Score range: minimum. [default: 0] | |
--score_max [INT] - Score range: maximum. [default: autoscale]
--pn [STRING] - Name of the project [default: 'Project']
--organism [mm10,mm9,hg19] - Reference genome build
--mode uropa - UROPA annotation [T2C]
parameters: | |
--in [PATH] - Path to directory with *.normalized.bed | |
--out [PATH] - Path to output directory. | |
--chr [chr1,chr2,...,chrY] - Chromosome of the target region.
--start [INT] - Start of target region. | |
--end [INT] - End of target region. | |
--pn [STRING] - Name of the Project [default: 'Project'] | |
--mode multiplot - Creating a plot with interaction map, TAD graph and gene annotation | |
parameters: | |
--in [PATH] - Path to directory with *.normalized.bed | |
--out [PATH] - Path to output directory. | |
--chr [chr1,chr2,...,chrY] - Chromosome of the target region.
--start [INT] - Start of target region. | |
--end [INT] - End of target region. | |
--score_min [INT] - Score range: minimum. [default: 0] | |
--score_max [INT] - Score range: maximum. [default: autoscale]
--pn [STRING] - Name of the Project [default: 'Project']
--organism [mm10,mm9,hg19] - Reference genome build
--mode HiC - Full HiC analysis | |
parameters: | |
--in [PATH] - Path to directory with fastq / fastq.gz files. | |
--bam [PATH] - Path to directory with bam files. Two bam files (forward + reversed) are required per sample. [if given --in [PATH] is ignored] | |
--out [PATH] - Path to output directory. | |
--aln [bwa|bowtie2] - Choose alignment tool. [default: bwa] | |
--bin [INT] - Bin size [default: 100000]
--hic_thresh_min [(-)INT] - Threshold filter minimum [default: -1.5]
--hic_thresh_max [INT] - Threshold filter maximum [default: 5] | |
--inputBufferSize [INT] - Buffersize for creating hic matrix [default: 400000] | |
Skip Alignment -> BAM files as input:
The BAM files need to be from this pipeline with the following
file extension: "[NAME].(normalized|matrix).bam" !
Skip creating restriction maps:
After creating the restriction maps, write their path into the config file to skip
creating the restriction maps again. [path_T2C_restriction_maps]
""" | |
System.exit(0) | |
} | |
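// Example invocation for a full T2C run (paths, project name and coordinates are placeholders):
//   nextflow run TOuCAN.nf --mode T2C --in /path/to/fastq --out /path/to/results \
//       --pn MyProject --chr chr4 --start 1000000 --end 2000000 --organism mm10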
log.info """ | |
======================================================================================================================= | |
`| | | |
-"- __ | ' | |
/ ,-| ```'-, || ` | |
/ /@)|_ `-, | | |
/ ( ~ \\_```""-- ,_', : .___________. ______ __ __ ______ ___ .__ __. | |
_ ' \\ /```''''""`^ | | / __ \\ | | | | / | / \\ | \\ | | | |
/ | / `| : `---| |----`| | | | | | | | | ,----' / ^ \\ | \\| | | |
/ \\/ | | | | | | | | | | | | | | / /_\\ \\ | . ` | | |
/ __/ | | | | | | `--' | | `--' | | `----./ _____ \\ | |\\ | | |
| __/ / | | | |__| \\______/ \\______/ \\______/__/ \\__\\ |__| \\__| | |
"- _ | _/ _/=_// || : | |
' `~~`~^| /=/-_/_/`~~`~~~`~~`~^~` | |
`> ' \\ ~/_/_ /" ` ` ' | Targeted chrOmatin Capture ANalysis | |
' ,'~^~`^| |~^`^~~`~~~^~~~^~`; ' Version 0.1 | |
-' | | | \\ ` : ` | |
| :| | : | | |
jgs |: | || ' | |
| |/ | | : | |
|_/ | |
| ' | |
======================================================================================================================= | |
Mode: ${params.mode} | |
Project: ${params.pn} | |
Enzyme A: ${params.enzyme_a_name} -> ${params.enzyme_a_sequence} | |
Enzyme B: ${params.enzyme_b_name} -> ${params.enzyme_b_sequence} | |
Target Region | |
Chromosome: ${params.chr} | |
Start: ${params.start} | |
End: ${params.end} | |
Path to bin: ${path_bin} | |
Path to genome: ${path_genome} | |
Path to restriction maps: ${path_T2C_restriction_maps} | |
""" | |
if ( "${params.bam}" != "" ){ | |
log.info""" | |
Path to bam files: ${params.bam} | |
""" | |
} | |
log.info """ | |
Minor fixed parameters for BWA and SAMtools | |
BWA options: ${params.bwa_T2C_options} | |
samtools sort options: ${params.sort_options} | |
Library Label: ${params.library_label} | |
Platform Label: ${params.platform_label} | |
Center Label: ${params.center_label} | |
======================================================================================================================= | |
""" | |
outpath = params.out?.endsWith('/') ? params.out.substring( 0, params.out.length() -1 ) : params.out | |
filtered_interaction_bed_for_multiplot_1 = Channel.empty() | |
multiplot_bed_1 = Channel.empty() | |
matrix_to_plot = Channel.empty() | |
res_map = Channel.empty() | |
bam_f = Channel.empty() | |
bam2 = Channel.empty() | |
interaction_uropa = Channel.empty() | |
fastqGzFiles = Channel.empty() | |
hic_matrix_f = Channel.empty() | |
if("${params.path_matrix}" != "" && params.mode == "plot"){ | |
matrix_to_plot = Channel | |
.fromPath("${params.path_matrix}/*.{normalized,matrix}.bed") | |
.map {it -> [it.simpleName, it]} | |
} | |
if(params.mode == "uropa"){ | |
interaction_uropa = Channel | |
.fromPath("${params.in}/*.normalized.bed") | |
.map {it -> [it.simpleName, it]} | |
} | |
if(params.mode == "multiplot"){ | |
multiplot_bed_1 = Channel | |
.fromPath("${params.in}/*.normalized.bed") | |
.map {it -> [it.simpleName, it]} | |
filtered_interaction_bed_for_multiplot_1 = Channel | |
.fromPath("${params.in}/*.normalized.bed") | |
.map {it -> [it.simpleName, it]} | |
} | |
if (params.mode != "plot"){ | |
if ( "${path_T2C_restriction_maps}" != "none"){ | |
if (! file ("${path_T2C_restriction_maps}/restriction_targets.bed").exists()){ | |
println("Warning: restriction_target.bed does not exists on given path (path_T2C_restriction_maps in configuration file) -> starting to create restriction maps.") | |
create_res_map = true | |
} else { | |
res_map = Channel.fromPath("${path_T2C_restriction_maps}/*.bed") | |
create_res_map = false | |
} | |
} else { | |
create_res_map = true | |
} | |
if ( "${params.bam}" != "" ){ | |
Channel | |
.fromPath("${params.bam}/*.bam") | |
.map {it -> [it.simpleName, it]} | |
.set {bam_f} | |
create_bam = false | |
if("${params.mode}" == "HiC"){ | |
Channel | |
.fromPath("${params.bam}/*.bam") | |
.map {it -> [it.simpleName.replaceAll(params.sample_extension, ""), it]} | |
.set {bam2} | |
create_bam = false | |
} | |
} else if ("${params.mode}" == "HiC" & "${params.mode}" == "h5") { | |
hic_matrix_f = Channel | |
.fromPath("${params.in}/*.h5") | |
.map {it -> [it.simpleName, it]} | |
}else { | |
create_bam = true | |
fastqGzFiles = Channel.fromPath("${params.in}/*.fastq*") | |
} | |
} | |
modeList = ["T2C", "HiC", "help", "h", "plot", "uropa", "multiplot", "h5"] | |
alnList = ["bowtie2", "bwa"] | |
organismList = ["mm10", "mm9", "hg19"] | |
process check_parameters { | |
script: | |
// check if chr input is correct | |
if (!(params.mode in modeList )){ | |
println("ERROR: Mode not existing.\n\nList of all modes:\n\t-T2C\n\t-HiC\n\t-help\n\t-h\n\t-plot\n\nYour Input: '${params.mode}'") | |
System.exit(0) | |
} | |
if (!(params.organism in organismList )){ | |
println("ERROR: Organism not supported.\n\nList of all supported organisms:\n\t-mm10\n\t-mm9\n\t-hg19\nYour Input: '${params.mode}'") | |
System.exit(0) | |
} | |
if (!(params.aln in alnList ) && params.mode == "HiC"){ | |
println("ERROR: aln not existing.\n\nList of all supported alignment tools:\n\t-bwa\n\t-bowtie2\n\t-histat\n\nYour Input: '${params.mode}'") | |
System.exit(0) | |
} | |
if (params.mode == "T2C"){ | |
if (!("${params.chr}" ==~ /chr([1-9]|1[0-9]|X|Y)/ ) && (params.mode != "multiplot") && (params.mode != "uropa")){ | |
println("ERROR: No or false chromosome parameter given.\nCorrect example: 'chr4'.\nYour Input: '${params.chr}'") | |
System.exit(0) | |
} | |
if ((params.start == null) || (params.end == null)){ | |
println("ERROR: --start/--end not given") | |
System.exit(0) | |
} | |
if (!("${params.start}" ==~ /\d+/ )){ | |
println("ERROR: --start needs to be a number. Your Input: '${params.start}'") | |
System.exit(0) | |
} | |
if (!("${params.end}" ==~ /\d+/ )){ | |
println("ERROR: --end needs to be a number. Your Input: '${params.end}'") | |
System.exit(0) | |
} | |
if(params.start.toInteger() >= params.end.toInteger()){ | |
println("ERROR: start cannot be bigger/equal than end ") | |
System.exit(0) | |
} | |
if (!("${params.score_min}" ==~ /[-]?\d+/ )){ | |
println("ERROR: --score_min needs to be a number. Your Input: '${params.score_min}'") | |
System.exit(0) | |
} | |
if (!("${params.score_max}" ==~ /\d+/ )){ | |
println("ERROR: --score_max needs to be a number. Your Input: '${params.score_max}'") | |
System.exit(0) | |
} | |
if (("${params.check_res_maps}" != "0") && ("${params.check_res_maps}" != "1")){ | |
println("ERROR: --check_res_maps is a boolean parameter. Enter 0 or 1!") | |
System.exit(0) | |
} | |
} | |
if ( "${params.safe_all_files}" != "0" && "${params.safe_all_files}" != "1"){ | |
println("ERROR: --safe_all_files is a boolean parameter. Enter 0 or 1!") | |
System.exit(0) | |
} | |
if (params.mode == "HiC") { | |
if ( ! params.bin.toString().isInteger() ) { | |
println("ERROR: --bin has to be an Integer! Your input: " + params.bin ) | |
System.exit(0) | |
} | |
} | |
if (params.mode != "plot"){ | |
if(params.in == "" && params.bam == ""){ | |
println("ERROR: Input/BAM path is empty") | |
System.exit(0) | |
} else { | |
if(params.in != ""){ | |
if (!(file(params.in).exists())){ | |
println("ERROR: Input Path does not exist!") | |
System.exit(0) | |
} | |
} | |
} | |
} else { | |
if(outpath == ""){ | |
println("ERROR: Missing output path") | |
System.exit(0) | |
} | |
} | |
""" | |
echo check | |
""" | |
} | |
/* | |
* T2C Step 1 | |
* Creates restriction maps needed for T2C analysis
*/ | |
process create_restriction_maps_for_T2C { | |
publishDir "${outpath}/${params.pn}_restriction_maps/", mode: 'copy' | |
output: | |
file ("restriction_sites_${params.enzyme_a_name}.bed") into restriction_maps_1 | |
file ("restriction_fragments_${params.enzyme_a_name}.bed") into restriction_maps_2 | |
file ("restriction_sites.bed") into restriction_maps_3 | |
file ("restriction_fragments.bed") into restriction_maps_4 | |
file ("restriction_targets.bed") into restriction_maps_5 | |
when: | |
params.mode == 'T2C' || params.mode == 'multiplot' | |
create_res_map == true | |
script: | |
if(params.enzyme_b_sequence == ""){ | |
enzyme_b_option = "" | |
} else { | |
enzyme_b_option = "--sequence " + params.enzyme_b_sequence | |
} | |
""" | |
${path_python}/python ${path_bin}/bin/match_sequences.py \ | |
--fasta ${path_genome} \ | |
--bed "restriction_sites_${params.enzyme_a_name}.bed" \ | |
--sequence ${params.enzyme_a_sequence} | |
${path_python}/python ${path_bin}/bin/target_fragments.py \ | |
--input "restriction_sites_${params.enzyme_a_name}.bed" \ | |
--output "restriction_fragments_${params.enzyme_a_name}.bed" | |
${path_python}/python ${path_bin}/bin/match_sequences.py \ | |
--fasta ${path_genome} \ | |
--bed restriction_sites.bed \ | |
--sequence ${params.enzyme_a_sequence} \ | |
${enzyme_b_option} | |
${path_python}/python ${path_bin}/bin/target_fragments.py \ | |
--input restriction_sites.bed \ | |
--output restriction_fragments.bed | |
grep ${params.enzyme_a_sequence} restriction_fragments.bed > restriction_targets.bed | |
""" | |
} | |
restriction_maps = restriction_maps_1.concat(restriction_maps_2, restriction_maps_3, restriction_maps_4, restriction_maps_5) | |
res_map.concat(restriction_maps).into {final_res_map_for_check; final_res_map; final_res_map_for_intersection; res_map_for_ifm} | |
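// Optional sanity check (--check_res_maps 1): print the first lines of every restriction map.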
process check_restriction_maps { | |
tag{params.enzyme_a_name}
echo true
input: | |
file (bed) from final_res_map_for_check | |
when: | |
params.check_res_maps == 1 | |
script: | |
""" | |
head -5 $bed | |
""" | |
} | |
// result.subscribe {println it}
// Decompresses fastq files | |
process decompress { | |
tag{fastqGz} | |
if(params.safe_all_files == 1){ | |
publishDir "${outpath}/${params.pn}_fastq/", mode: 'copy', overwrite: 'false' | |
} | |
input: | |
file (fastqGz) from fastqGzFiles | |
output: | |
set basisname, fastqName ,file ("${fastqName}.fastq") into decompressedfastq, decompressedfastq_for_HiC_bwa, | |
decompressedfastq_for_HiC_bowtie2 | |
script: | |
fastqName = fastqGz.simpleName | |
basisname = fastqName.replaceFirst("${params.sample_extension}","") | |
fastqBasic = fastqGz.name.lastIndexOf('.').with {it != -1 ? fastqGz.name.substring(it+1):''} | |
if(fastqBasic == 'gz') | |
""" | |
zcat -d $fastqGz > ${fastqName}.fastq | |
""" | |
else if (fastqBasic == 'fastq') | |
""" | |
echo $fastqGz | |
""" | |
else | |
""" | |
echo "ERROR: Invalid datatype / file ending" | |
""" | |
} | |
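// Builds the BWA index for the reference genome, or skips the build if the
// .amb/.ann/.bwt/.pac/.sa files already exist next to the fasta.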
process create_bwa_index { | |
conda "$workflow.projectDir/envtoucan.yaml" | |
output: | |
file ('f') into index | |
file ('f') into index_hic_bwa | |
when: | |
(params.mode == "T2C" || (params.mode == "HiC" && params.aln == "bwa")) && create_bam == true | |
script: | |
genome_file = file (path_genome) | |
genome_name = genome_file.simpleName | |
amb = file("${path_genome}.amb") | |
ann = file("${path_genome}.ann") | |
bwt = file("${path_genome}.bwt") | |
pac = file("${path_genome}.pac") | |
sa = file("${path_genome}.sa") | |
if(amb.exists() && ann.exists() && bwt.exists() && pac.exists() && sa.exists()) | |
""" | |
echo done > f | |
""" | |
else | |
""" | |
bwa index ${path_genome} | |
echo done > f | |
""" | |
} | |
// T2C Step 2 | |
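// Aligns each mate separately with 'bwa aln', producing one .sai file per fastq.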
process bwa_aln { | |
//conda 'bioconda::bwa=0.7.15' | |
conda "$workflow.projectDir/envtoucan.yaml" | |
tag{fastq} | |
executor 'local' | |
if(params.safe_all_files == 1){ | |
publishDir "${outpath}/${params.pn}_additional_files/sai/", mode: 'copy', pattern: '*.sai' | |
} | |
input: | |
set basisname, fastqName ,file (fastq) from decompressedfastq | |
file f from index | |
when: | |
params.mode == "T2C" && create_bam == true | |
output: | |
set basisname, file ('*.sai'), file (fastq) into sai_files | |
script: | |
""" | |
bwa aln \ | |
${params.bwa_T2C_options} \ | |
${path_genome} \ | |
$fastq > ${fastqName}.sai | |
""" | |
} | |
// T2C Step 3 | |
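// Pairs the two .sai/.fastq files of a sample with 'bwa sampe' into a single paired-end SAM.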
process bwa_sampe_to_create_sam { | |
if(params.safe_all_files == 1){ | |
publishDir "${outpath}/${params.pn}_additional_files/sam/", mode: 'copy' | |
} | |
conda "$workflow.projectDir/envtoucan.yaml" | |
tag{name} | |
input: | |
set name, sai, fastq from sai_files.groupTuple() | |
output: | |
set name, file ("${name}.sam") into samfiles | |
when: | |
params.mode == "T2C" | |
create_bam == true | |
script: | |
sortbasename = [sai[0].getBaseName(), sai[1].getBaseName()].sort() | |
if(sai[0].contains(sortbasename[0])){ | |
sai_R1 = sai[0] | |
fastq_R1 = fastq[0] | |
sai_R2 = sai[1] | |
fastq_R2 = fastq[1] | |
} else { | |
sai_R1 = sai[1] | |
fastq_R1 = fastq[1] | |
sai_R2 = sai[0] | |
fastq_R2 = fastq[0] | |
} | |
""" | |
bwa sampe \ | |
-A \ | |
${path_genome} \ | |
${sai_R1} ${sai_R2} \ | |
${fastq_R1} ${fastq_R2} > ${name}.sam
""" | |
} | |
// T2C Step 4 | |
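// Attaches read-group metadata (ID, SM, LB, PL, CN, DS) to the SAM with 'samtools addreplacerg'.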
process samtools_addreplacerg { | |
tag{sam} | |
conda "$workflow.projectDir/envtoucan.yaml" | |
if(params.safe_all_files == 1){ | |
publishDir "${outpath}/${params.pn}_additional_files/sam/", mode: 'copy' | |
} | |
input: | |
set name, file (sam) from samfiles | |
output: | |
set name, file ("${name}_rg.sam") into samfiles_read_groups | |
when: | |
params.mode == "T2C" | |
create_bam == true | |
script: | |
ds = "${path_working}/*.srt.bam" | |
""" | |
samtools addreplacerg \ | |
-r "ID:${name}" \ | |
-r "CN:${params.center_label}" \ | |
-r "LB:${params.library_label}" \ | |
-r "SM:${name}" \ | |
-r "PL:${params.platform_label}" \ | |
-r "DS:${ds}" \ | |
-o ${name}_rg.sam \ | |
${sam} | |
""" | |
} | |
// T2C Step 5 | |
process samtools_sort { | |
executor 'local' | |
conda "$workflow.projectDir/envtoucan.yaml" | |
publishDir "${outpath}/${params.pn}_bam/", mode: 'copy' | |
tag{rgsam} | |
input: | |
set name, file (rgsam) from samfiles_read_groups | |
output: | |
set name, file ("${name}.bam") into bamfiles | |
when: | |
params.mode == "T2C" | |
create_bam == true | |
script: | |
""" | |
samtools sort \ | |
${params.sort_options} \ | |
-T ${name}_tmp_sort \ | |
-o ${name}.bam \ | |
${rgsam} | |
""" | |
} | |
bam_f.concat(bamfiles).into {finalbam; bamfiles_for_flagstats; workaround} | |
// T2C Step 6 | |
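// Writes a 'samtools flagstat' summary per BAM into the stats directory.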
process samtools_flagstat { | |
publishDir "${outpath}/${params.pn}_stats/", mode: 'copy' | |
conda "$workflow.projectDir/envtoucan.yaml" | |
tag{bam} | |
input: | |
set name, file (bam) from bamfiles_for_flagstats | |
output: | |
file ("${name}.flagstat.txt") into flagstats | |
when: | |
params.mode == "T2C" | |
script: | |
""" | |
samtools flagstat ${bam} > ${name}.flagstat.txt | |
""" | |
} | |
/* | |
* T2C Step 7 | |
* index the BAM files | |
*/ | |
process index_bamfiles { | |
tag{bam} | |
conda "$workflow.projectDir/envtoucan.yaml" | |
if(params.safe_all_files == 1){ | |
publishDir "${outpath}/${params.pn}_additional_files/bai/", mode: 'copy' | |
} | |
input: | |
set name, file (bam) from finalbam | |
output: | |
set name, file ("${name}.bam.bai") into bam_bai_files | |
when: | |
params.mode == "T2C" | |
script: | |
""" | |
samtools index ${bam} > ${name}.bam.bai | |
""" | |
} | |
bam_bai_files.combine(workaround, by: 0).set {bam_bai_files_final} | |
/* | |
* T2C Step 8 | |
* get the reads overlapping the targets | |
*/ | |
process overlapping_reads_targets { | |
tag{bam} | |
executor 'local' | |
if(params.safe_all_files == 1){ | |
publishDir "${outpath}/${params.pn}_additional_files/bam/", mode: 'copy' | |
} | |
input: | |
set name, file (bai), file (bam), file (res_target) from bam_bai_files_final.combine(final_res_map.filter{ it.simpleName == "restriction_targets"}) | |
output: | |
set name, file ("${name}.target.bam") into target_bam_files | |
when: | |
params.mode == "T2C" | |
script: | |
""" | |
${path_python}/python ${path_bin}/bin/add_fragment_to_bam.py \ | |
--input ${bam} \ | |
--output ${name}.target.bam \ | |
--bed ${res_target} | |
""" | |
} | |
/* | |
* T2C Step 9 | |
* namesort the bam file in preparation for the matrix | |
*/ | |
process namesorting_bam_files { | |
tag{target_bam} | |
conda "$workflow.projectDir/envtoucan.yaml" | |
if(params.safe_all_files == 1){ | |
publishDir "${outpath}/${params.pn}_additional_files/bam/", mode: 'copy' | |
} | |
input: | |
set name, file (target_bam) from target_bam_files | |
output: | |
set name, file ("${name}.namesort.bam") into namesort_bam_files | |
when: | |
params.mode == "T2C" | |
script: | |
""" | |
samtools sort \ | |
-n \ | |
-o ${name}.namesort.bam \ | |
${target_bam} | |
""" | |
} | |
/* | |
* T2C step 10 | |
* SAMtools flags to filter alignments
* 0x0001 p the read is paired in sequencing | |
* 0x0004 u the query sequence itself is unmapped | |
* 0x0008 U the mate is unmapped | |
* 0x0100 s the alignment is not primary | |
* 0x0800 S the alignment is supplementary | |
*/ | |
process filter_aligments { | |
tag{namesort_bam} | |
conda "$workflow.projectDir/envtoucan.yaml" | |
if(params.safe_all_files == 1){ | |
publishDir "${outpath}/${params.pn}_additional_files/bam/", mode: 'copy' | |
} | |
input: | |
set name, file (namesort_bam) from namesort_bam_files | |
output: | |
set name , file ("${name}.filtered.bam") into filtered_bam_files | |
when: | |
params.mode == "T2C" | |
script: | |
""" | |
samtools view -h \ | |
-f 0x0001 \ | |
-F 0x0004 \ | |
-F 0x0008 \ | |
-F 0x0100 \ | |
-F 0x0800 \ | |
${namesort_bam} > ${name}.filtered.bam | |
""" | |
} | |
/* | |
* T2C step 11 | |
* creating the first raw matrix
*/ | |
process create_matrix { | |
tag{filtered_bam} | |
if(params.safe_all_files == 1){ | |
publishDir "${outpath}/${params.pn}_additional_files/bed/", mode: 'copy' | |
} | |
input: | |
set name , file (filtered_bam) from filtered_bam_files | |
output: | |
set name, file ("${name}.reads.bed") into reads_bed_files, reads_bed_files_for_stats | |
when: | |
params.mode == "T2C" | |
script: | |
""" | |
cat ${filtered_bam} | \ | |
${path_python}/python ${path_bin}/bin/create_matrix.py -o ${name}.reads.bed | |
""" | |
} | |
/* | |
* T2C step 12 | |
* finish matrix file by | |
* creating a paired-end bed file on single base-pair level | |
*/ | |
process finish_raw_matrix { | |
tag{reads_bed} | |
if(params.safe_all_files == 1){ | |
publishDir "${outpath}/${params.pn}_additional_files/bed/", mode: 'copy' | |
} | |
input: | |
set name, file (reads_bed) from reads_bed_files | |
output: | |
//set name_raw, file ("${name}.matrix.bed") into raw_matrix_bed_for_plots | |
set name, file ("${name}.matrix.bed") into raw_matrix_bed_to_reassign | |
set name, file ("${name}.matrix.bed") into raw_matrix_for_norm | |
when: | |
params.mode == "T2C" | |
script: | |
//name_raw = "${name}_raw" | |
""" | |
cat ${reads_bed} | \ | |
sort -k 1,1 -k 2,2n -k 4,4 -k 5,5n | \ | |
${path_python}/python ${path_bin}/bin/aggregate_matrix.py -o ${name}.matrix.bed | |
""" | |
} | |
/* | |
* T2C step 13 | |
* split the matrix into per-mate coordinate BED files for fragment re-assignment
*/ | |
process split_matrix_for_re_evaluation { | |
tag{matrix_bed} | |
if(params.safe_all_files == 1){ | |
publishDir "${outpath}/${params.pn}_additional_files/bed/", mode: 'copy' | |
} | |
input: | |
set name, file (matrix_bed) from raw_matrix_bed_to_reassign | |
output: | |
set name, file ("${name}.loca.bed"), file ("${name}.locb.bed"), file (matrix_bed) into split_matrix | |
when: | |
params.mode == "T2C" | |
params.reassin == 1 | |
script: | |
""" | |
awk -F "\t" '//{print \$1 "\t" \$2 "\t" \$3}' ${matrix_bed} > ${name}.loca.bed | |
awk -F "\t" '//{print \$4 "\t" \$5 "\t" \$6}' ${matrix_bed} > ${name}.locb.bed | |
""" | |
} | |
/* | |
* T2C step 14 | |
* Intersect bed file with fragments | |
*/ | |
process intersect_beds_to_fragments { | |
tag{name} | |
conda "$workflow.projectDir/envtoucan.yaml" | |
if(params.safe_all_files == 1){ | |
publishDir "${outpath}/${params.pn}_additional_files/bed/", mode: 'copy' | |
} | |
input: | |
set name, file (a_bed), file (b_bed), file (matrix), file (res_frag) from split_matrix.combine(final_res_map_for_intersection.filter{ it.simpleName == "restriction_fragments_${params.enzyme_a_name}"}) | |
output: | |
set name, file ("${name}.frga.bed"), file ("${name}.frgb.bed"), file (matrix) into fragments_bed | |
when: | |
params.mode == "T2C" | |
params.reassin == 1 | |
script: | |
""" | |
sed 's/[[:blank:]]*\$\$//p' ${a_bed} | bedtools intersect -a ${a_bed} -b ${res_frag} -wao > ${name}.frga.bed | |
sed 's/[[:blank:]]*\$\$//p' ${b_bed} | bedtools intersect -a ${b_bed} -b ${res_frag} -wao > ${name}.frgb.bed | |
""" | |
} | |
/* | |
* T2C step 15 | |
* creating pre normalized matrix | |
*/ | |
process reassign_matrix { | |
tag{matrix} | |
if(params.safe_all_files == 1){ | |
publishDir "${outpath}/${params.pn}_additional_files/bed/", mode: 'copy' | |
} | |
input: | |
set name, file (frga), file (frgb), file (matrix) from fragments_bed | |
output: | |
set name, file ("${name}.temp.bed") into temp_bed_file | |
when: | |
params.mode == "T2C" | |
params.reassin == 1 | |
script: | |
""" | |
${path_python}/python ${path_bin}/bin/reassign_matrix.py \ | |
-m ${matrix} \ | |
-a ${frga} \ | |
-b ${frgb} > ${name}.temp.bed | |
""" | |
} | |
/* | |
* T2C step 16 | |
* Aggregate Matrix | |
*/ | |
process finalize_matrix { | |
tag{temp_bed} | |
publishDir "${outpath}/${params.pn}_Interactions/01_all_interactions_bed_raw/", mode: 'copy', pattern: "${name_raw}.reassigned.bed" | |
input: | |
set name, file (temp_bed) from temp_bed_file | |
output: | |
set name, file ("${name}.reassigned.bed") into reassigned_matrix | |
when: | |
params.mode == "T2C" | |
params.reassin == 1 | |
script: | |
""" | |
cat ${temp_bed} | \ | |
sort -k 1,1 -k 2,2n -k 4,4 -k 5,5n |\ | |
${path_python}/python ${path_bin}/bin/aggregate_matrix.py -o ${name}.reassigned.bed | |
""" | |
} | |
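// --reassin selects whether normalization runs on the fragment-reassigned matrix (default)
// or on the raw matrix from step 12.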
if (params.reassin == 1){ | |
matrix = reassigned_matrix | |
} else { | |
matrix = raw_matrix_for_norm | |
} | |
/* | |
* T2C step 17 | |
* Normalize Matrix | |
*/ | |
process normalize_matrix { | |
tag{matrix} | |
publishDir "${outpath}/${params.pn}_Interactions/02_all_interactions_bed_norm/", mode: 'copy' | |
input: | |
set name, file (matrix) from matrix | |
output: | |
set name, file ("${name}.normalized.bed") into normalized_matrix, multiplot_bed_2, filtered_interaction_bed_for_multiplot_2 | |
when: | |
params.mode == "T2C" | |
script: | |
""" | |
${path_R}/Rscript ${path_bin}/bin/normalize_paired_bed.R --bedfile ${matrix} \ | |
--outfile ${name}.normalized.bed \ | |
--library ${path_bin}/RLIB/ \ | |
--method ${params.norm_method} | |
""" | |
} | |
to_plot = matrix_to_plot.concat(normalized_matrix) | |
/* | |
* T2C step 18 | |
* Creating Interaction matrix plot | |
*/ | |
process plotting { | |
tag{name} | |
if(params.mode == "plot" ){ | |
publishDir "${outpath}/plot/", mode: 'copy' | |
} else { | |
publishDir "${outpath}/${params.pn}_plots/interaction_plot/", mode: 'copy' | |
} | |
input: | |
set name, file (matrix) from to_plot | |
output: | |
file ("*.pdf") | |
when: | |
params.mode == "T2C" || params.mode == "plot" | |
script: | |
""" | |
${path_R}/Rscript ${path_bin}/bin/plot_cis_matrix.R \ | |
--bedfile ${matrix} \ | |
--outfile ${name}.cis.pdf \ | |
--library ${path_bin}/RLIB/ \ | |
--chromosome ${params.chr} \ | |
--start ${params.start} \ | |
--end ${params.end} \ | |
--colour-minimum ${params.score_min} \ | |
--colour-maximum ${params.score_max} \ | |
${params.plot_options_T2C} | |
""" | |
} | |
filtered_interaction_bed_merged = filtered_interaction_bed_for_multiplot_1.concat(filtered_interaction_bed_for_multiplot_2) | |
filtered_interaction_bed_final_merge = interaction_uropa.concat(filtered_interaction_bed_merged) | |
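// Filters the normalized interactions down to the target region (--chr/--start/--end, --int_score)
// and passes a work-directory path to the R script so the UROPA configs below can reference the file.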
process create_interaction_table { | |
tag{name} | |
publishDir "${outpath}/${params.pn}_Interactions/03_target_region_interactions_bed/", mode: 'copy' | |
input: | |
set name, file (norm) from filtered_interaction_bed_final_merge | |
output: | |
set name, file ("${name}.filtered.interactions.bed") into interaction_bed_for_merge, to_ifmatrix, for_uropa_1, for_uropa_2 | |
when: | |
params.mode == "T2C" || params.mode == "uropa" || params.mode == "multiplot" | |
script: | |
""" | |
${path_R}/Rscript ${path_bin}/bin/create_interaction_table.R \ | |
${norm} ${name}.filtered.interactions.bed \ | |
${params.chr} ${params.start} ${params.end} ${params.int_score} \ | |
$workflow.workDir/${name}.filtered.interactions.bed | |
""" | |
} | |
//interaction_empty.concat(interaction_check).into {for_uropa1; for_uropa2} | |
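// Per-sample read statistics plot for the target region.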
process create_stats { | |
tag{name} | |
publishDir "${outpath}/${params.pn}_stats/", mode: 'copy' | |
input: | |
set name, file (reads) from reads_bed_files_for_stats | |
output: | |
file ("${name}_stats.pdf") | |
when: | |
params.mode == "T2C" | |
script: | |
""" | |
${path_R}/Rscript ${path_bin}/bin/create_T2C_stats.R \ | |
${reads} ${name}_stats.pdf \ | |
${params.chr} ${params.start} ${params.end} ${name} | |
""" | |
} | |
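// UROPA annotation: a JSON config is written on the fly for each sample, UROPA is run on the
// filtered interactions, the fragments are swapped and annotated a second time, and both
// result tables are merged below.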
process create_uropa_config_1 { | |
tag{name} | |
input: | |
set name, file (int_bed) from for_uropa_1 | |
output: | |
set name, file ("${name}_uropa_config") into uropa_config_1 | |
when: | |
params.mode == "T2C" || params.mode == "uropa" | |
script: | |
config_parameters = [params.uropa_feature,params.uropa_anchor,params.uropa_strand,params.uropa_direction, | |
params.uropa_filter_attr,params.uropa_attr_value,params.uropa_show_attr] | |
conf_list = config_parameters.collect { par -> | |
return "\\\"" + par.replace(",","\\\",\\\"") + "\\\"" | |
} | |
conf_list_final = conf_list.collect {p -> | |
if(p.split(",").size() > 1) { | |
return "[" + p + "]" | |
} | |
return p | |
} | |
""" | |
echo "{ \ | |
\\"queries\\" : [ \ | |
{ \ | |
\\"feature\\" : ${conf_list_final[0]}, \ | |
\\"feature.anchor\\" : ${conf_list_final[1]}, \ | |
\\"distance\\" : [ \ | |
${params.uropa_dist_1}, \ | |
${params.uropa_dist_2} \ | |
], \ | |
\\"strand\\" : ${conf_list_final[2]}, \ | |
\\"direction\\" : ${conf_list_final[3]}, \ | |
\\"internals\\" : \\"True\\", \ | |
\\"filter.attribute\\" : ${conf_list_final[4]}, \ | |
\\"attribute.value\\" : ${conf_list_final[5]}, \ | |
\\"show.attributes\\" : ${conf_list_final[6]} \ | |
} \ | |
], \ | |
\\"priority\\" : \\"FALSE\\", \ | |
\\"gtf\\" : \\"${path_gtf}\\", \ | |
\\"bed\\" : \\"$workflow.workDir/${name}.filtered.interactions.bed\\" \ | |
}" > ${name}_uropa_config | |
""" | |
} | |
process run_uropa_1 { | |
tag{name} | |
//conda "$workflow.projectDir/envtoucan.yaml" | |
if(params.safe_all_files == 1){ | |
publishDir "${outpath}/${params.pn}_uropa/", mode: 'copy' | |
} | |
input: | |
set name, file (config) from uropa_config_1 | |
output: | |
set name, file ("${name}_allhits.txt"), file ("${name}_finalhits.txt") into uropa_1 | |
when: | |
params.mode == "T2C" || params.mode == "uropa" | |
script: | |
""" | |
uropa -i ${config} -d -p ${name} -t ${params.uropa_threads} | |
""" | |
} | |
process swap_fragments { | |
tag{name} | |
input: | |
set name, file (int_bed) from for_uropa_2 | |
output: | |
set name, file ("${name}.swap.interaction.bed") into swaped_interactions_bed | |
when: | |
params.mode == "T2C" || params.mode == "uropa" | |
script: | |
""" | |
${path_R}/Rscript ${path_bin}/bin/swap_fragments.R ${int_bed} ${name}.swap.interaction.bed $workflow.workDir/${name}.swap.interaction.bed | |
""" | |
} | |
process create_uropa_config_2 { | |
tag{name} | |
input: | |
set name, file (int_bed) from swaped_interactions_bed | |
output: | |
set name, file ("${name}_uropa_config_2") into uropa_config_2 | |
when: | |
params.mode == "T2C" || params.mode == "uropa" | |
script: | |
config_parameters = [params.uropa_feature,params.uropa_anchor,params.uropa_strand,params.uropa_direction, | |
params.uropa_filter_attr,params.uropa_attr_value,params.uropa_show_attr] | |
conf_list = config_parameters.collect { par -> | |
return "\\\"" + par.replace(",","\\\",\\\"") + "\\\"" | |
} | |
conf_list_final = conf_list.collect {p -> | |
if(p.split(",").size() > 1) { | |
return "[" + p + "]" | |
} | |
return p | |
} | |
""" | |
echo "{ \ | |
\\"queries\\" : [ \ | |
{ \ | |
\\"feature\\" : ${conf_list_final[0]}, \ | |
\\"feature.anchor\\" : ${conf_list_final[1]}, \ | |
\\"distance\\" : [ \ | |
${params.uropa_dist_1}, \ | |
${params.uropa_dist_2} \ | |
], \ | |
\\"strand\\" : ${conf_list_final[2]}, \ | |
\\"direction\\" : ${conf_list_final[3]}, \ | |
\\"internals\\" : \\"True\\", \ | |
\\"filter.attribute\\" : ${conf_list_final[4]}, \ | |
\\"attribute.value\\" : ${conf_list_final[5]}, \ | |
\\"show.attributes\\" : ${conf_list_final[6]} \ | |
} \ | |
], \ | |
\\"priority\\" : \\"FALSE\\", \ | |
\\"gtf\\" : \\"${path_gtf}\\", \ | |
\\"bed\\" : \\"$workflow.workDir/${name}.filtered.interactions.bed\\" \ | |
}" > ${name}_uropa_config_2 | |
""" | |
} | |
process run_uropa_2 { | |
tag{name} | |
//conda "$workflow.projectDir/envtoucan.yaml" | |
if(params.safe_all_files == 1){ | |
publishDir "${outpath}/${params.pn}_uropa/", mode: 'copy' | |
} | |
input: | |
set name, file (config) from uropa_config_2 | |
output: | |
set name, file ("${name}_2_allhits.txt"), file ("${name}_2_finalhits.txt") into uropa_2 | |
when: | |
params.mode == "T2C" || params.mode == "uropa" | |
script: | |
""" | |
uropa -i ${config} -d -p ${name}_2 -t ${params.uropa_threads} | |
""" | |
} | |
tmp = uropa_1.combine(uropa_2, by: 0) | |
final_uropa_set = tmp.combine(interaction_bed_for_merge, by: 0) | |
process uropa_merge { | |
tag{name} | |
publishDir "${outpath}/${params.pn}_Interactions/04_final_interactions_annotated/", mode: 'copy' | |
input: | |
set name, file (allhits_1), file (finalhits_1), file (allhits_2), file (finalhits_2), file (int_bed) from final_uropa_set | |
output: | |
set name, file ("${name}.finalhits.merged.txt") into merged_uropa | |
file ("${name}.finalhits.merged.txt") into for_xlsx | |
when: | |
params.mode == "T2C" || params.mode == "uropa" | |
script: | |
""" | |
${path_R}/Rscript ${path_bin}/bin/merge_uropa_interactions.R ${int_bed} ${finalhits_1} ${finalhits_2} ${name}.finalhits.merged.txt | |
""" | |
} | |
xlsx_list = for_xlsx.toSortedList() | |
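// Collects all merged annotation tables into a single xlsx workbook for the project.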
process xlsx_writer { | |
publishDir "${outpath}/", mode: 'copy' | |
input: | |
val f from xlsx_list | |
output: | |
file ("${params.pn}.xlsx") | |
when: | |
params.mode == "T2C" || params.mode == "uropa" | |
script: | |
file_list = f.sort {it.getBaseName()} | |
file_list_str = file_list.toListString().replace(', ',',') - "[" - "]" | |
""" | |
${path_python}/python ${path_bin}/bin/julianos_xlsxgen_vTOuCAN.py ${path_bin}/bin/julianos_config.json --out ${params.pn} --flist ${file_list_str} | |
""" | |
} | |
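// Reshapes the merged annotation table into a per-viewpoint interaction list.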
process create_viewpoint { | |
tag{name} | |
publishDir "${outpath}/${params.pn}_Interactions/05_interactions_viewpoint_annotated/", mode: 'copy' | |
input: | |
set name, file (merged_finalhits) from merged_uropa | |
output: | |
file ("${name}.interactions.viewpoint.txt") | |
when: | |
params.mode == "T2C" || params.mode == "uropa" | |
script: | |
""" | |
${path_R}/Rscript ${path_bin}/bin/create_interaction_viewpoint.R ${merged_finalhits} ${name}.interactions.viewpoint.txt | |
""" | |
} | |
//to_ifmatrix = interaction_multiplot.concat(interaction_bed_TAD) | |
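// Converts the filtered interactions into an interaction-frequency matrix and calls TAD
// boundaries on it with RobusTAD; the boundary scores are then rescaled to genomic coordinates.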
process convert_to_IFMatrix { | |
tag{name} | |
echo true | |
publishDir "${outpath}/${params.pn}_additional_files/IFMatrix/", mode: 'copy' | |
input: | |
set name, file (int_bed), file (res) from to_ifmatrix.combine(res_map_for_ifm.filter{ it.simpleName == "restriction_targets"}) | |
output: | |
set name, file ("${name}.ifm.txt") into ifmatrix | |
when: | |
params.mode == "T2C" || params.mode == "multiplot" | |
script: | |
""" | |
${path_R}/Rscript ${path_bin}/bin/convert_to_IFMatrix.R ${int_bed} ${res} ${name}.ifm.txt ${params.chr} ${params.start} ${params.end} | |
""" | |
} | |
process run_robusTAD { | |
tag{name} | |
publishDir "${outpath}/${params.pn}_TAD/", mode: 'copy' | |
input: | |
set name, file (tfm) from ifmatrix | |
output: | |
set name, file ("TADBoundaryCalls_*.txt") into tadbc | |
when: | |
params.mode == "T2C" || params.mode == "multiplot" | |
script: | |
""" | |
${path_R}/Rscript ${path_bin}/bin/RobusTAD.R -i ${tfm} --header FALSE -n norm --minWin 1 --maxWin 500 | |
""" | |
} | |
//${path_R}/Rscript ${path_bin}/bin/RobusTAD.R -i ${tfm} --header FALSE -n norm --minWin 1 --maxWin 500 | |
process x_axis_scaling { | |
tag{name} | |
publishDir "${outpath}/${params.pn}_TAD/scaled/", mode: 'copy' | |
input: | |
set name, file (tad) from tadbc | |
output: | |
set name, file ("${bn}_scaled.txt") into tadx, multiplot_TAD | |
when: | |
params.mode == "T2C" || params.mode == "multiplot" | |
script: | |
bn = tad.baseName | |
""" | |
${path_R}/Rscript ${path_bin}/bin/xaxis_scaling.R ${tad} ${params.start} ${params.end} ${bn}_scaled.txt | |
""" | |
} | |
process plot_TAD_graph { | |
tag{name} | |
publishDir "${outpath}/${params.pn}_plots/TAD/", mode: 'copy' | |
input: | |
set name, file (tad) from tadx | |
output: | |
file ("${name}.TAD.pdf") | |
when: | |
params.mode == "T2C" || params.mode == "multiplot" | |
script: | |
""" | |
${path_R}/Rscript ${path_bin}/bin/plotTAD.R ${tad} ${name}.TAD.pdf | |
""" | |
} | |
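// Multiplot: combines the interaction map, the scaled TAD boundary scores and the gene
// annotation for the target region into one figure per sample.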
multiplot_bed_1.concat(multiplot_bed_2).set {multiplot_bed_merged} | |
multiplot_TAD.join(multiplot_bed_merged).set {multi} | |
process multiplot { | |
tag{name} | |
publishDir "${outpath}/${params.pn}_plots/multiplot/", mode: 'copy' | |
input: | |
set name, file (tad), file (bed) from multi | |
output: | |
file ("*.png") into for_pptx | |
when: | |
params.mode == "T2C" || params.mode == "multiplot" | |
script: | |
""" | |
${path_R}/Rscript ${path_bin}/bin/plot_multi.R \ | |
--bedfile ${bed} \ | |
--outfile ${name}.multi.png \ | |
--library ${path_bin}/RLIB/ \ | |
--chromosome ${params.chr} \ | |
--start ${params.start} \ | |
--end ${params.end} \ | |
--colour-minimum ${params.score_min} \ | |
--colour-maximum ${params.score_max} \ | |
--tad ${tad} \ | |
--organism ${params.organism} \ | |
${params.plot_options_T2C} | |
""" | |
} | |
for_pptx_list = for_pptx.toSortedList() | |
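// Bundles all multiplot PNGs into a single pptx presentation.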
process pptx_writer { | |
publishDir "${outpath}/", mode: 'copy' | |
input: | |
val pptxList from for_pptx_list | |
output: | |
file ("*.pptx") | |
when: | |
params.mode == "T2C" || params.mode == "multiplot" | |
script: | |
file_list = pptxList.sort {it.getBaseName()} | |
file_list_str = file_list.toListString().replace(', ',',') - "[" - "]" | |
""" | |
${path_python}/python ${path_bin}/bin/pptx_writer_vTOuCAN.py ${path_bin}/bin/pptx_config.json --output ${params.pn} --flist ${file_list_str} | |
""" | |
} | |
//================================= HiC ======================================== | |
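// Example invocation for the HiC branch (paths are placeholders):
//   nextflow run TOuCAN.nf --mode HiC --in /path/to/fastq --out /path/to/results --aln bwa --bin 100000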
process alignment_with_bwa { | |
tag{fastq} | |
publishDir "${outpath}/${params.pn}_bam/", mode: 'copy' | |
conda "$workflow.projectDir/envtoucan.yaml" | |
input: | |
set basisname, fastqName ,file (fastq) from decompressedfastq_for_HiC_bwa | |
file f from index_hic_bwa | |
output: | |
set basisname, file ("${fastqName}.bam") into bwa_alignment | |
when: | |
params.mode == "HiC" && params.aln == "bwa" && create_bam == true | |
script: | |
""" | |
bwa mem \ | |
${params.bwa_HiC_options} \ | |
${path_genome} \ | |
$fastq | samtools view -Shb > ${fastqName}.bam | |
""" | |
} | |
process bowtie2_index { | |
publishDir "${path_genome}", mode: 'copy' | |
conda "$workflow.projectDir/envtoucan.yaml" | |
when: | |
params.mode == "HiC" && params.aln == "bowtie2" && create_bam == true | |
output: | |
val ("${genome_name}") into index_bt2 | |
script: | |
genome_file = file (path_genome) | |
genome_name = genome_file.name | |
indx = file ("${path_genome}").getBaseName() | |
path = file ("${path_genome}").getParent() | |
i = "${path}/bowtie2/${indx}" | |
bt1 = file("${i}.1.bt2") | |
bt2 = file("${i}.2.bt2") | |
bt3 = file("${i}.3.bt2") | |
bt4 = file("${i}.4.bt2") | |
rev1 = file("${i}.rev.1.bt2") | |
rev2 = file("${i}.rev.2.bt2") | |
if(bt1.exists() && bt2.exists() && bt3.exists() && bt4.exists() && rev1.exists() && rev2.exists()) | |
""" | |
echo "Skiped creating bt2 index" | |
""" | |
else | |
""" | |
mkdir -p ${path}/bowtie2
bowtie2-build ${path_genome} ${i} --threads ${params.bt2_index_threads}
""" | |
} | |
process alignment_with_bowtie2 { | |
tag{fastq} | |
publishDir "${outpath}/${params.pn}_bam/", mode: 'copy' | |
conda "$workflow.projectDir/envtoucan.yaml" | |
input: | |
set basisname, fastqName ,file (fastq), bt2_idx from decompressedfastq_for_HiC_bowtie2.combine(index_bt2) | |
output: | |
set basisname, file ("${fastqName}.sam") into bowtie2_alignment | |
when: | |
params.mode == "HiC" && params.aln == "bowtie2" && create_bam == true | |
script: | |
indx = file ("${path_genome}").getBaseName() | |
path = file ("${path_genome}").getParent() | |
i = "${path}/bowtie2/${indx}" | |
""" | |
bowtie2 -x ${i} -U ${fastq} --very-sensitive --reorder -p ${params.threads_bowtie2} > ${fastqName}.sam | |
""" | |
} | |
bam2.concat(bwa_alignment, bowtie2_alignment).set {hic_alignment} | |
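// Builds the binned contact matrix per sample with HiCExplorer's hicBuildMatrix from the
// forward and reverse alignments.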
process create_HiC_matrix { | |
tag{name} | |
publishDir "${outpath}/${params.pn}_h5_matrix/", mode: 'copy' | |
input: | |
set name, bams from hic_alignment.groupTuple() | |
output: | |
set name, file("${name}_matrix.h5") into hic_matrix, hic_matrix_for_diagnostic | |
file ("${name}.bam") | |
when: | |
params.mode == "HiC" | |
script: | |
sortbasename = [bams[0].getBaseName(), bams[1].getBaseName()].sort() | |
if(bams[0].contains(sortbasename[0])){ | |
bam_r1 = bams[0] | |
bam_r2 = bams[1] | |
} else { | |
bam_r1 = bams[1] | |
bam_r2 = bams[0] | |
} | |
""" | |
hicBuildMatrix --samFiles ${bam_r1} ${bam_r2} \ | |
--QCfolder ${outpath}/${params.pn}_QC_${name}/ \ | |
--binSize ${params.bin} \ | |
-b ${name}.bam \ | |
--inputBufferSize ${params.inputBufferSize} \ | |
--restrictionSequence ${params.enzyme_a_sequence} \ | |
--threads ${params.hicBuildMatrix_threads} \
-o ${name}_matrix.h5 | |
""" | |
} | |
process diagnostic_plot_of_HiC_Matrix { | |
publishDir "${outpath}/${params.pn}_diagnostic_plots/", mode: 'copy' | |
tag{name} | |
input: | |
set name, file (matrix) from hic_matrix_for_diagnostic | |
output: | |
set name, file ("${name}.png") | |
when: | |
params.mode == "HiC" | |
script: | |
""" | |
hicCorrectMatrix diagnostic_plot -m ${matrix} -o ${name}.png | |
""" | |
} | |
hic_matrix_final = hic_matrix_f.concat(hic_matrix) | |
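// Balances the matrix with hicCorrectMatrix; bins whose coverage falls outside the
// --hic_thresh_min/--hic_thresh_max thresholds (chosen from the diagnostic plot) are removed.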
process correct_matrix { | |
tag{name} | |
publishDir "${outpath}/${params.pn}_h5_matrix/corrected/", mode: 'copy' | |
input: | |
set name, file (matrix) from hic_matrix_final | |
output: | |
set name, file("${name}_corrected.h5") into hic_matrix_corrected | |
when: | |
params.mode == "HiC" || params.mode == "h5" | |
script: | |
""" | |
hicCorrectMatrix correct -m ${matrix} --filterThreshold \ | |
${params.hic_thresh_min} ${params.hic_thresh_max} -o ${name}_corrected.h5 | |
""" | |
} | |
process plot_matrix { | |
tag{name} | |
publishDir "${outpath}/${params.pn}_plot/", mode: 'copy' | |
input: | |
set name, file (corrected_matrix) from hic_matrix_corrected | |
output: | |
file ("${name}.hic.matrix.png") | |
when: | |
params.mode == "HiC" | |
script: | |
if ((params.chr != "") && (params.start != 0) && p(arams.end != 0)) | |
""" | |
hicPlotMatrix -m ${corrected_matrix} -o ${name}.hic.matrix.png \ | |
--dpi 400 --region ${params.chr}:${params.start}-${params.end} | |
""" | |
else | |
""" | |
hicPlotMatrix -m ${corrected_matrix} -o ${name}.hic.matrix.png \ | |
--dpi 400 | |
""" | |
} |