Skip to content
Permalink
1f2da67991
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
1659 lines (1249 sloc) 39.1 KB
#!/usr/bin/env nextflow
// ---------------------------------------------------------------------------
// Default values of all pipeline parameters. Each one is given on the command
// line as --<name> <value> (see the usage text printed in help mode).
// ---------------------------------------------------------------------------
// Pipeline mode; the default prints the help text and exits.
params.mode = "help"
// Input directory (fastq files, or *.normalized.bed for uropa/multiplot mode).
params.in = ""
// Output directory.
params.out = ""
params.path_matrix = "" //only for --mode plot
// Directory with existing BAM files; when set, alignment is skipped.
params.bam = ""
// 0|1 switch: publish intermediate files as well.
params.safe_all_files = 0
// 0|1 switch: run the restriction-map preview process.
params.check_res_maps = 0
// Organism key, validated against organismList and forwarded to plot_multi.R.
params.organism = "mm10"
// Project name, used as prefix for the output sub-directories.
params.pn = "Project"
// parameter for T2C plots
params.chr = ""
params.start = 0
params.end = 0
params.score_min = 0
params.score_max = 0
// Forwarded to create_interaction_table.R as its sixth argument.
params.int_score = 50
// parameter for hic
// Alignment tool for HiC mode.
params.aln = "bwa"
// Bin size handed to hicBuildMatrix --binSize.
params.bin = 100000
// Help mode: print the banner and usage text, then stop before any channel or
// process is set up. The text is emitted verbatim through log.info.
// The advertised --bin default matches the actual default (params.bin = 100000).
if(params.mode == "help" || params.mode == "h"){
    log.info """
==========================================
`| |
-"- __ | '
/ ,-| ```'-, || `
/ /@)|_ `-, |
/ ( ~ \\_```""-- ,_', : .___________. ______ __ __ ______ ___ .__ __.
_ ' \\ /```''''""`^ | | / __ \\ | | | | / | / \\ | \\ | |
/ | / `| : `---| |----`| | | | | | | | | ,----' / ^ \\ | \\| |
/ \\/ | | | | | | | | | | | | | | / /_\\ \\ | . ` |
/ __/ | | | | | | `--' | | `--' | | `----./ _____ \\ | |\\ |
| __/ / | | | |__| \\______/ \\______/ \\______/__/ \\__\\ |__| \\__|
"- _ | _/ _/=_// || :
' `~~`~^| /=/-_/_/`~~`~~~`~~`~^~`
`> ' \\ ~/_/_ /" ` ` ' | Targeted chrOmatin Capture ANalysis
' ,'~^~`^| |~^`^~~`~~~^~~~^~`; ' Version 0.1
-' | | | \\ ` : `
| :| | : |
jgs |: | || '
| |/ | | :
|_/ | '
==========================================
Usage: nextflow run TOuCAN.nf --in [Input Path] --out [Output Path] --mode [Modi] [options]
--mode help, h - For showing this help message
--mode plot - Plot data [currently only T2C plots]
parameters:
--path_matrix [PATH] - Path to directory with *.normalized.bed files.
--chr [chr1,chr2,...,chrY] - On which chromosome is the target region.
--start [INT] - Start of target region.
--end [INT] - End of target region.
--score_min [INT] - Score range: minimum. [default: 0]
--score_max [INT] - Score range: maximium. [default: autoscale]
--pn [STRING] - Name of the Project [default: 'Project']
--mode T2C - Full T2C analysis
parameters:
--in [PATH] - Path to directory with fastq / fastq.gz files.
--bam [PATH] - Path to directory with bam files. [if given --in [PATH] is ignored]
--out [PATH] - Path to output directory.
--safe_all_files [0|1] - If 1 safes all temporary files into "OUTPUT/02_analysis/". [default: 0]
--check_res_maps [0|1] - If 1 prints first 5 lines of every file from restriction maps. [default: 0]
--chr [chr1,chr2,...,chrY] - On which chromosome is the target region.
--start [INT] - Start of target region.
--end [INT] - End of target region.
--score_min [INT] - Score range: minimum. [default: 0]
--score_max [INT] - Score range: maximium. [default: autoscale]
--pn [STRING] - Name of the Project [default: 'Project']
--mode uropa - Uropa annoation [T2C]
parameters:
--in [PATH] - Path to directory with *.normalized.bed
--out [PATH] - Path to output directory.
--chr [chr1,chr2,...,chrY] - On which chromosome is the target region.
--start [INT] - Start of target region.
--end [INT] - End of target region.
--pn [STRING] - Name of the Project [default: 'Project']
--mode multiplot - creating a plot with interaction map, TAD graph and gene annotation
parameters:
--in [PATH] - Path to directory with *.normalized.bed
--out [PATH] - Path to output directory.
--chr [chr1,chr2,...,chrY] - On which chromosome is the target region.
--start [INT] - Start of target region.
--end [INT] - End of target region.
--score_min [INT] - Score range: minimum. [default: 0]
--score_max [INT] - Score range: maximium. [default: autoscale]
--pn [STRING] - Name of the Project [default: 'Project']
--mode HiC - Full HiC analysis
parameters:
--in [PATH] - Path to directory with fastq / fastq.gz files.
--out [PATH] - Path to output directory.
--aln [bwa|bowtie2] - Choose alignment tool. [default: bwa]
--bin [INT] - Binsize [default: 100000]
Skip Aligment -> BAM files as Input:
The BAM files need to be from this Pipeline with follwing
file extension: "[NAME].(normalized|matrix).bam" !
Skip creating restritction maps:
After creating the restriction maps write their path into the config file to skip
creating the restrction maps again. [path_T2C_restriction_maps]
"""
    System.exit(0)
}
// Run banner: logs the selected mode, project name, enzymes, target region and
// tool paths. The enzyme parameters and the path_* variables are not assigned
// in this script - presumably they come from the pipeline configuration file;
// verify against nextflow.config.
log.info """
=======================================================================================================================
`| |
-"- __ | '
/ ,-| ```'-, || `
/ /@)|_ `-, |
/ ( ~ \\_```""-- ,_', : .___________. ______ __ __ ______ ___ .__ __.
_ ' \\ /```''''""`^ | | / __ \\ | | | | / | / \\ | \\ | |
/ | / `| : `---| |----`| | | | | | | | | ,----' / ^ \\ | \\| |
/ \\/ | | | | | | | | | | | | | | / /_\\ \\ | . ` |
/ __/ | | | | | | `--' | | `--' | | `----./ _____ \\ | |\\ |
| __/ / | | | |__| \\______/ \\______/ \\______/__/ \\__\\ |__| \\__|
"- _ | _/ _/=_// || :
' `~~`~^| /=/-_/_/`~~`~~~`~~`~^~`
`> ' \\ ~/_/_ /" ` ` ' | Targeted chrOmatin Capture ANalysis
' ,'~^~`^| |~^`^~~`~~~^~~~^~`; ' Version 0.1
-' | | | \\ ` : `
| :| | : |
jgs |: | || '
| |/ | | :
|_/
| '
=======================================================================================================================
Mode: ${params.mode}
Project: ${params.pn}
Enzyme A: ${params.enzyme_a_name} -> ${params.enzyme_a_sequence}
Enzyme B: ${params.enzyme_b_name} -> ${params.enzyme_b_sequence}
Target Region
Chromosome: ${params.chr}
Start: ${params.start}
End: ${params.end}
Dependencies
Python: ${path_python}
BWA: ${path_bwa}
samtools: ${path_samtools}
bedtools: ${path_bedtools}
Path to bin: ${path_bin}
Path to genome: ${path_genome}
Path to restriction maps: ${path_T2C_restriction_maps}
"""
// Log the BAM directory only when one was supplied via --bam.
// (The comparison must be written as `!=`: `! params.bam == ""` negates the
// value first and then compares a boolean with a string, which is never true.)
if ( params.bam != "" ){
    log.info """
Path to bam files: ${params.bam}
"""
}
// Second part of the run banner: the aligner/samtools options and read-group
// labels that are read from params (not assigned in this script).
log.info """
Minor fixed parameters for BWA and SAMtools
BWA options: ${params.bwa_T2C_options}
samtools sort options: ${params.sort_options}
Library Label: ${params.library_label}
Platform Label: ${params.platform_label}
Center Label: ${params.center_label}
=======================================================================================================================
"""
// Output directory without a trailing slash, so sub-paths can be appended.
outpath = params.out?.endsWith('/') ? params.out.substring( 0, params.out.length() -1 ) : params.out

// Every source channel starts empty; only the channels needed by the selected
// mode are replaced below, so processes of other modes simply receive nothing.
filtered_interaction_bed_for_multiplot_1 = Channel.empty()
multiplot_bed_1 = Channel.empty()
matrix_to_plot = Channel.empty()
res_map = Channel.empty()
bam_f = Channel.empty()
interaction_uropa = Channel.empty()
fastqGzFiles = Channel.empty()

// Flags read by the `when:` guards of the alignment / restriction-map
// processes. They get a defined default here so that the guards can also be
// evaluated in plot mode, where the block further down is skipped.
create_res_map = false
create_bam = false

// plot mode: matrices to plot, keyed by the file's simple name
if("${params.path_matrix}" != "" && params.mode == "plot"){
    matrix_to_plot = Channel
        .fromPath("${params.path_matrix}/*.{normalized,matrix}.bed")
        .map {it -> [it.simpleName, it]}
}

// uropa mode: normalized interaction files from --in
if(params.mode == "uropa"){
    interaction_uropa = Channel
        .fromPath("${params.in}/*.normalized.bed")
        .map {it -> [it.simpleName, it]}
}

// multiplot mode: the same files are needed by two independent consumers
if(params.mode == "multiplot"){
    multiplot_bed_1 = Channel
        .fromPath("${params.in}/*.normalized.bed")
        .map {it -> [it.simpleName, it]}
    filtered_interaction_bed_for_multiplot_1 = Channel
        .fromPath("${params.in}/*.normalized.bed")
        .map {it -> [it.simpleName, it]}
}

if (params.mode != "plot"){
    // reuse existing restriction maps when a directory is configured
    if ( "${path_T2C_restriction_maps}" != "" ){
        res_map = Channel.fromPath("${path_T2C_restriction_maps}/*.bed")
        create_res_map = false
    } else {
        create_res_map = true
    }
    // reuse existing BAM files when --bam is given, otherwise start from fastq
    if ( "${params.bam}" != "" ){
        bam_f = Channel
            .fromPath("${params.bam}/*.bam")
            .map {it -> [it.simpleName, it]}
        create_bam = false
    } else {
        create_bam = true
        fastqGzFiles = Channel.fromPath("${params.in}/*.fastq*")
    }
}

// Accepted values for --mode, --aln and --organism (used by check_parameters).
modeList = ["T2C", "HiC", "help", "h", "plot", "uropa", "multiplot"]
alnList = ["bowtie2", "bwa", "histat"]
organismList = ["mm10", "mm9", "hg19"]
/*
 * Validates the command-line parameters.
 * On the first invalid value an error is printed and the JVM is terminated via
 * System.exit; when everything is valid the task only runs `echo check`.
 */
process check_parameters {
    script:
    // --mode must be one of the known modes (list is built from modeList so it cannot go stale)
    if (!(params.mode in modeList )){
        println("ERROR: Mode not existing.\n\nList of all modes:\n" + modeList.collect { "\t-" + it }.join("\n") + "\n\nYour Input: '${params.mode}'")
        System.exit(0)
    }
    // --organism must be supported; the message echoes the offending organism value
    if (!(params.organism in organismList )){
        println("ERROR: Organism not supported.\n\nList of all supported organisms:\n\t-mm10\n\t-mm9\n\t-hg19\nYour Input: '${params.organism}'")
        System.exit(0)
    }
    // --aln is only relevant for HiC; the message echoes the offending aln value
    if (!(params.aln in alnList ) && params.mode == "HiC"){
        println("ERROR: aln not existing.\n\nList of all supported alignment tools:\n\t-bwa\n\t-bowtie2\n\t-histat\n\nYour Input: '${params.aln}'")
        System.exit(0)
    }
    if (params.mode == "T2C"){
        // Human assemblies (hg*) have autosomes 1-22, the mouse assemblies 1-19.
        def chrPattern = params.organism.toString().startsWith("hg") ?
            "chr([1-9]|1[0-9]|2[0-2]|X|Y)" :
            "chr([1-9]|1[0-9]|X|Y)"
        if (!("${params.chr}" ==~ chrPattern)){
            println("ERROR: No or false chromosome parameter given.\nCorrect example: 'chr4'.\nYour Input: '${params.chr}'")
            System.exit(0)
        }
        if ((params.start == null) || (params.end == null)){
            println("ERROR: --start/--end not given")
            System.exit(0)
        }
        if (!("${params.start}" ==~ /\d+/ )){
            println("ERROR: --start needs to be a number. Your Input: '${params.start}'")
            System.exit(0)
        }
        if (!("${params.end}" ==~ /\d+/ )){
            println("ERROR: --end needs to be a number. Your Input: '${params.end}'")
            System.exit(0)
        }
        if(params.start.toInteger() >= params.end.toInteger()){
            println("ERROR: start cannot be bigger/equal than end ")
            System.exit(0)
        }
        // score_min may be negative, score_max may not
        if (!("${params.score_min}" ==~ /[-]?\d+/ )){
            println("ERROR: --score_min needs to be a number. Your Input: '${params.score_min}'")
            System.exit(0)
        }
        if (!("${params.score_max}" ==~ /\d+/ )){
            println("ERROR: --score_max needs to be a number. Your Input: '${params.score_max}'")
            System.exit(0)
        }
        if (("${params.check_res_maps}" != "0") && ("${params.check_res_maps}" != "1")){
            println("ERROR: --check_res_maps is a boolean parameter. Enter 0 or 1!")
            System.exit(0)
        }
    }
    if ( "${params.safe_all_files}" != "0" && "${params.safe_all_files}" != "1"){
        println("ERROR: --safe_all_files is a boolean parameter. Enter 0 or 1!")
        System.exit(0)
    }
    if (params.mode == "HiC") {
        if ( ! params.bin.toString().isInteger() ) {
            println("ERROR: --bin has to be an Integer! Your input: " + params.bin )
            System.exit(0)
        }
    }
    if (params.mode != "plot"){
        // every analysis mode needs either an input directory or BAM files
        if(params.in == "" && params.bam == ""){
            println("ERROR: Input/BAM path is empty")
            System.exit(0)
        } else {
            if(params.in != ""){
                if (!(file(params.in).exists())){
                    println("ERROR: Input Path does not exist!")
                    System.exit(0)
                }
            }
        }
    } else {
        // plot mode only needs somewhere to write to
        if(outpath == ""){
            println("ERROR: Missing output path")
            System.exit(0)
        }
    }
    """
    echo check
    """
}
/*
 * Checks whether the given output directory exists.
 * If it exists the user is asked whether it may be deleted; on refusal the
 * pipeline stops. (HiC mode only)
 *
 * The process is currently disabled: its `when:` guard compares the mode with
 * the placeholder value 'not_used'.
 * NOTE(review): `outfile` is not assigned anywhere in this script - confirm
 * where it is expected to come from before enabling the process.
 */
process check_output_directory {
    when:
    params.mode == 'not_used'

    script:
    valIn = false
    if(outfile.exists()){
        // System.console() is null when no interactive terminal is attached
        def console = System.console()
        while(!valIn){
            answer = console ? console.readLine('Output directory "' + outfile + '" already exists: delete directory? (Y/n) \n') : null
            // null means no console / end of input: treat it like a refusal
            // instead of failing or looping forever
            if(answer == null || answer.equalsIgnoreCase('n')){
                println("WARNING: It is required to delete given directory or choose a differnt directory!\nExiting...")
                System.exit(0)
            } else if (answer.equalsIgnoreCase('y') || answer == '') {
                // the prompt advertises "Y", so both cases are accepted
                valIn = true
                println("Deleting directory...")
            } else {
                println("Error: Invalid input!")
            }
        }
    }
    if(valIn){
        """
        if [ -d $outpath ]; then
        rm -r ${outpath}
        fi
        echo 'Check of output directory done'
        """
    } else {
        """
        echo 'Check of output directory done'
        """
    }
}
/*
 * T2C Step 1
 * Creates the restriction maps needed for the T2C analysis:
 *   - restriction sites / fragments of enzyme A alone
 *   - restriction sites / fragments of enzyme A plus (optionally) enzyme B
 *   - restriction_targets.bed: the fragments whose line contains the enzyme A
 *     sequence
 */
process create_restriction_maps_for_T2C {
    // 'copy' instead of 'move': the bed files are consumed by downstream
    // processes and therefore have to stay in the work directory.
    publishDir "${outpath}/${params.pn}_restriction_maps/", mode: 'copy'

    output:
    file ("*.bed") into restriction_maps

    when:
    // Both conditions must be combined in ONE expression: a `when:` block
    // evaluates to its last statement only, so two separate lines would
    // silently drop the mode check.
    params.mode == 'T2C' && create_res_map == true

    script:
    // the second enzyme is optional
    if(params.enzyme_b_sequence == ""){
        enzyme_b_option = ""
    } else {
        enzyme_b_option = "--sequence " + params.enzyme_b_sequence
    }
    """
    ${path_python}/python ${path_bin}/bin/match_sequences.py \
    --fasta ${path_genome} \
    --bed "restriction_sites_${params.enzyme_a_name}.bed" \
    --sequence ${params.enzyme_a_sequence}
    ${path_python}/python ${path_bin}/bin/target_fragments.py \
    --input "restriction_sites_${params.enzyme_a_name}.bed" \
    --output "restriction_fragments_${params.enzyme_a_name}.bed"
    ${path_python}/python ${path_bin}/bin/match_sequences.py \
    --fasta ${path_genome} \
    --bed restriction_sites.bed \
    --sequence ${params.enzyme_a_sequence} \
    ${enzyme_b_option}
    ${path_python}/python ${path_bin}/bin/target_fragments.py \
    --input restriction_sites.bed \
    --output restriction_fragments.bed
    grep ${params.enzyme_a_sequence} restriction_fragments.bed > restriction_targets.bed
    """
}
// Merge pre-existing and freshly created restriction maps and fan them out to
// the three consumers. The freshly created maps arrive as ONE list (the
// process output is the glob "*.bed"), so the stream is flattened to single
// files; otherwise the `it.simpleName == ...` filters downstream never match.
res_map.concat(restriction_maps).flatten().into {final_res_map_for_check; final_res_map; final_res_map_for_intersection}
/*
 * Optional sanity check (--check_res_maps 1): prints the first five lines of
 * every restriction-map file.
 */
process check_restriction_maps {
    tag{params.enzyme_a_name}

    input:
    file (bed) from final_res_map_for_check

    output:
    // capture the task's standard output so it can be printed below
    stdout result

    when:
    params.check_res_maps == 1

    script:
    """
    head -5 $bed
    """
}

// Print the captured preview to the console (nothing is emitted when the
// process above is skipped).
result.subscribe {println it}
/*
 * Decompresses fastq files.
 * Emits (basisname, fastqName, fastq file) where fastqName is the input's
 * simple name and basisname is fastqName with params.sample_extension removed.
 * .gz inputs are unpacked with zcat; plain .fastq inputs are passed through
 * (the staged input already carries the declared output name).
 */
process decompress {
    tag{fastqGz}
    if(params.safe_all_files == 1){
        // 'copy' instead of 'move': the fastq files are consumed downstream
        // and must remain in the work directory.
        publishDir "${outpath}/${params.pn}_fastq/", mode: 'copy', overwrite: 'false'
    }

    input:
    file (fastqGz) from fastqGzFiles

    output:
    set basisname, fastqName ,file ("${fastqName}.fastq") into decompressedfastq, decompressedfastq_for_HiC_bwa,
        decompressedfastq_for_HiC_bowtie2, decompressedfastq_for_HiC_histat

    script:
    fastqName = fastqGz.simpleName
    basisname = fastqName.replaceFirst("${params.sample_extension}","")
    // text after the last '.', e.g. 'gz' or 'fastq'
    fastqBasic = fastqGz.name.lastIndexOf('.').with {it != -1 ? fastqGz.name.substring(it+1):''}
    if(fastqBasic == 'gz')
        """
        zcat -d $fastqGz > ${fastqName}.fastq
        """
    else if (fastqBasic == 'fastq')
        """
        echo $fastqGz
        """
    else
        """
        echo "ERROR: Invalid datatype / file ending"
        """
}
/*
 * Makes sure a BWA index exists next to the genome file.
 * If all five index files (.amb .ann .bwt .pac .sa) are present nothing is
 * built; otherwise `bwa index` is run. In both cases a marker file 'f' is
 * emitted so the alignment processes can wait for this step.
 */
process create_bwa_index {
output:
file ('f') into index
file ('f') into index_hic_bwa
when:
(params.mode == "T2C" || (params.mode == "HiC" && params.aln == "bwa")) && create_bam == true
script:
// genome_file / genome_name are assigned but not used in the commands below
genome_file = file (path_genome)
genome_name = genome_file.simpleName
amb = file("${path_genome}.amb")
ann = file("${path_genome}.ann")
bwt = file("${path_genome}.bwt")
pac = file("${path_genome}.pac")
sa = file("${path_genome}.sa")
if(amb.exists() && ann.exists() && bwt.exists() && pac.exists() && sa.exists())
"""
echo done > f
"""
else
"""
${path_bwa}/bwa index ${path_genome}
echo done > f
"""
}
/*
 * T2C Step 2
 * Runs `bwa aln` on one fastq file and emits (basisname, sai, fastq).
 * The marker file from create_bwa_index is taken as input only to make this
 * step wait for the index.
 */
process bwa_aln {
    tag{fastq}
    executor 'local'
    if(params.safe_all_files == 1){
        // 'copy' instead of 'move': the .sai files are consumed by the next step
        publishDir "${outpath}/${params.pn}_additional_files/sai/", mode: 'copy', pattern: '*.sai'
    }

    input:
    set basisname, fastqName ,file (fastq) from decompressedfastq
    file f from index

    when:
    params.mode == "T2C" && create_bam == true

    output:
    set basisname, file ('*.sai'), file (fastq) into sai_files

    script:
    """
    ${path_bwa}/bwa aln \
    ${params.bwa_T2C_options} \
    ${path_genome} \
    $fastq > ${fastqName}.sai
    """
}
/*
 * T2C Step 3
 * Combines the two single-end alignments of a sample (grouped by name) into
 * one paired-end SAM file with `bwa sampe`.
 */
process bwa_sampe_to_create_sam {
    if(params.safe_all_files == 1){
        // 'copy' instead of 'move': the SAM file is consumed by the next step
        publishDir "${outpath}/${params.pn}_additional_files/sam/", mode: 'copy'
    }
    tag{name}

    input:
    set name, sai, fastq from sai_files.groupTuple()

    output:
    set name, file ("${name}.sam") into samfiles

    when:
    // single expression: a `when:` block only evaluates its last statement
    params.mode == "T2C" && create_bam == true

    script:
    // The grouped lists are in arrival order; decide which entry is read 1
    // from the "_R1" marker in the first sai file name.
    fullname = sai[0].simpleName
    if(fullname.contains("_R1")){
        sai_R1 = sai[0]
        fastq_R1 = fastq[0]
        sai_R2 = sai[1]
        fastq_R2 = fastq[1]
    } else {
        sai_R1 = sai[1]
        fastq_R1 = fastq[1]
        sai_R2 = sai[0]
        fastq_R2 = fastq[0]
    }
    // bwa sampe expects <in1.sai> <in2.sai> <in1.fq> <in2.fq>: the fastq files
    // must be given in the same order as their sai files.
    """
    ${path_bwa}/bwa sampe \
    -A \
    ${path_genome} \
    ${sai_R1} ${sai_R2} \
    ${fastq_R1} ${fastq_R2} > ${name}.sam
    """
}
/*
 * T2C Step 4
 * Adds a read group (ID, CN, LB, SM, PL, DS) to the SAM file with
 * `samtools addreplacerg`; ID and SM are set to the sample name.
 */
process samtools_addreplacerg {
    tag{sam}
    if(params.safe_all_files == 1){
        // 'copy' instead of 'move': the SAM file is consumed by the next step
        publishDir "${outpath}/${params.pn}_additional_files/sam/", mode: 'copy'
    }

    input:
    set name, file (sam) from samfiles

    output:
    set name, file ("${name}_rg.sam") into samfiles_read_groups

    when:
    // single expression: a `when:` block only evaluates its last statement
    params.mode == "T2C" && create_bam == true

    script:
    // free-text description stored in the DS field of the read group
    ds = "${path_working}/*.srt.bam"
    """
    ${path_samtools}/samtools addreplacerg \
    -r "ID:${name}" \
    -r "CN:${params.center_label}" \
    -r "LB:${params.library_label}" \
    -r "SM:${name}" \
    -r "PL:${params.platform_label}" \
    -r "DS:${ds}" \
    -o ${name}_rg.sam \
    ${sam}
    """
}
/*
 * T2C Step 5
 * Sorts the read-group annotated SAM file into ${name}.bam using the
 * configured sort options.
 */
process samtools_sort {
    executor 'local'
    // 'copy' instead of 'move': the BAM file is consumed by flagstat and
    // indexing and must remain in the work directory.
    publishDir "${outpath}/${params.pn}_bam/", mode: 'copy'
    tag{rgsam}

    input:
    set name, file (rgsam) from samfiles_read_groups

    output:
    set name, file ("${name}.bam") into bamfiles

    when:
    // single expression: a `when:` block only evaluates its last statement
    params.mode == "T2C" && create_bam == true

    script:
    """
    ${path_samtools}/samtools sort \
    ${params.sort_options} \
    -T ${name}_tmp_sort \
    -o ${name}.bam \
    ${rgsam}
    """
}
// Merge user-supplied BAM files (--bam) with the ones produced by the pipeline
// and duplicate the stream for indexing and for flagstat.
bam_f.concat(bamfiles).into {finalbam; bamfiles_for_flagstats;}
// T2C Step 6
// Writes the `samtools flagstat` report of every BAM file to
// ${name}.flagstat.txt and publishes it to the stats directory.
process samtools_flagstat {
publishDir "${outpath}/${params.pn}_stats/", mode: 'move'
tag{bam}
input:
set name, file (bam) from bamfiles_for_flagstats
output:
file ("${name}.flagstat.txt") into flagstats
when:
params.mode == "T2C"
script:
"""
${path_samtools}/samtools flagstat ${bam} > ${name}.flagstat.txt
"""
}
/*
 * T2C Step 7
 * index the BAM files
 * Emits (name, index, bam) so the following step gets both files staged.
 */
process index_bamfiles {
    tag{bam}
    if(params.safe_all_files == 1){
        // 'copy' instead of 'move': index and BAM are consumed by the next step
        publishDir "${outpath}/${params.pn}_additional_files/bai/", mode: 'copy'
    }

    input:
    set name, file (bam) from finalbam

    output:
    set name, file ("${name}.bam.bai"), file ("${bam}") into bam_bai_files

    when:
    params.mode == "T2C"

    script:
    // samtools index writes the index to a FILE, not to stdout; the output
    // name is therefore passed as second argument. Redirecting stdout only
    // produced a valid index when the BAM happened to be called ${name}.bam.
    """
    ${path_samtools}/samtools index ${bam} ${name}.bam.bai
    """
}
/*
 * T2C Step 8
 * get the reads overlapping the targets
 * Every (name, bai, bam) tuple is combined with the restriction map whose
 * simple name is "restriction_targets".
 */
process overlapping_reads_targets {
    tag{bam}
    executor 'local'
    if(params.safe_all_files == 1){
        // 'copy' instead of 'move': the target BAM is consumed by the next step
        publishDir "${outpath}/${params.pn}_additional_files/bam/", mode: 'copy'
    }

    input:
    set name, file (bai), file (bam), file (res_target) from bam_bai_files.combine(final_res_map.filter{ it.simpleName == "restriction_targets"})

    output:
    set name, file ("${name}.target.bam") into target_bam_files

    when:
    params.mode == "T2C"

    script:
    """
    ${path_python}/python ${path_bin}/bin/add_fragment_to_bam.py \
    --input ${bam} \
    --output ${name}.target.bam \
    --bed ${res_target}
    """
}
/*
 * T2C Step 9
 * namesort the bam file in preparation for the matrix
 */
process namesorting_bam_files {
    tag{target_bam}
    if(params.safe_all_files == 1){
        // 'copy' instead of 'move': the sorted BAM is consumed by the next step
        publishDir "${outpath}/${params.pn}_additional_files/bam/", mode: 'copy'
    }

    input:
    set name, file (target_bam) from target_bam_files

    output:
    set name, file ("${name}.namesort.bam") into namesort_bam_files

    when:
    params.mode == "T2C"

    script:
    """
    ${path_samtools}/samtools sort \
    -n \
    -o ${name}.namesort.bam \
    ${target_bam}
    """
}
/*
 * T2C step 10
 * SAMtools flags to filter alignments
 * 0x0001 p the read is paired in sequencing
 * 0x0004 u the query sequence itself is unmapped
 * 0x0008 U the mate is unmapped
 * 0x0100 s the alignment is not primary
 * 0x0800 S the alignment is supplementary
 *
 * Keeps paired reads (-f 0x0001) and drops the other four categories (-F).
 * `samtools view -h` is called without -b, so the result is written with
 * header in text form even though the file is named *.bam; the next step
 * pipes this file through `cat` into its script.
 */
process filter_aligments {
    tag{namesort_bam}
    if(params.safe_all_files == 1){
        // 'copy' instead of 'move': the filtered file is consumed by the next step
        publishDir "${outpath}/${params.pn}_additional_files/bam/", mode: 'copy'
    }

    input:
    set name, file (namesort_bam) from namesort_bam_files

    output:
    set name , file ("${name}.filtered.bam") into filtered_bam_files

    when:
    params.mode == "T2C"

    script:
    """
    ${path_samtools}/samtools view -h \
    -f 0x0001 \
    -F 0x0004 \
    -F 0x0008 \
    -F 0x0100 \
    -F 0x0800 \
    ${namesort_bam} > ${name}.filtered.bam
    """
}
/*
 * T2C step 11
 * creating first raw matrix
 * Streams the filtered alignments into create_matrix.py, which writes
 * ${name}.reads.bed. The result feeds both the matrix and the statistics.
 */
process create_matrix {
    tag{filtered_bam}
    if(params.safe_all_files == 1){
        // 'copy' instead of 'move': the bed file is consumed downstream
        publishDir "${outpath}/${params.pn}_additional_files/bed/", mode: 'copy'
    }

    input:
    set name , file (filtered_bam) from filtered_bam_files

    output:
    set name, file ("${name}.reads.bed") into reads_bed_files, reads_bed_files_for_stats

    when:
    params.mode == "T2C"

    script:
    """
    cat ${filtered_bam} | \
    ${path_python}/python ${path_bin}/bin/create_matrix.py -o ${name}.reads.bed
    """
}
/*
 * T2C step 12
 * finish matrix file by
 * creating a paired-end bed file on single base-pair level
 * The reads are sorted by columns 1, 2 (numeric), 4 and 5 (numeric) before
 * they are aggregated by aggregate_matrix.py.
 */
process finish_raw_matrix {
    tag{reads_bed}
    if(params.safe_all_files == 1){
        // 'copy' instead of 'move': the matrix is consumed by the next step
        publishDir "${outpath}/${params.pn}_additional_files/bed/", mode: 'copy'
    }

    input:
    set name, file (reads_bed) from reads_bed_files

    output:
    //set name_raw, file ("${name}.matrix.bed") into raw_matrix_bed_for_plots
    set name, file ("${name}.matrix.bed") into raw_matrix_bed_to_reassign

    when:
    params.mode == "T2C"

    script:
    //name_raw = "${name}_raw"
    """
    cat ${reads_bed} | \
    sort -k 1,1 -k 2,2n -k 4,4 -k 5,5n | \
    ${path_python}/python ${path_bin}/bin/aggregate_matrix.py -o ${name}.matrix.bed
    """
}
/*
 * T2C step 13
 * Splits the matrix into its two interaction partners: columns 1-3 go to
 * ${name}.loca.bed, columns 4-6 to ${name}.locb.bed. The matrix itself is
 * passed on together with both files.
 */
process split_matrix_for_re_evaluation {
    tag{matrix_bed}
    if(params.safe_all_files == 1){
        // 'copy' instead of 'move': all files are consumed by the next step
        publishDir "${outpath}/${params.pn}_additional_files/bed/", mode: 'copy'
    }

    input:
    set name, file (matrix_bed) from raw_matrix_bed_to_reassign

    output:
    set name, file ("${name}.loca.bed"), file ("${name}.locb.bed"), file (matrix_bed) into split_matrix

    when:
    params.mode == "T2C"

    script:
    """
    awk -F "\t" '//{print \$1 "\t" \$2 "\t" \$3}' ${matrix_bed} > ${name}.loca.bed
    awk -F "\t" '//{print \$4 "\t" \$5 "\t" \$6}' ${matrix_bed} > ${name}.locb.bed
    """
}
/*
 * T2C step 14
 * Intersect bed file with fragments
 * Both location files are intersected (bedtools intersect -wao) with the
 * restriction-fragment map of enzyme A.
 */
process intersect_beds_to_fragments {
    tag{name}
    if(params.safe_all_files == 1){
        // 'copy' instead of 'move': the results are consumed by the next step
        publishDir "${outpath}/${params.pn}_additional_files/bed/", mode: 'copy'
    }

    input:
    set name, file (a_bed), file (b_bed), file (matrix), file (res_frag) from split_matrix.combine(final_res_map_for_intersection.filter{ it.simpleName == "restriction_fragments_${params.enzyme_a_name}"})

    output:
    set name, file ("${name}.frga.bed"), file ("${name}.frgb.bed"), file (matrix) into fragments_bed

    when:
    params.mode == "T2C"

    script:
    // bedtools reads its -a input from the file directly. The former leading
    // `sed ... |` stage was never read by bedtools and has been removed; the
    // produced files are unchanged.
    """
    ${path_bedtools}/bedtools intersect -a ${a_bed} -b ${res_frag} -wao > ${name}.frga.bed
    ${path_bedtools}/bedtools intersect -a ${b_bed} -b ${res_frag} -wao > ${name}.frgb.bed
    """
}
/*
 * T2C step 15
 * creating pre normalized matrix
 * reassign_matrix.py gets the matrix and both fragment intersections; its
 * standard output is stored as ${name}.temp.bed.
 */
process reassign_matrix {
    tag{matrix}
    if(params.safe_all_files == 1){
        // 'copy' instead of 'move': the temp bed is consumed by the next step
        publishDir "${outpath}/${params.pn}_additional_files/bed/", mode: 'copy'
    }

    input:
    set name, file (frga), file (frgb), file (matrix) from fragments_bed

    output:
    set name, file ("${name}.temp.bed") into temp_bed_file

    when:
    params.mode == "T2C"

    script:
    """
    ${path_python}/python ${path_bin}/bin/reassign_matrix.py \
    -m ${matrix} \
    -a ${frga} \
    -b ${frgb} > ${name}.temp.bed
    """
}
/*
 * T2C step 16
 * Aggregate Matrix
 * Sorts the reassigned entries and aggregates them into
 * ${name}_raw.reassigned.bed. The file is emitted twice: keyed by the sample
 * name (for normalisation) and keyed by "<name>_raw" (for plotting).
 */
process finalize_matrix {
    tag{temp_bed}
    // 'copy' instead of 'move': the matrix is consumed by normalisation and
    // plotting and must remain in the work directory.
    // NOTE(review): name_raw is only assigned in the script block - confirm
    // that the pattern below resolves as intended.
    publishDir "${outpath}/${params.pn}_Interactions/01_all_interactions_bed_raw/", mode: 'copy', pattern: "${name_raw}.reassigned.bed"

    input:
    set name, file (temp_bed) from temp_bed_file

    output:
    set name, file ("${name_raw}.reassigned.bed") into reassigned_matrix
    set name_raw, file ("${name_raw}.reassigned.bed") into reassigned_matrix_raw

    when:
    params.mode == "T2C"

    script:
    name_raw = "${name}_raw"
    """
    cat ${temp_bed} | \
    sort -k 1,1 -k 2,2n -k 4,4 -k 5,5n |\
    ${path_python}/python ${path_bin}/bin/aggregate_matrix.py -o ${name_raw}.reassigned.bed
    """
}
/*
 * T2C step 17
 * Normalize Matrix
 * Runs normalize_paired_bed.R with the configured normalisation method; the
 * result is sent to plotting, the multiplot and the interaction table.
 */
process normalize_matrix {
    tag{matrix}
    // 'copy' instead of 'move': the normalized matrix has three downstream
    // consumers and must remain in the work directory.
    publishDir "${outpath}/${params.pn}_Interactions/02_all_interactions_bed_norm/", mode: 'copy'

    input:
    set name, file (matrix) from reassigned_matrix

    output:
    set name, file ("${name}.normalized.bed") into normalized_matrix, multiplot_bed_2, filtered_interaction_bed_for_multiplot_2

    when:
    params.mode == "T2C"

    script:
    """
    Rscript ${path_bin}/bin/normalize_paired_bed.R --bedfile ${matrix} \
    --outfile ${name}.normalized.bed \
    --library ${path_bin}/RLIB/ \
    --method ${params.norm_method}
    """
}
// Everything that gets an interaction plot: matrices given in plot mode,
// normalized matrices and the raw reassigned matrices.
to_plot = matrix_to_plot.concat(normalized_matrix).concat(reassigned_matrix_raw)
/*
* T2C step 18
* Creating Interaction matrix plot
* Runs plot_cis_matrix.R for the target region and publishes the resulting
* PDF; the publish directory depends on whether the pipeline runs in plot mode.
*/
process plotting {
tag{name}
if(params.mode == "plot" ){
publishDir "${outpath}/plot/", mode: 'move'
} else {
publishDir "${outpath}/${params.pn}_plots/interaction_plot/", mode: 'move'
}
input:
set name, file (matrix) from to_plot
output:
file ("*.pdf")
when:
params.mode == "T2C" || params.mode == "plot"
script:
"""
Rscript ${path_bin}/bin/plot_cis_matrix.R \
--bedfile ${matrix} \
--outfile ${name}.cis.pdf \
--library ${path_bin}/RLIB/ \
--chromosome ${params.chr} \
--start ${params.start} \
--end ${params.end} \
--colour-minimum ${params.score_min} \
--colour-maximum ${params.score_max} \
${params.plot_options_T2C}
"""
}
// Input of the interaction table: normalized matrices read from --in
// (multiplot / uropa mode) followed by the ones produced in T2C mode.
filtered_interaction_bed_merged = filtered_interaction_bed_for_multiplot_1.concat(filtered_interaction_bed_for_multiplot_2)
filtered_interaction_bed_final_merge = interaction_uropa.concat(filtered_interaction_bed_merged)
/*
 * Builds the interaction table of the target region with
 * create_interaction_table.R (arguments: normalized matrix, output file,
 * chromosome, start, end, params.int_score and a second output path in the
 * Nextflow work directory). The UROPA configuration later refers to that
 * second path.
 */
process create_interaction_table {
    tag{name}
    // 'copy' instead of 'move': the table is consumed by four downstream
    // channels and must remain in the work directory.
    publishDir "${outpath}/${params.pn}_Interactions/03_target_region_interactions_bed/", mode: 'copy'

    input:
    set name, file (norm) from filtered_interaction_bed_final_merge

    output:
    set name, file ("${name}.filtered.interactions.bed") into interaction_bed_for_merge, to_ifmatrix, for_uropa_1, for_uropa_2

    when:
    params.mode == "T2C" || params.mode == "uropa" || params.mode == "multiplot"

    script:
    """
    Rscript ${path_bin}/bin/create_interaction_table.R \
    ${norm} ${name}.filtered.interactions.bed \
    ${params.chr} ${params.start} ${params.end} ${params.int_score} \
    $workflow.workDir/${name}.filtered.interactions.bed
    """
}
//interaction_empty.concat(interaction_check).into {for_uropa1; for_uropa2}
// Creates the T2C statistics PDF for one sample from its reads bed file and
// the target region (create_T2C_stats.R).
process create_stats {
tag{name}
publishDir "${outpath}/${params.pn}_stats/", mode: 'move'
input:
set name, file (reads) from reads_bed_files_for_stats
output:
file ("${name}_stats.pdf")
when:
params.mode == "T2C"
script:
"""
Rscript ${path_bin}/bin/create_T2C_stats.R \
${reads} ${name}_stats.pdf \
${params.chr} ${params.start} ${params.end} ${name}
"""
}
// Writes the UROPA configuration (JSON) for the first annotation run.
// The uropa_* values and path_gtf are not assigned in this script -
// presumably they come from the pipeline configuration; verify.
process create_uropa_config_1 {
tag{name}
input:
// int_bed only triggers the task; the config below refers to the copy of
// the interaction table inside the work directory.
set name, file (int_bed) from for_uropa_1
output:
set name, file ("${name}_uropa_config") into uropa_config_1
when:
params.mode == "T2C" || params.mode == "uropa"
script:
config_parameters = [uropa_feature,uropa_anchor,uropa_strand,uropa_direction,
uropa_filter_attr,uropa_attr_value,uropa_show_attr]
// Wrap every value in shell-escaped double quotes; a comma-separated value
// becomes a sequence of individually quoted entries.
conf_list = config_parameters.collect { par ->
return "\\\"" + par.replace(",","\\\",\\\"") + "\\\""
}
// Values with more than one entry are turned into a JSON array.
conf_list_final = conf_list.collect {p ->
if(p.split(",").size() > 1) {
return "[" + p + "]"
}
return p
}
"""
echo "{ \
\\"queries\\" : [ \
{ \
\\"feature\\" : ${conf_list_final[0]}, \
\\"feature.anchor\\" : ${conf_list_final[1]}, \
\\"distance\\" : [ \
${uropa_dist_1}, \
${uropa_dist_2} \
], \
\\"strand\\" : ${conf_list_final[2]}, \
\\"direction\\" : ${conf_list_final[3]}, \
\\"internals\\" : \\"True\\", \
\\"filter.attribute\\" : ${conf_list_final[4]}, \
\\"attribute.value\\" : ${conf_list_final[5]}, \
\\"show.attributes\\" : ${conf_list_final[6]} \
} \
], \
\\"priority\\" : \\"FALSE\\", \
\\"gtf\\" : \\"${path_gtf}\\", \
\\"bed\\" : \\"$workflow.workDir/${name}.filtered.interactions.bed\\" \
}" > ${name}_uropa_config
"""
}
/*
 * First UROPA run. With prefix ${name} UROPA is expected to write
 * ${name}_allhits.txt and ${name}_finalhits.txt, which are passed on to the
 * merge step.
 */
process run_uropa_1 {
    tag{name}
    if(params.safe_all_files == 1){
        // 'copy' instead of 'move': the hit tables are consumed by uropa_merge
        publishDir "${outpath}/${params.pn}_uropa/", mode: 'copy'
    }

    input:
    set name, file (config) from uropa_config_1

    output:
    set name, file ("${name}_allhits.txt"), file ("${name}_finalhits.txt") into uropa_1

    when:
    params.mode == "T2C" || params.mode == "uropa"

    script:
    """
    uropa -i ${config} -d -p ${name} -t ${params.uropa_threads}
    """
}
// Runs swap_fragments.R on the interaction table. The script gets two output
// paths: one in the task directory (the declared output) and one in the
// Nextflow work directory.
process swap_fragments {
tag{name}
input:
set name, file (int_bed) from for_uropa_2
output:
set name, file ("${name}.swap.interaction.bed") into swaped_interactions_bed
when:
params.mode == "T2C" || params.mode == "uropa"
script:
"""
Rscript ${path_bin}/bin/swap_fragments.R ${int_bed} ${name}.swap.interaction.bed $workflow.workDir/${name}.swap.interaction.bed
"""
}
/*
 * Writes the UROPA configuration (JSON) for the second annotation run, which
 * annotates the SWAPPED interaction partners. The "bed" entry therefore
 * points at the swapped interaction file that swap_fragments wrote into the
 * work directory (pointing at the un-swapped table would just repeat the
 * first run).
 * The uropa_* values and path_gtf are not assigned in this script -
 * presumably they come from the pipeline configuration; verify.
 */
process create_uropa_config_2 {
    tag{name}

    input:
    // int_bed triggers the task once the swapped file exists
    set name, file (int_bed) from swaped_interactions_bed

    output:
    set name, file ("${name}_uropa_config_2") into uropa_config_2

    when:
    params.mode == "T2C" || params.mode == "uropa"

    script:
    config_parameters = [uropa_feature,uropa_anchor,uropa_strand,uropa_direction,
        uropa_filter_attr,uropa_attr_value,uropa_show_attr]
    // Wrap every value in shell-escaped double quotes; a comma-separated value
    // becomes a sequence of individually quoted entries.
    conf_list = config_parameters.collect { par ->
        return "\\\"" + par.replace(",","\\\",\\\"") + "\\\""
    }
    // Values with more than one entry are turned into a JSON array.
    conf_list_final = conf_list.collect {p ->
        if(p.split(",").size() > 1) {
            return "[" + p + "]"
        }
        return p
    }
    """
    echo "{ \
    \\"queries\\" : [ \
    { \
    \\"feature\\" : ${conf_list_final[0]}, \
    \\"feature.anchor\\" : ${conf_list_final[1]}, \
    \\"distance\\" : [ \
    ${uropa_dist_1}, \
    ${uropa_dist_2} \
    ], \
    \\"strand\\" : ${conf_list_final[2]}, \
    \\"direction\\" : ${conf_list_final[3]}, \
    \\"internals\\" : \\"True\\", \
    \\"filter.attribute\\" : ${conf_list_final[4]}, \
    \\"attribute.value\\" : ${conf_list_final[5]}, \
    \\"show.attributes\\" : ${conf_list_final[6]} \
    } \
    ], \
    \\"priority\\" : \\"FALSE\\", \
    \\"gtf\\" : \\"${path_gtf}\\", \
    \\"bed\\" : \\"$workflow.workDir/${name}.swap.interaction.bed\\" \
    }" > ${name}_uropa_config_2
    """
}
/*
 * Second UROPA run (prefix ${name}_2); its hit tables are passed on to the
 * merge step.
 */
process run_uropa_2 {
    tag{name}
    if(params.safe_all_files == 1){
        // 'copy' instead of 'move': the hit tables are consumed by uropa_merge
        publishDir "${outpath}/${params.pn}_uropa/", mode: 'copy'
    }

    input:
    set name, file (config) from uropa_config_2

    output:
    set name, file ("${name}_2_allhits.txt"), file ("${name}_2_finalhits.txt") into uropa_2

    when:
    params.mode == "T2C" || params.mode == "uropa"

    script:
    """
    uropa -i ${config} -d -p ${name}_2 -t ${params.uropa_threads}
    """
}
// Bring together, per sample name (tuple index 0), the results of both UROPA
// runs and the interaction table they were computed from.
tmp = uropa_1.combine(uropa_2, by: 0)
final_uropa_set = tmp.combine(interaction_bed_for_merge, by: 0)
/*
 * Merges the final hits of both UROPA runs with the interaction table
 * (merge_uropa_interactions.R). The all-hits tables are staged but not passed
 * to the script.
 */
process uropa_merge {
    tag{name}
    // 'copy' instead of 'move': the merged table is consumed by the viewpoint
    // step and by the xlsx writer (which receives plain paths into the work
    // directory), so it must remain there.
    publishDir "${outpath}/${params.pn}_Interactions/04_final_interactions_annotated/", mode: 'copy'

    input:
    set name, file (allhits_1), file (finalhits_1), file (allhits_2), file (finalhits_2), file (int_bed) from final_uropa_set

    output:
    set name, file ("${name}.finalhits.merged.txt") into merged_uropa
    file ("${name}.finalhits.merged.txt") into for_xlsx

    when:
    params.mode == "T2C" || params.mode == "uropa"

    script:
    """
    Rscript ${path_bin}/bin/merge_uropa_interactions.R ${int_bed} ${finalhits_1} ${finalhits_2} ${name}.finalhits.merged.txt
    """
}
// All merged annotation tables, collected into one sorted list.
xlsx_list = for_xlsx.toSortedList()

/*
 * Writes one Excel workbook (${params.pn}.xlsx) from all merged annotation
 * tables. The files are handed over as a comma-separated list of paths,
 * ordered by base name.
 */
process xlsx_writer {
    publishDir "${outpath}/", mode: 'move'

    input:
    val f from xlsx_list

    output:
    file ("${params.pn}.xlsx")

    when:
    params.mode == "T2C" || params.mode == "uropa"

    script:
    file_list = f.sort {it.getBaseName()}
    // join() builds the list directly; stripping "[", "]" and ", " from the
    // list's string form breaks on paths that contain those characters
    file_list_str = file_list.join(',')
    """
    ${path_python}/python ${path_bin}/bin/julianos_xlsxgen_vTOuCAN.py ${path_bin}/bin/julianos_config.json --out ${params.pn} --flist ${file_list_str}
    """
}
// Derives the viewpoint table from the merged annotation of one sample
// (create_interaction_viewpoint.R) and publishes it.
process create_viewpoint {
tag{name}
publishDir "${outpath}/${params.pn}_Interactions/05_interactions_viewpoint_annotated/", mode: 'move'
input:
set name, file (merged_finalhits) from merged_uropa
output:
file ("${name}.interactions.viewpoint.txt")
when:
params.mode == "T2C" || params.mode == "uropa"
script:
"""
Rscript ${path_bin}/bin/create_interaction_viewpoint.R ${merged_finalhits} ${name}.interactions.viewpoint.txt
"""
}
//to_ifmatrix = interaction_multiplot.concat(interaction_bed_TAD)
/*
 * Converts the interaction table into the matrix format read by RobusTAD
 * (convert_to_IFMatrix.R).
 */
process convert_to_IFMatrix {
    tag{name}
    if(params.safe_all_files == 1){
        // 'copy' instead of 'move': the matrix is consumed by run_robusTAD
        publishDir "${outpath}/${params.pn}_additional_files/IFMatrix/", mode: 'copy'
    }

    input:
    set name, file (int_bed) from to_ifmatrix

    output:
    set name, file ("${name}.ifm.txt") into ifmatrix

    when:
    params.mode == "T2C" || params.mode == "multiplot"

    script:
    """
    Rscript ${path_bin}/bin/convert_to_IFMatrix.R ${int_bed} ${name}.ifm.txt
    """
}
/*
 * Calls TAD boundaries with RobusTAD.R; the TADBoundaryCalls_*.txt output is
 * sent to the TAD plot and to the multiplot.
 */
process run_robusTAD {
    tag{name}
    // 'copy' instead of 'move': the boundary calls are consumed by two
    // downstream processes and must remain in the work directory.
    publishDir "${outpath}/${params.pn}_TAD/", mode: 'copy'

    input:
    set name, file (tfm) from ifmatrix

    output:
    set name, file ("TADBoundaryCalls_*.txt") into tadbc, multiplot_TAD

    when:
    params.mode == "T2C" || params.mode == "multiplot"

    script:
    """
    Rscript ${path_bin}/bin/RobusTAD.R -i ${tfm} --header FALSE -n norm
    """
}
// Plots the TAD boundary calls of one sample into ${name}.TAD.pdf (plotTAD.R).
process plot_TAD_graph {
tag{name}
publishDir "${outpath}/${params.pn}_plots/TAD/", mode: 'move'
input:
set name, file (tad) from tadbc
output:
file ("${name}.TAD.pdf")
when:
params.mode == "T2C" || params.mode == "multiplot"
script:
"""
Rscript ${path_bin}/bin/plotTAD.R ${tad} ${name}.TAD.pdf
"""
}
// Normalized matrices for the multiplot: read from --in (multiplot mode) or
// produced by the pipeline (T2C mode).
multiplot_bed_merged = multiplot_bed_1.concat(multiplot_bed_2)

/*
 * Creates the combined plot (interaction map, TAD graph, gene annotation)
 * for one sample with plot_multi.R.
 */
process multiplot {
    tag{name}
    // 'copy' instead of 'move': the PNG files are handed to pptx_writer as
    // paths into the work directory and must remain there.
    publishDir "${outpath}/${params.pn}_plots/multiplot/", mode: 'copy'

    input:
    // TAD calls and matrix are matched BY SAMPLE NAME (tuple index 0). Two
    // independent `from` clauses would pair them in arrival order, which can
    // mix up samples as soon as more than one is processed.
    set name, file (tad), file (bed) from multiplot_TAD.combine(multiplot_bed_merged, by: 0)

    output:
    file ("*.png") into for_pptx

    when:
    params.mode == "T2C" || params.mode == "multiplot"

    script:
    """
    Rscript ${path_bin}/bin/plot_multi.R \
    --bedfile ${bed} \
    --outfile ${name}.multi.png \
    --library ${path_bin}/RLIB/ \
    --chromosome ${params.chr} \
    --start ${params.start} \
    --end ${params.end} \
    --colour-minimum ${params.score_min} \
    --colour-maximum ${params.score_max} \
    --tad ${tad} \
    --organism ${params.organism} \
    ${params.plot_options_T2C}
    """
}
// All multiplot images, collected into one sorted list.
for_pptx_list = for_pptx.toSortedList()

/*
 * Writes one PowerPoint file from all multiplot images. The images are handed
 * over as a comma-separated list of paths, ordered by base name.
 */
process pptx_writer {
    publishDir "${outpath}/", mode: 'move'

    input:
    val pptxList from for_pptx_list

    output:
    file ("*.pptx")

    when:
    params.mode == "T2C" || params.mode == "multiplot"

    script:
    file_list = pptxList.sort {it.getBaseName()}
    // join() builds the list directly; stripping "[", "]" and ", " from the
    // list's string form breaks on paths that contain those characters
    file_list_str = file_list.join(',')
    """
    ${path_python}/python ${path_bin}/bin/pptx_writer_vTOuCAN.py ${path_bin}/bin/pptx_config.json --output ${params.pn} --flist ${file_list_str}
    """
}
//================================= HiC ========================================
/*
process create_restriction_maps_for_HiC {
output:
file ("rest_site_${params.enzyme_a_name}.bed") into restriction_map_for_hic
when:
params.mode == "HiC"
script:
"""
findRestSite --fasta ${path_genome} --searchPattern ${params.enzyme_a_sequence} -o rest_site_${params.enzyme_a_name}.bed
"""
} */
/*
if(params.aln == "bwa"){
bowtie2_alignment = Channel.empty()
//histat_alignment = Channel.empty()
} else if (params.aln == "bowtie2") {
//histat_alignment = Channel.empty()
bwa_alignment = Channel.empty()
} else if (params.mode == "histat") {
bowtie2_alignment = Channel.empty()
bwa_alignment = Channel.empty()
} */
// HiC: aligns one fastq file with `bwa mem` and converts the result to BAM.
// The marker file from create_bwa_index is an input only so that this step
// waits for the index.
process alignment_with_bwa {
tag{fastq}
input:
set basisname, fastqName ,file (fastq) from decompressedfastq_for_HiC_bwa
file f from index_hic_bwa
output:
set basisname, file ("${fastqName}.bam") into bwa_alignment
when:
params.mode == "HiC" && params.aln == "bwa"
script:
"""
${path_bwa}/bwa mem \
${params.bwa_HiC_options} \
${path_genome} \
$fastq | ${path_samtools}/samtools view -Shb > ${fastqName}.bam
"""
}
/*
 * HiC: makes sure a bowtie2 index exists under the prefix ${path_genome},
 * which is the prefix alignment_with_bowtie2 passes to `bowtie2 -x`.
 * If the first index file is already present nothing is built. In both cases
 * a marker file 'f' is emitted so the alignment waits for this step (same
 * scheme as create_bwa_index).
 */
process bowtie2_index {
    output:
    file ('f') into index_hic_bowtie2

    when:
    params.mode == "HiC" && params.aln == "bowtie2"

    script:
    // bowtie2-build names its files <prefix>.1.bt2 (or .1.bt2l for large
    // indexes)
    bt2_small = file("${path_genome}.1.bt2")
    bt2_large = file("${path_genome}.1.bt2l")
    if(bt2_small.exists() || bt2_large.exists())
        """
        echo done > f
        """
    else
        """
        ${path_bowtie2}/bowtie2-build ${path_genome} ${path_genome}
        echo done > f
        """
}

/*
 * HiC: aligns one fastq file with bowtie2 (single-end, end-to-end) and
 * converts the result to BAM.
 */
process alignment_with_bowtie2 {
    tag{fastq}

    input:
    set basisname, fastqName ,file (fastq) from decompressedfastq_for_HiC_bowtie2
    // marker from bowtie2_index; only used to order the two processes
    file f from index_hic_bowtie2

    output:
    set basisname, file ("${fastqName}.bam") into bowtie2_alignment

    when:
    params.mode == "HiC" && params.aln == "bowtie2"

    script:
    """
    ${path_bowtie2}/bowtie2 -x ${path_genome} -U ${fastq} --very-sensitive -L 30 --score-min L,-0.6,-0.2 --end-to-end --reorder -p 12 \
    | ${path_samtools}/samtools view -Shb - > ${fastqName}.bam
    """
}
// HiC alignments of whichever aligner was selected (the other channel stays
// empty because its process is skipped).
hic_alignment = bwa_alignment.concat(bowtie2_alignment) //.concat(histat_alignment)
// HiC: builds the contact matrix of one sample with hicBuildMatrix from the
// two BAM files grouped under the same name.
process create_HiC_matrix {
tag{name}
input:
set name, bams from hic_alignment.groupTuple()
output:
set name, file("${name}_matrix.h5") into hic_matrix, hic_matrix_for_diagnostic
file ("${name}.bam") into hic_bam
when:
params.mode == "HiC"
script:
// The grouped list is taken in the order it arrives; no check is made here
// which entry is read 1 and which is read 2.
bam_r1 = bams[0]
bam_r2 = bams[1]
"""
hicBuildMatrix --samFiles ${bam_r1} ${bam_r2} \
--QCfolder ${outpath}/QC_${name}/ \
--binSize ${params.bin} \
-b ${name}.bam \
--restrictionSequence ${params.enzyme_a_sequence} \
-o ${name}_matrix.h5
"""
}
// HiC: writes the diagnostic plot of the uncorrected matrix
// (hicCorrectMatrix diagnostic_plot) and publishes it.
process diagnostic_plot_of_HiC_Matrix {
publishDir "${outpath}/diagnostic_plots/", mode: 'move'
input:
set name, file (matrix) from hic_matrix_for_diagnostic
output:
set name, file ("${name}.png") // into hic_matrix_corrected
when:
params.mode == "HiC"
script:
"""
hicCorrectMatrix diagnostic_plot -m ${matrix} -o ${name}.png
"""
}
// HiC: corrects the matrix with hicCorrectMatrix using the fixed filter
// thresholds -1.5 and 5. The process has no `when:` guard of its own; it only
// runs when create_HiC_matrix delivered a matrix.
process correct_matrix {
publishDir "${outpath}/matrix/", mode: 'copy'
input:
set name, file (matrix) from hic_matrix
output:
set name, file("${name}_corrected.h5") into hic_matrix_corrected
script:
"""
hicCorrectMatrix correct -m ${matrix} --filterThreshold -1.5 5 -o ${name}_corrected.h5
"""
}
/*
 * HiC: plots the corrected matrix with hicPlotMatrix, chromosomes in natural
 * order (autosomes, then X and Y).
 */
process plot_matrix {
    // outpath is params.out without a trailing slash, as used by every other
    // process
    publishDir "${outpath}/plot/", mode: 'move'

    input:
    set name, file (corrected_matrix) from hic_matrix_corrected

    output:
    file ("${name}.hic.matrix.png")

    when:
    params.mode == "HiC"

    script:
    // Human assemblies (hg*) have 22 autosomes, the mouse assemblies 19; for
    // mouse this yields exactly the previous hard-coded chr1..chr19 chrX chrY.
    autosome_count = params.organism.toString().startsWith("hg") ? 22 : 19
    chromosome_order = ((1..autosome_count).collect { "chr" + it } + ["chrX", "chrY"]).join(" ")
    """
    hicPlotMatrix -m ${corrected_matrix} -o ${name}.hic.matrix.png \
    --chromosomeOrder ${chromosome_order}
    """
}