Skip to content

bugfixes and minor changes #6

Merged
merged 1 commit into from
Sep 24, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
93 changes: 59 additions & 34 deletions TOuCAN.nf
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,6 @@ Usage: nextflow run TOuCAN.nf --in [Input Path] --out [Output Path] --mode [Modi
--hic_thresh_min [(-)INT] - Threshold filter minimum [default: -1.5]
--hic_thresh_max [INT] - Threshold filter maximum [default: 5]
--inputBufferSize [INT] - Buffersize for creating hic matrix [default: 400000]
--bt2_index_threads [INT] - Number of threads used for creating bt2 index

Skip Alignment -> BAM files as Input:
The BAM files need to be from this Pipeline with following
Expand Down Expand Up @@ -172,10 +171,10 @@ Path to bin: ${path_bin}
Path to genome: ${path_genome}
Path to restriction maps: ${path_T2C_restriction_maps}
"""
if ( ! params.bam == "" ){
log.info """
Path to bam files: ${params.bam}
"""
if ( "${params.bam}" != "" ){
log.info"""
Path to bam files: ${params.bam}
"""
}

log.info """
Expand All @@ -197,7 +196,7 @@ multiplot_bed_1 = Channel.empty()
matrix_to_plot = Channel.empty()
res_map = Channel.empty()
bam_f = Channel.empty()
bam_f2 = Channel.empty()
bam2 = Channel.empty()
interaction_uropa = Channel.empty()
fastqGzFiles = Channel.empty()
hic_matrix_f = Channel.empty()
Expand Down Expand Up @@ -242,8 +241,15 @@ if (params.mode != "plot"){
Channel
.fromPath("${params.bam}/*.bam")
.map {it -> [it.simpleName, it]}
.into {bam_f; bam_f2}
.set {bam_f}
create_bam = false
if("${params.mode}" == "HiC"){
Channel
.fromPath("${params.bam}/*.bam")
.map {it -> [it.simpleName.replaceAll(params.sample_extension, ""), it]}
.set {bam2}
create_bam = false
}
} else if ("${params.mode}" == "HiC" & "${params.mode}" == "h5") {
hic_matrix_f = Channel
.fromPath("${params.in}/*.h5")
Expand Down Expand Up @@ -437,7 +443,7 @@ process decompress {

output:
set basisname, fastqName ,file ("${fastqName}.fastq") into decompressedfastq, decompressedfastq_for_HiC_bwa,
decompressedfastq_for_HiC_bowtie2, decompressedfastq_for_HiC_histat
decompressedfastq_for_HiC_bowtie2

script:
fastqName = fastqGz.simpleName
Expand Down Expand Up @@ -675,7 +681,7 @@ process index_bamfiles {
set name, file (bam) from finalbam

output:
set name, file ("${name}.bam.bai") into bam_bai_files, test
set name, file ("${name}.bam.bai") into bam_bai_files

when:
params.mode == "T2C"
Expand Down Expand Up @@ -960,19 +966,17 @@ process finalize_matrix {
set name, file (temp_bed) from temp_bed_file

output:
set name, file ("${name_raw}.reassigned.bed") into reassigned_matrix
set name_raw, file ("${name_raw}.reassigned.bed") into reassigned_matrix_raw
set name, file ("${name}.reassigned.bed") into reassigned_matrix

when:
params.mode == "T2C"
params.reassin == 1

script:
name_raw = "${name}_raw"
"""
cat ${temp_bed} | \
sort -k 1,1 -k 2,2n -k 4,4 -k 5,5n |\
${path_python}/python ${path_bin}/bin/aggregate_matrix.py -o ${name_raw}.reassigned.bed
${path_python}/python ${path_bin}/bin/aggregate_matrix.py -o ${name}.reassigned.bed
"""
}

Expand Down Expand Up @@ -1008,7 +1012,7 @@ process normalize_matrix {
--method ${params.norm_method}
"""
}
to_plot = matrix_to_plot.concat(normalized_matrix).concat(reassigned_matrix_raw)
to_plot = matrix_to_plot.concat(normalized_matrix)


/*
Expand Down Expand Up @@ -1159,7 +1163,7 @@ process create_uropa_config_1 {
process run_uropa_1 {

tag{name}
conda "$workflow.projectDir/uropa.yaml"
//conda "$workflow.projectDir/envtoucan.yaml"

if(params.safe_all_files == 1){
publishDir "${outpath}/${params.pn}_uropa/", mode: 'copy'
Expand Down Expand Up @@ -1258,7 +1262,7 @@ process create_uropa_config_2 {
process run_uropa_2 {

tag{name}
conda "$workflow.projectDir/uropa.yaml"
//conda "$workflow.projectDir/envtoucan.yaml"

if(params.safe_all_files == 1){
publishDir "${outpath}/${params.pn}_uropa/", mode: 'copy'
Expand Down Expand Up @@ -1446,7 +1450,11 @@ process plot_TAD_graph {

}

multiplot_bed_merged = multiplot_bed_1.concat(multiplot_bed_2)
multiplot_bed_1.concat(multiplot_bed_2).into {multiplot_bed_merged; testa}

multiplot_TAD.join(multiplot_bed_merged).into {multi; testb}
testa.println()
testb.println()

process multiplot {

Expand All @@ -1455,8 +1463,7 @@ process multiplot {
publishDir "${outpath}/${params.pn}_plots/multiplot/", mode: 'copy'

input:
set name, file (tad) from multiplot_TAD
set name, file (bed) from multiplot_bed_merged
set name, file (tad), file (bed) from multi

output:
file ("*.png") into for_pptx
Expand Down Expand Up @@ -1522,8 +1529,7 @@ process alignment_with_bwa {
set basisname, file ("${fastqName}.bam") into bwa_alignment

when:
params.mode == "HiC" && params.aln == "bwa"
create_bam == true
params.mode == "HiC" && params.aln == "bwa" && create_bam == true

script:
"""
Expand All @@ -1549,9 +1555,26 @@ process bowtie2_index {
script:
genome_file = file (path_genome)
genome_name = genome_file.name
"""
bowtie2-build ${path_genome} ${genome_name} --threads ${params.bt2_index_threads}
"""

indx = file ("${path_genome}").getBaseName()
path = file ("${path_genome}").getParent()
i = "${path}/bowtie2/${indx}"

bt1 = file("${i}.1.bt2")
bt2 = file("${i}.2.bt2")
bt3 = file("${i}.3.bt2")
bt4 = file("${i}.4.bt2")
rev1 = file("${i}.rev.1.bt2")
rev2 = file("${i}.rev.2.bt2")

if(bt1.exists() && bt2.exists() && bt3.exists() && bt4.exists() && rev1.exists() && rev2.exists())
"""
echo "Skiped creating bt2 index"
"""
else
"""
bowtie2-build ${path_genome} ${genome_name} --threads ${params.bt2_index_threads}
"""
}


Expand All @@ -1562,23 +1585,24 @@ process alignment_with_bowtie2 {
conda "$workflow.projectDir/envtoucan.yaml"

input:
set basisname, fastqName ,file (fastq) from decompressedfastq_for_HiC_bowtie2
val bt2_idx from index_bt2
set basisname, fastqName ,file (fastq), bt2_idx from decompressedfastq_for_HiC_bowtie2.combine(index_bt2)

output:
set basisname, file ("${fastqName}.bam") into bowtie2_alignment
set basisname, file ("${fastqName}.sam") into bowtie2_alignment

when:
params.mode == "HiC" && params.aln == "bowtie2" && create_bam == true

script:
indx = file ("${path_genome}").getBaseName()
path = file ("${path_genome}").getParent()
i = "${path}/bowtie2/${indx}"
"""
bowtie2 -x ${path_genome} -U ${fastq} --very-sensitive -L 30 --score-min L,-0.6,-0.2 --end-to-end --reorder -p 12 \
| samtools view -Shb - > ${fastqName}.bam
bowtie2 -x ${i} -U ${fastq} --very-sensitive --reorder -p ${params.threads_bowtie2} > ${fastqName}.sam
"""
}

hic_alignment = bam_f2.concat(bwa_alignment.concat(bowtie2_alignment)) //.concat(histat_alignment)
bam2.concat(bwa_alignment, bowtie2_alignment).set {hic_alignment}

process create_HiC_matrix {

Expand All @@ -1590,7 +1614,7 @@ process create_HiC_matrix {

output:
set name, file("${name}_matrix.h5") into hic_matrix, hic_matrix_for_diagnostic
file ("${name}.bam") into hic_bam
file ("${name}.bam")

when:
params.mode == "HiC"
Expand All @@ -1606,11 +1630,12 @@ process create_HiC_matrix {
}
"""
hicBuildMatrix --samFiles ${bam_r1} ${bam_r2} \
--QCfolder ${outpath}/QC_${name}/ \
--QCfolder ${outpath}/${params.pn}_QC_${name}/ \
--binSize ${params.bin} \
-b ${name}.bam \
--inputBufferSize ${params.inputBufferSize} \
--restrictionSequence ${params.enzyme_a_sequence} \
--threads ${params.hicBuildMatrix_threads}\
-o ${name}_matrix.h5

"""
Expand All @@ -1626,7 +1651,7 @@ process diagnostic_plot_of_HiC_Matrix {
set name, file (matrix) from hic_matrix_for_diagnostic

output:
set name, file ("${name}.png") // into hic_matrix_corrected
set name, file ("${name}.png")

when:
params.mode == "HiC"
Expand All @@ -1642,7 +1667,7 @@ hic_matrix_final = hic_matrix_f.concat(hic_matrix)
process correct_matrix {

tag{name}
publishDir "${outpath}/${params.pn}_h5_matrix/", mode: 'copy'
publishDir "${outpath}/${params.pn}_h5_matrix/corrected/", mode: 'copy'

input:
set name, file (matrix) from hic_matrix_final
Expand Down
4 changes: 3 additions & 1 deletion envtoucan.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,6 @@ dependencies:
- bwa=0.7.15
- samtools=1.3.1
- bedtools=2.27.1
- bowtie2=2.3.3.1
- bowtie2=2.3.4.1
- python=2.7
- perl=5.26.0
12 changes: 7 additions & 5 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ env {
}

params {
sample_extension = "_R[12]_001" // regex for sample extension [e.g "_R[12]_001" or "_R[12]"]
sample_extension = "_R[12]" // regex for sample extension [e.g "_R[12]_001" or "_R[12]"]

// Enzyme Information
// -------------------------------------------
Expand All @@ -26,23 +26,25 @@ params {

// Minor fixed parameters for BWA and SAMtools
// -------------------------------------------
bwa_T2C_options = "-t 16" // bwa aln
sort_options = "--threads 16"
bwa_T2C_options = "-t 32" // bwa aln
sort_options = "--threads 32"
library_label = "capture"
platform_label = "ILLUMINA"
center_label = "ECB"

//Parameter for normalization and plotting T2C
// -------------------------------------------
plot_options_T2C = ""
norm_method = "FPM" //log, fpm, array and none
norm_method = "array" //log, fpm, array and none

uropa_threads = 32

//Parameter for HiC matrix
//--------------------------------------------
hicBuildMatrix_options = "--threads 16 --inputBufferSize 100000"
hicBuildMatrix_threads = 16
bwa_HiC_options = "-t 4" // bwa mem
threads_bowtie2 = 12
inputBufferSize = 400000

//Parameter for uropa configuration
uropa_feature = "" // "String,String,String,..."
Expand Down