From cde20dca3289366ccb86185aed03cced88b2358b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Wiegandt?= Date: Mon, 24 Sep 2018 05:08:30 -0400 Subject: [PATCH] Fix BAM input check, join multiplot inputs by sample, reuse existing bowtie2 index --- TOuCAN.nf | 93 +++++++++++++++++++++++++++++++------------------ envtoucan.yaml | 4 ++- nextflow.config | 12 ++++--- 3 files changed, 69 insertions(+), 40 deletions(-) diff --git a/TOuCAN.nf b/TOuCAN.nf index a2a346b..d03061b 100644 --- a/TOuCAN.nf +++ b/TOuCAN.nf @@ -116,7 +116,6 @@ Usage: nextflow run TOuCAN.nf --in [Input Path] --out [Output Path] --mode [Modi --hic_thresh_min [(-)INT] - Threshold filter miniumum [default: -1.5] --hic_thresh_max [INT] - Threshold filter maximum [default: 5] --inputBufferSize [INT] - Buffersize for creating hic matrix [default: 400000] - --bt2_index_threads [INT] - Number of threads used or creating bt2 index Skip Aligment -> BAM files as Input: The BAM files need to be from this Pipeline with follwing @@ -172,10 +171,10 @@ Path to bin: ${path_bin} Path to genome: ${path_genome} Path to restriction maps: ${path_T2C_restriction_maps} """ -if ( ! 
params.bam == "" ){ - log.info """ - Path to bam files: ${params.bam} - """ +if ( "${params.bam}" != "" ){ +log.info""" +Path to bam files: ${params.bam} +""" } log.info """ @@ -197,7 +196,7 @@ multiplot_bed_1 = Channel.empty() matrix_to_plot = Channel.empty() res_map = Channel.empty() bam_f = Channel.empty() -bam_f2 = Channel.empty() +bam2 = Channel.empty() interaction_uropa = Channel.empty() fastqGzFiles = Channel.empty() hic_matrix_f = Channel.empty() @@ -242,8 +241,15 @@ if (params.mode != "plot"){ Channel .fromPath("${params.bam}/*.bam") .map {it -> [it.simpleName, it]} - .into {bam_f; bam_f2} + .set {bam_f} create_bam = false + if("${params.mode}" == "HiC"){ + Channel + .fromPath("${params.bam}/*.bam") + .map {it -> [it.simpleName.replaceAll(params.sample_extension, ""), it]} + .set {bam2} + create_bam = false + } } else if ("${params.mode}" == "HiC" & "${params.mode}" == "h5") { hic_matrix_f = Channel .fromPath("${params.in}/*.h5") @@ -437,7 +443,7 @@ process decompress { output: set basisname, fastqName ,file ("${fastqName}.fastq") into decompressedfastq, decompressedfastq_for_HiC_bwa, - decompressedfastq_for_HiC_bowtie2, decompressedfastq_for_HiC_histat + decompressedfastq_for_HiC_bowtie2 script: fastqName = fastqGz.simpleName @@ -675,7 +681,7 @@ process index_bamfiles { set name, file (bam) from finalbam output: - set name, file ("${name}.bam.bai") into bam_bai_files, test + set name, file ("${name}.bam.bai") into bam_bai_files when: params.mode == "T2C" @@ -960,19 +966,17 @@ process finalize_matrix { set name, file (temp_bed) from temp_bed_file output: - set name, file ("${name_raw}.reassigned.bed") into reassigned_matrix - set name_raw, file ("${name_raw}.reassigned.bed") into reassigned_matrix_raw + set name, file ("${name}.reassigned.bed") into reassigned_matrix when: params.mode == "T2C" params.reassin == 1 script: - name_raw = "${name}_raw" """ cat ${temp_bed} | \ sort -k 1,1 -k 2,2n -k 4,4 -k 5,5n |\ - ${path_python}/python 
${path_bin}/bin/aggregate_matrix.py -o ${name_raw}.reassigned.bed + ${path_python}/python ${path_bin}/bin/aggregate_matrix.py -o ${name}.reassigned.bed """ } @@ -1008,7 +1012,7 @@ process normalize_matrix { --method ${params.norm_method} """ } -to_plot = matrix_to_plot.concat(normalized_matrix).concat(reassigned_matrix_raw) +to_plot = matrix_to_plot.concat(normalized_matrix) /* @@ -1159,7 +1163,7 @@ process create_uropa_config_1 { process run_uropa_1 { tag{name} - conda "$workflow.projectDir/uropa.yaml" + //conda "$workflow.projectDir/envtoucan.yaml" if(params.safe_all_files == 1){ publishDir "${outpath}/${params.pn}_uropa/", mode: 'copy' @@ -1258,7 +1262,7 @@ process create_uropa_config_2 { process run_uropa_2 { tag{name} - conda "$workflow.projectDir/uropa.yaml" + //conda "$workflow.projectDir/envtoucan.yaml" if(params.safe_all_files == 1){ publishDir "${outpath}/${params.pn}_uropa/", mode: 'copy' @@ -1446,7 +1450,11 @@ process plot_TAD_graph { } -multiplot_bed_merged = multiplot_bed_1.concat(multiplot_bed_2) +multiplot_bed_1.concat(multiplot_bed_2).into {multiplot_bed_merged; testa} + +multiplot_TAD.join(multiplot_bed_merged).into {multi; testb} +testa.println() +testb.println() process multiplot { @@ -1455,8 +1463,7 @@ process multiplot { publishDir "${outpath}/${params.pn}_plots/multiplot/", mode: 'copy' input: - set name, file (tad) from multiplot_TAD - set name, file (bed) from multiplot_bed_merged + set name, file (tad), file (bed) from multi output: file ("*.png") into for_pptx @@ -1522,8 +1529,7 @@ process alignment_with_bwa { set basisname, file ("${fastqName}.bam") into bwa_alignment when: - params.mode == "HiC" && params.aln == "bwa" - create_bam == true + params.mode == "HiC" && params.aln == "bwa" && create_bam == true script: """ @@ -1549,9 +1555,26 @@ process bowtie2_index { script: genome_file = file (path_genome) genome_name = genome_file.name - """ - bowtie2-build ${path_genome} ${genome_name} --threads ${params.bt2_index_threads} - """ + + indx 
= file ("${path_genome}").getBaseName() + path = file ("${path_genome}").getParent() + i = "${path}/bowtie2/${indx}" + + bt1 = file("${i}.1.bt2") + bt2 = file("${i}.2.bt2") + bt3 = file("${i}.3.bt2") + bt4 = file("${i}.4.bt2") + rev1 = file("${i}.rev.1.bt2") + rev2 = file("${i}.rev.2.bt2") + + if(bt1.exists() && bt2.exists() && bt3.exists() && bt4.exists() && rev1.exists() && rev2.exists()) + """ + echo "Skiped creating bt2 index" + """ + else + """ + bowtie2-build ${path_genome} ${genome_name} --threads ${params.bt2_index_threads} + """ } @@ -1562,23 +1585,24 @@ process alignment_with_bowtie2 { conda "$workflow.projectDir/envtoucan.yaml" input: - set basisname, fastqName ,file (fastq) from decompressedfastq_for_HiC_bowtie2 - val bt2_idx from index_bt2 + set basisname, fastqName ,file (fastq), bt2_idx from decompressedfastq_for_HiC_bowtie2.combine(index_bt2) output: - set basisname, file ("${fastqName}.bam") into bowtie2_alignment + set basisname, file ("${fastqName}.sam") into bowtie2_alignment when: params.mode == "HiC" && params.aln == "bowtie2" && create_bam == true script: + indx = file ("${path_genome}").getBaseName() + path = file ("${path_genome}").getParent() + i = "${path}/bowtie2/${indx}" """ - bowtie2 -x ${path_genome} -U ${fastq} --very-sensitive -L 30 --score-min L,-0.6,-0.2 --end-to-end --reorder -p 12 \ - | samtools view -Shb - > ${fastqName}.bam + bowtie2 -x ${i} -U ${fastq} --very-sensitive --reorder -p ${params.threads_bowtie2} > ${fastqName}.sam """ } -hic_alignment = bam_f2.concat(bwa_alignment.concat(bowtie2_alignment)) //.concat(histat_alignment) +bam2.concat(bwa_alignment, bowtie2_alignment).set {hic_alignment} process create_HiC_matrix { @@ -1590,7 +1614,7 @@ process create_HiC_matrix { output: set name, file("${name}_matrix.h5") into hic_matrix, hic_matrix_for_diagnostic - file ("${name}.bam") into hic_bam + file ("${name}.bam") when: params.mode == "HiC" @@ -1606,11 +1630,12 @@ process create_HiC_matrix { } """ hicBuildMatrix --samFiles 
${bam_r1} ${bam_r2} \ - --QCfolder ${outpath}/QC_${name}/ \ + --QCfolder ${outpath}/${params.pn}_QC_${name}/ \ --binSize ${params.bin} \ -b ${name}.bam \ --inputBufferSize ${params.inputBufferSize} \ --restrictionSequence ${params.enzyme_a_sequence} \ + --threads ${params.hicBuildMatrix_threads}\ -o ${name}_matrix.h5 """ @@ -1626,7 +1651,7 @@ process diagnostic_plot_of_HiC_Matrix { set name, file (matrix) from hic_matrix_for_diagnostic output: - set name, file ("${name}.png") // into hic_matrix_corrected + set name, file ("${name}.png") when: params.mode == "HiC" @@ -1642,7 +1667,7 @@ hic_matrix_final = hic_matrix_f.concat(hic_matrix) process correct_matrix { tag{name} - publishDir "${outpath}/${params.pn}_h5_matrix/", mode: 'copy' + publishDir "${outpath}/${params.pn}_h5_matrix/corrected/", mode: 'copy' input: set name, file (matrix) from hic_matrix_final diff --git a/envtoucan.yaml b/envtoucan.yaml index ee602a2..cafe73c 100644 --- a/envtoucan.yaml +++ b/envtoucan.yaml @@ -7,4 +7,6 @@ dependencies: - bwa=0.7.15 - samtools=1.3.1 - bedtools=2.27.1 - - bowtie2=2.3.3.1 + - bowtie2=2.3.4.1 + - python=2.7 + - perl=5.26.0 diff --git a/nextflow.config b/nextflow.config index 03f9dd4..d7960c4 100644 --- a/nextflow.config +++ b/nextflow.config @@ -15,7 +15,7 @@ env { } params { - sample_extension = "_R[12]_001" // regex for sample extension [e.g "_R[12]_001" or "_R[12]"] + sample_extension = "_R[12]" // regex for sample extension [e.g "_R[12]_001" or "_R[12]"] // Enzyme Information // ------------------------------------------- @@ -26,8 +26,8 @@ params { // Minor fixed parameters for BWA and SAMtools // ------------------------------------------- - bwa_T2C_options = "-t 16" // bwa aln - sort_options = "--threads 16" + bwa_T2C_options = "-t 32" // bwa aln + sort_options = "--threads 32" library_label = "capture" platform_label = "ILLUMINA" center_label = "ECB" @@ -35,14 +35,16 @@ params { //Parameter for normalization and plotting T2C // 
------------------------------------------- plot_options_T2C = "" - norm_method = "FPM" //log, fpm, array and none + norm_method = "array" //log, fpm, array and none uropa_threads = 32 //Parameter for HiC matrix //-------------------------------------------- - hicBuildMatrix_options = "--threads 16 --inputBufferSize 100000" + hicBuildMatrix_threads = 16 bwa_HiC_options = "-t 4" // bwa mem + threads_bowtie2 = 12 + inputBufferSize = 400000 //Parameter for uropa configuration uropa_feature = "" // "String,String,String,..."