Skip to content

Commit

Permalink
Merge pull request #6 from loosolab/hic_dev
Browse files Browse the repository at this point in the history
bugfixes and minor changes
  • Loading branch information
renewiegandt authored Sep 24, 2018
2 parents 6fa758e + cde20dc commit 2497552
Show file tree
Hide file tree
Showing 3 changed files with 69 additions and 40 deletions.
93 changes: 59 additions & 34 deletions TOuCAN.nf
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,6 @@ Usage: nextflow run TOuCAN.nf --in [Input Path] --out [Output Path] --mode [Modi
--hic_thresh_min [(-)INT] - Threshold filter miniumum [default: -1.5]
--hic_thresh_max [INT] - Threshold filter maximum [default: 5]
--inputBufferSize [INT] - Buffersize for creating hic matrix [default: 400000]
--bt2_index_threads [INT] - Number of threads used or creating bt2 index
Skip Aligment -> BAM files as Input:
The BAM files need to be from this Pipeline with follwing
Expand Down Expand Up @@ -172,10 +171,10 @@ Path to bin: ${path_bin}
Path to genome: ${path_genome}
Path to restriction maps: ${path_T2C_restriction_maps}
"""
if ( ! params.bam == "" ){
log.info """
Path to bam files: ${params.bam}
"""
if ( "${params.bam}" != "" ){
log.info"""
Path to bam files: ${params.bam}
"""
}

log.info """
Expand All @@ -197,7 +196,7 @@ multiplot_bed_1 = Channel.empty()
matrix_to_plot = Channel.empty()
res_map = Channel.empty()
bam_f = Channel.empty()
bam_f2 = Channel.empty()
bam2 = Channel.empty()
interaction_uropa = Channel.empty()
fastqGzFiles = Channel.empty()
hic_matrix_f = Channel.empty()
Expand Down Expand Up @@ -242,8 +241,15 @@ if (params.mode != "plot"){
Channel
.fromPath("${params.bam}/*.bam")
.map {it -> [it.simpleName, it]}
.into {bam_f; bam_f2}
.set {bam_f}
create_bam = false
if("${params.mode}" == "HiC"){
Channel
.fromPath("${params.bam}/*.bam")
.map {it -> [it.simpleName.replaceAll(params.sample_extension, ""), it]}
.set {bam2}
create_bam = false
}
} else if ("${params.mode}" == "HiC" & "${params.mode}" == "h5") {
hic_matrix_f = Channel
.fromPath("${params.in}/*.h5")
Expand Down Expand Up @@ -437,7 +443,7 @@ process decompress {

output:
set basisname, fastqName ,file ("${fastqName}.fastq") into decompressedfastq, decompressedfastq_for_HiC_bwa,
decompressedfastq_for_HiC_bowtie2, decompressedfastq_for_HiC_histat
decompressedfastq_for_HiC_bowtie2

script:
fastqName = fastqGz.simpleName
Expand Down Expand Up @@ -675,7 +681,7 @@ process index_bamfiles {
set name, file (bam) from finalbam

output:
set name, file ("${name}.bam.bai") into bam_bai_files, test
set name, file ("${name}.bam.bai") into bam_bai_files

when:
params.mode == "T2C"
Expand Down Expand Up @@ -960,19 +966,17 @@ process finalize_matrix {
set name, file (temp_bed) from temp_bed_file

output:
set name, file ("${name_raw}.reassigned.bed") into reassigned_matrix
set name_raw, file ("${name_raw}.reassigned.bed") into reassigned_matrix_raw
set name, file ("${name}.reassigned.bed") into reassigned_matrix

when:
params.mode == "T2C"
params.reassin == 1

script:
name_raw = "${name}_raw"
"""
cat ${temp_bed} | \
sort -k 1,1 -k 2,2n -k 4,4 -k 5,5n |\
${path_python}/python ${path_bin}/bin/aggregate_matrix.py -o ${name_raw}.reassigned.bed
${path_python}/python ${path_bin}/bin/aggregate_matrix.py -o ${name}.reassigned.bed
"""
}

Expand Down Expand Up @@ -1008,7 +1012,7 @@ process normalize_matrix {
--method ${params.norm_method}
"""
}
to_plot = matrix_to_plot.concat(normalized_matrix).concat(reassigned_matrix_raw)
to_plot = matrix_to_plot.concat(normalized_matrix)


/*
Expand Down Expand Up @@ -1159,7 +1163,7 @@ process create_uropa_config_1 {
process run_uropa_1 {

tag{name}
conda "$workflow.projectDir/uropa.yaml"
//conda "$workflow.projectDir/envtoucan.yaml"

if(params.safe_all_files == 1){
publishDir "${outpath}/${params.pn}_uropa/", mode: 'copy'
Expand Down Expand Up @@ -1258,7 +1262,7 @@ process create_uropa_config_2 {
process run_uropa_2 {

tag{name}
conda "$workflow.projectDir/uropa.yaml"
//conda "$workflow.projectDir/envtoucan.yaml"

if(params.safe_all_files == 1){
publishDir "${outpath}/${params.pn}_uropa/", mode: 'copy'
Expand Down Expand Up @@ -1446,7 +1450,11 @@ process plot_TAD_graph {

}

multiplot_bed_merged = multiplot_bed_1.concat(multiplot_bed_2)
multiplot_bed_1.concat(multiplot_bed_2).into {multiplot_bed_merged; testa}

multiplot_TAD.join(multiplot_bed_merged).into {multi; testb}
testa.println()
testb.println()

process multiplot {

Expand All @@ -1455,8 +1463,7 @@ process multiplot {
publishDir "${outpath}/${params.pn}_plots/multiplot/", mode: 'copy'

input:
set name, file (tad) from multiplot_TAD
set name, file (bed) from multiplot_bed_merged
set name, file (tad), file (bed) from multi

output:
file ("*.png") into for_pptx
Expand Down Expand Up @@ -1522,8 +1529,7 @@ process alignment_with_bwa {
set basisname, file ("${fastqName}.bam") into bwa_alignment

when:
params.mode == "HiC" && params.aln == "bwa"
create_bam == true
params.mode == "HiC" && params.aln == "bwa" && create_bam == true

script:
"""
Expand All @@ -1549,9 +1555,26 @@ process bowtie2_index {
script:
genome_file = file (path_genome)
genome_name = genome_file.name
"""
bowtie2-build ${path_genome} ${genome_name} --threads ${params.bt2_index_threads}
"""

indx = file ("${path_genome}").getBaseName()
path = file ("${path_genome}").getParent()
i = "${path}/bowtie2/${indx}"

bt1 = file("${i}.1.bt2")
bt2 = file("${i}.2.bt2")
bt3 = file("${i}.3.bt2")
bt4 = file("${i}.4.bt2")
rev1 = file("${i}.rev.1.bt2")
rev2 = file("${i}.rev.2.bt2")

if(bt1.exists() && bt2.exists() && bt3.exists() && bt4.exists() && rev1.exists() && rev2.exists())
"""
echo "Skiped creating bt2 index"
"""
else
"""
bowtie2-build ${path_genome} ${genome_name} --threads ${params.bt2_index_threads}
"""
}


Expand All @@ -1562,23 +1585,24 @@ process alignment_with_bowtie2 {
conda "$workflow.projectDir/envtoucan.yaml"

input:
set basisname, fastqName ,file (fastq) from decompressedfastq_for_HiC_bowtie2
val bt2_idx from index_bt2
set basisname, fastqName ,file (fastq), bt2_idx from decompressedfastq_for_HiC_bowtie2.combine(index_bt2)

output:
set basisname, file ("${fastqName}.bam") into bowtie2_alignment
set basisname, file ("${fastqName}.sam") into bowtie2_alignment

when:
params.mode == "HiC" && params.aln == "bowtie2" && create_bam == true

script:
indx = file ("${path_genome}").getBaseName()
path = file ("${path_genome}").getParent()
i = "${path}/bowtie2/${indx}"
"""
bowtie2 -x ${path_genome} -U ${fastq} --very-sensitive -L 30 --score-min L,-0.6,-0.2 --end-to-end --reorder -p 12 \
| samtools view -Shb - > ${fastqName}.bam
bowtie2 -x ${i} -U ${fastq} --very-sensitive --reorder -p ${params.threads_bowtie2} > ${fastqName}.sam
"""
}

hic_alignment = bam_f2.concat(bwa_alignment.concat(bowtie2_alignment)) //.concat(histat_alignment)
bam2.concat(bwa_alignment, bowtie2_alignment).set {hic_alignment}

process create_HiC_matrix {

Expand All @@ -1590,7 +1614,7 @@ process create_HiC_matrix {

output:
set name, file("${name}_matrix.h5") into hic_matrix, hic_matrix_for_diagnostic
file ("${name}.bam") into hic_bam
file ("${name}.bam")

when:
params.mode == "HiC"
Expand All @@ -1606,11 +1630,12 @@ process create_HiC_matrix {
}
"""
hicBuildMatrix --samFiles ${bam_r1} ${bam_r2} \
--QCfolder ${outpath}/QC_${name}/ \
--QCfolder ${outpath}/${params.pn}_QC_${name}/ \
--binSize ${params.bin} \
-b ${name}.bam \
--inputBufferSize ${params.inputBufferSize} \
--restrictionSequence ${params.enzyme_a_sequence} \
--threads ${params.hicBuildMatrix_threads}\
-o ${name}_matrix.h5
"""
Expand All @@ -1626,7 +1651,7 @@ process diagnostic_plot_of_HiC_Matrix {
set name, file (matrix) from hic_matrix_for_diagnostic

output:
set name, file ("${name}.png") // into hic_matrix_corrected
set name, file ("${name}.png")

when:
params.mode == "HiC"
Expand All @@ -1642,7 +1667,7 @@ hic_matrix_final = hic_matrix_f.concat(hic_matrix)
process correct_matrix {

tag{name}
publishDir "${outpath}/${params.pn}_h5_matrix/", mode: 'copy'
publishDir "${outpath}/${params.pn}_h5_matrix/corrected/", mode: 'copy'

input:
set name, file (matrix) from hic_matrix_final
Expand Down
4 changes: 3 additions & 1 deletion envtoucan.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,6 @@ dependencies:
- bwa=0.7.15
- samtools=1.3.1
- bedtools=2.27.1
- bowtie2=2.3.3.1
- bowtie2=2.3.4.1
- python=2.7
- perl=5.26.0
12 changes: 7 additions & 5 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ env {
}

params {
sample_extension = "_R[12]_001" // regex for sample extension [e.g., "_R[12]_001" or "_R[12]"]
sample_extension = "_R[12]" // regex for sample extension [e.g., "_R[12]_001" or "_R[12]"]

// Enzyme Information
// -------------------------------------------
Expand All @@ -26,23 +26,25 @@ params {

// Minor fixed parameters for BWA and SAMtools
// -------------------------------------------
bwa_T2C_options = "-t 16" // bwa aln
sort_options = "--threads 16"
bwa_T2C_options = "-t 32" // bwa aln
sort_options = "--threads 32"
library_label = "capture"
platform_label = "ILLUMINA"
center_label = "ECB"

//Parameters for T2C normalization and plotting
// -------------------------------------------
plot_options_T2C = ""
norm_method = "FPM" //log, fpm, array and none
norm_method = "array" //log, fpm, array and none

uropa_threads = 32

//Parameters for the HiC matrix
//--------------------------------------------
hicBuildMatrix_options = "--threads 16 --inputBufferSize 100000"
hicBuildMatrix_threads = 16
bwa_HiC_options = "-t 4" // bwa mem
threads_bowtie2 = 12
inputBufferSize = 400000

//Parameters for the UROPA configuration
uropa_feature = "" // "String,String,String,..."
Expand Down

0 comments on commit 2497552

Please sign in to comment.