diff --git a/RLIB/cis_matrix_body_multi.R b/RLIB/cis_matrix_body_multi.R index 7c5cc02..da00fa3 100644 --- a/RLIB/cis_matrix_body_multi.R +++ b/RLIB/cis_matrix_body_multi.R @@ -47,7 +47,7 @@ require(plyr, quietly = TRUE) require(RColorBrewer, quietly = TRUE) require(gridExtra, quietly = TRUE) require(grid, quietly = TRUE) -require(ggplot2 ,quietly = TRUE) +##require(ggplot2 ,quietly = TRUE) require(ggpmisc, quietly = TRUE) if ( !(require("ggbio")) ) { biocLite("ggbio") @@ -145,7 +145,7 @@ if(colour_range[2] == 0){ #} wh <- GenomicRanges::GRanges(region[["chr"]], IRanges::IRanges(as.integer(region[["start"]]),as.integer(region[["end"]]))) -tset <- ggbio::autoplot(txdb , which = wh, genome.axis = FALSE, names.expr = "tx_name") +tset <- ggbio::autoplot(txdb , which = wh, genome.axis = FALSE, names.expr = "gene_id") p2 <- tset@ggplot @@ -154,7 +154,8 @@ palette <- c( rgb(0,0,255,maxColorValue=255), rgb(152,20,18,maxColorValue=255), rgb(240,48,0,maxColorValue=255), - rgb(255,188,39,maxColorValue=255)) + rgb(255,188,39,maxColorValue=255), + rgb(124,252,0,maxColorValue=255)) # triangle plot message(paste("Constructing plot")) @@ -162,7 +163,7 @@ p <- ggplot2::ggplot() p <- p + geom_polygon(aes(x=x, y=y, group=group), fill="black", data=rotated_bg) if(add_borders_to_elements){ p <- p + geom_polygon(aes(x=x, y=y, group=group, fill=signal, colour=signal), data=rotated_data, size=0.2) - p <- p + scale_colour_gradientn(limits=colour_range, colours=palette, na.value = "green") + p <- p + scale_colour_gradientn(limits=colour_range, colours=palette, na.value = "green", guide = "colourbar") } else { p <- p + geom_polygon(aes(x=x, y=y, group=group, fill=signal), data=rotated_data) } diff --git a/TOuCAN.nf b/TOuCAN.nf index 812f73a..4b294f7 100644 --- a/TOuCAN.nf +++ b/TOuCAN.nf @@ -23,6 +23,10 @@ params.int_score = 50 params.aln = "bwa" params.bin = 100000 +params.hic_thresh_min = -1.5 +params.hic_thresh_max = 5 +params.inputBufferSize = 400000 +params.bt2_index_threads = 4 if(params.mode == "help" || params.mode == "h"){ log.info """ @@ -77,7 +81,8 @@ Usage: nextflow run TOuCAN.nf --in [Input Path] --out [Output Path] --mode [Modi --end [INT] - End of target region. --score_min [INT] - Score range: minimum. [default: 0] --score_max [INT] - Score range: maximium. [default: autoscale] - --pn [STRING] - Name of the Project [default: 'Project'] + --pn [STRING] - Name of the project [default: 'Project'] + --organsim [mm10,mm9,hg19] - Type of the genome --mode uropa - Uropa annoation [T2C] parameters: @@ -88,7 +93,7 @@ Usage: nextflow run TOuCAN.nf --in [Input Path] --out [Output Path] --mode [Modi --end [INT] - End of target region. --pn [STRING] - Name of the Project [default: 'Project'] ---mode multiplot - creating a plot with interaction map, TAD graph and gene annotation +--mode multiplot - Creating a plot with interaction map, TAD graph and gene annotation parameters: --in [PATH] - Path to directory with *.normalized.bed --out [PATH] - Path to output directory. @@ -105,6 +110,10 @@ Usage: nextflow run TOuCAN.nf --in [Input Path] --out [Output Path] --mode [Modi --out [PATH] - Path to output directory. --aln [bwa|bowtie2] - Choose alignment tool. [default: bwa] --bin [INT] - Binsize [default: 10000] + --hic_thresh_min [(-)INT] - Threshold filter miniumum [default: -1.5] + --hic_thresh_max [INT] - Threshold filter maximum [default: 5] + --inputBufferSize [INT] - Buffersize for creating hic matrix [default: 400000] + --bt2_index_threads [INT] - Number of threads used or creating bt2 index Skip Aligment -> BAM files as Input: The BAM files need to be from this Pipeline with follwing @@ -156,12 +165,6 @@ Chromosome: ${params.chr} Start: ${params.start} End: ${params.end} -Dependencies -Python: ${path_python} -BWA: ${path_bwa} -samtools: ${path_samtools} -bedtools: ${path_bedtools} - Path to bin: ${path_bin} Path to genome: ${path_genome} Path to restriction maps: ${path_T2C_restriction_maps} @@ -219,9 +222,14 @@ if(params.mode == "multiplot"){ if (params.mode != "plot"){ - if ( "${path_T2C_restriction_maps}" != "" ){ + if ( "${path_T2C_restriction_maps}" != ""){ + if (! file ("${path_T2C_restriction_maps}/restriction_targets.bed").exists()){ + println("Warning: restriction_target.bed does not exists on given path (path_T2C_restriction_maps in configuration file) -> starting to create restriction maps.") + create_res_map = true + } else { res_map = Channel.fromPath("${path_T2C_restriction_maps}/*.bed") create_res_map = false + } } else { create_res_map = true } @@ -384,7 +392,7 @@ process check_output_directory { */ process create_restriction_maps_for_T2C { - publishDir "${outpath}/${params.pn}_restriction_maps/", mode: 'move' + publishDir "${outpath}/${params.pn}_restriction_maps/", mode: 'copy' output: file ("*.bed") into restriction_maps @@ -451,7 +459,7 @@ process decompress { tag{fastqGz} if(params.safe_all_files == 1){ - publishDir "${outpath}/${params.pn}_fastq/", mode: 'move', overwrite: 'false' + publishDir "${outpath}/${params.pn}_fastq/", mode: 'copy', overwrite: 'false' } input: @@ -481,6 +489,8 @@ process decompress { process create_bwa_index { + conda "$workflow.projectDir/envtoucan.yaml" + output: file ('f') into index file ('f') into index_hic_bwa @@ -503,7 +513,7 @@ process create_bwa_index { """ else """ - ${path_bwa}/bwa index ${path_genome} + bwa index ${path_genome} echo done > f """ @@ -513,12 +523,15 @@ process create_bwa_index { // T2C Step 2 process bwa_aln { + //conda 'bioconda::bwa=0.7.15' + conda "$workflow.projectDir/envtoucan.yaml" + tag{fastq} executor 'local' if(params.safe_all_files == 1){ - publishDir "${outpath}/${params.pn}_additional_files/sai/", mode: 'move', pattern: '*.sai' + publishDir "${outpath}/${params.pn}_additional_files/sai/", mode: 'copy', pattern: '*.sai' } input: @@ -533,7 +546,7 @@ process bwa_aln { script: """ - ${path_bwa}/bwa aln \ + bwa aln \ ${params.bwa_T2C_options} \ ${path_genome} \ $fastq > ${fastqName}.sai @@ -544,9 +557,9 @@ process bwa_aln { process bwa_sampe_to_create_sam { if(params.safe_all_files == 1){ - publishDir "${outpath}/${params.pn}_additional_files/sam/", mode: 'move' + publishDir "${outpath}/${params.pn}_additional_files/sam/", mode: 'copy' } - + conda "$workflow.projectDir/envtoucan.yaml" tag{name} input: @@ -560,8 +573,8 @@ process bwa_sampe_to_create_sam { create_bam == true script: - fullname = sai[0].simpleName - if(fullname.contains("_R1")){ + sortbasename = [sai[0].getBaseName(), sai[1].getBaseName()].sort() + if(sai[0].contains(sortbasename[0])){ sai_R1 = sai[0] fastq_R1 = fastq[0] sai_R2 = sai[1] @@ -574,7 +587,7 @@ process bwa_sampe_to_create_sam { } """ - ${path_bwa}/bwa sampe \ + bwa sampe \ -A \ ${path_genome} \ ${sai_R1} ${sai_R2} \ @@ -587,9 +600,10 @@ process bwa_sampe_to_create_sam { process samtools_addreplacerg { tag{sam} + conda "$workflow.projectDir/envtoucan.yaml" if(params.safe_all_files == 1){ - publishDir "${outpath}/${params.pn}_additional_files/sam/", mode: 'move' + publishDir "${outpath}/${params.pn}_additional_files/sam/", mode: 'copy' } input: @@ -605,7 +619,7 @@ process samtools_addreplacerg { script: ds = "${path_working}/*.srt.bam" """ - ${path_samtools}/samtools addreplacerg \ + samtools addreplacerg \ -r "ID:${name}" \ -r "CN:${params.center_label}" \ -r "LB:${params.library_label}" \ @@ -622,8 +636,9 @@ process samtools_addreplacerg { process samtools_sort { executor 'local' + conda "$workflow.projectDir/envtoucan.yaml" - publishDir "${outpath}/${params.pn}_bam/", mode: 'move' + publishDir "${outpath}/${params.pn}_bam/", mode: 'copy' tag{rgsam} @@ -639,7 +654,7 @@ process samtools_sort { script: """ - ${path_samtools}/samtools sort \ + samtools sort \ ${params.sort_options} \ -T ${name}_tmp_sort \ -o ${name}.bam \ @@ -647,15 +662,15 @@ process samtools_sort { """ } -bam_f.concat(bamfiles).into {finalbam; bamfiles_for_flagstats;} +bam_f.concat(bamfiles).into {finalbam; bamfiles_for_flagstats; workaround} // T2C Step 6 process samtools_flagstat { - publishDir "${outpath}/${params.pn}_stats/", mode: 'move' - - tag{bam} + publishDir "${outpath}/${params.pn}_stats/", mode: 'copy' + conda "$workflow.projectDir/envtoucan.yaml" + tag{bam} input: set name, file (bam) from bamfiles_for_flagstats @@ -668,7 +683,7 @@ process samtools_flagstat { script: """ - ${path_samtools}/samtools flagstat ${bam} > ${name}.flagstat.txt + samtools flagstat ${bam} > ${name}.flagstat.txt """ } @@ -680,26 +695,28 @@ process samtools_flagstat { process index_bamfiles { tag{bam} + conda "$workflow.projectDir/envtoucan.yaml" if(params.safe_all_files == 1){ - publishDir "${outpath}/${params.pn}_additional_files/bai/", mode: 'move' + publishDir "${outpath}/${params.pn}_additional_files/bai/", mode: 'copy' } input: set name, file (bam) from finalbam output: - set name, file ("${name}.bam.bai"), file ("${bam}") into bam_bai_files + set name, file ("${name}.bam.bai") into bam_bai_files, test when: params.mode == "T2C" script: """ - ${path_samtools}/samtools index ${bam} > ${name}.bam.bai + samtools index ${bam} > ${name}.bam.bai """ } +bam_bai_files_final = bam_bai_files.combine(workaround, by: 0) /* * T2C Step 8 @@ -712,11 +729,11 @@ process overlapping_reads_targets { executor 'local' if(params.safe_all_files == 1){ - publishDir "${outpath}/${params.pn}_additional_files/bam/", mode: 'move' + publishDir "${outpath}/${params.pn}_additional_files/bam/", mode: 'copy' } input: - set name, file (bai), file (bam), file (res_target) from bam_bai_files.combine(final_res_map.filter{ it.simpleName == "restriction_targets"}) + set name, file (bai), file (bam), file (res_target) from bam_bai_files_final.combine(final_res_map.filter{ it.simpleName == "restriction_targets"}) output: set name, file ("${name}.target.bam") into target_bam_files @@ -741,9 +758,10 @@ process overlapping_reads_targets { process namesorting_bam_files { tag{target_bam} + conda "$workflow.projectDir/envtoucan.yaml" if(params.safe_all_files == 1){ - publishDir "${outpath}/${params.pn}_additional_files/bam/", mode: 'move' + publishDir "${outpath}/${params.pn}_additional_files/bam/", mode: 'copy' } input: @@ -757,7 +775,7 @@ process namesorting_bam_files { script: """ - ${path_samtools}/samtools sort \ + samtools sort \ -n \ -o ${name}.namesort.bam \ ${target_bam} @@ -777,9 +795,10 @@ process namesorting_bam_files { process filter_aligments { tag{namesort_bam} + conda "$workflow.projectDir/envtoucan.yaml" if(params.safe_all_files == 1){ - publishDir "${outpath}/${params.pn}_additional_files/bam/", mode: 'move' + publishDir "${outpath}/${params.pn}_additional_files/bam/", mode: 'copy' } input: @@ -793,7 +812,7 @@ process filter_aligments { script: """ - ${path_samtools}/samtools view -h \ + samtools view -h \ -f 0x0001 \ -F 0x0004 \ -F 0x0008 \ @@ -813,7 +832,7 @@ process create_matrix { tag{filtered_bam} if(params.safe_all_files == 1){ - publishDir "${outpath}/${params.pn}_additional_files/bed/", mode: 'move' + publishDir "${outpath}/${params.pn}_additional_files/bed/", mode: 'copy' } input: @@ -842,7 +861,7 @@ process finish_raw_matrix { tag{reads_bed} if(params.safe_all_files == 1){ - publishDir "${outpath}/${params.pn}_additional_files/bed/", mode: 'move' + publishDir "${outpath}/${params.pn}_additional_files/bed/", mode: 'copy' } input: @@ -874,7 +893,7 @@ process split_matrix_for_re_evaluation { tag{matrix_bed} if(params.safe_all_files == 1){ - publishDir "${outpath}/${params.pn}_additional_files/bed/", mode: 'move' + publishDir "${outpath}/${params.pn}_additional_files/bed/", mode: 'copy' } input: @@ -903,7 +922,7 @@ process intersect_beds_to_fragments { tag{name} if(params.safe_all_files == 1){ - publishDir "${outpath}/${params.pn}_additional_files/bed/", mode: 'move' + publishDir "${outpath}/${params.pn}_additional_files/bed/", mode: 'copy' } input: @@ -931,7 +950,7 @@ process reassign_matrix { tag{matrix} if(params.safe_all_files == 1){ - publishDir "${outpath}/${params.pn}_additional_files/bed/", mode: 'move' + publishDir "${outpath}/${params.pn}_additional_files/bed/", mode: 'copy' } input: @@ -960,7 +979,7 @@ process finalize_matrix { tag{temp_bed} - publishDir "${outpath}/${params.pn}_Interactions/01_all_interactions_bed_raw/", mode: 'move', pattern: "${name_raw}.reassigned.bed" + publishDir "${outpath}/${params.pn}_Interactions/01_all_interactions_bed_raw/", mode: 'copy', pattern: "${name_raw}.reassigned.bed" input: set name, file (temp_bed) from temp_bed_file @@ -989,7 +1008,7 @@ process normalize_matrix { tag{matrix} - publishDir "${outpath}/${params.pn}_Interactions/02_all_interactions_bed_norm/", mode: 'move' + publishDir "${outpath}/${params.pn}_Interactions/02_all_interactions_bed_norm/", mode: 'copy' input: set name, file (matrix) from reassigned_matrix @@ -1002,7 +1021,7 @@ process normalize_matrix { script: """ - Rscript ${path_bin}/bin/normalize_paired_bed.R --bedfile ${matrix} \ + ${path_R}/Rscript ${path_bin}/bin/normalize_paired_bed.R --bedfile ${matrix} \ --outfile ${name}.normalized.bed \ --library ${path_bin}/RLIB/ \ --method ${params.norm_method} @@ -1020,9 +1039,9 @@ process plotting { tag{name} if(params.mode == "plot" ){ - publishDir "${outpath}/plot/", mode: 'move' + publishDir "${outpath}/plot/", mode: 'copy' } else { - publishDir "${outpath}/${params.pn}_plots/interaction_plot/", mode: 'move' + publishDir "${outpath}/${params.pn}_plots/interaction_plot/", mode: 'copy' } input: @@ -1036,7 +1055,7 @@ process plotting { script: """ - Rscript ${path_bin}/bin/plot_cis_matrix.R \ + ${path_R}/Rscript ${path_bin}/bin/plot_cis_matrix.R \ --bedfile ${matrix} \ --outfile ${name}.cis.pdf \ --library ${path_bin}/RLIB/ \ @@ -1056,7 +1075,7 @@ process create_interaction_table { tag{name} - publishDir "${outpath}/${params.pn}_Interactions/03_target_region_interactions_bed/", mode: 'move' + publishDir "${outpath}/${params.pn}_Interactions/03_target_region_interactions_bed/", mode: 'copy' input: set name, file (norm) from filtered_interaction_bed_final_merge @@ -1069,7 +1088,7 @@ process create_interaction_table { script: """ - Rscript ${path_bin}/bin/create_interaction_table.R \ + ${path_R}/Rscript ${path_bin}/bin/create_interaction_table.R \ ${norm} ${name}.filtered.interactions.bed \ ${params.chr} ${params.start} ${params.end} ${params.int_score} \ $workflow.workDir/${name}.filtered.interactions.bed @@ -1081,7 +1100,7 @@ process create_stats { tag{name} - publishDir "${outpath}/${params.pn}_stats/", mode: 'move' + publishDir "${outpath}/${params.pn}_stats/", mode: 'copy' input: set name, file (reads) from reads_bed_files_for_stats @@ -1094,7 +1113,7 @@ process create_stats { script: """ - Rscript ${path_bin}/bin/create_T2C_stats.R \ + ${path_R}/Rscript ${path_bin}/bin/create_T2C_stats.R \ ${reads} ${name}_stats.pdf \ ${params.chr} ${params.start} ${params.end} ${name} """ @@ -1117,8 +1136,8 @@ process create_uropa_config_1 { script: - config_parameters = [uropa_feature,uropa_anchor,uropa_strand,uropa_direction, - uropa_filter_attr,uropa_attr_value,uropa_show_attr] + config_parameters = [params.uropa_feature,params.uropa_anchor,params.uropa_strand,params.uropa_direction, + params.uropa_filter_attr,params.uropa_attr_value,params.uropa_show_attr] conf_list = config_parameters.collect { par -> return "\\\"" + par.replace(",","\\\",\\\"") + "\\\"" @@ -1159,9 +1178,10 @@ process create_uropa_config_1 { process run_uropa_1 { tag{name} + conda "$workflow.projectDir/uropa.yaml" if(params.safe_all_files == 1){ - publishDir "${outpath}/${params.pn}_uropa/", mode: 'move' + publishDir "${outpath}/${params.pn}_uropa/", mode: 'copy' } input: @@ -1194,7 +1214,7 @@ process swap_fragments { script: """ - Rscript ${path_bin}/bin/swap_fragments.R ${int_bed} ${name}.swap.interaction.bed $workflow.workDir/${name}.swap.interaction.bed + ${path_R}/Rscript ${path_bin}/bin/swap_fragments.R ${int_bed} ${name}.swap.interaction.bed $workflow.workDir/${name}.swap.interaction.bed """ } @@ -1214,8 +1234,8 @@ process create_uropa_config_2 { script: - config_parameters = [uropa_feature,uropa_anchor,uropa_strand,uropa_direction, - uropa_filter_attr,uropa_attr_value,uropa_show_attr] + config_parameters = [params.uropa_feature,params.uropa_anchor,params.uropa_strand,params.uropa_direction, + params.uropa_filter_attr,params.uropa_attr_value,params.uropa_show_attr] conf_list = config_parameters.collect { par -> return "\\\"" + par.replace(",","\\\",\\\"") + "\\\"" @@ -1257,9 +1277,10 @@ process create_uropa_config_2 { process run_uropa_2 { tag{name} + conda "$workflow.projectDir/uropa.yaml" if(params.safe_all_files == 1){ - publishDir "${outpath}/${params.pn}_uropa/", mode: 'move' + publishDir "${outpath}/${params.pn}_uropa/", mode: 'copy' } input: @@ -1284,7 +1305,7 @@ process uropa_merge { tag{name} - publishDir "${outpath}/${params.pn}_Interactions/04_final_interactions_annotated/", mode: 'move' + publishDir "${outpath}/${params.pn}_Interactions/04_final_interactions_annotated/", mode: 'copy' input: set name, file (allhits_1), file (finalhits_1), file (allhits_2), file (finalhits_2), file (int_bed) from final_uropa_set @@ -1298,7 +1319,7 @@ process uropa_merge { script: """ - Rscript ${path_bin}/bin/merge_uropa_interactions.R ${int_bed} ${finalhits_1} ${finalhits_2} ${name}.finalhits.merged.txt + ${path_R}/Rscript ${path_bin}/bin/merge_uropa_interactions.R ${int_bed} ${finalhits_1} ${finalhits_2} ${name}.finalhits.merged.txt """ } @@ -1308,7 +1329,7 @@ xlsx_list = for_xlsx.toSortedList() process xlsx_writer { - publishDir "${outpath}/", mode: 'move' + publishDir "${outpath}/", mode: 'copy' input: val f from xlsx_list @@ -1335,7 +1356,7 @@ process create_viewpoint { tag{name} - publishDir "${outpath}/${params.pn}_Interactions/05_interactions_viewpoint_annotated/", mode: 'move' + publishDir "${outpath}/${params.pn}_Interactions/05_interactions_viewpoint_annotated/", mode: 'copy' input: set name, file (merged_finalhits) from merged_uropa @@ -1348,7 +1369,7 @@ process create_viewpoint { script: """ - Rscript ${path_bin}/bin/create_interaction_viewpoint.R ${merged_finalhits} ${name}.interactions.viewpoint.txt + ${path_R}/Rscript ${path_bin}/bin/create_interaction_viewpoint.R ${merged_finalhits} ${name}.interactions.viewpoint.txt """ } @@ -1359,7 +1380,7 @@ process convert_to_IFMatrix { tag{name} if(params.safe_all_files == 1){ - publishDir "${outpath}/${params.pn}_additional_files/IFMatrix/", mode: 'move' + publishDir "${outpath}/${params.pn}_additional_files/IFMatrix/", mode: 'copy' } input: @@ -1373,7 +1394,7 @@ process convert_to_IFMatrix { script: """ - Rscript ${path_bin}/bin/convert_to_IFMatrix.R ${int_bed} ${name}.ifm.txt + ${path_R}/Rscript ${path_bin}/bin/convert_to_IFMatrix.R ${int_bed} ${name}.ifm.txt """ } @@ -1382,32 +1403,52 @@ process run_robusTAD { tag{name} - publishDir "${outpath}/${params.pn}_TAD/", mode: 'move' + publishDir "${outpath}/${params.pn}_TAD/", mode: 'copy' input: set name, file (tfm) from ifmatrix output: - set name, file ("TADBoundaryCalls_*.txt") into tadbc, multiplot_TAD + set name, file ("TADBoundaryCalls_*.txt") into tadbc when: params.mode == "T2C" || params.mode == "multiplot" script: """ - Rscript ${path_bin}/bin/RobusTAD.R -i ${tfm} --header FALSE -n norm + ${path_R}/Rscript ${path_bin}/bin/RobusTAD.R -i ${tfm} --header FALSE -n norm """ } +process x_axis_scaling { + + tag{name} + + input: + set name, file (tad) from tadbc + + output: + set name, file ("${bn}.txt") into tadx, multiplot_TAD + + when: + params.mode == "T2C" || params.mode == "multiplot" + + script: + bn = tad.baseName + """ + ${path_R}/Rscript ${path_bin}/bin/xaxis_scaling.R ${tad} ${params.start} ${params.end} ${bn}.txt + """ +} + process plot_TAD_graph { tag{name} - publishDir "${outpath}/${params.pn}_plots/TAD/", mode: 'move' + publishDir "${outpath}/${params.pn}_plots/TAD/", mode: 'copy' input: - set name, file (tad) from tadbc + set name, file (tad) from tadx output: file ("${name}.TAD.pdf") @@ -1417,7 +1458,7 @@ process plot_TAD_graph { script: """ - Rscript ${path_bin}/bin/plotTAD.R ${tad} ${name}.TAD.pdf + ${path_R}/Rscript ${path_bin}/bin/plotTAD.R ${tad} ${name}.TAD.pdf """ } @@ -1428,7 +1469,7 @@ process multiplot { tag{name} - publishDir "${outpath}/${params.pn}_plots/multiplot/", mode: 'move' + publishDir "${outpath}/${params.pn}_plots/multiplot/", mode: 'copy' input: set name, file (tad) from multiplot_TAD @@ -1442,7 +1483,7 @@ process multiplot { script: """ - Rscript ${path_bin}/bin/plot_multi.R \ + ${path_R}/Rscript ${path_bin}/bin/plot_multi.R \ --bedfile ${bed} \ --outfile ${name}.multi.png \ --library ${path_bin}/RLIB/ \ @@ -1462,7 +1503,7 @@ for_pptx_list = for_pptx.toSortedList() process pptx_writer { - publishDir "${outpath}/", mode: 'move' + publishDir "${outpath}/", mode: 'copy' input: val pptxList from for_pptx_list @@ -1516,6 +1557,8 @@ if(params.aln == "bwa"){ process alignment_with_bwa { tag{fastq} + publishDir "${outpath}/bam/", mode: 'copy' + conda "$workflow.projectDir/envtoucan.yaml" input: set basisname, fastqName ,file (fastq) from decompressedfastq_for_HiC_bwa @@ -1529,7 +1572,7 @@ process alignment_with_bwa { script: """ - ${path_bwa}/bwa mem \ + bwa mem \ ${params.bwa_HiC_options} \ ${path_genome} \ $fastq | ${path_samtools}/samtools view -Shb > ${fastqName}.bam @@ -1539,14 +1582,20 @@ process alignment_with_bwa { process bowtie2_index { + publishDir "${path_genome}", mode: 'copy' + conda "$workflow.projectDir/envtoucan.yaml" + when: params.mode == "HiC" && params.aln == "bowtie2" + output: + val ("${genome_name}") into index_bt2 + script: genome_file = file (path_genome) genome_name = genome_file.name """ - ${path_bowtie2}/bowtie2-build ${path_genome} ${genome_name} + bowtie2-build ${path_genome} ${genome_name} --threads ${params.bt2_index_threads} """ } @@ -1554,9 +1603,12 @@ process bowtie2_index { process alignment_with_bowtie2 { tag{fastq} + publishDir "${outpath}/bam/", mode: 'copy' + conda "$workflow.projectDir/envtoucan.yaml" input: set basisname, fastqName ,file (fastq) from decompressedfastq_for_HiC_bowtie2 + val bt2_idx from index_bt2 output: set basisname, file ("${fastqName}.bam") into bowtie2_alignment @@ -1566,8 +1618,8 @@ process alignment_with_bowtie2 { script: """ - ${path_bowtie2}/bowtie2 -x ${path_genome} -U ${fastq} --very-sensitive -L 30 --score-min L,-0.6,-0.2 --end-to-end --reorder -p 12 \ - | ${path_samtools}/samtools view -Shb - > ${fastqName}.bam + bowtie2 -x ${path_genome} -U ${fastq} --very-sensitive -L 30 --score-min L,-0.6,-0.2 --end-to-end --reorder -p 12 \ + | samtools view -Shb - > ${fastqName}.bam """ } @@ -1576,6 +1628,7 @@ hic_alignment = bwa_alignment.concat(bowtie2_alignment) //.concat(histat_alignme process create_HiC_matrix { tag{name} + publishDir "${outpath}/h5_matrix/", mode: 'copy' input: set name, bams from hic_alignment.groupTuple() @@ -1588,13 +1641,20 @@ process create_HiC_matrix { params.mode == "HiC" script: - bam_r1 = bams[0] - bam_r2 = bams[1] + sortbasename = [bams[0].getBaseName(), bams[1].getBaseName()].sort() + if(bams[0].contains(sortbasename[0])){ + bam_r1 = bams[0] + bam_r2 = bams[1] + } else { + bam_r1 = bams[1] + bam_r2 = bams[0] + } """ hicBuildMatrix --samFiles ${bam_r1} ${bam_r2} \ --QCfolder ${outpath}/QC_${name}/ \ --binSize ${params.bin} \ -b ${name}.bam \ + --inputBufferSize ${params.inputBufferSize} \ --restrictionSequence ${params.enzyme_a_sequence} \ -o ${name}_matrix.h5 @@ -1604,7 +1664,7 @@ process create_HiC_matrix { process diagnostic_plot_of_HiC_Matrix { - publishDir "${outpath}/diagnostic_plots/", mode: 'move' + publishDir "${outpath}/diagnostic_plots/", mode: 'copy' input: set name, file (matrix) from hic_matrix_for_diagnostic @@ -1633,14 +1693,14 @@ process correct_matrix { script: """ - hicCorrectMatrix correct -m ${matrix} --filterThreshold -1.5 5 -o ${name}_corrected.h5 + hicCorrectMatrix correct -m ${matrix} --filterThreshold ${params.hic_thresh_min} ${params.hic_thresh_max} -o ${name}_corrected.h5 """ } process plot_matrix { - publishDir "${params.out}/plot/", mode: 'move' + publishDir "${outpath}/plot/", mode: 'copy' input: set name, file (corrected_matrix) from hic_matrix_corrected @@ -1654,6 +1714,6 @@ process plot_matrix { script: """ hicPlotMatrix -m ${corrected_matrix} -o ${name}.hic.matrix.png \ - --chromosomeOrder chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chr10 chr11 chr12 chr13 chr14 chr15 chr16 chr17 chr18 chr19 chrX chrY + --dpi 400 --region ${params.chr}:${params.start}-${params.end} """ } diff --git a/bin/xaxis_scaling.R b/bin/xaxis_scaling.R new file mode 100644 index 0000000..48d517e --- /dev/null +++ b/bin/xaxis_scaling.R @@ -0,0 +1,19 @@ +#!/usr/bin/env Rscript +args <- commandArgs(trailingOnly=TRUE) + +path <- args[1] +start <- as.numeric(args[2]) +end <- as.numeric(args[3]) +out <- args[4] #"./out.txt" + +tad <- data.table::fread(path, header = TRUE , sep = ' ') +xaxis <- tad[,1] +xmax <- max(xaxis) + +newxaxis <- lapply(xaxis, function(x){ + new_x <- (((end - start) * x) / xmax) + start +}) + +tad[,1] <- round(unlist(newxaxis[1])) + +data.table::fwrite(tad, file = out, col.names = TRUE, sep = ' ', quote = FALSE) diff --git a/envtoucan.yaml b/envtoucan.yaml new file mode 100644 index 0000000..ee602a2 --- /dev/null +++ b/envtoucan.yaml @@ -0,0 +1,10 @@ +name: envtoucan +channels: + - bioconda + - conda-forge + - defaults +dependencies: + - bwa=0.7.15 + - samtools=1.3.1 + - bedtools=2.27.1 + - bowtie2=2.3.3.1 diff --git a/multiplot.png b/multiplot.png index 5a06049..f5fa0db 100755 Binary files a/multiplot.png and b/multiplot.png differ diff --git a/nextflow.config b/nextflow.config index 4f7902d..03f9dd4 100644 --- a/nextflow.config +++ b/nextflow.config @@ -1,35 +1,21 @@ //Paths to dependencies env { - path_bowtie2 = "/mnt/software/x86_64/packages/bowtie2/2.3.3.1/" //Path to bowtie2 path_python = "/mnt/software/x86_64/packages/python/2.7.8-anaconda-5.1.0-stretch/bin" //Path to python - path_bwa = "/mnt/software/x86_64/packages/bwa/0.7.12/bin" //Path to bwa - path_samtools = "/mnt/software/x86_64/packages/samtools/1.3.1/bin" //Path to samtools - path_bedtools = "/mnt/software/x86_64/packages/bedtools/2.27.1/bin" //Path to bedtools + path_R = "/mnt/software/x86_64/packages/r/3.4.4-stretch-local/bin" //Path to R - path_bin = "/mnt/workspace1/rene.wiegandt/NextFlow/script/THCPipe" // Path to T2C analysis scripts - path_working = "/mnt/workspace1/rene.wiegandt/NextFlow/script/THCPipe" //Path to directory where files are stored. If restriction maps are already existing put them in folder 01_restriction_maps inside this working_dir + path_bin = "/mnt/workspace1/rene.wiegandt/NextFlow/script/TOuCAN" // Path to T2C analysis scripts + path_working = "/mnt/workspace1/rene.wiegandt/NextFlow/script/TOuCAN" //Path to directory where files are stored. If restriction maps are already existing put them in folder 01_restriction_maps inside this working_dir path_genome = "/mnt/workspace1/rene.wiegandt/NextFlow/script/THCPipe/index_bwa/GRCm38.p5.genome_whitelist.fa" // Path to full genome in fasta format + basis name of index [e.g. GRCm38.p5.genome_whitelist.fa] path_gtf = "/mnt/agnerds/Rene.Wiegandt/fromMario/gencode.vM15.annotation.gtf" path_T2C_restriction_maps = "${path_working}/01_restriction_maps" //If empty restriction maps are generated [typically: path_working + /01_restriction_maps/]. If empty files are generated - //path_bam_files = "/mnt/workspace1/rene.wiegandt/NextFlow/script/THCPipe/temp/" //Path to alingment files. If empty files are generated - - uropa_feature = "" // "String,String,String,..." - uropa_anchor = "" // "String,String,String,..." - uropa_dist_1 = 1000 - uropa_dist_2 = 1000 - uropa_strand = "" // single argument "String" - uropa_direction = "any_direction" // "String,String,String,..." - uropa_filter_attr = "" // "String,String,String,..." - uropa_attr_value = "" // "String,String,String,..." - uropa_show_attr = "gene_id,gene_type,gene_name,level,transcript_type,transcript_support_level" // "String,String,String,..." } params { - sample_extension = "_R[12]_001"//"_R[12]_001" // regex for sample extension [e.g "_R[12]_001" or "_R[12]"] + sample_extension = "_R[12]_001" // regex for sample extension [e.g "_R[12]_001" or "_R[12]"] // Enzyme Information // ------------------------------------------- @@ -40,8 +26,8 @@ params { // Minor fixed parameters for BWA and SAMtools // ------------------------------------------- - bwa_T2C_options = "-t 4" // bwa aln - sort_options = "--threads 4" + bwa_T2C_options = "-t 16" // bwa aln + sort_options = "--threads 16" library_label = "capture" platform_label = "ILLUMINA" center_label = "ECB" @@ -55,9 +41,19 @@ params { //Parameter for HiC matrix //-------------------------------------------- - hicBuildMatrix_options = "--threads 4 --inputBufferSize 100000" - bwa_HiC_options = "" // bwa mem + hicBuildMatrix_options = "--threads 16 --inputBufferSize 100000" + bwa_HiC_options = "-t 4" // bwa mem + //Parameter for uropa configuration + uropa_feature = "" // "String,String,String,..." + uropa_anchor = "" // "String,String,String,..." + uropa_dist_1 = 1000 + uropa_dist_2 = 1000 + uropa_strand = "" // single argument "String" + uropa_direction = "any_direction" // "String,String,String,..." + uropa_filter_attr = "" // "String,String,String,..." + uropa_attr_value = "" // "String,String,String,..." + uropa_show_attr = "gene_id,gene_type,gene_name,level,transcript_type,transcript_support_level" // "String,String,String,..." } diff --git a/uropa.yaml b/uropa.yaml new file mode 100755 index 0000000..ee0e212 --- /dev/null +++ b/uropa.yaml @@ -0,0 +1,8 @@ +name: uropa + +channels: + - bioconda + - conda-forge + +dependencies: + - uropa