From 14390450c3056a6db8b9c8fb11a6586679ee0d6e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Wiegandt?= Date: Wed, 9 Jan 2019 09:39:58 -0500 Subject: [PATCH] Added log file for part 2.2_motif_estimation --- bin/2.2_motif_estimation/bed_to_fasta.R | 17 +++++++++++++++- pipeline.nf | 27 ++++++++++++++++++++++--- 2 files changed, 40 insertions(+), 4 deletions(-) diff --git a/bin/2.2_motif_estimation/bed_to_fasta.R b/bin/2.2_motif_estimation/bed_to_fasta.R index 7e716b7..7dbb36f 100644 --- a/bin/2.2_motif_estimation/bed_to_fasta.R +++ b/bin/2.2_motif_estimation/bed_to_fasta.R @@ -1,5 +1,5 @@ #!/usr/bin/env Rscript -if (!require(optparse)) install.packages("optparse"); library(optparse) +if (!require(optparse, quietly = T)) install.packages("optparse"); library(optparse) option_list <- list( make_option(opt_str = c("-i", "--input"), default = NULL, help = "Input bed-file. Second last column must be sequences and last column must be the cluster id.", metavar = "character"), @@ -36,6 +36,9 @@ bed_to_fasta <- function(bedInput, prefix = "", min_seq = 100){ # Split data.table bed on its last column (cluster_no) into list of data.frames clusters <- split(bed, cluster_no, sorted = TRUE, flatten = FALSE) + # Get number of all clusters + num_clusters <- length(clusters) + # For each data.frame(cluster) in list clusters: discard <- lapply(seq_len(length(clusters)), function(i){ clust <- as.data.frame(clusters[i]) @@ -47,10 +50,22 @@ bed_to_fasta <- function(bedInput, prefix = "", min_seq = 100){ outfile <- paste0(prefix,"_cluster_",i - 1,".FASTA") # Write fasta file seqinr::write.fasta(sequences = sequences, names = clust[[4]], file.out = outfile, as.string = TRUE) + return(TRUE) } else { print(paste0("Cluster: ",i," is to small")) + return(FALSE) } }) + + # Get number of clusters which contain enought sequences + tmp <- unlist(discard) + count_clust <- length(tmp[tmp == FALSE]) + + # Write log-file + write(paste0( "------------------------------------------------------------\nfile: ", + bedInput,"\nNumber of all clusters: ", num_clusters, "\nRemoved small clusters ( < ", + min_seq," sequences ): " ,count_clust , "\nRemaining number of clusters: ", num_clusters - count_clust), + file = "bed_to_fasta.log", append = T) } # run function bed_to_fasta with given parameteres if not in interactive context (e.g. run from shell) diff --git a/pipeline.nf b/pipeline.nf index 4ebab19..48b1455 100644 --- a/pipeline.nf +++ b/pipeline.nf @@ -200,6 +200,7 @@ process footprint_extraction { output: set name, file ('*.bed') into bed_for_overlap_with_TFBS + file('*.log') script: """ @@ -317,7 +318,7 @@ Converting BED-File to one FASTA-File per cluster process bed_to_clustered_fasta { //conda "${path_env}" - publishDir "${params.out}/2.2_motif_estimation/fasta/", mode: 'copy' + publishDir "${params.out}/2.2_motif_estimation/fasta/", mode: 'copy', pattern: '*.FASTA' tag{name} input: @@ -326,6 +327,7 @@ process bed_to_clustered_fasta { output: file ('*.FASTA') into fasta_for_glam2 file ('*.FASTA') into fasta_for_motif_cluster + file ('*.log') into 22_log script: """ @@ -572,14 +574,33 @@ process tomtom { //Joining channels with meme and tsv files. Filter joined channel on line count. //Only meme-files which corresponding tsv files have linecount <= 1 are writen to next channel. -for_filter2 = for_filter.join( tsv_for_filter ) +for_filter.join( tsv_for_filter ).into {for_filter2; for_log} for_filter2 .filter { name, meme, tsv -> long count = tsv.readLines().size() count <= 1 } - .into { meme_for_scan; check } + .into { meme_for_scan; check; num_cluster } +count_cluster = num_cluster.count() +count_cluster_before_filter = for_log.count() + +process write_log_for_motif_estimation { + + publishDir "${params.out}/2.2_motif_estimation/log/", mode: 'copy' + + input: + file log, val (after_filter), val (before_filter) from 22_log.combine(count_cluster).combine(count_cluster_before_filter = for_log.count()) + + output: + file ('*.log') + + script: + removed = before_filter - after_filter + """ + printf "\nMotifs found in Database: ${removed}\nNumber of remaining unknown motifs/cluster${after_filter}" >> log + """ +} //If channel 'check' is empty print errormessage process check_for_unknown_motifs {