Skip to content

Commit

Permalink
Added log file for part 2.2_motif_estimation
Browse files Browse the repository at this point in the history
  • Loading branch information
renewiegandt committed Jan 9, 2019
1 parent 8e5049e commit 1439045
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 4 deletions.
17 changes: 16 additions & 1 deletion bin/2.2_motif_estimation/bed_to_fasta.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/usr/bin/env Rscript
if (!require(optparse)) install.packages("optparse"); library(optparse)
if (!require(optparse, quietly = T)) install.packages("optparse"); library(optparse)

option_list <- list(
make_option(opt_str = c("-i", "--input"), default = NULL, help = "Input bed-file. Second last column must be sequences and last column must be the cluster id.", metavar = "character"),
Expand Down Expand Up @@ -36,6 +36,9 @@ bed_to_fasta <- function(bedInput, prefix = "", min_seq = 100){
# Split data.table bed on its last column (cluster_no) into list of data.frames
clusters <- split(bed, cluster_no, sorted = TRUE, flatten = FALSE)

# Get number of all clusters
num_clusters <- length(clusters)

# For each data.frame(cluster) in list clusters:
discard <- lapply(seq_len(length(clusters)), function(i){
clust <- as.data.frame(clusters[i])
Expand All @@ -47,10 +50,22 @@ bed_to_fasta <- function(bedInput, prefix = "", min_seq = 100){
outfile <- paste0(prefix,"_cluster_",i - 1,".FASTA")
# Write fasta file
seqinr::write.fasta(sequences = sequences, names = clust[[4]], file.out = outfile, as.string = TRUE)
return(TRUE)
} else {
print(paste0("Cluster: ",i," is to small"))
return(FALSE)
}
})

# Get number of clusters which do not contain enough sequences (discarded clusters)
tmp <- unlist(discard)
count_clust <- length(tmp[tmp == FALSE])

# Write log-file
write(paste0( "------------------------------------------------------------\nfile: ",
bedInput,"\nNumber of all clusters: ", num_clusters, "\nRemoved small clusters ( < ",
min_seq," sequences ): " ,count_clust , "\nRemaining number of clusters: ", num_clusters - count_clust),
file = "bed_to_fasta.log", append = T)
}

# Run function bed_to_fasta with the given parameters if not in an interactive context (e.g. run from shell)
Expand Down
27 changes: 24 additions & 3 deletions pipeline.nf
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,7 @@ process footprint_extraction {

output:
set name, file ('*.bed') into bed_for_overlap_with_TFBS
file('*.log')

script:
"""
Expand Down Expand Up @@ -317,7 +318,7 @@ Converting BED-File to one FASTA-File per cluster
process bed_to_clustered_fasta {

//conda "${path_env}"
publishDir "${params.out}/2.2_motif_estimation/fasta/", mode: 'copy'
publishDir "${params.out}/2.2_motif_estimation/fasta/", mode: 'copy', pattern: '*.FASTA'
tag{name}

input:
Expand All @@ -326,6 +327,7 @@ process bed_to_clustered_fasta {
output:
file ('*.FASTA') into fasta_for_glam2
file ('*.FASTA') into fasta_for_motif_cluster
file ('*.log') into 22_log

script:
"""
Expand Down Expand Up @@ -572,14 +574,33 @@ process tomtom {

//Joining channels with meme and tsv files. Filter joined channel on line count.
//Only meme-files whose corresponding tsv files have a line count <= 1 are written to the next channel.
for_filter2 = for_filter.join( tsv_for_filter )
for_filter.join( tsv_for_filter ).into {for_filter2; for_log}
for_filter2
.filter { name, meme, tsv ->
long count = tsv.readLines().size()
count <= 1
}
.into { meme_for_scan; check }
.into { meme_for_scan; check; num_cluster }

count_cluster = num_cluster.count()
count_cluster_before_filter = for_log.count()

// Appends the result of the motif database filter to the log file emitted on
// channel 22_log and publishes the completed log.
//
// Input tuple (built with combine):
//   log_file      - log file taken from channel 22_log
//   after_filter  - number of items that passed the tsv line-count filter
//   before_filter - number of items before that filter
process write_log_for_motif_estimation {

    publishDir "${params.out}/2.2_motif_estimation/log/", mode: 'copy'

    input:
    // Reuse count_cluster_before_filter (computed once from for_log) instead of
    // calling for_log.count() a second time on the same channel.
    // NOTE(review): input renamed from 'log' to 'log_file' to avoid clashing with
    // Nextflow's implicit 'log' object - confirm this matches the Nextflow version in use.
    set file(log_file), val(after_filter), val(before_filter) from 22_log.combine(count_cluster).combine(count_cluster_before_filter)

    output:
    file ('*.log')

    script:
    // Items dropped by the filter are the motifs that had a hit in the database.
    removed = before_filter - after_filter
    // Append to the staged input file so that the '*.log' output glob matches it
    // (a literal file named 'log' would not be captured).
    """
    printf "\nMotifs found in Database: ${removed}\nNumber of remaining unknown motifs/cluster: ${after_filter}\n" >> ${log_file}
    """
}

//If channel 'check' is empty, print an error message
process check_for_unknown_motifs {
Expand Down

0 comments on commit 1439045

Please sign in to comment.