Skip to content

Commit

Permalink
Bugfix in bed_to_fasta.R: Get last and second last instead of fixed indices
Browse files Browse the repository at this point in the history
  • Loading branch information
renewiegandt committed Dec 18, 2018
1 parent 58c8478 commit cc23250
Showing 1 changed file with 34 additions and 28 deletions.
62 changes: 34 additions & 28 deletions bin/bed_to_fasta.R
Original file line number Diff line number Diff line change
@@ -1,28 +1,34 @@
#!/usr/bin/env Rscript

# Splits a BED file by cluster.
# The sequences of each cluster are written to one FASTA file per cluster.
# NOTE(review): the column positions documented for bedInput below (7 and 8)
# do not match the code, which reads the cluster id from column 11 (V11),
# the sequence from column 10 and the sequence name from column 4 -- confirm
# which is intended.
# @parameter bedInput <string> BED-file with sequences and cluster-id as columns: Sequence: Column 7; ID:Column 8
# @parameter prefix <string> prefix for filenames
# @parameter min_seq <INT> min. number of sequences per cluster

# Positional command-line arguments: <bedInput> <prefix> <min_seq>.
args = commandArgs(trailingOnly = TRUE)

bedInput <- args[1]
prefix <- args[2]
# Still a string here; it is converted with as.numeric() where it is compared.
min_seq <- args[3]

# Read the tab-separated input without a header, so columns are named V1, V2, ...
bed <- data.table::fread(bedInput, header = FALSE, sep = "\t")

# Group the rows by the value of the fixed column V11.
clusters <- split(bed, bed$V11, sorted = TRUE, flatten = FALSE) # <---- cluster column (fixed index 11)
# Visit every cluster by its 1-based position in `clusters`; the result of
# lapply() is only assigned so that nothing is auto-printed.
discard <- lapply(1:length(clusters), function(i){
# clusters[i] is a one-element list; as.data.frame() turns it into a data frame.
clust <- as.data.frame(clusters[i])
# Prints the number of rows (sequences) of the current cluster.
print(nrow(clust))
# Only clusters with at least min_seq rows are written out.
if (nrow(clust) >= as.numeric(min_seq) ) {
sequences <- as.list(clust[[10]]) # <---- sequence column (fixed index 10)
# Output file name: <prefix>_cluster_<i>.FASTA, with i being the 1-based position.
outfile <- paste0(prefix,"_cluster_",i,".FASTA")
seqinr::write.fasta(sequences = sequences, names = clust[[4]], file.out = outfile, as.string = TRUE) # <---- name column (fixed index 4)
} else {
# Report clusters that were skipped because they have fewer than min_seq rows.
print(paste0("Cluster: ",i," is to small"))
}
})
#!/usr/bin/env Rscript

#' Splits a BED file by cluster and writes the sequences of each cluster
#' to a FASTA file.
#'
#' The input is read as a tab-separated table without a header. The cluster id
#' is taken from the LAST column, the sequence from the SECOND-TO-LAST column
#' and the sequence name from column 4. For every cluster holding at least
#' `min_seq` rows one file "<prefix>_cluster_<n>.FASTA" is written, where <n>
#' is the zero-based position of the cluster in the split result.
#' NOTE(review): <n> presumably equals the cluster id when the ids are
#' contiguous and start at 0 -- confirm against the clustering step.
#'
#' @parameter bedInput <string> BED-file with the sequence in the second-to-last column and the cluster-id in the last column
#' @parameter prefix <string> prefix for filenames
#' @parameter min_seq <INT> min. number of sequences per cluster
#'
#' @author René Wiegandt
#' @contact rene.wiegandt(at)mpi-bn.mpg.de

args <- commandArgs(trailingOnly = TRUE)

# Fail early with a usage hint instead of an opaque error further down.
if (length(args) < 3) {
  stop("Usage: bed_to_fasta.R <bedInput> <prefix> <min_seq>", call. = FALSE)
}

bedInput <- args[1]
prefix <- args[2]
min_seq <- args[3]

# Convert the threshold once; a non-numeric value would otherwise only surface
# as "missing value where TRUE/FALSE needed" inside the loop.
min_seq_num <- as.numeric(min_seq)
if (is.na(min_seq_num)) {
  stop("min_seq must be a number, got: ", min_seq, call. = FALSE)
}

bed <- data.table::fread(bedInput, header = FALSE, sep = "\t")

# Column 4 (name), the second-to-last (sequence) and the last (cluster) column
# are all accessed by position below, so at least four columns are required.
if (ncol(bed) < 4) {
  stop("Expected at least 4 columns in ", bedInput, ", found ", ncol(bed),
       call. = FALSE)
}

cluster_col <- ncol(bed)
sequence_col <- cluster_col - 1

# Get last column of data.table, which refers to the cluster, as a vector.
cluster_no <- as.vector(bed[[cluster_col]])

clusters <- split(bed, cluster_no, sorted = TRUE, flatten = FALSE)

# seq_along() yields an empty sequence for an empty split result, whereas
# 1:length(clusters) would iterate over c(1, 0).
for (i in seq_along(clusters)) {
  clust <- clusters[[i]]
  # Zero-based number used for the file name AND the log message, so both
  # refer to the same cluster.
  cluster_label <- i - 1
  if (nrow(clust) >= min_seq_num) {
    sequences <- as.list(clust[[sequence_col]])
    outfile <- paste0(prefix, "_cluster_", cluster_label, ".FASTA")
    seqinr::write.fasta(
      sequences = sequences,
      names = clust[[4]],
      file.out = outfile,
      as.string = TRUE
    )
  } else {
    print(paste0("Cluster: ", cluster_label, " is to small"))
  }
}

0 comments on commit cc23250

Please sign in to comment.