bed_to_fasta.R: improved syntax; fixed typos

loosolab · Jan 6, 2019 · f800e77 · f800e77
1 parent 2064e52
commit f800e77
Showing 1 changed file with 6 additions and 5 deletions.
diff --git a/bin/2.2_motif_estimation/bed_to_fasta.R b/bin/2.2_motif_estimation/bed_to_fasta.R
@@ -2,18 +2,19 @@
 if (!require(optparse)) install.packages("optparse"); library(optparse)
 
 option_list <- list(
-  make_option(opt_str = c("-i", "--input"), default = NULL, help = "Input bed-file. Second last column must be sequences and last column must be the cluster_id.", metavar = "character"),
+  make_option(opt_str = c("-i", "--input"), default = NULL, help = "Input bed-file. Second last column must be sequences and last column must be the cluster id.", metavar = "character"),
   make_option(opt_str = c("-p", "--prefix"), default = "" , help = "Prefix for file names. Default = '%default'", metavar = "character"),
   make_option(opt_str = c("-m", "--min_seq"), default = 100, help = "Minimum amount of sequences in clusters. Default = %default", metavar = "integer")
 )
 
 opt_parser <- OptionParser(option_list = option_list,
-                           description = "Convert BED-file to one FASTA-file per cluster")
+                           description = "Convert BED-file to one FASTA-file per cluster.",
+                           epilogue = "Author: Rene Wiegandt <Rene.Wiegandt@mpi-bn.mpg.de>")
 
 opt <- parse_args(opt_parser)
 
 #' Splitting BED-files depending on their cluster.
-#' The Sequences of each cluster are written as an FASTA-file.
+#' The sequences of each cluster are written as a FASTA-file.
 #' @param bedInput <string> BED-file with sequences and cluster-id as last two columns:
 #'                              Sequence: second last column; Cluster ID: last column
 #' @param prefix <string> prefix for filenames
@@ -30,13 +31,13 @@ bed_to_fasta <- function(bedInput, prefix = "", min_seq = 100){
   bed <- data.table::fread(bedInput, sep = "\t")
 
   # Get last column of data.table, which refers to the cluster, as a vector.
-  cluster_no <- as.vector(bed[[ncol(bed)]])
+  cluster_no <- bed[[ncol(bed)]]
 
   # Split data.table bed on its last column (cluster_no) into list of data.frames
   clusters <- split(bed, cluster_no, sorted = TRUE, flatten = FALSE)
 
   # For each data.frame(cluster) in list clusters:
-  discard <- lapply(1:length(clusters), function(i){
+  discard <- lapply(seq_len(length(clusters)), function(i){
     clust <- as.data.frame(clusters[i])
     # Filter data.tables (clusters) which are too small
     if (nrow(clust) >= as.numeric(min_seq) ) {