From f800e77658e321bfc8e78e238cddc5e573cae464 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Wiegandt?= Date: Sun, 6 Jan 2019 05:51:55 -0500 Subject: [PATCH] bed_to_fasta.R: improved syntax; fixed typos --- bin/2.2_motif_estimation/bed_to_fasta.R | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/bin/2.2_motif_estimation/bed_to_fasta.R b/bin/2.2_motif_estimation/bed_to_fasta.R index 8006e64..7e716b7 100644 --- a/bin/2.2_motif_estimation/bed_to_fasta.R +++ b/bin/2.2_motif_estimation/bed_to_fasta.R @@ -2,18 +2,19 @@ if (!require(optparse)) install.packages("optparse"); library(optparse) option_list <- list( - make_option(opt_str = c("-i", "--input"), default = NULL, help = "Input bed-file. Second last column must be sequences and last column must be the cluster_id.", metavar = "character"), + make_option(opt_str = c("-i", "--input"), default = NULL, help = "Input bed-file. Second last column must be sequences and last column must be the cluster id.", metavar = "character"), make_option(opt_str = c("-p", "--prefix"), default = "" , help = "Prefix for file names. Default = '%default'", metavar = "character"), make_option(opt_str = c("-m", "--min_seq"), default = 100, help = "Minimum amount of sequences in clusters. Default = %default", metavar = "integer") ) opt_parser <- OptionParser(option_list = option_list, - description = "Convert BED-file to one FASTA-file per cluster") + description = "Convert BED-file to one FASTA-file per cluster.", + epilogue = "Author: Rene Wiegandt ") opt <- parse_args(opt_parser) #' Splitting BED-files depending on their cluster. -#' The Sequences of each cluster are written as an FASTA-file. +#' The Sequences of each cluster are written as a FASTA-file. #' @param bedInput BED-file with sequences and cluster-id as last two columns: #' Sequence: second last column; Cluster ID: last column #' @param prefix prefix for filenames @@ -30,13 +31,13 @@ bed_to_fasta <- function(bedInput, prefix = "", min_seq = 100){ bed <- data.table::fread(bedInput, sep = "\t") # Get last column of data.table, which refers to the cluster, as a vector. - cluster_no <- as.vector(bed[[ncol(bed)]]) + cluster_no <- bed[[ncol(bed)]] # Split data.table bed on its last column (cluster_no) into list of data.frames clusters <- split(bed, cluster_no, sorted = TRUE, flatten = FALSE) # For each data.frame(cluster) in list clusters: - discard <- lapply(1:length(clusters), function(i){ + discard <- lapply(seq_len(length(clusters)), function(i){ clust <- as.data.frame(clusters[i]) # Filter data.tables(clusters), which are to small if (nrow(clust) >= as.numeric(min_seq) ) {