diff --git a/bin/2.2_motif_estimation/bed_to_fasta.R b/bin/2.2_motif_estimation/bed_to_fasta.R index e0ade14..8006e64 100644 --- a/bin/2.2_motif_estimation/bed_to_fasta.R +++ b/bin/2.2_motif_estimation/bed_to_fasta.R @@ -1,5 +1,5 @@ #!/usr/bin/env Rscript -library("optparse") +if (!require(optparse)) install.packages("optparse"); library(optparse) option_list <- list( make_option(opt_str = c("-i", "--input"), default = NULL, help = "Input bed-file. Second last column must be sequences and last column must be the cluster_id.", metavar = "character"), @@ -7,7 +7,7 @@ option_list <- list( make_option(opt_str = c("-m", "--min_seq"), default = 100, help = "Minimum amount of sequences in clusters. Default = %default", metavar = "integer") ) -opt_parser <- OptionParser(option_list = option_list, +opt_parser <- OptionParser(option_list = option_list, description = "Convert BED-file to one FASTA-file per cluster") opt <- parse_args(opt_parser) @@ -22,19 +22,19 @@ opt <- parse_args(opt_parser) #' @author René Wiegandt #' @contact rene.wiegandt(at)mpi-bn.mpg.de bed_to_fasta <- function(bedInput, prefix = "", min_seq = 100){ - + if (is.null(bedInput)) { stop("ERROR: Input parameter cannot be null! Please specify the input parameter.") } bed <- data.table::fread(bedInput, sep = "\t") - + # Get last column of data.table, which refers to the cluster, as a vector. cluster_no <- as.vector(bed[[ncol(bed)]]) - + # Split data.table bed on its last column (cluster_no) into list of data.frames clusters <- split(bed, cluster_no, sorted = TRUE, flatten = FALSE) - + # For each data.frame(cluster) in list clusters: discard <- lapply(1:length(clusters), function(i){ clust <- as.data.frame(clusters[i])