From ea2e1710e418e9ef61f3542b33b7fa8f11ff5f48 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ren=C3=A9=20Wiegandt?= <rene.wiegandt@mpi-bn.mpg.de>
Date: Wed, 9 Jan 2019 08:24:12 -0500
Subject: [PATCH] get_motif_seq.R: added parameter tmp_path and cluster_id

---
 bin/2.2_motif_estimation/get_motif_seq.R | 42 ++++++++++++------------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/bin/2.2_motif_estimation/get_motif_seq.R b/bin/2.2_motif_estimation/get_motif_seq.R
index b79ac8d..6f298b3 100644
--- a/bin/2.2_motif_estimation/get_motif_seq.R
+++ b/bin/2.2_motif_estimation/get_motif_seq.R
@@ -4,7 +4,9 @@ if (!require(optparse, quietly = T)) install.packages("optparse"); library(optpa
 option_list <- list(
   make_option(opt_str = c("-i", "--input"), default = NULL, help = "Input file. Output txt-file from GLAM2.", metavar = "character"),
   make_option(opt_str = c("-o", "--output"), default = "sequences.json" , help = "Output JSON-file. Default = '%default'", metavar = "character"),
-  make_option(opt_str = c("-n", "--num"), default = 3 , help = "Get best (num) motifs. Default = '%default'", metavar = "numeric")
+  make_option(opt_str = c("-n", "--num"), default = 3 , help = "Get best (num) motifs. Default = '%default'", metavar = "numeric"),
+  make_option(opt_str = c("-c", "--cluster_id"), default = "./" , help = "Cluster ID", metavar = "numeric"),
+  make_option(opt_str = c("-t", "--tmp"), default = "./" , help = "Path for tmp-files. Default = '%default'", metavar = "character")
 )
 
 opt_parser <- OptionParser(option_list = option_list,
@@ -18,55 +20,53 @@ opt <- parse_args(opt_parser)
 #' @param path Path to file
 #' @return first column as vector
 read_data <- function(path){
-  
+
   f <- data.table::fread(path, select = 1)
   return(f[[1]])
 }
 
 
 #' Creating JSON-file with sequence ids which were used to create the best (num) motifs.
-#' 
+#'
 #' @param input Input file.Output txt-file from GLAM2.
 #' @param output Output JSON-file
 #' @param num Get best (num) motifs.
-#' 
-#' @author René Wiegandt <Rene.Wiegandt(at)mpi-bn.mpg.de>
-create_seq_json <- function(input, output, num) {
-  
+#'
+#' @author René Wiegandt <Rene.Wiegandt(at)mpi-bn.mpg.de>
+create_seq_json <- function(input, output, num, tmp_path, cluster_id) {
+
   if (!file.exists(input)) {
     stop(paste0("Input file does not exists. Please check the given path: ", input))
   }
-  
+
   if ( !is.numeric(num)) {
     stop("Parameter num needs to be an integer")
   }
-  
+
   if (num > 10 || num <= 0 ) {
     stop(paste0("Parameter 'num' needs to be an number between 1 and 10! Your input: ", num))
   }
 
-  # Getting cluster id
-  split_path <- unlist(strsplit(dirname(input),'_'))
-  cluster_id <- split_path[[length(split_path)]]
-  
   if ( !varhandle::check.numeric(cluster_id)) {
     stop(paste0("CLUSTER ID could not be found. Please make sure that your file path contains _[cluster_id] at the end. Found: ", cluster_id,"\n For example: /test_cluster_1/glam.txt"))
   }
-  
-  file_dir <- dirname(input)
-  
+
+  dir.create(tmp_path, showWarnings = FALSE)
+
+  file_dir <- tmp_path
+
   # Split glam.txt file on lines that start with Score:
   system(paste0("csplit ", input, " '/^Score:.*/' '{*}' -f ", file_dir, "/f_id_test.pholder"))
   # Only keep the lines that start with 'f' to get the lines with the sequence ids
   system(paste0("for i in ", file_dir, "/*.pholder0[1-", num, "];do grep \"^f\" $i > \"${i}.done\";done"))
-  
+
   # Getting the filepaths of first 3 files with sequence ids
   fnames <- file.path(file_dir,dir(file_dir, pattern = "done"))
-  
+
   # Running read_data on files
   datalist <- lapply(fnames, read_data)
-  
-  
+
+
   # Create json file
   ## naming
   names(datalist) <- paste0(c("Motif_", "Motif_", "Motif_"),seq(1,as.numeric(num),1) , " Cluster_", cluster_id)
@@ -81,6 +81,6 @@ if (!interactive()) {
   if (length(commandArgs(trailingOnly = TRUE)) <= 0) {
     print_help(opt_parser)
   } else {
-    create_seq_json(opt$input, opt$output, opt$num)
+    create_seq_json(opt$input, opt$output, opt$num, opt$tmp, opt$cluster_id)
   }
 }