#!/usr/bin/env Rscript
if (!require(optparse, quietly = T)) install.packages("optparse"); library(optparse)

# Command line interface ----
# Every entry is one flag of the script; '%default' inside a help text is
# substituted by optparse with the value given as `default`.
option_list <- list(
  make_option(
    c("-i", "--input"),
    default = NULL,
    help = "Input file. Output txt-file from GLAM2.",
    metavar = "character"
  ),
  make_option(
    c("-o", "--output"),
    default = "sequences.json",
    help = "Output JSON-file. Default = '%default'",
    metavar = "character"
  ),
  make_option(
    c("-n", "--num"),
    default = 3,
    help = "Get best (num) motifs. Default = '%default'",
    metavar = "numeric"
  ),
  make_option(
    c("-c", "--cluster_id"),
    default = "./",
    help = "Cluster ID",
    metavar = "numeric"
  ),
  make_option(
    c("-t", "--tmp"),
    default = "./",
    help = "Path for tmp-files. Default = '%default'",
    metavar = "character"
  )
)

# Parser object; it is also used further down to print the help text.
opt_parser <- OptionParser(
  option_list = option_list,
  description = "Creating JSON-file with sequence ids which were used to create the best (num) motifs.",
  epilogue = "Author: Rene Wiegandt <Rene.Wiegandt@mpi-bn.mpg.de>"
)

# Named list holding the parsed command line values (opt$input, opt$num, ...).
opt <- parse_args(opt_parser)

#' Read the first column of a file.
#'
#' The file is parsed with data.table::fread, restricted to its first column.
#'
#' @param path Path to file
#' @return The first column of the file as a vector
read_data <- function(path) {
  first_column <- data.table::fread(path, select = 1)
  first_column[[1]]
}


#' Creating JSON-file with sequence ids which were used to create the best (num) motifs.
#'
#' The GLAM2 output is split with the external tool `csplit` at every line
#' starting with "Score:". From each of the first `num` resulting chunks the
#' lines starting with "f" are kept, their first column is read with
#' `read_data` and the resulting list is written as JSON.
#'
#' @param input Input file. Output txt-file from GLAM2.
#' @param output Output JSON-file
#' @param num Get best (num) motifs. Whole number between 1 and 10.
#' @param tmp_path Directory for the intermediate files. Created if missing.
#'   Files named 'f_id_test.pholder*' inside it are replaced on every call.
#' @param cluster_id Cluster ID. Has to pass varhandle::check.numeric; it is
#'   used in the names of the JSON entries.
#'
#' @author René Wiegandt <Rene.Wiegandt(at)mpi-bn.mpg.de>
create_seq_json <- function(input, output, num, tmp_path, cluster_id) {

  # The command line default for the input is NULL; file.exists(NULL) would
  # end in an unhelpful "argument is of length zero" error.
  if (is.null(input) || length(input) != 1 || is.na(input)) {
    stop("No input file given. Please provide the txt-file from GLAM2.")
  }

  if (!file.exists(input)) {
    stop(paste0("Input file does not exists. Please check the given path: ", input))
  }

  # num is used to build file names, so it has to be a single whole number.
  if (!is.numeric(num) || length(num) != 1 || is.na(num) || num != round(num)) {
    stop("Parameter num needs to be an integer")
  }

  if (num > 10 || num <= 0) {
    stop(paste0("Parameter 'num' needs to be an number between 1 and 10! Your input: ", num))
  }

  if (!varhandle::check.numeric(cluster_id)) {
    stop(paste0("CLUSTER ID could not be found. Please make sure that your file path contains _[cluster_id] at the end. Found: ", cluster_id,"\n For example: /test_cluster_1/glam.txt"))
  }

  dir.create(tmp_path, showWarnings = FALSE, recursive = TRUE)

  file_dir <- tmp_path
  chunk_prefix <- file.path(file_dir, "f_id_test.pholder")

  # Remove chunks of earlier runs, otherwise a previous (longer) input could
  # make it look as if the current input contains more motifs than it does.
  stale_files <- list.files(file_dir, pattern = "^f_id_test\\.pholder",
                            full.names = TRUE)
  unlink(stale_files)

  # Split glam.txt file on lines that start with Score:
  # csplit numbers its output files with two digits (00, 01, ...); chunk 00
  # holds everything in front of the first "Score:" line.
  # All arguments are quoted so that paths with spaces or shell
  # metacharacters are passed through unchanged.
  split_status <- system(paste(
    "csplit", "-f", shQuote(chunk_prefix), "--", shQuote(input),
    shQuote("/^Score:.*/"), shQuote("{*}")
  ))
  if (split_status != 0) {
    stop(paste0("csplit failed with exit status ", split_status, " for input: ", input))
  }

  # Chunks 01 .. num belong to the best (num) motifs. The names are built
  # explicitly because a shell range like [1-10] does not match chunk 10.
  chunk_files <- sprintf("%s%02d", chunk_prefix, seq_len(num))
  chunk_found <- file.exists(chunk_files)
  if (!all(chunk_found)) {
    stop(paste0("Only ", sum(chunk_found), " motif(s) found in ", input,
                " but the best ", num, " were requested."))
  }

  # Only keep the lines that start with 'f' to get the lines with the sequence ids
  fnames <- paste0(chunk_files, ".done")
  for (i in seq_along(chunk_files)) {
    chunk_lines <- readLines(chunk_files[i], warn = FALSE)
    writeLines(grep("^f", chunk_lines, value = TRUE), fnames[i])
  }

  # Running read_data on exactly the files written above, in motif order
  datalist <- lapply(fnames, read_data)

  # Create json file
  ## naming: one name per motif, whatever the value of num is
  names(datalist) <- paste0("Motif_", seq_len(num), " Cluster_", cluster_id)
  ## creating json object
  json <- RJSONIO::toJSON(datalist, pretty = TRUE, .withNames = TRUE)
  ## writing file
  write(json, file = output)
}

# Script entry point: only active when the file is not run interactively
# (e.g. started from the shell). With at least one command line argument the
# parsed options are handed to create_seq_json, otherwise the help is shown.
if (!interactive()) {
  cli_args <- commandArgs(trailingOnly = TRUE)
  if (length(cli_args) > 0) {
    create_seq_json(
      input = opt$input,
      output = opt$output,
      num = opt$num,
      tmp_path = opt$tmp,
      cluster_id = opt$cluster_id
    )
  } else {
    print_help(opt_parser)
  }
}