# concatenate_rna_info.R

library(data.table)

if (!require(optparse)) install.packages("optparse"); library(optparse)

# Command-line interface ----
# The long flag keeps its historical spelling "--treshold" so that existing
# invocations keep working. `dest` stores the parsed value under the correctly
# spelled name "threshold", which is the argument name test_fun() declares;
# without it the value would be passed on as `treshold` and rejected as an
# unused argument.
option_list <- list(
  make_option(
    opt_str = c("-b", "--rna_input"),
    default = NULL,
    help = "Input TSV-file with RNA-seq information",
    metavar = "character"
  ),
  make_option(
    opt_str = c("-t", "--treshold"),
    dest = "threshold",
    default = 10,
    help = "Input the threshold to cut the baseMeanB",
    type = "integer"
  )
)

opt_parser <- OptionParser(
  option_list = option_list,
  description = "This script creates a TSV-file with information from the RNA-seq with EnsemblID and B-baseMean",
  epilogue = "Author: Anastasiia Petrova <Anastasiia.Petrova@mpi-bn.mpg.de>"
)

# Named list of option values (plus optparse's automatic `help` entry).
opt <- parse_args(opt_parser)

#' Extract Ensembl ids and base means above a threshold from an RNA-seq table
#'
#' Reads a tab-separated RNA-seq table, keeps the rows whose base-mean column
#' is strictly greater than `threshold`, sorts them by that column in
#' descending order and writes the id / base-mean pairs to `output_file`
#' (space-separated, no header, no quoting).
#'
#' @param tobias_input Currently unused. Placeholder for a TOBIAS bindetect
#'   results file; the original draft ranked TFs by `mDuxNeg_mDuxPos_change`
#'   and took the top `sample_size` names plus a random sample of the same
#'   size, but that part is not implemented yet.
#' @param sample_size Currently unused, see `tobias_input`.
#' @param rna_input Path to the tab-separated RNA-seq file (with header).
#' @param threshold Rows with a base mean less than or equal to this value
#'   (or with a missing base mean) are dropped.
#' @param base_mean_col Name of the base-mean column to filter and sort on.
#' @param ens_id_col Name of the column holding the Ensembl gene ids.
#' @param output_file Path of the file to write.
#' @return `NULL`, invisibly; the function is called for its side effect of
#'   writing `output_file`.
test_fun <- function(tobias_input = NULL, sample_size = NULL, rna_input,
                     threshold,
                     base_mean_col =
                       "mdux-GFPneg-rna_vs_mdux-GFPpos-rna baseMeanB mdux-GFPpos-rna",
                     ens_id_col = "Ensembl gene id",
                     output_file = "test.txt") {
  # read the input file as data table
  rna_seq <- fread(rna_input, header = TRUE, sep = "\t", fill = TRUE)

  # A missing column would otherwise silently yield an empty output file.
  missing_cols <- setdiff(c(ens_id_col, base_mean_col), names(rna_seq))
  if (length(missing_cols) > 0) {
    stop(
      "Column(s) not found in ", rna_input, ": ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  base_mean <- rna_seq[[base_mean_col]]

  # which() drops NA comparisons, matching data.table's row filtering.
  keep <- which(base_mean > threshold)

  # sort the kept rows by base mean, descending
  keep <- keep[order(-base_mean[keep])]

  output <- cbind(rna_seq[[ens_id_col]][keep], base_mean[keep])

  # Passing the path lets write.table() open and close the file itself, so no
  # connection is left open if writing fails.
  write.table(
    output,
    file = output_file,
    row.names = FALSE,
    col.names = FALSE,
    quote = FALSE
  )

  invisible(NULL)
}

# Run the extraction with the parsed command-line options ----

# --rna_input has no default, so without it the call below would only fail
# deep inside test_fun() with an unhelpful "argument is missing" error.
# Show the usage text and stop with a clear message instead.
if (is.null(opt[["rna_input"]])) {
  print_help(opt_parser)
  stop("No RNA-seq input file given; use -b/--rna_input", call. = FALSE)
}

# Drop optparse's automatic `help` entry by name (not by position, which
# silently depends on the order in which parse_args() returns the options);
# the remaining entries are passed to test_fun() as named arguments.
params <- opt[setdiff(names(opt), "help")]
do.call(test_fun, args = params)