#!/bin/env Rscript


# the input file
infile <- "../test_data/SRR2922388.srt.matrix.bed"

# the output file
outfile <- "normalized_data.bed"

# the normalization to perform C("FPM", "array")
normalization <- "none"

# don't use the self self locations
remove_self_ligations <- FALSE

# load the libraries
library_path <- "."

# should we provide verbose output
verbose <- FALSE

# predefine the region
region <- NULL

valid_normalization_methods <- c("FPM", "array", "none", "log")

#
require(getopt, quietly = TRUE)

# set the commandline option specifications
specification <- matrix(c(
	"bedfile", "b", 1, "character",
	"outfile", "o", 2, "character",
	"chromosome", "c", 2, "character",
	"start", "s", 2, "integer",
	"end", "e", 2, "integer",
	"remove-self", "r", 0, "logical",
	"method", "m", 1, "character",
	"library", "l", 1, "character",
	"verbose", "v", 0, "logical",
	"help", "h", 0, "logical"
), byrow = TRUE, ncol = 4)

# parse the commandline options
commandline_options <- getopt(specification)

# print the usage if help was asked
if(!is.null(commandline_options[["help"]])) {
	cat(getopt(specification, usage = TRUE))
	q(status = 1)
}

if(!is.null(commandline_options[["verbose"]])) {
	verbose <- TRUE	
}

if(!is.null(commandline_options[["bedfile"]])){
	infile <- commandline_options[["bedfile"]]
}

if(!is.null(commandline_options[["outfile"]])){
	outfile <- commandline_options[["outfile"]]
}

if(!is.null(commandline_options[["chromosome"]]) && !is.null(commandline_options[["start"]]) && !is.null(commandline_options[["end"]])){
	region <- list(
		"chr" = commandline_options[["chromosome"]],
		"start" = commandline_options[["start"]],
		"end" = commandline_options[["end"]])
}

if(!is.null(commandline_options[["method"]])){
	if(!commandline_options[["method"]] %in% valid_normalization_methods){
		message(paste("Normalization method", commandline_options[["method"]], "is invalid"))
		message(paste("Valid normalization methods are", paste(valid_normalization_methods, collapse = " ")))
		message(getopt(specification, usage = TRUE))
		q(status = 1)
	}
	normalization <- commandline_options[["method"]]
}

if(!is.null(commandline_options[["remove-self"]])){
	remove_self_ligations <- TRUE
}

if(!is.null(commandline_options[["library"]])){
	library_path <- commandline_options[["library"]]
}

if(verbose){
	message(paste("BED file:", infile))
	message(paste("Output file:", outfile))
	message(paste("Region:", paste(region, collapse = " ")))
	message(paste("Normalization method:", normalization))
	message(paste("Valid normalization methods:", paste(valid_normalization_methods, collapse = ", ")))
	message(paste("Remove self:", remove_self_ligations))
	message(paste("Library path:", library_path))
}

#' Load the data
#'
source(file.path(library_path, "bed_readers.R"))
source(file.path(library_path, "normalize.R"))
 
#'
#' Main analysis loop 
#'

#' Load the datasets
#'
message(paste("Reading file", infile))
data <- read_paired_bed(infile)

#' Remove self-self ligations
#'
message(paste("Filtering entries"))
if(remove_self_ligations) {
	tf.self <- is_self_ligation(data)
	data <- data[!tf.self,]
}

#' Remove the fragments not in the target region
#'
if(!is.null(region)){
	tf.in_region <- select_region(data, region)
	data <- data[tf.region,]
}

#' Normalize the data
#'
if(!is.null(normalization)) {
	
	if(normalization == "FPM") {
		message("Performing fragments per million (FPM) normalization")
		data <- normalization_fpm(data)
	}
	
	if(normalization == "array") {
		message("Performing array normalization")
		data <- normalization_array(data)
	}
	
	if(normalization == "log") {
		message("Performing log normalization")
		data <- normalization_log(data)
	}

}

#' Write the BED files
#'
message(paste("Writing output to", outfile))
write_paired_bed(outfile, data)