#!/bin/env Rscript # the input file infile <- "../test_data/SRR2922388.srt.matrix.bed" # the output file outfile <- "normalized_data.bed" # the normalization to perform C("FPM", "array") normalization <- "none" # don't use the self self locations remove_self_ligations <- FALSE # load the libraries library_path <- "." # should we provide verbose output verbose <- FALSE # predefine the region region <- NULL valid_normalization_methods <- c("FPM", "array", "none", "log") # require(getopt, quietly = TRUE) # set the commandline option specifications specification <- matrix(c( "bedfile", "b", 1, "character", "outfile", "o", 2, "character", "chromosome", "c", 2, "character", "start", "s", 2, "integer", "end", "e", 2, "integer", "remove-self", "r", 0, "logical", "method", "m", 1, "character", "library", "l", 1, "character", "verbose", "v", 0, "logical", "help", "h", 0, "logical" ), byrow = TRUE, ncol = 4) # parse the commandline options commandline_options <- getopt(specification) # print the usage if help was asked if(!is.null(commandline_options[["help"]])) { cat(getopt(specification, usage = TRUE)) q(status = 1) } if(!is.null(commandline_options[["verbose"]])) { verbose <- TRUE } if(!is.null(commandline_options[["bedfile"]])){ infile <- commandline_options[["bedfile"]] } if(!is.null(commandline_options[["outfile"]])){ outfile <- commandline_options[["outfile"]] } if(!is.null(commandline_options[["chromosome"]]) && !is.null(commandline_options[["start"]]) && !is.null(commandline_options[["end"]])){ region <- list( "chr" = commandline_options[["chromosome"]], "start" = commandline_options[["start"]], "end" = commandline_options[["end"]]) } if(!is.null(commandline_options[["method"]])){ if(!commandline_options[["method"]] %in% valid_normalization_methods){ message(paste("Normalization method", commandline_options[["method"]], "is invalid")) message(paste("Valid normalization methods are", paste(valid_normalization_methods, collapse = " "))) message(getopt(specification, usage = TRUE)) q(status = 1) } normalization <- commandline_options[["method"]] } if(!is.null(commandline_options[["remove-self"]])){ remove_self_ligations <- TRUE } if(!is.null(commandline_options[["library"]])){ library_path <- commandline_options[["library"]] } if(verbose){ message(paste("BED file:", infile)) message(paste("Output file:", outfile)) message(paste("Region:", paste(region, collapse = " "))) message(paste("Normalization method:", normalization)) message(paste("Valid normalization methods:", paste(valid_normalization_methods, collapse = ", "))) message(paste("Remove self:", remove_self_ligations)) message(paste("Library path:", library_path)) } #' Load the data #' source(file.path(library_path, "bed_readers.R")) source(file.path(library_path, "normalize.R")) #' #' Main analysis loop #' #' Load the datasets #' message(paste("Reading file", infile)) data <- read_paired_bed(infile) #' Remove self-self ligations #' message(paste("Filtering entries")) if(remove_self_ligations) { tf.self <- is_self_ligation(data) data <- data[!tf.self,] } #' Remove the fragments not in the target region #' if(!is.null(region)){ tf.in_region <- select_region(data, region) data <- data[tf.region,] } #' Normalize the data #' if(!is.null(normalization)) { if(normalization == "FPM") { message("Performing fragments per million (FPM) normalization") data <- normalization_fpm(data) } if(normalization == "array") { message("Performing array normalization") data <- normalization_array(data) } if(normalization == "log") { message("Performing log normalization") data <- normalization_log(data) } } #' Write the BED files #' message(paste("Writing output to", outfile)) write_paired_bed(outfile, data)