Skip to content
Permalink
e7fbf8d086
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
153 lines (121 sloc) 3.72 KB
#!/bin/env Rscript
# the input file
infile <- "../test_data/SRR2922388.srt.matrix.bed"
# the output file
outfile <- "normalized_data.bed"
# the normalization to perform C("FPM", "array")
normalization <- "none"
# don't use the self self locations
remove_self_ligations <- FALSE
# load the libraries
library_path <- "."
# should we provide verbose output
verbose <- FALSE
# predefine the region
region <- NULL
valid_normalization_methods <- c("FPM", "array", "none", "log")
#
require(getopt, quietly = TRUE)
# set the commandline option specifications
specification <- matrix(c(
"bedfile", "b", 1, "character",
"outfile", "o", 2, "character",
"chromosome", "c", 2, "character",
"start", "s", 2, "integer",
"end", "e", 2, "integer",
"remove-self", "r", 0, "logical",
"method", "m", 1, "character",
"library", "l", 1, "character",
"verbose", "v", 0, "logical",
"help", "h", 0, "logical"
), byrow = TRUE, ncol = 4)
# parse the commandline options
commandline_options <- getopt(specification)
# print the usage if help was asked
if(!is.null(commandline_options[["help"]])) {
cat(getopt(specification, usage = TRUE))
q(status = 1)
}
if(!is.null(commandline_options[["verbose"]])) {
verbose <- TRUE
}
if(!is.null(commandline_options[["bedfile"]])){
infile <- commandline_options[["bedfile"]]
}
if(!is.null(commandline_options[["outfile"]])){
outfile <- commandline_options[["outfile"]]
}
if(!is.null(commandline_options[["chromosome"]]) && !is.null(commandline_options[["start"]]) && !is.null(commandline_options[["end"]])){
region <- list(
"chr" = commandline_options[["chromosome"]],
"start" = commandline_options[["start"]],
"end" = commandline_options[["end"]])
}
if(!is.null(commandline_options[["method"]])){
if(!commandline_options[["method"]] %in% valid_normalization_methods){
message(paste("Normalization method", commandline_options[["method"]], "is invalid"))
message(paste("Valid normalization methods are", paste(valid_normalization_methods, collapse = " ")))
message(getopt(specification, usage = TRUE))
q(status = 1)
}
normalization <- commandline_options[["method"]]
}
if(!is.null(commandline_options[["remove-self"]])){
remove_self_ligations <- TRUE
}
if(!is.null(commandline_options[["library"]])){
library_path <- commandline_options[["library"]]
}
if(verbose){
message(paste("BED file:", infile))
message(paste("Output file:", outfile))
message(paste("Region:", paste(region, collapse = " ")))
message(paste("Normalization method:", normalization))
message(paste("Valid normalization methods:", paste(valid_normalization_methods, collapse = ", ")))
message(paste("Remove self:", remove_self_ligations))
message(paste("Library path:", library_path))
}
#' Load the data
#'
source(file.path(library_path, "bed_readers.R"))
source(file.path(library_path, "normalize.R"))
#'
#' Main analysis loop
#'
#' Load the datasets
#'
message(paste("Reading file", infile))
data <- read_paired_bed(infile)
#' Remove self-self ligations
#'
message(paste("Filtering entries"))
if(remove_self_ligations) {
tf.self <- is_self_ligation(data)
data <- data[!tf.self,]
}
#' Remove the fragments not in the target region
#'
if(!is.null(region)){
tf.in_region <- select_region(data, region)
data <- data[tf.region,]
}
#' Normalize the data
#'
if(!is.null(normalization)) {
if(normalization == "FPM") {
message("Performing fragments per million (FPM) normalization")
data <- normalization_fpm(data)
}
if(normalization == "array") {
message("Performing array normalization")
data <- normalization_array(data)
}
if(normalization == "log") {
message("Performing log normalization")
data <- normalization_log(data)
}
}
#' Write the BED files
#'
message(paste("Writing output to", outfile))
write_paired_bed(outfile, data)