Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
TOuCAN/bin/normalize_paired_bed.R
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
153 lines (121 sloc)
3.72 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/env Rscript | |
# the input file | |
infile <- "../test_data/SRR2922388.srt.matrix.bed" | |
# the output file | |
outfile <- "normalized_data.bed" | |
# the normalization to perform C("FPM", "array") | |
normalization <- "none" | |
# don't use the self self locations | |
remove_self_ligations <- FALSE | |
# load the libraries | |
library_path <- "." | |
# should we provide verbose output | |
verbose <- FALSE | |
# predefine the region | |
region <- NULL | |
valid_normalization_methods <- c("FPM", "array", "none", "log") | |
# | |
require(getopt, quietly = TRUE) | |
# set the commandline option specifications | |
specification <- matrix(c( | |
"bedfile", "b", 1, "character", | |
"outfile", "o", 2, "character", | |
"chromosome", "c", 2, "character", | |
"start", "s", 2, "integer", | |
"end", "e", 2, "integer", | |
"remove-self", "r", 0, "logical", | |
"method", "m", 1, "character", | |
"library", "l", 1, "character", | |
"verbose", "v", 0, "logical", | |
"help", "h", 0, "logical" | |
), byrow = TRUE, ncol = 4) | |
# parse the commandline options | |
commandline_options <- getopt(specification) | |
# print the usage if help was asked | |
if(!is.null(commandline_options[["help"]])) { | |
cat(getopt(specification, usage = TRUE)) | |
q(status = 1) | |
} | |
if(!is.null(commandline_options[["verbose"]])) { | |
verbose <- TRUE | |
} | |
if(!is.null(commandline_options[["bedfile"]])){ | |
infile <- commandline_options[["bedfile"]] | |
} | |
if(!is.null(commandline_options[["outfile"]])){ | |
outfile <- commandline_options[["outfile"]] | |
} | |
if(!is.null(commandline_options[["chromosome"]]) && !is.null(commandline_options[["start"]]) && !is.null(commandline_options[["end"]])){ | |
region <- list( | |
"chr" = commandline_options[["chromosome"]], | |
"start" = commandline_options[["start"]], | |
"end" = commandline_options[["end"]]) | |
} | |
if(!is.null(commandline_options[["method"]])){ | |
if(!commandline_options[["method"]] %in% valid_normalization_methods){ | |
message(paste("Normalization method", commandline_options[["method"]], "is invalid")) | |
message(paste("Valid normalization methods are", paste(valid_normalization_methods, collapse = " "))) | |
message(getopt(specification, usage = TRUE)) | |
q(status = 1) | |
} | |
normalization <- commandline_options[["method"]] | |
} | |
if(!is.null(commandline_options[["remove-self"]])){ | |
remove_self_ligations <- TRUE | |
} | |
if(!is.null(commandline_options[["library"]])){ | |
library_path <- commandline_options[["library"]] | |
} | |
if(verbose){ | |
message(paste("BED file:", infile)) | |
message(paste("Output file:", outfile)) | |
message(paste("Region:", paste(region, collapse = " "))) | |
message(paste("Normalization method:", normalization)) | |
message(paste("Valid normalization methods:", paste(valid_normalization_methods, collapse = ", "))) | |
message(paste("Remove self:", remove_self_ligations)) | |
message(paste("Library path:", library_path)) | |
} | |
#' Load the data | |
#' | |
source(file.path(library_path, "bed_readers.R")) | |
source(file.path(library_path, "normalize.R")) | |
#' | |
#' Main analysis loop | |
#' | |
#' Load the datasets | |
#' | |
message(paste("Reading file", infile)) | |
data <- read_paired_bed(infile) | |
#' Remove self-self ligations | |
#' | |
message(paste("Filtering entries")) | |
if(remove_self_ligations) { | |
tf.self <- is_self_ligation(data) | |
data <- data[!tf.self,] | |
} | |
#' Remove the fragments not in the target region | |
#' | |
if(!is.null(region)){ | |
tf.in_region <- select_region(data, region) | |
data <- data[tf.region,] | |
} | |
#' Normalize the data | |
#' | |
if(!is.null(normalization)) { | |
if(normalization == "FPM") { | |
message("Performing fragments per million (FPM) normalization") | |
data <- normalization_fpm(data) | |
} | |
if(normalization == "array") { | |
message("Performing array normalization") | |
data <- normalization_array(data) | |
} | |
if(normalization == "log") { | |
message("Performing log normalization") | |
data <- normalization_log(data) | |
} | |
} | |
#' Write the BED files | |
#' | |
message(paste("Writing output to", outfile)) | |
write_paired_bed(outfile, data) |