Skip to content

Cluster #3

Merged
merged 15 commits into from
Dec 6, 2018
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Prev Previous commit
Next Next commit
implemented minoverlap_kmer & minoverlap_motif
HendrikSchultheis committed Dec 5, 2018
commit 689331e5f4cf7156f118db1f2aa5ebb3041eb817
18 changes: 10 additions & 8 deletions bin/reduce_bed.R
Original file line number Diff line number Diff line change
@@ -8,7 +8,9 @@ option_list <- list(
make_option(opt_str = c("-o", "--output"), default = "reduced.bed", help = "Output file. Default = %default", metavar = "character"),
make_option(opt_str = c("-t", "--threads"), default = 1, help = "Number of threads to use. Use -1 for all available cores. Default = %default", metavar = "integer"),
make_option(opt_str = c("-c", "--clean"), default = TRUE, help = "Delete all temporary files. Default = %default", metavar = "logical"),
make_option(opt_str = c("-s", "--min_seq_length"), default = NULL, help = "Remove sequences below this length. Defaults to the maximum value of motif and kmer and can not be lower.", metavar = "integer", type = "integer")
make_option(opt_str = c("-s", "--min_seq_length"), default = NULL, help = "Remove sequences below this length. Defaults to the maximum value of motif and kmer and can not be lower.", metavar = "integer", type = "integer"),
make_option(opt_str = c("-ko", "--minoverlap_kmer"), default = NULL, help = "Minimum required overlap between kmer to merge kmer. Used to create reduced sequence ranges. Can not be greater than kmer length. Default = kmer - 1", metavar = "integer", type = "integer"),
make_option(opt_str = c("-mo", "--minoverlap_motif"), default = NULL, help = "Minimum required overlap between motif and kmer to consider kmer significant. Used for kmer cutoff calculation. Can not be greater than motif and kmer length. Default = ceiling(motif / 2)", metavar = "integer", type = "integer")
# TODO more args
)

@@ -25,13 +27,13 @@ opt <- parse_args(opt_parser)
#' @param output Output file
#' @param threads Number of threads.
#' @param clean Delete all temporary files.
#' @param minoverlap_kmer Minimum required overlap between kmer to merge kmer. Used to create reduced sequence ranges.# TODO
#' @param minoverlap_motif Minimum required overlap between motif and kmer to consider kmer significant. Used for kmer cutoff calculation.# TODO
#' @param minoverlap_kmer Minimum required overlap between kmer to merge kmer. Used to create reduced sequence ranges. Can not be greater than kmer length. Default = kmer - 1
#' @param minoverlap_motif Minimum required overlap between motif and kmer to consider kmer significant. Used for kmer cutoff calculation. Can not be greater than motif and kmer length. Default = ceiling(motif / 2)
#' @param min_seq_length Must be greater or equal to kmer and motif. Default = max(c(motif, kmer)).
#'
#' @return reduced bed
#' TODO check whether jellyfish is installed
reduce_bed <- function(input, kmer = 10, motif = 10, output = "reduced.bed", threads = NULL, clean = TRUE, minoverlap_kmer, minoverlap_motif, min_seq_length = max(c(motif, kmer))) {
reduce_bed <- function(input, kmer = 10, motif = 10, output = "reduced.bed", threads = NULL, clean = TRUE, minoverlap_kmer = kmer - 1, minoverlap_motif = ceiling(motif / 2), min_seq_length = max(c(motif, kmer))) {
# get number of available cores
if (threads == -1) {
threads <- parallel::detectCores()
@@ -91,16 +93,16 @@ reduce_bed <- function(input, kmer = 10, motif = 10, output = "reduced.bed", thr
data.table::setorder(kmer_counts, -V2)

# compute number of hits to keep
keep_hits <- significant_kmer(bed_table, kmer = kmer, motif = motif)
keep_hits <- significant_kmer(bed_table, kmer = kmer, motif = motif, minoverlap = minoverlap_motif)

# reduce kmer
reduced_kmer <- reduce_kmer(kmer = kmer_counts, keep_hits)
reduced_kmer <- reduce_kmer(kmer = kmer_counts, significant = keep_hits)

message("Find kmer in sequences.")
# find k-mer in sequences
# TODO minoverlap as parameter
# columns: name, start, end, width
ranges_table <- find_kmer_regions(bed = bed_table, kmer_counts = reduced_kmer, minoverlap = kmer - 1, threads = threads)
ranges_table <- find_kmer_regions(bed = bed_table, kmer_counts = reduced_kmer, minoverlap = minoverlap_kmer, threads = threads)
names(ranges_table)[1:2] <- c("relative_start", "relative_end")

# merge ranged_table with bed_table + keep column order
@@ -139,7 +141,7 @@ reduce_bed <- function(input, kmer = 10, motif = 10, output = "reduced.bed", thr
#' @return Number of interesting kmer.
significant_kmer <- function(bed, kmer, motif, minoverlap = ceiling(motif / 2)) {
if (minoverlap > kmer || minoverlap > motif) {
stop("Kmer & motif must be greater or equal than minoverlap!")
stop("Kmer & motif must be greater or equal to minoverlap!")
}

# minimum sequence length to get all interesting overlaps