Skip to content

Cluster #3

Merged
merged 15 commits into from
Dec 6, 2018
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
fixed ambiguous flags + naming bug in find_kmer_regions
HendrikSchultheis committed Dec 6, 2018
commit cd53d3c4dc58f4eaadfdbe556dbb8532c2ea46af
10 changes: 5 additions & 5 deletions bin/reduce_bed.R
Original file line number Diff line number Diff line change
@@ -9,8 +9,8 @@ option_list <- list(
make_option(opt_str = c("-t", "--threads"), default = 1, help = "Number of threads to use. Use 0 for all available cores. Default = %default", metavar = "integer"),
make_option(opt_str = c("-c", "--clean"), default = TRUE, help = "Delete all temporary files. Default = %default", metavar = "logical"),
make_option(opt_str = c("-s", "--min_seq_length"), default = NULL, help = "Remove sequences below this length. Defaults to the maximum value of motif and kmer and can not be lower.", metavar = "integer", type = "integer"),
make_option(opt_str = c("-ko", "--minoverlap_kmer"), default = NULL, help = "Minimum required overlap between kmer to merge kmer. Used to create reduced sequence ranges. Can not be greater than kmer length. Default = kmer - 1", metavar = "integer", type = "integer"),
make_option(opt_str = c("-mo", "--minoverlap_motif"), default = NULL, help = "Minimum required overlap between motif and kmer to consider kmer significant. Used for kmer cutoff calculation. Can not be greater than motif and kmer length. Default = ceiling(motif / 2)", metavar = "integer", type = "integer"),
make_option(opt_str = c("-n", "--minoverlap_kmer"), default = NULL, help = "Minimum required overlap between kmer to merge kmer. Used to create reduced sequence ranges. Can not be greater than kmer length. Default = kmer - 1", metavar = "integer", type = "integer"),
make_option(opt_str = c("-v", "--minoverlap_motif"), default = NULL, help = "Minimum required overlap between motif and kmer to consider kmer significant. Used for kmer cutoff calculation. Can not be greater than motif and kmer length. Default = ceiling(motif / 2)", metavar = "integer", type = "integer"),
make_option(opt_str = c("-f", "--motif_occurence"), default = 1, help = "Number of motifs per sequence any value above 0. Default = %default.", metavar = "double")
)

@@ -196,14 +196,15 @@ find_kmer_regions <- function(bed, kmer_counts, minoverlap = 1 , threads = NULL)

seq_ranges <- pbapply::pblapply(seq_len(nrow(bed)), cl = threads, function(x) {
seq <- bed[x][[ncol(bed)]]
name <- bed[x][[4]]

#### locate ranges
ranges <- data.table::data.table(do.call(rbind, stringi::stri_locate_all_fixed(seq, pattern = kmer_counts[[1]])))

ranges <- na.omit(ranges, cols = c("start", "end"))

if (nrow(ranges) < 1) {
return(data.table::data.table(start = NA, end = NA, width = NA))
return(data.table::data.table(start = NA, end = NA, width = NA, name = name))
}

# add kmer sequences
@@ -237,14 +238,13 @@ find_kmer_regions <- function(bed, kmer_counts, minoverlap = 1 , threads = NULL)
# reduce selected ranges
reduced_ranges <- IRanges::reduce(reduced_ranges[selected_ranges])

reduced_ranges <- data.table::as.data.table(reduced_ranges)
reduced_ranges <- data.table::as.data.table(reduced_ranges)[, name := name]

return(reduced_ranges)
})

# create ranges table
conserved_regions_table <- data.table::rbindlist(seq_ranges)
conserved_regions_table[, name := bed[[4]]]

return(conserved_regions_table)
}