Skip to content

Dev to my branch #37

Merged
merged 44 commits into from
Jan 4, 2019
Merged
Changes from 1 commit
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
58c8478
Merge pull request #9 from loosolab/dev
renewiegandt Dec 18, 2018
cc23250
Bugfix in bed_to_fasta.R: Get last and second last instead of fixed i…
renewiegandt Dec 18, 2018
62f6f3f
bed_to_fasta.R: Improved documentation
renewiegandt Dec 18, 2018
cf9dcd8
bed_to_fasta.R: Improved parameter calling with optparse
renewiegandt Dec 19, 2018
6d5c604
adaption of pipeline.nf to changes in bed_to_fasta.R
renewiegandt Dec 19, 2018
ce52871
Refactoring
renewiegandt Dec 19, 2018
98985d1
refactoring; renamed reduce_bed to reduce_sequence
HendrikSchultheis Dec 19, 2018
8faf399
Merge pull request #15 from loosolab/peak_calling
renewiegandt Dec 19, 2018
e0b9d38
check whether jellyfish is installed
HendrikSchultheis Dec 19, 2018
1730868
reduce_bed renamed to reduce_sequence
HendrikSchultheis Dec 19, 2018
3c4f733
get_best_motif.py: fixed bug which caused to print motif header as la…
renewiegandt Dec 19, 2018
88fa298
check whether jellyfish is installed
HendrikSchultheis Dec 19, 2018
e17d1db
check whether cdhit is installed
HendrikSchultheis Dec 19, 2018
dcd185e
omit TODO
HendrikSchultheis Dec 19, 2018
4c16f6f
check for header and forward it if provided
HendrikSchultheis Dec 19, 2018
5a7c84e
automatically detect and keep column names if provided
HendrikSchultheis Dec 19, 2018
97464ca
added author; better missing input error
HendrikSchultheis Dec 19, 2018
cc532bf
added author
HendrikSchultheis Dec 19, 2018
8389226
Fixed typos in get_best_motif.py
renewiegandt Dec 19, 2018
2fca158
Reads BED-files with or without header
renewiegandt Dec 19, 2018
4dea8e4
Improved description for installation in README.md
renewiegandt Dec 20, 2018
1a7a812
Removed snakemake from yaml-file
renewiegandt Dec 20, 2018
4844609
Set parameter organism as required without a default value
renewiegandt Dec 20, 2018
d60faa7
spell check
HendrikSchultheis Dec 20, 2018
46cfc59
Added Parameter gtf_path. If path is set process create_gtf will be s…
renewiegandt Dec 20, 2018
6507643
spell check
HendrikSchultheis Dec 20, 2018
d86f788
fixed more typos
HendrikSchultheis Dec 20, 2018
756e98f
process description for reduce_sequence and clustering
HendrikSchultheis Dec 20, 2018
5e46266
Fixed typo in bed_to_fasta.R
renewiegandt Dec 20, 2018
80963a3
Merge pull request #12 from loosolab/motif_estiamtion
renewiegandt Dec 20, 2018
1c6bcf1
Added new parameter list to README.md
renewiegandt Dec 20, 2018
d70610e
Merge branch 'motif_estiamtion' of https://github.molgen.mpg.de/looso…
renewiegandt Dec 20, 2018
935ba3f
Merge pull request #17 from loosolab/cluster
HendrikSchultheis Dec 21, 2018
13bccda
Fixed bug in pipeline.nf: parameter gtf_path is now working
renewiegandt Dec 21, 2018
181fc68
Merge pull request #21 from loosolab/motif_estiamtion
renewiegandt Dec 22, 2018
e29ad65
Merge pull request #24 from loosolab/peak_calling
renewiegandt Jan 3, 2019
b7c80c8
sorting scripts depending on their function
renewiegandt Jan 3, 2019
83460e9
Renaming output paths
renewiegandt Jan 3, 2019
fde8a8b
install optparse if not yet installed; added missing author to docume…
HendrikSchultheis Jan 3, 2019
1c392da
Merge pull request #30 from loosolab/cluster
HendrikSchultheis Jan 3, 2019
ab6f883
Merge branch 'dev' into estimation_motifs
renewiegandt Jan 3, 2019
8993670
Merge pull request #28 from loosolab/estimation_motifs
renewiegandt Jan 3, 2019
e629131
missing points in readme
anastasiia Jan 3, 2019
c9e7c82
Merge pull request #31 from loosolab/anastasiia-patch-1
renewiegandt Jan 3, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Prev Previous commit
Next Next commit
refactoring; renamed reduce_bed to reduce_sequence
HendrikSchultheis committed Dec 19, 2018
commit 98985d13512ede1d8c352207ea002d84522a2766
38 changes: 21 additions & 17 deletions bin/reduce_bed.R → bin/reduce_sequence.R
Original file line number Diff line number Diff line change
@@ -9,32 +9,35 @@ option_list <- list(
make_option(opt_str = c("-t", "--threads"), default = 1, help = "Number of threads to use. Use 0 for all available cores. Default = %default", metavar = "integer"),
make_option(opt_str = c("-c", "--clean"), default = TRUE, help = "Delete all temporary files. Default = %default", metavar = "logical"),
make_option(opt_str = c("-s", "--min_seq_length"), default = NULL, help = "Remove sequences below this length. Defaults to the maximum value of motif and kmer and can not be lower.", metavar = "integer", type = "integer"),
make_option(opt_str = c("-n", "--minoverlap_kmer"), default = NULL, help = "Minimum required overlap between kmer to merge kmer. Used to create reduced sequence ranges. Can not be greater than kmer length. Default = kmer - 1", metavar = "integer", type = "integer"),
make_option(opt_str = c("-n", "--minoverlap_kmer"), default = NULL, help = "Minimum required overlap between kmer. Used to create reduced sequence ranges out of merged kmer. Can not be greater than kmer length. Default = kmer - 1", metavar = "integer", type = "integer"),
make_option(opt_str = c("-v", "--minoverlap_motif"), default = NULL, help = "Minimum required overlap between motif and kmer to consider kmer significant. Used for kmer cutoff calculation. Can not be greater than motif and kmer length. Default = ceiling(motif / 2)", metavar = "integer", type = "integer"),
make_option(opt_str = c("-f", "--motif_occurence"), default = 1, help = "Number of motifs per sequence any value above 0. Default = %default.", metavar = "double")
make_option(opt_str = c("-f", "--motif_occurence"), default = 1, help = "Define how many motifs are expected per sequence. This value is used during kmer cutoff calculation. Default = %default meaning that there should be approximately one motif per sequence.", metavar = "double")
)

opt_parser <- OptionParser(option_list = option_list,
description = "Reduce sequences to frequent regions.")
description = "Reduces each sequence to its most frequent region.")

opt <- parse_args(opt_parser)

#' Reduce bed file to conserved regions
#' Reduces each sequence to its most frequent region.
#'
#' @param input bed file
#' @param kmer Length of kmer.
#' @param motif Estimated motif length.
#' @param output Output file
#' @param threads Number of threads. Default = 1. 0 for all cores.
#' @param input Input bed-file. Last column must be sequences.
#' @param kmer Kmer length. Default = 10
#' @param motif Estimated motif length. Default = 10
#' @param output Output file. Default = reduced.bed
#' @param threads Number of threads to use. Default = 1. Use 0 for all cores.
#' @param clean Delete all temporary files.
#' @param minoverlap_kmer Minimum required overlap between kmer to merge kmer. Used to create reduced sequence ranges. Can not be greater than kmer length. Default = kmer - 1
#' @param minoverlap_kmer Minimum required overlap between kmer. Used to create reduced sequence ranges out of merged kmer. Can not be greater than kmer length. Default = kmer - 1
#' @param minoverlap_motif Minimum required overlap between motif and kmer to consider kmer significant. Used for kmer cutoff calculation. Can not be greater than motif and kmer length. Default = ceiling(motif / 2)
#' @param min_seq_length Must be greater or equal to kmer and motif. Default = max(c(motif, kmer)).
#' @param min_seq_length Remove sequences below this length. Defaults to the maximum value of motif and kmer and can not be lower.
#' @param motif_occurence Define how many motifs are expected per sequence. This value is used during kmer cutoff calculation. Default = 1 meaning that there should be approximately one motif per sequence.
#'
#' @return reduced bed
#' TODO check whether jellyfish is installed
reduce_bed <- function(input, kmer = 10, motif = 10, output = "reduced.bed", threads = NULL, clean = TRUE, minoverlap_kmer = kmer - 1, minoverlap_motif = ceiling(motif / 2), min_seq_length = max(c(motif, kmer)), motif_occurence = 1) {
reduce_sequence <- function(input, kmer = 10, motif = 10, output = "reduced.bed", threads = NULL, clean = TRUE, minoverlap_kmer = kmer - 1, minoverlap_motif = ceiling(motif / 2), min_seq_length = max(c(motif, kmer)), motif_occurence = 1) {
if (missing(input)) {
stop("No input specified! Please forward a valid bed-file.")
}

# get number of available cores
if (threads == 0) {
threads <- parallel::detectCores()
@@ -117,12 +120,12 @@ reduce_bed <- function(input, kmer = 10, motif = 10, output = "reduced.bed", thr
merged[, sequence := stringr::str_sub(sequence, relative_start, relative_end)]

# bed files count from 0
merged[, `:=`(relative_start = relative_start - 1, relative_end = relative_end - 1)]
merged[, data.table::`:=`(relative_start = relative_start - 1, relative_end = relative_end - 1)]
# change start end location
merged[, `:=`(start = start + relative_start, end = start + relative_end)]
merged[, data.table::`:=`(start = start + relative_start, end = start + relative_end)]

# clean table
merged[, `:=`(relative_start = NULL, relative_end = NULL, width = NULL)]
merged[, data.table::`:=`(relative_start = NULL, relative_end = NULL, width = NULL)]

if (clean) {
file.remove(fasta_file, count_output_binary, mer_count_table)
@@ -171,6 +174,7 @@ significant_kmer <- function(bed, kmer, motif, minoverlap = ceiling(motif / 2),
reduce_kmer <- function(kmer, significant) {
data.table::setorderv(kmer, cols = names(kmer)[2], order = -1)

# TODO don't use 'V2'
kmer[, cumsum := cumsum(V2)]

return(kmer[cumsum <= significant])
@@ -255,5 +259,5 @@ if (!interactive()) {
pbo <- pbapply::pboptions(type = "timer")
# remove last parameter (help param)
params <- opt[-length(opt)]
do.call(reduce_bed, args = params)
do.call(reduce_sequence, args = params)
}