-
Notifications
You must be signed in to change notification settings - Fork 0
2.2 creates json file containing motif sequences #47
Merged
Merged
Changes from 18 commits
Commits
Show all changes
20 commits
Select commit
Hold shift + click to select a range
5b8bb7a
Merge branch 'dev' into estimation_motifs
renewiegandt fe5ab42
get_best_motif.py: Added alternative name to best_motif file
renewiegandt 79b7e24
get_best_motif.py: removed whitespace from motif header
renewiegandt 8377340
added script get_motif_seq.R
renewiegandt 3f9e4bc
pipeline.nf: implemented get_motif_seq.R
renewiegandt 49ec389
added r packages: RJSONIO, varhandle to masterenv.yml
renewiegandt 98ab76a
Merge branch 'dev' into estimation_motifs
renewiegandt d979e16
get_best_motif.py: added parameter cluster id
renewiegandt ea2e171
get_motif_seq.R: added parameter tmp_path and cluster_id
renewiegandt 8e5049e
pipeline.nf: adjusting to new parameters required by get_best_motif;…
renewiegandt 1439045
Added log file for part 2.2_motif_estimation
renewiegandt cff01fc
Merge branch 'dev' into estimation_motifs
renewiegandt 0f4bf46
added missing shebangs to 2.2 scripts #44
renewiegandt c0a462b
pipeline.nf: adjusting to new parameter required by 2.1 scripts
renewiegandt e62bb36
pipeline.nf: fixed typos
renewiegandt f3b3db4
get_best_motif.py: Removed additional whitespace in motif header
renewiegandt 2d2f8cb
pipeline.nf: removed code for debugging
renewiegandt 43c55ae
bed_to_fasta.R: changes lapply to vapply
renewiegandt 310702e
Removed import os from get_best_motif.py
renewiegandt b70a38b
Removed newline from get_motif_seq
renewiegandt File filter
Filter by extension
Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
#!/usr/bin/env Rscript | ||
# Make sure optparse is installed, then attach it for the CLI definition below.
if (!require(optparse, quietly = TRUE)) {
  install.packages("optparse")
}
library(optparse)

# Command-line options understood by this script.
option_list <- list(
  make_option(
    opt_str = c("-i", "--input"),
    default = NULL,
    help = "Input file. Output txt-file from GLAM2.",
    metavar = "character"
  ),
  make_option(
    opt_str = c("-o", "--output"),
    default = "sequences.json",
    help = "Output JSON-file. Default = '%default'",
    metavar = "character"
  ),
  make_option(
    opt_str = c("-n", "--num"),
    default = 3,
    help = "Get best (num) motifs. Default = '%default'",
    metavar = "numeric"
  ),
  # NOTE(review): the default "./" is not numeric, so create_seq_json rejects
  # it; effectively this makes --cluster_id mandatory. Confirm this is intended.
  make_option(
    opt_str = c("-c", "--cluster_id"),
    default = "./",
    help = "Cluster ID",
    metavar = "numeric"
  ),
  make_option(
    opt_str = c("-t", "--tmp"),
    default = "./",
    help = "Path for tmp-files. Default = '%default'",
    metavar = "character"
  )
)

# Parser carrying the script description and author shown in --help.
opt_parser <- OptionParser(
  option_list = option_list,
  description = "Creating JSON-file with sequence ids which were used to create the best (num) motifs.",
  epilogue = "Author: Rene Wiegandt <Rene.Wiegandt@mpi-bn.mpg.de>"
)

# Parsed command-line arguments, accessed below as opt$<name>.
opt <- parse_args(opt_parser)
|
||
#' Load the first column of a file.
#'
#' Thin wrapper around data.table::fread that parses only column 1.
#' @param path Path to the file to read
#' @return The first column as a vector
read_data <- function(path) {
  first_col <- data.table::fread(path, select = 1)
  first_col[[1]]
}
|
||
|
||
#' Creating JSON-file with sequence ids which were used to create the best (num) motifs.
#'
#' The GLAM2 text output is split with the external tool `csplit` into one
#' chunk per line starting with "Score:". For each of the first `num` motif
#' chunks the lines starting with "f" are written to a `.done` file, whose
#' first column is then read with `read_data` and stored in the JSON output.
#'
#' @param input Input file. Output txt-file from GLAM2.
#' @param output Output JSON-file
#' @param num Get best (num) motifs. Whole number between 1 and 10.
#' @param tmp_path Directory for the temporary chunk files. Created if missing.
#' @param cluster_id Cluster ID (numeric, or a string holding a number). Used in
#'   the names of the JSON entries.
#'
#' @author René Wiegandt <Rene.Wiegandt(at)mpi-bn.mpg.de>
create_seq_json <- function(input, output, num, tmp_path, cluster_id) {

  # A NULL input (the CLI default) would otherwise break the `if` below with
  # "argument is of length zero".
  if (is.null(input) || length(input) != 1L) {
    stop("No input file given. Please provide the path to a GLAM2 txt-file.")
  }

  if (!file.exists(input)) {
    stop(paste0("Input file does not exists. Please check the given path: ", input))
  }

  # Reject non-numeric, missing, vector-valued and fractional values alike.
  if (!is.numeric(num) || length(num) != 1L || is.na(num) || num %% 1 != 0) {
    stop("Parameter num needs to be an integer")
  }

  if (num > 10 || num <= 0) {
    stop(paste0("Parameter 'num' needs to be an number between 1 and 10! Your input: ", num))
  }

  if (!varhandle::check.numeric(cluster_id)) {
    stop(paste0("CLUSTER ID could not be found. Please make sure that your file path contains _[cluster_id] at the end. Found: ", cluster_id,"\n For example: /test_cluster_1/glam.txt"))
  }

  dir.create(tmp_path, showWarnings = FALSE, recursive = TRUE)

  chunk_prefix <- file.path(tmp_path, "f_id_test.pholder")

  # Remove chunk files left over from an earlier run in the same directory so
  # that they cannot be mistaken for results of this run.
  stale <- list.files(tmp_path, pattern = "^f_id_test\\.pholder", full.names = TRUE)
  unlink(stale)

  # Split glam.txt file on lines that start with Score:
  # Arguments are shell-quoted so paths with spaces or metacharacters are safe.
  status <- system2(
    "csplit",
    c(shQuote(input), shQuote("/^Score:.*/"), shQuote("{*}"), "-f", shQuote(chunk_prefix))
  )
  if (!identical(as.integer(status), 0L)) {
    stop(paste0("csplit failed with exit status ", status, " for input: ", input))
  }

  # csplit numbers its output with two digits. Chunk 00 holds everything before
  # the first "Score:" line, so motif k is stored in chunk k. Building the
  # names explicitly also works for num = 10, where a shell glob such as
  # "0[1-10]" would only match the digits 0 and 1.
  motif_idx <- seq_len(num)
  chunk_files <- paste0(chunk_prefix, sprintf("%02d", motif_idx))

  missing_chunks <- chunk_files[!file.exists(chunk_files)]
  if (length(missing_chunks) > 0) {
    stop(paste0(
      "Input file contains fewer than ", num, " motifs. Missing chunk file(s): ",
      paste(missing_chunks, collapse = ", ")
    ))
  }

  # Only keep the lines that start with 'f' to get the lines with the sequence ids
  done_files <- paste0(chunk_files, ".done")
  for (i in motif_idx) {
    chunk_lines <- readLines(chunk_files[i])
    writeLines(chunk_lines[startsWith(chunk_lines, "f")], done_files[i])
  }

  # Running read_data on exactly the files created above, in motif order
  datalist <- lapply(done_files, read_data)

  # Create json file
  ## naming
  names(datalist) <- paste0("Motif_", motif_idx, " Cluster_", cluster_id)
  ## creating json object
  json <- RJSONIO::toJSON(datalist, pretty = TRUE, .withNames = TRUE)
  ## writing file
  write(json, file = output)
}
|
||
# Entry point when executed non-interactively (e.g. via Rscript from a shell):
# run create_seq_json with the parsed parameters, or print the help text when
# no command-line arguments were supplied.
if (!interactive()) {
  cli_args <- commandArgs(trailingOnly = TRUE)
  if (length(cli_args) > 0) {
    create_seq_json(
      input = opt$input,
      output = opt$output,
      num = opt$num,
      tmp_path = opt$tmp,
      cluster_id = opt$cluster_id
    )
  } else {
    print_help(opt_parser)
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
#!/usr/bin/env nextflow | ||
|
||
params.bed = "" | ||
params.out = "" | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
import os
is not used. Should be removed.