Skip to content

Estimation motifs #95

Merged
merged 5 commits into from
Apr 16, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
20 changes: 9 additions & 11 deletions bin/2.2_motif_estimation/png_to_pdf.R
Original file line number Diff line number Diff line change
Expand Up @@ -39,16 +39,16 @@ png_to_pdf <- function(png_top, png_list, cluster_ids, new_id, out = "cluster.pd
grobs <- lapply(png_split,function(p){rasterGrob(readPNG(p),interpolate = FALSE)})

split_grobs <- split(grobs, rep(1:ceiling(length(grobs)/3), each = 3)[1:length(grobs)])

rows <- lapply(seq(1,length(split_grobs)), function(sg){
arrangeGrob(grobs = split_grobs[[sg]], top = textGrob(paste0("Cluster ",cluster_list[sg]),gp = gpar(fontsize = 20,font = 3)), ncol = 3)
})
split_rows <- split(rows, rep(1:ceiling(length(rows)/4), each = 4)[1:length(rows)])

pdf(out,width = 17, height = 11)
grid.arrange(grobs = top_grob, nrow = 3, top = textGrob(paste0("New Cluster ",new_id),gp = gpar(fontsize = 30,font = 3)))
lapply(split_rows, function(r){
grid.arrange(grobs = r, nrow = 4, top = textGrob("Generated from...",gp = gpar(fontsize = 30,font = 3)))
grid.arrange(grobs = r, nrow = 4, top = textGrob("Generated from...",gp = gpar(fontsize = 30,font = 3)))
})

dev.off()
Expand All @@ -68,27 +68,25 @@ get_index <- function(index_file){
index <- f[,c(2,8)]
index$V8 <- strsplit(index$V8,",")
return(index[unlist(lapply(index$V8, function(v){ifelse(length(v) > 1, TRUE, FALSE)}))])

}

}

# TODO: png_top and png_list are not yet resolved per cluster; the call to
# png_to_pdf below still passes placeholder strings instead of real paths.

# Calls png_to_pdf once for every row of the index returned by get_index.
#
# Arguments:
#   png_top    - currently unused (see TODO above).
#   png_list   - currently unused (see TODO above).
#   index_file - passed unchanged to get_index().
#
# For each row, column 1 is used as the new cluster id and column 2 as the
# ids of the clusters it was built from; the output file is named
# "Summary_cluster_<new_id>.pdf".
#
# Returns (invisibly, as the value of the final assignment) the list of
# results of the individual png_to_pdf calls.
png_to_pdf_set_up <- function(png_top, png_list, index_file){

  index <- get_index(index_file)
  # seq_len() gives an empty sequence for an empty index, whereas
  # seq(nrow(index)) would iterate over c(1, 0).
  t <- lapply(seq_len(nrow(index)), function(i){
    new_id <- index[i,1]
    cluster_ids <- index[i,2]
    out <- paste0("Summary_cluster_", new_id, ".pdf")
    #regex_png <- paste0("*_cluster_", new_id , "/logo${NUM}.png") #????
    png_to_pdf(png_top = "TODO" ,png_list = "TODO" ,cluster_ids = cluster_ids, new_id = new_id, out = out )
  })

}


# Run png_to_pdf_set_up with the given parameters when the script is not in an
# interactive context (e.g. when it is run from the shell).
# NOTE(review): `opt` is not defined in the visible part of this file;
# presumably it is the parsed command-line option list - confirm.
if (!interactive()) {
png_to_pdf_set_up(opt$png_new, opt$png_old , opt$index)
}



77 changes: 50 additions & 27 deletions pipeline.nf
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,51 @@ disable_mo_clu = 1
//evaluation
params.max_uropa_runs = 10

if (params.bigwig == "" || params.bed == "" || params.organism == "" || params.genome_fasta == "" || params.motif_db == "" || params.config == "" || (params.gtf_annotation == "" && params.gtf_merged == "" ) || "${params.help}" != "0" ) {
log.info """
Usage: nextflow run pipeline.nf --bigwig [BigWig-file] --bed [BED-file] --genome_fasta [FASTA-file] --motif_db [MEME-file] --config [UROPA-config-file]
/*
Checking for parameter input!
*/
// Names of parameters grouped under "int" (presumably validated as integers
// in the per-parameter check further down - confirm).
// "edge_weight" used to be listed twice; each name now appears once.
int_params = ["window_length", "step", "min_size_fp", "max_size_fp", "kmer",
              "aprox_motif_len", "motif_occurrence", "min_seq_length", "global",
              "sequence_coverage", "memory", "throw_away_seq", "strand",
              "min_seq", "motif_min_key", "motif_max_key", "iteration",
              "edge_weight", "best_motif", "min_gap", "gap_penalty",
              "threads", "max_uropa_runs"]

// Parameters whose value is a path; the per-parameter check tests these with
// file(value).exists().
file_params = ["bigwig", "bed", "genome_fasta", "motif_db", "config", "gtf_annotation"]

// Every known parameter name; any other key triggers the "unknown parameter"
// warning.
all_params = int_params + file_params + ["organism", "identity", "tfbsscan_method",
              "percentage", "tomtom_treshold", "motif_similarity_thresh", "out",
              "tissues", "gtf_merged", "cluster_motif", "tfbs_path", "help", "seed"]

// Parameters that must not be left empty (gtf_annotation may be replaced by
// gtf_merged, which the required-parameter check handles separately).
req_params = file_params + ["organism"]

// Accepted values, matching the options listed for --organism in the usage
// text; where the two lists are enforced is not visible here.
valid_organism = ["hg38", "hg19", "mm9", "mm10"]
valid_tfbsscan_methods = ["moods","fimo"]
// Names of required parameters that were left empty; send_help is read by the
// usage/help check that follows this section.
missing_params = []
send_help = false

for (key in req_params) {
    // Only an empty string counts as "not provided".
    if (params[key] != "") {
        continue
    }
    if (key == "gtf_annotation") {
        // gtf_annotation may be omitted as long as gtf_merged is given.
        if (params.gtf_merged == "") {
            missing_params.add("$key or gtf_merged")
        }
    } else {
        missing_params.add(key)
    }
}

// Any missing required parameter forces the usage text to be shown.
val_missing = !missing_params.isEmpty()
if (val_missing) {
    send_help = true
    println("Error: Following required parameters are missing: $missing_params")
}

if (send_help || "${params.help}" != "0") {
log.info """
Usage: nextflow run pipeline.nf --bigwig [BigWig-file] --bed [BED-file] --genome_fasta [FASTA-file] --motif_db [MEME-file] --config [UROPA-config-file]

Required arguments:
--bigwig Path to BigWig-file
Expand All @@ -79,7 +121,7 @@ if (params.bigwig == "" || params.bed == "" || params.organism == "" || params.g
--motif_db Path to motif-database in MEME-format
--config Path to UROPA configuration file
--gtf_annotation Path to gtf annotation file
--organism Input organism [hg38 | hg19 | mm9 | mm10]
--organism Input organism [hg38 | hg19 | mm9 | mm10]
--out Output Directory (Default: './out/')

Optional arguments:
Expand Down Expand Up @@ -132,9 +174,8 @@ if (params.bigwig == "" || params.bed == "" || params.organism == "" || params.g
config
Evaluation:
--max_uropa_runs INT Maximum number UROPA runs running parallelized (Default: 10)
All arguments can be set in the configuration files
```
"""
All arguments can be set in the configuration files
"""
System.exit(2)
} else {
Channel.fromPath(params.bigwig).map {it -> [it.simpleName, it]}.set {bigwig_input}
Expand All @@ -150,25 +191,6 @@ if (params.bigwig == "" || params.bed == "" || params.organism == "" || params.g
}
}



/*
Checking for parameter input!
*/
int_params = ["window_length", "step", "min_size_fp", "max_size_fp", "kmer",
"aprox_motif_len", "motif_occurrence", "min_seq_length", "global",
"sequence_coverage", "memory", "throw_away_seq", "strand",
"min_seq", "motif_min_key", "motif_max_key", "iteration",
"edge_weight", "best_motif", "min_gap", "gap_penalty", "edge_weight",
"threads", "max_uropa_runs"]
req_params = ["bigwig", "bed", "genome_fasta", "motif_db", "config"]
all_params = int_params + req_params + ["organism" , "identity", "tfbsscan_method",
"percentage", "tomtom_treshold", "motif_similarity_thresh", "out",
"tissues", "gtf_merged", "cluster_motif", "tfbs_path", "help", "gtf_annotation", "seed"]

valid_organism = ["hg38", "hg19", "mm9", "mm10"]
valid_tfbsscan_methods = ["moods","fimo"]

params.each { key, value ->
if (!(all_params.contains(key))){
println("Warning: Parameter $key is unknown. Please check for typos or the parameter list!")
Expand All @@ -179,13 +201,14 @@ params.each { key, value ->
System.exit(2)
}
}
if(req_params.contains(key) || (key == "gtf_merged" && value != "") ) {
if(file_params.contains(key) || (key == "gtf_merged" && value != "") ) {
if(!file(value).exists()) {
println("ERROR: $key not found. Please check the given path.")
System.exit(2)
}
}
}

if (!("${params.identity}" ==~ /^0\.[8-9][[0-9]*]?|^1(\.0)?/ )){
println("ERROR: --identity needs to be float in range 0.8 to 1.0")
System.exit(2)
Expand Down