Skip to content

Commit

Permalink
Merge pull request #93 from loosolab/estimation_motifs
Browse files Browse the repository at this point in the history
Estimation motifs
  • Loading branch information
renewiegandt authored Mar 28, 2019
2 parents d6b44ea + bce6e1b commit 8f9fbee
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 24 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,13 +54,14 @@ Required arguments:
--genome_fasta Path to genome in FASTA-format
--motif_db Path to motif-database in MEME-format
--config Path to UROPA configuration file
--gtf_annotation Path to gtf annotation file
--organism Input organism [hg38 | hg19 | mm9 | mm10]
--out Output Directory (Default: './out/')
Optional arguments:
--help [0|1] 1 to show this help message. (Default: 0)
--gtf_path Path to gtf-file. If path is set the process which creates a gtf-file is skipped.
--gtf_merged Path to gtf-file. If path is set the process which creates a gtf-file is skipped.
--tfbs_path Path to directory with tfbsscan output. If given tfbsscan will be skipped.
Footprint extraction:
Expand Down
10 changes: 5 additions & 5 deletions bin/1.2_filter_motifs/compareBed.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
# BED-format.

# For parameter description, run the script without parameters or -h.
# The output is a file with the filtered footprints and the log file compareBed.log
# The output is a file with the filtered footprints and the log file filter_motifs.log

# One R scripts is used, compareBed_runinfo.R, stored in the same directory.
# Parameter values (file/directory paths) must not contain the | symbol, because it is chosen as "sed"-field separator.
Expand Down Expand Up @@ -264,7 +264,7 @@ fi

# remove short/long motivs, make unique ids (relevant for some splitted tfbs from subtract) and handle maxScorePosition
# also creates a small output file with information about the comparison
Rscript $path/compareBed_runinfo.R $min $max $data "$workdir"/filtered.bed "$workdir"/filtered_flagged.bed "$workdir"/compareBed.log
Rscript $path/compareBed_runinfo.R $min $max $data "$workdir"/filtered.bed "$workdir"/filtered_flagged.bed "$workdir"/filter_motifs.log
# check if Rscript executed without errors
if [ $? -gt 0 ]
then
Expand All @@ -279,15 +279,15 @@ then
# add initial number of footprints to the log file
fp_initial=`cat $data | wc -l`
fp_initial=`expr $fp_initial - 1`
echo $fp_initial | sed 's/^/initial number of footprints: /g' >> "$workdir"/compareBed.log
echo $fp_initial | sed 's/^/initial number of footprints: /g' >> "$workdir"/filter_motifs.log
else
# output will be overwritten if it exists
rm -f $output
# add initial number of footprints to the log file
cat $data | wc -l | sed 's/^/initial number of footprints: /g' >> "$workdir"/compareBed.log
cat $data | wc -l | sed 's/^/initial number of footprints: /g' >> "$workdir"/filter_motifs.log
fi
# add number of footprints after filtering to the log file
cat "$workdir"/filtered_flagged.bed | wc -l | sed 's/^/number of footprints after subtract: /g' >> "$workdir"/compareBed.log
cat "$workdir"/filtered_flagged.bed | wc -l | sed 's/^/number of footprints after subtract: /g' >> "$workdir"/filter_motifs.log

# add fasta sequences to bed and create fasta file
out_fasta=`echo $output | sed "s|.bed$|.fasta|g"`
Expand Down
40 changes: 22 additions & 18 deletions pipeline.nf
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ disable_mo_clu = 1
params.config=""
params.tfbs_path=""
params.help = 0
params.gtf_path=""
params.gtf2=""
params.gtf_merged=""
params.gtf_annotation=""
params.out = "./out/"

//footprint_extraction
Expand Down Expand Up @@ -68,7 +68,7 @@ disable_mo_clu = 1
//evaluation
params.max_uropa_runs = 10

if (params.bigwig == "" || params.bed == "" || params.organism == "" || params.genome_fasta == "" || params.motif_db == "" || params.config == "" || params.gtf2 == "" || "${params.help}" != "0" ) {
if (params.bigwig == "" || params.bed == "" || params.organism == "" || params.genome_fasta == "" || params.motif_db == "" || params.config == "" || (params.gtf_annotation == "" && params.gtf_merged == "" ) || "${params.help}" != "0" ) {
log.info """
Usage: nextflow run pipeline.nf --bigwig [BigWig-file] --bed [BED-file] --genome_fasta [FASTA-file] --motif_db [MEME-file] --config [UROPA-config-file]
Expand All @@ -78,13 +78,14 @@ if (params.bigwig == "" || params.bed == "" || params.organism == "" || params.g
--genome_fasta Path to genome in FASTA-format
--motif_db Path to motif-database in MEME-format
--config Path to UROPA configuration file
--gtf_annotation Path to gtf annotation file
--organism Input organism [hg38 | hg19 | mm9 | mm10]
--out Output Directory (Default: './out/')
Optional arguments:
--help [0|1] 1 to show this help message. (Default: 0)
--gtf_path Path to gtf-file. If path is set the process which creates a gtf-file is skipped.
--gtf_merged Path to gtf-file. If path is set the process which creates a gtf-file is skipped.
--tfbs_path Path to directory with tfbsscan output. If given tfbsscan will be skipped.
Footprint extraction:
Expand Down Expand Up @@ -141,7 +142,9 @@ if (params.bigwig == "" || params.bed == "" || params.organism == "" || params.g
Channel.fromPath(params.genome_fasta).into {fa_overlap; fa_scan; fa_overlap_2}
Channel.fromPath(params.motif_db).into {db_for_motivscan; db_for_tomtom}
Channel.fromPath(params.config).set {config}
Channel.fromPath(params.gtf2).set {gtf_to_merge}
if (params.gtf_annotation != ""){
Channel.fromPath(params.gtf_annotation).set {gtf_to_merge}
}
if (params.tfbs_path != ""){
known_tfbs = file(params.tfbs_path).toAbsolutePath()
}
Expand All @@ -161,7 +164,7 @@ int_params = ["window_length", "step", "min_size_fp", "max_size_fp", "kmer",
req_params = ["bigwig", "bed", "genome_fasta", "motif_db", "config"]
all_params = int_params + req_params + ["organism" , "identity", "tfbsscan_method",
"percentage", "tomtom_treshold", "motif_similarity_thresh", "out",
"tissues", "gtf_path", "cluster_motif", "tfbs_path", "help", "gtf2", "seed"]
"tissues", "gtf_merged", "cluster_motif", "tfbs_path", "help", "gtf_annotation", "seed"]

valid_organism = ["hg38", "hg19", "mm9", "mm10"]
valid_tfbsscan_methods = ["moods","fimo"]
Expand All @@ -176,7 +179,7 @@ params.each { key, value ->
System.exit(2)
}
}
if(req_params.contains(key) || (key == "gtf_path" && value != "") ) {
if(req_params.contains(key) || (key == "gtf_merged" && value != "") ) {
if(!file(value).exists()) {
println("ERROR: $key not found. Please check the given path.")
System.exit(2)
Expand Down Expand Up @@ -247,7 +250,7 @@ Find postitions of known tfbs with tfbsscan and discard the overlaps with compar
*/
process overlap_with_known_TFBS {
//conda "${path_env}"
publishDir "${params.out}/1.2_filter_motifs", mode :'copy', pattern: '*_unknown.bed'
publishDir "${params.out}/1.2_filter_motifs", mode :'copy', pattern: "${name}_unknown.bed"
publishDir "${params.out}/log", mode: 'copy', pattern: '*.log'
tag{name}
errorStrategy 'finish'
Expand All @@ -257,7 +260,6 @@ process overlap_with_known_TFBS {

output:
set name, file ("${name}_unknown.bed") into bed_for_reducing
file ('./known_tfbs/*.bed') optional true
file ('*.log')

script:
Expand Down Expand Up @@ -749,17 +751,19 @@ process create_GTF {
file ('*.gtf') into gtf

when:
params.gtf_path == ""
params.gtf_merged == ""

script:
"""
python ${path_bin}/3.1_create_gtf/RegGTFExtractor.py ${params.organism} --tissue ${params.tissues} --wd ${path_bin}/3.1_create_gtf
"""
}


gtf.combine(gtf_to_merge).set {gtf_list}

//TODO put operators in process mwerge_gtf
gtf_list = Channel.empty()
if (params.gtf_merged == "") {
gtf.combine(gtf_to_merge).set {gtf_list}
}

process merge_gtf {

Expand All @@ -773,19 +777,20 @@ process merge_gtf {
file ('merged.gtf') into merged_gtf

when:
params.gtf_path == ""
params.gtf_merged == ""

script:
"""
cat ${gtf_genes} > merged.gtf
cat ${gtf_reg} >> merged.gtf
cat ${gtf_reg} >> merged.gtf | sort -k 1,1 -k 4,4n -k 5,5n
"""
}

if (params.gtf_path == "") {

if (params.gtf_merged == "") {
gtf_for_uropa = merged_gtf
} else {
gtf_for_uropa = Channel.fromPath(params.gtf_path)
Channel.fromPath( params.gtf_merged ).set {gtf_for_uropa}
}

re_scan_for_uropa.flatten().combine(gtf_for_uropa).combine(config).set {uropa_in}
Expand All @@ -795,7 +800,6 @@ re_scan_for_uropa.flatten().combine(gtf_for_uropa).combine(config).set {uropa_in
process create_uropa_config {

tag{bed}
echo true
publishDir "${params.out}/3.2_evaluation/uropa/config", mode: 'copy'

input:
Expand Down

0 comments on commit 8f9fbee

Please sign in to comment.