diff --git a/README.md b/README.md
index d080783..317c3c2 100644
--- a/README.md
+++ b/README.md
@@ -6,19 +6,11 @@ For further information read the [documentation](https://github.molgen.mpg.de/lo
 
 ## Dependencies
 * [conda](https://conda.io/docs/user-guide/install/linux.html)
-* [Nextflow](https://www.nextflow.io/)
-* [MEME-Suite](http://meme-suite.org/doc/install.html?man_type=web)
 
 ## Installation
-1. Start with installing all dependencies listed above (Nextflow, conda, MEME-Suite) and downloading all files from the [GitHub repository](https://github.molgen.mpg.de/loosolab/masterJLU2018).
-2. It is required to set the [environment paths for meme-suite](http://meme-suite.org/doc/install.html?man_type=web#installingtar).
-this can be done with following commands:
-```
-export PATH=[meme-suite instalation path]/libexec/meme-[meme-suite version]:$PATH
-export PATH=[meme-suite instalation path]/bin:$PATH
-```
+1. Start with installing conda and downloading all files from the [GitHub repository](https://github.molgen.mpg.de/loosolab/masterJLU2018).
 
-3. Every other dependency will be automatically installed using conda. For that a conda environment has to be created from the yaml-file given in this repository.
+2. Every other dependency will be automatically installed using conda. For that a conda environment has to be created from the yaml-file given in this repository.
 It is required to create and activate the environment from the yaml-file beforehand.
 This can be done with following commands:
 ```condsole
@@ -26,84 +18,87 @@ conda env create -f masterenv.yml
 conda activate masterenv
 ```
 
-4. Set the wd parameter in the nextflow.config file as path where the repository is saved. For example: '~/masterJLU2018/'.
+3. Set the wd parameter in the nextflow.config file as path where the repository is saved. For example: '~/masterJLU2018/'.
 
 
 **Important Notes:**
-1. For conda the channel bioconda needs to be set as highest priority! This is required due to two different packages with the same name in different channels. For the pipeline the package jellyfish from the channel bioconda is needed and **NOT** the jellyfish package from the channel conda-forge!
+1. For the pipeline the package jellyfish from the channel bioconda is needed and **NOT** the jellyfish package from the channel conda-forge! Please make sure that the right jellyfish package is installed.
 
 
 ## Quick Start
 ```console
-nextflow run pipeline.nf --bigwig [BigWig-file] --bed [BED-file] --genome_fasta [FASTA-file] --motif_db [MEME-file] --organism [mm10|mm9|hg19|hg38]
+nextflow run pipeline.nf --bigwig [BigWig-file] --bed [BED-file] --genome_fasta [FASTA-file] --motif_db [MEME-file] --organism [mm10|mm9|hg19|hg38] --gtf_annotation [GTF-file]
 ```
 
 ### Demo run
 There are files provided inside  ./demo/ for a demo run.
 Go to the main directory and run following command:
 ```
-nextflow run pipeline.nf --bigwig ./demo/buenrostro50k_chr1_fp.bw --bed ./demo/buenrostro50k_chr1_peaks.bed --genome_fasta ./demo/hg38_chr1.fa --motif_db ./demo/jaspar_vertebrates.meme --out ./demo/buenrostro50k_chr1_out/ --organism hg38
+nextflow run pipeline.nf --bigwig ./demo/buenrostro50k_chr1_fp.bw --bed ./demo/buenrostro50k_chr1_peaks.bed --genome_fasta ./demo/hg38_chr1.fa --motif_db ./demo/jaspar_vertebrates.meme --out ./demo/buenrostro50k_chr1_out/ --organism hg38 --gtf_annotation ./demo/homo_sapiens.94.mainChr.gtf
 ```
 
 ## Parameters
-For a detailed overview for all parameters follow this [link](https://github.molgen.mpg.de/loosolab/masterJLU2018/wiki/Configuration).
+For a detailed overview for all parameters follow this [link](https://github.molgen.mpg.de/loosolab/masterJLU2018/wiki/List-of-Parameters).
 ```
 Required arguments:
-	--bigwig		 Path to BigWig-file
-	--bed			 Path to BED-file
-	--genome_fasta		 Path to genome in FASTA-format
-	--motif_db		 Path to motif-database in MEME-format
-	--config		 Path to UROPA configuration file
- 	--organism 		 Input organism [hg38 | hg19 | mm9 | mm10]
-	--out			 Output Directory (Default: './out/')
+	--bigwig			Path to BigWig-file
+	--bed				Path to BED-file
+	--genome_fasta			Path to genome in FASTA-format
+	--motif_db			Path to motif-database in MEME-format
+	--config			Path to UROPA configuration file
+	--gtf_annotation		Path to gtf annotation file
+	--organism 			Input organism [hg38 | hg19 | mm9 | mm10]
+	--out				Output Directory (Default: './out/')
 
 Optional arguments:
 
-	--help [0|1]		1 to show this help message. (Default: 0)
-	--gtf_path		Path to gtf-file. If path is set the process which creates a gtf-file is skipped.
-	--tfbs_path 		Path to directory with output from tfbsscan. If given tfbsscan will be skipped.
+	--help [0|1]			1 to show this help message. (Default: 0)
+	--gtf_merged			Path to gtf-file. If path is set the process which creates a gtf-file is skipped.
+	--tfbs_path 			Path to directory with tfbsscan output. If given tfbsscan will be skipped.
 
 	Footprint extraction:
-	--window_length INT	This parameter sets the length of a sliding window. (Default: 200)
-	--step INT		This parameter sets the number of positions to slide the window forward. (Default: 100)
-	--percentage INT	Threshold in percent (Default: 0)
-	--min_gap INT		If footprints are less than X bases apart the footprints will be merged (Default: 6)
+	--window_length INT		This parameter sets the length of a sliding window. (Default: 200)
+	--step INT			This parameter sets the number of positions to slide the window forward. (Default: 100)
+	--percentage INT		Threshold in percent (Default: 0)
+	--min_gap INT			If footprints are less than X bases apart the footprints will be merged (Default: 6)
 
 	Filter motifs:
-	--min_size_fp INT	Minimum sequence length threshold. Smaller sequences are discarded. (Default: 10)
-	--max_size_fp INT	Maximum sequence length threshold. Discards all sequences longer than this value. (Default: 200)
-	--tfbsscan_method [moods|fimo] Method used by tfbsscan. (Default: moods)
+	--min_size_fp INT		Minimum sequence length threshold. Smaller sequences are discarded. (Default: 10)
+	--max_size_fp INT		Maximum sequence length threshold. Discards all sequences longer than this value. (Default: 200)
+	--tfbsscan_method [moods|fimo] 	Method used by tfbsscan. (Default: moods)
 
 	Cluster:
 	Sequence preparation/ reduction:
-	--kmer INT		K-mer length (Default: 10)
-	--aprox_motif_len INT	Motif length (Default: 10)
-	--motif_occurence FLOAT	Percentage of motifs over all sequences. Use 1 (Default) to assume every sequence contains a motif.
+	--kmer INT			K-mer length (Default: 10)
+	--aprox_motif_len INT		Motif length (Default: 10)
+	--motif_occurrence FLOAT	Percentage of motifs over all sequences. Use 1 (Default) to assume every sequence contains a motif.
 	--min_seq_length Interations	Remove all sequences below this value. (Default: 10)
 	Clustering:
-	--global INT		Global (=1) or local (=0) alignment. (Default: 0)
-	--identity FLOAT	Identity threshold. (Default: 0.8)
-	--sequence_coverage INT	Minimum aligned nucleotides on both sequences. (Default: 8)
-	--memory INT		Memory limit in MB. 0 for unlimited. (Default: 800)
-	--throw_away_seq INT	Remove all sequences equal or below this length before clustering. (Default: 9)
-	--strand INT		Align +/+ & +/- (= 1). Or align only +/+ (= 0). (Default: 0)
+	--global INT			Global (=1) or local (=0) alignment. (Default: 0)
+	--identity FLOAT		Identity threshold. (Default: 0.8)
+	--sequence_coverage INT		Minimum aligned nucleotides on both sequences. (Default: 8)
+	--memory INT			Memory limit in MB. 0 for unlimited. (Default: 800)
+	--throw_away_seq INT		Remove all sequences equal or below this length before clustering. (Default: 9)
+	--strand INT			Align +/+ & +/- (= 1). Or align only +/+ (= 0). (Default: 0)
 
 	Motif estimation:
-	--min_seq INT 		Sets the minimum number of sequences required for the FASTA-files given to GLAM2. (Default: 100)
-	--motif_min_key INT	Minimum number of key positions (aligned columns) in the alignment done by GLAM2. (Default: 8)
-	--motif_max_key INT	Maximum number of key positions (aligned columns) in the alignment done by GLAM2. (Default: 20)
-	--iteration INT		Number of iterations done by GLAM2. More Iterations: better results, higher runtime. (Default: 10000)
-	--tomtom_treshold FLOAT	Threshold for similarity score. (Default: 0.01)
-	--best_motif INT	Get the best X motifs per cluster. (Default: 3)
-	--gap_penalty INT	Set penalty for gaps in GLAM2 (Default: 1000)
+	--min_seq INT 			Sets the minimum number of sequences required for the FASTA-files given to GLAM2. (Default: 100)
+	--motif_min_key INT		Minimum number of key positions (aligned columns) in the alignment done by GLAM2. (Default: 8)
+	--motif_max_key INT		Maximum number of key positions (aligned columns) in the alignment done by GLAM2. (Default: 20)
+	--iteration INT			Number of iterations done by GLAM2. More Iterations: better results, higher runtime. (Default: 10000)
+	--tomtom_treshold FLOAT	T	hreshold for similarity score. (Default: 0.01)
+	--best_motif INT		Get the best X motifs per cluster. (Default: 3)
+	--gap_penalty INT		Set penalty for gaps in GLAM2 (Default: 1000)
+	--seed String			Set seed for GLAM2 (Default: 123456789)
 	Moitf clustering:
-	--cluster_motif	Boolean	If 1 pipeline clusters motifs. If its 0 it does not. (Default: 0)
-	--edge_weight INT	Minimum weight of edges in motif-cluster-graph (Default: 5)
+	--cluster_motif	Boolean		If 1 pipeline clusters motifs. If its 0 it does not. (Defaul: 0)
+	--edge_weight INT		Minimum weight of edges in motif-cluster-graph (Default: 5)
 	--motif_similarity_thresh FLOAT	Threshold for motif similarity score (Default: 0.00001)
 
 	Creating GTF:
-	--tissues List/String 	List of one or more keywords for tissue-/category-activity, categories must be specified as in JSON
-				config
+	--tissues List/String 		List of one or more keywords for tissue-/category-activity, categories must be specified as in JSON config
+	Evaluation:
+	--max_uropa_runs INT	 	Maximum number UROPA runs running parallelized (Default: 10)
 All arguments can be set in the configuration files
  ```
 
diff --git a/bin/1.2_filter_motifs/compareBed.sh b/bin/1.2_filter_motifs/compareBed.sh
index aa97c95..5097004 100644
--- a/bin/1.2_filter_motifs/compareBed.sh
+++ b/bin/1.2_filter_motifs/compareBed.sh
@@ -5,10 +5,10 @@
 
 # desciption: This scripts uses bedtools to find new, non overlapping regions
 #             between input data in BED-format and a group of known motifs in
-#             BED-format. 
+#             BED-format.
 
 #             For parameter description, run the script without parameters or -h.
-#             The output is a file with the filtered footprints and the log file compareBed.stats
+#             The output is a file with the filtered footprints and the log file filter_motifs.log
 
 # One R scripts is used, compareBed_runinfo.R, stored in the same directory.
 # Parameter values (file/directory paths) must not contain the | symbol, because it is chosen as "sed"-field separator.
@@ -124,7 +124,7 @@ then
 	echo "              -o   --output       output path/file ; default dir current directory and filename is newMotifs.bed and newMotifs.bed.fasta"
 	echo "              -h --help           shows this help message"
 	echo ""
-	echo " Note: file paths must not contain the '|' pipe symbol" 
+	echo " Note: file paths must not contain the '|' pipe symbol"
 	exit 0
 fi
 
@@ -188,7 +188,7 @@ cat $data | sed 's/[ \t]*$//' > "$workdir"/filtered.bed
 # motiffiles either from a directory OR comma separated list
 if [ -d "$motifs" ]
 then
-	# creates an array of all files with bed in its name in the directory $motifs  
+	# creates an array of all files with bed in its name in the directory $motifs
 	declare -a motiffiles=(`ls $motifs | grep -i '.bed$' | sed "s|^|$motifs\/|g" | tr '\n' ' ' | sed "s|//|/|g"`)
 
 # the else case means, that the motiffiles were passed comma separated with no whitespace.
@@ -264,9 +264,9 @@ fi
 
 # remove short/long motivs, make unique ids (relevant for some splitted tfbs from subtract) and handle maxScorePosition
 # also creates a small output file with information about the comparison
-Rscript $path/compareBed_runinfo.R $min $max $data "$workdir"/filtered.bed "$workdir"/filtered_flagged.bed "$workdir"/compareBed.stats
+Rscript $path/compareBed_runinfo.R $min $max $data "$workdir"/filtered.bed "$workdir"/filtered_flagged.bed "$workdir"/filter_motifs.log
 # check if Rscript executed without errors
-if [ $? -gt 0 ] 
+if [ $? -gt 0 ]
 then
 	exit 1
 fi
@@ -279,15 +279,15 @@ then
 	# add initial number of footprints to the log file
 	fp_initial=`cat $data | wc -l`
 	fp_initial=`expr $fp_initial - 1`
-	echo $fp_initial | sed 's/^/initial number of footprints: /g' >> "$workdir"/compareBed.stats
+	echo $fp_initial | sed 's/^/initial number of footprints: /g' >> "$workdir"/filter_motifs.log
 else
 	# output will be overwritten if it exists
 	rm -f $output
 	# add initial number of footprints to the log file
-	cat $data | wc -l | sed 's/^/initial number of footprints: /g' >> "$workdir"/compareBed.stats
+	cat $data | wc -l | sed 's/^/initial number of footprints: /g' >> "$workdir"/filter_motifs.log
 fi
 # add number of footprints after filtering to the log file
-cat "$workdir"/filtered_flagged.bed | wc -l | sed 's/^/number of footprints after subtract: /g' >> "$workdir"/compareBed.stats
+cat "$workdir"/filtered_flagged.bed | wc -l | sed 's/^/number of footprints after subtract: /g' >> "$workdir"/filter_motifs.log
 
 # add fasta sequences to bed and create fasta file
 out_fasta=`echo $output | sed "s|.bed$|.fasta|g"`
diff --git a/bin/2.2_motif_estimation/get_motif_seq.R b/bin/2.2_motif_estimation/get_motif_seq_id.R
similarity index 94%
rename from bin/2.2_motif_estimation/get_motif_seq.R
rename to bin/2.2_motif_estimation/get_motif_seq_id.R
index bbcaff6..ca07eb6 100644
--- a/bin/2.2_motif_estimation/get_motif_seq.R
+++ b/bin/2.2_motif_estimation/get_motif_seq_id.R
@@ -68,7 +68,7 @@ create_seq_json <- function(input, output, num, tmp_path, cluster_id) {
 
   # Create json file
   ## naming
-  names(datalist) <- paste0(c("Motif_", "Motif_", "Motif_"),seq(1,as.numeric(num),1) , " Cluster_", cluster_id)
+  names(datalist) <- paste0("Motif_", seq(1,as.numeric(num),1), " Cluster_", cluster_id)
   ## creating json object
   json <- RJSONIO::toJSON(datalist, pretty = T , .withNames = T)
   ## writing file
diff --git a/bin/2.2_motif_estimation/merge_similar_clusters.R b/bin/2.2_motif_estimation/merge_similar_clusters.R
index d9b2373..4cf8c47 100644
--- a/bin/2.2_motif_estimation/merge_similar_clusters.R
+++ b/bin/2.2_motif_estimation/merge_similar_clusters.R
@@ -1,5 +1,6 @@
 #!/usr/bin/env Rscript
 if (!require(optparse)) install.packages("optparse"); library(optparse)
+if (!require(igraph)) install.packages("igraph"); library(igraph)
 
 option_list <- list(
   make_option(opt_str = c("-i", "--input"), default = NULL, help = "Input TSV-file. Output from merged tomtom results", metavar = "character"),
@@ -40,37 +41,39 @@ merge_similar <- function(tsv_in, file_list, min_weight){
 
   # remove rows if column 1 is identical to column 2
   edgelist <- sim_not_unique[query_cluster != target_cluster]
-
+  unique_edgelist <- unique(edgelist)
+  
   # create graph from data.frame
-  g <- igraph::graph_from_edgelist(as.matrix(edgelist))
-  # converting graph to adjacency matrix
-  adj_matrix <- igraph::get.adjacency(g, names = T)
-  # generating weighted graph from adjacency matrix
-  g_adj <- igraph::graph_from_adjacency_matrix(adj_matrix, weighted = T)
+  g <- graph_from_edgelist(as.matrix(unique(edgelist)), directed = F)
+  g_tidy <- delete.vertices(simplify(g), degree(g) == 0)
+  g_tidy <- set_vertex_attr(g_tidy,"name",V(g_tidy), value = (sort(unique(c(unique_edgelist$V1,unique_edgelist$V2)))) - 1)
 
-  # get subgraphs from graph with edges of weight > min_weight
-  s1 <- igraph::subgraph.edges(g_adj, igraph::E(g_adj)[igraph::E(g_adj)$weight > min_weight], del = F)
+  # plotting
   png('motif_clusters.png')
-  plot(s1)
+  plot(g_tidy)
   dev.off()
-  clust <- igraph::clusters(s1)
+  clust <- clusters(g)
   if (clust$no < 1) {
     b <- lapply(files, function(f){
       system(paste("cat",f,">",basename(f)))
     })
   }
+  clust_out <- file("cluster.txt")
   # merge FASTA-files depending on the clustered graphs
-  a <- lapply(seq(from = 1, to = clust$no, by = 1), function(i){
+  a <- vapply(seq(from = 1, to = clust$no, by = 1), function(i){
     #get vector with cluster ids, which are clustered together in "motif cluster" i
     cl <- as.vector(which(clust$membership %in% c(i)))
     #create string, which represents a list, containing all FASTA-files to be merged
     fasta_list <- paste(files[cl], collapse = " ")
     #create name for merged FASTA-file
-    name <- paste0("Cluster_",i,".fasta")
+    name <- paste0("Cluster_",i - 1 ,".fasta")
     #merge fasta files
     system(paste("cat",fasta_list,">",name))
-  })
+    paste("Cluster",i - 1,"is generated by merging cluster:",paste(cl - 1,collapse = ","))
+  }, "")
+  writeLines(a[a != ""], con = clust_out)
 }
+  
 
 
 # run function merge_similar with given parameteres if not in interactive context (e.g. run from shell)
diff --git a/bin/2.2_motif_estimation/png_to_pdf.R b/bin/2.2_motif_estimation/png_to_pdf.R
new file mode 100644
index 0000000..4ea85b9
--- /dev/null
+++ b/bin/2.2_motif_estimation/png_to_pdf.R
@@ -0,0 +1,92 @@
+#!/usr/bin/env Rscript
+if (!require(optparse)) install.packages("optparse"); library(optparse)
+if (!require(png)) install.packages("png"); library(png)
+if (!require(grid)) install.packages("grid"); library(grid)
+if (!require(gridExtra)) install.packages("gridExtra"); library(gridExtra)
+
+
+option_list <- list(
+  make_option(opt_str = c("-i", "--png_new"), default = NULL, help = "List of png paths separated by \',\'", metavar = "character"),
+  make_option(opt_str = c("-p", "--png_old"), default = NULL, help = "List of png paths separated by \',\'", metavar = "character"),
+  make_option(opt_str = c("-f", "--index"), default = NULL, help = "Path to index file", metavar = "character")
+)
+
+opt_parser <- OptionParser(option_list = option_list,
+                           description = "Generating a PDF from multiple png files",
+                           epilogue = "Author: Rene Wiegandt <Rene.Wiegandt@mpi-bn.mpg.de>")
+
+opt <- parse_args(opt_parser)
+
+
+
+#' Generating a PDF from multiple png files.
+#'
+#' @parameter png_new <string> List of png paths separated by ,
+#' @parameter png_old <string> List of png paths separated by ,
+#' @parameter cluster_ids <string> List of IDs separated by ,
+#' @parameter new_id <int> New Cluster ID
+#'
+#' @author Rene Wiegandt
+#' @contact rene.wiegandt(at)mpi-bn.mpg.de
+png_to_pdf <- function(png_top, png_list, cluster_ids, new_id, out = "cluster.pdf"){
+  #png_top <- "G://Rene.Wiegandt/10_Master/tmp/test_pipeline_11_3_19/2.2_motif_estimation/03_motif_cluster/04_glam2/Cluster_0/logo1.png,G://Rene.Wiegandt/10_Master/tmp/test_pipeline_11_3_19/2.2_motif_estimation/03_motif_cluster/04_glam2/Cluster_0/logo2.png,G://Rene.Wiegandt/10_Master/tmp/test_pipeline_11_3_19/2.2_motif_estimation/03_motif_cluster/04_glam2/Cluster_0/logo3.png"
+  #png_list <- "G://Rene.Wiegandt/10_Master/tmp/test_pipeline_11_3_19/2.2_motif_estimation/02_glam2/buenrostro50k_chr1_fp_cluster_0/logo1.png,G://Rene.Wiegandt/10_Master/tmp/test_pipeline_11_3_19/2.2_motif_estimation/02_glam2/buenrostro50k_chr1_fp_cluster_0/logo2.png,G://Rene.Wiegandt/10_Master/tmp/test_pipeline_11_3_19/2.2_motif_estimation/02_glam2/buenrostro50k_chr1_fp_cluster_0/logo3.png,G://Rene.Wiegandt/10_Master/tmp/test_pipeline_11_3_19/2.2_motif_estimation/02_glam2/buenrostro50k_chr1_fp_cluster_1/logo1.png,G://Rene.Wiegandt/10_Master/tmp/test_pipeline_11_3_19/2.2_motif_estimation/02_glam2/buenrostro50k_chr1_fp_cluster_1/logo2.png,G://Rene.Wiegandt/10_Master/tmp/test_pipeline_11_3_19/2.2_motif_estimation/02_glam2/buenrostro50k_chr1_fp_cluster_1/logo3.png,G://Rene.Wiegandt/10_Master/tmp/test_pipeline_11_3_19/2.2_motif_estimation/02_glam2/buenrostro50k_chr1_fp_cluster_0/logo1.png,G://Rene.Wiegandt/10_Master/tmp/test_pipeline_11_3_19/2.2_motif_estimation/02_glam2/buenrostro50k_chr1_fp_cluster_0/logo2.png,G://Rene.Wiegandt/10_Master/tmp/test_pipeline_11_3_19/2.2_motif_estimation/02_glam2/buenrostro50k_chr1_fp_cluster_0/logo3.png,G://Rene.Wiegandt/10_Master/tmp/test_pipeline_11_3_19/2.2_motif_estimation/02_glam2/buenrostro50k_chr1_fp_cluster_1/logo1.png,G://Rene.Wiegandt/10_Master/tmp/test_pipeline_11_3_19/2.2_motif_estimation/02_glam2/buenrostro50k_chr1_fp_cluster_1/logo2.png,G://Rene.Wiegandt/10_Master/tmp/test_pipeline_11_3_19/2.2_motif_estimation/02_glam2/buenrostro50k_chr1_fp_cluster_1/logo3.png,G://Rene.Wiegandt/10_Master/tmp/test_pipeline_11_3_19/2.2_motif_estimation/02_glam2/buenrostro50k_chr1_fp_cluster_0/logo1.png,G://Rene.Wiegandt/10_Master/tmp/test_pipeline_11_3_19/2.2_motif_estimation/02_glam2/buenrostro50k_chr1_fp_cluster_0/logo2.png,G://Rene.Wiegandt/10_Master/tmp/test_pipeline_11_3_19/2.2_motif_estimation/02_glam2/buenrostro50k_chr1_fp_cluster_0/logo3.png,G://Rene.Wiegandt/10_Master/tmp/test_pipeline_11_3_19/2.2_motif_estimation/02_glam2/buenrostro50k_chr1_fp_cluster_1/logo1.png,G://Rene.Wiegandt/10_Master/tmp/test_pipeline_11_3_19/2.2_motif_estimation/02_glam2/buenrostro50k_chr1_fp_cluster_1/logo2.png,G://Rene.Wiegandt/10_Master/tmp/test_pipeline_11_3_19/2.2_motif_estimation/02_glam2/buenrostro50k_chr1_fp_cluster_1/logo3.png,G://Rene.Wiegandt/10_Master/tmp/test_pipeline_11_3_19/2.2_motif_estimation/02_glam2/buenrostro50k_chr1_fp_cluster_0/logo1.png,G://Rene.Wiegandt/10_Master/tmp/test_pipeline_11_3_19/2.2_motif_estimation/02_glam2/buenrostro50k_chr1_fp_cluster_0/logo2.png,G://Rene.Wiegandt/10_Master/tmp/test_pipeline_11_3_19/2.2_motif_estimation/02_glam2/buenrostro50k_chr1_fp_cluster_0/logo3.png,G://Rene.Wiegandt/10_Master/tmp/test_pipeline_11_3_19/2.2_motif_estimation/02_glam2/buenrostro50k_chr1_fp_cluster_1/logo1.png,G://Rene.Wiegandt/10_Master/tmp/test_pipeline_11_3_19/2.2_motif_estimation/02_glam2/buenrostro50k_chr1_fp_cluster_1/logo2.png,G://Rene.Wiegandt/10_Master/tmp/test_pipeline_11_3_19/2.2_motif_estimation/02_glam2/buenrostro50k_chr1_fp_cluster_1/logo3.png,G://Rene.Wiegandt/10_Master/tmp/test_pipeline_11_3_19/2.2_motif_estimation/02_glam2/buenrostro50k_chr1_fp_cluster_0/logo1.png,G://Rene.Wiegandt/10_Master/tmp/test_pipeline_11_3_19/2.2_motif_estimation/02_glam2/buenrostro50k_chr1_fp_cluster_0/logo2.png,G://Rene.Wiegandt/10_Master/tmp/test_pipeline_11_3_19/2.2_motif_estimation/02_glam2/buenrostro50k_chr1_fp_cluster_0/logo3.png,G://Rene.Wiegandt/10_Master/tmp/test_pipeline_11_3_19/2.2_motif_estimation/02_glam2/buenrostro50k_chr1_fp_cluster_1/logo1.png,G://Rene.Wiegandt/10_Master/tmp/test_pipeline_11_3_19/2.2_motif_estimation/02_glam2/buenrostro50k_chr1_fp_cluster_1/logo2.png,G://Rene.Wiegandt/10_Master/tmp/test_pipeline_11_3_19/2.2_motif_estimation/02_glam2/buenrostro50k_chr1_fp_cluster_1/logo3.png"
+  #cluster_ids <- c(1,2,3,4,5,6,7,8,9,10)
+  top_split <- unlist(strsplit(png_top, ","))
+  png_split <- unlist(strsplit(png_list, ","))
+  cluster_list <- unlist(strsplit(cluster_ids, ","))
+  top_grob <- lapply(top_split,function(p){rasterGrob(readPNG(p),interpolate = FALSE)})
+  grobs <- lapply(png_split,function(p){rasterGrob(readPNG(p),interpolate = FALSE)})
+
+  split_grobs <- split(grobs, rep(1:ceiling(length(grobs)/3), each = 3)[1:length(grobs)])
+
+  rows <- lapply(seq(1,length(split_grobs)), function(sg){
+    arrangeGrob(grobs = split_grobs[[sg]], top = textGrob(paste0("Cluster ",cluster_list[sg]),gp = gpar(fontsize = 20,font = 3)), ncol = 3)
+  })
+  split_rows <- split(rows, rep(1:ceiling(length(rows)/4), each = 4)[1:length(rows)])
+
+  pdf(out,width = 17, height = 11)
+  grid.arrange(grobs = top_grob, nrow = 3, top = textGrob(paste0("New Cluster ",new_id),gp = gpar(fontsize = 30,font = 3)))
+  lapply(split_rows, function(r){
+    grid.arrange(grobs = r, nrow = 4, top = textGrob("Generated from...",gp = gpar(fontsize = 30,font = 3)))
+  })
+
+  dev.off()
+
+}
+#### index_file <- "G://Rene.Wiegandt/10_Master/tmp/test_pipeline_11_3_19/2.2_motif_estimation/03_motif_cluster/03_merged_fasta/cluster.txt"
+
+
+#' Parsing index file.
+#'
+#' @parameter index_file Path to index file.
+#'
+#' @author Rene Wiegandt
+#' @contact rene.wiegandt(at)mpi-bn.mpg.de
+get_index <- function(index_file){
+  f <- data.table::fread(index_file, header = F)
+  index <- f[,c(2,8)]
+  index$V8 <- strsplit(index$V8,",")
+  return(index[unlist(lapply(index$V8, function(v){ifelse(length(v) > 1, TRUE, FALSE)}))])
+
+}
+
+@TODO
+png_to_pdf_set_up <- function(png_top, png_list, index_file){
+
+  index <- get_index(index_file)
+  t <- lapply(seq(nrow(index)), function(i){
+    new_id <- index[i,1]
+    cluster_ids <- index[i,2]
+    out <- paste0("Summary_cluster_", new_id, ".pdf")
+    #regex_png <- paste0("*_cluster_", new_id , "/logo${NUM}.png") #????
+    png_to_pdf(png_top = "TODO" ,png_list = "TODO" ,cluster_ids = cluster_ids, new_id = new_id, out = out )
+  })
+
+}
+
+
+# run function merge_similar with given parameteres if not in interactive context (e.g. run from shell)
+if (!interactive()) {
+  png_to_pdf_set_up(opt$png_new, opt$png_old , opt$index)
+}
diff --git a/bin/3.2_evaluation/venn.R b/bin/3.2_evaluation/venn.R
new file mode 100644
index 0000000..94d8cd2
--- /dev/null
+++ b/bin/3.2_evaluation/venn.R
@@ -0,0 +1,87 @@
+#!/usr/bin/env Rscript
+
+### master JLU 2018 Modul 3.2 Motif Evaluation ###
+# author: afust
+# capabilities:
+# ratio: sequenced used to create motif vs sites found by rescan
+
+# dependencies
+suppressMessages(library(data.table, quietly = TRUE))
+suppressMessages(library(VennDiagram, quietly = TRUE))
+suppressMessages(library(RJSONIO, quietly = TRUE))
+suppressMessages(library(getopt, quietly = TRUE))
+
+# ---------------------------- options ----------------------------------------- #
+# 0 flag
+# 1 mandatory parameter
+# 2 optional parameter
+options(warn = 1)
+options <- matrix(c(
+# tfbs is either the directory including all accessible regions
+# or the directory including precalculated score matrices etc
+"cluster_dir", "i", 1, "character", "directory with rescan results",
+"pattern", "p", 1, "character", "cluster pattern, e.g. 'Cluster_96'",
+"seq_dir", "l", 1, "character", "directory with fp assignment per cluster",
+"clustered_fp", "c", 1, "character", "clustering results",
+"outdir", "o", 1, "character", "output directory",
+"help", "h", 0, "logical", "Provides command line help"
+), byrow = TRUE, ncol = 5)
+opt <- getopt(options)
+
+# ---------------------------- options processing ----------------------------- #
+# check for mandatory parameters and print help if required
+if (!is.null(opt$help) ){
+	cat(getopt(options, usage = TRUE), file = stderr()); q(status = 0)
+}
+
+# load input
+
+# list used files: due to rescan script there is one output per motif for each cluster
+# all should be used in one plot result -> list files with pattern
+rescan_files <- list.files(opt$cluster_dir, pattern = opt$pattern, ignore.case = FALSE)
+# list of fps representing a motif
+fp_list <- fromJSON(opt$seq_dir)
+
+# clustering information needed to extract genomic locations of fps
+# -> reduce to current cluster
+dt.clustering <- fread(opt$clustered_fp, sep="\t", header = TRUE)
+colnames(dt.clustering) <- sub("#chr", "chr", colnames(dt.clustering))
+cl <- as.numeric(strsplit(opt$pattern, "_")[[1]][2])
+dt.clustering <- dt.clustering[dt.clustering$cluster == cl, ]
+
+# pdf including venn diagrams for all motifs
+pdf(paper="a4", file=file.path(opt$outdir, paste0("Cluster_",cl,".pdf")), useDingbats = FALSE)
+plot.new()
+for(f in rescan_files){
+	# load genomic locations found by rescan with motif
+	dt.rescan <- fread(file.path(opt$cluster_dir,f), sep="\t", header = FALSE, select = c(1:4), stringsAsFactors=FALSE)
+	colnames(dt.rescan) <- c("chr", "start", "end", "name")
+
+	# extract fps that build up current motif
+	tmp <- strsplit(as.character(dt.rescan[1,"name"]), "_")[[1]]
+	cluster <- paste(tmp[1],tmp[2],sep="_")
+	id <- paste(paste("Motif", tmp[3], sep="_"), cluster, sep=" ")
+	#cat("\nfor",id, "there are", length(unname(unlist(fp_list[id]))), "sequences assigned")
+	# reduce cluster info to sites used for current motif to find overlaps
+	dt.motif_seq <- dt.clustering[dt.clustering$name %in% unname(unlist(fp_list[id])),c("chr","start", "end")]
+	setkeyv(dt.rescan, c("chr","start","end"))
+	setkeyv(dt.motif_seq, c("chr","start","end"))
+	dt.overlaps <- foverlaps(dt.motif_seq, dt.rescan, mult="first")
+	dt.overlaps <- dt.overlaps[complete.cases(dt.overlaps),]
+	dt.overlaps <- dt.overlaps[!duplicated(dt.overlaps$i.start),]
+	# number of locations found in both: motif setup sequences and rescan
+	hits <- nrow(dt.overlaps)
+	#cat("\nnumbers:\nregions assignet to motif", nrow(dt.motif_seq), "\nregions found by rescan", nrow(dt.rescan), "\noverlaps", hits)
+	# draw venn
+	draw.pairwise.venn(area1 = nrow(dt.motif_seq), area2 = nrow(dt.rescan), cross.area = hits,
+		category = c(paste0("related motif sites of ",cluster), paste0("rescan with ", id)),
+		lty = rep("blank", 2), scaled = TRUE,
+		fill= c("blue", "red"), alpha = c(0.5,0.5),
+		cat.pos = c(0, 0), cat.dist = rep(0.025, 2) )
+	title(id)
+	if(f != tail(rescan_files,1)){
+		grid.newpage()
+	}
+}
+
+invisible(dev.off())
diff --git a/config/cluster.config b/config/cluster.config
index 1ba0b33..9ca8601 100644
--- a/config/cluster.config
+++ b/config/cluster.config
@@ -2,7 +2,7 @@ params{
   //reduce_bed
   kmer=10
   aprox_motif_len=10
-  motif_occurence=1
+  motif_occurrence=1
   min_seq_length=10
 
   //cdhit_wrapper
diff --git a/config/create_gtf.config b/config/create_gtf.config
index c38afaa..192b793 100644
--- a/config/create_gtf.config
+++ b/config/create_gtf.config
@@ -1,3 +1,3 @@
 params{
-  tissue=""
+  tissues=""
 }
diff --git a/config/footprint_extraction.config b/config/footprint_extraction.config
index 0661e2a..bcf54cf 100644
--- a/config/footprint_extraction.config
+++ b/config/footprint_extraction.config
@@ -2,5 +2,5 @@ params{
 	window_length = 200
 	step = 100
 	percentage = 0
-	max_bp_between = 6
+	gap_penalty = 6
 }
diff --git a/config/motif_estimation.config b/config/motif_estimation.config
index 2445d12..e82c4c6 100644
--- a/config/motif_estimation.config
+++ b/config/motif_estimation.config
@@ -1,11 +1,12 @@
 params {
 	//bed_to_clustered_fasta
-	min_seq = 10
+	min_seq = 100
 
 	//glam2
-	motif_min_len = 8
-	motif_max_len = 20
-	interation = 10000
+	motif_min_key = 8
+	motif_max_key = 20
+	iteration = 10000
+	gap_penalty = 1000
 
 	//tomtom
 	tomtom_treshold = 0.01
diff --git a/config/uropa.config b/config/uropa.config
index 3c4a189..0b93f22 100644
--- a/config/uropa.config
+++ b/config/uropa.config
@@ -1,16 +1,8 @@
 {
-"queries":[
-    {"feature":"",
-    "feature.anchor": "",
-    "distance": [ , ],
-    "strand":"",
-    "direction":"",
-    "internals":"",
-    "filter.attribute":"",
-    "attribute.value":"",
-    "show.attributes":"" }
-          ],
-"priority": "",
-"gtf": "placeholder_gtf",
-"bed": "placeholder_bed"
-}
\ No newline at end of file
+	"queries":[
+	{"feature":"gene","distance":[5000,1000], "show.attributes":"gene_id"},
+	{"feature":["ctcf_binding_site","enhancer", "open_chromatin", "promoter", "promoter_flanking_region", "tf_binding_site"], "distance":[1000], "internals":"TRUE"}
+	],
+  "gtf": "placeholder_gtf",
+  "bed": "placeholder_bed"
+}
diff --git a/demo/homo_sapiens.94.mainChr.gtf b/demo/homo_sapiens.94.mainChr.gtf
new file mode 100644
index 0000000..e4bed46
Binary files /dev/null and b/demo/homo_sapiens.94.mainChr.gtf differ
diff --git a/masterenv.yml b/masterenv.yml
index dc19c1a..9c50870 100644
--- a/masterenv.yml
+++ b/masterenv.yml
@@ -6,7 +6,7 @@ dependencies:
   - numpy=1.15.4
   - pybigWig
   - cd-hit
-  - jellyfish
+  - bioconda::jellyfish
   - r-base>=3.5.1
   - r-data.table
   - r-pbapply
@@ -24,3 +24,5 @@ dependencies:
   - seaborn
   - crossmap
   - r-bit64
+  - uropa
+  - nextflow
diff --git a/meme_suite.yml b/meme_suite.yml
new file mode 100644
index 0000000..2ac242e
--- /dev/null
+++ b/meme_suite.yml
@@ -0,0 +1,5 @@
+
+name: meme_suite
+dependencies:
+  - python=2.7
+  - meme
diff --git a/nextflow.config b/nextflow.config
index 653afd6..c935ad0 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -1,11 +1,11 @@
-wd = ""
+wd = "/mnt/agnerds/Rene.Wiegandt/repo2/masterJLU2018"
 createTimeout = 40
-params.threads=60 //Parameter for for scripts! Not for nextflow processes.
-params.config="${wd}/config/uropa.config"
+params.threads = 10 //Parameter for for scripts! Not for nextflow processes.
+params.config = "${wd}/config/uropa.config"
 
 env {
 	path_bin = "${wd}/bin"
-	path_env = "${wd}/masterenv.yml"
+	meme_env = "${wd}/meme_suite.yml"
 }
 
 includeConfig "${wd}/config/footprint_extraction.config"
diff --git a/pipeline.nf b/pipeline.nf
index 2e60f35..2084666 100644
--- a/pipeline.nf
+++ b/pipeline.nf
@@ -1,5 +1,6 @@
 #!/usr/bin/env nextflow
 
+disable_mo_clu = 1
 
 //setting default values
 	params.bigwig=""
@@ -9,7 +10,8 @@
 	params.config=""
 	params.tfbs_path=""
 	params.help = 0
-	params.gtf_path=""
+	params.gtf_merged=""
+	params.gtf_annotation=""
 	params.out = "./out/"
 
 //footprint_extraction
@@ -24,19 +26,19 @@
 	params.tfbsscan_method = "moods"
 
 //clustering
-  	//reduce_sequence
-  	params.kmer=10
-  	params.aprox_motif_len=10
-  	params.motif_occurence=1
-  	params.min_seq_length=10
-
-  	//cdhit_wrapper
-  	params.global=0
-  	params.identity=0.8
-  	params.sequence_coverage=8
-  	params.memory=800
- 	params.throw_away_seq=9
-  	params.strand=0
+  //reduce_sequence
+	params.kmer=10
+	params.aprox_motif_len=10
+	params.motif_occurrence=1
+	params.min_seq_length=10
+
+	//cdhit_wrapper
+	params.global=0
+	params.identity=0.8
+	params.sequence_coverage=8
+	params.memory=800
+	params.throw_away_seq=9
+	params.strand=0
 
 //motif_estimation
 	//bed_to_clustered_fasta
@@ -47,6 +49,7 @@
 	params.motif_max_key = 20 // Maximum number of key positions (aligned columns)
 	params.iteration = 10000 // Number of Iterations done by glam2. A high iteration number equals a more accurate result but with an higher runtime.
 	params.gap_penalty = 1000
+	params.seed = 123456789
 
 	//tomtom
 	params.tomtom_treshold = 0.01 // threshold for similarity score.
@@ -60,110 +63,152 @@
 
 //creating_gtf
 	params.organism=""
-	params.tissue=""
+	params.tissues=""
 
-if (params.bigwig == "" || params.bed == "" || params.organism == "" || params.genome_fasta == "" || params.motif_db == "" || params.config == "" || "${params.help}" != "0" ){
+//evaluation
+	params.max_uropa_runs = 10
+
+/*
+Checking for parameter input!
+*/
+int_params = ["window_length", "step", "min_size_fp", "max_size_fp", "kmer",
+             "aprox_motif_len", "motif_occurrence", "min_seq_length", "global",
+             "sequence_coverage", "memory", "throw_away_seq", "strand",
+             "min_seq", "motif_min_key", "motif_max_key", "iteration",
+             "edge_weight", "best_motif", "min_gap", "gap_penalty", "edge_weight",
+						 "threads", "max_uropa_runs"]
+file_params = ["bigwig", "bed", "genome_fasta", "motif_db", "config", "gtf_annotation",]
+all_params = int_params + file_params + ["organism" , "identity", "tfbsscan_method",
+							"percentage", "tomtom_treshold", "motif_similarity_thresh", "out",
+							"tissues", "gtf_merged", "cluster_motif", "tfbs_path", "help", "seed"]
+req_params = file_params + ["organism"]
+
+valid_organism = ["hg38", "hg19", "mm9", "mm10"]
+valid_tfbsscan_methods = ["moods","fimo"]
+val_missing = false
+send_help = false
+missing_params = []
+
+req_params.each { key ->
+	if (req_params.contains(key)){
+		if (key == "gtf_annotation") {
+			if (params[key] == "" && params.gtf_merged == "") {
+				val_missing = true
+				missing_params.add("$key or gtf_merged")
+			}
+		} else {
+			if (params[key] == ""){
+				val_missing = true
+				missing_params.add(key)
+			}
+		}
+	}
+}
+
+if (val_missing){
+	send_help = true
+	println("Error: Following required parameters are missing: $missing_params")
+}
+
+if (send_help || "${params.help}" != "0") {
 log.info """
 Usage: nextflow run pipeline.nf --bigwig [BigWig-file] --bed [BED-file] --genome_fasta [FASTA-file] --motif_db [MEME-file] --config [UROPA-config-file]
 
-Required arguments:
-	--bigwig		 Path to BigWig-file
-	--bed			 Path to BED-file
-	--genome_fasta		 Path to genome in FASTA-format
-	--motif_db		 Path to motif-database in MEME-format
-	--config		 Path to UROPA configuration file
- 	--organism 		 Input organism [hg38 | hg19 | mm9 | mm10]
-	--out			 Output Directory (Default: './out/')
-
-Optional arguments:
-
-	--help [0|1]		1 to show this help message. (Default: 0)
-	--gtf_path		Path to gtf-file. If path is set the process which creates a gtf-file is skipped.
-	--tfbs_path 		Path to directory with output from tfbsscan. If given tfbsscan will be skipped.
-
-	Footprint extraction:
-	--window_length INT	This parameter sets the length of a sliding window. (Default: 200)
-	--step INT		This parameter sets the number of positions to slide the window forward. (Default: 100)
-	--percentage INT	Threshold in percent (Default: 0)
-	--min_gap INT		If footprints are less than X bases apart the footprints will be merged (Default: 6)
-
-	Filter motifs:
-	--min_size_fp INT	Minimum sequence length threshold. Smaller sequences are discarded. (Default: 10)
-	--max_size_fp INT	Maximum sequence length threshold. Discards all sequences longer than this value. (Default: 200)
-	--tfbsscan_method [moods|fimo] Method used by tfbsscan. (Default: moods)
-
-	Cluster:
-	Sequence preparation/ reduction:
-	--kmer INT		K-mer length (Default: 10)
-	--aprox_motif_len INT	Motif length (Default: 10)
-	--motif_occurence FLOAT	Percentage of motifs over all sequences. Use 1 (Default) to assume every sequence contains a motif.
-	--min_seq_length Interations	Remove all sequences below this value. (Default: 10)
-	Clustering:
-	--global INT		Global (=1) or local (=0) alignment. (Default: 0)
-	--identity FLOAT	Identity threshold. (Default: 0.8)
-	--sequence_coverage INT	Minimum aligned nucleotides on both sequences. (Default: 8)
-	--memory INT		Memory limit in MB. 0 for unlimited. (Default: 800)
-	--throw_away_seq INT	Remove all sequences equal or below this length before clustering. (Default: 9)
-	--strand INT		Align +/+ & +/- (= 1). Or align only +/+ (= 0). (Default: 0)
-
-	Motif estimation:
-	--min_seq INT 		Sets the minimum number of sequences required for the FASTA-files given to GLAM2. (Default: 100)
-	--motif_min_key INT	Minimum number of key positions (aligned columns) in the alignment done by GLAM2. (Default: 8)
-	--motif_max_key INT	Maximum number of key positions (aligned columns) in the alignment done by GLAM2. (Default: 20)
-	--iteration INT		Number of iterations done by GLAM2. More Iterations: better results, higher runtime. (Default: 10000)
-	--tomtom_treshold FLOAT	Threshold for similarity score. (Default: 0.01)
-	--best_motif INT	Get the best X motifs per cluster. (Default: 3)
-	--gap_penalty INT	Set penalty for gaps in GLAM2 (Default: 1000)
-	Moitf clustering:
-	--cluster_motif	Boolean	If 1 pipeline clusters motifs. If its 0 it does not. (Defaul: 0)
-	--edge_weight INT	Minimum weight of edges in motif-cluster-graph (Default: 5)
-	--motif_similarity_thresh FLOAT	Threshold for motif similarity score (Default: 0.00001)
-
-	Creating GTF:
-	--tissues List/String 	List of one or more keywords for tissue-/category-activity, categories must be specified as in JSON
-				config
+	Required arguments:
+		--bigwig		 Path to BigWig-file
+		--bed			 Path to BED-file
+		--genome_fasta		 Path to genome in FASTA-format
+		--motif_db		 Path to motif-database in MEME-format
+		--config		 Path to UROPA configuration file
+		--gtf_annotation	Path to gtf annotation file
+		--organism 		 Input organism [hg38 | hg19 | mm9 | mm10]
+		--out			 Output Directory (Default: './out/')
+
+	Optional arguments:
+
+		--help [0|1]		1 to show this help message. (Default: 0)
+		--gtf_merged		Path to gtf-file. If path is set the process which creates a gtf-file is skipped.
+		--tfbs_path 		Path to directory with tfbsscan output. If given tfbsscan will be skipped.
+
+		Footprint extraction:
+		--window_length INT	This parameter sets the length of a sliding window. (Default: 200)
+		--step INT		This parameter sets the number of positions to slide the window forward. (Default: 100)
+		--percentage INT	Threshold in percent (Default: 0)
+		--min_gap INT		If footprints are less than X bases apart the footprints will be merged (Default: 6)
+
+		Filter motifs:
+		--min_size_fp INT	Minimum sequence length threshold. Smaller sequences are discarded. (Default: 10)
+		--max_size_fp INT	Maximum sequence length threshold. Discards all sequences longer than this value. (Default: 200)
+		--tfbsscan_method [moods|fimo] Method used by tfbsscan. (Default: moods)
+
+		Cluster:
+		Sequence preparation/ reduction:
+		--kmer INT		K-mer length (Default: 10)
+		--aprox_motif_len INT	Motif length (Default: 10)
+		--motif_occurrence FLOAT	Percentage of motifs over all sequences. Use 1 (Default) to assume every sequence contains a motif.
+		--min_seq_length Interations	Remove all sequences below this value. (Default: 10)
+		Clustering:
+		--global INT		Global (=1) or local (=0) alignment. (Default: 0)
+		--identity FLOAT	Identity threshold. (Default: 0.8)
+		--sequence_coverage INT	Minimum aligned nucleotides on both sequences. (Default: 8)
+		--memory INT		Memory limit in MB. 0 for unlimited. (Default: 800)
+		--throw_away_seq INT	Remove all sequences equal or below this length before clustering. (Default: 9)
+		--strand INT		Align +/+ & +/- (= 1). Or align only +/+ (= 0). (Default: 0)
+
+		Motif estimation:
+		--min_seq INT 		Sets the minimum number of sequences required for the FASTA-files given to GLAM2. (Default: 100)
+		--motif_min_key INT	Minimum number of key positions (aligned columns) in the alignment done by GLAM2. (Default: 8)
+		--motif_max_key INT	Maximum number of key positions (aligned columns) in the alignment done by GLAM2. (Default: 20)
+		--iteration INT		Number of iterations done by GLAM2. More Iterations: better results, higher runtime. (Default: 10000)
+		--tomtom_treshold FLOAT	Threshold for similarity score. (Default: 0.01)
+		--best_motif INT	Get the best X motifs per cluster. (Default: 3)
+		--gap_penalty INT	Set penalty for gaps in GLAM2 (Default: 1000)
+		--seed Set seed for GLAM2 (Default: 123456789)
+		Moitf clustering:
+		--cluster_motif	Boolean	If 1 pipeline clusters motifs. If its 0 it does not. (Defaul: 0)
+		--edge_weight INT	Minimum weight of edges in motif-cluster-graph (Default: 5)
+		--motif_similarity_thresh FLOAT	Threshold for motif similarity score (Default: 0.00001)
+
+		Creating GTF:
+		--tissues List/String 	List of one or more keywords for tissue-/category-activity, categories must be specified as in JSON
+					config
+		Evaluation:
+		--max_uropa_runs INT	 Maximum number UROPA runs running parallelized (Default: 10)
 All arguments can be set in the configuration files
- ```
 """
-System.exit(2)
+	System.exit(2)
 } else {
 	Channel.fromPath(params.bigwig).map {it -> [it.simpleName, it]}.set {bigwig_input}
-	Channel.fromPath(params.bed).into {bed_input; bed_for_tfbsscan}
+	Channel.fromPath(params.bed).into {bed_input; bed_for_tfbsscan; bed_for_rescan}
 	Channel.fromPath(params.genome_fasta).into {fa_overlap; fa_scan; fa_overlap_2}
 	Channel.fromPath(params.motif_db).into {db_for_motivscan; db_for_tomtom}
 	Channel.fromPath(params.config).set {config}
+	if (params.gtf_annotation != ""){
+		Channel.fromPath(params.gtf_annotation).set {gtf_to_merge}
+	}
 	if (params.tfbs_path != ""){
 		known_tfbs = file(params.tfbs_path).toAbsolutePath()
 	}
 }
 
-/*
-Checking for parameter input!
-*/
-int_params = ["window_length", "step", "min_size_fp", "max_size_fp", "kmer",
-             "aprox_motif_len", "motif_occurence", "min_seq_length", "global",
-             "sequence_coverage", "memory", "throw_away_seq", "strand",
-             "min_seq", "motif_min_key", "motif_max_key", "iteration",
-             "edge_weight", "best_motif", "min_gap", "gap_penalty", "edge_weight"]
-req_params = ["bigwig", "bed", "genome_fasta", "motif_db", "config"]
-
-valid_organism = ["hg38", "hg19", "mm9", "mm10"]
-valid_tfbsscan_methods = ["moods","fimo"]
-
 params.each { key, value ->
+	if (!(all_params.contains(key))){
+		println("Warning: Parameter $key is unknown. Please check for typos or the parameter list!")
+	}
   if(int_params.contains(key)) {
     if (!("${value}" ==~ /\d+/ )){
       println("ERROR: $key needs to be an Integer")
       System.exit(2)
     }
   }
-  if(req_params.contains(key)) {
+  if(file_params.contains(key) || (key == "gtf_merged" && value != "") ) {
     if(!file(value).exists()) {
       println("ERROR: $key not found. Please check the given path.")
       System.exit(2)
     }
   }
 }
+
 if (!("${params.identity}" ==~ /^0\.[8-9][[0-9]*]?|^1(\.0)?/ )){
   println("ERROR: --identity needs to be float in range 0.8 to 1.0")
   System.exit(2)
@@ -205,7 +250,7 @@ process footprint_extraction {
 
 	tag{name}
 	publishDir "${params.out}/1.1_footprint_extraction/", mode: 'copy', pattern: '*.bed'
-	publishDir "${params.out}/1.1_footprint_extraction/log", mode: 'copy', pattern: '*.log'
+	publishDir "${params.out}/log", mode: 'copy', pattern: '*.log'
 
 	input:
 	set name, file (bigWig), file (bed) from footprint_in
@@ -228,7 +273,8 @@ Find postitions of known tfbs with tfbsscan and discard the overlaps with compar
 */
 process overlap_with_known_TFBS {
 	//conda "${path_env}"
-	publishDir "${params.out}/1.2_filter_motifs", mode :'copy'
+	publishDir "${params.out}/1.2_filter_motifs", mode :'copy', pattern: "${name}_unknown.bed"
+	publishDir "${params.out}/log", mode: 'copy', pattern: '*.log'
 	tag{name}
 	errorStrategy 'finish'
 
@@ -237,18 +283,25 @@ process overlap_with_known_TFBS {
 
 	output:
 	set name, file ("${name}_unknown.bed") into bed_for_reducing
-	file ('./known_tfbs/*.bed') optional true
-	file ('*.stats')
+	file ('*.log')
 
 	script:
 	if(params.tfbs_path == ""){
 		"""
+		if [[ ! -x "${path_bin}/1.2_filter_motifs/compareBed.sh" ]]
+		then
+			chmod +x ${path_bin}/1.2_filter_motifs/compareBed.sh
+		fi
 		python ${path_bin}/1.2_filter_motifs/tfbsscan.py --use ${params.tfbsscan_method} --core ${params.threads} -m ${db} -g ${fasta} -o ./known_tfbs -b ${bed_peaks}
 		${path_bin}/1.2_filter_motifs/compareBed.sh --data ${bed_footprints} --motifs ./known_tfbs --fasta ${fasta} -o ${name}_unknown.bed -min ${params.min_size_fp} -max ${params.max_size_fp}
 		cp -r ./known_tfbs/ ${params.out}/1.2_filter_motifs/
 		"""
 	} else {
 		"""
+		if [[ ! -x "${path_bin}/1.2_filter_motifs/compareBed.sh" ]]
+		then
+			chmod +x ${path_bin}/1.2_filter_motifs/compareBed.sh
+		fi
 		${path_bin}/1.2_filter_motifs/compareBed.sh --data ${bed_footprints} --motifs ${known_tfbs} --fasta ${fasta} -o ${name}_unknown.bed -min ${params.min_size_fp} -max ${params.max_size_fp}
 		"""
 	}
@@ -265,7 +318,7 @@ process reduce_sequence {
 	tag{name}
 
 	publishDir "${params.out}/2.1_clustering/reduced_bed/", mode: 'copy', pattern: '*.bed'
-	publishDir "${params.out}/2.1_clustering/log", mode: 'copy', pattern: '*.log'
+	publishDir "${params.out}/log", mode: 'copy', pattern: '*.log'
 
 	input:
 	set name, file (bed) from bed_for_reducing
@@ -276,7 +329,7 @@ process reduce_sequence {
 
 	script:
 	"""
-	Rscript ${path_bin}/2.1_clustering/reduce_sequence.R -i ${bed} -k ${params.kmer} -m ${params.aprox_motif_len} -o ${name}_reduced.bed -t ${params.threads} -f ${params.motif_occurence} -s ${params.min_seq_length} --summary reduce_sequence.log
+	Rscript ${path_bin}/2.1_clustering/reduce_sequence.R -i ${bed} -k ${params.kmer} -m ${params.aprox_motif_len} -o ${name}_reduced.bed -t ${params.threads} -f ${params.motif_occurrence} -s ${params.min_seq_length} --summary reduce_sequence.log
 	"""
 }
 
@@ -288,13 +341,14 @@ process clustering {
 	//conda "${path_env}"
 
 	publishDir "${params.out}/2.1_clustering/", mode: 'copy', pattern: '*.bed'
-	publishDir "${params.out}/2.1_clustering/log", mode: 'copy', pattern: '*.log'
+	publishDir "${params.out}/log", mode: 'copy', pattern: '*.log'
 
 	input:
 	set name, file (bed) from bed_for_clustering
 
 	output:
 	set name, file ('*.bed') into bed_for_motif_esitmation
+	file ('*.bed') into bed_for_venn
 	file ('*.log')
 
 	script:
@@ -310,21 +364,21 @@ Converting BED-File to one FASTA-File per cluster
 process bed_to_clustered_fasta {
 
 	//conda "${path_env}"
-	publishDir "${params.out}/2.2_motif_estimation/fasta/", mode: 'copy', pattern: '*.FASTA'
-    tag{name}
+	publishDir "${params.out}/2.2_motif_estimation/01_fasta/", mode: 'copy', pattern: '*.FASTA'
+  tag{name}
 
-    input:
-    set name, file (bed) from bed_for_motif_esitmation
+  input:
+  set name, file (bed) from bed_for_motif_esitmation
 
-    output:
-    file ('*.FASTA') into fasta_for_glam2
-    file ('*.FASTA') into fasta_for_motif_cluster
+  output:
+  file ('*.FASTA') into fasta_for_glam2
+  file ('*.FASTA') into fasta_for_motif_cluster
 	file ('*.log') into log22
 
-    script:
-    """
-    Rscript ${path_bin}/2.2_motif_estimation/bed_to_fasta.R -i ${bed} -p ${name} -m ${params.min_seq}
-    """
+  script:
+  """
+  Rscript ${path_bin}/2.2_motif_estimation/bed_to_fasta.R -i ${bed} -p ${name} -m ${params.min_seq}
+  """
 }
 
 
@@ -339,35 +393,36 @@ Generating Motifs through alignment and scoring best local matches.
 */
 process glam2 {
 
-    tag{name}
-	publishDir "${params.out}/2.2_motif_estimation/glam2/", mode: 'copy'
-	//conda "${path_env}"
+  tag{name}
+	publishDir "${params.out}/2.2_motif_estimation/02_glam2/", mode: 'copy'
+	conda "${meme_env}"
 
-    input:
-    set name, file (fasta) from fasta_for_glam2
+  input:
+  set name, file (fasta) from fasta_for_glam2
 
 	output:
-    file("${name}/*.meme") into meme_to_merge
-	set name, file("${name}/*.meme") into meme_for_tomtom
-	set name, file("${name}/*.meme") into meme_to_search_in_merged
-	set name, file("${name}/*.meme") into meme_for_filter
+  file("${name}/*.meme") into meme_to_merge
+	set name, file("${name}/*.meme") into meme_for_tomtom, meme_to_search_in_merged, meme_for_filter
 	set name, file("${name}/*.txt") into glam_for_seq
+	set name, file("${name}/logo[1-9]*.png") into motif_png
 	file ('*')
 
-    script:
-    """
-    glam2 n ${fasta} -O ./${name}/ -E ${params.gap_penalty} -J ${params.gap_penalty} -a ${params.motif_min_key} -b ${params.motif_max_key} -z 5 -n ${params.iteration}
-    """
+  script:
+  """
+  glam2 n ${fasta} -O ./${name}/ -E ${params.gap_penalty} -J ${params.gap_penalty} -a ${params.motif_min_key} -b ${params.motif_max_key} -z 5 -n ${params.iteration} -s ${params.seed}
+  """
 }
 
+//motif_png.toSortedList()).println()
+
 /*
 Combining all MEME-files to one big MEME-file.
 The paths are sorted numerically depending on the cluster ID.
 */
 process merge_meme {
 
-	publishDir "${params.out}/2.2_motif_estimation/cluster_motifs/merged_meme/", mode: 'copy'
-	//conda "${path_env}"
+	publishDir "${params.out}/2.2_motif_estimation/03_motif_cluster/", mode: 'copy'
+	// conda "/home/rwiegan/.conda/envs/meme"
 
 	input:
 	val (memelist) from meme_to_merge.toList()
@@ -377,6 +432,7 @@ process merge_meme {
 
 	when:
 	params.cluster_motif == 1
+	disable_mo_clu == 0
 
 	script:
 	//sorting
@@ -386,6 +442,7 @@ process merge_meme {
 	//create list for bash
 	meme_list = meme_sorted_full.toString().replaceAll(/\,|\[|\]/,"")
 	"""
+	set +u; source activate meme;
 	meme2meme ${meme_list} > merged_meme.meme
 	"""
 }
@@ -396,11 +453,11 @@ to_find_similar_motifs = meme_to_search_in_merged.combine(merged_meme)
 Running Tomtom on merged meme-files.
 Output table has the information which clusters are similar to each other.
 */
-process find_similar_motifs {
+process compare_cluster {
 
 	tag{name}
-	publishDir "${params.out}/2.2_motif_estimation/cluster_motifs/cluster_similarity/", mode: 'copy'
-	//conda "${path_env}"
+	publishDir "${params.out}/2.2_motif_estimation/03_motif_cluster/01_tomtom/", mode: 'copy'
+	conda "${meme_env}"
 
 	input:
 	set name, file (meme) ,file (merged_meme) from to_find_similar_motifs
@@ -423,7 +480,7 @@ Label first column of TSV-file with Cluster ID
 process label_cluster {
 
 	tag{name}
-	//conda "${path_env}"
+	publishDir "${params.out}/2.2_motif_estimation/03_motif_cluster/02_labeled_tsv/", mode: 'copy'
 
 	input:
 	set name, file (tsv) from motif_similarity
@@ -435,17 +492,19 @@ process label_cluster {
 	params.cluster_motif == 1
 
 	script:
+	id = name.split('_')[-1].toInteger() + 1
 	"""
-	Rscript ${path_bin}/2.2_motif_estimation/label_cluster.R -i ${tsv} -o ${name}_labeled.tsv
+	tail -n +2 ${tsv} | awk -F \$'\t' 'BEGIN {OFS = FS}(\$1=\$1".${id}")' > ${name}_labeled.tsv
 	"""
 }
 
+
 /*
 Merging tsv files_for_merge_fasta
 */
 process merge_labeled_tsv {
 
-	publishDir "${params.out}/2.2_motif_estimation/cluster_motifs/", mode: 'copy'
+	publishDir "${params.out}/2.2_motif_estimation/03_motif_cluster/", mode: 'copy'
 
 	input:
 	val (tsvlist) from labeled_tsv.toSortedList { it.toString().tokenize('_')[-2] as Integer }
@@ -459,7 +518,7 @@ process merge_labeled_tsv {
 	script:
 	tsvs = tsvlist.toString().replaceAll(/\,|\[|\]/,"")
 	"""
-	echo -e 'Query_ID\tTarget_ID\tOptimal_offset\tp-value\tE-value\tq-value\tOverlap\tQuery_consensus\tTarget_consensus\tOrientation'> merged_labeled.tsv
+	echo -e 'Query_ID\tTarget_ID\tOptimal_offset\tp-value\tE-value\tq-value\tOverlap\tQuery_consensus\tTarget_consensus\tOrientation' > merged_labeled.tsv
 	for i in $tsvs; do
 		cat \$i >> merged_labeled.tsv
 	done
@@ -475,23 +534,23 @@ Merging FASTA-files of similar clusters
 process merge_fasta {
 
 	//conda "${path_env}"
-	publishDir "${params.out}/2.2_motif_estimation/cluster_motifs/merged_fasta/", mode: 'copy'
+	publishDir "${params.out}/2.2_motif_estimation/03_motif_cluster/03_merged_fasta/", mode: 'copy'
 
 	input:
 	set file (motiv_sim), val (fasta_list) from files_for_merge_fasta
 
 	output:
 	file ('*.fasta') into motif_clustered_fasta_list
-  	file('*.png')
+  file('*')
 
-  	when:
-  	params.cluster_motif == 1
+  when:
+  params.cluster_motif == 1
 
 	script:
-  	fa_sorted = fasta_list.sort(false) { it.getBaseName().tokenize('_')[-1] as Integer }
-  	fastalist = fa_sorted.toString().replaceAll(/\s|\[|\]/,"")
+  fa_sorted = fasta_list.sort(false) { it.getBaseName().tokenize('_')[-1] as Integer }
+  fastalist = fa_sorted.toString().replaceAll(/\s|\[|\]/,"")
 	"""
-	Rscript ${path_bin}/2.2_motif_estimation/merge_similar_clusters.R ${motiv_sim} ${fastalist} ${params.edge_weight}
+	Rscript ${path_bin}/2.2_motif_estimation/merge_similar_clusters.R -i ${motiv_sim} -l ${fastalist} -w ${params.edge_weight}
 	"""
 }
 
@@ -503,26 +562,25 @@ Run GLAM2 on emrged FASTA-files
 */
 process clustered_glam2 {
 
-	publishDir "${params.out}/2.2_motif_estimation/cluster_motifs/glam2/", mode: 'copy'
+	publishDir "${params.out}/2.2_motif_estimation/03_motif_cluster/04_glam2/", mode: 'copy'
 	tag{name}
-	//conda "${path_env}"
+	conda "${meme_env}"
 
 	input:
 	file (fasta) from motif_clustered_fasta_flat
 
 	output:
-	set name, file ("/${name}/*.meme") into clustered_meme_for_tomtom
-  	set name, file ("/${name}/*.meme") into clustered_meme_for_filter
-	set name, file("/${name}/glam2.txt") into clust_glam_for_seq
-  	file('*')
+	set name, file ("./${name}/glam2.meme") into clustered_meme_for_tomtom, clustered_meme_for_filter
+	set name, file("./${name}/glam2.txt") into clust_glam_for_seq
+  file('*')
 
 	when:
 	params.cluster_motif == 1
 
 	script:
-  	name = fasta.getBaseName()
+  name = fasta.getBaseName()
 	"""
-	glam2 n ${fasta} -O ./${name}/ -E ${params.gap_penalty} -J ${params.gap_penalty} -a ${params.motif_min_key} -b ${params.motif_max_key} -z 5 -n ${params.iteration}
+	glam2 n ${fasta} -O ./${name}/ -E ${params.gap_penalty} -J ${params.gap_penalty} -a ${params.motif_min_key} -b ${params.motif_max_key} -z 5 -n ${params.iteration} -s ${params.seed}
 	"""
 }
 
@@ -547,20 +605,19 @@ Tomtom searches motifs in databases.
 */
 process tomtom {
 
-    tag{name}
-	publishDir "${params.out}/2.2_motif_estimation/tomtom/", mode: 'copy'
-	//conda "${path_env}"
+	tag{name}
+	publishDir "${params.out}/2.2_motif_estimation/04_tomtom/", mode: 'copy'
 
-    input:
-    set name, file (meme) from for_tomtom
+  input:
+  set name, file (meme) from for_tomtom
 
-    output:
-    set name, file ('*.tsv') into tsv_for_filter
+  output:
+  set name, file ('*.tsv') into tsv_for_filter
 
-    script:
-    """
-    tomtom ${meme} ${params.motif_db} -thresh ${params.tomtom_treshold} -mi 1 -text | sed '/^#/ d' | sed '/^\$/d' > ${name}_known_motif.tsv
-    """
+  script:
+  """
+  tomtom ${meme} ${params.motif_db} -thresh ${params.tomtom_treshold} -mi 1 -text | sed '/^#/ d' | sed '/^\$/d' > ${name}_known_motif.tsv
+  """
 }
 
 
@@ -582,7 +639,7 @@ log22.combine(count_cluster).combine(count_cluster_before_filter ).set {log22_fi
 //Writes log file for 2.2_motif_estimation
 process write_log_for_motif_estimation {
 
-	publishDir "${params.out}/2.2_motif_estimation/log/", mode: 'copy'
+	publishDir "${params.out}/log/", mode: 'copy'
 
 	input:
 	set file (logfile), after_filter, before_filter from log22_final
@@ -598,20 +655,20 @@ process write_log_for_motif_estimation {
 	"""
 }
 
+
 //If channel 'check' is empty print errormessage
 process check_for_unknown_motifs {
-    echo true
-
-    input:
-    val x from check.ifEmpty('EMPTY')
+  echo true
 
-    when:
-    x == 'EMPTY'
+  input:
+  val x from check.ifEmpty('EMPTY')
 
-    """
-    echo '>>> STOPPED: No unknown Motifs were found.'
-    """
+  when:
+  x == 'EMPTY'
 
+  """
+  echo '>>> STOPPED: No unknown Motifs were found.'
+  """
 }
 
 
@@ -620,13 +677,13 @@ process get_best_motif {
 
 	//conda "${path_env}"
 	tag{name}
-	publishDir "${params.out}/2.2_motif_estimation/best_unknown_motifs/", mode: 'copy'
+	publishDir "${params.out}/2.2_motif_estimation/05_best_unknown_motifs/motifs/", mode: 'copy'
 
 	input:
 	set name, file(meme), file(tsv) from meme_for_scan
 
 	output:
-	set name, file('*_best.meme') into best_motif
+	set name, file('*_best.meme') into best_motif, best_motif_for_venn
 
 	script:
 	cluster_id = name.split('_')[-1]
@@ -639,56 +696,73 @@ process get_best_motif {
 /*
 Get the sequence names used to create best X Motifs as a JSON-file
 */
-process get_best_motif_seq {
+process get_best_motif_seq_id {
 
 	//conda "${path_env}"
 	tag{name}
-	publishDir "${params.out}/2.2_motif_estimation/best_unknown_motifs/motif_sequences/", mode: 'copy'
+	publishDir "${params.out}/2.2_motif_estimation/05_best_unknown_motifs/motif_sequence_ids/", mode: 'copy'
 
 	input:
 	set name, file (txt) from seq_to_json
 
 	output:
-	file("${name}_seq.json")
+	set name, file("${name}_seq.json") into seq_id_for_venn
 
 	script:
 	cluster_id = name.split('_')[-1]
 	"""
-	Rscript ${path_bin}/2.2_motif_estimation/get_motif_seq.R -i ${txt} -o ${name}_seq.json -n ${params.best_motif} -t ./motif_seq_tmp/cluster_${cluster_id} -c ${cluster_id}
+	Rscript ${path_bin}/2.2_motif_estimation/get_motif_seq_id.R -i ${txt} -o ${name}_seq.json -n ${params.best_motif} -t ./motif_seq_tmp/cluster_${cluster_id} -c ${cluster_id}
 	"""
 }
 
 
-best_motif.combine(fa_scan).set {files_for_genome_scan}
+best_motif.combine(fa_scan).combine(bed_for_rescan).set {files_for_re_scan}
 
 /*
-process genome_scan {
+*/
+process re_scan {
 	//conda "${path_env}"
+	tag{name}
+	publishDir "${params.out}/3.2_evaluation/rescan/${name}", mode: 'copy'
 
 	input:
-	set name, file(meme), file(fasta) from files_for_genome_scan
+	set name, file(meme), file(fasta), file (peaks) from files_for_re_scan
 
 	output:
-	file ('.bed') into bed_for_uropa, bed_for_cluster_quality
+	set name, val ('link') into link_re_scan_to_venn
+	file ("*.bed") into re_scan_for_uropa
 
 	script:
 	"""
+	mkdir -p ./tmp/re_scan/${name}
+	python ${path_bin}/1.2_filter_motifs/tfbsscan.py -m ${meme} -g ${fasta} -b ${peaks} -o . --cores 4 --use moods --resolve_overlaps
+	mkdir -p ${workflow.workDir}/tmp/re_scan_final
+	cp ./Cluster*.bed ${workflow.workDir}/tmp/re_scan_final
 	"""
 }
 
 
-process cluster_quality {
+seq_id_for_venn.combine(link_re_scan_to_venn, by: 0).combine(bed_for_venn).set {for_venn}
+
+/*
+*/
+process venn {
+
+	tag{name}
+	publishDir "${params.out}/3.2_evaluation/venn", mode: 'copy'
 
 	input:
-	file (bed) from bed_for_cluster_quality
+	set name, file(seq_ids), link, file (clustered_bed) from for_venn
 
 	output:
-	file ('*.bed') into bed_for_final_filter
+	file ('*.pdf')
 
 	script:
+	cluster_id = name.split('_')[-1]
 	"""
+	Rscript ${path_bin}/3.2_evaluation/venn.R -i ${workflow.workDir}/tmp/re_scan_final/ -p Cluster_${cluster_id}_ -l ${seq_ids} -c ${clustered_bed} -o .
 	"""
-} */
+}
 
 
 process create_GTF {
@@ -700,7 +774,7 @@ process create_GTF {
 	file ('*.gtf') into gtf
 
 	when:
-	params.gtf_path == ""
+	params.gtf_merged == ""
 
 	script:
 	"""
@@ -708,60 +782,85 @@ process create_GTF {
 	"""
 }
 
-if (params.gtf_path == "") {
-	gtf_for_uropa = gtf
-} else {
-	gtf_for_uropa = Channel.fromPath(params.gtf_path)
+//TODO put operators in process mwerge_gtf
+gtf_list = Channel.empty()
+if (params.gtf_merged == "") {
+	gtf.combine(gtf_to_merge).set {gtf_list}
 }
 
-/*
-bed_for_final_filter.combine(gtf_for_uropa).set {uropa_in}
-
-
-// Create configuration file for UROPA.
-// Takes template and replaces bed- and gtf-placeholders with actual paths.
-process create_uropa_config {
+process merge_gtf {
 
-	publishDir '/mnt/agnerds/Rene.Wiegandt/10_Master/', mode: 'copy'
+	stageInMode 'copy'
+	publishDir "${params.out}/3.1_create_gtf/", mode: 'copy'
 
 	input:
-	set val(bed), val(gtf) from uropa_in.toList()
-	file (conf) from config
+	set file (gtf_reg), file (gtf_genes) from gtf_list
 
 	output:
-	file ('uropa.config') into uropa_config
+	file ('merged.gtf') into merged_gtf
+
+	when:
+	params.gtf_merged == ""
 
 	script:
 	"""
-  	sed -- 's/placeholder_gtf/${gtf}/g; s/placeholder_bed/${bed}/g' ${conf} > uropa.config.final
+	cat ${gtf_genes} > merged.gtf
+	cat ${gtf_reg} >> merged.gtf | sort -k 1,1 -k 4,4n -k 5,5n
 	"""
 }
 
 
-process UROPA {
+if (params.gtf_merged == "") {
+	gtf_for_uropa = merged_gtf
+} else {
+	Channel.fromPath( params.gtf_merged ).set {gtf_for_uropa}
+}
+
+re_scan_for_uropa.flatten().combine(gtf_for_uropa).combine(config).set {uropa_in}
+
+// Create configuration file for UROPA.
+// Takes template and replaces bed- and gtf-placeholders with actual paths.
+process create_uropa_config {
+
+	tag{bed}
+	publishDir "${params.out}/3.2_evaluation/uropa/config", mode: 'copy'
 
 	input:
-	file (config) from uropa_config
+	set val(bed), val (gtf), file (conf) from uropa_in
 
 	output:
-	set file ("*_allhits.txt"), file ("*_finalhits.txt") into uropa_for_filter
+	set name, file ("${name}_uropa.config") into uropa_config
 
 	script:
+	name = bed.getBaseName()
 	"""
+  	sed -- 's=placeholder_gtf=${gtf}=g; s=placeholder_bed=${bed}=g' ${conf} > ${name}_uropa.config
 	"""
 }
 
 
-process filter {
+process UROPA {
+
+	tag{config}
+	maxForks params.max_uropa_runs
+	publishDir "${params.out}/3.2_evaluation/uropa", mode: 'copy', pattern: "*.txt"
+	publishDir "${params.out}/3.2_evaluation/uropa/summary", mode: 'copy', pattern: "*.pdf"
+	publishDir "${params.out}/log/uropa/${name}", mode: 'copy', pattern: "*.log"
 
 	input:
+	set name, file (config) from uropa_config
 
 	output:
+	set file ("*_allhits.txt"), file ("*_finalhits.txt") into uropa_for_filter
+	file("*.pdf")
+	file("*.log")
 
 	script:
 	"""
+	uropa -i ${config} -l uropa.log -t 4 -p ${name} -s -d
 	"""
-} */
+}
+
 
 workflow.onComplete {
 log.info"""
@@ -774,4 +873,6 @@ workDir     : ${workflow.workDir}
 exit status : ${workflow.exitStatus}
 Error report: ${workflow.errorReport ?: '-'}
 """
+f = file(workflow.workDir + '/tmp/re_scan_final')
+f.deleteDir()
 }