diff --git a/bin/merge_similar_clusters.R b/bin/merge_similar_clusters.R index e72fa03..03b8cf1 100644 --- a/bin/merge_similar_clusters.R +++ b/bin/merge_similar_clusters.R @@ -1,7 +1,7 @@ #!/usr/bin/env Rscript # Merging FASTA-files, which motifs are similar. -# +# # @parameter tsv_in Path to TSV file generated by Tomtom. # The input for Tomtom is a from all clusters merged meme-file. # @parameter file_list Numerically sorted whitespace separated list of absolute fasta-file paths @@ -10,7 +10,7 @@ args = commandArgs(trailingOnly = TRUE) -tsv_in <- args[1] +tsv_in <- args[1] file_list <- args[2] min_weight <- args[3] @@ -38,7 +38,7 @@ edgelist <- sim_not_unique[query_cluster != target_cluster] g <- igraph::graph_from_edgelist(as.matrix(edgelist)) # converting graph to adjacency matrix adj_matrix <- igraph::get.adjacency(g, names = T) -# generating weighted graph from adjacency matrix +# generating weighted graph from adjacency matrix g_adj <- igraph::graph_from_adjacency_matrix(adj_matrix, weighted = T) # get subgraphs from graph with edges of weight > min_weight @@ -47,7 +47,11 @@ png('motif_clusters.png') plot(s1) dev.off() clust <- igraph::clusters(s1) - +if (clust$no < 1){ + b <- lapply(files, function(f){ + system(paste("cat",f,">",basename(f))) + }) +} # merge FASTA-files depending on the clustered graphs a <- lapply(seq(from = 1, to = clust$no, by = 1), function(i){ cl <- as.vector(which(clust$membership %in% c(i))) diff --git a/pipeline.nf b/pipeline.nf index f5358a6..a39616a 100644 --- a/pipeline.nf +++ b/pipeline.nf @@ -124,7 +124,7 @@ All arguments can be set in the configuration files System.exit(2) } else { Channel.fromPath(params.bigwig).map {it -> [it.simpleName, it]}.set {bigwig_input} - Channel.fromPath(params.bed).set {bed_input} + Channel.fromPath(params.bed).into {bed_input; bed_for_tfbsscan} Channel.fromPath(params.genome_fasta).into {fa_overlap; fa_scan; fa_overlap_2} Channel.fromPath(params.motif_db).into {db_for_motivscan; db_for_tomtom} Channel.fromPath(params.config).set {config} @@ -205,6 +205,7 @@ process footprint_extraction { """ } +for_tfbs = fa_overlap.combine(db_for_motivscan).combine(bed_for_tfbsscan) /* @@ -216,8 +217,7 @@ process extract_known_TFBS { publishDir "${params.out}/known_TFBS/", mode: 'copy', pattern: '*.bed' input: - file (fasta) from fa_overlap - file (db) from db_for_motivscan + set file (fasta), file (db), file (bed) from for_tfbs output: val ('done') into known_TFBS_for_overlap @@ -227,7 +227,7 @@ process extract_known_TFBS { script: """ - python ${path_bin}/tfbsscan.py --use moods --core ${params.threads} -m ${db} -g ${fasta} -o ${params.create_known_tfbs_path} + python ${path_bin}/tfbsscan.py --use moods --core ${params.threads} -m ${db} -g ${fasta} -o ${params.create_known_tfbs_path} -b ${bed} """ } @@ -417,7 +417,7 @@ Merging FASTA-files of similar clusters process merge_fasta { conda "${path_env}" publishDir "${params.out}/esimated_motifs/merged_fasta/", mode: 'copy' - + echo true input: set file (motiv_sim), val (fasta_list) from files_for_merge_fasta