From b2772174e287a56728a6ec841c37418026b31695 Mon Sep 17 00:00:00 2001
From: msbentsen
Date: Fri, 22 Mar 2019 11:38:20 +0100
Subject: [PATCH] Moved snakemake pipeline to separate repo

---
 snakemake_pipeline/TOBIAS.snake             | 178 ------------------
 snakemake_pipeline/TOBIAS_example.config    |  32 ----
 snakemake_pipeline/environments/macs.yaml   |   8 -
 snakemake_pipeline/environments/tobias.yaml |  27 ---
 .../snakefiles/footprinting.snake           | 103 ----------
 snakemake_pipeline/snakefiles/helper.snake  |  46 -----
 .../snakefiles/preprocessing.snake          |  98 ----------
 .../snakefiles/visualization.snake          |  95 ----------
 8 files changed, 587 deletions(-)
 delete mode 100644 snakemake_pipeline/TOBIAS.snake
 delete mode 100644 snakemake_pipeline/TOBIAS_example.config
 delete mode 100644 snakemake_pipeline/environments/macs.yaml
 delete mode 100644 snakemake_pipeline/environments/tobias.yaml
 delete mode 100644 snakemake_pipeline/snakefiles/footprinting.snake
 delete mode 100644 snakemake_pipeline/snakefiles/helper.snake
 delete mode 100644 snakemake_pipeline/snakefiles/preprocessing.snake
 delete mode 100644 snakemake_pipeline/snakefiles/visualization.snake

diff --git a/snakemake_pipeline/TOBIAS.snake b/snakemake_pipeline/TOBIAS.snake
deleted file mode 100644
index e7eef8e..0000000
--- a/snakemake_pipeline/TOBIAS.snake
+++ /dev/null
@@ -1,178 +0,0 @@
-"""
-Upper level TOBIAS snake
-"""
-
-import os
-import sys
-import subprocess
-import itertools
-
-#Set config
-if workflow.overwrite_configfile is not None:
-    configfile: str(workflow.overwrite_configfile)
-else:
-    configfile: 'TOBIAS.config'
-CONFIGFILE = str(workflow.overwrite_configfile)
-
-include: "snakefiles/helper.snake"
-#shell.prefix("")
-
-#-------------------------------------------------------------------------------#
-#------------------------- CHECK FORMAT OF CONFIG FILE -------------------------#
-#-------------------------------------------------------------------------------#
-
-required = [("data",),
-            ("run_info",),
-            ("run_info", "organism"),
-            ("run_info", "fasta"),
-            ("run_info", "blacklist"),
-            ("run_info", "gtf"),
-            ("run_info", "motifs"),
-            ("run_info", "output"),
-            ]
-
-#Check that all required keys exist and contain information
-for key_list in required:
-    lookup_dict = config
-    for key in key_list:
-        try:
-            lookup_dict = lookup_dict[key]
-            if lookup_dict is None:
-                print("ERROR: Missing input for key {0}".format(key_list))
-        except (KeyError, TypeError):
-            print("ERROR: Could not find key(s) \"{0}\" in configfile {1}. Please check that your configfile has the right format for TOBIAS.".format(":".join(key_list), CONFIGFILE))
-            sys.exit()
-
-#Check that there is at least one condition with bamfiles
-if len(config["data"]) > 0:
-    for condition in config["data"]:
-        if len(config["data"][condition]) == 0:
-            print("ERROR: Could not find any bamfiles in \"{0}\" in configfile {1}".format(":".join(("data", condition)), CONFIGFILE))
-else:
-    print("ERROR: Could not find any conditions (\"data: {{condition}}\") in configfile {0}".format(CONFIGFILE))
-    sys.exit()
-
-
-#-------------------------------------------------------------------------------#
-#------------------------- WHICH FILES/INFO WERE INPUT? ------------------------#
-#-------------------------------------------------------------------------------#
-
-input_files = []
-
-#Files related to experimental data (bam)
-CONDITION_IDS = list(config["data"].keys())
-for condition in CONDITION_IDS:
-    if not isinstance(config["data"][condition], list):
-        config['data'][condition] = [config['data'][condition]]
-    input_files.extend(config['data'][condition])
-
-
-#Flatfiles independent of experimental data (run_info)
-FASTA = config['run_info']['fasta']
-BLACKLIST = config['run_info']['blacklist']
-GTF = config['run_info']['gtf']
-OUTPUTDIR = config['run_info']["output"]
-MOTIFDIR = config['run_info']['motifs']
-
-input_files.extend([FASTA, BLACKLIST, GTF])
-
-
-#---------- Test that input files exist -----------#
-for file in input_files:
-    if file is not None:
-        full_path = os.path.abspath(file)
-        if not os.path.exists(full_path):
-            sys.exit("ERROR: The following file given in config does not exist: {0}".format(full_path))
-
-
-
-#-------------------------------------------------------------------------------#
-#------------------------ WHICH FILES SHOULD BE CREATED? -----------------------#
-#-------------------------------------------------------------------------------#
-
-output_files = []
-
-#--------------------------------- MOTIFS --------------------------------------#
-#Identify IDs of motifs
-files = os.listdir(MOTIFDIR)
-MOTIF_FILES = {}
-for file in files:
-    full_file = os.path.join(MOTIFDIR, file)
-    with open(full_file) as f:
-        for line in f:
-            if line.startswith("MOTIF"):        #MEME format header
-                columns = line.rstrip().split()
-                ID = columns[2] + "_" + columns[1]
-                ID = filafy(ID)
-                MOTIF_FILES[ID] = full_file
-            elif line.startswith(">"):          #JASPAR/PFM format header
-                columns = line.replace(">", "").rstrip().split()
-                ID = columns[1] + "_" + columns[0]
-                ID = filafy(ID)
-                MOTIF_FILES[ID] = full_file
-
-TF_IDS = list(MOTIF_FILES.keys())
-
-
-#----------------------------------- OUTPUT -----------------------------------#
-
-id2bam = {condition:{} for condition in CONDITION_IDS}
-
-for condition in CONDITION_IDS:
-
-    config_bams = config['data'][condition]
-    sampleids = [os.path.splitext(os.path.basename(bam))[0] for bam in config_bams]
-    id2bam[condition] = {sampleids[i]:config_bams[i] for i in range(len(sampleids))}    #link sample ids to bams
-
-
-PLOTNAMES = expand("{condition}_{plotname}", condition=CONDITION_IDS, plotname=["aggregate"])
-if len(CONDITION_IDS) > 1:
-    PLOTNAMES.extend(["heatmap_comparison", "aggregate_comparison_all", "aggregate_comparison_bound"])
-
-output_files.append(expand(os.path.join(OUTPUTDIR, "footprinting", "{condition}_footprints.bw"), condition=CONDITION_IDS))
-
-#output_files.append(os.path.join(OUTPUTDIR, "TFBS", "bindetect_results.txt"))
-#output_files.append(os.path.join(OUTPUTDIR, "overview", "bindetect_results.txt"))
-
-#Visualization
-output_files.extend(expand(os.path.join(OUTPUTDIR, "TFBS", "{TF}", "plots", "{TF}_{plotname}.pdf"), TF=TF_IDS, plotname=PLOTNAMES))
-output_files.extend(expand(os.path.join(OUTPUTDIR, "overview", "all_{plotname}.pdf"), plotname=PLOTNAMES))
-
-
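For intuition, the target lists above expand to something like the following; a minimal pure-Python sketch that mimics Snakemake's expand with itertools.product, using hypothetical values mirroring the two-condition TOBIAS_example.config further down:

import itertools
import os

CONDITION_IDS = ["Bcell", "Tcell"]      #hypothetical, from the example config
OUTPUTDIR = "../test_output"

PLOTNAMES = ["{0}_{1}".format(c, p) for c, p in itertools.product(CONDITION_IDS, ["aggregate"])]
PLOTNAMES += ["heatmap_comparison", "aggregate_comparison_all", "aggregate_comparison_bound"]

footprints = [os.path.join(OUTPUTDIR, "footprinting", "{0}_footprints.bw".format(c))
              for c in CONDITION_IDS]

print(PLOTNAMES)
# ['Bcell_aggregate', 'Tcell_aggregate', 'heatmap_comparison',
#  'aggregate_comparison_all', 'aggregate_comparison_bound']
print(footprints)
# ['../test_output/footprinting/Bcell_footprints.bw',
#  '../test_output/footprinting/Tcell_footprints.bw']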
-#-------------------------------------------------------------------------------#
-#--------------------- WHICH SNAKE MODULES SHOULD BE USED? ---------------------#
-#-------------------------------------------------------------------------------#
-
-include: "snakefiles/preprocessing.snake"
-include: "snakefiles/footprinting.snake"
-include: "snakefiles/visualization.snake"
-
-
-#-------------------------------------------------------------------------------#
-#------------------------ DEAL WITH SPECIAL ENVIRONMENTS -----------------------#
-#-------------------------------------------------------------------------------#
-
-
-sys_env = subprocess.check_output(['conda', 'env', 'list'], universal_newlines=True)
-env_list = [line.split()[0] for line in sys_env.split("\n") if len(line.split()) > 0]
-
-#Default TOBIAS environment
-if "TOBIAS_ENV" not in env_list:
-    print("Creating TOBIAS environment for the first time")
-    subprocess.call(["conda", "env", "create", "--file", "environments/tobias.yaml"])
-
-#Python 2-related environments (macs2 runs on Python 2.7)
-if "MACS_ENV" not in env_list:
-    print("Creating macs environment for the first time")
-    subprocess.call(["conda", "env", "create", "--file", "environments/macs.yaml"])
-
-
-#-------------------------------------------------------------------------------#
-#---------------------------------- RUN :-) ------------------------------------#
-#-------------------------------------------------------------------------------#
-
-rule all:
-    input:
-        output_files
-    message: "Rule all"
-
diff --git a/snakemake_pipeline/TOBIAS_example.config b/snakemake_pipeline/TOBIAS_example.config
deleted file mode 100644
index 9f394a5..0000000
--- a/snakemake_pipeline/TOBIAS_example.config
+++ /dev/null
@@ -1,32 +0,0 @@
-#-------------------------------------------------------------------------#
-#-------------------------- TOBIAS input data ----------------------------#
-#-------------------------------------------------------------------------#
-
-data:
-    Bcell: [../test_data/Bcell_chr4.bam]                                  #list of .bam-files
-    Tcell: [../test_data/Tcell_chr4_1.bam, ../test_data/Tcell_chr4_2.bam] #list of .bam-files
-
-run_info:
-    organism: human                              #mouse/human
-    fasta: ../test_data/genome_chr4.fa.gz        #.fasta-file containing the organism genome
-    blacklist: ../test_data/blacklist_chr4.bed   #.bed-file containing blacklisted regions
-    gtf: ../test_data/genes_chr4.gtf             #.gtf-file for annotation of peaks
-    motifs: ../test_data/individual_motifs       #directory containing motifs (single files in MEME/JASPAR/PFM format)
-    output: ../test_output                       #output directory
-
-
-#-------------------------------------------------------------------------#
-#----------------------- Default module parameters -----------------------#
-#-------------------------------------------------------------------------#
-
-macs: "--nomodel --shift -100 --extsize 200 --broad"
-
-#For parameter description see the UROPA manual: http://uropa-manual.readthedocs.io/config.html
-#Adjust the filter attribute to the given gtf: Ensembl gene_biotype / GENCODE gene_type
-#Other optional parameters: --filter_attribute gene_biotype --attribute_value protein_coding
-uropa: "--feature gene --feature_anchor start --distance [10000,1000] --filter_attribute gene_biotype --attribute_value protein_coding --show_attribute gene_name,gene_id,gene_biotype"
-
-atacorrect: ""
-footprinting: ""
-bindetect: "--prefix immune"
-plotting: ""
diff --git a/snakemake_pipeline/environments/macs.yaml b/snakemake_pipeline/environments/macs.yaml
deleted file mode 100644
index 2717441..0000000
--- a/snakemake_pipeline/environments/macs.yaml
+++ /dev/null
@@ -1,8 +0,0 @@
-name: MACS_ENV
-
-channels:
-  - bioconda
-  - conda-forge
-
-dependencies:
-  - macs2
diff --git a/snakemake_pipeline/environments/tobias.yaml b/snakemake_pipeline/environments/tobias.yaml
deleted file mode 100644
index 8775ad2..0000000
--- a/snakemake_pipeline/environments/tobias.yaml
+++ /dev/null
@@ -1,27 +0,0 @@
-name: TOBIAS_ENV
-
-channels:
-  - bioconda
-  - conda-forge
-
-dependencies:
-  - python=3
-  - pysam
-  - pybigwig
-  - moods
-  - pybedtools
-  - cython
-  - matplotlib
-  - numpy
-  - scipy
-  - pypdf2
-  - scikit-learn
-  - snakemake
-  - bedtools
-  - uropa
-  - igvtools
-  - openjdk
-  - xlsxwriter
-  - cloudpickle=0.5.6
-  - pip:
-    - adjustText
\ No newline at end of file
diff --git a/snakemake_pipeline/snakefiles/footprinting.snake b/snakemake_pipeline/snakefiles/footprinting.snake
deleted file mode 100644
index 2fde401..0000000
--- a/snakemake_pipeline/snakefiles/footprinting.snake
+++ /dev/null
@@ -1,103 +0,0 @@
-# vim: syntax=python tabstop=4 expandtab
-# coding: utf-8
-
-#--------------------------------------------------------------------------------------------------------#
-#Format motifs to pfm format
-rule format_motifs:
-    input:
-        MOTIF_FILES.values()    #MOTIF_FILES is a dict
-    output:
-        os.path.join(OUTPUTDIR, "motifs", "all_motifs.txt")
-    priority: 2
-    log:
-        os.path.join(OUTPUTDIR, "logs", "format_motifs.log")
-    shell:
-        "TOBIAS FormatMotifs --input {input} --format pfm --task join --output {output} &> {log}"
-
-#--------------------------------------------------------------------------------------------------------#
-#Correct reads for Tn5 sequence bias
-rule atacorrect:
-    input:
-        bam = os.path.join(OUTPUTDIR, "mapping", "{condition}.bam"),
-        peaks = os.path.join(OUTPUTDIR, "peak_calling", "all_merged.bed"),
-        genome = FASTA
-    output:
-        os.path.join(OUTPUTDIR, "bias_correction", "{condition}_uncorrected.bw"),
-        os.path.join(OUTPUTDIR, "bias_correction", "{condition}_bias.bw"),
-        os.path.join(OUTPUTDIR, "bias_correction", "{condition}_expected.bw"),
-        os.path.join(OUTPUTDIR, "bias_correction", "{condition}_corrected.bw"),
-    params:
-        "--blacklist " + BLACKLIST if BLACKLIST != "" else "",
-        "--outdir " + os.path.join(OUTPUTDIR, "bias_correction"),
-        "--prefix " + "{condition}",
-        config["atacorrect"]
-    priority: 2
-    threads: 99    #unless there are more than 99 cores, this rule will run on max threads
-    log:
-        os.path.join(OUTPUTDIR, "logs", "{condition}_atacorrect.log")
-    message:
-        "Running ATACorrect for condition {wildcards.condition} ({input.bam})"
-    shell:
-        "TOBIAS ATACorrect -b {input.bam} -g {input.genome} -p {input.peaks} --cores {threads} {params} &> {log}"
-
-
-#--------------------------------------------------------------------------------------------------------#
-#Calculate footprint scores per condition
-rule footprinting:
-    input:
-        signal = os.path.join(OUTPUTDIR, "bias_correction", "{condition}_corrected.bw"),
-        regions = os.path.join(OUTPUTDIR, "peak_calling", "all_merged.bed")
-    output:
-        footprints = os.path.join(OUTPUTDIR, "footprinting", "{condition}_footprints.bw"),
-    params:
-        config["footprinting"]
-    priority: 2
-    threads: 99
-    log:
-        os.path.join(OUTPUTDIR, "logs", "{condition}_footprinting.log")
-    message:
-        "Running footprinting for condition {wildcards.condition} ({input.signal})"
-    shell:
-        "TOBIAS FootprintScores --signal {input.signal} --regions {input.regions} --output {output.footprints} --cores {threads} {params} &> {log}"
-
-
-#--------------------------------------------------------------------------------------------------------#
-#Estimate bound sites from scored signals
-rule bindetect:
-    input:
-        motifs = os.path.join(OUTPUTDIR, "motifs", "all_motifs.txt"),
-        footprints = expand(os.path.join(OUTPUTDIR, "footprinting", "{condition}_footprints.bw"), condition=CONDITION_IDS),
-        genome = FASTA,
-        peaks = os.path.join(OUTPUTDIR, "peak_annotation", "all_merged_annotated.bed"),
-        peak_header = os.path.join(OUTPUTDIR, "peak_annotation", "all_merged_annotated_header.txt")
-    output:
-        bedfiles = expand(os.path.join(OUTPUTDIR, "TFBS", "{TF}", "beds", "{TF}_{suffix}.bed"), TF=TF_IDS, suffix=["all"] + expand("{condition}_{state}", condition=CONDITION_IDS, state=["bound", "unbound"])),
-        overview = expand(os.path.join(OUTPUTDIR, "TFBS", "{TF}", "{TF}_overview.txt"), TF=TF_IDS),
-        #figures = os.path.join(OUTPUTDIR, "TFBS", "bindetect_figures.pdf"),
-        #local = [os.path.join(OUTPUTDIR, "TFBS", fil) for fil in ["bindetect_results.txt", "bindetect_results.xlsx"]],
-    threads: 99
-    priority: 2
-    log:
-        os.path.join(OUTPUTDIR, "logs", "bindetect.log")
-    params:
-        "--outdir " + os.path.join(OUTPUTDIR, "TFBS"),
-        "--cond_names " + " ".join(CONDITION_IDS),
-        config["bindetect"]
-    message:
-        "Running BINDetect"
-    shell:
-        "TOBIAS BINDetect --motifs {input.motifs} --signals {input.footprints} --genome {input.genome} --peaks {input.peaks} --peak_header {input.peak_header} --cores {threads} {params} &> {log}; "
-        "mkdir -p " + os.path.join(OUTPUTDIR, "overview") + ";"
-        "mv " + os.path.join(OUTPUTDIR, "TFBS", "*.txt") + " " + os.path.join(OUTPUTDIR, "overview") + ";" #move files to overview
-        "mv " + os.path.join(OUTPUTDIR, "TFBS", "*.xlsx") + " " + os.path.join(OUTPUTDIR, "overview") + ";"
-        "mv " + os.path.join(OUTPUTDIR, "TFBS", "*.pdf") + " " + os.path.join(OUTPUTDIR, "overview") + ";"
-"""
-rule copy_to_overview:
-    input:
-        [os.path.join(OUTPUTDIR, "TFBS", fil) for fil in ["bindetect_results.txt", "bindetect_results.xlsx"]],
-    output:
-        [os.path.join(OUTPUTDIR, "overview", fil) for fil in ["bindetect_results.txt", "bindetect_results.xlsx"]]
-    priority: 2
-    shell:
-        "mv " + os.path.join(OUTPUTDIR, "TFBS", "*.*") + " " + os.path.join(OUTPUTDIR, "overview") #move files to overview
-"""
\ No newline at end of file
diff --git a/snakemake_pipeline/snakefiles/helper.snake b/snakemake_pipeline/snakefiles/helper.snake
deleted file mode 100644
index 8e658b8..0000000
--- a/snakemake_pipeline/snakefiles/helper.snake
+++ /dev/null
@@ -1,46 +0,0 @@
-import string
-import re
-
-def check_format(file):
-
-    found_format = None
-
-    #Read first nlines of file
-    nlines = 100
-    count = 0
-    file_content = ""
-    with open(file) as f:
-        for line in f:
-            count += 1
-            file_content += line
-
-            if count == nlines:
-                break
-
-    #Bed file
-    if re.match(r"(^.+\s\d+\s\d+.*\n)+", file_content):
-        found_format = "bed"
-        #print("{0} is a .bed-file".format(file))
-
-    #Jaspar file
-    elif re.match(r"(>.+\n([^>]+\n){4})+.*", file_content):
-        found_format = "jaspar"
-        #print("{0} is a jaspar pfm file".format(file))
-
-    #Meme file
-    elif re.match(r".*MEME version.*ALPHABET.*MOTIF.*", file_content, flags=re.DOTALL):
-        found_format = "meme"
-        #print("{0} is a meme file".format(file))
-
-    else:
-        print("Could not recognize the format of file {0}".format(file))
-
-    return(found_format)
-
-
-
-def filafy(astring):    #Make a string into an accepted filename
-
-    valid_chars = "-_.%s%s" % (string.ascii_letters, string.digits)
-    filename = ''.join(char for char in astring if char in valid_chars)
-    return(filename)
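For reference, a minimal sketch of how these two helpers behave, with invented inputs and assuming the definitions above are importable:

# filafy() keeps only letters, digits, '-', '_' and '.':
print(filafy("CTCF::MA0139.1 (core)"))   # -> "CTCFMA0139.1core"

# check_format() sniffs the first 100 lines; a tab-separated
# chrom/start/end file is recognized as BED:
with open("example.bed", "w") as f:      # hypothetical file
    f.write("chr4\t100\t200\n")
print(check_format("example.bed"))       # -> "bed"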
diff --git a/snakemake_pipeline/snakefiles/preprocessing.snake b/snakemake_pipeline/snakefiles/preprocessing.snake
deleted file mode 100644
index e28b6be..0000000
--- a/snakemake_pipeline/snakefiles/preprocessing.snake
+++ /dev/null
@@ -1,98 +0,0 @@
-# vim: syntax=python tabstop=4 expandtab
-# coding: utf-8
-
-# ------------------------------------------------------------------------------------------------------------------ #
-# ------------------------------------------------- Bam processing ------------------------------------------------- #
-# ------------------------------------------------------------------------------------------------------------------ #
-
-# Merge sample bam files to one condition bam file;
-# if there is only one sample per condition, copy the sample bam to the merged bam file name for further processing
-rule conditionbam:
-    input: lambda wildcards: config["data"][wildcards.condition]
-    output:
-        bam = os.path.join(OUTPUTDIR, "mapping", "{condition}.bam"),
-        bai = os.path.join(OUTPUTDIR, "mapping", "{condition}.bam.bai")
-    threads: 2
-    message: "Joining individual bamfiles from condition {wildcards.condition}"
-    run:
-        if len(input) > 1:
-            shell("samtools merge -@ {threads} {output.bam} {input}")
-            shell("samtools index {output.bam}")
-        else:
-            shell("cp {input} {output.bam}")
-            shell("samtools index {output.bam}")
-
-
-# ------------------------------------------------------------------------------------------------------------------ #
-# --------------------------------------------------- Peak-calling ------------------------------------------------- #
-# ------------------------------------------------------------------------------------------------------------------ #
-
-# Peak-calling
-gsizes = {"human": "hs",
-          "mouse": "mm",
-          "zebrafish": 1369631918}    #https://deeptools.readthedocs.io/en/develop/content/feature/effectiveGenomeSize.html
-
-rule macs:
-    input:
-        lambda wildcards: id2bam[wildcards.condition][wildcards.sample_id]
-    output:
-        macs = os.path.join(OUTPUTDIR, "peak_calling", "{condition}", "{sample_id}_peaks.broadPeak"),
-        raw = os.path.join(OUTPUTDIR, "peak_calling", "{condition}", "{sample_id}_raw.bed")
-    log: os.path.join(OUTPUTDIR, "logs", "{condition}_{sample_id}_peak_calling.log")
-    message: "Running macs2 with .bam-file: {input}"
-    params:
-        "--name {sample_id}",
-        "--outdir " + os.path.join(OUTPUTDIR, "peak_calling", "{condition}"),
-        "--gsize " + str(gsizes[config["run_info"]["organism"]]),
-        config["macs"],
-    shell:
-        "set +u; source activate MACS_ENV; "
-        "macs2 callpeak -t {input} {params} &> {log}; "
-        "cp {output.macs} {output.raw}; "
-
-
-# ------------------------------------------------------------------------------------------------------------------ #
-# ------------------------------------------------- Peak-processing ------------------------------------------------ #
-# ------------------------------------------------------------------------------------------------------------------ #
-# Process peaks:
-# 1. reduce to genomic location columns and sort
-# 2. merge peaks per condition
-# 3. remove blacklisted regions, drop chrM-like contigs, and add unique peak ids
-rule process_peaks:
-    input:
-        peaks = lambda wildcards: [os.path.join(OUTPUTDIR, "peak_calling", wildcards.condition, sample_id + "_raw.bed") for sample_id in id2bam[wildcards.condition].keys()],
-        blacklisted = BLACKLIST
-    output:
-        peaks = os.path.join(OUTPUTDIR, "peak_calling", "{condition}_union.bed"),
-    message: "Processing peaks from condition {wildcards.condition}"
-    shell:
-        "cat {input.peaks} | cut -f1-3 | sort -k1,1 -k2,2n | bedtools merge -d 5 | bedtools subtract -a - -b {input.blacklisted} -A | "
-        "awk '$1 !~ /[M]/' | "
-        "awk '{{print $0\"\\t{wildcards.condition}_\"NR}}' > {output.peaks}"
-
-
-# Union of peaks across all conditions
-rule merge_condition_peaks:
-    input: expand(os.path.join(OUTPUTDIR, "peak_calling", "{condition}_union.bed"), condition=CONDITION_IDS)
-    output: os.path.join(OUTPUTDIR, "peak_calling", "all_merged.bed")
-    message: "Merging peaks across conditions"
-    shell:
-        "cat {input} | sort -k1,1 -k2,2n | bedtools merge -d 5 -c 4 -o collapse > {output}"
-
-
-# Peak annotation
-# peaks per condition or across conditions, depending on run info output
-rule uropa:
-    input:
-        bed = os.path.join(OUTPUTDIR, "peak_calling", "all_merged.bed"),
-        gtf = GTF
-    output:
-        peaks = os.path.join(OUTPUTDIR, "peak_annotation", "all_merged_annotated.bed"),
-        header = os.path.join(OUTPUTDIR, "peak_annotation", "all_merged_annotated_header.txt"),
-    threads: 99
-    log: os.path.join(OUTPUTDIR, "logs", "uropa.log")
-    params:
-        config["uropa"]
-    shell:
-        "peak_annotation.sh -i {input.bed} -o {output.peaks} -p {output.header} -g {input.gtf} {params} -c {threads} -l {log} &> {log}; "
-        
\ No newline at end of file
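The shell one-liner in process_peaks compresses several steps; a rough pybedtools equivalent (pybedtools is listed in tobias.yaml above; file names are hypothetical, and the chrM filter is omitted for brevity):

import pybedtools

# Hypothetical inputs
peaks = pybedtools.BedTool("Bcell_raw.bed")
blacklist = pybedtools.BedTool("blacklist_chr4.bed")

# 1. sort, 2. merge peaks closer than 5 bp, 3. drop blacklisted regions
merged = peaks.sort().merge(d=5).subtract(blacklist, A=True)

# add unique per-condition peak ids (mirrors the final awk step)
with open("Bcell_union.bed", "w") as out:
    for i, interval in enumerate(merged, start=1):
        out.write("{0}\t{1}\t{2}\tBcell_{3}\n".format(
            interval.chrom, interval.start, interval.end, i))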
".join(expand(os.path.join(OUTPUTDIR, "TFBS", "{{TF}}", "beds", "{{TF}}_" + condition + "_{state}.bed"), state=["bound", "unbound"]))) for condition in CONDITION_IDS]), - #bed_labels = lambda wildcards, input: " ".join(["--TFBS_labels {0}".format(" ".join(["{0}_{1}".format(condition, state) for condition in CONDITION_IDS] for state in ["bound", "unbound"]))]), - signal_labels = "--signal_labels " + " ".join(CONDITION_IDS), - title = "--title '{TF} heatmap across conditions' " - shell: - "TOBIAS PlotHeatmap --signals {input.tracks} --output {output.heatmap} {params} --share_colorbar >/dev/null" - - - -#----------------------------------------------------------------# -#--------------------------- Aggregates -------------------------# -#----------------------------------------------------------------# - -#Comparison between bound/unbound sets of individual TFs -rule plot_aggregate_within: - input: - TFBS = [os.path.join(OUTPUTDIR, "TFBS", "{TF}", "beds", "{TF}_all.bed")] + [os.path.join(OUTPUTDIR, "TFBS", "{TF}", "beds", "{TF}_{condition}_" + state + ".bed") for state in ["bound", "unbound"]], - signals = [os.path.join(OUTPUTDIR, "bias_correction", "{condition}_" + state + ".bw") for state in ["uncorrected", "expected", "corrected"]], - output: - os.path.join(OUTPUTDIR, "TFBS", "{TF}", "plots", "{TF}_{condition}_aggregate.pdf") - log: - os.path.join(OUTPUTDIR, "TFBS", "{TF}", "plots", "logs", "{TF}_{condition}_aggregate.log") - message: "Plotting split between bound/unbound around TFBS for TF \"{wildcards.TF}\" in condition \"{wildcards.condition}\"" - params: - "--title 'Bias correction and split for {TF} in condition {condition}'", - "--share_y sites", - "--plot_boundaries", - shell: - "TOBIAS PlotAggregate --TFBS {input.TFBS} --signals {input.signals} --output {output} {params} > {log} " - - -#Aggregates across conditions for all and for bound subsets -rule plot_aggregate_across: - input: - TFBS_all = os.path.join(OUTPUTDIR, "TFBS", "{TF}", "beds", "{TF}_all.bed"), - TFBS_bound = expand(os.path.join(OUTPUTDIR, "TFBS", "{{TF}}", "beds", "{{TF}}_{condition}_bound.bed"), condition=CONDITION_IDS), - signals = expand(os.path.join(OUTPUTDIR, "bias_correction", "{condition}_corrected.bw"), condition=CONDITION_IDS), - output: - all_compare = os.path.join(OUTPUTDIR, "TFBS", "{TF}", "plots", "{TF}_aggregate_comparison_all.pdf"), - bound_compare = os.path.join(OUTPUTDIR, "TFBS", "{TF}", "plots", "{TF}_aggregate_comparison_bound.pdf"), - all_log = os.path.join(OUTPUTDIR, "TFBS", "{TF}", "plots", "logs", "{TF}_aggregate_comparison_all.log"), - bound_log = os.path.join(OUTPUTDIR, "TFBS", "{TF}", "plots", "logs", "{TF}_aggregate_comparison_bound.log") - priority: 2 - params: - "--title {0}".format("{TF}"), - "--plot_boundaries", - "--share_y both", - message: "Plotting comparison of cutsite signals for \"{wildcards.TF}\" between conditions" - shell: - "TOBIAS PlotAggregate --TFBS {input.TFBS_all} --signals {input.signals} --output {output.all_compare} {params} > {output.all_log}; " - "TOBIAS PlotAggregate --TFBS {input.TFBS_bound} --signals {input.signals} --output {output.bound_compare} {params} > {output.bound_log};" - - -#----------------------------------------------------------------# -#-------------------- Join pdfs from all TFs --------------------# -#----------------------------------------------------------------# - -rule join_pdfs: - input: - expand(os.path.join(OUTPUTDIR, "TFBS", "{TF}", "plots", "{TF}_{{plotname}}.pdf"), TF=TF_IDS) - output: - os.path.join(OUTPUTDIR, "overview", 
"all_{plotname}.pdf") - message: "Joining {wildcards.plotname} plots from all TFs" - shell: - "TOBIAS MergePDF --input {input} --output {output} >/dev/null" - \ No newline at end of file