# snakefiles/visualization.snake

#----------------------------------------------------------------#
#---------------------------- Heatmaps --------------------------#
#----------------------------------------------------------------#
#NOTE: everything between the two triple-quote lines below is a plain string
#literal, so the rule "plot_heatmaps_within" written inside it is not defined.
#As written, it would plot bound/unbound sites of one TF within one condition
#against the uncorrected/bias/expected/corrected tracks of that condition.
"""
#Heatmaps split in bound/unbound within conditions
rule plot_heatmaps_within:
	input:
		beds = expand(os.path.join(OUTPUTDIR, "TFBS", "{{TF}}", "beds", "{{TF}}_{{condition}}_{state}.bed"), state=["bound", "unbound"]),
		tracks = expand(os.path.join(OUTPUTDIR, "bias_correction", "{{condition}}_{track}.bw"), track=["uncorrected", "bias", "expected", "corrected"])
	output:
		heatmap = os.path.join(OUTPUTDIR, "TFBS", "{TF}", "plots", "{TF}_{condition}_heatmap.pdf")
	message: "Plotting heatmap for TF \"{wildcards.TF}\" within \"{wildcards.condition}\""
	params:
		"--TFBS_labels Bound Unbound",
		"--signal_labels " + " ".join(["uncorrected", "bias", "expected", "corrected"]),
		"--title '{TF} heatmap'",
	shell:
		"TOBIAS PlotHeatmap --TFBS {input.beds} --signals {input.tracks} --output {output.heatmap} {params} >/dev/null"
"""

#Heatmaps across conditions
#For one TF, the bound/unbound .bed files of every condition are plotted
#against the "corrected" output of rule atacorrect for every condition.
#The .bed inputs are flagged ancient(). Each condition contributes its own
#"--TFBS <bound.bed> <unbound.bed>" group on the command line; no
#--TFBS_labels are passed. All params are appended via {params} in the order
#they are declared; stdout of the tool is discarded.
#NOTE(review): "--sort-by" is the only hyphenated option in this file, all
#other TOBIAS options here use underscores -- confirm the spelling against the
#TOBIAS version in use.
rule plot_heatmaps_across:
	input:
		beds = ancient([
			expand(
				os.path.join(OUTPUTDIR, "TFBS", "{{TF}}", "beds", "{{TF}}_" + cond + "_{state}.bed"),
				state=["bound", "unbound"]
			)
			for cond in CONDITION_IDS
		]),
		tracks = expand(rules.atacorrect.output.corrected, condition=CONDITION_IDS)
	output:
		heatmap = os.path.join(OUTPUTDIR, "TFBS", "{TF}", "plots", "{TF}_heatmap_comparison.pdf")
	params:
		beds = " ".join(
			"--TFBS " + " ".join(
				expand(
					os.path.join(OUTPUTDIR, "TFBS", "{{TF}}", "beds", "{{TF}}_" + cond + "_{state}.bed"),
					state=["bound", "unbound"]
				)
			)
			for cond in CONDITION_IDS
		),
		signal_labels = "--signal_labels {0}".format(" ".join(CONDITION_IDS)),
		title = "--title '{TF} heatmap across conditions' ",
		sortby = "--sort-by -1"
	message: "Plotting heatmap for TF \"{wildcards.TF}\""
	shell:
		"TOBIAS PlotHeatmap --signals {input.tracks} --output {output.heatmap} {params} --share_colorbar >/dev/null"

#----------------------------------------------------------------#
#------------------------- Venn diagrams ------------------------#
#----------------------------------------------------------------#

#Venn diagram of the bound sites of one TF across all conditions.
#"intervene venn" is pointed at the TF's plots directory via --output; the
#file "Intervene_venn.pdf" in that directory is declared as a temp() output
#and is copied to the final "{TF}_venn_diagram.pdf".
rule plot_venn:
	input:
		expand(
			os.path.join(OUTPUTDIR, "TFBS", "{{TF}}", "beds", "{{TF}}_{condition}_bound.bed"),
			condition=CONDITION_IDS
		)
	output:
		raw = temp(os.path.join(OUTPUTDIR, "TFBS", "{TF}", "plots", "Intervene_venn.pdf")),
		final = os.path.join(OUTPUTDIR, "TFBS", "{TF}", "plots", "{TF}_venn_diagram.pdf")
	conda:
		"../environments/tools.yaml"
	params:
		"--output {0}".format(os.path.join(OUTPUTDIR, "TFBS", "{TF}", "plots")),
		"--names {0}".format(",".join("'{0} bound sites'".format(cond) for cond in CONDITION_IDS)),
		"--title 'Overlap of sites for TF: {TF}'"
	shell:
		"intervene venn --input {input} {params} > /dev/null ; cp {output.raw} {output.final}"


#----------------------------------------------------------------#
#--------------------------- Aggregates -------------------------#
#----------------------------------------------------------------#

#Comparison between bound/unbound sets of individual TFs
#For one TF and one condition, the "all", "bound" and "unbound" .bed files
#(flagged ancient()) are plotted against the uncorrected, expected and
#corrected .bw tracks of that condition. stdout of PlotAggregate is written
#to the rule's log file.
rule plot_aggregate_within:
	input:
		TFBS = ancient([
			os.path.join(OUTPUTDIR, "TFBS", "{TF}", "beds", "{TF}_" + subset + ".bed")
			for subset in ["all", "{condition}_bound", "{condition}_unbound"]
		]),
		signals = [
			os.path.join(OUTPUTDIR, "bias_correction", "{condition}_%s.bw" % track)
			for track in ["uncorrected", "expected", "corrected"]
		]
	output:
		os.path.join(OUTPUTDIR, "TFBS", "{TF}", "plots", "{TF}_{condition}_aggregate.pdf")
	log:
		os.path.join(OUTPUTDIR, "TFBS", "{TF}", "plots", "logs", "{TF}_{condition}_aggregate.log")
	params:
		"--title 'Bias correction and split for {TF} in condition {condition}'",
		"--share_y sites",
		"--plot_boundaries"
	message: "Plotting split between bound/unbound around TFBS for TF \"{wildcards.TF}\" in condition \"{wildcards.condition}\""
	shell:
		"TOBIAS PlotAggregate --TFBS {input.TFBS} --signals {input.signals} --output {output} {params} > {log} "


#Aggregates across conditions for all and for bound subsets
#Runs PlotAggregate twice for one TF, both times against the "corrected"
#output of rule atacorrect for every condition:
#  1) with the "{TF}_all.bed" sites             -> {TF}_aggregate_comparison_all.pdf
#  2) with the per-condition "bound" .bed files -> {TF}_aggregate_comparison_bound.pdf
#stdout of each call is redirected into a log file; both log files are
#declared as outputs of this rule.
rule plot_aggregate_across:
	priority: 2
	input:
		TFBS_all = ancient(os.path.join(OUTPUTDIR, "TFBS", "{TF}", "beds", "{TF}_all.bed")),
		TFBS_bound = ancient(
			expand(
				os.path.join(OUTPUTDIR, "TFBS", "{{TF}}", "beds", "{{TF}}_{condition}_bound.bed"),
				condition=CONDITION_IDS
			)
		),
		signals = expand(rules.atacorrect.output.corrected, condition=CONDITION_IDS)
	output:
		all_compare = os.path.join(OUTPUTDIR, "TFBS", "{TF}", "plots", "{TF}_aggregate_comparison_all.pdf"),
		bound_compare = os.path.join(OUTPUTDIR, "TFBS", "{TF}", "plots", "{TF}_aggregate_comparison_bound.pdf"),
		all_log = os.path.join(OUTPUTDIR, "TFBS", "{TF}", "plots", "logs", "{TF}_aggregate_comparison_all.log"),
		bound_log = os.path.join(OUTPUTDIR, "TFBS", "{TF}", "plots", "logs", "{TF}_aggregate_comparison_bound.log")
	params:
		"--title {TF}",
		"--plot_boundaries",
		"--share_y both"
	message: "Plotting comparison of cutsite signals for \"{wildcards.TF}\" between conditions"
	shell:
		"TOBIAS PlotAggregate --TFBS {input.TFBS_all} --signals {input.signals} --output {output.all_compare} {params} > {output.all_log}; "
		"TOBIAS PlotAggregate --TFBS {input.TFBS_bound} --signals {input.signals} --output {output.bound_compare} {params} > {output.bound_log};"

#----------------------------------------------------------------#
#-------------------- Join pdfs from all TFs --------------------#
#----------------------------------------------------------------#

#Input function for rule join_pdfs: one "{TF}_{plotname}.pdf" path per TF id
#returned by get_TF_ids(wildcards). The {plotname} placeholder is left
#unexpanded in the returned paths.
def _join_pdfs_input(wildcards):
	per_tf_plot = os.path.join(OUTPUTDIR, "TFBS", "{TF}", "plots", "{TF}_{{plotname}}.pdf")
	return expand(per_tf_plot, TF=get_TF_ids(wildcards))

#Merge the plots of one kind (wildcard "plotname") from all TFs into a single
#pdf in the "overview" directory; stdout of MergePDF is discarded.
rule join_pdfs:
	input:
		_join_pdfs_input
	output:
		os.path.join(OUTPUTDIR, "overview", "all_{plotname}.pdf")
	message: "Joining {wildcards.plotname} plots from all TFs"
	shell:
		"TOBIAS MergePDF --input {input} --output {output} >/dev/null"


#----------------------------------------------------------------#
#----------------------- Lineplot changes -----------------------#
#----------------------------------------------------------------#

#Input function for rule lineplot_changes: "bindetect_results.txt" located
#under the first output of checkpoint bindetect.
def _bindetect_results_file(wildcards):
	bindetect_out = checkpoints.bindetect.get(**wildcards).output[0]
	return os.path.join(bindetect_out, "bindetect_results.txt")

#Two steps, run in one shell command:
#  1) filter_important_factors.py writes its -o file to
#     "bindetect_results_subset.txt"; its stdout is captured as
#     "most_changed_TFs.txt"
#  2) TOBIAS PlotChanges is given that file via --TFS and writes
#     "TF_changes.pdf"; its stdout goes to the rule's log file
rule lineplot_changes:
	priority: 2
	input:
		_bindetect_results_file
	output:
		out = os.path.join(OUTPUTDIR, "overview", "bindetect_results_subset.txt"),
		tfs = os.path.join(OUTPUTDIR, "overview", "most_changed_TFs.txt"),
		lineplot = os.path.join(OUTPUTDIR, "overview", "TF_changes.pdf")
	log:
		os.path.join(OUTPUTDIR, "logs", "TF_changes.log")
	shell:
		"filter_important_factors.py -in {input} -filter 5 -o {output.out} > {output.tfs};"
		"TOBIAS PlotChanges --bindetect {input} --TFS {output.tfs} --output {output.lineplot} > {log}"