From b2772174e287a56728a6ec841c37418026b31695 Mon Sep 17 00:00:00 2001
From: msbentsen
Date: Fri, 22 Mar 2019 11:38:20 +0100
Subject: [PATCH] Moved snakemake pipeline to separate repo

---
 snakemake_pipeline/TOBIAS.snake             | 178 ------------------
 snakemake_pipeline/TOBIAS_example.config    |  32 ----
 snakemake_pipeline/environments/macs.yaml   |   8 -
 snakemake_pipeline/environments/tobias.yaml |  27 ---
 .../snakefiles/footprinting.snake           | 103 ----------
 snakemake_pipeline/snakefiles/helper.snake  |  46 -----
 .../snakefiles/preprocessing.snake          |  98 ----------
 .../snakefiles/visualization.snake          |  95 ----------
 8 files changed, 587 deletions(-)
 delete mode 100644 snakemake_pipeline/TOBIAS.snake
 delete mode 100644 snakemake_pipeline/TOBIAS_example.config
 delete mode 100644 snakemake_pipeline/environments/macs.yaml
 delete mode 100644 snakemake_pipeline/environments/tobias.yaml
 delete mode 100644 snakemake_pipeline/snakefiles/footprinting.snake
 delete mode 100644 snakemake_pipeline/snakefiles/helper.snake
 delete mode 100644 snakemake_pipeline/snakefiles/preprocessing.snake
 delete mode 100644 snakemake_pipeline/snakefiles/visualization.snake

diff --git a/snakemake_pipeline/TOBIAS.snake b/snakemake_pipeline/TOBIAS.snake
deleted file mode 100644
index e7eef8e..0000000
--- a/snakemake_pipeline/TOBIAS.snake
+++ /dev/null
@@ -1,178 +0,0 @@
-"""
-Upper level TOBIAS snake
-"""
-
-import os
-import sys
-import subprocess
-import itertools
-
-#Set config
-if workflow.overwrite_configfile is not None:
-    configfile: str(workflow.overwrite_configfile)
-else:
-    configfile: 'TOBIAS.config'
-CONFIGFILE = str(workflow.overwrite_configfile)
-
-include: "snakefiles/helper.snake"
-#shell.prefix("")
-
-#-------------------------------------------------------------------------------#
-#------------------------- CHECK FORMAT OF CONFIG FILE -------------------------#
-#-------------------------------------------------------------------------------#
-
-required = [("data",),
-            ("run_info",),
-            ("run_info", "organism"),
-            ("run_info", "fasta"),
-            ("run_info", "blacklist"),
-            ("run_info", "gtf"),
-            ("run_info", "motifs"),
-            ("run_info", "output"),
-            ]
-
-#Check that all required keys exist and contain information
-for key_list in required:
-    lookup_dict = config
-    for key in key_list:
-        try:
-            lookup_dict = lookup_dict[key]
-            if lookup_dict is None:
-                print("ERROR: Missing input for key {0}".format(key_list))
-        except (KeyError, TypeError):
-            print("ERROR: Could not find key(s) \"{0}\" in configfile {1}. Please check that your configfile has the right format for TOBIAS.".format(":".join(key_list), CONFIGFILE))
-            sys.exit()
-
-#Check that there is at least one condition with bamfiles
-if len(config["data"]) > 0:
-    for condition in config["data"]:
-        if len(config["data"][condition]) == 0:
-            print("ERROR: Could not find any bamfiles in \"{0}\" in configfile {1}".format(":".join(("data", condition)), CONFIGFILE))
-else:
-    print("ERROR: Could not find any conditions (\"data: {{condition}}\") in configfile {0}".format(CONFIGFILE))
-    sys.exit()
-
-
-#-------------------------------------------------------------------------------#
-#------------------------- WHICH FILES/INFO WERE INPUT? ------------------------#
-#-------------------------------------------------------------------------------#
-
-input_files = []
-
-#Files related to experimental data (bam)
-CONDITION_IDS = list(config["data"].keys())
-for condition in CONDITION_IDS:
-    if not isinstance(config["data"][condition], list):
-        config['data'][condition] = [config['data'][condition]]
-    input_files.extend(config['data'][condition])
-
-
-#Flatfiles independent of experimental data (run_info)
-FASTA = config['run_info']['fasta']
-BLACKLIST = config['run_info']['blacklist']
-GTF = config['run_info']['gtf']
-OUTPUTDIR = config['run_info']["output"]
-MOTIFDIR = config['run_info']['motifs']
-
-input_files.extend([FASTA, BLACKLIST, GTF])
-
-
-#---------- Test that input files exist -----------#
-for file in input_files:
-    if file is not None:
-        full_path = os.path.abspath(file)
-        if not os.path.exists(full_path):
-            sys.exit("ERROR: The following file given in config does not exist: {0}".format(full_path))
-
-
-
-#-------------------------------------------------------------------------------#
-#------------------------ WHICH FILES SHOULD BE CREATED? -----------------------#
-#-------------------------------------------------------------------------------#
-
-output_files = []
-
-#--------------------------------- MOTIFS --------------------------------------#
-#Identify IDs of motifs
-files = os.listdir(MOTIFDIR)
-MOTIF_FILES = {}
-for file in files:
-    full_file = os.path.join(MOTIFDIR, file)
-    with open(full_file) as f:
-        for line in f:
-            if line.startswith("MOTIF"):        #MEME format header
-                columns = line.rstrip().split()
-                ID = columns[2] + "_" + columns[1]
-                ID = filafy(ID)
-                MOTIF_FILES[ID] = full_file
-            elif line.startswith(">"):          #JASPAR/PFM format header
-                columns = line.replace(">", "").rstrip().split()
-                ID = columns[1] + "_" + columns[0]
-                ID = filafy(ID)
-                MOTIF_FILES[ID] = full_file
-
-TF_IDS = list(MOTIF_FILES.keys())
-
-
-#----------------------------------- OUTPUT -----------------------------------#
-
-id2bam = {condition:{} for condition in CONDITION_IDS}
-
-for condition in CONDITION_IDS:
-
-    config_bams = config['data'][condition]
-    sampleids = [os.path.splitext(os.path.basename(bam))[0] for bam in config_bams]
-    id2bam[condition] = {sampleids[i]:config_bams[i] for i in range(len(sampleids))}    #link sample ids to bams
-
-
-PLOTNAMES = expand("{condition}_{plotname}", condition=CONDITION_IDS, plotname=["aggregate"])
-if len(CONDITION_IDS) > 1:
-    PLOTNAMES.extend(["heatmap_comparison", "aggregate_comparison_all", "aggregate_comparison_bound"])
-
-output_files.append(expand(os.path.join(OUTPUTDIR, "footprinting", "{condition}_footprints.bw"), condition=CONDITION_IDS))
-
-#output_files.append(os.path.join(OUTPUTDIR, "TFBS", "bindetect_results.txt"))
-#output_files.append(os.path.join(OUTPUTDIR, "overview", "bindetect_results.txt"))
-
-#Visualization
-output_files.extend(expand(os.path.join(OUTPUTDIR, "TFBS", "{TF}", "plots", "{TF}_{plotname}.pdf"), TF=TF_IDS, plotname=PLOTNAMES))
-output_files.extend(expand(os.path.join(OUTPUTDIR, "overview", "all_{plotname}.pdf"), plotname=PLOTNAMES))
-
-
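For intuition, the target lists above expand to something like the following; a minimal pure-Python sketch that mimics Snakemake's expand with itertools.product, using hypothetical values mirroring the two-condition TOBIAS_example.config further down:

import itertools
import os

CONDITION_IDS = ["Bcell", "Tcell"]      #hypothetical, from the example config
OUTPUTDIR = "../test_output"

PLOTNAMES = ["{0}_{1}".format(c, p) for c, p in itertools.product(CONDITION_IDS, ["aggregate"])]
PLOTNAMES += ["heatmap_comparison", "aggregate_comparison_all", "aggregate_comparison_bound"]

footprints = [os.path.join(OUTPUTDIR, "footprinting", "{0}_footprints.bw".format(c))
              for c in CONDITION_IDS]

print(PLOTNAMES)
# ['Bcell_aggregate', 'Tcell_aggregate', 'heatmap_comparison',
#  'aggregate_comparison_all', 'aggregate_comparison_bound']
print(footprints)
# ['../test_output/footprinting/Bcell_footprints.bw',
#  '../test_output/footprinting/Tcell_footprints.bw']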
-#-------------------------------------------------------------------------------#
-#--------------------- WHICH SNAKE MODULES SHOULD BE USED? ---------------------#
-#-------------------------------------------------------------------------------#
-
-include: "snakefiles/preprocessing.snake"
-include: "snakefiles/footprinting.snake"
-include: "snakefiles/visualization.snake"
-
-
-#-------------------------------------------------------------------------------#
-#------------------------ DEAL WITH SPECIAL ENVIRONMENTS -----------------------#
-#-------------------------------------------------------------------------------#
-
-
-sys_env = subprocess.check_output(['conda', 'env', 'list'], universal_newlines=True)
-env_list = [line.split()[0] for line in sys_env.split("\n") if len(line.split()) > 0]
-
-#Default TOBIAS environment
-if "TOBIAS_ENV" not in env_list:
-    print("Creating TOBIAS environment for the first time")
-    subprocess.call(["conda", "env", "create", "--file", "environments/tobias.yaml"])
-
-#Python 2-related environments (macs2 runs on Python 2.7)
-if "MACS_ENV" not in env_list:
-    print("Creating macs environment for the first time")
-    subprocess.call(["conda", "env", "create", "--file", "environments/macs.yaml"])
-
-
-#-------------------------------------------------------------------------------#
-#---------------------------------- RUN :-) ------------------------------------#
-#-------------------------------------------------------------------------------#
-
-rule all:
-    input:
-        output_files
-    message: "Rule all"
-
diff --git a/snakemake_pipeline/TOBIAS_example.config b/snakemake_pipeline/TOBIAS_example.config
deleted file mode 100644
index 9f394a5..0000000
--- a/snakemake_pipeline/TOBIAS_example.config
+++ /dev/null
@@ -1,32 +0,0 @@
-#-------------------------------------------------------------------------#
-#-------------------------- TOBIAS input data ----------------------------#
-#-------------------------------------------------------------------------#
-
-data:
-    Bcell: [../test_data/Bcell_chr4.bam]                                  #list of .bam-files
-    Tcell: [../test_data/Tcell_chr4_1.bam, ../test_data/Tcell_chr4_2.bam] #list of .bam-files
-
-run_info:
-    organism: human                              #mouse/human
-    fasta: ../test_data/genome_chr4.fa.gz        #.fasta-file containing the organism genome
-    blacklist: ../test_data/blacklist_chr4.bed   #.bed-file containing blacklisted regions
-    gtf: ../test_data/genes_chr4.gtf             #.gtf-file for annotation of peaks
-    motifs: ../test_data/individual_motifs       #directory containing motifs (single files in MEME/JASPAR/PFM format)
-    output: ../test_output                       #output directory
-
-
-#-------------------------------------------------------------------------#
-#----------------------- Default module parameters -----------------------#
-#-------------------------------------------------------------------------#
-
-macs: "--nomodel --shift -100 --extsize 200 --broad"
-
-#For parameter description see the UROPA manual: http://uropa-manual.readthedocs.io/config.html
-#Adjust the filter attribute to the given gtf: Ensembl gene_biotype / GENCODE gene_type
-#Other optional parameters: --filter_attribute gene_biotype --attribute_value protein_coding
-uropa: "--feature gene --feature_anchor start --distance [10000,1000] --filter_attribute gene_biotype --attribute_value protein_coding --show_attribute gene_name,gene_id,gene_biotype"
-
-atacorrect: ""
-footprinting: ""
-bindetect: "--prefix immune"
-plotting: ""
diff --git a/snakemake_pipeline/environments/macs.yaml b/snakemake_pipeline/environments/macs.yaml
deleted file mode 100644
index 2717441..0000000
--- a/snakemake_pipeline/environments/macs.yaml
+++ /dev/null
@@ -1,8 +0,0 @@
-name: MACS_ENV
-
-channels:
-  - bioconda
-  - conda-forge
-
-dependencies:
-  - macs2
diff --git a/snakemake_pipeline/environments/tobias.yaml b/snakemake_pipeline/environments/tobias.yaml
deleted file mode 100644
index 8775ad2..0000000
--- a/snakemake_pipeline/environments/tobias.yaml
+++ /dev/null
@@ -1,27 +0,0 @@
-name: TOBIAS_ENV
-
-channels:
-  - bioconda
-  - conda-forge
-
-dependencies:
-  - python=3
-  - pysam
-  - pybigwig
-  - moods
-  - pybedtools
-  - cython
-  - matplotlib
-  - numpy
-  - scipy
-  - pypdf2
-  - scikit-learn
-  - snakemake
-  - bedtools
-  - uropa
-  - igvtools
-  - openjdk
-  - xlsxwriter
-  - cloudpickle=0.5.6
-  - pip:
-    - adjustText
\ No newline at end of file
diff --git a/snakemake_pipeline/snakefiles/footprinting.snake b/snakemake_pipeline/snakefiles/footprinting.snake
deleted file mode 100644
index 2fde401..0000000
--- a/snakemake_pipeline/snakefiles/footprinting.snake
+++ /dev/null
@@ -1,103 +0,0 @@
-# vim: syntax=python tabstop=4 expandtab
-# coding: utf-8
-
-#--------------------------------------------------------------------------------------------------------#
-#Format motifs to pfm format
-rule format_motifs:
-    input:
-        MOTIF_FILES.values()    #MOTIF_FILES is a dict
-    output:
-        os.path.join(OUTPUTDIR, "motifs", "all_motifs.txt")
-    priority: 2
-    log:
-        os.path.join(OUTPUTDIR, "logs", "format_motifs.log")
-    shell:
-        "TOBIAS FormatMotifs --input {input} --format pfm --task join --output {output} &> {log}"
-
-#--------------------------------------------------------------------------------------------------------#
-#Correct reads for Tn5 sequence bias
-rule atacorrect:
-    input:
-        bam = os.path.join(OUTPUTDIR, "mapping", "{condition}.bam"),
-        peaks = os.path.join(OUTPUTDIR, "peak_calling", "all_merged.bed"),
-        genome = FASTA
-    output:
-        os.path.join(OUTPUTDIR, "bias_correction", "{condition}_uncorrected.bw"),
-        os.path.join(OUTPUTDIR, "bias_correction", "{condition}_bias.bw"),
-        os.path.join(OUTPUTDIR, "bias_correction", "{condition}_expected.bw"),
-        os.path.join(OUTPUTDIR, "bias_correction", "{condition}_corrected.bw"),
-    params:
-        "--blacklist " + BLACKLIST if BLACKLIST != "" else "",
-        "--outdir " + os.path.join(OUTPUTDIR, "bias_correction"),
-        "--prefix " + "{condition}",
-        config["atacorrect"]
-    priority: 2
-    threads: 99    #unless there are more than 99 cores, this rule will run on max threads
-    log:
-        os.path.join(OUTPUTDIR, "logs", "{condition}_atacorrect.log")
-    message:
-        "Running ATACorrect for condition {wildcards.condition} ({input.bam})"
-    shell:
-        "TOBIAS ATACorrect -b {input.bam} -g {input.genome} -p {input.peaks} --cores {threads} {params} &> {log}"
-
-
-#--------------------------------------------------------------------------------------------------------#
-#Calculate footprint scores per condition
-rule footprinting:
-    input:
-        signal = os.path.join(OUTPUTDIR, "bias_correction", "{condition}_corrected.bw"),
-        regions = os.path.join(OUTPUTDIR, "peak_calling", "all_merged.bed")
-    output:
-        footprints = os.path.join(OUTPUTDIR, "footprinting", "{condition}_footprints.bw"),
-    params:
-        config["footprinting"]
-    priority: 2
-    threads: 99
-    log:
-        os.path.join(OUTPUTDIR, "logs", "{condition}_footprinting.log")
-    message:
-        "Running footprinting for condition {wildcards.condition} ({input.signal})"
-    shell:
-        "TOBIAS FootprintScores --signal {input.signal} --regions {input.regions} --output {output.footprints} --cores {threads} {params} &> {log}"
-
-
-#--------------------------------------------------------------------------------------------------------#
-#Estimate bound sites from scored signals
-rule bindetect:
-    input:
-        motifs = os.path.join(OUTPUTDIR, "motifs", "all_motifs.txt"),
-        footprints = expand(os.path.join(OUTPUTDIR, "footprinting", "{condition}_footprints.bw"), condition=CONDITION_IDS),
-        genome = FASTA,
-        peaks = os.path.join(OUTPUTDIR, "peak_annotation", "all_merged_annotated.bed"),
-        peak_header = os.path.join(OUTPUTDIR, "peak_annotation", "all_merged_annotated_header.txt")
-    output:
-        bedfiles = expand(os.path.join(OUTPUTDIR, "TFBS", "{TF}", "beds", "{TF}_{suffix}.bed"), TF=TF_IDS, suffix=["all"] + expand("{condition}_{state}", condition=CONDITION_IDS, state=["bound", "unbound"])),
-        overview = expand(os.path.join(OUTPUTDIR, "TFBS", "{TF}", "{TF}_overview.txt"), TF=TF_IDS),
-        #figures = os.path.join(OUTPUTDIR, "TFBS", "bindetect_figures.pdf"),
-        #local = [os.path.join(OUTPUTDIR, "TFBS", fil) for fil in ["bindetect_results.txt", "bindetect_results.xlsx"]],
-    threads: 99
-    priority: 2
-    log:
-        os.path.join(OUTPUTDIR, "logs", "bindetect.log")
-    params:
-        "--outdir " + os.path.join(OUTPUTDIR, "TFBS"),
-        "--cond_names " + " ".join(CONDITION_IDS),
-        config["bindetect"]
-    message:
-        "Running BINDetect"
-    shell:
-        "TOBIAS BINDetect --motifs {input.motifs} --signals {input.footprints} --genome {input.genome} --peaks {input.peaks} --peak_header {input.peak_header} --cores {threads} {params} &> {log}; "
-        "mkdir -p " + os.path.join(OUTPUTDIR, "overview") + ";"
-        "mv " + os.path.join(OUTPUTDIR, "TFBS", "*.txt") + " " + os.path.join(OUTPUTDIR, "overview") + ";" #move files to overview
-        "mv " + os.path.join(OUTPUTDIR, "TFBS", "*.xlsx") + " " + os.path.join(OUTPUTDIR, "overview") + ";"
-        "mv " + os.path.join(OUTPUTDIR, "TFBS", "*.pdf") + " " + os.path.join(OUTPUTDIR, "overview") + ";"
-"""
-rule copy_to_overview:
-    input:
-        [os.path.join(OUTPUTDIR, "TFBS", fil) for fil in ["bindetect_results.txt", "bindetect_results.xlsx"]],
-    output:
-        [os.path.join(OUTPUTDIR, "overview", fil) for fil in ["bindetect_results.txt", "bindetect_results.xlsx"]]
-    priority: 2
-    shell:
-        "mv " + os.path.join(OUTPUTDIR, "TFBS", "*.*") + " " + os.path.join(OUTPUTDIR, "overview") #move files to overview
-"""
\ No newline at end of file
diff --git a/snakemake_pipeline/snakefiles/helper.snake b/snakemake_pipeline/snakefiles/helper.snake
deleted file mode 100644
index 8e658b8..0000000
--- a/snakemake_pipeline/snakefiles/helper.snake
+++ /dev/null
@@ -1,46 +0,0 @@
-import string
-import re
-
-def check_format(file):
-
-    found_format = None
-
-    #Read first nlines of file
-    nlines = 100
-    count = 0
-    file_content = ""
-    with open(file) as f:
-        for line in f:
-            count += 1
-            file_content += line
-
-            if count == nlines:
-                break
-
-    #Bed file
-    if re.match(r"(^.+\s\d+\s\d+.*\n)+", file_content):
-        found_format = "bed"
-        #print("{0} is a .bed-file".format(file))
-
-    #Jaspar file
-    elif re.match(r"(>.+\n([^>]+\n){4})+.*", file_content):
-        found_format = "jaspar"
-        #print("{0} is a jaspar pfm file".format(file))
-
-    #Meme file
-    elif re.match(r".*MEME version.*ALPHABET.*MOTIF.*", file_content, flags=re.DOTALL):
-        found_format = "meme"
-        #print("{0} is a meme file".format(file))
-
-    else:
-        print("Could not recognize the format of file {0}".format(file))
-
-    return(found_format)
-
-
-
-def filafy(astring):    #Make a string into an accepted filename
-
-    valid_chars = "-_.%s%s" % (string.ascii_letters, string.digits)
-    filename = ''.join(char for char in astring if char in valid_chars)
-    return(filename)
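For reference, a minimal sketch of how these two helpers behave, with invented inputs and assuming the definitions above are importable:

# filafy() keeps only letters, digits, '-', '_' and '.':
print(filafy("CTCF::MA0139.1 (core)"))   # -> "CTCFMA0139.1core"

# check_format() sniffs the first 100 lines; a tab-separated
# chrom/start/end file is recognized as BED:
with open("example.bed", "w") as f:      # hypothetical file
    f.write("chr4\t100\t200\n")
print(check_format("example.bed"))       # -> "bed"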
diff --git a/snakemake_pipeline/snakefiles/preprocessing.snake b/snakemake_pipeline/snakefiles/preprocessing.snake
deleted file mode 100644
index e28b6be..0000000
--- a/snakemake_pipeline/snakefiles/preprocessing.snake
+++ /dev/null
@@ -1,98 +0,0 @@
-# vim: syntax=python tabstop=4 expandtab
-# coding: utf-8
-
-# ------------------------------------------------------------------------------------------------------------------ #
-# ------------------------------------------------- Bam processing ------------------------------------------------- #
-# ------------------------------------------------------------------------------------------------------------------ #
-
-# Merge sample bam files to one condition bam file;
-# if there is only one sample per condition, copy the sample bam to the merged bam file name for further processing
-rule conditionbam:
-    input: lambda wildcards: config["data"][wildcards.condition]
-    output:
-        bam = os.path.join(OUTPUTDIR, "mapping", "{condition}.bam"),
-        bai = os.path.join(OUTPUTDIR, "mapping", "{condition}.bam.bai")
-    threads: 2
-    message: "Joining individual bamfiles from condition {wildcards.condition}"
-    run:
-        if len(input) > 1:
-            shell("samtools merge -@ {threads} {output.bam} {input}")
-            shell("samtools index {output.bam}")
-        else:
-            shell("cp {input} {output.bam}")
-            shell("samtools index {output.bam}")
-
-
-# ------------------------------------------------------------------------------------------------------------------ #
-# --------------------------------------------------- Peak-calling ------------------------------------------------- #
-# ------------------------------------------------------------------------------------------------------------------ #
-
-# Peak-calling
-gsizes = {"human": "hs",
-          "mouse": "mm",
-          "zebrafish": 1369631918}    #https://deeptools.readthedocs.io/en/develop/content/feature/effectiveGenomeSize.html
-
-rule macs:
-    input:
-        lambda wildcards: id2bam[wildcards.condition][wildcards.sample_id]
-    output:
-        macs = os.path.join(OUTPUTDIR, "peak_calling", "{condition}", "{sample_id}_peaks.broadPeak"),
-        raw = os.path.join(OUTPUTDIR, "peak_calling", "{condition}", "{sample_id}_raw.bed")
-    log: os.path.join(OUTPUTDIR, "logs", "{condition}_{sample_id}_peak_calling.log")
-    message: "Running macs2 with .bam-file: {input}"
-    params:
-        "--name {sample_id}",
-        "--outdir " + os.path.join(OUTPUTDIR, "peak_calling", "{condition}"),
-        "--gsize " + str(gsizes[config["run_info"]["organism"]]),
-        config["macs"],
-    shell:
-        "set +u; source activate MACS_ENV; "
-        "macs2 callpeak -t {input} {params} &> {log}; "
-        "cp {output.macs} {output.raw}; "
-
-
-# ------------------------------------------------------------------------------------------------------------------ #
-# ------------------------------------------------- Peak-processing ------------------------------------------------ #
-# ------------------------------------------------------------------------------------------------------------------ #
-# Process peaks:
-# 1. reduce to genomic location columns and sort
-# 2. merge peaks per condition
-# 3. remove blacklisted regions, drop chrM-like contigs, and add unique peak ids
-rule process_peaks:
-    input:
-        peaks = lambda wildcards: [os.path.join(OUTPUTDIR, "peak_calling", wildcards.condition, sample_id + "_raw.bed") for sample_id in id2bam[wildcards.condition].keys()],
-        blacklisted = BLACKLIST
-    output:
-        peaks = os.path.join(OUTPUTDIR, "peak_calling", "{condition}_union.bed"),
-    message: "Processing peaks from condition {wildcards.condition}"
-    shell:
-        "cat {input.peaks} | cut -f1-3 | sort -k1,1 -k2,2n | bedtools merge -d 5 | bedtools subtract -a - -b {input.blacklisted} -A | "
-        "awk '$1 !~ /[M]/' | "
-        "awk '{{print $0\"\\t{wildcards.condition}_\"NR}}' > {output.peaks}"
-
-
-# Union of peaks across all conditions
-rule merge_condition_peaks:
-    input: expand(os.path.join(OUTPUTDIR, "peak_calling", "{condition}_union.bed"), condition=CONDITION_IDS)
-    output: os.path.join(OUTPUTDIR, "peak_calling", "all_merged.bed")
-    message: "Merging peaks across conditions"
-    shell:
-        "cat {input} | sort -k1,1 -k2,2n | bedtools merge -d 5 -c 4 -o collapse > {output}"
-
-
-# Peak annotation
-# peaks per condition or across conditions, depending on run info output
-rule uropa:
-    input:
-        bed = os.path.join(OUTPUTDIR, "peak_calling", "all_merged.bed"),
-        gtf = GTF
-    output:
-        peaks = os.path.join(OUTPUTDIR, "peak_annotation", "all_merged_annotated.bed"),
-        header = os.path.join(OUTPUTDIR, "peak_annotation", "all_merged_annotated_header.txt"),
-    threads: 99
-    log: os.path.join(OUTPUTDIR, "logs", "uropa.log")
-    params:
-        config["uropa"]
-    shell:
-        "peak_annotation.sh -i {input.bed} -o {output.peaks} -p {output.header} -g {input.gtf} {params} -c {threads} -l {log} &> {log}; "
-        
\ No newline at end of file
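The shell one-liner in process_peaks compresses several steps; a rough pybedtools equivalent (pybedtools is listed in tobias.yaml above; file names are hypothetical, and the chrM filter is omitted for brevity):

import pybedtools

# Hypothetical inputs
peaks = pybedtools.BedTool("Bcell_raw.bed")
blacklist = pybedtools.BedTool("blacklist_chr4.bed")

# 1. sort, 2. merge peaks closer than 5 bp, 3. drop blacklisted regions
merged = peaks.sort().merge(d=5).subtract(blacklist, A=True)

# add unique per-condition peak ids (mirrors the final awk step)
with open("Bcell_union.bed", "w") as out:
    for i, interval in enumerate(merged, start=1):
        out.write("{0}\t{1}\t{2}\tBcell_{3}\n".format(
            interval.chrom, interval.start, interval.end, i))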
".join(expand(os.path.join(OUTPUTDIR, "TFBS", "{{TF}}", "beds", "{{TF}}_" + condition + "_{state}.bed"), state=["bound", "unbound"]))) for condition in CONDITION_IDS]), - #bed_labels = lambda wildcards, input: " ".join(["--TFBS_labels {0}".format(" ".join(["{0}_{1}".format(condition, state) for condition in CONDITION_IDS] for state in ["bound", "unbound"]))]), - signal_labels = "--signal_labels " + " ".join(CONDITION_IDS), - title = "--title '{TF} heatmap across conditions' " - shell: - "TOBIAS PlotHeatmap --signals {input.tracks} --output {output.heatmap} {params} --share_colorbar >/dev/null" - - - -#----------------------------------------------------------------# -#--------------------------- Aggregates -------------------------# -#----------------------------------------------------------------# - -#Comparison between bound/unbound sets of individual TFs -rule plot_aggregate_within: - input: - TFBS = [os.path.join(OUTPUTDIR, "TFBS", "{TF}", "beds", "{TF}_all.bed")] + [os.path.join(OUTPUTDIR, "TFBS", "{TF}", "beds", "{TF}_{condition}_" + state + ".bed") for state in ["bound", "unbound"]], - signals = [os.path.join(OUTPUTDIR, "bias_correction", "{condition}_" + state + ".bw") for state in ["uncorrected", "expected", "corrected"]], - output: - os.path.join(OUTPUTDIR, "TFBS", "{TF}", "plots", "{TF}_{condition}_aggregate.pdf") - log: - os.path.join(OUTPUTDIR, "TFBS", "{TF}", "plots", "logs", "{TF}_{condition}_aggregate.log") - message: "Plotting split between bound/unbound around TFBS for TF \"{wildcards.TF}\" in condition \"{wildcards.condition}\"" - params: - "--title 'Bias correction and split for {TF} in condition {condition}'", - "--share_y sites", - "--plot_boundaries", - shell: - "TOBIAS PlotAggregate --TFBS {input.TFBS} --signals {input.signals} --output {output} {params} > {log} " - - -#Aggregates across conditions for all and for bound subsets -rule plot_aggregate_across: - input: - TFBS_all = os.path.join(OUTPUTDIR, "TFBS", "{TF}", "beds", "{TF}_all.bed"), - TFBS_bound = expand(os.path.join(OUTPUTDIR, "TFBS", "{{TF}}", "beds", "{{TF}}_{condition}_bound.bed"), condition=CONDITION_IDS), - signals = expand(os.path.join(OUTPUTDIR, "bias_correction", "{condition}_corrected.bw"), condition=CONDITION_IDS), - output: - all_compare = os.path.join(OUTPUTDIR, "TFBS", "{TF}", "plots", "{TF}_aggregate_comparison_all.pdf"), - bound_compare = os.path.join(OUTPUTDIR, "TFBS", "{TF}", "plots", "{TF}_aggregate_comparison_bound.pdf"), - all_log = os.path.join(OUTPUTDIR, "TFBS", "{TF}", "plots", "logs", "{TF}_aggregate_comparison_all.log"), - bound_log = os.path.join(OUTPUTDIR, "TFBS", "{TF}", "plots", "logs", "{TF}_aggregate_comparison_bound.log") - priority: 2 - params: - "--title {0}".format("{TF}"), - "--plot_boundaries", - "--share_y both", - message: "Plotting comparison of cutsite signals for \"{wildcards.TF}\" between conditions" - shell: - "TOBIAS PlotAggregate --TFBS {input.TFBS_all} --signals {input.signals} --output {output.all_compare} {params} > {output.all_log}; " - "TOBIAS PlotAggregate --TFBS {input.TFBS_bound} --signals {input.signals} --output {output.bound_compare} {params} > {output.bound_log};" - - -#----------------------------------------------------------------# -#-------------------- Join pdfs from all TFs --------------------# -#----------------------------------------------------------------# - -rule join_pdfs: - input: - expand(os.path.join(OUTPUTDIR, "TFBS", "{TF}", "plots", "{TF}_{{plotname}}.pdf"), TF=TF_IDS) - output: - os.path.join(OUTPUTDIR, "overview", 
"all_{plotname}.pdf") - message: "Joining {wildcards.plotname} plots from all TFs" - shell: - "TOBIAS MergePDF --input {input} --output {output} >/dev/null" - \ No newline at end of file