Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
TOBIAS_snakemake/Snakefile
Go to file
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
192 lines (154 sloc)
7.35 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Upper-level TOBIAS Snakefile: validates the config, collects input and output files, and includes the sub-workflows.
""" | |
import glob
import itertools
import os
import subprocess
import sys
#Set config
#CONFIGFILE holds the path of the config file that is actually loaded, so that
#later error messages name the real file (previously it became the string
#"None" whenever no config file was given on the command line).
if workflow.overwrite_configfile is not None:
    CONFIGFILE = str(workflow.overwrite_configfile)
else:
    CONFIGFILE = 'TOBIAS.config'
configfile: CONFIGFILE

#Snake modules used to setup run
include: "snakefiles/helper.snake"
#shell.prefix("")
#-------------------------------------------------------------------------------# | |
#------------------------- CHECK FORMAT OF CONFIG FILE -------------------------# | |
#-------------------------------------------------------------------------------# | |
#Key paths (outermost key first) which must be present in the config
required = [
    ("data",),
    ("run_info",),
    ("run_info", "organism"),
    ("run_info", "fasta"),
    ("run_info", "blacklist"),
    ("run_info", "gtf"),
    ("run_info", "motifs"),
    ("run_info", "output"),
]

#Check if all keys are existing and contain information
for key_list in required:
    lookup_dict = config
    for key in key_list:
        try:
            lookup_dict = lookup_dict[key]
        except (KeyError, TypeError):
            #KeyError: the key is absent. TypeError: the parent entry is not a
            #mapping (e.g. an empty section which was read as None).
            print("ERROR: Could not find key(s) \"{0}\" in configfile {1}. Please check that your configfile has right format for TOBIAS.".format(":".join(key_list), CONFIGFILE))
            sys.exit(1)  #non-zero status so that callers can detect the failure

        #An existing but empty key is reported without stopping the run
        if lookup_dict is None:
            print("ERROR: Missing input for key {0}".format(key_list))
#Check if there is at least one condition with bamfiles
#Truthiness is used instead of len() so that an empty/None "data" section or
#condition is reported instead of raising TypeError.
if config["data"]:
    for condition in config["data"]:
        if not config["data"][condition]:
            print("ERROR: Could not find any bamfiles in \"{0}\" in configfile {1}".format(":".join(("data", condition)), CONFIGFILE))
else:
    #Literal braces must be doubled inside a str.format() template; the former
    #"\{condition\}" was parsed as a replacement field and raised KeyError.
    print("ERROR: Could not find any conditions (\"data:{{condition}}\") in configfile {0}".format(CONFIGFILE))
    sys.exit(1)  #non-zero status so that callers can detect the failure
#-------------------------------------------------------------------------------# | |
#------------------------- WHICH FILES/INFO WERE INPUT? ------------------------# | |
#-------------------------------------------------------------------------------# | |
#Every path collected here is tested for existence further down
input_files = []

#Files related to experimental data (bam)
CONDITION_IDS = list(config["data"].keys())
for condition in CONDITION_IDS:

    #Each condition may hold a single path/pattern or a list of them
    patterns = config["data"][condition]
    if patterns is None:
        patterns = []
    elif not isinstance(patterns, list):
        patterns = [patterns]

    bams = set()  #set removes duplicates
    for pattern in patterns:
        matches = glob.glob(pattern)
        #An entry without any match is kept as given; otherwise it would vanish
        #silently and the existence test of input_files could never report it
        bams.update(matches if matches else [pattern])

    #Sorted to get the same order of bamfiles in every run
    config["data"][condition] = sorted(bams)
    input_files.extend(config["data"][condition])
#Flatfiles independent from experimental data (run_info)
FASTA = config['run_info']['fasta']
BLACKLIST = config['run_info']['blacklist']
GTF = config['run_info']['gtf']
OUTPUTDIR = config['run_info']["output"]
#MOTIFS = config['run_info']['motifs']

#Queue the flatfiles for the existence test as well
input_files.extend([FASTA, BLACKLIST, GTF])
#---------- Test that input files exist -----------#
for file in input_files:

    #Entries without a value (None) are skipped
    if file is None:
        continue

    full_path = os.path.abspath(file)
    if not os.path.exists(full_path):
        #sys.exit with a message prints it to stderr and exits with status 1;
        #the bare exit() helper is only meant for interactive sessions
        sys.exit("ERROR: The following file given in config does not exist: {0}".format(full_path))
#--------------------------------- MOTIFS ------------------------------#
#If not list, make it list and glob elements
if not isinstance(config['run_info']['motifs'], list):
    config['run_info']['motifs'] = [config['run_info']['motifs']]
motif_input = list(itertools.chain.from_iterable(glob.glob(element) for element in config['run_info']['motifs']))

#Test if input is directory or file
motif_files = []
for path in motif_input:

    #If input is dir; fetch all input files
    if os.path.isdir(path):
        files = os.listdir(path)
        #Only regular files are taken; sub-directories cannot be read as motif files
        motif_files.extend([os.path.join(path, f) for f in files if os.path.isfile(os.path.join(path, f))])

    #If input is file, add to list of files
    elif os.path.isfile(path):
        motif_files.append(path)

#Remove duplicates; sorted to get the same order of motif files in every run
motif_files = sorted(set(motif_files))
config['run_info']['motifs'] = list(motif_files)
#Identify IDS of motifs
#MOTIF_FILES maps motif ID -> motif file. Header lines starting with "MOTIF"
#or ">" are recognized; the ID is built from the first two header fields and
#passed through filafy().
#NOTE(review): one ID is registered per file (the last header found), i.e.
#files are presumably single-motif - confirm.
MOTIF_FILES = {}
for file in motif_files:
    full_file = file

    #Reset for every file so that an ID of a previous file cannot be reused
    ID = None
    with open(full_file) as f:
        for line in f:
            if line.startswith("MOTIF"):
                columns = line.rstrip().split()
                ID = columns[2] + "_" + columns[1]
                ID = filafy(ID)
            elif line.startswith(">"):
                columns = line.replace(">", "").rstrip().split()
                ID = columns[1] + "_" + columns[0]
                ID = filafy(ID)

    #Files without any motif header are skipped instead of being linked to a wrong ID
    if ID is None:
        print("WARNING: Could not find any motif header in file {0} - skipping file".format(full_file))
        continue

    MOTIF_FILES[ID] = full_file

TF_IDS = list(MOTIF_FILES.keys())
#-------------------------------------------------------------------------------# | |
#------------------------ WHICH FILES SHOULD BE CREATED? -----------------------# | |
#-------------------------------------------------------------------------------# | |
#Collects every file which rule "all" should produce
output_files = []

#id2bam: condition -> {sample id: bamfile}; the sample id is the basename of
#the bamfile without its extension
id2bam = {}
for condition in CONDITION_IDS:
    config_bams = config['data'][condition]
    sampleids = [os.path.splitext(os.path.basename(bam))[0] for bam in config_bams]
    id2bam[condition] = dict(zip(sampleids, config_bams))  # Link sample ids to bams
#One aggregate plot per condition; comparison plots only with more than one condition
PLOTNAMES = expand("{condition}_{plotname}", condition=CONDITION_IDS, plotname=["aggregate"])
if len(CONDITION_IDS) > 1:
    PLOTNAMES.extend(["heatmap_comparison", "aggregate_comparison_all", "aggregate_comparison_bound"])

output_files.append(os.path.join(OUTPUTDIR, "config.yaml"))

#extend (not append) keeps output_files a flat list of paths like all other entries
output_files.extend(expand(os.path.join(OUTPUTDIR, "footprinting", "{condition}_footprints.bw"), condition=CONDITION_IDS))
#output_files.append(os.path.join(OUTPUTDIR, "TFBS", "bindetect_results.txt"))
#output_files.append(os.path.join(OUTPUTDIR, "overview", "bindetect_results.txt"))
output_files.extend(expand(os.path.join(OUTPUTDIR, "overview", "all_{condition}_bound.bed"), condition=CONDITION_IDS))

#Visualization
output_files.extend(expand(os.path.join(OUTPUTDIR, "TFBS", "{TF}", "plots", "{TF}_{plotname}.pdf"), TF=TF_IDS, plotname=PLOTNAMES))
output_files.extend(expand(os.path.join(OUTPUTDIR, "overview", "all_{plotname}.pdf"), plotname=PLOTNAMES))
output_files.append(os.path.join(OUTPUTDIR, "overview", "TF_changes.pdf"))

#Wilson
output_files.extend(expand(os.path.join(OUTPUTDIR, "wilson", "data", "{TF}_overview.clarion"), TF=TF_IDS))
output_files.append(os.path.join(OUTPUTDIR, "wilson", "HOW_TO_WILSON.txt"))
#-------------------------------------------------------------------------------# | |
#------------------------ DEAL WITH SPECIAL ENVIRONMENTS -----------------------# | |
#-------------------------------------------------------------------------------# | |
#First word of every non-empty line printed by "conda env list"
sys_env = subprocess.check_output(['conda', 'env', 'list'], universal_newlines=True)
env_list = [line.split()[0] for line in sys_env.split("\n") if line.split()]

#(environment name, name used in the message, environment file)
#NOTE(review): the environment names are presumably the ones set in the yaml files - confirm
special_envs = [
    ("TOBIAS_ENV", "TOBIAS", "environments/tobias.yaml"),  # default TOBIAS environment
    ("MACS_ENV", "macs", "environments/macs.yaml"),        # python 2 related envs
]

for env_name, env_label, env_file in special_envs:
    if env_name not in env_list:
        print("Creating {0} environment for the first time".format(env_label))
        returncode = subprocess.call(["conda", "env", "create", "--file", env_file])

        #A failed creation is reported instead of being ignored; the run is
        #not stopped here
        if returncode != 0:
            print("WARNING: conda could not create the environment from {0} (exit code {1})".format(env_file, returncode))
#-------------------------------------------------------------------------------# | |
#---------------------------------- RUN :-) ------------------------------------# | |
#-------------------------------------------------------------------------------# | |
#Sub-workflows providing the rules which create the files in output_files
include: "snakefiles/preprocessing.snake"
include: "snakefiles/footprinting.snake"
include: "snakefiles/visualization.snake"
include: "snakefiles/wilson.snake"

#Target rule: requests every file collected in output_files
rule all:
    input:
        output_files
    message:
        "Rule all"