diff --git a/Snakefile b/Snakefile index bab51f4..513bca1 100644 --- a/Snakefile +++ b/Snakefile @@ -16,7 +16,6 @@ CONFIGFILE = str(workflow.overwrite_configfile) #Snake modules used to setup run include: "snakefiles/helper.snake" - #shell.prefix("") #-------------------------------------------------------------------------------# @@ -54,7 +53,6 @@ else: print("ERROR: Could not find any conditions (\"data:\{condition\}\") in configfile {0}".format(CONFIGFILE)) sys.exit() - #-------------------------------------------------------------------------------# #------------------------- WHICH FILES/INFO WERE INPUT? ------------------------# #-------------------------------------------------------------------------------# @@ -76,11 +74,10 @@ BLACKLIST = config['run_info']['blacklist'] GTF = config['run_info']['gtf'] OUTPUTDIR = config['run_info']["output"] BLACKLIST = config['run_info']['blacklist'] -MOTIFDIR = config['run_info']['motifs'] +#MOTIFS = config['run_info']['motifs'] input_files.extend([FASTA, BLACKLIST, GTF]) - #---------- Test that input files exist -----------# for file in input_files: if file != None: @@ -89,12 +86,33 @@ for file in input_files: exit("ERROR: The following file given in config does not exist: {0}".format(full_path)) -#--------------------------------- MOTIFS --------------------------------------# +#--------------------------------- MOTIFS ------------------------------# + +#If not list, make it list and glob elements +if not isinstance(config['run_info']['motifs'], list): + config['run_info']['motifs'] = [config['run_info']['motifs']] +motif_input = sum([glob.glob(element) for element in config['run_info']['motifs']], []) + +#Test if input is directory or file +motif_files = [] +for path in motif_input: + + #If input is dir; fetch all input files + if os.path.isdir(path): + files = os.listdir(path) + motif_files.extend([os.path.join(path, f) for f in files]) + + #If input is file, add to list of files + elif os.path.isfile(path): + motif_files.append(path) + +motif_files = list(set(motif_files)) #remove duplicates +config['run_info']['motifs'] = sorted(motif_files) + #Identify IDS of motifs -files = os.listdir(MOTIFDIR) MOTIF_FILES = {} -for file in files: - full_file = os.path.join(MOTIFDIR, file) +for file in motif_files: + full_file = file with open(full_file) as f: for line in f: if line.startswith("MOTIF"): @@ -109,7 +127,6 @@ for file in files: TF_IDS = list(MOTIF_FILES.keys()) - #-------------------------------------------------------------------------------# #------------------------ WHICH FILES SHOULD BE CREATED? -----------------------# #-------------------------------------------------------------------------------# diff --git a/example_config.yaml b/example_config.yaml index 0bdf8ab..6c3ae14 100644 --- a/example_config.yaml +++ b/example_config.yaml @@ -7,12 +7,12 @@ data: Tcell: [data/Tcell_chr4_*.bam] #list of .bam-files run_info: - organism: human #mouse/human - fasta: data/genome_chr4.fa.gz #.fasta-file containing organism genome - blacklist: data/blacklist_chr4.bed #.bed-file containing blacklisted regions - gtf: data/genes_chr4.gtf #.gtf-file for annotation of peaks - motifs: data/individual_motifs #directory containing motifs (single files in MEME/JASPAR/PFM format) - output: test_output #output directory + organism: human #mouse/human + fasta: data/genome_chr4.fa.gz #.fasta-file containing organism genome + blacklist: data/blacklist_chr4.bed #.bed-file containing blacklisted regions + gtf: data/genes_chr4.gtf #.gtf-file for annotation of peaks + motifs: data/individual_motifs/* #motifs (directory with files or individual files in MEME/JASPAR/PFM format) + output: test_output #output directory #-------------------------------------------------------------------------# diff --git a/snakefiles/footprinting.snake b/snakefiles/footprinting.snake index 86ca69a..9eb409f 100644 --- a/snakefiles/footprinting.snake +++ b/snakefiles/footprinting.snake @@ -5,7 +5,7 @@ #Format motifs to pfm format rule format_motifs: input: - MOTIF_FILES.values() #MOTIF_FILES is a dict + MOTIF_FILES.values() #MOTIF_FILES is a dict with paths to motif files as values output: os.path.join(OUTPUTDIR, "motifs", "all_motifs.txt") priority: 2