Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Added globbing of input motifs, meaning that multiple motif files/dirs are possible
  • Loading branch information
msbentsen committed May 6, 2019
1 parent 3bcbd36 commit a370185
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 16 deletions.
35 changes: 26 additions & 9 deletions Snakefile
Expand Up @@ -16,7 +16,6 @@ CONFIGFILE = str(workflow.overwrite_configfile)

#Snake modules used to setup run
include: "snakefiles/helper.snake"

#shell.prefix("")

#-------------------------------------------------------------------------------#
Expand Down Expand Up @@ -54,7 +53,6 @@ else:
print("ERROR: Could not find any conditions (\"data:\{condition\}\") in configfile {0}".format(CONFIGFILE))
sys.exit()


#-------------------------------------------------------------------------------#
#------------------------- WHICH FILES/INFO WERE INPUT? ------------------------#
#-------------------------------------------------------------------------------#
Expand All @@ -76,11 +74,10 @@ BLACKLIST = config['run_info']['blacklist']
GTF = config['run_info']['gtf']
OUTPUTDIR = config['run_info']["output"]
BLACKLIST = config['run_info']['blacklist']
MOTIFDIR = config['run_info']['motifs']
#MOTIFS = config['run_info']['motifs']

input_files.extend([FASTA, BLACKLIST, GTF])


#---------- Test that input files exist -----------#
for file in input_files:
if file != None:
Expand All @@ -89,12 +86,33 @@ for file in input_files:
exit("ERROR: The following file given in config does not exist: {0}".format(full_path))


#--------------------------------- MOTIFS --------------------------------------#
#--------------------------------- MOTIFS ------------------------------#

#Collect motif files from config['run_info']['motifs'], which may be a single
#string or a list of strings. Each element is expanded with glob and may point
#to a directory (all regular files directly inside are used) or to a file.

#If not list, make it list and glob elements
if not isinstance(config['run_info']['motifs'], list):
    config['run_info']['motifs'] = [config['run_info']['motifs']]
motif_input = [path for element in config['run_info']['motifs'] for path in glob.glob(element)]

#Test if input is directory or file
motif_files = set()     #set removes duplicates (e.g. overlapping glob patterns)
for path in motif_input:

    #If input is dir; fetch all input files
    if os.path.isdir(path):
        for f in os.listdir(path):
            full_path = os.path.join(path, f)

            #os.listdir also returns sub-directories; these cannot be opened as motif files
            if os.path.isfile(full_path):
                motif_files.add(full_path)

    #If input is file, add to list of files
    elif os.path.isfile(path):
        motif_files.add(path)

#Sort so that the order of motif files is reproducible between runs
motif_files = sorted(motif_files)
config['run_info']['motifs'] = motif_files

#Identify IDS of motifs
files = os.listdir(MOTIFDIR)
MOTIF_FILES = {}
for file in files:
full_file = os.path.join(MOTIFDIR, file)
for file in motif_files:
full_file = file
with open(full_file) as f:
for line in f:
if line.startswith("MOTIF"):
Expand All @@ -109,7 +127,6 @@ for file in files:

#The keys of MOTIF_FILES are collected as the list of TF ids
TF_IDS = [motif_id for motif_id in MOTIF_FILES]


#-------------------------------------------------------------------------------#
#------------------------ WHICH FILES SHOULD BE CREATED? -----------------------#
#-------------------------------------------------------------------------------#
Expand Down
12 changes: 6 additions & 6 deletions example_config.yaml
Expand Up @@ -7,12 +7,12 @@ data:
Tcell: [data/Tcell_chr4_*.bam] #list of .bam-files

run_info:
organism: human #mouse/human
fasta: data/genome_chr4.fa.gz #.fasta-file containing organism genome
blacklist: data/blacklist_chr4.bed #.bed-file containing blacklisted regions
gtf: data/genes_chr4.gtf #.gtf-file for annotation of peaks
motifs: data/individual_motifs #directory containing motifs (single files in MEME/JASPAR/PFM format)
output: test_output #output directory
organism: human #mouse/human
fasta: data/genome_chr4.fa.gz #.fasta-file containing organism genome
blacklist: data/blacklist_chr4.bed #.bed-file containing blacklisted regions
gtf: data/genes_chr4.gtf #.gtf-file for annotation of peaks
motifs: data/individual_motifs/* #motifs (directory with files or individual files in MEME/JASPAR/PFM format)
output: test_output #output directory


#-------------------------------------------------------------------------#
Expand Down
2 changes: 1 addition & 1 deletion snakefiles/footprinting.snake
Expand Up @@ -5,7 +5,7 @@
#Format motifs to pfm format
rule format_motifs:
input:
MOTIF_FILES.values() #MOTIF_FILES is a dict
MOTIF_FILES.values() #MOTIF_FILES is a dict with paths to motif files as values
output:
os.path.join(OUTPUTDIR, "motifs", "all_motifs.txt")
priority: 2
Expand Down

0 comments on commit a370185

Please sign in to comment.