Skip to content
Permalink
5ccee944f2
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
149 lines (134 sloc) 6.56 KB
# vim: syntax=python tabstop=4 expandtab
# coding: utf-8
#--------------------------------------------------------------------------------------------------------#
#Copy motif files to folder
rule copy_motifs:
input:
config["run_info"]["motifs"] #list of files
output:
[os.path.join(OUTPUTDIR, "motifs", "individual", os.path.basename(f)) for f in config["run_info"]["motifs"]]
params:
outdir = os.path.join(OUTPUTDIR, "motifs", "individual")
run:
for f in input:
shell("cp {0} {1}".format(f, params.outdir))
#Format motifs to one file
rule format_motifs:
input:
rules.copy_motifs.output #this is a directory
output:
joined = os.path.join(OUTPUTDIR, "motifs", "all_motifs.txt")
priority:
2
params:
motifdir = os.path.join(OUTPUTDIR, "motifs", "individual")
log:
os.path.join(OUTPUTDIR, "logs", "format_motifs.log")
shell:
"TOBIAS FormatMotifs --input {params.motifdir} --format pfm --task join --output {output} &>> {log}"
#--------------------------------------------------------------------------------------------------------#
#Correct reads for Tn5 sequence bias
rule atacorrect:
input:
bam = rules.conditionbam.output.bam, #os.path.join(OUTPUTDIR, "mapping", "{condition}.bam"),
peaks = rules.sort_peak_names.output, #os.path.join(OUTPUTDIR, "peak_calling", "all_merged.bed"),
genome = FASTA
output:
uncorrected = os.path.join(OUTPUTDIR, "bias_correction", "{condition}_uncorrected.bw"),
bias = os.path.join(OUTPUTDIR, "bias_correction", "{condition}_bias.bw"),
expected = os.path.join(OUTPUTDIR, "bias_correction", "{condition}_expected.bw"),
corrected = os.path.join(OUTPUTDIR, "bias_correction", "{condition}_corrected.bw"),
params:
"--blacklist " + BLACKLIST if BLACKLIST != "" else "",
"--outdir " + os.path.join(OUTPUTDIR, "bias_correction"),
"--prefix " + "{condition}",
config.get("atacorrect", "")
priority:
2
threads:
99 #unless there are more than 99 cores, this rule will run on max threads
log:
os.path.join(OUTPUTDIR, "logs", "{condition}_atacorrect.log")
message:
"Running ATACorrect for condition {wildcards.condition} ({input.bam})"
shell:
"TOBIAS ATACorrect -b {input.bam} -g {input.genome} -p {input.peaks} --cores {threads} {params} &> {log}"
#--------------------------------------------------------------------------------------------------------#
#Calculate footprint scores per condition
rule footprinting:
input:
signal = rules.atacorrect.output.corrected, #os.path.join(OUTPUTDIR, "bias_correction", "{condition}_corrected.bw"),
regions = rules.sort_peak_names.output #os.path.join(OUTPUTDIR, "peak_calling", "all_merged.bed")
output:
footprints = os.path.join(OUTPUTDIR, "footprinting", "{condition}_footprints.bw"),
params:
config.get("footprinting", "")
priority:
2
threads:
99
log:
os.path.join(OUTPUTDIR, "logs", "{condition}_footprinting.log")
message:
"Running footprinting for condition {wildcards.condition} ({input.signal})"
shell:
"TOBIAS FootprintScores --signal {input.signal} --regions {input.regions} --output {output.footprints} --cores {threads} {params} &> {log}"
#--------------------------------------------------------------------------------------------------------#
#Estimate bound sites from scored file
checkpoint bindetect:
input:
motifs = rules.format_motifs.output, #os.path.join(OUTPUTDIR, "motifs", "all_motifs.txt"),
footprints = expand(rules.footprinting.output.footprints, condition=CONDITION_IDS), #os.path.join(OUTPUTDIR, "footprinting", "{condition}_footprints.bw"), condition=CONDITION_IDS),
genome = FASTA,
peaks = rules.uropa.output.peaks, #os.path.join(OUTPUTDIR, "peak_annotation", "all_merged_annotated.bed"),
peak_header = rules.uropa.output.header #os.path.join(OUTPUTDIR, "peak_annotation", "all_merged_annotated_header.txt")
output:
directory(os.path.join(OUTPUTDIR, "TFBS"))
#os.path.join(OUTPUTDIR, "overview", "bindetect_results.txt")
#bound_files = dynamic(expand(os.path.join(OUTPUTDIR, "TFBS", "{{TF}}", "beds", "{{TF}}_{condition}_bound.bed"), condition=CONDITION_IDS)), #, TF={TF}"),
#unbound_files = expand(os.path.join(OUTPUTDIR, "TFBS", "{TF}", "beds", "{TF}_{condition}_unbound.bed"), condition=CONDITION_IDS, TF=TF_IDS),
#all_files = expand(os.path.join(OUTPUTDIR, "TFBS", "{TF}", "beds", "{TF}_all.bed"), TF=TF_IDS),
#overview = dynamic(expand(os.path.join(OUTPUTDIR, "TFBS", "{{TF}}", "{{TF}}_overview.txt"))),
#figures = os.path.join(OUTPUTDIR, "TFBS", "bindetect_figures.pdf"),
#local = [os.path.join(OUTPUTDIR, "TFBS", fil) for fil in ["bindetect_results.txt", "bindetect_results.xlsx"]],
threads: 99
priority: 2
log:
os.path.join(OUTPUTDIR, "logs", "bindetect.log")
params:
config.get("bindetect", ""),
"--cond_names " + " ".join(CONDITION_IDS),
message:
"Running BINDetect"
shell:
"TOBIAS BINDetect --motifs {input.motifs} --signals {input.footprints} --genome {input.genome} "
"--peaks {input.peaks} --peak_header {input.peak_header} --cores {threads} --outdir {output} {params} &> {log}; "
"mkdir -p " + os.path.join(OUTPUTDIR, "overview") + ";"
"cp " + os.path.join(OUTPUTDIR, "TFBS", "*.txt") + " " + os.path.join(OUTPUTDIR, "overview") + ";" #move files to overview
"cp " + os.path.join(OUTPUTDIR, "TFBS", "*.xlsx") + " " + os.path.join(OUTPUTDIR, "overview") + ";"
"cp " + os.path.join(OUTPUTDIR, "TFBS", "*.pdf") + " " + os.path.join(OUTPUTDIR, "overview") + ";"
#--------------------------------------------------------------------------------------------------------#
def get_TF_ids(wildcards):
bindetect_output = checkpoints.bindetect.get(**wildcards).output[0]
TF_IDS = glob_wildcards(os.path.join(bindetect_output, "{TF}", "beds")).TF #wildcard for each TF from dir name
return(TF_IDS)
#--------------------------------------------------------------------------------------------------------#
#Join bound estimates per condition
rule join_bound:
input:
lambda wildcards: expand(os.path.join(OUTPUTDIR, "TFBS", "{TF}", "beds", "{TF}_{{condition}}_bound.bed"), TF=get_TF_ids(wildcards)),
output:
unsorted = temp(os.path.join(OUTPUTDIR, "overview", "all_{condition}_bound.tmp")),
final = os.path.join(OUTPUTDIR, "overview", "all_{condition}_bound.bed")
priority:
2
threads:
99 #Make sure that only one join_bound runs at a time; due to memory issues
run:
n = 100 #chunks of 100 files
shell("> " + output[0])
file_chunks = [input[i:i+n] for i in range(0,len(input),n)]
for chunk in file_chunks:
shell("cat {0} | bedtools sort >> {1}".format(" ".join(chunk), output.unsorted))
shell("sort -k 1,1 -k2,2 -n {output.unsorted} > {output.final}")
shell("igvtools index {output.final}")