Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Added filtering of peaks not in input fasta
  • Loading branch information
msbentsen committed Feb 5, 2020
1 parent b348eb7 commit 5ccee94
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 12 deletions.
21 changes: 14 additions & 7 deletions snakefiles/footprinting.snake
Expand Up @@ -21,7 +21,8 @@ rule format_motifs:
rules.copy_motifs.output #this is a directory
output:
joined = os.path.join(OUTPUTDIR, "motifs", "all_motifs.txt")
priority: 2
priority:
2
params:
motifdir = os.path.join(OUTPUTDIR, "motifs", "individual")
log:
Expand All @@ -46,8 +47,10 @@ rule atacorrect:
"--outdir " + os.path.join(OUTPUTDIR, "bias_correction"),
"--prefix " + "{condition}",
config.get("atacorrect", "")
priority: 2
threads: 99 #unless there are more than 99 cores, this rule will run on max threads
priority:
2
threads:
99 #unless there are more than 99 cores, this rule will run on max threads
log:
os.path.join(OUTPUTDIR, "logs", "{condition}_atacorrect.log")
message:
Expand All @@ -66,8 +69,10 @@ rule footprinting:
footprints = os.path.join(OUTPUTDIR, "footprinting", "{condition}_footprints.bw"),
params:
config.get("footprinting", "")
priority: 2
threads: 99
priority:
2
threads:
99
log:
os.path.join(OUTPUTDIR, "logs", "{condition}_footprinting.log")
message:
Expand Down Expand Up @@ -129,8 +134,10 @@ rule join_bound:
output:
unsorted = temp(os.path.join(OUTPUTDIR, "overview", "all_{condition}_bound.tmp")),
final = os.path.join(OUTPUTDIR, "overview", "all_{condition}_bound.bed")
priority: 2
threads: 99 #Make sure that only one join_bound runs at a time; due to memory issues
priority:
2
threads:
99 #Make sure that only one join_bound runs at a time; due to memory issues
run:
n = 100 #chunks of 100 files

Expand Down
23 changes: 18 additions & 5 deletions snakefiles/preprocessing.snake
Expand Up @@ -16,6 +16,16 @@ rule copy_flatfiles:
"cp {input.blacklist} {output.blacklist};"
"cp {input.gtf} {output.gtf}"

#Get chromosomes available in fasta
rule get_fasta_chroms:
input:
rules.copy_flatfiles.output.fasta
output:
txt = os.path.join(OUTPUTDIR, "flatfiles", "chromsizes.txt"),
bed = os.path.join(OUTPUTDIR, "flatfiles", "chromsizes.bed")
shell:
"faidx {input} -i chromsizes > {output.txt};"
"awk '{{ print $1\"\t\"0\"\t\"$2 }}' {output.txt} > {output.bed}"

# ------------------------------------------------------------------------------------------------------------------ #
# ---------------------------------------------- Config processing ------------------------------------------------- #
Expand Down Expand Up @@ -98,13 +108,16 @@ rule macs:
rule process_peaks:
input:
peaks = lambda wildcards: [os.path.join(OUTPUTDIR, "peak_calling", wildcards.condition, sample_id + "_raw.bed") for sample_id in id2bam[wildcards.condition].keys()],
blacklisted = BLACKLIST
blacklist = BLACKLIST,
whitelist = rules.get_fasta_chroms.output.bed
output:
peaks = os.path.join(OUTPUTDIR, "peak_calling", "{condition}_union.bed")
message: "Processing peaks from condition {wildcards.condition}"
shell:
"cat {input.peaks} | cut -f1-3 | sort -k1,1 -k2,2n | bedtools merge -d 5 | bedtools subtract -a - -b {input.blacklisted} -A | "
"awk '$1 !~ /[M]/' | "
"cat {input.peaks} | cut -f1-3 | sort -k1,1 -k2,2n | bedtools merge -d 5 | "
"bedtools subtract -a - -b {input.blacklist} -A | "
"bedtools intersect -a - -b {input.whitelist} -wa | "
"awk '$1 !~ /[M]/' | " #exclude chromosome M
"awk '{{print $s\"\\t{wildcards.condition}\"}}' > {output.peaks}" #add condition name to each peak

# Union peaks across all conditions
Expand All @@ -118,10 +131,10 @@ rule merge_condition_peaks:
shell:
"cat {input} | sort -k1,1 -k2,2n | bedtools merge -d 5 -c 4 -o distinct > {output}"

#Get correct sorting of peak_names for
#Get correct sorting of peak_names
rule sort_peak_names:
input:
rules.merge_condition_peaks.output #os.path.join(OUTPUTDIR, "peak_calling", "all_merged.tmp")
rules.merge_condition_peaks.output
output:
peaks = os.path.join(OUTPUTDIR, "peak_calling", "all_merged.bed")
run:
Expand Down

0 comments on commit 5ccee94

Please sign in to comment.