Skip to content
This repository has been archived by the owner. It is now read-only.

Commit

Permalink
Version 0.2: Changed estimation of background in BINDetect, introduced tobias-wide logger with verbosity, smaller fixes and reorganization in folders
Browse files Browse the repository at this point in the history
  • Loading branch information
msbentsen committed Feb 12, 2019
1 parent 26ff437 commit ba7b318
Show file tree
Hide file tree
Showing 31 changed files with 1,905 additions and 1,533 deletions.
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,14 @@ def readme():
Extension("tobias.utils.signals", ["tobias/utils/signals.pyx"], include_dirs=[np.get_include()])]

setup(name='tobias',
version='0.1',
version='0.2',
description='Transcription factor Occupancy prediction By Investigation of ATAC-seq Signal',
long_description=readme(),
url='https://github.molgen.mpg.de/loosolab/TOBIAS',
author='Mette Bentsen',
author_email='mette.bentsen@mpi-bn.mpg.de',
license='MIT',
packages=['tobias', 'tobias.footprinting', 'tobias.utils', 'tobias.plotting', 'tobias.motifs'],
packages=['tobias', 'tobias.footprinting', 'tobias.plotting', 'tobias.motifs', 'tobias.misc', 'tobias.utils'],
entry_points = {
'console_scripts': ['TOBIAS=tobias.TOBIAS:main']
},
Expand Down
4 changes: 2 additions & 2 deletions snakemake_pipeline/TOBIAS.snake
Original file line number Diff line number Diff line change
Expand Up @@ -130,8 +130,8 @@ if len(CONDITION_IDS) > 1:

output_files.append(expand(os.path.join(OUTPUTDIR, "footprinting", "{condition}_footprints.bw"), condition=CONDITION_IDS))

output_files.append(os.path.join(OUTPUTDIR, "TFBS", "bindetect_results.txt"))
output_files.append(os.path.join(OUTPUTDIR, "overview", "bindetect_results.txt"))
#output_files.append(os.path.join(OUTPUTDIR, "TFBS", "bindetect_results.txt"))
#output_files.append(os.path.join(OUTPUTDIR, "overview", "bindetect_results.txt"))

#Visualization
output_files.extend(expand(os.path.join(OUTPUTDIR, "TFBS", "{TF}", "plots", "{TF}_{plotname}.pdf"), TF=TF_IDS, plotname=PLOTNAMES))
Expand Down
2 changes: 1 addition & 1 deletion snakemake_pipeline/TOBIAS_example.config
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,5 @@ uropa: "--feature gene --feature_anchor start --distance [10000,1000] --filter_a

atacorrect: ""
footprinting: ""
bindetect: ""
bindetect: "--prefix immune"
plotting: ""
2 changes: 1 addition & 1 deletion snakemake_pipeline/environments/tobias.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ channels:
- conda-forge

dependencies:
- python=3
- pysam
- pybigwig
- moods
Expand All @@ -22,6 +23,5 @@ dependencies:
- openjdk
- xlsxwriter
- cloudpickle=0.5.6
- seaborn
- pip:
- adjustText
14 changes: 9 additions & 5 deletions snakemake_pipeline/snakefiles/footprinting.snake
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,8 @@ rule bindetect:
output:
bedfiles = expand(os.path.join(OUTPUTDIR, "TFBS", "{TF}", "beds", "{TF}_{suffix}.bed"), TF=TF_IDS, suffix=["all"] + expand("{condition}_{state}", condition=CONDITION_IDS, state=["bound", "unbound"])),
overview = expand(os.path.join(OUTPUTDIR, "TFBS", "{TF}", "{TF}_overview.txt"), TF=TF_IDS),
figures = os.path.join(OUTPUTDIR, "TFBS", "bindetect_figures.pdf"),
local = [os.path.join(OUTPUTDIR, "TFBS", fil) for fil in ["bindetect_results.txt", "bindetect_results.xlsx"]],

#figures = os.path.join(OUTPUTDIR, "TFBS", "bindetect_figures.pdf"),
#local = [os.path.join(OUTPUTDIR, "TFBS", fil) for fil in ["bindetect_results.txt", "bindetect_results.xlsx"]],
threads: 99
priority: 2
log:
Expand All @@ -88,12 +87,17 @@ rule bindetect:
"Running BINDetect"
shell:
"TOBIAS BINDetect --motifs {input.motifs} --signals {input.footprints} --genome {input.genome} --peaks {input.peaks} --peak_header {input.peak_header} --cores {threads} {params} &> {log}; "

"mkdir -p " + os.path.join(OUTPUTDIR, "overview") + ";"
"mv " + os.path.join(OUTPUTDIR, "TFBS", "*.txt") + " " + os.path.join(OUTPUTDIR, "overview") + ";" #move files to overview
"mv " + os.path.join(OUTPUTDIR, "TFBS", "*.xlsx") + " " + os.path.join(OUTPUTDIR, "overview") + ";"
"mv " + os.path.join(OUTPUTDIR, "TFBS", "*.pdf") + " " + os.path.join(OUTPUTDIR, "overview") + ";"
"""
rule copy_to_overview:
input:
[os.path.join(OUTPUTDIR, "TFBS", fil) for fil in ["bindetect_results.txt", "bindetect_results.xlsx"]],
output:
[os.path.join(OUTPUTDIR, "overview", fil) for fil in ["bindetect_results.txt", "bindetect_results.xlsx"]]
priority: 2
shell:
"cp " + os.path.join(OUTPUTDIR, "TFBS", "bindetect_*") + " " + os.path.join(OUTPUTDIR, "overview")
"mv " + os.path.join(OUTPUTDIR, "TFBS", "*.*") + " " + os.path.join(OUTPUTDIR, "overview") #move files to overview
"""
14 changes: 9 additions & 5 deletions snakemake_pipeline/snakefiles/visualization.snake
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,15 @@ rule plot_aggregate_within:
signals = [os.path.join(OUTPUTDIR, "bias_correction", "{condition}_" + state + ".bw") for state in ["uncorrected", "expected", "corrected"]],
output:
os.path.join(OUTPUTDIR, "TFBS", "{TF}", "plots", "{TF}_{condition}_aggregate.pdf")
log:
os.path.join(OUTPUTDIR, "TFBS", "{TF}", "plots", "logs", "{TF}_{condition}_aggregate.log")
message: "Plotting split between bound/unbound around TFBS for TF \"{wildcards.TF}\" in condition \"{wildcards.condition}\""
params:
"--title 'Bias correction and split for {TF} in condition {condition}'",
"--share_y rows",
"--share_y sites",
"--plot_boundaries",
shell:
"TOBIAS PlotAggregate --TFBS {input.TFBS} --signals {input.signals} --output {output} {params} >/dev/null "
"TOBIAS PlotAggregate --TFBS {input.TFBS} --signals {input.signals} --output {output} {params} > {log} "


#Aggregates across conditions for all and for bound subsets
Expand All @@ -64,16 +66,18 @@ rule plot_aggregate_across:
signals = expand(os.path.join(OUTPUTDIR, "bias_correction", "{condition}_corrected.bw"), condition=CONDITION_IDS),
output:
all_compare = os.path.join(OUTPUTDIR, "TFBS", "{TF}", "plots", "{TF}_aggregate_comparison_all.pdf"),
bound_compare = os.path.join(OUTPUTDIR, "TFBS", "{TF}", "plots", "{TF}_aggregate_comparison_bound.pdf")
bound_compare = os.path.join(OUTPUTDIR, "TFBS", "{TF}", "plots", "{TF}_aggregate_comparison_bound.pdf"),
all_log = os.path.join(OUTPUTDIR, "TFBS", "{TF}", "plots", "logs", "{TF}_aggregate_comparison_all.log"),
bound_log = os.path.join(OUTPUTDIR, "TFBS", "{TF}", "plots", "logs", "{TF}_aggregate_comparison_bound.log")
priority: 2
params:
"--title {0}".format("{TF}"),
"--plot_boundaries",
"--share_y both",
message: "Plotting comparison of cutsite signals for \"{wildcards.TF}\" between conditions"
shell:
"TOBIAS PlotAggregate --TFBS {input.TFBS_all} --signals {input.signals} --output {output.all_compare} {params} >/dev/null; "
"TOBIAS PlotAggregate --TFBS {input.TFBS_bound} --signals {input.signals} --output {output.bound_compare} {params} >/dev/null;"
"TOBIAS PlotAggregate --TFBS {input.TFBS_all} --signals {input.signals} --output {output.all_compare} {params} > {output.all_log}; "
"TOBIAS PlotAggregate --TFBS {input.TFBS_bound} --signals {input.signals} --output {output.bound_compare} {params} > {output.bound_log};"


#----------------------------------------------------------------#
Expand Down
36 changes: 18 additions & 18 deletions tobias/TOBIAS.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,19 @@

from tobias.plotting.plot_aggregate import *
from tobias.plotting.plot_heatmap import *
from tobias.plotting.plot_bindetect import *
from tobias.plotting.plot_changes import *

from tobias.motifs.tfbscan import *
from tobias.motifs.format_motifs import *
from tobias.motifs.cluster_tfbs import *
from tobias.motifs.score_bed import *

from tobias.utils.subsample_bam import *
from tobias.utils.merge_pdfs import *
from tobias.utils.score_bed import *
from tobias.misc.subsample_bam import *
from tobias.misc.merge_pdfs import *
from tobias.misc.maxpos import *

TOBIAS_VERSION = "0.1"

TOBIAS_VERSION = "0.2" #<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< Change here :-)

def main():
parser = argparse.ArgumentParser("TOBIAS", usage=SUPPRESS)
Expand Down Expand Up @@ -98,14 +100,14 @@ def main():
formatmotifs_parser.set_defaults(func=run_formatmotifs)
all_tool_parsers[name.lower()] = formatmotifs_parser

"""
name, hlp = "ClusterTF", "Cluster TFs based on overlap of sites"

name, hlp = "ClusterTFBS", "Cluster TFs based on overlap of sites"
parser.description += " {0}\t\t{1}\n".format(name, hlp)
clustering_parser = subparsers.add_parser(name, usage=SUPPRESS)
clustering_parser = add_clustering_arguments(clustering_parser)
clustering_parser.set_defaults(func=run_clustering)
all_tool_parsers[name] = clustering_parser
"""
all_tool_parsers[name.lower()] = clustering_parser


name, hlp = "ScoreBed", "Score .bed-file with signal from .bigwig-file(s)"
parser.description += " {0}\t\t{1}\n".format(name, hlp)
Expand Down Expand Up @@ -136,15 +138,6 @@ def main():
heatmap_parser.set_defaults(func=run_heatmap)
all_tool_parsers[name.lower()] = heatmap_parser


name, hlp = "PlotBINDetect", "Plotting function from BINDetect (to re-plot output)"
parser.description += " {0}\t{1}\n".format(name, hlp)
diffplot_parser = subparsers.add_parser(name, usage=SUPPRESS)
diffplot_parser = add_diffplot_arguments(diffplot_parser)
diffplot_parser.set_defaults(func=run_diffplot)
all_tool_parsers[name.lower()] = diffplot_parser


name, hlp = "PlotChanges", "Plot changes in TF binding across multiple conditions (from BINDetect output)"
parser.description += " {0}\t\t{1}\n".format(name, hlp)
changeplot_parser = subparsers.add_parser(name, usage=SUPPRESS)
Expand All @@ -167,6 +160,13 @@ def main():
mergepdf_parser.set_defaults(func=run_mergepdf)
all_tool_parsers[name.lower()] = mergepdf_parser

name, hlp = "MaxPos", "Get .bed-positions of highest bigwig signal within .bed-regions"
parser.description += " {0}\t\t{1}\n".format(name, hlp)
maxpos_parser = subparsers.add_parser(name, usage=SUPPRESS)
maxpos_parser = add_maxpos_arguments(maxpos_parser)
maxpos_parser.set_defaults(func=run_maxpos)
all_tool_parsers[name.lower()] = maxpos_parser

name, hlp = "SubsampleBam", "Subsample a .bam-file using samtools"
parser.description += " {0}\t\t{1}\n".format(name, hlp)
subsample_parser = subparsers.add_parser(name, usage=SUPPRESS)
Expand Down
Loading

0 comments on commit ba7b318

Please sign in to comment.