diff --git a/CHANGES b/CHANGES index 806cd70..86297b9 100644 --- a/CHANGES +++ b/CHANGES @@ -1,5 +1,10 @@ -## 0.3.2 (2019-04-26) +## 0.4.0 (2019-04-26) - Added --add_region_columns to TFBScan +- Renamed FootprintScores to ScoreBigwig +- Added normalization of input score distributions in BINDetect +- Added general warning for --cores > max number of processors when running multiprocessing +- Updated PlotChanges to also plot mean_score and percent_bound as well as overview per motif cluster +- Added MOODS-python as dependency through pip ## 0.3.1 (2019-04-10) - Split motif ids between name/motif_id/output_prefix \ No newline at end of file diff --git a/setup.py b/setup.py index 210fb09..6d2574f 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ import os import sys import re -from setuptools import setup, Extension, dist +from setuptools import setup, Extension, dist, find_packages #Test if numpy is installed try: @@ -56,7 +56,7 @@ def readme(): author='Mette Bentsen', author_email='mette.bentsen@mpi-bn.mpg.de', license='MIT', - packages=['tobias', 'tobias.footprinting', 'tobias.plotting', 'tobias.motifs', 'tobias.misc', 'tobias.utils'], + packages=find_packages(), #"tobias"), #['tobias', 'tobias.footprinting', 'tobias.plotting', 'tobias.motifs', 'tobias.misc', 'tobias.utils'], entry_points={ 'console_scripts': ['TOBIAS=tobias.TOBIAS:main'] }, @@ -77,7 +77,6 @@ def readme(): 'pyBigWig', 'MOODS-python', ], - scripts = ["tobias/utils/filter_important_factors.py"], classifiers=[ 'License :: OSI Approved :: MIT License', 'Intended Audience :: Science/Research', diff --git a/tobias/TOBIAS.py b/tobias/TOBIAS.py index 35e1dd9..b2fa07b 100644 --- a/tobias/TOBIAS.py +++ b/tobias/TOBIAS.py @@ -28,9 +28,9 @@ from argparse import SUPPRESS import textwrap -from tobias.footprinting.ATACorrect import * -from tobias.footprinting.footprint_scores import * -from tobias.footprinting.BINDetect import * +from tobias.footprinting.atacorrect import * +from tobias.footprinting.scorebigwig import * +from tobias.footprinting.bindetect import * from tobias.plotting.plot_aggregate import * from tobias.plotting.plot_heatmap import * @@ -44,7 +44,7 @@ from tobias.misc.subsample_bam import * from tobias.misc.merge_pdfs import * from tobias.misc.maxpos import * -from tobias.misc.create_network import * +#from tobias.misc.create_network import * from tobias.misc.log2table import * from tobias import __version__ as TOBIAS_VERSION @@ -56,7 +56,7 @@ def main(): all_parser_info = {"Tools for footprinting analysis": { "ATACorrect":{"help":"Correct reads with regards to Tn5 sequence bias", "add_arguments": add_atacorrect_arguments, "function":run_atacorrect}, - "FootprintScores":{"help":"Calculate footprint scores from cutsites", "add_arguments": add_footprint_arguments, "function":run_footprinting, "space":"\t"}, + "ScoreBigwig":{"help":"Calculate scores such as footprints from cutsites", "add_arguments": add_scorebigwig_arguments, "function":run_scorebigwig, "replaces":"FootprintScores"}, "BINDetect":{"help":"Detect TF binding from footprints and motifs", "add_arguments": add_bindetect_arguments, "function":run_bindetect}, }, @@ -80,7 +80,7 @@ def main(): "MergePDF": {"help": "Merge pdf files to one", "add_arguments":add_mergepdf_arguments, "function":run_mergepdf}, "MaxPos": {"help": "Get .bed-positions of highest bigwig signal within .bed-regions", "add_arguments": add_maxpos_arguments, "function": run_maxpos}, "SubsampleBam": {"help": "Subsample a .bam-file using samtools", "add_arguments": add_subsample_arguments, "function": run_subsampling}, - "CreateNetwork": {"help": "Create TF-gene network from annotated TFBS", "add_arguments": add_network_arguments, "function": run_network, "space":"\t"}, + #"CreateNetwork": {"help": "Create TF-gene network from annotated TFBS", "add_arguments": add_network_arguments, "function": run_network, "space":"\t"}, "Log2Table": {"help": "Convert logs from PlotAggregate to tab-delimitered tables of footprint stats", "add_arguments": add_log2table_arguments, "function": run_log2table} } } @@ -116,16 +116,21 @@ def main(): subparser = info[tool]["add_arguments"](subparser) subparser.set_defaults(func=info[tool]["function"]) all_tool_parsers[tool.lower()] = subparser - + + #Add version to subparser + subparser.add_argument("--version", action='version', version=TOBIAS_VERSION) + + #Add parser for old tool names + if "replaces" in info[tool]: + all_tool_parsers[info[tool]["replaces"].lower()] = subparser + parser.description += "\n" parser.description += "For help on each tool, please run: TOBIAS --help\n" - - #Add version number to upper TOBIAS parser and all subparsers + + #Add version number to upper TOBIAS parser parser.description += "For version number: TOBIAS --version" parser.add_argument("--version", action='version', version=TOBIAS_VERSION) - for name in all_tool_parsers: - all_tool_parsers[name].add_argument("--version", action='version', version=TOBIAS_VERSION) #If no args, print help for top-level TOBIAS if len(sys.argv[1:]) == 0: diff --git a/tobias/__init__.py b/tobias/__init__.py index 39a2139..6a9beea 100644 --- a/tobias/__init__.py +++ b/tobias/__init__.py @@ -1 +1 @@ -__version__ = "0.3.2" +__version__ = "0.4.0" diff --git a/tobias/footprinting/ATACorrect.py b/tobias/footprinting/ATACorrect.py index d01b70e..89ae7da 100644 --- a/tobias/footprinting/ATACorrect.py +++ b/tobias/footprinting/ATACorrect.py @@ -37,7 +37,7 @@ import pysam #Internal functions and classes -from tobias.footprinting.ATACorrect_functions import * +from tobias.footprinting.atacorrect_functions import * from tobias.utils.utilities import * from tobias.utils.regions import * from tobias.utils.sequences import * diff --git a/tobias/footprinting/BINDetect.py b/tobias/footprinting/BINDetect.py index ee6acfc..cf93e11 100644 --- a/tobias/footprinting/BINDetect.py +++ b/tobias/footprinting/BINDetect.py @@ -36,7 +36,7 @@ import pysam #Internal functions and classes -from tobias.footprinting.BINDetect_functions import * +from tobias.footprinting.bindetect_functions import * from tobias.utils.utilities import * from tobias.utils.regions import * from tobias.utils.sequences import * @@ -177,10 +177,9 @@ def run_bindetect(args): #output and order titles = [] + titles.append("Raw score distributions") + titles.append("Normalized score distributions") if args.debug: - - titles.append("Raw score distributions") - titles.append("Normalized score distributions") for (cond1, cond2) in comparisons: titles.append("Background log2FCs ({0} / {1})".format(cond1, cond2)) @@ -361,10 +360,9 @@ def run_bindetect(args): logger.comment("") logger.info("Estimating score distribution per condition") - if args.debug == True: - fig = plot_score_distribution([background["signal"][bigwig] for bigwig in args.cond_names], labels=args.cond_names, title="Raw scores per condition") - figure_pdf.savefig(fig, bbox_inches='tight') - plt.close() + fig = plot_score_distribution([background["signal"][bigwig] for bigwig in args.cond_names], labels=args.cond_names, title="Raw scores per condition") + figure_pdf.savefig(fig, bbox_inches='tight') + plt.close() logger.info("Normalizing scores") list_of_vals = [background["signal"][bigwig] for bigwig in args.cond_names] @@ -374,10 +372,9 @@ def run_bindetect(args): for bigwig in args.cond_names: background["signal"][bigwig] = args.norm_objects[bigwig].normalize(background["signal"][bigwig]) - if args.debug == True: - fig = plot_score_distribution([background["signal"][bigwig] for bigwig in args.cond_names], labels=args.cond_names, title="Normalized scores per condition") - figure_pdf.savefig(fig, bbox_inches='tight') - plt.close() + fig = plot_score_distribution([background["signal"][bigwig] for bigwig in args.cond_names], labels=args.cond_names, title="Normalized scores per condition") + figure_pdf.savefig(fig, bbox_inches='tight') + plt.close() ########################################################### logger.info("Estimating bound/unbound threshold") @@ -413,7 +410,7 @@ def run_bindetect(args): mode = scipy.optimize.fmin(lambda x: -scipy.stats.lognorm.pdf(x, *log_params), 0, disp=False)[0] logger.debug("- Mode estimated at: {0}".format(mode)) pseudo = mode / 2.0 #pseudo is half the mode - args.pseudo = pseudo #. append(pseudo) + args.pseudo = pseudo logger.debug("Pseudocount estimated at: {0}".format(round(args.pseudo, 5))) # Estimate theoretical normal for threshold diff --git a/tobias/footprinting/BINDetect_functions.py b/tobias/footprinting/BINDetect_functions.py index 8f265f1..a9a8c32 100644 --- a/tobias/footprinting/BINDetect_functions.py +++ b/tobias/footprinting/BINDetect_functions.py @@ -617,8 +617,8 @@ def plot_bindetect(motifs, cluster_obj, conditions, args): #print(txts) #Plot custom legend for colors - legend_elements = [Line2D([0],[0], marker='o', color='w', markerfacecolor="red", label="More bound in {0}".format(conditions[0])), - Line2D([0],[0], marker='o', color='w', markerfacecolor="blue", label="More bound in {0}".format(conditions[1]))] + legend_elements = [Line2D([0],[0], marker='o', color='w', markerfacecolor="red", label="Higher scores in {0}".format(conditions[0])), + Line2D([0],[0], marker='o', color='w', markerfacecolor="blue", label="Higher scores in {0}".format(conditions[1]))] ax1.legend(handles=legend_elements, bbox_to_anchor=(1.05, 1), loc='upper left') return(fig) diff --git a/tobias/footprinting/footprint_scores.py b/tobias/footprinting/scorebigwig.py similarity index 86% rename from tobias/footprinting/footprint_scores.py rename to tobias/footprinting/scorebigwig.py index 8f521f0..356970a 100644 --- a/tobias/footprinting/footprint_scores.py +++ b/tobias/footprinting/scorebigwig.py @@ -31,12 +31,12 @@ from tobias.utils.logger import * #-----------------------------------------------------------------# -def add_footprint_arguments(parser): +def add_scorebigwig_arguments(parser): parser.formatter_class = lambda prog: argparse.RawDescriptionHelpFormatter(prog, max_help_position=40, width=90) - description = "FootprintScores calculates footprint scores from ATAC-seq cutsites (.bigwig format) calculated using the ATACorrect tool.\n\n" - description += "Usage: FootprintScores --signal --output \n\n" - description += "Output:\n- " + description = "ScoreBigwig calculates scores (such as footprint-scores) from bigwig files (such as ATAC-seq cutsites calculated using the ATACorrect tool).\n\n" + description += "Usage: ScoreBigwig --signal --regions --output \n\n" + description += "Output:\n- " parser.description = format_help_description("FootprintScores", description) parser._action_groups.pop() #pop -h @@ -48,20 +48,22 @@ def add_footprint_arguments(parser): required.add_argument('-r', '--regions', metavar="", help="Genomic regions to run footprinting within") optargs = parser.add_argument_group('Optional arguments') - optargs.add_argument('--score', metavar="", choices=["footprint", "sum", "FOS"], help="Type of scoring to perform on cutsites (footprint/sum/FOS) (default: footprint)", default="footprint") + optargs.add_argument('--score', metavar="", choices=["footprint", "sum"], help="Type of scoring to perform on cutsites (footprint/sum) (default: footprint)", default="footprint") + #mean + #abs convert bigwig signal to absolute scores before calculating score optargs.add_argument('--extend', metavar="", type=int, help="Extend input regions with bp (default: 100)", default=100) optargs.add_argument('--smooth', metavar="", type=int, help="Smooth output signal by mean in windows (default: no smoothing)", default=1) - optargs.add_argument('--min_limit', metavar="", type=float, help="Limit input bigwig score range (default: no lower limit)") #default none - optargs.add_argument('--max_limit', metavar="", type=float, help="Limit input bigwig score range (default: no upper limit)") #default none + optargs.add_argument('--min-limit', metavar="", type=float, help="Limit input bigwig score range (default: no lower limit)") #default none + optargs.add_argument('--max-limit', metavar="", type=float, help="Limit input bigwig score range (default: no upper limit)") #default none footprintargs = parser.add_argument_group('Parameters for score == footprint') - optargs.add_argument('--fp_min', metavar="", type=int, help="Minimum footprint width (default: 20)", default=20) - optargs.add_argument('--fp_max', metavar="", type=int, help="Maximum footprint width (default: 50)", default=50) - optargs.add_argument('--flank_min', metavar="", type=int, help="Minimum range of flanking regions (default: 10)", default=10) - optargs.add_argument('--flank_max', metavar="", type=int, help="Maximum range of flanking regions (default: 30)", default=30) + optargs.add_argument('--fp-min', metavar="", type=int, help="Minimum footprint width (default: 20)", default=20) + optargs.add_argument('--fp-max', metavar="", type=int, help="Maximum footprint width (default: 50)", default=50) + optargs.add_argument('--flank-min', metavar="", type=int, help="Minimum range of flanking regions (default: 10)", default=10) + optargs.add_argument('--flank-max', metavar="", type=int, help="Maximum range of flanking regions (default: 30)", default=30) sumargs = parser.add_argument_group('Parameters for score == sum') - optargs.add_argument('--window', metavar="", type=int, help="The window for calculation of sum (default: 100)", default=100) + sumargs.add_argument('--window', metavar="", type=int, help="The window for calculation of sum (default: 100)", default=100) runargs = parser.add_argument_group('Run arguments') runargs.add_argument('--cores', metavar="", type=int, help="Number of cores to use for computation (default: 1)", default=1) @@ -91,6 +93,7 @@ def calculate_scores(regions, args): signal = region.get_signal(pybw_signal) signal = np.nan_to_num(signal).astype("float64") + # if args.min_limit != None: signal[signal < args.min_limit] = args.min_limit if args.max_limit != None: @@ -125,7 +128,7 @@ def calculate_scores(regions, args): #------------------------------------------------------------------------------------------# -def run_footprinting(args): +def run_scorebigwig(args): check_required(args, ["signal", "output", "regions"]) check_files([args.signal, args.regions], "r") @@ -135,7 +138,7 @@ def run_footprinting(args): # Create logger and write info to log #---------------------------------------------------------------------------------------# - logger = TobiasLogger("FootprintScores", args.verbosity) + logger = TobiasLogger("ScoreBigwig", args.verbosity) logger.begin() parser = add_footprint_arguments(argparse.ArgumentParser())