From ce5e13c878bac37f5c39d4cc94477c76c5e0a13d Mon Sep 17 00:00:00 2001 From: msbentsen Date: Thu, 2 May 2019 15:13:27 +0200 Subject: [PATCH] Added sum/mean/none to ScoreBigwig --- CHANGES | 3 +++ setup.py | 4 +++- tobias/__init__.py | 2 +- tobias/footprinting/scorebigwig.py | 31 ++++++++++++++++++++---------- tobias/utils/regions.py | 2 +- 5 files changed, 29 insertions(+), 13 deletions(-) diff --git a/CHANGES b/CHANGES index 46bcb8c..abe0cd1 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,6 @@ +## 0.5.0 (2019-05-02) +- Added sum/mean/none scoring to ScoreBigwig as well as the option to get --absolute of input signal + ## 0.4.1 (2019-04-29) - Fixed weird "can't pickle SwigPyObject objects"-error in bindetect diff --git a/setup.py b/setup.py index 6d2574f..b6225c0 100644 --- a/setup.py +++ b/setup.py @@ -18,6 +18,7 @@ cmdclass = {'build_ext': build_ext} except ImportError: use_cython = False + else: use_cython = True @@ -56,7 +57,7 @@ def readme(): author='Mette Bentsen', author_email='mette.bentsen@mpi-bn.mpg.de', license='MIT', - packages=find_packages(), #"tobias"), #['tobias', 'tobias.footprinting', 'tobias.plotting', 'tobias.motifs', 'tobias.misc', 'tobias.utils'], + packages=find_packages(), entry_points={ 'console_scripts': ['TOBIAS=tobias.TOBIAS:main'] }, @@ -77,6 +78,7 @@ def readme(): 'pyBigWig', 'MOODS-python', ], + scripts=["tobias/utils/filter_important_factors.py"], classifiers=[ 'License :: OSI Approved :: MIT License', 'Intended Audience :: Science/Research', diff --git a/tobias/__init__.py b/tobias/__init__.py index 3d26edf..3d18726 100644 --- a/tobias/__init__.py +++ b/tobias/__init__.py @@ -1 +1 @@ -__version__ = "0.4.1" +__version__ = "0.5.0" diff --git a/tobias/footprinting/scorebigwig.py b/tobias/footprinting/scorebigwig.py index 4cb6466..f77e443 100644 --- a/tobias/footprinting/scorebigwig.py +++ b/tobias/footprinting/scorebigwig.py @@ -48,13 +48,12 @@ def add_scorebigwig_arguments(parser): required.add_argument('-r', '--regions', metavar="", help="Genomic regions to run footprinting within") optargs = parser.add_argument_group('Optional arguments') - optargs.add_argument('--score', metavar="", choices=["footprint", "sum"], help="Type of scoring to perform on cutsites (footprint/sum) (default: footprint)", default="footprint") - #mean - #abs convert bigwig signal to absolute scores before calculating score + optargs.add_argument('--score', metavar="", choices=["footprint", "sum", "mean", "none"], help="Type of scoring to perform on cutsites (footprint/sum/mean/none) (default: footprint)", default="footprint") + optargs.add_argument('--absolute', action='store_true', help="Convert bigwig signal to absolute values before calculating score") optargs.add_argument('--extend', metavar="", type=int, help="Extend input regions with bp (default: 100)", default=100) optargs.add_argument('--smooth', metavar="", type=int, help="Smooth output signal by mean in windows (default: no smoothing)", default=1) - optargs.add_argument('--min-limit', metavar="", type=float, help="Limit input bigwig score range (default: no lower limit)") #default none - optargs.add_argument('--max-limit', metavar="", type=float, help="Limit input bigwig score range (default: no upper limit)") #default none + optargs.add_argument('--min-limit', metavar="", type=float, help="Limit input bigwig value range (default: no lower limit)") #default none + optargs.add_argument('--max-limit', metavar="", type=float, help="Limit input bigwig value range (default: no upper limit)") #default none footprintargs = parser.add_argument_group('Parameters for score == footprint') footprintargs.add_argument('--fp-min', metavar="", type=int, help="Minimum footprint width (default: 20)", default=20) @@ -93,27 +92,37 @@ def calculate_scores(regions, args): signal = region.get_signal(pybw_signal) signal = np.nan_to_num(signal).astype("float64") - # + #-------- Prepare signal for score calculation -------# + if args.absolute: + signal = np.abs(signal) + if args.min_limit != None: signal[signal < args.min_limit] = args.min_limit if args.max_limit != None: signal[signal > args.max_limit] = args.max_limit - #Calculate scores + #------------------ Calculate scores ----------------# if args.score == "sum": - signal = np.abs(signal) scores = fast_rolling_math(signal, args.window, "sum") + elif args.score == "mean": + scores = fast_rolling_math(signal, args.window, "mean") + elif args.score == "footprint": scores = tobias_footprint_array(signal, args.flank_min, args.flank_max, args.fp_min, args.fp_max) #numpy array elif args.score == "FOS": - scores = FOS_score(signal, args.flank_min, args.flank_max, args.fp_min, args.fp_max) - scores = -scores + scores = FOS_score(signal, args.flank_min, args.flank_max, args.fp_min, args.fp_max) + scores = -scores + elif args.score == "none": + scores = signal + else: sys.exit("Scoring {0} not found".format(args.score)) + #----------------- Post-process scores --------------# + #Smooth signal with args.smooth bp if args.smooth > 1: scores = fast_rolling_math(scores, args.smooth, "mean") @@ -175,6 +184,8 @@ def run_scorebigwig(args): args.region_flank = int(args.window/2.0) elif args.score == "footprint" or args.score == "FOS": args.region_flank = int(args.flank_max) + else: + args.region_flank = 0 #Go through each region for i, region in enumerate(regions): diff --git a/tobias/utils/regions.py b/tobias/utils/regions.py index ed34e53..98a942c 100644 --- a/tobias/utils/regions.py +++ b/tobias/utils/regions.py @@ -666,7 +666,7 @@ def get_cluster_names(self): self.cluster_names[cluster] = "C_" + self.names[ordered_idx[0]] #cluster is the idx for cluster else: self.cluster_names[cluster] = self.clusters[cluster]["member_names"][0] - self.clusters[cluster]["representative"] = self.cluster_names[cluster] + self.clusters[cluster]["representative"] = "C_" + self.cluster_names[cluster] self.clusters[cluster]["cluster_name"] = self.cluster_names[cluster]