Getting ready for 0.4.0

loosolab · Apr 29, 2019 · c20987e · c20987e
1 parent b7c05c4
commit c20987e
Show file tree

Hide file tree

Showing 8 changed files with 55 additions and 46 deletions.
diff --git a/CHANGES b/CHANGES
@@ -1,5 +1,10 @@
-## 0.3.2 (2019-04-26)
+## 0.4.0 (2019-04-26)
 - Added --add_region_columns to TFBScan
+- Renamed FootprintScores to ScoreBigwig
+- Added normalization of input score distributions in BINDetect
+- Added a general warning for multiprocessing runs where --cores exceeds the maximum number of processors
+- Updated PlotChanges to also plot mean_score and percent_bound, as well as an overview per motif cluster
+- Added MOODS-python as dependency through pip
 
 ## 0.3.1 (2019-04-10)
 - Split motif ids between name/motif_id/output_prefix
diff --git a/setup.py b/setup.py
@@ -1,7 +1,7 @@
 import os
 import sys
 import re
-from setuptools import setup, Extension, dist
+from setuptools import setup, Extension, dist, find_packages
 
 #Test if numpy is installed
 try:
@@ -56,7 +56,7 @@ def readme():
 		author='Mette Bentsen',
 		author_email='mette.bentsen@mpi-bn.mpg.de',
 		license='MIT',
-		packages=['tobias', 'tobias.footprinting', 'tobias.plotting', 'tobias.motifs', 'tobias.misc', 'tobias.utils'],
+		packages=find_packages(), #"tobias"), #['tobias', 'tobias.footprinting', 'tobias.plotting', 'tobias.motifs', 'tobias.misc', 'tobias.utils'],
 		entry_points={
 			'console_scripts': ['TOBIAS=tobias.TOBIAS:main']
 		},
@@ -77,7 +77,6 @@ def readme():
 			'pyBigWig',
 			'MOODS-python',
 		],
-		scripts = ["tobias/utils/filter_important_factors.py"],
 		classifiers=[
 			'License :: OSI Approved :: MIT License',
 			'Intended Audience :: Science/Research',

diff --git a/tobias/TOBIAS.py b/tobias/TOBIAS.py
@@ -28,9 +28,9 @@
 from argparse import SUPPRESS
 import textwrap
 
-from tobias.footprinting.ATACorrect import *
-from tobias.footprinting.footprint_scores import *
-from tobias.footprinting.BINDetect import *
+from tobias.footprinting.atacorrect import *
+from tobias.footprinting.scorebigwig import *
+from tobias.footprinting.bindetect import *
 
 from tobias.plotting.plot_aggregate import *
 from tobias.plotting.plot_heatmap import *
@@ -44,7 +44,7 @@
 from tobias.misc.subsample_bam import *
 from tobias.misc.merge_pdfs import *
 from tobias.misc.maxpos import *
-from tobias.misc.create_network import *
+#from tobias.misc.create_network import *
 from tobias.misc.log2table import *
 
 from tobias import __version__ as TOBIAS_VERSION
@@ -56,7 +56,7 @@ def main():
 	all_parser_info = {"Tools for footprinting analysis":
 							{
 							"ATACorrect":{"help":"Correct reads with regards to Tn5 sequence bias", "add_arguments": add_atacorrect_arguments, "function":run_atacorrect},
-							"FootprintScores":{"help":"Calculate footprint scores from cutsites", "add_arguments": add_footprint_arguments, "function":run_footprinting, "space":"\t"},
+							"ScoreBigwig":{"help":"Calculate scores such as footprints from cutsites", "add_arguments": add_scorebigwig_arguments, "function":run_scorebigwig, "replaces":"FootprintScores"},
 							"BINDetect":{"help":"Detect TF binding from footprints and motifs", "add_arguments": add_bindetect_arguments, "function":run_bindetect},
 							},
 
@@ -80,7 +80,7 @@ def main():
 							"MergePDF": {"help": "Merge pdf files to one", "add_arguments":add_mergepdf_arguments, "function":run_mergepdf},
 							"MaxPos": {"help": "Get .bed-positions of highest bigwig signal within .bed-regions", "add_arguments": add_maxpos_arguments, "function": run_maxpos},
 							"SubsampleBam": {"help": "Subsample a .bam-file using samtools", "add_arguments": add_subsample_arguments, "function": run_subsampling},
-							"CreateNetwork": {"help": "Create TF-gene network from annotated TFBS", "add_arguments": add_network_arguments, "function": run_network, "space":"\t"},
+							#"CreateNetwork": {"help": "Create TF-gene network from annotated TFBS", "add_arguments": add_network_arguments, "function": run_network, "space":"\t"},
 							"Log2Table": {"help": "Convert logs from PlotAggregate to tab-delimitered tables of footprint stats", "add_arguments": add_log2table_arguments, "function": run_log2table}
 							}
 						}
@@ -116,16 +116,21 @@ def main():
 			subparser = info[tool]["add_arguments"](subparser)
 			subparser.set_defaults(func=info[tool]["function"])
 			all_tool_parsers[tool.lower()] = subparser
-
+
+			#Add version to subparser
+			subparser.add_argument("--version", action='version', version=TOBIAS_VERSION)
+
+			#Add parser for old tool names
+			if "replaces" in info[tool]:
+				all_tool_parsers[info[tool]["replaces"].lower()] = subparser
+
 		parser.description += "\n"
 
 	parser.description += "For help on each tool, please run: TOBIAS <TOOLNAME> --help\n"
-
-	#Add version number to upper TOBIAS parser and all subparsers
+	
+	#Add version number to upper TOBIAS parser 
 	parser.description += "For version number: TOBIAS --version"
 	parser.add_argument("--version", action='version', version=TOBIAS_VERSION)
-	for name in all_tool_parsers:
-		all_tool_parsers[name].add_argument("--version", action='version', version=TOBIAS_VERSION)
 
 	#If no args, print help for top-level TOBIAS
 	if len(sys.argv[1:]) == 0:

diff --git a/tobias/__init__.py b/tobias/__init__.py
@@ -1 +1 @@
-__version__ = "0.3.2"
+__version__ = "0.4.0"
diff --git a/tobias/footprinting/ATACorrect.py b/tobias/footprinting/ATACorrect.py
@@ -37,7 +37,7 @@
 import pysam
 
 #Internal functions and classes
-from tobias.footprinting.ATACorrect_functions import *
+from tobias.footprinting.atacorrect_functions import *
 from tobias.utils.utilities import *
 from tobias.utils.regions import *
 from tobias.utils.sequences import *

diff --git a/tobias/footprinting/BINDetect.py b/tobias/footprinting/BINDetect.py
@@ -36,7 +36,7 @@
 import pysam
 
 #Internal functions and classes
-from tobias.footprinting.BINDetect_functions import *
+from tobias.footprinting.bindetect_functions import *
 from tobias.utils.utilities import *
 from tobias.utils.regions import *
 from tobias.utils.sequences import *
@@ -177,10 +177,9 @@ def run_bindetect(args):
 
 	#output and order
 	titles = []
+	titles.append("Raw score distributions")
+	titles.append("Normalized score distributions")
 	if args.debug:
-
-		titles.append("Raw score distributions")
-		titles.append("Normalized score distributions")
 		for (cond1, cond2) in comparisons:
 			titles.append("Background log2FCs ({0} / {1})".format(cond1, cond2))	
 
@@ -361,10 +360,9 @@ def run_bindetect(args):
 	logger.comment("")
 	logger.info("Estimating score distribution per condition")
 
-	if args.debug == True:
-		fig = plot_score_distribution([background["signal"][bigwig] for bigwig in args.cond_names], labels=args.cond_names, title="Raw scores per condition")
-		figure_pdf.savefig(fig, bbox_inches='tight')
-		plt.close()
+	fig = plot_score_distribution([background["signal"][bigwig] for bigwig in args.cond_names], labels=args.cond_names, title="Raw scores per condition")
+	figure_pdf.savefig(fig, bbox_inches='tight')
+	plt.close()
 
 	logger.info("Normalizing scores")
 	list_of_vals = [background["signal"][bigwig] for bigwig in args.cond_names]
@@ -374,10 +372,9 @@ def run_bindetect(args):
 	for bigwig in args.cond_names:
 		background["signal"][bigwig] = args.norm_objects[bigwig].normalize(background["signal"][bigwig]) 
 
-	if args.debug == True:
-		fig = plot_score_distribution([background["signal"][bigwig] for bigwig in args.cond_names], labels=args.cond_names, title="Normalized scores per condition")
-		figure_pdf.savefig(fig, bbox_inches='tight')
-		plt.close()
+	fig = plot_score_distribution([background["signal"][bigwig] for bigwig in args.cond_names], labels=args.cond_names, title="Normalized scores per condition")
+	figure_pdf.savefig(fig, bbox_inches='tight')
+	plt.close()
 
 	###########################################################
 	logger.info("Estimating bound/unbound threshold")
@@ -413,7 +410,7 @@ def run_bindetect(args):
 	mode = scipy.optimize.fmin(lambda x: -scipy.stats.lognorm.pdf(x, *log_params), 0, disp=False)[0]
 	logger.debug("- Mode estimated at: {0}".format(mode))
 	pseudo = mode / 2.0		#pseudo is half the mode
-	args.pseudo = pseudo  #.  append(pseudo)
+	args.pseudo = pseudo
 	logger.debug("Pseudocount estimated at: {0}".format(round(args.pseudo, 5)))
 
 	# Estimate theoretical normal for threshold

diff --git a/tobias/footprinting/BINDetect_functions.py b/tobias/footprinting/BINDetect_functions.py
@@ -617,8 +617,8 @@ def plot_bindetect(motifs, cluster_obj, conditions, args):
 	#print(txts)
 
 	#Plot custom legend for colors
-	legend_elements = [Line2D([0],[0], marker='o', color='w', markerfacecolor="red", label="More bound in {0}".format(conditions[0])),
-						Line2D([0],[0], marker='o', color='w', markerfacecolor="blue", label="More bound in {0}".format(conditions[1]))]
+	legend_elements = [Line2D([0],[0], marker='o', color='w', markerfacecolor="red", label="Higher scores in {0}".format(conditions[0])),
+						Line2D([0],[0], marker='o', color='w', markerfacecolor="blue", label="Higher scores in {0}".format(conditions[1]))]
 	ax1.legend(handles=legend_elements, bbox_to_anchor=(1.05, 1), loc='upper left')
 
 	return(fig)

diff --git a/tobias/footprinting/footprint_scores.py → tobias/footprinting/scorebigwig.py b/tobias/footprinting/footprint_scores.py → tobias/footprinting/scorebigwig.py
@@ -31,12 +31,12 @@
 from tobias.utils.logger import *
 
 #-----------------------------------------------------------------#
-def add_footprint_arguments(parser):
+def add_scorebigwig_arguments(parser):
 
 	parser.formatter_class = lambda prog: argparse.RawDescriptionHelpFormatter(prog, max_help_position=40, width=90)
-	description = "FootprintScores calculates footprint scores from ATAC-seq cutsites (.bigwig format) calculated using the ATACorrect tool.\n\n"
-	description += "Usage: FootprintScores --signal <cutsites.bw> --output <output.bw>\n\n"
-	description += "Output:\n- <footprint_scores.bigwig>"
+	description = "ScoreBigwig calculates scores (such as footprint-scores) from bigwig files (such as ATAC-seq cutsites calculated using the ATACorrect tool).\n\n"
+	description += "Usage: ScoreBigwig --signal <cutsites.bw> --regions <regions.bed> --output <output.bw>\n\n"
+	description += "Output:\n- <output.bw>"
 	parser.description = format_help_description("FootprintScores", description)
 
 	parser._action_groups.pop()	#pop -h
@@ -48,20 +48,22 @@ def add_footprint_arguments(parser):
 	required.add_argument('-r', '--regions', metavar="<bed>", help="Genomic regions to run footprinting within")
 
 	optargs = parser.add_argument_group('Optional arguments')
-	optargs.add_argument('--score', metavar="<score>", choices=["footprint", "sum", "FOS"], help="Type of scoring to perform on cutsites (footprint/sum/FOS) (default: footprint)", default="footprint")
+	optargs.add_argument('--score', metavar="<score>", choices=["footprint", "sum"], help="Type of scoring to perform on cutsites (footprint/sum) (default: footprint)", default="footprint")
+	#mean
+	#abs convert bigwig signal to absolute scores before calculating score
 	optargs.add_argument('--extend', metavar="<int>", type=int, help="Extend input regions with bp (default: 100)", default=100)
 	optargs.add_argument('--smooth', metavar="<int>", type=int, help="Smooth output signal by mean in <bp> windows (default: no smoothing)", default=1)
-	optargs.add_argument('--min_limit', metavar="<float>", type=float, help="Limit input bigwig score range (default: no lower limit)") 		#default none
-	optargs.add_argument('--max_limit', metavar="<float>", type=float, help="Limit input bigwig score range (default: no upper limit)") 		#default none
+	optargs.add_argument('--min-limit', metavar="<float>", type=float, help="Limit input bigwig score range (default: no lower limit)") 		#default none
+	optargs.add_argument('--max-limit', metavar="<float>", type=float, help="Limit input bigwig score range (default: no upper limit)") 		#default none
 
 	footprintargs = parser.add_argument_group('Parameters for score == footprint')
-	optargs.add_argument('--fp_min', metavar="<int>", type=int, help="Minimum footprint width (default: 20)", default=20)
-	optargs.add_argument('--fp_max', metavar="<int>", type=int, help="Maximum footprint width (default: 50)", default=50)
-	optargs.add_argument('--flank_min', metavar="<int>", type=int, help="Minimum range of flanking regions (default: 10)", default=10)
-	optargs.add_argument('--flank_max', metavar="<int>", type=int, help="Maximum range of flanking regions (default: 30)", default=30)
+	optargs.add_argument('--fp-min', metavar="<int>", type=int, help="Minimum footprint width (default: 20)", default=20)
+	optargs.add_argument('--fp-max', metavar="<int>", type=int, help="Maximum footprint width (default: 50)", default=50)
+	optargs.add_argument('--flank-min', metavar="<int>", type=int, help="Minimum range of flanking regions (default: 10)", default=10)
+	optargs.add_argument('--flank-max', metavar="<int>", type=int, help="Maximum range of flanking regions (default: 30)", default=30)
 
 	sumargs = parser.add_argument_group('Parameters for score == sum')
-	optargs.add_argument('--window', metavar="<int>", type=int, help="The window for calculation of sum (default: 100)", default=100)
+	sumargs.add_argument('--window', metavar="<int>", type=int, help="The window for calculation of sum (default: 100)", default=100)
 
 	runargs = parser.add_argument_group('Run arguments')
 	runargs.add_argument('--cores', metavar="<int>", type=int, help="Number of cores to use for computation (default: 1)", default=1)
@@ -91,6 +93,7 @@ def calculate_scores(regions, args):
 		signal = region.get_signal(pybw_signal)
 		signal = np.nan_to_num(signal).astype("float64")
 
+		#
 		if args.min_limit != None:
 			signal[signal < args.min_limit] = args.min_limit
 		if args.max_limit != None:
@@ -125,7 +128,7 @@ def calculate_scores(regions, args):
 
 #------------------------------------------------------------------------------------------#
 
-def run_footprinting(args):
+def run_scorebigwig(args):
 
 	check_required(args, ["signal", "output", "regions"])
 	check_files([args.signal, args.regions], "r")
@@ -135,7 +138,7 @@ def run_footprinting(args):
 	# Create logger and write info to log
 	#---------------------------------------------------------------------------------------#
 
-	logger = TobiasLogger("FootprintScores", args.verbosity)
+	logger = TobiasLogger("ScoreBigwig", args.verbosity)
 	logger.begin()
 
 	parser = add_footprint_arguments(argparse.ArgumentParser())