Skip to content
This repository has been archived by the owner. It is now read-only.

Commit

Permalink
Merge branch 'master' into network
Browse files Browse the repository at this point in the history
  • Loading branch information
msbentsen committed Aug 21, 2019
2 parents 02c98f8 + 906e919 commit 7375885
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 7 deletions.
3 changes: 3 additions & 0 deletions CHANGES
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
## 0.7.0 (2019-08-13)
- Updated the way p-values are calculated in BINDetect. Included 100-fold subsampling of background to estimate confidence interval.

## 0.6.4 (2019-08-07)
- Bug fix for error with regions very close to chromosome borders for ATACorrect and ScoreBigwig.

Expand Down
2 changes: 1 addition & 1 deletion tobias/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.6.4"
__version__ = "0.7.0"
4 changes: 3 additions & 1 deletion tobias/footprinting/bindetect.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
import itertools
import pandas as pd

#Machine learning
#Machine learning and statistics
import sklearn
from sklearn import mixture
import scipy
Expand Down Expand Up @@ -78,6 +78,8 @@ def add_bindetect_arguments(parser):
#optargs.add_argument('--naming', metavar="<type>", help="Naming convention for TFs ('id', 'name', 'name_id', 'id_name') (default: 'name_id')", choices=["id", "name", "name_id", "id_name"], default="name_id")
optargs.add_argument('--motif-pvalue', metavar="<float>", type=lambda x: restricted_float(x, 0, 1), help="Set p-value threshold for motif scanning (default: 1e-4)", default=0.0001)
optargs.add_argument('--bound-pvalue', metavar="<float>", type=lambda x: restricted_float(x, 0, 1), help="Set p-value threshold for bound/unbound split (default: 0.001)", default=0.001)
#optargs.add_argument('--volcano-diff-thresh', metavar="<float>", help="", default=0.2) #not yet implemented
#optargs.add_argument('--volcano-p-thresh', metavar="<float>", help="", default=0.05) #not yet implemented
optargs.add_argument('--pseudo', type=float, metavar="<float>", help="Pseudocount for calculating log2fcs (default: estimated from data)", default=None)
optargs.add_argument('--time-series', action='store_true', help="Will only compare signals1<->signals2<->signals3 (...) in order of input, and skip all-against-all comparison.")
optargs.add_argument('--skip-excel', action='store_true', help="Skip creation of excel files - for large datasets, this will speed up BINDetect considerably")
Expand Down
22 changes: 17 additions & 5 deletions tobias/footprinting/bindetect_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,23 +393,35 @@ def process_tfbs(TF_name, args, log2fc_params): #per tf
obs_mean, obs_std = obs_params
bg_mean, bg_std = bg_params
obs_no = np.min([len(observed_log2fcs), 50000]) #Set cap on obs_no to prevent super small p-values
n_obs = len(observed_log2fcs)

#If there was any change found at all (0 can happen if two bigwigs are the same)
if obs_mean != bg_mean:
info_table.at[TF_name, base + "_change"] = (obs_mean - bg_mean) / np.mean([obs_std, bg_std]) #effect size
info_table.at[TF_name, base + "_change"] = np.round(info_table.at[TF_name, base + "_change"], 5)

#pval = scipy.stats.mannwhitneyu(observed_log2fcs, bg_log2fcs, alternative="two-sided")[1]
pval = scipy.stats.ttest_ind_from_stats(obs_mean, obs_std, obs_no, bg_mean, bg_std, obs_no, equal_var=False)[1] #pvalue is second in tup
info_table.at[TF_name, base + "_pvalue"] = pval
#pval = scipy.stats.ttest_ind_from_stats(obs_mean, obs_std, obs_no, bg_mean, bg_std, obs_no, equal_var=False)[1] #pvalue is second in tup
#info_table.at[TF_name, base + "_pvalue"] = pval

#Else not possible to compare groups
else:
info_table.at[TF_name, base + "_change"] = 0
info_table.at[TF_name, base + "_pvalue"] = 1

#stime_plot = datetime.now()

#Sample from background distribution
np.random.seed(n_obs)
sample_changes = []
for i in range(100):
sample = scipy.stats.norm.rvs(*log2fc_params[(cond1, cond2)], size=n_obs)
sample_mean, sample_std = np.mean(sample), np.std(sample)
sample_change = (sample_mean - bg_mean) / np.mean([sample_std, bg_std])
sample_changes.append(sample_change)

#Estimate p-value by comparing sampling to observed mean
ttest = scipy.stats.ttest_1samp(sample_changes, info_table.at[TF_name, base + "_change"])
info_table.at[TF_name, base + "_pvalue"] = ttest[1]

#### Plot comparison ###
fig, ax = plt.subplots(1,1)
ax.hist(observed_log2fcs, bins='auto', label="Observed log2fcs", density=True)
Expand Down Expand Up @@ -616,7 +628,7 @@ def plot_bindetect(motifs, cluster_obj, conditions, args):
ax1.scatter(coord[0], coord[1], color=diff_scores[TF]["color"], s=4.5)

if diff_scores[TF]["show"] == True:
txts.append(ax1.text(coord[0], coord[1], diff_scores[TF]["volcano_label"], fontsize=8))
txts.append(ax1.text(coord[0], coord[1], diff_scores[TF]["volcano_label"], fontsize=9))

#Plot custom legend for colors
legend_elements = [Line2D([0],[0], marker='o', color='w', markerfacecolor="red", label="Higher scores in {0}".format(conditions[0])),
Expand Down

0 comments on commit 7375885

Please sign in to comment.