Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
sage_selection_public/binning.py
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
180 lines (165 sloc)
7.34 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# -*- coding: utf-8 -*- | |
""" | |
Created on Thu Dec 7 17:11:57 2017 | |
@author: mints | |
""" | |
import numpy as np | |
from scipy.stats import binned_statistic_2d | |
from sage_selection import binary_split_nd as bnd | |
from sage_selection import tools | |
from sage_selection import constants as c | |
fun_selection = tools.selection2d_flat | |
def wilson(fg, bg, z=1): | |
p_hat = fg / bg | |
est = (p_hat + z * z * 0.5 / bg) / (1 + z * z / bg) | |
est[est > 1] = 1. | |
uncert = z * np.sqrt((p_hat * (1 - p_hat) / bg) + | |
(z / (2 * bg)) ** 2) / (1 + z * z / bg) | |
return est, uncert | |
def agresti(fg, bg, z=1): | |
n = bg + z**2 | |
p = (fg + 0.5*z**2) / n | |
if np.any(p >= 1): | |
u = np.empty_like(p) | |
u = z * np.sqrt(fg) / n | |
u[p < 1] = z * np.sqrt(p[p < 1] * (1 - p[p < 1]) / n[p < 1]) | |
else: | |
u = z * np.sqrt(p * (1 - p) / n) | |
return p, u | |
def get_estimate_and_error(fg, bg): | |
return agresti(fg, bg) | |
def get_direct_2d(hbg, r_fg): | |
hfg = binned_statistic_2d(r_fg[0], r_fg[1], values=None, | |
bins=tools.bins, statistic='count', | |
expand_binnumbers=True) | |
fg_values = np.zeros_like(r_fg[0]) | |
bg_values = np.zeros_like(r_fg[0]) | |
for ipoint, icoord in enumerate(zip(*hfg.binnumber)): | |
ix, iy = icoord | |
bg_values[ipoint] = hbg[ix - 1, iy - 1] | |
fg_values[ipoint] = hfg.statistic[ix - 1, iy - 1] | |
estimate, error_estimate = get_estimate_and_error(fg_values, bg_values) | |
error_estimate[np.isnan(error_estimate)] = 1. | |
return estimate, error_estimate | |
def get_smooth_2d(hbg, r_fg): | |
hfg = np.histogram2d(r_fg[0], r_fg[1], bins=tools.bins)[0].T.flatten() | |
pos = np.meshgrid(c.JMAG_GRID, c.JK_GRID) | |
pos = np.array([pos[0].flatten(), pos[1].flatten()]).T | |
mask = (hfg > 0).flatten() | |
r_fg_points = np.array(r_fg).T | |
est, error_est = get_estimate_and_error(hfg, hbg.T.flatten()) | |
estimate = tools.interp_log_2d(pos[mask], est[mask], r_fg_points, | |
bbox=[tools.bins[0][0], tools.bins[0][-1], | |
tools.bins[1][0], tools.bins[1][-1]], | |
kx=1, ky=1, s=0) | |
error_estimate = tools.interp_log_2d( | |
pos[mask], error_est[mask], | |
r_fg_points, | |
bbox=[tools.bins[0][0], tools.bins[0][-1], | |
tools.bins[1][0], tools.bins[1][-1]], | |
kx=1, ky=1, s=0) | |
return estimate, error_estimate | |
def get_median_2d(hbg, r_fg, occupation=25, shrink=False, | |
add_bg_variance=False, interpolation='post', | |
return_mask=False): | |
r_fg = np.array(r_fg).T | |
indices = np.arange(len(r_fg)) | |
r_fg = np.hstack((r_fg, indices[:, np.newaxis])) | |
bin_indices = np.empty_like(indices, dtype=int) | |
pos = [] | |
fg_value = [] | |
bg_value = [] | |
fg_variance = [] | |
bg_variance = [] | |
es_variance = [] | |
bbox = bnd.get_bounding_box_grid(r_fg, *tools.bins) | |
hfg = np.histogram2d(r_fg[:, 0], r_fg[:, 1], | |
bins=(tools.bins[0]-1e-5, tools.bins[1] - 1e-5))[0] | |
#bins=tools.bins)[0] | |
full_mask = np.zeros_like(hbg, dtype=bool) | |
for p in bnd.binary_split_2d_grid(r_fg, *tools.bins, | |
boundaries=bbox, | |
min_occupation=occupation): | |
if np.any(p[0] == bbox) and shrink: | |
if p[0][0] == bbox[0]: | |
p[0][0] = np.searchsorted(tools.bins[0], p[1][:, 0].min()) - 1 | |
if p[0][1] == bbox[1]: | |
p[0][1] = np.searchsorted(tools.bins[1], p[1][:, 1].min()) - 1 | |
if p[0][2] == bbox[2]: | |
p[0][2] = np.searchsorted(tools.bins[0], p[1][:, 0].max()) | |
if p[0][3] == bbox[3]: | |
p[0][3] = np.searchsorted(tools.bins[1], p[1][:, 1].max()) | |
bg_bins = hbg[p[0][0]:p[0][2], p[0][1]:p[0][3]] | |
fg_bins = hfg[p[0][0]:p[0][2], p[0][1]:p[0][3]] | |
full_mask[p[0][0]:p[0][2], p[0][1]:p[0][3]] = True | |
count_bg = np.sum(bg_bins) | |
mask_fg = np.logical_and( | |
np.logical_and(r_fg[:, 0] >= tools.bins[0][p[0][0]], | |
r_fg[:, 0] < tools.bins[0][p[0][2]]), | |
np.logical_and(r_fg[:, 1] >= tools.bins[1][p[0][1]], | |
r_fg[:, 1] < tools.bins[1][p[0][3]])) | |
count_fg = np.sum(fg_bins) #np.sum(mask_fg) | |
bin_indices[np.asarray(p[1][:, 2], dtype=int)] = len(fg_value) | |
pos.append(np.mean(r_fg[mask_fg][:, :2], axis=0)) | |
fg_value.append(count_fg) | |
bg_value.append(count_bg) | |
bg_variance.append(np.var(bg_bins) / np.sum(bg_bins) ** 2) | |
fg_variance.append(np.var(fg_bins) / np.sum(fg_bins) ** 2) | |
if np.any(np.isnan(fg_variance)): | |
import ipdb; ipdb.set_trace() | |
with tools.SuppressRuntime(all='ignore'): | |
es_variance.append(np.nanvar(fg_bins / bg_bins)) | |
fg_value = np.array(fg_value) | |
bg_value = np.array(bg_value) | |
bg_variance = np.array(bg_variance) | |
fg_variance = np.array(fg_variance) | |
es_variance = np.array(es_variance) | |
pos = np.array(pos) | |
arg = np.argsort(pos[:, 0]) | |
fg_value = fg_value[arg] | |
bg_value = bg_value[arg] | |
fg_variance = fg_variance[arg] | |
bg_variance = bg_variance[arg] | |
pos = pos[arg] | |
bbox = [tools.bins[0][0], tools.bins[0][-1], | |
tools.bins[1][0], tools.bins[1][-1]] | |
if interpolation == 'post': | |
est, error_est = get_estimate_and_error(fg_value, bg_value) | |
#print(pos, est, error_est, fg_value, bg_value) | |
#if np.any(est > 1): | |
# print(pos[est > 1], fg_value[est > 1], bg_value[est > 1]) | |
if add_bg_variance: | |
error_est += est * np.sqrt((bg_variance + fg_variance)) | |
estimate = tools.interp_log_2d(pos, est, r_fg[:, :2], | |
bbox=bbox, kx=1, ky=1, s=0) | |
try: | |
error_estimate = tools.interp_log_2d( | |
pos, error_est, r_fg[:, :2], | |
bbox=bbox, kx=1, ky=1, s=0) | |
except: | |
import ipdb; ipdb.set_trace() | |
elif interpolation == 'pre': | |
bg_value = tools.interp_log_2d(pos, bg_value, r_fg[:, :2], | |
bbox=bbox, kx=1, ky=1, s=0) | |
fg_value = tools.interp_log_2d(pos, fg_value, r_fg[:, :2], | |
bbox=bbox, kx=1, ky=1, s=0) | |
estimate, error_estimate = get_estimate_and_error(fg_value, bg_value) | |
if add_bg_variance: | |
error_estimate += estimate * np.sqrt( | |
bg_variance[bin_indices] / bg_value**2 + | |
fg_variance[bin_indices] / fg_value**2) | |
elif interpolation == 'none': | |
fg_value = fg_value[bin_indices] | |
bg_value = bg_value[bin_indices] | |
estimate, error_estimate = get_estimate_and_error(fg_value, bg_value) | |
if add_bg_variance: | |
error_estimate += estimate * np.sqrt( | |
bg_variance[bin_indices] / bg_value**2 + | |
fg_variance[bin_indices] / fg_value**2) | |
else: | |
raise ValueError('interpolation=%s is not implemented' % interpolation) | |
if return_mask: | |
return estimate, error_estimate, full_mask | |
else: | |
return estimate, error_estimate |