binning.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Dec  7 17:11:57 2017

@author: mints
"""
import numpy as np
from scipy.stats import binned_statistic_2d
from sage_selection import binary_split_nd as bnd
from sage_selection import tools
from sage_selection import constants as c

fun_selection = tools.selection2d_flat


def wilson(fg, bg, z=1):
    p_hat = fg / bg
    est = (p_hat + z * z * 0.5 / bg) / (1 + z * z / bg)
    est[est > 1] = 1.
    uncert = z * np.sqrt((p_hat * (1 - p_hat) / bg) +
                         (z / (2 * bg)) ** 2) / (1 + z * z / bg)
    return est, uncert


def agresti(fg, bg, z=1):
    n = bg + z**2
    p = (fg + 0.5*z**2) / n
    if np.any(p >= 1):
        u = np.empty_like(p)
        u = z * np.sqrt(fg) / n
        u[p < 1] = z * np.sqrt(p[p < 1] * (1 - p[p < 1]) / n[p < 1])
    else:
        u = z * np.sqrt(p * (1 - p) / n)
    return p, u


def get_estimate_and_error(fg, bg):
    return agresti(fg, bg)


def get_direct_2d(hbg, r_fg):
    hfg = binned_statistic_2d(r_fg[0], r_fg[1], values=None,
                              bins=tools.bins, statistic='count',
                              expand_binnumbers=True)
    fg_values = np.zeros_like(r_fg[0])
    bg_values = np.zeros_like(r_fg[0])
    for ipoint, icoord in enumerate(zip(*hfg.binnumber)):
        ix, iy = icoord
        bg_values[ipoint] = hbg[ix - 1, iy - 1]
        fg_values[ipoint] = hfg.statistic[ix - 1, iy - 1]
    estimate, error_estimate = get_estimate_and_error(fg_values, bg_values)
    error_estimate[np.isnan(error_estimate)] = 1.
    return estimate, error_estimate


def get_smooth_2d(hbg, r_fg):
    hfg = np.histogram2d(r_fg[0], r_fg[1], bins=tools.bins)[0].T.flatten()
    pos = np.meshgrid(c.JMAG_GRID, c.JK_GRID)
    pos = np.array([pos[0].flatten(), pos[1].flatten()]).T
    mask = (hfg > 0).flatten()
    r_fg_points = np.array(r_fg).T
    est, error_est = get_estimate_and_error(hfg, hbg.T.flatten())
    estimate = tools.interp_log_2d(pos[mask], est[mask], r_fg_points,
                                   bbox=[tools.bins[0][0], tools.bins[0][-1],
                                         tools.bins[1][0], tools.bins[1][-1]],
                                   kx=1, ky=1, s=0)
    error_estimate = tools.interp_log_2d(
                                   pos[mask], error_est[mask],
                                   r_fg_points,
                                   bbox=[tools.bins[0][0], tools.bins[0][-1],
                                         tools.bins[1][0], tools.bins[1][-1]],
                                   kx=1, ky=1, s=0)
    return estimate, error_estimate


def get_median_2d(hbg, r_fg, occupation=25, shrink=False,
                  add_bg_variance=False, interpolation='post',
                  return_mask=False):
    r_fg = np.array(r_fg).T
    indices = np.arange(len(r_fg))
    r_fg = np.hstack((r_fg, indices[:, np.newaxis]))
    bin_indices = np.empty_like(indices, dtype=int)
    pos = []
    fg_value = []
    bg_value = []
    fg_variance = []
    bg_variance = []
    es_variance = []
    bbox = bnd.get_bounding_box_grid(r_fg, *tools.bins)
    hfg = np.histogram2d(r_fg[:, 0], r_fg[:, 1],
                         bins=(tools.bins[0]-1e-5, tools.bins[1] - 1e-5))[0]
                         #bins=tools.bins)[0]
    full_mask = np.zeros_like(hbg, dtype=bool)
    for p in bnd.binary_split_2d_grid(r_fg, *tools.bins,
                                      boundaries=bbox,
                                      min_occupation=occupation):
        if np.any(p[0] == bbox) and shrink:
            if p[0][0] == bbox[0]:
                p[0][0] = np.searchsorted(tools.bins[0], p[1][:, 0].min()) - 1
            if p[0][1] == bbox[1]:
                p[0][1] = np.searchsorted(tools.bins[1], p[1][:, 1].min()) - 1
            if p[0][2] == bbox[2]:
                p[0][2] = np.searchsorted(tools.bins[0], p[1][:, 0].max())
            if p[0][3] == bbox[3]:
                p[0][3] = np.searchsorted(tools.bins[1], p[1][:, 1].max())
        bg_bins = hbg[p[0][0]:p[0][2], p[0][1]:p[0][3]]
        fg_bins = hfg[p[0][0]:p[0][2], p[0][1]:p[0][3]]
        full_mask[p[0][0]:p[0][2], p[0][1]:p[0][3]] = True
        count_bg = np.sum(bg_bins)
        mask_fg = np.logical_and(
            np.logical_and(r_fg[:, 0] >= tools.bins[0][p[0][0]],
                           r_fg[:, 0] < tools.bins[0][p[0][2]]),
            np.logical_and(r_fg[:, 1] >= tools.bins[1][p[0][1]],
                           r_fg[:, 1] < tools.bins[1][p[0][3]]))
        count_fg = np.sum(fg_bins) #np.sum(mask_fg)
        bin_indices[np.asarray(p[1][:, 2], dtype=int)] = len(fg_value)
        pos.append(np.mean(r_fg[mask_fg][:, :2], axis=0))
        fg_value.append(count_fg)
        bg_value.append(count_bg)
        bg_variance.append(np.var(bg_bins) / np.sum(bg_bins) ** 2)
        fg_variance.append(np.var(fg_bins) / np.sum(fg_bins) ** 2)
        if np.any(np.isnan(fg_variance)):
            import ipdb; ipdb.set_trace()
        with tools.SuppressRuntime(all='ignore'):
            es_variance.append(np.nanvar(fg_bins / bg_bins))
    fg_value = np.array(fg_value)
    bg_value = np.array(bg_value)
    bg_variance = np.array(bg_variance)
    fg_variance = np.array(fg_variance)
    es_variance = np.array(es_variance)
    pos = np.array(pos)
    arg = np.argsort(pos[:, 0])
    fg_value = fg_value[arg]
    bg_value = bg_value[arg]
    fg_variance = fg_variance[arg]
    bg_variance = bg_variance[arg]
    pos = pos[arg]
    bbox = [tools.bins[0][0], tools.bins[0][-1],
            tools.bins[1][0], tools.bins[1][-1]]
    if interpolation == 'post':
        est, error_est = get_estimate_and_error(fg_value, bg_value)
        #print(pos, est, error_est, fg_value, bg_value)
        #if np.any(est > 1):
        #    print(pos[est > 1], fg_value[est > 1], bg_value[est > 1])
        if add_bg_variance:
            error_est += est * np.sqrt((bg_variance + fg_variance))
        estimate = tools.interp_log_2d(pos, est, r_fg[:, :2],
                                       bbox=bbox, kx=1, ky=1, s=0)
        try:
            error_estimate = tools.interp_log_2d(
                                           pos, error_est, r_fg[:, :2],
                                           bbox=bbox, kx=1, ky=1, s=0)
        except:
            import ipdb; ipdb.set_trace()
    elif interpolation == 'pre':
        bg_value = tools.interp_log_2d(pos, bg_value, r_fg[:, :2],
                                       bbox=bbox, kx=1, ky=1, s=0)
        fg_value = tools.interp_log_2d(pos, fg_value, r_fg[:, :2],
                                       bbox=bbox, kx=1, ky=1, s=0)
        estimate, error_estimate = get_estimate_and_error(fg_value, bg_value)
        if add_bg_variance:
            error_estimate += estimate * np.sqrt(
                      bg_variance[bin_indices] / bg_value**2 +
                      fg_variance[bin_indices] / fg_value**2)
    elif interpolation == 'none':
        fg_value = fg_value[bin_indices]
        bg_value = bg_value[bin_indices]
        estimate, error_estimate = get_estimate_and_error(fg_value, bg_value)
        if add_bg_variance:
            error_estimate += estimate * np.sqrt(
                      bg_variance[bin_indices] / bg_value**2 +
                      fg_variance[bin_indices] / fg_value**2)
    else:
        raise ValueError('interpolation=%s is not implemented' % interpolation)
    if return_mask:
        return estimate, error_estimate, full_mask
    else:
        return estimate, error_estimate
	#!/usr/bin/env python3
	# -- coding: utf-8 --
	"""
	Created on Thu Dec 7 17:11:57 2017

	@author: mints
	"""
	import numpy as np
	from scipy.stats import binned_statistic_2d
	from sage_selection import binary_split_nd as bnd
	from sage_selection import tools
	from sage_selection import constants as c

	fun_selection = tools.selection2d_flat


	def wilson(fg, bg, z=1):
	p_hat = fg / bg
	est = (p_hat + z * z * 0.5 / bg) / (1 + z * z / bg)
	est[est > 1] = 1.
	uncert = z * np.sqrt((p_hat * (1 - p_hat) / bg) +
	(z / (2 * bg)) ** 2) / (1 + z * z / bg)
	return est, uncert


	def agresti(fg, bg, z=1):
	n = bg + z**2
	p = (fg + 0.5z*2) / n
	if np.any(p >= 1):
	u = np.empty_like(p)
	u = z * np.sqrt(fg) / n
	u[p < 1] = z * np.sqrt(p[p < 1] * (1 - p[p < 1]) / n[p < 1])
	else:
	u = z * np.sqrt(p * (1 - p) / n)
	return p, u


	def get_estimate_and_error(fg, bg):
	return agresti(fg, bg)


	def get_direct_2d(hbg, r_fg):
	hfg = binned_statistic_2d(r_fg[0], r_fg[1], values=None,
	bins=tools.bins, statistic='count',
	expand_binnumbers=True)
	fg_values = np.zeros_like(r_fg[0])
	bg_values = np.zeros_like(r_fg[0])
	for ipoint, icoord in enumerate(zip(*hfg.binnumber)):
	ix, iy = icoord
	bg_values[ipoint] = hbg[ix - 1, iy - 1]
	fg_values[ipoint] = hfg.statistic[ix - 1, iy - 1]
	estimate, error_estimate = get_estimate_and_error(fg_values, bg_values)
	error_estimate[np.isnan(error_estimate)] = 1.
	return estimate, error_estimate


	def get_smooth_2d(hbg, r_fg):
	hfg = np.histogram2d(r_fg[0], r_fg[1], bins=tools.bins)[0].T.flatten()
	pos = np.meshgrid(c.JMAG_GRID, c.JK_GRID)
	pos = np.array([pos[0].flatten(), pos[1].flatten()]).T
	mask = (hfg > 0).flatten()
	r_fg_points = np.array(r_fg).T
	est, error_est = get_estimate_and_error(hfg, hbg.T.flatten())
	estimate = tools.interp_log_2d(pos[mask], est[mask], r_fg_points,
	bbox=[tools.bins[0][0], tools.bins[0][-1],
	tools.bins[1][0], tools.bins[1][-1]],
	kx=1, ky=1, s=0)
	error_estimate = tools.interp_log_2d(
	pos[mask], error_est[mask],
	r_fg_points,
	bbox=[tools.bins[0][0], tools.bins[0][-1],
	tools.bins[1][0], tools.bins[1][-1]],
	kx=1, ky=1, s=0)
	return estimate, error_estimate


	def get_median_2d(hbg, r_fg, occupation=25, shrink=False,
	add_bg_variance=False, interpolation='post',
	return_mask=False):
	r_fg = np.array(r_fg).T
	indices = np.arange(len(r_fg))
	r_fg = np.hstack((r_fg, indices[:, np.newaxis]))
	bin_indices = np.empty_like(indices, dtype=int)
	pos = []
	fg_value = []
	bg_value = []
	fg_variance = []
	bg_variance = []
	es_variance = []
	bbox = bnd.get_bounding_box_grid(r_fg, *tools.bins)
	hfg = np.histogram2d(r_fg[:, 0], r_fg[:, 1],
	bins=(tools.bins[0]-1e-5, tools.bins[1] - 1e-5))[0]
	#bins=tools.bins)[0]
	full_mask = np.zeros_like(hbg, dtype=bool)
	for p in bnd.binary_split_2d_grid(r_fg, *tools.bins,
	boundaries=bbox,
	min_occupation=occupation):
	if np.any(p[0] == bbox) and shrink:
	if p[0][0] == bbox[0]:
	p[0][0] = np.searchsorted(tools.bins[0], p[1][:, 0].min()) - 1
	if p[0][1] == bbox[1]:
	p[0][1] = np.searchsorted(tools.bins[1], p[1][:, 1].min()) - 1
	if p[0][2] == bbox[2]:
	p[0][2] = np.searchsorted(tools.bins[0], p[1][:, 0].max())
	if p[0][3] == bbox[3]:
	p[0][3] = np.searchsorted(tools.bins[1], p[1][:, 1].max())
	bg_bins = hbg[p[0][0]:p[0][2], p[0][1]:p[0][3]]
	fg_bins = hfg[p[0][0]:p[0][2], p[0][1]:p[0][3]]
	full_mask[p[0][0]:p[0][2], p[0][1]:p[0][3]] = True
	count_bg = np.sum(bg_bins)
	mask_fg = np.logical_and(
	np.logical_and(r_fg[:, 0] >= tools.bins[0][p[0][0]],
	r_fg[:, 0] < tools.bins[0][p[0][2]]),
	np.logical_and(r_fg[:, 1] >= tools.bins[1][p[0][1]],
	r_fg[:, 1] < tools.bins[1][p[0][3]]))
	count_fg = np.sum(fg_bins) #np.sum(mask_fg)
	bin_indices[np.asarray(p[1][:, 2], dtype=int)] = len(fg_value)
	pos.append(np.mean(r_fg[mask_fg][:, :2], axis=0))
	fg_value.append(count_fg)
	bg_value.append(count_bg)
	bg_variance.append(np.var(bg_bins) / np.sum(bg_bins) ** 2)
	fg_variance.append(np.var(fg_bins) / np.sum(fg_bins) ** 2)
	if np.any(np.isnan(fg_variance)):
	import ipdb; ipdb.set_trace()
	with tools.SuppressRuntime(all='ignore'):
	es_variance.append(np.nanvar(fg_bins / bg_bins))
	fg_value = np.array(fg_value)
	bg_value = np.array(bg_value)
	bg_variance = np.array(bg_variance)
	fg_variance = np.array(fg_variance)
	es_variance = np.array(es_variance)
	pos = np.array(pos)
	arg = np.argsort(pos[:, 0])
	fg_value = fg_value[arg]
	bg_value = bg_value[arg]
	fg_variance = fg_variance[arg]
	bg_variance = bg_variance[arg]
	pos = pos[arg]
	bbox = [tools.bins[0][0], tools.bins[0][-1],
	tools.bins[1][0], tools.bins[1][-1]]
	if interpolation == 'post':
	est, error_est = get_estimate_and_error(fg_value, bg_value)
	#print(pos, est, error_est, fg_value, bg_value)
	#if np.any(est > 1):
	# print(pos[est > 1], fg_value[est > 1], bg_value[est > 1])
	if add_bg_variance:
	error_est += est * np.sqrt((bg_variance + fg_variance))
	estimate = tools.interp_log_2d(pos, est, r_fg[:, :2],
	bbox=bbox, kx=1, ky=1, s=0)
	try:
	error_estimate = tools.interp_log_2d(
	pos, error_est, r_fg[:, :2],
	bbox=bbox, kx=1, ky=1, s=0)
	except:
	import ipdb; ipdb.set_trace()
	elif interpolation == 'pre':
	bg_value = tools.interp_log_2d(pos, bg_value, r_fg[:, :2],
	bbox=bbox, kx=1, ky=1, s=0)
	fg_value = tools.interp_log_2d(pos, fg_value, r_fg[:, :2],
	bbox=bbox, kx=1, ky=1, s=0)
	estimate, error_estimate = get_estimate_and_error(fg_value, bg_value)
	if add_bg_variance:
	error_estimate += estimate * np.sqrt(
	bg_variance[bin_indices] / bg_value**2 +
	fg_variance[bin_indices] / fg_value**2)
	elif interpolation == 'none':
	fg_value = fg_value[bin_indices]
	bg_value = bg_value[bin_indices]
	estimate, error_estimate = get_estimate_and_error(fg_value, bg_value)
	if add_bg_variance:
	error_estimate += estimate * np.sqrt(
	bg_variance[bin_indices] / bg_value**2 +
	fg_variance[bin_indices] / fg_value**2)
	else:
	raise ValueError('interpolation=%s is not implemented' % interpolation)
	if return_mask:
	return estimate, error_estimate, full_mask
	else:
	return estimate, error_estimate