| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798 |
- import numpy as np
- from numpy.testing import assert_allclose
- from pytest import approx
- from sklearn.utils.stats import _weighted_percentile
def test_weighted_percentile():
    """Check the 50th weighted percentile when an outlier has zero weight.

    The 102 values are fifty 0s, a single 1, fifty 2s and one very large
    value (100000) in the last position. That last sample is given a weight
    of zero, and the 50th weighted percentile is expected to be 1.
    """
    n_samples = 102

    # Start from all 2s, then overwrite the leading zeros, the single middle
    # value and the trailing outlier.
    values = np.full(n_samples, 2.0, dtype=np.float64)
    values[:50] = 0
    values[50] = 1
    values[-1] = 100000

    # Unit weights everywhere except the outlier.
    weights = np.ones(n_samples, dtype=np.float64)
    weights[-1] = 0.0

    assert approx(_weighted_percentile(values, weights, 50)) == 1
def test_weighted_percentile_equal():
    """Check the 50th weighted percentile of an all-zero array is 0.

    All 102 values are 0.0; the last sample carries a weight of zero and
    every other sample a weight of one.
    """
    n_samples = 102
    values = np.zeros(n_samples, dtype=np.float64)

    weights = np.ones(n_samples, dtype=np.float64)
    weights[-1] = 0.0

    # Exact comparison on purpose: every input value is exactly 0.0.
    assert _weighted_percentile(values, weights, 50) == 0
def test_weighted_percentile_zero_weight():
    """Check the 50th weighted percentile when every weight is zero.

    All 102 values are 1.0 and all 102 weights are 0.0; the result is
    expected to be (approximately) 1.0.
    """
    n_samples = 102
    values = np.ones(n_samples, dtype=np.float64)
    weights = np.zeros(n_samples, dtype=np.float64)

    assert approx(_weighted_percentile(values, weights, 50)) == 1.0
def test_weighted_percentile_zero_weight_zero_percentile():
    """Check percentiles 0, 50 and 100 when the edge samples have zero weight.

    Only the values 2, 3 and 4 carry a non-zero weight, so the 0th, 50th and
    100th weighted percentiles are expected to be 2, 3 and 4 respectively.
    """
    values = np.array([0, 1, 2, 3, 4, 5])
    weights = np.array([0, 0, 1, 1, 1, 0])

    # (requested percentile, expected value), checked in ascending order.
    cases = ((0, 2), (50, 3), (100, 4))
    for percentile, expected in cases:
        assert approx(_weighted_percentile(values, weights, percentile)) == expected
def test_weighted_median_equal_weights():
    """Check the weighted percentile equals the median for equal weights.

    `_weighted_percentile` is called without an explicit percentile, so the
    comparison against `np.median` relies on its default percentile argument.
    """
    rng = np.random.RandomState(0)

    # Odd size as _weighted_percentile takes lower weighted percentile
    samples = rng.randint(10, size=11)
    unit_weights = np.ones(samples.shape)

    assert np.median(samples) == approx(_weighted_percentile(samples, unit_weights))
def test_weighted_median_integer_weights():
    """Check integer weights behave like repeating each sample.

    The weighted percentile (default percentile argument) of the samples
    with integer weights is compared against `np.median` of the data in
    which every sample is repeated as many times as its weight.
    """
    rng = np.random.RandomState(0)
    samples = rng.randint(20, size=10)
    counts = rng.choice(5, size=10)

    # Manually "apply" the weights by duplicating each sample.
    expanded = np.repeat(samples, counts)
    expected_median = np.median(expanded)

    assert expected_median == approx(_weighted_percentile(samples, counts))
def test_weighted_percentile_2d():
    """Check 2D input gives the same result as processing each column alone.

    Two cases are covered: a 2D array with a 1D sample weight shared by all
    columns, and a 2D array with a 2D sample weight (one weight column per
    data column). In both, the result for the 2D input is compared against
    the list of per-column results.
    """
    rng = np.random.RandomState(0)
    first_col = rng.randint(10, size=10)
    first_weights = rng.choice(5, size=10)
    second_col = rng.randint(20, size=10)
    data = np.vstack((first_col, second_col)).T

    # Case 1: 2D array, 1D sample_weight applied to every column.
    result = _weighted_percentile(data, first_weights)
    per_column = [_weighted_percentile(column, first_weights) for column in data.T]
    assert_allclose(result, per_column)

    # Case 2: array and sample_weight both 2D, matched column by column.
    second_weights = rng.choice(5, size=10)
    weights_2d = np.vstack((first_weights, second_weights)).T
    result = _weighted_percentile(data, weights_2d)
    per_column = [
        _weighted_percentile(column, column_weights)
        for column, column_weights in zip(data.T, weights_2d.T)
    ]
    assert_allclose(result, per_column)
|