| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688 |
- import numpy as np
- import pytest
- import scipy.sparse as sp
- from sklearn.base import clone
- from sklearn.dummy import DummyClassifier, DummyRegressor
- from sklearn.exceptions import NotFittedError
- from sklearn.utils._testing import (
- assert_almost_equal,
- assert_array_almost_equal,
- assert_array_equal,
- ignore_warnings,
- )
- from sklearn.utils.stats import _weighted_percentile
@ignore_warnings
def _check_predict_proba(clf, X, y):
    """Check the probability outputs of a fitted classifier.

    For each output column of ``y`` this asserts that ``predict_proba``
    has one row per sample of ``X`` and one column per distinct label,
    that each row sums to one, and that ``predict_log_proba`` matches the
    logarithm of ``predict_proba``.
    """
    probabilities = clf.predict_proba(X)
    # We know that we can have division by zero
    log_probabilities = clf.predict_log_proba(X)

    targets = np.atleast_1d(y)
    if targets.ndim == 1:
        targets = np.reshape(targets, (-1, 1))

    n_samples = len(X)
    n_outputs = targets.shape[1]

    # With a single output the results are wrapped in a list so that both
    # the single- and multi-output cases can be indexed per output below.
    if n_outputs == 1:
        probabilities = [probabilities]
        log_probabilities = [log_probabilities]

    for k in range(n_outputs):
        proba_k = probabilities[k]
        assert proba_k.shape[0] == n_samples
        assert proba_k.shape[1] == len(np.unique(targets[:, k]))
        assert_array_almost_equal(proba_k.sum(axis=1), np.ones(len(X)))
        # We know that we can have division by zero
        assert_array_almost_equal(np.log(proba_k), log_probabilities[k])
def _check_behavior_2d(clf):
    """Check that predictions have the same shape as a 1d and a 2d target.

    A fresh clone of ``clf`` is fitted for each target so the estimator
    passed in is not fitted by this helper.
    """
    features = np.array([[0], [0], [0], [0]])  # ignored
    targets_per_case = (
        np.array([1, 2, 1, 1]),  # 1d case
        np.array([[1, 0], [2, 0], [1, 0], [1, 3]]),  # 2d case
    )
    for targets in targets_per_case:
        estimator = clone(clf)
        estimator.fit(features, targets)
        predictions = estimator.predict(features)
        assert targets.shape == predictions.shape
def _check_behavior_2d_for_constant(clf):
    """Check that predictions keep the shape of a five-column 2d target.

    Only the 2d case is exercised; a clone of ``clf`` is fitted rather
    than ``clf`` itself.
    """
    features = np.array([[0], [0], [0], [0]])  # ignored
    targets = np.array(
        [
            [1, 0, 5, 4, 3],
            [2, 0, 1, 2, 5],
            [1, 0, 4, 5, 2],
            [1, 3, 3, 2, 0],
        ]
    )
    estimator = clone(clf)
    estimator.fit(features, targets)
    predictions = estimator.predict(features)
    assert targets.shape == predictions.shape
- def _check_equality_regressor(statistic, y_learn, y_pred_learn, y_test, y_pred_test):
- assert_array_almost_equal(np.tile(statistic, (y_learn.shape[0], 1)), y_pred_learn)
- assert_array_almost_equal(np.tile(statistic, (y_test.shape[0], 1)), y_pred_test)
def test_most_frequent_and_prior_strategy():
    """Check predictions and probabilities of "most_frequent" and "prior"."""
    X = [[0], [0], [0], [0]]  # ignored
    y = [1, 2, 1, 1]

    for strategy in ("most_frequent", "prior"):
        clf = DummyClassifier(strategy=strategy, random_state=0)
        clf.fit(X, y)
        assert_array_equal(clf.predict(X), np.ones(len(X)))
        _check_predict_proba(clf, X, y)

        first_row_proba = clf.predict_proba([X[0]])
        prior_row = clf.class_prior_.reshape((1, -1))
        # "prior" is compared with the class prior itself, "most_frequent"
        # with the boolean indicator of the prior exceeding 0.5.
        expected = prior_row if strategy == "prior" else prior_row > 0.5
        assert_array_almost_equal(first_row_proba, expected)
def test_most_frequent_and_prior_strategy_with_2d_column_y():
    """Check that a 1d target and its single-column 2d form predict alike.

    Non-regression test added in
    https://github.com/scikit-learn/scikit-learn/pull/13545
    """
    X = [[0], [0], [0], [0]]
    y_flat = [1, 2, 1, 1]
    y_column = [[1], [2], [1], [1]]

    for strategy in ("most_frequent", "prior"):
        clf_flat = DummyClassifier(strategy=strategy, random_state=0)
        clf_column = DummyClassifier(strategy=strategy, random_state=0)

        clf_flat.fit(X, y_flat)
        clf_column.fit(X, y_column)
        assert_array_equal(clf_flat.predict(X), clf_column.predict(X))
def test_most_frequent_and_prior_strategy_multioutput():
    """Check "prior" and "most_frequent" predictions on a two-column target."""
    X = [[0], [0], [0], [0]]  # ignored
    y = np.array([[1, 0], [2, 0], [1, 0], [1, 3]])
    n_samples = len(X)

    for strategy in ("prior", "most_frequent"):
        clf = DummyClassifier(strategy=strategy, random_state=0)
        clf.fit(X, y)

        predicted = clf.predict(X)
        # Expected prediction: 1 in the first column, 0 in the second.
        expected = np.hstack([np.ones((n_samples, 1)), np.zeros((n_samples, 1))])
        assert_array_equal(predicted, expected)

        _check_predict_proba(clf, X, y)
        _check_behavior_2d(clf)
def test_stratified_strategy(global_random_seed):
    """Check that "stratified" predictions follow the training frequencies."""
    X_train = [[0]] * 5  # ignored
    y = [1, 2, 1, 1, 2]
    clf = DummyClassifier(strategy="stratified", random_state=global_random_seed)
    clf.fit(X_train, y)

    X = [[0]] * 500
    y_pred = clf.predict(X)
    frequencies = np.bincount(y_pred) / float(len(X))
    # Label 1 makes up 3/5 of the training target, label 2 makes up 2/5.
    for label, expected in ((1, 3.0 / 5), (2, 2.0 / 5)):
        assert_almost_equal(frequencies[label], expected, decimal=1)
    _check_predict_proba(clf, X, y)
def test_stratified_strategy_multioutput(global_random_seed):
    """Check "stratified" prediction frequencies for each of two outputs."""
    X_train = [[0]] * 5  # ignored
    y = np.array([[2, 1], [2, 2], [1, 1], [1, 2], [1, 1]])
    clf = DummyClassifier(strategy="stratified", random_state=global_random_seed)
    clf.fit(X_train, y)

    X = [[0]] * 500
    y_pred = clf.predict(X)

    n_outputs = y.shape[1]
    for k in range(n_outputs):
        frequencies = np.bincount(y_pred[:, k]) / float(len(X))
        # In both columns label 1 appears 3 times out of 5 and label 2 twice.
        for label, expected in ((1, 3.0 / 5), (2, 2.0 / 5)):
            assert_almost_equal(frequencies[label], expected, decimal=1)
        _check_predict_proba(clf, X, y)

    _check_behavior_2d(clf)
def test_uniform_strategy(global_random_seed):
    """Check that "uniform" predicts both labels about equally often."""
    X_train = [[0]] * 4  # ignored
    y = [1, 2, 1, 1]
    clf = DummyClassifier(strategy="uniform", random_state=global_random_seed)
    clf.fit(X_train, y)

    X = [[0]] * 500
    y_pred = clf.predict(X)
    frequencies = np.bincount(y_pred) / float(len(X))
    for label in (1, 2):
        assert_almost_equal(frequencies[label], 0.5, decimal=1)
    _check_predict_proba(clf, X, y)
def test_uniform_strategy_multioutput(global_random_seed):
    """Check "uniform" prediction frequencies for each of two outputs."""
    X_train = [[0]] * 4  # ignored
    y = np.array([[2, 1], [2, 2], [1, 2], [1, 1]])
    clf = DummyClassifier(strategy="uniform", random_state=global_random_seed)
    clf.fit(X_train, y)

    X = [[0]] * 500
    y_pred = clf.predict(X)

    n_outputs = y.shape[1]
    for k in range(n_outputs):
        frequencies = np.bincount(y_pred[:, k]) / float(len(X))
        for label in (1, 2):
            assert_almost_equal(frequencies[label], 0.5, decimal=1)
        _check_predict_proba(clf, X, y)

    _check_behavior_2d(clf)
def test_string_labels():
    """Check that "most_frequent" predicts the most common string label."""
    n_samples = 5
    X = [[0]] * n_samples
    y = ["paris", "paris", "tokyo", "amsterdam", "berlin"]

    clf = DummyClassifier(strategy="most_frequent")
    clf.fit(X, y)

    predicted = clf.predict(X)
    assert_array_equal(predicted, ["paris"] * 5)
@pytest.mark.parametrize(
    "y,y_test",
    [
        ([2, 1, 1, 1], [2, 2, 1, 1]),
        (
            np.array([[2, 2], [1, 1], [1, 1], [1, 1]]),
            np.array([[2, 2], [2, 2], [1, 1], [1, 1]]),
        ),
    ],
)
def test_classifier_score_with_None(y, y_test):
    """Check that the classifier can be fitted and scored with ``X=None``.

    In both parametrized cases half of ``y_test`` equals the most frequent
    training label, hence the expected score of 0.5.
    """
    classifier = DummyClassifier(strategy="most_frequent")
    classifier.fit(None, y)
    score = classifier.score(None, y_test)
    assert score == 0.5
@pytest.mark.parametrize(
    "strategy", ["stratified", "most_frequent", "prior", "uniform", "constant"]
)
def test_classifier_prediction_independent_of_X(strategy, global_random_seed):
    """Check that classifier predictions do not change with the values in X."""
    y = [0, 2, 1, 1]

    predictions = []
    for feature_value in (0, 1):
        X = [[feature_value]] * 4
        clf = DummyClassifier(
            strategy=strategy, random_state=global_random_seed, constant=0
        )
        clf.fit(X, y)
        predictions.append(clf.predict(X))

    first, second = predictions
    assert_array_equal(first, second)
def test_mean_strategy_regressor(global_random_seed):
    """Check that the default DummyRegressor predicts the mean of y."""
    rng = np.random.RandomState(seed=global_random_seed)

    X = [[0]] * 4  # ignored
    y = rng.randn(4)

    regressor = DummyRegressor()
    regressor.fit(X, y)
    predicted = regressor.predict(X)
    assert_array_equal(predicted, [np.mean(y)] * len(X))
def test_mean_strategy_multioutput_regressor(global_random_seed):
    """Check that the default DummyRegressor predicts per-column means."""
    rng = np.random.RandomState(seed=global_random_seed)

    X_learn = rng.randn(10, 10)
    y_learn = rng.randn(10, 5)
    column_means = np.mean(y_learn, axis=0).reshape((1, -1))

    X_test = rng.randn(20, 10)
    y_test = rng.randn(20, 5)

    # Correctness oracle
    regressor = DummyRegressor()
    regressor.fit(X_learn, y_learn)
    pred_learn = regressor.predict(X_learn)
    pred_test = regressor.predict(X_test)

    _check_equality_regressor(column_means, y_learn, pred_learn, y_test, pred_test)
    _check_behavior_2d(regressor)
def test_regressor_exceptions():
    """Check that predicting with an unfitted DummyRegressor raises."""
    unfitted = DummyRegressor()
    with pytest.raises(NotFittedError):
        unfitted.predict([])
def test_median_strategy_regressor(global_random_seed):
    """Check that the "median" strategy predicts the median of y."""
    rng = np.random.RandomState(seed=global_random_seed)

    X = [[0]] * 5  # ignored
    y = rng.randn(5)

    regressor = DummyRegressor(strategy="median")
    regressor.fit(X, y)
    predicted = regressor.predict(X)
    assert_array_equal(predicted, [np.median(y)] * len(X))
def test_median_strategy_multioutput_regressor(global_random_seed):
    """Check that the "median" strategy predicts per-column medians."""
    rng = np.random.RandomState(seed=global_random_seed)

    X_learn = rng.randn(10, 10)
    y_learn = rng.randn(10, 5)
    column_medians = np.median(y_learn, axis=0).reshape((1, -1))

    X_test = rng.randn(20, 10)
    y_test = rng.randn(20, 5)

    # Correctness oracle
    regressor = DummyRegressor(strategy="median")
    regressor.fit(X_learn, y_learn)
    pred_learn = regressor.predict(X_learn)
    pred_test = regressor.predict(X_test)

    _check_equality_regressor(column_medians, y_learn, pred_learn, y_test, pred_test)
    _check_behavior_2d(regressor)
def test_quantile_strategy_regressor(global_random_seed):
    """Check the "quantile" strategy against numpy reference statistics.

    Quantiles 0.5, 0 and 1 are compared with the median, minimum and
    maximum of y; quantile 0.3 is compared with the 30th percentile.
    """
    rng = np.random.RandomState(seed=global_random_seed)

    X = [[0]] * 5  # ignored
    y = rng.randn(5)

    # Each quantile is paired with the function computing its reference value.
    cases = (
        (0.5, np.median),
        (0, np.min),
        (1, np.max),
        (0.3, lambda values: np.percentile(values, q=30)),
    )
    for quantile, reference in cases:
        regressor = DummyRegressor(strategy="quantile", quantile=quantile)
        regressor.fit(X, y)
        predicted = regressor.predict(X)
        assert_array_equal(predicted, [reference(y)] * len(X))
def test_quantile_strategy_multioutput_regressor(global_random_seed):
    """Check the "quantile" strategy on a multi-column target.

    Quantile 0.5 must reproduce the per-column median and quantile 0.8 the
    per-column 80th percentile.
    """
    rng = np.random.RandomState(seed=global_random_seed)

    X_learn = rng.randn(10, 10)
    y_learn = rng.randn(10, 5)

    column_medians = np.median(y_learn, axis=0).reshape((1, -1))
    column_p80 = np.percentile(y_learn, axis=0, q=80).reshape((1, -1))

    X_test = rng.randn(20, 10)
    y_test = rng.randn(20, 5)

    # Correctness oracle
    for quantile, expected in ((0.5, column_medians), (0.8, column_p80)):
        regressor = DummyRegressor(strategy="quantile", quantile=quantile)
        regressor.fit(X_learn, y_learn)
        pred_learn = regressor.predict(X_learn)
        pred_test = regressor.predict(X_test)

        _check_equality_regressor(expected, y_learn, pred_learn, y_test, pred_test)
        _check_behavior_2d(regressor)
def test_quantile_invalid():
    """Check that fitting with ``quantile=None`` raises a ValueError."""
    n_samples = 5
    X = [[0]] * n_samples  # ignored
    y = [0] * n_samples  # ignored

    regressor = DummyRegressor(strategy="quantile", quantile=None)
    expected_message = (
        "When using `strategy='quantile', you have to specify the desired quantile"
    )
    with pytest.raises(ValueError, match=expected_message):
        regressor.fit(X, y)
def test_quantile_strategy_empty_train():
    """Check that fitting the "quantile" strategy on empty data raises."""
    regressor = DummyRegressor(strategy="quantile", quantile=0.4)
    with pytest.raises(ValueError):
        regressor.fit([], [])
def test_constant_strategy_regressor(global_random_seed):
    """Check the "constant" strategy with a list and a scalar constant."""
    rng = np.random.RandomState(seed=global_random_seed)

    X = [[0]] * 5  # ignored
    y = rng.randn(5)

    # The constant is given first as a one-element list, then as a scalar.
    for constant in ([43], 43):
        reg = DummyRegressor(strategy="constant", constant=constant)
        reg.fit(X, y)
        assert_array_equal(reg.predict(X), [43] * len(X))

    # non-regression test for #22478
    # (checked on the last estimator, i.e. the one given the scalar constant)
    assert not isinstance(reg.constant, np.ndarray)
def test_constant_strategy_multioutput_regressor(global_random_seed):
    """Check the "constant" strategy with one constant per output column."""
    rng = np.random.RandomState(seed=global_random_seed)

    X_learn = rng.randn(10, 10)
    y_learn = rng.randn(10, 5)

    # One constant per column of the 2d target.
    constants = rng.randn(5)

    X_test = rng.randn(20, 10)
    y_test = rng.randn(20, 5)

    # Correctness oracle
    regressor = DummyRegressor(strategy="constant", constant=constants)
    regressor.fit(X_learn, y_learn)
    pred_learn = regressor.predict(X_learn)
    pred_test = regressor.predict(X_test)

    _check_equality_regressor(constants, y_learn, pred_learn, y_test, pred_test)
    _check_behavior_2d_for_constant(regressor)
def test_y_mean_attribute_regressor():
    """Check that ``constant_`` equals the mean of y for strategy "mean"."""
    X = [[0]] * 5
    y = [1, 2, 4, 6, 8]

    # when strategy = 'mean'
    regressor = DummyRegressor(strategy="mean")
    regressor.fit(X, y)

    expected_mean = np.mean(y)
    assert regressor.constant_ == expected_mean
def test_constants_not_specified_regressor():
    """Check that strategy "constant" without a constant raises TypeError."""
    X = [[0]] * 5
    y = [1, 2, 4, 6, 8]

    regressor = DummyRegressor(strategy="constant")
    expected_message = "Constant target value has to be specified"
    with pytest.raises(TypeError, match=expected_message):
        regressor.fit(X, y)
def test_constant_size_multioutput_regressor(global_random_seed):
    """Check that a constant of the wrong length is rejected.

    The target has five columns but only four constants are supplied.
    """
    rng = np.random.RandomState(seed=global_random_seed)
    X = rng.randn(10, 10)
    y = rng.randn(10, 5)

    regressor = DummyRegressor(strategy="constant", constant=[1, 2, 3, 4])
    expected_message = r"Constant target value should have shape \(5, 1\)."
    with pytest.raises(ValueError, match=expected_message):
        regressor.fit(X, y)
def test_constant_strategy():
    """Check the "constant" strategy with integer and with string labels."""
    X = [[0], [0], [0], [0]]  # ignored

    # (training target, constant to predict, expected predictions)
    cases = (
        ([2, 1, 2, 2], 1, np.ones(len(X))),
        (["two", "one", "two", "two"], "one", np.array(["one"] * 4)),
    )
    for y, constant, expected in cases:
        clf = DummyClassifier(strategy="constant", random_state=0, constant=constant)
        clf.fit(X, y)
        assert_array_equal(clf.predict(X), expected)
        _check_predict_proba(clf, X, y)
def test_constant_strategy_multioutput():
    """Check the "constant" strategy with one constant per output column."""
    X = [[0], [0], [0], [0]]  # ignored
    y = np.array([[2, 3], [1, 3], [2, 3], [2, 0]])
    n_samples = len(X)

    clf = DummyClassifier(strategy="constant", random_state=0, constant=[1, 0])
    clf.fit(X, y)

    predicted = clf.predict(X)
    # Constant [1, 0]: ones in the first column, zeros in the second.
    expected = np.hstack([np.ones((n_samples, 1)), np.zeros((n_samples, 1))])
    assert_array_equal(predicted, expected)
    _check_predict_proba(clf, X, y)
@pytest.mark.parametrize(
    "y, params, err_msg",
    [
        pytest.param(
            [2, 1, 2, 2],
            {"random_state": 0},
            "Constant.*has to be specified",
            id="no-constant",
        ),
        pytest.param(
            [2, 1, 2, 2],
            {"constant": [2, 0]},
            "Constant.*should have shape",
            id="too-many-constant",
        ),
        pytest.param(
            np.transpose([[2, 1, 2, 2], [2, 1, 2, 2]]),
            {"constant": 2},
            "Constant.*should have shape",
            id="not-enough-output",
        ),
        pytest.param(
            [2, 1, 2, 2],
            {"constant": "my-constant"},
            "constant=my-constant.*Possible values.*\\[1, 2]",
            id="single-output",
        ),
        pytest.param(
            np.transpose([[2, 1, 2, 2], [2, 1, 2, 2]]),
            {"constant": [2, "unknown"]},
            "constant=\\[2, 'unknown'].*Possible values.*\\[1, 2]",
            id="multi-output",
        ),
    ],
)
def test_constant_strategy_exceptions(y, params, err_msg):
    """Check the errors raised by invalid "constant" configurations.

    Each case fits a "constant" DummyClassifier built from ``params`` on
    ``y`` and expects a ValueError whose message matches ``err_msg``.
    """
    X = [[0], [0], [0], [0]]
    classifier = DummyClassifier(strategy="constant", **params)
    with pytest.raises(ValueError, match=err_msg):
        classifier.fit(X, y)
def test_classification_sample_weight():
    """Check that ``class_prior_`` reflects the sample weights.

    Class 0 carries a total weight of 0.2 and class 1 a weight of 1.0,
    out of 1.2 overall.
    """
    X = [[0], [0], [1]]
    y = [0, 1, 0]
    sample_weight = [0.1, 1.0, 0.1]

    classifier = DummyClassifier(strategy="stratified")
    clf = classifier.fit(X, y, sample_weight)

    expected_prior = [0.2 / 1.2, 1.0 / 1.2]
    assert_array_almost_equal(clf.class_prior_, expected_prior)
def test_constant_strategy_sparse_target():
    """Check that "constant" returns sparse predictions for a sparse target."""
    X = [[0]] * 5  # ignored
    dense_target = np.array([[0, 1], [4, 0], [1, 1], [1, 4], [1, 1]])
    y = sp.csc_matrix(dense_target)
    n_samples = len(X)

    clf = DummyClassifier(strategy="constant", random_state=0, constant=[1, 0])
    clf.fit(X, y)

    y_pred = clf.predict(X)
    assert sp.issparse(y_pred)

    predicted_dense = y_pred.toarray()
    expected = np.hstack([np.ones((n_samples, 1)), np.zeros((n_samples, 1))])
    assert_array_equal(predicted_dense, expected)
def test_uniform_strategy_sparse_target_warning(global_random_seed):
    """Check the warning and predictions of "uniform" on a sparse target.

    Fitting must emit a UserWarning, and each of the labels 1, 2 and 4
    must then be predicted about a third of the time in every column.
    """
    X_train = [[0]] * 5  # ignored
    y = sp.csc_matrix(np.array([[2, 1], [2, 2], [1, 4], [4, 2], [1, 1]]))

    clf = DummyClassifier(strategy="uniform", random_state=global_random_seed)
    with pytest.warns(UserWarning, match="the uniform strategy would not save memory"):
        clf.fit(X_train, y)

    X = [[0]] * 500
    y_pred = clf.predict(X)

    n_outputs = y.shape[1]
    for k in range(n_outputs):
        frequencies = np.bincount(y_pred[:, k]) / float(len(X))
        for label in (1, 2, 4):
            assert_almost_equal(frequencies[label], 1 / 3, decimal=1)
def test_stratified_strategy_sparse_target(global_random_seed):
    """Check "stratified" predictions and their sparsity on a sparse target."""
    X_train = [[0]] * 5  # ignored
    y = sp.csc_matrix(np.array([[4, 1], [0, 0], [1, 1], [1, 4], [1, 1]]))

    clf = DummyClassifier(strategy="stratified", random_state=global_random_seed)
    clf.fit(X_train, y)

    X = [[0]] * 500
    y_pred = clf.predict(X)
    assert sp.issparse(y_pred)
    y_pred = y_pred.toarray()

    # In both columns label 1 occurs 3 times out of 5, labels 0 and 4 once.
    expected_frequencies = ((1, 3.0 / 5), (0, 1.0 / 5), (4, 1.0 / 5))
    n_outputs = y.shape[1]
    for k in range(n_outputs):
        frequencies = np.bincount(y_pred[:, k]) / float(len(X))
        for label, expected in expected_frequencies:
            assert_almost_equal(frequencies[label], expected, decimal=1)
def test_most_frequent_and_prior_strategy_sparse_target():
    """Check "most_frequent" and "prior" predictions on a sparse target."""
    X = [[0]] * 5  # ignored
    y = sp.csc_matrix(np.array([[1, 0], [1, 3], [4, 0], [0, 1], [1, 0]]))

    n_samples = len(X)
    # Most frequent labels: 1 in the first column, 0 in the second.
    ones_column = np.ones((n_samples, 1))
    zeros_column = np.zeros((n_samples, 1))
    y_expected = np.hstack([ones_column, zeros_column])

    for strategy in ("most_frequent", "prior"):
        clf = DummyClassifier(strategy=strategy, random_state=0)
        clf.fit(X, y)

        y_pred = clf.predict(X)
        assert sp.issparse(y_pred)
        assert_array_equal(y_pred.toarray(), y_expected)
def test_dummy_regressor_sample_weight(global_random_seed, n_samples=10):
    """Check that DummyRegressor statistics take sample weights into account.

    The fitted ``constant_`` is compared with the weighted average for
    "mean" and with ``_weighted_percentile`` for "median" (50) and for the
    0.95 quantile (95).
    """
    rng = np.random.RandomState(seed=global_random_seed)

    X = [[0]] * n_samples
    y = rng.rand(n_samples)
    sample_weight = rng.rand(n_samples)

    mean_reg = DummyRegressor(strategy="mean").fit(X, y, sample_weight)
    assert mean_reg.constant_ == np.average(y, weights=sample_weight)

    median_reg = DummyRegressor(strategy="median").fit(X, y, sample_weight)
    assert median_reg.constant_ == _weighted_percentile(y, sample_weight, 50.0)

    quantile_reg = DummyRegressor(strategy="quantile", quantile=0.95).fit(
        X, y, sample_weight
    )
    assert quantile_reg.constant_ == _weighted_percentile(y, sample_weight, 95.0)
def test_dummy_regressor_on_3D_array():
    """Check that DummyRegressor accepts a 3D array of strings as X."""
    X = np.array([[["foo"]], [["bar"]], [["baz"]]])
    y = np.array([2, 2, 2])
    expected = np.array([2, 2, 2])

    regressor = DummyRegressor()
    regressor.fit(X, y)
    predicted = regressor.predict(X)
    assert_array_equal(predicted, expected)
def test_dummy_classifier_on_3D_array():
    """Check that DummyClassifier accepts a 3D array of strings as X."""
    X = np.array([[["foo"]], [["bar"]], [["baz"]]])
    y = [2, 2, 2]
    expected_labels = [2, 2, 2]
    # A single class is present, so its probability is 1 for every sample.
    expected_proba = [[1], [1], [1]]

    classifier = DummyClassifier(strategy="stratified")
    classifier.fit(X, y)
    predicted_labels = classifier.predict(X)
    predicted_proba = classifier.predict_proba(X)

    assert_array_equal(predicted_labels, expected_labels)
    assert_array_equal(predicted_proba, expected_proba)
def test_dummy_regressor_return_std():
    """Check the output of ``predict`` when ``return_std=True``."""
    X = [[0]] * 3  # ignored
    y = np.array([2, 2, 2])
    expected_std = np.array([0, 0, 0])

    regressor = DummyRegressor()
    regressor.fit(X, y)
    outputs = regressor.predict(X, return_std=True)

    # there should be two elements when return_std is True
    assert len(outputs) == 2
    # the second element should be all zeros
    assert_array_equal(outputs[1], expected_std)
@pytest.mark.parametrize(
    "y,y_test",
    [
        ([1, 1, 1, 2], [1.25] * 4),
        (np.array([[2, 2], [1, 1], [1, 1], [1, 1]]), [[1.25, 1.25]] * 4),
    ],
)
def test_regressor_score_with_None(y, y_test):
    """Check that the regressor can be fitted and scored with ``X=None``.

    ``y_test`` is filled with 1.25, the mean of each training target
    column, and the expected score is 1.0.
    """
    regressor = DummyRegressor()
    regressor.fit(None, y)
    score = regressor.score(None, y_test)
    assert score == 1.0
@pytest.mark.parametrize("strategy", ["mean", "median", "quantile", "constant"])
def test_regressor_prediction_independent_of_X(strategy):
    """Check that regressor predictions do not change with the values in X."""
    y = [0, 2, 1, 1]

    predictions = []
    for feature_value in (0, 1):
        X = [[feature_value]] * 4
        reg = DummyRegressor(strategy=strategy, constant=0, quantile=0.7)
        reg.fit(X, y)
        predictions.append(reg.predict(X))

    first, second = predictions
    assert_array_equal(first, second)
@pytest.mark.parametrize(
    "strategy", ["stratified", "most_frequent", "prior", "uniform", "constant"]
)
def test_dtype_of_classifier_probas(strategy):
    """Check that ``predict_proba`` returns float64 for every strategy."""
    y = [0, 2, 1, 1]
    X = np.zeros(4)

    classifier = DummyClassifier(strategy=strategy, random_state=0, constant=0)
    fitted = classifier.fit(X, y)
    probabilities = fitted.predict_proba(X)

    assert probabilities.dtype == np.float64
|