| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387 |
- import numpy as np
- import pytest
- from sklearn import datasets
- from sklearn.base import BaseEstimator, TransformerMixin, clone
- from sklearn.compose import TransformedTargetRegressor
- from sklearn.dummy import DummyRegressor
- from sklearn.linear_model import LinearRegression, OrthogonalMatchingPursuit
- from sklearn.pipeline import Pipeline
- from sklearn.preprocessing import FunctionTransformer, StandardScaler
- from sklearn.utils._testing import assert_allclose, assert_no_warnings
# Shared (X, y) regression fixture used by every test in this module; the
# fixed random_state keeps the generated data identical across runs.
friedman = datasets.make_friedman1(random_state=0)
def test_transform_target_regressor_error():
    # Check the errors raised by fit() for invalid configurations.
    X, y = friedman

    # provide a transformer and functions at the same time
    both_set = TransformedTargetRegressor(
        regressor=LinearRegression(),
        transformer=StandardScaler(),
        func=np.exp,
        inverse_func=np.log,
    )
    msg = "'transformer' and functions 'func'/'inverse_func' cannot both be set."
    with pytest.raises(ValueError, match=msg):
        both_set.fit(X, y)

    # fit with sample_weight with a regressor which does not support it
    weights = np.ones((y.shape[0],))
    no_weight_support = TransformedTargetRegressor(
        regressor=OrthogonalMatchingPursuit(), transformer=StandardScaler()
    )
    msg = r"fit\(\) got an unexpected keyword argument 'sample_weight'"
    with pytest.raises(TypeError, match=msg):
        no_weight_support.fit(X, y, sample_weight=weights)

    # func is given but inverse_func is not
    missing_inverse = TransformedTargetRegressor(func=np.exp)
    msg = "When 'func' is provided, 'inverse_func' must also be provided"
    with pytest.raises(ValueError, match=msg):
        missing_inverse.fit(X, y)
def test_transform_target_regressor_invertible():
    # Check that check_inverse=True warns for a func/inverse_func pair that
    # does not round-trip, and that check_inverse=False stays silent.
    X, y = friedman
    mismatched = {"func": np.sqrt, "inverse_func": np.log}

    checked = TransformedTargetRegressor(
        regressor=LinearRegression(), check_inverse=True, **mismatched
    )
    expected_warning = (
        "The provided functions or transformer"
        " are not strictly inverse of each other."
    )
    with pytest.warns(UserWarning, match=expected_warning):
        checked.fit(X, y)

    unchecked = TransformedTargetRegressor(regressor=LinearRegression(), **mismatched)
    unchecked.set_params(check_inverse=False)
    assert_no_warnings(unchecked.fit, X, y)
def _check_standard_scaled(y, y_pred):
    """Assert that ``y_pred`` equals ``y`` centered and scaled along axis 0."""
    centered = y - np.mean(y, axis=0)
    assert_allclose(centered / np.std(y, axis=0), y_pred)
def _check_shifted_by_one(y, y_pred):
    """Assert that ``y_pred`` equals ``y`` with one added to every entry."""
    expected = y + 1
    assert_allclose(expected, y_pred)
def test_transform_target_regressor_functions():
    # Check a log/exp function pair on the 1D friedman target.
    X, y = friedman
    regr = TransformedTargetRegressor(
        regressor=LinearRegression(), func=np.log, inverse_func=np.exp
    )
    regr.fit(X, y)
    y_pred = regr.predict(X)

    # check the transformer output
    y_column = y.reshape(-1, 1)
    y_tran = regr.transformer_.transform(y_column).squeeze()
    assert_allclose(np.log(y), y_tran)
    y_round_trip = regr.transformer_.inverse_transform(y_tran.reshape(-1, 1))
    assert_allclose(y, y_round_trip.squeeze())

    assert y.shape == y_pred.shape
    inner_pred = regr.regressor_.predict(X)
    assert_allclose(y_pred, regr.inverse_func(inner_pred))

    # check the regressor output
    lr = LinearRegression()
    lr.fit(X, regr.func(y))
    assert_allclose(regr.regressor_.coef_.ravel(), lr.coef_.ravel())
def test_transform_target_regressor_functions_multioutput():
    # Check a log/exp function pair on a two-column target.
    X, y_1d = friedman
    y = np.vstack((y_1d, y_1d ** 2 + 1)).T
    regr = TransformedTargetRegressor(
        regressor=LinearRegression(), func=np.log, inverse_func=np.exp
    )
    regr.fit(X, y)
    y_pred = regr.predict(X)

    # check the transformer output
    y_tran = regr.transformer_.transform(y)
    assert_allclose(np.log(y), y_tran)
    y_round_trip = regr.transformer_.inverse_transform(y_tran)
    assert_allclose(y, y_round_trip)

    assert y.shape == y_pred.shape
    inner_pred = regr.regressor_.predict(X)
    assert_allclose(y_pred, regr.inverse_func(inner_pred))

    # check the regressor output
    lr = LinearRegression()
    lr.fit(X, regr.func(y))
    assert_allclose(regr.regressor_.coef_.ravel(), lr.coef_.ravel())
@pytest.mark.parametrize(
    "X,y", [friedman, (friedman[0], np.vstack((friedman[1], friedman[1] ** 2 + 1)).T)]
)
def test_transform_target_regressor_1d_transformer(X, y):
    # All transformers in scikit-learn expect 2D data. FunctionTransformer with
    # validate=False lifts this constraint without checking that the input is
    # a 2D vector. We check the consistency of the data shape using a 1D and a
    # 2D y array.
    transformer = FunctionTransformer(
        func=lambda x: x + 1, inverse_func=lambda x: x - 1
    )
    regr = TransformedTargetRegressor(
        regressor=LinearRegression(), transformer=transformer
    )
    y_pred = regr.fit(X, y).predict(X)
    # predictions must come back in the shape of the target that was passed in
    assert y.shape == y_pred.shape

    # consistency forward transform
    y_tran = regr.transformer_.transform(y)
    _check_shifted_by_one(y, y_tran)

    # consistency inverse transform
    assert_allclose(y, regr.transformer_.inverse_transform(y_tran).squeeze())

    # consistency of the regressor: an independent clone of the transformer
    # plus a plain LinearRegression must reproduce predictions and coefficients
    lr = LinearRegression()
    transformer2 = clone(transformer)
    lr.fit(X, transformer2.fit_transform(y))
    y_lr_pred = lr.predict(X)
    assert_allclose(y_pred, transformer2.inverse_transform(y_lr_pred))
    assert_allclose(regr.regressor_.coef_, lr.coef_)
@pytest.mark.parametrize(
    "X,y", [friedman, (friedman[0], np.vstack((friedman[1], friedman[1] ** 2 + 1)).T)]
)
def test_transform_target_regressor_2d_transformer(X, y):
    # Check consistency with a transformer accepting only 2D arrays, using
    # both a 1D and a 2D y array.
    transformer = StandardScaler()
    regr = TransformedTargetRegressor(
        regressor=LinearRegression(), transformer=transformer
    )
    y_pred = regr.fit(X, y).predict(X)
    # predictions must come back in the shape of the target that was passed in
    assert y.shape == y_pred.shape

    # StandardScaler is called directly below, so a 1D target has to be
    # presented as a single column and the results squeezed back.
    is_1d = y.ndim == 1
    y_2d = y.reshape(-1, 1) if is_1d else y

    # consistency forward transform
    y_tran = regr.transformer_.transform(y_2d)
    _check_standard_scaled(y, y_tran.squeeze())

    # consistency inverse transform
    assert_allclose(y, regr.transformer_.inverse_transform(y_tran).squeeze())

    # consistency of the regressor
    lr = LinearRegression()
    transformer2 = clone(transformer)
    if is_1d:  # create a 2D array and squeeze results
        lr.fit(X, transformer2.fit_transform(y_2d).squeeze())
        y_lr_pred = lr.predict(X).reshape(-1, 1)
        y_pred2 = transformer2.inverse_transform(y_lr_pred).squeeze()
    else:
        lr.fit(X, transformer2.fit_transform(y))
        y_lr_pred = lr.predict(X)
        y_pred2 = transformer2.inverse_transform(y_lr_pred)
    assert_allclose(y_pred, y_pred2)
    assert_allclose(regr.regressor_.coef_, lr.coef_)
def test_transform_target_regressor_2d_transformer_multioutput():
    # Check consistency with a transformer accepting only 2D arrays and a 2D
    # y array.
    X = friedman[0]
    y = np.vstack((friedman[1], friedman[1] ** 2 + 1)).T
    transformer = StandardScaler()
    regr = TransformedTargetRegressor(
        regressor=LinearRegression(), transformer=transformer
    )
    y_pred = regr.fit(X, y).predict(X)
    # predictions must come back in the shape of the target that was passed in
    assert y.shape == y_pred.shape

    # consistency forward transform
    y_tran = regr.transformer_.transform(y)
    _check_standard_scaled(y, y_tran)

    # consistency inverse transform
    assert_allclose(y, regr.transformer_.inverse_transform(y_tran).squeeze())

    # consistency of the regressor: an independent clone of the transformer
    # plus a plain LinearRegression must reproduce predictions and coefficients
    lr = LinearRegression()
    transformer2 = clone(transformer)
    lr.fit(X, transformer2.fit_transform(y))
    y_lr_pred = lr.predict(X)
    assert_allclose(y_pred, transformer2.inverse_transform(y_lr_pred))
    assert_allclose(regr.regressor_.coef_, lr.coef_)
def test_transform_target_regressor_3d_target():
    # Non-regression test for:
    # https://github.com/scikit-learn/scikit-learn/issues/18866
    # Check with a 3D target with a transformer that reshapes the target
    X, y_1d = friedman
    y = np.tile(y_1d.reshape(-1, 1, 1), (1, 3, 2))

    def flatten_data(data):
        # collapse every trailing axis into one
        n_rows = data.shape[0]
        return data.reshape(n_rows, -1)

    def unflatten_data(data):
        # restore a trailing axis of length 2
        n_rows = data.shape[0]
        return data.reshape(n_rows, -1, 2)

    reshaper = FunctionTransformer(func=flatten_data, inverse_func=unflatten_data)
    regr = TransformedTargetRegressor(
        regressor=LinearRegression(), transformer=reshaper
    )
    regr.fit(X, y)
    y_pred = regr.predict(X)
    assert y.shape == y_pred.shape
def test_transform_target_regressor_multi_to_single():
    # Check a func that reduces a two-column target to a single output,
    # returned either as a column or as a 1D array.
    X, y_1d = friedman
    y = np.transpose([y_1d, (y_1d ** 2 + 1)])

    def norm_as_column(target):
        out = np.sqrt(target[:, 0] ** 2 + target[:, 1] ** 2)
        return out[:, np.newaxis]

    # force that the function only return a 1D array
    def norm_as_1d(target):
        return np.sqrt(target[:, 0] ** 2 + target[:, 1] ** 2)

    def identity(target):
        return target

    tt = TransformedTargetRegressor(
        func=norm_as_column, inverse_func=identity, check_inverse=False
    )
    tt.fit(X, y)
    y_pred_2d_func = tt.predict(X)
    assert y_pred_2d_func.shape == (100, 1)

    tt = TransformedTargetRegressor(
        func=norm_as_1d, inverse_func=identity, check_inverse=False
    )
    tt.fit(X, y)
    y_pred_1d_func = tt.predict(X)
    assert y_pred_1d_func.shape == (100, 1)

    assert_allclose(y_pred_1d_func, y_pred_2d_func)
class DummyCheckerArrayTransformer(TransformerMixin, BaseEstimator):
    """Identity transformer asserting that every input is a numpy array."""

    @staticmethod
    def _assert_ndarray(data):
        assert isinstance(data, np.ndarray)

    def fit(self, X, y=None):
        self._assert_ndarray(X)
        return self

    def transform(self, X):
        self._assert_ndarray(X)
        return X

    def inverse_transform(self, X):
        self._assert_ndarray(X)
        return X
class DummyCheckerListRegressor(DummyRegressor):
    """DummyRegressor asserting that ``X`` is a list in fit and predict."""

    def fit(self, X, y, sample_weight=None):
        assert isinstance(X, list)
        fitted = super().fit(X, y, sample_weight)
        return fitted

    def predict(self, X):
        assert isinstance(X, list)
        predictions = super().predict(X)
        return predictions
def test_transform_target_regressor_ensure_y_array():
    # Check that the target ``y`` passed to the transformer is always a numpy
    # array. Similarly, if ``X`` is passed as a list, check that the
    # regressor receives it as is.
    X, y = friedman
    X_as_list = X.tolist()
    y_as_list = y.tolist()

    tt = TransformedTargetRegressor(
        transformer=DummyCheckerArrayTransformer(),
        regressor=DummyCheckerListRegressor(),
        check_inverse=False,
    )
    tt.fit(X_as_list, y_as_list)
    tt.predict(X_as_list)

    # an array X trips the list assertion of the dummy regressor
    with pytest.raises(AssertionError):
        tt.fit(X, y_as_list)
    with pytest.raises(AssertionError):
        tt.predict(X)
class DummyTransformer(TransformerMixin, BaseEstimator):
    """Identity transformer which counts how many times fit was called."""

    def __init__(self, fit_counter=0):
        self.fit_counter = fit_counter

    def fit(self, X, y=None):
        # every call bumps the counter that the tests inspect afterwards
        self.fit_counter = self.fit_counter + 1
        return self

    def transform(self, X):
        return X

    def inverse_transform(self, X):
        return X
@pytest.mark.parametrize("check_inverse", [False, True])
def test_transform_target_regressor_count_fit(check_inverse):
    # regression test for gh-issue #11618
    # check that we only call a single time fit for the transformer
    X, y = friedman
    counting_transformer = DummyTransformer()
    ttr = TransformedTargetRegressor(
        transformer=counting_transformer, check_inverse=check_inverse
    )
    ttr.fit(X, y)
    n_fits = ttr.transformer_.fit_counter
    assert n_fits == 1
class DummyRegressorWithExtraFitParams(DummyRegressor):
    """DummyRegressor whose fit takes an extra ``check_input`` parameter."""

    def fit(self, X, y, sample_weight=None, check_input=True):
        # The tests pass check_input=False; a truthy value here means the
        # parameter was not forwarded to the regressor.
        assert not check_input
        fitted = super().fit(X, y, sample_weight)
        return fitted
def test_transform_target_regressor_pass_fit_parameters():
    # Check that extra fit keyword arguments reach the regressor; the dummy
    # regressor asserts that check_input arrives as False.
    X, y = friedman
    regressor = DummyRegressorWithExtraFitParams()
    transformer = DummyTransformer()
    regr = TransformedTargetRegressor(regressor=regressor, transformer=transformer)

    regr.fit(X, y, check_input=False)
    n_fits = regr.transformer_.fit_counter
    assert n_fits == 1
def test_transform_target_regressor_route_pipeline():
    # Check that a fit parameter addressed to the "est" step of a Pipeline
    # reaches the regressor; the dummy regressor asserts check_input is False.
    X, y = friedman
    regr = TransformedTargetRegressor(
        regressor=DummyRegressorWithExtraFitParams(), transformer=DummyTransformer()
    )
    pip = Pipeline([("normalize", StandardScaler()), ("est", regr)])

    pip.fit(X, y, est__check_input=False)
    n_fits = regr.transformer_.fit_counter
    assert n_fits == 1
class DummyRegressorWithExtraPredictParams(DummyRegressor):
    """DummyRegressor whose predict takes an extra ``check_input`` parameter."""

    def predict(self, X, check_input=True):
        # Record the call first so the test can tell predict was reached,
        # then require that check_input was passed as false.
        self.predict_called = True
        assert not check_input
        predictions = super().predict(X)
        return predictions
def test_transform_target_regressor_pass_extra_predict_parameters():
    # Checks that predict kwargs are passed to regressor.
    X, y = friedman
    regressor = DummyRegressorWithExtraPredictParams()
    transformer = DummyTransformer()
    regr = TransformedTargetRegressor(regressor=regressor, transformer=transformer)

    regr.fit(X, y)
    regr.predict(X, check_input=False)

    was_called = regr.regressor_.predict_called
    assert was_called
|