| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335 |
- # Authors: Andreas Mueller <andreas.mueller@columbia.edu>
- # Guillaume Lemaitre <guillaume.lemaitre@inria.fr>
- # License: BSD 3 clause
- import warnings
- import numpy as np
- from ..base import BaseEstimator, RegressorMixin, _fit_context, clone
- from ..exceptions import NotFittedError
- from ..preprocessing import FunctionTransformer
- from ..utils import _safe_indexing, check_array
- from ..utils._param_validation import HasMethods
- from ..utils._tags import _safe_tags
- from ..utils.validation import check_is_fitted
- __all__ = ["TransformedTargetRegressor"]
class TransformedTargetRegressor(RegressorMixin, BaseEstimator):
    """Meta-estimator that fits a regressor on a transformed target.

    This is convenient in regression problems where a non-linear
    transformation should be applied to the target `y`. The transformation is
    specified either as a Transformer, e.g.
    :class:`~sklearn.preprocessing.QuantileTransformer`, or as a function
    together with its inverse, e.g. `np.log` and `np.exp`.

    During :meth:`fit`, the computation is::

        regressor.fit(X, func(y))

    or::

        regressor.fit(X, transformer.transform(y))

    During :meth:`predict`, the computation is::

        inverse_func(regressor.predict(X))

    or::

        transformer.inverse_transform(regressor.predict(X))

    Read more in the :ref:`User Guide <transformed_target_regressor>`.

    .. versionadded:: 0.20

    Parameters
    ----------
    regressor : object, default=None
        Regressor object, for instance one derived from
        :class:`~sklearn.base.RegressorMixin`. It is automatically cloned
        each time before fitting. If `regressor is None`, a
        :class:`~sklearn.linear_model.LinearRegression` is created and used.

    transformer : object, default=None
        Estimator object, for instance one derived from
        :class:`~sklearn.base.TransformerMixin`. Cannot be set at the same
        time as `func` and `inverse_func`. If `transformer is None` as well
        as `func` and `inverse_func`, an identity transformer is used. Note
        that the transformer is cloned during fitting. Also, the transformer
        restricts `y` to be a numpy array.

    func : function, default=None
        Function applied to `y` before it is passed to :meth:`fit`. Cannot be
        set at the same time as `transformer`. The function needs to return a
        2-dimensional array. If `func is None`, the identity function is
        used.

    inverse_func : function, default=None
        Function applied to the prediction of the regressor. Cannot be set at
        the same time as `transformer`. The function needs to return a
        2-dimensional array. The inverse function brings predictions back to
        the space of the original training labels.

    check_inverse : bool, default=True
        Whether to check that `transform` followed by `inverse_transform`
        or `func` followed by `inverse_func` leads to the original targets.

    Attributes
    ----------
    regressor_ : object
        Fitted regressor.

    transformer_ : object
        Transformer used in :meth:`fit` and :meth:`predict`.

    n_features_in_ : int
        Number of features seen during :term:`fit`. Only defined if the
        underlying regressor exposes such an attribute when fit.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    See Also
    --------
    sklearn.preprocessing.FunctionTransformer : Construct a transformer from an
        arbitrary callable.

    Notes
    -----
    Internally, the target `y` is always converted into a 2-dimensional array
    to be used by scikit-learn transformers. At the time of prediction, the
    output is reshaped to have the same number of dimensions as `y`.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.linear_model import LinearRegression
    >>> from sklearn.compose import TransformedTargetRegressor
    >>> tt = TransformedTargetRegressor(regressor=LinearRegression(),
    ...                                 func=np.log, inverse_func=np.exp)
    >>> X = np.arange(4).reshape(-1, 1)
    >>> y = np.exp(2 * X).ravel()
    >>> tt.fit(X, y)
    TransformedTargetRegressor(...)
    >>> tt.score(X, y)
    1.0
    >>> tt.regressor_.coef_
    array([2.])

    For a more detailed example use case refer to
    :ref:`sphx_glr_auto_examples_compose_plot_transformed_target.py`.
    """

    _parameter_constraints: dict = {
        "regressor": [HasMethods(["fit", "predict"]), None],
        "transformer": [HasMethods("transform"), None],
        "func": [callable, None],
        "inverse_func": [callable, None],
        "check_inverse": ["boolean"],
    }

    def __init__(
        self,
        regressor=None,
        *,
        transformer=None,
        func=None,
        inverse_func=None,
        check_inverse=True,
    ):
        self.regressor = regressor
        self.transformer = transformer
        self.func = func
        self.inverse_func = inverse_func
        self.check_inverse = check_inverse

    def _fit_transformer(self, y):
        """Validate the target transformation, fit it and sanity-check it.

        Either a clone of `transformer` or a
        :class:`~sklearn.preprocessing.FunctionTransformer` wrapping
        `func`/`inverse_func` is stored in `transformer_` and fitted on `y`.
        When `check_inverse` is enabled, a round trip through the fitted
        transformer is evaluated on a subset of `y` and a `UserWarning` is
        emitted if it does not reproduce that subset.

        Parameters
        ----------
        y : ndarray
            Target values; :meth:`fit` passes them as a 2-dimensional array.

        Raises
        ------
        ValueError
            If `transformer` is combined with `func`/`inverse_func`, or if
            `func` is given without `inverse_func`.
        """
        has_transformer = self.transformer is not None
        has_functions = not (self.func is None and self.inverse_func is None)

        if has_transformer and has_functions:
            raise ValueError(
                "'transformer' and functions 'func'/'inverse_func' cannot both be set."
            )

        if has_transformer:
            self.transformer_ = clone(self.transformer)
        else:
            if self.inverse_func is None and self.func is not None:
                raise ValueError(
                    "When 'func' is provided, 'inverse_func' must also be provided"
                )
            self.transformer_ = FunctionTransformer(
                func=self.func,
                inverse_func=self.inverse_func,
                validate=True,
                check_inverse=self.check_inverse,
            )

        # XXX: sample_weight is currently not forwarded to the transformer.
        # Should a transformer start consuming sample_weight, this code has to
        # be adapted. It is also a use case worth keeping in mind whenever the
        # sample_prop feature is considered.
        self.transformer_.fit(y)

        if not self.check_inverse:
            return

        # Only every `n_samples // 10`-th row (step of at least 1) takes part
        # in the round-trip check.
        step = max(1, y.shape[0] // 10)
        subset = _safe_indexing(y, slice(None, None, step))
        subset_trans = self.transformer_.transform(subset)
        round_trip = self.transformer_.inverse_transform(subset_trans)
        if np.allclose(subset, round_trip):
            return

        warnings.warn(
            (
                "The provided functions or transformer are"
                " not strictly inverse of each other. If"
                " you are sure you want to proceed regardless"
                ", set 'check_inverse=False'"
            ),
            UserWarning,
        )

    @_fit_context(
        # The nested regressor/transformer of TransformedTargetRegressor are
        # not validated yet.
        prefer_skip_nested_validation=False
    )
    def fit(self, X, y, **fit_params):
        """Fit the model according to the given training data.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training vector, where `n_samples` is the number of samples and
            `n_features` is the number of features.

        y : array-like of shape (n_samples,)
            Target values.

        **fit_params : dict
            Parameters passed to the `fit` method of the underlying
            regressor.

        Returns
        -------
        self : object
            Fitted estimator.
        """
        if y is None:
            raise ValueError(
                f"This {self.__class__.__name__} estimator "
                "requires y to be passed, but the target y is None."
            )

        target = check_array(
            y,
            input_name="y",
            accept_sparse=False,
            force_all_finite=True,
            ensure_2d=False,
            dtype="numeric",
            allow_nd=True,
        )

        # Remember the dimensionality of the training target so that predict
        # can return an array of a similar shape.
        self._training_dim = target.ndim

        # Transformers are written for X, which is 2-dimensional; present the
        # target in the same layout.
        target_2d = target.reshape(-1, 1) if target.ndim == 1 else target
        self._fit_transformer(target_2d)

        # Transform the target and go back to a 1d array when possible.
        target_trans = self.transformer_.transform(target_2d)
        # FIXME: a FunctionTransformer can return a 1D array even when validate
        # is set to True, hence the number of dimensions is checked first.
        if target_trans.ndim == 2 and target_trans.shape[1] == 1:
            target_trans = target_trans.squeeze(axis=1)

        if self.regressor is not None:
            self.regressor_ = clone(self.regressor)
        else:
            from ..linear_model import LinearRegression

            self.regressor_ = LinearRegression()

        self.regressor_.fit(X, target_trans, **fit_params)

        # Mirror the feature names recorded by the fitted regressor, if any.
        if hasattr(self.regressor_, "feature_names_in_"):
            self.feature_names_in_ = self.regressor_.feature_names_in_

        return self

    def predict(self, X, **predict_params):
        """Predict using the base regressor, applying inverse.

        The fitted regressor produces the prediction, which is then passed
        through `inverse_func` or `inverse_transform` before being returned.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Samples.

        **predict_params : dict of str -> object
            Parameters passed to the `predict` method of the underlying
            regressor.

        Returns
        -------
        y_hat : ndarray of shape (n_samples,)
            Predicted values.
        """
        check_is_fitted(self)
        raw_pred = self.regressor_.predict(X, **predict_params)

        # The transformer works on 2-dimensional input; lift 1d predictions.
        pred_2d = raw_pred.reshape(-1, 1) if raw_pred.ndim == 1 else raw_pred
        restored = self.transformer_.inverse_transform(pred_2d)

        # Drop the artificial column only if the training target was 1d.
        single_column = (
            self._training_dim == 1 and restored.ndim == 2 and restored.shape[1] == 1
        )
        return restored.squeeze(axis=1) if single_column else restored

    def _more_tags(self):
        """Return estimator tags, taking `multioutput` from the regressor."""
        base_regressor = self.regressor
        if base_regressor is None:
            from ..linear_model import LinearRegression

            base_regressor = LinearRegression()

        multioutput = _safe_tags(base_regressor, key="multioutput")
        return {"poor_score": True, "multioutput": multioutput}

    @property
    def n_features_in_(self):
        """Number of features seen during :term:`fit`."""
        # For consistency with other estimators an AttributeError is raised so
        # that hasattr() returns False if the estimator isn't fitted.
        try:
            check_is_fitted(self)
        except NotFittedError as nfe:
            raise AttributeError(
                f"{self.__class__.__name__} object has no n_features_in_ attribute."
            ) from nfe

        return self.regressor_.n_features_in_
|