_target.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335
  1. # Authors: Andreas Mueller <andreas.mueller@columbia.edu>
  2. # Guillaume Lemaitre <guillaume.lemaitre@inria.fr>
  3. # License: BSD 3 clause
  4. import warnings
  5. import numpy as np
  6. from ..base import BaseEstimator, RegressorMixin, _fit_context, clone
  7. from ..exceptions import NotFittedError
  8. from ..preprocessing import FunctionTransformer
  9. from ..utils import _safe_indexing, check_array
  10. from ..utils._param_validation import HasMethods
  11. from ..utils._tags import _safe_tags
  12. from ..utils.validation import check_is_fitted
  13. __all__ = ["TransformedTargetRegressor"]
  14. class TransformedTargetRegressor(RegressorMixin, BaseEstimator):
  15. """Meta-estimator to regress on a transformed target.
  16. Useful for applying a non-linear transformation to the target `y` in
  17. regression problems. This transformation can be given as a Transformer
  18. such as the :class:`~sklearn.preprocessing.QuantileTransformer` or as a
  19. function and its inverse such as `np.log` and `np.exp`.
  20. The computation during :meth:`fit` is::
  21. regressor.fit(X, func(y))
  22. or::
  23. regressor.fit(X, transformer.transform(y))
  24. The computation during :meth:`predict` is::
  25. inverse_func(regressor.predict(X))
  26. or::
  27. transformer.inverse_transform(regressor.predict(X))
  28. Read more in the :ref:`User Guide <transformed_target_regressor>`.
  29. .. versionadded:: 0.20
  30. Parameters
  31. ----------
  32. regressor : object, default=None
  33. Regressor object such as derived from
  34. :class:`~sklearn.base.RegressorMixin`. This regressor will
  35. automatically be cloned each time prior to fitting. If `regressor is
  36. None`, :class:`~sklearn.linear_model.LinearRegression` is created and used.
  37. transformer : object, default=None
  38. Estimator object such as derived from
  39. :class:`~sklearn.base.TransformerMixin`. Cannot be set at the same time
  40. as `func` and `inverse_func`. If `transformer is None` as well as
  41. `func` and `inverse_func`, the transformer will be an identity
  42. transformer. Note that the transformer will be cloned during fitting.
  43. Also, the transformer is restricting `y` to be a numpy array.
  44. func : function, default=None
  45. Function to apply to `y` before passing to :meth:`fit`. Cannot be set
  46. at the same time as `transformer`. The function needs to return a
  47. 2-dimensional array. If `func is None`, the function used will be the
  48. identity function.
  49. inverse_func : function, default=None
  50. Function to apply to the prediction of the regressor. Cannot be set at
  51. the same time as `transformer`. The function needs to return a
  52. 2-dimensional array. The inverse function is used to return
  53. predictions to the same space of the original training labels.
  54. check_inverse : bool, default=True
  55. Whether to check that `transform` followed by `inverse_transform`
  56. or `func` followed by `inverse_func` leads to the original targets.
  57. Attributes
  58. ----------
  59. regressor_ : object
  60. Fitted regressor.
  61. transformer_ : object
  62. Transformer used in :meth:`fit` and :meth:`predict`.
  63. n_features_in_ : int
  64. Number of features seen during :term:`fit`. Only defined if the
  65. underlying regressor exposes such an attribute when fit.
  66. .. versionadded:: 0.24
  67. feature_names_in_ : ndarray of shape (`n_features_in_`,)
  68. Names of features seen during :term:`fit`. Defined only when `X`
  69. has feature names that are all strings.
  70. .. versionadded:: 1.0
  71. See Also
  72. --------
  73. sklearn.preprocessing.FunctionTransformer : Construct a transformer from an
  74. arbitrary callable.
  75. Notes
  76. -----
  77. Internally, the target `y` is always converted into a 2-dimensional array
  78. to be used by scikit-learn transformers. At the time of prediction, the
  79. output will be reshaped to a have the same number of dimensions as `y`.
  80. Examples
  81. --------
  82. >>> import numpy as np
  83. >>> from sklearn.linear_model import LinearRegression
  84. >>> from sklearn.compose import TransformedTargetRegressor
  85. >>> tt = TransformedTargetRegressor(regressor=LinearRegression(),
  86. ... func=np.log, inverse_func=np.exp)
  87. >>> X = np.arange(4).reshape(-1, 1)
  88. >>> y = np.exp(2 * X).ravel()
  89. >>> tt.fit(X, y)
  90. TransformedTargetRegressor(...)
  91. >>> tt.score(X, y)
  92. 1.0
  93. >>> tt.regressor_.coef_
  94. array([2.])
  95. For a more detailed example use case refer to
  96. :ref:`sphx_glr_auto_examples_compose_plot_transformed_target.py`.
  97. """
  98. _parameter_constraints: dict = {
  99. "regressor": [HasMethods(["fit", "predict"]), None],
  100. "transformer": [HasMethods("transform"), None],
  101. "func": [callable, None],
  102. "inverse_func": [callable, None],
  103. "check_inverse": ["boolean"],
  104. }
  105. def __init__(
  106. self,
  107. regressor=None,
  108. *,
  109. transformer=None,
  110. func=None,
  111. inverse_func=None,
  112. check_inverse=True,
  113. ):
  114. self.regressor = regressor
  115. self.transformer = transformer
  116. self.func = func
  117. self.inverse_func = inverse_func
  118. self.check_inverse = check_inverse
  119. def _fit_transformer(self, y):
  120. """Check transformer and fit transformer.
  121. Create the default transformer, fit it and make additional inverse
  122. check on a subset (optional).
  123. """
  124. if self.transformer is not None and (
  125. self.func is not None or self.inverse_func is not None
  126. ):
  127. raise ValueError(
  128. "'transformer' and functions 'func'/'inverse_func' cannot both be set."
  129. )
  130. elif self.transformer is not None:
  131. self.transformer_ = clone(self.transformer)
  132. else:
  133. if self.func is not None and self.inverse_func is None:
  134. raise ValueError(
  135. "When 'func' is provided, 'inverse_func' must also be provided"
  136. )
  137. self.transformer_ = FunctionTransformer(
  138. func=self.func,
  139. inverse_func=self.inverse_func,
  140. validate=True,
  141. check_inverse=self.check_inverse,
  142. )
  143. # XXX: sample_weight is not currently passed to the
  144. # transformer. However, if transformer starts using sample_weight, the
  145. # code should be modified accordingly. At the time to consider the
  146. # sample_prop feature, it is also a good use case to be considered.
  147. self.transformer_.fit(y)
  148. if self.check_inverse:
  149. idx_selected = slice(None, None, max(1, y.shape[0] // 10))
  150. y_sel = _safe_indexing(y, idx_selected)
  151. y_sel_t = self.transformer_.transform(y_sel)
  152. if not np.allclose(y_sel, self.transformer_.inverse_transform(y_sel_t)):
  153. warnings.warn(
  154. (
  155. "The provided functions or transformer are"
  156. " not strictly inverse of each other. If"
  157. " you are sure you want to proceed regardless"
  158. ", set 'check_inverse=False'"
  159. ),
  160. UserWarning,
  161. )
  162. @_fit_context(
  163. # TransformedTargetRegressor.regressor/transformer are not validated yet.
  164. prefer_skip_nested_validation=False
  165. )
  166. def fit(self, X, y, **fit_params):
  167. """Fit the model according to the given training data.
  168. Parameters
  169. ----------
  170. X : {array-like, sparse matrix} of shape (n_samples, n_features)
  171. Training vector, where `n_samples` is the number of samples and
  172. `n_features` is the number of features.
  173. y : array-like of shape (n_samples,)
  174. Target values.
  175. **fit_params : dict
  176. Parameters passed to the `fit` method of the underlying
  177. regressor.
  178. Returns
  179. -------
  180. self : object
  181. Fitted estimator.
  182. """
  183. if y is None:
  184. raise ValueError(
  185. f"This {self.__class__.__name__} estimator "
  186. "requires y to be passed, but the target y is None."
  187. )
  188. y = check_array(
  189. y,
  190. input_name="y",
  191. accept_sparse=False,
  192. force_all_finite=True,
  193. ensure_2d=False,
  194. dtype="numeric",
  195. allow_nd=True,
  196. )
  197. # store the number of dimension of the target to predict an array of
  198. # similar shape at predict
  199. self._training_dim = y.ndim
  200. # transformers are designed to modify X which is 2d dimensional, we
  201. # need to modify y accordingly.
  202. if y.ndim == 1:
  203. y_2d = y.reshape(-1, 1)
  204. else:
  205. y_2d = y
  206. self._fit_transformer(y_2d)
  207. # transform y and convert back to 1d array if needed
  208. y_trans = self.transformer_.transform(y_2d)
  209. # FIXME: a FunctionTransformer can return a 1D array even when validate
  210. # is set to True. Therefore, we need to check the number of dimension
  211. # first.
  212. if y_trans.ndim == 2 and y_trans.shape[1] == 1:
  213. y_trans = y_trans.squeeze(axis=1)
  214. if self.regressor is None:
  215. from ..linear_model import LinearRegression
  216. self.regressor_ = LinearRegression()
  217. else:
  218. self.regressor_ = clone(self.regressor)
  219. self.regressor_.fit(X, y_trans, **fit_params)
  220. if hasattr(self.regressor_, "feature_names_in_"):
  221. self.feature_names_in_ = self.regressor_.feature_names_in_
  222. return self
  223. def predict(self, X, **predict_params):
  224. """Predict using the base regressor, applying inverse.
  225. The regressor is used to predict and the `inverse_func` or
  226. `inverse_transform` is applied before returning the prediction.
  227. Parameters
  228. ----------
  229. X : {array-like, sparse matrix} of shape (n_samples, n_features)
  230. Samples.
  231. **predict_params : dict of str -> object
  232. Parameters passed to the `predict` method of the underlying
  233. regressor.
  234. Returns
  235. -------
  236. y_hat : ndarray of shape (n_samples,)
  237. Predicted values.
  238. """
  239. check_is_fitted(self)
  240. pred = self.regressor_.predict(X, **predict_params)
  241. if pred.ndim == 1:
  242. pred_trans = self.transformer_.inverse_transform(pred.reshape(-1, 1))
  243. else:
  244. pred_trans = self.transformer_.inverse_transform(pred)
  245. if (
  246. self._training_dim == 1
  247. and pred_trans.ndim == 2
  248. and pred_trans.shape[1] == 1
  249. ):
  250. pred_trans = pred_trans.squeeze(axis=1)
  251. return pred_trans
  252. def _more_tags(self):
  253. regressor = self.regressor
  254. if regressor is None:
  255. from ..linear_model import LinearRegression
  256. regressor = LinearRegression()
  257. return {
  258. "poor_score": True,
  259. "multioutput": _safe_tags(regressor, key="multioutput"),
  260. }
  261. @property
  262. def n_features_in_(self):
  263. """Number of features seen during :term:`fit`."""
  264. # For consistency with other estimators we raise a AttributeError so
  265. # that hasattr() returns False the estimator isn't fitted.
  266. try:
  267. check_is_fitted(self)
  268. except NotFittedError as nfe:
  269. raise AttributeError(
  270. "{} object has no n_features_in_ attribute.".format(
  271. self.__class__.__name__
  272. )
  273. ) from nfe
  274. return self.regressor_.n_features_in_