test_target.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387
  1. import numpy as np
  2. import pytest
  3. from sklearn import datasets
  4. from sklearn.base import BaseEstimator, TransformerMixin, clone
  5. from sklearn.compose import TransformedTargetRegressor
  6. from sklearn.dummy import DummyRegressor
  7. from sklearn.linear_model import LinearRegression, OrthogonalMatchingPursuit
  8. from sklearn.pipeline import Pipeline
  9. from sklearn.preprocessing import FunctionTransformer, StandardScaler
  10. from sklearn.utils._testing import assert_allclose, assert_no_warnings
  11. friedman = datasets.make_friedman1(random_state=0)
  12. def test_transform_target_regressor_error():
  13. X, y = friedman
  14. # provide a transformer and functions at the same time
  15. regr = TransformedTargetRegressor(
  16. regressor=LinearRegression(),
  17. transformer=StandardScaler(),
  18. func=np.exp,
  19. inverse_func=np.log,
  20. )
  21. with pytest.raises(
  22. ValueError,
  23. match="'transformer' and functions 'func'/'inverse_func' cannot both be set.",
  24. ):
  25. regr.fit(X, y)
  26. # fit with sample_weight with a regressor which does not support it
  27. sample_weight = np.ones((y.shape[0],))
  28. regr = TransformedTargetRegressor(
  29. regressor=OrthogonalMatchingPursuit(), transformer=StandardScaler()
  30. )
  31. with pytest.raises(
  32. TypeError,
  33. match=r"fit\(\) got an unexpected " "keyword argument 'sample_weight'",
  34. ):
  35. regr.fit(X, y, sample_weight=sample_weight)
  36. # func is given but inverse_func is not
  37. regr = TransformedTargetRegressor(func=np.exp)
  38. with pytest.raises(
  39. ValueError,
  40. match="When 'func' is provided, 'inverse_func' must also be provided",
  41. ):
  42. regr.fit(X, y)
  43. def test_transform_target_regressor_invertible():
  44. X, y = friedman
  45. regr = TransformedTargetRegressor(
  46. regressor=LinearRegression(),
  47. func=np.sqrt,
  48. inverse_func=np.log,
  49. check_inverse=True,
  50. )
  51. with pytest.warns(
  52. UserWarning,
  53. match=(
  54. "The provided functions or"
  55. " transformer are not strictly inverse of each other."
  56. ),
  57. ):
  58. regr.fit(X, y)
  59. regr = TransformedTargetRegressor(
  60. regressor=LinearRegression(), func=np.sqrt, inverse_func=np.log
  61. )
  62. regr.set_params(check_inverse=False)
  63. assert_no_warnings(regr.fit, X, y)
  64. def _check_standard_scaled(y, y_pred):
  65. y_mean = np.mean(y, axis=0)
  66. y_std = np.std(y, axis=0)
  67. assert_allclose((y - y_mean) / y_std, y_pred)
  68. def _check_shifted_by_one(y, y_pred):
  69. assert_allclose(y + 1, y_pred)
  70. def test_transform_target_regressor_functions():
  71. X, y = friedman
  72. regr = TransformedTargetRegressor(
  73. regressor=LinearRegression(), func=np.log, inverse_func=np.exp
  74. )
  75. y_pred = regr.fit(X, y).predict(X)
  76. # check the transformer output
  77. y_tran = regr.transformer_.transform(y.reshape(-1, 1)).squeeze()
  78. assert_allclose(np.log(y), y_tran)
  79. assert_allclose(
  80. y, regr.transformer_.inverse_transform(y_tran.reshape(-1, 1)).squeeze()
  81. )
  82. assert y.shape == y_pred.shape
  83. assert_allclose(y_pred, regr.inverse_func(regr.regressor_.predict(X)))
  84. # check the regressor output
  85. lr = LinearRegression().fit(X, regr.func(y))
  86. assert_allclose(regr.regressor_.coef_.ravel(), lr.coef_.ravel())
  87. def test_transform_target_regressor_functions_multioutput():
  88. X = friedman[0]
  89. y = np.vstack((friedman[1], friedman[1] ** 2 + 1)).T
  90. regr = TransformedTargetRegressor(
  91. regressor=LinearRegression(), func=np.log, inverse_func=np.exp
  92. )
  93. y_pred = regr.fit(X, y).predict(X)
  94. # check the transformer output
  95. y_tran = regr.transformer_.transform(y)
  96. assert_allclose(np.log(y), y_tran)
  97. assert_allclose(y, regr.transformer_.inverse_transform(y_tran))
  98. assert y.shape == y_pred.shape
  99. assert_allclose(y_pred, regr.inverse_func(regr.regressor_.predict(X)))
  100. # check the regressor output
  101. lr = LinearRegression().fit(X, regr.func(y))
  102. assert_allclose(regr.regressor_.coef_.ravel(), lr.coef_.ravel())
  103. @pytest.mark.parametrize(
  104. "X,y", [friedman, (friedman[0], np.vstack((friedman[1], friedman[1] ** 2 + 1)).T)]
  105. )
  106. def test_transform_target_regressor_1d_transformer(X, y):
  107. # All transformer in scikit-learn expect 2D data. FunctionTransformer with
  108. # validate=False lift this constraint without checking that the input is a
  109. # 2D vector. We check the consistency of the data shape using a 1D and 2D y
  110. # array.
  111. transformer = FunctionTransformer(
  112. func=lambda x: x + 1, inverse_func=lambda x: x - 1
  113. )
  114. regr = TransformedTargetRegressor(
  115. regressor=LinearRegression(), transformer=transformer
  116. )
  117. y_pred = regr.fit(X, y).predict(X)
  118. assert y.shape == y_pred.shape
  119. # consistency forward transform
  120. y_tran = regr.transformer_.transform(y)
  121. _check_shifted_by_one(y, y_tran)
  122. assert y.shape == y_pred.shape
  123. # consistency inverse transform
  124. assert_allclose(y, regr.transformer_.inverse_transform(y_tran).squeeze())
  125. # consistency of the regressor
  126. lr = LinearRegression()
  127. transformer2 = clone(transformer)
  128. lr.fit(X, transformer2.fit_transform(y))
  129. y_lr_pred = lr.predict(X)
  130. assert_allclose(y_pred, transformer2.inverse_transform(y_lr_pred))
  131. assert_allclose(regr.regressor_.coef_, lr.coef_)
  132. @pytest.mark.parametrize(
  133. "X,y", [friedman, (friedman[0], np.vstack((friedman[1], friedman[1] ** 2 + 1)).T)]
  134. )
  135. def test_transform_target_regressor_2d_transformer(X, y):
  136. # Check consistency with transformer accepting only 2D array and a 1D/2D y
  137. # array.
  138. transformer = StandardScaler()
  139. regr = TransformedTargetRegressor(
  140. regressor=LinearRegression(), transformer=transformer
  141. )
  142. y_pred = regr.fit(X, y).predict(X)
  143. assert y.shape == y_pred.shape
  144. # consistency forward transform
  145. if y.ndim == 1: # create a 2D array and squeeze results
  146. y_tran = regr.transformer_.transform(y.reshape(-1, 1))
  147. else:
  148. y_tran = regr.transformer_.transform(y)
  149. _check_standard_scaled(y, y_tran.squeeze())
  150. assert y.shape == y_pred.shape
  151. # consistency inverse transform
  152. assert_allclose(y, regr.transformer_.inverse_transform(y_tran).squeeze())
  153. # consistency of the regressor
  154. lr = LinearRegression()
  155. transformer2 = clone(transformer)
  156. if y.ndim == 1: # create a 2D array and squeeze results
  157. lr.fit(X, transformer2.fit_transform(y.reshape(-1, 1)).squeeze())
  158. y_lr_pred = lr.predict(X).reshape(-1, 1)
  159. y_pred2 = transformer2.inverse_transform(y_lr_pred).squeeze()
  160. else:
  161. lr.fit(X, transformer2.fit_transform(y))
  162. y_lr_pred = lr.predict(X)
  163. y_pred2 = transformer2.inverse_transform(y_lr_pred)
  164. assert_allclose(y_pred, y_pred2)
  165. assert_allclose(regr.regressor_.coef_, lr.coef_)
  166. def test_transform_target_regressor_2d_transformer_multioutput():
  167. # Check consistency with transformer accepting only 2D array and a 2D y
  168. # array.
  169. X = friedman[0]
  170. y = np.vstack((friedman[1], friedman[1] ** 2 + 1)).T
  171. transformer = StandardScaler()
  172. regr = TransformedTargetRegressor(
  173. regressor=LinearRegression(), transformer=transformer
  174. )
  175. y_pred = regr.fit(X, y).predict(X)
  176. assert y.shape == y_pred.shape
  177. # consistency forward transform
  178. y_tran = regr.transformer_.transform(y)
  179. _check_standard_scaled(y, y_tran)
  180. assert y.shape == y_pred.shape
  181. # consistency inverse transform
  182. assert_allclose(y, regr.transformer_.inverse_transform(y_tran).squeeze())
  183. # consistency of the regressor
  184. lr = LinearRegression()
  185. transformer2 = clone(transformer)
  186. lr.fit(X, transformer2.fit_transform(y))
  187. y_lr_pred = lr.predict(X)
  188. assert_allclose(y_pred, transformer2.inverse_transform(y_lr_pred))
  189. assert_allclose(regr.regressor_.coef_, lr.coef_)
  190. def test_transform_target_regressor_3d_target():
  191. # Non-regression test for:
  192. # https://github.com/scikit-learn/scikit-learn/issues/18866
  193. # Check with a 3D target with a transformer that reshapes the target
  194. X = friedman[0]
  195. y = np.tile(friedman[1].reshape(-1, 1, 1), [1, 3, 2])
  196. def flatten_data(data):
  197. return data.reshape(data.shape[0], -1)
  198. def unflatten_data(data):
  199. return data.reshape(data.shape[0], -1, 2)
  200. transformer = FunctionTransformer(func=flatten_data, inverse_func=unflatten_data)
  201. regr = TransformedTargetRegressor(
  202. regressor=LinearRegression(), transformer=transformer
  203. )
  204. y_pred = regr.fit(X, y).predict(X)
  205. assert y.shape == y_pred.shape
  206. def test_transform_target_regressor_multi_to_single():
  207. X = friedman[0]
  208. y = np.transpose([friedman[1], (friedman[1] ** 2 + 1)])
  209. def func(y):
  210. out = np.sqrt(y[:, 0] ** 2 + y[:, 1] ** 2)
  211. return out[:, np.newaxis]
  212. def inverse_func(y):
  213. return y
  214. tt = TransformedTargetRegressor(
  215. func=func, inverse_func=inverse_func, check_inverse=False
  216. )
  217. tt.fit(X, y)
  218. y_pred_2d_func = tt.predict(X)
  219. assert y_pred_2d_func.shape == (100, 1)
  220. # force that the function only return a 1D array
  221. def func(y):
  222. return np.sqrt(y[:, 0] ** 2 + y[:, 1] ** 2)
  223. tt = TransformedTargetRegressor(
  224. func=func, inverse_func=inverse_func, check_inverse=False
  225. )
  226. tt.fit(X, y)
  227. y_pred_1d_func = tt.predict(X)
  228. assert y_pred_1d_func.shape == (100, 1)
  229. assert_allclose(y_pred_1d_func, y_pred_2d_func)
  230. class DummyCheckerArrayTransformer(TransformerMixin, BaseEstimator):
  231. def fit(self, X, y=None):
  232. assert isinstance(X, np.ndarray)
  233. return self
  234. def transform(self, X):
  235. assert isinstance(X, np.ndarray)
  236. return X
  237. def inverse_transform(self, X):
  238. assert isinstance(X, np.ndarray)
  239. return X
  240. class DummyCheckerListRegressor(DummyRegressor):
  241. def fit(self, X, y, sample_weight=None):
  242. assert isinstance(X, list)
  243. return super().fit(X, y, sample_weight)
  244. def predict(self, X):
  245. assert isinstance(X, list)
  246. return super().predict(X)
  247. def test_transform_target_regressor_ensure_y_array():
  248. # check that the target ``y`` passed to the transformer will always be a
  249. # numpy array. Similarly, if ``X`` is passed as a list, we check that the
  250. # predictor receive as it is.
  251. X, y = friedman
  252. tt = TransformedTargetRegressor(
  253. transformer=DummyCheckerArrayTransformer(),
  254. regressor=DummyCheckerListRegressor(),
  255. check_inverse=False,
  256. )
  257. tt.fit(X.tolist(), y.tolist())
  258. tt.predict(X.tolist())
  259. with pytest.raises(AssertionError):
  260. tt.fit(X, y.tolist())
  261. with pytest.raises(AssertionError):
  262. tt.predict(X)
  263. class DummyTransformer(TransformerMixin, BaseEstimator):
  264. """Dummy transformer which count how many time fit was called."""
  265. def __init__(self, fit_counter=0):
  266. self.fit_counter = fit_counter
  267. def fit(self, X, y=None):
  268. self.fit_counter += 1
  269. return self
  270. def transform(self, X):
  271. return X
  272. def inverse_transform(self, X):
  273. return X
  274. @pytest.mark.parametrize("check_inverse", [False, True])
  275. def test_transform_target_regressor_count_fit(check_inverse):
  276. # regression test for gh-issue #11618
  277. # check that we only call a single time fit for the transformer
  278. X, y = friedman
  279. ttr = TransformedTargetRegressor(
  280. transformer=DummyTransformer(), check_inverse=check_inverse
  281. )
  282. ttr.fit(X, y)
  283. assert ttr.transformer_.fit_counter == 1
  284. class DummyRegressorWithExtraFitParams(DummyRegressor):
  285. def fit(self, X, y, sample_weight=None, check_input=True):
  286. # on the test below we force this to false, we make sure this is
  287. # actually passed to the regressor
  288. assert not check_input
  289. return super().fit(X, y, sample_weight)
  290. def test_transform_target_regressor_pass_fit_parameters():
  291. X, y = friedman
  292. regr = TransformedTargetRegressor(
  293. regressor=DummyRegressorWithExtraFitParams(), transformer=DummyTransformer()
  294. )
  295. regr.fit(X, y, check_input=False)
  296. assert regr.transformer_.fit_counter == 1
  297. def test_transform_target_regressor_route_pipeline():
  298. X, y = friedman
  299. regr = TransformedTargetRegressor(
  300. regressor=DummyRegressorWithExtraFitParams(), transformer=DummyTransformer()
  301. )
  302. estimators = [("normalize", StandardScaler()), ("est", regr)]
  303. pip = Pipeline(estimators)
  304. pip.fit(X, y, **{"est__check_input": False})
  305. assert regr.transformer_.fit_counter == 1
  306. class DummyRegressorWithExtraPredictParams(DummyRegressor):
  307. def predict(self, X, check_input=True):
  308. # In the test below we make sure that the check input parameter is
  309. # passed as false
  310. self.predict_called = True
  311. assert not check_input
  312. return super().predict(X)
  313. def test_transform_target_regressor_pass_extra_predict_parameters():
  314. # Checks that predict kwargs are passed to regressor.
  315. X, y = friedman
  316. regr = TransformedTargetRegressor(
  317. regressor=DummyRegressorWithExtraPredictParams(), transformer=DummyTransformer()
  318. )
  319. regr.fit(X, y)
  320. regr.predict(X, check_input=False)
  321. assert regr.regressor_.predict_called