import warnings
from itertools import product

import numpy as np
import pytest
import scipy.sparse as sp
from scipy import linalg

from sklearn import datasets
from sklearn.datasets import (
    make_classification,
    make_low_rank_matrix,
    make_multilabel_classification,
    make_regression,
)
from sklearn.exceptions import ConvergenceWarning
from sklearn.linear_model import (
    LinearRegression,
    Ridge,
    RidgeClassifier,
    RidgeClassifierCV,
    RidgeCV,
    ridge_regression,
)
from sklearn.linear_model._ridge import (
    _check_gcv_mode,
    _RidgeGCV,
    _solve_cholesky,
    _solve_cholesky_kernel,
    _solve_lbfgs,
    _solve_svd,
    _X_CenterStackOp,
)
from sklearn.metrics import get_scorer, make_scorer, mean_squared_error
from sklearn.model_selection import (
    GridSearchCV,
    GroupKFold,
    KFold,
    LeaveOneOut,
    cross_val_predict,
)
from sklearn.preprocessing import minmax_scale
from sklearn.utils import _IS_32BIT, check_random_state
from sklearn.utils._testing import (
    assert_allclose,
    assert_almost_equal,
    assert_array_almost_equal,
    assert_array_equal,
    ignore_warnings,
)

SOLVERS = ["svd", "sparse_cg", "cholesky", "lsqr", "sag", "saga"]
SPARSE_SOLVERS_WITH_INTERCEPT = ("sparse_cg", "sag")
SPARSE_SOLVERS_WITHOUT_INTERCEPT = ("sparse_cg", "cholesky", "lsqr", "sag", "saga")

diabetes = datasets.load_diabetes()
X_diabetes, y_diabetes = diabetes.data, diabetes.target
ind = np.arange(X_diabetes.shape[0])
rng = np.random.RandomState(0)
rng.shuffle(ind)
ind = ind[:200]
X_diabetes, y_diabetes = X_diabetes[ind], y_diabetes[ind]

iris = datasets.load_iris()
X_iris = sp.csr_matrix(iris.data)
y_iris = iris.target


def DENSE_FILTER(X):
    return X


def SPARSE_FILTER(X):
    return sp.csr_matrix(X)


def _accuracy_callable(y_test, y_pred):
    return np.mean(y_test == y_pred)


def _mean_squared_error_callable(y_test, y_pred):
    return ((y_test - y_pred) ** 2).mean()


@pytest.fixture(params=["long", "wide"])
def ols_ridge_dataset(global_random_seed, request):
    """Dataset with OLS and Ridge solutions, well conditioned X.

    The construction is based on the SVD decomposition of X = U S V'.

    Parameters
    ----------
    type : {"long", "wide"}
        If "long", then n_samples > n_features.
        If "wide", then n_features > n_samples.
        For "wide", we return the minimum norm solution w = X' (XX')^-1 y:
            min ||w||_2 subject to X w = y

    Returns
    -------
    X : ndarray
        Last column of 1, i.e. intercept.
    y : ndarray
    coef_ols : ndarray of shape
        Minimum norm OLS solution, i.e. min ||X w - y||_2^2 (with minimum ||w||_2 in
        case of ambiguity).
        Last coefficient is intercept.
    coef_ridge : ndarray of shape (5,)
        Ridge solution with alpha=1, i.e. min ||X w - y||_2^2 + ||w||_2^2.
        Last coefficient is intercept.
    """
    # Make the larger dimension more than twice as big as the smaller one.
    # This helps when constructing singular matrices like (X, X).
    if request.param == "long":
        n_samples, n_features = 12, 4
    else:
        n_samples, n_features = 4, 12
    k = min(n_samples, n_features)
    rng = np.random.RandomState(global_random_seed)
    X = make_low_rank_matrix(
        n_samples=n_samples, n_features=n_features, effective_rank=k, random_state=rng
    )
    X[:, -1] = 1  # last column acts as intercept
    U, s, Vt = linalg.svd(X)
    assert np.all(s > 1e-3)  # to be sure
    U1, U2 = U[:, :k], U[:, k:]
    Vt1, _ = Vt[:k, :], Vt[k:, :]

    if request.param == "long":
        # Add a term that vanishes in the product X'y
        coef_ols = rng.uniform(low=-10, high=10, size=n_features)
        y = X @ coef_ols
        y += U2 @ rng.normal(size=n_samples - n_features) ** 2
    else:
        y = rng.uniform(low=-10, high=10, size=n_samples)
        # w = X'(XX')^-1 y = V s^-1 U' y
        coef_ols = Vt1.T @ np.diag(1 / s) @ U1.T @ y

    # Add penalty alpha * ||coef||_2^2 for alpha=1 and solve via normal equations.
    # Note that the problem is well conditioned such that we get accurate results.
    alpha = 1
    d = alpha * np.identity(n_features)
    d[-1, -1] = 0  # intercept gets no penalty
    coef_ridge = linalg.solve(X.T @ X + d, X.T @ y)

    # To be sure
    R_OLS = y - X @ coef_ols
    R_Ridge = y - X @ coef_ridge
    assert np.linalg.norm(R_OLS) < np.linalg.norm(R_Ridge)

    return X, y, coef_ols, coef_ridge


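# Illustrative sketch, not part of the original test suite: the fixture above builds
# its reference ridge coefficients by solving the normal equations
# (X'X + alpha * d) w = X'y, which is what the tests below compare the Ridge solvers
# against. The hypothetical helper below shows that identity on a small,
# well-conditioned random problem (fit_intercept=False so no centering is involved).
def _sketch_ridge_normal_equations():
    rng = np.random.RandomState(0)
    X = rng.randn(20, 5)
    y = rng.randn(20)
    alpha = 1.0
    # Closed-form ridge solution from the normal equations.
    coef_normal_eq = linalg.solve(X.T @ X + alpha * np.identity(5), X.T @ y)
    # The estimator should agree up to numerical precision.
    coef_ridge = Ridge(alpha=alpha, fit_intercept=False).fit(X, y).coef_
    assert_allclose(coef_ridge, coef_normal_eq)

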
@pytest.mark.parametrize("solver", SOLVERS)
@pytest.mark.parametrize("fit_intercept", [True, False])
def test_ridge_regression(solver, fit_intercept, ols_ridge_dataset, global_random_seed):
    """Test that Ridge converges for all solvers to correct solution.

    We work with a simple constructed data set with known solution.
    """
    X, y, _, coef = ols_ridge_dataset
    alpha = 1.0  # because ols_ridge_dataset uses this.
    params = dict(
        alpha=alpha,
        fit_intercept=True,
        solver=solver,
        tol=1e-15 if solver in ("sag", "saga") else 1e-10,
        random_state=global_random_seed,
    )

    # Calculate residuals and R2.
    res_null = y - np.mean(y)
    res_Ridge = y - X @ coef
    R2_Ridge = 1 - np.sum(res_Ridge**2) / np.sum(res_null**2)

    model = Ridge(**params)
    X = X[:, :-1]  # remove intercept
    if fit_intercept:
        intercept = coef[-1]
    else:
        X = X - X.mean(axis=0)
        y = y - y.mean()
        intercept = 0
    model.fit(X, y)
    coef = coef[:-1]

    assert model.intercept_ == pytest.approx(intercept)
    assert_allclose(model.coef_, coef)
    assert model.score(X, y) == pytest.approx(R2_Ridge)

    # Same with sample_weight.
    model = Ridge(**params).fit(X, y, sample_weight=np.ones(X.shape[0]))
    assert model.intercept_ == pytest.approx(intercept)
    assert_allclose(model.coef_, coef)
    assert model.score(X, y) == pytest.approx(R2_Ridge)


@pytest.mark.parametrize("solver", SOLVERS)
@pytest.mark.parametrize("fit_intercept", [True, False])
def test_ridge_regression_hstacked_X(
    solver, fit_intercept, ols_ridge_dataset, global_random_seed
):
    """Test that Ridge converges for all solvers to correct solution on hstacked data.

    We work with a simple constructed data set with known solution.
    Fit on [X] with alpha is the same as fit on [X, X]/2 with alpha/2.
    For long X, [X, X] is a singular matrix.
    """
    X, y, _, coef = ols_ridge_dataset
    n_samples, n_features = X.shape
    alpha = 1.0  # because ols_ridge_dataset uses this.

    model = Ridge(
        alpha=alpha / 2,
        fit_intercept=fit_intercept,
        solver=solver,
        tol=1e-15 if solver in ("sag", "saga") else 1e-10,
        random_state=global_random_seed,
    )
    X = X[:, :-1]  # remove intercept
    X = 0.5 * np.concatenate((X, X), axis=1)
    assert np.linalg.matrix_rank(X) <= min(n_samples, n_features - 1)
    if fit_intercept:
        intercept = coef[-1]
    else:
        X = X - X.mean(axis=0)
        y = y - y.mean()
        intercept = 0
    model.fit(X, y)
    coef = coef[:-1]

    assert model.intercept_ == pytest.approx(intercept)
    # coefficients are not all on the same magnitude, adding a small atol to
    # make this test less brittle
    assert_allclose(model.coef_, np.r_[coef, coef], atol=1e-8)


@pytest.mark.parametrize("solver", SOLVERS)
@pytest.mark.parametrize("fit_intercept", [True, False])
def test_ridge_regression_vstacked_X(
    solver, fit_intercept, ols_ridge_dataset, global_random_seed
):
    """Test that Ridge converges for all solvers to correct solution on vstacked data.

    We work with a simple constructed data set with known solution.
    Fit on [X] with alpha is the same as fit on
        [X], [y]
        [X], [y] with 2 * alpha.
    For wide X, [X', X'] is a singular matrix.
    """
    X, y, _, coef = ols_ridge_dataset
    n_samples, n_features = X.shape
    alpha = 1.0  # because ols_ridge_dataset uses this.

    model = Ridge(
        alpha=2 * alpha,
        fit_intercept=fit_intercept,
        solver=solver,
        tol=1e-15 if solver in ("sag", "saga") else 1e-10,
        random_state=global_random_seed,
    )
    X = X[:, :-1]  # remove intercept
    X = np.concatenate((X, X), axis=0)
    assert np.linalg.matrix_rank(X) <= min(n_samples, n_features)
    y = np.r_[y, y]
    if fit_intercept:
        intercept = coef[-1]
    else:
        X = X - X.mean(axis=0)
        y = y - y.mean()
        intercept = 0
    model.fit(X, y)
    coef = coef[:-1]

    assert model.intercept_ == pytest.approx(intercept)
    # coefficients are not all on the same magnitude, adding a small atol to
    # make this test less brittle
    assert_allclose(model.coef_, coef, atol=1e-8)


@pytest.mark.parametrize("solver", SOLVERS)
@pytest.mark.parametrize("fit_intercept", [True, False])
def test_ridge_regression_unpenalized(
    solver, fit_intercept, ols_ridge_dataset, global_random_seed
):
    """Test that unpenalized Ridge = OLS converges for all solvers to correct solution.

    We work with a simple constructed data set with known solution.
    Note: This checks the minimum norm solution for wide X, i.e.
    n_samples < n_features:
        min ||w||_2 subject to X w = y
    """
    X, y, coef, _ = ols_ridge_dataset
    n_samples, n_features = X.shape
    alpha = 0  # OLS
    params = dict(
        alpha=alpha,
        fit_intercept=fit_intercept,
        solver=solver,
        tol=1e-15 if solver in ("sag", "saga") else 1e-10,
        random_state=global_random_seed,
    )

    model = Ridge(**params)
    # Note that cholesky might give a warning: "Singular matrix in solving dual
    # problem. Using least-squares solution instead."
    if fit_intercept:
        X = X[:, :-1]  # remove intercept
        intercept = coef[-1]
        coef = coef[:-1]
    else:
        intercept = 0
    model.fit(X, y)

    # FIXME: `assert_allclose(model.coef_, coef)` should work for all cases but fails
    # for the wide/fat case with n_features > n_samples. The current Ridge solvers do
    # NOT return the minimum norm solution with fit_intercept=True.
    if n_samples > n_features or not fit_intercept:
        assert model.intercept_ == pytest.approx(intercept)
        assert_allclose(model.coef_, coef)
    else:
        # As it is an underdetermined problem, residuals = 0. This shows that we get
        # a solution to X w = y ....
        assert_allclose(model.predict(X), y)
        assert_allclose(X @ coef + intercept, y)
        # But it is not the minimum norm solution. (This should be equal.)
        assert np.linalg.norm(np.r_[model.intercept_, model.coef_]) > np.linalg.norm(
            np.r_[intercept, coef]
        )

        pytest.xfail(reason="Ridge does not provide the minimum norm solution.")
        assert model.intercept_ == pytest.approx(intercept)
        assert_allclose(model.coef_, coef)


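# Illustrative sketch, not part of the original test suite: the "minimum norm"
# solution referenced in the test above is what the Moore-Penrose pseudoinverse
# (and np.linalg.lstsq) return for an underdetermined system. A hypothetical
# standalone example for a wide X:
def _sketch_minimum_norm_ols():
    rng = np.random.RandomState(0)
    X = rng.randn(4, 12)  # n_samples < n_features, underdetermined
    y = rng.randn(4)
    coef_pinv = np.linalg.pinv(X) @ y
    coef_lstsq = np.linalg.lstsq(X, y, rcond=None)[0]
    # Both interpolate the data exactly (zero residual) ...
    assert_allclose(X @ coef_pinv, y)
    # ... and agree on the minimum ||w||_2 solution.
    assert_allclose(coef_pinv, coef_lstsq)

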
@pytest.mark.parametrize("solver", SOLVERS)
@pytest.mark.parametrize("fit_intercept", [True, False])
def test_ridge_regression_unpenalized_hstacked_X(
    solver, fit_intercept, ols_ridge_dataset, global_random_seed
):
    """Test that unpenalized Ridge = OLS converges for all solvers to correct solution.

    We work with a simple constructed data set with known solution.
    OLS fit on [X] is the same as fit on [X, X]/2.
    For long X, [X, X] is a singular matrix and we check against the minimum norm
    solution:
        min ||w||_2 subject to min ||X w - y||_2
    """
    X, y, coef, _ = ols_ridge_dataset
    n_samples, n_features = X.shape
    alpha = 0  # OLS

    model = Ridge(
        alpha=alpha,
        fit_intercept=fit_intercept,
        solver=solver,
        tol=1e-15 if solver in ("sag", "saga") else 1e-10,
        random_state=global_random_seed,
    )
    if fit_intercept:
        X = X[:, :-1]  # remove intercept
        intercept = coef[-1]
        coef = coef[:-1]
    else:
        intercept = 0
    X = 0.5 * np.concatenate((X, X), axis=1)
    assert np.linalg.matrix_rank(X) <= min(n_samples, n_features)
    model.fit(X, y)

    if n_samples > n_features or not fit_intercept:
        assert model.intercept_ == pytest.approx(intercept)
        if solver == "cholesky":
            # Cholesky is a bad choice for singular X.
            pytest.skip()
        assert_allclose(model.coef_, np.r_[coef, coef])
    else:
        # FIXME: Same as in test_ridge_regression_unpenalized.
        # As it is an underdetermined problem, residuals = 0. This shows that we get
        # a solution to X w = y ....
        assert_allclose(model.predict(X), y)
        # But it is not the minimum norm solution. (This should be equal.)
        assert np.linalg.norm(np.r_[model.intercept_, model.coef_]) > np.linalg.norm(
            np.r_[intercept, coef, coef]
        )

        pytest.xfail(reason="Ridge does not provide the minimum norm solution.")
        assert model.intercept_ == pytest.approx(intercept)
        assert_allclose(model.coef_, np.r_[coef, coef])


@pytest.mark.parametrize("solver", SOLVERS)
@pytest.mark.parametrize("fit_intercept", [True, False])
def test_ridge_regression_unpenalized_vstacked_X(
    solver, fit_intercept, ols_ridge_dataset, global_random_seed
):
    """Test that unpenalized Ridge = OLS converges for all solvers to correct solution.

    We work with a simple constructed data set with known solution.
    OLS fit on [X] is the same as fit on
        [X], [y]
        [X], [y].
    For wide X, [X', X'] is a singular matrix and we check against the minimum norm
    solution:
        min ||w||_2 subject to X w = y
    """
    X, y, coef, _ = ols_ridge_dataset
    n_samples, n_features = X.shape
    alpha = 0  # OLS

    model = Ridge(
        alpha=alpha,
        fit_intercept=fit_intercept,
        solver=solver,
        tol=1e-15 if solver in ("sag", "saga") else 1e-10,
        random_state=global_random_seed,
    )
    if fit_intercept:
        X = X[:, :-1]  # remove intercept
        intercept = coef[-1]
        coef = coef[:-1]
    else:
        intercept = 0
    X = np.concatenate((X, X), axis=0)
    assert np.linalg.matrix_rank(X) <= min(n_samples, n_features)
    y = np.r_[y, y]
    model.fit(X, y)

    if n_samples > n_features or not fit_intercept:
        assert model.intercept_ == pytest.approx(intercept)
        assert_allclose(model.coef_, coef)
    else:
        # FIXME: Same as in test_ridge_regression_unpenalized.
        # As it is an underdetermined problem, residuals = 0. This shows that we get
        # a solution to X w = y ....
        assert_allclose(model.predict(X), y)
        # But it is not the minimum norm solution. (This should be equal.)
        assert np.linalg.norm(np.r_[model.intercept_, model.coef_]) > np.linalg.norm(
            np.r_[intercept, coef]
        )

        pytest.xfail(reason="Ridge does not provide the minimum norm solution.")
        assert model.intercept_ == pytest.approx(intercept)
        assert_allclose(model.coef_, coef)


@pytest.mark.parametrize("solver", SOLVERS)
@pytest.mark.parametrize("fit_intercept", [True, False])
@pytest.mark.parametrize("sparseX", [True, False])
@pytest.mark.parametrize("alpha", [1.0, 1e-2])
def test_ridge_regression_sample_weights(
    solver, fit_intercept, sparseX, alpha, ols_ridge_dataset, global_random_seed
):
    """Test that Ridge with sample weights gives correct results.

    We use the following trick:
        ||y - Xw||_2^2 = (z - Aw)' W (z - Aw)
    for z = [y, y], A' = [X', X'] (vstacked), and diagonal W with
    W[:n/2] + W[n/2:] = 1.
    """
    if sparseX:
        if fit_intercept and solver not in SPARSE_SOLVERS_WITH_INTERCEPT:
            pytest.skip()
        elif not fit_intercept and solver not in SPARSE_SOLVERS_WITHOUT_INTERCEPT:
            pytest.skip()
    X, y, _, coef = ols_ridge_dataset
    n_samples, n_features = X.shape
    sw = rng.uniform(low=0, high=1, size=n_samples)

    model = Ridge(
        alpha=alpha,
        fit_intercept=fit_intercept,
        solver=solver,
        tol=1e-15 if solver in ["sag", "saga"] else 1e-10,
        max_iter=100_000,
        random_state=global_random_seed,
    )
    X = X[:, :-1]  # remove intercept
    X = np.concatenate((X, X), axis=0)
    y = np.r_[y, y]
    sw = np.r_[sw, 1 - sw] * alpha
    if fit_intercept:
        intercept = coef[-1]
    else:
        X = X - X.mean(axis=0)
        y = y - y.mean()
        intercept = 0
    if sparseX:
        X = sp.csr_matrix(X)
    model.fit(X, y, sample_weight=sw)
    coef = coef[:-1]

    assert model.intercept_ == pytest.approx(intercept)
    assert_allclose(model.coef_, coef)


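# Illustrative sketch, not part of the original test suite: the docstring trick above
# in its simplest form. Duplicating every row and giving the two copies weights that
# sum to 1 leaves the weighted squared error, and hence the ridge solution, unchanged.
# A hypothetical example with plain Ridge:
def _sketch_sample_weight_row_duplication():
    rng = np.random.RandomState(0)
    X = rng.randn(10, 3)
    y = rng.randn(10)
    w = rng.uniform(low=0.2, high=0.8, size=10)
    ridge_plain = Ridge(alpha=1.0, fit_intercept=False).fit(X, y)
    ridge_split = Ridge(alpha=1.0, fit_intercept=False).fit(
        np.vstack((X, X)), np.r_[y, y], sample_weight=np.r_[w, 1 - w]
    )
    assert_allclose(ridge_split.coef_, ridge_plain.coef_)

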
def test_primal_dual_relationship():
    y = y_diabetes.reshape(-1, 1)
    coef = _solve_cholesky(X_diabetes, y, alpha=[1e-2])
    K = np.dot(X_diabetes, X_diabetes.T)
    dual_coef = _solve_cholesky_kernel(K, y, alpha=[1e-2])
    coef2 = np.dot(X_diabetes.T, dual_coef).T
    assert_array_almost_equal(coef, coef2)


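# Illustrative sketch, not part of the original test suite: the primal/dual identity
# checked above, written out explicitly. With K = X X', the dual (kernel) coefficients
# are (K + alpha * I)^-1 y, and mapping them back through X' recovers the usual primal
# solution (X'X + alpha * I)^-1 X'y. A hypothetical single-target example:
def _sketch_primal_dual_identity():
    rng = np.random.RandomState(0)
    X = rng.randn(15, 4)
    y = rng.randn(15)
    alpha = 1e-2
    coef_primal = linalg.solve(X.T @ X + alpha * np.identity(4), X.T @ y)
    dual_coef = linalg.solve(X @ X.T + alpha * np.identity(15), y)
    coef_from_dual = X.T @ dual_coef
    assert_allclose(coef_primal, coef_from_dual)

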
def test_ridge_regression_convergence_fail():
    rng = np.random.RandomState(0)
    y = rng.randn(5)
    X = rng.randn(5, 10)
    warning_message = r"sparse_cg did not converge after" r" [0-9]+ iterations."
    with pytest.warns(ConvergenceWarning, match=warning_message):
        ridge_regression(
            X, y, alpha=1.0, solver="sparse_cg", tol=0.0, max_iter=None, verbose=1
        )


def test_ridge_shapes_type():
    # Test shape of coef_ and intercept_
    rng = np.random.RandomState(0)
    n_samples, n_features = 5, 10
    X = rng.randn(n_samples, n_features)
    y = rng.randn(n_samples)
    Y1 = y[:, np.newaxis]
    Y = np.c_[y, 1 + y]

    ridge = Ridge()

    ridge.fit(X, y)
    assert ridge.coef_.shape == (n_features,)
    assert ridge.intercept_.shape == ()
    assert isinstance(ridge.coef_, np.ndarray)
    assert isinstance(ridge.intercept_, float)

    ridge.fit(X, Y1)
    assert ridge.coef_.shape == (1, n_features)
    assert ridge.intercept_.shape == (1,)
    assert isinstance(ridge.coef_, np.ndarray)
    assert isinstance(ridge.intercept_, np.ndarray)

    ridge.fit(X, Y)
    assert ridge.coef_.shape == (2, n_features)
    assert ridge.intercept_.shape == (2,)
    assert isinstance(ridge.coef_, np.ndarray)
    assert isinstance(ridge.intercept_, np.ndarray)


def test_ridge_intercept():
    # Test intercept with multiple targets GH issue #708
    rng = np.random.RandomState(0)
    n_samples, n_features = 5, 10
    X = rng.randn(n_samples, n_features)
    y = rng.randn(n_samples)
    Y = np.c_[y, 1.0 + y]

    ridge = Ridge()

    ridge.fit(X, y)
    intercept = ridge.intercept_

    ridge.fit(X, Y)
    assert_almost_equal(ridge.intercept_[0], intercept)
    assert_almost_equal(ridge.intercept_[1], intercept + 1.0)


def test_ridge_vs_lstsq():
    # On alpha=0., Ridge and OLS yield the same solution.
    rng = np.random.RandomState(0)
    # we need more samples than features
    n_samples, n_features = 5, 4
    y = rng.randn(n_samples)
    X = rng.randn(n_samples, n_features)

    ridge = Ridge(alpha=0.0, fit_intercept=False)
    ols = LinearRegression(fit_intercept=False)

    ridge.fit(X, y)
    ols.fit(X, y)
    assert_almost_equal(ridge.coef_, ols.coef_)

    ridge.fit(X, y)
    ols.fit(X, y)
    assert_almost_equal(ridge.coef_, ols.coef_)


def test_ridge_individual_penalties():
    # Tests the ridge object using individual penalties
    rng = np.random.RandomState(42)

    n_samples, n_features, n_targets = 20, 10, 5
    X = rng.randn(n_samples, n_features)
    y = rng.randn(n_samples, n_targets)

    penalties = np.arange(n_targets)

    coef_cholesky = np.array(
        [
            Ridge(alpha=alpha, solver="cholesky").fit(X, target).coef_
            for alpha, target in zip(penalties, y.T)
        ]
    )

    coefs_indiv_pen = [
        Ridge(alpha=penalties, solver=solver, tol=1e-12).fit(X, y).coef_
        for solver in ["svd", "sparse_cg", "lsqr", "cholesky", "sag", "saga"]
    ]
    for coef_indiv_pen in coefs_indiv_pen:
        assert_array_almost_equal(coef_cholesky, coef_indiv_pen)

    # Test error is raised when number of targets and penalties do not match.
    ridge = Ridge(alpha=penalties[:-1])
    err_msg = "Number of targets and number of penalties do not correspond: 4 != 5"
    with pytest.raises(ValueError, match=err_msg):
        ridge.fit(X, y)


@pytest.mark.parametrize("n_col", [(), (1,), (3,)])
def test_X_CenterStackOp(n_col):
    rng = np.random.RandomState(0)
    X = rng.randn(11, 8)
    X_m = rng.randn(8)
    sqrt_sw = rng.randn(len(X))
    Y = rng.randn(11, *n_col)
    A = rng.randn(9, *n_col)
    operator = _X_CenterStackOp(sp.csr_matrix(X), X_m, sqrt_sw)
    reference_operator = np.hstack([X - sqrt_sw[:, None] * X_m, sqrt_sw[:, None]])
    assert_allclose(reference_operator.dot(A), operator.dot(A))
    assert_allclose(reference_operator.T.dot(Y), operator.T.dot(Y))


@pytest.mark.parametrize("shape", [(10, 1), (13, 9), (3, 7), (2, 2), (20, 20)])
@pytest.mark.parametrize("uniform_weights", [True, False])
def test_compute_gram(shape, uniform_weights):
    rng = np.random.RandomState(0)
    X = rng.randn(*shape)
    if uniform_weights:
        sw = np.ones(X.shape[0])
    else:
        sw = rng.chisquare(1, shape[0])
    sqrt_sw = np.sqrt(sw)
    X_mean = np.average(X, axis=0, weights=sw)
    X_centered = (X - X_mean) * sqrt_sw[:, None]
    true_gram = X_centered.dot(X_centered.T)
    X_sparse = sp.csr_matrix(X * sqrt_sw[:, None])
    gcv = _RidgeGCV(fit_intercept=True)
    computed_gram, computed_mean = gcv._compute_gram(X_sparse, sqrt_sw)
    assert_allclose(X_mean, computed_mean)
    assert_allclose(true_gram, computed_gram)


@pytest.mark.parametrize("shape", [(10, 1), (13, 9), (3, 7), (2, 2), (20, 20)])
@pytest.mark.parametrize("uniform_weights", [True, False])
def test_compute_covariance(shape, uniform_weights):
    rng = np.random.RandomState(0)
    X = rng.randn(*shape)
    if uniform_weights:
        sw = np.ones(X.shape[0])
    else:
        sw = rng.chisquare(1, shape[0])
    sqrt_sw = np.sqrt(sw)
    X_mean = np.average(X, axis=0, weights=sw)
    X_centered = (X - X_mean) * sqrt_sw[:, None]
    true_covariance = X_centered.T.dot(X_centered)
    X_sparse = sp.csr_matrix(X * sqrt_sw[:, None])
    gcv = _RidgeGCV(fit_intercept=True)
    computed_cov, computed_mean = gcv._compute_covariance(X_sparse, sqrt_sw)
    assert_allclose(X_mean, computed_mean)
    assert_allclose(true_covariance, computed_cov)


def _make_sparse_offset_regression(
    n_samples=100,
    n_features=100,
    proportion_nonzero=0.5,
    n_informative=10,
    n_targets=1,
    bias=13.0,
    X_offset=30.0,
    noise=30.0,
    shuffle=True,
    coef=False,
    positive=False,
    random_state=None,
):
    X, y, c = make_regression(
        n_samples=n_samples,
        n_features=n_features,
        n_informative=n_informative,
        n_targets=n_targets,
        bias=bias,
        noise=noise,
        shuffle=shuffle,
        coef=True,
        random_state=random_state,
    )
    if n_features == 1:
        c = np.asarray([c])
    X += X_offset
    mask = (
        np.random.RandomState(random_state).binomial(1, proportion_nonzero, X.shape) > 0
    )
    removed_X = X.copy()
    X[~mask] = 0.0
    removed_X[mask] = 0.0
    y -= removed_X.dot(c)
    if positive:
        y += X.dot(np.abs(c) + 1 - c)
        c = np.abs(c) + 1
    if n_features == 1:
        c = c[0]
    if coef:
        return X, y, c
    return X, y


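# Illustrative sketch, not part of the original test suite: typical usage of the
# helper above. With coef=True it also returns the true coefficients of the shifted,
# partially zeroed-out design used by the solver and GCV tests below.
def _sketch_make_sparse_offset_regression_usage():
    X, y, true_coef = _make_sparse_offset_regression(
        n_samples=30, n_features=10, coef=True, random_state=0
    )
    assert X.shape == (30, 10)
    assert y.shape == (30,)
    assert true_coef.shape == (10,)

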
@pytest.mark.parametrize(
    "solver, sparse_X",
    (
        (solver, sparse_X)
        for (solver, sparse_X) in product(
            ["cholesky", "sag", "sparse_cg", "lsqr", "saga", "ridgecv"],
            [False, True],
        )
        if not (sparse_X and solver not in ["sparse_cg", "ridgecv"])
    ),
)
@pytest.mark.parametrize(
    "n_samples,dtype,proportion_nonzero",
    [(20, "float32", 0.1), (40, "float32", 1.0), (20, "float64", 0.2)],
)
@pytest.mark.parametrize("seed", np.arange(3))
def test_solver_consistency(
    solver, proportion_nonzero, n_samples, dtype, sparse_X, seed
):
    alpha = 1.0
    noise = 50.0 if proportion_nonzero > 0.9 else 500.0
    X, y = _make_sparse_offset_regression(
        bias=10,
        n_features=30,
        proportion_nonzero=proportion_nonzero,
        noise=noise,
        random_state=seed,
        n_samples=n_samples,
    )

    # Manually scale the data to avoid pathological cases. We use
    # minmax_scale to deal with the sparse case without breaking
    # the sparsity pattern.
    X = minmax_scale(X)

    svd_ridge = Ridge(solver="svd", alpha=alpha).fit(X, y)
    X = X.astype(dtype, copy=False)
    y = y.astype(dtype, copy=False)
    if sparse_X:
        X = sp.csr_matrix(X)
    if solver == "ridgecv":
        ridge = RidgeCV(alphas=[alpha])
    else:
        ridge = Ridge(solver=solver, tol=1e-10, alpha=alpha)
    ridge.fit(X, y)
    assert_allclose(ridge.coef_, svd_ridge.coef_, atol=1e-3, rtol=1e-3)
    assert_allclose(ridge.intercept_, svd_ridge.intercept_, atol=1e-3, rtol=1e-3)


@pytest.mark.parametrize("gcv_mode", ["svd", "eigen"])
@pytest.mark.parametrize("X_constructor", [np.asarray, sp.csr_matrix])
@pytest.mark.parametrize("X_shape", [(11, 8), (11, 20)])
@pytest.mark.parametrize("fit_intercept", [True, False])
@pytest.mark.parametrize(
    "y_shape, noise",
    [
        ((11,), 1.0),
        ((11, 1), 30.0),
        ((11, 3), 150.0),
    ],
)
def test_ridge_gcv_vs_ridge_loo_cv(
    gcv_mode, X_constructor, X_shape, y_shape, fit_intercept, noise
):
    n_samples, n_features = X_shape
    n_targets = y_shape[-1] if len(y_shape) == 2 else 1
    X, y = _make_sparse_offset_regression(
        n_samples=n_samples,
        n_features=n_features,
        n_targets=n_targets,
        random_state=0,
        shuffle=False,
        noise=noise,
        n_informative=5,
    )
    y = y.reshape(y_shape)

    alphas = [1e-3, 0.1, 1.0, 10.0, 1e3]
    loo_ridge = RidgeCV(
        cv=n_samples,
        fit_intercept=fit_intercept,
        alphas=alphas,
        scoring="neg_mean_squared_error",
    )
    gcv_ridge = RidgeCV(
        gcv_mode=gcv_mode,
        fit_intercept=fit_intercept,
        alphas=alphas,
    )

    loo_ridge.fit(X, y)

    X_gcv = X_constructor(X)
    gcv_ridge.fit(X_gcv, y)

    assert gcv_ridge.alpha_ == pytest.approx(loo_ridge.alpha_)
    assert_allclose(gcv_ridge.coef_, loo_ridge.coef_, rtol=1e-3)
    assert_allclose(gcv_ridge.intercept_, loo_ridge.intercept_, rtol=1e-3)


def test_ridge_loo_cv_asym_scoring():
    # checking on asymmetric scoring
    scoring = "explained_variance"
    n_samples, n_features = 10, 5
    n_targets = 1
    X, y = _make_sparse_offset_regression(
        n_samples=n_samples,
        n_features=n_features,
        n_targets=n_targets,
        random_state=0,
        shuffle=False,
        noise=1,
        n_informative=5,
    )

    alphas = [1e-3, 0.1, 1.0, 10.0, 1e3]
    loo_ridge = RidgeCV(
        cv=n_samples, fit_intercept=True, alphas=alphas, scoring=scoring
    )
    gcv_ridge = RidgeCV(fit_intercept=True, alphas=alphas, scoring=scoring)

    loo_ridge.fit(X, y)
    gcv_ridge.fit(X, y)

    assert gcv_ridge.alpha_ == pytest.approx(loo_ridge.alpha_)
    assert_allclose(gcv_ridge.coef_, loo_ridge.coef_, rtol=1e-3)
    assert_allclose(gcv_ridge.intercept_, loo_ridge.intercept_, rtol=1e-3)


@pytest.mark.parametrize("gcv_mode", ["svd", "eigen"])
@pytest.mark.parametrize("X_constructor", [np.asarray, sp.csr_matrix])
@pytest.mark.parametrize("n_features", [8, 20])
@pytest.mark.parametrize(
    "y_shape, fit_intercept, noise",
    [
        ((11,), True, 1.0),
        ((11, 1), True, 20.0),
        ((11, 3), True, 150.0),
        ((11, 3), False, 30.0),
    ],
)
def test_ridge_gcv_sample_weights(
    gcv_mode, X_constructor, fit_intercept, n_features, y_shape, noise
):
    alphas = [1e-3, 0.1, 1.0, 10.0, 1e3]
    rng = np.random.RandomState(0)
    n_targets = y_shape[-1] if len(y_shape) == 2 else 1
    X, y = _make_sparse_offset_regression(
        n_samples=11,
        n_features=n_features,
        n_targets=n_targets,
        random_state=0,
        shuffle=False,
        noise=noise,
    )
    y = y.reshape(y_shape)

    sample_weight = 3 * rng.randn(len(X))
    sample_weight = (sample_weight - sample_weight.min() + 1).astype(int)
    indices = np.repeat(np.arange(X.shape[0]), sample_weight)
    sample_weight = sample_weight.astype(float)
    X_tiled, y_tiled = X[indices], y[indices]

    cv = GroupKFold(n_splits=X.shape[0])
    splits = cv.split(X_tiled, y_tiled, groups=indices)
    kfold = RidgeCV(
        alphas=alphas,
        cv=splits,
        scoring="neg_mean_squared_error",
        fit_intercept=fit_intercept,
    )
    kfold.fit(X_tiled, y_tiled)

    ridge_reg = Ridge(alpha=kfold.alpha_, fit_intercept=fit_intercept)
    splits = cv.split(X_tiled, y_tiled, groups=indices)
    predictions = cross_val_predict(ridge_reg, X_tiled, y_tiled, cv=splits)
    kfold_errors = (y_tiled - predictions) ** 2
    kfold_errors = [
        np.sum(kfold_errors[indices == i], axis=0) for i in np.arange(X.shape[0])
    ]
    kfold_errors = np.asarray(kfold_errors)

    X_gcv = X_constructor(X)
    gcv_ridge = RidgeCV(
        alphas=alphas,
        store_cv_values=True,
        gcv_mode=gcv_mode,
        fit_intercept=fit_intercept,
    )
    gcv_ridge.fit(X_gcv, y, sample_weight=sample_weight)
    if len(y_shape) == 2:
        gcv_errors = gcv_ridge.cv_values_[:, :, alphas.index(kfold.alpha_)]
    else:
        gcv_errors = gcv_ridge.cv_values_[:, alphas.index(kfold.alpha_)]

    assert kfold.alpha_ == pytest.approx(gcv_ridge.alpha_)
    assert_allclose(gcv_errors, kfold_errors, rtol=1e-3)
    assert_allclose(gcv_ridge.coef_, kfold.coef_, rtol=1e-3)
    assert_allclose(gcv_ridge.intercept_, kfold.intercept_, rtol=1e-3)


@pytest.mark.parametrize("sparse", [True, False])
@pytest.mark.parametrize(
    "mode, mode_n_greater_than_p, mode_p_greater_than_n",
    [
        (None, "svd", "eigen"),
        ("auto", "svd", "eigen"),
        ("eigen", "eigen", "eigen"),
        ("svd", "svd", "svd"),
    ],
)
def test_check_gcv_mode_choice(
    sparse, mode, mode_n_greater_than_p, mode_p_greater_than_n
):
    X, _ = make_regression(n_samples=5, n_features=2)
    if sparse:
        X = sp.csr_matrix(X)
    assert _check_gcv_mode(X, mode) == mode_n_greater_than_p
    assert _check_gcv_mode(X.T, mode) == mode_p_greater_than_n


def _test_ridge_loo(filter_):
    # test that it works with both dense and sparse matrices
    n_samples = X_diabetes.shape[0]

    ret = []

    fit_intercept = filter_ == DENSE_FILTER
    ridge_gcv = _RidgeGCV(fit_intercept=fit_intercept)

    # check best alpha
    ridge_gcv.fit(filter_(X_diabetes), y_diabetes)
    alpha_ = ridge_gcv.alpha_
    ret.append(alpha_)

    # check that we get same best alpha with custom loss_func
    f = ignore_warnings
    scoring = make_scorer(mean_squared_error, greater_is_better=False)
    ridge_gcv2 = RidgeCV(fit_intercept=False, scoring=scoring)
    f(ridge_gcv2.fit)(filter_(X_diabetes), y_diabetes)
    assert ridge_gcv2.alpha_ == pytest.approx(alpha_)

    # check that we get same best alpha with custom score_func
    def func(x, y):
        return -mean_squared_error(x, y)

    scoring = make_scorer(func)
    ridge_gcv3 = RidgeCV(fit_intercept=False, scoring=scoring)
    f(ridge_gcv3.fit)(filter_(X_diabetes), y_diabetes)
    assert ridge_gcv3.alpha_ == pytest.approx(alpha_)

    # check that we get same best alpha with a scorer
    scorer = get_scorer("neg_mean_squared_error")
    ridge_gcv4 = RidgeCV(fit_intercept=False, scoring=scorer)
    ridge_gcv4.fit(filter_(X_diabetes), y_diabetes)
    assert ridge_gcv4.alpha_ == pytest.approx(alpha_)

    # check that we get same best alpha with sample weights
    if filter_ == DENSE_FILTER:
        ridge_gcv.fit(filter_(X_diabetes), y_diabetes, sample_weight=np.ones(n_samples))
        assert ridge_gcv.alpha_ == pytest.approx(alpha_)

    # simulate several responses
    Y = np.vstack((y_diabetes, y_diabetes)).T

    ridge_gcv.fit(filter_(X_diabetes), Y)
    Y_pred = ridge_gcv.predict(filter_(X_diabetes))
    ridge_gcv.fit(filter_(X_diabetes), y_diabetes)
    y_pred = ridge_gcv.predict(filter_(X_diabetes))

    assert_allclose(np.vstack((y_pred, y_pred)).T, Y_pred, rtol=1e-5)

    return ret


def _test_ridge_cv(filter_):
    ridge_cv = RidgeCV()
    ridge_cv.fit(filter_(X_diabetes), y_diabetes)
    ridge_cv.predict(filter_(X_diabetes))

    assert len(ridge_cv.coef_.shape) == 1
    assert type(ridge_cv.intercept_) == np.float64

    cv = KFold(5)
    ridge_cv.set_params(cv=cv)
    ridge_cv.fit(filter_(X_diabetes), y_diabetes)
    ridge_cv.predict(filter_(X_diabetes))

    assert len(ridge_cv.coef_.shape) == 1
    assert type(ridge_cv.intercept_) == np.float64


@pytest.mark.parametrize(
    "ridge, make_dataset",
    [
        (RidgeCV(store_cv_values=False), make_regression),
        (RidgeClassifierCV(store_cv_values=False), make_classification),
    ],
)
def test_ridge_gcv_cv_values_not_stored(ridge, make_dataset):
    # Check that `cv_values_` is not stored when store_cv_values is False
    X, y = make_dataset(n_samples=6, random_state=42)
    ridge.fit(X, y)
    assert not hasattr(ridge, "cv_values_")


@pytest.mark.parametrize(
    "ridge, make_dataset",
    [(RidgeCV(), make_regression), (RidgeClassifierCV(), make_classification)],
)
@pytest.mark.parametrize("cv", [None, 3])
def test_ridge_best_score(ridge, make_dataset, cv):
    # check that the best_score_ is stored
    X, y = make_dataset(n_samples=6, random_state=42)
    ridge.set_params(store_cv_values=False, cv=cv)
    ridge.fit(X, y)
    assert hasattr(ridge, "best_score_")
    assert isinstance(ridge.best_score_, float)


def test_ridge_cv_individual_penalties():
    # Tests the ridge_cv object optimizing individual penalties for each target
    rng = np.random.RandomState(42)

    # Create random dataset with multiple targets. Each target should have
    # a different optimal alpha.
    n_samples, n_features, n_targets = 20, 5, 3
    y = rng.randn(n_samples, n_targets)
    X = (
        np.dot(y[:, [0]], np.ones((1, n_features)))
        + np.dot(y[:, [1]], 0.05 * np.ones((1, n_features)))
        + np.dot(y[:, [2]], 0.001 * np.ones((1, n_features)))
        + rng.randn(n_samples, n_features)
    )

    alphas = (1, 100, 1000)

    # Find optimal alpha for each target
    optimal_alphas = [RidgeCV(alphas=alphas).fit(X, target).alpha_ for target in y.T]

    # Find optimal alphas for all targets simultaneously
    ridge_cv = RidgeCV(alphas=alphas, alpha_per_target=True).fit(X, y)
    assert_array_equal(optimal_alphas, ridge_cv.alpha_)

    # The resulting regression weights should incorporate the different
    # alpha values.
    assert_array_almost_equal(
        Ridge(alpha=ridge_cv.alpha_).fit(X, y).coef_, ridge_cv.coef_
    )

    # Test shape of alpha_ and cv_values_
    ridge_cv = RidgeCV(alphas=alphas, alpha_per_target=True, store_cv_values=True).fit(
        X, y
    )
    assert ridge_cv.alpha_.shape == (n_targets,)
    assert ridge_cv.best_score_.shape == (n_targets,)
    assert ridge_cv.cv_values_.shape == (n_samples, len(alphas), n_targets)

    # Test edge case of there being only one alpha value
    ridge_cv = RidgeCV(alphas=1, alpha_per_target=True, store_cv_values=True).fit(X, y)
    assert ridge_cv.alpha_.shape == (n_targets,)
    assert ridge_cv.best_score_.shape == (n_targets,)
    assert ridge_cv.cv_values_.shape == (n_samples, n_targets, 1)

    # Test edge case of there being only one target
    ridge_cv = RidgeCV(alphas=alphas, alpha_per_target=True, store_cv_values=True).fit(
        X, y[:, 0]
    )
    assert np.isscalar(ridge_cv.alpha_)
    assert np.isscalar(ridge_cv.best_score_)
    assert ridge_cv.cv_values_.shape == (n_samples, len(alphas))

    # Try with a custom scoring function
    ridge_cv = RidgeCV(alphas=alphas, alpha_per_target=True, scoring="r2").fit(X, y)
    assert_array_equal(optimal_alphas, ridge_cv.alpha_)
    assert_array_almost_equal(
        Ridge(alpha=ridge_cv.alpha_).fit(X, y).coef_, ridge_cv.coef_
    )

    # Using a custom CV object should throw an error in combination with
    # alpha_per_target=True
    ridge_cv = RidgeCV(alphas=alphas, cv=LeaveOneOut(), alpha_per_target=True)
    msg = "cv!=None and alpha_per_target=True are incompatible"
    with pytest.raises(ValueError, match=msg):
        ridge_cv.fit(X, y)
    ridge_cv = RidgeCV(alphas=alphas, cv=6, alpha_per_target=True)
    with pytest.raises(ValueError, match=msg):
        ridge_cv.fit(X, y)


def _test_ridge_diabetes(filter_):
    ridge = Ridge(fit_intercept=False)
    ridge.fit(filter_(X_diabetes), y_diabetes)
    return np.round(ridge.score(filter_(X_diabetes), y_diabetes), 5)


def _test_multi_ridge_diabetes(filter_):
    # simulate several responses
    Y = np.vstack((y_diabetes, y_diabetes)).T
    n_features = X_diabetes.shape[1]

    ridge = Ridge(fit_intercept=False)
    ridge.fit(filter_(X_diabetes), Y)
    assert ridge.coef_.shape == (2, n_features)
    Y_pred = ridge.predict(filter_(X_diabetes))
    ridge.fit(filter_(X_diabetes), y_diabetes)
    y_pred = ridge.predict(filter_(X_diabetes))
    assert_array_almost_equal(np.vstack((y_pred, y_pred)).T, Y_pred, decimal=3)


def _test_ridge_classifiers(filter_):
    n_classes = np.unique(y_iris).shape[0]
    n_features = X_iris.shape[1]
    for reg in (RidgeClassifier(), RidgeClassifierCV()):
        reg.fit(filter_(X_iris), y_iris)
        assert reg.coef_.shape == (n_classes, n_features)
        y_pred = reg.predict(filter_(X_iris))
        assert np.mean(y_iris == y_pred) > 0.79

    cv = KFold(5)
    reg = RidgeClassifierCV(cv=cv)
    reg.fit(filter_(X_iris), y_iris)
    y_pred = reg.predict(filter_(X_iris))
    assert np.mean(y_iris == y_pred) >= 0.8


@pytest.mark.parametrize("scoring", [None, "accuracy", _accuracy_callable])
@pytest.mark.parametrize("cv", [None, KFold(5)])
@pytest.mark.parametrize("filter_", [DENSE_FILTER, SPARSE_FILTER])
def test_ridge_classifier_with_scoring(filter_, scoring, cv):
    # non-regression test for #14672
    # check that RidgeClassifierCV works with all sorts of scoring and
    # cross-validation
    scoring_ = make_scorer(scoring) if callable(scoring) else scoring
    clf = RidgeClassifierCV(scoring=scoring_, cv=cv)
    # Smoke test to check that fit/predict does not raise an error
    clf.fit(filter_(X_iris), y_iris).predict(filter_(X_iris))


@pytest.mark.parametrize("cv", [None, KFold(5)])
@pytest.mark.parametrize("filter_", [DENSE_FILTER, SPARSE_FILTER])
def test_ridge_regression_custom_scoring(filter_, cv):
    # check that custom scoring is working as expected
    # check the tie breaking strategy (keep the first alpha tried)

    def _dummy_score(y_test, y_pred):
        return 0.42

    alphas = np.logspace(-2, 2, num=5)
    clf = RidgeClassifierCV(alphas=alphas, scoring=make_scorer(_dummy_score), cv=cv)
    clf.fit(filter_(X_iris), y_iris)
    assert clf.best_score_ == pytest.approx(0.42)
    # In case of a tie in scores, the first alpha tried is kept
    assert clf.alpha_ == pytest.approx(alphas[0])


def _test_tolerance(filter_):
    ridge = Ridge(tol=1e-5, fit_intercept=False)
    ridge.fit(filter_(X_diabetes), y_diabetes)
    score = ridge.score(filter_(X_diabetes), y_diabetes)

    ridge2 = Ridge(tol=1e-3, fit_intercept=False)
    ridge2.fit(filter_(X_diabetes), y_diabetes)
    score2 = ridge2.score(filter_(X_diabetes), y_diabetes)

    assert score >= score2


def check_dense_sparse(test_func):
    # test dense matrix
    ret_dense = test_func(DENSE_FILTER)
    # test sparse matrix
    ret_sparse = test_func(SPARSE_FILTER)
    # test that the outputs are the same
    if ret_dense is not None and ret_sparse is not None:
        assert_array_almost_equal(ret_dense, ret_sparse, decimal=3)


@pytest.mark.parametrize(
    "test_func",
    (
        _test_ridge_loo,
        _test_ridge_cv,
        _test_ridge_diabetes,
        _test_multi_ridge_diabetes,
        _test_ridge_classifiers,
        _test_tolerance,
    ),
)
def test_dense_sparse(test_func):
    check_dense_sparse(test_func)


def test_class_weights():
    # Test class weights.
    X = np.array([[-1.0, -1.0], [-1.0, 0], [-0.8, -1.0], [1.0, 1.0], [1.0, 0.0]])
    y = [1, 1, 1, -1, -1]

    reg = RidgeClassifier(class_weight=None)
    reg.fit(X, y)
    assert_array_equal(reg.predict([[0.2, -1.0]]), np.array([1]))

    # we give a small weight to class 1
    reg = RidgeClassifier(class_weight={1: 0.001})
    reg.fit(X, y)

    # now the hyperplane should rotate clockwise and
    # the prediction on this point should shift
    assert_array_equal(reg.predict([[0.2, -1.0]]), np.array([-1]))

    # check if class_weight = 'balanced' can handle negative labels.
    reg = RidgeClassifier(class_weight="balanced")
    reg.fit(X, y)
    assert_array_equal(reg.predict([[0.2, -1.0]]), np.array([1]))

    # class_weight = 'balanced' and class_weight = None should return the
    # same values when y has an equal number of each label
    X = np.array([[-1.0, -1.0], [-1.0, 0], [-0.8, -1.0], [1.0, 1.0]])
    y = [1, 1, -1, -1]
    reg = RidgeClassifier(class_weight=None)
    reg.fit(X, y)
    rega = RidgeClassifier(class_weight="balanced")
    rega.fit(X, y)
    assert len(rega.classes_) == 2
    assert_array_almost_equal(reg.coef_, rega.coef_)
    assert_array_almost_equal(reg.intercept_, rega.intercept_)


@pytest.mark.parametrize("reg", (RidgeClassifier, RidgeClassifierCV))
def test_class_weight_vs_sample_weight(reg):
    """Check that class_weights resemble sample_weights behavior."""
    # Iris is balanced, so no effect expected for using 'balanced' weights
    reg1 = reg()
    reg1.fit(iris.data, iris.target)
    reg2 = reg(class_weight="balanced")
    reg2.fit(iris.data, iris.target)
    assert_almost_equal(reg1.coef_, reg2.coef_)

    # Inflate importance of class 1, check against user-defined weights
    sample_weight = np.ones(iris.target.shape)
    sample_weight[iris.target == 1] *= 100
    class_weight = {0: 1.0, 1: 100.0, 2: 1.0}
    reg1 = reg()
    reg1.fit(iris.data, iris.target, sample_weight)
    reg2 = reg(class_weight=class_weight)
    reg2.fit(iris.data, iris.target)
    assert_almost_equal(reg1.coef_, reg2.coef_)

    # Check that sample_weight and class_weight are multiplicative
    reg1 = reg()
    reg1.fit(iris.data, iris.target, sample_weight**2)
    reg2 = reg(class_weight=class_weight)
    reg2.fit(iris.data, iris.target, sample_weight)
    assert_almost_equal(reg1.coef_, reg2.coef_)


def test_class_weights_cv():
    # Test class weights for the cross-validated ridge classifier.
    X = np.array([[-1.0, -1.0], [-1.0, 0], [-0.8, -1.0], [1.0, 1.0], [1.0, 0.0]])
    y = [1, 1, 1, -1, -1]

    reg = RidgeClassifierCV(class_weight=None, alphas=[0.01, 0.1, 1])
    reg.fit(X, y)

    # we give a small weight to class 1
    reg = RidgeClassifierCV(class_weight={1: 0.001}, alphas=[0.01, 0.1, 1, 10])
    reg.fit(X, y)

    assert_array_equal(reg.predict([[-0.2, 2]]), np.array([-1]))


@pytest.mark.parametrize(
    "scoring", [None, "neg_mean_squared_error", _mean_squared_error_callable]
)
def test_ridgecv_store_cv_values(scoring):
    rng = np.random.RandomState(42)

    n_samples = 8
    n_features = 5
    x = rng.randn(n_samples, n_features)
    alphas = [1e-1, 1e0, 1e1]
    n_alphas = len(alphas)

    scoring_ = make_scorer(scoring) if callable(scoring) else scoring

    r = RidgeCV(alphas=alphas, cv=None, store_cv_values=True, scoring=scoring_)

    # with len(y.shape) == 1
    y = rng.randn(n_samples)
    r.fit(x, y)
    assert r.cv_values_.shape == (n_samples, n_alphas)

    # with len(y.shape) == 2
    n_targets = 3
    y = rng.randn(n_samples, n_targets)
    r.fit(x, y)
    assert r.cv_values_.shape == (n_samples, n_targets, n_alphas)

    r = RidgeCV(cv=3, store_cv_values=True, scoring=scoring)
    with pytest.raises(ValueError, match="cv!=None and store_cv_values"):
        r.fit(x, y)


@pytest.mark.parametrize("scoring", [None, "accuracy", _accuracy_callable])
def test_ridge_classifier_cv_store_cv_values(scoring):
    x = np.array([[-1.0, -1.0], [-1.0, 0], [-0.8, -1.0], [1.0, 1.0], [1.0, 0.0]])
    y = np.array([1, 1, 1, -1, -1])

    n_samples = x.shape[0]
    alphas = [1e-1, 1e0, 1e1]
    n_alphas = len(alphas)

    scoring_ = make_scorer(scoring) if callable(scoring) else scoring

    r = RidgeClassifierCV(
        alphas=alphas, cv=None, store_cv_values=True, scoring=scoring_
    )

    # with len(y.shape) == 1
    n_targets = 1
    r.fit(x, y)
    assert r.cv_values_.shape == (n_samples, n_targets, n_alphas)

    # with len(y.shape) == 2
    y = np.array(
        [[1, 1, 1, -1, -1], [1, -1, 1, -1, 1], [-1, -1, 1, -1, -1]]
    ).transpose()
    n_targets = y.shape[1]
    r.fit(x, y)
    assert r.cv_values_.shape == (n_samples, n_targets, n_alphas)


@pytest.mark.parametrize("Estimator", [RidgeCV, RidgeClassifierCV])
def test_ridgecv_alphas_conversion(Estimator):
    rng = np.random.RandomState(0)
    alphas = (0.1, 1.0, 10.0)

    n_samples, n_features = 5, 5
    if Estimator is RidgeCV:
        y = rng.randn(n_samples)
    else:
        y = rng.randint(0, 2, n_samples)
    X = rng.randn(n_samples, n_features)

    ridge_est = Estimator(alphas=alphas)
    assert (
        ridge_est.alphas is alphas
    ), f"`alphas` was mutated in `{Estimator.__name__}.__init__`"

    ridge_est.fit(X, y)
    assert_array_equal(ridge_est.alphas, np.asarray(alphas))


def test_ridgecv_sample_weight():
    rng = np.random.RandomState(0)
    alphas = (0.1, 1.0, 10.0)

    # There are different algorithms for n_samples > n_features
    # and the opposite, so test them both.
    for n_samples, n_features in ((6, 5), (5, 10)):
        y = rng.randn(n_samples)
        X = rng.randn(n_samples, n_features)
        sample_weight = 1.0 + rng.rand(n_samples)

        cv = KFold(5)
        ridgecv = RidgeCV(alphas=alphas, cv=cv)
        ridgecv.fit(X, y, sample_weight=sample_weight)

        # Check using GridSearchCV directly
        parameters = {"alpha": alphas}
        gs = GridSearchCV(Ridge(), parameters, cv=cv)
        gs.fit(X, y, sample_weight=sample_weight)

        assert ridgecv.alpha_ == gs.best_estimator_.alpha
        assert_array_almost_equal(ridgecv.coef_, gs.best_estimator_.coef_)




def test_raises_value_error_if_sample_weights_greater_than_1d():
    # Sample weights must be either scalar or 1D
    n_sampless = [2, 3]
    n_featuress = [3, 2]
    rng = np.random.RandomState(42)

    for n_samples, n_features in zip(n_sampless, n_featuress):
        X = rng.randn(n_samples, n_features)
        y = rng.randn(n_samples)
        sample_weights_OK = rng.randn(n_samples) ** 2 + 1
        sample_weights_OK_1 = 1.0
        sample_weights_OK_2 = 2.0
        sample_weights_not_OK = sample_weights_OK[:, np.newaxis]
        sample_weights_not_OK_2 = sample_weights_OK[np.newaxis, :]

        ridge = Ridge(alpha=1)

        # make sure the "OK" sample weights actually work
        ridge.fit(X, y, sample_weights_OK)
        ridge.fit(X, y, sample_weights_OK_1)
        ridge.fit(X, y, sample_weights_OK_2)

        def fit_ridge_not_ok():
            ridge.fit(X, y, sample_weights_not_OK)

        def fit_ridge_not_ok_2():
            ridge.fit(X, y, sample_weights_not_OK_2)

        err_msg = "Sample weights must be 1D array or scalar"
        with pytest.raises(ValueError, match=err_msg):
            fit_ridge_not_ok()

        with pytest.raises(ValueError, match=err_msg):
            fit_ridge_not_ok_2()


def test_sparse_design_with_sample_weights():
    # Sample weights must work with sparse matrices
    n_sampless = [2, 3]
    n_featuress = [3, 2]

    rng = np.random.RandomState(42)

    sparse_matrix_converters = [
        sp.coo_matrix,
        sp.csr_matrix,
        sp.csc_matrix,
        sp.lil_matrix,
        sp.dok_matrix,
    ]

    sparse_ridge = Ridge(alpha=1.0, fit_intercept=False)
    dense_ridge = Ridge(alpha=1.0, fit_intercept=False)

    for n_samples, n_features in zip(n_sampless, n_featuress):
        X = rng.randn(n_samples, n_features)
        y = rng.randn(n_samples)
        sample_weights = rng.randn(n_samples) ** 2 + 1
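        # every sparse format should give (almost) the same coefficients as
        # the dense fit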
        for sparse_converter in sparse_matrix_converters:
            X_sparse = sparse_converter(X)
            sparse_ridge.fit(X_sparse, y, sample_weight=sample_weights)
            dense_ridge.fit(X, y, sample_weight=sample_weights)
            assert_array_almost_equal(sparse_ridge.coef_, dense_ridge.coef_, decimal=6)


def test_ridgecv_int_alphas():
    X = np.array([[-1.0, -1.0], [-1.0, 0], [-0.8, -1.0], [1.0, 1.0], [1.0, 0.0]])
    y = [1, 1, 1, -1, -1]

    # Integer alphas should be accepted
    ridge = RidgeCV(alphas=(1, 10, 100))
    ridge.fit(X, y)


@pytest.mark.parametrize("Estimator", [RidgeCV, RidgeClassifierCV])
@pytest.mark.parametrize(
    "params, err_type, err_msg",
    [
        ({"alphas": (1, -1, -100)}, ValueError, r"alphas\[1\] == -1, must be > 0.0"),
        (
            {"alphas": (-0.1, -1.0, -10.0)},
            ValueError,
            r"alphas\[0\] == -0.1, must be > 0.0",
        ),
        (
            {"alphas": (1, 1.0, "1")},
            TypeError,
            r"alphas\[2\] must be an instance of float, not str",
        ),
    ],
)
def test_ridgecv_alphas_validation(Estimator, params, err_type, err_msg):
    """Check the `alphas` validation in RidgeCV and RidgeClassifierCV."""
    n_samples, n_features = 5, 5
    X = rng.randn(n_samples, n_features)
    y = rng.randint(0, 2, n_samples)

    with pytest.raises(err_type, match=err_msg):
        Estimator(**params).fit(X, y)


@pytest.mark.parametrize("Estimator", [RidgeCV, RidgeClassifierCV])
def test_ridgecv_alphas_scalar(Estimator):
    """Check the case when `alphas` is a scalar.

    This case was supported in the past when `alphas` were converted
    into an array in `__init__`.
    We add this test to ensure backward compatibility.
    """
    n_samples, n_features = 5, 5
    X = rng.randn(n_samples, n_features)
    if Estimator is RidgeCV:
        y = rng.randn(n_samples)
    else:
        y = rng.randint(0, 2, n_samples)

    Estimator(alphas=1).fit(X, y)


def test_raises_value_error_if_solver_not_supported():
    # Tests whether a ValueError is raised if an unknown solver
    # is passed to ridge_regression
    import re

    wrong_solver = "This is not a solver (MagritteSolveCV QuantumBitcoin)"

    exception = ValueError
    # `wrong_solver` contains regex metacharacters (parentheses), so escape the
    # expected message before using it as a `match` pattern.
    message = re.escape(
        "Known solvers are 'sparse_cg', 'cholesky', 'svd'"
        " 'lsqr', 'sag' or 'saga'. Got %s." % wrong_solver
    )

    def func():
        X = np.eye(3)
        y = np.ones(3)
        ridge_regression(X, y, alpha=1.0, solver=wrong_solver)

    with pytest.raises(exception, match=message):
        func()


def test_sparse_cg_max_iter():
    reg = Ridge(solver="sparse_cg", max_iter=1)
    reg.fit(X_diabetes, y_diabetes)
    assert reg.coef_.shape[0] == X_diabetes.shape[1]


@ignore_warnings
def test_n_iter():
    # Test that self.n_iter_ is correct.
    n_targets = 2
    X, y = X_diabetes, y_diabetes
    y_n = np.tile(y, (n_targets, 1)).T

    for max_iter in range(1, 4):
        for solver in ("sag", "saga", "lsqr"):
            reg = Ridge(solver=solver, max_iter=max_iter, tol=1e-12)
            reg.fit(X, y_n)
            assert_array_equal(reg.n_iter_, np.tile(max_iter, n_targets))
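
    # These solvers do not report an iteration count, so n_iter_ stays None.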
    for solver in ("sparse_cg", "svd", "cholesky"):
        reg = Ridge(solver=solver, max_iter=1, tol=1e-1)
        reg.fit(X, y_n)
        assert reg.n_iter_ is None


@pytest.mark.parametrize("solver", ["lsqr", "sparse_cg", "lbfgs", "auto"])
@pytest.mark.parametrize("with_sample_weight", [True, False])
def test_ridge_fit_intercept_sparse(solver, with_sample_weight, global_random_seed):
    """Check that ridge finds the same coefs and intercept on dense and sparse input
    in the presence of sample weights.

    For now only sparse_cg and lbfgs can correctly fit an intercept
    with sparse X with default tol and max_iter.
    'sag' is tested separately in test_ridge_fit_intercept_sparse_sag because it
    requires more iterations and should raise a warning if default max_iter is used.
    Other solvers raise an exception, as checked in
    test_ridge_fit_intercept_sparse_error.
    """
    positive = solver == "lbfgs"
    X, y = _make_sparse_offset_regression(
        n_features=20, random_state=global_random_seed, positive=positive
    )

    sample_weight = None
    if with_sample_weight:
        rng = np.random.RandomState(global_random_seed)
        sample_weight = 1.0 + rng.uniform(size=X.shape[0])

    # "auto" should switch to "sparse_cg" when X is sparse
    # so the reference we use for both ("auto" and "sparse_cg") is
    # Ridge(solver="sparse_cg"), fitted using the dense representation (note
    # that "sparse_cg" can fit sparse or dense data)
    dense_solver = "sparse_cg" if solver == "auto" else solver
    dense_ridge = Ridge(solver=dense_solver, tol=1e-12, positive=positive)
    sparse_ridge = Ridge(solver=solver, tol=1e-12, positive=positive)

    dense_ridge.fit(X, y, sample_weight=sample_weight)
    sparse_ridge.fit(sp.csr_matrix(X), y, sample_weight=sample_weight)

    assert_allclose(dense_ridge.intercept_, sparse_ridge.intercept_)
    assert_allclose(dense_ridge.coef_, sparse_ridge.coef_, rtol=5e-7)


@pytest.mark.parametrize("solver", ["saga", "svd", "cholesky"])
def test_ridge_fit_intercept_sparse_error(solver):
    X, y = _make_sparse_offset_regression(n_features=20, random_state=0)
    X_csr = sp.csr_matrix(X)
    sparse_ridge = Ridge(solver=solver)
    err_msg = "solver='{}' does not support".format(solver)
    with pytest.raises(ValueError, match=err_msg):
        sparse_ridge.fit(X_csr, y)


@pytest.mark.parametrize("with_sample_weight", [True, False])
def test_ridge_fit_intercept_sparse_sag(with_sample_weight, global_random_seed):
    X, y = _make_sparse_offset_regression(
        n_features=5, n_samples=20, random_state=global_random_seed, X_offset=5.0
    )
    if with_sample_weight:
        rng = np.random.RandomState(global_random_seed)
        sample_weight = 1.0 + rng.uniform(size=X.shape[0])
    else:
        sample_weight = None
    X_csr = sp.csr_matrix(X)

    params = dict(
        alpha=1.0, solver="sag", fit_intercept=True, tol=1e-10, max_iter=100000
    )
    dense_ridge = Ridge(**params)
    sparse_ridge = Ridge(**params)
    dense_ridge.fit(X, y, sample_weight=sample_weight)
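
    # With this generous max_iter, fitting on sparse input must not emit any
    # UserWarning; turning warnings into errors enforces that.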
    with warnings.catch_warnings():
        warnings.simplefilter("error", UserWarning)
        sparse_ridge.fit(X_csr, y, sample_weight=sample_weight)

    assert_allclose(dense_ridge.intercept_, sparse_ridge.intercept_, rtol=1e-4)
    assert_allclose(dense_ridge.coef_, sparse_ridge.coef_, rtol=1e-4)

    with pytest.warns(UserWarning, match='"sag" solver requires.*'):
        Ridge(solver="sag", fit_intercept=True, tol=1e-3, max_iter=None).fit(X_csr, y)


@pytest.mark.parametrize("return_intercept", [False, True])
@pytest.mark.parametrize("sample_weight", [None, np.ones(1000)])
@pytest.mark.parametrize("arr_type", [np.array, sp.csr_matrix])
@pytest.mark.parametrize(
    "solver", ["auto", "sparse_cg", "cholesky", "lsqr", "sag", "saga", "lbfgs"]
)
def test_ridge_regression_check_arguments_validity(
    return_intercept, sample_weight, arr_type, solver
):
    """Check if all combinations of arguments give valid estimations."""
    # test excludes 'svd' solver because it raises an exception for sparse inputs
    rng = check_random_state(42)
    X = rng.rand(1000, 3)
    true_coefs = [1, 2, 0.1]
    y = np.dot(X, true_coefs)
    true_intercept = 0.0
    if return_intercept:
        true_intercept = 10000.0
    y += true_intercept

    X_testing = arr_type(X)

    alpha, tol = 1e-3, 1e-6
    atol = 1e-3 if _IS_32BIT else 1e-4
    positive = solver == "lbfgs"
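
    # return_intercept is only supported by the 'sag' solver (and 'auto', which
    # can fall back to it); every other solver must raise a ValueError.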
    if solver not in ["sag", "auto"] and return_intercept:
        with pytest.raises(ValueError, match="In Ridge, only 'sag' solver"):
            ridge_regression(
                X_testing,
                y,
                alpha=alpha,
                solver=solver,
                sample_weight=sample_weight,
                return_intercept=return_intercept,
                positive=positive,
                tol=tol,
            )
        return

    out = ridge_regression(
        X_testing,
        y,
        alpha=alpha,
        solver=solver,
        sample_weight=sample_weight,
        positive=positive,
        return_intercept=return_intercept,
        tol=tol,
    )

    if return_intercept:
        coef, intercept = out
        assert_allclose(coef, true_coefs, rtol=0, atol=atol)
        assert_allclose(intercept, true_intercept, rtol=0, atol=atol)
    else:
        assert_allclose(out, true_coefs, rtol=0, atol=atol)


@pytest.mark.parametrize(
    "solver", ["svd", "sparse_cg", "cholesky", "lsqr", "sag", "saga", "lbfgs"]
)
def test_dtype_match(solver):
    rng = np.random.RandomState(0)
    alpha = 1.0
    positive = solver == "lbfgs"

    n_samples, n_features = 6, 5
    X_64 = rng.randn(n_samples, n_features)
    y_64 = rng.randn(n_samples)
    X_32 = X_64.astype(np.float32)
    y_32 = y_64.astype(np.float32)
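
    # set the tolerance relative to float32 resolution so that the iterative
    # solvers can reach it in both precisions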
    tol = 2 * np.finfo(np.float32).resolution

    # Check type consistency 32 bits
    ridge_32 = Ridge(
        alpha=alpha, solver=solver, max_iter=500, tol=tol, positive=positive
    )
    ridge_32.fit(X_32, y_32)
    coef_32 = ridge_32.coef_

    # Check type consistency 64 bits
    ridge_64 = Ridge(
        alpha=alpha, solver=solver, max_iter=500, tol=tol, positive=positive
    )
    ridge_64.fit(X_64, y_64)
    coef_64 = ridge_64.coef_

    # Do all the actual checks at once for easier debugging
    assert coef_32.dtype == X_32.dtype
    assert coef_64.dtype == X_64.dtype
    assert ridge_32.predict(X_32).dtype == X_32.dtype
    assert ridge_64.predict(X_64).dtype == X_64.dtype
    assert_allclose(ridge_32.coef_, ridge_64.coef_, rtol=1e-4, atol=5e-4)


def test_dtype_match_cholesky():
    # Test different alphas in cholesky solver to ensure full coverage.
    # This test is separated from test_dtype_match for clarity.
    rng = np.random.RandomState(0)
    alpha = np.array([1.0, 0.5])

    n_samples, n_features, n_target = 6, 7, 2
    X_64 = rng.randn(n_samples, n_features)
    y_64 = rng.randn(n_samples, n_target)
    X_32 = X_64.astype(np.float32)
    y_32 = y_64.astype(np.float32)

    # Check type consistency 32 bits
    ridge_32 = Ridge(alpha=alpha, solver="cholesky")
    ridge_32.fit(X_32, y_32)
    coef_32 = ridge_32.coef_

    # Check type consistency 64 bits
    ridge_64 = Ridge(alpha=alpha, solver="cholesky")
    ridge_64.fit(X_64, y_64)
    coef_64 = ridge_64.coef_

    # Do all the checks at once; this way it is easier to debug
    assert coef_32.dtype == X_32.dtype
    assert coef_64.dtype == X_64.dtype
    assert ridge_32.predict(X_32).dtype == X_32.dtype
    assert ridge_64.predict(X_64).dtype == X_64.dtype
    assert_almost_equal(ridge_32.coef_, ridge_64.coef_, decimal=5)


@pytest.mark.parametrize(
    "solver", ["svd", "cholesky", "lsqr", "sparse_cg", "sag", "saga", "lbfgs"]
)
@pytest.mark.parametrize("seed", range(1))
def test_ridge_regression_dtype_stability(solver, seed):
    random_state = np.random.RandomState(seed)
    n_samples, n_features = 6, 5
    X = random_state.randn(n_samples, n_features)
    coef = random_state.randn(n_features)
    y = np.dot(X, coef) + 0.01 * random_state.randn(n_samples)
    alpha = 1.0
    positive = solver == "lbfgs"
    results = dict()
    # XXX: Sparse CG seems to be far less numerically stable than the
    # other solvers; maybe we should not enable float32 for this one.
    atol = 1e-3 if solver == "sparse_cg" else 1e-5

    for current_dtype in (np.float32, np.float64):
        results[current_dtype] = ridge_regression(
            X.astype(current_dtype),
            y.astype(current_dtype),
            alpha=alpha,
            solver=solver,
            random_state=random_state,
            sample_weight=None,
            positive=positive,
            max_iter=500,
            tol=1e-10,
            return_n_iter=False,
            return_intercept=False,
        )

    assert results[np.float32].dtype == np.float32
    assert results[np.float64].dtype == np.float64
    assert_allclose(results[np.float32], results[np.float64], atol=atol)


def test_ridge_sag_with_X_fortran():
    # check that Fortran-ordered arrays are converted when using the SAG solver
    X, y = make_regression(random_state=42)
    # slice so that X and y are not C-contiguous arrays
    X = np.asfortranarray(X)
    X = X[::2, :]
    y = y[::2]
    Ridge(solver="sag").fit(X, y)


@pytest.mark.parametrize(
    "Classifier, params",
    [
        (RidgeClassifier, {}),
        (RidgeClassifierCV, {"cv": None}),
        (RidgeClassifierCV, {"cv": 3}),
    ],
)
def test_ridgeclassifier_multilabel(Classifier, params):
    """Check that multilabel classification is supported and gives meaningful
    results."""
    X, y = make_multilabel_classification(n_classes=1, random_state=0)
    y = y.reshape(-1, 1)
    Y = np.concatenate([y, y], axis=1)
    clf = Classifier(**params).fit(X, Y)
    Y_pred = clf.predict(X)

    assert Y_pred.shape == Y.shape
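    # the two label columns are identical copies, so the predicted columns
    # must match as well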
    assert_array_equal(Y_pred[:, 0], Y_pred[:, 1])


@pytest.mark.parametrize("solver", ["auto", "lbfgs"])
@pytest.mark.parametrize("fit_intercept", [True, False])
@pytest.mark.parametrize("alpha", [1e-3, 1e-2, 0.1, 1.0])
def test_ridge_positive_regression_test(solver, fit_intercept, alpha):
    """Test that Ridge with positive=True returns non-negative coefficients,
    even though one of the ground-truth coefficients is negative."""
    X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
    coef = np.array([1, -10])
    if fit_intercept:
        intercept = 20
        y = X.dot(coef) + intercept
    else:
        y = X.dot(coef)

    model = Ridge(
        alpha=alpha, positive=True, solver=solver, fit_intercept=fit_intercept
    )
    model.fit(X, y)
    assert np.all(model.coef_ >= 0)


@pytest.mark.parametrize("fit_intercept", [True, False])
@pytest.mark.parametrize("alpha", [1e-3, 1e-2, 0.1, 1.0])
def test_ridge_ground_truth_positive_test(fit_intercept, alpha):
    """Test that Ridge w/ and w/o positive converges to the same solution.

    Ridge with positive=True and positive=False must give the same solution
    when the ground truth coefs are all positive.
    """
    rng = np.random.RandomState(42)
    X = rng.randn(300, 100)
    coef = rng.uniform(0.1, 1.0, size=X.shape[1])
    if fit_intercept:
        intercept = 1
        y = X @ coef + intercept
    else:
        y = X @ coef
    y += rng.normal(size=X.shape[0]) * 0.01

    results = []
    for positive in [True, False]:
        model = Ridge(
            alpha=alpha, positive=positive, fit_intercept=fit_intercept, tol=1e-10
        )
        results.append(model.fit(X, y).coef_)
    assert_allclose(*results, atol=1e-6, rtol=0)


@pytest.mark.parametrize(
    "solver", ["svd", "cholesky", "lsqr", "sparse_cg", "sag", "saga"]
)
def test_ridge_positive_error_test(solver):
    """Test input validation for the positive argument in Ridge."""
    alpha = 0.1
    X = np.array([[1, 2], [3, 4]])
    coef = np.array([1, -1])
    y = X @ coef

    model = Ridge(alpha=alpha, positive=True, solver=solver, fit_intercept=False)
    with pytest.raises(ValueError, match="does not support positive"):
        model.fit(X, y)
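
    # the ridge_regression function accepts positive=True only with the
    # 'lbfgs' solver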
    with pytest.raises(ValueError, match="only 'lbfgs' solver can be used"):
        _, _ = ridge_regression(
            X, y, alpha, positive=True, solver=solver, return_intercept=False
        )


@pytest.mark.parametrize("alpha", [1e-3, 1e-2, 0.1, 1.0])
def test_positive_ridge_loss(alpha):
    """Check ridge loss consistency when the positive argument is enabled."""
    X, y = make_regression(n_samples=300, n_features=300, random_state=42)
    n_checks = 100

    def ridge_loss(model, random_state=None, noise_scale=1e-8):
        intercept = model.intercept_
        if random_state is not None:
            rng = np.random.RandomState(random_state)
            # a small positive perturbation keeps the coefficients feasible
            # for the positive=True problem
            coef = model.coef_ + rng.uniform(0, noise_scale, size=model.coef_.shape)
        else:
            coef = model.coef_

        return 0.5 * np.sum((y - X @ coef - intercept) ** 2) + 0.5 * alpha * np.sum(
            coef**2
        )

    model = Ridge(alpha=alpha).fit(X, y)
    model_positive = Ridge(alpha=alpha, positive=True).fit(X, y)

    # Check 1:
    # Loss for the solution found by Ridge(positive=False)
    # is lower than that for the solution found by Ridge(positive=True)
    loss = ridge_loss(model)
    loss_positive = ridge_loss(model_positive)
    assert loss <= loss_positive

    # Check 2:
    # Loss for the solution found by Ridge(positive=True)
    # is lower than that for a small random positive perturbation
    # of the positive solution.
    for random_state in range(n_checks):
        loss_perturbed = ridge_loss(model_positive, random_state=random_state)
        assert loss_positive <= loss_perturbed


@pytest.mark.parametrize("alpha", [1e-3, 1e-2, 0.1, 1.0])
def test_lbfgs_solver_consistency(alpha):
    """Test that the lbfgs solver gets almost the same coef as svd when
    positive=False."""
    X, y = make_regression(n_samples=300, n_features=300, random_state=42)
    y = np.expand_dims(y, 1)
    alpha = np.asarray([alpha])
    config = {
        "positive": False,
        "tol": 1e-16,
        "max_iter": 500000,
    }

    coef_lbfgs = _solve_lbfgs(X, y, alpha, **config)
    coef_svd = _solve_svd(X, y, alpha)
    assert_allclose(coef_lbfgs, coef_svd, atol=1e-4, rtol=0)


def test_lbfgs_solver_error():
    """Test that the lbfgs solver raises a ConvergenceWarning when it does
    not converge."""
    X = np.array([[1, -1], [1, 1]])
    y = np.array([-1e10, 1e10])

    model = Ridge(
        alpha=0.01,
        solver="lbfgs",
        fit_intercept=False,
        tol=1e-12,
        positive=True,
        max_iter=1,
    )
    with pytest.warns(ConvergenceWarning, match="lbfgs solver did not converge"):
        model.fit(X, y)


@pytest.mark.parametrize("fit_intercept", [False, True])
@pytest.mark.parametrize("sparseX", [False, True])
@pytest.mark.parametrize("data", ["tall", "wide"])
@pytest.mark.parametrize("solver", SOLVERS + ["lbfgs"])
def test_ridge_sample_weight_consistency(
    fit_intercept, sparseX, data, solver, global_random_seed
):
    """Test that the impact of sample_weight is consistent.

    Note that this test is stricter than the common test
    check_sample_weights_invariance alone.
    """
    # filter out solvers that do not support sparse input
    if sparseX:
        if solver == "svd" or (solver in ("cholesky", "saga") and fit_intercept):
            pytest.skip("unsupported configuration")

    # XXX: this test is quite sensitive to the seed used to generate the data:
    # ideally we would like the test to pass for any global_random_seed, but this
    # is not the case at the moment.
    rng = np.random.RandomState(42)
    n_samples = 12
    if data == "tall":
        n_features = n_samples // 2
    else:
        n_features = n_samples * 2

    X = rng.rand(n_samples, n_features)
    y = rng.rand(n_samples)
    if sparseX:
        X = sp.csr_matrix(X)
    params = dict(
        fit_intercept=fit_intercept,
        alpha=1.0,
        solver=solver,
        positive=(solver == "lbfgs"),
        random_state=global_random_seed,  # for sag/saga
        tol=1e-12,
    )

    # 1) sample_weight=np.ones(..) should be equivalent to sample_weight=None,
    # same check as check_sample_weights_invariance(name, reg, kind="ones"), but
    # we also test with sparse input.
    reg = Ridge(**params).fit(X, y, sample_weight=None)
    coef = reg.coef_.copy()
    if fit_intercept:
        intercept = reg.intercept_
    sample_weight = np.ones_like(y)
    reg.fit(X, y, sample_weight=sample_weight)
    assert_allclose(reg.coef_, coef, rtol=1e-6)
    if fit_intercept:
        assert_allclose(reg.intercept_, intercept)

    # 2) setting elements of sample_weight to 0 is equivalent to removing these
    # samples, same check as check_sample_weights_invariance(name, reg,
    # kind="zeros"), but we also test with sparse input.
    sample_weight = rng.uniform(low=0.01, high=2, size=X.shape[0])
    sample_weight[-5:] = 0
    y[-5:] *= 1000  # to make excluding those samples important
    reg.fit(X, y, sample_weight=sample_weight)
    coef = reg.coef_.copy()
    if fit_intercept:
        intercept = reg.intercept_
    reg.fit(X[:-5, :], y[:-5], sample_weight=sample_weight[:-5])
    assert_allclose(reg.coef_, coef, rtol=1e-6)
    if fit_intercept:
        assert_allclose(reg.intercept_, intercept)

    # 3) scaling all sample weights by a constant should have no effect, provided
    # the penalty term alpha is scaled by the same constant.
    reg2 = Ridge(**params).set_params(alpha=np.pi * params["alpha"])
    reg2.fit(X, y, sample_weight=np.pi * sample_weight)
    if solver in ("sag", "saga") and not fit_intercept:
        pytest.xfail(f"Solver {solver} fails the test for scaling of sample_weight.")
    assert_allclose(reg2.coef_, coef, rtol=1e-6)
    if fit_intercept:
        assert_allclose(reg2.intercept_, intercept)

    # 4) check that multiplying sample_weight by 2 is equivalent
    # to repeating the corresponding samples twice
    if sparseX:
        X = X.toarray()
    X2 = np.concatenate([X, X[: n_samples // 2]], axis=0)
    y2 = np.concatenate([y, y[: n_samples // 2]])
    sample_weight_1 = sample_weight.copy()
    sample_weight_1[: n_samples // 2] *= 2
    sample_weight_2 = np.concatenate(
        [sample_weight, sample_weight[: n_samples // 2]], axis=0
    )
    if sparseX:
        X = sp.csr_matrix(X)
        X2 = sp.csr_matrix(X2)
    reg1 = Ridge(**params).fit(X, y, sample_weight=sample_weight_1)
    reg2 = Ridge(**params).fit(X2, y2, sample_weight=sample_weight_2)
    assert_allclose(reg1.coef_, reg2.coef_)
    if fit_intercept:
        assert_allclose(reg1.intercept_, reg2.intercept_)