- import warnings
- from itertools import product
- import numpy as np
- import pytest
- import scipy.sparse as sp
- from scipy import linalg
- from sklearn import datasets
- from sklearn.datasets import (
- make_classification,
- make_low_rank_matrix,
- make_multilabel_classification,
- make_regression,
- )
- from sklearn.exceptions import ConvergenceWarning
- from sklearn.linear_model import (
- LinearRegression,
- Ridge,
- RidgeClassifier,
- RidgeClassifierCV,
- RidgeCV,
- ridge_regression,
- )
- from sklearn.linear_model._ridge import (
- _check_gcv_mode,
- _RidgeGCV,
- _solve_cholesky,
- _solve_cholesky_kernel,
- _solve_lbfgs,
- _solve_svd,
- _X_CenterStackOp,
- )
- from sklearn.metrics import get_scorer, make_scorer, mean_squared_error
- from sklearn.model_selection import (
- GridSearchCV,
- GroupKFold,
- KFold,
- LeaveOneOut,
- cross_val_predict,
- )
- from sklearn.preprocessing import minmax_scale
- from sklearn.utils import _IS_32BIT, check_random_state
- from sklearn.utils._testing import (
- assert_allclose,
- assert_almost_equal,
- assert_array_almost_equal,
- assert_array_equal,
- ignore_warnings,
- )
- SOLVERS = ["svd", "sparse_cg", "cholesky", "lsqr", "sag", "saga"]
- SPARSE_SOLVERS_WITH_INTERCEPT = ("sparse_cg", "sag")
- SPARSE_SOLVERS_WITHOUT_INTERCEPT = ("sparse_cg", "cholesky", "lsqr", "sag", "saga")
- diabetes = datasets.load_diabetes()
- X_diabetes, y_diabetes = diabetes.data, diabetes.target
- ind = np.arange(X_diabetes.shape[0])
- rng = np.random.RandomState(0)
- rng.shuffle(ind)
- ind = ind[:200]
- X_diabetes, y_diabetes = X_diabetes[ind], y_diabetes[ind]
- iris = datasets.load_iris()
- X_iris = sp.csr_matrix(iris.data)
- y_iris = iris.target
- def DENSE_FILTER(X):
- return X
- def SPARSE_FILTER(X):
- return sp.csr_matrix(X)
- def _accuracy_callable(y_test, y_pred):
- return np.mean(y_test == y_pred)
- def _mean_squared_error_callable(y_test, y_pred):
- return ((y_test - y_pred) ** 2).mean()
- @pytest.fixture(params=["long", "wide"])
- def ols_ridge_dataset(global_random_seed, request):
- """Dataset with OLS and Ridge solutions, well conditioned X.
- The construction is based on the SVD decomposition of X = U S V'.
- Parameters
- ----------
- type : {"long", "wide"}
- If "long", then n_samples > n_features.
- If "wide", then n_features > n_samples.
- For "wide", we return the minimum norm solution w = X' (XX')^-1 y:
- min ||w||_2 subject to X w = y
- Returns
- -------
- X : ndarray
- Last column of 1, i.e. intercept.
- y : ndarray
- coef_ols : ndarray of shape (n_features,)
- Minimum norm OLS solution, i.e. min ||X w - y||_2^2 (with minimum ||w||_2 in
- case of ambiguity)
- Last coefficient is intercept.
- coef_ridge : ndarray of shape (n_features,)
- Ridge solution with alpha=1, i.e. min ||X w - y||_2^2 + ||w||_2^2.
- Last coefficient is intercept.
- """
- # Make the larger dimension more than twice as big as the smaller one.
- # This helps when constructing singular matrices like (X, X).
- if request.param == "long":
- n_samples, n_features = 12, 4
- else:
- n_samples, n_features = 4, 12
- k = min(n_samples, n_features)
- rng = np.random.RandomState(global_random_seed)
- X = make_low_rank_matrix(
- n_samples=n_samples, n_features=n_features, effective_rank=k, random_state=rng
- )
- X[:, -1] = 1 # last column acts as intercept
- U, s, Vt = linalg.svd(X)
- assert np.all(s > 1e-3) # to be sure
- U1, U2 = U[:, :k], U[:, k:]
- Vt1, _ = Vt[:k, :], Vt[k:, :]
- if request.param == "long":
- # Add a term that vanishes in the product X'y
- coef_ols = rng.uniform(low=-10, high=10, size=n_features)
- y = X @ coef_ols
- y += U2 @ rng.normal(size=n_samples - n_features) ** 2
- else:
- y = rng.uniform(low=-10, high=10, size=n_samples)
- # w = X'(XX')^-1 y = V s^-1 U' y
- coef_ols = Vt1.T @ np.diag(1 / s) @ U1.T @ y
- # Add penalty alpha * ||coef||_2^2 for alpha=1 and solve via normal equations.
- # Note that the problem is well conditioned such that we get accurate results.
- alpha = 1
- d = alpha * np.identity(n_features)
- d[-1, -1] = 0 # intercept gets no penalty
- coef_ridge = linalg.solve(X.T @ X + d, X.T @ y)
- # To be sure
- R_OLS = y - X @ coef_ols
- R_Ridge = y - X @ coef_ridge
- assert np.linalg.norm(R_OLS) < np.linalg.norm(R_Ridge)
- return X, y, coef_ols, coef_ridge
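- # Illustrative sketch (not a collected test), added for clarity: the minimum
- # norm OLS coefficients built above via Vt1.T @ diag(1/s) @ U1.T @ y are just
- # the Moore-Penrose pseudoinverse applied to y. The helper name is
- # hypothetical and only documents that identity.
- def _sketch_min_norm_ols_via_pinv(seed=0):
-     rng = np.random.RandomState(seed)
-     n_samples, n_features = 4, 12  # wide X, underdetermined system
-     X = rng.randn(n_samples, n_features)
-     y = rng.randn(n_samples)
-     U, s, Vt = linalg.svd(X, full_matrices=False)
-     coef_svd = Vt.T @ np.diag(1 / s) @ U.T @ y
-     coef_pinv = np.linalg.pinv(X) @ y
-     assert_allclose(coef_svd, coef_pinv)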
- @pytest.mark.parametrize("solver", SOLVERS)
- @pytest.mark.parametrize("fit_intercept", [True, False])
- def test_ridge_regression(solver, fit_intercept, ols_ridge_dataset, global_random_seed):
- """Test that Ridge converges for all solvers to correct solution.
- We work with a simple constructed data set with known solution.
- """
- X, y, _, coef = ols_ridge_dataset
- alpha = 1.0 # because ols_ridge_dataset uses this.
- params = dict(
- alpha=alpha,
- fit_intercept=True,
- solver=solver,
- tol=1e-15 if solver in ("sag", "saga") else 1e-10,
- random_state=global_random_seed,
- )
- # Calculate residuals and R2.
- res_null = y - np.mean(y)
- res_Ridge = y - X @ coef
- R2_Ridge = 1 - np.sum(res_Ridge**2) / np.sum(res_null**2)
- model = Ridge(**params)
- X = X[:, :-1] # remove intercept
- if fit_intercept:
- intercept = coef[-1]
- else:
- X = X - X.mean(axis=0)
- y = y - y.mean()
- intercept = 0
- model.fit(X, y)
- coef = coef[:-1]
- assert model.intercept_ == pytest.approx(intercept)
- assert_allclose(model.coef_, coef)
- assert model.score(X, y) == pytest.approx(R2_Ridge)
- # Same with sample_weight.
- model = Ridge(**params).fit(X, y, sample_weight=np.ones(X.shape[0]))
- assert model.intercept_ == pytest.approx(intercept)
- assert_allclose(model.coef_, coef)
- assert model.score(X, y) == pytest.approx(R2_Ridge)
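- # Illustrative sketch (not a collected test), assuming only the public Ridge
- # API: with fit_intercept=False the ridge estimate solves the normal equations
- # (X'X + alpha*I) w = X'y; the ols_ridge_dataset fixture uses the same closed
- # form, with the intercept column left unpenalized. The helper name is
- # hypothetical.
- def _sketch_ridge_normal_equations(seed=0, alpha=1.0):
-     rng = np.random.RandomState(seed)
-     X = rng.randn(12, 4)
-     y = rng.randn(12)
-     coef_closed_form = linalg.solve(X.T @ X + alpha * np.eye(X.shape[1]), X.T @ y)
-     model = Ridge(alpha=alpha, fit_intercept=False, solver="svd").fit(X, y)
-     assert_allclose(model.coef_, coef_closed_form)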
- @pytest.mark.parametrize("solver", SOLVERS)
- @pytest.mark.parametrize("fit_intercept", [True, False])
- def test_ridge_regression_hstacked_X(
- solver, fit_intercept, ols_ridge_dataset, global_random_seed
- ):
- """Test that Ridge converges for all solvers to correct solution on hstacked data.
- We work with a simple constructed data set with known solution.
- Fit on [X] with alpha is the same as fit on [X, X]/2 with alpha/2.
- For long X, [X, X] is a singular matrix.
- """
- X, y, _, coef = ols_ridge_dataset
- n_samples, n_features = X.shape
- alpha = 1.0 # because ols_ridge_dataset uses this.
- model = Ridge(
- alpha=alpha / 2,
- fit_intercept=fit_intercept,
- solver=solver,
- tol=1e-15 if solver in ("sag", "saga") else 1e-10,
- random_state=global_random_seed,
- )
- X = X[:, :-1] # remove intercept
- X = 0.5 * np.concatenate((X, X), axis=1)
- assert np.linalg.matrix_rank(X) <= min(n_samples, n_features - 1)
- if fit_intercept:
- intercept = coef[-1]
- else:
- X = X - X.mean(axis=0)
- y = y - y.mean()
- intercept = 0
- model.fit(X, y)
- coef = coef[:-1]
- assert model.intercept_ == pytest.approx(intercept)
- # coefficients are not all on the same magnitude, adding a small atol to
- # make this test less brittle
- assert_allclose(model.coef_, np.r_[coef, coef], atol=1e-8)
- @pytest.mark.parametrize("solver", SOLVERS)
- @pytest.mark.parametrize("fit_intercept", [True, False])
- def test_ridge_regression_vstacked_X(
- solver, fit_intercept, ols_ridge_dataset, global_random_seed
- ):
- """Test that Ridge converges for all solvers to correct solution on vstacked data.
- We work with a simple constructed data set with known solution.
- Fit on (X, y) with alpha is the same as fit on the vstacked data
- ([X; X], [y; y]) with 2 * alpha.
- For wide X, the vstacked [X; X] is rank deficient.
- """
- X, y, _, coef = ols_ridge_dataset
- n_samples, n_features = X.shape
- alpha = 1.0 # because ols_ridge_dataset uses this.
- model = Ridge(
- alpha=2 * alpha,
- fit_intercept=fit_intercept,
- solver=solver,
- tol=1e-15 if solver in ("sag", "saga") else 1e-10,
- random_state=global_random_seed,
- )
- X = X[:, :-1] # remove intercept
- X = np.concatenate((X, X), axis=0)
- assert np.linalg.matrix_rank(X) <= min(n_samples, n_features)
- y = np.r_[y, y]
- if fit_intercept:
- intercept = coef[-1]
- else:
- X = X - X.mean(axis=0)
- y = y - y.mean()
- intercept = 0
- model.fit(X, y)
- coef = coef[:-1]
- assert model.intercept_ == pytest.approx(intercept)
- # coefficients are not all on the same magnitude, adding a small atol to
- # make this test less brittle
- assert_allclose(model.coef_, coef, atol=1e-8)
- @pytest.mark.parametrize("solver", SOLVERS)
- @pytest.mark.parametrize("fit_intercept", [True, False])
- def test_ridge_regression_unpenalized(
- solver, fit_intercept, ols_ridge_dataset, global_random_seed
- ):
- """Test that unpenalized Ridge = OLS converges for all solvers to correct solution.
- We work with a simple constructed data set with known solution.
- Note: This checks the minimum norm solution for wide X, i.e.
- n_samples < n_features:
- min ||w||_2 subject to X w = y
- """
- X, y, coef, _ = ols_ridge_dataset
- n_samples, n_features = X.shape
- alpha = 0 # OLS
- params = dict(
- alpha=alpha,
- fit_intercept=fit_intercept,
- solver=solver,
- tol=1e-15 if solver in ("sag", "saga") else 1e-10,
- random_state=global_random_seed,
- )
- model = Ridge(**params)
- # Note that cholesky might give a warning: "Singular matrix in solving dual
- # problem. Using least-squares solution instead."
- if fit_intercept:
- X = X[:, :-1] # remove intercept
- intercept = coef[-1]
- coef = coef[:-1]
- else:
- intercept = 0
- model.fit(X, y)
- # FIXME: `assert_allclose(model.coef_, coef)` should work for all cases but fails
- # for the wide/fat case with n_features > n_samples. The current Ridge solvers do
- # NOT return the minimum norm solution with fit_intercept=True.
- if n_samples > n_features or not fit_intercept:
- assert model.intercept_ == pytest.approx(intercept)
- assert_allclose(model.coef_, coef)
- else:
- # As it is an underdetermined problem, residuals = 0. This shows that we get
- # a solution to X w = y ....
- assert_allclose(model.predict(X), y)
- assert_allclose(X @ coef + intercept, y)
- # But it is not the minimum norm solution. (This should be equal.)
- assert np.linalg.norm(np.r_[model.intercept_, model.coef_]) > np.linalg.norm(
- np.r_[intercept, coef]
- )
- pytest.xfail(reason="Ridge does not provide the minimum norm solution.")
- assert model.intercept_ == pytest.approx(intercept)
- assert_allclose(model.coef_, coef)
- @pytest.mark.parametrize("solver", SOLVERS)
- @pytest.mark.parametrize("fit_intercept", [True, False])
- def test_ridge_regression_unpenalized_hstacked_X(
- solver, fit_intercept, ols_ridge_dataset, global_random_seed
- ):
- """Test that unpenalized Ridge = OLS converges for all solvers to correct solution.
- We work with a simple constructed data set with known solution.
- OLS fit on [X] is the same as fit on [X, X]/2.
- For long X, [X, X] is a singular matrix and we check against the minimum norm
- solution:
- min ||w||_2 subject to min ||X w - y||_2
- """
- X, y, coef, _ = ols_ridge_dataset
- n_samples, n_features = X.shape
- alpha = 0 # OLS
- model = Ridge(
- alpha=alpha,
- fit_intercept=fit_intercept,
- solver=solver,
- tol=1e-15 if solver in ("sag", "saga") else 1e-10,
- random_state=global_random_seed,
- )
- if fit_intercept:
- X = X[:, :-1] # remove intercept
- intercept = coef[-1]
- coef = coef[:-1]
- else:
- intercept = 0
- X = 0.5 * np.concatenate((X, X), axis=1)
- assert np.linalg.matrix_rank(X) <= min(n_samples, n_features)
- model.fit(X, y)
- if n_samples > n_features or not fit_intercept:
- assert model.intercept_ == pytest.approx(intercept)
- if solver == "cholesky":
- # Cholesky is a bad choice for singular X.
- pytest.skip()
- assert_allclose(model.coef_, np.r_[coef, coef])
- else:
- # FIXME: Same as in test_ridge_regression_unpenalized.
- # As it is an underdetermined problem, residuals = 0. This shows that we get
- # a solution to X w = y ....
- assert_allclose(model.predict(X), y)
- # But it is not the minimum norm solution. (This should be equal.)
- assert np.linalg.norm(np.r_[model.intercept_, model.coef_]) > np.linalg.norm(
- np.r_[intercept, coef, coef]
- )
- pytest.xfail(reason="Ridge does not provide the minimum norm solution.")
- assert model.intercept_ == pytest.approx(intercept)
- assert_allclose(model.coef_, np.r_[coef, coef])
- @pytest.mark.parametrize("solver", SOLVERS)
- @pytest.mark.parametrize("fit_intercept", [True, False])
- def test_ridge_regression_unpenalized_vstacked_X(
- solver, fit_intercept, ols_ridge_dataset, global_random_seed
- ):
- """Test that unpenalized Ridge = OLS converges for all solvers to correct solution.
- We work with a simple constructed data set with known solution.
- OLS fit on (X, y) is the same as fit on the vstacked data ([X; X], [y; y]).
- For wide X, the vstacked [X; X] is rank deficient and we check against the minimum norm
- solution:
- min ||w||_2 subject to X w = y
- """
- X, y, coef, _ = ols_ridge_dataset
- n_samples, n_features = X.shape
- alpha = 0 # OLS
- model = Ridge(
- alpha=alpha,
- fit_intercept=fit_intercept,
- solver=solver,
- tol=1e-15 if solver in ("sag", "saga") else 1e-10,
- random_state=global_random_seed,
- )
- if fit_intercept:
- X = X[:, :-1] # remove intercept
- intercept = coef[-1]
- coef = coef[:-1]
- else:
- intercept = 0
- X = np.concatenate((X, X), axis=0)
- assert np.linalg.matrix_rank(X) <= min(n_samples, n_features)
- y = np.r_[y, y]
- model.fit(X, y)
- if n_samples > n_features or not fit_intercept:
- assert model.intercept_ == pytest.approx(intercept)
- assert_allclose(model.coef_, coef)
- else:
- # FIXME: Same as in test_ridge_regression_unpenalized.
- # As it is an underdetermined problem, residuals = 0. This shows that we get
- # a solution to X w = y ....
- assert_allclose(model.predict(X), y)
- # But it is not the minimum norm solution. (This should be equal.)
- assert np.linalg.norm(np.r_[model.intercept_, model.coef_]) > np.linalg.norm(
- np.r_[intercept, coef]
- )
- pytest.xfail(reason="Ridge does not provide the minimum norm solution.")
- assert model.intercept_ == pytest.approx(intercept)
- assert_allclose(model.coef_, coef)
- @pytest.mark.parametrize("solver", SOLVERS)
- @pytest.mark.parametrize("fit_intercept", [True, False])
- @pytest.mark.parametrize("sparseX", [True, False])
- @pytest.mark.parametrize("alpha", [1.0, 1e-2])
- def test_ridge_regression_sample_weights(
- solver, fit_intercept, sparseX, alpha, ols_ridge_dataset, global_random_seed
- ):
- """Test that Ridge with sample weights gives correct results.
- We use the following trick:
- ||y - Xw||_2^2 = (z - Aw)' W (z - Aw)
- for z = [y, y], A' = [X', X'] (vstacked), and W = diag(w) with w[:n] + w[n:] = 1.
- """
- if sparseX:
- if fit_intercept and solver not in SPARSE_SOLVERS_WITH_INTERCEPT:
- pytest.skip()
- elif not fit_intercept and solver not in SPARSE_SOLVERS_WITHOUT_INTERCEPT:
- pytest.skip()
- X, y, _, coef = ols_ridge_dataset
- n_samples, n_features = X.shape
- sw = rng.uniform(low=0, high=1, size=n_samples)
- model = Ridge(
- alpha=alpha,
- fit_intercept=fit_intercept,
- solver=solver,
- tol=1e-15 if solver in ["sag", "saga"] else 1e-10,
- max_iter=100_000,
- random_state=global_random_seed,
- )
- X = X[:, :-1] # remove intercept
- X = np.concatenate((X, X), axis=0)
- y = np.r_[y, y]
- sw = np.r_[sw, 1 - sw] * alpha
- if fit_intercept:
- intercept = coef[-1]
- else:
- X = X - X.mean(axis=0)
- y = y - y.mean()
- intercept = 0
- if sparseX:
- X = sp.csr_matrix(X)
- model.fit(X, y, sample_weight=sw)
- coef = coef[:-1]
- assert model.intercept_ == pytest.approx(intercept)
- assert_allclose(model.coef_, coef)
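- # Illustrative sketch (not a collected test): the simplest form of the
- # weighting trick used above is that an integer sample_weight is equivalent to
- # repeating the corresponding rows, because the data term of the ridge loss is
- # a weighted sum while the penalty is unchanged. The helper name is
- # hypothetical.
- def _sketch_sample_weight_equals_row_repetition(seed=0, alpha=1.0):
-     rng = np.random.RandomState(seed)
-     X = rng.randn(10, 3)
-     y = rng.randn(10)
-     sw = rng.randint(1, 4, size=10)
-     weighted = Ridge(alpha=alpha, solver="svd").fit(X, y, sample_weight=sw)
-     repeated = Ridge(alpha=alpha, solver="svd").fit(
-         np.repeat(X, sw, axis=0), np.repeat(y, sw)
-     )
-     assert_allclose(weighted.coef_, repeated.coef_)
-     assert_allclose(weighted.intercept_, repeated.intercept_)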
- def test_primal_dual_relationship():
- y = y_diabetes.reshape(-1, 1)
- coef = _solve_cholesky(X_diabetes, y, alpha=[1e-2])
- K = np.dot(X_diabetes, X_diabetes.T)
- dual_coef = _solve_cholesky_kernel(K, y, alpha=[1e-2])
- coef2 = np.dot(X_diabetes.T, dual_coef).T
- assert_array_almost_equal(coef, coef2)
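- # Illustrative sketch (not a collected test): the primal/dual identity checked
- # above can be written with plain numpy as
- # (X'X + alpha*I)^-1 X'y == X'(XX' + alpha*I)^-1 y. The helper name is
- # hypothetical.
- def _sketch_primal_dual_identity(seed=0, alpha=1e-2):
-     rng = np.random.RandomState(seed)
-     X = rng.randn(20, 5)
-     y = rng.randn(20)
-     primal = linalg.solve(X.T @ X + alpha * np.eye(X.shape[1]), X.T @ y)
-     dual = linalg.solve(X @ X.T + alpha * np.eye(X.shape[0]), y)
-     assert_allclose(primal, X.T @ dual)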
- def test_ridge_regression_convergence_fail():
- rng = np.random.RandomState(0)
- y = rng.randn(5)
- X = rng.randn(5, 10)
- warning_message = r"sparse_cg did not converge after" r" [0-9]+ iterations."
- with pytest.warns(ConvergenceWarning, match=warning_message):
- ridge_regression(
- X, y, alpha=1.0, solver="sparse_cg", tol=0.0, max_iter=None, verbose=1
- )
- def test_ridge_shapes_type():
- # Test shape of coef_ and intercept_
- rng = np.random.RandomState(0)
- n_samples, n_features = 5, 10
- X = rng.randn(n_samples, n_features)
- y = rng.randn(n_samples)
- Y1 = y[:, np.newaxis]
- Y = np.c_[y, 1 + y]
- ridge = Ridge()
- ridge.fit(X, y)
- assert ridge.coef_.shape == (n_features,)
- assert ridge.intercept_.shape == ()
- assert isinstance(ridge.coef_, np.ndarray)
- assert isinstance(ridge.intercept_, float)
- ridge.fit(X, Y1)
- assert ridge.coef_.shape == (1, n_features)
- assert ridge.intercept_.shape == (1,)
- assert isinstance(ridge.coef_, np.ndarray)
- assert isinstance(ridge.intercept_, np.ndarray)
- ridge.fit(X, Y)
- assert ridge.coef_.shape == (2, n_features)
- assert ridge.intercept_.shape == (2,)
- assert isinstance(ridge.coef_, np.ndarray)
- assert isinstance(ridge.intercept_, np.ndarray)
- def test_ridge_intercept():
- # Test intercept with multiple targets GH issue #708
- rng = np.random.RandomState(0)
- n_samples, n_features = 5, 10
- X = rng.randn(n_samples, n_features)
- y = rng.randn(n_samples)
- Y = np.c_[y, 1.0 + y]
- ridge = Ridge()
- ridge.fit(X, y)
- intercept = ridge.intercept_
- ridge.fit(X, Y)
- assert_almost_equal(ridge.intercept_[0], intercept)
- assert_almost_equal(ridge.intercept_[1], intercept + 1.0)
- def test_ridge_vs_lstsq():
- # On alpha=0., Ridge and OLS yield the same solution.
- rng = np.random.RandomState(0)
- # we need more samples than features
- n_samples, n_features = 5, 4
- y = rng.randn(n_samples)
- X = rng.randn(n_samples, n_features)
- ridge = Ridge(alpha=0.0, fit_intercept=False)
- ols = LinearRegression(fit_intercept=False)
- ridge.fit(X, y)
- ols.fit(X, y)
- assert_almost_equal(ridge.coef_, ols.coef_)
- ridge.fit(X, y)
- ols.fit(X, y)
- assert_almost_equal(ridge.coef_, ols.coef_)
- def test_ridge_individual_penalties():
- # Tests the ridge object using individual penalties
- rng = np.random.RandomState(42)
- n_samples, n_features, n_targets = 20, 10, 5
- X = rng.randn(n_samples, n_features)
- y = rng.randn(n_samples, n_targets)
- penalties = np.arange(n_targets)
- coef_cholesky = np.array(
- [
- Ridge(alpha=alpha, solver="cholesky").fit(X, target).coef_
- for alpha, target in zip(penalties, y.T)
- ]
- )
- coefs_indiv_pen = [
- Ridge(alpha=penalties, solver=solver, tol=1e-12).fit(X, y).coef_
- for solver in ["svd", "sparse_cg", "lsqr", "cholesky", "sag", "saga"]
- ]
- for coef_indiv_pen in coefs_indiv_pen:
- assert_array_almost_equal(coef_cholesky, coef_indiv_pen)
- # Test error is raised when number of targets and penalties do not match.
- ridge = Ridge(alpha=penalties[:-1])
- err_msg = "Number of targets and number of penalties do not correspond: 4 != 5"
- with pytest.raises(ValueError, match=err_msg):
- ridge.fit(X, y)
- @pytest.mark.parametrize("n_col", [(), (1,), (3,)])
- def test_X_CenterStackOp(n_col):
- rng = np.random.RandomState(0)
- X = rng.randn(11, 8)
- X_m = rng.randn(8)
- sqrt_sw = rng.randn(len(X))
- Y = rng.randn(11, *n_col)
- A = rng.randn(9, *n_col)
- operator = _X_CenterStackOp(sp.csr_matrix(X), X_m, sqrt_sw)
- reference_operator = np.hstack([X - sqrt_sw[:, None] * X_m, sqrt_sw[:, None]])
- assert_allclose(reference_operator.dot(A), operator.dot(A))
- assert_allclose(reference_operator.T.dot(Y), operator.T.dot(Y))
- @pytest.mark.parametrize("shape", [(10, 1), (13, 9), (3, 7), (2, 2), (20, 20)])
- @pytest.mark.parametrize("uniform_weights", [True, False])
- def test_compute_gram(shape, uniform_weights):
- rng = np.random.RandomState(0)
- X = rng.randn(*shape)
- if uniform_weights:
- sw = np.ones(X.shape[0])
- else:
- sw = rng.chisquare(1, shape[0])
- sqrt_sw = np.sqrt(sw)
- X_mean = np.average(X, axis=0, weights=sw)
- X_centered = (X - X_mean) * sqrt_sw[:, None]
- true_gram = X_centered.dot(X_centered.T)
- X_sparse = sp.csr_matrix(X * sqrt_sw[:, None])
- gcv = _RidgeGCV(fit_intercept=True)
- computed_gram, computed_mean = gcv._compute_gram(X_sparse, sqrt_sw)
- assert_allclose(X_mean, computed_mean)
- assert_allclose(true_gram, computed_gram)
- @pytest.mark.parametrize("shape", [(10, 1), (13, 9), (3, 7), (2, 2), (20, 20)])
- @pytest.mark.parametrize("uniform_weights", [True, False])
- def test_compute_covariance(shape, uniform_weights):
- rng = np.random.RandomState(0)
- X = rng.randn(*shape)
- if uniform_weights:
- sw = np.ones(X.shape[0])
- else:
- sw = rng.chisquare(1, shape[0])
- sqrt_sw = np.sqrt(sw)
- X_mean = np.average(X, axis=0, weights=sw)
- X_centered = (X - X_mean) * sqrt_sw[:, None]
- true_covariance = X_centered.T.dot(X_centered)
- X_sparse = sp.csr_matrix(X * sqrt_sw[:, None])
- gcv = _RidgeGCV(fit_intercept=True)
- computed_cov, computed_mean = gcv._compute_covariance(X_sparse, sqrt_sw)
- assert_allclose(X_mean, computed_mean)
- assert_allclose(true_covariance, computed_cov)
- def _make_sparse_offset_regression(
- n_samples=100,
- n_features=100,
- proportion_nonzero=0.5,
- n_informative=10,
- n_targets=1,
- bias=13.0,
- X_offset=30.0,
- noise=30.0,
- shuffle=True,
- coef=False,
- positive=False,
- random_state=None,
- ):
- X, y, c = make_regression(
- n_samples=n_samples,
- n_features=n_features,
- n_informative=n_informative,
- n_targets=n_targets,
- bias=bias,
- noise=noise,
- shuffle=shuffle,
- coef=True,
- random_state=random_state,
- )
- if n_features == 1:
- c = np.asarray([c])
- X += X_offset
- mask = (
- np.random.RandomState(random_state).binomial(1, proportion_nonzero, X.shape) > 0
- )
- removed_X = X.copy()
- X[~mask] = 0.0
- removed_X[mask] = 0.0
- y -= removed_X.dot(c)
- if positive:
- y += X.dot(np.abs(c) + 1 - c)
- c = np.abs(c) + 1
- if n_features == 1:
- c = c[0]
- if coef:
- return X, y, c
- return X, y
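- # Illustrative usage sketch (not a collected test) for the helper above,
- # requesting the true coefficients back for reference; the parameter values
- # are arbitrary and the helper name is hypothetical.
- def _sketch_sparse_offset_regression_usage(seed=0):
-     X, y, true_coef = _make_sparse_offset_regression(
-         n_samples=50, n_features=10, proportion_nonzero=0.3, coef=True,
-         random_state=seed,
-     )
-     return X.shape, y.shape, true_coef.shape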
- @pytest.mark.parametrize(
- "solver, sparse_X",
- (
- (solver, sparse_X)
- for (solver, sparse_X) in product(
- ["cholesky", "sag", "sparse_cg", "lsqr", "saga", "ridgecv"],
- [False, True],
- )
- if not (sparse_X and solver not in ["sparse_cg", "ridgecv"])
- ),
- )
- @pytest.mark.parametrize(
- "n_samples,dtype,proportion_nonzero",
- [(20, "float32", 0.1), (40, "float32", 1.0), (20, "float64", 0.2)],
- )
- @pytest.mark.parametrize("seed", np.arange(3))
- def test_solver_consistency(
- solver, proportion_nonzero, n_samples, dtype, sparse_X, seed
- ):
- alpha = 1.0
- noise = 50.0 if proportion_nonzero > 0.9 else 500.0
- X, y = _make_sparse_offset_regression(
- bias=10,
- n_features=30,
- proportion_nonzero=proportion_nonzero,
- noise=noise,
- random_state=seed,
- n_samples=n_samples,
- )
- # Manually scale the data to avoid pathological cases. We use
- # minmax_scale to deal with the sparse case without breaking
- # the sparsity pattern.
- X = minmax_scale(X)
- svd_ridge = Ridge(solver="svd", alpha=alpha).fit(X, y)
- X = X.astype(dtype, copy=False)
- y = y.astype(dtype, copy=False)
- if sparse_X:
- X = sp.csr_matrix(X)
- if solver == "ridgecv":
- ridge = RidgeCV(alphas=[alpha])
- else:
- ridge = Ridge(solver=solver, tol=1e-10, alpha=alpha)
- ridge.fit(X, y)
- assert_allclose(ridge.coef_, svd_ridge.coef_, atol=1e-3, rtol=1e-3)
- assert_allclose(ridge.intercept_, svd_ridge.intercept_, atol=1e-3, rtol=1e-3)
- @pytest.mark.parametrize("gcv_mode", ["svd", "eigen"])
- @pytest.mark.parametrize("X_constructor", [np.asarray, sp.csr_matrix])
- @pytest.mark.parametrize("X_shape", [(11, 8), (11, 20)])
- @pytest.mark.parametrize("fit_intercept", [True, False])
- @pytest.mark.parametrize(
- "y_shape, noise",
- [
- ((11,), 1.0),
- ((11, 1), 30.0),
- ((11, 3), 150.0),
- ],
- )
- def test_ridge_gcv_vs_ridge_loo_cv(
- gcv_mode, X_constructor, X_shape, y_shape, fit_intercept, noise
- ):
- n_samples, n_features = X_shape
- n_targets = y_shape[-1] if len(y_shape) == 2 else 1
- X, y = _make_sparse_offset_regression(
- n_samples=n_samples,
- n_features=n_features,
- n_targets=n_targets,
- random_state=0,
- shuffle=False,
- noise=noise,
- n_informative=5,
- )
- y = y.reshape(y_shape)
- alphas = [1e-3, 0.1, 1.0, 10.0, 1e3]
- loo_ridge = RidgeCV(
- cv=n_samples,
- fit_intercept=fit_intercept,
- alphas=alphas,
- scoring="neg_mean_squared_error",
- )
- gcv_ridge = RidgeCV(
- gcv_mode=gcv_mode,
- fit_intercept=fit_intercept,
- alphas=alphas,
- )
- loo_ridge.fit(X, y)
- X_gcv = X_constructor(X)
- gcv_ridge.fit(X_gcv, y)
- assert gcv_ridge.alpha_ == pytest.approx(loo_ridge.alpha_)
- assert_allclose(gcv_ridge.coef_, loo_ridge.coef_, rtol=1e-3)
- assert_allclose(gcv_ridge.intercept_, loo_ridge.intercept_, rtol=1e-3)
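- # Illustrative sketch (not a collected test): the efficient leave-one-out path
- # exercised above is what RidgeCV uses by default (cv=None), so a minimal
- # usage looks like the following. The helper name is hypothetical.
- def _sketch_ridgecv_default_loo(seed=0):
-     rng = np.random.RandomState(seed)
-     X = rng.randn(30, 4)
-     y = X @ rng.randn(4) + 0.1 * rng.randn(30)
-     model = RidgeCV(alphas=[1e-2, 1e-1, 1.0, 10.0]).fit(X, y)
-     return model.alpha_, model.coef_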
- def test_ridge_loo_cv_asym_scoring():
- # checking on asymmetric scoring
- scoring = "explained_variance"
- n_samples, n_features = 10, 5
- n_targets = 1
- X, y = _make_sparse_offset_regression(
- n_samples=n_samples,
- n_features=n_features,
- n_targets=n_targets,
- random_state=0,
- shuffle=False,
- noise=1,
- n_informative=5,
- )
- alphas = [1e-3, 0.1, 1.0, 10.0, 1e3]
- loo_ridge = RidgeCV(
- cv=n_samples, fit_intercept=True, alphas=alphas, scoring=scoring
- )
- gcv_ridge = RidgeCV(fit_intercept=True, alphas=alphas, scoring=scoring)
- loo_ridge.fit(X, y)
- gcv_ridge.fit(X, y)
- assert gcv_ridge.alpha_ == pytest.approx(loo_ridge.alpha_)
- assert_allclose(gcv_ridge.coef_, loo_ridge.coef_, rtol=1e-3)
- assert_allclose(gcv_ridge.intercept_, loo_ridge.intercept_, rtol=1e-3)
- @pytest.mark.parametrize("gcv_mode", ["svd", "eigen"])
- @pytest.mark.parametrize("X_constructor", [np.asarray, sp.csr_matrix])
- @pytest.mark.parametrize("n_features", [8, 20])
- @pytest.mark.parametrize(
- "y_shape, fit_intercept, noise",
- [
- ((11,), True, 1.0),
- ((11, 1), True, 20.0),
- ((11, 3), True, 150.0),
- ((11, 3), False, 30.0),
- ],
- )
- def test_ridge_gcv_sample_weights(
- gcv_mode, X_constructor, fit_intercept, n_features, y_shape, noise
- ):
- alphas = [1e-3, 0.1, 1.0, 10.0, 1e3]
- rng = np.random.RandomState(0)
- n_targets = y_shape[-1] if len(y_shape) == 2 else 1
- X, y = _make_sparse_offset_regression(
- n_samples=11,
- n_features=n_features,
- n_targets=n_targets,
- random_state=0,
- shuffle=False,
- noise=noise,
- )
- y = y.reshape(y_shape)
- sample_weight = 3 * rng.randn(len(X))
- sample_weight = (sample_weight - sample_weight.min() + 1).astype(int)
- indices = np.repeat(np.arange(X.shape[0]), sample_weight)
- sample_weight = sample_weight.astype(float)
- X_tiled, y_tiled = X[indices], y[indices]
- cv = GroupKFold(n_splits=X.shape[0])
- splits = cv.split(X_tiled, y_tiled, groups=indices)
- kfold = RidgeCV(
- alphas=alphas,
- cv=splits,
- scoring="neg_mean_squared_error",
- fit_intercept=fit_intercept,
- )
- kfold.fit(X_tiled, y_tiled)
- ridge_reg = Ridge(alpha=kfold.alpha_, fit_intercept=fit_intercept)
- splits = cv.split(X_tiled, y_tiled, groups=indices)
- predictions = cross_val_predict(ridge_reg, X_tiled, y_tiled, cv=splits)
- kfold_errors = (y_tiled - predictions) ** 2
- kfold_errors = [
- np.sum(kfold_errors[indices == i], axis=0) for i in np.arange(X.shape[0])
- ]
- kfold_errors = np.asarray(kfold_errors)
- X_gcv = X_constructor(X)
- gcv_ridge = RidgeCV(
- alphas=alphas,
- store_cv_values=True,
- gcv_mode=gcv_mode,
- fit_intercept=fit_intercept,
- )
- gcv_ridge.fit(X_gcv, y, sample_weight=sample_weight)
- if len(y_shape) == 2:
- gcv_errors = gcv_ridge.cv_values_[:, :, alphas.index(kfold.alpha_)]
- else:
- gcv_errors = gcv_ridge.cv_values_[:, alphas.index(kfold.alpha_)]
- assert kfold.alpha_ == pytest.approx(gcv_ridge.alpha_)
- assert_allclose(gcv_errors, kfold_errors, rtol=1e-3)
- assert_allclose(gcv_ridge.coef_, kfold.coef_, rtol=1e-3)
- assert_allclose(gcv_ridge.intercept_, kfold.intercept_, rtol=1e-3)
- @pytest.mark.parametrize("sparse", [True, False])
- @pytest.mark.parametrize(
- "mode, mode_n_greater_than_p, mode_p_greater_than_n",
- [
- (None, "svd", "eigen"),
- ("auto", "svd", "eigen"),
- ("eigen", "eigen", "eigen"),
- ("svd", "svd", "svd"),
- ],
- )
- def test_check_gcv_mode_choice(
- sparse, mode, mode_n_greater_than_p, mode_p_greater_than_n
- ):
- X, _ = make_regression(n_samples=5, n_features=2)
- if sparse:
- X = sp.csr_matrix(X)
- assert _check_gcv_mode(X, mode) == mode_n_greater_than_p
- assert _check_gcv_mode(X.T, mode) == mode_p_greater_than_n
- def _test_ridge_loo(filter_):
- # test that it works with both dense and sparse matrices
- n_samples = X_diabetes.shape[0]
- ret = []
- fit_intercept = filter_ == DENSE_FILTER
- ridge_gcv = _RidgeGCV(fit_intercept=fit_intercept)
- # check best alpha
- ridge_gcv.fit(filter_(X_diabetes), y_diabetes)
- alpha_ = ridge_gcv.alpha_
- ret.append(alpha_)
- # check that we get same best alpha with custom loss_func
- f = ignore_warnings
- scoring = make_scorer(mean_squared_error, greater_is_better=False)
- ridge_gcv2 = RidgeCV(fit_intercept=False, scoring=scoring)
- f(ridge_gcv2.fit)(filter_(X_diabetes), y_diabetes)
- assert ridge_gcv2.alpha_ == pytest.approx(alpha_)
- # check that we get same best alpha with custom score_func
- def func(x, y):
- return -mean_squared_error(x, y)
- scoring = make_scorer(func)
- ridge_gcv3 = RidgeCV(fit_intercept=False, scoring=scoring)
- f(ridge_gcv3.fit)(filter_(X_diabetes), y_diabetes)
- assert ridge_gcv3.alpha_ == pytest.approx(alpha_)
- # check that we get same best alpha with a scorer
- scorer = get_scorer("neg_mean_squared_error")
- ridge_gcv4 = RidgeCV(fit_intercept=False, scoring=scorer)
- ridge_gcv4.fit(filter_(X_diabetes), y_diabetes)
- assert ridge_gcv4.alpha_ == pytest.approx(alpha_)
- # check that we get same best alpha with sample weights
- if filter_ == DENSE_FILTER:
- ridge_gcv.fit(filter_(X_diabetes), y_diabetes, sample_weight=np.ones(n_samples))
- assert ridge_gcv.alpha_ == pytest.approx(alpha_)
- # simulate several responses
- Y = np.vstack((y_diabetes, y_diabetes)).T
- ridge_gcv.fit(filter_(X_diabetes), Y)
- Y_pred = ridge_gcv.predict(filter_(X_diabetes))
- ridge_gcv.fit(filter_(X_diabetes), y_diabetes)
- y_pred = ridge_gcv.predict(filter_(X_diabetes))
- assert_allclose(np.vstack((y_pred, y_pred)).T, Y_pred, rtol=1e-5)
- return ret
- def _test_ridge_cv(filter_):
- ridge_cv = RidgeCV()
- ridge_cv.fit(filter_(X_diabetes), y_diabetes)
- ridge_cv.predict(filter_(X_diabetes))
- assert len(ridge_cv.coef_.shape) == 1
- assert type(ridge_cv.intercept_) == np.float64
- cv = KFold(5)
- ridge_cv.set_params(cv=cv)
- ridge_cv.fit(filter_(X_diabetes), y_diabetes)
- ridge_cv.predict(filter_(X_diabetes))
- assert len(ridge_cv.coef_.shape) == 1
- assert type(ridge_cv.intercept_) == np.float64
- @pytest.mark.parametrize(
- "ridge, make_dataset",
- [
- (RidgeCV(store_cv_values=False), make_regression),
- (RidgeClassifierCV(store_cv_values=False), make_classification),
- ],
- )
- def test_ridge_gcv_cv_values_not_stored(ridge, make_dataset):
- # Check that `cv_values_` is not stored when store_cv_values is False
- X, y = make_dataset(n_samples=6, random_state=42)
- ridge.fit(X, y)
- assert not hasattr(ridge, "cv_values_")
- @pytest.mark.parametrize(
- "ridge, make_dataset",
- [(RidgeCV(), make_regression), (RidgeClassifierCV(), make_classification)],
- )
- @pytest.mark.parametrize("cv", [None, 3])
- def test_ridge_best_score(ridge, make_dataset, cv):
- # check that the best_score_ is stored
- X, y = make_dataset(n_samples=6, random_state=42)
- ridge.set_params(store_cv_values=False, cv=cv)
- ridge.fit(X, y)
- assert hasattr(ridge, "best_score_")
- assert isinstance(ridge.best_score_, float)
- def test_ridge_cv_individual_penalties():
- # Tests the ridge_cv object optimizing individual penalties for each target
- rng = np.random.RandomState(42)
- # Create random dataset with multiple targets. Each target should have
- # a different optimal alpha.
- n_samples, n_features, n_targets = 20, 5, 3
- y = rng.randn(n_samples, n_targets)
- X = (
- np.dot(y[:, [0]], np.ones((1, n_features)))
- + np.dot(y[:, [1]], 0.05 * np.ones((1, n_features)))
- + np.dot(y[:, [2]], 0.001 * np.ones((1, n_features)))
- + rng.randn(n_samples, n_features)
- )
- alphas = (1, 100, 1000)
- # Find optimal alpha for each target
- optimal_alphas = [RidgeCV(alphas=alphas).fit(X, target).alpha_ for target in y.T]
- # Find optimal alphas for all targets simultaneously
- ridge_cv = RidgeCV(alphas=alphas, alpha_per_target=True).fit(X, y)
- assert_array_equal(optimal_alphas, ridge_cv.alpha_)
- # The resulting regression weights should incorporate the different
- # alpha values.
- assert_array_almost_equal(
- Ridge(alpha=ridge_cv.alpha_).fit(X, y).coef_, ridge_cv.coef_
- )
- # Test shape of alpha_ and cv_values_
- ridge_cv = RidgeCV(alphas=alphas, alpha_per_target=True, store_cv_values=True).fit(
- X, y
- )
- assert ridge_cv.alpha_.shape == (n_targets,)
- assert ridge_cv.best_score_.shape == (n_targets,)
- assert ridge_cv.cv_values_.shape == (n_samples, len(alphas), n_targets)
- # Test edge case of there being only one alpha value
- ridge_cv = RidgeCV(alphas=1, alpha_per_target=True, store_cv_values=True).fit(X, y)
- assert ridge_cv.alpha_.shape == (n_targets,)
- assert ridge_cv.best_score_.shape == (n_targets,)
- assert ridge_cv.cv_values_.shape == (n_samples, n_targets, 1)
- # Test edge case of there being only one target
- ridge_cv = RidgeCV(alphas=alphas, alpha_per_target=True, store_cv_values=True).fit(
- X, y[:, 0]
- )
- assert np.isscalar(ridge_cv.alpha_)
- assert np.isscalar(ridge_cv.best_score_)
- assert ridge_cv.cv_values_.shape == (n_samples, len(alphas))
- # Try with a custom scoring function
- ridge_cv = RidgeCV(alphas=alphas, alpha_per_target=True, scoring="r2").fit(X, y)
- assert_array_equal(optimal_alphas, ridge_cv.alpha_)
- assert_array_almost_equal(
- Ridge(alpha=ridge_cv.alpha_).fit(X, y).coef_, ridge_cv.coef_
- )
- # Using a custom CV object should throw an error in combination with
- # alpha_per_target=True
- ridge_cv = RidgeCV(alphas=alphas, cv=LeaveOneOut(), alpha_per_target=True)
- msg = "cv!=None and alpha_per_target=True are incompatible"
- with pytest.raises(ValueError, match=msg):
- ridge_cv.fit(X, y)
- ridge_cv = RidgeCV(alphas=alphas, cv=6, alpha_per_target=True)
- with pytest.raises(ValueError, match=msg):
- ridge_cv.fit(X, y)
- def _test_ridge_diabetes(filter_):
- ridge = Ridge(fit_intercept=False)
- ridge.fit(filter_(X_diabetes), y_diabetes)
- return np.round(ridge.score(filter_(X_diabetes), y_diabetes), 5)
- def _test_multi_ridge_diabetes(filter_):
- # simulate several responses
- Y = np.vstack((y_diabetes, y_diabetes)).T
- n_features = X_diabetes.shape[1]
- ridge = Ridge(fit_intercept=False)
- ridge.fit(filter_(X_diabetes), Y)
- assert ridge.coef_.shape == (2, n_features)
- Y_pred = ridge.predict(filter_(X_diabetes))
- ridge.fit(filter_(X_diabetes), y_diabetes)
- y_pred = ridge.predict(filter_(X_diabetes))
- assert_array_almost_equal(np.vstack((y_pred, y_pred)).T, Y_pred, decimal=3)
- def _test_ridge_classifiers(filter_):
- n_classes = np.unique(y_iris).shape[0]
- n_features = X_iris.shape[1]
- for reg in (RidgeClassifier(), RidgeClassifierCV()):
- reg.fit(filter_(X_iris), y_iris)
- assert reg.coef_.shape == (n_classes, n_features)
- y_pred = reg.predict(filter_(X_iris))
- assert np.mean(y_iris == y_pred) > 0.79
- cv = KFold(5)
- reg = RidgeClassifierCV(cv=cv)
- reg.fit(filter_(X_iris), y_iris)
- y_pred = reg.predict(filter_(X_iris))
- assert np.mean(y_iris == y_pred) >= 0.8
- @pytest.mark.parametrize("scoring", [None, "accuracy", _accuracy_callable])
- @pytest.mark.parametrize("cv", [None, KFold(5)])
- @pytest.mark.parametrize("filter_", [DENSE_FILTER, SPARSE_FILTER])
- def test_ridge_classifier_with_scoring(filter_, scoring, cv):
- # non-regression test for #14672
- # check that RidgeClassifierCV works with all sort of scoring and
- # cross-validation
- scoring_ = make_scorer(scoring) if callable(scoring) else scoring
- clf = RidgeClassifierCV(scoring=scoring_, cv=cv)
- # Smoke test to check that fit/predict does not raise error
- clf.fit(filter_(X_iris), y_iris).predict(filter_(X_iris))
- @pytest.mark.parametrize("cv", [None, KFold(5)])
- @pytest.mark.parametrize("filter_", [DENSE_FILTER, SPARSE_FILTER])
- def test_ridge_regression_custom_scoring(filter_, cv):
- # check that custom scoring is working as expected
- # check the tie breaking strategy (keep the first alpha tried)
- def _dummy_score(y_test, y_pred):
- return 0.42
- alphas = np.logspace(-2, 2, num=5)
- clf = RidgeClassifierCV(alphas=alphas, scoring=make_scorer(_dummy_score), cv=cv)
- clf.fit(filter_(X_iris), y_iris)
- assert clf.best_score_ == pytest.approx(0.42)
- # In case of a tie in score, the first alpha will be kept
- assert clf.alpha_ == pytest.approx(alphas[0])
- def _test_tolerance(filter_):
- ridge = Ridge(tol=1e-5, fit_intercept=False)
- ridge.fit(filter_(X_diabetes), y_diabetes)
- score = ridge.score(filter_(X_diabetes), y_diabetes)
- ridge2 = Ridge(tol=1e-3, fit_intercept=False)
- ridge2.fit(filter_(X_diabetes), y_diabetes)
- score2 = ridge2.score(filter_(X_diabetes), y_diabetes)
- assert score >= score2
- def check_dense_sparse(test_func):
- # test dense matrix
- ret_dense = test_func(DENSE_FILTER)
- # test sparse matrix
- ret_sparse = test_func(SPARSE_FILTER)
- # test that the outputs are the same
- if ret_dense is not None and ret_sparse is not None:
- assert_array_almost_equal(ret_dense, ret_sparse, decimal=3)
- @pytest.mark.parametrize(
- "test_func",
- (
- _test_ridge_loo,
- _test_ridge_cv,
- _test_ridge_diabetes,
- _test_multi_ridge_diabetes,
- _test_ridge_classifiers,
- _test_tolerance,
- ),
- )
- def test_dense_sparse(test_func):
- check_dense_sparse(test_func)
- def test_class_weights():
- # Test class weights.
- X = np.array([[-1.0, -1.0], [-1.0, 0], [-0.8, -1.0], [1.0, 1.0], [1.0, 0.0]])
- y = [1, 1, 1, -1, -1]
- reg = RidgeClassifier(class_weight=None)
- reg.fit(X, y)
- assert_array_equal(reg.predict([[0.2, -1.0]]), np.array([1]))
- # we give a small weight to class 1
- reg = RidgeClassifier(class_weight={1: 0.001})
- reg.fit(X, y)
- # now the hyperplane should rotate clockwise and
- # the prediction on this point should shift
- assert_array_equal(reg.predict([[0.2, -1.0]]), np.array([-1]))
- # check if class_weight = 'balanced' can handle negative labels.
- reg = RidgeClassifier(class_weight="balanced")
- reg.fit(X, y)
- assert_array_equal(reg.predict([[0.2, -1.0]]), np.array([1]))
- # class_weight = 'balanced' and class_weight = None should return
- # the same values when y has an equal number of all labels
- X = np.array([[-1.0, -1.0], [-1.0, 0], [-0.8, -1.0], [1.0, 1.0]])
- y = [1, 1, -1, -1]
- reg = RidgeClassifier(class_weight=None)
- reg.fit(X, y)
- rega = RidgeClassifier(class_weight="balanced")
- rega.fit(X, y)
- assert len(rega.classes_) == 2
- assert_array_almost_equal(reg.coef_, rega.coef_)
- assert_array_almost_equal(reg.intercept_, rega.intercept_)
- @pytest.mark.parametrize("reg", (RidgeClassifier, RidgeClassifierCV))
- def test_class_weight_vs_sample_weight(reg):
- """Check class_weights resemble sample_weights behavior."""
- # Iris is balanced, so no effect expected for using 'balanced' weights
- reg1 = reg()
- reg1.fit(iris.data, iris.target)
- reg2 = reg(class_weight="balanced")
- reg2.fit(iris.data, iris.target)
- assert_almost_equal(reg1.coef_, reg2.coef_)
- # Inflate importance of class 1, check against user-defined weights
- sample_weight = np.ones(iris.target.shape)
- sample_weight[iris.target == 1] *= 100
- class_weight = {0: 1.0, 1: 100.0, 2: 1.0}
- reg1 = reg()
- reg1.fit(iris.data, iris.target, sample_weight)
- reg2 = reg(class_weight=class_weight)
- reg2.fit(iris.data, iris.target)
- assert_almost_equal(reg1.coef_, reg2.coef_)
- # Check that sample_weight and class_weight are multiplicative
- reg1 = reg()
- reg1.fit(iris.data, iris.target, sample_weight**2)
- reg2 = reg(class_weight=class_weight)
- reg2.fit(iris.data, iris.target, sample_weight)
- assert_almost_equal(reg1.coef_, reg2.coef_)
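- # Illustrative sketch (not a collected test): class_weight acts like a
- # per-sample weight looked up from each sample's class, which is the
- # relationship the test above relies on. The helper name is hypothetical.
- def _sketch_class_weight_as_sample_weight(class_weight, y):
-     # Expand a {label: weight} dict to one weight per sample.
-     return np.asarray([class_weight[label] for label in y], dtype=float)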
- def test_class_weights_cv():
- # Test class weights for cross validated ridge classifier.
- X = np.array([[-1.0, -1.0], [-1.0, 0], [-0.8, -1.0], [1.0, 1.0], [1.0, 0.0]])
- y = [1, 1, 1, -1, -1]
- reg = RidgeClassifierCV(class_weight=None, alphas=[0.01, 0.1, 1])
- reg.fit(X, y)
- # we give a small weight to class 1
- reg = RidgeClassifierCV(class_weight={1: 0.001}, alphas=[0.01, 0.1, 1, 10])
- reg.fit(X, y)
- assert_array_equal(reg.predict([[-0.2, 2]]), np.array([-1]))
- @pytest.mark.parametrize(
- "scoring", [None, "neg_mean_squared_error", _mean_squared_error_callable]
- )
- def test_ridgecv_store_cv_values(scoring):
- rng = np.random.RandomState(42)
- n_samples = 8
- n_features = 5
- x = rng.randn(n_samples, n_features)
- alphas = [1e-1, 1e0, 1e1]
- n_alphas = len(alphas)
- scoring_ = make_scorer(scoring) if callable(scoring) else scoring
- r = RidgeCV(alphas=alphas, cv=None, store_cv_values=True, scoring=scoring_)
- # with len(y.shape) == 1
- y = rng.randn(n_samples)
- r.fit(x, y)
- assert r.cv_values_.shape == (n_samples, n_alphas)
- # with len(y.shape) == 2
- n_targets = 3
- y = rng.randn(n_samples, n_targets)
- r.fit(x, y)
- assert r.cv_values_.shape == (n_samples, n_targets, n_alphas)
- r = RidgeCV(cv=3, store_cv_values=True, scoring=scoring)
- with pytest.raises(ValueError, match="cv!=None and store_cv_values"):
- r.fit(x, y)
- @pytest.mark.parametrize("scoring", [None, "accuracy", _accuracy_callable])
- def test_ridge_classifier_cv_store_cv_values(scoring):
- x = np.array([[-1.0, -1.0], [-1.0, 0], [-0.8, -1.0], [1.0, 1.0], [1.0, 0.0]])
- y = np.array([1, 1, 1, -1, -1])
- n_samples = x.shape[0]
- alphas = [1e-1, 1e0, 1e1]
- n_alphas = len(alphas)
- scoring_ = make_scorer(scoring) if callable(scoring) else scoring
- r = RidgeClassifierCV(
- alphas=alphas, cv=None, store_cv_values=True, scoring=scoring_
- )
- # with len(y.shape) == 1
- n_targets = 1
- r.fit(x, y)
- assert r.cv_values_.shape == (n_samples, n_targets, n_alphas)
- # with len(y.shape) == 2
- y = np.array(
- [[1, 1, 1, -1, -1], [1, -1, 1, -1, 1], [-1, -1, 1, -1, -1]]
- ).transpose()
- n_targets = y.shape[1]
- r.fit(x, y)
- assert r.cv_values_.shape == (n_samples, n_targets, n_alphas)
- @pytest.mark.parametrize("Estimator", [RidgeCV, RidgeClassifierCV])
- def test_ridgecv_alphas_conversion(Estimator):
- rng = np.random.RandomState(0)
- alphas = (0.1, 1.0, 10.0)
- n_samples, n_features = 5, 5
- if Estimator is RidgeCV:
- y = rng.randn(n_samples)
- else:
- y = rng.randint(0, 2, n_samples)
- X = rng.randn(n_samples, n_features)
- ridge_est = Estimator(alphas=alphas)
- assert (
- ridge_est.alphas is alphas
- ), f"`alphas` was mutated in `{Estimator.__name__}.__init__`"
- ridge_est.fit(X, y)
- assert_array_equal(ridge_est.alphas, np.asarray(alphas))
- def test_ridgecv_sample_weight():
- rng = np.random.RandomState(0)
- alphas = (0.1, 1.0, 10.0)
- # There are different algorithms for n_samples > n_features
- # and the opposite, so test them both.
- for n_samples, n_features in ((6, 5), (5, 10)):
- y = rng.randn(n_samples)
- X = rng.randn(n_samples, n_features)
- sample_weight = 1.0 + rng.rand(n_samples)
- cv = KFold(5)
- ridgecv = RidgeCV(alphas=alphas, cv=cv)
- ridgecv.fit(X, y, sample_weight=sample_weight)
- # Check using GridSearchCV directly
- parameters = {"alpha": alphas}
- gs = GridSearchCV(Ridge(), parameters, cv=cv)
- gs.fit(X, y, sample_weight=sample_weight)
- assert ridgecv.alpha_ == gs.best_estimator_.alpha
- assert_array_almost_equal(ridgecv.coef_, gs.best_estimator_.coef_)
- def test_raises_value_error_if_sample_weights_greater_than_1d():
- # Sample weights must be either scalar or 1D
- n_sampless = [2, 3]
- n_featuress = [3, 2]
- rng = np.random.RandomState(42)
- for n_samples, n_features in zip(n_sampless, n_featuress):
- X = rng.randn(n_samples, n_features)
- y = rng.randn(n_samples)
- sample_weights_OK = rng.randn(n_samples) ** 2 + 1
- sample_weights_OK_1 = 1.0
- sample_weights_OK_2 = 2.0
- sample_weights_not_OK = sample_weights_OK[:, np.newaxis]
- sample_weights_not_OK_2 = sample_weights_OK[np.newaxis, :]
- ridge = Ridge(alpha=1)
- # make sure the "OK" sample weights actually work
- ridge.fit(X, y, sample_weights_OK)
- ridge.fit(X, y, sample_weights_OK_1)
- ridge.fit(X, y, sample_weights_OK_2)
- def fit_ridge_not_ok():
- ridge.fit(X, y, sample_weights_not_OK)
- def fit_ridge_not_ok_2():
- ridge.fit(X, y, sample_weights_not_OK_2)
- err_msg = "Sample weights must be 1D array or scalar"
- with pytest.raises(ValueError, match=err_msg):
- fit_ridge_not_ok()
- err_msg = "Sample weights must be 1D array or scalar"
- with pytest.raises(ValueError, match=err_msg):
- fit_ridge_not_ok_2()
- def test_sparse_design_with_sample_weights():
- # Sample weights must work with sparse matrices
- n_sampless = [2, 3]
- n_featuress = [3, 2]
- rng = np.random.RandomState(42)
- sparse_matrix_converters = [
- sp.coo_matrix,
- sp.csr_matrix,
- sp.csc_matrix,
- sp.lil_matrix,
- sp.dok_matrix,
- ]
- sparse_ridge = Ridge(alpha=1.0, fit_intercept=False)
- dense_ridge = Ridge(alpha=1.0, fit_intercept=False)
- for n_samples, n_features in zip(n_sampless, n_featuress):
- X = rng.randn(n_samples, n_features)
- y = rng.randn(n_samples)
- sample_weights = rng.randn(n_samples) ** 2 + 1
- for sparse_converter in sparse_matrix_converters:
- X_sparse = sparse_converter(X)
- sparse_ridge.fit(X_sparse, y, sample_weight=sample_weights)
- dense_ridge.fit(X, y, sample_weight=sample_weights)
- assert_array_almost_equal(sparse_ridge.coef_, dense_ridge.coef_, decimal=6)
- def test_ridgecv_int_alphas():
- X = np.array([[-1.0, -1.0], [-1.0, 0], [-0.8, -1.0], [1.0, 1.0], [1.0, 0.0]])
- y = [1, 1, 1, -1, -1]
- # Integers
- ridge = RidgeCV(alphas=(1, 10, 100))
- ridge.fit(X, y)
- @pytest.mark.parametrize("Estimator", [RidgeCV, RidgeClassifierCV])
- @pytest.mark.parametrize(
- "params, err_type, err_msg",
- [
- ({"alphas": (1, -1, -100)}, ValueError, r"alphas\[1\] == -1, must be > 0.0"),
- (
- {"alphas": (-0.1, -1.0, -10.0)},
- ValueError,
- r"alphas\[0\] == -0.1, must be > 0.0",
- ),
- (
- {"alphas": (1, 1.0, "1")},
- TypeError,
- r"alphas\[2\] must be an instance of float, not str",
- ),
- ],
- )
- def test_ridgecv_alphas_validation(Estimator, params, err_type, err_msg):
- """Check the `alphas` validation in RidgeCV and RidgeClassifierCV."""
- n_samples, n_features = 5, 5
- X = rng.randn(n_samples, n_features)
- y = rng.randint(0, 2, n_samples)
- with pytest.raises(err_type, match=err_msg):
- Estimator(**params).fit(X, y)
- @pytest.mark.parametrize("Estimator", [RidgeCV, RidgeClassifierCV])
- def test_ridgecv_alphas_scalar(Estimator):
- """Check the case when `alphas` is a scalar.
- This case was supported in the past when `alphas` were converted
- into an array in `__init__`.
- We add this test to ensure backward compatibility.
- """
- n_samples, n_features = 5, 5
- X = rng.randn(n_samples, n_features)
- if Estimator is RidgeCV:
- y = rng.randn(n_samples)
- else:
- y = rng.randint(0, 2, n_samples)
- Estimator(alphas=1).fit(X, y)
- def test_raises_value_error_if_solver_not_supported():
- # Tests whether a ValueError is raised if an unrecognized solver
- # is passed to ridge_regression
- wrong_solver = "This is not a solver (MagritteSolveCV QuantumBitcoin)"
- exception = ValueError
- message = (
- "Known solvers are 'sparse_cg', 'cholesky', 'svd'"
- " 'lsqr', 'sag' or 'saga'. Got %s." % wrong_solver
- )
- def func():
- X = np.eye(3)
- y = np.ones(3)
- ridge_regression(X, y, alpha=1.0, solver=wrong_solver)
- with pytest.raises(exception, match=message):
- func()
- def test_sparse_cg_max_iter():
- reg = Ridge(solver="sparse_cg", max_iter=1)
- reg.fit(X_diabetes, y_diabetes)
- assert reg.coef_.shape[0] == X_diabetes.shape[1]
- @ignore_warnings
- def test_n_iter():
- # Test that self.n_iter_ is correct.
- n_targets = 2
- X, y = X_diabetes, y_diabetes
- y_n = np.tile(y, (n_targets, 1)).T
- for max_iter in range(1, 4):
- for solver in ("sag", "saga", "lsqr"):
- reg = Ridge(solver=solver, max_iter=max_iter, tol=1e-12)
- reg.fit(X, y_n)
- assert_array_equal(reg.n_iter_, np.tile(max_iter, n_targets))
- for solver in ("sparse_cg", "svd", "cholesky"):
- reg = Ridge(solver=solver, max_iter=1, tol=1e-1)
- reg.fit(X, y_n)
- assert reg.n_iter_ is None
- @pytest.mark.parametrize("solver", ["lsqr", "sparse_cg", "lbfgs", "auto"])
- @pytest.mark.parametrize("with_sample_weight", [True, False])
- def test_ridge_fit_intercept_sparse(solver, with_sample_weight, global_random_seed):
- """Check that ridge finds the same coefs and intercept on dense and sparse input
- in the presence of sample weights.
- For now only sparse_cg and lbfgs can correctly fit an intercept
- with sparse X with default tol and max_iter.
- 'sag' is tested separately in test_ridge_fit_intercept_sparse_sag because it
- requires more iterations and should raise a warning if default max_iter is used.
- Other solvers raise an exception, as checked in
- test_ridge_fit_intercept_sparse_error
- """
- positive = solver == "lbfgs"
- X, y = _make_sparse_offset_regression(
- n_features=20, random_state=global_random_seed, positive=positive
- )
- sample_weight = None
- if with_sample_weight:
- rng = np.random.RandomState(global_random_seed)
- sample_weight = 1.0 + rng.uniform(size=X.shape[0])
- # "auto" should switch to "sparse_cg" when X is sparse
- # so the reference we use for both ("auto" and "sparse_cg") is
- # Ridge(solver="sparse_cg"), fitted using the dense representation (note
- # that "sparse_cg" can fit sparse or dense data)
- dense_solver = "sparse_cg" if solver == "auto" else solver
- dense_ridge = Ridge(solver=dense_solver, tol=1e-12, positive=positive)
- sparse_ridge = Ridge(solver=solver, tol=1e-12, positive=positive)
- dense_ridge.fit(X, y, sample_weight=sample_weight)
- sparse_ridge.fit(sp.csr_matrix(X), y, sample_weight=sample_weight)
- assert_allclose(dense_ridge.intercept_, sparse_ridge.intercept_)
- assert_allclose(dense_ridge.coef_, sparse_ridge.coef_, rtol=5e-7)
- @pytest.mark.parametrize("solver", ["saga", "svd", "cholesky"])
- def test_ridge_fit_intercept_sparse_error(solver):
- X, y = _make_sparse_offset_regression(n_features=20, random_state=0)
- X_csr = sp.csr_matrix(X)
- sparse_ridge = Ridge(solver=solver)
- err_msg = "solver='{}' does not support".format(solver)
- with pytest.raises(ValueError, match=err_msg):
- sparse_ridge.fit(X_csr, y)
- @pytest.mark.parametrize("with_sample_weight", [True, False])
- def test_ridge_fit_intercept_sparse_sag(with_sample_weight, global_random_seed):
- X, y = _make_sparse_offset_regression(
- n_features=5, n_samples=20, random_state=global_random_seed, X_offset=5.0
- )
- if with_sample_weight:
- rng = np.random.RandomState(global_random_seed)
- sample_weight = 1.0 + rng.uniform(size=X.shape[0])
- else:
- sample_weight = None
- X_csr = sp.csr_matrix(X)
- params = dict(
- alpha=1.0, solver="sag", fit_intercept=True, tol=1e-10, max_iter=100000
- )
- dense_ridge = Ridge(**params)
- sparse_ridge = Ridge(**params)
- dense_ridge.fit(X, y, sample_weight=sample_weight)
- with warnings.catch_warnings():
- warnings.simplefilter("error", UserWarning)
- sparse_ridge.fit(X_csr, y, sample_weight=sample_weight)
- assert_allclose(dense_ridge.intercept_, sparse_ridge.intercept_, rtol=1e-4)
- assert_allclose(dense_ridge.coef_, sparse_ridge.coef_, rtol=1e-4)
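- # with the default max_iter (None), fitting an intercept on sparse input with
- # 'sag' is expected to trigger a UserWarning about needing many iterations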
- with pytest.warns(UserWarning, match='"sag" solver requires.*'):
- Ridge(solver="sag", fit_intercept=True, tol=1e-3, max_iter=None).fit(X_csr, y)
- @pytest.mark.parametrize("return_intercept", [False, True])
- @pytest.mark.parametrize("sample_weight", [None, np.ones(1000)])
- @pytest.mark.parametrize("arr_type", [np.array, sp.csr_matrix])
- @pytest.mark.parametrize(
- "solver", ["auto", "sparse_cg", "cholesky", "lsqr", "sag", "saga", "lbfgs"]
- )
- def test_ridge_regression_check_arguments_validity(
- return_intercept, sample_weight, arr_type, solver
- ):
- """check if all combinations of arguments give valid estimations"""
- # test excludes 'svd' solver because it raises exception for sparse inputs
- rng = check_random_state(42)
- X = rng.rand(1000, 3)
- true_coefs = [1, 2, 0.1]
- y = np.dot(X, true_coefs)
- true_intercept = 0.0
- if return_intercept:
- true_intercept = 10000.0
- y += true_intercept
- X_testing = arr_type(X)
- alpha, tol = 1e-3, 1e-6
- atol = 1e-3 if _IS_32BIT else 1e-4
- positive = solver == "lbfgs"
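- # ridge_regression only supports return_intercept=True with the 'sag' solver
- # ('auto' switches to it); other solvers are expected to raise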
- if solver not in ["sag", "auto"] and return_intercept:
- with pytest.raises(ValueError, match="In Ridge, only 'sag' solver"):
- ridge_regression(
- X_testing,
- y,
- alpha=alpha,
- solver=solver,
- sample_weight=sample_weight,
- return_intercept=return_intercept,
- positive=positive,
- tol=tol,
- )
- return
- out = ridge_regression(
- X_testing,
- y,
- alpha=alpha,
- solver=solver,
- sample_weight=sample_weight,
- positive=positive,
- return_intercept=return_intercept,
- tol=tol,
- )
- if return_intercept:
- coef, intercept = out
- assert_allclose(coef, true_coefs, rtol=0, atol=atol)
- assert_allclose(intercept, true_intercept, rtol=0, atol=atol)
- else:
- assert_allclose(out, true_coefs, rtol=0, atol=atol)
- @pytest.mark.parametrize(
- "solver", ["svd", "sparse_cg", "cholesky", "lsqr", "sag", "saga", "lbfgs"]
- )
- def test_dtype_match(solver):
- rng = np.random.RandomState(0)
- alpha = 1.0
- positive = solver == "lbfgs"
- n_samples, n_features = 6, 5
- X_64 = rng.randn(n_samples, n_features)
- y_64 = rng.randn(n_samples)
- X_32 = X_64.astype(np.float32)
- y_32 = y_64.astype(np.float32)
- tol = 2 * np.finfo(np.float32).resolution
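- # tol is tied to float32 resolution, presumably so that the 32-bit fit can
- # actually reach the requested tolerance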
- # Check type consistency 32bits
- ridge_32 = Ridge(
- alpha=alpha, solver=solver, max_iter=500, tol=tol, positive=positive
- )
- ridge_32.fit(X_32, y_32)
- coef_32 = ridge_32.coef_
- # Check type consistency 64 bits
- ridge_64 = Ridge(
- alpha=alpha, solver=solver, max_iter=500, tol=tol, positive=positive
- )
- ridge_64.fit(X_64, y_64)
- coef_64 = ridge_64.coef_
- # Do the actual checks at once for easier debugging
- assert coef_32.dtype == X_32.dtype
- assert coef_64.dtype == X_64.dtype
- assert ridge_32.predict(X_32).dtype == X_32.dtype
- assert ridge_64.predict(X_64).dtype == X_64.dtype
- assert_allclose(ridge_32.coef_, ridge_64.coef_, rtol=1e-4, atol=5e-4)
- def test_dtype_match_cholesky():
- # Test different alphas in cholesky solver to ensure full coverage.
- # This test is separated from test_dtype_match for clarity.
- rng = np.random.RandomState(0)
- alpha = np.array([1.0, 0.5])
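- # an array-valued alpha applies one penalty per output target (n_target = 2)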
- n_samples, n_features, n_target = 6, 7, 2
- X_64 = rng.randn(n_samples, n_features)
- y_64 = rng.randn(n_samples, n_target)
- X_32 = X_64.astype(np.float32)
- y_32 = y_64.astype(np.float32)
- # Check type consistency 32bits
- ridge_32 = Ridge(alpha=alpha, solver="cholesky")
- ridge_32.fit(X_32, y_32)
- coef_32 = ridge_32.coef_
- # Check type consistency 64 bits
- ridge_64 = Ridge(alpha=alpha, solver="cholesky")
- ridge_64.fit(X_64, y_64)
- coef_64 = ridge_64.coef_
- # Do all the checks at once; this makes them easier to debug
- assert coef_32.dtype == X_32.dtype
- assert coef_64.dtype == X_64.dtype
- assert ridge_32.predict(X_32).dtype == X_32.dtype
- assert ridge_64.predict(X_64).dtype == X_64.dtype
- assert_almost_equal(ridge_32.coef_, ridge_64.coef_, decimal=5)
- @pytest.mark.parametrize(
- "solver", ["svd", "cholesky", "lsqr", "sparse_cg", "sag", "saga", "lbfgs"]
- )
- @pytest.mark.parametrize("seed", range(1))
- def test_ridge_regression_dtype_stability(solver, seed):
- random_state = np.random.RandomState(seed)
- n_samples, n_features = 6, 5
- X = random_state.randn(n_samples, n_features)
- coef = random_state.randn(n_features)
- y = np.dot(X, coef) + 0.01 * random_state.randn(n_samples)
- alpha = 1.0
- positive = solver == "lbfgs"
- results = dict()
- # XXX: Sparse CG seems to be far less numerically stable than the
- # others, maybe we should not enable float32 for this one.
- atol = 1e-3 if solver == "sparse_cg" else 1e-5
- for current_dtype in (np.float32, np.float64):
- results[current_dtype] = ridge_regression(
- X.astype(current_dtype),
- y.astype(current_dtype),
- alpha=alpha,
- solver=solver,
- random_state=random_state,
- sample_weight=None,
- positive=positive,
- max_iter=500,
- tol=1e-10,
- return_n_iter=False,
- return_intercept=False,
- )
- assert results[np.float32].dtype == np.float32
- assert results[np.float64].dtype == np.float64
- assert_allclose(results[np.float32], results[np.float64], atol=atol)
- def test_ridge_sag_with_X_fortran():
- # check that Fortran-ordered arrays are converted when using the SAG solver
- X, y = make_regression(random_state=42)
- # make X Fortran-ordered and slice it so that neither X nor y is C-contiguous
- X = np.asfortranarray(X)
- X = X[::2, :]
- y = y[::2]
- Ridge(solver="sag").fit(X, y)
- @pytest.mark.parametrize(
- "Classifier, params",
- [
- (RidgeClassifier, {}),
- (RidgeClassifierCV, {"cv": None}),
- (RidgeClassifierCV, {"cv": 3}),
- ],
- )
- def test_ridgeclassifier_multilabel(Classifier, params):
- """Check that multilabel classification is supported and give meaningful
- results."""
- X, y = make_multilabel_classification(n_classes=1, random_state=0)
- y = y.reshape(-1, 1)
- Y = np.concatenate([y, y], axis=1)
- clf = Classifier(**params).fit(X, Y)
- Y_pred = clf.predict(X)
- assert Y_pred.shape == Y.shape
- assert_array_equal(Y_pred[:, 0], Y_pred[:, 1])
- @pytest.mark.parametrize("solver", ["auto", "lbfgs"])
- @pytest.mark.parametrize("fit_intercept", [True, False])
- @pytest.mark.parametrize("alpha", [1e-3, 1e-2, 0.1, 1.0])
- def test_ridge_positive_regression_test(solver, fit_intercept, alpha):
- """Test that positive Ridge finds true positive coefficients."""
- X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
- coef = np.array([1, -10])
- if fit_intercept:
- intercept = 20
- y = X.dot(coef) + intercept
- else:
- y = X.dot(coef)
- model = Ridge(
- alpha=alpha, positive=True, solver=solver, fit_intercept=fit_intercept
- )
- model.fit(X, y)
- assert np.all(model.coef_ >= 0)
- @pytest.mark.parametrize("fit_intercept", [True, False])
- @pytest.mark.parametrize("alpha", [1e-3, 1e-2, 0.1, 1.0])
- def test_ridge_ground_truth_positive_test(fit_intercept, alpha):
- """Test that Ridge w/wo positive converges to the same solution.
- Ridge with positive=True and positive=False must give the same
- when the ground truth coefs are all positive.
- """
- rng = np.random.RandomState(42)
- X = rng.randn(300, 100)
- coef = rng.uniform(0.1, 1.0, size=X.shape[1])
- if fit_intercept:
- intercept = 1
- y = X @ coef + intercept
- else:
- y = X @ coef
- y += rng.normal(size=X.shape[0]) * 0.01
- results = []
- for positive in [True, False]:
- model = Ridge(
- alpha=alpha, positive=positive, fit_intercept=fit_intercept, tol=1e-10
- )
- results.append(model.fit(X, y).coef_)
- assert_allclose(*results, atol=1e-6, rtol=0)
- @pytest.mark.parametrize(
- "solver", ["svd", "cholesky", "lsqr", "sparse_cg", "sag", "saga"]
- )
- def test_ridge_positive_error_test(solver):
- """Test input validation for positive argument in Ridge."""
- alpha = 0.1
- X = np.array([[1, 2], [3, 4]])
- coef = np.array([1, -1])
- y = X @ coef
- model = Ridge(alpha=alpha, positive=True, solver=solver, fit_intercept=False)
- with pytest.raises(ValueError, match="does not support positive"):
- model.fit(X, y)
- with pytest.raises(ValueError, match="only 'lbfgs' solver can be used"):
- _, _ = ridge_regression(
- X, y, alpha, positive=True, solver=solver, return_intercept=False
- )
- @pytest.mark.parametrize("alpha", [1e-3, 1e-2, 0.1, 1.0])
- def test_positive_ridge_loss(alpha):
- """Check ridge loss consistency when positive argument is enabled."""
- X, y = make_regression(n_samples=300, n_features=300, random_state=42)
- n_checks = 100
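- # ridge_loss evaluates 0.5 * ||y - X @ coef - intercept||**2 + 0.5 * alpha * ||coef||**2,
- # i.e. half of the objective minimized by Ridge; the constant factor does not
- # change the comparisons below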
- def ridge_loss(model, random_state=None, noise_scale=1e-8):
- intercept = model.intercept_
- if random_state is not None:
- rng = np.random.RandomState(random_state)
- coef = model.coef_ + rng.uniform(0, noise_scale, size=model.coef_.shape)
- else:
- coef = model.coef_
- return 0.5 * np.sum((y - X @ coef - intercept) ** 2) + 0.5 * alpha * np.sum(
- coef**2
- )
- model = Ridge(alpha=alpha).fit(X, y)
- model_positive = Ridge(alpha=alpha, positive=True).fit(X, y)
- # Check 1:
- # Loss for solution found by Ridge(positive=False)
- # is lower than that for solution found by Ridge(positive=True)
- loss = ridge_loss(model)
- loss_positive = ridge_loss(model_positive)
- assert loss <= loss_positive
- # Check 2:
- # Loss for solution found by Ridge(positive=True)
- # is lower than that for small random positive perturbation
- # of the positive solution.
- for random_state in range(n_checks):
- loss_perturbed = ridge_loss(model_positive, random_state=random_state)
- assert loss_positive <= loss_perturbed
- @pytest.mark.parametrize("alpha", [1e-3, 1e-2, 0.1, 1.0])
- def test_lbfgs_solver_consistency(alpha):
- """Test that LBGFS gets almost the same coef of svd when positive=False."""
- X, y = make_regression(n_samples=300, n_features=300, random_state=42)
- y = np.expand_dims(y, 1)
- alpha = np.asarray([alpha])
- config = {
- "positive": False,
- "tol": 1e-16,
- "max_iter": 500000,
- }
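- # _solve_svd computes the closed-form (unconstrained) ridge solution and is
- # used here as the reference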
- coef_lbfgs = _solve_lbfgs(X, y, alpha, **config)
- coef_svd = _solve_svd(X, y, alpha)
- assert_allclose(coef_lbfgs, coef_svd, atol=1e-4, rtol=0)
- def test_lbfgs_solver_error():
- """Test that LBFGS solver raises ConvergenceWarning."""
- X = np.array([[1, -1], [1, 1]])
- y = np.array([-1e10, 1e10])
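- # extreme targets combined with max_iter=1 ensure that lbfgs stops before
- # converging and emits a ConvergenceWarning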
- model = Ridge(
- alpha=0.01,
- solver="lbfgs",
- fit_intercept=False,
- tol=1e-12,
- positive=True,
- max_iter=1,
- )
- with pytest.warns(ConvergenceWarning, match="lbfgs solver did not converge"):
- model.fit(X, y)
- @pytest.mark.parametrize("fit_intercept", [False, True])
- @pytest.mark.parametrize("sparseX", [False, True])
- @pytest.mark.parametrize("data", ["tall", "wide"])
- @pytest.mark.parametrize("solver", SOLVERS + ["lbfgs"])
- def test_ridge_sample_weight_consistency(
- fit_intercept, sparseX, data, solver, global_random_seed
- ):
- """Test that the impact of sample_weight is consistent.
- Note that this test is stricter than the common test
- check_sample_weights_invariance alone.
- """
- # filter out solvers that do not support sparse input
- if sparseX:
- if solver == "svd" or (solver in ("cholesky", "saga") and fit_intercept):
- pytest.skip("unsupported configuration")
- # XXX: this test is quite sensitive to the seed used to generate the data:
- # ideally we would like the test to pass for any global_random_seed but this is not
- # the case at the moment.
- rng = np.random.RandomState(42)
- n_samples = 12
- if data == "tall":
- n_features = n_samples // 2
- else:
- n_features = n_samples * 2
- X = rng.rand(n_samples, n_features)
- y = rng.rand(n_samples)
- if sparseX:
- X = sp.csr_matrix(X)
- params = dict(
- fit_intercept=fit_intercept,
- alpha=1.0,
- solver=solver,
- positive=(solver == "lbfgs"),
- random_state=global_random_seed, # for sag/saga
- tol=1e-12,
- )
- # 1) sample_weight=np.ones(..) should be equivalent to sample_weight=None
- # same check as check_sample_weights_invariance(name, reg, kind="ones"), but we also
- # test with sparse input.
- reg = Ridge(**params).fit(X, y, sample_weight=None)
- coef = reg.coef_.copy()
- if fit_intercept:
- intercept = reg.intercept_
- sample_weight = np.ones_like(y)
- reg.fit(X, y, sample_weight=sample_weight)
- assert_allclose(reg.coef_, coef, rtol=1e-6)
- if fit_intercept:
- assert_allclose(reg.intercept_, intercept)
- # 2) setting elements of sample_weight to 0 is equivalent to removing these samples
- # same check as check_sample_weights_invariance(name, reg, kind="zeros"), but we
- # also test with sparse input
- sample_weight = rng.uniform(low=0.01, high=2, size=X.shape[0])
- sample_weight[-5:] = 0
- y[-5:] *= 1000 # to make excluding those samples important
- reg.fit(X, y, sample_weight=sample_weight)
- coef = reg.coef_.copy()
- if fit_intercept:
- intercept = reg.intercept_
- reg.fit(X[:-5, :], y[:-5], sample_weight=sample_weight[:-5])
- assert_allclose(reg.coef_, coef, rtol=1e-6)
- if fit_intercept:
- assert_allclose(reg.intercept_, intercept)
- # 3) scaling sample_weight should have no effect on the solution when the
- # penalty alpha is scaled by the same factor
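- # weighted ridge objective: sum_i w_i * (y_i - x_i @ coef - intercept)**2
- # + alpha * ||coef||**2; multiplying all w_i and alpha by the same constant
- # multiplies the whole objective by that constant, leaving the minimizer unchanged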
- reg2 = Ridge(**params).set_params(alpha=np.pi * params["alpha"])
- reg2.fit(X, y, sample_weight=np.pi * sample_weight)
- if solver in ("sag", "saga") and not fit_intercept:
- pytest.xfail(f"Solver {solver} fails the sample_weight scaling check.")
- assert_allclose(reg2.coef_, coef, rtol=1e-6)
- if fit_intercept:
- assert_allclose(reg2.intercept_, intercept)
- # 4) check that multiplying sample_weight by 2 is equivalent
- # to repeating corresponding samples twice
- if sparseX:
- X = X.toarray()
- X2 = np.concatenate([X, X[: n_samples // 2]], axis=0)
- y2 = np.concatenate([y, y[: n_samples // 2]])
- sample_weight_1 = sample_weight.copy()
- sample_weight_1[: n_samples // 2] *= 2
- sample_weight_2 = np.concatenate(
- [sample_weight, sample_weight[: n_samples // 2]], axis=0
- )
- if sparseX:
- X = sp.csr_matrix(X)
- X2 = sp.csr_matrix(X2)
- reg1 = Ridge(**params).fit(X, y, sample_weight=sample_weight_1)
- reg2 = Ridge(**params).fit(X2, y2, sample_weight=sample_weight_2)
- assert_allclose(reg1.coef_, reg2.coef_)
- if fit_intercept:
- assert_allclose(reg1.intercept_, reg2.intercept_)