test_least_angle.py

import warnings

import numpy as np
import pytest
from scipy import linalg

from sklearn import datasets, linear_model
from sklearn.base import clone
from sklearn.exceptions import ConvergenceWarning
from sklearn.linear_model import (
    Lars,
    LarsCV,
    LassoLars,
    LassoLarsCV,
    LassoLarsIC,
    lars_path,
)
from sklearn.linear_model._least_angle import _lars_path_residues
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.utils import check_random_state
from sklearn.utils._testing import (
    TempMemmap,
    assert_allclose,
    assert_array_almost_equal,
    ignore_warnings,
)

# TODO: use another dataset that has multiple drops
diabetes = datasets.load_diabetes()
X, y = diabetes.data, diabetes.target
G = np.dot(X.T, X)
Xy = np.dot(X.T, y)
n_samples = y.size

# TODO(1.4): 'normalize' to be removed
filterwarnings_normalize = pytest.mark.filterwarnings(
    "ignore:'normalize' was deprecated"
)

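# A minimal illustrative sketch (not one of the tests, and not part of the
# original module): with the default ``return_path=True``, ``lars_path``
# returns ``(alphas, active, coefs)`` where ``coefs`` has one column per knot
# of the path, so iterating over ``coefs.T`` (as several tests below do) walks
# the path from the empty model towards the least-squares end.
def _lars_path_output_sketch():
    alphas, active, coefs = linear_model.lars_path(X, y, method="lasso")
    # one coefficient vector of length n_features per value of alpha
    assert coefs.shape == (X.shape[1], alphas.shape[0])
    return alphas, active, coefs

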
# TODO(1.4): 'normalize' to be removed
@pytest.mark.parametrize(
    "LeastAngleModel", [Lars, LassoLars, LarsCV, LassoLarsCV, LassoLarsIC]
)
@pytest.mark.parametrize(
    "normalize, n_warnings", [(True, 1), (False, 1), ("deprecated", 0)]
)
def test_assure_warning_when_normalize(LeastAngleModel, normalize, n_warnings):
    # check that we issue a FutureWarning when normalize was set
    rng = check_random_state(0)
    n_samples = 200
    n_features = 2
    X = rng.randn(n_samples, n_features)
    X[X < 0.1] = 0.0
    y = rng.rand(n_samples)

    model = LeastAngleModel(normalize=normalize)
    with warnings.catch_warnings(record=True) as rec:
        warnings.simplefilter("always", FutureWarning)
        model.fit(X, y)

    assert len([w.message for w in rec]) == n_warnings

def test_simple():
    # Principle of Lars is to keep covariances tied and decreasing

    # also test verbose output
    import sys
    from io import StringIO

    old_stdout = sys.stdout
    try:
        sys.stdout = StringIO()

        _, _, coef_path_ = linear_model.lars_path(X, y, method="lar", verbose=10)

        sys.stdout = old_stdout
        for i, coef_ in enumerate(coef_path_.T):
            res = y - np.dot(X, coef_)
            cov = np.dot(X.T, res)
            C = np.max(abs(cov))
            eps = 1e-3
            ocur = len(cov[C - eps < abs(cov)])
            if i < X.shape[1]:
                assert ocur == i + 1
            else:
                # no more than max_pred variables can go into the active set
                assert ocur == X.shape[1]
    finally:
        sys.stdout = old_stdout

def test_simple_precomputed():
    # The same, with precomputed Gram matrix
    _, _, coef_path_ = linear_model.lars_path(X, y, Gram=G, method="lar")

    for i, coef_ in enumerate(coef_path_.T):
        res = y - np.dot(X, coef_)
        cov = np.dot(X.T, res)
        C = np.max(abs(cov))
        eps = 1e-3
        ocur = len(cov[C - eps < abs(cov)])
        if i < X.shape[1]:
            assert ocur == i + 1
        else:
            # no more than max_pred variables can go into the active set
            assert ocur == X.shape[1]

def _assert_same_lars_path_result(output1, output2):
    assert len(output1) == len(output2)
    for o1, o2 in zip(output1, output2):
        assert_allclose(o1, o2)


@pytest.mark.parametrize("method", ["lar", "lasso"])
@pytest.mark.parametrize("return_path", [True, False])
def test_lars_path_gram_equivalent(method, return_path):
    _assert_same_lars_path_result(
        linear_model.lars_path_gram(
            Xy=Xy, Gram=G, n_samples=n_samples, method=method, return_path=return_path
        ),
        linear_model.lars_path(X, y, Gram=G, method=method, return_path=return_path),
    )

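# A minimal usage sketch (not a test), assuming only the precomputed statistics
# are available: ``lars_path_gram`` recovers the same path as ``lars_path``
# from Gram = X.T @ X and Xy = X.T @ y alone, provided ``n_samples`` is passed
# explicitly, which is the equivalence the test above verifies.
def _lars_path_gram_usage_sketch():
    gram = np.dot(X.T, X)
    xy = np.dot(X.T, y)
    alphas, active, coefs = linear_model.lars_path_gram(
        Xy=xy, Gram=gram, n_samples=y.size, method="lasso"
    )
    return alphas, active, coefs

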
def test_x_none_gram_none_raises_value_error():
    # Test that lars_path with no X and Gram raises exception
    Xy = np.dot(X.T, y)
    with pytest.raises(ValueError):
        linear_model.lars_path(None, y, Gram=None, Xy=Xy)


def test_all_precomputed():
    # Test that lars_path with precomputed Gram and Xy gives the right answer
    G = np.dot(X.T, X)
    Xy = np.dot(X.T, y)
    for method in "lar", "lasso":
        output = linear_model.lars_path(X, y, method=method)
        output_pre = linear_model.lars_path(X, y, Gram=G, Xy=Xy, method=method)
        for expected, got in zip(output, output_pre):
            assert_array_almost_equal(expected, got)

# TODO(1.4): 'normalize' to be removed
@filterwarnings_normalize
@pytest.mark.filterwarnings("ignore: `rcond` parameter will change")
# numpy deprecation
def test_lars_lstsq():
    # Test that Lars gives least square solution at the end
    # of the path
    X1 = 3 * X  # use un-normalized dataset
    clf = linear_model.LassoLars(alpha=0.0)
    clf.fit(X1, y)
    coef_lstsq = np.linalg.lstsq(X1, y, rcond=None)[0]
    assert_array_almost_equal(clf.coef_, coef_lstsq)


@pytest.mark.filterwarnings("ignore:`rcond` parameter will change")
# numpy deprecation
def test_lasso_gives_lstsq_solution():
    # Test that Lars Lasso gives least square solution at the end
    # of the path
    _, _, coef_path_ = linear_model.lars_path(X, y, method="lasso")
    coef_lstsq = np.linalg.lstsq(X, y)[0]
    assert_array_almost_equal(coef_lstsq, coef_path_[:, -1])

def test_collinearity():
    # Check that lars_path is robust to collinearity in input
    X = np.array([[3.0, 3.0, 1.0], [2.0, 2.0, 0.0], [1.0, 1.0, 0]])
    y = np.array([1.0, 0.0, 0])
    rng = np.random.RandomState(0)

    f = ignore_warnings
    _, _, coef_path_ = f(linear_model.lars_path)(X, y, alpha_min=0.01)
    assert not np.isnan(coef_path_).any()
    residual = np.dot(X, coef_path_[:, -1]) - y
    assert (residual**2).sum() < 1.0  # just make sure it's bounded

    n_samples = 10
    X = rng.rand(n_samples, 5)
    y = np.zeros(n_samples)
    _, _, coef_path_ = linear_model.lars_path(
        X,
        y,
        Gram="auto",
        copy_X=False,
        copy_Gram=False,
        alpha_min=0.0,
        method="lasso",
        verbose=0,
        max_iter=500,
    )
    assert_array_almost_equal(coef_path_, np.zeros_like(coef_path_))

def test_no_path():
    # Test that the ``return_path=False`` option returns the correct output
    alphas_, _, coef_path_ = linear_model.lars_path(X, y, method="lar")
    alpha_, _, coef = linear_model.lars_path(X, y, method="lar", return_path=False)

    assert_array_almost_equal(coef, coef_path_[:, -1])
    assert alpha_ == alphas_[-1]


def test_no_path_precomputed():
    # Test that the ``return_path=False`` option with Gram remains correct
    alphas_, _, coef_path_ = linear_model.lars_path(X, y, method="lar", Gram=G)
    alpha_, _, coef = linear_model.lars_path(
        X, y, method="lar", Gram=G, return_path=False
    )

    assert_array_almost_equal(coef, coef_path_[:, -1])
    assert alpha_ == alphas_[-1]


def test_no_path_all_precomputed():
    # Test that the ``return_path=False`` option with Gram and Xy remains
    # correct
    X, y = 3 * diabetes.data, diabetes.target
    G = np.dot(X.T, X)
    Xy = np.dot(X.T, y)
    alphas_, _, coef_path_ = linear_model.lars_path(
        X, y, method="lasso", Xy=Xy, Gram=G, alpha_min=0.9
    )
    alpha_, _, coef = linear_model.lars_path(
        X, y, method="lasso", Gram=G, Xy=Xy, alpha_min=0.9, return_path=False
    )

    assert_array_almost_equal(coef, coef_path_[:, -1])
    assert alpha_ == alphas_[-1]

@filterwarnings_normalize
@pytest.mark.parametrize(
    "classifier", [linear_model.Lars, linear_model.LarsCV, linear_model.LassoLarsIC]
)
def test_lars_precompute(classifier):
    # Check for different values of precompute
    G = np.dot(X.T, X)

    clf = classifier(precompute=G)
    output_1 = ignore_warnings(clf.fit)(X, y).coef_
    for precompute in [True, False, "auto", None]:
        clf = classifier(precompute=precompute)
        output_2 = clf.fit(X, y).coef_
        assert_array_almost_equal(output_1, output_2, decimal=8)

def test_singular_matrix():
    # Test when input is a singular matrix
    X1 = np.array([[1, 1.0], [1.0, 1.0]])
    y1 = np.array([1, 1])
    _, _, coef_path = linear_model.lars_path(X1, y1)
    assert_array_almost_equal(coef_path.T, [[0, 0], [1, 0]])


def test_rank_deficient_design():
    # consistency test that checks that LARS Lasso is handling rank
    # deficient input data (with rank < n_features) in the same way
    # as coordinate descent Lasso
    y = [5, 0, 5]
    for X in ([[5, 0], [0, 5], [10, 10]], [[10, 10, 0], [1e-32, 0, 0], [0, 0, 1]]):
        # To be able to use the coefs to compute the objective function,
        # we need to turn off normalization
        lars = linear_model.LassoLars(0.1)
        coef_lars_ = lars.fit(X, y).coef_
        obj_lars = 1.0 / (2.0 * 3.0) * linalg.norm(
            y - np.dot(X, coef_lars_)
        ) ** 2 + 0.1 * linalg.norm(coef_lars_, 1)

        coord_descent = linear_model.Lasso(0.1, tol=1e-6)
        coef_cd_ = coord_descent.fit(X, y).coef_
        obj_cd = (1.0 / (2.0 * 3.0)) * linalg.norm(
            y - np.dot(X, coef_cd_)
        ) ** 2 + 0.1 * linalg.norm(coef_cd_, 1)
        assert obj_lars < obj_cd * (1.0 + 1e-8)

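# A small illustrative helper (not used by the tests, added for clarity),
# assuming the usual scikit-learn Lasso objective
#   (1 / (2 * n_samples)) * ||y - X w||_2^2 + alpha * ||w||_1.
# The inline expressions in test_rank_deficient_design above compute exactly
# this quantity with n_samples == 3 and alpha == 0.1.
def _lasso_objective(X, y, coef, alpha):
    n_samples = np.asarray(X).shape[0]
    residual = np.asarray(y) - np.dot(X, coef)
    return np.dot(residual, residual) / (2.0 * n_samples) + alpha * np.sum(
        np.abs(coef)
    )

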
def test_lasso_lars_vs_lasso_cd():
    # Test that LassoLars and Lasso using coordinate descent give the
    # same results.
    X = 3 * diabetes.data

    alphas, _, lasso_path = linear_model.lars_path(X, y, method="lasso")
    lasso_cd = linear_model.Lasso(fit_intercept=False, tol=1e-8)
    for c, a in zip(lasso_path.T, alphas):
        if a == 0:
            continue
        lasso_cd.alpha = a
        lasso_cd.fit(X, y)
        error = linalg.norm(c - lasso_cd.coef_)
        assert error < 0.01

    # similar test, with the classifiers
    for alpha in np.linspace(1e-2, 1 - 1e-2, 20):
        clf1 = linear_model.LassoLars(alpha=alpha).fit(X, y)
        clf2 = linear_model.Lasso(alpha=alpha, tol=1e-8).fit(X, y)
        err = linalg.norm(clf1.coef_ - clf2.coef_)
        assert err < 1e-3

    # same test, with normalized data
    X = diabetes.data
    X = X - X.sum(axis=0)
    X /= np.linalg.norm(X, axis=0)
    alphas, _, lasso_path = linear_model.lars_path(X, y, method="lasso")
    lasso_cd = linear_model.Lasso(fit_intercept=False, tol=1e-8)
    for c, a in zip(lasso_path.T, alphas):
        if a == 0:
            continue
        lasso_cd.alpha = a
        lasso_cd.fit(X, y)
        error = linalg.norm(c - lasso_cd.coef_)
        assert error < 0.01

@filterwarnings_normalize
def test_lasso_lars_vs_lasso_cd_early_stopping():
    # Test that LassoLars and Lasso using coordinate descent give the
    # same results when early stopping is used.
    # (test : before, in the middle, and in the last part of the path)
    alphas_min = [10, 0.9, 1e-4]

    X = diabetes.data
    for alpha_min in alphas_min:
        alphas, _, lasso_path = linear_model.lars_path(
            X, y, method="lasso", alpha_min=alpha_min
        )
        lasso_cd = linear_model.Lasso(fit_intercept=False, tol=1e-8)
        lasso_cd.alpha = alphas[-1]
        lasso_cd.fit(X, y)
        error = linalg.norm(lasso_path[:, -1] - lasso_cd.coef_)
        assert error < 0.01

    # same test, with normalization
    X = diabetes.data - diabetes.data.sum(axis=0)
    X /= np.linalg.norm(X, axis=0)
    for alpha_min in alphas_min:
        alphas, _, lasso_path = linear_model.lars_path(
            X, y, method="lasso", alpha_min=alpha_min
        )
        lasso_cd = linear_model.Lasso(tol=1e-8)
        lasso_cd.alpha = alphas[-1]
        lasso_cd.fit(X, y)
        error = linalg.norm(lasso_path[:, -1] - lasso_cd.coef_)
        assert error < 0.01

@filterwarnings_normalize
def test_lasso_lars_path_length():
    # Test that the path length of the LassoLars is right
    lasso = linear_model.LassoLars()
    lasso.fit(X, y)
    lasso2 = linear_model.LassoLars(alpha=lasso.alphas_[2])
    lasso2.fit(X, y)
    assert_array_almost_equal(lasso.alphas_[:3], lasso2.alphas_)
    # Also check that the sequence of alphas is always decreasing
    assert np.all(np.diff(lasso.alphas_) < 0)

def test_lasso_lars_vs_lasso_cd_ill_conditioned():
    # Test lasso lars on a very ill-conditioned design, and check that
    # it does not blow up, and stays somewhat close to a solution given
    # by the coordinate descent solver
    # Also test that lasso_path (using lars_path output style) gives
    # the same result as lars_path and previous lasso output style
    # under these conditions.
    rng = np.random.RandomState(42)

    # Generate data
    n, m = 70, 100
    k = 5
    X = rng.randn(n, m)
    w = np.zeros((m, 1))
    i = np.arange(0, m)
    rng.shuffle(i)
    supp = i[:k]
    w[supp] = np.sign(rng.randn(k, 1)) * (rng.rand(k, 1) + 1)
    y = np.dot(X, w)
    sigma = 0.2
    y += sigma * rng.rand(*y.shape)
    y = y.squeeze()

    lars_alphas, _, lars_coef = linear_model.lars_path(X, y, method="lasso")
    _, lasso_coef2, _ = linear_model.lasso_path(X, y, alphas=lars_alphas, tol=1e-6)

    assert_array_almost_equal(lars_coef, lasso_coef2, decimal=1)

def test_lasso_lars_vs_lasso_cd_ill_conditioned2():
    # Create an ill-conditioned situation in which the LARS has to go
    # far in the path to converge, and check that LARS and coordinate
    # descent give the same answers
    # Note it used to be the case that Lars had to use the drop for good
    # strategy for this but this is no longer the case with the
    # equality_tolerance checks
    X = [[1e20, 1e20, 0], [-1e-32, 0, 0], [1, 1, 1]]
    y = [10, 10, 1]
    alpha = 0.0001

    def objective_function(coef):
        return 1.0 / (2.0 * len(X)) * linalg.norm(
            y - np.dot(X, coef)
        ) ** 2 + alpha * linalg.norm(coef, 1)

    lars = linear_model.LassoLars(alpha=alpha)
    warning_message = "Regressors in active set degenerate."
    with pytest.warns(ConvergenceWarning, match=warning_message):
        lars.fit(X, y)
    lars_coef_ = lars.coef_
    lars_obj = objective_function(lars_coef_)

    coord_descent = linear_model.Lasso(alpha=alpha, tol=1e-4)
    cd_coef_ = coord_descent.fit(X, y).coef_
    cd_obj = objective_function(cd_coef_)

    assert lars_obj < cd_obj * (1.0 + 1e-8)

@filterwarnings_normalize
def test_lars_add_features():
    # assure that at least some features get added if necessary
    # test for 6d2b4c
    # Hilbert matrix
    n = 5
    H = 1.0 / (np.arange(1, n + 1) + np.arange(n)[:, np.newaxis])
    clf = linear_model.Lars(fit_intercept=False).fit(H, np.arange(n))
    assert np.all(np.isfinite(clf.coef_))


@filterwarnings_normalize
def test_lars_n_nonzero_coefs(verbose=False):
    lars = linear_model.Lars(n_nonzero_coefs=6, verbose=verbose)
    lars.fit(X, y)
    assert len(lars.coef_.nonzero()[0]) == 6
    # The path should be of length 6 + 1 in a Lars going down to 6
    # non-zero coefs
    assert len(lars.alphas_) == 7

@filterwarnings_normalize
@ignore_warnings
def test_multitarget():
    # Assure that estimators receiving multidimensional y do the right thing
    Y = np.vstack([y, y**2]).T
    n_targets = Y.shape[1]
    estimators = [
        linear_model.LassoLars(),
        linear_model.Lars(),
        # regression test for gh-1615
        linear_model.LassoLars(fit_intercept=False),
        linear_model.Lars(fit_intercept=False),
    ]

    for estimator in estimators:
        estimator.fit(X, Y)
        Y_pred = estimator.predict(X)
        alphas, active, coef, path = (
            estimator.alphas_,
            estimator.active_,
            estimator.coef_,
            estimator.coef_path_,
        )
        for k in range(n_targets):
            estimator.fit(X, Y[:, k])
            y_pred = estimator.predict(X)
            assert_array_almost_equal(alphas[k], estimator.alphas_)
            assert_array_almost_equal(active[k], estimator.active_)
            assert_array_almost_equal(coef[k], estimator.coef_)
            assert_array_almost_equal(path[k], estimator.coef_path_)
            assert_array_almost_equal(Y_pred[:, k], y_pred)

@filterwarnings_normalize
def test_lars_cv():
    # Test the LassoLarsCV object by checking that the optimal alpha
    # increases as the number of samples increases.
    # This property is not actually guaranteed in general and is just a
    # property of the given dataset, with the given steps chosen.
    old_alpha = 0
    lars_cv = linear_model.LassoLarsCV()
    for length in (400, 200, 100):
        X = diabetes.data[:length]
        y = diabetes.target[:length]
        lars_cv.fit(X, y)
        np.testing.assert_array_less(old_alpha, lars_cv.alpha_)
        old_alpha = lars_cv.alpha_
    assert not hasattr(lars_cv, "n_nonzero_coefs")

def test_lars_cv_max_iter(recwarn):
    warnings.simplefilter("always")
    with np.errstate(divide="raise", invalid="raise"):
        X = diabetes.data
        y = diabetes.target
        rng = np.random.RandomState(42)
        x = rng.randn(len(y))
        X = diabetes.data
        X = np.c_[X, x, x]  # add correlated features
        X = StandardScaler().fit_transform(X)
        lars_cv = linear_model.LassoLarsCV(max_iter=5, cv=5)
        lars_cv.fit(X, y)
    # Check that there is no warning in general and no ConvergenceWarning
    # in particular.
    # Materialize the string representation of the warning to get a more
    # informative error message in case of AssertionError.
    recorded_warnings = [str(w) for w in recwarn]
    assert len(recorded_warnings) == 0

def test_lasso_lars_ic():
    # Test the LassoLarsIC object by checking that
    # - some good features are selected.
    # - alpha_bic > alpha_aic
    # - n_nonzero_bic < n_nonzero_aic
    lars_bic = linear_model.LassoLarsIC("bic")
    lars_aic = linear_model.LassoLarsIC("aic")
    rng = np.random.RandomState(42)
    X = diabetes.data
    X = np.c_[X, rng.randn(X.shape[0], 5)]  # add 5 bad features
    X = StandardScaler().fit_transform(X)
    lars_bic.fit(X, y)
    lars_aic.fit(X, y)
    nonzero_bic = np.where(lars_bic.coef_)[0]
    nonzero_aic = np.where(lars_aic.coef_)[0]
    assert lars_bic.alpha_ > lars_aic.alpha_
    assert len(nonzero_bic) < len(nonzero_aic)
    assert np.max(nonzero_bic) < diabetes.data.shape[1]

def test_lars_path_readonly_data():
    # When using automated memory mapping on large input, the
    # fold data is in read-only mode
    # This is a non-regression test for:
    # https://github.com/scikit-learn/scikit-learn/issues/4597
    splitted_data = train_test_split(X, y, random_state=42)
    with TempMemmap(splitted_data) as (X_train, X_test, y_train, y_test):
        # The following should not fail despite copy=False
        _lars_path_residues(X_train, y_train, X_test, y_test, copy=False)

def test_lars_path_positive_constraint():
    # this is the main test for the positive parameter on the lars_path method
    # the estimator classes just make use of this function

    # we do the test on the diabetes dataset

    # ensure that we get negative coefficients when positive=False
    # and all positive when positive=True
    # for method 'lar' (default) and lasso

    err_msg = "Positive constraint not supported for 'lar' coding method."
    with pytest.raises(ValueError, match=err_msg):
        linear_model.lars_path(
            diabetes["data"], diabetes["target"], method="lar", positive=True
        )

    method = "lasso"
    _, _, coefs = linear_model.lars_path(
        X, y, return_path=True, method=method, positive=False
    )
    assert coefs.min() < 0

    _, _, coefs = linear_model.lars_path(
        X, y, return_path=True, method=method, positive=True
    )
    assert coefs.min() >= 0


# now we are going to test the positive option for all estimator classes
default_parameter = {"fit_intercept": False}

estimator_parameter_map = {
    "LassoLars": {"alpha": 0.1},
    "LassoLarsCV": {},
    "LassoLarsIC": {},
}

@filterwarnings_normalize
def test_estimatorclasses_positive_constraint():
    # check that the positive option is passed through correctly
    # for all the estimator classes tested in this same function

    default_parameter = {"fit_intercept": False}

    estimator_parameter_map = {
        "LassoLars": {"alpha": 0.1},
        "LassoLarsCV": {},
        "LassoLarsIC": {},
    }
    for estname in estimator_parameter_map:
        params = default_parameter.copy()
        params.update(estimator_parameter_map[estname])
        estimator = getattr(linear_model, estname)(positive=False, **params)
        estimator.fit(X, y)
        assert estimator.coef_.min() < 0
        estimator = getattr(linear_model, estname)(positive=True, **params)
        estimator.fit(X, y)
        assert min(estimator.coef_) >= 0

def test_lasso_lars_vs_lasso_cd_positive():
    # Test that LassoLars and Lasso using coordinate descent give the
    # same results when using the positive option

    # This test is basically a copy of the above with the additional positive
    # option. However, for the middle part (the comparison of coefficient
    # values for a range of alphas), we had to make an adaptation. See below.

    # not normalized data
    X = 3 * diabetes.data

    alphas, _, lasso_path = linear_model.lars_path(X, y, method="lasso", positive=True)
    lasso_cd = linear_model.Lasso(fit_intercept=False, tol=1e-8, positive=True)
    for c, a in zip(lasso_path.T, alphas):
        if a == 0:
            continue
        lasso_cd.alpha = a
        lasso_cd.fit(X, y)
        error = linalg.norm(c - lasso_cd.coef_)
        assert error < 0.01

    # The range of alphas chosen for coefficient comparison here is restricted
    # as compared with the above test without the positive option. This is due
    # to the circumstance that the Lars-Lasso algorithm does not converge to
    # the least-squares-solution for small alphas, see 'Least Angle Regression'
    # by Efron et al 2004. The coefficients are typically in congruence up to
    # the smallest alpha reached by the Lars-Lasso algorithm and start to
    # diverge thereafter. See
    # https://gist.github.com/michigraber/7e7d7c75eca694c7a6ff

    for alpha in np.linspace(6e-1, 1 - 1e-2, 20):
        clf1 = linear_model.LassoLars(
            fit_intercept=False, alpha=alpha, positive=True
        ).fit(X, y)
        clf2 = linear_model.Lasso(
            fit_intercept=False, alpha=alpha, tol=1e-8, positive=True
        ).fit(X, y)
        err = linalg.norm(clf1.coef_ - clf2.coef_)
        assert err < 1e-3

    # normalized data
    X = diabetes.data - diabetes.data.sum(axis=0)
    X /= np.linalg.norm(X, axis=0)
    alphas, _, lasso_path = linear_model.lars_path(X, y, method="lasso", positive=True)
    lasso_cd = linear_model.Lasso(fit_intercept=False, tol=1e-8, positive=True)
    for c, a in zip(lasso_path.T[:-1], alphas[:-1]):  # don't include alpha=0
        lasso_cd.alpha = a
        lasso_cd.fit(X, y)
        error = linalg.norm(c - lasso_cd.coef_)
        assert error < 0.01

def test_lasso_lars_vs_R_implementation():
    # Test that sklearn LassoLars implementation agrees with the LassoLars
    # implementation available in R (lars library) when fit_intercept=False.

    # Let's generate the data used in the bug report 7778
    y = np.array([-6.45006793, -3.51251449, -8.52445396, 6.12277822, -19.42109366])
    x = np.array(
        [
            [0.47299829, 0, 0, 0, 0],
            [0.08239882, 0.85784863, 0, 0, 0],
            [0.30114139, -0.07501577, 0.80895216, 0, 0],
            [-0.01460346, -0.1015233, 0.0407278, 0.80338378, 0],
            [-0.69363927, 0.06754067, 0.18064514, -0.0803561, 0.40427291],
        ]
    )

    X = x.T

    # The R result was obtained using the following code:
    #
    # library(lars)
    # model_lasso_lars = lars(X, t(y), type="lasso", intercept=FALSE,
    #                         trace=TRUE, normalize=FALSE)
    # r = t(model_lasso_lars$beta)
    #

    r = np.array(
        [
            [
                0,
                0,
                0,
                0,
                0,
                -79.810362809499026,
                -83.528788732782829,
                -83.777653739190711,
                -83.784156932888934,
                -84.033390591756657,
            ],
            [0, 0, 0, 0, -0.476624256777266, 0, 0, 0, 0, 0.025219751009936],
            [
                0,
                -3.577397088285891,
                -4.702795355871871,
                -7.016748621359461,
                -7.614898471899412,
                -0.336938391359179,
                0,
                0,
                0.001213370600853,
                0.048162321585148,
            ],
            [
                0,
                0,
                0,
                2.231558436628169,
                2.723267514525966,
                2.811549786389614,
                2.813766976061531,
                2.817462468949557,
                2.817368178703816,
                2.816221090636795,
            ],
            [
                0,
                0,
                -1.218422599914637,
                -3.457726183014808,
                -4.021304522060710,
                -45.827461592423745,
                -47.776608869312305,
                -47.911561610746404,
                -47.914845922736234,
                -48.039562334265717,
            ],
        ]
    )

    model_lasso_lars = linear_model.LassoLars(alpha=0, fit_intercept=False)
    model_lasso_lars.fit(X, y)
    skl_betas = model_lasso_lars.coef_path_

    assert_array_almost_equal(r, skl_betas, decimal=12)

@filterwarnings_normalize
@pytest.mark.parametrize("copy_X", [True, False])
def test_lasso_lars_copyX_behaviour(copy_X):
    """
    Test that user input regarding copy_X is not being overridden (it was until
    at least version 0.21)
    """
    lasso_lars = LassoLarsIC(copy_X=copy_X, precompute=False)
    rng = np.random.RandomState(0)
    X = rng.normal(0, 1, (100, 5))
    X_copy = X.copy()
    y = X[:, 2]
    lasso_lars.fit(X, y)
    assert copy_X == np.array_equal(X, X_copy)


@filterwarnings_normalize
@pytest.mark.parametrize("copy_X", [True, False])
def test_lasso_lars_fit_copyX_behaviour(copy_X):
    """
    Test that user input to .fit for copy_X overrides default __init__ value
    """
    lasso_lars = LassoLarsIC(precompute=False)
    rng = np.random.RandomState(0)
    X = rng.normal(0, 1, (100, 5))
    X_copy = X.copy()
    y = X[:, 2]
    lasso_lars.fit(X, y, copy_X=copy_X)
    assert copy_X == np.array_equal(X, X_copy)

@filterwarnings_normalize
@pytest.mark.parametrize("est", (LassoLars(alpha=1e-3), Lars()))
def test_lars_with_jitter(est):
    # Test that a small amount of jitter helps stability,
    # using example provided in issue #2746
    X = np.array([[0.0, 0.0, 0.0, -1.0, 0.0], [0.0, -1.0, 0.0, 0.0, 0.0]])
    y = [-2.5, -2.5]
    expected_coef = [0, 2.5, 0, 2.5, 0]

    # set fit_intercept to False since the target is constant and we want to
    # check the value of coef. coef would be all zeros otherwise.
    est.set_params(fit_intercept=False)
    est_jitter = clone(est).set_params(jitter=10e-8, random_state=0)

    est.fit(X, y)
    est_jitter.fit(X, y)

    assert np.mean((est.coef_ - est_jitter.coef_) ** 2) > 0.1
    np.testing.assert_allclose(est_jitter.coef_, expected_coef, rtol=1e-3)

def test_X_none_gram_not_none():
    with pytest.raises(ValueError, match="X cannot be None if Gram is not None"):
        lars_path(X=None, y=[1], Gram="not None")


def test_copy_X_with_auto_gram():
    # Non-regression test for #17789, `copy_X=True` and Gram='auto' does not
    # overwrite X
    rng = np.random.RandomState(42)
    X = rng.rand(6, 6)
    y = rng.rand(6)
    X_before = X.copy()
    linear_model.lars_path(X, y, Gram="auto", copy_X=True, method="lasso")
    # X did not change
    assert_allclose(X, X_before)

@pytest.mark.parametrize(
    "LARS, has_coef_path, args",
    (
        (Lars, True, {}),
        (LassoLars, True, {}),
        (LassoLarsIC, False, {}),
        (LarsCV, True, {}),
        # max_iter=5 is for avoiding ConvergenceWarning
        (LassoLarsCV, True, {"max_iter": 5}),
    ),
)
@pytest.mark.parametrize("dtype", (np.float32, np.float64))
@filterwarnings_normalize
def test_lars_dtype_match(LARS, has_coef_path, args, dtype):
    # The test ensures that the fit method preserves input dtype
    rng = np.random.RandomState(0)
    X = rng.rand(20, 6).astype(dtype)
    y = rng.rand(20).astype(dtype)

    model = LARS(**args)
    model.fit(X, y)
    assert model.coef_.dtype == dtype
    if has_coef_path:
        assert model.coef_path_.dtype == dtype
    assert model.intercept_.dtype == dtype

@pytest.mark.parametrize(
    "LARS, has_coef_path, args",
    (
        (Lars, True, {}),
        (LassoLars, True, {}),
        (LassoLarsIC, False, {}),
        (LarsCV, True, {}),
        # max_iter=5 is for avoiding ConvergenceWarning
        (LassoLarsCV, True, {"max_iter": 5}),
    ),
)
@filterwarnings_normalize
def test_lars_numeric_consistency(LARS, has_coef_path, args):
    # The test ensures numerical consistency between trained coefficients
    # of float32 and float64.
    rtol = 1e-5
    atol = 1e-5

    rng = np.random.RandomState(0)
    X_64 = rng.rand(10, 6)
    y_64 = rng.rand(10)

    model_64 = LARS(**args).fit(X_64, y_64)
    model_32 = LARS(**args).fit(X_64.astype(np.float32), y_64.astype(np.float32))

    assert_allclose(model_64.coef_, model_32.coef_, rtol=rtol, atol=atol)
    if has_coef_path:
        assert_allclose(model_64.coef_path_, model_32.coef_path_, rtol=rtol, atol=atol)
    assert_allclose(model_64.intercept_, model_32.intercept_, rtol=rtol, atol=atol)

@pytest.mark.parametrize("criterion", ["aic", "bic"])
def test_lassolarsic_alpha_selection(criterion):
    """Check that we properly compute the AIC and BIC score.

    In this test, we reproduce the example of Fig. 2 of Zou et al.
    (reference [1] in LassoLarsIC). In this example, only 7 features should be
    selected.
    """
    model = make_pipeline(StandardScaler(), LassoLarsIC(criterion=criterion))
    model.fit(X, y)

    best_alpha_selected = np.argmin(model[-1].criterion_)
    assert best_alpha_selected == 7

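# A brief sketch (not a test), assuming the fitted LassoLarsIC exposes
# ``alphas_`` and ``criterion_`` of equal length: the selected ``alpha_`` is
# the alpha along the path that minimizes the information criterion, which is
# the relationship the assertion above relies on.
def _lassolarsic_selection_sketch(criterion="aic"):
    ic = LassoLarsIC(criterion=criterion).fit(X, y)
    assert np.isclose(ic.alpha_, ic.alphas_[np.argmin(ic.criterion_)])
    return ic.alpha_

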
@pytest.mark.parametrize("fit_intercept", [True, False])
def test_lassolarsic_noise_variance(fit_intercept):
    """Check the behaviour when `n_samples` < `n_features` and that one needs
    to provide the noise variance."""
    rng = np.random.RandomState(0)
    X, y = datasets.make_regression(
        n_samples=10, n_features=11 - fit_intercept, random_state=rng
    )

    model = make_pipeline(StandardScaler(), LassoLarsIC(fit_intercept=fit_intercept))

    err_msg = (
        "You are using LassoLarsIC in the case where the number of samples is smaller"
        " than the number of features"
    )
    with pytest.raises(ValueError, match=err_msg):
        model.fit(X, y)

    model.set_params(lassolarsic__noise_variance=1.0)
    model.fit(X, y).predict(X)

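# A final sketch (not a test), assuming the same wide-data regime as above but
# using LassoLarsIC directly rather than inside a pipeline: when the noise
# variance cannot be estimated from the data (n_samples <= n_features +
# fit_intercept), passing ``noise_variance`` explicitly lets the fit proceed.
def _lassolarsic_wide_data_sketch():
    rng = np.random.RandomState(0)
    X_wide, y_wide = datasets.make_regression(
        n_samples=10, n_features=20, random_state=rng
    )
    model = LassoLarsIC(noise_variance=1.0).fit(X_wide, y_wide)
    return model.alpha_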