  1. """
  2. Least Angle Regression algorithm. See the documentation on the
  3. Generalized Linear Model for a complete discussion.
  4. """
  5. # Author: Fabian Pedregosa <fabian.pedregosa@inria.fr>
  6. # Alexandre Gramfort <alexandre.gramfort@inria.fr>
  7. # Gael Varoquaux
  8. #
  9. # License: BSD 3 clause
  10. import sys
  11. import warnings
  12. from math import log
  13. from numbers import Integral, Real
  14. import numpy as np
  15. from scipy import interpolate, linalg
  16. from scipy.linalg.lapack import get_lapack_funcs
  17. from ..base import MultiOutputMixin, RegressorMixin, _fit_context
  18. from ..exceptions import ConvergenceWarning
  19. from ..model_selection import check_cv
  20. # mypy error: Module 'sklearn.utils' has no attribute 'arrayfuncs'
  21. from ..utils import arrayfuncs, as_float_array, check_random_state # type: ignore
  22. from ..utils._param_validation import Hidden, Interval, StrOptions
  23. from ..utils.parallel import Parallel, delayed
  24. from ._base import LinearModel, LinearRegression, _deprecate_normalize, _preprocess_data
  25. SOLVE_TRIANGULAR_ARGS = {"check_finite": False}


def lars_path(
    X,
    y,
    Xy=None,
    *,
    Gram=None,
    max_iter=500,
    alpha_min=0,
    method="lar",
    copy_X=True,
    eps=np.finfo(float).eps,
    copy_Gram=True,
    verbose=0,
    return_path=True,
    return_n_iter=False,
    positive=False,
):
    """Compute Least Angle Regression or Lasso path using the LARS algorithm [1].

    The optimization objective for the case method='lasso' is::

    (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1

    in the case of method='lar', the objective function is only known in
    the form of an implicit equation (see discussion in [1]).

    Read more in the :ref:`User Guide <least_angle_regression>`.

    Parameters
    ----------
    X : None or array-like of shape (n_samples, n_features)
        Input data. Note that if X is `None` then the Gram matrix must be
        specified, i.e., cannot be `None` or `False`.

    y : None or array-like of shape (n_samples,)
        Input targets.

    Xy : array-like of shape (n_features,) or (n_features, n_targets), \
            default=None
        `Xy = np.dot(X.T, y)` that can be precomputed. It is useful
        only when the Gram matrix is precomputed.

    Gram : None, 'auto', array-like of shape (n_features, n_features), \
            default=None
        Precomputed Gram matrix `(X' * X)`; if `'auto'`, the Gram
        matrix is precomputed from the given X, if there are more samples
        than features.

    max_iter : int, default=500
        Maximum number of iterations to perform, set to infinity for no limit.

    alpha_min : float, default=0
        Minimum correlation along the path. It corresponds to the
        regularization parameter `alpha` in the Lasso.

    method : {'lar', 'lasso'}, default='lar'
        Specifies the returned model. Select `'lar'` for Least Angle
        Regression, `'lasso'` for the Lasso.

    copy_X : bool, default=True
        If `False`, `X` is overwritten.

    eps : float, default=np.finfo(float).eps
        The machine-precision regularization in the computation of the
        Cholesky diagonal factors. Increase this for very ill-conditioned
        systems. Unlike the `tol` parameter in some iterative
        optimization-based algorithms, this parameter does not control
        the tolerance of the optimization.

    copy_Gram : bool, default=True
        If `False`, `Gram` is overwritten.

    verbose : int, default=0
        Controls output verbosity.

    return_path : bool, default=True
        If `True`, returns the entire path, else returns only the
        last point of the path.

    return_n_iter : bool, default=False
        Whether to return the number of iterations.

    positive : bool, default=False
        Restrict coefficients to be >= 0.
        This option is only allowed with method 'lasso'. Note that the model
        coefficients will not converge to the ordinary-least-squares solution
        for small values of alpha. Only coefficients up to the smallest alpha
        value (`alphas_[alphas_ > 0.].min()` when fit_path=True) reached by
        the stepwise Lars-Lasso algorithm are typically in congruence with the
        solution of the coordinate descent `lasso_path` function.

    Returns
    -------
    alphas : array-like of shape (n_alphas + 1,)
        Maximum of covariances (in absolute value) at each iteration.
        `n_alphas` is either `max_iter`, `n_features`, or the
        number of nodes in the path with `alpha >= alpha_min`, whichever
        is smaller.

    active : array-like of shape (n_alphas,)
        Indices of active variables at the end of the path.

    coefs : array-like of shape (n_features, n_alphas + 1)
        Coefficients along the path.

    n_iter : int
        Number of iterations run. Returned only if `return_n_iter` is set
        to True.

    See Also
    --------
    lars_path_gram : Compute LARS path in the sufficient stats mode.
    lasso_path : Compute Lasso path with coordinate descent.
    LassoLars : Lasso model fit with Least Angle Regression a.k.a. Lars.
    Lars : Least Angle Regression model a.k.a. LAR.
    LassoLarsCV : Cross-validated Lasso, using the LARS algorithm.
    LarsCV : Cross-validated Least Angle Regression model.
    sklearn.decomposition.sparse_encode : Sparse coding.

    References
    ----------
    .. [1] "Least Angle Regression", Efron et al.
           http://statweb.stanford.edu/~tibs/ftp/lars.pdf

    .. [2] `Wikipedia entry on the Least-angle regression
           <https://en.wikipedia.org/wiki/Least-angle_regression>`_

    .. [3] `Wikipedia entry on the Lasso
           <https://en.wikipedia.org/wiki/Lasso_(statistics)>`_
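
    Examples
    --------
    A small illustrative run on synthetic data; to keep the example portable,
    only shapes and monotonicity are checked rather than exact coefficient
    values:

    >>> import numpy as np
    >>> from sklearn.datasets import make_regression
    >>> from sklearn.linear_model import lars_path
    >>> X, y = make_regression(n_samples=50, n_features=5, random_state=0)
    >>> alphas, active, coefs = lars_path(X, y, method="lasso")
    >>> coefs.shape == (X.shape[1], alphas.shape[0])
    True
    >>> bool(np.all(np.diff(alphas) <= 0))  # alphas shrink along the path
    True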
  129. """
  130. if X is None and Gram is not None:
  131. raise ValueError(
  132. "X cannot be None if Gram is not None"
  133. "Use lars_path_gram to avoid passing X and y."
  134. )
  135. return _lars_path_solver(
  136. X=X,
  137. y=y,
  138. Xy=Xy,
  139. Gram=Gram,
  140. n_samples=None,
  141. max_iter=max_iter,
  142. alpha_min=alpha_min,
  143. method=method,
  144. copy_X=copy_X,
  145. eps=eps,
  146. copy_Gram=copy_Gram,
  147. verbose=verbose,
  148. return_path=return_path,
  149. return_n_iter=return_n_iter,
  150. positive=positive,
  151. )


def lars_path_gram(
    Xy,
    Gram,
    *,
    n_samples,
    max_iter=500,
    alpha_min=0,
    method="lar",
    copy_X=True,
    eps=np.finfo(float).eps,
    copy_Gram=True,
    verbose=0,
    return_path=True,
    return_n_iter=False,
    positive=False,
):
  168. """The lars_path in the sufficient stats mode [1].
  169. The optimization objective for the case method='lasso' is::
  170. (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1
  171. in the case of method='lars', the objective function is only known in
  172. the form of an implicit equation (see discussion in [1])
  173. Read more in the :ref:`User Guide <least_angle_regression>`.
  174. Parameters
  175. ----------
  176. Xy : array-like of shape (n_features,) or (n_features, n_targets)
  177. Xy = np.dot(X.T, y).
  178. Gram : array-like of shape (n_features, n_features)
  179. Gram = np.dot(X.T * X).
  180. n_samples : int or float
  181. Equivalent size of sample.
  182. max_iter : int, default=500
  183. Maximum number of iterations to perform, set to infinity for no limit.
  184. alpha_min : float, default=0
  185. Minimum correlation along the path. It corresponds to the
  186. regularization parameter alpha parameter in the Lasso.
  187. method : {'lar', 'lasso'}, default='lar'
  188. Specifies the returned model. Select ``'lar'`` for Least Angle
  189. Regression, ``'lasso'`` for the Lasso.
  190. copy_X : bool, default=True
  191. If ``False``, ``X`` is overwritten.
  192. eps : float, default=np.finfo(float).eps
  193. The machine-precision regularization in the computation of the
  194. Cholesky diagonal factors. Increase this for very ill-conditioned
  195. systems. Unlike the ``tol`` parameter in some iterative
  196. optimization-based algorithms, this parameter does not control
  197. the tolerance of the optimization.
  198. copy_Gram : bool, default=True
  199. If ``False``, ``Gram`` is overwritten.
  200. verbose : int, default=0
  201. Controls output verbosity.
  202. return_path : bool, default=True
  203. If ``return_path==True`` returns the entire path, else returns only the
  204. last point of the path.
  205. return_n_iter : bool, default=False
  206. Whether to return the number of iterations.
  207. positive : bool, default=False
  208. Restrict coefficients to be >= 0.
  209. This option is only allowed with method 'lasso'. Note that the model
  210. coefficients will not converge to the ordinary-least-squares solution
  211. for small values of alpha. Only coefficients up to the smallest alpha
  212. value (``alphas_[alphas_ > 0.].min()`` when fit_path=True) reached by
  213. the stepwise Lars-Lasso algorithm are typically in congruence with the
  214. solution of the coordinate descent lasso_path function.
  215. Returns
  216. -------
  217. alphas : array-like of shape (n_alphas + 1,)
  218. Maximum of covariances (in absolute value) at each iteration.
  219. ``n_alphas`` is either ``max_iter``, ``n_features`` or the
  220. number of nodes in the path with ``alpha >= alpha_min``, whichever
  221. is smaller.
  222. active : array-like of shape (n_alphas,)
  223. Indices of active variables at the end of the path.
  224. coefs : array-like of shape (n_features, n_alphas + 1)
  225. Coefficients along the path.
  226. n_iter : int
  227. Number of iterations run. Returned only if return_n_iter is set
  228. to True.
  229. See Also
  230. --------
  231. lars_path_gram : Compute LARS path.
  232. lasso_path : Compute Lasso path with coordinate descent.
  233. LassoLars : Lasso model fit with Least Angle Regression a.k.a. Lars.
  234. Lars : Least Angle Regression model a.k.a. LAR.
  235. LassoLarsCV : Cross-validated Lasso, using the LARS algorithm.
  236. LarsCV : Cross-validated Least Angle Regression model.
  237. sklearn.decomposition.sparse_encode : Sparse coding.
  238. References
  239. ----------
  240. .. [1] "Least Angle Regression", Efron et al.
  241. http://statweb.stanford.edu/~tibs/ftp/lars.pdf
  242. .. [2] `Wikipedia entry on the Least-angle regression
  243. <https://en.wikipedia.org/wiki/Least-angle_regression>`_
  244. .. [3] `Wikipedia entry on the Lasso
  245. <https://en.wikipedia.org/wiki/Lasso_(statistics)>`_
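
    Examples
    --------
    A sketch of the sufficient-statistics mode: precompute ``Gram`` and
    ``Xy`` from the data, then call the path solver without ``X`` and ``y``:

    >>> from sklearn.datasets import make_regression
    >>> from sklearn.linear_model import lars_path_gram
    >>> X, y = make_regression(n_samples=50, n_features=5, random_state=0)
    >>> alphas, active, coefs = lars_path_gram(
    ...     Xy=X.T @ y, Gram=X.T @ X, n_samples=X.shape[0], method="lasso"
    ... )
    >>> coefs.shape == (X.shape[1], alphas.shape[0])
    True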
  246. """
  247. return _lars_path_solver(
  248. X=None,
  249. y=None,
  250. Xy=Xy,
  251. Gram=Gram,
  252. n_samples=n_samples,
  253. max_iter=max_iter,
  254. alpha_min=alpha_min,
  255. method=method,
  256. copy_X=copy_X,
  257. eps=eps,
  258. copy_Gram=copy_Gram,
  259. verbose=verbose,
  260. return_path=return_path,
  261. return_n_iter=return_n_iter,
  262. positive=positive,
  263. )


def _lars_path_solver(
    X,
    y,
    Xy=None,
    Gram=None,
    n_samples=None,
    max_iter=500,
    alpha_min=0,
    method="lar",
    copy_X=True,
    eps=np.finfo(float).eps,
    copy_Gram=True,
    verbose=0,
    return_path=True,
    return_n_iter=False,
    positive=False,
):
  281. """Compute Least Angle Regression or Lasso path using LARS algorithm [1]
  282. The optimization objective for the case method='lasso' is::
  283. (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1
  284. in the case of method='lars', the objective function is only known in
  285. the form of an implicit equation (see discussion in [1])
  286. Read more in the :ref:`User Guide <least_angle_regression>`.
  287. Parameters
  288. ----------
  289. X : None or ndarray of shape (n_samples, n_features)
  290. Input data. Note that if X is None then Gram must be specified,
  291. i.e., cannot be None or False.
  292. y : None or ndarray of shape (n_samples,)
  293. Input targets.
  294. Xy : array-like of shape (n_features,) or (n_features, n_targets), \
  295. default=None
  296. `Xy = np.dot(X.T, y)` that can be precomputed. It is useful
  297. only when the Gram matrix is precomputed.
  298. Gram : None, 'auto' or array-like of shape (n_features, n_features), \
  299. default=None
  300. Precomputed Gram matrix `(X' * X)`, if ``'auto'``, the Gram
  301. matrix is precomputed from the given X, if there are more samples
  302. than features.
  303. n_samples : int or float, default=None
  304. Equivalent size of sample. If `None`, it will be `n_samples`.
  305. max_iter : int, default=500
  306. Maximum number of iterations to perform, set to infinity for no limit.
  307. alpha_min : float, default=0
  308. Minimum correlation along the path. It corresponds to the
  309. regularization parameter alpha parameter in the Lasso.
  310. method : {'lar', 'lasso'}, default='lar'
  311. Specifies the returned model. Select ``'lar'`` for Least Angle
  312. Regression, ``'lasso'`` for the Lasso.
  313. copy_X : bool, default=True
  314. If ``False``, ``X`` is overwritten.
  315. eps : float, default=np.finfo(float).eps
  316. The machine-precision regularization in the computation of the
  317. Cholesky diagonal factors. Increase this for very ill-conditioned
  318. systems. Unlike the ``tol`` parameter in some iterative
  319. optimization-based algorithms, this parameter does not control
  320. the tolerance of the optimization.
  321. copy_Gram : bool, default=True
  322. If ``False``, ``Gram`` is overwritten.
  323. verbose : int, default=0
  324. Controls output verbosity.
  325. return_path : bool, default=True
  326. If ``return_path==True`` returns the entire path, else returns only the
  327. last point of the path.
  328. return_n_iter : bool, default=False
  329. Whether to return the number of iterations.
  330. positive : bool, default=False
  331. Restrict coefficients to be >= 0.
  332. This option is only allowed with method 'lasso'. Note that the model
  333. coefficients will not converge to the ordinary-least-squares solution
  334. for small values of alpha. Only coefficients up to the smallest alpha
  335. value (``alphas_[alphas_ > 0.].min()`` when fit_path=True) reached by
  336. the stepwise Lars-Lasso algorithm are typically in congruence with the
  337. solution of the coordinate descent lasso_path function.
  338. Returns
  339. -------
  340. alphas : array-like of shape (n_alphas + 1,)
  341. Maximum of covariances (in absolute value) at each iteration.
  342. ``n_alphas`` is either ``max_iter``, ``n_features`` or the
  343. number of nodes in the path with ``alpha >= alpha_min``, whichever
  344. is smaller.
  345. active : array-like of shape (n_alphas,)
  346. Indices of active variables at the end of the path.
  347. coefs : array-like of shape (n_features, n_alphas + 1)
  348. Coefficients along the path
  349. n_iter : int
  350. Number of iterations run. Returned only if return_n_iter is set
  351. to True.
  352. See Also
  353. --------
  354. lasso_path
  355. LassoLars
  356. Lars
  357. LassoLarsCV
  358. LarsCV
  359. sklearn.decomposition.sparse_encode
  360. References
  361. ----------
  362. .. [1] "Least Angle Regression", Efron et al.
  363. http://statweb.stanford.edu/~tibs/ftp/lars.pdf
  364. .. [2] `Wikipedia entry on the Least-angle regression
  365. <https://en.wikipedia.org/wiki/Least-angle_regression>`_
  366. .. [3] `Wikipedia entry on the Lasso
  367. <https://en.wikipedia.org/wiki/Lasso_(statistics)>`_
  368. """
    if method == "lar" and positive:
        raise ValueError("Positive constraint not supported for 'lar' coding method.")

    n_samples = n_samples if n_samples is not None else y.size

    if Xy is None:
        Cov = np.dot(X.T, y)
    else:
        Cov = Xy.copy()

    if Gram is None or Gram is False:
        Gram = None
        if X is None:
            raise ValueError("X and Gram cannot both be unspecified.")
    elif isinstance(Gram, str) and Gram == "auto" or Gram is True:
        if Gram is True or X.shape[0] > X.shape[1]:
            Gram = np.dot(X.T, X)
        else:
            Gram = None
    elif copy_Gram:
        Gram = Gram.copy()

    if Gram is None:
        n_features = X.shape[1]
    else:
        n_features = Cov.shape[0]
        if Gram.shape != (n_features, n_features):
            raise ValueError("The shapes of the inputs Gram and Xy do not match.")

    if copy_X and X is not None and Gram is None:
        # force copy. setting the array to be fortran-ordered
        # speeds up the calculation of the (partial) Gram matrix
        # and allows to easily swap columns
        X = X.copy("F")

    max_features = min(max_iter, n_features)

    dtypes = set(a.dtype for a in (X, y, Xy, Gram) if a is not None)
    if len(dtypes) == 1:
        # use the precision level of input data if it is consistent
        return_dtype = next(iter(dtypes))
    else:
        # fallback to double precision otherwise
        return_dtype = np.float64

    if return_path:
        coefs = np.zeros((max_features + 1, n_features), dtype=return_dtype)
        alphas = np.zeros(max_features + 1, dtype=return_dtype)
    else:
        coef, prev_coef = (
            np.zeros(n_features, dtype=return_dtype),
            np.zeros(n_features, dtype=return_dtype),
        )
        alpha, prev_alpha = (
            np.array([0.0], dtype=return_dtype),
            np.array([0.0], dtype=return_dtype),
        )
        # better ideas for the above?

    n_iter, n_active = 0, 0
    active, indices = list(), np.arange(n_features)
    # holds the sign of covariance
    sign_active = np.empty(max_features, dtype=np.int8)
    drop = False

    # will hold the cholesky factorization. Only lower part is
    # referenced.
    if Gram is None:
        L = np.empty((max_features, max_features), dtype=X.dtype)
        swap, nrm2 = linalg.get_blas_funcs(("swap", "nrm2"), (X,))
    else:
        L = np.empty((max_features, max_features), dtype=Gram.dtype)
        swap, nrm2 = linalg.get_blas_funcs(("swap", "nrm2"), (Cov,))
    (solve_cholesky,) = get_lapack_funcs(("potrs",), (L,))

    if verbose:
        if verbose > 1:
            print("Step\t\tAdded\t\tDropped\t\tActive set size\t\tC")
        else:
            sys.stdout.write(".")
            sys.stdout.flush()

    tiny32 = np.finfo(np.float32).tiny  # to avoid division by 0 warning
    cov_precision = np.finfo(Cov.dtype).precision
    equality_tolerance = np.finfo(np.float32).eps

    if Gram is not None:
        Gram_copy = Gram.copy()
        Cov_copy = Cov.copy()

    while True:
        if Cov.size:
            if positive:
                C_idx = np.argmax(Cov)
            else:
                C_idx = np.argmax(np.abs(Cov))

            C_ = Cov[C_idx]

            if positive:
                C = C_
            else:
                C = np.fabs(C_)
        else:
            C = 0.0

        if return_path:
            alpha = alphas[n_iter, np.newaxis]
            coef = coefs[n_iter]
            prev_alpha = alphas[n_iter - 1, np.newaxis]
            prev_coef = coefs[n_iter - 1]

        alpha[0] = C / n_samples
        if alpha[0] <= alpha_min + equality_tolerance:  # early stopping
            if abs(alpha[0] - alpha_min) > equality_tolerance:
                # interpolation factor 0 <= ss < 1
                if n_iter > 0:
                    # In the first iteration, all alphas are zero, the formula
                    # below would make ss a NaN
                    ss = (prev_alpha[0] - alpha_min) / (prev_alpha[0] - alpha[0])
                    coef[:] = prev_coef + ss * (coef - prev_coef)
                alpha[0] = alpha_min
            if return_path:
                coefs[n_iter] = coef
            break

        if n_iter >= max_iter or n_active >= n_features:
            break

        if not drop:
            ##########################################################
            # Append x_j to the Cholesky factorization of (Xa * Xa') #
            #                                                        #
            #            ( L   0 )                                   #
            #     L  ->  (       )  , where L * w = Xa' x_j          #
            #            ( w   z )    and z = ||x_j||                #
            #                                                        #
            ##########################################################
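            # A plain-NumPy sketch of this update (illustrative only: `x_j`
            # and `Xa` are assumed names for the entering column and the
            # active columns X[:, :n_active]; the actual code below works on
            # the preallocated `L` with BLAS/LAPACK helpers):
            #     w = linalg.solve_triangular(L_active, Xa.T @ x_j, lower=True)
            #     z = np.sqrt(max(x_j @ x_j - w @ w, eps))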

            if positive:
                sign_active[n_active] = np.ones_like(C_)
            else:
                sign_active[n_active] = np.sign(C_)
            m, n = n_active, C_idx + n_active

            Cov[C_idx], Cov[0] = swap(Cov[C_idx], Cov[0])
            indices[n], indices[m] = indices[m], indices[n]
            Cov_not_shortened = Cov
            Cov = Cov[1:]  # remove Cov[0]

            if Gram is None:
                X.T[n], X.T[m] = swap(X.T[n], X.T[m])
                c = nrm2(X.T[n_active]) ** 2
                L[n_active, :n_active] = np.dot(X.T[n_active], X.T[:n_active].T)
            else:
                # swap only works in place if the matrix is fortran
                # contiguous ...
                Gram[m], Gram[n] = swap(Gram[m], Gram[n])
                Gram[:, m], Gram[:, n] = swap(Gram[:, m], Gram[:, n])
                c = Gram[n_active, n_active]
                L[n_active, :n_active] = Gram[n_active, :n_active]

            # Update the cholesky decomposition for the Gram matrix
            if n_active:
                linalg.solve_triangular(
                    L[:n_active, :n_active],
                    L[n_active, :n_active],
                    trans=0,
                    lower=1,
                    overwrite_b=True,
                    **SOLVE_TRIANGULAR_ARGS,
                )

            v = np.dot(L[n_active, :n_active], L[n_active, :n_active])
            diag = max(np.sqrt(np.abs(c - v)), eps)
            L[n_active, n_active] = diag

            if diag < 1e-7:
                # The system is becoming too ill-conditioned.
                # We have degenerate vectors in our active set.
                # We'll 'drop for good' the last regressor added.
                warnings.warn(
                    "Regressors in active set degenerate. "
                    "Dropping a regressor, after %i iterations, "
                    "i.e. alpha=%.3e, "
                    "with an active set of %i regressors, and "
                    "the smallest cholesky pivot element being %.3e."
                    " Reduce max_iter or increase eps parameters."
                    % (n_iter, alpha.item(), n_active, diag),
                    ConvergenceWarning,
                )

                # XXX: need to figure a 'drop for good' way
                Cov = Cov_not_shortened
                Cov[0] = 0
                Cov[C_idx], Cov[0] = swap(Cov[C_idx], Cov[0])
                continue

            active.append(indices[n_active])
            n_active += 1

            if verbose > 1:
                print(
                    "%s\t\t%s\t\t%s\t\t%s\t\t%s" % (n_iter, active[-1], "", n_active, C)
                )

        if method == "lasso" and n_iter > 0 and prev_alpha[0] < alpha[0]:
            # alpha is increasing. This is because the updates of Cov are
            # bringing in too much numerical error that is greater than
            # the remaining correlation with the
            # regressors. Time to bail out
            warnings.warn(
                "Early stopping the lars path, as the residues "
                "are small and the current value of alpha is no "
                "longer well controlled. %i iterations, alpha=%.3e, "
                "previous alpha=%.3e, with an active set of %i "
                "regressors." % (n_iter, alpha.item(), prev_alpha.item(), n_active),
                ConvergenceWarning,
            )
            break

        # least squares solution
        least_squares, _ = solve_cholesky(
            L[:n_active, :n_active], sign_active[:n_active], lower=True
        )

        if least_squares.size == 1 and least_squares == 0:
            # This happens because sign_active[:n_active] = 0
            least_squares[...] = 1
            AA = 1.0
        else:
            # is this really needed ?
            AA = 1.0 / np.sqrt(np.sum(least_squares * sign_active[:n_active]))

            if not np.isfinite(AA):
                # L is too ill-conditioned
                i = 0
                L_ = L[:n_active, :n_active].copy()
                while not np.isfinite(AA):
                    L_.flat[:: n_active + 1] += (2**i) * eps
                    least_squares, _ = solve_cholesky(
                        L_, sign_active[:n_active], lower=True
                    )
                    tmp = max(np.sum(least_squares * sign_active[:n_active]), eps)
                    AA = 1.0 / np.sqrt(tmp)
                    i += 1
            least_squares *= AA

        if Gram is None:
            # equiangular direction of variables in the active set
            eq_dir = np.dot(X.T[:n_active].T, least_squares)
            # correlation between each inactive variable and
            # the equiangular vector
            corr_eq_dir = np.dot(X.T[n_active:], eq_dir)
        else:
            # if huge number of features, this takes 50% of time, I
            # think could be avoided if we just update it using an
            # orthogonal (QR) decomposition of X
            corr_eq_dir = np.dot(Gram[:n_active, n_active:].T, least_squares)

        # Explicit rounding can be necessary to avoid `np.argmax(Cov)` yielding
        # unstable results because of rounding errors.
        np.around(corr_eq_dir, decimals=cov_precision, out=corr_eq_dir)

        g1 = arrayfuncs.min_pos((C - Cov) / (AA - corr_eq_dir + tiny32))
        if positive:
            gamma_ = min(g1, C / AA)
        else:
            g2 = arrayfuncs.min_pos((C + Cov) / (AA + corr_eq_dir + tiny32))
            gamma_ = min(g1, g2, C / AA)

        # TODO: better names for these variables: z
        drop = False
        z = -coef[active] / (least_squares + tiny32)
        z_pos = arrayfuncs.min_pos(z)
        if z_pos < gamma_:
            # some coefficients have changed sign
            idx = np.where(z == z_pos)[0][::-1]

            # update the sign, important for LAR
            sign_active[idx] = -sign_active[idx]

            if method == "lasso":
                gamma_ = z_pos
            drop = True

        n_iter += 1

        if return_path:
            if n_iter >= coefs.shape[0]:
                del coef, alpha, prev_alpha, prev_coef
                # resize the coefs and alphas array
                add_features = 2 * max(1, (max_features - n_active))
                coefs = np.resize(coefs, (n_iter + add_features, n_features))
                coefs[-add_features:] = 0
                alphas = np.resize(alphas, n_iter + add_features)
                alphas[-add_features:] = 0
            coef = coefs[n_iter]
            prev_coef = coefs[n_iter - 1]
        else:
            # mimic the effect of incrementing n_iter on the array references
            prev_coef = coef
            prev_alpha[0] = alpha[0]
            coef = np.zeros_like(coef)

        coef[active] = prev_coef[active] + gamma_ * least_squares

        # update correlations
        Cov -= gamma_ * corr_eq_dir

        # See if any coefficient has changed sign
        if drop and method == "lasso":
            # handle the case when idx is not of length 1
            for ii in idx:
                arrayfuncs.cholesky_delete(L[:n_active, :n_active], ii)

            n_active -= 1
            drop_idx = [active.pop(ii) for ii in idx]

            if Gram is None:
                # propagate dropped variable
                for ii in idx:
                    for i in range(ii, n_active):
                        X.T[i], X.T[i + 1] = swap(X.T[i], X.T[i + 1])
                        # yeah this is stupid
                        indices[i], indices[i + 1] = indices[i + 1], indices[i]

                # TODO: this could be updated
                residual = y - np.dot(X[:, :n_active], coef[active])
                temp = np.dot(X.T[n_active], residual)

                Cov = np.r_[temp, Cov]
            else:
                for ii in idx:
                    for i in range(ii, n_active):
                        indices[i], indices[i + 1] = indices[i + 1], indices[i]
                        Gram[i], Gram[i + 1] = swap(Gram[i], Gram[i + 1])
                        Gram[:, i], Gram[:, i + 1] = swap(Gram[:, i], Gram[:, i + 1])

                # Cov_n = Cov_j + x_j * X + increment(betas) TODO:
                # will this still work with multiple drops ?

                # recompute covariance. Probably could be done better
                # wrong as Xy is not swapped with the rest of variables

                # TODO: this could be updated
                temp = Cov_copy[drop_idx] - np.dot(Gram_copy[drop_idx], coef)

                Cov = np.r_[temp, Cov]

            sign_active = np.delete(sign_active, idx)
            sign_active = np.append(sign_active, 0.0)  # just to maintain size
            if verbose > 1:
                print(
                    "%s\t\t%s\t\t%s\t\t%s\t\t%s"
                    % (n_iter, "", drop_idx, n_active, abs(temp))
                )

    if return_path:
        # resize coefs in case of early stop
        alphas = alphas[: n_iter + 1]
        coefs = coefs[: n_iter + 1]

        if return_n_iter:
            return alphas, active, coefs.T, n_iter
        else:
            return alphas, active, coefs.T
    else:
        if return_n_iter:
            return alpha, active, coef, n_iter
        else:
            return alpha, active, coef


###############################################################################
# Estimator classes


class Lars(MultiOutputMixin, RegressorMixin, LinearModel):
  690. """Least Angle Regression model a.k.a. LAR.
  691. Read more in the :ref:`User Guide <least_angle_regression>`.
  692. Parameters
  693. ----------
  694. fit_intercept : bool, default=True
  695. Whether to calculate the intercept for this model. If set
  696. to false, no intercept will be used in calculations
  697. (i.e. data is expected to be centered).
  698. verbose : bool or int, default=False
  699. Sets the verbosity amount.
  700. normalize : bool, default=False
  701. This parameter is ignored when ``fit_intercept`` is set to False.
  702. If True, the regressors X will be normalized before regression by
  703. subtracting the mean and dividing by the l2-norm.
  704. If you wish to standardize, please use
  705. :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``
  706. on an estimator with ``normalize=False``.
  707. .. versionchanged:: 1.2
  708. default changed from True to False in 1.2.
  709. .. deprecated:: 1.2
  710. ``normalize`` was deprecated in version 1.2 and will be removed in 1.4.
  711. precompute : bool, 'auto' or array-like , default='auto'
  712. Whether to use a precomputed Gram matrix to speed up
  713. calculations. If set to ``'auto'`` let us decide. The Gram
  714. matrix can also be passed as argument.
  715. n_nonzero_coefs : int, default=500
  716. Target number of non-zero coefficients. Use ``np.inf`` for no limit.
  717. eps : float, default=np.finfo(float).eps
  718. The machine-precision regularization in the computation of the
  719. Cholesky diagonal factors. Increase this for very ill-conditioned
  720. systems. Unlike the ``tol`` parameter in some iterative
  721. optimization-based algorithms, this parameter does not control
  722. the tolerance of the optimization.
  723. copy_X : bool, default=True
  724. If ``True``, X will be copied; else, it may be overwritten.
  725. fit_path : bool, default=True
  726. If True the full path is stored in the ``coef_path_`` attribute.
  727. If you compute the solution for a large problem or many targets,
  728. setting ``fit_path`` to ``False`` will lead to a speedup, especially
  729. with a small alpha.
  730. jitter : float, default=None
  731. Upper bound on a uniform noise parameter to be added to the
  732. `y` values, to satisfy the model's assumption of
  733. one-at-a-time computations. Might help with stability.
  734. .. versionadded:: 0.23
  735. random_state : int, RandomState instance or None, default=None
  736. Determines random number generation for jittering. Pass an int
  737. for reproducible output across multiple function calls.
  738. See :term:`Glossary <random_state>`. Ignored if `jitter` is None.
  739. .. versionadded:: 0.23
  740. Attributes
  741. ----------
  742. alphas_ : array-like of shape (n_alphas + 1,) or list of such arrays
  743. Maximum of covariances (in absolute value) at each iteration.
  744. ``n_alphas`` is either ``max_iter``, ``n_features`` or the
  745. number of nodes in the path with ``alpha >= alpha_min``, whichever
  746. is smaller. If this is a list of array-like, the length of the outer
  747. list is `n_targets`.
  748. active_ : list of shape (n_alphas,) or list of such lists
  749. Indices of active variables at the end of the path.
  750. If this is a list of list, the length of the outer list is `n_targets`.
  751. coef_path_ : array-like of shape (n_features, n_alphas + 1) or list \
  752. of such arrays
  753. The varying values of the coefficients along the path. It is not
  754. present if the ``fit_path`` parameter is ``False``. If this is a list
  755. of array-like, the length of the outer list is `n_targets`.
  756. coef_ : array-like of shape (n_features,) or (n_targets, n_features)
  757. Parameter vector (w in the formulation formula).
  758. intercept_ : float or array-like of shape (n_targets,)
  759. Independent term in decision function.
  760. n_iter_ : array-like or int
  761. The number of iterations taken by lars_path to find the
  762. grid of alphas for each target.
  763. n_features_in_ : int
  764. Number of features seen during :term:`fit`.
  765. .. versionadded:: 0.24
  766. feature_names_in_ : ndarray of shape (`n_features_in_`,)
  767. Names of features seen during :term:`fit`. Defined only when `X`
  768. has feature names that are all strings.
  769. .. versionadded:: 1.0
  770. See Also
  771. --------
  772. lars_path: Compute Least Angle Regression or Lasso
  773. path using LARS algorithm.
  774. LarsCV : Cross-validated Least Angle Regression model.
  775. sklearn.decomposition.sparse_encode : Sparse coding.
  776. Examples
  777. --------
  778. >>> from sklearn import linear_model
  779. >>> reg = linear_model.Lars(n_nonzero_coefs=1)
  780. >>> reg.fit([[-1, 1], [0, 0], [1, 1]], [-1.1111, 0, -1.1111])
  781. Lars(n_nonzero_coefs=1)
  782. >>> print(reg.coef_)
  783. [ 0. -1.11...]
  784. """

    _parameter_constraints: dict = {
        "fit_intercept": ["boolean"],
        "verbose": ["verbose"],
        "normalize": ["boolean", Hidden(StrOptions({"deprecated"}))],
        "precompute": ["boolean", StrOptions({"auto"}), np.ndarray, Hidden(None)],
        "n_nonzero_coefs": [Interval(Integral, 1, None, closed="left")],
        "eps": [Interval(Real, 0, None, closed="left")],
        "copy_X": ["boolean"],
        "fit_path": ["boolean"],
        "jitter": [Interval(Real, 0, None, closed="left"), None],
        "random_state": ["random_state"],
    }

    method = "lar"
    positive = False

    def __init__(
        self,
        *,
        fit_intercept=True,
        verbose=False,
        normalize="deprecated",
        precompute="auto",
        n_nonzero_coefs=500,
        eps=np.finfo(float).eps,
        copy_X=True,
        fit_path=True,
        jitter=None,
        random_state=None,
    ):
        self.fit_intercept = fit_intercept
        self.verbose = verbose
        self.normalize = normalize
        self.precompute = precompute
        self.n_nonzero_coefs = n_nonzero_coefs
        self.eps = eps
        self.copy_X = copy_X
        self.fit_path = fit_path
        self.jitter = jitter
        self.random_state = random_state

    @staticmethod
    def _get_gram(precompute, X, y):
        if (not hasattr(precompute, "__array__")) and (
            (precompute is True)
            or (precompute == "auto" and X.shape[0] > X.shape[1])
            or (precompute == "auto" and y.shape[1] > 1)
        ):
            precompute = np.dot(X.T, X)

        return precompute

    def _fit(self, X, y, max_iter, alpha, fit_path, normalize, Xy=None):
        """Auxiliary method to fit the model using X, y as training data."""
        n_features = X.shape[1]

        X, y, X_offset, y_offset, X_scale = _preprocess_data(
            X, y, self.fit_intercept, normalize, self.copy_X
        )

        if y.ndim == 1:
            y = y[:, np.newaxis]

        n_targets = y.shape[1]

        Gram = self._get_gram(self.precompute, X, y)

        self.alphas_ = []
        self.n_iter_ = []
        self.coef_ = np.empty((n_targets, n_features), dtype=X.dtype)

        if fit_path:
            self.active_ = []
            self.coef_path_ = []
            for k in range(n_targets):
                this_Xy = None if Xy is None else Xy[:, k]
                alphas, active, coef_path, n_iter_ = lars_path(
                    X,
                    y[:, k],
                    Gram=Gram,
                    Xy=this_Xy,
                    copy_X=self.copy_X,
                    copy_Gram=True,
                    alpha_min=alpha,
                    method=self.method,
                    verbose=max(0, self.verbose - 1),
                    max_iter=max_iter,
                    eps=self.eps,
                    return_path=True,
                    return_n_iter=True,
                    positive=self.positive,
                )
                self.alphas_.append(alphas)
                self.active_.append(active)
                self.n_iter_.append(n_iter_)
                self.coef_path_.append(coef_path)
                self.coef_[k] = coef_path[:, -1]

            if n_targets == 1:
                self.alphas_, self.active_, self.coef_path_, self.coef_ = [
                    a[0]
                    for a in (self.alphas_, self.active_, self.coef_path_, self.coef_)
                ]
                self.n_iter_ = self.n_iter_[0]
        else:
            for k in range(n_targets):
                this_Xy = None if Xy is None else Xy[:, k]
                alphas, _, self.coef_[k], n_iter_ = lars_path(
                    X,
                    y[:, k],
                    Gram=Gram,
                    Xy=this_Xy,
                    copy_X=self.copy_X,
                    copy_Gram=True,
                    alpha_min=alpha,
                    method=self.method,
                    verbose=max(0, self.verbose - 1),
                    max_iter=max_iter,
                    eps=self.eps,
                    return_path=False,
                    return_n_iter=True,
                    positive=self.positive,
                )
                self.alphas_.append(alphas)
                self.n_iter_.append(n_iter_)
            if n_targets == 1:
                self.alphas_ = self.alphas_[0]
                self.n_iter_ = self.n_iter_[0]

        self._set_intercept(X_offset, y_offset, X_scale)
        return self

    @_fit_context(prefer_skip_nested_validation=True)
    def fit(self, X, y, Xy=None):
        """Fit the model using X, y as training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data.

        y : array-like of shape (n_samples,) or (n_samples, n_targets)
            Target values.

        Xy : array-like of shape (n_features,) or (n_features, n_targets), \
                default=None
            Xy = np.dot(X.T, y) that can be precomputed. It is useful
            only when the Gram matrix is precomputed.

        Returns
        -------
        self : object
            Returns an instance of self.
        """
        X, y = self._validate_data(X, y, y_numeric=True, multi_output=True)

        _normalize = _deprecate_normalize(
            self.normalize, estimator_name=self.__class__.__name__
        )

        alpha = getattr(self, "alpha", 0.0)
        if hasattr(self, "n_nonzero_coefs"):
            alpha = 0.0  # n_nonzero_coefs parametrization takes priority
            max_iter = self.n_nonzero_coefs
        else:
            max_iter = self.max_iter

        if self.jitter is not None:
            rng = check_random_state(self.random_state)
            noise = rng.uniform(high=self.jitter, size=len(y))
            y = y + noise

        self._fit(
            X,
            y,
            max_iter=max_iter,
            alpha=alpha,
            fit_path=self.fit_path,
            normalize=_normalize,
            Xy=Xy,
        )

        return self


class LassoLars(Lars):
    """Lasso model fit with Least Angle Regression a.k.a. Lars.

    It is a Linear Model trained with an L1 prior as regularizer.

    The optimization objective for Lasso is::

    (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1

    Read more in the :ref:`User Guide <least_angle_regression>`.

    Parameters
    ----------
    alpha : float, default=1.0
        Constant that multiplies the penalty term. Defaults to 1.0.
        ``alpha = 0`` is equivalent to ordinary least squares, solved
        by :class:`LinearRegression`. For numerical reasons, using
        ``alpha = 0`` with the LassoLars object is not advised and you
        should prefer the LinearRegression object.

    fit_intercept : bool, default=True
        Whether to calculate the intercept for this model. If set
        to false, no intercept will be used in calculations
        (i.e. data is expected to be centered).

    verbose : bool or int, default=False
        Sets the verbosity amount.

    normalize : bool, default=False
        This parameter is ignored when ``fit_intercept`` is set to False.
        If True, the regressors X will be normalized before regression by
        subtracting the mean and dividing by the l2-norm.
        If you wish to standardize, please use
        :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``
        on an estimator with ``normalize=False``.

        .. versionchanged:: 1.2
            default changed from True to False in 1.2.

        .. deprecated:: 1.2
            ``normalize`` was deprecated in version 1.2 and will be removed in 1.4.

    precompute : bool, 'auto' or array-like, default='auto'
        Whether to use a precomputed Gram matrix to speed up
        calculations. If set to ``'auto'`` let us decide. The Gram
        matrix can also be passed as argument.

    max_iter : int, default=500
        Maximum number of iterations to perform.

    eps : float, default=np.finfo(float).eps
        The machine-precision regularization in the computation of the
        Cholesky diagonal factors. Increase this for very ill-conditioned
        systems. Unlike the ``tol`` parameter in some iterative
        optimization-based algorithms, this parameter does not control
        the tolerance of the optimization.

    copy_X : bool, default=True
        If True, X will be copied; else, it may be overwritten.

    fit_path : bool, default=True
        If ``True`` the full path is stored in the ``coef_path_`` attribute.
        If you compute the solution for a large problem or many targets,
        setting ``fit_path`` to ``False`` will lead to a speedup, especially
        with a small alpha.

    positive : bool, default=False
        Restrict coefficients to be >= 0. Be aware that you might want to
        remove fit_intercept which is set True by default.
        Under the positive restriction the model coefficients will not converge
        to the ordinary-least-squares solution for small values of alpha.
        Only coefficients up to the smallest alpha value (``alphas_[alphas_ >
        0.].min()`` when fit_path=True) reached by the stepwise Lars-Lasso
        algorithm are typically in congruence with the solution of the
        coordinate descent Lasso estimator.

    jitter : float, default=None
        Upper bound on a uniform noise parameter to be added to the
        `y` values, to satisfy the model's assumption of
        one-at-a-time computations. Might help with stability.

        .. versionadded:: 0.23

    random_state : int, RandomState instance or None, default=None
        Determines random number generation for jittering. Pass an int
        for reproducible output across multiple function calls.
        See :term:`Glossary <random_state>`. Ignored if `jitter` is None.

        .. versionadded:: 0.23

    Attributes
    ----------
    alphas_ : array-like of shape (n_alphas + 1,) or list of such arrays
        Maximum of covariances (in absolute value) at each iteration.
        ``n_alphas`` is either ``max_iter``, ``n_features`` or the
        number of nodes in the path with ``alpha >= alpha_min``, whichever
        is smaller. If this is a list of array-like, the length of the outer
        list is `n_targets`.

    active_ : list of length n_alphas or list of such lists
        Indices of active variables at the end of the path.
        If this is a list of lists, the length of the outer list is
        `n_targets`.

    coef_path_ : array-like of shape (n_features, n_alphas + 1) or list \
            of such arrays
        The varying values of the coefficients along the path. It is not
        present if the ``fit_path`` parameter is ``False``. If this is a list
        of array-like, the length of the outer list is `n_targets`.

    coef_ : array-like of shape (n_features,) or (n_targets, n_features)
        Parameter vector (w in the cost function formula).

    intercept_ : float or array-like of shape (n_targets,)
        Independent term in decision function.

    n_iter_ : array-like or int
        The number of iterations taken by lars_path to find the
        grid of alphas for each target.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    See Also
    --------
    lars_path : Compute Least Angle Regression or Lasso
        path using LARS algorithm.
    lasso_path : Compute Lasso path with coordinate descent.
    Lasso : Linear Model trained with L1 prior as
        regularizer (aka the Lasso).
    LassoCV : Lasso linear model with iterative fitting
        along a regularization path.
    LassoLarsCV : Cross-validated Lasso, using the LARS algorithm.
    LassoLarsIC : Lasso model fit with Lars using BIC
        or AIC for model selection.
    sklearn.decomposition.sparse_encode : Sparse coding.

    Examples
    --------
    >>> from sklearn import linear_model
    >>> reg = linear_model.LassoLars(alpha=0.01)
    >>> reg.fit([[-1, 1], [0, 0], [1, 1]], [-1, 0, -1])
    LassoLars(alpha=0.01)
    >>> print(reg.coef_)
    [ 0. -0.955...]
    """

    _parameter_constraints: dict = {
        **Lars._parameter_constraints,
        "alpha": [Interval(Real, 0, None, closed="left")],
        "max_iter": [Interval(Integral, 0, None, closed="left")],
        "positive": ["boolean"],
    }
    _parameter_constraints.pop("n_nonzero_coefs")

    method = "lasso"

    def __init__(
        self,
        alpha=1.0,
        *,
        fit_intercept=True,
        verbose=False,
        normalize="deprecated",
        precompute="auto",
        max_iter=500,
        eps=np.finfo(float).eps,
        copy_X=True,
        fit_path=True,
        positive=False,
        jitter=None,
        random_state=None,
    ):
        self.alpha = alpha
        self.fit_intercept = fit_intercept
        self.max_iter = max_iter
        self.verbose = verbose
        self.normalize = normalize
        self.positive = positive
        self.precompute = precompute
        self.copy_X = copy_X
        self.eps = eps
        self.fit_path = fit_path
        self.jitter = jitter
        self.random_state = random_state


###############################################################################
# Cross-validated estimator classes


def _check_copy_and_writeable(array, copy=False):
    if copy or not array.flags.writeable:
        return array.copy()
    return array


def _lars_path_residues(
    X_train,
    y_train,
    X_test,
    y_test,
    Gram=None,
    copy=True,
    method="lar",
    verbose=False,
    fit_intercept=True,
    normalize=False,
    max_iter=500,
    eps=np.finfo(float).eps,
    positive=False,
):
  1124. """Compute the residues on left-out data for a full LARS path
  1125. Parameters
  1126. -----------
  1127. X_train : array-like of shape (n_samples, n_features)
  1128. The data to fit the LARS on
  1129. y_train : array-like of shape (n_samples,)
  1130. The target variable to fit LARS on
  1131. X_test : array-like of shape (n_samples, n_features)
  1132. The data to compute the residues on
  1133. y_test : array-like of shape (n_samples,)
  1134. The target variable to compute the residues on
  1135. Gram : None, 'auto' or array-like of shape (n_features, n_features), \
  1136. default=None
  1137. Precomputed Gram matrix (X' * X), if ``'auto'``, the Gram
  1138. matrix is precomputed from the given X, if there are more samples
  1139. than features
  1140. copy : bool, default=True
  1141. Whether X_train, X_test, y_train and y_test should be copied;
  1142. if False, they may be overwritten.
  1143. method : {'lar' , 'lasso'}, default='lar'
  1144. Specifies the returned model. Select ``'lar'`` for Least Angle
  1145. Regression, ``'lasso'`` for the Lasso.
  1146. verbose : bool or int, default=False
  1147. Sets the amount of verbosity
  1148. fit_intercept : bool, default=True
  1149. whether to calculate the intercept for this model. If set
  1150. to false, no intercept will be used in calculations
  1151. (i.e. data is expected to be centered).
  1152. positive : bool, default=False
  1153. Restrict coefficients to be >= 0. Be aware that you might want to
  1154. remove fit_intercept which is set True by default.
  1155. See reservations for using this option in combination with method
  1156. 'lasso' for expected small values of alpha in the doc of LassoLarsCV
  1157. and LassoLarsIC.
  1158. normalize : bool, default=False
  1159. This parameter is ignored when ``fit_intercept`` is set to False.
  1160. If True, the regressors X will be normalized before regression by
  1161. subtracting the mean and dividing by the l2-norm.
  1162. If you wish to standardize, please use
  1163. :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``
  1164. on an estimator with ``normalize=False``.
  1165. .. versionchanged:: 1.2
  1166. default changed from True to False in 1.2.
  1167. .. deprecated:: 1.2
  1168. ``normalize`` was deprecated in version 1.2 and will be removed in 1.4.
  1169. max_iter : int, default=500
  1170. Maximum number of iterations to perform.
  1171. eps : float, default=np.finfo(float).eps
  1172. The machine-precision regularization in the computation of the
  1173. Cholesky diagonal factors. Increase this for very ill-conditioned
  1174. systems. Unlike the ``tol`` parameter in some iterative
  1175. optimization-based algorithms, this parameter does not control
  1176. the tolerance of the optimization.
  1177. Returns
  1178. --------
  1179. alphas : array-like of shape (n_alphas,)
  1180. Maximum of covariances (in absolute value) at each iteration.
  1181. ``n_alphas`` is either ``max_iter`` or ``n_features``, whichever
  1182. is smaller.
  1183. active : list
  1184. Indices of active variables at the end of the path.
  1185. coefs : array-like of shape (n_features, n_alphas)
  1186. Coefficients along the path
  1187. residues : array-like of shape (n_alphas, n_samples)
  1188. Residues of the prediction on the test data
  1189. """
    X_train = _check_copy_and_writeable(X_train, copy)
    y_train = _check_copy_and_writeable(y_train, copy)
    X_test = _check_copy_and_writeable(X_test, copy)
    y_test = _check_copy_and_writeable(y_test, copy)
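    # Center the test data with the statistics of the training fold, so the
    # residues are computed in the same (centered) coordinate system that the
    # path was fit in.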
    if fit_intercept:
        X_mean = X_train.mean(axis=0)
        X_train -= X_mean
        X_test -= X_mean
        y_mean = y_train.mean(axis=0)
        y_train = as_float_array(y_train, copy=False)
        y_train -= y_mean
        y_test = as_float_array(y_test, copy=False)
        y_test -= y_mean
    if normalize:
        norms = np.sqrt(np.sum(X_train**2, axis=0))
        nonzeros = np.flatnonzero(norms)
        X_train[:, nonzeros] /= norms[nonzeros]

    alphas, active, coefs = lars_path(
        X_train,
        y_train,
        Gram=Gram,
        copy_X=False,
        copy_Gram=False,
        method=method,
        verbose=max(0, verbose - 1),
        max_iter=max_iter,
        eps=eps,
        positive=positive,
    )
    if normalize:
        coefs[nonzeros] /= norms[nonzeros][:, np.newaxis]
    residues = np.dot(X_test, coefs) - y_test[:, np.newaxis]
    return alphas, active, coefs, residues.T


class LarsCV(Lars):
    """Cross-validated Least Angle Regression model.

    See glossary entry for :term:`cross-validation estimator`.

    Read more in the :ref:`User Guide <least_angle_regression>`.

    Parameters
    ----------
    fit_intercept : bool, default=True
        Whether to calculate the intercept for this model. If set
        to False, no intercept will be used in calculations
        (i.e. data is expected to be centered).
    verbose : bool or int, default=False
        Sets the verbosity amount.
    max_iter : int, default=500
        Maximum number of iterations to perform.
    normalize : bool, default=False
        This parameter is ignored when ``fit_intercept`` is set to False.
        If True, the regressors X will be normalized before regression by
        subtracting the mean and dividing by the l2-norm.
        If you wish to standardize, please use
        :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``
        on an estimator with ``normalize=False``.

        .. versionchanged:: 1.2
           default changed from True to False in 1.2.

        .. deprecated:: 1.2
            ``normalize`` was deprecated in version 1.2 and will be removed in 1.4.
    precompute : bool, 'auto' or array-like, default='auto'
        Whether to use a precomputed Gram matrix to speed up
        calculations. If set to ``'auto'`` let us decide. The Gram matrix
        cannot be passed as argument since we will use only subsets of X.
    cv : int, cross-validation generator or an iterable, default=None
        Determines the cross-validation splitting strategy.
        Possible inputs for cv are:

        - None, to use the default 5-fold cross-validation,
        - integer, to specify the number of folds,
        - :term:`CV splitter`,
        - an iterable yielding (train, test) splits as arrays of indices.

        For integer/None inputs, :class:`~sklearn.model_selection.KFold` is used.

        Refer to the :ref:`User Guide <cross_validation>` for the various
        cross-validation strategies that can be used here.

        .. versionchanged:: 0.22
            ``cv`` default value if None changed from 3-fold to 5-fold.
    max_n_alphas : int, default=1000
        The maximum number of points on the path used to compute the
        residuals in the cross-validation.
    n_jobs : int or None, default=None
        Number of CPUs to use during the cross validation.
        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
        for more details.
    eps : float, default=np.finfo(float).eps
        The machine-precision regularization in the computation of the
        Cholesky diagonal factors. Increase this for very ill-conditioned
        systems. Unlike the ``tol`` parameter in some iterative
        optimization-based algorithms, this parameter does not control
        the tolerance of the optimization.
    copy_X : bool, default=True
        If ``True``, X will be copied; else, it may be overwritten.

    Attributes
    ----------
    active_ : list of length n_alphas or list of such lists
        Indices of active variables at the end of the path.
        If this is a list of lists, the outer list length is `n_targets`.
    coef_ : array-like of shape (n_features,)
        Parameter vector (w in the formulation formula).
    intercept_ : float
        Independent term in decision function.
    coef_path_ : array-like of shape (n_features, n_alphas)
        The varying values of the coefficients along the path.
    alpha_ : float
        The estimated regularization parameter alpha.
    alphas_ : array-like of shape (n_alphas,)
        The different values of alpha along the path.
    cv_alphas_ : array-like of shape (n_cv_alphas,)
        All the values of alpha along the path for the different folds.
    mse_path_ : array-like of shape (n_cv_alphas, n_folds)
        The mean square error on left-out for each fold along the path
        (alpha values given by ``cv_alphas_``).
    n_iter_ : array-like or int
        The number of iterations run by Lars with the optimal alpha.
    n_features_in_ : int
        Number of features seen during :term:`fit`.

        .. versionadded:: 0.24
    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    See Also
    --------
    lars_path : Compute Least Angle Regression or Lasso
        path using LARS algorithm.
    lasso_path : Compute Lasso path with coordinate descent.
    Lasso : Linear Model trained with L1 prior as
        regularizer (aka the Lasso).
    LassoCV : Lasso linear model with iterative fitting
        along a regularization path.
    LassoLars : Lasso model fit with Least Angle Regression a.k.a. Lars.
    LassoLarsIC : Lasso model fit with Lars using BIC
        or AIC for model selection.
    sklearn.decomposition.sparse_encode : Sparse coding.

    Notes
    -----
    In `fit`, once the best parameter `alpha` is found through
    cross-validation, the model is fit again using the entire training set.

    Examples
    --------
    >>> from sklearn.linear_model import LarsCV
    >>> from sklearn.datasets import make_regression
    >>> X, y = make_regression(n_samples=200, noise=4.0, random_state=0)
    >>> reg = LarsCV(cv=5).fit(X, y)
    >>> reg.score(X, y)
    0.9996...
    >>> reg.alpha_
    0.2961...
    >>> reg.predict(X[:1,])
    array([154.3996...])
    """

    _parameter_constraints: dict = {
        **Lars._parameter_constraints,
        "max_iter": [Interval(Integral, 0, None, closed="left")],
        "cv": ["cv_object"],
        "max_n_alphas": [Interval(Integral, 1, None, closed="left")],
        "n_jobs": [Integral, None],
    }
    for parameter in ["n_nonzero_coefs", "jitter", "fit_path", "random_state"]:
        _parameter_constraints.pop(parameter)

    method = "lar"

    def __init__(
        self,
        *,
        fit_intercept=True,
        verbose=False,
        max_iter=500,
        normalize="deprecated",
        precompute="auto",
        cv=None,
        max_n_alphas=1000,
        n_jobs=None,
        eps=np.finfo(float).eps,
        copy_X=True,
    ):
        self.max_iter = max_iter
        self.cv = cv
        self.max_n_alphas = max_n_alphas
        self.n_jobs = n_jobs
        super().__init__(
            fit_intercept=fit_intercept,
            verbose=verbose,
            normalize=normalize,
            precompute=precompute,
            n_nonzero_coefs=500,
            eps=eps,
            copy_X=copy_X,
            fit_path=True,
        )

    def _more_tags(self):
        return {"multioutput": False}

    @_fit_context(prefer_skip_nested_validation=True)
    def fit(self, X, y):
        """Fit the model using X, y as training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data.
        y : array-like of shape (n_samples,)
            Target values.

        Returns
        -------
        self : object
            Returns an instance of self.
        """
        _normalize = _deprecate_normalize(
            self.normalize, estimator_name=self.__class__.__name__
        )

        X, y = self._validate_data(X, y, y_numeric=True)
        X = as_float_array(X, copy=self.copy_X)
        y = as_float_array(y, copy=self.copy_X)

        # init cross-validation generator
        cv = check_cv(self.cv, classifier=False)

        # As we use cross-validation, the Gram matrix is not precomputed here
        Gram = self.precompute
        if hasattr(Gram, "__array__"):
            warnings.warn(
                'Parameter "precompute" cannot be an array in '
                '%s. Automatically switching to "auto" instead.'
                % self.__class__.__name__
            )
            Gram = "auto"

        cv_paths = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(
            delayed(_lars_path_residues)(
                X[train],
                y[train],
                X[test],
                y[test],
                Gram=Gram,
                copy=False,
                method=self.method,
                verbose=max(0, self.verbose - 1),
                normalize=_normalize,
                fit_intercept=self.fit_intercept,
                max_iter=self.max_iter,
                eps=self.eps,
                positive=self.positive,
            )
            for train, test in cv.split(X, y)
        )
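        # Each entry of cv_paths is one fold's (alphas, active, coefs,
        # residues) tuple, as returned by _lars_path_residues above.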
        all_alphas = np.concatenate(list(zip(*cv_paths))[0])
        # Unique also sorts
        all_alphas = np.unique(all_alphas)
        # Take at most max_n_alphas values
        stride = max(1, int(len(all_alphas) / float(self.max_n_alphas)))
        all_alphas = all_alphas[::stride]

        mse_path = np.empty((len(all_alphas), len(cv_paths)))
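        # For each fold, interpolate the residues onto the common grid of
        # alphas. lars_path returns alphas in decreasing order, so both
        # arrays are reversed to be increasing for interp1d, and the path is
        # padded at alpha=0 and at max(all_alphas) so that every fold covers
        # the full grid.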
        for index, (alphas, _, _, residues) in enumerate(cv_paths):
            alphas = alphas[::-1]
            residues = residues[::-1]
            if alphas[0] != 0:
                alphas = np.r_[0, alphas]
                residues = np.r_[residues[0, np.newaxis], residues]
            if alphas[-1] != all_alphas[-1]:
                alphas = np.r_[alphas, all_alphas[-1]]
                residues = np.r_[residues, residues[-1, np.newaxis]]
            this_residues = interpolate.interp1d(alphas, residues, axis=0)(all_alphas)
            this_residues **= 2
            mse_path[:, index] = np.mean(this_residues, axis=-1)

        mask = np.all(np.isfinite(mse_path), axis=-1)
        all_alphas = all_alphas[mask]
        mse_path = mse_path[mask]

        # Select the alpha that minimizes left-out error
        i_best_alpha = np.argmin(mse_path.mean(axis=-1))
        best_alpha = all_alphas[i_best_alpha]

        # Store our parameters
        self.alpha_ = best_alpha
        self.cv_alphas_ = all_alphas
        self.mse_path_ = mse_path

        # Now compute the full model using best_alpha
        # it will call a lasso internally when self is LassoLarsCV
        # as self.method == 'lasso'
        self._fit(
            X,
            y,
            max_iter=self.max_iter,
            alpha=best_alpha,
            Xy=None,
            fit_path=True,
            normalize=_normalize,
        )
        return self


class LassoLarsCV(LarsCV):
    """Cross-validated Lasso, using the LARS algorithm.

    See glossary entry for :term:`cross-validation estimator`.

    The optimization objective for Lasso is::

        (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1

    Read more in the :ref:`User Guide <least_angle_regression>`.

    Parameters
    ----------
    fit_intercept : bool, default=True
        Whether to calculate the intercept for this model. If set
        to False, no intercept will be used in calculations
        (i.e. data is expected to be centered).
    verbose : bool or int, default=False
        Sets the verbosity amount.
    max_iter : int, default=500
        Maximum number of iterations to perform.
    normalize : bool, default=False
        This parameter is ignored when ``fit_intercept`` is set to False.
        If True, the regressors X will be normalized before regression by
        subtracting the mean and dividing by the l2-norm.
        If you wish to standardize, please use
        :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``
        on an estimator with ``normalize=False``.

        .. versionchanged:: 1.2
           default changed from True to False in 1.2.

        .. deprecated:: 1.2
            ``normalize`` was deprecated in version 1.2 and will be removed in 1.4.
    precompute : bool or 'auto', default='auto'
        Whether to use a precomputed Gram matrix to speed up
        calculations. If set to ``'auto'`` let us decide. The Gram matrix
        cannot be passed as argument since we will use only subsets of X.
    cv : int, cross-validation generator or an iterable, default=None
        Determines the cross-validation splitting strategy.
        Possible inputs for cv are:

        - None, to use the default 5-fold cross-validation,
        - integer, to specify the number of folds,
        - :term:`CV splitter`,
        - an iterable yielding (train, test) splits as arrays of indices.

        For integer/None inputs, :class:`~sklearn.model_selection.KFold` is used.

        Refer to the :ref:`User Guide <cross_validation>` for the various
        cross-validation strategies that can be used here.

        .. versionchanged:: 0.22
            ``cv`` default value if None changed from 3-fold to 5-fold.
    max_n_alphas : int, default=1000
        The maximum number of points on the path used to compute the
        residuals in the cross-validation.
    n_jobs : int or None, default=None
        Number of CPUs to use during the cross validation.
        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
        for more details.
    eps : float, default=np.finfo(float).eps
        The machine-precision regularization in the computation of the
        Cholesky diagonal factors. Increase this for very ill-conditioned
        systems. Unlike the ``tol`` parameter in some iterative
        optimization-based algorithms, this parameter does not control
        the tolerance of the optimization.
    copy_X : bool, default=True
        If True, X will be copied; else, it may be overwritten.
    positive : bool, default=False
        Restrict coefficients to be >= 0. Be aware that you might want to
        remove fit_intercept, which is set to True by default.
        Under the positive restriction the model coefficients do not converge
        to the ordinary-least-squares solution for small values of alpha.
        Only coefficients up to the smallest alpha value (``alphas_[alphas_ >
        0.].min()`` when fit_path=True) reached by the stepwise Lars-Lasso
        algorithm are typically in congruence with the solution of the
        coordinate descent Lasso estimator.
        As a consequence, using LassoLarsCV only makes sense for problems
        where a sparse solution is expected and/or reached.

    Attributes
    ----------
    coef_ : array-like of shape (n_features,)
        Parameter vector (w in the formulation formula).
    intercept_ : float
        Independent term in decision function.
    coef_path_ : array-like of shape (n_features, n_alphas)
        The varying values of the coefficients along the path.
    alpha_ : float
        The estimated regularization parameter alpha.
    alphas_ : array-like of shape (n_alphas,)
        The different values of alpha along the path.
    cv_alphas_ : array-like of shape (n_cv_alphas,)
        All the values of alpha along the path for the different folds.
    mse_path_ : array-like of shape (n_cv_alphas, n_folds)
        The mean square error on left-out for each fold along the path
        (alpha values given by ``cv_alphas_``).
    n_iter_ : array-like or int
        The number of iterations run by Lars with the optimal alpha.
    active_ : list of int
        Indices of active variables at the end of the path.
    n_features_in_ : int
        Number of features seen during :term:`fit`.

        .. versionadded:: 0.24
    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    See Also
    --------
    lars_path : Compute Least Angle Regression or Lasso
        path using LARS algorithm.
    lasso_path : Compute Lasso path with coordinate descent.
    Lasso : Linear Model trained with L1 prior as
        regularizer (aka the Lasso).
    LassoCV : Lasso linear model with iterative fitting
        along a regularization path.
    LassoLars : Lasso model fit with Least Angle Regression a.k.a. Lars.
    LassoLarsIC : Lasso model fit with Lars using BIC
        or AIC for model selection.
    sklearn.decomposition.sparse_encode : Sparse coding.

    Notes
    -----
    The object solves the same problem as the
    :class:`~sklearn.linear_model.LassoCV` object. However, unlike the
    :class:`~sklearn.linear_model.LassoCV`, it finds the relevant alpha
    values by itself. In general, because of this property, it will be more
    stable. However, it is more fragile to heavily multicollinear datasets.

    It is more efficient than the :class:`~sklearn.linear_model.LassoCV` if
    only a small number of features are selected compared to the total number,
    for instance if there are very few samples compared to the number of
    features.

    In `fit`, once the best parameter `alpha` is found through
    cross-validation, the model is fit again using the entire training set.

    Examples
    --------
    >>> from sklearn.linear_model import LassoLarsCV
    >>> from sklearn.datasets import make_regression
    >>> X, y = make_regression(noise=4.0, random_state=0)
    >>> reg = LassoLarsCV(cv=5).fit(X, y)
    >>> reg.score(X, y)
    0.9993...
    >>> reg.alpha_
    0.3972...
    >>> reg.predict(X[:1,])
    array([-78.4831...])
    """

    _parameter_constraints: dict = {
        **LarsCV._parameter_constraints,
        "positive": ["boolean"],
    }

    method = "lasso"

    def __init__(
        self,
        *,
        fit_intercept=True,
        verbose=False,
        max_iter=500,
        normalize="deprecated",
        precompute="auto",
        cv=None,
        max_n_alphas=1000,
        n_jobs=None,
        eps=np.finfo(float).eps,
        copy_X=True,
        positive=False,
    ):
        self.fit_intercept = fit_intercept
        self.verbose = verbose
        self.max_iter = max_iter
        self.normalize = normalize
        self.precompute = precompute
        self.cv = cv
        self.max_n_alphas = max_n_alphas
        self.n_jobs = n_jobs
        self.eps = eps
        self.copy_X = copy_X
        self.positive = positive
        # XXX : we don't use super().__init__
        # to avoid setting n_nonzero_coefs


class LassoLarsIC(LassoLars):
    """Lasso model fit with Lars using BIC or AIC for model selection.

    The optimization objective for Lasso is::

        (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1

    AIC is the Akaike information criterion [2]_ and BIC is the Bayesian
    information criterion [3]_. Such criteria are useful to select the value
    of the regularization parameter by making a trade-off between the
    goodness of fit and the complexity of the model. A good model should
    explain the data well while being simple.

    Read more in the :ref:`User Guide <lasso_lars_ic>`.

    Parameters
    ----------
    criterion : {'aic', 'bic'}, default='aic'
        The type of criterion to use.
    fit_intercept : bool, default=True
        Whether to calculate the intercept for this model. If set
        to False, no intercept will be used in calculations
        (i.e. data is expected to be centered).
    verbose : bool or int, default=False
        Sets the verbosity amount.
    normalize : bool, default=False
        This parameter is ignored when ``fit_intercept`` is set to False.
        If True, the regressors X will be normalized before regression by
        subtracting the mean and dividing by the l2-norm.
        If you wish to standardize, please use
        :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``
        on an estimator with ``normalize=False``.

        .. versionchanged:: 1.2
           default changed from True to False in 1.2.

        .. deprecated:: 1.2
            ``normalize`` was deprecated in version 1.2 and will be removed in 1.4.
    precompute : bool, 'auto' or array-like, default='auto'
        Whether to use a precomputed Gram matrix to speed up
        calculations. If set to ``'auto'`` let us decide. The Gram
        matrix can also be passed as argument.
    max_iter : int, default=500
        Maximum number of iterations to perform. Can be used for
        early stopping.
    eps : float, default=np.finfo(float).eps
        The machine-precision regularization in the computation of the
        Cholesky diagonal factors. Increase this for very ill-conditioned
        systems. Unlike the ``tol`` parameter in some iterative
        optimization-based algorithms, this parameter does not control
        the tolerance of the optimization.
    copy_X : bool, default=True
        If True, X will be copied; else, it may be overwritten.
    positive : bool, default=False
        Restrict coefficients to be >= 0. Be aware that you might want to
        remove fit_intercept, which is set to True by default.
        Under the positive restriction the model coefficients do not converge
        to the ordinary-least-squares solution for small values of alpha.
        Only coefficients up to the smallest alpha value (``alphas_[alphas_ >
        0.].min()`` when fit_path=True) reached by the stepwise Lars-Lasso
        algorithm are typically in congruence with the solution of the
        coordinate descent Lasso estimator.
        As a consequence, using LassoLarsIC only makes sense for problems
        where a sparse solution is expected and/or reached.
    noise_variance : float, default=None
        The estimated noise variance of the data. If `None`, an unbiased
        estimate is computed by an OLS model. However, this is only possible
        when `n_samples > n_features + fit_intercept`.

        .. versionadded:: 1.1

    Attributes
    ----------
    coef_ : array-like of shape (n_features,)
        Parameter vector (w in the formulation formula).
    intercept_ : float
        Independent term in decision function.
    alpha_ : float
        The alpha parameter chosen by the information criterion.
    alphas_ : array-like of shape (n_alphas + 1,) or list of such arrays
        Maximum of covariances (in absolute value) at each iteration.
        ``n_alphas`` is either ``max_iter``, ``n_features`` or the
        number of nodes in the path with ``alpha >= alpha_min``, whichever
        is smaller. If a list, it will be of length `n_targets`.
    n_iter_ : int
        Number of iterations run by lars_path to find the grid of
        alphas.
    criterion_ : array-like of shape (n_alphas,)
        The value of the information criteria ('aic', 'bic') across all
        alphas. The alpha which has the smallest information criterion is
        chosen, as specified in [1]_.
    noise_variance_ : float
        The estimated noise variance from the data used to compute the
        criterion.

        .. versionadded:: 1.1
    n_features_in_ : int
        Number of features seen during :term:`fit`.

        .. versionadded:: 0.24
    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    See Also
    --------
    lars_path : Compute Least Angle Regression or Lasso
        path using LARS algorithm.
    lasso_path : Compute Lasso path with coordinate descent.
    Lasso : Linear Model trained with L1 prior as
        regularizer (aka the Lasso).
    LassoCV : Lasso linear model with iterative fitting
        along a regularization path.
    LassoLars : Lasso model fit with Least Angle Regression a.k.a. Lars.
    LassoLarsCV : Cross-validated Lasso, using the LARS algorithm.
    sklearn.decomposition.sparse_encode : Sparse coding.

    Notes
    -----
    The number of degrees of freedom is computed as in [1]_.

    For more details regarding the mathematical formulation of the
    AIC and BIC criteria, please refer to the :ref:`User Guide
    <lasso_lars_ic>`.

    References
    ----------
    .. [1] :arxiv:`Zou, Hui, Trevor Hastie, and Robert Tibshirani.
            "On the degrees of freedom of the lasso."
            The Annals of Statistics 35.5 (2007): 2173-2192.
            <0712.0881>`

    .. [2] `Wikipedia entry on the Akaike information criterion
            <https://en.wikipedia.org/wiki/Akaike_information_criterion>`_

    .. [3] `Wikipedia entry on the Bayesian information criterion
            <https://en.wikipedia.org/wiki/Bayesian_information_criterion>`_

    Examples
    --------
    >>> from sklearn import linear_model
    >>> reg = linear_model.LassoLarsIC(criterion='bic')
    >>> X = [[-2, 2], [-1, 1], [0, 0], [1, 1], [2, 2]]
    >>> y = [-2.2222, -1.1111, 0, -1.1111, -2.2222]
    >>> reg.fit(X, y)
    LassoLarsIC(criterion='bic')
    >>> print(reg.coef_)
    [ 0. -1.11...]
    """

    _parameter_constraints: dict = {
        **LassoLars._parameter_constraints,
        "criterion": [StrOptions({"aic", "bic"})],
        "noise_variance": [Interval(Real, 0, None, closed="left"), None],
    }
    for parameter in ["jitter", "fit_path", "alpha", "random_state"]:
        _parameter_constraints.pop(parameter)

    def __init__(
        self,
        criterion="aic",
        *,
        fit_intercept=True,
        verbose=False,
        normalize="deprecated",
        precompute="auto",
        max_iter=500,
        eps=np.finfo(float).eps,
        copy_X=True,
        positive=False,
        noise_variance=None,
    ):
        self.criterion = criterion
        self.fit_intercept = fit_intercept
        self.positive = positive
        self.max_iter = max_iter
        self.verbose = verbose
        self.normalize = normalize
        self.copy_X = copy_X
        self.precompute = precompute
        self.eps = eps
        self.fit_path = True
        self.noise_variance = noise_variance

    def _more_tags(self):
        return {"multioutput": False}

    @_fit_context(prefer_skip_nested_validation=True)
    def fit(self, X, y, copy_X=None):
        """Fit the model using X, y as training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data.
        y : array-like of shape (n_samples,)
            Target values. Will be cast to X's dtype if necessary.
        copy_X : bool, default=None
            If provided, this parameter will override the choice
            of copy_X made at instance creation.
            If ``True``, X will be copied; else, it may be overwritten.

        Returns
        -------
        self : object
            Returns an instance of self.
        """
        _normalize = _deprecate_normalize(
            self.normalize, estimator_name=self.__class__.__name__
        )

        if copy_X is None:
            copy_X = self.copy_X
        X, y = self._validate_data(X, y, y_numeric=True)

        X, y, Xmean, ymean, Xstd = _preprocess_data(
            X, y, self.fit_intercept, _normalize, copy_X
        )

        Gram = self.precompute

        alphas_, _, coef_path_, self.n_iter_ = lars_path(
            X,
            y,
            Gram=Gram,
            copy_X=copy_X,
            copy_Gram=True,
            alpha_min=0.0,
            method="lasso",
            verbose=self.verbose,
            max_iter=self.max_iter,
            eps=self.eps,
            return_n_iter=True,
            positive=self.positive,
        )
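        # lars_path computed the full Lasso path down to alpha_min=0; the
        # information criterion is evaluated below at every knot of this
        # path, and the best alpha is selected a posteriori.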

        n_samples = X.shape[0]

        if self.criterion == "aic":
            criterion_factor = 2
        elif self.criterion == "bic":
            criterion_factor = log(n_samples)
        else:
            raise ValueError(
                f"criterion should be either bic or aic, got {self.criterion!r}"
            )

        residuals = y[:, np.newaxis] - np.dot(X, coef_path_)
        residuals_sum_squares = np.sum(residuals**2, axis=0)
        degrees_of_freedom = np.zeros(coef_path_.shape[1], dtype=int)
        for k, coef in enumerate(coef_path_.T):
            mask = np.abs(coef) > np.finfo(coef.dtype).eps
            if not np.any(mask):
                continue
            # get the number of degrees of freedom equal to:
            # Xc = X[:, mask]
            # Trace(Xc @ inv(Xc.T @ Xc) @ Xc.T), i.e. the number of
            # non-zero coefficients
            degrees_of_freedom[k] = np.sum(mask)

        self.alphas_ = alphas_

        if self.noise_variance is None:
            self.noise_variance_ = self._estimate_noise_variance(
                X, y, positive=self.positive
            )
        else:
            self.noise_variance_ = self.noise_variance
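        # Information criterion for a Gaussian model (see [1] in the class
        # docstring): n * log(2 * pi * sigma^2) + RSS / sigma^2, plus the
        # complexity penalty criterion_factor * df, where criterion_factor
        # is 2 for AIC and log(n_samples) for BIC.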
        self.criterion_ = (
            n_samples * np.log(2 * np.pi * self.noise_variance_)
            + residuals_sum_squares / self.noise_variance_
            + criterion_factor * degrees_of_freedom
        )
        n_best = np.argmin(self.criterion_)

        self.alpha_ = alphas_[n_best]
        self.coef_ = coef_path_[:, n_best]
        self._set_intercept(Xmean, ymean, Xstd)
        return self

    def _estimate_noise_variance(self, X, y, positive):
        """Compute an estimate of the variance with an OLS model.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
            Data to be fitted by the OLS model. We expect the data to be
            centered.
        y : ndarray of shape (n_samples,)
            Associated target.
        positive : bool
            Restrict coefficients to be >= 0. This should be in line with
            the `positive` parameter from `LassoLarsIC`.

        Returns
        -------
        noise_variance : float
            An estimate of the noise variance of an OLS model.
        """
        if X.shape[0] <= X.shape[1] + self.fit_intercept:
            raise ValueError(
                f"You are using {self.__class__.__name__} in the case where the number "
                "of samples is smaller than the number of features. In this setting, "
                "getting a good estimate for the variance of the noise is not "
                "possible. Provide an estimate of the noise variance in the "
                "constructor."
            )
        # X and y are already centered and we don't need to fit with an intercept
        ols_model = LinearRegression(positive=positive, fit_intercept=False)
        y_pred = ols_model.fit(X, y).predict(X)
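        # Unbiased estimate: divide the residual sum of squares by the
        # residual degrees of freedom, n_samples - n_features (minus one
        # more when an intercept was fitted upstream).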
        return np.sum((y - y_pred) ** 2) / (
            X.shape[0] - X.shape[1] - self.fit_intercept
        )