- """
- Least Angle Regression algorithm. See the documentation on the
- Generalized Linear Model for a complete discussion.
- """
- # Author: Fabian Pedregosa <fabian.pedregosa@inria.fr>
- # Alexandre Gramfort <alexandre.gramfort@inria.fr>
- # Gael Varoquaux
- #
- # License: BSD 3 clause
- import sys
- import warnings
- from math import log
- from numbers import Integral, Real
- import numpy as np
- from scipy import interpolate, linalg
- from scipy.linalg.lapack import get_lapack_funcs
- from ..base import MultiOutputMixin, RegressorMixin, _fit_context
- from ..exceptions import ConvergenceWarning
- from ..model_selection import check_cv
- # mypy error: Module 'sklearn.utils' has no attribute 'arrayfuncs'
- from ..utils import arrayfuncs, as_float_array, check_random_state # type: ignore
- from ..utils._param_validation import Hidden, Interval, StrOptions
- from ..utils.parallel import Parallel, delayed
- from ._base import LinearModel, LinearRegression, _deprecate_normalize, _preprocess_data
- SOLVE_TRIANGULAR_ARGS = {"check_finite": False}
- def lars_path(
- X,
- y,
- Xy=None,
- *,
- Gram=None,
- max_iter=500,
- alpha_min=0,
- method="lar",
- copy_X=True,
- eps=np.finfo(float).eps,
- copy_Gram=True,
- verbose=0,
- return_path=True,
- return_n_iter=False,
- positive=False,
- ):
- """Compute Least Angle Regression or Lasso path using the LARS algorithm [1].
- The optimization objective for the case method='lasso' is::
- (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1
- in the case of method='lar', the objective function is only known in
- the form of an implicit equation (see discussion in [1]).
- Read more in the :ref:`User Guide <least_angle_regression>`.
- Parameters
- ----------
- X : None or array-like of shape (n_samples, n_features)
- Input data. Note that if X is `None` then the Gram matrix must be
- specified, i.e., cannot be `None` or `False`.
- y : None or array-like of shape (n_samples,)
- Input targets.
- Xy : array-like of shape (n_features,) or (n_features, n_targets), \
- default=None
- `Xy = np.dot(X.T, y)` that can be precomputed. It is useful
- only when the Gram matrix is precomputed.
- Gram : None, 'auto', array-like of shape (n_features, n_features), \
- default=None
- Precomputed Gram matrix (X' * X). If `'auto'`, the Gram
- matrix is precomputed from the given X if there are more samples
- than features.
- max_iter : int, default=500
- Maximum number of iterations to perform, set to infinity for no limit.
- alpha_min : float, default=0
- Minimum correlation along the path. It corresponds to the
- regularization parameter `alpha` in the Lasso.
- method : {'lar', 'lasso'}, default='lar'
- Specifies the returned model. Select `'lar'` for Least Angle
- Regression, `'lasso'` for the Lasso.
- copy_X : bool, default=True
- If `False`, `X` is overwritten.
- eps : float, default=np.finfo(float).eps
- The machine-precision regularization in the computation of the
- Cholesky diagonal factors. Increase this for very ill-conditioned
- systems. Unlike the `tol` parameter in some iterative
- optimization-based algorithms, this parameter does not control
- the tolerance of the optimization.
- copy_Gram : bool, default=True
- If `False`, `Gram` is overwritten.
- verbose : int, default=0
- Controls output verbosity.
- return_path : bool, default=True
- If `True`, returns the entire path, else returns only the
- last point of the path.
- return_n_iter : bool, default=False
- Whether to return the number of iterations.
- positive : bool, default=False
- Restrict coefficients to be >= 0.
- This option is only allowed with method 'lasso'. Note that the model
- coefficients will not converge to the ordinary-least-squares solution
- for small values of alpha. Only coefficients up to the smallest alpha
- value (`alphas_[alphas_ > 0.].min()` when fit_path=True) reached by
- the stepwise Lars-Lasso algorithm are typically in congruence with the
- solution of the coordinate descent `lasso_path` function.
- Returns
- -------
- alphas : array-like of shape (n_alphas + 1,)
- Maximum of covariances (in absolute value) at each iteration.
- `n_alphas` is either `max_iter`, `n_features`, or the
- number of nodes in the path with `alpha >= alpha_min`, whichever
- is smaller.
- active : array-like of shape (n_alphas,)
- Indices of active variables at the end of the path.
- coefs : array-like of shape (n_features, n_alphas + 1)
- Coefficients along the path.
- n_iter : int
- Number of iterations run. Returned only if return_n_iter is set
- to True.
- See Also
- --------
- lars_path_gram : Compute LARS path in the sufficient stats mode.
- lasso_path : Compute Lasso path with coordinate descent.
- LassoLars : Lasso model fit with Least Angle Regression a.k.a. Lars.
- Lars : Least Angle Regression model a.k.a. LAR.
- LassoLarsCV : Cross-validated Lasso, using the LARS algorithm.
- LarsCV : Cross-validated Least Angle Regression model.
- sklearn.decomposition.sparse_encode : Sparse coding.
- References
- ----------
- .. [1] "Least Angle Regression", Efron et al.
- http://statweb.stanford.edu/~tibs/ftp/lars.pdf
- .. [2] `Wikipedia entry on the Least-angle regression
- <https://en.wikipedia.org/wiki/Least-angle_regression>`_
- .. [3] `Wikipedia entry on the Lasso
- <https://en.wikipedia.org/wiki/Lasso_(statistics)>`_
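- Examples
- --------
- A minimal sketch on synthetic data; the exact path length depends on
- the data, so only a shape-level property is checked here:
- >>> from sklearn.datasets import make_regression
- >>> from sklearn.linear_model import lars_path
- >>> X, y = make_regression(n_samples=50, n_features=5, random_state=0)
- >>> alphas, active, coefs = lars_path(X, y, method="lasso")
- >>> coefs.shape[0] == X.shape[1]
- True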
- """
- if X is None and Gram is not None:
- raise ValueError(
- "X cannot be None if Gram is not None"
- "Use lars_path_gram to avoid passing X and y."
- )
- return _lars_path_solver(
- X=X,
- y=y,
- Xy=Xy,
- Gram=Gram,
- n_samples=None,
- max_iter=max_iter,
- alpha_min=alpha_min,
- method=method,
- copy_X=copy_X,
- eps=eps,
- copy_Gram=copy_Gram,
- verbose=verbose,
- return_path=return_path,
- return_n_iter=return_n_iter,
- positive=positive,
- )
- def lars_path_gram(
- Xy,
- Gram,
- *,
- n_samples,
- max_iter=500,
- alpha_min=0,
- method="lar",
- copy_X=True,
- eps=np.finfo(float).eps,
- copy_Gram=True,
- verbose=0,
- return_path=True,
- return_n_iter=False,
- positive=False,
- ):
- """The lars_path in the sufficient stats mode [1].
- The optimization objective for the case method='lasso' is::
- (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1
- in the case of method='lar', the objective function is only known in
- the form of an implicit equation (see discussion in [1]).
- Read more in the :ref:`User Guide <least_angle_regression>`.
- Parameters
- ----------
- Xy : array-like of shape (n_features,) or (n_features, n_targets)
- Xy = np.dot(X.T, y).
- Gram : array-like of shape (n_features, n_features)
- Gram = np.dot(X.T, X).
- n_samples : int or float
- Equivalent size of sample, i.e. the number of samples used to
- compute ``Xy`` and ``Gram``.
- max_iter : int, default=500
- Maximum number of iterations to perform, set to infinity for no limit.
- alpha_min : float, default=0
- Minimum correlation along the path. It corresponds to the
- regularization parameter ``alpha`` in the Lasso.
- method : {'lar', 'lasso'}, default='lar'
- Specifies the returned model. Select ``'lar'`` for Least Angle
- Regression, ``'lasso'`` for the Lasso.
- copy_X : bool, default=True
- If ``False``, ``X`` is overwritten.
- eps : float, default=np.finfo(float).eps
- The machine-precision regularization in the computation of the
- Cholesky diagonal factors. Increase this for very ill-conditioned
- systems. Unlike the ``tol`` parameter in some iterative
- optimization-based algorithms, this parameter does not control
- the tolerance of the optimization.
- copy_Gram : bool, default=True
- If ``False``, ``Gram`` is overwritten.
- verbose : int, default=0
- Controls output verbosity.
- return_path : bool, default=True
- If ``True``, returns the entire path, else returns only the
- last point of the path.
- return_n_iter : bool, default=False
- Whether to return the number of iterations.
- positive : bool, default=False
- Restrict coefficients to be >= 0.
- This option is only allowed with method 'lasso'. Note that the model
- coefficients will not converge to the ordinary-least-squares solution
- for small values of alpha. Only coefficients up to the smallest alpha
- value (``alphas_[alphas_ > 0.].min()`` when fit_path=True) reached by
- the stepwise Lars-Lasso algorithm are typically in congruence with the
- solution of the coordinate descent lasso_path function.
- Returns
- -------
- alphas : array-like of shape (n_alphas + 1,)
- Maximum of covariances (in absolute value) at each iteration.
- ``n_alphas`` is either ``max_iter``, ``n_features`` or the
- number of nodes in the path with ``alpha >= alpha_min``, whichever
- is smaller.
- active : array-like of shape (n_alphas,)
- Indices of active variables at the end of the path.
- coefs : array-like of shape (n_features, n_alphas + 1)
- Coefficients along the path.
- n_iter : int
- Number of iterations run. Returned only if return_n_iter is set
- to True.
- See Also
- --------
- lars_path : Compute LARS path.
- lasso_path : Compute Lasso path with coordinate descent.
- LassoLars : Lasso model fit with Least Angle Regression a.k.a. Lars.
- Lars : Least Angle Regression model a.k.a. LAR.
- LassoLarsCV : Cross-validated Lasso, using the LARS algorithm.
- LarsCV : Cross-validated Least Angle Regression model.
- sklearn.decomposition.sparse_encode : Sparse coding.
- References
- ----------
- .. [1] "Least Angle Regression", Efron et al.
- http://statweb.stanford.edu/~tibs/ftp/lars.pdf
- .. [2] `Wikipedia entry on the Least-angle regression
- <https://en.wikipedia.org/wiki/Least-angle_regression>`_
- .. [3] `Wikipedia entry on the Lasso
- <https://en.wikipedia.org/wiki/Lasso_(statistics)>`_
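- Examples
- --------
- A minimal sketch, precomputing the sufficient statistics by hand
- (only ``Xy``, ``Gram`` and ``n_samples`` are needed, not ``X`` itself):
- >>> import numpy as np
- >>> from sklearn.datasets import make_regression
- >>> from sklearn.linear_model import lars_path_gram
- >>> X, y = make_regression(n_samples=50, n_features=5, random_state=0)
- >>> alphas, active, coefs = lars_path_gram(
- ...     Xy=np.dot(X.T, y), Gram=np.dot(X.T, X), n_samples=X.shape[0])
- >>> coefs.shape[0] == X.shape[1]
- True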
- """
- return _lars_path_solver(
- X=None,
- y=None,
- Xy=Xy,
- Gram=Gram,
- n_samples=n_samples,
- max_iter=max_iter,
- alpha_min=alpha_min,
- method=method,
- copy_X=copy_X,
- eps=eps,
- copy_Gram=copy_Gram,
- verbose=verbose,
- return_path=return_path,
- return_n_iter=return_n_iter,
- positive=positive,
- )
- def _lars_path_solver(
- X,
- y,
- Xy=None,
- Gram=None,
- n_samples=None,
- max_iter=500,
- alpha_min=0,
- method="lar",
- copy_X=True,
- eps=np.finfo(float).eps,
- copy_Gram=True,
- verbose=0,
- return_path=True,
- return_n_iter=False,
- positive=False,
- ):
- """Compute Least Angle Regression or Lasso path using LARS algorithm [1]
- The optimization objective for the case method='lasso' is::
- (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1
- in the case of method='lar', the objective function is only known in
- the form of an implicit equation (see discussion in [1]).
- Read more in the :ref:`User Guide <least_angle_regression>`.
- Parameters
- ----------
- X : None or ndarray of shape (n_samples, n_features)
- Input data. Note that if X is None then Gram must be specified,
- i.e., cannot be None or False.
- y : None or ndarray of shape (n_samples,)
- Input targets.
- Xy : array-like of shape (n_features,) or (n_features, n_targets), \
- default=None
- `Xy = np.dot(X.T, y)` that can be precomputed. It is useful
- only when the Gram matrix is precomputed.
- Gram : None, 'auto' or array-like of shape (n_features, n_features), \
- default=None
- Precomputed Gram matrix `(X' * X)`. If ``'auto'``, the Gram
- matrix is precomputed from the given X if there are more samples
- than features.
- n_samples : int or float, default=None
- Equivalent size of sample. If ``None``, it defaults to ``y.size``.
- max_iter : int, default=500
- Maximum number of iterations to perform, set to infinity for no limit.
- alpha_min : float, default=0
- Minimum correlation along the path. It corresponds to the
- regularization parameter ``alpha`` in the Lasso.
- method : {'lar', 'lasso'}, default='lar'
- Specifies the returned model. Select ``'lar'`` for Least Angle
- Regression, ``'lasso'`` for the Lasso.
- copy_X : bool, default=True
- If ``False``, ``X`` is overwritten.
- eps : float, default=np.finfo(float).eps
- The machine-precision regularization in the computation of the
- Cholesky diagonal factors. Increase this for very ill-conditioned
- systems. Unlike the ``tol`` parameter in some iterative
- optimization-based algorithms, this parameter does not control
- the tolerance of the optimization.
- copy_Gram : bool, default=True
- If ``False``, ``Gram`` is overwritten.
- verbose : int, default=0
- Controls output verbosity.
- return_path : bool, default=True
- If ``True``, returns the entire path, else returns only the
- last point of the path.
- return_n_iter : bool, default=False
- Whether to return the number of iterations.
- positive : bool, default=False
- Restrict coefficients to be >= 0.
- This option is only allowed with method 'lasso'. Note that the model
- coefficients will not converge to the ordinary-least-squares solution
- for small values of alpha. Only coefficients up to the smallest alpha
- value (``alphas_[alphas_ > 0.].min()`` when fit_path=True) reached by
- the stepwise Lars-Lasso algorithm are typically in congruence with the
- solution of the coordinate descent lasso_path function.
- Returns
- -------
- alphas : array-like of shape (n_alphas + 1,)
- Maximum of covariances (in absolute value) at each iteration.
- ``n_alphas`` is either ``max_iter``, ``n_features`` or the
- number of nodes in the path with ``alpha >= alpha_min``, whichever
- is smaller.
- active : array-like of shape (n_alphas,)
- Indices of active variables at the end of the path.
- coefs : array-like of shape (n_features, n_alphas + 1)
- Coefficients along the path.
- n_iter : int
- Number of iterations run. Returned only if return_n_iter is set
- to True.
- See Also
- --------
- lasso_path
- LassoLars
- Lars
- LassoLarsCV
- LarsCV
- sklearn.decomposition.sparse_encode
- References
- ----------
- .. [1] "Least Angle Regression", Efron et al.
- http://statweb.stanford.edu/~tibs/ftp/lars.pdf
- .. [2] `Wikipedia entry on the Least-angle regression
- <https://en.wikipedia.org/wiki/Least-angle_regression>`_
- .. [3] `Wikipedia entry on the Lasso
- <https://en.wikipedia.org/wiki/Lasso_(statistics)>`_
- """
- if method == "lar" and positive:
- raise ValueError("Positive constraint not supported for 'lar' coding method.")
- n_samples = n_samples if n_samples is not None else y.size
- if Xy is None:
- Cov = np.dot(X.T, y)
- else:
- Cov = Xy.copy()
- if Gram is None or Gram is False:
- Gram = None
- if X is None:
- raise ValueError("X and Gram cannot both be unspecified.")
- elif isinstance(Gram, str) and Gram == "auto" or Gram is True:
- if Gram is True or X.shape[0] > X.shape[1]:
- Gram = np.dot(X.T, X)
- else:
- Gram = None
- elif copy_Gram:
- Gram = Gram.copy()
- if Gram is None:
- n_features = X.shape[1]
- else:
- n_features = Cov.shape[0]
- if Gram.shape != (n_features, n_features):
- raise ValueError("The shapes of the inputs Gram and Xy do not match.")
- if copy_X and X is not None and Gram is None:
- # force copy. setting the array to be fortran-ordered
- # speeds up the calculation of the (partial) Gram matrix
- # and allows to easily swap columns
- X = X.copy("F")
- max_features = min(max_iter, n_features)
- dtypes = set(a.dtype for a in (X, y, Xy, Gram) if a is not None)
- if len(dtypes) == 1:
- # use the precision level of input data if it is consistent
- return_dtype = next(iter(dtypes))
- else:
- # fallback to double precision otherwise
- return_dtype = np.float64
- if return_path:
- coefs = np.zeros((max_features + 1, n_features), dtype=return_dtype)
- alphas = np.zeros(max_features + 1, dtype=return_dtype)
- else:
- coef, prev_coef = (
- np.zeros(n_features, dtype=return_dtype),
- np.zeros(n_features, dtype=return_dtype),
- )
- alpha, prev_alpha = (
- np.array([0.0], dtype=return_dtype),
- np.array([0.0], dtype=return_dtype),
- )
- # TODO: any better ideas for the above?
- n_iter, n_active = 0, 0
- active, indices = list(), np.arange(n_features)
- # holds the sign of covariance
- sign_active = np.empty(max_features, dtype=np.int8)
- drop = False
- # will hold the cholesky factorization. Only lower part is
- # referenced.
- if Gram is None:
- L = np.empty((max_features, max_features), dtype=X.dtype)
- swap, nrm2 = linalg.get_blas_funcs(("swap", "nrm2"), (X,))
- else:
- L = np.empty((max_features, max_features), dtype=Gram.dtype)
- swap, nrm2 = linalg.get_blas_funcs(("swap", "nrm2"), (Cov,))
- (solve_cholesky,) = get_lapack_funcs(("potrs",), (L,))
- if verbose:
- if verbose > 1:
- print("Step\t\tAdded\t\tDropped\t\tActive set size\t\tC")
- else:
- sys.stdout.write(".")
- sys.stdout.flush()
- tiny32 = np.finfo(np.float32).tiny # to avoid division by 0 warning
- cov_precision = np.finfo(Cov.dtype).precision
- equality_tolerance = np.finfo(np.float32).eps
- if Gram is not None:
- Gram_copy = Gram.copy()
- Cov_copy = Cov.copy()
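- # Main LARS loop: each pass adds one variable to the active set (or,
- # for the lasso variant, may drop one) and then advances the
- # coefficients along the equiangular direction, until alpha_min,
- # max_iter or a full active set is reached.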
- while True:
- if Cov.size:
- if positive:
- C_idx = np.argmax(Cov)
- else:
- C_idx = np.argmax(np.abs(Cov))
- C_ = Cov[C_idx]
- if positive:
- C = C_
- else:
- C = np.fabs(C_)
- else:
- C = 0.0
- if return_path:
- alpha = alphas[n_iter, np.newaxis]
- coef = coefs[n_iter]
- prev_alpha = alphas[n_iter - 1, np.newaxis]
- prev_coef = coefs[n_iter - 1]
- alpha[0] = C / n_samples
- if alpha[0] <= alpha_min + equality_tolerance: # early stopping
- if abs(alpha[0] - alpha_min) > equality_tolerance:
- # interpolation factor 0 <= ss < 1
- if n_iter > 0:
- # In the first iteration, all alphas are zero, the formula
- # below would make ss a NaN
- ss = (prev_alpha[0] - alpha_min) / (prev_alpha[0] - alpha[0])
- coef[:] = prev_coef + ss * (coef - prev_coef)
- alpha[0] = alpha_min
- if return_path:
- coefs[n_iter] = coef
- break
- if n_iter >= max_iter or n_active >= n_features:
- break
- if not drop:
- ##########################################################
- # Append x_j to the Cholesky factorization of (Xa * Xa') #
- # #
- # ( L 0 ) #
- # L -> ( ) , where L * w = Xa' x_j #
- # ( w z ) and z = ||x_j|| #
- # #
- ##########################################################
- if positive:
- sign_active[n_active] = np.ones_like(C_)
- else:
- sign_active[n_active] = np.sign(C_)
- m, n = n_active, C_idx + n_active
- Cov[C_idx], Cov[0] = swap(Cov[C_idx], Cov[0])
- indices[n], indices[m] = indices[m], indices[n]
- Cov_not_shortened = Cov
- Cov = Cov[1:] # remove Cov[0]
- if Gram is None:
- X.T[n], X.T[m] = swap(X.T[n], X.T[m])
- c = nrm2(X.T[n_active]) ** 2
- L[n_active, :n_active] = np.dot(X.T[n_active], X.T[:n_active].T)
- else:
- # swap only works in place if the matrix is
- # Fortran-contiguous
- Gram[m], Gram[n] = swap(Gram[m], Gram[n])
- Gram[:, m], Gram[:, n] = swap(Gram[:, m], Gram[:, n])
- c = Gram[n_active, n_active]
- L[n_active, :n_active] = Gram[n_active, :n_active]
- # Update the cholesky decomposition for the Gram matrix
- if n_active:
- linalg.solve_triangular(
- L[:n_active, :n_active],
- L[n_active, :n_active],
- trans=0,
- lower=1,
- overwrite_b=True,
- **SOLVE_TRIANGULAR_ARGS,
- )
- v = np.dot(L[n_active, :n_active], L[n_active, :n_active])
- diag = max(np.sqrt(np.abs(c - v)), eps)
- L[n_active, n_active] = diag
- if diag < 1e-7:
- # The system is becoming too ill-conditioned.
- # We have degenerate vectors in our active set.
- # We'll 'drop for good' the last regressor added.
- warnings.warn(
- "Regressors in active set degenerate. "
- "Dropping a regressor, after %i iterations, "
- "i.e. alpha=%.3e, "
- "with an active set of %i regressors, and "
- "the smallest cholesky pivot element being %.3e."
- " Reduce max_iter or increase eps parameters."
- % (n_iter, alpha.item(), n_active, diag),
- ConvergenceWarning,
- )
- # XXX: need to figure a 'drop for good' way
- Cov = Cov_not_shortened
- Cov[0] = 0
- Cov[C_idx], Cov[0] = swap(Cov[C_idx], Cov[0])
- continue
- active.append(indices[n_active])
- n_active += 1
- if verbose > 1:
- print(
- "%s\t\t%s\t\t%s\t\t%s\t\t%s" % (n_iter, active[-1], "", n_active, C)
- )
- if method == "lasso" and n_iter > 0 and prev_alpha[0] < alpha[0]:
- # alpha is increasing. This is because the updates of Cov are
- # accumulating numerical error greater than the remaining
- # correlation with the regressors. Time to bail out.
- warnings.warn(
- "Early stopping the lars path, as the residues "
- "are small and the current value of alpha is no "
- "longer well controlled. %i iterations, alpha=%.3e, "
- "previous alpha=%.3e, with an active set of %i "
- "regressors." % (n_iter, alpha.item(), prev_alpha.item(), n_active),
- ConvergenceWarning,
- )
- break
- # least squares solution
- least_squares, _ = solve_cholesky(
- L[:n_active, :n_active], sign_active[:n_active], lower=True
- )
- if least_squares.size == 1 and least_squares == 0:
- # This happens because sign_active[:n_active] = 0
- least_squares[...] = 1
- AA = 1.0
- else:
- # is this really needed?
- AA = 1.0 / np.sqrt(np.sum(least_squares * sign_active[:n_active]))
- if not np.isfinite(AA):
- # L is too ill-conditioned
- i = 0
- L_ = L[:n_active, :n_active].copy()
- while not np.isfinite(AA):
- L_.flat[:: n_active + 1] += (2**i) * eps
- least_squares, _ = solve_cholesky(
- L_, sign_active[:n_active], lower=True
- )
- tmp = max(np.sum(least_squares * sign_active[:n_active]), eps)
- AA = 1.0 / np.sqrt(tmp)
- i += 1
- least_squares *= AA
- if Gram is None:
- # equiangular direction of variables in the active set
- eq_dir = np.dot(X.T[:n_active].T, least_squares)
- # correlation between each inactive variable and the
- # equiangular vector
- corr_eq_dir = np.dot(X.T[n_active:], eq_dir)
- else:
- # with a huge number of features this takes ~50% of the time;
- # it could likely be avoided by updating via an orthogonal
- # (QR) decomposition of X
- corr_eq_dir = np.dot(Gram[:n_active, n_active:].T, least_squares)
- # Explicit rounding can be necessary to avoid `np.argmax(Cov)` yielding
- # unstable results because of rounding errors.
- np.around(corr_eq_dir, decimals=cov_precision, out=corr_eq_dir)
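- # Step length: the smallest positive gamma at which some inactive
- # variable reaches the same (absolute) correlation as the active set.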
- g1 = arrayfuncs.min_pos((C - Cov) / (AA - corr_eq_dir + tiny32))
- if positive:
- gamma_ = min(g1, C / AA)
- else:
- g2 = arrayfuncs.min_pos((C + Cov) / (AA + corr_eq_dir + tiny32))
- gamma_ = min(g1, g2, C / AA)
- # TODO: better names for these variables: z
- drop = False
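- # z holds, for each active variable, the step at which its
- # coefficient would cross zero; under the lasso variant the
- # corresponding variable is then dropped from the active set.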
- z = -coef[active] / (least_squares + tiny32)
- z_pos = arrayfuncs.min_pos(z)
- if z_pos < gamma_:
- # some coefficients have changed sign
- idx = np.where(z == z_pos)[0][::-1]
- # update the sign, important for LAR
- sign_active[idx] = -sign_active[idx]
- if method == "lasso":
- gamma_ = z_pos
- drop = True
- n_iter += 1
- if return_path:
- if n_iter >= coefs.shape[0]:
- del coef, alpha, prev_alpha, prev_coef
- # resize the coefs and alphas array
- add_features = 2 * max(1, (max_features - n_active))
- coefs = np.resize(coefs, (n_iter + add_features, n_features))
- coefs[-add_features:] = 0
- alphas = np.resize(alphas, n_iter + add_features)
- alphas[-add_features:] = 0
- coef = coefs[n_iter]
- prev_coef = coefs[n_iter - 1]
- else:
- # mimic the effect of incrementing n_iter on the array references
- prev_coef = coef
- prev_alpha[0] = alpha[0]
- coef = np.zeros_like(coef)
- coef[active] = prev_coef[active] + gamma_ * least_squares
- # update correlations
- Cov -= gamma_ * corr_eq_dir
- # See if any coefficient has changed sign
- if drop and method == "lasso":
- # idx may hold several indices if multiple coefficients reach zero at once
- for ii in idx:
- arrayfuncs.cholesky_delete(L[:n_active, :n_active], ii)
- n_active -= 1
- # remove every dropped variable from the active set
- drop_idx = [active.pop(ii) for ii in idx]
- if Gram is None:
- # propagate dropped variable
- for ii in idx:
- for i in range(ii, n_active):
- X.T[i], X.T[i + 1] = swap(X.T[i], X.T[i + 1])
- # keep the indices array in sync with the column swaps
- indices[i], indices[i + 1] = indices[i + 1], indices[i]
- # TODO: this could be updated
- residual = y - np.dot(X[:, :n_active], coef[active])
- temp = np.dot(X.T[n_active], residual)
- Cov = np.r_[temp, Cov]
- else:
- for ii in idx:
- for i in range(ii, n_active):
- indices[i], indices[i + 1] = indices[i + 1], indices[i]
- Gram[i], Gram[i + 1] = swap(Gram[i], Gram[i + 1])
- Gram[:, i], Gram[:, i + 1] = swap(Gram[:, i], Gram[:, i + 1])
- # Cov_n = Cov_j + x_j * X + increment(betas)
- # TODO: will this still work with multiple drops?
- # recompute covariance. Probably could be done better;
- # this is wrong as Xy is not swapped with the rest of the variables
- # TODO: this could be updated
- temp = Cov_copy[drop_idx] - np.dot(Gram_copy[drop_idx], coef)
- Cov = np.r_[temp, Cov]
- sign_active = np.delete(sign_active, idx)
- sign_active = np.append(sign_active, 0.0) # just to maintain size
- if verbose > 1:
- print(
- "%s\t\t%s\t\t%s\t\t%s\t\t%s"
- % (n_iter, "", drop_idx, n_active, abs(temp))
- )
- if return_path:
- # resize coefs in case of early stop
- alphas = alphas[: n_iter + 1]
- coefs = coefs[: n_iter + 1]
- if return_n_iter:
- return alphas, active, coefs.T, n_iter
- else:
- return alphas, active, coefs.T
- else:
- if return_n_iter:
- return alpha, active, coef, n_iter
- else:
- return alpha, active, coef
- ###############################################################################
- # Estimator classes
- class Lars(MultiOutputMixin, RegressorMixin, LinearModel):
- """Least Angle Regression model a.k.a. LAR.
- Read more in the :ref:`User Guide <least_angle_regression>`.
- Parameters
- ----------
- fit_intercept : bool, default=True
- Whether to calculate the intercept for this model. If set
- to false, no intercept will be used in calculations
- (i.e. data is expected to be centered).
- verbose : bool or int, default=False
- Sets the verbosity amount.
- normalize : bool, default=False
- This parameter is ignored when ``fit_intercept`` is set to False.
- If True, the regressors X will be normalized before regression by
- subtracting the mean and dividing by the l2-norm.
- If you wish to standardize, please use
- :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``
- on an estimator with ``normalize=False``.
- .. versionchanged:: 1.2
- default changed from True to False in 1.2.
- .. deprecated:: 1.2
- ``normalize`` was deprecated in version 1.2 and will be removed in 1.4.
- precompute : bool, 'auto' or array-like , default='auto'
- Whether to use a precomputed Gram matrix to speed up
- calculations. If set to ``'auto'`` let us decide. The Gram
- matrix can also be passed as argument.
- n_nonzero_coefs : int, default=500
- Target number of non-zero coefficients. Use ``np.inf`` for no limit.
- eps : float, default=np.finfo(float).eps
- The machine-precision regularization in the computation of the
- Cholesky diagonal factors. Increase this for very ill-conditioned
- systems. Unlike the ``tol`` parameter in some iterative
- optimization-based algorithms, this parameter does not control
- the tolerance of the optimization.
- copy_X : bool, default=True
- If ``True``, X will be copied; else, it may be overwritten.
- fit_path : bool, default=True
- If True the full path is stored in the ``coef_path_`` attribute.
- If you compute the solution for a large problem or many targets,
- setting ``fit_path`` to ``False`` will lead to a speedup, especially
- with a small alpha.
- jitter : float, default=None
- Upper bound on a uniform noise parameter to be added to the
- `y` values, to satisfy the model's assumption of
- one-at-a-time computations. Might help with stability.
- .. versionadded:: 0.23
- random_state : int, RandomState instance or None, default=None
- Determines random number generation for jittering. Pass an int
- for reproducible output across multiple function calls.
- See :term:`Glossary <random_state>`. Ignored if `jitter` is None.
- .. versionadded:: 0.23
- Attributes
- ----------
- alphas_ : array-like of shape (n_alphas + 1,) or list of such arrays
- Maximum of covariances (in absolute value) at each iteration.
- ``n_alphas`` is either ``max_iter``, ``n_features`` or the
- number of nodes in the path with ``alpha >= alpha_min``, whichever
- is smaller. If this is a list of array-like, the length of the outer
- list is `n_targets`.
- active_ : list of shape (n_alphas,) or list of such lists
- Indices of active variables at the end of the path.
- If this is a list of list, the length of the outer list is `n_targets`.
- coef_path_ : array-like of shape (n_features, n_alphas + 1) or list \
- of such arrays
- The varying values of the coefficients along the path. It is not
- present if the ``fit_path`` parameter is ``False``. If this is a list
- of array-like, the length of the outer list is `n_targets`.
- coef_ : array-like of shape (n_features,) or (n_targets, n_features)
- Parameter vector (``w`` in the objective function formula).
- intercept_ : float or array-like of shape (n_targets,)
- Independent term in decision function.
- n_iter_ : array-like or int
- The number of iterations taken by lars_path to find the
- grid of alphas for each target.
- n_features_in_ : int
- Number of features seen during :term:`fit`.
- .. versionadded:: 0.24
- feature_names_in_ : ndarray of shape (`n_features_in_`,)
- Names of features seen during :term:`fit`. Defined only when `X`
- has feature names that are all strings.
- .. versionadded:: 1.0
- See Also
- --------
- lars_path : Compute Least Angle Regression or Lasso
- path using LARS algorithm.
- LarsCV : Cross-validated Least Angle Regression model.
- sklearn.decomposition.sparse_encode : Sparse coding.
- Examples
- --------
- >>> from sklearn import linear_model
- >>> reg = linear_model.Lars(n_nonzero_coefs=1)
- >>> reg.fit([[-1, 1], [0, 0], [1, 1]], [-1.1111, 0, -1.1111])
- Lars(n_nonzero_coefs=1)
- >>> print(reg.coef_)
- [ 0. -1.11...]
- """
- _parameter_constraints: dict = {
- "fit_intercept": ["boolean"],
- "verbose": ["verbose"],
- "normalize": ["boolean", Hidden(StrOptions({"deprecated"}))],
- "precompute": ["boolean", StrOptions({"auto"}), np.ndarray, Hidden(None)],
- "n_nonzero_coefs": [Interval(Integral, 1, None, closed="left")],
- "eps": [Interval(Real, 0, None, closed="left")],
- "copy_X": ["boolean"],
- "fit_path": ["boolean"],
- "jitter": [Interval(Real, 0, None, closed="left"), None],
- "random_state": ["random_state"],
- }
- method = "lar"
- positive = False
- def __init__(
- self,
- *,
- fit_intercept=True,
- verbose=False,
- normalize="deprecated",
- precompute="auto",
- n_nonzero_coefs=500,
- eps=np.finfo(float).eps,
- copy_X=True,
- fit_path=True,
- jitter=None,
- random_state=None,
- ):
- self.fit_intercept = fit_intercept
- self.verbose = verbose
- self.normalize = normalize
- self.precompute = precompute
- self.n_nonzero_coefs = n_nonzero_coefs
- self.eps = eps
- self.copy_X = copy_X
- self.fit_path = fit_path
- self.jitter = jitter
- self.random_state = random_state
- @staticmethod
- def _get_gram(precompute, X, y):
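- # Materialize the Gram matrix X.T @ X when it is requested explicitly
- # or when 'auto' deems it worthwhile (more samples than features, or
- # several targets); otherwise pass precompute through unchanged.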
- if (not hasattr(precompute, "__array__")) and (
- (precompute is True)
- or (precompute == "auto" and X.shape[0] > X.shape[1])
- or (precompute == "auto" and y.shape[1] > 1)
- ):
- precompute = np.dot(X.T, X)
- return precompute
- def _fit(self, X, y, max_iter, alpha, fit_path, normalize, Xy=None):
- """Auxiliary method to fit the model using X, y as training data"""
- n_features = X.shape[1]
- X, y, X_offset, y_offset, X_scale = _preprocess_data(
- X, y, self.fit_intercept, normalize, self.copy_X
- )
- if y.ndim == 1:
- y = y[:, np.newaxis]
- n_targets = y.shape[1]
- Gram = self._get_gram(self.precompute, X, y)
- self.alphas_ = []
- self.n_iter_ = []
- self.coef_ = np.empty((n_targets, n_features), dtype=X.dtype)
- if fit_path:
- self.active_ = []
- self.coef_path_ = []
- for k in range(n_targets):
- this_Xy = None if Xy is None else Xy[:, k]
- alphas, active, coef_path, n_iter_ = lars_path(
- X,
- y[:, k],
- Gram=Gram,
- Xy=this_Xy,
- copy_X=self.copy_X,
- copy_Gram=True,
- alpha_min=alpha,
- method=self.method,
- verbose=max(0, self.verbose - 1),
- max_iter=max_iter,
- eps=self.eps,
- return_path=True,
- return_n_iter=True,
- positive=self.positive,
- )
- self.alphas_.append(alphas)
- self.active_.append(active)
- self.n_iter_.append(n_iter_)
- self.coef_path_.append(coef_path)
- self.coef_[k] = coef_path[:, -1]
- if n_targets == 1:
- self.alphas_, self.active_, self.coef_path_, self.coef_ = [
- a[0]
- for a in (self.alphas_, self.active_, self.coef_path_, self.coef_)
- ]
- self.n_iter_ = self.n_iter_[0]
- else:
- for k in range(n_targets):
- this_Xy = None if Xy is None else Xy[:, k]
- alphas, _, self.coef_[k], n_iter_ = lars_path(
- X,
- y[:, k],
- Gram=Gram,
- Xy=this_Xy,
- copy_X=self.copy_X,
- copy_Gram=True,
- alpha_min=alpha,
- method=self.method,
- verbose=max(0, self.verbose - 1),
- max_iter=max_iter,
- eps=self.eps,
- return_path=False,
- return_n_iter=True,
- positive=self.positive,
- )
- self.alphas_.append(alphas)
- self.n_iter_.append(n_iter_)
- if n_targets == 1:
- self.alphas_ = self.alphas_[0]
- self.n_iter_ = self.n_iter_[0]
- self._set_intercept(X_offset, y_offset, X_scale)
- return self
- @_fit_context(prefer_skip_nested_validation=True)
- def fit(self, X, y, Xy=None):
- """Fit the model using X, y as training data.
- Parameters
- ----------
- X : array-like of shape (n_samples, n_features)
- Training data.
- y : array-like of shape (n_samples,) or (n_samples, n_targets)
- Target values.
- Xy : array-like of shape (n_features,) or (n_features, n_targets), \
- default=None
- Xy = np.dot(X.T, y) that can be precomputed. It is useful
- only when the Gram matrix is precomputed.
- Returns
- -------
- self : object
- Returns an instance of self.
- """
- X, y = self._validate_data(X, y, y_numeric=True, multi_output=True)
- _normalize = _deprecate_normalize(
- self.normalize, estimator_name=self.__class__.__name__
- )
- alpha = getattr(self, "alpha", 0.0)
- if hasattr(self, "n_nonzero_coefs"):
- alpha = 0.0 # n_nonzero_coefs parametrization takes priority
- max_iter = self.n_nonzero_coefs
- else:
- max_iter = self.max_iter
- if self.jitter is not None:
- rng = check_random_state(self.random_state)
- noise = rng.uniform(high=self.jitter, size=len(y))
- y = y + noise
- self._fit(
- X,
- y,
- max_iter=max_iter,
- alpha=alpha,
- fit_path=self.fit_path,
- normalize=_normalize,
- Xy=Xy,
- )
- return self
- class LassoLars(Lars):
- """Lasso model fit with Least Angle Regression a.k.a. Lars.
- It is a Linear Model trained with an L1 prior as regularizer.
- The optimization objective for Lasso is::
- (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1
- Read more in the :ref:`User Guide <least_angle_regression>`.
- Parameters
- ----------
- alpha : float, default=1.0
- Constant that multiplies the penalty term. Defaults to 1.0.
- ``alpha = 0`` is equivalent to an ordinary least square, solved
- by :class:`LinearRegression`. For numerical reasons, using
- ``alpha = 0`` with the LassoLars object is not advised and you
- should prefer the LinearRegression object.
- fit_intercept : bool, default=True
- Whether to calculate the intercept for this model. If set
- to false, no intercept will be used in calculations
- (i.e. data is expected to be centered).
- verbose : bool or int, default=False
- Sets the verbosity amount.
- normalize : bool, default=False
- This parameter is ignored when ``fit_intercept`` is set to False.
- If True, the regressors X will be normalized before regression by
- subtracting the mean and dividing by the l2-norm.
- If you wish to standardize, please use
- :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``
- on an estimator with ``normalize=False``.
- .. versionchanged:: 1.2
- default changed from True to False in 1.2.
- .. deprecated:: 1.2
- ``normalize`` was deprecated in version 1.2 and will be removed in 1.4.
- precompute : bool, 'auto' or array-like, default='auto'
- Whether to use a precomputed Gram matrix to speed up
- calculations. If set to ``'auto'`` let us decide. The Gram
- matrix can also be passed as argument.
- max_iter : int, default=500
- Maximum number of iterations to perform.
- eps : float, default=np.finfo(float).eps
- The machine-precision regularization in the computation of the
- Cholesky diagonal factors. Increase this for very ill-conditioned
- systems. Unlike the ``tol`` parameter in some iterative
- optimization-based algorithms, this parameter does not control
- the tolerance of the optimization.
- copy_X : bool, default=True
- If True, X will be copied; else, it may be overwritten.
- fit_path : bool, default=True
- If ``True`` the full path is stored in the ``coef_path_`` attribute.
- If you compute the solution for a large problem or many targets,
- setting ``fit_path`` to ``False`` will lead to a speedup, especially
- with a small alpha.
- positive : bool, default=False
- Restrict coefficients to be >= 0. Be aware that you might want to
- remove fit_intercept which is set True by default.
- Under the positive restriction the model coefficients will not converge
- to the ordinary-least-squares solution for small values of alpha.
- Only coefficients up to the smallest alpha value (``alphas_[alphas_ >
- 0.].min()`` when fit_path=True) reached by the stepwise Lars-Lasso
- algorithm are typically in congruence with the solution of the
- coordinate descent Lasso estimator.
- jitter : float, default=None
- Upper bound on a uniform noise parameter to be added to the
- `y` values, to satisfy the model's assumption of
- one-at-a-time computations. Might help with stability.
- .. versionadded:: 0.23
- random_state : int, RandomState instance or None, default=None
- Determines random number generation for jittering. Pass an int
- for reproducible output across multiple function calls.
- See :term:`Glossary <random_state>`. Ignored if `jitter` is None.
- .. versionadded:: 0.23
- Attributes
- ----------
- alphas_ : array-like of shape (n_alphas + 1,) or list of such arrays
- Maximum of covariances (in absolute value) at each iteration.
- ``n_alphas`` is either ``max_iter``, ``n_features`` or the
- number of nodes in the path with ``alpha >= alpha_min``, whichever
- is smaller. If this is a list of array-like, the length of the outer
- list is `n_targets`.
- active_ : list of length n_alphas or list of such lists
- Indices of active variables at the end of the path.
- If this is a list of list, the length of the outer list is `n_targets`.
- coef_path_ : array-like of shape (n_features, n_alphas + 1) or list \
- of such arrays
- The varying values of the coefficients along the path. It is not
- present if the ``fit_path`` parameter is ``False``. If this is a list
- of array-like, the length of the outer list is `n_targets`.
- coef_ : array-like of shape (n_features,) or (n_targets, n_features)
- Parameter vector (``w`` in the objective function formula).
- intercept_ : float or array-like of shape (n_targets,)
- Independent term in decision function.
- n_iter_ : array-like or int
- The number of iterations taken by lars_path to find the
- grid of alphas for each target.
- n_features_in_ : int
- Number of features seen during :term:`fit`.
- .. versionadded:: 0.24
- feature_names_in_ : ndarray of shape (`n_features_in_`,)
- Names of features seen during :term:`fit`. Defined only when `X`
- has feature names that are all strings.
- .. versionadded:: 1.0
- See Also
- --------
- lars_path : Compute Least Angle Regression or Lasso
- path using LARS algorithm.
- lasso_path : Compute Lasso path with coordinate descent.
- Lasso : Linear Model trained with L1 prior as
- regularizer (aka the Lasso).
- LassoCV : Lasso linear model with iterative fitting
- along a regularization path.
- LassoLarsCV : Cross-validated Lasso, using the LARS algorithm.
- LassoLarsIC : Lasso model fit with Lars using BIC
- or AIC for model selection.
- sklearn.decomposition.sparse_encode : Sparse coding.
- Examples
- --------
- >>> from sklearn import linear_model
- >>> reg = linear_model.LassoLars(alpha=0.01)
- >>> reg.fit([[-1, 1], [0, 0], [1, 1]], [-1, 0, -1])
- LassoLars(alpha=0.01)
- >>> print(reg.coef_)
- [ 0. -0.955...]
- """
- _parameter_constraints: dict = {
- **Lars._parameter_constraints,
- "alpha": [Interval(Real, 0, None, closed="left")],
- "max_iter": [Interval(Integral, 0, None, closed="left")],
- "positive": ["boolean"],
- }
- _parameter_constraints.pop("n_nonzero_coefs")
- method = "lasso"
- def __init__(
- self,
- alpha=1.0,
- *,
- fit_intercept=True,
- verbose=False,
- normalize="deprecated",
- precompute="auto",
- max_iter=500,
- eps=np.finfo(float).eps,
- copy_X=True,
- fit_path=True,
- positive=False,
- jitter=None,
- random_state=None,
- ):
- self.alpha = alpha
- self.fit_intercept = fit_intercept
- self.max_iter = max_iter
- self.verbose = verbose
- self.normalize = normalize
- self.positive = positive
- self.precompute = precompute
- self.copy_X = copy_X
- self.eps = eps
- self.fit_path = fit_path
- self.jitter = jitter
- self.random_state = random_state
- ###############################################################################
- # Cross-validated estimator classes
- def _check_copy_and_writeable(array, copy=False):
- if copy or not array.flags.writeable:
- return array.copy()
- return array
- def _lars_path_residues(
- X_train,
- y_train,
- X_test,
- y_test,
- Gram=None,
- copy=True,
- method="lars",
- verbose=False,
- fit_intercept=True,
- normalize=False,
- max_iter=500,
- eps=np.finfo(float).eps,
- positive=False,
- ):
- """Compute the residues on left-out data for a full LARS path
- Parameters
- ----------
- X_train : array-like of shape (n_samples, n_features)
- The data to fit the LARS on
- y_train : array-like of shape (n_samples,)
- The target variable to fit LARS on
- X_test : array-like of shape (n_samples, n_features)
- The data to compute the residues on
- y_test : array-like of shape (n_samples,)
- The target variable to compute the residues on
- Gram : None, 'auto' or array-like of shape (n_features, n_features), \
- default=None
- Precomputed Gram matrix (X' * X). If ``'auto'``, the Gram
- matrix is precomputed from the given X if there are more samples
- than features.
- copy : bool, default=True
- Whether X_train, X_test, y_train and y_test should be copied;
- if False, they may be overwritten.
- method : {'lar', 'lasso'}, default='lar'
- Specifies the returned model. Select ``'lar'`` for Least Angle
- Regression, ``'lasso'`` for the Lasso.
- verbose : bool or int, default=False
- Sets the amount of verbosity
- fit_intercept : bool, default=True
- Whether to calculate the intercept for this model. If set
- to false, no intercept will be used in calculations
- (i.e. data is expected to be centered).
- positive : bool, default=False
- Restrict coefficients to be >= 0. Be aware that you might want to
- remove fit_intercept which is set True by default.
- See the reservations about using this option in combination with
- method 'lasso', for expected small values of alpha, in the
- documentation of LassoLarsCV and LassoLarsIC.
- normalize : bool, default=False
- This parameter is ignored when ``fit_intercept`` is set to False.
- If True, the regressors X will be normalized before regression by
- subtracting the mean and dividing by the l2-norm.
- If you wish to standardize, please use
- :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``
- on an estimator with ``normalize=False``.
- .. versionchanged:: 1.2
- default changed from True to False in 1.2.
- .. deprecated:: 1.2
- ``normalize`` was deprecated in version 1.2 and will be removed in 1.4.
- max_iter : int, default=500
- Maximum number of iterations to perform.
- eps : float, default=np.finfo(float).eps
- The machine-precision regularization in the computation of the
- Cholesky diagonal factors. Increase this for very ill-conditioned
- systems. Unlike the ``tol`` parameter in some iterative
- optimization-based algorithms, this parameter does not control
- the tolerance of the optimization.
- Returns
- -------
- alphas : array-like of shape (n_alphas,)
- Maximum of covariances (in absolute value) at each iteration.
- ``n_alphas`` is either ``max_iter`` or ``n_features``, whichever
- is smaller.
- active : list
- Indices of active variables at the end of the path.
- coefs : array-like of shape (n_features, n_alphas)
- Coefficients along the path
- residues : array-like of shape (n_alphas, n_samples)
- Residues of the prediction on the test data
- """
- X_train = _check_copy_and_writeable(X_train, copy)
- y_train = _check_copy_and_writeable(y_train, copy)
- X_test = _check_copy_and_writeable(X_test, copy)
- y_test = _check_copy_and_writeable(y_test, copy)
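- # Center the train and test folds with statistics computed on the
- # training fold only, mirroring what the estimators' fit would do.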
- if fit_intercept:
- X_mean = X_train.mean(axis=0)
- X_train -= X_mean
- X_test -= X_mean
- y_mean = y_train.mean(axis=0)
- y_train = as_float_array(y_train, copy=False)
- y_train -= y_mean
- y_test = as_float_array(y_test, copy=False)
- y_test -= y_mean
- if normalize:
- norms = np.sqrt(np.sum(X_train**2, axis=0))
- nonzeros = np.flatnonzero(norms)
- X_train[:, nonzeros] /= norms[nonzeros]
- alphas, active, coefs = lars_path(
- X_train,
- y_train,
- Gram=Gram,
- copy_X=False,
- copy_Gram=False,
- method=method,
- verbose=max(0, verbose - 1),
- max_iter=max_iter,
- eps=eps,
- positive=positive,
- )
- if normalize:
- coefs[nonzeros] /= norms[nonzeros][:, np.newaxis]
- residues = np.dot(X_test, coefs) - y_test[:, np.newaxis]
- return alphas, active, coefs, residues.T
- class LarsCV(Lars):
- """Cross-validated Least Angle Regression model.
- See glossary entry for :term:`cross-validation estimator`.
- Read more in the :ref:`User Guide <least_angle_regression>`.
- Parameters
- ----------
- fit_intercept : bool, default=True
- Whether to calculate the intercept for this model. If set
- to false, no intercept will be used in calculations
- (i.e. data is expected to be centered).
- verbose : bool or int, default=False
- Sets the verbosity amount.
- max_iter : int, default=500
- Maximum number of iterations to perform.
- normalize : bool, default=False
- This parameter is ignored when ``fit_intercept`` is set to False.
- If True, the regressors X will be normalized before regression by
- subtracting the mean and dividing by the l2-norm.
- If you wish to standardize, please use
- :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``
- on an estimator with ``normalize=False``.
- .. versionchanged:: 1.2
- default changed from True to False in 1.2.
- .. deprecated:: 1.2
- ``normalize`` was deprecated in version 1.2 and will be removed in 1.4.
- precompute : bool, 'auto' or array-like , default='auto'
- Whether to use a precomputed Gram matrix to speed up
- calculations. If set to ``'auto'`` let us decide. The Gram matrix
- cannot be passed as argument since we will use only subsets of X.
- cv : int, cross-validation generator or an iterable, default=None
- Determines the cross-validation splitting strategy.
- Possible inputs for cv are:
- - None, to use the default 5-fold cross-validation,
- - integer, to specify the number of folds.
- - :term:`CV splitter`,
- - An iterable yielding (train, test) splits as arrays of indices.
- For integer/None inputs, :class:`~sklearn.model_selection.KFold` is used.
- Refer :ref:`User Guide <cross_validation>` for the various
- cross-validation strategies that can be used here.
- .. versionchanged:: 0.22
- ``cv`` default value if None changed from 3-fold to 5-fold.
- max_n_alphas : int, default=1000
- The maximum number of points on the path used to compute the
- residuals in the cross-validation.
- n_jobs : int or None, default=None
- Number of CPUs to use during the cross validation.
- ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
- ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
- for more details.
- eps : float, default=np.finfo(float).eps
- The machine-precision regularization in the computation of the
- Cholesky diagonal factors. Increase this for very ill-conditioned
- systems. Unlike the ``tol`` parameter in some iterative
- optimization-based algorithms, this parameter does not control
- the tolerance of the optimization.
- copy_X : bool, default=True
- If ``True``, X will be copied; else, it may be overwritten.
- Attributes
- ----------
- active_ : list of length n_alphas or list of such lists
- Indices of active variables at the end of the path.
- If this is a list of lists, the outer list length is `n_targets`.
- coef_ : array-like of shape (n_features,)
- Parameter vector (``w`` in the objective function formula).
- intercept_ : float
- Independent term in decision function.
- coef_path_ : array-like of shape (n_features, n_alphas)
- The varying values of the coefficients along the path.
- alpha_ : float
- The estimated regularization parameter alpha.
- alphas_ : array-like of shape (n_alphas,)
- The different values of alpha along the path.
- cv_alphas_ : array-like of shape (n_cv_alphas,)
- All the values of alpha along the path for the different folds.
- mse_path_ : array-like of shape (n_folds, n_cv_alphas)
- The mean square error on left-out for each fold along the path
- (alpha values given by ``cv_alphas_``).
- n_iter_ : array-like or int
- The number of iterations run by Lars with the optimal alpha.
- n_features_in_ : int
- Number of features seen during :term:`fit`.
- .. versionadded:: 0.24
- feature_names_in_ : ndarray of shape (`n_features_in_`,)
- Names of features seen during :term:`fit`. Defined only when `X`
- has feature names that are all strings.
- .. versionadded:: 1.0
- See Also
- --------
- lars_path : Compute Least Angle Regression or Lasso
- path using the LARS algorithm.
- lasso_path : Compute Lasso path with coordinate descent.
- Lasso : Linear Model trained with L1 prior as
- regularizer (aka the Lasso).
- LassoCV : Lasso linear model with iterative fitting
- along a regularization path.
- LassoLars : Lasso model fit with Least Angle Regression a.k.a. Lars.
- LassoLarsIC : Lasso model fit with Lars using BIC
- or AIC for model selection.
- sklearn.decomposition.sparse_encode : Sparse coding.
- Notes
- -----
- In `fit`, once the best parameter `alpha` is found through
- cross-validation, the model is fit again using the entire training set.
- Examples
- --------
- >>> from sklearn.linear_model import LarsCV
- >>> from sklearn.datasets import make_regression
- >>> X, y = make_regression(n_samples=200, noise=4.0, random_state=0)
- >>> reg = LarsCV(cv=5).fit(X, y)
- >>> reg.score(X, y)
- 0.9996...
- >>> reg.alpha_
- 0.2961...
- >>> reg.predict(X[:1,])
- array([154.3996...])
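- A custom cross-validation splitter can also be passed instead of an
- integer; a sketch with :class:`~sklearn.model_selection.KFold`:
- >>> from sklearn.model_selection import KFold
- >>> reg_kf = LarsCV(cv=KFold(n_splits=5, shuffle=True, random_state=0)).fit(X, y)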
- """
- _parameter_constraints: dict = {
- **Lars._parameter_constraints,
- "max_iter": [Interval(Integral, 0, None, closed="left")],
- "cv": ["cv_object"],
- "max_n_alphas": [Interval(Integral, 1, None, closed="left")],
- "n_jobs": [Integral, None],
- }
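- # n_nonzero_coefs and fit_path are fixed internally (see __init__ below)
- # and jitter/random_state are unused by LarsCV, so their inherited
- # constraints are dropped.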
- for parameter in ["n_nonzero_coefs", "jitter", "fit_path", "random_state"]:
- _parameter_constraints.pop(parameter)
- method = "lar"
- def __init__(
- self,
- *,
- fit_intercept=True,
- verbose=False,
- max_iter=500,
- normalize="deprecated",
- precompute="auto",
- cv=None,
- max_n_alphas=1000,
- n_jobs=None,
- eps=np.finfo(float).eps,
- copy_X=True,
- ):
- self.max_iter = max_iter
- self.cv = cv
- self.max_n_alphas = max_n_alphas
- self.n_jobs = n_jobs
- super().__init__(
- fit_intercept=fit_intercept,
- verbose=verbose,
- normalize=normalize,
- precompute=precompute,
- n_nonzero_coefs=500,
- eps=eps,
- copy_X=copy_X,
- fit_path=True,
- )
- def _more_tags(self):
- return {"multioutput": False}
- @_fit_context(prefer_skip_nested_validation=True)
- def fit(self, X, y):
- """Fit the model using X, y as training data.
- Parameters
- ----------
- X : array-like of shape (n_samples, n_features)
- Training data.
- y : array-like of shape (n_samples,)
- Target values.
- Returns
- -------
- self : object
- Returns an instance of self.
- """
- _normalize = _deprecate_normalize(
- self.normalize, estimator_name=self.__class__.__name__
- )
- X, y = self._validate_data(X, y, y_numeric=True)
- X = as_float_array(X, copy=self.copy_X)
- y = as_float_array(y, copy=self.copy_X)
- # init cross-validation generator
- cv = check_cv(self.cv, classifier=False)
- # As we use cross-validation, the Gram matrix is not precomputed here
- Gram = self.precompute
- if hasattr(Gram, "__array__"):
- warnings.warn(
- 'Parameter "precompute" cannot be an array in '
- '%s. Automatically switching to "auto" instead.'
- % self.__class__.__name__
- )
- Gram = "auto"
- cv_paths = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(
- delayed(_lars_path_residues)(
- X[train],
- y[train],
- X[test],
- y[test],
- Gram=Gram,
- copy=False,
- method=self.method,
- verbose=max(0, self.verbose - 1),
- normalize=_normalize,
- fit_intercept=self.fit_intercept,
- max_iter=self.max_iter,
- eps=self.eps,
- positive=self.positive,
- )
- for train, test in cv.split(X, y)
- )
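- # Each entry of cv_paths is an (alphas, active, coefs, residues) tuple
- # for one fold; only the alphas and the held-out residues are used below.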
- all_alphas = np.concatenate(list(zip(*cv_paths))[0])
- # Unique also sorts
- all_alphas = np.unique(all_alphas)
- # Take at most max_n_alphas values
- stride = max(1, len(all_alphas) // self.max_n_alphas)
- all_alphas = all_alphas[::stride]
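- # For example, 2500 candidate alphas with max_n_alphas=1000 gives a
- # stride of 2, keeping every other alpha (1250 points).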
- mse_path = np.empty((len(all_alphas), len(cv_paths)))
- for index, (alphas, _, _, residues) in enumerate(cv_paths):
- alphas = alphas[::-1]
- residues = residues[::-1]
- if alphas[0] != 0:
- alphas = np.r_[0, alphas]
- residues = np.r_[residues[0, np.newaxis], residues]
- if alphas[-1] != all_alphas[-1]:
- alphas = np.r_[alphas, all_alphas[-1]]
- residues = np.r_[residues, residues[-1, np.newaxis]]
- this_residues = interpolate.interp1d(alphas, residues, axis=0)(all_alphas)
- this_residues **= 2
- mse_path[:, index] = np.mean(this_residues, axis=-1)
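- # Each column of mse_path now holds one fold's MSE curve evaluated on
- # the shared alpha grid, so the folds can be averaged point-wise below.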
- mask = np.all(np.isfinite(mse_path), axis=-1)
- all_alphas = all_alphas[mask]
- mse_path = mse_path[mask]
- # Select the alpha that minimizes left-out error
- i_best_alpha = np.argmin(mse_path.mean(axis=-1))
- best_alpha = all_alphas[i_best_alpha]
- # Store our parameters
- self.alpha_ = best_alpha
- self.cv_alphas_ = all_alphas
- self.mse_path_ = mse_path
- # Now compute the full model using best_alpha; this calls lasso
- # internally when self is a LassoLarsCV instance, since then
- # self.method == 'lasso'
- self._fit(
- X,
- y,
- max_iter=self.max_iter,
- alpha=best_alpha,
- Xy=None,
- fit_path=True,
- normalize=_normalize,
- )
- return self
- class LassoLarsCV(LarsCV):
- """Cross-validated Lasso, using the LARS algorithm.
- See glossary entry for :term:`cross-validation estimator`.
- The optimization objective for Lasso is::
- (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1
- Read more in the :ref:`User Guide <least_angle_regression>`.
- Parameters
- ----------
- fit_intercept : bool, default=True
- Whether to calculate the intercept for this model. If set
- to false, no intercept will be used in calculations
- (i.e. data is expected to be centered).
- verbose : bool or int, default=False
- Sets the verbosity amount.
- max_iter : int, default=500
- Maximum number of iterations to perform.
- normalize : bool, default=False
- This parameter is ignored when ``fit_intercept`` is set to False.
- If True, the regressors X will be normalized before regression by
- subtracting the mean and dividing by the l2-norm.
- If you wish to standardize, please use
- :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``
- on an estimator with ``normalize=False``.
- .. versionchanged:: 1.2
- default changed from True to False in 1.2.
- .. deprecated:: 1.2
- ``normalize`` was deprecated in version 1.2 and will be removed in 1.4.
- precompute : bool or 'auto', default='auto'
- Whether to use a precomputed Gram matrix to speed up
- calculations. If set to ``'auto'``, let us decide. The Gram matrix
- cannot be passed as an argument since we will use only subsets of X.
- cv : int, cross-validation generator or an iterable, default=None
- Determines the cross-validation splitting strategy.
- Possible inputs for cv are:
- - None, to use the default 5-fold cross-validation,
- - integer, to specify the number of folds.
- - :term:`CV splitter`,
- - An iterable yielding (train, test) splits as arrays of indices.
- For integer/None inputs, :class:`~sklearn.model_selection.KFold` is used.
- Refer to the :ref:`User Guide <cross_validation>` for the various
- cross-validation strategies that can be used here.
- .. versionchanged:: 0.22
- ``cv`` default value if None changed from 3-fold to 5-fold.
- max_n_alphas : int, default=1000
- The maximum number of points on the path used to compute the
- residuals in the cross-validation.
- n_jobs : int or None, default=None
- Number of CPUs to use during the cross-validation.
- ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
- ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
- for more details.
- eps : float, default=np.finfo(float).eps
- The machine-precision regularization in the computation of the
- Cholesky diagonal factors. Increase this for very ill-conditioned
- systems. Unlike the ``tol`` parameter in some iterative
- optimization-based algorithms, this parameter does not control
- the tolerance of the optimization.
- copy_X : bool, default=True
- If True, X will be copied; else, it may be overwritten.
- positive : bool, default=False
- Restrict coefficients to be >= 0. Be aware that you might want to
- set ``fit_intercept=False``, as the intercept is fitted by default
- (see the Examples section below for a ``positive=True`` sketch).
- Under the positive restriction the model coefficients do not converge
- to the ordinary-least-squares solution for small values of alpha.
- Only coefficients up to the smallest alpha value (``alphas_[alphas_ >
- 0.].min()`` when fit_path=True) reached by the stepwise Lars-Lasso
- algorithm are typically in congruence with the solution of the
- coordinate descent Lasso estimator.
- As a consequence, using LassoLarsCV only makes sense for problems where
- a sparse solution is expected and/or reached.
- Attributes
- ----------
- coef_ : array-like of shape (n_features,)
- Parameter vector (w in the formulation formula).
- intercept_ : float
- Independent term in the decision function.
- coef_path_ : array-like of shape (n_features, n_alphas)
- The varying values of the coefficients along the path.
- alpha_ : float
- The estimated regularization parameter alpha.
- alphas_ : array-like of shape (n_alphas,)
- The different values of alpha along the path.
- cv_alphas_ : array-like of shape (n_cv_alphas,)
- All the values of alpha along the path for the different folds.
- mse_path_ : array-like of shape (n_cv_alphas, n_folds)
- The mean square error on the left-out fold for each alpha along the
- path, one column per fold (alpha values given by ``cv_alphas_``).
- n_iter_ : array-like or int
- The number of iterations run by Lars with the optimal alpha.
- active_ : list of int
- Indices of active variables at the end of the path.
- n_features_in_ : int
- Number of features seen during :term:`fit`.
- .. versionadded:: 0.24
- feature_names_in_ : ndarray of shape (`n_features_in_`,)
- Names of features seen during :term:`fit`. Defined only when `X`
- has feature names that are all strings.
- .. versionadded:: 1.0
- See Also
- --------
- lars_path : Compute Least Angle Regression or Lasso
- path using the LARS algorithm.
- lasso_path : Compute Lasso path with coordinate descent.
- Lasso : Linear Model trained with L1 prior as
- regularizer (aka the Lasso).
- LassoCV : Lasso linear model with iterative fitting
- along a regularization path.
- LassoLars : Lasso model fit with Least Angle Regression a.k.a. Lars.
- LassoLarsIC : Lasso model fit with Lars using BIC
- or AIC for model selection.
- sklearn.decomposition.sparse_encode : Sparse coding.
- Notes
- -----
- The object solves the same problem as the
- :class:`~sklearn.linear_model.LassoCV` object. However, unlike
- :class:`~sklearn.linear_model.LassoCV`, it finds the relevant alpha
- values by itself. In general, because of this property, it will be more
- stable. However, it is more fragile on heavily multicollinear datasets.
- It is more efficient than the :class:`~sklearn.linear_model.LassoCV` if
- only a small number of features are selected compared to the total number,
- for instance if there are very few samples compared to the number of
- features.
- In `fit`, once the best parameter `alpha` is found through
- cross-validation, the model is fit again using the entire training set.
- Examples
- --------
- >>> from sklearn.linear_model import LassoLarsCV
- >>> from sklearn.datasets import make_regression
- >>> X, y = make_regression(noise=4.0, random_state=0)
- >>> reg = LassoLarsCV(cv=5).fit(X, y)
- >>> reg.score(X, y)
- 0.9993...
- >>> reg.alpha_
- 0.3972...
- >>> reg.predict(X[:1,])
- array([-78.4831...])
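- Setting ``positive=True`` (a sketch on the same data) constrains the
- coefficients to be non-negative:
- >>> reg_pos = LassoLarsCV(cv=5, positive=True).fit(X, y)
- >>> bool((reg_pos.coef_ >= 0).all())
- True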
- """
- _parameter_constraints: dict = {
- **LarsCV._parameter_constraints,
- "positive": ["boolean"],
- }
- method = "lasso"
- def __init__(
- self,
- *,
- fit_intercept=True,
- verbose=False,
- max_iter=500,
- normalize="deprecated",
- precompute="auto",
- cv=None,
- max_n_alphas=1000,
- n_jobs=None,
- eps=np.finfo(float).eps,
- copy_X=True,
- positive=False,
- ):
- self.fit_intercept = fit_intercept
- self.verbose = verbose
- self.max_iter = max_iter
- self.normalize = normalize
- self.precompute = precompute
- self.cv = cv
- self.max_n_alphas = max_n_alphas
- self.n_jobs = n_jobs
- self.eps = eps
- self.copy_X = copy_X
- self.positive = positive
- # XXX : we don't use super().__init__
- # to avoid setting n_nonzero_coefs
- class LassoLarsIC(LassoLars):
- """Lasso model fit with Lars using BIC or AIC for model selection.
- The optimization objective for Lasso is::
- (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1
- AIC is the Akaike information criterion [2]_ and BIC is the Bayesian
- information criterion [3]_. Such criteria are useful to select the value
- of the regularization parameter by making a trade-off between the
- goodness of fit and the complexity of the model. A good model should
- explain the data well while remaining simple.
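- Concretely, with ``sigma^2`` the noise variance, ``RSS`` the residual sum
- of squares and ``df`` the number of non-zero coefficients, the criterion
- computed in ``fit`` is::
- n_samples * log(2 * pi * sigma^2) + RSS / sigma^2 + K * df
- where ``K = 2`` for AIC and ``K = log(n_samples)`` for BIC; the alpha
- minimizing this value is selected.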
- Read more in the :ref:`User Guide <lasso_lars_ic>`.
- Parameters
- ----------
- criterion : {'aic', 'bic'}, default='aic'
- The type of criterion to use.
- fit_intercept : bool, default=True
- Whether to calculate the intercept for this model. If set
- to false, no intercept will be used in calculations
- (i.e. data is expected to be centered).
- verbose : bool or int, default=False
- Sets the verbosity amount.
- normalize : bool, default=False
- This parameter is ignored when ``fit_intercept`` is set to False.
- If True, the regressors X will be normalized before regression by
- subtracting the mean and dividing by the l2-norm.
- If you wish to standardize, please use
- :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``
- on an estimator with ``normalize=False``.
- .. versionchanged:: 1.2
- default changed from True to False in 1.2.
- .. deprecated:: 1.2
- ``normalize`` was deprecated in version 1.2 and will be removed in 1.4.
- precompute : bool, 'auto' or array-like, default='auto'
- Whether to use a precomputed Gram matrix to speed up
- calculations. If set to ``'auto'``, let us decide. The Gram
- matrix can also be passed as an argument.
- max_iter : int, default=500
- Maximum number of iterations to perform. Can be used for
- early stopping.
- eps : float, default=np.finfo(float).eps
- The machine-precision regularization in the computation of the
- Cholesky diagonal factors. Increase this for very ill-conditioned
- systems. Unlike the ``tol`` parameter in some iterative
- optimization-based algorithms, this parameter does not control
- the tolerance of the optimization.
- copy_X : bool, default=True
- If True, X will be copied; else, it may be overwritten.
- positive : bool, default=False
- Restrict coefficients to be >= 0. Be aware that you might want to
- set ``fit_intercept=False``, as the intercept is fitted by default.
- Under the positive restriction the model coefficients do not converge
- to the ordinary-least-squares solution for small values of alpha.
- Only coefficients up to the smallest alpha value (``alphas_[alphas_ >
- 0.].min()`` when fit_path=True) reached by the stepwise Lars-Lasso
- algorithm are typically in congruence with the solution of the
- coordinate descent Lasso estimator.
- As a consequence, using LassoLarsIC only makes sense for problems where
- a sparse solution is expected and/or reached.
- noise_variance : float, default=None
- The estimated noise variance of the data. If `None`, an unbiased
- estimate is computed by an OLS model. However, this is only possible
- when `n_samples > n_features + fit_intercept` (see the Examples
- section below for passing a known value).
- .. versionadded:: 1.1
- Attributes
- ----------
- coef_ : array-like of shape (n_features,)
- Parameter vector (w in the formulation formula).
- intercept_ : float
- Independent term in the decision function.
- alpha_ : float
- The alpha parameter chosen by the information criterion.
- alphas_ : array-like of shape (n_alphas + 1,) or list of such arrays
- Maximum of covariances (in absolute value) at each iteration.
- ``n_alphas`` is either ``max_iter``, ``n_features`` or the
- number of nodes in the path with ``alpha >= alpha_min``, whichever
- is smaller. If a list, it will be of length `n_targets`.
- n_iter_ : int
- The number of iterations run by lars_path to find the grid of
- alphas.
- criterion_ : array-like of shape (n_alphas,)
- The value of the information criteria ('aic', 'bic') across all
- alphas. The alpha which has the smallest information criterion is
- chosen, as specified in [1]_.
- noise_variance_ : float
- The estimated noise variance from the data used to compute the
- criterion.
- .. versionadded:: 1.1
- n_features_in_ : int
- Number of features seen during :term:`fit`.
- .. versionadded:: 0.24
- feature_names_in_ : ndarray of shape (`n_features_in_`,)
- Names of features seen during :term:`fit`. Defined only when `X`
- has feature names that are all strings.
- .. versionadded:: 1.0
- See Also
- --------
- lars_path : Compute Least Angle Regression or Lasso
- path using the LARS algorithm.
- lasso_path : Compute Lasso path with coordinate descent.
- Lasso : Linear Model trained with L1 prior as
- regularizer (aka the Lasso).
- LassoCV : Lasso linear model with iterative fitting
- along a regularization path.
- LassoLars : Lasso model fit with Least Angle Regression a.k.a. Lars.
- LassoLarsCV : Cross-validated Lasso, using the LARS algorithm.
- sklearn.decomposition.sparse_encode : Sparse coding.
- Notes
- -----
- The number of degrees of freedom is computed as in [1]_.
- To have more details regarding the mathematical formulation of the
- AIC and BIC criteria, please refer to :ref:`User Guide <lasso_lars_ic>`.
- References
- ----------
- .. [1] :arxiv:`Zou, Hui, Trevor Hastie, and Robert Tibshirani.
- "On the degrees of freedom of the lasso."
- The Annals of Statistics 35.5 (2007): 2173-2192.
- <0712.0881>`
- .. [2] `Wikipedia entry on the Akaike information criterion
- <https://en.wikipedia.org/wiki/Akaike_information_criterion>`_
- .. [3] `Wikipedia entry on the Bayesian information criterion
- <https://en.wikipedia.org/wiki/Bayesian_information_criterion>`_
- Examples
- --------
- >>> from sklearn import linear_model
- >>> reg = linear_model.LassoLarsIC(criterion='bic')
- >>> X = [[-2, 2], [-1, 1], [0, 0], [1, 1], [2, 2]]
- >>> y = [-2.2222, -1.1111, 0, -1.1111, -2.2222]
- >>> reg.fit(X, y)
- LassoLarsIC(criterion='bic')
- >>> print(reg.coef_)
- [ 0. -1.11...]
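- A known noise variance can be supplied instead of estimating it from the
- data (a sketch):
- >>> reg2 = linear_model.LassoLarsIC(criterion='bic', noise_variance=1.0).fit(X, y)
- >>> reg2.noise_variance_
- 1.0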
- """
- _parameter_constraints: dict = {
- **LassoLars._parameter_constraints,
- "criterion": [StrOptions({"aic", "bic"})],
- "noise_variance": [Interval(Real, 0, None, closed="left"), None],
- }
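- # alpha is selected via the information criterion and fit_path is fixed
- # to True (see __init__ below), while jitter/random_state are unused, so
- # their inherited constraints are dropped.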
- for parameter in ["jitter", "fit_path", "alpha", "random_state"]:
- _parameter_constraints.pop(parameter)
- def __init__(
- self,
- criterion="aic",
- *,
- fit_intercept=True,
- verbose=False,
- normalize="deprecated",
- precompute="auto",
- max_iter=500,
- eps=np.finfo(float).eps,
- copy_X=True,
- positive=False,
- noise_variance=None,
- ):
- self.criterion = criterion
- self.fit_intercept = fit_intercept
- self.positive = positive
- self.max_iter = max_iter
- self.verbose = verbose
- self.normalize = normalize
- self.copy_X = copy_X
- self.precompute = precompute
- self.eps = eps
- self.fit_path = True
- self.noise_variance = noise_variance
- def _more_tags(self):
- return {"multioutput": False}
- @_fit_context(prefer_skip_nested_validation=True)
- def fit(self, X, y, copy_X=None):
- """Fit the model using X, y as training data.
- Parameters
- ----------
- X : array-like of shape (n_samples, n_features)
- Training data.
- y : array-like of shape (n_samples,)
- Target values. Will be cast to X's dtype if necessary.
- copy_X : bool, default=None
- If provided, this parameter will override the choice
- of copy_X made at instance creation.
- If ``True``, X will be copied; else, it may be overwritten.
- Returns
- -------
- self : object
- Returns an instance of self.
- """
- _normalize = _deprecate_normalize(
- self.normalize, estimator_name=self.__class__.__name__
- )
- if copy_X is None:
- copy_X = self.copy_X
- X, y = self._validate_data(X, y, y_numeric=True)
- X, y, Xmean, ymean, Xstd = _preprocess_data(
- X, y, self.fit_intercept, _normalize, copy_X
- )
- Gram = self.precompute
- alphas_, _, coef_path_, self.n_iter_ = lars_path(
- X,
- y,
- Gram=Gram,
- copy_X=copy_X,
- copy_Gram=True,
- alpha_min=0.0,
- method="lasso",
- verbose=self.verbose,
- max_iter=self.max_iter,
- eps=self.eps,
- return_n_iter=True,
- positive=self.positive,
- )
- n_samples = X.shape[0]
- if self.criterion == "aic":
- criterion_factor = 2
- elif self.criterion == "bic":
- criterion_factor = log(n_samples)
- else:
- raise ValueError(
- f"criterion should be either bic or aic, got {self.criterion!r}"
- )
- residuals = y[:, np.newaxis] - np.dot(X, coef_path_)
- residuals_sum_squares = np.sum(residuals**2, axis=0)
- degrees_of_freedom = np.zeros(coef_path_.shape[1], dtype=int)
- for k, coef in enumerate(coef_path_.T):
- mask = np.abs(coef) > np.finfo(coef.dtype).eps
- if not np.any(mask):
- continue
- # The number of degrees of freedom is:
- # Xc = X[:, mask]
- # Trace(Xc @ inv(Xc.T @ Xc) @ Xc.T), i.e. the number of non-zero coefs
- degrees_of_freedom[k] = np.sum(mask)
- self.alphas_ = alphas_
- if self.noise_variance is None:
- self.noise_variance_ = self._estimate_noise_variance(
- X, y, positive=self.positive
- )
- else:
- self.noise_variance_ = self.noise_variance
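- # Per-alpha information criterion (see the class docstring):
- # n_samples * log(2 * pi * sigma^2) + RSS / sigma^2 + K * df,
- # with K = 2 for AIC and K = log(n_samples) for BIC.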
- self.criterion_ = (
- n_samples * np.log(2 * np.pi * self.noise_variance_)
- + residuals_sum_squares / self.noise_variance_
- + criterion_factor * degrees_of_freedom
- )
- n_best = np.argmin(self.criterion_)
- self.alpha_ = alphas_[n_best]
- self.coef_ = coef_path_[:, n_best]
- self._set_intercept(Xmean, ymean, Xstd)
- return self
- def _estimate_noise_variance(self, X, y, positive):
- """Compute an estimate of the variance with an OLS model.
- Parameters
- ----------
- X : ndarray of shape (n_samples, n_features)
- Data to be fitted by the OLS model. We expect the data to be
- centered.
- y : ndarray of shape (n_samples,)
- Associated target.
- positive : bool, default=False
- Restrict coefficients to be >= 0. This should be in line with
- the `positive` parameter from `LassoLarsIC`.
- Returns
- -------
- noise_variance : float
- An estimator of the noise variance of an OLS model.
- """
- if X.shape[0] <= X.shape[1] + self.fit_intercept:
- raise ValueError(
- f"You are using {self.__class__.__name__} in the case where the number "
- "of samples is smaller than the number of features. In this setting, "
- "getting a good estimate for the variance of the noise is not "
- "possible. Provide an estimate of the noise variance in the "
- "constructor."
- )
- # X and y are already centered and we don't need to fit with an intercept
- ols_model = LinearRegression(positive=positive, fit_intercept=False)
- y_pred = ols_model.fit(X, y).predict(X)
- return np.sum((y - y_pred) ** 2) / (
- X.shape[0] - X.shape[1] - self.fit_intercept
- )
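- # A minimal comparison sketch (hypothetical names, not part of the API):
- # BIC penalizes each non-zero coefficient by log(n_samples) instead of 2,
- # so it tends to select a sparser model than AIC::
- #
- #     from sklearn.datasets import make_regression
- #     from sklearn.linear_model import LassoLarsIC
- #     X, y = make_regression(n_samples=100, n_features=20, noise=4.0,
- #                            random_state=0)
- #     n_aic = (LassoLarsIC(criterion="aic").fit(X, y).coef_ != 0).sum()
- #     n_bic = (LassoLarsIC(criterion="bic").fit(X, y).coef_ != 0).sum()
- #     # typically n_bic <= n_aic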