- """
- Generalized Linear Models.
- """
- # Author: Alexandre Gramfort <alexandre.gramfort@inria.fr>
- # Fabian Pedregosa <fabian.pedregosa@inria.fr>
- # Olivier Grisel <olivier.grisel@ensta.org>
- # Vincent Michel <vincent.michel@inria.fr>
- # Peter Prettenhofer <peter.prettenhofer@gmail.com>
- # Mathieu Blondel <mathieu@mblondel.org>
- # Lars Buitinck
- # Maryan Morel <maryan.morel@polytechnique.edu>
- # Giorgio Patrini <giorgio.patrini@anu.edu.au>
- # Maria Telenczuk <https://github.com/maikia>
- # License: BSD 3 clause
- import numbers
- import warnings
- from abc import ABCMeta, abstractmethod
- from numbers import Integral
- import numpy as np
- import scipy.sparse as sp
- from scipy import linalg, optimize, sparse
- from scipy.sparse.linalg import lsqr
- from scipy.special import expit
- from ..base import (
- BaseEstimator,
- ClassifierMixin,
- MultiOutputMixin,
- RegressorMixin,
- _fit_context,
- )
- from ..preprocessing._data import _is_constant_feature
- from ..utils import check_array, check_random_state
- from ..utils._array_api import get_namespace
- from ..utils._seq_dataset import (
- ArrayDataset32,
- ArrayDataset64,
- CSRDataset32,
- CSRDataset64,
- )
- from ..utils.extmath import _incremental_mean_and_var, safe_sparse_dot
- from ..utils.parallel import Parallel, delayed
- from ..utils.sparsefuncs import inplace_column_scale, mean_variance_axis
- from ..utils.validation import FLOAT_DTYPES, _check_sample_weight, check_is_fitted
- # TODO: bayesian_ridge_regression and bayesian_regression_ard
- # should be squashed into its respective objects.
- SPARSE_INTERCEPT_DECAY = 0.01
- # For sparse data intercept updates are scaled by this decay factor to avoid
- # intercept oscillation.


# TODO(1.4): remove
# Parameter 'normalize' should be removed from linear models.
def _deprecate_normalize(normalize, estimator_name):
    """The `normalize` parameter is deprecated in linear models; using a
    pipeline with a StandardScaler is recommended instead.

    Here the appropriate message is selected to be displayed to the user,
    depending on the default `normalize` value (as it varies between the
    linear models) and the `normalize` value selected by the user.

    Parameters
    ----------
    normalize : bool
        normalize value passed by the user

    estimator_name : str
        name of the linear estimator which calls this function.
        The name will be used for writing the deprecation warnings.

    Returns
    -------
    normalize : bool
        normalize value which should further be used by the estimator at this
        stage of the deprecation process

    Notes
    -----
    This function should be completely removed in 1.4.
    """
    if normalize not in [True, False, "deprecated"]:
        raise ValueError(
            "Leave 'normalize' to its default value or set it to True or False"
        )

    if normalize == "deprecated":
        _normalize = False
    else:
        _normalize = normalize

    pipeline_msg = (
        "If you wish to scale the data, use Pipeline with a StandardScaler "
        "in a preprocessing stage. To reproduce the previous behavior:\n\n"
        "from sklearn.pipeline import make_pipeline\n\n"
        "model = make_pipeline(StandardScaler(with_mean=False), "
        f"{estimator_name}())\n\n"
        "If you wish to pass a sample_weight parameter, you need to pass it "
        "as a fit parameter to each step of the pipeline as follows:\n\n"
        "kwargs = {s[0] + '__sample_weight': sample_weight for s "
        "in model.steps}\n"
        "model.fit(X, y, **kwargs)\n\n"
    )

    alpha_msg = ""
    if "LassoLars" in estimator_name:
        alpha_msg = "Set parameter alpha to: original_alpha * np.sqrt(n_samples). "

    if normalize != "deprecated" and normalize:
        warnings.warn(
            "'normalize' was deprecated in version 1.2 and will be removed in 1.4.\n"
            + pipeline_msg
            + alpha_msg,
            FutureWarning,
        )
    elif not normalize:
        warnings.warn(
            (
                "'normalize' was deprecated in version 1.2 and will be "
                "removed in 1.4. "
                "Please leave the normalize parameter to its default value to "
                "silence this warning. The default behavior of this estimator "
                "is to not do any normalization. If normalization is needed "
                "please use sklearn.preprocessing.StandardScaler instead."
            ),
            FutureWarning,
        )

    return _normalize
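

# Illustrative sketch (not part of the library): the replacement recipe the
# deprecation message above recommends, spelled out as runnable code. The
# helper name `_demo_normalize_replacement` and the choice of Ridge as the
# estimator are hypothetical.
def _demo_normalize_replacement():
    # Lazy imports keep the demo self-contained and avoid circular imports
    # at module import time.
    import numpy as np
    from sklearn.linear_model import Ridge
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler

    X = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 7.0]])
    y = np.array([1.0, 2.0, 3.0])
    sample_weight = np.array([1.0, 2.0, 1.0])

    # Equivalent of the removed `normalize=True` option.
    model = make_pipeline(StandardScaler(with_mean=False), Ridge())

    # Forward sample_weight to every step, as the message shows.
    kwargs = {s[0] + "__sample_weight": sample_weight for s in model.steps}
    model.fit(X, y, **kwargs)
    return model.predict(X)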


def make_dataset(X, y, sample_weight, random_state=None):
    """Create ``Dataset`` abstraction for sparse and dense inputs.

    This also returns the ``intercept_decay`` which is different
    for sparse datasets.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Training data.

    y : array-like, shape (n_samples, )
        Target values.

    sample_weight : numpy array of shape (n_samples,)
        The weight of each sample.

    random_state : int, RandomState instance or None (default)
        Determines random number generation for dataset random sampling. It is
        not used for dataset shuffling.
        Pass an int for reproducible output across multiple function calls.
        See :term:`Glossary <random_state>`.

    Returns
    -------
    dataset
        The ``Dataset`` abstraction.
    intercept_decay
        The intercept decay.
    """
    rng = check_random_state(random_state)
    # seed should never be 0 in SequentialDataset64
    seed = rng.randint(1, np.iinfo(np.int32).max)

    if X.dtype == np.float32:
        CSRData = CSRDataset32
        ArrayData = ArrayDataset32
    else:
        CSRData = CSRDataset64
        ArrayData = ArrayDataset64

    if sp.issparse(X):
        dataset = CSRData(X.data, X.indptr, X.indices, y, sample_weight, seed=seed)
        intercept_decay = SPARSE_INTERCEPT_DECAY
    else:
        X = np.ascontiguousarray(X)
        dataset = ArrayData(X, y, sample_weight, seed=seed)
        intercept_decay = 1.0

    return dataset, intercept_decay
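

# Illustrative sketch (not part of the library): building the ``Dataset``
# abstraction for a small dense problem. The helper name is hypothetical.
def _demo_make_dataset():
    rng = np.random.RandomState(0)
    X = rng.rand(4, 2)  # dense float64 -> ArrayDataset64
    y = rng.rand(4)
    sample_weight = np.ones(4)

    dataset, intercept_decay = make_dataset(X, y, sample_weight, random_state=0)
    # Dense data keeps the full intercept update (decay == 1.0); sparse data
    # would get SPARSE_INTERCEPT_DECAY instead, to damp intercept oscillation.
    return dataset, intercept_decay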


def _preprocess_data(
    X,
    y,
    fit_intercept,
    normalize=False,
    copy=True,
    copy_y=True,
    sample_weight=None,
    check_input=True,
):
    """Center and scale data.

    Centers data to have mean zero along axis 0. If fit_intercept=False or if
    X is a sparse matrix, no centering is done, but normalization can still be
    applied. The function returns the statistics necessary to reconstruct the
    input data, which are X_offset, y_offset, X_scale, such that the output

        X = (X - X_offset) / X_scale

    X_scale is the L2 norm of X - X_offset. If sample_weight is not None,
    then the weighted mean of X and y is zero, and not the mean itself. If
    fit_intercept=True, the mean, possibly weighted, is returned, independently
    of whether X was centered (option used for optimization with sparse data in
    coordinate descent).

    This is here because nearly all linear models will want their data to be
    centered. This function also systematically makes y consistent with X.dtype.

    Returns
    -------
    X_out : {ndarray, sparse matrix} of shape (n_samples, n_features)
        If copy=True a copy of the input X is triggered, otherwise operations
        are inplace.
        If input X is dense, then X_out is centered.
        If normalize is True, then X_out is rescaled (dense and sparse case).

    y_out : {ndarray, sparse matrix} of shape (n_samples,) or (n_samples, n_targets)
        Centered version of y. Possibly performed inplace on input y.

    X_offset : ndarray of shape (n_features,)
        The mean per column of input X.

    y_offset : float or ndarray of shape (n_targets,)
        The mean of y, per target column if y is 2D.

    X_scale : ndarray of shape (n_features,)
        The standard deviation per column of input X.
    """
    if isinstance(sample_weight, numbers.Number):
        sample_weight = None
    if sample_weight is not None:
        sample_weight = np.asarray(sample_weight)

    if check_input:
        X = check_array(X, copy=copy, accept_sparse=["csr", "csc"], dtype=FLOAT_DTYPES)
        y = check_array(y, dtype=X.dtype, copy=copy_y, ensure_2d=False)
    else:
        y = y.astype(X.dtype, copy=copy_y)
        if copy:
            if sp.issparse(X):
                X = X.copy()
            else:
                X = X.copy(order="K")

    if fit_intercept:
        if sp.issparse(X):
            X_offset, X_var = mean_variance_axis(X, axis=0, weights=sample_weight)
        else:
            if normalize:
                X_offset, X_var, _ = _incremental_mean_and_var(
                    X,
                    last_mean=0.0,
                    last_variance=0.0,
                    last_sample_count=0.0,
                    sample_weight=sample_weight,
                )
            else:
                X_offset = np.average(X, axis=0, weights=sample_weight)

            X_offset = X_offset.astype(X.dtype, copy=False)
            X -= X_offset

        if normalize:
            X_var = X_var.astype(X.dtype, copy=False)
            # Detect constant features on the computed variance, before taking
            # the np.sqrt. Otherwise constant features cannot be detected with
            # sample weights.
            constant_mask = _is_constant_feature(X_var, X_offset, X.shape[0])
            if sample_weight is None:
                X_var *= X.shape[0]
            else:
                X_var *= sample_weight.sum()
            X_scale = np.sqrt(X_var, out=X_var)
            X_scale[constant_mask] = 1.0
            if sp.issparse(X):
                inplace_column_scale(X, 1.0 / X_scale)
            else:
                X /= X_scale
        else:
            X_scale = np.ones(X.shape[1], dtype=X.dtype)

        y_offset = np.average(y, axis=0, weights=sample_weight)
        y -= y_offset
    else:
        X_offset = np.zeros(X.shape[1], dtype=X.dtype)
        X_scale = np.ones(X.shape[1], dtype=X.dtype)
        if y.ndim == 1:
            y_offset = X.dtype.type(0)
        else:
            y_offset = np.zeros(y.shape[1], dtype=X.dtype)

    return X, y, X_offset, y_offset, X_scale
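

# Illustrative sketch (not part of the library): checking the contract of
# ``_preprocess_data`` on a tiny dense problem -- the returned offsets are the
# weighted column means and the output is centered. The helper name is
# hypothetical.
def _demo_preprocess_data():
    rng = np.random.RandomState(0)
    X = rng.rand(6, 3)
    y = rng.rand(6)
    sample_weight = rng.rand(6)

    X_out, y_out, X_offset, y_offset, X_scale = _preprocess_data(
        X, y, fit_intercept=True, sample_weight=sample_weight
    )

    # Offsets are the weighted means of the original data ...
    assert np.allclose(X_offset, np.average(X, axis=0, weights=sample_weight))
    # ... and the centered output has weighted mean zero.
    assert np.allclose(np.average(X_out, axis=0, weights=sample_weight), 0.0)
    assert np.allclose(np.average(y_out, axis=0, weights=sample_weight), 0.0)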


# TODO: _rescale_data should be factored into _preprocess_data.
# Currently, the fact that sag implements its own way to deal with
# sample_weight makes the refactoring tricky.
def _rescale_data(X, y, sample_weight, inplace=False):
    """Rescale data sample-wise by square root of sample_weight.

    For many linear models, this enables easy support for sample_weight because

        (y - X w)' S (y - X w)

    with S = diag(sample_weight) becomes

        ||y_rescaled - X_rescaled w||_2^2

    when setting

        y_rescaled = sqrt(S) y
        X_rescaled = sqrt(S) X

    Returns
    -------
    X_rescaled : {array-like, sparse matrix}

    y_rescaled : {array-like, sparse matrix}
    """
    # Assume that _validate_data and _check_sample_weight have been called by
    # the caller.
    n_samples = X.shape[0]
    sample_weight_sqrt = np.sqrt(sample_weight)

    if sp.issparse(X) or sp.issparse(y):
        sw_matrix = sparse.dia_matrix(
            (sample_weight_sqrt, 0), shape=(n_samples, n_samples)
        )

    if sp.issparse(X):
        X = safe_sparse_dot(sw_matrix, X)
    else:
        if inplace:
            X *= sample_weight_sqrt[:, np.newaxis]
        else:
            X = X * sample_weight_sqrt[:, np.newaxis]

    if sp.issparse(y):
        y = safe_sparse_dot(sw_matrix, y)
    else:
        if inplace:
            if y.ndim == 1:
                y *= sample_weight_sqrt
            else:
                y *= sample_weight_sqrt[:, np.newaxis]
        else:
            if y.ndim == 1:
                y = y * sample_weight_sqrt
            else:
                y = y * sample_weight_sqrt[:, np.newaxis]

    return X, y, sample_weight_sqrt
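

# Illustrative sketch (not part of the library): the identity stated in the
# ``_rescale_data`` docstring, verified numerically -- the weighted residual
# sum of squares equals the plain residual sum of squares on rescaled data.
# The helper name is hypothetical.
def _demo_rescale_data():
    rng = np.random.RandomState(0)
    X = rng.rand(5, 2)
    y = rng.rand(5)
    w = rng.rand(2)  # an arbitrary coefficient vector
    sample_weight = rng.rand(5)

    X_r, y_r, _ = _rescale_data(X, y, sample_weight)

    weighted_rss = np.sum(sample_weight * (y - X @ w) ** 2)
    plain_rss = np.sum((y_r - X_r @ w) ** 2)
    assert np.allclose(weighted_rss, plain_rss)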


class LinearModel(BaseEstimator, metaclass=ABCMeta):
    """Base class for Linear Models"""

    @abstractmethod
    def fit(self, X, y):
        """Fit model."""

    def _decision_function(self, X):
        check_is_fitted(self)

        X = self._validate_data(X, accept_sparse=["csr", "csc", "coo"], reset=False)
        return safe_sparse_dot(X, self.coef_.T, dense_output=True) + self.intercept_

    def predict(self, X):
        """
        Predict using the linear model.

        Parameters
        ----------
        X : array-like or sparse matrix, shape (n_samples, n_features)
            Samples.

        Returns
        -------
        C : array, shape (n_samples,)
            Returns predicted values.
        """
        return self._decision_function(X)

    def _set_intercept(self, X_offset, y_offset, X_scale):
        """Set the intercept_"""
        if self.fit_intercept:
            # We always want coef_.dtype=X.dtype. For instance, X.dtype can
            # differ from coef_.dtype if warm_start=True.
            self.coef_ = np.divide(self.coef_, X_scale, dtype=X_scale.dtype)
            self.intercept_ = y_offset - np.dot(X_offset, self.coef_.T)
        else:
            self.intercept_ = 0.0

    def _more_tags(self):
        return {"requires_y": True}


# XXX Should this derive from LinearModel? It should be a mixin, not an ABC.
# Maybe the n_features checking can be moved to LinearModel.
class LinearClassifierMixin(ClassifierMixin):
    """Mixin for linear classifiers.

    Handles prediction for sparse and dense X.
    """

    def decision_function(self, X):
        """
        Predict confidence scores for samples.

        The confidence score for a sample is proportional to the signed
        distance of that sample to the hyperplane.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The data matrix for which we want to get the confidence scores.

        Returns
        -------
        scores : ndarray of shape (n_samples,) or (n_samples, n_classes)
            Confidence scores per `(n_samples, n_classes)` combination. In the
            binary case, confidence score for `self.classes_[1]` where >0 means
            this class would be predicted.
        """
        check_is_fitted(self)
        xp, _ = get_namespace(X)

        X = self._validate_data(X, accept_sparse="csr", reset=False)
        scores = safe_sparse_dot(X, self.coef_.T, dense_output=True) + self.intercept_
        return xp.reshape(scores, (-1,)) if scores.shape[1] == 1 else scores

    def predict(self, X):
        """
        Predict class labels for samples in X.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The data matrix for which we want to get the predictions.

        Returns
        -------
        y_pred : ndarray of shape (n_samples,)
            Vector containing the class labels for each sample.
        """
        xp, _ = get_namespace(X)
        scores = self.decision_function(X)
        if len(scores.shape) == 1:
            indices = xp.astype(scores > 0, int)
        else:
            indices = xp.argmax(scores, axis=1)

        return xp.take(self.classes_, indices)

    def _predict_proba_lr(self, X):
        """Probability estimation for OvR logistic regression.

        Positive class probabilities are computed as
        1. / (1. + np.exp(-self.decision_function(X)));
        multiclass is handled by normalizing that over all classes.
        """
        prob = self.decision_function(X)
        expit(prob, out=prob)
        if prob.ndim == 1:
            return np.vstack([1 - prob, prob]).T
        else:
            # OvR normalization, like LibLinear's predict_probability
            prob /= prob.sum(axis=1).reshape((prob.shape[0], -1))
            return prob
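

# Illustrative sketch (not part of the library): wiring
# ``LinearClassifierMixin`` to a hand-specified weight matrix to show the
# decision_function -> argmax -> classes_ mapping. The class name and the
# fixed hyperplanes are hypothetical; nothing is actually learned.
class _DemoLinearClassifier(LinearClassifierMixin, BaseEstimator):
    def fit(self, X, y=None):
        X = self._validate_data(X)  # expects X with 2 features
        # Three classes with fixed (not learned) hyperplanes.
        self.classes_ = np.array([0, 1, 2])
        self.coef_ = np.array([[1.0, 0.0], [0.0, 1.0], [-1.0, -1.0]])
        self.intercept_ = np.zeros(3)
        return self


# Usage: `_DemoLinearClassifier().fit(X).predict(X)` returns, per sample, the
# class whose row of ``coef_`` gives the largest score `X @ coef_.T`.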


class SparseCoefMixin:
    """Mixin for converting coef_ to and from CSR format.

    L1-regularizing estimators should inherit this.
    """

    def densify(self):
        """
        Convert coefficient matrix to dense array format.

        Converts the ``coef_`` member (back) to a numpy.ndarray. This is the
        default format of ``coef_`` and is required for fitting, so calling
        this method is only required on models that have previously been
        sparsified; otherwise, it is a no-op.

        Returns
        -------
        self
            Fitted estimator.
        """
        msg = "Estimator, %(name)s, must be fitted before densifying."
        check_is_fitted(self, msg=msg)
        if sp.issparse(self.coef_):
            self.coef_ = self.coef_.toarray()
        return self

    def sparsify(self):
        """
        Convert coefficient matrix to sparse format.

        Converts the ``coef_`` member to a scipy.sparse matrix, which for
        L1-regularized models can be much more memory- and storage-efficient
        than the usual numpy.ndarray representation.

        The ``intercept_`` member is not converted.

        Returns
        -------
        self
            Fitted estimator.

        Notes
        -----
        For non-sparse models, i.e. when there are not many zeros in ``coef_``,
        this may actually *increase* memory usage, so use this method with
        care. A rule of thumb is that the number of zero elements, which can
        be computed with ``(coef_ == 0).sum()``, must be more than 50% for this
        to provide significant benefits.

        After calling this method, further fitting with the partial_fit
        method (if any) will not work until you call densify.
        """
        msg = "Estimator, %(name)s, must be fitted before sparsifying."
        check_is_fitted(self, msg=msg)
        self.coef_ = sp.csr_matrix(self.coef_)
        return self
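

# Illustrative sketch (not part of the library): round-tripping coefficients
# through sparse format with an L1-penalized estimator that inherits
# ``SparseCoefMixin`` (SGDClassifier does). The helper name is hypothetical.
def _demo_sparse_coef_roundtrip():
    # Lazy imports avoid circular imports at module import time.
    from sklearn.datasets import make_classification
    from sklearn.linear_model import SGDClassifier

    X, y = make_classification(n_samples=50, n_features=20, random_state=0)
    clf = SGDClassifier(penalty="l1", random_state=0).fit(X, y)

    clf.sparsify()
    assert sp.issparse(clf.coef_)  # CSR now; predict still works
    clf.densify()
    assert isinstance(clf.coef_, np.ndarray)
    return clf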


class LinearRegression(MultiOutputMixin, RegressorMixin, LinearModel):
    """
    Ordinary least squares Linear Regression.

    LinearRegression fits a linear model with coefficients w = (w1, ..., wp)
    to minimize the residual sum of squares between the observed targets in
    the dataset, and the targets predicted by the linear approximation.

    Parameters
    ----------
    fit_intercept : bool, default=True
        Whether to calculate the intercept for this model. If set
        to False, no intercept will be used in calculations
        (i.e. data is expected to be centered).

    copy_X : bool, default=True
        If True, X will be copied; else, it may be overwritten.

    n_jobs : int, default=None
        The number of jobs to use for the computation. This will only provide
        speedup in case of sufficiently large problems, that is if firstly
        `n_targets > 1` and secondly `X` is sparse or if `positive` is set
        to `True`. ``None`` means 1 unless in a
        :obj:`joblib.parallel_backend` context. ``-1`` means using all
        processors. See :term:`Glossary <n_jobs>` for more details.

    positive : bool, default=False
        When set to ``True``, forces the coefficients to be positive. This
        option is only supported for dense arrays.

        .. versionadded:: 0.24

    Attributes
    ----------
    coef_ : array of shape (n_features, ) or (n_targets, n_features)
        Estimated coefficients for the linear regression problem.
        If multiple targets are passed during the fit (y 2D), this
        is a 2D array of shape (n_targets, n_features), while if only
        one target is passed, this is a 1D array of length n_features.

    rank_ : int
        Rank of matrix `X`. Only available when `X` is dense.

    singular_ : array of shape (min(n_samples, n_features),)
        Singular values of `X`. Only available when `X` is dense.

    intercept_ : float or array of shape (n_targets,)
        Independent term in the linear model. Set to 0.0 if
        `fit_intercept = False`.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    See Also
    --------
    Ridge : Ridge regression addresses some of the
        problems of Ordinary Least Squares by imposing a penalty on the
        size of the coefficients with l2 regularization.
    Lasso : The Lasso is a linear model that estimates
        sparse coefficients with l1 regularization.
    ElasticNet : Elastic-Net is a linear regression
        model trained with both l1 and l2-norm regularization of the
        coefficients.

    Notes
    -----
    From the implementation point of view, this is just plain Ordinary
    Least Squares (scipy.linalg.lstsq) or Non Negative Least Squares
    (scipy.optimize.nnls) wrapped as a predictor object.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.linear_model import LinearRegression
    >>> X = np.array([[1, 1], [1, 2], [2, 2], [2, 3]])
    >>> # y = 1 * x_0 + 2 * x_1 + 3
    >>> y = np.dot(X, np.array([1, 2])) + 3
    >>> reg = LinearRegression().fit(X, y)
    >>> reg.score(X, y)
    1.0
    >>> reg.coef_
    array([1., 2.])
    >>> reg.intercept_
    3.0...
    >>> reg.predict(np.array([[3, 5]]))
    array([16.])
    """

    _parameter_constraints: dict = {
        "fit_intercept": ["boolean"],
        "copy_X": ["boolean"],
        "n_jobs": [None, Integral],
        "positive": ["boolean"],
    }

    def __init__(
        self,
        *,
        fit_intercept=True,
        copy_X=True,
        n_jobs=None,
        positive=False,
    ):
        self.fit_intercept = fit_intercept
        self.copy_X = copy_X
        self.n_jobs = n_jobs
        self.positive = positive

    @_fit_context(prefer_skip_nested_validation=True)
    def fit(self, X, y, sample_weight=None):
        """
        Fit linear model.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training data.

        y : array-like of shape (n_samples,) or (n_samples, n_targets)
            Target values. Will be cast to X's dtype if necessary.

        sample_weight : array-like of shape (n_samples,), default=None
            Individual weights for each sample.

            .. versionadded:: 0.17
               parameter *sample_weight* support to LinearRegression.

        Returns
        -------
        self : object
            Fitted Estimator.
        """
        n_jobs_ = self.n_jobs

        accept_sparse = False if self.positive else ["csr", "csc", "coo"]

        X, y = self._validate_data(
            X, y, accept_sparse=accept_sparse, y_numeric=True, multi_output=True
        )

        has_sw = sample_weight is not None
        if has_sw:
            sample_weight = _check_sample_weight(
                sample_weight, X, dtype=X.dtype, only_non_negative=True
            )

        # Note that neither _rescale_data nor the rest of the fit method of
        # LinearRegression can benefit from in-place operations when X is a
        # sparse matrix. Therefore, let's not copy X when it is sparse.
        copy_X_in_preprocess_data = self.copy_X and not sp.issparse(X)

        X, y, X_offset, y_offset, X_scale = _preprocess_data(
            X,
            y,
            fit_intercept=self.fit_intercept,
            copy=copy_X_in_preprocess_data,
            sample_weight=sample_weight,
        )

        if has_sw:
            # Sample weight can be implemented via a simple rescaling. Note
            # that we safely do inplace rescaling when _preprocess_data has
            # already made a copy if requested.
            X, y, sample_weight_sqrt = _rescale_data(
                X, y, sample_weight, inplace=copy_X_in_preprocess_data
            )

        if self.positive:
            if y.ndim < 2:
                self.coef_ = optimize.nnls(X, y)[0]
            else:
                # scipy.optimize.nnls cannot handle y with shape (M, K)
                outs = Parallel(n_jobs=n_jobs_)(
                    delayed(optimize.nnls)(X, y[:, j]) for j in range(y.shape[1])
                )
                self.coef_ = np.vstack([out[0] for out in outs])
        elif sp.issparse(X):
            X_offset_scale = X_offset / X_scale

            if has_sw:

                def matvec(b):
                    return X.dot(b) - sample_weight_sqrt * b.dot(X_offset_scale)

                def rmatvec(b):
                    return X.T.dot(b) - X_offset_scale * b.dot(sample_weight_sqrt)

            else:

                def matvec(b):
                    return X.dot(b) - b.dot(X_offset_scale)

                def rmatvec(b):
                    return X.T.dot(b) - X_offset_scale * b.sum()

            X_centered = sparse.linalg.LinearOperator(
                shape=X.shape, matvec=matvec, rmatvec=rmatvec
            )

            if y.ndim < 2:
                self.coef_ = lsqr(X_centered, y)[0]
            else:
                # sparse_lstsq cannot handle y with shape (M, K)
                outs = Parallel(n_jobs=n_jobs_)(
                    delayed(lsqr)(X_centered, y[:, j].ravel())
                    for j in range(y.shape[1])
                )
                self.coef_ = np.vstack([out[0] for out in outs])
        else:
            self.coef_, _, self.rank_, self.singular_ = linalg.lstsq(X, y)
            self.coef_ = self.coef_.T

        if y.ndim == 1:
            self.coef_ = np.ravel(self.coef_)
        self._set_intercept(X_offset, y_offset, X_scale)
        return self
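

# Illustrative sketch (not part of the library): the three solver paths of
# ``LinearRegression.fit`` exercised on the same small problem -- dense lstsq,
# sparse lsqr via the centering LinearOperator, and NNLS for positive=True.
# The helper name is hypothetical.
def _demo_linear_regression_paths():
    rng = np.random.RandomState(0)
    X = rng.rand(20, 3)
    y = X @ np.array([1.0, -2.0, 3.0]) + 0.5

    dense = LinearRegression().fit(X, y)
    sparse_fit = LinearRegression().fit(sp.csr_matrix(X), y)
    nonneg = LinearRegression(positive=True).fit(X, y)

    # Dense and sparse paths agree on this well-conditioned problem; NNLS
    # forces every coefficient to be nonnegative.
    assert np.allclose(dense.coef_, sparse_fit.coef_, atol=1e-6)
    assert np.all(nonneg.coef_ >= 0.0)
    return dense.coef_, nonneg.coef_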


def _check_precomputed_gram_matrix(
    X, precompute, X_offset, X_scale, rtol=None, atol=1e-5
):
    """Compute a single element of the gram matrix and compare it to
    the corresponding element of the user-supplied gram matrix.

    If the values do not match, a ValueError is raised.

    Parameters
    ----------
    X : ndarray of shape (n_samples, n_features)
        Data array.

    precompute : array-like of shape (n_features, n_features)
        User-supplied gram matrix.

    X_offset : ndarray of shape (n_features,)
        Array of feature means used to center design matrix.

    X_scale : ndarray of shape (n_features,)
        Array of feature scale factors used to normalize design matrix.

    rtol : float, default=None
        Relative tolerance; see :func:`numpy.allclose`. If None, it is set to
        1e-4 for arrays of dtype numpy.float32 and 1e-7 otherwise.

    atol : float, default=1e-5
        Absolute tolerance; see :func:`numpy.allclose`. Note that the default
        here is more tolerant than the default for
        :func:`numpy.testing.assert_allclose`, where `atol=0`.

    Raises
    ------
    ValueError
        Raised when the provided Gram matrix is not consistent.
    """
    n_features = X.shape[1]
    f1 = n_features // 2
    f2 = min(f1 + 1, n_features - 1)

    v1 = (X[:, f1] - X_offset[f1]) * X_scale[f1]
    v2 = (X[:, f2] - X_offset[f2]) * X_scale[f2]

    expected = np.dot(v1, v2)
    actual = precompute[f1, f2]

    dtypes = [precompute.dtype, expected.dtype]
    if rtol is None:
        rtols = [1e-4 if dtype == np.float32 else 1e-7 for dtype in dtypes]
        rtol = max(rtols)

    if not np.isclose(expected, actual, rtol=rtol, atol=atol):
        raise ValueError(
            "Gram matrix passed in via 'precompute' parameter "
            "did not pass validation when a single element was "
            "checked - please check that it was computed "
            f"properly. For element ({f1},{f2}) we computed "
            f"{expected} but the user-supplied value was "
            f"{actual}."
        )
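

# Illustrative sketch (not part of the library): a Gram matrix computed
# consistently with the preprocessing passes the single-element check, while
# a perturbed one raises ValueError. The helper name is hypothetical.
def _demo_gram_check():
    rng = np.random.RandomState(0)
    X = rng.rand(10, 4)
    X_offset = np.zeros(4)  # pretend X is already centered ...
    X_scale = np.ones(4)    # ... and unscaled
    gram = X.T @ X

    _check_precomputed_gram_matrix(X, gram, X_offset, X_scale)  # passes

    bad = gram.copy()
    bad[2, 3] += 1.0  # corrupt the checked element (f1=2, f2=3 here)
    try:
        _check_precomputed_gram_matrix(X, bad, X_offset, X_scale)
    except ValueError:
        return "inconsistent Gram matrix detected"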


def _pre_fit(
    X,
    y,
    Xy,
    precompute,
    normalize,
    fit_intercept,
    copy,
    check_input=True,
    sample_weight=None,
):
    """Function used at the beginning of fit in linear models with L1 or L0
    penalty.

    This function applies _preprocess_data and additionally computes the gram
    matrix `precompute` as needed, as well as `Xy`.
    """
    n_samples, n_features = X.shape

    if sparse.issparse(X):
        # copy is not needed here as X is not modified inplace when X is sparse
        precompute = False
        X, y, X_offset, y_offset, X_scale = _preprocess_data(
            X,
            y,
            fit_intercept=fit_intercept,
            normalize=normalize,
            copy=False,
            check_input=check_input,
            sample_weight=sample_weight,
        )
    else:
        # copy was done in fit if necessary
        X, y, X_offset, y_offset, X_scale = _preprocess_data(
            X,
            y,
            fit_intercept=fit_intercept,
            normalize=normalize,
            copy=copy,
            check_input=check_input,
            sample_weight=sample_weight,
        )
        # Rescale only in dense case. Sparse cd solver directly deals with
        # sample_weight.
        if sample_weight is not None:
            # This triggers copies anyway.
            X, y, _ = _rescale_data(X, y, sample_weight=sample_weight)

    # FIXME: 'normalize' to be removed in 1.4
    if hasattr(precompute, "__array__"):
        if (
            fit_intercept
            and not np.allclose(X_offset, np.zeros(n_features))
            or normalize
            and not np.allclose(X_scale, np.ones(n_features))
        ):
            warnings.warn(
                (
                    "Gram matrix was provided but X was centered to fit "
                    "intercept, or X was normalized: recomputing Gram matrix."
                ),
                UserWarning,
            )
            # recompute Gram
            precompute = "auto"
            Xy = None
        elif check_input:
            # If we're going to use the user's precomputed gram matrix, we
            # do a quick check to make sure it's not totally bogus.
            _check_precomputed_gram_matrix(X, precompute, X_offset, X_scale)

    # precompute if n_samples > n_features
    if isinstance(precompute, str) and precompute == "auto":
        precompute = n_samples > n_features

    if precompute is True:
        # make sure that the 'precompute' array is contiguous.
        precompute = np.empty(shape=(n_features, n_features), dtype=X.dtype, order="C")
        np.dot(X.T, X, out=precompute)

    if not hasattr(precompute, "__array__"):
        Xy = None  # cannot use Xy if precompute is not Gram

    if hasattr(precompute, "__array__") and Xy is None:
        common_dtype = np.result_type(X.dtype, y.dtype)
        if y.ndim == 1:
            # Xy is 1d, make sure it is contiguous.
            Xy = np.empty(shape=n_features, dtype=common_dtype, order="C")
            np.dot(X.T, y, out=Xy)
        else:
            # Make sure that Xy is always F contiguous even if X or y are not
            # contiguous: the goal is to make it fast to extract the data for a
            # specific target.
            n_targets = y.shape[1]
            Xy = np.empty(shape=(n_features, n_targets), dtype=common_dtype, order="F")
            np.dot(y.T, X, out=Xy.T)

    return X, y, X_offset, y_offset, X_scale, precompute, Xy
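

# Illustrative sketch (not part of the library): with more samples than
# features, precompute="auto" resolves to an actual Gram matrix and Xy is
# filled in; the coordinate-descent solvers can then work from these. The
# helper name is hypothetical.
def _demo_pre_fit():
    rng = np.random.RandomState(0)
    X = rng.rand(30, 4)  # n_samples > n_features -> Gram is precomputed
    y = rng.rand(30)

    X_out, y_out, X_offset, y_offset, X_scale, precompute, Xy = _pre_fit(
        X, y, Xy=None, precompute="auto", normalize=False,
        fit_intercept=True, copy=True,
    )

    # The returned arrays are computed on the preprocessed (centered) data.
    assert np.allclose(precompute, X_out.T @ X_out)
    assert np.allclose(Xy, X_out.T @ y_out)
    return precompute.shape, Xy.shape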