- """
- Linear Discriminant Analysis and Quadratic Discriminant Analysis
- """
- # Authors: Clemens Brunner
- # Martin Billinger
- # Matthieu Perrot
- # Mathieu Blondel
- # License: BSD 3-Clause
- import warnings
- from numbers import Integral, Real
- import numpy as np
- import scipy.linalg
- from scipy import linalg
- from .base import (
- BaseEstimator,
- ClassifierMixin,
- ClassNamePrefixFeaturesOutMixin,
- TransformerMixin,
- _fit_context,
- )
- from .covariance import empirical_covariance, ledoit_wolf, shrunk_covariance
- from .linear_model._base import LinearClassifierMixin
- from .preprocessing import StandardScaler
- from .utils._array_api import _expit, device, get_namespace, size
- from .utils._param_validation import HasMethods, Interval, StrOptions
- from .utils.extmath import softmax
- from .utils.multiclass import check_classification_targets, unique_labels
- from .utils.validation import check_is_fitted
- __all__ = ["LinearDiscriminantAnalysis", "QuadraticDiscriminantAnalysis"]
- def _cov(X, shrinkage=None, covariance_estimator=None):
- """Estimate covariance matrix (using optional covariance_estimator).
- Parameters
- ----------
- X : array-like of shape (n_samples, n_features)
- Input data.
- shrinkage : {'empirical', 'auto'} or float, default=None
- Shrinkage parameter, possible values:
- - None or 'empirical': no shrinkage (default).
- - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.
- - float between 0 and 1: fixed shrinkage parameter.
- Shrinkage parameter is ignored if `covariance_estimator`
- is not None.
- covariance_estimator : estimator, default=None
- If not None, `covariance_estimator` is used to estimate
- the covariance matrices instead of relying on the empirical
- covariance estimator (with potential shrinkage).
- The object should have a fit method and a ``covariance_`` attribute
- like the estimators in :mod:`sklearn.covariance`.
- If None, the shrinkage parameter drives the estimate.
- .. versionadded:: 0.24
- Returns
- -------
- s : ndarray of shape (n_features, n_features)
- Estimated covariance matrix.
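- Examples
- --------
- A minimal sketch; `_cov` is a private helper, assumed importable from
- :mod:`sklearn.discriminant_analysis`:
- >>> import numpy as np
- >>> from sklearn.discriminant_analysis import _cov
- >>> X = np.random.RandomState(0).standard_normal((30, 4))
- >>> _cov(X, shrinkage=None).shape
- (4, 4)
- >>> _cov(X, shrinkage="auto").shape
- (4, 4)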
- """
- if covariance_estimator is None:
- shrinkage = "empirical" if shrinkage is None else shrinkage
- if isinstance(shrinkage, str):
- if shrinkage == "auto":
- sc = StandardScaler() # standardize features
- X = sc.fit_transform(X)
- s = ledoit_wolf(X)[0]
- # rescale
- s = sc.scale_[:, np.newaxis] * s * sc.scale_[np.newaxis, :]
- elif shrinkage == "empirical":
- s = empirical_covariance(X)
- elif isinstance(shrinkage, Real):
- s = shrunk_covariance(empirical_covariance(X), shrinkage)
- else:
- if shrinkage is not None and shrinkage != 0:
- raise ValueError(
- "covariance_estimator and shrinkage parameters "
- "are not None. Only one of the two can be set."
- )
- covariance_estimator.fit(X)
- if not hasattr(covariance_estimator, "covariance_"):
- raise ValueError(
- "%s does not have a covariance_ attribute"
- % covariance_estimator.__class__.__name__
- )
- s = covariance_estimator.covariance_
- return s
- def _class_means(X, y):
- """Compute class means.
- Parameters
- ----------
- X : array-like of shape (n_samples, n_features)
- Input data.
- y : array-like of shape (n_samples,) or (n_samples, n_targets)
- Target values.
- Returns
- -------
- means : array-like of shape (n_classes, n_features)
- Class means.
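- Examples
- --------
- A minimal sketch; `_class_means` is a private helper, assumed importable
- from :mod:`sklearn.discriminant_analysis`:
- >>> import numpy as np
- >>> from sklearn.discriminant_analysis import _class_means
- >>> X = np.array([[0.0, 0.0], [2.0, 2.0], [4.0, 4.0], [6.0, 6.0]])
- >>> y = np.array([0, 0, 1, 1])
- >>> _class_means(X, y)
- array([[1., 1.],
- [5., 5.]])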
- """
- xp, is_array_api_compliant = get_namespace(X)
- classes, y = xp.unique_inverse(y)
- means = xp.zeros((classes.shape[0], X.shape[1]), device=device(X), dtype=X.dtype)
- if is_array_api_compliant:
- for i in range(classes.shape[0]):
- means[i, :] = xp.mean(X[y == i], axis=0)
- else:
- # TODO: Explore the choice of using bincount + add.at, as it seems
- # suboptimal from a performance standpoint
- cnt = np.bincount(y)
- np.add.at(means, y, X)
- means /= cnt[:, None]
- return means
- def _class_cov(X, y, priors, shrinkage=None, covariance_estimator=None):
- """Compute weighted within-class covariance matrix.
- The per-class covariance matrices are weighted by the class priors.
- Parameters
- ----------
- X : array-like of shape (n_samples, n_features)
- Input data.
- y : array-like of shape (n_samples,) or (n_samples, n_targets)
- Target values.
- priors : array-like of shape (n_classes,)
- Class priors.
- shrinkage : 'auto' or float, default=None
- Shrinkage parameter, possible values:
- - None: no shrinkage (default).
- - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.
- - float between 0 and 1: fixed shrinkage parameter.
- Shrinkage parameter is ignored if `covariance_estimator` is not None.
- covariance_estimator : estimator, default=None
- If not None, `covariance_estimator` is used to estimate
- the covariance matrices instead of relying on the empirical
- covariance estimator (with potential shrinkage).
- The object should have a fit method and a ``covariance_`` attribute
- like the estimators in :mod:`sklearn.covariance`.
- If None, the shrinkage parameter drives the estimate.
- .. versionadded:: 0.24
- Returns
- -------
- cov : array-like of shape (n_features, n_features)
- Weighted within-class covariance matrix.
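- Examples
- --------
- A minimal sketch; with equal priors the result is the average of the
- per-class (biased) empirical covariances:
- >>> import numpy as np
- >>> from sklearn.discriminant_analysis import _class_cov
- >>> X = np.array([[0.0, 0.0], [2.0, 0.0], [0.0, 2.0], [2.0, 2.0]])
- >>> y = np.array([0, 0, 1, 1])
- >>> _class_cov(X, y, priors=np.array([0.5, 0.5]))
- array([[1., 0.],
- [0., 0.]])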
- """
- classes = np.unique(y)
- cov = np.zeros(shape=(X.shape[1], X.shape[1]))
- for idx, group in enumerate(classes):
- Xg = X[y == group, :]
- cov += priors[idx] * np.atleast_2d(_cov(Xg, shrinkage, covariance_estimator))
- return cov
- class LinearDiscriminantAnalysis(
- ClassNamePrefixFeaturesOutMixin,
- LinearClassifierMixin,
- TransformerMixin,
- BaseEstimator,
- ):
- """Linear Discriminant Analysis.
- A classifier with a linear decision boundary, generated by fitting class
- conditional densities to the data and using Bayes' rule.
- The model fits a Gaussian density to each class, assuming that all classes
- share the same covariance matrix.
- The fitted model can also be used to reduce the dimensionality of the input
- by projecting it to the most discriminative directions, using the
- `transform` method.
- .. versionadded:: 0.17
- *LinearDiscriminantAnalysis*.
- Read more in the :ref:`User Guide <lda_qda>`.
- Parameters
- ----------
- solver : {'svd', 'lsqr', 'eigen'}, default='svd'
- Solver to use, possible values:
- - 'svd': Singular value decomposition (default).
- Does not compute the covariance matrix, therefore this solver is
- recommended for data with a large number of features.
- - 'lsqr': Least squares solution.
- Can be combined with shrinkage or custom covariance estimator.
- - 'eigen': Eigenvalue decomposition.
- Can be combined with shrinkage or custom covariance estimator.
- .. versionchanged:: 1.2
- `solver="svd"` now has experimental Array API support. See the
- :ref:`Array API User Guide <array_api>` for more details.
- shrinkage : 'auto' or float, default=None
- Shrinkage parameter, possible values:
- - None: no shrinkage (default).
- - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.
- - float between 0 and 1: fixed shrinkage parameter.
- This should be left to None if `covariance_estimator` is used.
- Note that shrinkage works only with 'lsqr' and 'eigen' solvers.
- priors : array-like of shape (n_classes,), default=None
- The class prior probabilities. By default, the class proportions are
- inferred from the training data.
- n_components : int, default=None
- Number of components (<= min(n_classes - 1, n_features)) for
- dimensionality reduction. If None, will be set to
- min(n_classes - 1, n_features). This parameter only affects the
- `transform` method.
- store_covariance : bool, default=False
- If True, explicitly compute the weighted within-class covariance
- matrix when solver is 'svd'. The matrix is always computed
- and stored for the other solvers.
- .. versionadded:: 0.17
- tol : float, default=1.0e-4
- Absolute threshold for a singular value of X to be considered
- significant, used to estimate the rank of X. Dimensions whose
- singular values are non-significant are discarded. Only used if
- solver is 'svd'.
- .. versionadded:: 0.17
- covariance_estimator : covariance estimator, default=None
- If not None, `covariance_estimator` is used to estimate
- the covariance matrices instead of relying on the empirical
- covariance estimator (with potential shrinkage).
- The object should have a fit method and a ``covariance_`` attribute
- like the estimators in :mod:`sklearn.covariance`.
- If None, the shrinkage parameter drives the estimate.
- This should be left to None if `shrinkage` is used.
- Note that `covariance_estimator` works only with 'lsqr' and 'eigen'
- solvers.
- .. versionadded:: 0.24
- Attributes
- ----------
- coef_ : ndarray of shape (n_features,) or (n_classes, n_features)
- Weight vector(s).
- intercept_ : ndarray of shape (n_classes,)
- Intercept term.
- covariance_ : array-like of shape (n_features, n_features)
- Weighted within-class covariance matrix. It corresponds to
- `sum_k prior_k * C_k` where `C_k` is the covariance matrix of the
- samples in class `k`. The `C_k` are estimated using the (potentially
- shrunk) biased estimator of covariance. If solver is 'svd', only
- exists when `store_covariance` is True.
- explained_variance_ratio_ : ndarray of shape (n_components,)
- Percentage of variance explained by each of the selected components.
- If ``n_components`` is not set then all components are stored and the
- sum of explained variances is equal to 1.0. Only available when eigen
- or svd solver is used.
- means_ : array-like of shape (n_classes, n_features)
- Class-wise means.
- priors_ : array-like of shape (n_classes,)
- Class priors (sum to 1).
- scalings_ : array-like of shape (rank, n_classes - 1)
- Scaling of the features in the space spanned by the class centroids.
- Only available for 'svd' and 'eigen' solvers.
- xbar_ : array-like of shape (n_features,)
- Overall mean. Only present if solver is 'svd'.
- classes_ : array-like of shape (n_classes,)
- Unique class labels.
- n_features_in_ : int
- Number of features seen during :term:`fit`.
- .. versionadded:: 0.24
- feature_names_in_ : ndarray of shape (`n_features_in_`,)
- Names of features seen during :term:`fit`. Defined only when `X`
- has feature names that are all strings.
- .. versionadded:: 1.0
- See Also
- --------
- QuadraticDiscriminantAnalysis : Quadratic Discriminant Analysis.
- Examples
- --------
- >>> import numpy as np
- >>> from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
- >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
- >>> y = np.array([1, 1, 1, 2, 2, 2])
- >>> clf = LinearDiscriminantAnalysis()
- >>> clf.fit(X, y)
- LinearDiscriminantAnalysis()
- >>> print(clf.predict([[-0.8, -1]]))
- [1]
- """
- _parameter_constraints: dict = {
- "solver": [StrOptions({"svd", "lsqr", "eigen"})],
- "shrinkage": [StrOptions({"auto"}), Interval(Real, 0, 1, closed="both"), None],
- "n_components": [Interval(Integral, 1, None, closed="left"), None],
- "priors": ["array-like", None],
- "store_covariance": ["boolean"],
- "tol": [Interval(Real, 0, None, closed="left")],
- "covariance_estimator": [HasMethods("fit"), None],
- }
- def __init__(
- self,
- solver="svd",
- shrinkage=None,
- priors=None,
- n_components=None,
- store_covariance=False,
- tol=1e-4,
- covariance_estimator=None,
- ):
- self.solver = solver
- self.shrinkage = shrinkage
- self.priors = priors
- self.n_components = n_components
- self.store_covariance = store_covariance # used only in svd solver
- self.tol = tol # used only in svd solver
- self.covariance_estimator = covariance_estimator
- def _solve_lstsq(self, X, y, shrinkage, covariance_estimator):
- """Least squares solver.
- The least squares solver computes a straightforward solution of the
- optimal decision rule based directly on the discriminant functions. It
- can only be used for classification (with any covariance estimator),
- because no eigenvector estimation is performed; therefore,
- dimensionality reduction with the `transform` method is not supported.
- Parameters
- ----------
- X : array-like of shape (n_samples, n_features)
- Training data.
- y : array-like of shape (n_samples,) or (n_samples, n_classes)
- Target values.
- shrinkage : 'auto', float or None
- Shrinkage parameter, possible values:
- - None: no shrinkage.
- - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.
- - float between 0 and 1: fixed shrinkage parameter.
- The shrinkage parameter is ignored if `covariance_estimator`
- is not None.
- covariance_estimator : estimator, default=None
- If not None, `covariance_estimator` is used to estimate
- the covariance matrices instead of relying on the empirical
- covariance estimator (with potential shrinkage).
- The object should have a fit method and a ``covariance_`` attribute
- like the estimators in :mod:`sklearn.covariance`.
- If None, the shrinkage parameter drives the estimate.
- .. versionadded:: 0.24
- Notes
- -----
- This solver is based on [1]_, section 2.6.2, pp. 39-41.
- References
- ----------
- .. [1] R. O. Duda, P. E. Hart, D. G. Stork. Pattern Classification
- (Second Edition). John Wiley & Sons, Inc., New York, 2001. ISBN
- 0-471-05669-3.
- """
- self.means_ = _class_means(X, y)
- self.covariance_ = _class_cov(
- X, y, self.priors_, shrinkage, covariance_estimator
- )
- self.coef_ = linalg.lstsq(self.covariance_, self.means_.T)[0].T
- self.intercept_ = -0.5 * np.diag(np.dot(self.means_, self.coef_.T)) + np.log(
- self.priors_
- )
- def _solve_eigen(self, X, y, shrinkage, covariance_estimator):
- """Eigenvalue solver.
- The eigenvalue solver computes the optimal solution of the Rayleigh
- coefficient (basically the ratio of between class scatter to within
- class scatter). This solver supports both classification and
- dimensionality reduction (with any covariance estimator).
- Parameters
- ----------
- X : array-like of shape (n_samples, n_features)
- Training data.
- y : array-like of shape (n_samples,) or (n_samples, n_targets)
- Target values.
- shrinkage : 'auto', float or None
- Shrinkage parameter, possible values:
- - None: no shrinkage.
- - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.
- - float between 0 and 1: fixed shrinkage parameter.
- The shrinkage parameter is ignored if `covariance_estimator`
- is not None.
- covariance_estimator : estimator, default=None
- If not None, `covariance_estimator` is used to estimate
- the covariance matrices instead of relying on the empirical
- covariance estimator (with potential shrinkage).
- The object should have a fit method and a ``covariance_`` attribute
- like the estimators in :mod:`sklearn.covariance`.
- If None, the shrinkage parameter drives the estimate.
- .. versionadded:: 0.24
- Notes
- -----
- This solver is based on [1]_, section 3.8.3, pp. 121-124.
- References
- ----------
- .. [1] R. O. Duda, P. E. Hart, D. G. Stork. Pattern Classification
- (Second Edition). John Wiley & Sons, Inc., New York, 2001. ISBN
- 0-471-05669-3.
- """
- self.means_ = _class_means(X, y)
- self.covariance_ = _class_cov(
- X, y, self.priors_, shrinkage, covariance_estimator
- )
- Sw = self.covariance_ # within scatter
- St = _cov(X, shrinkage, covariance_estimator) # total scatter
- Sb = St - Sw # between scatter
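- # Solve the generalized eigenvalue problem Sb @ v = lambda * Sw @ v;
- # eigenvectors with the largest eigenvalues maximize the Rayleigh
- # quotient (between-class over within-class scatter).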
- evals, evecs = linalg.eigh(Sb, Sw)
- self.explained_variance_ratio_ = np.sort(evals / np.sum(evals))[::-1][
- : self._max_components
- ]
- evecs = evecs[:, np.argsort(evals)[::-1]] # sort eigenvectors
- self.scalings_ = evecs
- self.coef_ = np.dot(self.means_, evecs).dot(evecs.T)
- self.intercept_ = -0.5 * np.diag(np.dot(self.means_, self.coef_.T)) + np.log(
- self.priors_
- )
- def _solve_svd(self, X, y):
- """SVD solver.
- Parameters
- ----------
- X : array-like of shape (n_samples, n_features)
- Training data.
- y : array-like of shape (n_samples,) or (n_samples, n_targets)
- Target values.
- """
- xp, is_array_api_compliant = get_namespace(X)
- if is_array_api_compliant:
- svd = xp.linalg.svd
- else:
- svd = scipy.linalg.svd
- n_samples, n_features = X.shape
- n_classes = self.classes_.shape[0]
- self.means_ = _class_means(X, y)
- if self.store_covariance:
- self.covariance_ = _class_cov(X, y, self.priors_)
- Xc = []
- for idx, group in enumerate(self.classes_):
- Xg = X[y == group]
- Xc.append(Xg - self.means_[idx, :])
- self.xbar_ = self.priors_ @ self.means_
- Xc = xp.concat(Xc, axis=0)
- # 1) within-class (univariate) scaling by the within-class std-dev
- std = xp.std(Xc, axis=0)
- # avoid division by zero in normalization
- std[std == 0] = 1.0
- fac = xp.asarray(1.0 / (n_samples - n_classes))
- # 2) Within variance scaling
- X = xp.sqrt(fac) * (Xc / std)
- # SVD of the centered, within-class scaled data
- U, S, Vt = svd(X, full_matrices=False)
- rank = xp.sum(xp.astype(S > self.tol, xp.int32))
- # Scaling of within covariance is: V' 1/S
- scalings = (Vt[:rank, :] / std).T / S[:rank]
- fac = 1.0 if n_classes == 1 else 1.0 / (n_classes - 1)
- # 3) Between variance scaling
- # Scale weighted centers
- X = (
- (xp.sqrt((n_samples * self.priors_) * fac)) * (self.means_ - self.xbar_).T
- ).T @ scalings
- # The class centers live in a space of at most n_classes - 1
- # dimensions. Use SVD to find the projection onto the space
- # spanned by the (n_classes) centers.
- _, S, Vt = svd(X, full_matrices=False)
- if self._max_components == 0:
- self.explained_variance_ratio_ = xp.empty((0,), dtype=S.dtype)
- else:
- self.explained_variance_ratio_ = (S**2 / xp.sum(S**2))[
- : self._max_components
- ]
- rank = xp.sum(xp.astype(S > self.tol * S[0], xp.int32))
- self.scalings_ = scalings @ Vt.T[:, :rank]
- coef = (self.means_ - self.xbar_) @ self.scalings_
- self.intercept_ = -0.5 * xp.sum(coef**2, axis=1) + xp.log(self.priors_)
- self.coef_ = coef @ self.scalings_.T
- self.intercept_ -= self.xbar_ @ self.coef_.T
- @_fit_context(
- # LinearDiscriminantAnalysis.covariance_estimator is not validated yet
- prefer_skip_nested_validation=False
- )
- def fit(self, X, y):
- """Fit the Linear Discriminant Analysis model.
- .. versionchanged:: 0.19
- *store_covariance* has been moved to main constructor.
- .. versionchanged:: 0.19
- *tol* has been moved to main constructor.
- Parameters
- ----------
- X : array-like of shape (n_samples, n_features)
- Training data.
- y : array-like of shape (n_samples,)
- Target values.
- Returns
- -------
- self : object
- Fitted estimator.
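- Examples
- --------
- A minimal sketch combining the 'lsqr' solver with Ledoit-Wolf
- shrinkage:
- >>> import numpy as np
- >>> from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
- >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
- >>> y = np.array([1, 1, 1, 2, 2, 2])
- >>> clf = LinearDiscriminantAnalysis(solver="lsqr", shrinkage="auto")
- >>> clf.fit(X, y)
- LinearDiscriminantAnalysis(shrinkage='auto', solver='lsqr')
- >>> print(clf.predict([[3, 2]]))
- [2]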
- """
- xp, _ = get_namespace(X)
- X, y = self._validate_data(
- X, y, ensure_min_samples=2, dtype=[xp.float64, xp.float32]
- )
- self.classes_ = unique_labels(y)
- n_samples, _ = X.shape
- n_classes = self.classes_.shape[0]
- if n_samples == n_classes:
- raise ValueError(
- "The number of samples must be more than the number of classes."
- )
- if self.priors is None: # estimate priors from sample
- _, cnts = xp.unique_counts(y) # non-negative ints
- self.priors_ = xp.astype(cnts, X.dtype) / float(y.shape[0])
- else:
- self.priors_ = xp.asarray(self.priors, dtype=X.dtype)
- if xp.any(self.priors_ < 0):
- raise ValueError("priors must be non-negative")
- if xp.abs(xp.sum(self.priors_) - 1.0) > 1e-5:
- warnings.warn("The priors do not sum to 1. Renormalizing", UserWarning)
- self.priors_ = self.priors_ / self.priors_.sum()
- # Maximum number of components no matter what n_components is
- # specified:
- max_components = min(n_classes - 1, X.shape[1])
- if self.n_components is None:
- self._max_components = max_components
- else:
- if self.n_components > max_components:
- raise ValueError(
- "n_components cannot be larger than min(n_features, n_classes - 1)."
- )
- self._max_components = self.n_components
- if self.solver == "svd":
- if self.shrinkage is not None:
- raise NotImplementedError("shrinkage not supported with 'svd' solver.")
- if self.covariance_estimator is not None:
- raise ValueError(
- "covariance estimator is not supported with svd solver. "
- "Try another solver"
- )
- self._solve_svd(X, y)
- elif self.solver == "lsqr":
- self._solve_lstsq(
- X,
- y,
- shrinkage=self.shrinkage,
- covariance_estimator=self.covariance_estimator,
- )
- elif self.solver == "eigen":
- self._solve_eigen(
- X,
- y,
- shrinkage=self.shrinkage,
- covariance_estimator=self.covariance_estimator,
- )
- if size(self.classes_) == 2: # treat binary case as a special case
- coef_ = xp.asarray(self.coef_[1, :] - self.coef_[0, :], dtype=X.dtype)
- self.coef_ = xp.reshape(coef_, (1, -1))
- intercept_ = xp.asarray(
- self.intercept_[1] - self.intercept_[0], dtype=X.dtype
- )
- self.intercept_ = xp.reshape(intercept_, (1,))
- self._n_features_out = self._max_components
- return self
- def transform(self, X):
- """Project data to maximize class separation.
- Parameters
- ----------
- X : array-like of shape (n_samples, n_features)
- Input data.
- Returns
- -------
- X_new : ndarray of shape (n_samples, n_components) or \
- (n_samples, min(rank, n_components))
- Transformed data. In the case of the 'svd' solver, the shape
- is (n_samples, min(rank, n_components)).
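- Examples
- --------
- A minimal sketch; with two classes at most one component is kept:
- >>> import numpy as np
- >>> from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
- >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
- >>> y = np.array([1, 1, 1, 2, 2, 2])
- >>> LinearDiscriminantAnalysis().fit(X, y).transform(X).shape
- (6, 1)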
- """
- if self.solver == "lsqr":
- raise NotImplementedError(
- "transform not implemented for 'lsqr' solver (use 'svd' or 'eigen')."
- )
- check_is_fitted(self)
- xp, _ = get_namespace(X)
- X = self._validate_data(X, reset=False)
- if self.solver == "svd":
- X_new = (X - self.xbar_) @ self.scalings_
- elif self.solver == "eigen":
- X_new = X @ self.scalings_
- return X_new[:, : self._max_components]
- def predict_proba(self, X):
- """Estimate probability.
- Parameters
- ----------
- X : array-like of shape (n_samples, n_features)
- Input data.
- Returns
- -------
- C : ndarray of shape (n_samples, n_classes)
- Estimated probabilities.
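- Examples
- --------
- A minimal sketch; each row is a probability distribution over classes:
- >>> import numpy as np
- >>> from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
- >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
- >>> y = np.array([1, 1, 1, 2, 2, 2])
- >>> proba = LinearDiscriminantAnalysis().fit(X, y).predict_proba(X)
- >>> bool(np.allclose(proba.sum(axis=1), 1.0))
- True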
- """
- check_is_fitted(self)
- xp, is_array_api_compliant = get_namespace(X)
- decision = self.decision_function(X)
- if size(self.classes_) == 2:
- proba = _expit(decision)
- return xp.stack([1 - proba, proba], axis=1)
- else:
- return softmax(decision)
- def predict_log_proba(self, X):
- """Estimate log probability.
- Parameters
- ----------
- X : array-like of shape (n_samples, n_features)
- Input data.
- Returns
- -------
- C : ndarray of shape (n_samples, n_classes)
- Estimated log probabilities.
- """
- xp, _ = get_namespace(X)
- prediction = self.predict_proba(X)
- info = xp.finfo(prediction.dtype)
- if hasattr(info, "smallest_normal"):
- smallest_normal = info.smallest_normal
- else:
- # smallest_normal was introduced in NumPy 1.22
- smallest_normal = info.tiny
- prediction[prediction == 0.0] += smallest_normal
- return xp.log(prediction)
- def decision_function(self, X):
- """Apply decision function to an array of samples.
- The decision function is equal (up to a constant factor) to the
- log-posterior of the model, i.e. `log p(y = k | x)`. In a binary
- classification setting this instead corresponds to the difference
- `log p(y = 1 | x) - log p(y = 0 | x)`. See :ref:`lda_qda_math`.
- Parameters
- ----------
- X : array-like of shape (n_samples, n_features)
- Array of samples (test vectors).
- Returns
- -------
- C : ndarray of shape (n_samples,) or (n_samples, n_classes)
- Decision function values related to each class, per sample.
- In the two-class case, the shape is (n_samples,), giving the
- log likelihood ratio of the positive class.
- """
- # Only override for the doc
- return super().decision_function(X)
- def _more_tags(self):
- return {"array_api_support": True}
- class QuadraticDiscriminantAnalysis(ClassifierMixin, BaseEstimator):
- """Quadratic Discriminant Analysis.
- A classifier with a quadratic decision boundary, generated
- by fitting class conditional densities to the data
- and using Bayes' rule.
- The model fits a Gaussian density to each class.
- .. versionadded:: 0.17
- *QuadraticDiscriminantAnalysis*
- Read more in the :ref:`User Guide <lda_qda>`.
- Parameters
- ----------
- priors : array-like of shape (n_classes,), default=None
- Class priors. By default, the class proportions are inferred from the
- training data.
- reg_param : float, default=0.0
- Regularizes the per-class covariance estimates by transforming S2 as
- ``S2 = (1 - reg_param) * S2 + reg_param * np.eye(n_features)``,
- where S2 corresponds to the `scaling_` attribute of a given class.
- store_covariance : bool, default=False
- If True, the class covariance matrices are explicitly computed and
- stored in the `self.covariance_` attribute.
- .. versionadded:: 0.17
- tol : float, default=1.0e-4
- Absolute threshold for a singular value to be considered significant,
- used to estimate the rank of `Xk` where `Xk` is the centered matrix
- of samples in class k. This parameter does not affect the
- predictions. It only controls a warning that is raised when features
- are considered to be colinear.
- .. versionadded:: 0.17
- Attributes
- ----------
- covariance_ : list of len n_classes of ndarray \
- of shape (n_features, n_features)
- For each class, gives the covariance matrix estimated using the
- samples of that class. The estimations are unbiased. Only present if
- `store_covariance` is True.
- means_ : array-like of shape (n_classes, n_features)
- Class-wise means.
- priors_ : array-like of shape (n_classes,)
- Class priors (sum to 1).
- rotations_ : list of len n_classes of ndarray of shape (n_features, n_k)
- For each class k an array of shape (n_features, n_k), where
- ``n_k = min(n_features, number of elements in class k)``
- It is the rotation of the Gaussian distribution, i.e. its
- principal axis. It corresponds to `V`, the matrix of eigenvectors
- coming from the SVD of `Xk = U S Vt` where `Xk` is the centered
- matrix of samples from class k.
- scalings_ : list of len n_classes of ndarray of shape (n_k,)
- For each class, contains the scaling of
- the Gaussian distributions along its principal axes, i.e. the
- variance in the rotated coordinate system. It corresponds to `S^2 /
- (n_samples - 1)`, where `S` is the diagonal matrix of singular values
- from the SVD of `Xk`, where `Xk` is the centered matrix of samples
- from class k.
- classes_ : ndarray of shape (n_classes,)
- Unique class labels.
- n_features_in_ : int
- Number of features seen during :term:`fit`.
- .. versionadded:: 0.24
- feature_names_in_ : ndarray of shape (`n_features_in_`,)
- Names of features seen during :term:`fit`. Defined only when `X`
- has feature names that are all strings.
- .. versionadded:: 1.0
- See Also
- --------
- LinearDiscriminantAnalysis : Linear Discriminant Analysis.
- Examples
- --------
- >>> from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
- >>> import numpy as np
- >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
- >>> y = np.array([1, 1, 1, 2, 2, 2])
- >>> clf = QuadraticDiscriminantAnalysis()
- >>> clf.fit(X, y)
- QuadraticDiscriminantAnalysis()
- >>> print(clf.predict([[-0.8, -1]]))
- [1]
- """
- _parameter_constraints: dict = {
- "priors": ["array-like", None],
- "reg_param": [Interval(Real, 0, 1, closed="both")],
- "store_covariance": ["boolean"],
- "tol": [Interval(Real, 0, None, closed="left")],
- }
- def __init__(
- self, *, priors=None, reg_param=0.0, store_covariance=False, tol=1.0e-4
- ):
- self.priors = priors
- self.reg_param = reg_param
- self.store_covariance = store_covariance
- self.tol = tol
- @_fit_context(prefer_skip_nested_validation=True)
- def fit(self, X, y):
- """Fit the model according to the given training data and parameters.
- .. versionchanged:: 0.19
- ``store_covariances`` has been moved to main constructor as
- ``store_covariance``
- .. versionchanged:: 0.19
- ``tol`` has been moved to main constructor.
- Parameters
- ----------
- X : array-like of shape (n_samples, n_features)
- Training vector, where `n_samples` is the number of samples and
- `n_features` is the number of features.
- y : array-like of shape (n_samples,)
- Target values (integers).
- Returns
- -------
- self : object
- Fitted estimator.
- """
- X, y = self._validate_data(X, y)
- check_classification_targets(y)
- self.classes_, y = np.unique(y, return_inverse=True)
- n_samples, n_features = X.shape
- n_classes = len(self.classes_)
- if n_classes < 2:
- raise ValueError(
- "The number of classes has to be greater than one; got %d class"
- % (n_classes)
- )
- if self.priors is None:
- self.priors_ = np.bincount(y) / float(n_samples)
- else:
- self.priors_ = np.array(self.priors)
- cov = None
- store_covariance = self.store_covariance
- if store_covariance:
- cov = []
- means = []
- scalings = []
- rotations = []
- for ind in range(n_classes):
- Xg = X[y == ind, :]
- meang = Xg.mean(0)
- means.append(meang)
- if len(Xg) == 1:
- raise ValueError(
- "y has only 1 sample in class %s, covariance is ill defined."
- % str(self.classes_[ind])
- )
- Xgc = Xg - meang
- # Xgc = U * S * V.T
- _, S, Vt = np.linalg.svd(Xgc, full_matrices=False)
- rank = np.sum(S > self.tol)
- if rank < n_features:
- warnings.warn("Variables are collinear")
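- # Unbiased per-class variances along the principal axes (S^2 / (n_k - 1)),
- # then shrunk toward 1 (the identity in the rotated basis) by reg_param:
- # S2 <- (1 - reg_param) * S2 + reg_param.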
- S2 = (S**2) / (len(Xg) - 1)
- S2 = ((1 - self.reg_param) * S2) + self.reg_param
- if store_covariance:
- # cov = V * (S^2 / (n-1)) * V.T
- cov.append(np.dot(S2 * Vt.T, Vt))
- scalings.append(S2)
- rotations.append(Vt.T)
- if store_covariance:
- self.covariance_ = cov
- self.means_ = np.asarray(means)
- self.scalings_ = scalings
- self.rotations_ = rotations
- return self
- def _decision_function(self, X):
- # return log posterior, see eq (4.12) p. 110 of the ESL.
- check_is_fitted(self)
- X = self._validate_data(X, reset=False)
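- # For each class k, map the centered samples into the rotated,
- # variance-normalized basis (Xm @ R_k @ diag(S_k^{-1/2})); the squared
- # row norms are then the Mahalanobis distances to the class mean.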
- norm2 = []
- for i in range(len(self.classes_)):
- R = self.rotations_[i]
- S = self.scalings_[i]
- Xm = X - self.means_[i]
- X2 = np.dot(Xm, R * (S ** (-0.5)))
- norm2.append(np.sum(X2**2, axis=1))
- norm2 = np.array(norm2).T # shape = [len(X), n_classes]
- u = np.asarray([np.sum(np.log(s)) for s in self.scalings_])
- return -0.5 * (norm2 + u) + np.log(self.priors_)
- def decision_function(self, X):
- """Apply decision function to an array of samples.
- The decision function is equal (up to a constant factor) to the
- log-posterior of the model, i.e. `log p(y = k | x)`. In a binary
- classification setting this instead corresponds to the difference
- `log p(y = 1 | x) - log p(y = 0 | x)`. See :ref:`lda_qda_math`.
- Parameters
- ----------
- X : array-like of shape (n_samples, n_features)
- Array of samples (test vectors).
- Returns
- -------
- C : ndarray of shape (n_samples,) or (n_samples, n_classes)
- Decision function values related to each class, per sample.
- In the two-class case, the shape is (n_samples,), giving the
- log likelihood ratio of the positive class.
- """
- dec_func = self._decision_function(X)
- # handle special case of two classes
- if len(self.classes_) == 2:
- return dec_func[:, 1] - dec_func[:, 0]
- return dec_func
- def predict(self, X):
- """Perform classification on an array of test vectors X.
- The predicted class C for each sample in X is returned.
- Parameters
- ----------
- X : array-like of shape (n_samples, n_features)
- Vector to be scored, where `n_samples` is the number of samples and
- `n_features` is the number of features.
- Returns
- -------
- C : ndarray of shape (n_samples,)
- Estimated probabilities.
- """
- d = self._decision_function(X)
- y_pred = self.classes_.take(d.argmax(1))
- return y_pred
- def predict_proba(self, X):
- """Return posterior probabilities of classification.
- Parameters
- ----------
- X : array-like of shape (n_samples, n_features)
- Array of samples/test vectors.
- Returns
- -------
- C : ndarray of shape (n_samples, n_classes)
- Posterior probabilities of classification per class.
- """
- values = self._decision_function(X)
- # compute the likelihood of the underlying Gaussian models
- # up to a multiplicative constant.
- likelihood = np.exp(values - values.max(axis=1)[:, np.newaxis])
- # compute posterior probabilities
- return likelihood / likelihood.sum(axis=1)[:, np.newaxis]
- def predict_log_proba(self, X):
- """Return log of posterior probabilities of classification.
- Parameters
- ----------
- X : array-like of shape (n_samples, n_features)
- Array of samples/test vectors.
- Returns
- -------
- C : ndarray of shape (n_samples, n_classes)
- Posterior log-probabilities of classification per class.
- """
- # XXX : can do better to avoid precision overflows
- probas_ = self.predict_proba(X)
- return np.log(probas_)