- """Principal Component Analysis Base Classes"""
- # Author: Alexandre Gramfort <alexandre.gramfort@inria.fr>
- # Olivier Grisel <olivier.grisel@ensta.org>
- # Mathieu Blondel <mathieu@mblondel.org>
- # Denis A. Engemann <denis-alexander.engemann@inria.fr>
- # Kyle Kastner <kastnerkyle@gmail.com>
- #
- # License: BSD 3 clause
- from abc import ABCMeta, abstractmethod
- import numpy as np
- from scipy import linalg
- from ..base import BaseEstimator, ClassNamePrefixFeaturesOutMixin, TransformerMixin
- from ..utils.validation import check_is_fitted


class _BasePCA(
    ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator, metaclass=ABCMeta
):
    """Base class for PCA methods.

    Warning: This class should not be used directly.
    Use derived classes instead.
    """

    def get_covariance(self):
        """Compute data covariance with the generative model.

        ``cov = components_.T * S**2 * components_ + sigma2 * eye(n_features)``
        where S**2 contains the explained variances, and sigma2 contains the
        noise variances.

        Returns
        -------
        cov : array of shape=(n_features, n_features)
            Estimated covariance of data.
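
        Examples
        --------
        A minimal sketch using the :class:`~sklearn.decomposition.PCA`
        subclass; assuming all components are kept (so ``noise_variance_``
        is zero), the generative covariance reduces to the sample
        covariance:

        >>> import numpy as np
        >>> from sklearn.decomposition import PCA
        >>> rng = np.random.RandomState(0)
        >>> X = rng.randn(100, 3)
        >>> pca = PCA(n_components=3).fit(X)
        >>> np.allclose(pca.get_covariance(), np.cov(X.T))
        True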
- """
- components_ = self.components_
- exp_var = self.explained_variance_
- if self.whiten:
- components_ = components_ * np.sqrt(exp_var[:, np.newaxis])
- exp_var_diff = np.maximum(exp_var - self.noise_variance_, 0.0)
- cov = np.dot(components_.T * exp_var_diff, components_)
- cov.flat[:: len(cov) + 1] += self.noise_variance_ # modify diag inplace
- return cov

    def get_precision(self):
        """Compute data precision matrix with the generative model.

        Equals the inverse of the covariance but computed with
        the matrix inversion lemma for efficiency.

        Returns
        -------
        precision : array, shape=(n_features, n_features)
            Estimated precision of data.
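
        Examples
        --------
        A minimal sketch using the :class:`~sklearn.decomposition.PCA`
        subclass, checking that the matrix-inversion-lemma result agrees
        numerically with a direct inverse of the covariance:

        >>> import numpy as np
        >>> from scipy import linalg
        >>> from sklearn.decomposition import PCA
        >>> rng = np.random.RandomState(0)
        >>> X = rng.randn(100, 4)
        >>> pca = PCA(n_components=2).fit(X)
        >>> np.allclose(pca.get_precision(), linalg.inv(pca.get_covariance()))
        True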
- """
- n_features = self.components_.shape[1]
- # handle corner cases first
- if self.n_components_ == 0:
- return np.eye(n_features) / self.noise_variance_
- if np.isclose(self.noise_variance_, 0.0, atol=0.0):
- return linalg.inv(self.get_covariance())
- # Get precision using matrix inversion lemma
- components_ = self.components_
- exp_var = self.explained_variance_
- if self.whiten:
- components_ = components_ * np.sqrt(exp_var[:, np.newaxis])
- exp_var_diff = np.maximum(exp_var - self.noise_variance_, 0.0)
- precision = np.dot(components_, components_.T) / self.noise_variance_
- precision.flat[:: len(precision) + 1] += 1.0 / exp_var_diff
- precision = np.dot(components_.T, np.dot(linalg.inv(precision), components_))
- precision /= -(self.noise_variance_**2)
- precision.flat[:: len(precision) + 1] += 1.0 / self.noise_variance_
- return precision

    @abstractmethod
    def fit(self, X, y=None):
        """Placeholder for fit. Subclasses should implement this method!

        Fit the model with X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data, where `n_samples` is the number of samples and
            `n_features` is the number of features.

        Returns
        -------
        self : object
            Returns the instance itself.
        """

    def transform(self, X):
        """Apply dimensionality reduction to X.

        X is projected on the first principal components previously extracted
        from a training set.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            New data, where `n_samples` is the number of samples
            and `n_features` is the number of features.

        Returns
        -------
        X_new : array-like of shape (n_samples, n_components)
            Projection of X in the first principal components, where
            `n_samples` is the number of samples and `n_components` is
            the number of components.
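
        Examples
        --------
        A minimal sketch using the :class:`~sklearn.decomposition.PCA`
        subclass (non-whitened case), showing that the projection is just
        centering followed by a dot product with the components:

        >>> import numpy as np
        >>> from sklearn.decomposition import PCA
        >>> rng = np.random.RandomState(0)
        >>> X = rng.randn(10, 3)
        >>> pca = PCA(n_components=2).fit(X)
        >>> np.allclose(pca.transform(X), (X - pca.mean_) @ pca.components_.T)
        True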
- """
- check_is_fitted(self)
- X = self._validate_data(X, dtype=[np.float64, np.float32], reset=False)
- if self.mean_ is not None:
- X = X - self.mean_
- X_transformed = np.dot(X, self.components_.T)
- if self.whiten:
- X_transformed /= np.sqrt(self.explained_variance_)
- return X_transformed

    def inverse_transform(self, X):
        """Transform data back to its original space.

        In other words, return an input `X_original` whose transform would be X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_components)
            New data, where `n_samples` is the number of samples
            and `n_components` is the number of components.

        Returns
        -------
        X_original : array-like of shape (n_samples, n_features)
            Original data, where `n_samples` is the number of samples
            and `n_features` is the number of features.

        Notes
        -----
        If whitening is enabled, inverse_transform will compute the
        exact inverse operation, which includes reversing whitening.
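
        Examples
        --------
        A round-trip sketch using the :class:`~sklearn.decomposition.PCA`
        subclass with whitening enabled; assuming all components are kept,
        ``inverse_transform`` undoes ``transform`` up to floating-point
        error:

        >>> import numpy as np
        >>> from sklearn.decomposition import PCA
        >>> rng = np.random.RandomState(0)
        >>> X = rng.randn(20, 3)
        >>> pca = PCA(n_components=3, whiten=True).fit(X)
        >>> np.allclose(pca.inverse_transform(pca.transform(X)), X)
        True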
- """
- if self.whiten:
- return (
- np.dot(
- X,
- np.sqrt(self.explained_variance_[:, np.newaxis]) * self.components_,
- )
- + self.mean_
- )
- else:
- return np.dot(X, self.components_) + self.mean_

    @property
    def _n_features_out(self):
        """Number of transformed output features."""
        return self.components_.shape[0]