  1. """Principal Component Analysis Base Classes"""
  2. # Author: Alexandre Gramfort <alexandre.gramfort@inria.fr>
  3. # Olivier Grisel <olivier.grisel@ensta.org>
  4. # Mathieu Blondel <mathieu@mblondel.org>
  5. # Denis A. Engemann <denis-alexander.engemann@inria.fr>
  6. # Kyle Kastner <kastnerkyle@gmail.com>
  7. #
  8. # License: BSD 3 clause
  9. from abc import ABCMeta, abstractmethod
  10. import numpy as np
  11. from scipy import linalg
  12. from ..base import BaseEstimator, ClassNamePrefixFeaturesOutMixin, TransformerMixin
  13. from ..utils.validation import check_is_fitted


class _BasePCA(
    ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator, metaclass=ABCMeta
):
    """Base class for PCA methods.

    Warning: This class should not be used directly.
    Use derived classes instead.
    """

    def get_covariance(self):
        """Compute data covariance with the generative model.

        ``cov = components_.T * S**2 * components_ + sigma2 * eye(n_features)``

        where S**2 contains the explained variances, and sigma2 contains the
        noise variances.

        Returns
        -------
        cov : array of shape=(n_features, n_features)
            Estimated covariance of data.
        """
        components_ = self.components_
        exp_var = self.explained_variance_
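        # With whitening, the components are rescaled by
        # sqrt(explained_variance_) so that the covariance is reconstructed
        # on the scale of the original features.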
        if self.whiten:
            components_ = components_ * np.sqrt(exp_var[:, np.newaxis])
        exp_var_diff = np.maximum(exp_var - self.noise_variance_, 0.0)
        cov = np.dot(components_.T * exp_var_diff, components_)
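        # cov.flat[:: len(cov) + 1] walks the diagonal (stride n_features + 1),
        # adding the isotropic noise without allocating a full eye(n_features).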
        cov.flat[:: len(cov) + 1] += self.noise_variance_  # modify diag inplace
        return cov

    def get_precision(self):
        """Compute data precision matrix with the generative model.

        Equals the inverse of the covariance but computed with
        the matrix inversion lemma for efficiency.

        Returns
        -------
        precision : array, shape=(n_features, n_features)
            Estimated precision of data.
        """
        n_features = self.components_.shape[1]

        # handle corner cases first
        if self.n_components_ == 0:
            return np.eye(n_features) / self.noise_variance_
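        # With noise_variance_ exactly at zero the Woodbury update below would
        # divide by zero; invert the covariance matrix directly instead.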
        if np.isclose(self.noise_variance_, 0.0, atol=0.0):
            return linalg.inv(self.get_covariance())

        # Get precision using matrix inversion lemma
        components_ = self.components_
        exp_var = self.explained_variance_
        if self.whiten:
            components_ = components_ * np.sqrt(exp_var[:, np.newaxis])
        exp_var_diff = np.maximum(exp_var - self.noise_variance_, 0.0)
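        # Matrix inversion lemma (Woodbury): with
        #   C = W.T @ diag(d) @ W + sigma2 * I,
        # where W = components_ and d = exp_var_diff,
        #   C^-1 = I / sigma2
        #          - W.T @ inv(diag(1 / d) + W @ W.T / sigma2) @ W / sigma2**2
        # so only an (n_components, n_components) matrix is inverted below.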
        precision = np.dot(components_, components_.T) / self.noise_variance_
        precision.flat[:: len(precision) + 1] += 1.0 / exp_var_diff
        precision = np.dot(components_.T, np.dot(linalg.inv(precision), components_))
        precision /= -(self.noise_variance_**2)
        precision.flat[:: len(precision) + 1] += 1.0 / self.noise_variance_
        return precision

    @abstractmethod
    def fit(self, X, y=None):
        """Placeholder for fit. Subclasses should implement this method!

        Fit the model with X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data, where `n_samples` is the number of samples and
            `n_features` is the number of features.

        y : Ignored
            Ignored.

        Returns
        -------
        self : object
            Returns the instance itself.
        """

    def transform(self, X):
        """Apply dimensionality reduction to X.

        X is projected on the first principal components previously extracted
        from a training set.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            New data, where `n_samples` is the number of samples
            and `n_features` is the number of features.

        Returns
        -------
        X_new : array-like of shape (n_samples, n_components)
            Projection of X onto the first principal components, where
            `n_samples` is the number of samples and `n_components` is the
            number of components.
        """
        check_is_fitted(self)

        X = self._validate_data(X, dtype=[np.float64, np.float32], reset=False)
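        # Center new data with the per-feature mean learned during fit.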
        if self.mean_ is not None:
            X = X - self.mean_
        X_transformed = np.dot(X, self.components_.T)
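        # Whitening rescales each projected coordinate to unit variance.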
        if self.whiten:
            X_transformed /= np.sqrt(self.explained_variance_)
        return X_transformed

    def inverse_transform(self, X):
        """Transform data back to its original space.

        In other words, return an input `X_original` whose transform would be X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_components)
            New data, where `n_samples` is the number of samples
            and `n_components` is the number of components.

        Returns
        -------
        X_original : array-like of shape (n_samples, n_features)
            Original data, where `n_samples` is the number of samples
            and `n_features` is the number of features.

        Notes
        -----
        If whitening is enabled, inverse_transform will compute the
        exact inverse operation, which includes reversing whitening.
        """
        if self.whiten:
            return (
                np.dot(
                    X,
                    np.sqrt(self.explained_variance_[:, np.newaxis]) * self.components_,
                )
                + self.mean_
            )
        else:
            return np.dot(X, self.components_) + self.mean_

    @property
    def _n_features_out(self):
        """Number of transformed output features."""
        return self.components_.shape[0]
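

# Usage sketch: _BasePCA is private, so in practice the methods above are
# reached through a concrete subclass such as sklearn.decomposition.PCA.
# Assuming a standard scikit-learn install:
#
#     import numpy as np
#     from sklearn.decomposition import PCA
#
#     rng = np.random.RandomState(0)
#     X = rng.randn(100, 5)
#     pca = PCA(n_components=2).fit(X)
#     X_new = pca.transform(X)               # project onto 2 components
#     X_back = pca.inverse_transform(X_new)  # approximate reconstruction
#     cov = pca.get_covariance()             # (5, 5) model covariance
#     prec = pca.get_precision()             # its inverse via Woodbury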