_feature_agglomeration.py 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104
  1. """
  2. Feature agglomeration. Base classes and functions for performing feature
  3. agglomeration.
  4. """
  5. # Author: V. Michel, A. Gramfort
  6. # License: BSD 3 clause
  7. import warnings
  8. import numpy as np
  9. from scipy.sparse import issparse
  10. from ..base import TransformerMixin
  11. from ..utils import metadata_routing
  12. from ..utils.validation import check_is_fitted
  13. ###############################################################################
  14. # Mixin class for feature agglomeration.
  15. class AgglomerationTransform(TransformerMixin):
  16. """
  17. A class for feature agglomeration via the transform interface.
  18. """
  19. # This prevents ``set_split_inverse_transform`` to be generated for the
  20. # non-standard ``Xred`` arg on ``inverse_transform``.
  21. # TODO(1.5): remove when Xred is removed for inverse_transform.
  22. __metadata_request__inverse_transform = {"Xred": metadata_routing.UNUSED}
  23. def transform(self, X):
  24. """
  25. Transform a new matrix using the built clustering.
  26. Parameters
  27. ----------
  28. X : array-like of shape (n_samples, n_features) or \
  29. (n_samples, n_samples)
  30. A M by N array of M observations in N dimensions or a length
  31. M array of M one-dimensional observations.
  32. Returns
  33. -------
  34. Y : ndarray of shape (n_samples, n_clusters) or (n_clusters,)
  35. The pooled values for each feature cluster.
  36. """
  37. check_is_fitted(self)
  38. X = self._validate_data(X, reset=False)
  39. if self.pooling_func == np.mean and not issparse(X):
  40. size = np.bincount(self.labels_)
  41. n_samples = X.shape[0]
  42. # a fast way to compute the mean of grouped features
  43. nX = np.array(
  44. [np.bincount(self.labels_, X[i, :]) / size for i in range(n_samples)]
  45. )
  46. else:
  47. nX = [
  48. self.pooling_func(X[:, self.labels_ == l], axis=1)
  49. for l in np.unique(self.labels_)
  50. ]
  51. nX = np.array(nX).T
  52. return nX
  53. def inverse_transform(self, Xt=None, Xred=None):
  54. """
  55. Inverse the transformation and return a vector of size `n_features`.
  56. Parameters
  57. ----------
  58. Xt : array-like of shape (n_samples, n_clusters) or (n_clusters,)
  59. The values to be assigned to each cluster of samples.
  60. Xred : deprecated
  61. Use `Xt` instead.
  62. .. deprecated:: 1.3
  63. Returns
  64. -------
  65. X : ndarray of shape (n_samples, n_features) or (n_features,)
  66. A vector of size `n_samples` with the values of `Xred` assigned to
  67. each of the cluster of samples.
  68. """
  69. if Xt is None and Xred is None:
  70. raise TypeError("Missing required positional argument: Xt")
  71. if Xred is not None and Xt is not None:
  72. raise ValueError("Please provide only `Xt`, and not `Xred`.")
  73. if Xred is not None:
  74. warnings.warn(
  75. (
  76. "Input argument `Xred` was renamed to `Xt` in v1.3 and will be"
  77. " removed in v1.5."
  78. ),
  79. FutureWarning,
  80. )
  81. Xt = Xred
  82. check_is_fitted(self)
  83. unil, inverse = np.unique(self.labels_, return_inverse=True)
  84. return Xt[..., inverse]