| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810 |
- """Random Projection transformers.
- Random Projections are a simple and computationally efficient way to
- reduce the dimensionality of the data by trading a controlled amount
- of accuracy (as additional variance) for faster processing times and
- smaller model sizes.
- The dimensions and distribution of Random Projections matrices are
- controlled so as to preserve the pairwise distances between any two
- samples of the dataset.
- The main theoretical result behind the efficiency of random projection is the
- `Johnson-Lindenstrauss lemma (quoting Wikipedia)
- <https://en.wikipedia.org/wiki/Johnson%E2%80%93Lindenstrauss_lemma>`_:
- In mathematics, the Johnson-Lindenstrauss lemma is a result
- concerning low-distortion embeddings of points from high-dimensional
- into low-dimensional Euclidean space. The lemma states that a small set
- of points in a high-dimensional space can be embedded into a space of
- much lower dimension in such a way that distances between the points are
- nearly preserved. The map used for the embedding is at least Lipschitz,
- and can even be taken to be an orthogonal projection.
- """
- # Authors: Olivier Grisel <olivier.grisel@ensta.org>,
- # Arnaud Joly <a.joly@ulg.ac.be>
- # License: BSD 3 clause
- import warnings
- from abc import ABCMeta, abstractmethod
- from numbers import Integral, Real
- import numpy as np
- import scipy.sparse as sp
- from scipy import linalg
- from .base import (
- BaseEstimator,
- ClassNamePrefixFeaturesOutMixin,
- TransformerMixin,
- _fit_context,
- )
- from .exceptions import DataDimensionalityWarning
- from .utils import check_random_state
- from .utils._param_validation import Interval, StrOptions, validate_params
- from .utils.extmath import safe_sparse_dot
- from .utils.random import sample_without_replacement
- from .utils.validation import check_array, check_is_fitted
- __all__ = [
- "SparseRandomProjection",
- "GaussianRandomProjection",
- "johnson_lindenstrauss_min_dim",
- ]
@validate_params(
    {
        "n_samples": ["array-like", Interval(Real, 1, None, closed="left")],
        "eps": ["array-like", Interval(Real, 0, 1, closed="neither")],
    },
    prefer_skip_nested_validation=True,
)
def johnson_lindenstrauss_min_dim(n_samples, *, eps=0.1):
    """Find a 'safe' number of components to randomly project to.

    A random projection `p` distorts the distance between two points by at
    most a factor (1 +- eps) in a euclidean space, with good probability.
    Such a projection is called an eps-embedding and satisfies:

      (1 - eps) ||u - v||^2 < ||p(u) - p(v)||^2 < (1 + eps) ||u - v||^2

    where u and v are any rows of a dataset of shape (n_samples,
    n_features), eps is in ]0, 1[ and p is a projection by a random Gaussian
    N(0, 1) matrix of shape (n_components, n_features) (or a sparse
    Achlioptas matrix).

    The minimum number of components that guarantees the eps-embedding is:

      n_components >= 4 log(n_samples) / (eps^2 / 2 - eps^3 / 3)

    The bound does not depend on the original number of features, only on
    the size of the dataset: the larger the dataset, the higher the minimal
    dimensionality of an eps-embedding.

    Read more in the :ref:`User Guide <johnson_lindenstrauss>`.

    Parameters
    ----------
    n_samples : int or array-like of int
        Number of samples that should be an integer greater than 0. If an array
        is given, it will compute a safe number of components array-wise.

    eps : float or array-like of shape (n_components,), dtype=float, \
            default=0.1
        Maximum distortion rate in the range (0, 1) as defined by the
        Johnson-Lindenstrauss lemma. If an array is given, it will compute a
        safe number of components array-wise.

    Returns
    -------
    n_components : int or ndarray of int
        The minimal number of components to guarantee with good probability
        an eps-embedding with n_samples.

    References
    ----------

    .. [1] https://en.wikipedia.org/wiki/Johnson%E2%80%93Lindenstrauss_lemma

    .. [2] `Sanjoy Dasgupta and Anupam Gupta, 1999,
           "An elementary proof of the Johnson-Lindenstrauss Lemma."
           <https://citeseerx.ist.psu.edu/doc_view/pid/95cd464d27c25c9c8690b378b894d337cdf021f9>`_

    Examples
    --------
    >>> from sklearn.random_projection import johnson_lindenstrauss_min_dim
    >>> johnson_lindenstrauss_min_dim(1e6, eps=0.5)
    663

    >>> johnson_lindenstrauss_min_dim(1e6, eps=[0.5, 0.1, 0.01])
    array([    663,   11841, 1112658])

    >>> johnson_lindenstrauss_min_dim([1e4, 1e5, 1e6], eps=0.1)
    array([ 7894,  9868, 11841])
    """
    eps = np.asarray(eps)
    n_samples = np.asarray(n_samples)

    # Array inputs are only checked for being array-like by the decorator,
    # so the element-wise ranges are enforced here.
    eps_out_of_range = np.any(eps <= 0.0) or np.any(eps >= 1)
    if eps_out_of_range:
        raise ValueError("The JL bound is defined for eps in ]0, 1[, got %r" % eps)

    has_non_positive_size = np.any(n_samples <= 0)
    if has_non_positive_size:
        raise ValueError(
            "The JL bound is defined for n_samples greater than zero, got %r"
            % n_samples
        )

    denominator = (eps**2 / 2) - (eps**3 / 3)
    bound = 4 * np.log(n_samples) / denominator
    # Truncate towards zero to get an integer number of components.
    return bound.astype(np.int64)
- def _check_density(density, n_features):
- """Factorize density check according to Li et al."""
- if density == "auto":
- density = 1 / np.sqrt(n_features)
- elif density <= 0 or density > 1:
- raise ValueError("Expected density in range ]0, 1], got: %r" % density)
- return density
- def _check_input_size(n_components, n_features):
- """Factorize argument checking for random matrix generation."""
- if n_components <= 0:
- raise ValueError(
- "n_components must be strictly positive, got %d" % n_components
- )
- if n_features <= 0:
- raise ValueError("n_features must be strictly positive, got %d" % n_features)
def _gaussian_random_matrix(n_components, n_features, random_state=None):
    """Generate a dense Gaussian random matrix.

    Every entry is drawn independently from
    N(0, 1.0 / n_components), i.e. a centred normal distribution with
    standard deviation ``1 / sqrt(n_components)``.

    Read more in the :ref:`User Guide <gaussian_random_matrix>`.

    Parameters
    ----------
    n_components : int,
        Dimensionality of the target projection space.

    n_features : int,
        Dimensionality of the original source space.

    random_state : int, RandomState instance or None, default=None
        Controls the pseudo random number generator used to generate the matrix
        at fit time.
        Pass an int for reproducible output across multiple function calls.
        See :term:`Glossary <random_state>`.

    Returns
    -------
    components : ndarray of shape (n_components, n_features)
        The generated Gaussian random matrix.

    See Also
    --------
    GaussianRandomProjection
    """
    _check_input_size(n_components, n_features)
    generator = check_random_state(random_state)
    std = 1.0 / np.sqrt(n_components)
    return generator.normal(loc=0.0, scale=std, size=(n_components, n_features))
def _sparse_random_matrix(n_components, n_features, density="auto", random_state=None):
    """Generalized Achlioptas random sparse matrix for random projection.

    A density of 1 / 3 gives the original matrix by Dimitris Achlioptas;
    lower values give the generalization by Ping Li et al.

    If we note :math:`s = 1 / density`, the components of the random matrix are
    drawn from:

      - -sqrt(s) / sqrt(n_components)   with probability 1 / 2s
      -  0                              with probability 1 - 1 / s
      - +sqrt(s) / sqrt(n_components)   with probability 1 / 2s

    Read more in the :ref:`User Guide <sparse_random_matrix>`.

    Parameters
    ----------
    n_components : int,
        Dimensionality of the target projection space.

    n_features : int,
        Dimensionality of the original source space.

    density : float or 'auto', default='auto'
        Ratio of non-zero component in the random projection matrix in the
        range `(0, 1]`

        If density = 'auto', the value is set to the minimum density
        as recommended by Ping Li et al.: 1 / sqrt(n_features).

        Use density = 1 / 3.0 if you want to reproduce the results from
        Achlioptas, 2001.

    random_state : int, RandomState instance or None, default=None
        Controls the pseudo random number generator used to generate the matrix
        at fit time.
        Pass an int for reproducible output across multiple function calls.
        See :term:`Glossary <random_state>`.

    Returns
    -------
    components : {ndarray, sparse matrix} of shape (n_components, n_features)
        The generated random matrix. A dense ndarray is returned when the
        density is 1; otherwise the result is a sparse matrix in CSR format.

    See Also
    --------
    SparseRandomProjection

    References
    ----------

    .. [1] Ping Li, T. Hastie and K. W. Church, 2006,
           "Very Sparse Random Projections".
           https://web.stanford.edu/~hastie/Papers/Ping/KDD06_rp.pdf

    .. [2] D. Achlioptas, 2001, "Database-friendly random projections",
           https://cgi.di.uoa.gr/~optas/papers/jl.pdf
    """
    _check_input_size(n_components, n_features)
    density = _check_density(density, n_features)
    rng = check_random_state(random_state)

    if density == 1:
        # Fully dense: every entry is +-1, so no index sampling is needed.
        signs = rng.binomial(1, 0.5, (n_components, n_features)) * 2 - 1
        return 1 / np.sqrt(n_components) * signs

    # Draw, row by row, how many entries are non-zero and where they sit.
    # The order of the calls on ``rng`` determines the generated matrix.
    per_row_indices = []
    indptr = [0]
    for _ in range(n_components):
        row_nnz = rng.binomial(n_features, density)
        row_columns = sample_without_replacement(
            n_features, row_nnz, random_state=rng
        )
        per_row_indices.append(row_columns)
        indptr.append(indptr[-1] + row_nnz)

    indices = np.concatenate(per_row_indices)

    # Non-zero entries are +1 or -1 with equal probability.
    data = rng.binomial(1, 0.5, size=np.size(indices)) * 2 - 1

    # Assemble the CSR matrix from the concatenated rows.
    components = sp.csr_matrix(
        (data, indices, indptr), shape=(n_components, n_features)
    )

    return np.sqrt(1 / density) / np.sqrt(n_components) * components
class BaseRandomProjection(
    TransformerMixin, BaseEstimator, ClassNamePrefixFeaturesOutMixin, metaclass=ABCMeta
):
    """Base class for random projections.

    Warning: This class should not be used directly.
    Use derived classes instead.
    """

    _parameter_constraints: dict = {
        "n_components": [
            Interval(Integral, 1, None, closed="left"),
            StrOptions({"auto"}),
        ],
        "eps": [Interval(Real, 0, None, closed="neither")],
        "compute_inverse_components": ["boolean"],
        "random_state": ["random_state"],
    }

    @abstractmethod
    def __init__(
        self,
        n_components="auto",
        *,
        eps=0.1,
        compute_inverse_components=False,
        random_state=None,
    ):
        self.n_components = n_components
        self.eps = eps
        self.compute_inverse_components = compute_inverse_components
        self.random_state = random_state

    @abstractmethod
    def _make_random_matrix(self, n_components, n_features):
        """Generate the random projection matrix.

        Parameters
        ----------
        n_components : int,
            Dimensionality of the target projection space.

        n_features : int,
            Dimensionality of the original source space.

        Returns
        -------
        components : {ndarray, sparse matrix} of shape (n_components, n_features)
            The generated random matrix. Sparse matrix will be of CSR format.
        """

    def _compute_inverse_components(self):
        """Compute the pseudo-inverse of the (densified) components."""
        components = self.components_
        if sp.issparse(components):
            components = components.toarray()
        return linalg.pinv(components, check_finite=False)

    @_fit_context(prefer_skip_nested_validation=True)
    def fit(self, X, y=None):
        """Generate a random projection matrix.

        Parameters
        ----------
        X : {ndarray, sparse matrix} of shape (n_samples, n_features)
            Training set: only the shape is used to find optimal random
            matrix dimensions based on the theory referenced in the
            afore mentioned papers.

        y : Ignored
            Not used, present here for API consistency by convention.

        Returns
        -------
        self : object
            BaseRandomProjection class instance.

        Raises
        ------
        ValueError
            If `n_components="auto"` and the Johnson-Lindenstrauss bound for
            the given `eps` and number of samples is not positive or exceeds
            the number of features.
        """
        X = self._validate_data(
            X, accept_sparse=["csr", "csc"], dtype=[np.float64, np.float32]
        )

        n_samples, n_features = X.shape

        if self.n_components == "auto":
            self.n_components_ = johnson_lindenstrauss_min_dim(
                n_samples=n_samples, eps=self.eps
            )

            if self.n_components_ <= 0:
                raise ValueError(
                    "eps=%f and n_samples=%d lead to a target dimension of "
                    "%d which is invalid" % (self.eps, n_samples, self.n_components_)
                )

            elif self.n_components_ > n_features:
                raise ValueError(
                    "eps=%f and n_samples=%d lead to a target dimension of "
                    "%d which is larger than the original space with "
                    "n_features=%d"
                    % (self.eps, n_samples, self.n_components_, n_features)
                )
        else:
            if self.n_components > n_features:
                warnings.warn(
                    "The number of components is higher than the number of"
                    " features: n_features < n_components (%s < %s)."
                    "The dimensionality of the problem will not be reduced."
                    % (n_features, self.n_components),
                    DataDimensionalityWarning,
                )

            self.n_components_ = self.n_components

        # Generate a projection matrix of size [n_components, n_features]
        self.components_ = self._make_random_matrix(
            self.n_components_, n_features
        ).astype(X.dtype, copy=False)

        if self.compute_inverse_components:
            self.inverse_components_ = self._compute_inverse_components()

        # Required by ClassNamePrefixFeaturesOutMixin.get_feature_names_out.
        # Use the resolved ``n_components_``: the constructor parameter may
        # still be the string "auto", which is not a valid feature count.
        self._n_features_out = self.n_components_

        return self

    def inverse_transform(self, X):
        """Project data back to its original space.

        Returns an array X_original whose transform would be X. Note that even
        if X is sparse, X_original is dense: this may use a lot of RAM.

        If `compute_inverse_components` is False, the inverse of the components is
        computed during each call to `inverse_transform` which can be costly.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_components)
            Data to be transformed back.

        Returns
        -------
        X_original : ndarray of shape (n_samples, n_features)
            Reconstructed data.
        """
        check_is_fitted(self)

        X = check_array(X, dtype=[np.float64, np.float32], accept_sparse=("csr", "csc"))

        if self.compute_inverse_components:
            # Reuse the pseudo-inverse that was computed once during fit.
            return X @ self.inverse_components_.T

        inverse_components = self._compute_inverse_components()
        return X @ inverse_components.T

    def _more_tags(self):
        return {
            "preserves_dtype": [np.float64, np.float32],
        }
class GaussianRandomProjection(BaseRandomProjection):
    """Reduce dimensionality through Gaussian random projection.

    The components of the random matrix are drawn from N(0, 1 / n_components).

    Read more in the :ref:`User Guide <gaussian_random_matrix>`.

    .. versionadded:: 0.13

    Parameters
    ----------
    n_components : int or 'auto', default='auto'
        Dimensionality of the target projection space.

        n_components can be automatically adjusted according to the
        number of samples in the dataset and the bound given by the
        Johnson-Lindenstrauss lemma. In that case the quality of the
        embedding is controlled by the ``eps`` parameter.

        It should be noted that Johnson-Lindenstrauss lemma can yield
        very conservative estimates of the required number of components
        as it makes no assumption on the structure of the dataset.

    eps : float, default=0.1
        Parameter to control the quality of the embedding according to
        the Johnson-Lindenstrauss lemma when `n_components` is set to
        'auto'. The value should be strictly positive.

        Smaller values lead to better embedding and higher number of
        dimensions (n_components) in the target projection space.

    compute_inverse_components : bool, default=False
        Learn the inverse transform by computing the pseudo-inverse of the
        components during fit. Note that computing the pseudo-inverse does not
        scale well to large matrices.

    random_state : int, RandomState instance or None, default=None
        Controls the pseudo random number generator used to generate the
        projection matrix at fit time.
        Pass an int for reproducible output across multiple function calls.
        See :term:`Glossary <random_state>`.

    Attributes
    ----------
    n_components_ : int
        Concrete number of components computed when n_components="auto".

    components_ : ndarray of shape (n_components, n_features)
        Random matrix used for the projection.

    inverse_components_ : ndarray of shape (n_features, n_components)
        Pseudo-inverse of the components, only computed if
        `compute_inverse_components` is True.

        .. versionadded:: 1.1

    n_features_in_ : int
        Number of features seen during :term:`fit`.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    See Also
    --------
    SparseRandomProjection : Reduce dimensionality through sparse
        random projection.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.random_projection import GaussianRandomProjection
    >>> rng = np.random.RandomState(42)
    >>> X = rng.rand(25, 3000)
    >>> transformer = GaussianRandomProjection(random_state=rng)
    >>> X_new = transformer.fit_transform(X)
    >>> X_new.shape
    (25, 2759)
    """

    def __init__(
        self,
        n_components="auto",
        *,
        eps=0.1,
        compute_inverse_components=False,
        random_state=None,
    ):
        super().__init__(
            n_components=n_components,
            eps=eps,
            compute_inverse_components=compute_inverse_components,
            random_state=random_state,
        )

    def _make_random_matrix(self, n_components, n_features):
        """Generate the dense Gaussian projection matrix.

        Parameters
        ----------
        n_components : int,
            Dimensionality of the target projection space.

        n_features : int,
            Dimensionality of the original source space.

        Returns
        -------
        components : ndarray of shape (n_components, n_features)
            The generated random matrix.
        """
        rng = check_random_state(self.random_state)
        return _gaussian_random_matrix(n_components, n_features, random_state=rng)

    def transform(self, X):
        """Project the data by using matrix product with the random matrix.

        Parameters
        ----------
        X : {ndarray, sparse matrix} of shape (n_samples, n_features)
            The input data to project into a smaller dimensional space.

        Returns
        -------
        X_new : ndarray of shape (n_samples, n_components)
            Projected array.
        """
        check_is_fitted(self)
        accepted_dtypes = [np.float64, np.float32]
        X = self._validate_data(
            X, accept_sparse=["csr", "csc"], reset=False, dtype=accepted_dtypes
        )
        projection = self.components_.T
        return X @ projection
class SparseRandomProjection(BaseRandomProjection):
    """Reduce dimensionality through sparse random projection.

    Sparse random matrix is an alternative to dense random
    projection matrix that guarantees similar embedding quality while being
    much more memory efficient and allowing faster computation of the
    projected data.

    If we note `s = 1 / density` the components of the random matrix are
    drawn from:

      - -sqrt(s) / sqrt(n_components)   with probability 1 / 2s
      -  0                              with probability 1 - 1 / s
      - +sqrt(s) / sqrt(n_components)   with probability 1 / 2s

    Read more in the :ref:`User Guide <sparse_random_matrix>`.

    .. versionadded:: 0.13

    Parameters
    ----------
    n_components : int or 'auto', default='auto'
        Dimensionality of the target projection space.

        n_components can be automatically adjusted according to the
        number of samples in the dataset and the bound given by the
        Johnson-Lindenstrauss lemma. In that case the quality of the
        embedding is controlled by the ``eps`` parameter.

        It should be noted that Johnson-Lindenstrauss lemma can yield
        very conservative estimates of the required number of components
        as it makes no assumption on the structure of the dataset.

    density : float or 'auto', default='auto'
        Ratio in the range (0, 1] of non-zero component in the random
        projection matrix.

        If density = 'auto', the value is set to the minimum density
        as recommended by Ping Li et al.: 1 / sqrt(n_features).

        Use density = 1 / 3.0 if you want to reproduce the results from
        Achlioptas, 2001.

    eps : float, default=0.1
        Parameter to control the quality of the embedding according to
        the Johnson-Lindenstrauss lemma when n_components is set to
        'auto'. This value should be strictly positive.

        Smaller values lead to better embedding and higher number of
        dimensions (n_components) in the target projection space.

    dense_output : bool, default=False
        If True, ensure that the output of the random projection is a
        dense numpy array even if the input and random projection matrix
        are both sparse. In practice, if the number of components is
        small the number of zero components in the projected data will
        be very small and it will be more CPU and memory efficient to
        use a dense representation.

        If False, the projected data uses a sparse representation if
        the input is sparse.

    compute_inverse_components : bool, default=False
        Learn the inverse transform by computing the pseudo-inverse of the
        components during fit. Note that the pseudo-inverse is always a dense
        array, even if the training data was sparse. This means that it might be
        necessary to call `inverse_transform` on a small batch of samples at a
        time to avoid exhausting the available memory on the host. Moreover,
        computing the pseudo-inverse does not scale well to large matrices.

    random_state : int, RandomState instance or None, default=None
        Controls the pseudo random number generator used to generate the
        projection matrix at fit time.
        Pass an int for reproducible output across multiple function calls.
        See :term:`Glossary <random_state>`.

    Attributes
    ----------
    n_components_ : int
        Concrete number of components computed when n_components="auto".

    components_ : sparse matrix of shape (n_components, n_features)
        Random matrix used for the projection. Sparse matrix will be of CSR
        format.

    inverse_components_ : ndarray of shape (n_features, n_components)
        Pseudo-inverse of the components, only computed if
        `compute_inverse_components` is True.

        .. versionadded:: 1.1

    density_ : float in range 0.0 - 1.0
        Concrete density computed from when density = "auto".

    n_features_in_ : int
        Number of features seen during :term:`fit`.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    See Also
    --------
    GaussianRandomProjection : Reduce dimensionality through Gaussian
        random projection.

    References
    ----------

    .. [1] Ping Li, T. Hastie and K. W. Church, 2006,
           "Very Sparse Random Projections".
           https://web.stanford.edu/~hastie/Papers/Ping/KDD06_rp.pdf

    .. [2] D. Achlioptas, 2001, "Database-friendly random projections",
           https://cgi.di.uoa.gr/~optas/papers/jl.pdf

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.random_projection import SparseRandomProjection
    >>> rng = np.random.RandomState(42)
    >>> X = rng.rand(25, 3000)
    >>> transformer = SparseRandomProjection(random_state=rng)
    >>> X_new = transformer.fit_transform(X)
    >>> X_new.shape
    (25, 2759)
    >>> # very few components are non-zero
    >>> np.mean(transformer.components_ != 0)
    0.0182...
    """

    _parameter_constraints: dict = {
        **BaseRandomProjection._parameter_constraints,
        "density": [Interval(Real, 0.0, 1.0, closed="right"), StrOptions({"auto"})],
        "dense_output": ["boolean"],
    }

    def __init__(
        self,
        n_components="auto",
        *,
        density="auto",
        eps=0.1,
        dense_output=False,
        compute_inverse_components=False,
        random_state=None,
    ):
        super().__init__(
            n_components=n_components,
            eps=eps,
            compute_inverse_components=compute_inverse_components,
            random_state=random_state,
        )

        self.density = density
        self.dense_output = dense_output

    def _make_random_matrix(self, n_components, n_features):
        """Generate the sparse projection matrix.

        The resolved density is stored on the estimator as ``density_``
        before the matrix is drawn.

        Parameters
        ----------
        n_components : int
            Dimensionality of the target projection space.

        n_features : int
            Dimensionality of the original source space.

        Returns
        -------
        components : sparse matrix of shape (n_components, n_features)
            The generated random matrix in CSR format.
        """
        rng = check_random_state(self.random_state)
        self.density_ = _check_density(self.density, n_features)
        return _sparse_random_matrix(
            n_components, n_features, density=self.density_, random_state=rng
        )

    def transform(self, X):
        """Project the data by using matrix product with the random matrix.

        Parameters
        ----------
        X : {ndarray, sparse matrix} of shape (n_samples, n_features)
            The input data to project into a smaller dimensional space.

        Returns
        -------
        X_new : {ndarray, sparse matrix} of shape (n_samples, n_components)
            Projected array. It is a sparse matrix only when the input is sparse and
            `dense_output = False`.
        """
        check_is_fitted(self)
        accepted_dtypes = [np.float64, np.float32]
        X = self._validate_data(
            X, accept_sparse=["csr", "csc"], reset=False, dtype=accepted_dtypes
        )
        projection = self.components_.T
        return safe_sparse_dot(X, projection, dense_output=self.dense_output)
|