- """
- The :mod:`sklearn.kernel_approximation` module implements several
- approximate kernel feature maps based on Fourier transforms and Count Sketches.
- """
- # Author: Andreas Mueller <amueller@ais.uni-bonn.de>
- # Daniel Lopez-Sanchez (TensorSketch) <lope@usal.es>
- # License: BSD 3 clause
- import warnings
- from numbers import Integral, Real
- import numpy as np
- import scipy.sparse as sp
- from scipy.linalg import svd
- try:
- from scipy.fft import fft, ifft
- except ImportError: # scipy < 1.4
- from scipy.fftpack import fft, ifft
- from .base import (
- BaseEstimator,
- ClassNamePrefixFeaturesOutMixin,
- TransformerMixin,
- _fit_context,
- )
- from .metrics.pairwise import KERNEL_PARAMS, PAIRWISE_KERNEL_FUNCTIONS, pairwise_kernels
- from .utils import check_random_state, deprecated
- from .utils._param_validation import Interval, StrOptions
- from .utils.extmath import safe_sparse_dot
- from .utils.validation import (
- _check_feature_names_in,
- check_is_fitted,
- check_non_negative,
- )
- class PolynomialCountSketch(
- ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator
- ):
- """Polynomial kernel approximation via Tensor Sketch.
- Implements Tensor Sketch, which approximates the feature map
- of the polynomial kernel::
- K(X, Y) = (gamma * <X, Y> + coef0)^degree
- by efficiently computing a Count Sketch of the outer product of a
- vector with itself using Fast Fourier Transforms (FFT). Read more in the
- :ref:`User Guide <polynomial_kernel_approx>`.
- .. versionadded:: 0.24
- Parameters
- ----------
- gamma : float, default=1.0
- Parameter of the polynomial kernel whose feature map
- will be approximated.
- degree : int, default=2
- Degree of the polynomial kernel whose feature map
- will be approximated.
- coef0 : int, default=0
- Constant term of the polynomial kernel whose feature map
- will be approximated.
- n_components : int, default=100
- Dimensionality of the output feature space. Usually, `n_components`
- should be greater than the number of features in input samples in
- order to achieve good performance. The optimal score / run time
- balance is typically achieved around `n_components` = 10 * `n_features`,
- but this depends on the specific dataset being used.
- random_state : int, RandomState instance, default=None
- Determines random number generation for indexHash and bitHash
- initialization. Pass an int for reproducible results across multiple
- function calls. See :term:`Glossary <random_state>`.
- Attributes
- ----------
- indexHash_ : ndarray of shape (degree, n_features), dtype=int64
- Array of indexes in range [0, n_components) used to represent
- the 2-wise independent hash functions for Count Sketch computation.
- bitHash_ : ndarray of shape (degree, n_features), dtype=int64
- Array with random entries in {+1, -1}, used to represent
- the 2-wise independent hash functions for Count Sketch computation.
- n_features_in_ : int
- Number of features seen during :term:`fit`.
- .. versionadded:: 0.24
- feature_names_in_ : ndarray of shape (`n_features_in_`,)
- Names of features seen during :term:`fit`. Defined only when `X`
- has feature names that are all strings.
- .. versionadded:: 1.0
- See Also
- --------
- AdditiveChi2Sampler : Approximate feature map for additive chi2 kernel.
- Nystroem : Approximate a kernel map using a subset of the training data.
- RBFSampler : Approximate an RBF kernel feature map using random Fourier
- features.
- SkewedChi2Sampler : Approximate feature map for "skewed chi-squared" kernel.
- sklearn.metrics.pairwise.kernel_metrics : List of built-in kernels.
- Examples
- --------
- >>> from sklearn.kernel_approximation import PolynomialCountSketch
- >>> from sklearn.linear_model import SGDClassifier
- >>> X = [[0, 0], [1, 1], [1, 0], [0, 1]]
- >>> y = [0, 0, 1, 1]
- >>> ps = PolynomialCountSketch(degree=3, random_state=1)
- >>> X_features = ps.fit_transform(X)
- >>> clf = SGDClassifier(max_iter=10, tol=1e-3)
- >>> clf.fit(X_features, y)
- SGDClassifier(max_iter=10)
- >>> clf.score(X_features, y)
- 1.0
- """
- _parameter_constraints: dict = {
- "gamma": [Interval(Real, 0, None, closed="left")],
- "degree": [Interval(Integral, 1, None, closed="left")],
- "coef0": [Interval(Real, None, None, closed="neither")],
- "n_components": [Interval(Integral, 1, None, closed="left")],
- "random_state": ["random_state"],
- }
- def __init__(
- self, *, gamma=1.0, degree=2, coef0=0, n_components=100, random_state=None
- ):
- self.gamma = gamma
- self.degree = degree
- self.coef0 = coef0
- self.n_components = n_components
- self.random_state = random_state
- @_fit_context(prefer_skip_nested_validation=True)
- def fit(self, X, y=None):
- """Fit the model with X.
- Initializes the internal variables. The method needs no information
- about the distribution of data, so we only care about n_features in X.
- Parameters
- ----------
- X : {array-like, sparse matrix} of shape (n_samples, n_features)
- Training data, where `n_samples` is the number of samples
- and `n_features` is the number of features.
- y : array-like of shape (n_samples,) or (n_samples, n_outputs), \
- default=None
- Target values (None for unsupervised transformations).
- Returns
- -------
- self : object
- Returns the instance itself.
- """
- X = self._validate_data(X, accept_sparse="csc")
- random_state = check_random_state(self.random_state)
- n_features = X.shape[1]
- if self.coef0 != 0:
- n_features += 1
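- # indexHash_ and bitHash_ realize `degree` independent Count Sketch hash
- # functions: indexHash_ acts as the bucket hash h: [n_features] -> [n_components]
- # and bitHash_ as the sign hash s: [n_features] -> {-1, +1}.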
- self.indexHash_ = random_state.randint(
- 0, high=self.n_components, size=(self.degree, n_features)
- )
- self.bitHash_ = random_state.choice(a=[-1, 1], size=(self.degree, n_features))
- self._n_features_out = self.n_components
- return self
- def transform(self, X):
- """Generate the feature map approximation for X.
- Parameters
- ----------
- X : {array-like, sparse matrix}, shape (n_samples, n_features)
- New data, where `n_samples` is the number of samples
- and `n_features` is the number of features.
- Returns
- -------
- X_new : array-like, shape (n_samples, n_components)
- The approximate polynomial kernel feature map of X.
- """
- check_is_fitted(self)
- X = self._validate_data(X, accept_sparse="csc", reset=False)
- X_gamma = np.sqrt(self.gamma) * X
- if sp.issparse(X_gamma) and self.coef0 != 0:
- X_gamma = sp.hstack(
- [X_gamma, np.sqrt(self.coef0) * np.ones((X_gamma.shape[0], 1))],
- format="csc",
- )
- elif not sp.issparse(X_gamma) and self.coef0 != 0:
- X_gamma = np.hstack(
- [X_gamma, np.sqrt(self.coef0) * np.ones((X_gamma.shape[0], 1))]
- )
- if X_gamma.shape[1] != self.indexHash_.shape[1]:
- raise ValueError(
- "Number of features of test samples does not"
- " match that of training samples."
- )
- count_sketches = np.zeros((X_gamma.shape[0], self.degree, self.n_components))
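- # Accumulate the `degree` count sketches: for each degree d and feature j,
- # CS_d(x)[indexHash_[d, j]] += bitHash_[d, j] * x_j, i.e. each sketch is a
- # signed random bucketing of the (scaled) input coordinates.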
- if sp.issparse(X_gamma):
- for j in range(X_gamma.shape[1]):
- for d in range(self.degree):
- iHashIndex = self.indexHash_[d, j]
- iHashBit = self.bitHash_[d, j]
- count_sketches[:, d, iHashIndex] += (
- (iHashBit * X_gamma[:, j]).toarray().ravel()
- )
- else:
- for j in range(X_gamma.shape[1]):
- for d in range(self.degree):
- iHashIndex = self.indexHash_[d, j]
- iHashBit = self.bitHash_[d, j]
- count_sketches[:, d, iHashIndex] += iHashBit * X_gamma[:, j]
- # For each sample, compute a count sketch of phi(x) using the polynomial
- # multiplication (via FFT) of p count sketches of x.
- count_sketches_fft = fft(count_sketches, axis=2, overwrite_x=True)
- count_sketches_fft_prod = np.prod(count_sketches_fft, axis=1)
- data_sketch = np.real(ifft(count_sketches_fft_prod, overwrite_x=True))
- return data_sketch
- class RBFSampler(ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):
- """Approximate a RBF kernel feature map using random Fourier features.
- It implements a variant of Random Kitchen Sinks.[1]
- Read more in the :ref:`User Guide <rbf_kernel_approx>`.
- Parameters
- ----------
- gamma : 'scale' or float, default=1.0
- Parameter of the RBF kernel: exp(-gamma * ||x - y||^2).
- If ``gamma='scale'`` is passed then it uses
- 1 / (n_features * X.var()) as value of gamma.
- .. versionadded:: 1.2
- The option `"scale"` was added in 1.2.
- n_components : int, default=100
- Number of Monte Carlo samples per original feature.
- Equals the dimensionality of the computed feature space.
- random_state : int, RandomState instance or None, default=None
- Pseudo-random number generator to control the generation of the random
- weights and random offset when fitting the training data.
- Pass an int for reproducible output across multiple function calls.
- See :term:`Glossary <random_state>`.
- Attributes
- ----------
- random_offset_ : ndarray of shape (n_components,), dtype={np.float64, np.float32}
- Random offset used to compute the projection in the `n_components`
- dimensions of the feature space.
- random_weights_ : ndarray of shape (n_features, n_components),\
- dtype={np.float64, np.float32}
- Random projection directions drawn from the Fourier transform
- of the RBF kernel.
- n_features_in_ : int
- Number of features seen during :term:`fit`.
- .. versionadded:: 0.24
- feature_names_in_ : ndarray of shape (`n_features_in_`,)
- Names of features seen during :term:`fit`. Defined only when `X`
- has feature names that are all strings.
- .. versionadded:: 1.0
- See Also
- --------
- AdditiveChi2Sampler : Approximate feature map for additive chi2 kernel.
- Nystroem : Approximate a kernel map using a subset of the training data.
- PolynomialCountSketch : Polynomial kernel approximation via Tensor Sketch.
- SkewedChi2Sampler : Approximate feature map for
- "skewed chi-squared" kernel.
- sklearn.metrics.pairwise.kernel_metrics : List of built-in kernels.
- Notes
- -----
- See "Random Features for Large-Scale Kernel Machines" by A. Rahimi and
- Benjamin Recht.
- [1] "Weighted Sums of Random Kitchen Sinks: Replacing
- minimization with randomization in learning" by A. Rahimi and
- Benjamin Recht.
- (https://people.eecs.berkeley.edu/~brecht/papers/08.rah.rec.nips.pdf)
- Examples
- --------
- >>> from sklearn.kernel_approximation import RBFSampler
- >>> from sklearn.linear_model import SGDClassifier
- >>> X = [[0, 0], [1, 1], [1, 0], [0, 1]]
- >>> y = [0, 0, 1, 1]
- >>> rbf_feature = RBFSampler(gamma=1, random_state=1)
- >>> X_features = rbf_feature.fit_transform(X)
- >>> clf = SGDClassifier(max_iter=5, tol=1e-3)
- >>> clf.fit(X_features, y)
- SGDClassifier(max_iter=5)
- >>> clf.score(X_features, y)
- 1.0
- """
- _parameter_constraints: dict = {
- "gamma": [
- StrOptions({"scale"}),
- Interval(Real, 0.0, None, closed="left"),
- ],
- "n_components": [Interval(Integral, 1, None, closed="left")],
- "random_state": ["random_state"],
- }
- def __init__(self, *, gamma=1.0, n_components=100, random_state=None):
- self.gamma = gamma
- self.n_components = n_components
- self.random_state = random_state
- @_fit_context(prefer_skip_nested_validation=True)
- def fit(self, X, y=None):
- """Fit the model with X.
- Samples random projection according to n_features.
- Parameters
- ----------
- X : {array-like, sparse matrix}, shape (n_samples, n_features)
- Training data, where `n_samples` is the number of samples
- and `n_features` is the number of features.
- y : array-like, shape (n_samples,) or (n_samples, n_outputs), \
- default=None
- Target values (None for unsupervised transformations).
- Returns
- -------
- self : object
- Returns the instance itself.
- """
- X = self._validate_data(X, accept_sparse="csr")
- random_state = check_random_state(self.random_state)
- n_features = X.shape[1]
- sparse = sp.issparse(X)
- if self.gamma == "scale":
- # var = E[X^2] - E[X]^2 if sparse
- X_var = (X.multiply(X)).mean() - (X.mean()) ** 2 if sparse else X.var()
- self._gamma = 1.0 / (n_features * X_var) if X_var != 0 else 1.0
- else:
- self._gamma = self.gamma
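- # The Fourier transform of the RBF kernel exp(-gamma * ||x - y||^2) is a
- # Gaussian with covariance 2 * gamma * I, so the projection directions are
- # drawn as sqrt(2 * gamma) times standard normal samples.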
- self.random_weights_ = (2.0 * self._gamma) ** 0.5 * random_state.normal(
- size=(n_features, self.n_components)
- )
- self.random_offset_ = random_state.uniform(0, 2 * np.pi, size=self.n_components)
- if X.dtype == np.float32:
- # Setting the data type of the fitted attribute will ensure the
- # output data type during `transform`.
- self.random_weights_ = self.random_weights_.astype(X.dtype, copy=False)
- self.random_offset_ = self.random_offset_.astype(X.dtype, copy=False)
- self._n_features_out = self.n_components
- return self
- def transform(self, X):
- """Apply the approximate feature map to X.
- Parameters
- ----------
- X : {array-like, sparse matrix}, shape (n_samples, n_features)
- New data, where `n_samples` is the number of samples
- and `n_features` is the number of features.
- Returns
- -------
- X_new : array-like, shape (n_samples, n_components)
- The approximate RBF kernel feature map of X.
- """
- check_is_fitted(self)
- X = self._validate_data(X, accept_sparse="csr", reset=False)
- projection = safe_sparse_dot(X, self.random_weights_)
- projection += self.random_offset_
- np.cos(projection, projection)
- projection *= (2.0 / self.n_components) ** 0.5
- return projection
- def _more_tags(self):
- return {"preserves_dtype": [np.float64, np.float32]}
- class SkewedChi2Sampler(
- ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator
- ):
- """Approximate feature map for "skewed chi-squared" kernel.
- Read more in the :ref:`User Guide <skewed_chi_kernel_approx>`.
- Parameters
- ----------
- skewedness : float, default=1.0
- "skewedness" parameter of the kernel. Needs to be cross-validated.
- n_components : int, default=100
- Number of Monte Carlo samples per original feature.
- Equals the dimensionality of the computed feature space.
- random_state : int, RandomState instance or None, default=None
- Pseudo-random number generator to control the generation of the random
- weights and random offset when fitting the training data.
- Pass an int for reproducible output across multiple function calls.
- See :term:`Glossary <random_state>`.
- Attributes
- ----------
- random_weights_ : ndarray of shape (n_features, n_components)
- Weight array, sampled from a secant hyperbolic distribution, which will
- be used to linearly transform the log of the data.
- random_offset_ : ndarray of shape (n_components,)
- Bias term added to the projected data, drawn uniformly from
- [0, 2*pi).
- n_features_in_ : int
- Number of features seen during :term:`fit`.
- .. versionadded:: 0.24
- feature_names_in_ : ndarray of shape (`n_features_in_`,)
- Names of features seen during :term:`fit`. Defined only when `X`
- has feature names that are all strings.
- .. versionadded:: 1.0
- See Also
- --------
- AdditiveChi2Sampler : Approximate feature map for additive chi2 kernel.
- Nystroem : Approximate a kernel map using a subset of the training data.
- RBFSampler : Approximate an RBF kernel feature map using random Fourier
- features.
- sklearn.metrics.pairwise.chi2_kernel : The exact chi squared kernel.
- sklearn.metrics.pairwise.kernel_metrics : List of built-in kernels.
- References
- ----------
- See "Random Fourier Approximations for Skewed Multiplicative Histogram
- Kernels" by Fuxin Li, Catalin Ionescu and Cristian Sminchisescu.
- Examples
- --------
- >>> from sklearn.kernel_approximation import SkewedChi2Sampler
- >>> from sklearn.linear_model import SGDClassifier
- >>> X = [[0, 0], [1, 1], [1, 0], [0, 1]]
- >>> y = [0, 0, 1, 1]
- >>> chi2_feature = SkewedChi2Sampler(skewedness=.01,
- ... n_components=10,
- ... random_state=0)
- >>> X_features = chi2_feature.fit_transform(X, y)
- >>> clf = SGDClassifier(max_iter=10, tol=1e-3)
- >>> clf.fit(X_features, y)
- SGDClassifier(max_iter=10)
- >>> clf.score(X_features, y)
- 1.0
- """
- _parameter_constraints: dict = {
- "skewedness": [Interval(Real, None, None, closed="neither")],
- "n_components": [Interval(Integral, 1, None, closed="left")],
- "random_state": ["random_state"],
- }
- def __init__(self, *, skewedness=1.0, n_components=100, random_state=None):
- self.skewedness = skewedness
- self.n_components = n_components
- self.random_state = random_state
- @_fit_context(prefer_skip_nested_validation=True)
- def fit(self, X, y=None):
- """Fit the model with X.
- Samples random projection according to n_features.
- Parameters
- ----------
- X : array-like, shape (n_samples, n_features)
- Training data, where `n_samples` is the number of samples
- and `n_features` is the number of features.
- y : array-like, shape (n_samples,) or (n_samples, n_outputs), \
- default=None
- Target values (None for unsupervised transformations).
- Returns
- -------
- self : object
- Returns the instance itself.
- """
- X = self._validate_data(X)
- random_state = check_random_state(self.random_state)
- n_features = X.shape[1]
- uniform = random_state.uniform(size=(n_features, self.n_components))
- # transform by inverse CDF of sech
- self.random_weights_ = 1.0 / np.pi * np.log(np.tan(np.pi / 2.0 * uniform))
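- # Per the Li, Ionescu & Sminchisescu reference, the skewed chi-squared
- # kernel corresponds to a hyperbolic secant spectral density in log space,
- # which the inverse-CDF transform above samples from.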
- self.random_offset_ = random_state.uniform(0, 2 * np.pi, size=self.n_components)
- if X.dtype == np.float32:
- # Setting the data type of the fitted attribute will ensure the
- # output data type during `transform`.
- self.random_weights_ = self.random_weights_.astype(X.dtype, copy=False)
- self.random_offset_ = self.random_offset_.astype(X.dtype, copy=False)
- self._n_features_out = self.n_components
- return self
- def transform(self, X):
- """Apply the approximate feature map to X.
- Parameters
- ----------
- X : array-like, shape (n_samples, n_features)
- New data, where `n_samples` is the number of samples
- and `n_features` is the number of features. All values of X must be
- strictly greater than "-skewedness".
- Returns
- -------
- X_new : array-like, shape (n_samples, n_components)
- The approximate skewed chi-squared kernel feature map of X.
- """
- check_is_fitted(self)
- X = self._validate_data(
- X, copy=True, dtype=[np.float64, np.float32], reset=False
- )
- if (X <= -self.skewedness).any():
- raise ValueError("X may not contain entries smaller than -skewedness.")
- X += self.skewedness
- np.log(X, X)
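- # The feature map acts on log(X + skewedness); from here the projection
- # mirrors RBFSampler: a linear projection, a random phase offset, an
- # in-place cosine, and sqrt(2 / n_components) scaling.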
- projection = safe_sparse_dot(X, self.random_weights_)
- projection += self.random_offset_
- np.cos(projection, projection)
- projection *= np.sqrt(2.0) / np.sqrt(self.n_components)
- return projection
- def _more_tags(self):
- return {"preserves_dtype": [np.float64, np.float32]}
- class AdditiveChi2Sampler(TransformerMixin, BaseEstimator):
- """Approximate feature map for additive chi2 kernel.
- Uses sampling of the Fourier transform of the kernel characteristic
- at regular intervals.
- Since the kernel that is to be approximated is additive, the components of
- the input vectors can be treated separately. Each entry in the original
- space is transformed into 2*sample_steps-1 features, where sample_steps is
- a parameter of the method. Typical values of sample_steps include 1, 2 and
- 3.
- Optimal choices for the sampling interval for certain data ranges can be
- computed (see the reference). The default values should be reasonable.
- Read more in the :ref:`User Guide <additive_chi_kernel_approx>`.
- Parameters
- ----------
- sample_steps : int, default=2
- Gives the number of (complex) sampling points.
- sample_interval : float, default=None
- Sampling interval. Must be specified when `sample_steps` is not in {1, 2, 3}.
- Attributes
- ----------
- sample_interval_ : float
- Stored sampling interval. Specified as a parameter if `sample_steps`
- is not in {1, 2, 3}.
- .. deprecated:: 1.3
- `sample_interval_` serves internal purposes only and will be removed in 1.5.
- n_features_in_ : int
- Number of features seen during :term:`fit`.
- .. versionadded:: 0.24
- feature_names_in_ : ndarray of shape (`n_features_in_`,)
- Names of features seen during :term:`fit`. Defined only when `X`
- has feature names that are all strings.
- .. versionadded:: 1.0
- See Also
- --------
- SkewedChi2Sampler : A Fourier-approximation to a non-additive variant of
- the chi squared kernel.
- sklearn.metrics.pairwise.chi2_kernel : The exact chi squared kernel.
- sklearn.metrics.pairwise.additive_chi2_kernel : The exact additive chi
- squared kernel.
- Notes
- -----
- This estimator approximates a slightly different version of the additive
- chi squared kernel than ``sklearn.metrics.pairwise.additive_chi2_kernel`` computes.
- This estimator is stateless and does not need to be fitted. However, we
- recommend calling :meth:`fit_transform` instead of :meth:`transform`, as
- parameter validation is only performed in :meth:`fit`.
- References
- ----------
- See `"Efficient additive kernels via explicit feature maps"
- <http://www.robots.ox.ac.uk/~vedaldi/assets/pubs/vedaldi11efficient.pdf>`_
- A. Vedaldi and A. Zisserman, Pattern Analysis and Machine Intelligence,
- 2011
- Examples
- --------
- >>> from sklearn.datasets import load_digits
- >>> from sklearn.linear_model import SGDClassifier
- >>> from sklearn.kernel_approximation import AdditiveChi2Sampler
- >>> X, y = load_digits(return_X_y=True)
- >>> chi2sampler = AdditiveChi2Sampler(sample_steps=2)
- >>> X_transformed = chi2sampler.fit_transform(X, y)
- >>> clf = SGDClassifier(max_iter=5, random_state=0, tol=1e-3)
- >>> clf.fit(X_transformed, y)
- SGDClassifier(max_iter=5, random_state=0)
- >>> clf.score(X_transformed, y)
- 0.9499...
- """
- _parameter_constraints: dict = {
- "sample_steps": [Interval(Integral, 1, None, closed="left")],
- "sample_interval": [Interval(Real, 0, None, closed="left"), None],
- }
- def __init__(self, *, sample_steps=2, sample_interval=None):
- self.sample_steps = sample_steps
- self.sample_interval = sample_interval
- @_fit_context(prefer_skip_nested_validation=True)
- def fit(self, X, y=None):
- """Only validates estimator's parameters.
- This method allows one to: (i) validate the estimator's parameters and
- (ii) be consistent with the scikit-learn transformer API.
- Parameters
- ----------
- X : array-like, shape (n_samples, n_features)
- Training data, where `n_samples` is the number of samples
- and `n_features` is the number of features.
- y : array-like, shape (n_samples,) or (n_samples, n_outputs), \
- default=None
- Target values (None for unsupervised transformations).
- Returns
- -------
- self : object
- Returns the transformer.
- """
- X = self._validate_data(X, accept_sparse="csr")
- check_non_negative(X, "X in AdditiveChi2Sampler.fit")
- # TODO(1.5): remove the setting of _sample_interval from fit
- if self.sample_interval is None:
- # See figure 2 c) of "Efficient additive kernels via explicit feature maps"
- # <http://www.robots.ox.ac.uk/~vedaldi/assets/pubs/vedaldi11efficient.pdf>
- # A. Vedaldi and A. Zisserman, Pattern Analysis and Machine Intelligence,
- # 2011
- if self.sample_steps == 1:
- self._sample_interval = 0.8
- elif self.sample_steps == 2:
- self._sample_interval = 0.5
- elif self.sample_steps == 3:
- self._sample_interval = 0.4
- else:
- raise ValueError(
- "If sample_steps is not in [1, 2, 3],"
- " you need to provide sample_interval"
- )
- else:
- self._sample_interval = self.sample_interval
- return self
- # TODO(1.5): remove
- @deprecated( # type: ignore
- "The ``sample_interval_`` attribute was deprecated in version 1.3 and "
- "will be removed 1.5."
- )
- @property
- def sample_interval_(self):
- return self._sample_interval
- def transform(self, X):
- """Apply approximate feature map to X.
- Parameters
- ----------
- X : {array-like, sparse matrix}, shape (n_samples, n_features)
- Training data, where `n_samples` is the number of samples
- and `n_features` is the number of features.
- Returns
- -------
- X_new : {ndarray, sparse matrix}, \
- shape = (n_samples, n_features * (2*sample_steps - 1))
- Whether the return value is an array or sparse matrix depends on
- the type of the input X.
- """
- X = self._validate_data(X, accept_sparse="csr", reset=False)
- check_non_negative(X, "X in AdditiveChi2Sampler.transform")
- sparse = sp.issparse(X)
- if hasattr(self, "_sample_interval"):
- # TODO(1.5): remove this branch
- sample_interval = self._sample_interval
- else:
- if self.sample_interval is None:
- # See figure 2 c) of "Efficient additive kernels via explicit feature maps" # noqa
- # <http://www.robots.ox.ac.uk/~vedaldi/assets/pubs/vedaldi11efficient.pdf>
- # A. Vedaldi and A. Zisserman, Pattern Analysis and Machine Intelligence, # noqa
- # 2011
- if self.sample_steps == 1:
- sample_interval = 0.8
- elif self.sample_steps == 2:
- sample_interval = 0.5
- elif self.sample_steps == 3:
- sample_interval = 0.4
- else:
- raise ValueError(
- "If sample_steps is not in [1, 2, 3],"
- " you need to provide sample_interval"
- )
- else:
- sample_interval = self.sample_interval
- # The zeroth component reduces to sqrt(X * sample_interval), since
- # 1/cosh = sech and cosh(0) = 1.
- transf = self._transform_sparse if sparse else self._transform_dense
- return transf(X, self.sample_steps, sample_interval)
- def get_feature_names_out(self, input_features=None):
- """Get output feature names for transformation.
- Parameters
- ----------
- input_features : array-like of str or None, default=None
- Only used to validate feature names with the names seen in :meth:`fit`.
- Returns
- -------
- feature_names_out : ndarray of str objects
- Transformed feature names.
- """
- check_is_fitted(self, "n_features_in_")
- input_features = _check_feature_names_in(
- self, input_features, generate_names=True
- )
- est_name = self.__class__.__name__.lower()
- names_list = [f"{est_name}_{name}_sqrt" for name in input_features]
- for j in range(1, self.sample_steps):
- cos_names = [f"{est_name}_{name}_cos{j}" for name in input_features]
- sin_names = [f"{est_name}_{name}_sin{j}" for name in input_features]
- names_list.extend(cos_names + sin_names)
- return np.asarray(names_list, dtype=object)
- @staticmethod
- def _transform_dense(X, sample_steps, sample_interval):
- non_zero = X != 0.0
- X_nz = X[non_zero]
- X_step = np.zeros_like(X)
- X_step[non_zero] = np.sqrt(X_nz * sample_interval)
- X_new = [X_step]
- log_step_nz = sample_interval * np.log(X_nz)
- step_nz = 2 * X_nz * sample_interval
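- # For j >= 1, the sampled spectrum of the additive chi2 kernel yields the
- # feature pair sqrt(2 * x * L / cosh(pi * j * L)) * (cos, sin)(j * L * log x),
- # with L = sample_interval (see the Vedaldi & Zisserman reference).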
- for j in range(1, sample_steps):
- factor_nz = np.sqrt(step_nz / np.cosh(np.pi * j * sample_interval))
- X_step = np.zeros_like(X)
- X_step[non_zero] = factor_nz * np.cos(j * log_step_nz)
- X_new.append(X_step)
- X_step = np.zeros_like(X)
- X_step[non_zero] = factor_nz * np.sin(j * log_step_nz)
- X_new.append(X_step)
- return np.hstack(X_new)
- @staticmethod
- def _transform_sparse(X, sample_steps, sample_interval):
- indices = X.indices.copy()
- indptr = X.indptr.copy()
- data_step = np.sqrt(X.data * sample_interval)
- X_step = sp.csr_matrix(
- (data_step, indices, indptr), shape=X.shape, dtype=X.dtype, copy=False
- )
- X_new = [X_step]
- log_step_nz = sample_interval * np.log(X.data)
- step_nz = 2 * X.data * sample_interval
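- # Same recurrence as _transform_dense, applied directly to the CSR data
- # array so that zero entries never materialize.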
- for j in range(1, sample_steps):
- factor_nz = np.sqrt(step_nz / np.cosh(np.pi * j * sample_interval))
- data_step = factor_nz * np.cos(j * log_step_nz)
- X_step = sp.csr_matrix(
- (data_step, indices, indptr), shape=X.shape, dtype=X.dtype, copy=False
- )
- X_new.append(X_step)
- data_step = factor_nz * np.sin(j * log_step_nz)
- X_step = sp.csr_matrix(
- (data_step, indices, indptr), shape=X.shape, dtype=X.dtype, copy=False
- )
- X_new.append(X_step)
- return sp.hstack(X_new)
- def _more_tags(self):
- return {"stateless": True, "requires_positive_X": True}
- class Nystroem(ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):
- """Approximate a kernel map using a subset of the training data.
- Constructs an approximate feature map for an arbitrary kernel
- using a subset of the data as basis.
- Read more in the :ref:`User Guide <nystroem_kernel_approx>`.
- .. versionadded:: 0.13
- Parameters
- ----------
- kernel : str or callable, default='rbf'
- Kernel map to be approximated. A callable should accept two arguments
- and the keyword arguments passed to this object as `kernel_params`, and
- should return a floating point number.
- gamma : float, default=None
- Gamma parameter for the RBF, laplacian, polynomial, exponential chi2
- and sigmoid kernels. Interpretation of the default value is left to
- the kernel; see the documentation for sklearn.metrics.pairwise.
- Ignored by other kernels.
- coef0 : float, default=None
- Zero coefficient for polynomial and sigmoid kernels.
- Ignored by other kernels.
- degree : float, default=None
- Degree of the polynomial kernel. Ignored by other kernels.
- kernel_params : dict, default=None
- Additional parameters (keyword arguments) for kernel function passed
- as callable object.
- n_components : int, default=100
- Number of features to construct, i.e. how many data points will be
- used to construct the mapping.
- random_state : int, RandomState instance or None, default=None
- Pseudo-random number generator to control the uniform sampling without
- replacement of `n_components` of the training data to construct the
- basis kernel.
- Pass an int for reproducible output across multiple function calls.
- See :term:`Glossary <random_state>`.
- n_jobs : int, default=None
- The number of jobs to use for the computation. This works by breaking
- down the kernel matrix into `n_jobs` even slices and computing them in
- parallel.
- ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
- ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
- for more details.
- .. versionadded:: 0.24
- Attributes
- ----------
- components_ : ndarray of shape (n_components, n_features)
- Subset of training points used to construct the feature map.
- component_indices_ : ndarray of shape (n_components)
- Indices of ``components_`` in the training set.
- normalization_ : ndarray of shape (n_components, n_components)
- Normalization matrix needed for embedding.
- Square root of the kernel matrix on ``components_``.
- n_features_in_ : int
- Number of features seen during :term:`fit`.
- .. versionadded:: 0.24
- feature_names_in_ : ndarray of shape (`n_features_in_`,)
- Names of features seen during :term:`fit`. Defined only when `X`
- has feature names that are all strings.
- .. versionadded:: 1.0
- See Also
- --------
- AdditiveChi2Sampler : Approximate feature map for additive chi2 kernel.
- PolynomialCountSketch : Polynomial kernel approximation via Tensor Sketch.
- RBFSampler : Approximate an RBF kernel feature map using random Fourier
- features.
- SkewedChi2Sampler : Approximate feature map for "skewed chi-squared" kernel.
- sklearn.metrics.pairwise.kernel_metrics : List of built-in kernels.
- References
- ----------
- * Williams, C.K.I. and Seeger, M.
- "Using the Nystroem method to speed up kernel machines",
- Advances in neural information processing systems 2001
- * T. Yang, Y. Li, M. Mahdavi, R. Jin and Z. Zhou
- "Nystroem Method vs Random Fourier Features: A Theoretical and Empirical
- Comparison",
- Advances in Neural Information Processing Systems 2012
- Examples
- --------
- >>> from sklearn import datasets, svm
- >>> from sklearn.kernel_approximation import Nystroem
- >>> X, y = datasets.load_digits(n_class=9, return_X_y=True)
- >>> data = X / 16.
- >>> clf = svm.LinearSVC(dual="auto")
- >>> feature_map_nystroem = Nystroem(gamma=.2,
- ... random_state=1,
- ... n_components=300)
- >>> data_transformed = feature_map_nystroem.fit_transform(data)
- >>> clf.fit(data_transformed, y)
- LinearSVC(dual='auto')
- >>> clf.score(data_transformed, y)
- 0.9987...
- """
- _parameter_constraints: dict = {
- "kernel": [
- StrOptions(set(PAIRWISE_KERNEL_FUNCTIONS.keys()) | {"precomputed"}),
- callable,
- ],
- "gamma": [Interval(Real, 0, None, closed="left"), None],
- "coef0": [Interval(Real, None, None, closed="neither"), None],
- "degree": [Interval(Real, 1, None, closed="left"), None],
- "kernel_params": [dict, None],
- "n_components": [Interval(Integral, 1, None, closed="left")],
- "random_state": ["random_state"],
- "n_jobs": [Integral, None],
- }
- def __init__(
- self,
- kernel="rbf",
- *,
- gamma=None,
- coef0=None,
- degree=None,
- kernel_params=None,
- n_components=100,
- random_state=None,
- n_jobs=None,
- ):
- self.kernel = kernel
- self.gamma = gamma
- self.coef0 = coef0
- self.degree = degree
- self.kernel_params = kernel_params
- self.n_components = n_components
- self.random_state = random_state
- self.n_jobs = n_jobs
- @_fit_context(prefer_skip_nested_validation=True)
- def fit(self, X, y=None):
- """Fit estimator to data.
- Samples a subset of training points, computes kernel
- on these and computes normalization matrix.
- Parameters
- ----------
- X : array-like, shape (n_samples, n_features)
- Training data, where `n_samples` is the number of samples
- and `n_features` is the number of features.
- y : array-like, shape (n_samples,) or (n_samples, n_outputs), \
- default=None
- Target values (None for unsupervised transformations).
- Returns
- -------
- self : object
- Returns the instance itself.
- """
- X = self._validate_data(X, accept_sparse="csr")
- rnd = check_random_state(self.random_state)
- n_samples = X.shape[0]
- # get basis vectors
- if self.n_components > n_samples:
- # XXX should we just bail?
- n_components = n_samples
- warnings.warn(
- "n_components > n_samples. This is not possible.\n"
- "n_components was set to n_samples, which results"
- " in inefficient evaluation of the full kernel."
- )
- else:
- n_components = self.n_components
- n_components = min(n_samples, n_components)
- inds = rnd.permutation(n_samples)
- basis_inds = inds[:n_components]
- basis = X[basis_inds]
- basis_kernel = pairwise_kernels(
- basis,
- metric=self.kernel,
- filter_params=True,
- n_jobs=self.n_jobs,
- **self._get_kernel_params(),
- )
- # sqrt of kernel matrix on basis vectors
- U, S, V = svd(basis_kernel)
- S = np.maximum(S, 1e-12)
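- # normalization_ = U @ diag(1 / sqrt(S)) @ V approximates the inverse
- # square root of basis_kernel; clamping S above guards against division
- # by (near-)zero singular values of a rank-deficient basis.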
- self.normalization_ = np.dot(U / np.sqrt(S), V)
- self.components_ = basis
- self.component_indices_ = basis_inds
- self._n_features_out = n_components
- return self
- def transform(self, X):
- """Apply feature map to X.
- Computes an approximate feature map using the kernel
- between some training points and X.
- Parameters
- ----------
- X : array-like of shape (n_samples, n_features)
- Data to transform.
- Returns
- -------
- X_transformed : ndarray of shape (n_samples, n_components)
- Transformed data.
- """
- check_is_fitted(self)
- X = self._validate_data(X, accept_sparse="csr", reset=False)
- kernel_params = self._get_kernel_params()
- embedded = pairwise_kernels(
- X,
- self.components_,
- metric=self.kernel,
- filter_params=True,
- n_jobs=self.n_jobs,
- **kernel_params,
- )
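- # Project the kernel similarities to the basis through normalization_.T so
- # that X_transformed @ X_transformed.T approximates the full kernel matrix.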
- return np.dot(embedded, self.normalization_.T)
- def _get_kernel_params(self):
- params = self.kernel_params
- if params is None:
- params = {}
- if not callable(self.kernel) and self.kernel != "precomputed":
- for param in KERNEL_PARAMS[self.kernel]:
- if getattr(self, param) is not None:
- params[param] = getattr(self, param)
- else:
- if (
- self.gamma is not None
- or self.coef0 is not None
- or self.degree is not None
- ):
- raise ValueError(
- "Don't pass gamma, coef0 or degree to "
- "Nystroem if using a callable "
- "or precomputed kernel"
- )
- return params
- def _more_tags(self):
- return {
- "_xfail_checks": {
- "check_transformer_preserve_dtypes": (
- "dtypes are preserved but not at a close enough precision"
- )
- },
- "preserves_dtype": [np.float64, np.float32],
- }
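- # Illustrative sketch (not part of the library API): a quick check of how
- # the approximations above compare against the exact RBF kernel. The data,
- # gamma, and component counts are arbitrary demonstration choices.
- if __name__ == "__main__":
- from sklearn.metrics.pairwise import rbf_kernel
- rng = np.random.RandomState(0)
- X_demo = rng.normal(size=(50, 10))
- exact = rbf_kernel(X_demo, gamma=0.5)
- for approx in (
- RBFSampler(gamma=0.5, n_components=500, random_state=0),
- Nystroem(kernel="rbf", gamma=0.5, n_components=50, random_state=0),
- ):
- Z = approx.fit_transform(X_demo)
- err = np.abs(Z @ Z.T - exact).mean()
- print(f"{type(approx).__name__}: mean abs error = {err:.3f}")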