  1. """
  2. The :mod:`sklearn.kernel_approximation` module implements several
  3. approximate kernel feature maps based on Fourier transforms and Count Sketches.
  4. """
  5. # Author: Andreas Mueller <amueller@ais.uni-bonn.de>
  6. # Daniel Lopez-Sanchez (TensorSketch) <lope@usal.es>
  7. # License: BSD 3 clause
  8. import warnings
  9. from numbers import Integral, Real
  10. import numpy as np
  11. import scipy.sparse as sp
  12. from scipy.linalg import svd
  13. try:
  14. from scipy.fft import fft, ifft
  15. except ImportError: # scipy < 1.4
  16. from scipy.fftpack import fft, ifft
  17. from .base import (
  18. BaseEstimator,
  19. ClassNamePrefixFeaturesOutMixin,
  20. TransformerMixin,
  21. _fit_context,
  22. )
  23. from .metrics.pairwise import KERNEL_PARAMS, PAIRWISE_KERNEL_FUNCTIONS, pairwise_kernels
  24. from .utils import check_random_state, deprecated
  25. from .utils._param_validation import Interval, StrOptions
  26. from .utils.extmath import safe_sparse_dot
  27. from .utils.validation import (
  28. _check_feature_names_in,
  29. check_is_fitted,
  30. check_non_negative,
  31. )

class PolynomialCountSketch(
    ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator
):
    """Polynomial kernel approximation via Tensor Sketch.

    Implements Tensor Sketch, which approximates the feature map
    of the polynomial kernel::

        K(X, Y) = (gamma * <X, Y> + coef0)^degree

    by efficiently computing a Count Sketch of the outer product of a
    vector with itself using Fast Fourier Transforms (FFT). Read more in the
    :ref:`User Guide <polynomial_kernel_approx>`.

    .. versionadded:: 0.24

    Parameters
    ----------
    gamma : float, default=1.0
        Parameter of the polynomial kernel whose feature map
        will be approximated.

    degree : int, default=2
        Degree of the polynomial kernel whose feature map
        will be approximated.

    coef0 : int, default=0
        Constant term of the polynomial kernel whose feature map
        will be approximated.

    n_components : int, default=100
        Dimensionality of the output feature space. Usually, `n_components`
        should be greater than the number of features in input samples in
        order to achieve good performance. The optimal score / run time
        balance is typically achieved around `n_components` = 10 * `n_features`,
        but this depends on the specific dataset being used.

    random_state : int, RandomState instance, default=None
        Determines random number generation for indexHash and bitHash
        initialization. Pass an int for reproducible results across multiple
        function calls. See :term:`Glossary <random_state>`.

    Attributes
    ----------
    indexHash_ : ndarray of shape (degree, n_features), dtype=int64
        Array of indexes in range [0, n_components) used to represent
        the 2-wise independent hash functions for Count Sketch computation.

    bitHash_ : ndarray of shape (degree, n_features), dtype=float32
        Array with random entries in {+1, -1}, used to represent
        the 2-wise independent hash functions for Count Sketch computation.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    See Also
    --------
    AdditiveChi2Sampler : Approximate feature map for additive chi2 kernel.
    Nystroem : Approximate a kernel map using a subset of the training data.
    RBFSampler : Approximate a RBF kernel feature map using random Fourier
        features.
    SkewedChi2Sampler : Approximate feature map for "skewed chi-squared" kernel.
    sklearn.metrics.pairwise.kernel_metrics : List of built-in kernels.

    Examples
    --------
    >>> from sklearn.kernel_approximation import PolynomialCountSketch
    >>> from sklearn.linear_model import SGDClassifier
    >>> X = [[0, 0], [1, 1], [1, 0], [0, 1]]
    >>> y = [0, 0, 1, 1]
    >>> ps = PolynomialCountSketch(degree=3, random_state=1)
    >>> X_features = ps.fit_transform(X)
    >>> clf = SGDClassifier(max_iter=10, tol=1e-3)
    >>> clf.fit(X_features, y)
    SGDClassifier(max_iter=10)
    >>> clf.score(X_features, y)
    1.0
    """

    _parameter_constraints: dict = {
        "gamma": [Interval(Real, 0, None, closed="left")],
        "degree": [Interval(Integral, 1, None, closed="left")],
        "coef0": [Interval(Real, None, None, closed="neither")],
        "n_components": [Interval(Integral, 1, None, closed="left")],
        "random_state": ["random_state"],
    }

    def __init__(
        self, *, gamma=1.0, degree=2, coef0=0, n_components=100, random_state=None
    ):
        self.gamma = gamma
        self.degree = degree
        self.coef0 = coef0
        self.n_components = n_components
        self.random_state = random_state

    @_fit_context(prefer_skip_nested_validation=True)
    def fit(self, X, y=None):
        """Fit the model with X.

        Initializes the internal variables. The method needs no information
        about the distribution of data, so we only care about n_features in X.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training data, where `n_samples` is the number of samples
            and `n_features` is the number of features.

        y : array-like of shape (n_samples,) or (n_samples, n_outputs), \
                default=None
            Target values (None for unsupervised transformations).

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        X = self._validate_data(X, accept_sparse="csc")
        random_state = check_random_state(self.random_state)

        n_features = X.shape[1]
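        # When coef0 != 0, one extra input dimension holding sqrt(coef0) is
        # hashed along with the data (it is appended in `transform`), since
        # (gamma * <x, y> + coef0)^degree is the homogeneous polynomial kernel
        # on the augmented vectors [sqrt(gamma) * x, sqrt(coef0)].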
        if self.coef0 != 0:
            n_features += 1

        self.indexHash_ = random_state.randint(
            0, high=self.n_components, size=(self.degree, n_features)
        )

        self.bitHash_ = random_state.choice(a=[-1, 1], size=(self.degree, n_features))
        self._n_features_out = self.n_components
        return self

    def transform(self, X):
        """Generate the feature map approximation for X.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            New data, where `n_samples` is the number of samples
            and `n_features` is the number of features.

        Returns
        -------
        X_new : array-like, shape (n_samples, n_components)
            Transformed data.
        """
        check_is_fitted(self)

        X = self._validate_data(X, accept_sparse="csc", reset=False)

        X_gamma = np.sqrt(self.gamma) * X

        if sp.issparse(X_gamma) and self.coef0 != 0:
            X_gamma = sp.hstack(
                [X_gamma, np.sqrt(self.coef0) * np.ones((X_gamma.shape[0], 1))],
                format="csc",
            )
        elif not sp.issparse(X_gamma) and self.coef0 != 0:
            X_gamma = np.hstack(
                [X_gamma, np.sqrt(self.coef0) * np.ones((X_gamma.shape[0], 1))]
            )

        if X_gamma.shape[1] != self.indexHash_.shape[1]:
            raise ValueError(
                "Number of features of test samples does not"
                " match that of training samples."
            )

        count_sketches = np.zeros((X_gamma.shape[0], self.degree, self.n_components))
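        # Count Sketch: for each of the `degree` independent hash functions d,
        # feature j adds bitHash_[d, j] * x_j into bucket indexHash_[d, j].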
        if sp.issparse(X_gamma):
            for j in range(X_gamma.shape[1]):
                for d in range(self.degree):
                    iHashIndex = self.indexHash_[d, j]
                    iHashBit = self.bitHash_[d, j]
                    count_sketches[:, d, iHashIndex] += (
                        (iHashBit * X_gamma[:, j]).toarray().ravel()
                    )

        else:
            for j in range(X_gamma.shape[1]):
                for d in range(self.degree):
                    iHashIndex = self.indexHash_[d, j]
                    iHashBit = self.bitHash_[d, j]
                    count_sketches[:, d, iHashIndex] += iHashBit * X_gamma[:, j]

        # For each sample, compute a count sketch of phi(x) using the polynomial
        # multiplication (via FFT) of p count sketches of x.
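        # (The entrywise product of the FFTs implements the circular
        # convolution of the `degree` sketches, which is the count sketch of
        # the degree-wise outer product.)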
        count_sketches_fft = fft(count_sketches, axis=2, overwrite_x=True)
        count_sketches_fft_prod = np.prod(count_sketches_fft, axis=1)
        data_sketch = np.real(ifft(count_sketches_fft_prod, overwrite_x=True))

        return data_sketch

class RBFSampler(ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):
    """Approximate a RBF kernel feature map using random Fourier features.

    It implements a variant of Random Kitchen Sinks [1].

    Read more in the :ref:`User Guide <rbf_kernel_approx>`.

    Parameters
    ----------
    gamma : 'scale' or float, default=1.0
        Parameter of RBF kernel: exp(-gamma * x^2).
        If ``gamma='scale'`` is passed then it uses
        1 / (n_features * X.var()) as value of gamma.

        .. versionadded:: 1.2
           The option `"scale"` was added in 1.2.

    n_components : int, default=100
        Number of Monte Carlo samples per original feature.
        Equals the dimensionality of the computed feature space.

    random_state : int, RandomState instance or None, default=None
        Pseudo-random number generator to control the generation of the random
        weights and random offset when fitting the training data.
        Pass an int for reproducible output across multiple function calls.
        See :term:`Glossary <random_state>`.

    Attributes
    ----------
    random_offset_ : ndarray of shape (n_components,), dtype={np.float64, np.float32}
        Random offset used to compute the projection in the `n_components`
        dimensions of the feature space.

    random_weights_ : ndarray of shape (n_features, n_components),\
        dtype={np.float64, np.float32}
        Random projection directions drawn from the Fourier transform
        of the RBF kernel.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    See Also
    --------
    AdditiveChi2Sampler : Approximate feature map for additive chi2 kernel.
    Nystroem : Approximate a kernel map using a subset of the training data.
    PolynomialCountSketch : Polynomial kernel approximation via Tensor Sketch.
    SkewedChi2Sampler : Approximate feature map for
        "skewed chi-squared" kernel.
    sklearn.metrics.pairwise.kernel_metrics : List of built-in kernels.

    Notes
    -----
    See "Random Features for Large-Scale Kernel Machines" by A. Rahimi and
    Benjamin Recht.

    [1] "Weighted Sums of Random Kitchen Sinks: Replacing
    minimization with randomization in learning" by A. Rahimi and
    Benjamin Recht.
    (https://people.eecs.berkeley.edu/~brecht/papers/08.rah.rec.nips.pdf)

    Examples
    --------
    >>> from sklearn.kernel_approximation import RBFSampler
    >>> from sklearn.linear_model import SGDClassifier
    >>> X = [[0, 0], [1, 1], [1, 0], [0, 1]]
    >>> y = [0, 0, 1, 1]
    >>> rbf_feature = RBFSampler(gamma=1, random_state=1)
    >>> X_features = rbf_feature.fit_transform(X)
    >>> clf = SGDClassifier(max_iter=5, tol=1e-3)
    >>> clf.fit(X_features, y)
    SGDClassifier(max_iter=5)
    >>> clf.score(X_features, y)
    1.0
    """

    _parameter_constraints: dict = {
        "gamma": [
            StrOptions({"scale"}),
            Interval(Real, 0.0, None, closed="left"),
        ],
        "n_components": [Interval(Integral, 1, None, closed="left")],
        "random_state": ["random_state"],
    }

    def __init__(self, *, gamma=1.0, n_components=100, random_state=None):
        self.gamma = gamma
        self.n_components = n_components
        self.random_state = random_state

    @_fit_context(prefer_skip_nested_validation=True)
    def fit(self, X, y=None):
        """Fit the model with X.

        Samples random projection according to n_features.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            Training data, where `n_samples` is the number of samples
            and `n_features` is the number of features.

        y : array-like, shape (n_samples,) or (n_samples, n_outputs), \
                default=None
            Target values (None for unsupervised transformations).

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        X = self._validate_data(X, accept_sparse="csr")
        random_state = check_random_state(self.random_state)
        n_features = X.shape[1]
        sparse = sp.issparse(X)
        if self.gamma == "scale":
            # var = E[X^2] - E[X]^2 if sparse
            X_var = (X.multiply(X)).mean() - (X.mean()) ** 2 if sparse else X.var()
            self._gamma = 1.0 / (n_features * X_var) if X_var != 0 else 1.0
        else:
            self._gamma = self.gamma
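
        # Draw w ~ N(0, 2 * gamma * I): the spectral density (the Fourier
        # transform) of the RBF kernel exp(-gamma * ||x - y||^2) is a Gaussian
        # with variance 2 * gamma along each dimension.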
        self.random_weights_ = (2.0 * self._gamma) ** 0.5 * random_state.normal(
            size=(n_features, self.n_components)
        )

        self.random_offset_ = random_state.uniform(0, 2 * np.pi, size=self.n_components)

        if X.dtype == np.float32:
            # Setting the data type of the fitted attribute will ensure the
            # output data type during `transform`.
            self.random_weights_ = self.random_weights_.astype(X.dtype, copy=False)
            self.random_offset_ = self.random_offset_.astype(X.dtype, copy=False)

        self._n_features_out = self.n_components
        return self

    def transform(self, X):
        """Apply the approximate feature map to X.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            New data, where `n_samples` is the number of samples
            and `n_features` is the number of features.

        Returns
        -------
        X_new : array-like, shape (n_samples, n_components)
            Transformed data.
        """
        check_is_fitted(self)

        X = self._validate_data(X, accept_sparse="csr", reset=False)
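
        # Random Fourier features: z(x) = sqrt(2 / n_components) *
        # cos(W^T x + b), whose inner products approximate the RBF kernel in
        # expectation (Rahimi & Recht).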
        projection = safe_sparse_dot(X, self.random_weights_)
        projection += self.random_offset_
        np.cos(projection, projection)
        projection *= (2.0 / self.n_components) ** 0.5
        return projection

    def _more_tags(self):
        return {"preserves_dtype": [np.float64, np.float32]}

class SkewedChi2Sampler(
    ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator
):
    """Approximate feature map for "skewed chi-squared" kernel.

    Read more in the :ref:`User Guide <skewed_chi_kernel_approx>`.

    Parameters
    ----------
    skewedness : float, default=1.0
        "skewedness" parameter of the kernel. Needs to be cross-validated.

    n_components : int, default=100
        Number of Monte Carlo samples per original feature.
        Equals the dimensionality of the computed feature space.

    random_state : int, RandomState instance or None, default=None
        Pseudo-random number generator to control the generation of the random
        weights and random offset when fitting the training data.
        Pass an int for reproducible output across multiple function calls.
        See :term:`Glossary <random_state>`.

    Attributes
    ----------
    random_weights_ : ndarray of shape (n_features, n_components)
        Weight array, sampled from a secant hyperbolic distribution, which will
        be used to linearly transform the log of the data.

    random_offset_ : ndarray of shape (n_components,)
        Bias term, which will be added to the data. It is uniformly distributed
        between 0 and 2*pi.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    See Also
    --------
    AdditiveChi2Sampler : Approximate feature map for additive chi2 kernel.
    Nystroem : Approximate a kernel map using a subset of the training data.
    RBFSampler : Approximate a RBF kernel feature map using random Fourier
        features.
    sklearn.metrics.pairwise.chi2_kernel : The exact chi squared kernel.
    sklearn.metrics.pairwise.kernel_metrics : List of built-in kernels.

    References
    ----------
    See "Random Fourier Approximations for Skewed Multiplicative Histogram
    Kernels" by Fuxin Li, Catalin Ionescu and Cristian Sminchisescu.

    Examples
    --------
    >>> from sklearn.kernel_approximation import SkewedChi2Sampler
    >>> from sklearn.linear_model import SGDClassifier
    >>> X = [[0, 0], [1, 1], [1, 0], [0, 1]]
    >>> y = [0, 0, 1, 1]
    >>> chi2_feature = SkewedChi2Sampler(skewedness=.01,
    ...                                  n_components=10,
    ...                                  random_state=0)
    >>> X_features = chi2_feature.fit_transform(X, y)
    >>> clf = SGDClassifier(max_iter=10, tol=1e-3)
    >>> clf.fit(X_features, y)
    SGDClassifier(max_iter=10)
    >>> clf.score(X_features, y)
    1.0
    """

    _parameter_constraints: dict = {
        "skewedness": [Interval(Real, None, None, closed="neither")],
        "n_components": [Interval(Integral, 1, None, closed="left")],
        "random_state": ["random_state"],
    }

    def __init__(self, *, skewedness=1.0, n_components=100, random_state=None):
        self.skewedness = skewedness
        self.n_components = n_components
        self.random_state = random_state

    @_fit_context(prefer_skip_nested_validation=True)
    def fit(self, X, y=None):
        """Fit the model with X.

        Samples random projection according to n_features.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training data, where `n_samples` is the number of samples
            and `n_features` is the number of features.

        y : array-like, shape (n_samples,) or (n_samples, n_outputs), \
                default=None
            Target values (None for unsupervised transformations).

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        X = self._validate_data(X)
        random_state = check_random_state(self.random_state)
        n_features = X.shape[1]
        uniform = random_state.uniform(size=(n_features, self.n_components))
        # transform by inverse CDF of sech
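        # (mapping u ~ Uniform(0, 1) through this inverse CDF yields weights
        # with density proportional to sech(pi * w), the spectrum of the
        # kernel in log-space; see the Li, Ionescu & Sminchisescu reference)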
        self.random_weights_ = 1.0 / np.pi * np.log(np.tan(np.pi / 2.0 * uniform))
        self.random_offset_ = random_state.uniform(0, 2 * np.pi, size=self.n_components)

        if X.dtype == np.float32:
            # Setting the data type of the fitted attribute will ensure the
            # output data type during `transform`.
            self.random_weights_ = self.random_weights_.astype(X.dtype, copy=False)
            self.random_offset_ = self.random_offset_.astype(X.dtype, copy=False)

        self._n_features_out = self.n_components
        return self

    def transform(self, X):
        """Apply the approximate feature map to X.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            New data, where `n_samples` is the number of samples
            and `n_features` is the number of features. All values of X must be
            strictly greater than "-skewedness".

        Returns
        -------
        X_new : array-like, shape (n_samples, n_components)
            Transformed data.
        """
        check_is_fitted(self)
        X = self._validate_data(
            X, copy=True, dtype=[np.float64, np.float32], reset=False
        )
        if (X <= -self.skewedness).any():
            raise ValueError("X may not contain entries smaller than -skewedness.")
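
        # Shift and move to log-space: the skewed chi-squared kernel is
        # stationary (shift-invariant) in log(X + skewedness), so random
        # Fourier features can be applied after this change of variables.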
        X += self.skewedness
        np.log(X, X)
        projection = safe_sparse_dot(X, self.random_weights_)
        projection += self.random_offset_
        np.cos(projection, projection)
        projection *= np.sqrt(2.0) / np.sqrt(self.n_components)
        return projection

    def _more_tags(self):
        return {"preserves_dtype": [np.float64, np.float32]}

class AdditiveChi2Sampler(TransformerMixin, BaseEstimator):
    """Approximate feature map for additive chi2 kernel.

    Uses sampling of the Fourier transform of the kernel characteristic
    function at regular intervals.

    Since the kernel that is to be approximated is additive, the components of
    the input vectors can be treated separately. Each entry in the original
    space is transformed into 2*sample_steps-1 features, where sample_steps is
    a parameter of the method. Typical values of sample_steps include 1, 2 and
    3.

    Optimal choices for the sampling interval for certain data ranges can be
    computed (see the reference). The default values should be reasonable.

    Read more in the :ref:`User Guide <additive_chi_kernel_approx>`.

    Parameters
    ----------
    sample_steps : int, default=2
        Gives the number of (complex) sampling points.

    sample_interval : float, default=None
        Sampling interval. Must be specified when sample_steps not in {1,2,3}.

    Attributes
    ----------
    sample_interval_ : float
        Stored sampling interval. Specified as a parameter if `sample_steps`
        not in {1,2,3}.

        .. deprecated:: 1.3
            `sample_interval_` serves internal purposes only and will be
            removed in 1.5.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    See Also
    --------
    SkewedChi2Sampler : A Fourier-approximation to a non-additive variant of
        the chi squared kernel.
    sklearn.metrics.pairwise.chi2_kernel : The exact chi squared kernel.
    sklearn.metrics.pairwise.additive_chi2_kernel : The exact additive chi
        squared kernel.

    Notes
    -----
    This estimator approximates a slightly different version of the additive
    chi squared kernel than ``sklearn.metrics.pairwise.additive_chi2_kernel``
    computes.

    This estimator is stateless and does not need to be fitted. However, we
    recommend to call :meth:`fit_transform` instead of :meth:`transform`, as
    parameter validation is only performed in :meth:`fit`.

    References
    ----------
    See `"Efficient additive kernels via explicit feature maps"
    <http://www.robots.ox.ac.uk/~vedaldi/assets/pubs/vedaldi11efficient.pdf>`_
    A. Vedaldi and A. Zisserman, Pattern Analysis and Machine Intelligence,
    2011.

    Examples
    --------
    >>> from sklearn.datasets import load_digits
    >>> from sklearn.linear_model import SGDClassifier
    >>> from sklearn.kernel_approximation import AdditiveChi2Sampler
    >>> X, y = load_digits(return_X_y=True)
    >>> chi2sampler = AdditiveChi2Sampler(sample_steps=2)
    >>> X_transformed = chi2sampler.fit_transform(X, y)
    >>> clf = SGDClassifier(max_iter=5, random_state=0, tol=1e-3)
    >>> clf.fit(X_transformed, y)
    SGDClassifier(max_iter=5, random_state=0)
    >>> clf.score(X_transformed, y)
    0.9499...
    """

    _parameter_constraints: dict = {
        "sample_steps": [Interval(Integral, 1, None, closed="left")],
        "sample_interval": [Interval(Real, 0, None, closed="left"), None],
    }

    def __init__(self, *, sample_steps=2, sample_interval=None):
        self.sample_steps = sample_steps
        self.sample_interval = sample_interval

    @_fit_context(prefer_skip_nested_validation=True)
    def fit(self, X, y=None):
        """Only validates estimator's parameters.

        This method allows one to (i) validate the estimator's parameters and
        (ii) be consistent with the scikit-learn transformer API.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training data, where `n_samples` is the number of samples
            and `n_features` is the number of features.

        y : array-like, shape (n_samples,) or (n_samples, n_outputs), \
                default=None
            Target values (None for unsupervised transformations).

        Returns
        -------
        self : object
            Returns the transformer.
        """
        X = self._validate_data(X, accept_sparse="csr")
        check_non_negative(X, "X in AdditiveChi2Sampler.fit")

        # TODO(1.5): remove the setting of _sample_interval from fit
        if self.sample_interval is None:
            # See figure 2 c) of "Efficient additive kernels via explicit feature maps"
            # <http://www.robots.ox.ac.uk/~vedaldi/assets/pubs/vedaldi11efficient.pdf>
            # A. Vedaldi and A. Zisserman, Pattern Analysis and Machine Intelligence,
            # 2011
            if self.sample_steps == 1:
                self._sample_interval = 0.8
            elif self.sample_steps == 2:
                self._sample_interval = 0.5
            elif self.sample_steps == 3:
                self._sample_interval = 0.4
            else:
                raise ValueError(
                    "If sample_steps is not in [1, 2, 3],"
                    " you need to provide sample_interval"
                )
        else:
            self._sample_interval = self.sample_interval

        return self

    # TODO(1.5): remove
    @deprecated(  # type: ignore
        "The ``sample_interval_`` attribute was deprecated in version 1.3 and "
        "will be removed in 1.5."
    )
    @property
    def sample_interval_(self):
        return self._sample_interval

    def transform(self, X):
        """Apply approximate feature map to X.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            Training data, where `n_samples` is the number of samples
            and `n_features` is the number of features.

        Returns
        -------
        X_new : {ndarray, sparse matrix}, \
               shape = (n_samples, n_features * (2*sample_steps - 1))
            Whether the return value is an array or sparse matrix depends on
            the type of the input X.
        """
        X = self._validate_data(X, accept_sparse="csr", reset=False)
        check_non_negative(X, "X in AdditiveChi2Sampler.transform")
        sparse = sp.issparse(X)

        if hasattr(self, "_sample_interval"):
            # TODO(1.5): remove this branch
            sample_interval = self._sample_interval
        else:
            if self.sample_interval is None:
                # See figure 2 c) of "Efficient additive kernels via explicit feature maps"  # noqa
                # <http://www.robots.ox.ac.uk/~vedaldi/assets/pubs/vedaldi11efficient.pdf>
                # A. Vedaldi and A. Zisserman, Pattern Analysis and Machine Intelligence,  # noqa
                # 2011
                if self.sample_steps == 1:
                    sample_interval = 0.8
                elif self.sample_steps == 2:
                    sample_interval = 0.5
                elif self.sample_steps == 3:
                    sample_interval = 0.4
                else:
                    raise ValueError(
                        "If sample_steps is not in [1, 2, 3],"
                        " you need to provide sample_interval"
                    )
            else:
                sample_interval = self.sample_interval

        # zeroth component
        # 1/cosh = sech
        # cosh(0) = 1.0
        transf = self._transform_sparse if sparse else self._transform_dense
        return transf(X, self.sample_steps, sample_interval)

    def get_feature_names_out(self, input_features=None):
        """Get output feature names for transformation.

        Parameters
        ----------
        input_features : array-like of str or None, default=None
            Only used to validate feature names with the names seen in :meth:`fit`.

        Returns
        -------
        feature_names_out : ndarray of str objects
            Transformed feature names.
        """
        check_is_fitted(self, "n_features_in_")
        input_features = _check_feature_names_in(
            self, input_features, generate_names=True
        )
        est_name = self.__class__.__name__.lower()

        names_list = [f"{est_name}_{name}_sqrt" for name in input_features]

        for j in range(1, self.sample_steps):
            cos_names = [f"{est_name}_{name}_cos{j}" for name in input_features]
            sin_names = [f"{est_name}_{name}_sin{j}" for name in input_features]
            names_list.extend(cos_names + sin_names)

        return np.asarray(names_list, dtype=object)

    @staticmethod
    def _transform_dense(X, sample_steps, sample_interval):
        non_zero = X != 0.0
        X_nz = X[non_zero]

        X_step = np.zeros_like(X)
        X_step[non_zero] = np.sqrt(X_nz * sample_interval)

        X_new = [X_step]

        log_step_nz = sample_interval * np.log(X_nz)
        step_nz = 2 * X_nz * sample_interval
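
        # For each frequency j >= 1, append the real and imaginary parts of the
        # sampled spectrum: sqrt(2 * x * L / cosh(pi * j * L)) multiplied by
        # cos(j * L * log(x)) and sin(j * L * log(x)), where L is the sampling
        # interval; the j = 0 term above is sqrt(x * L).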
        for j in range(1, sample_steps):
            factor_nz = np.sqrt(step_nz / np.cosh(np.pi * j * sample_interval))

            X_step = np.zeros_like(X)
            X_step[non_zero] = factor_nz * np.cos(j * log_step_nz)
            X_new.append(X_step)

            X_step = np.zeros_like(X)
            X_step[non_zero] = factor_nz * np.sin(j * log_step_nz)
            X_new.append(X_step)

        return np.hstack(X_new)

    @staticmethod
    def _transform_sparse(X, sample_steps, sample_interval):
        indices = X.indices.copy()
        indptr = X.indptr.copy()

        data_step = np.sqrt(X.data * sample_interval)
        X_step = sp.csr_matrix(
            (data_step, indices, indptr), shape=X.shape, dtype=X.dtype, copy=False
        )
        X_new = [X_step]

        log_step_nz = sample_interval * np.log(X.data)
        step_nz = 2 * X.data * sample_interval

        for j in range(1, sample_steps):
            factor_nz = np.sqrt(step_nz / np.cosh(np.pi * j * sample_interval))

            data_step = factor_nz * np.cos(j * log_step_nz)
            X_step = sp.csr_matrix(
                (data_step, indices, indptr), shape=X.shape, dtype=X.dtype, copy=False
            )
            X_new.append(X_step)

            data_step = factor_nz * np.sin(j * log_step_nz)
            X_step = sp.csr_matrix(
                (data_step, indices, indptr), shape=X.shape, dtype=X.dtype, copy=False
            )
            X_new.append(X_step)

        return sp.hstack(X_new)

    def _more_tags(self):
        return {"stateless": True, "requires_positive_X": True}

class Nystroem(ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):
    """Approximate a kernel map using a subset of the training data.

    Constructs an approximate feature map for an arbitrary kernel
    using a subset of the data as basis.

    Read more in the :ref:`User Guide <nystroem_kernel_approx>`.

    .. versionadded:: 0.13

    Parameters
    ----------
    kernel : str or callable, default='rbf'
        Kernel map to be approximated. A callable should accept two arguments
        and the keyword arguments passed to this object as `kernel_params`, and
        should return a floating point number.

    gamma : float, default=None
        Gamma parameter for the RBF, laplacian, polynomial, exponential chi2
        and sigmoid kernels. Interpretation of the default value is left to
        the kernel; see the documentation for sklearn.metrics.pairwise.
        Ignored by other kernels.

    coef0 : float, default=None
        Zero coefficient for polynomial and sigmoid kernels.
        Ignored by other kernels.

    degree : float, default=None
        Degree of the polynomial kernel. Ignored by other kernels.

    kernel_params : dict, default=None
        Additional parameters (keyword arguments) for kernel function passed
        as callable object.

    n_components : int, default=100
        Number of features to construct.
        How many data points will be used to construct the mapping.

    random_state : int, RandomState instance or None, default=None
        Pseudo-random number generator to control the uniform sampling without
        replacement of `n_components` of the training data to construct the
        basis kernel.
        Pass an int for reproducible output across multiple function calls.
        See :term:`Glossary <random_state>`.

    n_jobs : int, default=None
        The number of jobs to use for the computation. This works by breaking
        down the kernel matrix into `n_jobs` even slices and computing them in
        parallel.
        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
        for more details.

        .. versionadded:: 0.24

    Attributes
    ----------
    components_ : ndarray of shape (n_components, n_features)
        Subset of training points used to construct the feature map.

    component_indices_ : ndarray of shape (n_components,)
        Indices of ``components_`` in the training set.

    normalization_ : ndarray of shape (n_components, n_components)
        Normalization matrix needed for embedding.
        Inverse of the square root of the kernel matrix on ``components_``.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    See Also
    --------
    AdditiveChi2Sampler : Approximate feature map for additive chi2 kernel.
    PolynomialCountSketch : Polynomial kernel approximation via Tensor Sketch.
    RBFSampler : Approximate a RBF kernel feature map using random Fourier
        features.
    SkewedChi2Sampler : Approximate feature map for "skewed chi-squared" kernel.
    sklearn.metrics.pairwise.kernel_metrics : List of built-in kernels.

    References
    ----------
    * Williams, C.K.I. and Seeger, M.
      "Using the Nystroem method to speed up kernel machines",
      Advances in Neural Information Processing Systems, 2001.

    * T. Yang, Y. Li, M. Mahdavi, R. Jin and Z. Zhou
      "Nystroem Method vs Random Fourier Features: A Theoretical and Empirical
      Comparison",
      Advances in Neural Information Processing Systems, 2012.

    Examples
    --------
    >>> from sklearn import datasets, svm
    >>> from sklearn.kernel_approximation import Nystroem
    >>> X, y = datasets.load_digits(n_class=9, return_X_y=True)
    >>> data = X / 16.
    >>> clf = svm.LinearSVC(dual="auto")
    >>> feature_map_nystroem = Nystroem(gamma=.2,
    ...                                 random_state=1,
    ...                                 n_components=300)
    >>> data_transformed = feature_map_nystroem.fit_transform(data)
    >>> clf.fit(data_transformed, y)
    LinearSVC(dual='auto')
    >>> clf.score(data_transformed, y)
    0.9987...
    """

    _parameter_constraints: dict = {
        "kernel": [
            StrOptions(set(PAIRWISE_KERNEL_FUNCTIONS.keys()) | {"precomputed"}),
            callable,
        ],
        "gamma": [Interval(Real, 0, None, closed="left"), None],
        "coef0": [Interval(Real, None, None, closed="neither"), None],
        "degree": [Interval(Real, 1, None, closed="left"), None],
        "kernel_params": [dict, None],
        "n_components": [Interval(Integral, 1, None, closed="left")],
        "random_state": ["random_state"],
        "n_jobs": [Integral, None],
    }

    def __init__(
        self,
        kernel="rbf",
        *,
        gamma=None,
        coef0=None,
        degree=None,
        kernel_params=None,
        n_components=100,
        random_state=None,
        n_jobs=None,
    ):
        self.kernel = kernel
        self.gamma = gamma
        self.coef0 = coef0
        self.degree = degree
        self.kernel_params = kernel_params
        self.n_components = n_components
        self.random_state = random_state
        self.n_jobs = n_jobs

    @_fit_context(prefer_skip_nested_validation=True)
    def fit(self, X, y=None):
        """Fit estimator to data.

        Samples a subset of training points, computes kernel
        on these and computes normalization matrix.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training data, where `n_samples` is the number of samples
            and `n_features` is the number of features.

        y : array-like, shape (n_samples,) or (n_samples, n_outputs), \
                default=None
            Target values (None for unsupervised transformations).

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        X = self._validate_data(X, accept_sparse="csr")
        rnd = check_random_state(self.random_state)
        n_samples = X.shape[0]

        # get basis vectors
        if self.n_components > n_samples:
            # XXX should we just bail?
            n_components = n_samples
            warnings.warn(
                "n_components > n_samples. This is not possible.\n"
                "n_components was set to n_samples, which results"
                " in inefficient evaluation of the full kernel."
            )
        else:
            n_components = self.n_components
        n_components = min(n_samples, n_components)
        inds = rnd.permutation(n_samples)
        basis_inds = inds[:n_components]
        basis = X[basis_inds]

        basis_kernel = pairwise_kernels(
            basis,
            metric=self.kernel,
            filter_params=True,
            n_jobs=self.n_jobs,
            **self._get_kernel_params(),
        )

        # inverse sqrt of kernel matrix on basis vectors
        U, S, V = svd(basis_kernel)
        S = np.maximum(S, 1e-12)
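        # normalization_ is (approximately) K_basis^{-1/2}; combined with
        # `transform`, the embedding phi(X) = K(X, basis) @ K_basis^{-1/2}
        # reproduces the Nystroem approximation
        # K(X, basis) K_basis^{-1} K(basis, X). Clipping S at 1e-12 guards
        # against division by (near-)zero singular values.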
        self.normalization_ = np.dot(U / np.sqrt(S), V)
        self.components_ = basis
        self.component_indices_ = basis_inds
        self._n_features_out = n_components
        return self

    def transform(self, X):
        """Apply feature map to X.

        Computes an approximate feature map using the kernel
        between some training points and X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Data to transform.

        Returns
        -------
        X_transformed : ndarray of shape (n_samples, n_components)
            Transformed data.
        """
        check_is_fitted(self)
        X = self._validate_data(X, accept_sparse="csr", reset=False)

        kernel_params = self._get_kernel_params()
        embedded = pairwise_kernels(
            X,
            self.components_,
            metric=self.kernel,
            filter_params=True,
            n_jobs=self.n_jobs,
            **kernel_params,
        )
        return np.dot(embedded, self.normalization_.T)

    def _get_kernel_params(self):
        params = self.kernel_params
        if params is None:
            params = {}
        if not callable(self.kernel) and self.kernel != "precomputed":
            for param in KERNEL_PARAMS[self.kernel]:
                if getattr(self, param) is not None:
                    params[param] = getattr(self, param)
        else:
            if (
                self.gamma is not None
                or self.coef0 is not None
                or self.degree is not None
            ):
                raise ValueError(
                    "Don't pass gamma, coef0 or degree to "
                    "Nystroem if using a callable "
                    "or precomputed kernel"
                )

        return params

    def _more_tags(self):
        return {
            "_xfail_checks": {
                "check_transformer_preserve_dtypes": (
                    "dtypes are preserved but not at a close enough precision"
                )
            },
            "preserves_dtype": [np.float64, np.float32],
        }