# test_kernel_approximation.py

import re

import numpy as np
import pytest
from scipy.sparse import csr_matrix

from sklearn.datasets import make_classification
from sklearn.kernel_approximation import (
    AdditiveChi2Sampler,
    Nystroem,
    PolynomialCountSketch,
    RBFSampler,
    SkewedChi2Sampler,
)
from sklearn.metrics.pairwise import (
    chi2_kernel,
    kernel_metrics,
    polynomial_kernel,
    rbf_kernel,
)
from sklearn.utils._testing import (
    assert_allclose,
    assert_array_almost_equal,
    assert_array_equal,
)

# generate data
rng = np.random.RandomState(0)
X = rng.random_sample(size=(300, 50))
Y = rng.random_sample(size=(300, 50))

X /= X.sum(axis=1)[:, np.newaxis]
Y /= Y.sum(axis=1)[:, np.newaxis]
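# rows are normalized to sum to one so that X and Y behave like histograms;
# the chi-squared family of kernels exercised below expects non-negative,
# histogram-like input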


@pytest.mark.parametrize("gamma", [0.1, 1, 2.5])
@pytest.mark.parametrize("degree, n_components", [(1, 500), (2, 500), (3, 5000)])
@pytest.mark.parametrize("coef0", [0, 2.5])
def test_polynomial_count_sketch(gamma, degree, coef0, n_components):
    # test that PolynomialCountSketch approximates polynomial
    # kernel on random data

    # compute exact kernel
    kernel = polynomial_kernel(X, Y, gamma=gamma, degree=degree, coef0=coef0)

    # approximate kernel mapping
    ps_transform = PolynomialCountSketch(
        n_components=n_components,
        gamma=gamma,
        coef0=coef0,
        degree=degree,
        random_state=42,
    )
    X_trans = ps_transform.fit_transform(X)
    Y_trans = ps_transform.transform(Y)
    kernel_approx = np.dot(X_trans, Y_trans.T)

    error = kernel - kernel_approx
    assert np.abs(np.mean(error)) <= 0.05  # close to unbiased
    np.abs(error, out=error)
    assert np.max(error) <= 0.1  # nothing too far off
    assert np.mean(error) <= 0.05  # mean is fairly close
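

# For reference, a minimal sketch of the TensorSketch construction that
# PolynomialCountSketch builds on (Pham & Pagh, 2013). This is an
# illustration under stated assumptions, not the estimator's internals:
# it covers only the homogeneous core (x @ y) ** degree, whereas the
# estimator also handles gamma and coef0 (by scaling the input and
# appending a constant feature).
def _tensor_sketch_demo(x, y, degree=2, n_components=4096, seed=0):
    """Estimate (x @ y) ** degree from count sketches combined by FFT.

    The count sketch of a `degree`-fold outer product equals the circular
    convolution of per-factor count sketches, i.e. the inverse FFT of the
    product of their FFTs; inner products of the resulting feature vectors
    are unbiased estimates of the polynomial kernel.
    """
    rng = np.random.RandomState(seed)
    n_features = x.shape[0]
    fft_x = np.ones(n_components, dtype=complex)
    fft_y = np.ones(n_components, dtype=complex)
    for _ in range(degree):
        h = rng.randint(n_components, size=n_features)  # bucket per feature
        s = rng.choice([-1.0, 1.0], size=n_features)  # random sign per feature
        cs_x = np.zeros(n_components)
        cs_y = np.zeros(n_components)
        np.add.at(cs_x, h, s * x)  # count sketch of x
        np.add.at(cs_y, h, s * y)  # count sketch of y
        fft_x *= np.fft.fft(cs_x)
        fft_y *= np.fft.fft(cs_y)
    phi_x = np.fft.ifft(fft_x).real
    phi_y = np.fft.ifft(fft_y).real
    return phi_x @ phi_y  # approximately (x @ y) ** degree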


@pytest.mark.parametrize("gamma", [0.1, 1.0])
@pytest.mark.parametrize("degree", [1, 2, 3])
@pytest.mark.parametrize("coef0", [0, 2.5])
def test_polynomial_count_sketch_dense_sparse(gamma, degree, coef0):
    """Check that PolynomialCountSketch results are the same for dense and sparse
    input.
    """
    ps_dense = PolynomialCountSketch(
        n_components=500, gamma=gamma, degree=degree, coef0=coef0, random_state=42
    )
    Xt_dense = ps_dense.fit_transform(X)
    Yt_dense = ps_dense.transform(Y)

    ps_sparse = PolynomialCountSketch(
        n_components=500, gamma=gamma, degree=degree, coef0=coef0, random_state=42
    )
    Xt_sparse = ps_sparse.fit_transform(csr_matrix(X))
    Yt_sparse = ps_sparse.transform(csr_matrix(Y))

    assert_allclose(Xt_dense, Xt_sparse)
    assert_allclose(Yt_dense, Yt_sparse)


def _linear_kernel(X, Y):
    return np.dot(X, Y.T)


def test_additive_chi2_sampler():
    # test that AdditiveChi2Sampler approximates kernel on random data

    # compute exact kernel
    # abbreviations for easier formula
    X_ = X[:, np.newaxis, :]
    Y_ = Y[np.newaxis, :, :]

    large_kernel = 2 * X_ * Y_ / (X_ + Y_)

    # reduce to n_samples_x x n_samples_y by summing over features
    kernel = large_kernel.sum(axis=2)

    # approximate kernel mapping
    transform = AdditiveChi2Sampler(sample_steps=3)
    X_trans = transform.fit_transform(X)
    Y_trans = transform.transform(Y)

    kernel_approx = np.dot(X_trans, Y_trans.T)
    assert_array_almost_equal(kernel, kernel_approx, 1)

    X_sp_trans = transform.fit_transform(csr_matrix(X))
    Y_sp_trans = transform.transform(csr_matrix(Y))
    assert_array_equal(X_trans, X_sp_trans.toarray())
    assert_array_equal(Y_trans, Y_sp_trans.toarray())

    # test error is raised on negative input
    Y_neg = Y.copy()
    Y_neg[0, 0] = -1
    msg = "Negative values in data passed to"
    with pytest.raises(ValueError, match=msg):
        transform.fit(Y_neg)
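

# For reference, a minimal sketch of the closed-form feature map behind
# AdditiveChi2Sampler (Vedaldi & Zisserman, 2010). Illustrative only, not
# the estimator's code: per feature it emits a sqrt term plus cos/sin pairs
# sampled from the kernel's spectrum sech(pi * omega) at frequencies
# j * sample_interval. The interval 0.4 mirrors the estimator's default
# for sample_steps=3.
def _additive_chi2_map_demo(X, sample_steps=3, sample_interval=0.4):
    """Map X so inner products approximate sum_j 2 * x_j * y_j / (x_j + y_j)."""
    L = sample_interval
    log_x = np.log(np.maximum(X, np.finfo(float).tiny))  # guard exact zeros
    feats = [np.sqrt(X * L)]
    for j in range(1, sample_steps):
        factor = np.sqrt(2 * X * L / np.cosh(np.pi * j * L))
        feats.append(factor * np.cos(j * L * log_x))
        feats.append(factor * np.sin(j * L * log_x))
    return np.hstack(feats)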


@pytest.mark.parametrize("method", ["fit", "fit_transform", "transform"])
@pytest.mark.parametrize("sample_steps", range(1, 4))
def test_additive_chi2_sampler_sample_steps(method, sample_steps):
    """Check that valid values of sample_steps don't raise an error
    and that sample_interval is unchanged after fit.
    """
    transformer = AdditiveChi2Sampler(sample_steps=sample_steps)
    getattr(transformer, method)(X)

    sample_interval = 0.5
    transformer = AdditiveChi2Sampler(
        sample_steps=sample_steps,
        sample_interval=sample_interval,
    )
    getattr(transformer, method)(X)
    assert transformer.sample_interval == sample_interval


# TODO(1.5): remove
def test_additive_chi2_sampler_future_warnings():
    """Check that we raise a FutureWarning when accessing `sample_interval_`."""
    transformer = AdditiveChi2Sampler()
    transformer.fit(X)
    msg = re.escape(
        "The ``sample_interval_`` attribute was deprecated in version 1.3 and "
        "will be removed 1.5."
    )
    with pytest.warns(FutureWarning, match=msg):
        assert transformer.sample_interval_ is not None


@pytest.mark.parametrize("method", ["fit", "fit_transform", "transform"])
def test_additive_chi2_sampler_wrong_sample_steps(method):
    """Check that we raise a ValueError on invalid sample_steps."""
    transformer = AdditiveChi2Sampler(sample_steps=4)
    msg = re.escape(
        "If sample_steps is not in [1, 2, 3], you need to provide sample_interval"
    )
    with pytest.raises(ValueError, match=msg):
        getattr(transformer, method)(X)


def test_skewed_chi2_sampler():
    # test that SkewedChi2Sampler approximates kernel on random data

    # compute exact kernel
    c = 0.03
    # set one component negative but greater than -c to ensure that the kernel
    # approximation is valid on the group (-c; +\infty) endowed with the skewed
    # multiplication.
    Y[0, 0] = -c / 2.0

    # abbreviations for easier formula
    X_c = (X + c)[:, np.newaxis, :]
    Y_c = (Y + c)[np.newaxis, :, :]

    # we do it in log-space in the hope that it's more stable
    # this array is n_samples_x x n_samples_y big x n_features
    log_kernel = (
        (np.log(X_c) / 2.0) + (np.log(Y_c) / 2.0) + np.log(2.0) - np.log(X_c + Y_c)
    )
    # reduce to n_samples_x x n_samples_y by summing over features in log-space
    kernel = np.exp(log_kernel.sum(axis=2))

    # approximate kernel mapping
    transform = SkewedChi2Sampler(skewedness=c, n_components=1000, random_state=42)
    X_trans = transform.fit_transform(X)
    Y_trans = transform.transform(Y)

    kernel_approx = np.dot(X_trans, Y_trans.T)
    assert_array_almost_equal(kernel, kernel_approx, 1)
    assert np.isfinite(kernel).all(), "NaNs found in the Gram matrix"
    assert np.isfinite(kernel_approx).all(), "NaNs found in the approximate Gram matrix"

    # test that an error is raised when the input contains values smaller than -c
    Y_neg = Y.copy()
    Y_neg[0, 0] = -c * 2.0
    msg = "X may not contain entries smaller than -skewedness"
    with pytest.raises(ValueError, match=msg):
        transform.transform(Y_neg)
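

# For reference, a minimal sketch of the random feature map behind
# SkewedChi2Sampler (Li, Ionescu & Sminchisescu, 2010). Illustrative only,
# not the estimator's internals: in log-space the skewed chi-squared kernel
# is a product of sech((s_j - t_j) / 2) factors, whose spectral density
# sech(pi * w) is sampled by inverse CDF, w = log(tan(pi * u / 2)) / pi.
def _skewed_chi2_features_demo(X, Y, c=0.03, n_components=1000, seed=42):
    """Return an approximation of the exact skewed chi2 Gram matrix above."""
    rng = np.random.RandomState(seed)
    n_features = X.shape[1]
    u = rng.uniform(size=(n_features, n_components))
    W = np.log(np.tan(np.pi / 2.0 * u)) / np.pi  # draws from sech(pi * w)
    b = rng.uniform(0, 2 * np.pi, size=n_components)
    Z_x = np.sqrt(2.0 / n_components) * np.cos(np.log(X + c) @ W + b)
    Z_y = np.sqrt(2.0 / n_components) * np.cos(np.log(Y + c) @ W + b)
    return Z_x @ Z_y.T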


def test_additive_chi2_sampler_exceptions():
    """Ensures correct error message"""
    transformer = AdditiveChi2Sampler()
    X_neg = X.copy()
    X_neg[0, 0] = -1
    with pytest.raises(ValueError, match="X in AdditiveChi2Sampler.fit"):
        transformer.fit(X_neg)

    with pytest.raises(ValueError, match="X in AdditiveChi2Sampler.transform"):
        transformer.fit(X)
        transformer.transform(X_neg)


def test_rbf_sampler():
    # test that RBFSampler approximates kernel on random data

    # compute exact kernel
    gamma = 10.0
    kernel = rbf_kernel(X, Y, gamma=gamma)

    # approximate kernel mapping
    rbf_transform = RBFSampler(gamma=gamma, n_components=1000, random_state=42)
    X_trans = rbf_transform.fit_transform(X)
    Y_trans = rbf_transform.transform(Y)
    kernel_approx = np.dot(X_trans, Y_trans.T)

    error = kernel - kernel_approx
    assert np.abs(np.mean(error)) <= 0.01  # close to unbiased
    np.abs(error, out=error)
    assert np.max(error) <= 0.1  # nothing too far off
    assert np.mean(error) <= 0.05  # mean is fairly close
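

# For reference, a minimal sketch of the random Fourier feature map behind
# RBFSampler (Rahimi & Recht, 2007). Illustrative only, not the estimator's
# internals: with W_ij ~ N(0, 2 * gamma) and b_j ~ U[0, 2*pi], the features
# sqrt(2 / D) * cos(x @ W + b) have inner products that approximate
# exp(-gamma * ||x - y||**2) in expectation.
def _random_fourier_features_demo(X, Y, gamma=10.0, n_components=1000, seed=42):
    """Return an approximation of rbf_kernel(X, Y, gamma=gamma)."""
    rng = np.random.RandomState(seed)
    n_features = X.shape[1]
    W = rng.normal(scale=np.sqrt(2 * gamma), size=(n_features, n_components))
    b = rng.uniform(0, 2 * np.pi, size=n_components)
    Z_x = np.sqrt(2.0 / n_components) * np.cos(X @ W + b)
    Z_y = np.sqrt(2.0 / n_components) * np.cos(Y @ W + b)
    return Z_x @ Z_y.T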


def test_rbf_sampler_fitted_attributes_dtype(global_dtype):
    """Check that the fitted attributes are stored according to the
    data type of X."""
    rbf = RBFSampler()

    X = np.array([[1, 2], [3, 4], [5, 6]], dtype=global_dtype)

    rbf.fit(X)

    assert rbf.random_offset_.dtype == global_dtype
    assert rbf.random_weights_.dtype == global_dtype


def test_rbf_sampler_dtype_equivalence():
    """Check the equivalence of the results with 32 and 64 bits input."""
    rbf32 = RBFSampler(random_state=42)
    X32 = np.array([[1, 2], [3, 4], [5, 6]], dtype=np.float32)
    rbf32.fit(X32)

    rbf64 = RBFSampler(random_state=42)
    X64 = np.array([[1, 2], [3, 4], [5, 6]], dtype=np.float64)
    rbf64.fit(X64)

    assert_allclose(rbf32.random_offset_, rbf64.random_offset_)
    assert_allclose(rbf32.random_weights_, rbf64.random_weights_)


def test_rbf_sampler_gamma_scale():
    """Check the inner value computed when `gamma='scale'`."""
    X, y = [[0.0], [1.0]], [0, 1]
    rbf = RBFSampler(gamma="scale")
    rbf.fit(X, y)
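    # gamma='scale' resolves to 1 / (n_features * X.var()); here X has one
    # feature with variance 0.25, hence 1 / (1 * 0.25) == 4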
    assert rbf._gamma == pytest.approx(4)


def test_skewed_chi2_sampler_fitted_attributes_dtype(global_dtype):
    """Check that the fitted attributes are stored according to the
    data type of X."""
    skewed_chi2_sampler = SkewedChi2Sampler()

    X = np.array([[1, 2], [3, 4], [5, 6]], dtype=global_dtype)

    skewed_chi2_sampler.fit(X)

    assert skewed_chi2_sampler.random_offset_.dtype == global_dtype
    assert skewed_chi2_sampler.random_weights_.dtype == global_dtype


def test_skewed_chi2_sampler_dtype_equivalence():
    """Check the equivalence of the results with 32 and 64 bits input."""
    skewed_chi2_sampler_32 = SkewedChi2Sampler(random_state=42)
    X_32 = np.array([[1, 2], [3, 4], [5, 6]], dtype=np.float32)
    skewed_chi2_sampler_32.fit(X_32)

    skewed_chi2_sampler_64 = SkewedChi2Sampler(random_state=42)
    X_64 = np.array([[1, 2], [3, 4], [5, 6]], dtype=np.float64)
    skewed_chi2_sampler_64.fit(X_64)

    assert_allclose(
        skewed_chi2_sampler_32.random_offset_, skewed_chi2_sampler_64.random_offset_
    )
    assert_allclose(
        skewed_chi2_sampler_32.random_weights_, skewed_chi2_sampler_64.random_weights_
    )


def test_input_validation():
    # Regression test: kernel approx. transformers should work on lists
    # No assertions; the old versions would simply crash
    X = [[1, 2], [3, 4], [5, 6]]
    AdditiveChi2Sampler().fit(X).transform(X)
    SkewedChi2Sampler().fit(X).transform(X)
    RBFSampler().fit(X).transform(X)

    X = csr_matrix(X)
    RBFSampler().fit(X).transform(X)


def test_nystroem_approximation():
    # some basic tests
    rnd = np.random.RandomState(0)
    X = rnd.uniform(size=(10, 4))

    # With n_components = n_samples this is exact
    X_transformed = Nystroem(n_components=X.shape[0]).fit_transform(X)
    K = rbf_kernel(X)
    assert_array_almost_equal(np.dot(X_transformed, X_transformed.T), K)

    trans = Nystroem(n_components=2, random_state=rnd)
    X_transformed = trans.fit(X).transform(X)
    assert X_transformed.shape == (X.shape[0], 2)

    # test callable kernel
    trans = Nystroem(n_components=2, kernel=_linear_kernel, random_state=rnd)
    X_transformed = trans.fit(X).transform(X)
    assert X_transformed.shape == (X.shape[0], 2)

    # test that available kernels fit and transform
    kernels_available = kernel_metrics()
    for kern in kernels_available:
        trans = Nystroem(n_components=2, kernel=kern, random_state=rnd)
        X_transformed = trans.fit(X).transform(X)
        assert X_transformed.shape == (X.shape[0], 2)
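

# For reference, a minimal sketch of the Nystroem construction (Williams &
# Seeger, 2001). Illustrative only, not the estimator's code: sample m
# landmark rows, then map x -> k(x, landmarks) @ K_mm^(-1/2), so that the
# feature inner products reproduce the kernel exactly on the landmarks.
def _nystroem_demo(X, n_components=5, gamma=None, seed=0):
    """Return features whose Gram matrix approximates rbf_kernel(X, X)."""
    rng = np.random.RandomState(seed)
    idx = rng.choice(X.shape[0], size=n_components, replace=False)
    landmarks = X[idx]
    K_mm = rbf_kernel(landmarks, landmarks, gamma=gamma)
    # pseudo-inverse square root via eigendecomposition; clipping the
    # eigenvalues keeps the map well defined for singular K_mm
    vals, vecs = np.linalg.eigh(K_mm)
    inv_sqrt = vecs @ np.diag(1.0 / np.sqrt(np.maximum(vals, 1e-12))) @ vecs.T
    return rbf_kernel(X, landmarks, gamma=gamma) @ inv_sqrt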


def test_nystroem_default_parameters():
    rnd = np.random.RandomState(42)
    X = rnd.uniform(size=(10, 4))

    # rbf kernel should behave as gamma=None by default
    # aka gamma = 1 / n_features
    nystroem = Nystroem(n_components=10)
    X_transformed = nystroem.fit_transform(X)
    K = rbf_kernel(X, gamma=None)
    K2 = np.dot(X_transformed, X_transformed.T)
    assert_array_almost_equal(K, K2)

    # chi2 kernel should behave as gamma=1 by default
    nystroem = Nystroem(kernel="chi2", n_components=10)
    X_transformed = nystroem.fit_transform(X)
    K = chi2_kernel(X, gamma=1)
    K2 = np.dot(X_transformed, X_transformed.T)
    assert_array_almost_equal(K, K2)


def test_nystroem_singular_kernel():
    # test that nystroem works with singular kernel matrix
    rng = np.random.RandomState(0)
    X = rng.rand(10, 20)
    X = np.vstack([X] * 2)  # duplicate samples

    gamma = 100
    N = Nystroem(gamma=gamma, n_components=X.shape[0]).fit(X)
    X_transformed = N.transform(X)

    K = rbf_kernel(X, gamma=gamma)

    assert_array_almost_equal(K, np.dot(X_transformed, X_transformed.T))
    assert np.all(np.isfinite(X_transformed))


def test_nystroem_poly_kernel_params():
    # Non-regression: Nystroem should pass other parameters beside gamma.
    rnd = np.random.RandomState(37)
    X = rnd.uniform(size=(10, 4))

    K = polynomial_kernel(X, degree=3.1, coef0=0.1)
    nystroem = Nystroem(
        kernel="polynomial", n_components=X.shape[0], degree=3.1, coef0=0.1
    )
    X_transformed = nystroem.fit_transform(X)
    assert_array_almost_equal(np.dot(X_transformed, X_transformed.T), K)


def test_nystroem_callable():
    # Test Nystroem on a callable.
    rnd = np.random.RandomState(42)
    n_samples = 10
    X = rnd.uniform(size=(n_samples, 4))

    def logging_histogram_kernel(x, y, log):
        """Histogram kernel that writes to a log."""
        log.append(1)
        return np.minimum(x, y).sum()

    kernel_log = []
    X = list(X)  # test input validation
    Nystroem(
        kernel=logging_histogram_kernel,
        n_components=(n_samples - 1),
        kernel_params={"log": kernel_log},
    ).fit(X)
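    # with m = n_samples - 1 basis points, the callable is evaluated once per
    # unordered pair of basis points, diagonal included:
    # m * (m + 1) / 2 == n_samples * (n_samples - 1) / 2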
    assert len(kernel_log) == n_samples * (n_samples - 1) / 2

    # if degree, gamma or coef0 is passed, we raise a ValueError
    msg = "Don't pass gamma, coef0 or degree to Nystroem"
    params = ({"gamma": 1}, {"coef0": 1}, {"degree": 2})
    for param in params:
        ny = Nystroem(kernel=_linear_kernel, n_components=(n_samples - 1), **param)
        with pytest.raises(ValueError, match=msg):
            ny.fit(X)


def test_nystroem_precomputed_kernel():
    # Non-regression: test Nystroem on precomputed kernel.
    # PR - 14706
    rnd = np.random.RandomState(12)
    X = rnd.uniform(size=(10, 4))

    K = polynomial_kernel(X, degree=2, coef0=0.1)
    nystroem = Nystroem(kernel="precomputed", n_components=X.shape[0])
    X_transformed = nystroem.fit_transform(K)
    assert_array_almost_equal(np.dot(X_transformed, X_transformed.T), K)

    # if degree, gamma or coef0 is passed, we raise a ValueError
    msg = "Don't pass gamma, coef0 or degree to Nystroem"
    params = ({"gamma": 1}, {"coef0": 1}, {"degree": 2})
    for param in params:
        ny = Nystroem(kernel="precomputed", n_components=X.shape[0], **param)
        with pytest.raises(ValueError, match=msg):
            ny.fit(K)


def test_nystroem_component_indices():
    """Check that `component_indices_` corresponds to the subset of
    training points used to construct the feature map.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/20474
    """
    X, _ = make_classification(n_samples=100, n_features=20)
    feature_map_nystroem = Nystroem(
        n_components=10,
        random_state=0,
    )
    feature_map_nystroem.fit(X)
    assert feature_map_nystroem.component_indices_.shape == (10,)


@pytest.mark.parametrize(
    "Estimator", [PolynomialCountSketch, RBFSampler, SkewedChi2Sampler, Nystroem]
)
def test_get_feature_names_out(Estimator):
    """Check get_feature_names_out"""
    est = Estimator().fit(X)
    X_trans = est.transform(X)

    names_out = est.get_feature_names_out()
    class_name = Estimator.__name__.lower()
    expected_names = [f"{class_name}{i}" for i in range(X_trans.shape[1])]
    assert_array_equal(names_out, expected_names)


def test_additivechi2sampler_get_feature_names_out():
    """Check get_feature_names_out for AdditiveChi2Sampler."""
    rng = np.random.RandomState(0)
    X = rng.random_sample(size=(300, 3))

    chi2_sampler = AdditiveChi2Sampler(sample_steps=3).fit(X)
    input_names = ["f0", "f1", "f2"]
    suffixes = [
        "f0_sqrt",
        "f1_sqrt",
        "f2_sqrt",
        "f0_cos1",
        "f1_cos1",
        "f2_cos1",
        "f0_sin1",
        "f1_sin1",
        "f2_sin1",
        "f0_cos2",
        "f1_cos2",
        "f2_cos2",
        "f0_sin2",
        "f1_sin2",
        "f2_sin2",
    ]

    names_out = chi2_sampler.get_feature_names_out(input_features=input_names)
    expected_names = [f"additivechi2sampler_{suffix}" for suffix in suffixes]
    assert_array_equal(names_out, expected_names)