test_gpc.py 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288
  1. """Testing for Gaussian process classification """
  2. # Author: Jan Hendrik Metzen <jhm@informatik.uni-bremen.de>
  3. # License: BSD 3 clause
  4. import warnings
  5. import numpy as np
  6. import pytest
  7. from scipy.optimize import approx_fprime
  8. from sklearn.exceptions import ConvergenceWarning
  9. from sklearn.gaussian_process import GaussianProcessClassifier
  10. from sklearn.gaussian_process.kernels import (
  11. RBF,
  12. CompoundKernel,
  13. WhiteKernel,
  14. )
  15. from sklearn.gaussian_process.kernels import (
  16. ConstantKernel as C,
  17. )
  18. from sklearn.gaussian_process.tests._mini_sequence_kernel import MiniSeqKernel
  19. from sklearn.utils._testing import assert_almost_equal, assert_array_equal
  20. def f(x):
  21. return np.sin(x)
  22. X = np.atleast_2d(np.linspace(0, 10, 30)).T
  23. X2 = np.atleast_2d([2.0, 4.0, 5.5, 6.5, 7.5]).T
  24. y = np.array(f(X).ravel() > 0, dtype=int)
  25. fX = f(X).ravel()
  26. y_mc = np.empty(y.shape, dtype=int) # multi-class
  27. y_mc[fX < -0.35] = 0
  28. y_mc[(fX >= -0.35) & (fX < 0.35)] = 1
  29. y_mc[fX > 0.35] = 2
  30. fixed_kernel = RBF(length_scale=1.0, length_scale_bounds="fixed")
  31. kernels = [
  32. RBF(length_scale=0.1),
  33. fixed_kernel,
  34. RBF(length_scale=1.0, length_scale_bounds=(1e-3, 1e3)),
  35. C(1.0, (1e-2, 1e2)) * RBF(length_scale=1.0, length_scale_bounds=(1e-3, 1e3)),
  36. ]
  37. non_fixed_kernels = [kernel for kernel in kernels if kernel != fixed_kernel]
  38. @pytest.mark.parametrize("kernel", kernels)
  39. def test_predict_consistent(kernel):
  40. # Check binary predict decision has also predicted probability above 0.5.
  41. gpc = GaussianProcessClassifier(kernel=kernel).fit(X, y)
  42. assert_array_equal(gpc.predict(X), gpc.predict_proba(X)[:, 1] >= 0.5)
  43. def test_predict_consistent_structured():
  44. # Check binary predict decision has also predicted probability above 0.5.
  45. X = ["A", "AB", "B"]
  46. y = np.array([True, False, True])
  47. kernel = MiniSeqKernel(baseline_similarity_bounds="fixed")
  48. gpc = GaussianProcessClassifier(kernel=kernel).fit(X, y)
  49. assert_array_equal(gpc.predict(X), gpc.predict_proba(X)[:, 1] >= 0.5)
  50. @pytest.mark.parametrize("kernel", non_fixed_kernels)
  51. def test_lml_improving(kernel):
  52. # Test that hyperparameter-tuning improves log-marginal likelihood.
  53. gpc = GaussianProcessClassifier(kernel=kernel).fit(X, y)
  54. assert gpc.log_marginal_likelihood(gpc.kernel_.theta) > gpc.log_marginal_likelihood(
  55. kernel.theta
  56. )
  57. @pytest.mark.parametrize("kernel", kernels)
  58. def test_lml_precomputed(kernel):
  59. # Test that lml of optimized kernel is stored correctly.
  60. gpc = GaussianProcessClassifier(kernel=kernel).fit(X, y)
  61. assert_almost_equal(
  62. gpc.log_marginal_likelihood(gpc.kernel_.theta), gpc.log_marginal_likelihood(), 7
  63. )
  64. @pytest.mark.parametrize("kernel", kernels)
  65. def test_lml_without_cloning_kernel(kernel):
  66. # Test that clone_kernel=False has side-effects of kernel.theta.
  67. gpc = GaussianProcessClassifier(kernel=kernel).fit(X, y)
  68. input_theta = np.ones(gpc.kernel_.theta.shape, dtype=np.float64)
  69. gpc.log_marginal_likelihood(input_theta, clone_kernel=False)
  70. assert_almost_equal(gpc.kernel_.theta, input_theta, 7)
  71. @pytest.mark.parametrize("kernel", non_fixed_kernels)
  72. def test_converged_to_local_maximum(kernel):
  73. # Test that we are in local maximum after hyperparameter-optimization.
  74. gpc = GaussianProcessClassifier(kernel=kernel).fit(X, y)
  75. lml, lml_gradient = gpc.log_marginal_likelihood(gpc.kernel_.theta, True)
  76. assert np.all(
  77. (np.abs(lml_gradient) < 1e-4)
  78. | (gpc.kernel_.theta == gpc.kernel_.bounds[:, 0])
  79. | (gpc.kernel_.theta == gpc.kernel_.bounds[:, 1])
  80. )
  81. @pytest.mark.parametrize("kernel", kernels)
  82. def test_lml_gradient(kernel):
  83. # Compare analytic and numeric gradient of log marginal likelihood.
  84. gpc = GaussianProcessClassifier(kernel=kernel).fit(X, y)
  85. lml, lml_gradient = gpc.log_marginal_likelihood(kernel.theta, True)
  86. lml_gradient_approx = approx_fprime(
  87. kernel.theta, lambda theta: gpc.log_marginal_likelihood(theta, False), 1e-10
  88. )
  89. assert_almost_equal(lml_gradient, lml_gradient_approx, 3)
  90. def test_random_starts(global_random_seed):
  91. # Test that an increasing number of random-starts of GP fitting only
  92. # increases the log marginal likelihood of the chosen theta.
  93. n_samples, n_features = 25, 2
  94. rng = np.random.RandomState(global_random_seed)
  95. X = rng.randn(n_samples, n_features) * 2 - 1
  96. y = (np.sin(X).sum(axis=1) + np.sin(3 * X).sum(axis=1)) > 0
  97. kernel = C(1.0, (1e-2, 1e2)) * RBF(
  98. length_scale=[1e-3] * n_features, length_scale_bounds=[(1e-4, 1e2)] * n_features
  99. )
  100. last_lml = -np.inf
  101. for n_restarts_optimizer in range(5):
  102. gp = GaussianProcessClassifier(
  103. kernel=kernel,
  104. n_restarts_optimizer=n_restarts_optimizer,
  105. random_state=global_random_seed,
  106. ).fit(X, y)
  107. lml = gp.log_marginal_likelihood(gp.kernel_.theta)
  108. assert lml > last_lml - np.finfo(np.float32).eps
  109. last_lml = lml
  110. @pytest.mark.parametrize("kernel", non_fixed_kernels)
  111. def test_custom_optimizer(kernel, global_random_seed):
  112. # Test that GPC can use externally defined optimizers.
  113. # Define a dummy optimizer that simply tests 10 random hyperparameters
  114. def optimizer(obj_func, initial_theta, bounds):
  115. rng = np.random.RandomState(global_random_seed)
  116. theta_opt, func_min = initial_theta, obj_func(
  117. initial_theta, eval_gradient=False
  118. )
  119. for _ in range(10):
  120. theta = np.atleast_1d(
  121. rng.uniform(np.maximum(-2, bounds[:, 0]), np.minimum(1, bounds[:, 1]))
  122. )
  123. f = obj_func(theta, eval_gradient=False)
  124. if f < func_min:
  125. theta_opt, func_min = theta, f
  126. return theta_opt, func_min
  127. gpc = GaussianProcessClassifier(kernel=kernel, optimizer=optimizer)
  128. gpc.fit(X, y_mc)
  129. # Checks that optimizer improved marginal likelihood
  130. assert gpc.log_marginal_likelihood(
  131. gpc.kernel_.theta
  132. ) >= gpc.log_marginal_likelihood(kernel.theta)
  133. @pytest.mark.parametrize("kernel", kernels)
  134. def test_multi_class(kernel):
  135. # Test GPC for multi-class classification problems.
  136. gpc = GaussianProcessClassifier(kernel=kernel)
  137. gpc.fit(X, y_mc)
  138. y_prob = gpc.predict_proba(X2)
  139. assert_almost_equal(y_prob.sum(1), 1)
  140. y_pred = gpc.predict(X2)
  141. assert_array_equal(np.argmax(y_prob, 1), y_pred)
  142. @pytest.mark.parametrize("kernel", kernels)
  143. def test_multi_class_n_jobs(kernel):
  144. # Test that multi-class GPC produces identical results with n_jobs>1.
  145. gpc = GaussianProcessClassifier(kernel=kernel)
  146. gpc.fit(X, y_mc)
  147. gpc_2 = GaussianProcessClassifier(kernel=kernel, n_jobs=2)
  148. gpc_2.fit(X, y_mc)
  149. y_prob = gpc.predict_proba(X2)
  150. y_prob_2 = gpc_2.predict_proba(X2)
  151. assert_almost_equal(y_prob, y_prob_2)
  152. def test_warning_bounds():
  153. kernel = RBF(length_scale_bounds=[1e-5, 1e-3])
  154. gpc = GaussianProcessClassifier(kernel=kernel)
  155. warning_message = (
  156. "The optimal value found for dimension 0 of parameter "
  157. "length_scale is close to the specified upper bound "
  158. "0.001. Increasing the bound and calling fit again may "
  159. "find a better value."
  160. )
  161. with pytest.warns(ConvergenceWarning, match=warning_message):
  162. gpc.fit(X, y)
  163. kernel_sum = WhiteKernel(noise_level_bounds=[1e-5, 1e-3]) + RBF(
  164. length_scale_bounds=[1e3, 1e5]
  165. )
  166. gpc_sum = GaussianProcessClassifier(kernel=kernel_sum)
  167. with warnings.catch_warnings(record=True) as record:
  168. warnings.simplefilter("always")
  169. gpc_sum.fit(X, y)
  170. assert len(record) == 2
  171. assert issubclass(record[0].category, ConvergenceWarning)
  172. assert (
  173. record[0].message.args[0]
  174. == "The optimal value found for "
  175. "dimension 0 of parameter "
  176. "k1__noise_level is close to the "
  177. "specified upper bound 0.001. "
  178. "Increasing the bound and calling "
  179. "fit again may find a better value."
  180. )
  181. assert issubclass(record[1].category, ConvergenceWarning)
  182. assert (
  183. record[1].message.args[0]
  184. == "The optimal value found for "
  185. "dimension 0 of parameter "
  186. "k2__length_scale is close to the "
  187. "specified lower bound 1000.0. "
  188. "Decreasing the bound and calling "
  189. "fit again may find a better value."
  190. )
  191. X_tile = np.tile(X, 2)
  192. kernel_dims = RBF(length_scale=[1.0, 2.0], length_scale_bounds=[1e1, 1e2])
  193. gpc_dims = GaussianProcessClassifier(kernel=kernel_dims)
  194. with warnings.catch_warnings(record=True) as record:
  195. warnings.simplefilter("always")
  196. gpc_dims.fit(X_tile, y)
  197. assert len(record) == 2
  198. assert issubclass(record[0].category, ConvergenceWarning)
  199. assert (
  200. record[0].message.args[0]
  201. == "The optimal value found for "
  202. "dimension 0 of parameter "
  203. "length_scale is close to the "
  204. "specified upper bound 100.0. "
  205. "Increasing the bound and calling "
  206. "fit again may find a better value."
  207. )
  208. assert issubclass(record[1].category, ConvergenceWarning)
  209. assert (
  210. record[1].message.args[0]
  211. == "The optimal value found for "
  212. "dimension 1 of parameter "
  213. "length_scale is close to the "
  214. "specified upper bound 100.0. "
  215. "Increasing the bound and calling "
  216. "fit again may find a better value."
  217. )
  218. @pytest.mark.parametrize(
  219. "params, error_type, err_msg",
  220. [
  221. (
  222. {"kernel": CompoundKernel(0)},
  223. ValueError,
  224. "kernel cannot be a CompoundKernel",
  225. )
  226. ],
  227. )
  228. def test_gpc_fit_error(params, error_type, err_msg):
  229. """Check that expected error are raised during fit."""
  230. gpc = GaussianProcessClassifier(**params)
  231. with pytest.raises(error_type, match=err_msg):
  232. gpc.fit(X, y)