# Tests for sklearn.utils._mocking (test_mocking.py)
  1. import numpy as np
  2. import pytest
  3. from numpy.testing import assert_allclose, assert_array_equal
  4. from scipy import sparse
  5. from sklearn.datasets import load_iris
  6. from sklearn.utils import _safe_indexing, check_array
  7. from sklearn.utils._mocking import (
  8. CheckingClassifier,
  9. _MockEstimatorOnOffPrediction,
  10. )
  11. from sklearn.utils._testing import _convert_container
  12. @pytest.fixture
  13. def iris():
  14. return load_iris(return_X_y=True)
  15. def _success(x):
  16. return True
  17. def _fail(x):
  18. return False
  19. @pytest.mark.parametrize(
  20. "kwargs",
  21. [
  22. {},
  23. {"check_X": _success},
  24. {"check_y": _success},
  25. {"check_X": _success, "check_y": _success},
  26. ],
  27. )
  28. def test_check_on_fit_success(iris, kwargs):
  29. X, y = iris
  30. CheckingClassifier(**kwargs).fit(X, y)
  31. @pytest.mark.parametrize(
  32. "kwargs",
  33. [
  34. {"check_X": _fail},
  35. {"check_y": _fail},
  36. {"check_X": _success, "check_y": _fail},
  37. {"check_X": _fail, "check_y": _success},
  38. {"check_X": _fail, "check_y": _fail},
  39. ],
  40. )
  41. def test_check_on_fit_fail(iris, kwargs):
  42. X, y = iris
  43. clf = CheckingClassifier(**kwargs)
  44. with pytest.raises(AssertionError):
  45. clf.fit(X, y)
  46. @pytest.mark.parametrize(
  47. "pred_func", ["predict", "predict_proba", "decision_function", "score"]
  48. )
  49. def test_check_X_on_predict_success(iris, pred_func):
  50. X, y = iris
  51. clf = CheckingClassifier(check_X=_success).fit(X, y)
  52. getattr(clf, pred_func)(X)
  53. @pytest.mark.parametrize(
  54. "pred_func", ["predict", "predict_proba", "decision_function", "score"]
  55. )
  56. def test_check_X_on_predict_fail(iris, pred_func):
  57. X, y = iris
  58. clf = CheckingClassifier(check_X=_success).fit(X, y)
  59. clf.set_params(check_X=_fail)
  60. with pytest.raises(AssertionError):
  61. getattr(clf, pred_func)(X)
  62. @pytest.mark.parametrize("input_type", ["list", "array", "sparse", "dataframe"])
  63. def test_checking_classifier(iris, input_type):
  64. # Check that the CheckingClassifier outputs what we expect
  65. X, y = iris
  66. X = _convert_container(X, input_type)
  67. clf = CheckingClassifier()
  68. clf.fit(X, y)
  69. assert_array_equal(clf.classes_, np.unique(y))
  70. assert len(clf.classes_) == 3
  71. assert clf.n_features_in_ == 4
  72. y_pred = clf.predict(X)
  73. assert_array_equal(y_pred, np.zeros(y_pred.size, dtype=int))
  74. assert clf.score(X) == pytest.approx(0)
  75. clf.set_params(foo_param=10)
  76. assert clf.fit(X, y).score(X) == pytest.approx(1)
  77. y_proba = clf.predict_proba(X)
  78. assert y_proba.shape == (150, 3)
  79. assert_allclose(y_proba[:, 0], 1)
  80. assert_allclose(y_proba[:, 1:], 0)
  81. y_decision = clf.decision_function(X)
  82. assert y_decision.shape == (150, 3)
  83. assert_allclose(y_decision[:, 0], 1)
  84. assert_allclose(y_decision[:, 1:], 0)
  85. # check the shape in case of binary classification
  86. first_2_classes = np.logical_or(y == 0, y == 1)
  87. X = _safe_indexing(X, first_2_classes)
  88. y = _safe_indexing(y, first_2_classes)
  89. clf.fit(X, y)
  90. y_proba = clf.predict_proba(X)
  91. assert y_proba.shape == (100, 2)
  92. assert_allclose(y_proba[:, 0], 1)
  93. assert_allclose(y_proba[:, 1], 0)
  94. y_decision = clf.decision_function(X)
  95. assert y_decision.shape == (100,)
  96. assert_allclose(y_decision, 0)
  97. def test_checking_classifier_with_params(iris):
  98. X, y = iris
  99. X_sparse = sparse.csr_matrix(X)
  100. clf = CheckingClassifier(check_X=sparse.issparse)
  101. with pytest.raises(AssertionError):
  102. clf.fit(X, y)
  103. clf.fit(X_sparse, y)
  104. clf = CheckingClassifier(
  105. check_X=check_array, check_X_params={"accept_sparse": False}
  106. )
  107. clf.fit(X, y)
  108. with pytest.raises(TypeError, match="A sparse matrix was passed"):
  109. clf.fit(X_sparse, y)
  110. def test_checking_classifier_fit_params(iris):
  111. # check the error raised when the number of samples is not the one expected
  112. X, y = iris
  113. clf = CheckingClassifier(expected_sample_weight=True)
  114. sample_weight = np.ones(len(X) // 2)
  115. msg = f"sample_weight.shape == ({len(X) // 2},), expected ({len(X)},)!"
  116. with pytest.raises(ValueError) as exc:
  117. clf.fit(X, y, sample_weight=sample_weight)
  118. assert exc.value.args[0] == msg
  119. def test_checking_classifier_missing_fit_params(iris):
  120. X, y = iris
  121. clf = CheckingClassifier(expected_sample_weight=True)
  122. err_msg = "Expected sample_weight to be passed"
  123. with pytest.raises(AssertionError, match=err_msg):
  124. clf.fit(X, y)
  125. @pytest.mark.parametrize(
  126. "methods_to_check",
  127. [["predict"], ["predict", "predict_proba"]],
  128. )
  129. @pytest.mark.parametrize(
  130. "predict_method", ["predict", "predict_proba", "decision_function", "score"]
  131. )
  132. def test_checking_classifier_methods_to_check(iris, methods_to_check, predict_method):
  133. # check that methods_to_check allows to bypass checks
  134. X, y = iris
  135. clf = CheckingClassifier(
  136. check_X=sparse.issparse,
  137. methods_to_check=methods_to_check,
  138. )
  139. clf.fit(X, y)
  140. if predict_method in methods_to_check:
  141. with pytest.raises(AssertionError):
  142. getattr(clf, predict_method)(X)
  143. else:
  144. getattr(clf, predict_method)(X)
  145. @pytest.mark.parametrize(
  146. "response_methods",
  147. [
  148. ["predict"],
  149. ["predict", "predict_proba"],
  150. ["predict", "decision_function"],
  151. ["predict", "predict_proba", "decision_function"],
  152. ],
  153. )
  154. def test_mock_estimator_on_off_prediction(iris, response_methods):
  155. X, y = iris
  156. estimator = _MockEstimatorOnOffPrediction(response_methods=response_methods)
  157. estimator.fit(X, y)
  158. assert hasattr(estimator, "classes_")
  159. assert_array_equal(estimator.classes_, np.unique(y))
  160. possible_responses = ["predict", "predict_proba", "decision_function"]
  161. for response in possible_responses:
  162. if response in response_methods:
  163. assert hasattr(estimator, response)
  164. assert getattr(estimator, response)(X) == response
  165. else:
  166. assert not hasattr(estimator, response)