_mocking.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400
  1. import numpy as np
  2. from ..base import BaseEstimator, ClassifierMixin
  3. from ..utils._metadata_requests import RequestMethod
  4. from .metaestimators import available_if
  5. from .validation import _check_sample_weight, _num_samples, check_array, check_is_fitted
  6. class ArraySlicingWrapper:
  7. """
  8. Parameters
  9. ----------
  10. array
  11. """
  12. def __init__(self, array):
  13. self.array = array
  14. def __getitem__(self, aslice):
  15. return MockDataFrame(self.array[aslice])
  16. class MockDataFrame:
  17. """
  18. Parameters
  19. ----------
  20. array
  21. """
  22. # have shape and length but don't support indexing.
  23. def __init__(self, array):
  24. self.array = array
  25. self.values = array
  26. self.shape = array.shape
  27. self.ndim = array.ndim
  28. # ugly hack to make iloc work.
  29. self.iloc = ArraySlicingWrapper(array)
  30. def __len__(self):
  31. return len(self.array)
  32. def __array__(self, dtype=None):
  33. # Pandas data frames also are array-like: we want to make sure that
  34. # input validation in cross-validation does not try to call that
  35. # method.
  36. return self.array
  37. def __eq__(self, other):
  38. return MockDataFrame(self.array == other.array)
  39. def __ne__(self, other):
  40. return not self == other
  41. def take(self, indices, axis=0):
  42. return MockDataFrame(self.array.take(indices, axis=axis))
  43. class CheckingClassifier(ClassifierMixin, BaseEstimator):
  44. """Dummy classifier to test pipelining and meta-estimators.
  45. Checks some property of `X` and `y`in fit / predict.
  46. This allows testing whether pipelines / cross-validation or metaestimators
  47. changed the input.
  48. Can also be used to check if `fit_params` are passed correctly, and
  49. to force a certain score to be returned.
  50. Parameters
  51. ----------
  52. check_y, check_X : callable, default=None
  53. The callable used to validate `X` and `y`. These callable should return
  54. a bool where `False` will trigger an `AssertionError`. If `None`, the
  55. data is not validated. Default is `None`.
  56. check_y_params, check_X_params : dict, default=None
  57. The optional parameters to pass to `check_X` and `check_y`. If `None`,
  58. then no parameters are passed in.
  59. methods_to_check : "all" or list of str, default="all"
  60. The methods in which the checks should be applied. By default,
  61. all checks will be done on all methods (`fit`, `predict`,
  62. `predict_proba`, `decision_function` and `score`).
  63. foo_param : int, default=0
  64. A `foo` param. When `foo > 1`, the output of :meth:`score` will be 1
  65. otherwise it is 0.
  66. expected_sample_weight : bool, default=False
  67. Whether to check if a valid `sample_weight` was passed to `fit`.
  68. expected_fit_params : list of str, default=None
  69. A list of the expected parameters given when calling `fit`.
  70. Attributes
  71. ----------
  72. classes_ : int
  73. The classes seen during `fit`.
  74. n_features_in_ : int
  75. The number of features seen during `fit`.
  76. Examples
  77. --------
  78. >>> from sklearn.utils._mocking import CheckingClassifier
  79. This helper allow to assert to specificities regarding `X` or `y`. In this
  80. case we expect `check_X` or `check_y` to return a boolean.
  81. >>> from sklearn.datasets import load_iris
  82. >>> X, y = load_iris(return_X_y=True)
  83. >>> clf = CheckingClassifier(check_X=lambda x: x.shape == (150, 4))
  84. >>> clf.fit(X, y)
  85. CheckingClassifier(...)
  86. We can also provide a check which might raise an error. In this case, we
  87. expect `check_X` to return `X` and `check_y` to return `y`.
  88. >>> from sklearn.utils import check_array
  89. >>> clf = CheckingClassifier(check_X=check_array)
  90. >>> clf.fit(X, y)
  91. CheckingClassifier(...)
  92. """
  93. def __init__(
  94. self,
  95. *,
  96. check_y=None,
  97. check_y_params=None,
  98. check_X=None,
  99. check_X_params=None,
  100. methods_to_check="all",
  101. foo_param=0,
  102. expected_sample_weight=None,
  103. expected_fit_params=None,
  104. ):
  105. self.check_y = check_y
  106. self.check_y_params = check_y_params
  107. self.check_X = check_X
  108. self.check_X_params = check_X_params
  109. self.methods_to_check = methods_to_check
  110. self.foo_param = foo_param
  111. self.expected_sample_weight = expected_sample_weight
  112. self.expected_fit_params = expected_fit_params
  113. def _check_X_y(self, X, y=None, should_be_fitted=True):
  114. """Validate X and y and make extra check.
  115. Parameters
  116. ----------
  117. X : array-like of shape (n_samples, n_features)
  118. The data set.
  119. `X` is checked only if `check_X` is not `None` (default is None).
  120. y : array-like of shape (n_samples), default=None
  121. The corresponding target, by default `None`.
  122. `y` is checked only if `check_y` is not `None` (default is None).
  123. should_be_fitted : bool, default=True
  124. Whether or not the classifier should be already fitted.
  125. By default True.
  126. Returns
  127. -------
  128. X, y
  129. """
  130. if should_be_fitted:
  131. check_is_fitted(self)
  132. if self.check_X is not None:
  133. params = {} if self.check_X_params is None else self.check_X_params
  134. checked_X = self.check_X(X, **params)
  135. if isinstance(checked_X, (bool, np.bool_)):
  136. assert checked_X
  137. else:
  138. X = checked_X
  139. if y is not None and self.check_y is not None:
  140. params = {} if self.check_y_params is None else self.check_y_params
  141. checked_y = self.check_y(y, **params)
  142. if isinstance(checked_y, (bool, np.bool_)):
  143. assert checked_y
  144. else:
  145. y = checked_y
  146. return X, y
  147. def fit(self, X, y, sample_weight=None, **fit_params):
  148. """Fit classifier.
  149. Parameters
  150. ----------
  151. X : array-like of shape (n_samples, n_features)
  152. Training vector, where `n_samples` is the number of samples and
  153. `n_features` is the number of features.
  154. y : array-like of shape (n_samples, n_outputs) or (n_samples,), \
  155. default=None
  156. Target relative to X for classification or regression;
  157. None for unsupervised learning.
  158. sample_weight : array-like of shape (n_samples,), default=None
  159. Sample weights. If None, then samples are equally weighted.
  160. **fit_params : dict of string -> object
  161. Parameters passed to the ``fit`` method of the estimator
  162. Returns
  163. -------
  164. self
  165. """
  166. assert _num_samples(X) == _num_samples(y)
  167. if self.methods_to_check == "all" or "fit" in self.methods_to_check:
  168. X, y = self._check_X_y(X, y, should_be_fitted=False)
  169. self.n_features_in_ = np.shape(X)[1]
  170. self.classes_ = np.unique(check_array(y, ensure_2d=False, allow_nd=True))
  171. if self.expected_fit_params:
  172. missing = set(self.expected_fit_params) - set(fit_params)
  173. if missing:
  174. raise AssertionError(
  175. f"Expected fit parameter(s) {list(missing)} not seen."
  176. )
  177. for key, value in fit_params.items():
  178. if _num_samples(value) != _num_samples(X):
  179. raise AssertionError(
  180. f"Fit parameter {key} has length {_num_samples(value)}"
  181. f"; expected {_num_samples(X)}."
  182. )
  183. if self.expected_sample_weight:
  184. if sample_weight is None:
  185. raise AssertionError("Expected sample_weight to be passed")
  186. _check_sample_weight(sample_weight, X)
  187. return self
  188. def predict(self, X):
  189. """Predict the first class seen in `classes_`.
  190. Parameters
  191. ----------
  192. X : array-like of shape (n_samples, n_features)
  193. The input data.
  194. Returns
  195. -------
  196. preds : ndarray of shape (n_samples,)
  197. Predictions of the first class seens in `classes_`.
  198. """
  199. if self.methods_to_check == "all" or "predict" in self.methods_to_check:
  200. X, y = self._check_X_y(X)
  201. return self.classes_[np.zeros(_num_samples(X), dtype=int)]
  202. def predict_proba(self, X):
  203. """Predict probabilities for each class.
  204. Here, the dummy classifier will provide a probability of 1 for the
  205. first class of `classes_` and 0 otherwise.
  206. Parameters
  207. ----------
  208. X : array-like of shape (n_samples, n_features)
  209. The input data.
  210. Returns
  211. -------
  212. proba : ndarray of shape (n_samples, n_classes)
  213. The probabilities for each sample and class.
  214. """
  215. if self.methods_to_check == "all" or "predict_proba" in self.methods_to_check:
  216. X, y = self._check_X_y(X)
  217. proba = np.zeros((_num_samples(X), len(self.classes_)))
  218. proba[:, 0] = 1
  219. return proba
  220. def decision_function(self, X):
  221. """Confidence score.
  222. Parameters
  223. ----------
  224. X : array-like of shape (n_samples, n_features)
  225. The input data.
  226. Returns
  227. -------
  228. decision : ndarray of shape (n_samples,) if n_classes == 2\
  229. else (n_samples, n_classes)
  230. Confidence score.
  231. """
  232. if (
  233. self.methods_to_check == "all"
  234. or "decision_function" in self.methods_to_check
  235. ):
  236. X, y = self._check_X_y(X)
  237. if len(self.classes_) == 2:
  238. # for binary classifier, the confidence score is related to
  239. # classes_[1] and therefore should be null.
  240. return np.zeros(_num_samples(X))
  241. else:
  242. decision = np.zeros((_num_samples(X), len(self.classes_)))
  243. decision[:, 0] = 1
  244. return decision
  245. def score(self, X=None, Y=None):
  246. """Fake score.
  247. Parameters
  248. ----------
  249. X : array-like of shape (n_samples, n_features)
  250. Input data, where `n_samples` is the number of samples and
  251. `n_features` is the number of features.
  252. Y : array-like of shape (n_samples, n_output) or (n_samples,)
  253. Target relative to X for classification or regression;
  254. None for unsupervised learning.
  255. Returns
  256. -------
  257. score : float
  258. Either 0 or 1 depending of `foo_param` (i.e. `foo_param > 1 =>
  259. score=1` otherwise `score=0`).
  260. """
  261. if self.methods_to_check == "all" or "score" in self.methods_to_check:
  262. self._check_X_y(X, Y)
  263. if self.foo_param > 1:
  264. score = 1.0
  265. else:
  266. score = 0.0
  267. return score
  268. def _more_tags(self):
  269. return {"_skip_test": True, "X_types": ["1dlabel"]}
# Deactivate key validation for CheckingClassifier because we want to be able to
# call fit with arbitrary fit_params and record them. Without this change, we
# would get an error because those arbitrary params are not expected.
# This replaces the class attribute `set_fit_request` with a `RequestMethod`
# for "fit" that declares no keys and is built with `validate_keys=False`.
CheckingClassifier.set_fit_request = RequestMethod(  # type: ignore
    name="fit", keys=[], validate_keys=False
)
  276. class NoSampleWeightWrapper(BaseEstimator):
  277. """Wrap estimator which will not expose `sample_weight`.
  278. Parameters
  279. ----------
  280. est : estimator, default=None
  281. The estimator to wrap.
  282. """
  283. def __init__(self, est=None):
  284. self.est = est
  285. def fit(self, X, y):
  286. return self.est.fit(X, y)
  287. def predict(self, X):
  288. return self.est.predict(X)
  289. def predict_proba(self, X):
  290. return self.est.predict_proba(X)
  291. def _more_tags(self):
  292. return {"_skip_test": True}
  293. def _check_response(method):
  294. def check(self):
  295. return self.response_methods is not None and method in self.response_methods
  296. return check
  297. class _MockEstimatorOnOffPrediction(BaseEstimator):
  298. """Estimator for which we can turn on/off the prediction methods.
  299. Parameters
  300. ----------
  301. response_methods: list of \
  302. {"predict", "predict_proba", "decision_function"}, default=None
  303. List containing the response implemented by the estimator. When, the
  304. response is in the list, it will return the name of the response method
  305. when called. Otherwise, an `AttributeError` is raised. It allows to
  306. use `getattr` as any conventional estimator. By default, no response
  307. methods are mocked.
  308. """
  309. def __init__(self, response_methods=None):
  310. self.response_methods = response_methods
  311. def fit(self, X, y):
  312. self.classes_ = np.unique(y)
  313. return self
  314. @available_if(_check_response("predict"))
  315. def predict(self, X):
  316. return "predict"
  317. @available_if(_check_response("predict_proba"))
  318. def predict_proba(self, X):
  319. return "predict_proba"
  320. @available_if(_check_response("decision_function"))
  321. def decision_function(self, X):
  322. return "decision_function"