| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400 |
- import numpy as np
- from ..base import BaseEstimator, ClassifierMixin
- from ..utils._metadata_requests import RequestMethod
- from .metaestimators import available_if
- from .validation import _check_sample_weight, _num_samples, check_array, check_is_fitted
class ArraySlicingWrapper:
    """Minimal ``.iloc``-style indexer around an array.

    Indexing an instance indexes the wrapped array with the same key and
    returns the result wrapped in a :class:`MockDataFrame`.

    Parameters
    ----------
    array
        The object to index into. It is stored unchanged on ``self.array``.
    """

    def __init__(self, array):
        self.array = array

    def __getitem__(self, aslice):
        # Delegate the actual indexing to the wrapped array, then re-wrap the
        # selection so that the caller gets a data-frame-like object back.
        selected = self.array[aslice]
        return MockDataFrame(selected)
class MockDataFrame:
    """Small data-frame look-alike wrapping an array.

    The wrapped object is exposed through ``array`` and ``values``; its
    ``shape`` and ``ndim`` are mirrored on the instance, and ``iloc`` gives
    an :class:`ArraySlicingWrapper` over the same object.

    Parameters
    ----------
    array
        The object to wrap. The code reads its ``shape`` and ``ndim``
        attributes and calls ``len``, ``==`` and ``take`` on it.
    """

    # Instances have a shape and a length but do not support indexing
    # themselves (there is no ``__getitem__``).
    def __init__(self, array):
        self.array = self.values = array
        self.shape, self.ndim = array.shape, array.ndim
        # ugly hack to make iloc work.
        self.iloc = ArraySlicingWrapper(array)

    def __len__(self):
        """Return the length of the wrapped array."""
        return len(self.array)

    def __array__(self, dtype=None):
        """Return the wrapped array itself; `dtype` is ignored."""
        # Pandas data frames also are array-like: we want to make sure that
        # input validation in cross-validation does not try to call that
        # method.
        return self.array

    def __eq__(self, other):
        """Compare the wrapped arrays and wrap the comparison result."""
        comparison = self.array == other.array
        return MockDataFrame(comparison)

    def __ne__(self, other):
        """Return the boolean negation of ``self == other``."""
        equal = self == other
        return not equal

    def take(self, indices, axis=0):
        """Call ``take`` on the wrapped array and wrap what it returns."""
        taken = self.array.take(indices, axis=axis)
        return MockDataFrame(taken)
class CheckingClassifier(ClassifierMixin, BaseEstimator):
    """Dummy classifier to test pipelining and meta-estimators.

    Checks some property of `X` and `y` in fit / predict.
    This allows testing whether pipelines / cross-validation or metaestimators
    changed the input.

    Can also be used to check if `fit_params` are passed correctly, and
    to force a certain score to be returned.

    Parameters
    ----------
    check_y, check_X : callable, default=None
        The callable used to validate `X` and `y`. When the callable returns
        a bool, `False` triggers an `AssertionError`. When it returns
        anything else, the returned object replaces the data for the rest of
        the method. If `None`, the data is not validated.

    check_y_params, check_X_params : dict, default=None
        The optional parameters to pass to `check_X` and `check_y`. If `None`,
        then no parameters are passed in.

    methods_to_check : "all" or list of str, default="all"
        The methods in which the checks should be applied. By default,
        all checks will be done on all methods (`fit`, `predict`,
        `predict_proba`, `decision_function` and `score`).

    foo_param : int, default=0
        A `foo` param. When `foo > 1`, the output of :meth:`score` will be 1
        otherwise it is 0.

    expected_sample_weight : bool, default=None
        Whether to check if a valid `sample_weight` was passed to `fit`.
        Any falsy value (including the default `None`) disables the check.

    expected_fit_params : list of str, default=None
        A list of the expected parameters given when calling `fit`.

    Attributes
    ----------
    classes_ : ndarray
        The unique values of `y` seen during `fit`.

    n_features_in_ : int
        The number of features seen during `fit`.

    Examples
    --------
    >>> from sklearn.utils._mocking import CheckingClassifier

    This helper allows asserting specific properties of `X` or `y`. In this
    case we expect `check_X` or `check_y` to return a boolean.

    >>> from sklearn.datasets import load_iris
    >>> X, y = load_iris(return_X_y=True)
    >>> clf = CheckingClassifier(check_X=lambda x: x.shape == (150, 4))
    >>> clf.fit(X, y)
    CheckingClassifier(...)

    We can also provide a check which might raise an error. In this case, we
    expect `check_X` to return `X` and `check_y` to return `y`.

    >>> from sklearn.utils import check_array
    >>> clf = CheckingClassifier(check_X=check_array)
    >>> clf.fit(X, y)
    CheckingClassifier(...)
    """

    def __init__(
        self,
        *,
        check_y=None,
        check_y_params=None,
        check_X=None,
        check_X_params=None,
        methods_to_check="all",
        foo_param=0,
        expected_sample_weight=None,
        expected_fit_params=None,
    ):
        self.check_y = check_y
        self.check_y_params = check_y_params
        self.check_X = check_X
        self.check_X_params = check_X_params
        self.methods_to_check = methods_to_check
        self.foo_param = foo_param
        self.expected_sample_weight = expected_sample_weight
        self.expected_fit_params = expected_fit_params

    @staticmethod
    def _apply_check(check, params, data, label):
        """Run one user-supplied check on `data` and return the data to use.

        Parameters
        ----------
        check : callable
            The check to call as ``check(data, **params)``.

        params : dict or None
            Keyword arguments for `check`; `None` means no extra arguments.

        data : object
            The `X` or `y` value being checked.

        label : str
            Either ``"X"`` or ``"y"``; only used in the error message.

        Returns
        -------
        data : object
            `data` unchanged when `check` returned a truthy bool, otherwise
            whatever `check` returned.

        Raises
        ------
        AssertionError
            If `check` returned a falsy bool.
        """
        kwargs = {} if params is None else params
        outcome = check(data, **kwargs)
        if isinstance(outcome, (bool, np.bool_)):
            # Raise explicitly instead of using ``assert`` so that the check
            # is still enforced when Python runs with ``-O``.
            if not outcome:
                raise AssertionError(f"`check_{label}` returned False.")
            return data
        return outcome

    def _check_X_y(self, X, y=None, should_be_fitted=True):
        """Validate X and y and make extra check.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The data set.
            `X` is checked only if `check_X` is not `None` (default is None).

        y : array-like of shape (n_samples), default=None
            The corresponding target, by default `None`.
            `y` is checked only if `check_y` is not `None` (default is None).

        should_be_fitted : bool, default=True
            Whether or not the classifier should be already fitted.
            By default True.

        Returns
        -------
        X, y
            The inputs, each possibly replaced by the non-boolean value
            returned by its check.

        Raises
        ------
        AssertionError
            If `check_X` or `check_y` returns a falsy bool.
        """
        if should_be_fitted:
            check_is_fitted(self)
        if self.check_X is not None:
            X = self._apply_check(self.check_X, self.check_X_params, X, "X")
        if y is not None and self.check_y is not None:
            y = self._apply_check(self.check_y, self.check_y_params, y, "y")
        return X, y

    def fit(self, X, y, sample_weight=None, **fit_params):
        """Fit classifier.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training vector, where `n_samples` is the number of samples and
            `n_features` is the number of features.

        y : array-like of shape (n_samples, n_outputs) or (n_samples,)
            Target relative to X. It must have as many samples as `X`.

        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights. It is only inspected when
            `expected_sample_weight` is truthy.

        **fit_params : dict of string -> object
            Extra parameters. They are only inspected when
            `expected_fit_params` is set, in which case each value must have
            the same number of samples as `X`.

        Returns
        -------
        self

        Raises
        ------
        AssertionError
            If `X` and `y` have different numbers of samples, if a check
            returns `False`, if an expected fit parameter is missing or has
            the wrong length, or if `sample_weight` is expected but missing.
        """
        n_samples_X, n_samples_y = _num_samples(X), _num_samples(y)
        # Explicit raise (rather than ``assert``) so the check survives ``-O``.
        if n_samples_X != n_samples_y:
            raise AssertionError(
                "X and y have inconsistent numbers of samples: "
                f"{n_samples_X} != {n_samples_y}."
            )
        if self.methods_to_check == "all" or "fit" in self.methods_to_check:
            X, y = self._check_X_y(X, y, should_be_fitted=False)
        self.n_features_in_ = np.shape(X)[1]
        self.classes_ = np.unique(check_array(y, ensure_2d=False, allow_nd=True))
        if self.expected_fit_params:
            missing = set(self.expected_fit_params) - set(fit_params)
            if missing:
                raise AssertionError(
                    f"Expected fit parameter(s) {list(missing)} not seen."
                )
            # `X` may have been replaced by the check above, so its length is
            # measured here rather than reused from the top of the method.
            expected_length = _num_samples(X)
            for key, value in fit_params.items():
                actual_length = _num_samples(value)
                if actual_length != expected_length:
                    raise AssertionError(
                        f"Fit parameter {key} has length {actual_length}"
                        f"; expected {expected_length}."
                    )
        if self.expected_sample_weight:
            if sample_weight is None:
                raise AssertionError("Expected sample_weight to be passed")
            _check_sample_weight(sample_weight, X)

        return self

    def predict(self, X):
        """Predict the first class seen in `classes_`.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The input data.

        Returns
        -------
        preds : ndarray of shape (n_samples,)
            Predictions of the first class seen in `classes_`.
        """
        if self.methods_to_check == "all" or "predict" in self.methods_to_check:
            X, _ = self._check_X_y(X)
        # Index `classes_` with zeros so every sample gets the first class.
        return self.classes_[np.zeros(_num_samples(X), dtype=int)]

    def predict_proba(self, X):
        """Predict probabilities for each class.

        Here, the dummy classifier will provide a probability of 1 for the
        first class of `classes_` and 0 otherwise.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The input data.

        Returns
        -------
        proba : ndarray of shape (n_samples, n_classes)
            The probabilities for each sample and class.
        """
        if self.methods_to_check == "all" or "predict_proba" in self.methods_to_check:
            X, _ = self._check_X_y(X)
        proba = np.zeros((_num_samples(X), len(self.classes_)))
        proba[:, 0] = 1
        return proba

    def decision_function(self, X):
        """Confidence score.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The input data.

        Returns
        -------
        decision : ndarray of shape (n_samples,) if n_classes == 2 \
                else (n_samples, n_classes)
            Confidence score.
        """
        if (
            self.methods_to_check == "all"
            or "decision_function" in self.methods_to_check
        ):
            X, _ = self._check_X_y(X)
        if len(self.classes_) == 2:
            # for binary classifier, the confidence score is related to
            # classes_[1] and therefore should be null.
            return np.zeros(_num_samples(X))
        else:
            decision = np.zeros((_num_samples(X), len(self.classes_)))
            decision[:, 0] = 1
            return decision

    def score(self, X=None, Y=None):
        """Fake score.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Input data, where `n_samples` is the number of samples and
            `n_features` is the number of features.

        Y : array-like of shape (n_samples, n_output) or (n_samples,)
            Target relative to X for classification or regression;
            None for unsupervised learning.

        Returns
        -------
        score : float
            Either 0 or 1 depending of `foo_param` (i.e. `foo_param > 1 =>
            score=1` otherwise `score=0`).
        """
        if self.methods_to_check == "all" or "score" in self.methods_to_check:
            self._check_X_y(X, Y)
        if self.foo_param > 1:
            score = 1.0
        else:
            score = 0.0
        return score

    def _more_tags(self):
        return {"_skip_test": True, "X_types": ["1dlabel"]}
# Deactivate key validation for CheckingClassifier because we want to be able to
# call fit with arbitrary fit_params and record them. Without this change, we
# would get an error because those arbitrary params are not expected.
# The class attribute `set_fit_request` is overwritten with a `RequestMethod`
# for "fit" that declares no keys (`keys=[]`) and sets `validate_keys=False`.
CheckingClassifier.set_fit_request = RequestMethod(  # type: ignore
    name="fit", keys=[], validate_keys=False
)
class NoSampleWeightWrapper(BaseEstimator):
    """Wrap estimator which will not expose `sample_weight`.

    `fit` only accepts `X` and `y`, and every call is forwarded to the
    wrapped estimator.

    Parameters
    ----------
    est : estimator, default=None
        The estimator to wrap.
    """

    def __init__(self, est=None):
        self.est = est

    def fit(self, X, y):
        """Fit the wrapped estimator on `X` and `y` and return its result."""
        wrapped = self.est
        return wrapped.fit(X, y)

    def predict(self, X):
        """Return the wrapped estimator's `predict` output for `X`."""
        wrapped = self.est
        return wrapped.predict(X)

    def predict_proba(self, X):
        """Return the wrapped estimator's `predict_proba` output for `X`."""
        wrapped = self.est
        return wrapped.predict_proba(X)

    def _more_tags(self):
        tags = {"_skip_test": True}
        return tags
- def _check_response(method):
- def check(self):
- return self.response_methods is not None and method in self.response_methods
- return check
class _MockEstimatorOnOffPrediction(BaseEstimator):
    """Estimator for which we can turn on/off the prediction methods.

    Parameters
    ----------
    response_methods : list of \
            {"predict", "predict_proba", "decision_function"}, default=None
        List containing the responses implemented by the estimator. When a
        response is in the list, the matching method returns its own name
        when called. Otherwise, an `AttributeError` is raised, which allows
        using `getattr` as with any conventional estimator. By default, no
        response methods are mocked.
    """

    def __init__(self, response_methods=None):
        self.response_methods = response_methods

    def fit(self, X, y):
        """Store the unique values of `y` in `classes_` and return self."""
        unique_targets = np.unique(y)
        self.classes_ = unique_targets
        return self

    @available_if(_check_response("predict"))
    def predict(self, X):
        """Return the literal method name instead of real predictions."""
        name = "predict"
        return name

    @available_if(_check_response("predict_proba"))
    def predict_proba(self, X):
        """Return the literal method name instead of real probabilities."""
        name = "predict_proba"
        return name

    @available_if(_check_response("decision_function"))
    def decision_function(self, X):
        """Return the literal method name instead of real scores."""
        name = "decision_function"
        return name
|