- """
- Multiclass classification strategies
- ====================================
- This module implements multiclass learning algorithms:
- - one-vs-the-rest / one-vs-all
- - one-vs-one
- - error correcting output codes
- The estimators provided in this module are meta-estimators: they require a base
- estimator to be provided in their constructor. For example, it is possible to
- use these estimators to turn a binary classifier or a regressor into a
- multiclass classifier. It is also possible to use these estimators with
- multiclass estimators in the hope that their accuracy or runtime performance
- improves.
- All classifiers in scikit-learn implement multiclass classification; you
- only need to use this module if you want to experiment with custom multiclass
- strategies.
- The one-vs-the-rest meta-classifier also implements a `predict_proba` method,
- so long as such a method is implemented by the base classifier. This method
- returns probabilities of class membership in both the single label and
- multilabel case. Note that in the multilabel case, probabilities are the
- marginal probability that a given sample falls in the given class. As such, in
- the multilabel case the sum of these probabilities over all possible labels
- for a given sample *will not* sum to unity, as they do in the single label
- case.
- """

# Author: Mathieu Blondel <mathieu@mblondel.org>
# Author: Hamzeh Alsalhi <93hamsal@gmail.com>
#
# License: BSD 3 clause

import array
import itertools
import warnings
from numbers import Integral, Real

import numpy as np
import scipy.sparse as sp

from .base import (
    BaseEstimator,
    ClassifierMixin,
    MetaEstimatorMixin,
    MultiOutputMixin,
    _fit_context,
    clone,
    is_classifier,
    is_regressor,
)
from .metrics.pairwise import pairwise_distances_argmin
from .preprocessing import LabelBinarizer
from .utils import check_random_state
from .utils._param_validation import HasMethods, Interval
from .utils._tags import _safe_tags
from .utils.metaestimators import _safe_split, available_if
from .utils.multiclass import (
    _check_partial_fit_first_call,
    _ovr_decision_function,
    check_classification_targets,
)
from .utils.parallel import Parallel, delayed
from .utils.validation import _num_samples, check_is_fitted

__all__ = [
    "OneVsRestClassifier",
    "OneVsOneClassifier",
    "OutputCodeClassifier",
]


def _fit_binary(estimator, X, y, classes=None):
    """Fit a single binary estimator."""
    unique_y = np.unique(y)
    if len(unique_y) == 1:
        # Only one class is present in this binary subproblem: warn and fall
        # back to a constant predictor for that class.
        if classes is not None:
            if y[0] == -1:
                c = 0
            else:
                c = y[0]
            warnings.warn(
                "Label %s is present in all training examples." % str(classes[c])
            )
        estimator = _ConstantPredictor().fit(X, unique_y)
    else:
        estimator = clone(estimator)
        estimator.fit(X, y)
    return estimator


def _partial_fit_binary(estimator, X, y):
    """Partially fit a single binary estimator."""
    estimator.partial_fit(X, y, np.array((0, 1)))
    return estimator


def _predict_binary(estimator, X):
    """Make predictions using a single binary estimator."""
    if is_regressor(estimator):
        return estimator.predict(X)
    try:
        score = np.ravel(estimator.decision_function(X))
    except (AttributeError, NotImplementedError):
        # probabilities of the positive class
        score = estimator.predict_proba(X)[:, 1]
    return score


def _threshold_for_binary_predict(estimator):
    """Threshold for predictions from binary estimator."""
    if hasattr(estimator, "decision_function") and is_classifier(estimator):
        return 0.0
    else:
        # predict_proba threshold
        return 0.5


class _ConstantPredictor(BaseEstimator):
    """Predict a constant target, used when only one class is seen at fit."""

    def fit(self, X, y):
        check_params = dict(
            force_all_finite=False, dtype=None, ensure_2d=False, accept_sparse=True
        )
        self._validate_data(
            X, y, reset=True, validate_separately=(check_params, check_params)
        )
        self.y_ = y
        return self

    def predict(self, X):
        check_is_fitted(self)
        self._validate_data(
            X,
            force_all_finite=False,
            dtype=None,
            accept_sparse=True,
            ensure_2d=False,
            reset=False,
        )
        return np.repeat(self.y_, _num_samples(X))

    def decision_function(self, X):
        check_is_fitted(self)
        self._validate_data(
            X,
            force_all_finite=False,
            dtype=None,
            accept_sparse=True,
            ensure_2d=False,
            reset=False,
        )
        return np.repeat(self.y_, _num_samples(X))

    def predict_proba(self, X):
        check_is_fitted(self)
        self._validate_data(
            X,
            force_all_finite=False,
            dtype=None,
            accept_sparse=True,
            ensure_2d=False,
            reset=False,
        )
        y_ = self.y_.astype(np.float64)
        return np.repeat([np.hstack([1 - y_, y_])], _num_samples(X), axis=0)


def _estimators_has(attr):
    """Check if self.estimator or self.estimators_[0] has attr.

    If `self.estimators_[0]` has the attr, then it is safe to assume that the
    other estimators have it too. This function is used together with
    `available_if`.
    """
    return lambda self: (
        hasattr(self.estimator, attr)
        or (hasattr(self, "estimators_") and hasattr(self.estimators_[0], attr))
    )


class OneVsRestClassifier(
    MultiOutputMixin, ClassifierMixin, MetaEstimatorMixin, BaseEstimator
):
    """One-vs-the-rest (OvR) multiclass strategy.

    Also known as one-vs-all, this strategy consists in fitting one classifier
    per class. For each classifier, the class is fitted against all the other
    classes. In addition to its computational efficiency (only `n_classes`
    classifiers are needed), one advantage of this approach is its
    interpretability. Since each class is represented by one and only one
    classifier, it is possible to gain knowledge about the class by inspecting
    its corresponding classifier. This is the most commonly used strategy for
    multiclass classification and is a fair default choice.

    OneVsRestClassifier can also be used for multilabel classification. To use
    this feature, provide an indicator matrix for the target `y` when calling
    `.fit`. In other words, the target labels should be formatted as a 2D
    binary (0/1) matrix, where [i, j] == 1 indicates the presence of label j
    in sample i. This estimator uses the binary relevance method to perform
    multilabel classification, which involves training one binary classifier
    independently for each label.

    Read more in the :ref:`User Guide <ovr_classification>`.

    Parameters
    ----------
    estimator : estimator object
        A regressor or a classifier that implements :term:`fit`.
        When a classifier is passed, :term:`decision_function` will be used
        in priority and it will fallback to :term:`predict_proba` if it is not
        available.
        When a regressor is passed, :term:`predict` is used.

    n_jobs : int, default=None
        The number of jobs to use for the computation: the `n_classes`
        one-vs-rest problems are computed in parallel.

        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
        for more details.

        .. versionchanged:: 0.20
           `n_jobs` default changed from 1 to None

    verbose : int, default=0
        The verbosity level, if non zero, progress messages are printed.
        Below 50, the output is sent to stderr. Otherwise, the output is sent
        to stdout. The frequency of the messages increases with the verbosity
        level, reporting all iterations at 10. See :class:`joblib.Parallel` for
        more details.

        .. versionadded:: 1.1

    Attributes
    ----------
    estimators_ : list of `n_classes` estimators
        Estimators used for predictions.

    classes_ : array, shape = [`n_classes`]
        Class labels.

    n_classes_ : int
        Number of classes.

    label_binarizer_ : LabelBinarizer object
        Object used to transform multiclass labels to binary labels and
        vice-versa.

    multilabel_ : boolean
        Whether a OneVsRestClassifier is a multilabel classifier.

    n_features_in_ : int
        Number of features seen during :term:`fit`. Only defined if the
        underlying estimator exposes such an attribute when fit.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Only defined if the
        underlying estimator exposes such an attribute when fit.

        .. versionadded:: 1.0

    See Also
    --------
    OneVsOneClassifier : One-vs-one multiclass strategy.
    OutputCodeClassifier : (Error-Correcting) Output-Code multiclass strategy.
    sklearn.multioutput.MultiOutputClassifier : Alternate way of extending an
        estimator for multilabel classification.
    sklearn.preprocessing.MultiLabelBinarizer : Transform iterable of iterables
        to binary indicator matrix.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.multiclass import OneVsRestClassifier
    >>> from sklearn.svm import SVC
    >>> X = np.array([
    ...     [10, 10],
    ...     [8, 10],
    ...     [-5, 5.5],
    ...     [-5.4, 5.5],
    ...     [-20, -20],
    ...     [-15, -20]
    ... ])
    >>> y = np.array([0, 0, 1, 1, 2, 2])
    >>> clf = OneVsRestClassifier(SVC()).fit(X, y)
    >>> clf.predict([[-19, -20], [9, 9], [-5, 5]])
    array([2, 0, 1])
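
    In the multilabel case, the target is a 2D indicator matrix and
    ``predict`` returns one indicator per label (a minimal sketch; the data
    here is illustrative):

    >>> y_multilabel = np.array([[1, 0], [0, 1], [1, 1]])
    >>> X_multilabel = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
    >>> multilabel_clf = OneVsRestClassifier(SVC()).fit(X_multilabel, y_multilabel)
    >>> multilabel_clf.predict(X_multilabel).shape
    (3, 2)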
- """

    _parameter_constraints: dict = {
        "estimator": [HasMethods(["fit"])],
        "n_jobs": [Integral, None],
        "verbose": ["verbose"],
    }

    def __init__(self, estimator, *, n_jobs=None, verbose=0):
        self.estimator = estimator
        self.n_jobs = n_jobs
        self.verbose = verbose

    @_fit_context(
        # OneVsRestClassifier.estimator is not validated yet
        prefer_skip_nested_validation=False
    )
    def fit(self, X, y):
        """Fit underlying estimators.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Data.

        y : {array-like, sparse matrix} of shape (n_samples,) or (n_samples, n_classes)
            Multi-class targets. An indicator matrix turns on multilabel
            classification.

        Returns
        -------
        self : object
            Instance of fitted estimator.
        """
        # A sparse LabelBinarizer, with sparse_output=True, has been shown to
        # outperform or match a dense label binarizer in all cases and has also
        # resulted in less or equal memory consumption in the fit_ovr function
        # overall.
        self.label_binarizer_ = LabelBinarizer(sparse_output=True)
        Y = self.label_binarizer_.fit_transform(y)
        Y = Y.tocsc()
        self.classes_ = self.label_binarizer_.classes_
        columns = (col.toarray().ravel() for col in Y.T)
        # In cases where individual estimators are very fast to train, setting
        # n_jobs > 1 can result in slower performance due to the overhead of
        # spawning threads. See joblib issue #112.
        self.estimators_ = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(
            delayed(_fit_binary)(
                self.estimator,
                X,
                column,
                classes=[
                    "not %s" % self.label_binarizer_.classes_[i],
                    self.label_binarizer_.classes_[i],
                ],
            )
            for i, column in enumerate(columns)
        )

        if hasattr(self.estimators_[0], "n_features_in_"):
            self.n_features_in_ = self.estimators_[0].n_features_in_
        if hasattr(self.estimators_[0], "feature_names_in_"):
            self.feature_names_in_ = self.estimators_[0].feature_names_in_

        return self

    @available_if(_estimators_has("partial_fit"))
    @_fit_context(
        # OneVsRestClassifier.estimator is not validated yet
        prefer_skip_nested_validation=False
    )
    def partial_fit(self, X, y, classes=None):
        """Partially fit underlying estimators.

        Should be used when memory is insufficient to train all the data.
        Chunks of data can be passed over several iterations.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Data.

        y : {array-like, sparse matrix} of shape (n_samples,) or (n_samples, n_classes)
            Multi-class targets. An indicator matrix turns on multilabel
            classification.

        classes : array, shape (n_classes, )
            Classes across all calls to partial_fit.
            Can be obtained via `np.unique(y_all)`, where y_all is the
            target vector of the entire dataset.
            This argument is only required in the first call of partial_fit
            and can be omitted in the subsequent calls.

        Returns
        -------
        self : object
            Instance of partially fitted estimator.
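
        Examples
        --------
        A minimal chunked-training sketch; it assumes a base estimator with
        ``partial_fit``, here :class:`~sklearn.linear_model.SGDClassifier`:

        >>> import numpy as np
        >>> from sklearn.linear_model import SGDClassifier
        >>> from sklearn.multiclass import OneVsRestClassifier
        >>> X = np.array([[1.0], [2.0], [10.0], [11.0], [20.0], [21.0]])
        >>> y = np.array([0, 0, 1, 1, 2, 2])
        >>> ovr = OneVsRestClassifier(SGDClassifier(random_state=0))
        >>> ovr = ovr.partial_fit(X[:4], y[:4], classes=np.unique(y))
        >>> ovr = ovr.partial_fit(X[4:], y[4:])
        >>> ovr.n_classes_
        3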
- """
- if _check_partial_fit_first_call(self, classes):
- if not hasattr(self.estimator, "partial_fit"):
- raise ValueError(
- ("Base estimator {0}, doesn't have partial_fit method").format(
- self.estimator
- )
- )
- self.estimators_ = [clone(self.estimator) for _ in range(self.n_classes_)]
- # A sparse LabelBinarizer, with sparse_output=True, has been
- # shown to outperform or match a dense label binarizer in all
- # cases and has also resulted in less or equal memory consumption
- # in the fit_ovr function overall.
- self.label_binarizer_ = LabelBinarizer(sparse_output=True)
- self.label_binarizer_.fit(self.classes_)
- if len(np.setdiff1d(y, self.classes_)):
- raise ValueError(
- (
- "Mini-batch contains {0} while classes " + "must be subset of {1}"
- ).format(np.unique(y), self.classes_)
- )
- Y = self.label_binarizer_.transform(y)
- Y = Y.tocsc()
- columns = (col.toarray().ravel() for col in Y.T)
- self.estimators_ = Parallel(n_jobs=self.n_jobs)(
- delayed(_partial_fit_binary)(estimator, X, column)
- for estimator, column in zip(self.estimators_, columns)
- )
- if hasattr(self.estimators_[0], "n_features_in_"):
- self.n_features_in_ = self.estimators_[0].n_features_in_
- return self

    def predict(self, X):
        """Predict multi-class targets using underlying estimators.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Data.

        Returns
        -------
        y : {array-like, sparse matrix} of shape (n_samples,) or (n_samples, n_classes)
            Predicted multi-class targets.
        """
        check_is_fitted(self)

        n_samples = _num_samples(X)
        if self.label_binarizer_.y_type_ == "multiclass":
            # Single-label case: the predicted class is the one whose binary
            # estimator assigns the highest score to the sample.
            maxima = np.empty(n_samples, dtype=float)
            maxima.fill(-np.inf)
            argmaxima = np.zeros(n_samples, dtype=int)
            for i, e in enumerate(self.estimators_):
                pred = _predict_binary(e, X)
                np.maximum(maxima, pred, out=maxima)
                argmaxima[maxima == pred] = i
            return self.classes_[argmaxima]
        else:
            # Multilabel case: every label whose binary score exceeds the
            # threshold is predicted, encoded as a sparse indicator matrix.
            thresh = _threshold_for_binary_predict(self.estimators_[0])
            indices = array.array("i")
            indptr = array.array("i", [0])
            for e in self.estimators_:
                indices.extend(np.where(_predict_binary(e, X) > thresh)[0])
                indptr.append(len(indices))
            data = np.ones(len(indices), dtype=int)
            indicator = sp.csc_matrix(
                (data, indices, indptr), shape=(n_samples, len(self.estimators_))
            )
            return self.label_binarizer_.inverse_transform(indicator)

    @available_if(_estimators_has("predict_proba"))
    def predict_proba(self, X):
        """Probability estimates.

        The returned estimates for all classes are ordered by label of classes.

        Note that in the multilabel case, each sample can have any number of
        labels. This returns the marginal probability that the given sample has
        the label in question. For example, it is entirely consistent that two
        labels both have a 90% probability of applying to a given sample.

        In the single label multiclass case, the rows of the returned matrix
        sum to 1.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Input data.

        Returns
        -------
        T : array-like of shape (n_samples, n_classes)
            Returns the probability of the sample for each class in the model,
            where classes are ordered as they are in `self.classes_`.
        """
        check_is_fitted(self)
        # Y[i, j] gives the probability that sample i has the label j.
        # In the multi-label case, these are not disjoint.
        Y = np.array([e.predict_proba(X)[:, 1] for e in self.estimators_]).T

        if len(self.estimators_) == 1:
            # Only one estimator, but we still want to return probabilities
            # for two classes.
            Y = np.concatenate(((1 - Y), Y), axis=1)

        if not self.multilabel_:
            # Then, probabilities should be normalized to 1.
            Y /= np.sum(Y, axis=1)[:, np.newaxis]
        return Y

    @available_if(_estimators_has("decision_function"))
    def decision_function(self, X):
        """Decision function for the OneVsRestClassifier.

        Return the distance of each sample from the decision boundary for each
        class. This can only be used with estimators which implement the
        `decision_function` method.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Input data.

        Returns
        -------
        T : array-like of shape (n_samples, n_classes) or (n_samples,) for \
            binary classification.
            Result of calling `decision_function` on the final estimator.

            .. versionchanged:: 0.19
                output shape changed to ``(n_samples,)`` to conform to
                scikit-learn conventions for binary classification.
        """
        check_is_fitted(self)
        if len(self.estimators_) == 1:
            return self.estimators_[0].decision_function(X)
        return np.array(
            [est.decision_function(X).ravel() for est in self.estimators_]
        ).T

    @property
    def multilabel_(self):
        """Whether this is a multilabel classifier."""
        return self.label_binarizer_.y_type_.startswith("multilabel")

    @property
    def n_classes_(self):
        """Number of classes."""
        return len(self.classes_)

    def _more_tags(self):
        """Indicate if wrapped estimator is using a precomputed Gram matrix"""
        return {"pairwise": _safe_tags(self.estimator, key="pairwise")}


def _fit_ovo_binary(estimator, X, y, i, j):
    """Fit a single binary estimator (one-vs-one)."""
    cond = np.logical_or(y == i, y == j)
    y = y[cond]
    y_binary = np.empty(y.shape, int)
    y_binary[y == i] = 0
    y_binary[y == j] = 1
    indcond = np.arange(_num_samples(X))[cond]
    return (
        _fit_binary(
            estimator,
            _safe_split(estimator, X, None, indices=indcond)[0],
            y_binary,
            classes=[i, j],
        ),
        indcond,
    )


def _partial_fit_ovo_binary(estimator, X, y, i, j):
    """Partially fit a single binary estimator (one-vs-one)."""
    cond = np.logical_or(y == i, y == j)
    y = y[cond]
    if len(y) != 0:
        y_binary = np.zeros_like(y)
        y_binary[y == j] = 1
        return _partial_fit_binary(estimator, X[cond], y_binary)
    return estimator


class OneVsOneClassifier(MetaEstimatorMixin, ClassifierMixin, BaseEstimator):
    """One-vs-one multiclass strategy.

    This strategy consists in fitting one classifier per class pair.
    At prediction time, the class which received the most votes is selected.
    Since it requires fitting `n_classes * (n_classes - 1) / 2` classifiers,
    this method is usually slower than one-vs-the-rest, due to its
    O(n_classes^2) complexity. However, this method may be advantageous for
    algorithms such as kernel algorithms which don't scale well with
    `n_samples`. This is because each individual learning problem only involves
    a small subset of the data whereas, with one-vs-the-rest, the complete
    dataset is used `n_classes` times.

    Read more in the :ref:`User Guide <ovo_classification>`.

    Parameters
    ----------
    estimator : estimator object
        A regressor or a classifier that implements :term:`fit`.
        When a classifier is passed, :term:`decision_function` will be used
        in priority and it will fallback to :term:`predict_proba` if it is not
        available.
        When a regressor is passed, :term:`predict` is used.

    n_jobs : int, default=None
        The number of jobs to use for the computation: the `n_classes * (
        n_classes - 1) / 2` OVO problems are computed in parallel.

        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
        for more details.

    Attributes
    ----------
    estimators_ : list of ``n_classes * (n_classes - 1) / 2`` estimators
        Estimators used for predictions.

    classes_ : numpy array of shape [n_classes]
        Array containing labels.

    n_classes_ : int
        Number of classes.

    pairwise_indices_ : list, length = ``len(estimators_)``, or ``None``
        Indices of samples used when training the estimators.
        ``None`` when ``estimator``'s `pairwise` tag is False.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    See Also
    --------
    OneVsRestClassifier : One-vs-all multiclass strategy.
    OutputCodeClassifier : (Error-Correcting) Output-Code multiclass strategy.

    Examples
    --------
    >>> from sklearn.datasets import load_iris
    >>> from sklearn.model_selection import train_test_split
    >>> from sklearn.multiclass import OneVsOneClassifier
    >>> from sklearn.svm import LinearSVC
    >>> X, y = load_iris(return_X_y=True)
    >>> X_train, X_test, y_train, y_test = train_test_split(
    ...     X, y, test_size=0.33, shuffle=True, random_state=0)
    >>> clf = OneVsOneClassifier(
    ...     LinearSVC(dual="auto", random_state=0)).fit(X_train, y_train)
    >>> clf.predict(X_test[:10])
    array([2, 1, 0, 2, 0, 2, 0, 1, 1, 1])
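
    The per-class decision values combine the pairwise votes with normalized
    pairwise confidences and have shape ``(n_samples, n_classes)``:

    >>> clf.decision_function(X_test[:1]).shape
    (1, 3)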
- """
- _parameter_constraints: dict = {
- "estimator": [HasMethods(["fit"])],
- "n_jobs": [Integral, None],
- }
- def __init__(self, estimator, *, n_jobs=None):
- self.estimator = estimator
- self.n_jobs = n_jobs
- @_fit_context(
- # OneVsOneClassifier.estimator is not validated yet
- prefer_skip_nested_validation=False
- )
- def fit(self, X, y):
- """Fit underlying estimators.
- Parameters
- ----------
- X : {array-like, sparse matrix} of shape (n_samples, n_features)
- Data.
- y : array-like of shape (n_samples,)
- Multi-class targets.
- Returns
- -------
- self : object
- The fitted underlying estimator.
- """
- # We need to validate the data because we do a safe_indexing later.
- X, y = self._validate_data(
- X, y, accept_sparse=["csr", "csc"], force_all_finite=False
- )
- check_classification_targets(y)
- self.classes_ = np.unique(y)
- if len(self.classes_) == 1:
- raise ValueError(
- "OneVsOneClassifier can not be fit when only one class is present."
- )
- n_classes = self.classes_.shape[0]
- estimators_indices = list(
- zip(
- *(
- Parallel(n_jobs=self.n_jobs)(
- delayed(_fit_ovo_binary)(
- self.estimator, X, y, self.classes_[i], self.classes_[j]
- )
- for i in range(n_classes)
- for j in range(i + 1, n_classes)
- )
- )
- )
- )
- self.estimators_ = estimators_indices[0]
- pairwise = self._get_tags()["pairwise"]
- self.pairwise_indices_ = estimators_indices[1] if pairwise else None
- return self

    @available_if(_estimators_has("partial_fit"))
    @_fit_context(
        # OneVsOneClassifier.estimator is not validated yet
        prefer_skip_nested_validation=False
    )
    def partial_fit(self, X, y, classes=None):
        """Partially fit underlying estimators.

        Should be used when memory is insufficient to train all the data.
        Chunks of data can be passed over several iterations, where the first
        call should have an array of all target variables.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Data.

        y : array-like of shape (n_samples,)
            Multi-class targets.

        classes : array, shape (n_classes, )
            Classes across all calls to partial_fit.
            Can be obtained via `np.unique(y_all)`, where y_all is the
            target vector of the entire dataset.
            This argument is only required in the first call of partial_fit
            and can be omitted in the subsequent calls.

        Returns
        -------
        self : object
            The partially fitted underlying estimator.
        """
        first_call = _check_partial_fit_first_call(self, classes)
        if first_call:
            self.estimators_ = [
                clone(self.estimator)
                for _ in range(self.n_classes_ * (self.n_classes_ - 1) // 2)
            ]

        if len(np.setdiff1d(y, self.classes_)):
            raise ValueError(
                "Mini-batch contains {0} while it must be a subset of {1}".format(
                    np.unique(y), self.classes_
                )
            )

        X, y = self._validate_data(
            X,
            y,
            accept_sparse=["csr", "csc"],
            force_all_finite=False,
            reset=first_call,
        )
        check_classification_targets(y)
        combinations = itertools.combinations(range(self.n_classes_), 2)
        self.estimators_ = Parallel(n_jobs=self.n_jobs)(
            delayed(_partial_fit_ovo_binary)(
                estimator, X, y, self.classes_[i], self.classes_[j]
            )
            for estimator, (i, j) in zip(self.estimators_, combinations)
        )

        self.pairwise_indices_ = None

        if hasattr(self.estimators_[0], "n_features_in_"):
            self.n_features_in_ = self.estimators_[0].n_features_in_

        return self

    def predict(self, X):
        """Estimate the best class label for each sample in X.

        This is implemented as ``argmax(decision_function(X), axis=1)`` which
        will return the label of the class with most votes by estimators
        predicting the outcome of a decision for each possible class pair.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Data.

        Returns
        -------
        y : numpy array of shape [n_samples]
            Predicted multi-class targets.
        """
        Y = self.decision_function(X)
        if self.n_classes_ == 2:
            thresh = _threshold_for_binary_predict(self.estimators_[0])
            return self.classes_[(Y > thresh).astype(int)]
        return self.classes_[Y.argmax(axis=1)]

    def decision_function(self, X):
        """Decision function for the OneVsOneClassifier.

        The decision values for the samples are computed by adding the
        normalized sum of pair-wise classification confidence levels to the
        votes in order to disambiguate between the decision values when the
        votes for all the classes are equal, leading to a tie.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Input data.

        Returns
        -------
        Y : array-like of shape (n_samples, n_classes) or (n_samples,)
            Result of calling `decision_function` on the final estimator.

            .. versionchanged:: 0.19
                output shape changed to ``(n_samples,)`` to conform to
                scikit-learn conventions for binary classification.
        """
        check_is_fitted(self)
        X = self._validate_data(
            X,
            accept_sparse=True,
            force_all_finite=False,
            reset=False,
        )

        indices = self.pairwise_indices_
        if indices is None:
            Xs = [X] * len(self.estimators_)
        else:
            Xs = [X[:, idx] for idx in indices]

        predictions = np.vstack(
            [est.predict(Xi) for est, Xi in zip(self.estimators_, Xs)]
        ).T
        confidences = np.vstack(
            [_predict_binary(est, Xi) for est, Xi in zip(self.estimators_, Xs)]
        ).T
        Y = _ovr_decision_function(predictions, confidences, len(self.classes_))
        if self.n_classes_ == 2:
            return Y[:, 1]
        return Y

    @property
    def n_classes_(self):
        """Number of classes."""
        return len(self.classes_)

    def _more_tags(self):
        """Indicate if wrapped estimator is using a precomputed Gram matrix"""
        return {"pairwise": _safe_tags(self.estimator, key="pairwise")}


class OutputCodeClassifier(MetaEstimatorMixin, ClassifierMixin, BaseEstimator):
    """(Error-Correcting) Output-Code multiclass strategy.

    Output-code based strategies consist in representing each class with a
    binary code (an array of 0s and 1s). At fitting time, one binary
    classifier per bit in the code book is fitted. At prediction time, the
    classifiers are used to project new points in the class space and the class
    closest to the points is chosen. The main advantage of these strategies is
    that the number of classifiers used can be controlled by the user, either
    for compressing the model (0 < `code_size` < 1) or for making the model more
    robust to errors (`code_size` > 1). See the documentation for more details.

    Read more in the :ref:`User Guide <ecoc>`.

    Parameters
    ----------
    estimator : estimator object
        An estimator object implementing :term:`fit` and one of
        :term:`decision_function` or :term:`predict_proba`.

    code_size : float, default=1.5
        Percentage of the number of classes to be used to create the code book.
        A number between 0 and 1 will require fewer classifiers than
        one-vs-the-rest. A number greater than 1 will require more classifiers
        than one-vs-the-rest. For instance, with 4 classes a `code_size` of 0.5
        yields ``int(4 * 0.5) = 2`` classifiers, and a `code_size` of 1.5
        yields 6.

    random_state : int, RandomState instance, default=None
        The generator used to initialize the codebook.
        Pass an int for reproducible output across multiple function calls.
        See :term:`Glossary <random_state>`.

    n_jobs : int, default=None
        The number of jobs to use for the computation: the multiclass problems
        are computed in parallel.

        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
        for more details.

    Attributes
    ----------
    estimators_ : list of `int(n_classes * code_size)` estimators
        Estimators used for predictions.

    classes_ : ndarray of shape (n_classes,)
        Array containing labels.

    code_book_ : ndarray of shape (n_classes, `int(n_classes * code_size)`)
        Binary array containing the code of each class.

    n_features_in_ : int
        Number of features seen during :term:`fit`. Only defined if the
        underlying estimator exposes such an attribute when fit.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Only defined if the
        underlying estimator exposes such an attribute when fit.

        .. versionadded:: 1.0

    See Also
    --------
    OneVsRestClassifier : One-vs-all multiclass strategy.
    OneVsOneClassifier : One-vs-one multiclass strategy.

    References
    ----------
    .. [1] "Solving multiclass learning problems via error-correcting output
       codes",
       Dietterich T., Bakiri G.,
       Journal of Artificial Intelligence Research 2,
       1995.

    .. [2] "The error coding method and PICTs",
       James G., Hastie T.,
       Journal of Computational and Graphical Statistics 7,
       1998.

    .. [3] "The Elements of Statistical Learning",
       Hastie T., Tibshirani R., Friedman J., page 606 (second-edition)
       2008.

    Examples
    --------
    >>> from sklearn.multiclass import OutputCodeClassifier
    >>> from sklearn.ensemble import RandomForestClassifier
    >>> from sklearn.datasets import make_classification
    >>> X, y = make_classification(n_samples=100, n_features=4,
    ...                            n_informative=2, n_redundant=0,
    ...                            random_state=0, shuffle=False)
    >>> clf = OutputCodeClassifier(
    ...     estimator=RandomForestClassifier(random_state=0),
    ...     random_state=0).fit(X, y)
    >>> clf.predict([[0, 0, 0, 0]])
    array([1])
    """

    _parameter_constraints: dict = {
        "estimator": [
            HasMethods(["fit", "decision_function"]),
            HasMethods(["fit", "predict_proba"]),
        ],
        "code_size": [Interval(Real, 0.0, None, closed="neither")],
        "random_state": ["random_state"],
        "n_jobs": [Integral, None],
    }

    def __init__(self, estimator, *, code_size=1.5, random_state=None, n_jobs=None):
        self.estimator = estimator
        self.code_size = code_size
        self.random_state = random_state
        self.n_jobs = n_jobs

    @_fit_context(
        # OutputCodeClassifier.estimator is not validated yet
        prefer_skip_nested_validation=False
    )
    def fit(self, X, y):
        """Fit underlying estimators.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Data.

        y : array-like of shape (n_samples,)
            Multi-class targets.

        Returns
        -------
        self : object
            Returns a fitted instance of self.
        """
        y = self._validate_data(X="no_validation", y=y)

        random_state = check_random_state(self.random_state)
        check_classification_targets(y)

        self.classes_ = np.unique(y)
        n_classes = self.classes_.shape[0]
        if n_classes == 0:
            raise ValueError(
                "OutputCodeClassifier can not be fit when no class is present."
            )
        code_size_ = int(n_classes * self.code_size)

        # FIXME: there are more elaborate methods than generating the codebook
        # randomly.
        self.code_book_ = random_state.uniform(size=(n_classes, code_size_))
        self.code_book_[self.code_book_ > 0.5] = 1.0

        if hasattr(self.estimator, "decision_function"):
            self.code_book_[self.code_book_ != 1] = -1.0
        else:
            self.code_book_[self.code_book_ != 1] = 0.0
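
        # Illustrative example: with 3 classes and the default code_size=1.5,
        # code_size_ == 4 and code_book_ is a (3, 4) matrix whose entries are
        # +1/-1 (or 1/0 for predict_proba-only estimators); row i is the code
        # word for class i, and column j defines binary subproblem j.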

        classes_index = {c: i for i, c in enumerate(self.classes_)}

        Y = np.array(
            [self.code_book_[classes_index[y[i]]] for i in range(_num_samples(y))],
            dtype=int,
        )

        self.estimators_ = Parallel(n_jobs=self.n_jobs)(
            delayed(_fit_binary)(self.estimator, X, Y[:, i]) for i in range(Y.shape[1])
        )

        if hasattr(self.estimators_[0], "n_features_in_"):
            self.n_features_in_ = self.estimators_[0].n_features_in_
        if hasattr(self.estimators_[0], "feature_names_in_"):
            self.feature_names_in_ = self.estimators_[0].feature_names_in_

        return self

    def predict(self, X):
        """Predict multi-class targets using underlying estimators.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Data.

        Returns
        -------
        y : ndarray of shape (n_samples,)
            Predicted multi-class targets.
        """
        check_is_fitted(self)
        # ArgKmin only accepts a C-contiguous array. The aggregated predictions
        # need to be transposed. We therefore create an F-contiguous array to
        # avoid a copy and have a C-contiguous array after the transpose
        # operation.
        Y = np.array(
            [_predict_binary(e, X) for e in self.estimators_],
            order="F",
            dtype=np.float64,
        ).T
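
        # Decode: each row of Y is a point in code space; the predicted class
        # is the one whose code word is nearest in Euclidean distance.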
        pred = pairwise_distances_argmin(Y, self.code_book_, metric="euclidean")
        return self.classes_[pred]