- """Weight Boosting.
- This module contains weight boosting estimators for both classification and
- regression.
- The module structure is the following:
- - The `BaseWeightBoosting` base class implements a common ``fit`` method
- for all the estimators in the module. Regression and classification
- only differ from each other in the loss function that is optimized.
- - :class:`~sklearn.ensemble.AdaBoostClassifier` implements adaptive boosting
- (AdaBoost-SAMME) for classification problems.
- - :class:`~sklearn.ensemble.AdaBoostRegressor` implements adaptive boosting
- (AdaBoost.R2) for regression problems.
- """
- # Authors: Noel Dawe <noel@dawe.me>
- # Gilles Louppe <g.louppe@gmail.com>
- # Hamzeh Alsalhi <ha258@cornell.edu>
- # Arnaud Joly <arnaud.v.joly@gmail.com>
- #
- # License: BSD 3 clause
- import warnings
- from abc import ABCMeta, abstractmethod
- from numbers import Integral, Real
- import numpy as np
- from scipy.special import xlogy
- from ..base import (
- ClassifierMixin,
- RegressorMixin,
- _fit_context,
- is_classifier,
- is_regressor,
- )
- from ..metrics import accuracy_score, r2_score
- from ..tree import DecisionTreeClassifier, DecisionTreeRegressor
- from ..utils import _safe_indexing, check_random_state
- from ..utils._param_validation import HasMethods, Interval, StrOptions
- from ..utils.extmath import softmax, stable_cumsum
- from ..utils.validation import (
- _check_sample_weight,
- _num_samples,
- check_is_fitted,
- has_fit_parameter,
- )
- from ._base import BaseEnsemble
- __all__ = [
- "AdaBoostClassifier",
- "AdaBoostRegressor",
- ]
- class BaseWeightBoosting(BaseEnsemble, metaclass=ABCMeta):
- """Base class for AdaBoost estimators.
- Warning: This class should not be used directly. Use derived classes
- instead.
- """
- _parameter_constraints: dict = {
- "estimator": [HasMethods(["fit", "predict"]), None],
- "n_estimators": [Interval(Integral, 1, None, closed="left")],
- "learning_rate": [Interval(Real, 0, None, closed="neither")],
- "random_state": ["random_state"],
- "base_estimator": [
- HasMethods(["fit", "predict"]),
- StrOptions({"deprecated"}),
- None,
- ],
- }
- @abstractmethod
- def __init__(
- self,
- estimator=None,
- *,
- n_estimators=50,
- estimator_params=tuple(),
- learning_rate=1.0,
- random_state=None,
- base_estimator="deprecated",
- ):
- super().__init__(
- estimator=estimator,
- n_estimators=n_estimators,
- estimator_params=estimator_params,
- base_estimator=base_estimator,
- )
- self.learning_rate = learning_rate
- self.random_state = random_state
- def _check_X(self, X):
- # Only called to validate X in non-fit methods, therefore reset=False
- return self._validate_data(
- X,
- accept_sparse=["csr", "csc"],
- ensure_2d=True,
- allow_nd=True,
- dtype=None,
- reset=False,
- )
- @_fit_context(
- # AdaBoost*.estimator is not validated yet
- prefer_skip_nested_validation=False
- )
- def fit(self, X, y, sample_weight=None):
- """Build a boosted classifier/regressor from the training set (X, y).
- Parameters
- ----------
- X : {array-like, sparse matrix} of shape (n_samples, n_features)
- The training input samples. Sparse matrix can be CSC, CSR, COO,
- DOK, or LIL. COO, DOK, and LIL are converted to CSR.
- y : array-like of shape (n_samples,)
- The target values.
- sample_weight : array-like of shape (n_samples,), default=None
- Sample weights. If None, the sample weights are initialized to
- 1 / n_samples.
- Returns
- -------
- self : object
- Fitted estimator.
- """
- X, y = self._validate_data(
- X,
- y,
- accept_sparse=["csr", "csc"],
- ensure_2d=True,
- allow_nd=True,
- dtype=None,
- y_numeric=is_regressor(self),
- )
- sample_weight = _check_sample_weight(
- sample_weight, X, np.float64, copy=True, only_non_negative=True
- )
- sample_weight /= sample_weight.sum()
- # Check parameters
- self._validate_estimator()
- # Clear any previous fit results
- self.estimators_ = []
- self.estimator_weights_ = np.zeros(self.n_estimators, dtype=np.float64)
- self.estimator_errors_ = np.ones(self.n_estimators, dtype=np.float64)
- # Initialization of the random number instance that will be used to
- # generate a seed at each iteration
- random_state = check_random_state(self.random_state)
- epsilon = np.finfo(sample_weight.dtype).eps
- zero_weight_mask = sample_weight == 0.0
- for iboost in range(self.n_estimators):
- # avoid extremely small sample weight, for details see issue #20320
- sample_weight = np.clip(sample_weight, a_min=epsilon, a_max=None)
- # do not clip sample weights that were exactly zero originally
- sample_weight[zero_weight_mask] = 0.0
- # Boosting step
- sample_weight, estimator_weight, estimator_error = self._boost(
- iboost, X, y, sample_weight, random_state
- )
- # Early termination
- if sample_weight is None:
- break
- self.estimator_weights_[iboost] = estimator_weight
- self.estimator_errors_[iboost] = estimator_error
- # Stop if error is zero
- if estimator_error == 0:
- break
- sample_weight_sum = np.sum(sample_weight)
- if not np.isfinite(sample_weight_sum):
- warnings.warn(
- (
- "Sample weights have reached infinite values,"
- f" at iteration {iboost}, causing overflow. "
- "Iterations stopped. Try lowering the learning rate."
- ),
- stacklevel=2,
- )
- break
- # Stop if the sum of sample weights has become non-positive
- if sample_weight_sum <= 0:
- break
- if iboost < self.n_estimators - 1:
- # Normalize
- sample_weight /= sample_weight_sum
- return self
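- # Illustrative monitoring sketch (not part of the upstream module): after
- # ``fit`` returns, the per-iteration records populated above can be
- # inspected directly. ``model`` is an assumed, already-fitted estimator.
- #
- #     for i, (w, e) in enumerate(
- #         zip(model.estimator_weights_, model.estimator_errors_)
- #     ):
- #         print(f"boost {i}: weight={w:.3f}, weighted error={e:.3f}")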
- @abstractmethod
- def _boost(self, iboost, X, y, sample_weight, random_state):
- """Implement a single boost.
- Warning: This method needs to be overridden by subclasses.
- Parameters
- ----------
- iboost : int
- The index of the current boost iteration.
- X : {array-like, sparse matrix} of shape (n_samples, n_features)
- The training input samples. Sparse matrix can be CSC, CSR, COO,
- DOK, or LIL. COO, DOK, and LIL are converted to CSR.
- y : array-like of shape (n_samples,)
- The target values (class labels).
- sample_weight : array-like of shape (n_samples,)
- The current sample weights.
- random_state : RandomState
- The current random number generator.
- Returns
- -------
- sample_weight : array-like of shape (n_samples,) or None
- The reweighted sample weights.
- If None then boosting has terminated early.
- estimator_weight : float
- The weight for the current boost.
- If None then boosting has terminated early.
- error : float
- The classification (or regression) error for the current boost.
- If None then boosting has terminated early.
- """
- pass
- def staged_score(self, X, y, sample_weight=None):
- """Return staged scores for X, y.
- This generator method yields the ensemble score after each iteration of
- boosting and therefore allows monitoring, such as to determine the
- score on a test set after each boost.
- Parameters
- ----------
- X : {array-like, sparse matrix} of shape (n_samples, n_features)
- The training input samples. Sparse matrix can be CSC, CSR, COO,
- DOK, or LIL. COO, DOK, and LIL are converted to CSR.
- y : array-like of shape (n_samples,)
- Labels for X.
- sample_weight : array-like of shape (n_samples,), default=None
- Sample weights.
- Yields
- ------
- z : float
- The score after each boost iteration: accuracy for classifiers,
- R^2 for regressors.
- """
- X = self._check_X(X)
- for y_pred in self.staged_predict(X):
- if is_classifier(self):
- yield accuracy_score(y, y_pred, sample_weight=sample_weight)
- else:
- yield r2_score(y, y_pred, sample_weight=sample_weight)
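- # Illustrative sketch (assumed names ``clf``, ``X_test``, ``y_test``):
- # because each yield reuses the ensemble built so far, ``staged_score``
- # makes selecting the best number of boosting rounds a single pass.
- #
- #     scores = list(clf.staged_score(X_test, y_test))
- #     best_n_rounds = int(np.argmax(scores)) + 1  # iterations are 1-based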
- @property
- def feature_importances_(self):
- """The impurity-based feature importances.
- The higher, the more important the feature.
- The importance of a feature is computed as the (normalized)
- total reduction of the criterion brought by that feature. It is also
- known as the Gini importance.
- Warning: impurity-based feature importances can be misleading for
- high cardinality features (many unique values). See
- :func:`sklearn.inspection.permutation_importance` as an alternative.
- Returns
- -------
- feature_importances_ : ndarray of shape (n_features,)
- The feature importances.
- """
- if self.estimators_ is None or len(self.estimators_) == 0:
- raise ValueError(
- "Estimator not fitted, call `fit` before `feature_importances_`."
- )
- try:
- norm = self.estimator_weights_.sum()
- return (
- sum(
- weight * clf.feature_importances_
- for weight, clf in zip(self.estimator_weights_, self.estimators_)
- )
- / norm
- )
- except AttributeError as e:
- raise AttributeError(
- "Unable to compute feature importances "
- "since estimator does not have a "
- "feature_importances_ attribute"
- ) from e
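- # Worked sketch of the weighted average above (illustrative numbers): two
- # fitted trees with importances [0.8, 0.2] and [0.4, 0.6] and estimator
- # weights [1.0, 3.0] yield
- # (1.0 * [0.8, 0.2] + 3.0 * [0.4, 0.6]) / 4.0 == [0.5, 0.5].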
- def _samme_proba(estimator, n_classes, X):
- """Calculate algorithm 4, step 2, equation c) of Zhu et al [1].
- References
- ----------
- .. [1] J. Zhu, H. Zou, S. Rosset, T. Hastie, "Multi-class AdaBoost", 2009.
- """
- proba = estimator.predict_proba(X)
- # Displace zero probabilities so the log is defined.
- # Also fix negative elements which may occur with
- # negative sample weights.
- np.clip(proba, np.finfo(proba.dtype).eps, None, out=proba)
- log_proba = np.log(proba)
- return (n_classes - 1) * (
- log_proba - (1.0 / n_classes) * log_proba.sum(axis=1)[:, np.newaxis]
- )
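- # Numeric sketch (illustrative): for K=2 classes and a probability row
- # [0.8, 0.2], log_proba is [-0.223, -1.609]; subtracting the mean (-0.916)
- # and scaling by K - 1 = 1 gives [0.693, -0.693]. Each output row is
- # centered to sum to zero, which the SAMME.R decision sum relies on.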
- class AdaBoostClassifier(ClassifierMixin, BaseWeightBoosting):
- """An AdaBoost classifier.
- An AdaBoost [1] classifier is a meta-estimator that begins by fitting a
- classifier on the original dataset and then fits additional copies of the
- classifier on the same dataset but where the weights of incorrectly
- classified instances are adjusted such that subsequent classifiers focus
- more on difficult cases.
- This class implements the algorithm known as AdaBoost-SAMME [2].
- Read more in the :ref:`User Guide <adaboost>`.
- .. versionadded:: 0.14
- Parameters
- ----------
- estimator : object, default=None
- The base estimator from which the boosted ensemble is built.
- Support for sample weighting is required, as well as proper
- ``classes_`` and ``n_classes_`` attributes. If ``None``, then
- the base estimator is :class:`~sklearn.tree.DecisionTreeClassifier`
- initialized with `max_depth=1`.
- .. versionadded:: 1.2
- `base_estimator` was renamed to `estimator`.
- n_estimators : int, default=50
- The maximum number of estimators at which boosting is terminated.
- In case of perfect fit, the learning procedure is stopped early.
- Values must be in the range `[1, inf)`.
- learning_rate : float, default=1.0
- Weight applied to each classifier at each boosting iteration. A higher
- learning rate increases the contribution of each classifier. There is
- a trade-off between the `learning_rate` and `n_estimators` parameters.
- Values must be in the range `(0.0, inf)`.
- algorithm : {'SAMME', 'SAMME.R'}, default='SAMME.R'
- If 'SAMME.R' then use the SAMME.R real boosting algorithm.
- ``estimator`` must support calculation of class probabilities.
- If 'SAMME' then use the SAMME discrete boosting algorithm.
- The SAMME.R algorithm typically converges faster than SAMME,
- achieving a lower test error with fewer boosting iterations.
- random_state : int, RandomState instance or None, default=None
- Controls the random seed given to each `estimator` at each
- boosting iteration.
- Thus, it is only used when `estimator` exposes a `random_state`.
- Pass an int for reproducible output across multiple function calls.
- See :term:`Glossary <random_state>`.
- base_estimator : object, default=None
- The base estimator from which the boosted ensemble is built.
- Support for sample weighting is required, as well as proper
- ``classes_`` and ``n_classes_`` attributes. If ``None``, then
- the base estimator is :class:`~sklearn.tree.DecisionTreeClassifier`
- initialized with `max_depth=1`.
- .. deprecated:: 1.2
- `base_estimator` is deprecated and will be removed in 1.4.
- Use `estimator` instead.
- Attributes
- ----------
- estimator_ : estimator
- The base estimator from which the ensemble is grown.
- .. versionadded:: 1.2
- `base_estimator_` was renamed to `estimator_`.
- base_estimator_ : estimator
- The base estimator from which the ensemble is grown.
- .. deprecated:: 1.2
- `base_estimator_` is deprecated and will be removed in 1.4.
- Use `estimator_` instead.
- estimators_ : list of classifiers
- The collection of fitted sub-estimators.
- classes_ : ndarray of shape (n_classes,)
- The class labels.
- n_classes_ : int
- The number of classes.
- estimator_weights_ : ndarray of floats
- Weights for each estimator in the boosted ensemble.
- estimator_errors_ : ndarray of floats
- Classification error for each estimator in the boosted
- ensemble.
- feature_importances_ : ndarray of shape (n_features,)
- The impurity-based feature importances if supported by the
- ``estimator`` (when based on decision trees).
- Warning: impurity-based feature importances can be misleading for
- high cardinality features (many unique values). See
- :func:`sklearn.inspection.permutation_importance` as an alternative.
- n_features_in_ : int
- Number of features seen during :term:`fit`.
- .. versionadded:: 0.24
- feature_names_in_ : ndarray of shape (`n_features_in_`,)
- Names of features seen during :term:`fit`. Defined only when `X`
- has feature names that are all strings.
- .. versionadded:: 1.0
- See Also
- --------
- AdaBoostRegressor : An AdaBoost regressor that begins by fitting a
- regressor on the original dataset and then fits additional copies of
- the regressor on the same dataset but where the weights of instances
- are adjusted according to the error of the current prediction.
- GradientBoostingClassifier : GB builds an additive model in a forward
- stage-wise fashion. Regression trees are fit on the negative gradient
- of the binomial or multinomial deviance loss function. Binary
- classification is a special case where only a single regression tree is
- induced.
- sklearn.tree.DecisionTreeClassifier : A non-parametric supervised learning
- method used for classification.
- Creates a model that predicts the value of a target variable by
- learning simple decision rules inferred from the data features.
- References
- ----------
- .. [1] Y. Freund, R. Schapire, "A Decision-Theoretic Generalization of
- On-Line Learning and an Application to Boosting", 1995.
- .. [2] J. Zhu, H. Zou, S. Rosset, T. Hastie, "Multi-class AdaBoost", 2009.
- Examples
- --------
- >>> from sklearn.ensemble import AdaBoostClassifier
- >>> from sklearn.datasets import make_classification
- >>> X, y = make_classification(n_samples=1000, n_features=4,
- ... n_informative=2, n_redundant=0,
- ... random_state=0, shuffle=False)
- >>> clf = AdaBoostClassifier(n_estimators=100, random_state=0)
- >>> clf.fit(X, y)
- AdaBoostClassifier(n_estimators=100, random_state=0)
- >>> clf.predict([[0, 0, 0, 0]])
- array([1])
- >>> clf.score(X, y)
- 0.983...
- """
- _parameter_constraints: dict = {
- **BaseWeightBoosting._parameter_constraints,
- "algorithm": [StrOptions({"SAMME", "SAMME.R"})],
- }
- def __init__(
- self,
- estimator=None,
- *,
- n_estimators=50,
- learning_rate=1.0,
- algorithm="SAMME.R",
- random_state=None,
- base_estimator="deprecated",
- ):
- super().__init__(
- estimator=estimator,
- n_estimators=n_estimators,
- learning_rate=learning_rate,
- random_state=random_state,
- base_estimator=base_estimator,
- )
- self.algorithm = algorithm
- def _validate_estimator(self):
- """Check the estimator and set the estimator_ attribute."""
- super()._validate_estimator(default=DecisionTreeClassifier(max_depth=1))
- # SAMME.R requires predict_proba-enabled base estimators
- if self.algorithm == "SAMME.R":
- if not hasattr(self.estimator_, "predict_proba"):
- raise TypeError(
- "AdaBoostClassifier with algorithm='SAMME.R' requires "
- "that the weak learner supports the calculation of class "
- "probabilities with a predict_proba method.\n"
- "Please change the base estimator or set "
- "algorithm='SAMME' instead."
- )
- if not has_fit_parameter(self.estimator_, "sample_weight"):
- raise ValueError(
- f"{self.estimator.__class__.__name__} doesn't support sample_weight."
- )
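- # Illustrative consequence of the checks above (assumed import): a weak
- # learner without ``predict_proba``, e.g. sklearn.svm.SVC with its default
- # probability=False, only works with the discrete variant.
- #
- #     from sklearn.svm import SVC
- #     clf = AdaBoostClassifier(estimator=SVC(), algorithm="SAMME")  # ok
- #     # With algorithm="SAMME.R" the same estimator raises TypeError in fit.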
- def _boost(self, iboost, X, y, sample_weight, random_state):
- """Implement a single boost.
- Perform a single boost according to the real multi-class SAMME.R
- algorithm or to the discrete SAMME algorithm and return the updated
- sample weights.
- Parameters
- ----------
- iboost : int
- The index of the current boost iteration.
- X : {array-like, sparse matrix} of shape (n_samples, n_features)
- The training input samples.
- y : array-like of shape (n_samples,)
- The target values (class labels).
- sample_weight : array-like of shape (n_samples,)
- The current sample weights.
- random_state : RandomState instance
- The RandomState instance used if the base estimator accepts a
- `random_state` attribute.
- Returns
- -------
- sample_weight : array-like of shape (n_samples,) or None
- The reweighted sample weights.
- If None then boosting has terminated early.
- estimator_weight : float
- The weight for the current boost.
- If None then boosting has terminated early.
- estimator_error : float
- The classification error for the current boost.
- If None then boosting has terminated early.
- """
- if self.algorithm == "SAMME.R":
- return self._boost_real(iboost, X, y, sample_weight, random_state)
- else: # elif self.algorithm == "SAMME":
- return self._boost_discrete(iboost, X, y, sample_weight, random_state)
- def _boost_real(self, iboost, X, y, sample_weight, random_state):
- """Implement a single boost using the SAMME.R real algorithm."""
- estimator = self._make_estimator(random_state=random_state)
- estimator.fit(X, y, sample_weight=sample_weight)
- y_predict_proba = estimator.predict_proba(X)
- if iboost == 0:
- self.classes_ = getattr(estimator, "classes_", None)
- self.n_classes_ = len(self.classes_)
- y_predict = self.classes_.take(np.argmax(y_predict_proba, axis=1), axis=0)
- # Instances incorrectly classified
- incorrect = y_predict != y
- # Error fraction
- estimator_error = np.mean(np.average(incorrect, weights=sample_weight, axis=0))
- # Stop if classification is perfect
- if estimator_error <= 0:
- return sample_weight, 1.0, 0.0
- # Construct y coding as described in Zhu et al [2]:
- #
- # y_k = 1 if c == k else -1 / (K - 1)
- #
- # where K == n_classes_ and c, k in [0, K) are indices along the second
- # axis of the y coding with c being the index corresponding to the true
- # class label.
- n_classes = self.n_classes_
- classes = self.classes_
- y_codes = np.array([-1.0 / (n_classes - 1), 1.0])
- y_coding = y_codes.take(classes == y[:, np.newaxis])
- # Displace zero probabilities so the log is defined.
- # Also fix negative elements which may occur with
- # negative sample weights.
- proba = y_predict_proba # alias for readability
- np.clip(proba, np.finfo(proba.dtype).eps, None, out=proba)
- # Boost weight using multi-class AdaBoost SAMME.R alg
- estimator_weight = (
- -1.0
- * self.learning_rate
- * ((n_classes - 1.0) / n_classes)
- * xlogy(y_coding, y_predict_proba).sum(axis=1)
- )
- # Only boost the weights if it will fit again
- if not iboost == self.n_estimators - 1:
- # Only boost positive weights
- sample_weight *= np.exp(
- estimator_weight * ((sample_weight > 0) | (estimator_weight < 0))
- )
- return sample_weight, 1.0, estimator_error
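- # Behavior sketch of the update above: the exponent is
- # -learning_rate * (K - 1) / K * sum_k(y_k * log(p_k)), so a confidently
- # correct prediction (true-class probability near 1) drives it negative
- # and shrinks the sample weight, while a confident mistake (true-class
- # probability at the eps floor) drives it positive and inflates the weight.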
- def _boost_discrete(self, iboost, X, y, sample_weight, random_state):
- """Implement a single boost using the SAMME discrete algorithm."""
- estimator = self._make_estimator(random_state=random_state)
- estimator.fit(X, y, sample_weight=sample_weight)
- y_predict = estimator.predict(X)
- if iboost == 0:
- self.classes_ = getattr(estimator, "classes_", None)
- self.n_classes_ = len(self.classes_)
- # Instances incorrectly classified
- incorrect = y_predict != y
- # Error fraction
- estimator_error = np.mean(np.average(incorrect, weights=sample_weight, axis=0))
- # Stop if classification is perfect
- if estimator_error <= 0:
- return sample_weight, 1.0, 0.0
- n_classes = self.n_classes_
- # Stop if the error is at least as bad as random guessing
- if estimator_error >= 1.0 - (1.0 / n_classes):
- self.estimators_.pop(-1)
- if len(self.estimators_) == 0:
- raise ValueError(
- "BaseClassifier in AdaBoostClassifier "
- "ensemble is worse than random, ensemble "
- "can not be fit."
- )
- return None, None, None
- # Boost weight using multi-class AdaBoost SAMME alg
- estimator_weight = self.learning_rate * (
- np.log((1.0 - estimator_error) / estimator_error) + np.log(n_classes - 1.0)
- )
- # Only boost the weights if it will fit again
- if not iboost == self.n_estimators - 1:
- # Only boost positive weights
- sample_weight = np.exp(
- np.log(sample_weight)
- + estimator_weight * incorrect * (sample_weight > 0)
- )
- return sample_weight, estimator_weight, estimator_error
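- # Worked example of the SAMME weight (illustrative numbers): with
- # learning_rate=1, K=3 classes and a weighted error of 0.3, the estimator
- # weight is log(0.7 / 0.3) + log(3 - 1) ~= 0.847 + 0.693 = 1.540, and each
- # misclassified sample has its weight multiplied by exp(1.540) ~= 4.66.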
- def predict(self, X):
- """Predict classes for X.
- The predicted class of an input sample is computed as the weighted mean
- prediction of the classifiers in the ensemble.
- Parameters
- ----------
- X : {array-like, sparse matrix} of shape (n_samples, n_features)
- The training input samples. Sparse matrix can be CSC, CSR, COO,
- DOK, or LIL. COO, DOK, and LIL are converted to CSR.
- Returns
- -------
- y : ndarray of shape (n_samples,)
- The predicted classes.
- """
- pred = self.decision_function(X)
- if self.n_classes_ == 2:
- return self.classes_.take(pred > 0, axis=0)
- return self.classes_.take(np.argmax(pred, axis=1), axis=0)
- def staged_predict(self, X):
- """Return staged predictions for X.
- The predicted class of an input sample is computed as the weighted mean
- prediction of the classifiers in the ensemble.
- This generator method yields the ensemble prediction after each
- iteration of boosting and therefore allows monitoring, such as to
- determine the prediction on a test set after each boost.
- Parameters
- ----------
- X : array-like of shape (n_samples, n_features)
- The input samples. Sparse matrix can be CSC, CSR, COO,
- DOK, or LIL. COO, DOK, and LIL are converted to CSR.
- Yields
- ------
- y : generator of ndarray of shape (n_samples,)
- The predicted classes.
- """
- X = self._check_X(X)
- n_classes = self.n_classes_
- classes = self.classes_
- if n_classes == 2:
- for pred in self.staged_decision_function(X):
- yield np.array(classes.take(pred > 0, axis=0))
- else:
- for pred in self.staged_decision_function(X):
- yield np.array(classes.take(np.argmax(pred, axis=1), axis=0))
- def decision_function(self, X):
- """Compute the decision function of ``X``.
- Parameters
- ----------
- X : {array-like, sparse matrix} of shape (n_samples, n_features)
- The training input samples. Sparse matrix can be CSC, CSR, COO,
- DOK, or LIL. COO, DOK, and LIL are converted to CSR.
- Returns
- -------
- score : ndarray of shape (n_samples, k)
- The decision function of the input samples. The order of
- outputs is the same as that of the :term:`classes_` attribute.
- Binary classification is a special case with ``k == 1``,
- otherwise ``k==n_classes``. For binary classification,
- values closer to -1 or 1 mean more like the first or second
- class in ``classes_``, respectively.
- """
- check_is_fitted(self)
- X = self._check_X(X)
- n_classes = self.n_classes_
- classes = self.classes_[:, np.newaxis]
- if self.algorithm == "SAMME.R":
- # The weights are all 1. for SAMME.R
- pred = sum(
- _samme_proba(estimator, n_classes, X) for estimator in self.estimators_
- )
- else: # self.algorithm == "SAMME"
- pred = sum(
- np.where(
- (estimator.predict(X) == classes).T,
- w,
- -1 / (n_classes - 1) * w,
- )
- for estimator, w in zip(self.estimators_, self.estimator_weights_)
- )
- pred /= self.estimator_weights_.sum()
- if n_classes == 2:
- pred[:, 0] *= -1
- return pred.sum(axis=1)
- return pred
- def staged_decision_function(self, X):
- """Compute decision function of ``X`` for each boosting iteration.
- This method allows monitoring (i.e. determining the error on a test
- set) after each boosting iteration.
- after each boosting iteration.
- Parameters
- ----------
- X : {array-like, sparse matrix} of shape (n_samples, n_features)
- The training input samples. Sparse matrix can be CSC, CSR, COO,
- DOK, or LIL. COO, DOK, and LIL are converted to CSR.
- Yields
- ------
- score : generator of ndarray of shape (n_samples, k)
- The decision function of the input samples. The order of
- outputs is the same as that of the :term:`classes_` attribute.
- Binary classification is a special case with ``k == 1``,
- otherwise ``k==n_classes``. For binary classification,
- values closer to -1 or 1 mean more like the first or second
- class in ``classes_``, respectively.
- """
- check_is_fitted(self)
- X = self._check_X(X)
- n_classes = self.n_classes_
- classes = self.classes_[:, np.newaxis]
- pred = None
- norm = 0.0
- for weight, estimator in zip(self.estimator_weights_, self.estimators_):
- norm += weight
- if self.algorithm == "SAMME.R":
- # The weights are all 1. for SAMME.R
- current_pred = _samme_proba(estimator, n_classes, X)
- else: # elif self.algorithm == "SAMME":
- current_pred = np.where(
- (estimator.predict(X) == classes).T,
- weight,
- -1 / (n_classes - 1) * weight,
- )
- if pred is None:
- pred = current_pred
- else:
- pred += current_pred
- if n_classes == 2:
- tmp_pred = np.copy(pred)
- tmp_pred[:, 0] *= -1
- yield (tmp_pred / norm).sum(axis=1)
- else:
- yield pred / norm
- @staticmethod
- def _compute_proba_from_decision(decision, n_classes):
- """Compute probabilities from the decision function.
- This is based eq. (4) of [1] where:
- p(y=c|X) = exp((1 / K-1) f_c(X)) / sum_k(exp((1 / K-1) f_k(X)))
- = softmax((1 / K-1) * f(X))
- References
- ----------
- .. [1] J. Zhu, H. Zou, S. Rosset, T. Hastie, "Multi-class AdaBoost",
- 2009.
- """
- if n_classes == 2:
- decision = np.vstack([-decision, decision]).T / 2
- else:
- decision /= n_classes - 1
- return softmax(decision, copy=False)
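- # Numeric sketch (illustrative): in the binary case a decision value d is
- # mapped to softmax([-d / 2, d / 2]), i.e. the logistic sigmoid of d; for
- # d = 1.0 the resulting probabilities are approximately [0.269, 0.731].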
- def predict_proba(self, X):
- """Predict class probabilities for X.
- The predicted class probabilities of an input sample are computed as
- the weighted mean predicted class probabilities of the classifiers
- in the ensemble.
- Parameters
- ----------
- X : {array-like, sparse matrix} of shape (n_samples, n_features)
- The training input samples. Sparse matrix can be CSC, CSR, COO,
- DOK, or LIL. COO, DOK, and LIL are converted to CSR.
- Returns
- -------
- p : ndarray of shape (n_samples, n_classes)
- The class probabilities of the input samples. The order of
- outputs is the same as that of the :term:`classes_` attribute.
- """
- check_is_fitted(self)
- n_classes = self.n_classes_
- if n_classes == 1:
- return np.ones((_num_samples(X), 1))
- decision = self.decision_function(X)
- return self._compute_proba_from_decision(decision, n_classes)
- def staged_predict_proba(self, X):
- """Predict class probabilities for X.
- The predicted class probabilities of an input sample are computed as
- the weighted mean predicted class probabilities of the classifiers
- in the ensemble.
- This generator method yields the ensemble predicted class probabilities
- after each iteration of boosting and therefore allows monitoring, such
- as to determine the predicted class probabilities on a test set after
- each boost.
- Parameters
- ----------
- X : {array-like, sparse matrix} of shape (n_samples, n_features)
- The training input samples. Sparse matrix can be CSC, CSR, COO,
- DOK, or LIL. COO, DOK, and LIL are converted to CSR.
- Yields
- ------
- p : generator of ndarray of shape (n_samples, n_classes)
- The class probabilities of the input samples. The order of
- outputs is the same as that of the :term:`classes_` attribute.
- """
- n_classes = self.n_classes_
- for decision in self.staged_decision_function(X):
- yield self._compute_proba_from_decision(decision, n_classes)
- def predict_log_proba(self, X):
- """Predict class log-probabilities for X.
- The predicted class log-probabilities of an input sample are computed
- as
- the weighted mean predicted class log-probabilities of the classifiers
- in the ensemble.
- Parameters
- ----------
- X : {array-like, sparse matrix} of shape (n_samples, n_features)
- The training input samples. Sparse matrix can be CSC, CSR, COO,
- DOK, or LIL. COO, DOK, and LIL are converted to CSR.
- Returns
- -------
- p : ndarray of shape (n_samples, n_classes)
- The class log-probabilities of the input samples. The order of
- outputs is the same as that of the :term:`classes_` attribute.
- """
- return np.log(self.predict_proba(X))
- class AdaBoostRegressor(RegressorMixin, BaseWeightBoosting):
- """An AdaBoost regressor.
- An AdaBoost [1] regressor is a meta-estimator that begins by fitting a
- regressor on the original dataset and then fits additional copies of the
- regressor on the same dataset but where the weights of instances are
- adjusted according to the error of the current prediction. As such,
- subsequent regressors focus more on difficult cases.
- This class implements the algorithm known as AdaBoost.R2 [2].
- Read more in the :ref:`User Guide <adaboost>`.
- .. versionadded:: 0.14
- Parameters
- ----------
- estimator : object, default=None
- The base estimator from which the boosted ensemble is built.
- If ``None``, then the base estimator is
- :class:`~sklearn.tree.DecisionTreeRegressor` initialized with
- `max_depth=3`.
- .. versionadded:: 1.2
- `base_estimator` was renamed to `estimator`.
- n_estimators : int, default=50
- The maximum number of estimators at which boosting is terminated.
- In case of perfect fit, the learning procedure is stopped early.
- Values must be in the range `[1, inf)`.
- learning_rate : float, default=1.0
- Weight applied to each regressor at each boosting iteration. A higher
- learning rate increases the contribution of each regressor. There is
- a trade-off between the `learning_rate` and `n_estimators` parameters.
- Values must be in the range `(0.0, inf)`.
- loss : {'linear', 'square', 'exponential'}, default='linear'
- The loss function to use when updating the weights after each
- boosting iteration.
- random_state : int, RandomState instance or None, default=None
- Controls the random seed given to each `estimator` at each
- boosting iteration.
- Thus, it is only used when `estimator` exposes a `random_state`.
- In addition, it controls the bootstrap of the weights used to train the
- `estimator` at each boosting iteration.
- Pass an int for reproducible output across multiple function calls.
- See :term:`Glossary <random_state>`.
- base_estimator : object, default=None
- The base estimator from which the boosted ensemble is built.
- If ``None``, then the base estimator is
- :class:`~sklearn.tree.DecisionTreeRegressor` initialized with
- `max_depth=3`.
- .. deprecated:: 1.2
- `base_estimator` is deprecated and will be removed in 1.4.
- Use `estimator` instead.
- Attributes
- ----------
- estimator_ : estimator
- The base estimator from which the ensemble is grown.
- .. versionadded:: 1.2
- `base_estimator_` was renamed to `estimator_`.
- base_estimator_ : estimator
- The base estimator from which the ensemble is grown.
- .. deprecated:: 1.2
- `base_estimator_` is deprecated and will be removed in 1.4.
- Use `estimator_` instead.
- estimators_ : list of regressors
- The collection of fitted sub-estimators.
- estimator_weights_ : ndarray of floats
- Weights for each estimator in the boosted ensemble.
- estimator_errors_ : ndarray of floats
- Regression error for each estimator in the boosted ensemble.
- feature_importances_ : ndarray of shape (n_features,)
- The impurity-based feature importances if supported by the
- ``estimator`` (when based on decision trees).
- Warning: impurity-based feature importances can be misleading for
- high cardinality features (many unique values). See
- :func:`sklearn.inspection.permutation_importance` as an alternative.
- n_features_in_ : int
- Number of features seen during :term:`fit`.
- .. versionadded:: 0.24
- feature_names_in_ : ndarray of shape (`n_features_in_`,)
- Names of features seen during :term:`fit`. Defined only when `X`
- has feature names that are all strings.
- .. versionadded:: 1.0
- See Also
- --------
- AdaBoostClassifier : An AdaBoost classifier.
- GradientBoostingRegressor : Gradient Boosting Regression Tree.
- sklearn.tree.DecisionTreeRegressor : A decision tree regressor.
- References
- ----------
- .. [1] Y. Freund, R. Schapire, "A Decision-Theoretic Generalization of
- On-Line Learning and an Application to Boosting", 1995.
- .. [2] H. Drucker, "Improving Regressors using Boosting Techniques", 1997.
- Examples
- --------
- >>> from sklearn.ensemble import AdaBoostRegressor
- >>> from sklearn.datasets import make_regression
- >>> X, y = make_regression(n_features=4, n_informative=2,
- ... random_state=0, shuffle=False)
- >>> regr = AdaBoostRegressor(random_state=0, n_estimators=100)
- >>> regr.fit(X, y)
- AdaBoostRegressor(n_estimators=100, random_state=0)
- >>> regr.predict([[0, 0, 0, 0]])
- array([4.7972...])
- >>> regr.score(X, y)
- 0.9771...
- """
- _parameter_constraints: dict = {
- **BaseWeightBoosting._parameter_constraints,
- "loss": [StrOptions({"linear", "square", "exponential"})],
- }
- def __init__(
- self,
- estimator=None,
- *,
- n_estimators=50,
- learning_rate=1.0,
- loss="linear",
- random_state=None,
- base_estimator="deprecated",
- ):
- super().__init__(
- estimator=estimator,
- n_estimators=n_estimators,
- learning_rate=learning_rate,
- random_state=random_state,
- base_estimator=base_estimator,
- )
- self.loss = loss
- self.random_state = random_state
- def _validate_estimator(self):
- """Check the estimator and set the estimator_ attribute."""
- super()._validate_estimator(default=DecisionTreeRegressor(max_depth=3))
- def _boost(self, iboost, X, y, sample_weight, random_state):
- """Implement a single boost for regression
- Perform a single boost according to the AdaBoost.R2 algorithm and
- return the updated sample weights.
- Parameters
- ----------
- iboost : int
- The index of the current boost iteration.
- X : {array-like, sparse matrix} of shape (n_samples, n_features)
- The training input samples.
- y : array-like of shape (n_samples,)
- The target values (class labels in classification, real numbers in
- regression).
- sample_weight : array-like of shape (n_samples,)
- The current sample weights.
- random_state : RandomState
- The RandomState instance used if the base estimator accepts a
- `random_state` attribute.
- Also controls the weighted bootstrap of the training set, drawn
- with replacement, used to fit the weak learner.
- Returns
- -------
- sample_weight : array-like of shape (n_samples,) or None
- The reweighted sample weights.
- If None then boosting has terminated early.
- estimator_weight : float
- The weight for the current boost.
- If None then boosting has terminated early.
- estimator_error : float
- The regression error for the current boost.
- If None then boosting has terminated early.
- """
- estimator = self._make_estimator(random_state=random_state)
- # Weighted sampling of the training set with replacement
- bootstrap_idx = random_state.choice(
- np.arange(_num_samples(X)),
- size=_num_samples(X),
- replace=True,
- p=sample_weight,
- )
- # Fit on the bootstrapped sample and obtain a prediction
- # for all samples in the training set
- X_ = _safe_indexing(X, bootstrap_idx)
- y_ = _safe_indexing(y, bootstrap_idx)
- estimator.fit(X_, y_)
- y_predict = estimator.predict(X)
- error_vect = np.abs(y_predict - y)
- sample_mask = sample_weight > 0
- masked_sample_weight = sample_weight[sample_mask]
- masked_error_vector = error_vect[sample_mask]
- error_max = masked_error_vector.max()
- if error_max != 0:
- masked_error_vector /= error_max
- if self.loss == "square":
- masked_error_vector **= 2
- elif self.loss == "exponential":
- masked_error_vector = 1.0 - np.exp(-masked_error_vector)
- # Calculate the average loss
- estimator_error = (masked_sample_weight * masked_error_vector).sum()
- if estimator_error <= 0:
- # Stop if fit is perfect
- return sample_weight, 1.0, 0.0
- elif estimator_error >= 0.5:
- # Discard current estimator only if it isn't the only one
- if len(self.estimators_) > 1:
- self.estimators_.pop(-1)
- return None, None, None
- beta = estimator_error / (1.0 - estimator_error)
- # Boost weight using AdaBoost.R2 alg
- estimator_weight = self.learning_rate * np.log(1.0 / beta)
- if not iboost == self.n_estimators - 1:
- sample_weight[sample_mask] *= np.power(
- beta, (1.0 - masked_error_vector) * self.learning_rate
- )
- return sample_weight, estimator_weight, estimator_error
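- # Worked example of the R2 update (illustrative numbers): with
- # learning_rate=1 and an average loss of 0.2, beta = 0.2 / 0.8 = 0.25 and
- # the estimator weight is log(1 / 0.25) ~= 1.386. Well-fit samples
- # (normalized error near 0) are multiplied by beta ** 1 = 0.25, while the
- # worst-fit samples (error near 1) keep their weight via beta ** 0 = 1.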
- def _get_median_predict(self, X, limit):
- # Evaluate predictions of all estimators
- predictions = np.array([est.predict(X) for est in self.estimators_[:limit]]).T
- # Sort the predictions
- sorted_idx = np.argsort(predictions, axis=1)
- # Find index of median prediction for each sample
- weight_cdf = stable_cumsum(self.estimator_weights_[sorted_idx], axis=1)
- median_or_above = weight_cdf >= 0.5 * weight_cdf[:, -1][:, np.newaxis]
- median_idx = median_or_above.argmax(axis=1)
- median_estimators = sorted_idx[np.arange(_num_samples(X)), median_idx]
- # Return median predictions
- return predictions[np.arange(_num_samples(X)), median_estimators]
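- # Weighted-median sketch (illustrative numbers): for a sample predicted as
- # [3.0, 1.0, 2.0] by three estimators weighted [0.2, 0.5, 0.3], sorting by
- # prediction gives [1.0, 2.0, 3.0] with weight cdf [0.5, 0.8, 1.0]; the
- # first entry with cdf >= 0.5 * 1.0 selects the prediction 1.0.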
- def predict(self, X):
- """Predict regression value for X.
- The predicted regression value of an input sample is computed
- as the weighted median prediction of the regressors in the ensemble.
- Parameters
- ----------
- X : {array-like, sparse matrix} of shape (n_samples, n_features)
- The training input samples. Sparse matrix can be CSC, CSR, COO,
- DOK, or LIL. COO, DOK, and LIL are converted to CSR.
- Returns
- -------
- y : ndarray of shape (n_samples,)
- The predicted regression values.
- """
- check_is_fitted(self)
- X = self._check_X(X)
- return self._get_median_predict(X, len(self.estimators_))
- def staged_predict(self, X):
- """Return staged predictions for X.
- The predicted regression value of an input sample is computed
- as the weighted median prediction of the regressors in the ensemble.
- This generator method yields the ensemble prediction after each
- iteration of boosting and therefore allows monitoring, such as to
- determine the prediction on a test set after each boost.
- Parameters
- ----------
- X : {array-like, sparse matrix} of shape (n_samples, n_features)
- The training input samples.
- Yields
- ------
- y : generator of ndarray of shape (n_samples,)
- The predicted regression values.
- """
- check_is_fitted(self)
- X = self._check_X(X)
- for i, _ in enumerate(self.estimators_, 1):
- yield self._get_median_predict(X, limit=i)
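- # Illustrative monitoring sketch (assumed names ``regr``, ``X_test``,
- # ``y_test``): staged_predict yields the weighted-median prediction after
- # each boost, so held-out error can be tracked without refitting.
- #
- #     from sklearn.metrics import mean_squared_error
- #     errors = [
- #         mean_squared_error(y_test, y_pred)
- #         for y_pred in regr.staged_predict(X_test)
- #     ]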