multioutput.py 40 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173
  1. """
  2. This module implements multioutput regression and classification.
  3. The estimators provided in this module are meta-estimators: they require
  4. a base estimator to be provided in their constructor. The meta-estimator
  5. extends single output estimators to multioutput estimators.
  6. """
  7. # Author: Tim Head <betatim@gmail.com>
  8. # Author: Hugo Bowne-Anderson <hugobowne@gmail.com>
  9. # Author: Chris Rivera <chris.richard.rivera@gmail.com>
  10. # Author: Michael Williamson
  11. # Author: James Ashton Nichols <james.ashton.nichols@gmail.com>
  12. #
  13. # License: BSD 3 clause
  14. from abc import ABCMeta, abstractmethod
  15. from numbers import Integral
  16. import numpy as np
  17. import scipy.sparse as sp
  18. from .base import (
  19. BaseEstimator,
  20. ClassifierMixin,
  21. MetaEstimatorMixin,
  22. RegressorMixin,
  23. _fit_context,
  24. clone,
  25. is_classifier,
  26. )
  27. from .model_selection import cross_val_predict
  28. from .utils import Bunch, _print_elapsed_time, check_random_state
  29. from .utils._param_validation import HasMethods, StrOptions
  30. from .utils.metadata_routing import (
  31. MetadataRouter,
  32. MethodMapping,
  33. _routing_enabled,
  34. process_routing,
  35. )
  36. from .utils.metaestimators import available_if
  37. from .utils.multiclass import check_classification_targets
  38. from .utils.parallel import Parallel, delayed
  39. from .utils.validation import _check_fit_params, check_is_fitted, has_fit_parameter
  40. __all__ = [
  41. "MultiOutputRegressor",
  42. "MultiOutputClassifier",
  43. "ClassifierChain",
  44. "RegressorChain",
  45. ]
  46. def _fit_estimator(estimator, X, y, sample_weight=None, **fit_params):
  47. estimator = clone(estimator)
  48. if sample_weight is not None:
  49. estimator.fit(X, y, sample_weight=sample_weight, **fit_params)
  50. else:
  51. estimator.fit(X, y, **fit_params)
  52. return estimator
  53. def _partial_fit_estimator(
  54. estimator, X, y, classes=None, partial_fit_params=None, first_time=True
  55. ):
  56. partial_fit_params = {} if partial_fit_params is None else partial_fit_params
  57. if first_time:
  58. estimator = clone(estimator)
  59. if classes is not None:
  60. estimator.partial_fit(X, y, classes=classes, **partial_fit_params)
  61. else:
  62. estimator.partial_fit(X, y, **partial_fit_params)
  63. return estimator
  64. def _available_if_estimator_has(attr):
  65. """Return a function to check if the sub-estimator(s) has(have) `attr`.
  66. Helper for Chain implementations.
  67. """
  68. def _check(self):
  69. if hasattr(self, "estimators_"):
  70. return all(hasattr(est, attr) for est in self.estimators_)
  71. if hasattr(self.estimator, attr):
  72. return True
  73. return False
  74. return available_if(_check)
  75. class _MultiOutputEstimator(MetaEstimatorMixin, BaseEstimator, metaclass=ABCMeta):
  76. _parameter_constraints: dict = {
  77. "estimator": [HasMethods(["fit", "predict"])],
  78. "n_jobs": [Integral, None],
  79. }
  80. @abstractmethod
  81. def __init__(self, estimator, *, n_jobs=None):
  82. self.estimator = estimator
  83. self.n_jobs = n_jobs
  84. @_available_if_estimator_has("partial_fit")
  85. @_fit_context(
  86. # MultiOutput*.estimator is not validated yet
  87. prefer_skip_nested_validation=False
  88. )
  89. def partial_fit(self, X, y, classes=None, sample_weight=None, **partial_fit_params):
  90. """Incrementally fit a separate model for each class output.
  91. Parameters
  92. ----------
  93. X : {array-like, sparse matrix} of shape (n_samples, n_features)
  94. The input data.
  95. y : {array-like, sparse matrix} of shape (n_samples, n_outputs)
  96. Multi-output targets.
  97. classes : list of ndarray of shape (n_outputs,), default=None
  98. Each array is unique classes for one output in str/int.
  99. Can be obtained via
  100. ``[np.unique(y[:, i]) for i in range(y.shape[1])]``, where `y`
  101. is the target matrix of the entire dataset.
  102. This argument is required for the first call to partial_fit
  103. and can be omitted in the subsequent calls.
  104. Note that `y` doesn't need to contain all labels in `classes`.
  105. sample_weight : array-like of shape (n_samples,), default=None
  106. Sample weights. If `None`, then samples are equally weighted.
  107. Only supported if the underlying regressor supports sample
  108. weights.
  109. **partial_fit_params : dict of str -> object
  110. Parameters passed to the ``estimator.partial_fit`` method of each
  111. sub-estimator.
  112. Only available if `enable_metadata_routing=True`. See the
  113. :ref:`User Guide <metadata_routing>`.
  114. .. versionadded:: 1.3
  115. Returns
  116. -------
  117. self : object
  118. Returns a fitted instance.
  119. """
  120. if partial_fit_params and not _routing_enabled():
  121. raise ValueError(
  122. "partial_fit_params is only supported if enable_metadata_routing=True."
  123. " See the User Guide for more information."
  124. )
  125. first_time = not hasattr(self, "estimators_")
  126. y = self._validate_data(X="no_validation", y=y, multi_output=True)
  127. if y.ndim == 1:
  128. raise ValueError(
  129. "y must have at least two dimensions for "
  130. "multi-output regression but has only one."
  131. )
  132. if _routing_enabled():
  133. routed_params = process_routing(
  134. obj=self,
  135. method="partial_fit",
  136. other_params=partial_fit_params,
  137. sample_weight=sample_weight,
  138. )
  139. else:
  140. if sample_weight is not None and not has_fit_parameter(
  141. self.estimator, "sample_weight"
  142. ):
  143. raise ValueError(
  144. "Underlying estimator does not support sample weights."
  145. )
  146. if sample_weight is not None:
  147. routed_params = Bunch(
  148. estimator=Bunch(partial_fit=Bunch(sample_weight=sample_weight))
  149. )
  150. else:
  151. routed_params = Bunch(estimator=Bunch(partial_fit=Bunch()))
  152. self.estimators_ = Parallel(n_jobs=self.n_jobs)(
  153. delayed(_partial_fit_estimator)(
  154. self.estimators_[i] if not first_time else self.estimator,
  155. X,
  156. y[:, i],
  157. classes[i] if classes is not None else None,
  158. partial_fit_params=routed_params.estimator.partial_fit,
  159. first_time=first_time,
  160. )
  161. for i in range(y.shape[1])
  162. )
  163. if first_time and hasattr(self.estimators_[0], "n_features_in_"):
  164. self.n_features_in_ = self.estimators_[0].n_features_in_
  165. if first_time and hasattr(self.estimators_[0], "feature_names_in_"):
  166. self.feature_names_in_ = self.estimators_[0].feature_names_in_
  167. return self
  168. @_fit_context(
  169. # MultiOutput*.estimator is not validated yet
  170. prefer_skip_nested_validation=False
  171. )
  172. def fit(self, X, y, sample_weight=None, **fit_params):
  173. """Fit the model to data, separately for each output variable.
  174. Parameters
  175. ----------
  176. X : {array-like, sparse matrix} of shape (n_samples, n_features)
  177. The input data.
  178. y : {array-like, sparse matrix} of shape (n_samples, n_outputs)
  179. Multi-output targets. An indicator matrix turns on multilabel
  180. estimation.
  181. sample_weight : array-like of shape (n_samples,), default=None
  182. Sample weights. If `None`, then samples are equally weighted.
  183. Only supported if the underlying regressor supports sample
  184. weights.
  185. **fit_params : dict of string -> object
  186. Parameters passed to the ``estimator.fit`` method of each step.
  187. .. versionadded:: 0.23
  188. Returns
  189. -------
  190. self : object
  191. Returns a fitted instance.
  192. """
  193. if not hasattr(self.estimator, "fit"):
  194. raise ValueError("The base estimator should implement a fit method")
  195. y = self._validate_data(X="no_validation", y=y, multi_output=True)
  196. if is_classifier(self):
  197. check_classification_targets(y)
  198. if y.ndim == 1:
  199. raise ValueError(
  200. "y must have at least two dimensions for "
  201. "multi-output regression but has only one."
  202. )
  203. if _routing_enabled():
  204. routed_params = process_routing(
  205. obj=self,
  206. method="fit",
  207. other_params=fit_params,
  208. sample_weight=sample_weight,
  209. )
  210. else:
  211. if sample_weight is not None and not has_fit_parameter(
  212. self.estimator, "sample_weight"
  213. ):
  214. raise ValueError(
  215. "Underlying estimator does not support sample weights."
  216. )
  217. fit_params_validated = _check_fit_params(X, fit_params)
  218. routed_params = Bunch(estimator=Bunch(fit=fit_params_validated))
  219. if sample_weight is not None:
  220. routed_params.estimator.fit["sample_weight"] = sample_weight
  221. self.estimators_ = Parallel(n_jobs=self.n_jobs)(
  222. delayed(_fit_estimator)(
  223. self.estimator, X, y[:, i], **routed_params.estimator.fit
  224. )
  225. for i in range(y.shape[1])
  226. )
  227. if hasattr(self.estimators_[0], "n_features_in_"):
  228. self.n_features_in_ = self.estimators_[0].n_features_in_
  229. if hasattr(self.estimators_[0], "feature_names_in_"):
  230. self.feature_names_in_ = self.estimators_[0].feature_names_in_
  231. return self
  232. def predict(self, X):
  233. """Predict multi-output variable using model for each target variable.
  234. Parameters
  235. ----------
  236. X : {array-like, sparse matrix} of shape (n_samples, n_features)
  237. The input data.
  238. Returns
  239. -------
  240. y : {array-like, sparse matrix} of shape (n_samples, n_outputs)
  241. Multi-output targets predicted across multiple predictors.
  242. Note: Separate models are generated for each predictor.
  243. """
  244. check_is_fitted(self)
  245. if not hasattr(self.estimators_[0], "predict"):
  246. raise ValueError("The base estimator should implement a predict method")
  247. y = Parallel(n_jobs=self.n_jobs)(
  248. delayed(e.predict)(X) for e in self.estimators_
  249. )
  250. return np.asarray(y).T
  251. def _more_tags(self):
  252. return {"multioutput_only": True}
  253. def get_metadata_routing(self):
  254. """Get metadata routing of this object.
  255. Please check :ref:`User Guide <metadata_routing>` on how the routing
  256. mechanism works.
  257. .. versionadded:: 1.3
  258. Returns
  259. -------
  260. routing : MetadataRouter
  261. A :class:`~sklearn.utils.metadata_routing.MetadataRouter` encapsulating
  262. routing information.
  263. """
  264. router = MetadataRouter(owner=self.__class__.__name__).add(
  265. estimator=self.estimator,
  266. method_mapping=MethodMapping()
  267. .add(callee="partial_fit", caller="partial_fit")
  268. .add(callee="fit", caller="fit"),
  269. )
  270. return router
  271. class MultiOutputRegressor(RegressorMixin, _MultiOutputEstimator):
  272. """Multi target regression.
  273. This strategy consists of fitting one regressor per target. This is a
  274. simple strategy for extending regressors that do not natively support
  275. multi-target regression.
  276. .. versionadded:: 0.18
  277. Parameters
  278. ----------
  279. estimator : estimator object
  280. An estimator object implementing :term:`fit` and :term:`predict`.
  281. n_jobs : int or None, optional (default=None)
  282. The number of jobs to run in parallel.
  283. :meth:`fit`, :meth:`predict` and :meth:`partial_fit` (if supported
  284. by the passed estimator) will be parallelized for each target.
  285. When individual estimators are fast to train or predict,
  286. using ``n_jobs > 1`` can result in slower performance due
  287. to the parallelism overhead.
  288. ``None`` means `1` unless in a :obj:`joblib.parallel_backend` context.
  289. ``-1`` means using all available processes / threads.
  290. See :term:`Glossary <n_jobs>` for more details.
  291. .. versionchanged:: 0.20
  292. `n_jobs` default changed from `1` to `None`.
  293. Attributes
  294. ----------
  295. estimators_ : list of ``n_output`` estimators
  296. Estimators used for predictions.
  297. n_features_in_ : int
  298. Number of features seen during :term:`fit`. Only defined if the
  299. underlying `estimator` exposes such an attribute when fit.
  300. .. versionadded:: 0.24
  301. feature_names_in_ : ndarray of shape (`n_features_in_`,)
  302. Names of features seen during :term:`fit`. Only defined if the
  303. underlying estimators expose such an attribute when fit.
  304. .. versionadded:: 1.0
  305. See Also
  306. --------
  307. RegressorChain : A multi-label model that arranges regressions into a
  308. chain.
  309. MultiOutputClassifier : Classifies each output independently rather than
  310. chaining.
  311. Examples
  312. --------
  313. >>> import numpy as np
  314. >>> from sklearn.datasets import load_linnerud
  315. >>> from sklearn.multioutput import MultiOutputRegressor
  316. >>> from sklearn.linear_model import Ridge
  317. >>> X, y = load_linnerud(return_X_y=True)
  318. >>> regr = MultiOutputRegressor(Ridge(random_state=123)).fit(X, y)
  319. >>> regr.predict(X[[0]])
  320. array([[176..., 35..., 57...]])
  321. """
  322. def __init__(self, estimator, *, n_jobs=None):
  323. super().__init__(estimator, n_jobs=n_jobs)
  324. @_available_if_estimator_has("partial_fit")
  325. def partial_fit(self, X, y, sample_weight=None, **partial_fit_params):
  326. """Incrementally fit the model to data, for each output variable.
  327. Parameters
  328. ----------
  329. X : {array-like, sparse matrix} of shape (n_samples, n_features)
  330. The input data.
  331. y : {array-like, sparse matrix} of shape (n_samples, n_outputs)
  332. Multi-output targets.
  333. sample_weight : array-like of shape (n_samples,), default=None
  334. Sample weights. If `None`, then samples are equally weighted.
  335. Only supported if the underlying regressor supports sample
  336. weights.
  337. **partial_fit_params : dict of str -> object
  338. Parameters passed to the ``estimator.partial_fit`` method of each
  339. sub-estimator.
  340. Only available if `enable_metadata_routing=True`. See the
  341. :ref:`User Guide <metadata_routing>`.
  342. .. versionadded:: 1.3
  343. Returns
  344. -------
  345. self : object
  346. Returns a fitted instance.
  347. """
  348. super().partial_fit(X, y, sample_weight=sample_weight, **partial_fit_params)
  349. class MultiOutputClassifier(ClassifierMixin, _MultiOutputEstimator):
  350. """Multi target classification.
  351. This strategy consists of fitting one classifier per target. This is a
  352. simple strategy for extending classifiers that do not natively support
  353. multi-target classification.
  354. Parameters
  355. ----------
  356. estimator : estimator object
  357. An estimator object implementing :term:`fit` and :term:`predict`.
  358. A :term:`predict_proba` method will be exposed only if `estimator` implements
  359. it.
  360. n_jobs : int or None, optional (default=None)
  361. The number of jobs to run in parallel.
  362. :meth:`fit`, :meth:`predict` and :meth:`partial_fit` (if supported
  363. by the passed estimator) will be parallelized for each target.
  364. When individual estimators are fast to train or predict,
  365. using ``n_jobs > 1`` can result in slower performance due
  366. to the parallelism overhead.
  367. ``None`` means `1` unless in a :obj:`joblib.parallel_backend` context.
  368. ``-1`` means using all available processes / threads.
  369. See :term:`Glossary <n_jobs>` for more details.
  370. .. versionchanged:: 0.20
  371. `n_jobs` default changed from `1` to `None`.
  372. Attributes
  373. ----------
  374. classes_ : ndarray of shape (n_classes,)
  375. Class labels.
  376. estimators_ : list of ``n_output`` estimators
  377. Estimators used for predictions.
  378. n_features_in_ : int
  379. Number of features seen during :term:`fit`. Only defined if the
  380. underlying `estimator` exposes such an attribute when fit.
  381. .. versionadded:: 0.24
  382. feature_names_in_ : ndarray of shape (`n_features_in_`,)
  383. Names of features seen during :term:`fit`. Only defined if the
  384. underlying estimators expose such an attribute when fit.
  385. .. versionadded:: 1.0
  386. See Also
  387. --------
  388. ClassifierChain : A multi-label model that arranges binary classifiers
  389. into a chain.
  390. MultiOutputRegressor : Fits one regressor per target variable.
  391. Examples
  392. --------
  393. >>> import numpy as np
  394. >>> from sklearn.datasets import make_multilabel_classification
  395. >>> from sklearn.multioutput import MultiOutputClassifier
  396. >>> from sklearn.linear_model import LogisticRegression
  397. >>> X, y = make_multilabel_classification(n_classes=3, random_state=0)
  398. >>> clf = MultiOutputClassifier(LogisticRegression()).fit(X, y)
  399. >>> clf.predict(X[-2:])
  400. array([[1, 1, 1],
  401. [1, 0, 1]])
  402. """
  403. def __init__(self, estimator, *, n_jobs=None):
  404. super().__init__(estimator, n_jobs=n_jobs)
  405. def fit(self, X, Y, sample_weight=None, **fit_params):
  406. """Fit the model to data matrix X and targets Y.
  407. Parameters
  408. ----------
  409. X : {array-like, sparse matrix} of shape (n_samples, n_features)
  410. The input data.
  411. Y : array-like of shape (n_samples, n_classes)
  412. The target values.
  413. sample_weight : array-like of shape (n_samples,), default=None
  414. Sample weights. If `None`, then samples are equally weighted.
  415. Only supported if the underlying classifier supports sample
  416. weights.
  417. **fit_params : dict of string -> object
  418. Parameters passed to the ``estimator.fit`` method of each step.
  419. .. versionadded:: 0.23
  420. Returns
  421. -------
  422. self : object
  423. Returns a fitted instance.
  424. """
  425. super().fit(X, Y, sample_weight=sample_weight, **fit_params)
  426. self.classes_ = [estimator.classes_ for estimator in self.estimators_]
  427. return self
  428. def _check_predict_proba(self):
  429. if hasattr(self, "estimators_"):
  430. # raise an AttributeError if `predict_proba` does not exist for
  431. # each estimator
  432. [getattr(est, "predict_proba") for est in self.estimators_]
  433. return True
  434. # raise an AttributeError if `predict_proba` does not exist for the
  435. # unfitted estimator
  436. getattr(self.estimator, "predict_proba")
  437. return True
  438. @available_if(_check_predict_proba)
  439. def predict_proba(self, X):
  440. """Return prediction probabilities for each class of each output.
  441. This method will raise a ``ValueError`` if any of the
  442. estimators do not have ``predict_proba``.
  443. Parameters
  444. ----------
  445. X : array-like of shape (n_samples, n_features)
  446. The input data.
  447. Returns
  448. -------
  449. p : array of shape (n_samples, n_classes), or a list of n_outputs \
  450. such arrays if n_outputs > 1.
  451. The class probabilities of the input samples. The order of the
  452. classes corresponds to that in the attribute :term:`classes_`.
  453. .. versionchanged:: 0.19
  454. This function now returns a list of arrays where the length of
  455. the list is ``n_outputs``, and each array is (``n_samples``,
  456. ``n_classes``) for that particular output.
  457. """
  458. check_is_fitted(self)
  459. results = [estimator.predict_proba(X) for estimator in self.estimators_]
  460. return results
  461. def score(self, X, y):
  462. """Return the mean accuracy on the given test data and labels.
  463. Parameters
  464. ----------
  465. X : array-like of shape (n_samples, n_features)
  466. Test samples.
  467. y : array-like of shape (n_samples, n_outputs)
  468. True values for X.
  469. Returns
  470. -------
  471. scores : float
  472. Mean accuracy of predicted target versus true target.
  473. """
  474. check_is_fitted(self)
  475. n_outputs_ = len(self.estimators_)
  476. if y.ndim == 1:
  477. raise ValueError(
  478. "y must have at least two dimensions for "
  479. "multi target classification but has only one"
  480. )
  481. if y.shape[1] != n_outputs_:
  482. raise ValueError(
  483. "The number of outputs of Y for fit {0} and"
  484. " score {1} should be same".format(n_outputs_, y.shape[1])
  485. )
  486. y_pred = self.predict(X)
  487. return np.mean(np.all(y == y_pred, axis=1))
  488. def _more_tags(self):
  489. # FIXME
  490. return {"_skip_test": True}
  491. def _available_if_base_estimator_has(attr):
  492. """Return a function to check if `base_estimator` or `estimators_` has `attr`.
  493. Helper for Chain implementations.
  494. """
  495. def _check(self):
  496. return hasattr(self.base_estimator, attr) or all(
  497. hasattr(est, attr) for est in self.estimators_
  498. )
  499. return available_if(_check)
  500. class _BaseChain(BaseEstimator, metaclass=ABCMeta):
  501. _parameter_constraints: dict = {
  502. "base_estimator": [HasMethods(["fit", "predict"])],
  503. "order": ["array-like", StrOptions({"random"}), None],
  504. "cv": ["cv_object", StrOptions({"prefit"})],
  505. "random_state": ["random_state"],
  506. "verbose": ["boolean"],
  507. }
  508. def __init__(
  509. self, base_estimator, *, order=None, cv=None, random_state=None, verbose=False
  510. ):
  511. self.base_estimator = base_estimator
  512. self.order = order
  513. self.cv = cv
  514. self.random_state = random_state
  515. self.verbose = verbose
  516. def _log_message(self, *, estimator_idx, n_estimators, processing_msg):
  517. if not self.verbose:
  518. return None
  519. return f"({estimator_idx} of {n_estimators}) {processing_msg}"
  520. @abstractmethod
  521. def fit(self, X, Y, **fit_params):
  522. """Fit the model to data matrix X and targets Y.
  523. Parameters
  524. ----------
  525. X : {array-like, sparse matrix} of shape (n_samples, n_features)
  526. The input data.
  527. Y : array-like of shape (n_samples, n_classes)
  528. The target values.
  529. **fit_params : dict of string -> object
  530. Parameters passed to the `fit` method of each step.
  531. .. versionadded:: 0.23
  532. Returns
  533. -------
  534. self : object
  535. Returns a fitted instance.
  536. """
  537. X, Y = self._validate_data(X, Y, multi_output=True, accept_sparse=True)
  538. random_state = check_random_state(self.random_state)
  539. self.order_ = self.order
  540. if isinstance(self.order_, tuple):
  541. self.order_ = np.array(self.order_)
  542. if self.order_ is None:
  543. self.order_ = np.array(range(Y.shape[1]))
  544. elif isinstance(self.order_, str):
  545. if self.order_ == "random":
  546. self.order_ = random_state.permutation(Y.shape[1])
  547. elif sorted(self.order_) != list(range(Y.shape[1])):
  548. raise ValueError("invalid order")
  549. self.estimators_ = [clone(self.base_estimator) for _ in range(Y.shape[1])]
  550. if self.cv is None:
  551. Y_pred_chain = Y[:, self.order_]
  552. if sp.issparse(X):
  553. X_aug = sp.hstack((X, Y_pred_chain), format="lil")
  554. X_aug = X_aug.tocsr()
  555. else:
  556. X_aug = np.hstack((X, Y_pred_chain))
  557. elif sp.issparse(X):
  558. Y_pred_chain = sp.lil_matrix((X.shape[0], Y.shape[1]))
  559. X_aug = sp.hstack((X, Y_pred_chain), format="lil")
  560. else:
  561. Y_pred_chain = np.zeros((X.shape[0], Y.shape[1]))
  562. X_aug = np.hstack((X, Y_pred_chain))
  563. del Y_pred_chain
  564. if _routing_enabled():
  565. routed_params = process_routing(
  566. obj=self, method="fit", other_params=fit_params
  567. )
  568. else:
  569. routed_params = Bunch(estimator=Bunch(fit=fit_params))
  570. for chain_idx, estimator in enumerate(self.estimators_):
  571. message = self._log_message(
  572. estimator_idx=chain_idx + 1,
  573. n_estimators=len(self.estimators_),
  574. processing_msg=f"Processing order {self.order_[chain_idx]}",
  575. )
  576. y = Y[:, self.order_[chain_idx]]
  577. with _print_elapsed_time("Chain", message):
  578. estimator.fit(
  579. X_aug[:, : (X.shape[1] + chain_idx)],
  580. y,
  581. **routed_params.estimator.fit,
  582. )
  583. if self.cv is not None and chain_idx < len(self.estimators_) - 1:
  584. col_idx = X.shape[1] + chain_idx
  585. cv_result = cross_val_predict(
  586. self.base_estimator, X_aug[:, :col_idx], y=y, cv=self.cv
  587. )
  588. if sp.issparse(X_aug):
  589. X_aug[:, col_idx] = np.expand_dims(cv_result, 1)
  590. else:
  591. X_aug[:, col_idx] = cv_result
  592. return self
  593. def predict(self, X):
  594. """Predict on the data matrix X using the ClassifierChain model.
  595. Parameters
  596. ----------
  597. X : {array-like, sparse matrix} of shape (n_samples, n_features)
  598. The input data.
  599. Returns
  600. -------
  601. Y_pred : array-like of shape (n_samples, n_classes)
  602. The predicted values.
  603. """
  604. check_is_fitted(self)
  605. X = self._validate_data(X, accept_sparse=True, reset=False)
  606. Y_pred_chain = np.zeros((X.shape[0], len(self.estimators_)))
  607. for chain_idx, estimator in enumerate(self.estimators_):
  608. previous_predictions = Y_pred_chain[:, :chain_idx]
  609. if sp.issparse(X):
  610. if chain_idx == 0:
  611. X_aug = X
  612. else:
  613. X_aug = sp.hstack((X, previous_predictions))
  614. else:
  615. X_aug = np.hstack((X, previous_predictions))
  616. Y_pred_chain[:, chain_idx] = estimator.predict(X_aug)
  617. inv_order = np.empty_like(self.order_)
  618. inv_order[self.order_] = np.arange(len(self.order_))
  619. Y_pred = Y_pred_chain[:, inv_order]
  620. return Y_pred
  621. class ClassifierChain(MetaEstimatorMixin, ClassifierMixin, _BaseChain):
  622. """A multi-label model that arranges binary classifiers into a chain.
  623. Each model makes a prediction in the order specified by the chain using
  624. all of the available features provided to the model plus the predictions
  625. of models that are earlier in the chain.
  626. For an example of how to use ``ClassifierChain`` and benefit from its
  627. ensemble, see
  628. :ref:`ClassifierChain on a yeast dataset
  629. <sphx_glr_auto_examples_multioutput_plot_classifier_chain_yeast.py>` example.
  630. Read more in the :ref:`User Guide <classifierchain>`.
  631. .. versionadded:: 0.19
  632. Parameters
  633. ----------
  634. base_estimator : estimator
  635. The base estimator from which the classifier chain is built.
  636. order : array-like of shape (n_outputs,) or 'random', default=None
  637. If `None`, the order will be determined by the order of columns in
  638. the label matrix Y.::
  639. order = [0, 1, 2, ..., Y.shape[1] - 1]
  640. The order of the chain can be explicitly set by providing a list of
  641. integers. For example, for a chain of length 5.::
  642. order = [1, 3, 2, 4, 0]
  643. means that the first model in the chain will make predictions for
  644. column 1 in the Y matrix, the second model will make predictions
  645. for column 3, etc.
  646. If order is `random` a random ordering will be used.
  647. cv : int, cross-validation generator or an iterable, default=None
  648. Determines whether to use cross validated predictions or true
  649. labels for the results of previous estimators in the chain.
  650. Possible inputs for cv are:
  651. - None, to use true labels when fitting,
  652. - integer, to specify the number of folds in a (Stratified)KFold,
  653. - :term:`CV splitter`,
  654. - An iterable yielding (train, test) splits as arrays of indices.
  655. random_state : int, RandomState instance or None, optional (default=None)
  656. If ``order='random'``, determines random number generation for the
  657. chain order.
  658. In addition, it controls the random seed given at each `base_estimator`
  659. at each chaining iteration. Thus, it is only used when `base_estimator`
  660. exposes a `random_state`.
  661. Pass an int for reproducible output across multiple function calls.
  662. See :term:`Glossary <random_state>`.
  663. verbose : bool, default=False
  664. If True, chain progress is output as each model is completed.
  665. .. versionadded:: 1.2
  666. Attributes
  667. ----------
  668. classes_ : list
  669. A list of arrays of length ``len(estimators_)`` containing the
  670. class labels for each estimator in the chain.
  671. estimators_ : list
  672. A list of clones of base_estimator.
  673. order_ : list
  674. The order of labels in the classifier chain.
  675. n_features_in_ : int
  676. Number of features seen during :term:`fit`. Only defined if the
  677. underlying `base_estimator` exposes such an attribute when fit.
  678. .. versionadded:: 0.24
  679. feature_names_in_ : ndarray of shape (`n_features_in_`,)
  680. Names of features seen during :term:`fit`. Defined only when `X`
  681. has feature names that are all strings.
  682. .. versionadded:: 1.0
  683. See Also
  684. --------
  685. RegressorChain : Equivalent for regression.
  686. MultiOutputClassifier : Classifies each output independently rather than
  687. chaining.
  688. References
  689. ----------
  690. Jesse Read, Bernhard Pfahringer, Geoff Holmes, Eibe Frank, "Classifier
  691. Chains for Multi-label Classification", 2009.
  692. Examples
  693. --------
  694. >>> from sklearn.datasets import make_multilabel_classification
  695. >>> from sklearn.linear_model import LogisticRegression
  696. >>> from sklearn.model_selection import train_test_split
  697. >>> from sklearn.multioutput import ClassifierChain
  698. >>> X, Y = make_multilabel_classification(
  699. ... n_samples=12, n_classes=3, random_state=0
  700. ... )
  701. >>> X_train, X_test, Y_train, Y_test = train_test_split(
  702. ... X, Y, random_state=0
  703. ... )
  704. >>> base_lr = LogisticRegression(solver='lbfgs', random_state=0)
  705. >>> chain = ClassifierChain(base_lr, order='random', random_state=0)
  706. >>> chain.fit(X_train, Y_train).predict(X_test)
  707. array([[1., 1., 0.],
  708. [1., 0., 0.],
  709. [0., 1., 0.]])
  710. >>> chain.predict_proba(X_test)
  711. array([[0.8387..., 0.9431..., 0.4576...],
  712. [0.8878..., 0.3684..., 0.2640...],
  713. [0.0321..., 0.9935..., 0.0625...]])
  714. """
  715. @_fit_context(
  716. # ClassifierChain.base_estimator is not validated yet
  717. prefer_skip_nested_validation=False
  718. )
  719. def fit(self, X, Y, **fit_params):
  720. """Fit the model to data matrix X and targets Y.
  721. Parameters
  722. ----------
  723. X : {array-like, sparse matrix} of shape (n_samples, n_features)
  724. The input data.
  725. Y : array-like of shape (n_samples, n_classes)
  726. The target values.
  727. **fit_params : dict of string -> object
  728. Parameters passed to the `fit` method of each step.
  729. Only available if `enable_metadata_routing=True`. See the
  730. :ref:`User Guide <metadata_routing>`.
  731. .. versionadded:: 1.3
  732. Returns
  733. -------
  734. self : object
  735. Class instance.
  736. """
  737. if fit_params and not _routing_enabled():
  738. raise ValueError(
  739. "fit_params is only supported if enable_metadata_routing=True. "
  740. "See the User Guide for more information."
  741. )
  742. super().fit(X, Y, **fit_params)
  743. self.classes_ = [
  744. estimator.classes_ for chain_idx, estimator in enumerate(self.estimators_)
  745. ]
  746. return self
  747. @_available_if_base_estimator_has("predict_proba")
  748. def predict_proba(self, X):
  749. """Predict probability estimates.
  750. Parameters
  751. ----------
  752. X : {array-like, sparse matrix} of shape (n_samples, n_features)
  753. The input data.
  754. Returns
  755. -------
  756. Y_prob : array-like of shape (n_samples, n_classes)
  757. The predicted probabilities.
  758. """
  759. X = self._validate_data(X, accept_sparse=True, reset=False)
  760. Y_prob_chain = np.zeros((X.shape[0], len(self.estimators_)))
  761. Y_pred_chain = np.zeros((X.shape[0], len(self.estimators_)))
  762. for chain_idx, estimator in enumerate(self.estimators_):
  763. previous_predictions = Y_pred_chain[:, :chain_idx]
  764. if sp.issparse(X):
  765. X_aug = sp.hstack((X, previous_predictions))
  766. else:
  767. X_aug = np.hstack((X, previous_predictions))
  768. Y_prob_chain[:, chain_idx] = estimator.predict_proba(X_aug)[:, 1]
  769. Y_pred_chain[:, chain_idx] = estimator.predict(X_aug)
  770. inv_order = np.empty_like(self.order_)
  771. inv_order[self.order_] = np.arange(len(self.order_))
  772. Y_prob = Y_prob_chain[:, inv_order]
  773. return Y_prob
  774. @_available_if_base_estimator_has("decision_function")
  775. def decision_function(self, X):
  776. """Evaluate the decision_function of the models in the chain.
  777. Parameters
  778. ----------
  779. X : array-like of shape (n_samples, n_features)
  780. The input data.
  781. Returns
  782. -------
  783. Y_decision : array-like of shape (n_samples, n_classes)
  784. Returns the decision function of the sample for each model
  785. in the chain.
  786. """
  787. X = self._validate_data(X, accept_sparse=True, reset=False)
  788. Y_decision_chain = np.zeros((X.shape[0], len(self.estimators_)))
  789. Y_pred_chain = np.zeros((X.shape[0], len(self.estimators_)))
  790. for chain_idx, estimator in enumerate(self.estimators_):
  791. previous_predictions = Y_pred_chain[:, :chain_idx]
  792. if sp.issparse(X):
  793. X_aug = sp.hstack((X, previous_predictions))
  794. else:
  795. X_aug = np.hstack((X, previous_predictions))
  796. Y_decision_chain[:, chain_idx] = estimator.decision_function(X_aug)
  797. Y_pred_chain[:, chain_idx] = estimator.predict(X_aug)
  798. inv_order = np.empty_like(self.order_)
  799. inv_order[self.order_] = np.arange(len(self.order_))
  800. Y_decision = Y_decision_chain[:, inv_order]
  801. return Y_decision
  802. def get_metadata_routing(self):
  803. """Get metadata routing of this object.
  804. Please check :ref:`User Guide <metadata_routing>` on how the routing
  805. mechanism works.
  806. .. versionadded:: 1.3
  807. Returns
  808. -------
  809. routing : MetadataRouter
  810. A :class:`~sklearn.utils.metadata_routing.MetadataRouter` encapsulating
  811. routing information.
  812. """
  813. router = MetadataRouter(owner=self.__class__.__name__).add(
  814. estimator=self.base_estimator,
  815. method_mapping=MethodMapping().add(callee="fit", caller="fit"),
  816. )
  817. return router
  818. def _more_tags(self):
  819. return {"_skip_test": True, "multioutput_only": True}
  820. class RegressorChain(MetaEstimatorMixin, RegressorMixin, _BaseChain):
  821. """A multi-label model that arranges regressions into a chain.
  822. Each model makes a prediction in the order specified by the chain using
  823. all of the available features provided to the model plus the predictions
  824. of models that are earlier in the chain.
  825. Read more in the :ref:`User Guide <regressorchain>`.
  826. .. versionadded:: 0.20
  827. Parameters
  828. ----------
  829. base_estimator : estimator
  830. The base estimator from which the regressor chain is built.
  831. order : array-like of shape (n_outputs,) or 'random', default=None
  832. If `None`, the order will be determined by the order of columns in
  833. the label matrix Y.::
  834. order = [0, 1, 2, ..., Y.shape[1] - 1]
  835. The order of the chain can be explicitly set by providing a list of
  836. integers. For example, for a chain of length 5.::
  837. order = [1, 3, 2, 4, 0]
  838. means that the first model in the chain will make predictions for
  839. column 1 in the Y matrix, the second model will make predictions
  840. for column 3, etc.
  841. If order is 'random' a random ordering will be used.
  842. cv : int, cross-validation generator or an iterable, default=None
  843. Determines whether to use cross validated predictions or true
  844. labels for the results of previous estimators in the chain.
  845. Possible inputs for cv are:
  846. - None, to use true labels when fitting,
  847. - integer, to specify the number of folds in a (Stratified)KFold,
  848. - :term:`CV splitter`,
  849. - An iterable yielding (train, test) splits as arrays of indices.
  850. random_state : int, RandomState instance or None, optional (default=None)
  851. If ``order='random'``, determines random number generation for the
  852. chain order.
  853. In addition, it controls the random seed given at each `base_estimator`
  854. at each chaining iteration. Thus, it is only used when `base_estimator`
  855. exposes a `random_state`.
  856. Pass an int for reproducible output across multiple function calls.
  857. See :term:`Glossary <random_state>`.
  858. verbose : bool, default=False
  859. If True, chain progress is output as each model is completed.
  860. .. versionadded:: 1.2
  861. Attributes
  862. ----------
  863. estimators_ : list
  864. A list of clones of base_estimator.
  865. order_ : list
  866. The order of labels in the classifier chain.
  867. n_features_in_ : int
  868. Number of features seen during :term:`fit`. Only defined if the
  869. underlying `base_estimator` exposes such an attribute when fit.
  870. .. versionadded:: 0.24
  871. feature_names_in_ : ndarray of shape (`n_features_in_`,)
  872. Names of features seen during :term:`fit`. Defined only when `X`
  873. has feature names that are all strings.
  874. .. versionadded:: 1.0
  875. See Also
  876. --------
  877. ClassifierChain : Equivalent for classification.
  878. MultiOutputRegressor : Learns each output independently rather than
  879. chaining.
  880. Examples
  881. --------
  882. >>> from sklearn.multioutput import RegressorChain
  883. >>> from sklearn.linear_model import LogisticRegression
  884. >>> logreg = LogisticRegression(solver='lbfgs',multi_class='multinomial')
  885. >>> X, Y = [[1, 0], [0, 1], [1, 1]], [[0, 2], [1, 1], [2, 0]]
  886. >>> chain = RegressorChain(base_estimator=logreg, order=[0, 1]).fit(X, Y)
  887. >>> chain.predict(X)
  888. array([[0., 2.],
  889. [1., 1.],
  890. [2., 0.]])
  891. """
  892. @_fit_context(
  893. # RegressorChain.base_estimator is not validated yet
  894. prefer_skip_nested_validation=False
  895. )
  896. def fit(self, X, Y, **fit_params):
  897. """Fit the model to data matrix X and targets Y.
  898. Parameters
  899. ----------
  900. X : {array-like, sparse matrix} of shape (n_samples, n_features)
  901. The input data.
  902. Y : array-like of shape (n_samples, n_classes)
  903. The target values.
  904. **fit_params : dict of string -> object
  905. Parameters passed to the `fit` method at each step
  906. of the regressor chain.
  907. .. versionadded:: 0.23
  908. Returns
  909. -------
  910. self : object
  911. Returns a fitted instance.
  912. """
  913. super().fit(X, Y, **fit_params)
  914. return self
  915. def get_metadata_routing(self):
  916. """Get metadata routing of this object.
  917. Please check :ref:`User Guide <metadata_routing>` on how the routing
  918. mechanism works.
  919. .. versionadded:: 1.3
  920. Returns
  921. -------
  922. routing : MetadataRouter
  923. A :class:`~sklearn.utils.metadata_routing.MetadataRouter` encapsulating
  924. routing information.
  925. """
  926. router = MetadataRouter(owner=self.__class__.__name__).add(
  927. estimator=self.base_estimator,
  928. method_mapping=MethodMapping().add(callee="fit", caller="fit"),
  929. )
  930. return router
  931. def _more_tags(self):
  932. return {"multioutput_only": True}