- """Testing for the VotingClassifier and VotingRegressor"""
- import re
- import numpy as np
- import pytest
- from sklearn import datasets
- from sklearn.base import BaseEstimator, ClassifierMixin, clone
- from sklearn.datasets import make_multilabel_classification
- from sklearn.dummy import DummyRegressor
- from sklearn.ensemble import (
- RandomForestClassifier,
- RandomForestRegressor,
- VotingClassifier,
- VotingRegressor,
- )
- from sklearn.exceptions import NotFittedError
- from sklearn.linear_model import LinearRegression, LogisticRegression
- from sklearn.model_selection import GridSearchCV, cross_val_score, train_test_split
- from sklearn.multiclass import OneVsRestClassifier
- from sklearn.naive_bayes import GaussianNB
- from sklearn.neighbors import KNeighborsClassifier
- from sklearn.preprocessing import StandardScaler
- from sklearn.svm import SVC
- from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
- from sklearn.utils._testing import (
- assert_almost_equal,
- assert_array_almost_equal,
- assert_array_equal,
- )


# Load datasets
iris = datasets.load_iris()
X, y = iris.data[:, 1:3], iris.target

# Scaled to avoid the ConvergenceWarning thrown by LogisticRegression
X_scaled = StandardScaler().fit_transform(X)

X_r, y_r = datasets.load_diabetes(return_X_y=True)


@pytest.mark.parametrize(
    "params, err_msg",
    [
        (
            {"estimators": []},
            "Invalid 'estimators' attribute, 'estimators' should be a non-empty list",
        ),
        (
            {"estimators": [("lr", LogisticRegression())], "weights": [1, 2]},
            "Number of `estimators` and weights must be equal",
        ),
    ],
)
def test_voting_classifier_estimator_init(params, err_msg):
    """Check that invalid `estimators`/`weights` combinations raise at fit time."""
    ensemble = VotingClassifier(**params)
    with pytest.raises(ValueError, match=err_msg):
        ensemble.fit(X, y)


def test_predictproba_hardvoting():
    """predict_proba should not be exposed when voting='hard'."""
    eclf = VotingClassifier(
        estimators=[("lr1", LogisticRegression()), ("lr2", LogisticRegression())],
        voting="hard",
    )

    msg = "predict_proba is not available when voting='hard'"
    with pytest.raises(AttributeError, match=msg):
        eclf.predict_proba

    assert not hasattr(eclf, "predict_proba")
    eclf.fit(X_scaled, y)
    assert not hasattr(eclf, "predict_proba")


def test_notfitted():
    """Calling predict/transform before fit should raise NotFittedError."""
    eclf = VotingClassifier(
        estimators=[("lr1", LogisticRegression()), ("lr2", LogisticRegression())],
        voting="soft",
    )
    ereg = VotingRegressor([("dr", DummyRegressor())])
    msg = (
        "This %s instance is not fitted yet. Call 'fit'"
        " with appropriate arguments before using this estimator."
    )
    with pytest.raises(NotFittedError, match=msg % "VotingClassifier"):
        eclf.predict(X)
    with pytest.raises(NotFittedError, match=msg % "VotingClassifier"):
        eclf.predict_proba(X)
    with pytest.raises(NotFittedError, match=msg % "VotingClassifier"):
        eclf.transform(X)
    with pytest.raises(NotFittedError, match=msg % "VotingRegressor"):
        ereg.predict(X_r)
    with pytest.raises(NotFittedError, match=msg % "VotingRegressor"):
        ereg.transform(X_r)


def test_majority_label_iris(global_random_seed):
    """Check classification by majority label on dataset iris."""
    clf1 = LogisticRegression(solver="liblinear", random_state=global_random_seed)
    clf2 = RandomForestClassifier(n_estimators=10, random_state=global_random_seed)
    clf3 = GaussianNB()
    eclf = VotingClassifier(
        estimators=[("lr", clf1), ("rf", clf2), ("gnb", clf3)], voting="hard"
    )
    scores = cross_val_score(eclf, X, y, scoring="accuracy")
    assert scores.mean() >= 0.9


def test_tie_situation():
    """Check voting classifier selects smaller class label in tie situation."""
    clf1 = LogisticRegression(random_state=123, solver="liblinear")
    clf2 = RandomForestClassifier(random_state=123)
    eclf = VotingClassifier(estimators=[("lr", clf1), ("rf", clf2)], voting="hard")
    assert clf1.fit(X, y).predict(X)[73] == 2
    assert clf2.fit(X, y).predict(X)[73] == 1
    assert eclf.fit(X, y).predict(X)[73] == 1
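

# Minimal illustrative sketch (hypothetical helper, not part of the test
# suite) of the tie-break exercised above, assuming hard voting reduces to an
# argmax over (weighted) vote counts per sample: np.argmax returns the first
# maximum, so the smaller class label wins a tie.
def _hard_vote_tie_break_sketch(votes, weights=None):
    # e.g. _hard_vote_tie_break_sketch(np.array([1, 2])) == 1
    return np.argmax(np.bincount(votes, weights=weights))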


def test_weights_iris(global_random_seed):
    """Check classification by average probabilities on dataset iris."""
    clf1 = LogisticRegression(random_state=global_random_seed)
    clf2 = RandomForestClassifier(n_estimators=10, random_state=global_random_seed)
    clf3 = GaussianNB()
    eclf = VotingClassifier(
        estimators=[("lr", clf1), ("rf", clf2), ("gnb", clf3)],
        voting="soft",
        weights=[1, 2, 10],
    )
    scores = cross_val_score(eclf, X_scaled, y, scoring="accuracy")
    assert scores.mean() >= 0.9


def test_weights_regressor():
    """Check weighted average regression prediction on diabetes dataset."""
    reg1 = DummyRegressor(strategy="mean")
    reg2 = DummyRegressor(strategy="median")
    reg3 = DummyRegressor(strategy="quantile", quantile=0.2)
    ereg = VotingRegressor(
        [("mean", reg1), ("median", reg2), ("quantile", reg3)], weights=[1, 2, 10]
    )

    X_r_train, X_r_test, y_r_train, y_r_test = train_test_split(
        X_r, y_r, test_size=0.25
    )

    reg1_pred = reg1.fit(X_r_train, y_r_train).predict(X_r_test)
    reg2_pred = reg2.fit(X_r_train, y_r_train).predict(X_r_test)
    reg3_pred = reg3.fit(X_r_train, y_r_train).predict(X_r_test)
    ereg_pred = ereg.fit(X_r_train, y_r_train).predict(X_r_test)

    avg = np.average(
        np.asarray([reg1_pred, reg2_pred, reg3_pred]), axis=0, weights=[1, 2, 10]
    )
    assert_almost_equal(ereg_pred, avg, decimal=2)

    ereg_weights_none = VotingRegressor(
        [("mean", reg1), ("median", reg2), ("quantile", reg3)], weights=None
    )
    ereg_weights_equal = VotingRegressor(
        [("mean", reg1), ("median", reg2), ("quantile", reg3)], weights=[1, 1, 1]
    )
    ereg_weights_none.fit(X_r_train, y_r_train)
    ereg_weights_equal.fit(X_r_train, y_r_train)
    ereg_none_pred = ereg_weights_none.predict(X_r_test)
    ereg_equal_pred = ereg_weights_equal.predict(X_r_test)
    assert_almost_equal(ereg_none_pred, ereg_equal_pred, decimal=2)
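

# Minimal usage sketch (hypothetical helper, not part of the test suite): the
# weighted averaging that the assertions above check, assuming VotingRegressor
# combines the base predictions with a (possibly weighted) column-wise mean.
def _weighted_average_sketch(predictions, weights=None):
    # predictions: list of 1-D arrays, one per fitted estimator
    return np.average(np.asarray(predictions), axis=0, weights=weights)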


def test_predict_on_toy_problem(global_random_seed):
    """Manually check predicted class labels for toy dataset."""
    clf1 = LogisticRegression(random_state=global_random_seed)
    clf2 = RandomForestClassifier(n_estimators=10, random_state=global_random_seed)
    clf3 = GaussianNB()

    X = np.array(
        [[-1.1, -1.5], [-1.2, -1.4], [-3.4, -2.2], [1.1, 1.2], [2.1, 1.4], [3.1, 2.3]]
    )
    y = np.array([1, 1, 1, 2, 2, 2])

    assert_array_equal(clf1.fit(X, y).predict(X), [1, 1, 1, 2, 2, 2])
    assert_array_equal(clf2.fit(X, y).predict(X), [1, 1, 1, 2, 2, 2])
    assert_array_equal(clf3.fit(X, y).predict(X), [1, 1, 1, 2, 2, 2])

    eclf = VotingClassifier(
        estimators=[("lr", clf1), ("rf", clf2), ("gnb", clf3)],
        voting="hard",
        weights=[1, 1, 1],
    )
    assert_array_equal(eclf.fit(X, y).predict(X), [1, 1, 1, 2, 2, 2])

    eclf = VotingClassifier(
        estimators=[("lr", clf1), ("rf", clf2), ("gnb", clf3)],
        voting="soft",
        weights=[1, 1, 1],
    )
    assert_array_equal(eclf.fit(X, y).predict(X), [1, 1, 1, 2, 2, 2])


def test_predict_proba_on_toy_problem():
    """Calculate predicted probabilities on toy dataset."""
    clf1 = LogisticRegression(random_state=123)
    clf2 = RandomForestClassifier(random_state=123)
    clf3 = GaussianNB()
    X = np.array([[-1.1, -1.5], [-1.2, -1.4], [-3.4, -2.2], [1.1, 1.2]])
    y = np.array([1, 1, 2, 2])

    clf1_res = np.array(
        [
            [0.59790391, 0.40209609],
            [0.57622162, 0.42377838],
            [0.50728456, 0.49271544],
            [0.40241774, 0.59758226],
        ]
    )
    clf2_res = np.array([[0.8, 0.2], [0.8, 0.2], [0.2, 0.8], [0.3, 0.7]])
    clf3_res = np.array(
        [[0.9985082, 0.0014918], [0.99845843, 0.00154157], [0.0, 1.0], [0.0, 1.0]]
    )

    t00 = (2 * clf1_res[0][0] + clf2_res[0][0] + clf3_res[0][0]) / 4
    t11 = (2 * clf1_res[1][1] + clf2_res[1][1] + clf3_res[1][1]) / 4
    t21 = (2 * clf1_res[2][1] + clf2_res[2][1] + clf3_res[2][1]) / 4
    t31 = (2 * clf1_res[3][1] + clf2_res[3][1] + clf3_res[3][1]) / 4

    eclf = VotingClassifier(
        estimators=[("lr", clf1), ("rf", clf2), ("gnb", clf3)],
        voting="soft",
        weights=[2, 1, 1],
    )
    eclf_res = eclf.fit(X, y).predict_proba(X)

    assert_almost_equal(t00, eclf_res[0][0], decimal=1)
    assert_almost_equal(t11, eclf_res[1][1], decimal=1)
    assert_almost_equal(t21, eclf_res[2][1], decimal=1)
    assert_almost_equal(t31, eclf_res[3][1], decimal=1)

    with pytest.raises(
        AttributeError, match="predict_proba is not available when voting='hard'"
    ):
        eclf = VotingClassifier(
            estimators=[("lr", clf1), ("rf", clf2), ("gnb", clf3)], voting="hard"
        )
        eclf.fit(X, y).predict_proba(X)
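

# Minimal sketch (hypothetical helper, not part of the test suite) of the
# soft-voting arithmetic checked above: a weighted mean of each estimator's
# predict_proba output, normalised by the sum of the weights (here 2 + 1 + 1 = 4).
def _soft_vote_proba_sketch(probas, weights):
    # probas: list of (n_samples, n_classes) arrays, one per estimator
    return np.average(np.asarray(probas), axis=0, weights=weights)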


def test_multilabel():
    """Check if error is raised for multilabel classification."""
    X, y = make_multilabel_classification(
        n_classes=2, n_labels=1, allow_unlabeled=False, random_state=123
    )
    clf = OneVsRestClassifier(SVC(kernel="linear"))

    eclf = VotingClassifier(estimators=[("ovr", clf)], voting="hard")

    try:
        eclf.fit(X, y)
    except NotImplementedError:
        return


def test_gridsearch():
    """Check GridSearch support."""
    clf1 = LogisticRegression(random_state=1)
    clf2 = RandomForestClassifier(random_state=1, n_estimators=3)
    clf3 = GaussianNB()
    eclf = VotingClassifier(
        estimators=[("lr", clf1), ("rf", clf2), ("gnb", clf3)], voting="soft"
    )

    params = {
        "lr__C": [1.0, 100.0],
        "voting": ["soft", "hard"],
        "weights": [[0.5, 0.5, 0.5], [1.0, 0.5, 0.5]],
    }

    grid = GridSearchCV(estimator=eclf, param_grid=params, cv=2)
    grid.fit(X_scaled, y)


def test_parallel_fit(global_random_seed):
    """Check parallel backend of VotingClassifier on toy dataset."""
    clf1 = LogisticRegression(random_state=global_random_seed)
    clf2 = RandomForestClassifier(n_estimators=10, random_state=global_random_seed)
    clf3 = GaussianNB()
    X = np.array([[-1.1, -1.5], [-1.2, -1.4], [-3.4, -2.2], [1.1, 1.2]])
    y = np.array([1, 1, 2, 2])

    eclf1 = VotingClassifier(
        estimators=[("lr", clf1), ("rf", clf2), ("gnb", clf3)], voting="soft", n_jobs=1
    ).fit(X, y)
    eclf2 = VotingClassifier(
        estimators=[("lr", clf1), ("rf", clf2), ("gnb", clf3)], voting="soft", n_jobs=2
    ).fit(X, y)

    assert_array_equal(eclf1.predict(X), eclf2.predict(X))
    assert_array_almost_equal(eclf1.predict_proba(X), eclf2.predict_proba(X))


def test_sample_weight(global_random_seed):
    """Tests sample_weight parameter of VotingClassifier."""
    clf1 = LogisticRegression(random_state=global_random_seed)
    clf2 = RandomForestClassifier(n_estimators=10, random_state=global_random_seed)
    clf3 = SVC(probability=True, random_state=global_random_seed)
    eclf1 = VotingClassifier(
        estimators=[("lr", clf1), ("rf", clf2), ("svc", clf3)], voting="soft"
    ).fit(X_scaled, y, sample_weight=np.ones((len(y),)))
    eclf2 = VotingClassifier(
        estimators=[("lr", clf1), ("rf", clf2), ("svc", clf3)], voting="soft"
    ).fit(X_scaled, y)
    assert_array_equal(eclf1.predict(X_scaled), eclf2.predict(X_scaled))
    assert_array_almost_equal(
        eclf1.predict_proba(X_scaled), eclf2.predict_proba(X_scaled)
    )

    sample_weight = np.random.RandomState(global_random_seed).uniform(size=(len(y),))
    eclf3 = VotingClassifier(estimators=[("lr", clf1)], voting="soft")
    eclf3.fit(X_scaled, y, sample_weight)
    clf1.fit(X_scaled, y, sample_weight)
    assert_array_equal(eclf3.predict(X_scaled), clf1.predict(X_scaled))
    assert_array_almost_equal(
        eclf3.predict_proba(X_scaled), clf1.predict_proba(X_scaled)
    )

    # check that an informative error is raised if sample_weight is not supported
    clf4 = KNeighborsClassifier()
    eclf3 = VotingClassifier(
        estimators=[("lr", clf1), ("svc", clf3), ("knn", clf4)], voting="soft"
    )
    msg = "Underlying estimator KNeighborsClassifier does not support sample weights."
    with pytest.raises(TypeError, match=msg):
        eclf3.fit(X_scaled, y, sample_weight)

    # check that _fit_single_estimator will raise the right error:
    # it should re-raise the original error when it is not linked to sample_weight
    class ClassifierErrorFit(ClassifierMixin, BaseEstimator):
        def fit(self, X_scaled, y, sample_weight):
            raise TypeError("Error unrelated to sample_weight.")

    clf = ClassifierErrorFit()
    with pytest.raises(TypeError, match="Error unrelated to sample_weight"):
        clf.fit(X_scaled, y, sample_weight=sample_weight)


def test_sample_weight_kwargs():
    """Check that VotingClassifier passes sample_weight as kwargs"""

    class MockClassifier(ClassifierMixin, BaseEstimator):
        """Mock Classifier to check that sample_weight is received as kwargs"""

        def fit(self, X, y, *args, **sample_weight):
            assert "sample_weight" in sample_weight

    clf = MockClassifier()
    eclf = VotingClassifier(estimators=[("mock", clf)], voting="soft")

    # Should not raise an error.
    eclf.fit(X, y, sample_weight=np.ones((len(y),)))


def test_voting_classifier_set_params(global_random_seed):
    # check equivalence in the output when setting underlying estimators
    clf1 = LogisticRegression(random_state=global_random_seed)
    clf2 = RandomForestClassifier(
        n_estimators=10, random_state=global_random_seed, max_depth=None
    )
    clf3 = GaussianNB()

    eclf1 = VotingClassifier(
        [("lr", clf1), ("rf", clf2)], voting="soft", weights=[1, 2]
    ).fit(X_scaled, y)
    eclf2 = VotingClassifier(
        [("lr", clf1), ("nb", clf3)], voting="soft", weights=[1, 2]
    )
    eclf2.set_params(nb=clf2).fit(X_scaled, y)

    assert_array_equal(eclf1.predict(X_scaled), eclf2.predict(X_scaled))
    assert_array_almost_equal(
        eclf1.predict_proba(X_scaled), eclf2.predict_proba(X_scaled)
    )
    assert eclf2.estimators[0][1].get_params() == clf1.get_params()
    assert eclf2.estimators[1][1].get_params() == clf2.get_params()


def test_set_estimator_drop():
    # VotingClassifier set_params should be able to set estimators as drop
    # Test predict
    clf1 = LogisticRegression(random_state=123)
    clf2 = RandomForestClassifier(n_estimators=10, random_state=123)
    clf3 = GaussianNB()
    eclf1 = VotingClassifier(
        estimators=[("lr", clf1), ("rf", clf2), ("nb", clf3)],
        voting="hard",
        weights=[1, 0, 0.5],
    ).fit(X, y)

    eclf2 = VotingClassifier(
        estimators=[("lr", clf1), ("rf", clf2), ("nb", clf3)],
        voting="hard",
        weights=[1, 1, 0.5],
    )
    eclf2.set_params(rf="drop").fit(X, y)
    assert_array_equal(eclf1.predict(X), eclf2.predict(X))

    assert dict(eclf2.estimators)["rf"] == "drop"
    assert len(eclf2.estimators_) == 2
    assert all(
        isinstance(est, (LogisticRegression, GaussianNB)) for est in eclf2.estimators_
    )
    assert eclf2.get_params()["rf"] == "drop"

    eclf1.set_params(voting="soft").fit(X, y)
    eclf2.set_params(voting="soft").fit(X, y)
    assert_array_equal(eclf1.predict(X), eclf2.predict(X))
    assert_array_almost_equal(eclf1.predict_proba(X), eclf2.predict_proba(X))
    msg = "All estimators are dropped. At least one is required"
    with pytest.raises(ValueError, match=msg):
        eclf2.set_params(lr="drop", rf="drop", nb="drop").fit(X, y)

    # Test soft voting transform
    X1 = np.array([[1], [2]])
    y1 = np.array([1, 2])
    eclf1 = VotingClassifier(
        estimators=[("rf", clf2), ("nb", clf3)],
        voting="soft",
        weights=[0, 0.5],
        flatten_transform=False,
    ).fit(X1, y1)

    eclf2 = VotingClassifier(
        estimators=[("rf", clf2), ("nb", clf3)],
        voting="soft",
        weights=[1, 0.5],
        flatten_transform=False,
    )
    eclf2.set_params(rf="drop").fit(X1, y1)
    assert_array_almost_equal(
        eclf1.transform(X1),
        np.array([[[0.7, 0.3], [0.3, 0.7]], [[1.0, 0.0], [0.0, 1.0]]]),
    )
    assert_array_almost_equal(eclf2.transform(X1), np.array([[[1.0, 0.0], [0.0, 1.0]]]))

    eclf1.set_params(voting="hard")
    eclf2.set_params(voting="hard")
    assert_array_equal(eclf1.transform(X1), np.array([[0, 0], [1, 1]]))
    assert_array_equal(eclf2.transform(X1), np.array([[0], [1]]))
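

# Note on the comparison above: eclf1 keeps "rf" but gives it a weight of 0,
# while eclf2 drops "rf" entirely; the matching predictions show that a zero
# weight and dropping the estimator are interchangeable for the ensemble output.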


def test_estimator_weights_format(global_random_seed):
    # Test estimator weights inputs as list and array
    clf1 = LogisticRegression(random_state=global_random_seed)
    clf2 = RandomForestClassifier(n_estimators=10, random_state=global_random_seed)
    eclf1 = VotingClassifier(
        estimators=[("lr", clf1), ("rf", clf2)], weights=[1, 2], voting="soft"
    )
    eclf2 = VotingClassifier(
        estimators=[("lr", clf1), ("rf", clf2)], weights=np.array((1, 2)), voting="soft"
    )
    eclf1.fit(X_scaled, y)
    eclf2.fit(X_scaled, y)
    assert_array_almost_equal(
        eclf1.predict_proba(X_scaled), eclf2.predict_proba(X_scaled)
    )


def test_transform(global_random_seed):
    """Check transform method of VotingClassifier on toy dataset."""
    clf1 = LogisticRegression(random_state=global_random_seed)
    clf2 = RandomForestClassifier(n_estimators=10, random_state=global_random_seed)
    clf3 = GaussianNB()
    X = np.array([[-1.1, -1.5], [-1.2, -1.4], [-3.4, -2.2], [1.1, 1.2]])
    y = np.array([1, 1, 2, 2])

    eclf1 = VotingClassifier(
        estimators=[("lr", clf1), ("rf", clf2), ("gnb", clf3)], voting="soft"
    ).fit(X, y)
    eclf2 = VotingClassifier(
        estimators=[("lr", clf1), ("rf", clf2), ("gnb", clf3)],
        voting="soft",
        flatten_transform=True,
    ).fit(X, y)
    eclf3 = VotingClassifier(
        estimators=[("lr", clf1), ("rf", clf2), ("gnb", clf3)],
        voting="soft",
        flatten_transform=False,
    ).fit(X, y)

    assert_array_equal(eclf1.transform(X).shape, (4, 6))
    assert_array_equal(eclf2.transform(X).shape, (4, 6))
    assert_array_equal(eclf3.transform(X).shape, (3, 4, 2))
    assert_array_almost_equal(eclf1.transform(X), eclf2.transform(X))
    assert_array_almost_equal(
        eclf3.transform(X).swapaxes(0, 1).reshape((4, 6)), eclf2.transform(X)
    )
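

# Shape note for the assertions above: with voting="soft" and
# flatten_transform=True (the default), transform returns an array of shape
# (n_samples, n_classifiers * n_classes); with flatten_transform=False it
# returns (n_classifiers, n_samples, n_classes), hence (4, 6) versus (3, 4, 2).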


@pytest.mark.parametrize(
    "X, y, voter",
    [
        (
            X,
            y,
            VotingClassifier(
                [
                    ("lr", LogisticRegression()),
                    ("rf", RandomForestClassifier(n_estimators=5)),
                ]
            ),
        ),
        (
            X_r,
            y_r,
            VotingRegressor(
                [
                    ("lr", LinearRegression()),
                    ("rf", RandomForestRegressor(n_estimators=5)),
                ]
            ),
        ),
    ],
)
def test_none_estimator_with_weights(X, y, voter):
    # check that an estimator can be set to "drop" while sample weights are
    # still passed; regression test for
    # https://github.com/scikit-learn/scikit-learn/issues/13777
    voter = clone(voter)
    # Scaled to avoid the ConvergenceWarning thrown by LogisticRegression
    X_scaled = StandardScaler().fit_transform(X)
    voter.fit(X_scaled, y, sample_weight=np.ones(y.shape))
    voter.set_params(lr="drop")
    voter.fit(X_scaled, y, sample_weight=np.ones(y.shape))
    y_pred = voter.predict(X_scaled)
    assert y_pred.shape == y.shape


@pytest.mark.parametrize(
    "est",
    [
        VotingRegressor(
            estimators=[
                ("lr", LinearRegression()),
                ("tree", DecisionTreeRegressor(random_state=0)),
            ]
        ),
        VotingClassifier(
            estimators=[
                ("lr", LogisticRegression(random_state=0)),
                ("tree", DecisionTreeClassifier(random_state=0)),
            ]
        ),
    ],
    ids=["VotingRegressor", "VotingClassifier"],
)
def test_n_features_in(est):
    X = [[1, 2], [3, 4], [5, 6]]
    y = [0, 1, 2]

    assert not hasattr(est, "n_features_in_")
    est.fit(X, y)
    assert est.n_features_in_ == 2


@pytest.mark.parametrize(
    "estimator",
    [
        VotingRegressor(
            estimators=[
                ("lr", LinearRegression()),
                ("rf", RandomForestRegressor(random_state=123)),
            ],
            verbose=True,
        ),
        VotingClassifier(
            estimators=[
                ("lr", LogisticRegression(random_state=123)),
                ("rf", RandomForestClassifier(random_state=123)),
            ],
            verbose=True,
        ),
    ],
)
def test_voting_verbose(estimator, capsys):
    X = np.array([[-1.1, -1.5], [-1.2, -1.4], [-3.4, -2.2], [1.1, 1.2]])
    y = np.array([1, 1, 2, 2])

    pattern = (
        r"\[Voting\].*\(1 of 2\) Processing lr, total=.*\n"
        r"\[Voting\].*\(2 of 2\) Processing rf, total=.*\n$"
    )

    estimator.fit(X, y)
    assert re.match(pattern, capsys.readouterr()[0])


def test_get_features_names_out_regressor():
    """Check get_feature_names_out output for regressor."""
    X = [[1, 2], [3, 4], [5, 6]]
    y = [0, 1, 2]

    voting = VotingRegressor(
        estimators=[
            ("lr", LinearRegression()),
            ("tree", DecisionTreeRegressor(random_state=0)),
            ("ignore", "drop"),
        ]
    )
    voting.fit(X, y)

    names_out = voting.get_feature_names_out()
    expected_names = ["votingregressor_lr", "votingregressor_tree"]
    assert_array_equal(names_out, expected_names)


@pytest.mark.parametrize(
    "kwargs, expected_names",
    [
        (
            {"voting": "soft", "flatten_transform": True},
            [
                "votingclassifier_lr0",
                "votingclassifier_lr1",
                "votingclassifier_lr2",
                "votingclassifier_tree0",
                "votingclassifier_tree1",
                "votingclassifier_tree2",
            ],
        ),
        ({"voting": "hard"}, ["votingclassifier_lr", "votingclassifier_tree"]),
    ],
)
def test_get_features_names_out_classifier(kwargs, expected_names):
    """Check get_feature_names_out for classifier for different settings."""
    X = [[1, 2], [3, 4], [5, 6], [1, 1.2]]
    y = [0, 1, 2, 0]

    voting = VotingClassifier(
        estimators=[
            ("lr", LogisticRegression(random_state=0)),
            ("tree", DecisionTreeClassifier(random_state=0)),
        ],
        **kwargs,
    )
    voting.fit(X, y)
    X_trans = voting.transform(X)
    names_out = voting.get_feature_names_out()

    assert X_trans.shape[1] == len(expected_names)
    assert_array_equal(names_out, expected_names)


def test_get_features_names_out_classifier_error():
    """Check that error is raised when voting="soft" and flatten_transform=False."""
    X = [[1, 2], [3, 4], [5, 6]]
    y = [0, 1, 2]

    voting = VotingClassifier(
        estimators=[
            ("lr", LogisticRegression(random_state=0)),
            ("tree", DecisionTreeClassifier(random_state=0)),
        ],
        voting="soft",
        flatten_transform=False,
    )
    voting.fit(X, y)

    msg = (
        "get_feature_names_out is not supported when `voting='soft'` and "
        "`flatten_transform=False`"
    )
    with pytest.raises(ValueError, match=msg):
        voting.get_feature_names_out()