- """Test the validation module"""
- import os
- import re
- import sys
- import tempfile
- import warnings
- from functools import partial
- from io import StringIO
- from time import sleep
- import numpy as np
- import pytest
- from scipy.sparse import coo_matrix, csr_matrix, issparse
- from sklearn.base import BaseEstimator, clone
- from sklearn.cluster import KMeans
- from sklearn.datasets import (
- load_diabetes,
- load_digits,
- load_iris,
- make_classification,
- make_multilabel_classification,
- make_regression,
- )
- from sklearn.ensemble import RandomForestClassifier
- from sklearn.exceptions import FitFailedWarning
- from sklearn.impute import SimpleImputer
- from sklearn.linear_model import (
- LogisticRegression,
- PassiveAggressiveClassifier,
- Ridge,
- RidgeClassifier,
- SGDClassifier,
- )
- from sklearn.metrics import (
- accuracy_score,
- check_scoring,
- confusion_matrix,
- explained_variance_score,
- make_scorer,
- mean_squared_error,
- precision_recall_fscore_support,
- precision_score,
- r2_score,
- )
- from sklearn.model_selection import (
- GridSearchCV,
- GroupKFold,
- GroupShuffleSplit,
- KFold,
- LeaveOneGroupOut,
- LeaveOneOut,
- LeavePGroupsOut,
- ShuffleSplit,
- StratifiedKFold,
- cross_val_predict,
- cross_val_score,
- cross_validate,
- learning_curve,
- permutation_test_score,
- validation_curve,
- )
- from sklearn.model_selection._validation import (
- _check_is_permutation,
- _fit_and_score,
- _score,
- )
- from sklearn.model_selection.tests.common import OneTimeSplitter
- from sklearn.model_selection.tests.test_search import FailingClassifier
- from sklearn.multiclass import OneVsRestClassifier
- from sklearn.neighbors import KNeighborsClassifier
- from sklearn.neural_network import MLPRegressor
- from sklearn.pipeline import Pipeline
- from sklearn.preprocessing import LabelEncoder, scale
- from sklearn.svm import SVC, LinearSVC
- from sklearn.utils import shuffle
- from sklearn.utils._mocking import CheckingClassifier, MockDataFrame
- from sklearn.utils._testing import (
- assert_allclose,
- assert_almost_equal,
- assert_array_almost_equal,
- assert_array_equal,
- )
- from sklearn.utils.validation import _num_samples
- class MockImprovingEstimator(BaseEstimator):
- """Dummy classifier to test the learning curve"""
- def __init__(self, n_max_train_sizes):
- self.n_max_train_sizes = n_max_train_sizes
- self.train_sizes = 0
- self.X_subset = None
- def fit(self, X_subset, y_subset=None):
- self.X_subset = X_subset
- self.train_sizes = X_subset.shape[0]
- return self
- def predict(self, X):
- raise NotImplementedError
- def score(self, X=None, Y=None):
- # training score becomes worse (2 -> 1), test score improves (0 -> 1)
- if self._is_training_data(X):
- return 2.0 - float(self.train_sizes) / self.n_max_train_sizes
- else:
- return float(self.train_sizes) / self.n_max_train_sizes
- def _is_training_data(self, X):
- return X is self.X_subset
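- # Illustrative sketch (not from the original suite): after fitting on a
- # 10-sample subset with n_max_train_sizes=20, scoring the training data
- # returns 2 - 10/20 = 1.5, while any other array scores 10/20 = 0.5:
- #
- #   est = MockImprovingEstimator(20).fit(np.ones((10, 2)))
- #   assert est.score(est.X_subset) == 1.5
- #   assert est.score(np.zeros((5, 2))) == 0.5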
- class MockIncrementalImprovingEstimator(MockImprovingEstimator):
- """Dummy classifier that provides partial_fit"""
- def __init__(self, n_max_train_sizes, expected_fit_params=None):
- super().__init__(n_max_train_sizes)
- self.x = None
- self.expected_fit_params = expected_fit_params
- def _is_training_data(self, X):
- return self.x in X
- def partial_fit(self, X, y=None, **params):
- self.train_sizes += X.shape[0]
- self.x = X[0]
- if self.expected_fit_params:
- missing = set(self.expected_fit_params) - set(params)
- if missing:
- raise AssertionError(
- f"Expected fit parameter(s) {list(missing)} not seen."
- )
- for key, value in params.items():
- if key in self.expected_fit_params and _num_samples(
- value
- ) != _num_samples(X):
- raise AssertionError(
- f"Fit parameter {key} has length {_num_samples(value)}"
- f"; expected {_num_samples(X)}."
- )
- class MockEstimatorWithParameter(BaseEstimator):
- """Dummy classifier to test the validation curve"""
- def __init__(self, param=0.5):
- self.X_subset = None
- self.param = param
- def fit(self, X_subset, y_subset):
- self.X_subset = X_subset
- self.train_sizes = X_subset.shape[0]
- return self
- def predict(self, X):
- raise NotImplementedError
- def score(self, X=None, y=None):
- return self.param if self._is_training_data(X) else 1 - self.param
- def _is_training_data(self, X):
- return X is self.X_subset
- class MockEstimatorWithSingleFitCallAllowed(MockEstimatorWithParameter):
- """Dummy classifier that disallows repeated calls of fit method"""
- def fit(self, X_subset, y_subset):
- assert not hasattr(self, "fit_called_"), "fit was called a second time"
- self.fit_called_ = True
- return super().fit(X_subset, y_subset)
- def predict(self, X):
- raise NotImplementedError
- class MockClassifier:
- """Dummy classifier to test the cross-validation"""
- def __init__(self, a=0, allow_nd=False):
- self.a = a
- self.allow_nd = allow_nd
- def fit(
- self,
- X,
- Y=None,
- sample_weight=None,
- class_prior=None,
- sparse_sample_weight=None,
- sparse_param=None,
- dummy_int=None,
- dummy_str=None,
- dummy_obj=None,
- callback=None,
- ):
- """The dummy arguments are to test that this fit function can
- accept non-array arguments through cross-validation, such as:
- - int
- - str (this is actually array-like)
- - object
- - function
- """
- self.dummy_int = dummy_int
- self.dummy_str = dummy_str
- self.dummy_obj = dummy_obj
- if callback is not None:
- callback(self)
- if self.allow_nd:
- X = X.reshape(len(X), -1)
- if X.ndim >= 3 and not self.allow_nd:
- raise ValueError("X cannot be d")
- if sample_weight is not None:
- assert sample_weight.shape[0] == X.shape[0], (
- "MockClassifier extra fit_param "
- "sample_weight.shape[0] is {0}, should be {1}".format(
- sample_weight.shape[0], X.shape[0]
- )
- )
- if class_prior is not None:
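- # (`y` below refers to the module-level fixture defined after this class,
- # not the capitalized `Y` argument of this method)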
- assert class_prior.shape[0] == len(np.unique(y)), (
- "MockClassifier extra fit_param class_prior.shape[0]"
- " is {0}, should be {1}".format(class_prior.shape[0], len(np.unique(y)))
- )
- if sparse_sample_weight is not None:
- fmt = (
- "MockClassifier extra fit_param sparse_sample_weight"
- ".shape[0] is {0}, should be {1}"
- )
- assert sparse_sample_weight.shape[0] == X.shape[0], fmt.format(
- sparse_sample_weight.shape[0], X.shape[0]
- )
- if sparse_param is not None:
- fmt = (
- "MockClassifier extra fit_param sparse_param.shape "
- "is ({0}, {1}), should be ({2}, {3})"
- )
- assert sparse_param.shape == P_sparse.shape, fmt.format(
- sparse_param.shape[0],
- sparse_param.shape[1],
- P_sparse.shape[0],
- P_sparse.shape[1],
- )
- return self
- def predict(self, T):
- if self.allow_nd:
- T = T.reshape(len(T), -1)
- return T[:, 0]
- def predict_proba(self, T):
- return T
- def score(self, X=None, Y=None):
- return 1.0 / (1 + np.abs(self.a))
- def get_params(self, deep=False):
- return {"a": self.a, "allow_nd": self.allow_nd}
- # XXX: use 2D array, since 1D X is being detected as a single sample in
- # check_consistent_length
- X = np.ones((10, 2))
- X_sparse = coo_matrix(X)
- y = np.array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4])
- # The number of samples per class needs to be >= n_splits
- # for StratifiedKFold(n_splits=3)
- y2 = np.array([1, 1, 1, 2, 2, 2, 3, 3, 3, 3])
- P_sparse = coo_matrix(np.eye(5))
- def test_cross_val_score():
- clf = MockClassifier()
- for a in range(-10, 10):
- clf.a = a
- # Smoke test
- scores = cross_val_score(clf, X, y2)
- assert_array_equal(scores, clf.score(X, y2))
- # test with multioutput y
- multioutput_y = np.column_stack([y2, y2[::-1]])
- scores = cross_val_score(clf, X_sparse, multioutput_y)
- assert_array_equal(scores, clf.score(X_sparse, multioutput_y))
- scores = cross_val_score(clf, X_sparse, y2)
- assert_array_equal(scores, clf.score(X_sparse, y2))
- # test with multioutput y
- scores = cross_val_score(clf, X_sparse, multioutput_y)
- assert_array_equal(scores, clf.score(X_sparse, multioutput_y))
- # test with X and y as list
- list_check = lambda x: isinstance(x, list)
- clf = CheckingClassifier(check_X=list_check)
- scores = cross_val_score(clf, X.tolist(), y2.tolist(), cv=3)
- clf = CheckingClassifier(check_y=list_check)
- scores = cross_val_score(clf, X, y2.tolist(), cv=3)
- with pytest.raises(ValueError):
- cross_val_score(clf, X, y2, scoring="sklearn")
- # test with 3d X
- X_3d = X[:, :, np.newaxis]
- clf = MockClassifier(allow_nd=True)
- scores = cross_val_score(clf, X_3d, y2)
- clf = MockClassifier(allow_nd=False)
- with pytest.raises(ValueError):
- cross_val_score(clf, X_3d, y2, error_score="raise")
- def test_cross_validate_many_jobs():
- # regression test for #12154: cv='warn' with n_jobs>1 triggered a copy of
- # the parameters, leading to a failure in check_cv because the check used
- # `cv is 'warn'` instead of `cv == 'warn'`.
- X, y = load_iris(return_X_y=True)
- clf = SVC(gamma="auto")
- grid = GridSearchCV(clf, param_grid={"C": [1, 10]})
- cross_validate(grid, X, y, n_jobs=2)
- def test_cross_validate_invalid_scoring_param():
- X, y = make_classification(random_state=0)
- estimator = MockClassifier()
- # Test the errors
- error_message_regexp = ".*must be unique strings.*"
- # List/tuple of callables should raise a message advising users to use
- # dict of names to callables mapping
- with pytest.raises(ValueError, match=error_message_regexp):
- cross_validate(
- estimator,
- X,
- y,
- scoring=(make_scorer(precision_score), make_scorer(accuracy_score)),
- )
- with pytest.raises(ValueError, match=error_message_regexp):
- cross_validate(estimator, X, y, scoring=(make_scorer(precision_score),))
- # So should empty lists/tuples
- with pytest.raises(ValueError, match=error_message_regexp + "Empty list.*"):
- cross_validate(estimator, X, y, scoring=())
- # So should duplicated entries
- with pytest.raises(ValueError, match=error_message_regexp + "Duplicate.*"):
- cross_validate(estimator, X, y, scoring=("f1_micro", "f1_micro"))
- # Nested Lists should raise a generic error message
- with pytest.raises(ValueError, match=error_message_regexp):
- cross_validate(estimator, X, y, scoring=[[make_scorer(precision_score)]])
- # Empty dict should raise invalid scoring error
- with pytest.raises(ValueError, match="An empty dict"):
- cross_validate(estimator, X, y, scoring=(dict()))
- multiclass_scorer = make_scorer(precision_recall_fscore_support)
- # Multiclass scorers that return multiple values are not supported yet;
- # below is the warning message we expect to see
- warning_message = (
- "Scoring failed. The score on this train-test "
- f"partition for these parameters will be set to {np.nan}. "
- "Details: \n"
- )
- with pytest.warns(UserWarning, match=warning_message):
- cross_validate(estimator, X, y, scoring=multiclass_scorer)
- with pytest.warns(UserWarning, match=warning_message):
- cross_validate(estimator, X, y, scoring={"foo": multiclass_scorer})
- def test_cross_validate_nested_estimator():
- # Non-regression test to ensure that nested
- # estimators are properly returned in a list
- # https://github.com/scikit-learn/scikit-learn/pull/17745
- (X, y) = load_iris(return_X_y=True)
- pipeline = Pipeline(
- [
- ("imputer", SimpleImputer()),
- ("classifier", MockClassifier()),
- ]
- )
- results = cross_validate(pipeline, X, y, return_estimator=True)
- estimators = results["estimator"]
- assert isinstance(estimators, list)
- assert all(isinstance(estimator, Pipeline) for estimator in estimators)
- @pytest.mark.parametrize("use_sparse", [False, True])
- def test_cross_validate(use_sparse: bool):
- # Compute train and test mse/r2 scores
- cv = KFold()
- # Regression
- X_reg, y_reg = make_regression(n_samples=30, random_state=0)
- reg = Ridge(random_state=0)
- # Classification
- X_clf, y_clf = make_classification(n_samples=30, random_state=0)
- clf = SVC(kernel="linear", random_state=0)
- if use_sparse:
- X_reg = csr_matrix(X_reg)
- X_clf = csr_matrix(X_clf)
- for X, y, est in ((X_reg, y_reg, reg), (X_clf, y_clf, clf)):
- # It's okay to evaluate regression metrics on classification too
- mse_scorer = check_scoring(est, scoring="neg_mean_squared_error")
- r2_scorer = check_scoring(est, scoring="r2")
- train_mse_scores = []
- test_mse_scores = []
- train_r2_scores = []
- test_r2_scores = []
- fitted_estimators = []
- for train, test in cv.split(X, y):
- est = clone(est).fit(X[train], y[train])
- train_mse_scores.append(mse_scorer(est, X[train], y[train]))
- train_r2_scores.append(r2_scorer(est, X[train], y[train]))
- test_mse_scores.append(mse_scorer(est, X[test], y[test]))
- test_r2_scores.append(r2_scorer(est, X[test], y[test]))
- fitted_estimators.append(est)
- train_mse_scores = np.array(train_mse_scores)
- test_mse_scores = np.array(test_mse_scores)
- train_r2_scores = np.array(train_r2_scores)
- test_r2_scores = np.array(test_r2_scores)
- fitted_estimators = np.array(fitted_estimators)
- scores = (
- train_mse_scores,
- test_mse_scores,
- train_r2_scores,
- test_r2_scores,
- fitted_estimators,
- )
- # To ensure that the test does not suffer from
- # large statistical fluctuations due to slicing small datasets,
- # we pass the cross-validation instance
- check_cross_validate_single_metric(est, X, y, scores, cv)
- check_cross_validate_multi_metric(est, X, y, scores, cv)
- def check_cross_validate_single_metric(clf, X, y, scores, cv):
- (
- train_mse_scores,
- test_mse_scores,
- train_r2_scores,
- test_r2_scores,
- fitted_estimators,
- ) = scores
- # Test single metric evaluation when scoring is string or singleton list
- for return_train_score, dict_len in ((True, 4), (False, 3)):
- # Single metric passed as a string
- if return_train_score:
- mse_scores_dict = cross_validate(
- clf,
- X,
- y,
- scoring="neg_mean_squared_error",
- return_train_score=True,
- cv=cv,
- )
- assert_array_almost_equal(mse_scores_dict["train_score"], train_mse_scores)
- else:
- mse_scores_dict = cross_validate(
- clf,
- X,
- y,
- scoring="neg_mean_squared_error",
- return_train_score=False,
- cv=cv,
- )
- assert isinstance(mse_scores_dict, dict)
- assert len(mse_scores_dict) == dict_len
- assert_array_almost_equal(mse_scores_dict["test_score"], test_mse_scores)
- # Single metric passed as a list
- if return_train_score:
- # return_train_score is set explicitly; the old default of True is deprecated
- r2_scores_dict = cross_validate(
- clf, X, y, scoring=["r2"], return_train_score=True, cv=cv
- )
- assert_array_almost_equal(r2_scores_dict["train_r2"], train_r2_scores, True)
- else:
- r2_scores_dict = cross_validate(
- clf, X, y, scoring=["r2"], return_train_score=False, cv=cv
- )
- assert isinstance(r2_scores_dict, dict)
- assert len(r2_scores_dict) == dict_len
- assert_array_almost_equal(r2_scores_dict["test_r2"], test_r2_scores)
- # Test return_estimator option
- mse_scores_dict = cross_validate(
- clf, X, y, scoring="neg_mean_squared_error", return_estimator=True, cv=cv
- )
- for k, est in enumerate(mse_scores_dict["estimator"]):
- est_coef = est.coef_.copy()
- if issparse(est_coef):
- est_coef = est_coef.toarray()
- fitted_est_coef = fitted_estimators[k].coef_.copy()
- if issparse(fitted_est_coef):
- fitted_est_coef = fitted_est_coef.toarray()
- assert_almost_equal(est_coef, fitted_est_coef)
- assert_almost_equal(est.intercept_, fitted_estimators[k].intercept_)
- def check_cross_validate_multi_metric(clf, X, y, scores, cv):
- # Test multimetric evaluation when scoring is a list / dict
- (
- train_mse_scores,
- test_mse_scores,
- train_r2_scores,
- test_r2_scores,
- fitted_estimators,
- ) = scores
- def custom_scorer(clf, X, y):
- y_pred = clf.predict(X)
- return {
- "r2": r2_score(y, y_pred),
- "neg_mean_squared_error": -mean_squared_error(y, y_pred),
- }
- all_scoring = (
- ("r2", "neg_mean_squared_error"),
- {
- "r2": make_scorer(r2_score),
- "neg_mean_squared_error": "neg_mean_squared_error",
- },
- custom_scorer,
- )
- keys_sans_train = {
- "test_r2",
- "test_neg_mean_squared_error",
- "fit_time",
- "score_time",
- }
- keys_with_train = keys_sans_train.union(
- {"train_r2", "train_neg_mean_squared_error"}
- )
- for return_train_score in (True, False):
- for scoring in all_scoring:
- if return_train_score:
- # return_train_score is set explicitly; the old default of True is deprecated
- cv_results = cross_validate(
- clf, X, y, scoring=scoring, return_train_score=True, cv=cv
- )
- assert_array_almost_equal(cv_results["train_r2"], train_r2_scores)
- assert_array_almost_equal(
- cv_results["train_neg_mean_squared_error"], train_mse_scores
- )
- else:
- cv_results = cross_validate(
- clf, X, y, scoring=scoring, return_train_score=False, cv=cv
- )
- assert isinstance(cv_results, dict)
- assert set(cv_results.keys()) == (
- keys_with_train if return_train_score else keys_sans_train
- )
- assert_array_almost_equal(cv_results["test_r2"], test_r2_scores)
- assert_array_almost_equal(
- cv_results["test_neg_mean_squared_error"], test_mse_scores
- )
- # Make sure all the arrays are of np.ndarray type
- assert type(cv_results["test_r2"]) == np.ndarray
- assert type(cv_results["test_neg_mean_squared_error"]) == np.ndarray
- assert type(cv_results["fit_time"]) == np.ndarray
- assert type(cv_results["score_time"]) == np.ndarray
- # Ensure all the times are within sane limits
- assert np.all(cv_results["fit_time"] >= 0)
- assert np.all(cv_results["fit_time"] < 10)
- assert np.all(cv_results["score_time"] >= 0)
- assert np.all(cv_results["score_time"] < 10)
- def test_cross_val_score_predict_groups():
- # Check that the ValueError raised when groups is None propagates to
- # cross_val_score and cross_val_predict, and that groups is correctly
- # passed to the cv object
- X, y = make_classification(n_samples=20, n_classes=2, random_state=0)
- clf = SVC(kernel="linear")
- group_cvs = [
- LeaveOneGroupOut(),
- LeavePGroupsOut(2),
- GroupKFold(),
- GroupShuffleSplit(),
- ]
- error_message = "The 'groups' parameter should not be None."
- for cv in group_cvs:
- with pytest.raises(ValueError, match=error_message):
- cross_val_score(estimator=clf, X=X, y=y, cv=cv)
- with pytest.raises(ValueError, match=error_message):
- cross_val_predict(estimator=clf, X=X, y=y, cv=cv)
- @pytest.mark.filterwarnings("ignore: Using or importing the ABCs from")
- def test_cross_val_score_pandas():
- # check cross_val_score doesn't destroy pandas dataframe
- types = [(MockDataFrame, MockDataFrame)]
- try:
- from pandas import DataFrame, Series
- types.append((Series, DataFrame))
- except ImportError:
- pass
- for TargetType, InputFeatureType in types:
- # X dataframe, y series
- # 3 fold cross val is used so we need at least 3 samples per class
- X_df, y_ser = InputFeatureType(X), TargetType(y2)
- check_df = lambda x: isinstance(x, InputFeatureType)
- check_series = lambda x: isinstance(x, TargetType)
- clf = CheckingClassifier(check_X=check_df, check_y=check_series)
- cross_val_score(clf, X_df, y_ser, cv=3)
- def test_cross_val_score_mask():
- # test that cross_val_score works with boolean masks
- svm = SVC(kernel="linear")
- iris = load_iris()
- X, y = iris.data, iris.target
- kfold = KFold(5)
- scores_indices = cross_val_score(svm, X, y, cv=kfold)
- kfold = KFold(5)
- cv_masks = []
- for train, test in kfold.split(X, y):
- mask_train = np.zeros(len(y), dtype=bool)
- mask_test = np.zeros(len(y), dtype=bool)
- mask_train[train] = 1
- mask_test[test] = 1
- cv_masks.append((mask_train, mask_test))
- scores_masks = cross_val_score(svm, X, y, cv=cv_masks)
- assert_array_equal(scores_indices, scores_masks)
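- # This works because an iterable of (train, test) pairs is a valid `cv`
- # argument, and a boolean mask selects exactly the same rows as the integer
- # indices it was built from, so both runs score identical folds.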
- def test_cross_val_score_precomputed():
- # test for svm with precomputed kernel
- svm = SVC(kernel="precomputed")
- iris = load_iris()
- X, y = iris.data, iris.target
- linear_kernel = np.dot(X, X.T)
- score_precomputed = cross_val_score(svm, linear_kernel, y)
- svm = SVC(kernel="linear")
- score_linear = cross_val_score(svm, X, y)
- assert_array_almost_equal(score_precomputed, score_linear)
- # test with callable
- svm = SVC(kernel=lambda x, y: np.dot(x, y.T))
- score_callable = cross_val_score(svm, X, y)
- assert_array_almost_equal(score_precomputed, score_callable)
- # Error raised for non-square X
- svm = SVC(kernel="precomputed")
- with pytest.raises(ValueError):
- cross_val_score(svm, X, y)
- # test error is raised when the precomputed kernel is not array-like
- # or sparse
- with pytest.raises(ValueError):
- cross_val_score(svm, linear_kernel.tolist(), y)
- def test_cross_val_score_fit_params():
- clf = MockClassifier()
- n_samples = X.shape[0]
- n_classes = len(np.unique(y))
- W_sparse = coo_matrix(
- (np.array([1]), (np.array([1]), np.array([0]))), shape=(10, 1)
- )
- P_sparse = coo_matrix(np.eye(5))
- DUMMY_INT = 42
- DUMMY_STR = "42"
- DUMMY_OBJ = object()
- def assert_fit_params(clf):
- # Function to test that the values are passed correctly to the
- # classifier arguments for non-array type
- assert clf.dummy_int == DUMMY_INT
- assert clf.dummy_str == DUMMY_STR
- assert clf.dummy_obj == DUMMY_OBJ
- fit_params = {
- "sample_weight": np.ones(n_samples),
- "class_prior": np.full(n_classes, 1.0 / n_classes),
- "sparse_sample_weight": W_sparse,
- "sparse_param": P_sparse,
- "dummy_int": DUMMY_INT,
- "dummy_str": DUMMY_STR,
- "dummy_obj": DUMMY_OBJ,
- "callback": assert_fit_params,
- }
- cross_val_score(clf, X, y, fit_params=fit_params)
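- # A note on routing (as done by sklearn's internal fit-param validation):
- # array-like fit params whose length equals n_samples (sample_weight and
- # sparse_sample_weight here) are sliced to each fold's training indices,
- # while everything else (class_prior, the dummies, the callback) is passed
- # through to fit() unchanged.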
- def test_cross_val_score_score_func():
- clf = MockClassifier()
- _score_func_args = []
- def score_func(y_test, y_predict):
- _score_func_args.append((y_test, y_predict))
- return 1.0
- with warnings.catch_warnings(record=True):
- scoring = make_scorer(score_func)
- score = cross_val_score(clf, X, y, scoring=scoring, cv=3)
- assert_array_equal(score, [1.0, 1.0, 1.0])
- # Test that score function is called only 3 times (for cv=3)
- assert len(_score_func_args) == 3
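- # For context, make_scorer(score_func) wraps a metric with signature
- # (y_true, y_pred) into a scorer called as scorer(estimator, X, y), which
- # roughly computes score_func(y, estimator.predict(X)); hence
- # _score_func_args collects one (y_test, y_predict) pair per CV fold.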
- def test_cross_val_score_errors():
- class BrokenEstimator:
- pass
- with pytest.raises(TypeError):
- cross_val_score(BrokenEstimator(), X)
- def test_cross_val_score_with_score_func_classification():
- iris = load_iris()
- clf = SVC(kernel="linear")
- # Default score (should be the accuracy score)
- scores = cross_val_score(clf, iris.data, iris.target)
- assert_array_almost_equal(scores, [0.97, 1.0, 0.97, 0.97, 1.0], 2)
- # Correct classification score (aka. zero / one score) - should be the
- # same as the default estimator score
- zo_scores = cross_val_score(clf, iris.data, iris.target, scoring="accuracy")
- assert_array_almost_equal(zo_scores, [0.97, 1.0, 0.97, 0.97, 1.0], 2)
- # F1 score (classes are balanced so f1_score should be equal to the
- # zero/one score)
- f1_scores = cross_val_score(clf, iris.data, iris.target, scoring="f1_weighted")
- assert_array_almost_equal(f1_scores, [0.97, 1.0, 0.97, 0.97, 1.0], 2)
- def test_cross_val_score_with_score_func_regression():
- X, y = make_regression(n_samples=30, n_features=20, n_informative=5, random_state=0)
- reg = Ridge()
- # Default score of the Ridge regression estimator
- scores = cross_val_score(reg, X, y)
- assert_array_almost_equal(scores, [0.94, 0.97, 0.97, 0.99, 0.92], 2)
- # R2 score (a.k.a. coefficient of determination) - should be the
- # same as the default estimator score
- r2_scores = cross_val_score(reg, X, y, scoring="r2")
- assert_array_almost_equal(r2_scores, [0.94, 0.97, 0.97, 0.99, 0.92], 2)
- # Mean squared error; this is a loss function, so "scores" are negative
- neg_mse_scores = cross_val_score(reg, X, y, scoring="neg_mean_squared_error")
- expected_neg_mse = np.array([-763.07, -553.16, -274.38, -273.26, -1681.99])
- assert_array_almost_equal(neg_mse_scores, expected_neg_mse, 2)
- # Explained variance
- scoring = make_scorer(explained_variance_score)
- ev_scores = cross_val_score(reg, X, y, scoring=scoring)
- assert_array_almost_equal(ev_scores, [0.94, 0.97, 0.97, 0.99, 0.92], 2)
- def test_permutation_score():
- iris = load_iris()
- X = iris.data
- X_sparse = coo_matrix(X)
- y = iris.target
- svm = SVC(kernel="linear")
- cv = StratifiedKFold(2)
- score, scores, pvalue = permutation_test_score(
- svm, X, y, n_permutations=30, cv=cv, scoring="accuracy"
- )
- assert score > 0.9
- assert_almost_equal(pvalue, 0.0, 1)
- score_group, _, pvalue_group = permutation_test_score(
- svm,
- X,
- y,
- n_permutations=30,
- cv=cv,
- scoring="accuracy",
- groups=np.ones(y.size),
- random_state=0,
- )
- assert score_group == score
- assert pvalue_group == pvalue
- # check that we obtain the same results with a sparse representation
- svm_sparse = SVC(kernel="linear")
- cv_sparse = StratifiedKFold(2)
- score_group, _, pvalue_group = permutation_test_score(
- svm_sparse,
- X_sparse,
- y,
- n_permutations=30,
- cv=cv_sparse,
- scoring="accuracy",
- groups=np.ones(y.size),
- random_state=0,
- )
- assert score_group == score
- assert pvalue_group == pvalue
- # test with custom scoring object
- def custom_score(y_true, y_pred):
- return ((y_true == y_pred).sum() - (y_true != y_pred).sum()) / y_true.shape[0]
- scorer = make_scorer(custom_score)
- score, _, pvalue = permutation_test_score(
- svm, X, y, n_permutations=100, scoring=scorer, cv=cv, random_state=0
- )
- assert_almost_equal(score, 0.93, 2)
- assert_almost_equal(pvalue, 0.01, 3)
- # set random y
- y = np.mod(np.arange(len(y)), 3)
- score, scores, pvalue = permutation_test_score(
- svm, X, y, n_permutations=30, cv=cv, scoring="accuracy"
- )
- assert score < 0.5
- assert pvalue > 0.2
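- # For reference, the p-value follows Ojala & Garriga (2010):
- # pvalue = (n_permutations_at_least_as_good + 1) / (n_permutations + 1),
- # so even a perfect score yields about 1 / (n_permutations + 1) rather than
- # exactly 0, which is why the first assertion only checks one decimal place.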
- def test_permutation_test_score_allow_nans():
- # Check that permutation_test_score allows input data with NaNs
- X = np.arange(200, dtype=np.float64).reshape(10, -1)
- X[2, :] = np.nan
- y = np.repeat([0, 1], X.shape[0] // 2)
- p = Pipeline(
- [
- ("imputer", SimpleImputer(strategy="mean", missing_values=np.nan)),
- ("classifier", MockClassifier()),
- ]
- )
- permutation_test_score(p, X, y)
- def test_permutation_test_score_fit_params():
- X = np.arange(100).reshape(10, 10)
- y = np.array([0] * 5 + [1] * 5)
- clf = CheckingClassifier(expected_sample_weight=True)
- err_msg = r"Expected sample_weight to be passed"
- with pytest.raises(AssertionError, match=err_msg):
- permutation_test_score(clf, X, y)
- err_msg = r"sample_weight.shape == \(1,\), expected \(8,\)!"
- with pytest.raises(ValueError, match=err_msg):
- permutation_test_score(clf, X, y, fit_params={"sample_weight": np.ones(1)})
- permutation_test_score(clf, X, y, fit_params={"sample_weight": np.ones(10)})
- def test_cross_val_score_allow_nans():
- # Check that cross_val_score allows input data with NaNs
- X = np.arange(200, dtype=np.float64).reshape(10, -1)
- X[2, :] = np.nan
- y = np.repeat([0, 1], X.shape[0] // 2)
- p = Pipeline(
- [
- ("imputer", SimpleImputer(strategy="mean", missing_values=np.nan)),
- ("classifier", MockClassifier()),
- ]
- )
- cross_val_score(p, X, y)
- def test_cross_val_score_multilabel():
- X = np.array(
- [
- [-3, 4],
- [2, 4],
- [3, 3],
- [0, 2],
- [-3, 1],
- [-2, 1],
- [0, 0],
- [-2, -1],
- [-1, -2],
- [1, -2],
- ]
- )
- y = np.array(
- [[1, 1], [0, 1], [0, 1], [0, 1], [1, 1], [0, 1], [1, 0], [1, 1], [1, 0], [0, 0]]
- )
- clf = KNeighborsClassifier(n_neighbors=1)
- scoring_micro = make_scorer(precision_score, average="micro")
- scoring_macro = make_scorer(precision_score, average="macro")
- scoring_samples = make_scorer(precision_score, average="samples")
- score_micro = cross_val_score(clf, X, y, scoring=scoring_micro)
- score_macro = cross_val_score(clf, X, y, scoring=scoring_macro)
- score_samples = cross_val_score(clf, X, y, scoring=scoring_samples)
- assert_almost_equal(score_micro, [1, 1 / 2, 3 / 4, 1 / 2, 1 / 3])
- assert_almost_equal(score_macro, [1, 1 / 2, 3 / 4, 1 / 2, 1 / 4])
- assert_almost_equal(score_samples, [1, 1 / 2, 3 / 4, 1 / 2, 1 / 4])
- def test_cross_val_predict():
- X, y = load_diabetes(return_X_y=True)
- cv = KFold()
- est = Ridge()
- # Naive loop (should be same as cross_val_predict):
- preds2 = np.zeros_like(y)
- for train, test in cv.split(X, y):
- est.fit(X[train], y[train])
- preds2[test] = est.predict(X[test])
- preds = cross_val_predict(est, X, y, cv=cv)
- assert_array_almost_equal(preds, preds2)
- preds = cross_val_predict(est, X, y)
- assert len(preds) == len(y)
- cv = LeaveOneOut()
- preds = cross_val_predict(est, X, y, cv=cv)
- assert len(preds) == len(y)
- Xsp = X.copy()
- Xsp *= Xsp > np.median(Xsp)
- Xsp = coo_matrix(Xsp)
- preds = cross_val_predict(est, Xsp, y)
- assert len(preds) == len(y)
- preds = cross_val_predict(KMeans(n_init="auto"), X)
- assert len(preds) == len(y)
- class BadCV:
- def split(self, X, y=None, groups=None):
- for i in range(4):
- yield np.array([0, 1, 2, 3]), np.array([4, 5, 6, 7, 8])
- with pytest.raises(ValueError):
- cross_val_predict(est, X, y, cv=BadCV())
- X, y = load_iris(return_X_y=True)
- warning_message = (
- r"Number of classes in training fold \(2\) does "
- r"not match total number of classes \(3\). "
- "Results may not be appropriate for your use case."
- )
- with pytest.warns(RuntimeWarning, match=warning_message):
- cross_val_predict(
- LogisticRegression(solver="liblinear"),
- X,
- y,
- method="predict_proba",
- cv=KFold(2),
- )
- def test_cross_val_predict_decision_function_shape():
- X, y = make_classification(n_classes=2, n_samples=50, random_state=0)
- preds = cross_val_predict(
- LogisticRegression(solver="liblinear"), X, y, method="decision_function"
- )
- assert preds.shape == (50,)
- X, y = load_iris(return_X_y=True)
- preds = cross_val_predict(
- LogisticRegression(solver="liblinear"), X, y, method="decision_function"
- )
- assert preds.shape == (150, 3)
- # This specifically tests imbalanced splits for binary
- # classification with decision_function. This is only
- # applicable to classifiers that can be fit on a single
- # class.
- X = X[:100]
- y = y[:100]
- error_message = (
- "Only 1 class/es in training fold,"
- " but 2 in overall dataset. This"
- " is not supported for decision_function"
- " with imbalanced folds. To fix "
- "this, use a cross-validation technique "
- "resulting in properly stratified folds"
- )
- with pytest.raises(ValueError, match=error_message):
- cross_val_predict(
- RidgeClassifier(), X, y, method="decision_function", cv=KFold(2)
- )
- X, y = load_digits(return_X_y=True)
- est = SVC(kernel="linear", decision_function_shape="ovo")
- preds = cross_val_predict(est, X, y, method="decision_function")
- assert preds.shape == (1797, 45)
- ind = np.argsort(y)
- X, y = X[ind], y[ind]
- error_message_regexp = (
- r"Output shape \(599L?, 21L?\) of "
- "decision_function does not match number of "
- r"classes \(7\) in fold. Irregular "
- "decision_function .*"
- )
- with pytest.raises(ValueError, match=error_message_regexp):
- cross_val_predict(est, X, y, cv=KFold(n_splits=3), method="decision_function")
- def test_cross_val_predict_predict_proba_shape():
- X, y = make_classification(n_classes=2, n_samples=50, random_state=0)
- preds = cross_val_predict(
- LogisticRegression(solver="liblinear"), X, y, method="predict_proba"
- )
- assert preds.shape == (50, 2)
- X, y = load_iris(return_X_y=True)
- preds = cross_val_predict(
- LogisticRegression(solver="liblinear"), X, y, method="predict_proba"
- )
- assert preds.shape == (150, 3)
- def test_cross_val_predict_predict_log_proba_shape():
- X, y = make_classification(n_classes=2, n_samples=50, random_state=0)
- preds = cross_val_predict(
- LogisticRegression(solver="liblinear"), X, y, method="predict_log_proba"
- )
- assert preds.shape == (50, 2)
- X, y = load_iris(return_X_y=True)
- preds = cross_val_predict(
- LogisticRegression(solver="liblinear"), X, y, method="predict_log_proba"
- )
- assert preds.shape == (150, 3)
- def test_cross_val_predict_input_types():
- iris = load_iris()
- X, y = iris.data, iris.target
- X_sparse = coo_matrix(X)
- multioutput_y = np.column_stack([y, y[::-1]])
- clf = Ridge(fit_intercept=False, random_state=0)
- # 3 fold cv is used --> at least 3 samples per class
- # Smoke test
- predictions = cross_val_predict(clf, X, y)
- assert predictions.shape == (150,)
- # test with multioutput y
- predictions = cross_val_predict(clf, X_sparse, multioutput_y)
- assert predictions.shape == (150, 2)
- predictions = cross_val_predict(clf, X_sparse, y)
- assert_array_equal(predictions.shape, (150,))
- # test with multioutput y
- predictions = cross_val_predict(clf, X_sparse, multioutput_y)
- assert_array_equal(predictions.shape, (150, 2))
- # test with X and y as list
- list_check = lambda x: isinstance(x, list)
- clf = CheckingClassifier(check_X=list_check)
- predictions = cross_val_predict(clf, X.tolist(), y.tolist())
- clf = CheckingClassifier(check_y=list_check)
- predictions = cross_val_predict(clf, X, y.tolist())
- # test with X and y as lists and an explicit (non-default) method
- predictions = cross_val_predict(
- LogisticRegression(solver="liblinear"),
- X.tolist(),
- y.tolist(),
- method="decision_function",
- )
- predictions = cross_val_predict(
- LogisticRegression(solver="liblinear"),
- X,
- y.tolist(),
- method="decision_function",
- )
- # test with 3d X
- X_3d = X[:, :, np.newaxis]
- check_3d = lambda x: x.ndim == 3
- clf = CheckingClassifier(check_X=check_3d)
- predictions = cross_val_predict(clf, X_3d, y)
- assert_array_equal(predictions.shape, (150,))
- @pytest.mark.filterwarnings("ignore: Using or importing the ABCs from")
- # python3.7 deprecation warnings in pandas via matplotlib :-/
- def test_cross_val_predict_pandas():
- # check cross_val_predict doesn't destroy pandas dataframe
- types = [(MockDataFrame, MockDataFrame)]
- try:
- from pandas import DataFrame, Series
- types.append((Series, DataFrame))
- except ImportError:
- pass
- for TargetType, InputFeatureType in types:
- # X dataframe, y series
- X_df, y_ser = InputFeatureType(X), TargetType(y2)
- check_df = lambda x: isinstance(x, InputFeatureType)
- check_series = lambda x: isinstance(x, TargetType)
- clf = CheckingClassifier(check_X=check_df, check_y=check_series)
- cross_val_predict(clf, X_df, y_ser, cv=3)
- def test_cross_val_predict_unbalanced():
- X, y = make_classification(
- n_samples=100,
- n_features=2,
- n_redundant=0,
- n_informative=2,
- n_clusters_per_class=1,
- random_state=1,
- )
- # Change the first sample to a new class
- y[0] = 2
- clf = LogisticRegression(random_state=1, solver="liblinear")
- cv = StratifiedKFold(n_splits=2)
- train, test = list(cv.split(X, y))
- yhat_proba = cross_val_predict(clf, X, y, cv=cv, method="predict_proba")
- assert y[test[0]][0] == 2 # sanity check for further assertions
- assert np.all(yhat_proba[test[0]][:, 2] == 0)
- assert np.all(yhat_proba[test[0]][:, 0:2] > 0)
- assert np.all(yhat_proba[test[1]] > 0)
- assert_array_almost_equal(yhat_proba.sum(axis=1), np.ones(y.shape), decimal=12)
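- # The zero column above is produced by cross_val_predict itself: when a
- # fold's training data lacks a class, the predict_proba output is padded
- # with 0 in that class's column (decision_function and predict_log_proba
- # are padded with np.finfo(np.float64).min; compare default_values in
- # check_cross_val_predict_multiclass below).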
- def test_cross_val_predict_y_none():
- # ensure that cross_val_predict works when y is None
- mock_classifier = MockClassifier()
- rng = np.random.RandomState(42)
- X = rng.rand(100, 10)
- y_hat = cross_val_predict(mock_classifier, X, y=None, cv=5, method="predict")
- assert_allclose(X[:, 0], y_hat)
- y_hat_proba = cross_val_predict(
- mock_classifier, X, y=None, cv=5, method="predict_proba"
- )
- assert_allclose(X, y_hat_proba)
- def test_cross_val_score_sparse_fit_params():
- iris = load_iris()
- X, y = iris.data, iris.target
- clf = MockClassifier()
- fit_params = {"sparse_sample_weight": coo_matrix(np.eye(X.shape[0]))}
- a = cross_val_score(clf, X, y, fit_params=fit_params, cv=3)
- assert_array_equal(a, np.ones(3))
- def test_learning_curve():
- n_samples = 30
- n_splits = 3
- X, y = make_classification(
- n_samples=n_samples,
- n_features=1,
- n_informative=1,
- n_redundant=0,
- n_classes=2,
- n_clusters_per_class=1,
- random_state=0,
- )
- estimator = MockImprovingEstimator(n_samples * ((n_splits - 1) / n_splits))
- for shuffle_train in [False, True]:
- with warnings.catch_warnings(record=True) as w:
- (
- train_sizes,
- train_scores,
- test_scores,
- fit_times,
- score_times,
- ) = learning_curve(
- estimator,
- X,
- y,
- cv=KFold(n_splits=n_splits),
- train_sizes=np.linspace(0.1, 1.0, 10),
- shuffle=shuffle_train,
- return_times=True,
- )
- if len(w) > 0:
- raise RuntimeError("Unexpected warning: %r" % w[0].message)
- assert train_scores.shape == (10, 3)
- assert test_scores.shape == (10, 3)
- assert fit_times.shape == (10, 3)
- assert score_times.shape == (10, 3)
- assert_array_equal(train_sizes, np.linspace(2, 20, 10))
- assert_array_almost_equal(train_scores.mean(axis=1), np.linspace(1.9, 1.0, 10))
- assert_array_almost_equal(test_scores.mean(axis=1), np.linspace(0.1, 1.0, 10))
- # Cannot use assert_array_almost_equal for fit and score times because
- # the values are hardware-dependent
- assert fit_times.dtype == "float64"
- assert score_times.dtype == "float64"
- # Test a custom cv splitter that can iterate only once
- with warnings.catch_warnings(record=True) as w:
- train_sizes2, train_scores2, test_scores2 = learning_curve(
- estimator,
- X,
- y,
- cv=OneTimeSplitter(n_splits=n_splits, n_samples=n_samples),
- train_sizes=np.linspace(0.1, 1.0, 10),
- shuffle=shuffle_train,
- )
- if len(w) > 0:
- raise RuntimeError("Unexpected warning: %r" % w[0].message)
- assert_array_almost_equal(train_scores2, train_scores)
- assert_array_almost_equal(test_scores2, test_scores)
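- # Why train_sizes comes back as linspace(2, 20, 10): float entries of
- # `train_sizes` in (0, 1] are interpreted as fractions of the maximum
- # training set size, n_samples * (n_splits - 1) / n_splits = 30 * 2/3 = 20
- # here, so np.linspace(0.1, 1.0, 10) maps to absolute sizes 2, 4, ..., 20.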
- def test_learning_curve_unsupervised():
- X, _ = make_classification(
- n_samples=30,
- n_features=1,
- n_informative=1,
- n_redundant=0,
- n_classes=2,
- n_clusters_per_class=1,
- random_state=0,
- )
- estimator = MockImprovingEstimator(20)
- train_sizes, train_scores, test_scores = learning_curve(
- estimator, X, y=None, cv=3, train_sizes=np.linspace(0.1, 1.0, 10)
- )
- assert_array_equal(train_sizes, np.linspace(2, 20, 10))
- assert_array_almost_equal(train_scores.mean(axis=1), np.linspace(1.9, 1.0, 10))
- assert_array_almost_equal(test_scores.mean(axis=1), np.linspace(0.1, 1.0, 10))
- def test_learning_curve_verbose():
- X, y = make_classification(
- n_samples=30,
- n_features=1,
- n_informative=1,
- n_redundant=0,
- n_classes=2,
- n_clusters_per_class=1,
- random_state=0,
- )
- estimator = MockImprovingEstimator(20)
- old_stdout = sys.stdout
- sys.stdout = StringIO()
- try:
- train_sizes, train_scores, test_scores = learning_curve(
- estimator, X, y, cv=3, verbose=1
- )
- finally:
- out = sys.stdout.getvalue()
- sys.stdout.close()
- sys.stdout = old_stdout
- assert "[learning_curve]" in out
- def test_learning_curve_incremental_learning_not_possible():
- X, y = make_classification(
- n_samples=2,
- n_features=1,
- n_informative=1,
- n_redundant=0,
- n_classes=2,
- n_clusters_per_class=1,
- random_state=0,
- )
- # The mockup does not have partial_fit()
- estimator = MockImprovingEstimator(1)
- with pytest.raises(ValueError):
- learning_curve(estimator, X, y, exploit_incremental_learning=True)
- def test_learning_curve_incremental_learning():
- X, y = make_classification(
- n_samples=30,
- n_features=1,
- n_informative=1,
- n_redundant=0,
- n_classes=2,
- n_clusters_per_class=1,
- random_state=0,
- )
- estimator = MockIncrementalImprovingEstimator(20)
- for shuffle_train in [False, True]:
- train_sizes, train_scores, test_scores = learning_curve(
- estimator,
- X,
- y,
- cv=3,
- exploit_incremental_learning=True,
- train_sizes=np.linspace(0.1, 1.0, 10),
- shuffle=shuffle_train,
- )
- assert_array_equal(train_sizes, np.linspace(2, 20, 10))
- assert_array_almost_equal(train_scores.mean(axis=1), np.linspace(1.9, 1.0, 10))
- assert_array_almost_equal(test_scores.mean(axis=1), np.linspace(0.1, 1.0, 10))
- def test_learning_curve_incremental_learning_unsupervised():
- X, _ = make_classification(
- n_samples=30,
- n_features=1,
- n_informative=1,
- n_redundant=0,
- n_classes=2,
- n_clusters_per_class=1,
- random_state=0,
- )
- estimator = MockIncrementalImprovingEstimator(20)
- train_sizes, train_scores, test_scores = learning_curve(
- estimator,
- X,
- y=None,
- cv=3,
- exploit_incremental_learning=True,
- train_sizes=np.linspace(0.1, 1.0, 10),
- )
- assert_array_equal(train_sizes, np.linspace(2, 20, 10))
- assert_array_almost_equal(train_scores.mean(axis=1), np.linspace(1.9, 1.0, 10))
- assert_array_almost_equal(test_scores.mean(axis=1), np.linspace(0.1, 1.0, 10))
- def test_learning_curve_batch_and_incremental_learning_are_equal():
- X, y = make_classification(
- n_samples=30,
- n_features=1,
- n_informative=1,
- n_redundant=0,
- n_classes=2,
- n_clusters_per_class=1,
- random_state=0,
- )
- train_sizes = np.linspace(0.2, 1.0, 5)
- estimator = PassiveAggressiveClassifier(max_iter=1, tol=None, shuffle=False)
- train_sizes_inc, train_scores_inc, test_scores_inc = learning_curve(
- estimator,
- X,
- y,
- train_sizes=train_sizes,
- cv=3,
- exploit_incremental_learning=True,
- )
- train_sizes_batch, train_scores_batch, test_scores_batch = learning_curve(
- estimator,
- X,
- y,
- cv=3,
- train_sizes=train_sizes,
- exploit_incremental_learning=False,
- )
- assert_array_equal(train_sizes_inc, train_sizes_batch)
- assert_array_almost_equal(
- train_scores_inc.mean(axis=1), train_scores_batch.mean(axis=1)
- )
- assert_array_almost_equal(
- test_scores_inc.mean(axis=1), test_scores_batch.mean(axis=1)
- )
- def test_learning_curve_n_sample_range_out_of_bounds():
- X, y = make_classification(
- n_samples=30,
- n_features=1,
- n_informative=1,
- n_redundant=0,
- n_classes=2,
- n_clusters_per_class=1,
- random_state=0,
- )
- estimator = MockImprovingEstimator(20)
- with pytest.raises(ValueError):
- learning_curve(estimator, X, y, cv=3, train_sizes=[0, 1])
- with pytest.raises(ValueError):
- learning_curve(estimator, X, y, cv=3, train_sizes=[0.0, 1.0])
- with pytest.raises(ValueError):
- learning_curve(estimator, X, y, cv=3, train_sizes=[0.1, 1.1])
- with pytest.raises(ValueError):
- learning_curve(estimator, X, y, cv=3, train_sizes=[0, 20])
- with pytest.raises(ValueError):
- learning_curve(estimator, X, y, cv=3, train_sizes=[1, 21])
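- # The bounds exercised above: float entries must lie in (0.0, 1.0] (0.0 and
- # 1.1 fail), and integer entries must lie in [1, max_train_size], which is
- # 20 for cv=3 on 30 samples (0 and 21 fail).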
- def test_learning_curve_remove_duplicate_sample_sizes():
- X, y = make_classification(
- n_samples=3,
- n_features=1,
- n_informative=1,
- n_redundant=0,
- n_classes=2,
- n_clusters_per_class=1,
- random_state=0,
- )
- estimator = MockImprovingEstimator(2)
- warning_message = (
- "Removed duplicate entries from 'train_sizes'. Number of ticks "
- "will be less than the size of 'train_sizes': 2 instead of 3."
- )
- with pytest.warns(RuntimeWarning, match=warning_message):
- train_sizes, _, _ = learning_curve(
- estimator, X, y, cv=3, train_sizes=np.linspace(0.33, 1.0, 3)
- )
- assert_array_equal(train_sizes, [1, 2])
- def test_learning_curve_with_boolean_indices():
- X, y = make_classification(
- n_samples=30,
- n_features=1,
- n_informative=1,
- n_redundant=0,
- n_classes=2,
- n_clusters_per_class=1,
- random_state=0,
- )
- estimator = MockImprovingEstimator(20)
- cv = KFold(n_splits=3)
- train_sizes, train_scores, test_scores = learning_curve(
- estimator, X, y, cv=cv, train_sizes=np.linspace(0.1, 1.0, 10)
- )
- assert_array_equal(train_sizes, np.linspace(2, 20, 10))
- assert_array_almost_equal(train_scores.mean(axis=1), np.linspace(1.9, 1.0, 10))
- assert_array_almost_equal(test_scores.mean(axis=1), np.linspace(0.1, 1.0, 10))
- def test_learning_curve_with_shuffle():
- # The following test case was designed to verify the code changes made
- # in pull request #7506.
- X = np.array(
- [
- [1, 2],
- [3, 4],
- [5, 6],
- [7, 8],
- [11, 12],
- [13, 14],
- [15, 16],
- [17, 18],
- [19, 20],
- [7, 8],
- [9, 10],
- [11, 12],
- [13, 14],
- [15, 16],
- [17, 18],
- ]
- )
- y = np.array([1, 1, 1, 2, 3, 4, 1, 1, 2, 3, 4, 1, 2, 3, 4])
- groups = np.array([1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 4, 4, 4, 4])
- # Splits on these groups fail without shuffle as the first iteration
- # of the learning curve doesn't contain label 4 in the training set.
- estimator = PassiveAggressiveClassifier(max_iter=5, tol=None, shuffle=False)
- cv = GroupKFold(n_splits=2)
- train_sizes_batch, train_scores_batch, test_scores_batch = learning_curve(
- estimator,
- X,
- y,
- cv=cv,
- n_jobs=1,
- train_sizes=np.linspace(0.3, 1.0, 3),
- groups=groups,
- shuffle=True,
- random_state=2,
- )
- assert_array_almost_equal(
- train_scores_batch.mean(axis=1), np.array([0.75, 0.3, 0.36111111])
- )
- assert_array_almost_equal(
- test_scores_batch.mean(axis=1), np.array([0.36111111, 0.25, 0.25])
- )
- with pytest.raises(ValueError):
- learning_curve(
- estimator,
- X,
- y,
- cv=cv,
- n_jobs=1,
- train_sizes=np.linspace(0.3, 1.0, 3),
- groups=groups,
- error_score="raise",
- )
- train_sizes_inc, train_scores_inc, test_scores_inc = learning_curve(
- estimator,
- X,
- y,
- cv=cv,
- n_jobs=1,
- train_sizes=np.linspace(0.3, 1.0, 3),
- groups=groups,
- shuffle=True,
- random_state=2,
- exploit_incremental_learning=True,
- )
- assert_array_almost_equal(
- train_scores_inc.mean(axis=1), train_scores_batch.mean(axis=1)
- )
- assert_array_almost_equal(
- test_scores_inc.mean(axis=1), test_scores_batch.mean(axis=1)
- )
- def test_learning_curve_fit_params():
- X = np.arange(100).reshape(10, 10)
- y = np.array([0] * 5 + [1] * 5)
- clf = CheckingClassifier(expected_sample_weight=True)
- err_msg = r"Expected sample_weight to be passed"
- with pytest.raises(AssertionError, match=err_msg):
- learning_curve(clf, X, y, error_score="raise")
- err_msg = r"sample_weight.shape == \(1,\), expected \(2,\)!"
- with pytest.raises(ValueError, match=err_msg):
- learning_curve(
- clf, X, y, error_score="raise", fit_params={"sample_weight": np.ones(1)}
- )
- learning_curve(
- clf, X, y, error_score="raise", fit_params={"sample_weight": np.ones(10)}
- )
- def test_learning_curve_incremental_learning_fit_params():
- X, y = make_classification(
- n_samples=30,
- n_features=1,
- n_informative=1,
- n_redundant=0,
- n_classes=2,
- n_clusters_per_class=1,
- random_state=0,
- )
- estimator = MockIncrementalImprovingEstimator(20, ["sample_weight"])
- err_msg = r"Expected fit parameter\(s\) \['sample_weight'\] not seen."
- with pytest.raises(AssertionError, match=err_msg):
- learning_curve(
- estimator,
- X,
- y,
- cv=3,
- exploit_incremental_learning=True,
- train_sizes=np.linspace(0.1, 1.0, 10),
- error_score="raise",
- )
- err_msg = "Fit parameter sample_weight has length 3; expected"
- with pytest.raises(AssertionError, match=err_msg):
- learning_curve(
- estimator,
- X,
- y,
- cv=3,
- exploit_incremental_learning=True,
- train_sizes=np.linspace(0.1, 1.0, 10),
- error_score="raise",
- fit_params={"sample_weight": np.ones(3)},
- )
- learning_curve(
- estimator,
- X,
- y,
- cv=3,
- exploit_incremental_learning=True,
- train_sizes=np.linspace(0.1, 1.0, 10),
- error_score="raise",
- fit_params={"sample_weight": np.ones(2)},
- )
- def test_validation_curve():
- X, y = make_classification(
- n_samples=2,
- n_features=1,
- n_informative=1,
- n_redundant=0,
- n_classes=2,
- n_clusters_per_class=1,
- random_state=0,
- )
- param_range = np.linspace(0, 1, 10)
- with warnings.catch_warnings(record=True) as w:
- train_scores, test_scores = validation_curve(
- MockEstimatorWithParameter(),
- X,
- y,
- param_name="param",
- param_range=param_range,
- cv=2,
- )
- if len(w) > 0:
- raise RuntimeError("Unexpected warning: %r" % w[0].message)
- assert_array_almost_equal(train_scores.mean(axis=1), param_range)
- assert_array_almost_equal(test_scores.mean(axis=1), 1 - param_range)
- def test_validation_curve_clone_estimator():
- X, y = make_classification(
- n_samples=2,
- n_features=1,
- n_informative=1,
- n_redundant=0,
- n_classes=2,
- n_clusters_per_class=1,
- random_state=0,
- )
- param_range = np.linspace(1, 0, 10)
- _, _ = validation_curve(
- MockEstimatorWithSingleFitCallAllowed(),
- X,
- y,
- param_name="param",
- param_range=param_range,
- cv=2,
- )
- def test_validation_curve_cv_splits_consistency():
- n_samples = 100
- n_splits = 5
- X, y = make_classification(n_samples=n_samples, random_state=0)
- scores1 = validation_curve(
- SVC(kernel="linear", random_state=0),
- X,
- y,
- param_name="C",
- param_range=[0.1, 0.1, 0.2, 0.2],
- cv=OneTimeSplitter(n_splits=n_splits, n_samples=n_samples),
- )
- # The OneTimeSplitter is a non-re-entrant cv splitter: `split` can only be
- # iterated once. Since the folds are computed once and reused across
- # parameter settings, the following should produce identical results for
- # param setting 1 and param setting 2, as both have the same C value.
- assert_array_almost_equal(*np.vsplit(np.hstack(scores1)[(0, 2, 1, 3), :], 2))
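- # (In the assertion above and the analogous ones below, np.hstack stacks the
- # (4, n_splits) train and test score arrays side by side; reordering the
- # rows as (0, 2, 1, 3) and then np.vsplit(..., 2) pairs the two C=0.1 rows
- # with each other and the two C=0.2 rows with each other, so duplicate
- # parameter settings can be compared directly.)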
- scores2 = validation_curve(
- SVC(kernel="linear", random_state=0),
- X,
- y,
- param_name="C",
- param_range=[0.1, 0.1, 0.2, 0.2],
- cv=KFold(n_splits=n_splits, shuffle=True),
- )
- # For scores2, compare the 1st and 2nd parameter settings' scores
- # (since the C value for the first two param settings is 0.1, they must be
- # consistent unless the train/test folds differ between the param settings)
- assert_array_almost_equal(*np.vsplit(np.hstack(scores2)[(0, 2, 1, 3), :], 2))
- scores3 = validation_curve(
- SVC(kernel="linear", random_state=0),
- X,
- y,
- param_name="C",
- param_range=[0.1, 0.1, 0.2, 0.2],
- cv=KFold(n_splits=n_splits),
- )
- # OneTimeSplitter is basically unshuffled KFold(n_splits=5). Sanity check.
- assert_array_almost_equal(np.array(scores3), np.array(scores1))
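

# A minimal sketch of the property the test above relies on (assuming, as its
# name suggests, that OneTimeSplitter yields folds from a single shared
# iterator): a second pass over `split` produces nothing, so identical scores
# for equal C values imply the splits were materialized once and reused for
# every parameter setting.
def _demo_one_time_splitter_is_non_reentrant():
    X, y = np.ones((100, 2)), np.ones(100)
    splitter = OneTimeSplitter(n_splits=5, n_samples=100)
    assert len(list(splitter.split(X, y))) == 5
    assert len(list(splitter.split(X, y))) == 0  # already exhausted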


def test_validation_curve_fit_params():
    X = np.arange(100).reshape(10, 10)
    y = np.array([0] * 5 + [1] * 5)
    clf = CheckingClassifier(expected_sample_weight=True)

    err_msg = r"Expected sample_weight to be passed"
    with pytest.raises(AssertionError, match=err_msg):
        validation_curve(
            clf,
            X,
            y,
            param_name="foo_param",
            param_range=[1, 2, 3],
            error_score="raise",
        )

    err_msg = r"sample_weight.shape == \(1,\), expected \(8,\)!"
    with pytest.raises(ValueError, match=err_msg):
        validation_curve(
            clf,
            X,
            y,
            param_name="foo_param",
            param_range=[1, 2, 3],
            error_score="raise",
            fit_params={"sample_weight": np.ones(1)},
        )
    validation_curve(
        clf,
        X,
        y,
        param_name="foo_param",
        param_range=[1, 2, 3],
        error_score="raise",
        fit_params={"sample_weight": np.ones(10)},
    )


def test_check_is_permutation():
    rng = np.random.RandomState(0)
    p = np.arange(100)
    rng.shuffle(p)
    assert _check_is_permutation(p, 100)
    assert not _check_is_permutation(np.delete(p, 23), 100)

    p[0] = 23
    assert not _check_is_permutation(p, 100)

    # Check if the additional duplicate indices are caught
    assert not _check_is_permutation(np.hstack((p, 0)), 100)
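

# A minimal reference sketch of the property exercised above (not sklearn's
# implementation): an index array of length n is a permutation of arange(n)
# exactly when sorting it recovers arange(n).
def _is_permutation_reference(indices, n_samples):
    indices = np.asarray(indices)
    return indices.shape == (n_samples,) and np.array_equal(
        np.sort(indices), np.arange(n_samples)
    )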


def test_cross_val_predict_sparse_prediction():
    # check that cross_val_predict gives same result for sparse and dense input
    X, y = make_multilabel_classification(
        n_classes=2,
        n_labels=1,
        allow_unlabeled=False,
        return_indicator=True,
        random_state=1,
    )
    X_sparse = csr_matrix(X)
    y_sparse = csr_matrix(y)
    classif = OneVsRestClassifier(SVC(kernel="linear"))
    preds = cross_val_predict(classif, X, y, cv=10)
    preds_sparse = cross_val_predict(classif, X_sparse, y_sparse, cv=10)
    preds_sparse = preds_sparse.toarray()
    assert_array_almost_equal(preds_sparse, preds)


def check_cross_val_predict_binary(est, X, y, method):
    """Helper for tests of cross_val_predict with binary classification"""
    cv = KFold(n_splits=3, shuffle=False)

    # Generate expected outputs
    if y.ndim == 1:
        exp_shape = (len(X),) if method == "decision_function" else (len(X), 2)
    else:
        exp_shape = y.shape
    expected_predictions = np.zeros(exp_shape)
    for train, test in cv.split(X, y):
        est = clone(est).fit(X[train], y[train])
        expected_predictions[test] = getattr(est, method)(X[test])

    # Check actual outputs for several representations of y
    for tg in [y, y + 1, y - 2, y.astype("str")]:
        assert_allclose(
            cross_val_predict(est, X, tg, method=method, cv=cv), expected_predictions
        )


def check_cross_val_predict_multiclass(est, X, y, method):
    """Helper for tests of cross_val_predict with multiclass classification"""
    cv = KFold(n_splits=3, shuffle=False)

    # Generate expected outputs
    float_min = np.finfo(np.float64).min
    default_values = {
        "decision_function": float_min,
        "predict_log_proba": float_min,
        "predict_proba": 0,
    }
    expected_predictions = np.full(
        (len(X), len(set(y))), default_values[method], dtype=np.float64
    )
    _, y_enc = np.unique(y, return_inverse=True)
    for train, test in cv.split(X, y_enc):
        est = clone(est).fit(X[train], y_enc[train])
        fold_preds = getattr(est, method)(X[test])
        i_cols_fit = np.unique(y_enc[train])
        expected_predictions[np.ix_(test, i_cols_fit)] = fold_preds

    # Check actual outputs for several representations of y
    for tg in [y, y + 1, y - 2, y.astype("str")]:
        assert_allclose(
            cross_val_predict(est, X, tg, method=method, cv=cv), expected_predictions
        )
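
# An illustrative aside on the indexing used above: np.ix_ builds an open mesh,
# so array[np.ix_(rows, cols)] addresses the full rows-by-cols block. For
# example, np.ix_([0, 2], [1, 3]) selects elements (0, 1), (0, 3), (2, 1) and
# (2, 3), which is how fold predictions land in the columns of exactly the
# classes seen during training.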


def check_cross_val_predict_multilabel(est, X, y, method):
    """Check the output of cross_val_predict for 2D targets using
    Estimators which provide predictions as a list with one
    element per label.
    """
    cv = KFold(n_splits=3, shuffle=False)

    # Create empty arrays of the correct size to hold outputs
    float_min = np.finfo(np.float64).min
    default_values = {
        "decision_function": float_min,
        "predict_log_proba": float_min,
        "predict_proba": 0,
    }
    n_targets = y.shape[1]
    expected_preds = []
    for i_col in range(n_targets):
        n_classes_in_label = len(set(y[:, i_col]))
        if n_classes_in_label == 2 and method == "decision_function":
            exp_shape = (len(X),)
        else:
            exp_shape = (len(X), n_classes_in_label)
        expected_preds.append(
            np.full(exp_shape, default_values[method], dtype=np.float64)
        )

    # Generate expected outputs
    y_enc_cols = [
        np.unique(y[:, i], return_inverse=True)[1][:, np.newaxis]
        for i in range(y.shape[1])
    ]
    y_enc = np.concatenate(y_enc_cols, axis=1)
    for train, test in cv.split(X, y_enc):
        est = clone(est).fit(X[train], y_enc[train])
        fold_preds = getattr(est, method)(X[test])
        for i_col in range(n_targets):
            fold_cols = np.unique(y_enc[train][:, i_col])
            if expected_preds[i_col].ndim == 1:
                # Decision function with <=2 classes
                expected_preds[i_col][test] = fold_preds[i_col]
            else:
                idx = np.ix_(test, fold_cols)
                expected_preds[i_col][idx] = fold_preds[i_col]

    # Check actual outputs for several representations of y
    for tg in [y, y + 1, y - 2, y.astype("str")]:
        cv_predict_output = cross_val_predict(est, X, tg, method=method, cv=cv)
        assert len(cv_predict_output) == len(expected_preds)
        for i in range(len(cv_predict_output)):
            assert_allclose(cv_predict_output[i], expected_preds[i])


def check_cross_val_predict_with_method_binary(est):
    # This test includes the decision_function with two classes.
    # This is a special case: it has only one column of output.
    X, y = make_classification(n_classes=2, random_state=0)
    for method in ["decision_function", "predict_proba", "predict_log_proba"]:
        check_cross_val_predict_binary(est, X, y, method)


def check_cross_val_predict_with_method_multiclass(est):
    iris = load_iris()
    X, y = iris.data, iris.target
    X, y = shuffle(X, y, random_state=0)
    for method in ["decision_function", "predict_proba", "predict_log_proba"]:
        check_cross_val_predict_multiclass(est, X, y, method)


def test_cross_val_predict_with_method():
    check_cross_val_predict_with_method_binary(LogisticRegression(solver="liblinear"))
    check_cross_val_predict_with_method_multiclass(
        LogisticRegression(solver="liblinear")
    )


def test_cross_val_predict_method_checking():
    # Regression test for issue #9639. Tests that cross_val_predict does not
    # check estimator methods (e.g. predict_proba) before fitting.
    iris = load_iris()
    X, y = iris.data, iris.target
    X, y = shuffle(X, y, random_state=0)
    for method in ["decision_function", "predict_proba", "predict_log_proba"]:
        est = SGDClassifier(loss="log_loss", random_state=2)
        check_cross_val_predict_multiclass(est, X, y, method)


def test_gridsearchcv_cross_val_predict_with_method():
    iris = load_iris()
    X, y = iris.data, iris.target
    X, y = shuffle(X, y, random_state=0)
    est = GridSearchCV(
        LogisticRegression(random_state=42, solver="liblinear"), {"C": [0.1, 1]}, cv=2
    )
    for method in ["decision_function", "predict_proba", "predict_log_proba"]:
        check_cross_val_predict_multiclass(est, X, y, method)


def test_cross_val_predict_with_method_multilabel_ovr():
    # OVR does multilabel predictions, but only arrays of
    # binary indicator columns. The output of predict_proba
    # is a 2D array with shape (n_samples, n_classes).
    n_samp = 100
    n_classes = 4
    X, y = make_multilabel_classification(
        n_samples=n_samp, n_labels=3, n_classes=n_classes, n_features=5, random_state=42
    )
    est = OneVsRestClassifier(LogisticRegression(solver="liblinear", random_state=0))
    for method in ["predict_proba", "decision_function"]:
        check_cross_val_predict_binary(est, X, y, method=method)


class RFWithDecisionFunction(RandomForestClassifier):
    # None of the current multioutput-multiclass estimators have
    # decision function methods. Create a mock decision function
    # to test the cross_val_predict function's handling of this case.
    def decision_function(self, X):
        probs = self.predict_proba(X)
        msg = "This helper should only be used on multioutput-multiclass tasks"
        assert isinstance(probs, list), msg
        probs = [p[:, -1] if p.shape[1] == 2 else p for p in probs]
        return probs


def test_cross_val_predict_with_method_multilabel_rf():
    # The RandomForest allows multiple classes in each label.
    # Output of predict_proba is a list of outputs of predict_proba
    # for each individual label.
    n_classes = 4
    X, y = make_multilabel_classification(
        n_samples=100, n_labels=3, n_classes=n_classes, n_features=5, random_state=42
    )
    y[:, 0] += y[:, 1]  # Put three classes in the first column
    for method in ["predict_proba", "predict_log_proba", "decision_function"]:
        est = RFWithDecisionFunction(n_estimators=5, random_state=0)
        with warnings.catch_warnings():
            # Suppress "RuntimeWarning: divide by zero encountered in log"
            warnings.simplefilter("ignore")
            check_cross_val_predict_multilabel(est, X, y, method=method)


def test_cross_val_predict_with_method_rare_class():
    # Test a multiclass problem where one class will be missing from
    # one of the CV training sets.
    rng = np.random.RandomState(0)
    X = rng.normal(0, 1, size=(14, 10))
    y = np.array([0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 3])
    est = LogisticRegression(solver="liblinear")
    for method in ["predict_proba", "predict_log_proba", "decision_function"]:
        with warnings.catch_warnings():
            # Suppress warning about too few examples of a class
            warnings.simplefilter("ignore")
            check_cross_val_predict_multiclass(est, X, y, method)


def test_cross_val_predict_with_method_multilabel_rf_rare_class():
    # The RandomForest allows anything for the contents of the labels.
    # Output of predict_proba is a list of outputs of predict_proba
    # for each individual label.
    # In this test, the first label has a class with a single example.
    # We'll have one CV fold where the training data don't include it.
    rng = np.random.RandomState(0)
    X = rng.normal(0, 1, size=(5, 10))
    y = np.array([[0, 0], [1, 1], [2, 1], [0, 1], [1, 0]])
    for method in ["predict_proba", "predict_log_proba"]:
        est = RFWithDecisionFunction(n_estimators=5, random_state=0)
        with warnings.catch_warnings():
            # Suppress "RuntimeWarning: divide by zero encountered in log"
            warnings.simplefilter("ignore")
            check_cross_val_predict_multilabel(est, X, y, method=method)


def get_expected_predictions(X, y, cv, classes, est, method):
    expected_predictions = np.zeros([len(y), classes])
    func = getattr(est, method)
    for train, test in cv.split(X, y):
        est.fit(X[train], y[train])
        expected_predictions_ = func(X[test])
        # Build a dense per-fold array first, to avoid 2-dimensional fancy
        # indexing into expected_predictions
        if method == "predict_proba":
            exp_pred_test = np.zeros((len(test), classes))
        else:
            exp_pred_test = np.full(
                (len(test), classes), np.finfo(expected_predictions.dtype).min
            )
        exp_pred_test[:, est.classes_] = expected_predictions_
        expected_predictions[test] = exp_pred_test

    return expected_predictions
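
# An aside on the fill values used here and in the helpers above (an inference,
# not something these tests assert): probabilities of classes unseen during a
# fold's training default to 0, while decision_function and predict_log_proba
# columns default to the smallest float, since those scores can legitimately be
# negative and a 0 default could outrank real predictions.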


def test_cross_val_predict_class_subset():
    X = np.arange(200).reshape(100, 2)
    y = np.array([x // 10 for x in range(100)])
    classes = 10

    kfold3 = KFold(n_splits=3)
    kfold4 = KFold(n_splits=4)

    le = LabelEncoder()

    methods = ["decision_function", "predict_proba", "predict_log_proba"]
    for method in methods:
        est = LogisticRegression(solver="liblinear")

        # Test with n_splits=3
        predictions = cross_val_predict(est, X, y, method=method, cv=kfold3)

        # Runs a naive loop (should be same as cross_val_predict):
        expected_predictions = get_expected_predictions(
            X, y, kfold3, classes, est, method
        )
        assert_array_almost_equal(expected_predictions, predictions)

        # Test with n_splits=4
        predictions = cross_val_predict(est, X, y, method=method, cv=kfold4)
        expected_predictions = get_expected_predictions(
            X, y, kfold4, classes, est, method
        )
        assert_array_almost_equal(expected_predictions, predictions)

        # Testing unordered labels
        y = shuffle(np.repeat(range(10), 10), random_state=0)
        predictions = cross_val_predict(est, X, y, method=method, cv=kfold3)
        y = le.fit_transform(y)
        expected_predictions = get_expected_predictions(
            X, y, kfold3, classes, est, method
        )
        assert_array_almost_equal(expected_predictions, predictions)


def test_score_memmap():
    # Ensure a scalar score of memmap type is accepted
    iris = load_iris()
    X, y = iris.data, iris.target
    clf = MockClassifier()
    tf = tempfile.NamedTemporaryFile(mode="wb", delete=False)
    tf.write(b"Hello world!!!!!")
    tf.close()
    scores = np.memmap(tf.name, dtype=np.float64)
    score = np.memmap(tf.name, shape=(), mode="r", dtype=np.float64)
    try:
        cross_val_score(clf, X, y, scoring=lambda est, X, y: score)
        # a non-scalar memmap score should still be rejected
        with pytest.raises(ValueError):
            cross_val_score(clf, X, y, scoring=lambda est, X, y: scores)
    finally:
        # Best effort to release the mmap file handles before deleting the
        # backing file under Windows
        scores, score = None, None
        for _ in range(3):
            try:
                os.unlink(tf.name)
                break
            except OSError:
                sleep(1.0)
- @pytest.mark.filterwarnings("ignore: Using or importing the ABCs from")
- def test_permutation_test_score_pandas():
- # check permutation_test_score doesn't destroy pandas dataframe
- types = [(MockDataFrame, MockDataFrame)]
- try:
- from pandas import DataFrame, Series
- types.append((Series, DataFrame))
- except ImportError:
- pass
- for TargetType, InputFeatureType in types:
- # X dataframe, y series
- iris = load_iris()
- X, y = iris.data, iris.target
- X_df, y_ser = InputFeatureType(X), TargetType(y)
- check_df = lambda x: isinstance(x, InputFeatureType)
- check_series = lambda x: isinstance(x, TargetType)
- clf = CheckingClassifier(check_X=check_df, check_y=check_series)
- permutation_test_score(clf, X_df, y_ser)


def test_fit_and_score_failing():
    # Create a failing classifier to deliberately fail
    failing_clf = FailingClassifier(FailingClassifier.FAILING_PARAMETER)
    # dummy X data
    X = np.arange(1, 10)
    fit_and_score_args = [failing_clf, X, None, dict(), None, None, 0, None, None]
    # passing error score to trigger the warning message
    fit_and_score_kwargs = {"error_score": "raise"}
    # check if exception was raised, with default error_score='raise'
    with pytest.raises(ValueError, match="Failing classifier failed as required"):
        _fit_and_score(*fit_and_score_args, **fit_and_score_kwargs)

    # check that functions upstream pass error_score param to _fit_and_score
    error_message_cross_validate = (
        "The 'error_score' parameter of cross_validate must be .*. Got .* instead."
    )
    with pytest.raises(ValueError, match=error_message_cross_validate):
        cross_val_score(failing_clf, X, cv=3, error_score="unvalid-string")

    assert failing_clf.score() == 0.0  # FailingClassifier coverage


def test_fit_and_score_working():
    X, y = make_classification(n_samples=30, random_state=0)
    clf = SVC(kernel="linear", random_state=0)
    train, test = next(ShuffleSplit().split(X))
    # Test return_parameters option
    fit_and_score_args = [clf, X, y, dict(), train, test, 0]
    fit_and_score_kwargs = {
        "parameters": {"max_iter": 100, "tol": 0.1},
        "fit_params": None,
        "return_parameters": True,
    }
    result = _fit_and_score(*fit_and_score_args, **fit_and_score_kwargs)
    assert result["parameters"] == fit_and_score_kwargs["parameters"]


class DataDependentFailingClassifier(BaseEstimator):
    def __init__(self, max_x_value=None):
        self.max_x_value = max_x_value

    def fit(self, X, y=None):
        num_values_too_high = (X > self.max_x_value).sum()
        if num_values_too_high:
            raise ValueError(
                f"Classifier fit failed with {num_values_too_high} values too high"
            )

    def score(self, X=None, Y=None):
        return 0.0
- @pytest.mark.parametrize("error_score", [np.nan, 0])
- def test_cross_validate_some_failing_fits_warning(error_score):
- # Create a failing classifier to deliberately fail
- failing_clf = DataDependentFailingClassifier(max_x_value=8)
- # dummy X data
- X = np.arange(1, 10)
- y = np.ones(9)
- # passing error score to trigger the warning message
- cross_validate_args = [failing_clf, X, y]
- cross_validate_kwargs = {"cv": 3, "error_score": error_score}
- # check if the warning message type is as expected
- individual_fit_error_message = (
- "ValueError: Classifier fit failed with 1 values too high"
- )
- warning_message = re.compile(
- (
- "2 fits failed.+total of 3.+The score on these"
- " train-test partitions for these parameters will be set to"
- f" {cross_validate_kwargs['error_score']}.+{individual_fit_error_message}"
- ),
- flags=re.DOTALL,
- )
- with pytest.warns(FitFailedWarning, match=warning_message):
- cross_validate(*cross_validate_args, **cross_validate_kwargs)
- @pytest.mark.parametrize("error_score", [np.nan, 0])
- def test_cross_validate_all_failing_fits_error(error_score):
- # Create a failing classifier to deliberately fail
- failing_clf = FailingClassifier(FailingClassifier.FAILING_PARAMETER)
- # dummy X data
- X = np.arange(1, 10)
- y = np.ones(9)
- cross_validate_args = [failing_clf, X, y]
- cross_validate_kwargs = {"cv": 7, "error_score": error_score}
- individual_fit_error_message = "ValueError: Failing classifier failed as required"
- error_message = re.compile(
- (
- "All the 7 fits failed.+your model is misconfigured.+"
- f"{individual_fit_error_message}"
- ),
- flags=re.DOTALL,
- )
- with pytest.raises(ValueError, match=error_message):
- cross_validate(*cross_validate_args, **cross_validate_kwargs)


def _failing_scorer(estimator, X, y, error_msg):
    raise ValueError(error_msg)
- @pytest.mark.filterwarnings("ignore:lbfgs failed to converge")
- @pytest.mark.parametrize("error_score", [np.nan, 0, "raise"])
- def test_cross_val_score_failing_scorer(error_score):
- # check that an estimator can fail during scoring in `cross_val_score` and
- # that we can optionally replaced it with `error_score`
- X, y = load_iris(return_X_y=True)
- clf = LogisticRegression(max_iter=5).fit(X, y)
- error_msg = "This scorer is supposed to fail!!!"
- failing_scorer = partial(_failing_scorer, error_msg=error_msg)
- if error_score == "raise":
- with pytest.raises(ValueError, match=error_msg):
- cross_val_score(
- clf, X, y, cv=3, scoring=failing_scorer, error_score=error_score
- )
- else:
- warning_msg = (
- "Scoring failed. The score on this train-test partition for "
- f"these parameters will be set to {error_score}"
- )
- with pytest.warns(UserWarning, match=warning_msg):
- scores = cross_val_score(
- clf, X, y, cv=3, scoring=failing_scorer, error_score=error_score
- )
- assert_allclose(scores, error_score)
- @pytest.mark.filterwarnings("ignore:lbfgs failed to converge")
- @pytest.mark.parametrize("error_score", [np.nan, 0, "raise"])
- @pytest.mark.parametrize("return_train_score", [True, False])
- @pytest.mark.parametrize("with_multimetric", [False, True])
- def test_cross_validate_failing_scorer(
- error_score, return_train_score, with_multimetric
- ):
- # Check that an estimator can fail during scoring in `cross_validate` and
- # that we can optionally replace it with `error_score`. In the multimetric
- # case also check the result of a non-failing scorer where the other scorers
- # are failing.
- X, y = load_iris(return_X_y=True)
- clf = LogisticRegression(max_iter=5).fit(X, y)
- error_msg = "This scorer is supposed to fail!!!"
- failing_scorer = partial(_failing_scorer, error_msg=error_msg)
- if with_multimetric:
- non_failing_scorer = make_scorer(mean_squared_error)
- scoring = {
- "score_1": failing_scorer,
- "score_2": non_failing_scorer,
- "score_3": failing_scorer,
- }
- else:
- scoring = failing_scorer
- if error_score == "raise":
- with pytest.raises(ValueError, match=error_msg):
- cross_validate(
- clf,
- X,
- y,
- cv=3,
- scoring=scoring,
- return_train_score=return_train_score,
- error_score=error_score,
- )
- else:
- warning_msg = (
- "Scoring failed. The score on this train-test partition for "
- f"these parameters will be set to {error_score}"
- )
- with pytest.warns(UserWarning, match=warning_msg):
- results = cross_validate(
- clf,
- X,
- y,
- cv=3,
- scoring=scoring,
- return_train_score=return_train_score,
- error_score=error_score,
- )
- for key in results:
- if "_score" in key:
- if "_score_2" in key:
- # check the test (and optionally train) score for the
- # scorer that should be non-failing
- for i in results[key]:
- assert isinstance(i, float)
- else:
- # check the test (and optionally train) score for all
- # scorers that should be assigned to `error_score`.
- assert_allclose(results[key], error_score)


def three_params_scorer(i, j, k):
    return 3.4213


@pytest.mark.parametrize(
    "train_score, scorer, verbose, split_prg, cdt_prg, expected",
    [
        (
            False,
            three_params_scorer,
            2,
            (1, 3),
            (0, 1),
            r"\[CV\] END ...................................................."
            r" total time= 0.\ds",
        ),
        (
            True,
            {"sc1": three_params_scorer, "sc2": three_params_scorer},
            3,
            (1, 3),
            (0, 1),
            r"\[CV 2/3\] END sc1: \(train=3.421, test=3.421\) sc2: "
            r"\(train=3.421, test=3.421\) total time= 0.\ds",
        ),
        (
            False,
            {"sc1": three_params_scorer, "sc2": three_params_scorer},
            10,
            (1, 3),
            (0, 1),
            r"\[CV 2/3; 1/1\] END ....... sc1: \(test=3.421\) sc2: \(test=3.421\)"
            r" total time= 0.\ds",
        ),
    ],
)
def test_fit_and_score_verbosity(
    capsys, train_score, scorer, verbose, split_prg, cdt_prg, expected
):
    X, y = make_classification(n_samples=30, random_state=0)
    clf = SVC(kernel="linear", random_state=0)
    train, test = next(ShuffleSplit().split(X))

    # test print without train score
    fit_and_score_args = [clf, X, y, scorer, train, test, verbose, None, None]
    fit_and_score_kwargs = {
        "return_train_score": train_score,
        "split_progress": split_prg,
        "candidate_progress": cdt_prg,
    }
    _fit_and_score(*fit_and_score_args, **fit_and_score_kwargs)
    out, _ = capsys.readouterr()
    outlines = out.split("\n")
    if len(outlines) > 2:
        assert re.match(expected, outlines[1])
    else:
        assert re.match(expected, outlines[0])


def test_score():
    error_message = "scoring must return a number, got None"

    def two_params_scorer(estimator, X_test):
        return None

    fit_and_score_args = [None, None, None, two_params_scorer]
    with pytest.raises(ValueError, match=error_message):
        _score(*fit_and_score_args, error_score=np.nan)


def test_callable_multimetric_confusion_matrix_cross_validate():
    def custom_scorer(clf, X, y):
        y_pred = clf.predict(X)
        cm = confusion_matrix(y, y_pred)
        return {"tn": cm[0, 0], "fp": cm[0, 1], "fn": cm[1, 0], "tp": cm[1, 1]}

    X, y = make_classification(n_samples=40, n_features=4, random_state=42)
    est = LinearSVC(dual="auto", random_state=42)
    est.fit(X, y)
    cv_results = cross_validate(est, X, y, cv=5, scoring=custom_scorer)

    score_names = ["tn", "fp", "fn", "tp"]
    for name in score_names:
        assert "test_{}".format(name) in cv_results


def test_learning_curve_partial_fit_regressors():
    """Check that regressors with partial_fit are supported.

    Non-regression test for #22981.
    """
    X, y = make_regression(random_state=42)

    # Does not error
    learning_curve(MLPRegressor(), X, y, exploit_incremental_learning=True, cv=2)


def test_cross_validate_return_indices(global_random_seed):
    """Check the behaviour of `return_indices` in `cross_validate`."""
    X, y = load_iris(return_X_y=True)
    X = scale(X)  # scale features for better convergence
    estimator = LogisticRegression()

    cv = KFold(n_splits=3, shuffle=True, random_state=global_random_seed)
    cv_results = cross_validate(estimator, X, y, cv=cv, n_jobs=2, return_indices=False)
    assert "indices" not in cv_results

    cv_results = cross_validate(estimator, X, y, cv=cv, n_jobs=2, return_indices=True)
    assert "indices" in cv_results
    train_indices = cv_results["indices"]["train"]
    test_indices = cv_results["indices"]["test"]
    assert len(train_indices) == cv.n_splits
    assert len(test_indices) == cv.n_splits

    # iris has 150 samples, so 3-fold CV gives 100 train and 50 test samples
    # per split
    assert_array_equal([indices.size for indices in train_indices], 100)
    assert_array_equal([indices.size for indices in test_indices], 50)

    for split_idx, (expected_train_idx, expected_test_idx) in enumerate(cv.split(X, y)):
        assert_array_equal(train_indices[split_idx], expected_train_idx)
        assert_array_equal(test_indices[split_idx], expected_test_idx)
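

# A hedged usage sketch (illustration only, not part of the test suite): with
# return_indices=True, the exact rows used in each split can be recovered from
# the results dict, e.g. to inspect per-fold composition.
def _demo_cross_validate_return_indices_usage():
    X, y = load_iris(return_X_y=True)
    cv_results = cross_validate(
        LogisticRegression(max_iter=1000), scale(X), y, cv=3, return_indices=True
    )
    for train_idx, test_idx in zip(
        cv_results["indices"]["train"], cv_results["indices"]["test"]
    ):
        # every sample appears exactly once across the train/test partition
        assert len(train_idx) + len(test_idx) == len(X)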