import itertools
import os
import warnings
from functools import partial
import numpy as np
import pytest
from numpy.testing import (
    assert_allclose,
    assert_almost_equal,
    assert_array_almost_equal,
    assert_array_equal,
)
from scipy import sparse
from sklearn.base import clone
from sklearn.datasets import load_iris, make_classification
from sklearn.exceptions import ConvergenceWarning
from sklearn.linear_model import SGDClassifier
from sklearn.linear_model._logistic import (
    LogisticRegression as LogisticRegressionDefault,
)
from sklearn.linear_model._logistic import (
    LogisticRegressionCV as LogisticRegressionCVDefault,
)
from sklearn.linear_model._logistic import (
    _log_reg_scoring_path,
    _logistic_regression_path,
)
from sklearn.metrics import get_scorer, log_loss
from sklearn.model_selection import (
    GridSearchCV,
    StratifiedKFold,
    cross_val_score,
    train_test_split,
)
from sklearn.preprocessing import LabelEncoder, StandardScaler, scale
from sklearn.svm import l1_min_c
from sklearn.utils import _IS_32BIT, compute_class_weight, shuffle
from sklearn.utils._testing import ignore_warnings, skip_if_no_parallel
pytestmark = pytest.mark.filterwarnings(
    "error::sklearn.exceptions.ConvergenceWarning:sklearn.*"
)
# Fixing random_state helps prevent ConvergenceWarnings
LogisticRegression = partial(LogisticRegressionDefault, random_state=0)
LogisticRegressionCV = partial(LogisticRegressionCVDefault, random_state=0)
SOLVERS = ("lbfgs", "liblinear", "newton-cg", "newton-cholesky", "sag", "saga")
X = [[-1, 0], [0, 1], [1, 1]]
X_sp = sparse.csr_matrix(X)
Y1 = [0, 1, 1]
Y2 = [2, 1, 0]
iris = load_iris()
def check_predictions(clf, X, y):
    """Check that the model is able to fit the classification data"""
    n_samples = len(y)
    classes = np.unique(y)
    n_classes = classes.shape[0]
    predicted = clf.fit(X, y).predict(X)
    assert_array_equal(clf.classes_, classes)
    assert predicted.shape == (n_samples,)
    assert_array_equal(predicted, y)
    probabilities = clf.predict_proba(X)
    assert probabilities.shape == (n_samples, n_classes)
    assert_array_almost_equal(probabilities.sum(axis=1), np.ones(n_samples))
    assert_array_equal(probabilities.argmax(axis=1), y)
def test_predict_2_classes():
    # Simple sanity check on a 2-class dataset.
    # Make sure it predicts the correct result on simple datasets.
    check_predictions(LogisticRegression(random_state=0), X, Y1)
    check_predictions(LogisticRegression(random_state=0), X_sp, Y1)
    check_predictions(LogisticRegression(C=100, random_state=0), X, Y1)
    check_predictions(LogisticRegression(C=100, random_state=0), X_sp, Y1)
    check_predictions(LogisticRegression(fit_intercept=False, random_state=0), X, Y1)
    check_predictions(LogisticRegression(fit_intercept=False, random_state=0), X_sp, Y1)
def test_logistic_cv_mock_scorer():
    class MockScorer:
        def __init__(self):
            self.calls = 0
            self.scores = [0.1, 0.4, 0.8, 0.5]
        def __call__(self, model, X, y, sample_weight=None):
            score = self.scores[self.calls % len(self.scores)]
            self.calls += 1
            return score
    mock_scorer = MockScorer()
    Cs = [1, 2, 3, 4]
    cv = 2
    lr = LogisticRegressionCV(Cs=Cs, scoring=mock_scorer, cv=cv)
    X, y = make_classification(random_state=0)
    lr.fit(X, y)
    # Cs[2] has the highest score (0.8) from MockScorer
    assert lr.C_[0] == Cs[2]
    # scorer called 8 times (cv*len(Cs))
    assert mock_scorer.calls == cv * len(Cs)
    # reset mock_scorer
    mock_scorer.calls = 0
    custom_score = lr.score(X, lr.predict(X))
    assert custom_score == mock_scorer.scores[0]
    assert mock_scorer.calls == 1
@skip_if_no_parallel
def test_lr_liblinear_warning():
    n_samples, n_features = iris.data.shape
    target = iris.target_names[iris.target]
    lr = LogisticRegression(solver="liblinear", n_jobs=2)
    warning_message = (
        "'n_jobs' > 1 does not have any effect when"
        " 'solver' is set to 'liblinear'. Got 'n_jobs'"
        " = 2."
    )
    with pytest.warns(UserWarning, match=warning_message):
        lr.fit(iris.data, target)
def test_predict_3_classes():
    check_predictions(LogisticRegression(C=10), X, Y2)
    check_predictions(LogisticRegression(C=10), X_sp, Y2)
@pytest.mark.parametrize(
    "clf",
    [
        LogisticRegression(C=len(iris.data), solver="liblinear", multi_class="ovr"),
        LogisticRegression(C=len(iris.data), solver="lbfgs", multi_class="multinomial"),
        LogisticRegression(
            C=len(iris.data), solver="newton-cg", multi_class="multinomial"
        ),
        LogisticRegression(
            C=len(iris.data), solver="sag", tol=1e-2, multi_class="ovr", random_state=42
        ),
        LogisticRegression(
            C=len(iris.data),
            solver="saga",
            tol=1e-2,
            multi_class="ovr",
            random_state=42,
        ),
        LogisticRegression(
            C=len(iris.data), solver="newton-cholesky", multi_class="ovr"
        ),
    ],
)
def test_predict_iris(clf):
    """Test logistic regression with the iris dataset.
    Test that both multinomial and OvR solvers handle multiclass data correctly and
    give a good accuracy score (>0.95) for the training data.
    """
    n_samples, n_features = iris.data.shape
    target = iris.target_names[iris.target]
    if clf.solver == "lbfgs":
        # lbfgs has convergence issues on the iris data with its default max_iter=100
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", ConvergenceWarning)
            clf.fit(iris.data, target)
    else:
        clf.fit(iris.data, target)
    assert_array_equal(np.unique(target), clf.classes_)
    pred = clf.predict(iris.data)
    assert np.mean(pred == target) > 0.95
    probabilities = clf.predict_proba(iris.data)
    assert_allclose(probabilities.sum(axis=1), np.ones(n_samples))
    pred = iris.target_names[probabilities.argmax(axis=1)]
    assert np.mean(pred == target) > 0.95
@pytest.mark.parametrize("LR", [LogisticRegression, LogisticRegressionCV])
def test_check_solver_option(LR):
    X, y = iris.data, iris.target
    # only the 'liblinear' and 'newton-cholesky' solvers reject the multinomial backend
    for solver in ["liblinear", "newton-cholesky"]:
        msg = f"Solver {solver} does not support a multinomial backend."
        lr = LR(solver=solver, multi_class="multinomial")
        with pytest.raises(ValueError, match=msg):
            lr.fit(X, y)
    # all solvers except 'liblinear' and 'saga'
    for solver in ["lbfgs", "newton-cg", "newton-cholesky", "sag"]:
        msg = "Solver %s supports only 'l2' or 'none' penalties," % solver
        lr = LR(solver=solver, penalty="l1", multi_class="ovr")
        with pytest.raises(ValueError, match=msg):
            lr.fit(X, y)
    for solver in ["lbfgs", "newton-cg", "newton-cholesky", "sag", "saga"]:
        msg = "Solver %s supports only dual=False, got dual=True" % solver
        lr = LR(solver=solver, dual=True, multi_class="ovr")
        with pytest.raises(ValueError, match=msg):
            lr.fit(X, y)
    # only saga supports elasticnet. We only test for liblinear because the
    # error is raised before for the other solvers (solver %s supports only l2
    # penalties)
    for solver in ["liblinear"]:
        msg = "Only 'saga' solver supports elasticnet penalty, got solver={}.".format(
            solver
        )
        lr = LR(solver=solver, penalty="elasticnet")
        with pytest.raises(ValueError, match=msg):
            lr.fit(X, y)
    # liblinear does not support penalty='none'
    # (LogisticRegressionCV does not support penalty='none' at all)
    if LR is LogisticRegression:
        msg = "penalty='none' is not supported for the liblinear solver"
        lr = LR(penalty="none", solver="liblinear")
        with pytest.raises(ValueError, match=msg):
            lr.fit(X, y)
@pytest.mark.parametrize("LR", [LogisticRegression, LogisticRegressionCV])
def test_elasticnet_l1_ratio_err_helpful(LR):
    # Check that an informative error message is raised when penalty="elasticnet"
    # but l1_ratio is not specified.
    model = LR(penalty="elasticnet", solver="saga")
    with pytest.raises(ValueError, match=r".*l1_ratio.*"):
        model.fit(np.array([[1, 2], [3, 4]]), np.array([0, 1]))
@pytest.mark.parametrize("solver", ["lbfgs", "newton-cg", "sag", "saga"])
def test_multinomial_binary(solver):
    # Test multinomial LR on a binary problem.
    target = (iris.target > 0).astype(np.intp)
    target = np.array(["setosa", "not-setosa"])[target]
    clf = LogisticRegression(
        solver=solver, multi_class="multinomial", random_state=42, max_iter=2000
    )
    clf.fit(iris.data, target)
    assert clf.coef_.shape == (1, iris.data.shape[1])
    assert clf.intercept_.shape == (1,)
    assert_array_equal(clf.predict(iris.data), target)
    mlr = LogisticRegression(
        solver=solver, multi_class="multinomial", random_state=42, fit_intercept=False
    )
    mlr.fit(iris.data, target)
    pred = mlr.classes_[np.argmax(mlr.predict_log_proba(iris.data), axis=1)]
    assert np.mean(pred == target) > 0.9
def test_multinomial_binary_probabilities(global_random_seed):
    # Test multinomial LR gives expected probabilities based on the
    # decision function, for a binary problem.
    X, y = make_classification(random_state=global_random_seed)
    clf = LogisticRegression(
        multi_class="multinomial",
        solver="saga",
        tol=1e-3,
        random_state=global_random_seed,
    )
    clf.fit(X, y)
    decision = clf.decision_function(X)
    proba = clf.predict_proba(X)
    expected_proba_class_1 = np.exp(decision) / (np.exp(decision) + np.exp(-decision))
    expected_proba = np.c_[1 - expected_proba_class_1, expected_proba_class_1]
    assert_almost_equal(proba, expected_proba)
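# Illustrative note (not part of the original test): with the symmetric binary
# multinomial parametrization checked above, the two class scores are (-d, +d)
# where d = decision_function(X), so the softmax probability of class 1
# reduces to the logistic of 2 * d:
#     exp(d) / (exp(d) + exp(-d)) == 1 / (1 + exp(-2 * d))
# which is why the expected probabilities can be built directly from the
# decision function values.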
def test_sparsify():
    # Test sparsify and densify members.
    n_samples, n_features = iris.data.shape
    target = iris.target_names[iris.target]
    X = scale(iris.data)
    clf = LogisticRegression(random_state=0).fit(X, target)
    pred_d_d = clf.decision_function(X)
    clf.sparsify()
    assert sparse.issparse(clf.coef_)
    pred_s_d = clf.decision_function(X)
    sp_data = sparse.coo_matrix(X)
    pred_s_s = clf.decision_function(sp_data)
    clf.densify()
    pred_d_s = clf.decision_function(sp_data)
    assert_array_almost_equal(pred_d_d, pred_s_d)
    assert_array_almost_equal(pred_d_d, pred_s_s)
    assert_array_almost_equal(pred_d_d, pred_d_s)
def test_inconsistent_input():
    # Test that an exception is raised on inconsistent input
    rng = np.random.RandomState(0)
    X_ = rng.random_sample((5, 10))
    y_ = np.ones(X_.shape[0])
    y_[0] = 0
    clf = LogisticRegression(random_state=0)
    # Wrong dimensions for training data
    y_wrong = y_[:-1]
    with pytest.raises(ValueError):
        clf.fit(X, y_wrong)
    # Wrong dimensions for test data
    with pytest.raises(ValueError):
        clf.fit(X_, y_).predict(rng.random_sample((3, 12)))
def test_write_parameters():
    # Test that we can write to coef_ and intercept_
    clf = LogisticRegression(random_state=0)
    clf.fit(X, Y1)
    clf.coef_[:] = 0
    clf.intercept_[:] = 0
    assert_array_almost_equal(clf.decision_function(X), 0)
def test_nan():
    # Test proper NaN handling.
    # Regression test for Issue #252: fit used to go into an infinite loop.
    Xnan = np.array(X, dtype=np.float64)
    Xnan[0, 1] = np.nan
    logistic = LogisticRegression(random_state=0)
    with pytest.raises(ValueError):
        logistic.fit(Xnan, Y1)
def test_consistency_path():
    # Test that the path algorithm is consistent
    rng = np.random.RandomState(0)
    X = np.concatenate((rng.randn(100, 2) + [1, 1], rng.randn(100, 2)))
    y = [1] * 100 + [-1] * 100
    Cs = np.logspace(0, 4, 10)
    f = ignore_warnings
    # can't test with fit_intercept=True since LIBLINEAR
    # penalizes the intercept
    for solver in ["sag", "saga"]:
        coefs, Cs, _ = f(_logistic_regression_path)(
            X,
            y,
            Cs=Cs,
            fit_intercept=False,
            tol=1e-5,
            solver=solver,
            max_iter=1000,
            multi_class="ovr",
            random_state=0,
        )
        for i, C in enumerate(Cs):
            lr = LogisticRegression(
                C=C,
                fit_intercept=False,
                tol=1e-5,
                solver=solver,
                multi_class="ovr",
                random_state=0,
                max_iter=1000,
            )
            lr.fit(X, y)
            lr_coef = lr.coef_.ravel()
            assert_array_almost_equal(
                lr_coef, coefs[i], decimal=4, err_msg="with solver = %s" % solver
            )
    # test for fit_intercept=True
    for solver in ("lbfgs", "newton-cg", "newton-cholesky", "liblinear", "sag", "saga"):
        Cs = [1e3]
        coefs, Cs, _ = f(_logistic_regression_path)(
            X,
            y,
            Cs=Cs,
            tol=1e-6,
            solver=solver,
            intercept_scaling=10000.0,
            random_state=0,
            multi_class="ovr",
        )
        lr = LogisticRegression(
            C=Cs[0],
            tol=1e-6,
            intercept_scaling=10000.0,
            random_state=0,
            multi_class="ovr",
            solver=solver,
        )
        lr.fit(X, y)
        lr_coef = np.concatenate([lr.coef_.ravel(), lr.intercept_])
        assert_array_almost_equal(
            lr_coef, coefs[0], decimal=4, err_msg="with solver = %s" % solver
        )
def test_logistic_regression_path_convergence_fail():
    rng = np.random.RandomState(0)
    X = np.concatenate((rng.randn(100, 2) + [1, 1], rng.randn(100, 2)))
    y = [1] * 100 + [-1] * 100
    Cs = [1e3]
    # Check that the convergence message points both to model-agnostic
    # advice (scaling the data) and to the logistic regression specific
    # documentation that includes hints on the solver configuration.
    with pytest.warns(ConvergenceWarning) as record:
        _logistic_regression_path(
            X, y, Cs=Cs, tol=0.0, max_iter=1, random_state=0, verbose=0
        )
    assert len(record) == 1
    warn_msg = record[0].message.args[0]
    assert "lbfgs failed to converge" in warn_msg
    assert "Increase the number of iterations" in warn_msg
    assert "scale the data" in warn_msg
    assert "linear_model.html#logistic-regression" in warn_msg
def test_liblinear_dual_random_state():
    # random_state is relevant for liblinear solver only if dual=True
    X, y = make_classification(n_samples=20, random_state=0)
    lr1 = LogisticRegression(
        random_state=0,
        dual=True,
        tol=1e-3,
        solver="liblinear",
        multi_class="ovr",
    )
    lr1.fit(X, y)
    lr2 = LogisticRegression(
        random_state=0,
        dual=True,
        tol=1e-3,
        solver="liblinear",
        multi_class="ovr",
    )
    lr2.fit(X, y)
    lr3 = LogisticRegression(
        random_state=8,
        dual=True,
        tol=1e-3,
        solver="liblinear",
        multi_class="ovr",
    )
    lr3.fit(X, y)
    # same result for same random state
    assert_array_almost_equal(lr1.coef_, lr2.coef_)
    # different results for different random states
    msg = "Arrays are not almost equal to 6 decimals"
    with pytest.raises(AssertionError, match=msg):
        assert_array_almost_equal(lr1.coef_, lr3.coef_)
def test_logistic_cv():
    # test for LogisticRegressionCV object
    n_samples, n_features = 50, 5
    rng = np.random.RandomState(0)
    X_ref = rng.randn(n_samples, n_features)
    y = np.sign(X_ref.dot(5 * rng.randn(n_features)))
    X_ref -= X_ref.mean()
    X_ref /= X_ref.std()
    lr_cv = LogisticRegressionCV(
        Cs=[1.0], fit_intercept=False, solver="liblinear", multi_class="ovr", cv=3
    )
    lr_cv.fit(X_ref, y)
    lr = LogisticRegression(
        C=1.0, fit_intercept=False, solver="liblinear", multi_class="ovr"
    )
    lr.fit(X_ref, y)
    assert_array_almost_equal(lr.coef_, lr_cv.coef_)
    assert_array_equal(lr_cv.coef_.shape, (1, n_features))
    assert_array_equal(lr_cv.classes_, [-1, 1])
    assert len(lr_cv.classes_) == 2
    coefs_paths = np.asarray(list(lr_cv.coefs_paths_.values()))
    assert_array_equal(coefs_paths.shape, (1, 3, 1, n_features))
    assert_array_equal(lr_cv.Cs_.shape, (1,))
    scores = np.asarray(list(lr_cv.scores_.values()))
    assert_array_equal(scores.shape, (1, 3, 1))
@pytest.mark.parametrize(
    "scoring, multiclass_agg_list",
    [
        ("accuracy", [""]),
        ("precision", ["_macro", "_weighted"]),
        # no need to test for micro averaging because it
        # is the same as accuracy for f1, precision,
        # and recall (see https://github.com/
        # scikit-learn/scikit-learn/pull/
        # 11578#discussion_r203250062)
        ("f1", ["_macro", "_weighted"]),
        ("neg_log_loss", [""]),
        ("recall", ["_macro", "_weighted"]),
    ],
)
def test_logistic_cv_multinomial_score(scoring, multiclass_agg_list):
    # test that LogisticRegressionCV uses the right score to compute its
    # cross-validation scores when using a multinomial scoring
    # see https://github.com/scikit-learn/scikit-learn/issues/8720
    X, y = make_classification(
        n_samples=100, random_state=0, n_classes=3, n_informative=6
    )
    train, test = np.arange(80), np.arange(80, 100)
    lr = LogisticRegression(C=1.0, multi_class="multinomial")
    # we use lbfgs to support multinomial
    params = lr.get_params()
    # we store the params to set them further in _log_reg_scoring_path
    for key in ["C", "n_jobs", "warm_start"]:
        del params[key]
    lr.fit(X[train], y[train])
    for averaging in multiclass_agg_list:
        scorer = get_scorer(scoring + averaging)
        assert_array_almost_equal(
            _log_reg_scoring_path(
                X, y, train, test, Cs=[1.0], scoring=scorer, **params
            )[2][0],
            scorer(lr, X[test], y[test]),
        )
def test_multinomial_logistic_regression_string_inputs():
    # Test with string labels for LogisticRegression(CV)
    n_samples, n_features, n_classes = 50, 5, 3
    X_ref, y = make_classification(
        n_samples=n_samples,
        n_features=n_features,
        n_classes=n_classes,
        n_informative=3,
        random_state=0,
    )
    y_str = LabelEncoder().fit(["bar", "baz", "foo"]).inverse_transform(y)
    # For numerical labels, let y values be taken from set (-1, 0, 1)
    y = np.array(y) - 1
    # Test for string labels
    lr = LogisticRegression(multi_class="multinomial")
    lr_cv = LogisticRegressionCV(multi_class="multinomial", Cs=3)
    lr_str = LogisticRegression(multi_class="multinomial")
    lr_cv_str = LogisticRegressionCV(multi_class="multinomial", Cs=3)
    lr.fit(X_ref, y)
    lr_cv.fit(X_ref, y)
    lr_str.fit(X_ref, y_str)
    lr_cv_str.fit(X_ref, y_str)
    assert_array_almost_equal(lr.coef_, lr_str.coef_)
    assert sorted(lr_str.classes_) == ["bar", "baz", "foo"]
    assert_array_almost_equal(lr_cv.coef_, lr_cv_str.coef_)
    assert sorted(lr_str.classes_) == ["bar", "baz", "foo"]
    assert sorted(lr_cv_str.classes_) == ["bar", "baz", "foo"]
    # The predictions should be in original labels
    assert sorted(np.unique(lr_str.predict(X_ref))) == ["bar", "baz", "foo"]
    assert sorted(np.unique(lr_cv_str.predict(X_ref))) == ["bar", "baz", "foo"]
    # Make sure class weights can be given with string labels
    lr_cv_str = LogisticRegression(
        class_weight={"bar": 1, "baz": 2, "foo": 0}, multi_class="multinomial"
    ).fit(X_ref, y_str)
    assert sorted(np.unique(lr_cv_str.predict(X_ref))) == ["bar", "baz"]
def test_logistic_cv_sparse():
    X, y = make_classification(n_samples=50, n_features=5, random_state=0)
    X[X < 1.0] = 0.0
    csr = sparse.csr_matrix(X)
    clf = LogisticRegressionCV()
    clf.fit(X, y)
    clfs = LogisticRegressionCV()
    clfs.fit(csr, y)
    assert_array_almost_equal(clfs.coef_, clf.coef_)
    assert_array_almost_equal(clfs.intercept_, clf.intercept_)
    assert clfs.C_ == clf.C_
def test_ovr_multinomial_iris():
    # Test that OvR and multinomial are correct using the iris dataset.
    train, target = iris.data, iris.target
    n_samples, n_features = train.shape
    # The cv indices from stratified kfold (where stratification is done based
    # on the fine-grained iris classes, i.e., before classes 0 and 1 are
    # conflated) are used for both clf and clf1
    n_cv = 2
    cv = StratifiedKFold(n_cv)
    precomputed_folds = list(cv.split(train, target))
    # Train clf on the original dataset where classes 0 and 1 are separated
    clf = LogisticRegressionCV(cv=precomputed_folds, multi_class="ovr")
    clf.fit(train, target)
    # Conflate classes 0 and 1 and train clf1 on this modified dataset
    clf1 = LogisticRegressionCV(cv=precomputed_folds, multi_class="ovr")
    target_copy = target.copy()
    target_copy[target_copy == 0] = 1
    clf1.fit(train, target_copy)
    # Ensure that what OvR learns for class 2 is the same regardless of whether
    # classes 0 and 1 are separated or not
    assert_allclose(clf.scores_[2], clf1.scores_[2])
    assert_allclose(clf.intercept_[2:], clf1.intercept_)
    assert_allclose(clf.coef_[2][np.newaxis, :], clf1.coef_)
    # Test the shape of various attributes.
    assert clf.coef_.shape == (3, n_features)
    assert_array_equal(clf.classes_, [0, 1, 2])
    coefs_paths = np.asarray(list(clf.coefs_paths_.values()))
    assert coefs_paths.shape == (3, n_cv, 10, n_features + 1)
    assert clf.Cs_.shape == (10,)
    scores = np.asarray(list(clf.scores_.values()))
    assert scores.shape == (3, n_cv, 10)
    # Test that for the iris data multinomial gives a better accuracy than OvR
    for solver in ["lbfgs", "newton-cg", "sag", "saga"]:
        max_iter = 500 if solver in ["sag", "saga"] else 30
        clf_multi = LogisticRegressionCV(
            solver=solver,
            multi_class="multinomial",
            max_iter=max_iter,
            random_state=42,
            tol=1e-3 if solver in ["sag", "saga"] else 1e-2,
            cv=2,
        )
        if solver == "lbfgs":
            # lbfgs requires scaling to avoid convergence warnings
            train = scale(train)
        clf_multi.fit(train, target)
        multi_score = clf_multi.score(train, target)
        ovr_score = clf.score(train, target)
        assert multi_score > ovr_score
        # Test attributes of LogisticRegressionCV
        assert clf.coef_.shape == clf_multi.coef_.shape
        assert_array_equal(clf_multi.classes_, [0, 1, 2])
        coefs_paths = np.asarray(list(clf_multi.coefs_paths_.values()))
        assert coefs_paths.shape == (3, n_cv, 10, n_features + 1)
        assert clf_multi.Cs_.shape == (10,)
        scores = np.asarray(list(clf_multi.scores_.values()))
        assert scores.shape == (3, n_cv, 10)
def test_logistic_regression_solvers():
    """Test solvers converge to the same result."""
    X, y = make_classification(n_features=10, n_informative=5, random_state=0)
    params = dict(fit_intercept=False, random_state=42, multi_class="ovr")
    regressors = {
        solver: LogisticRegression(solver=solver, **params).fit(X, y)
        for solver in SOLVERS
    }
    for solver_1, solver_2 in itertools.combinations(regressors, r=2):
        assert_array_almost_equal(
            regressors[solver_1].coef_, regressors[solver_2].coef_, decimal=3
        )
def test_logistic_regression_solvers_multiclass():
    """Test solvers converge to the same result for multiclass problems."""
    X, y = make_classification(
        n_samples=20, n_features=20, n_informative=10, n_classes=3, random_state=0
    )
    tol = 1e-7
    params = dict(fit_intercept=False, tol=tol, random_state=42, multi_class="ovr")
    # Override max iteration count for specific solvers to allow for
    # proper convergence.
    solver_max_iter = {"sag": 1000, "saga": 10000}
    regressors = {
        solver: LogisticRegression(
            solver=solver, max_iter=solver_max_iter.get(solver, 100), **params
        ).fit(X, y)
        for solver in SOLVERS
    }
    for solver_1, solver_2 in itertools.combinations(regressors, r=2):
        assert_array_almost_equal(
            regressors[solver_1].coef_, regressors[solver_2].coef_, decimal=4
        )
@pytest.mark.parametrize("weight", [{0: 0.1, 1: 0.2}, {0: 0.1, 1: 0.2, 2: 0.5}])
@pytest.mark.parametrize("class_weight", ["weight", "balanced"])
def test_logistic_regressioncv_class_weights(weight, class_weight):
    """Test class_weight for LogisticRegressionCV."""
    n_classes = len(weight)
    if class_weight == "weight":
        class_weight = weight
    X, y = make_classification(
        n_samples=30,
        n_features=3,
        n_repeated=0,
        n_informative=3,
        n_redundant=0,
        n_classes=n_classes,
        random_state=0,
    )
    params = dict(
        Cs=1,
        fit_intercept=False,
        multi_class="ovr",
        class_weight=class_weight,
    )
    clf_lbfgs = LogisticRegressionCV(solver="lbfgs", **params)
    clf_lbfgs.fit(X, y)
    for solver in set(SOLVERS) - set(["lbfgs"]):
        clf = LogisticRegressionCV(solver=solver, **params)
        if solver in ("sag", "saga"):
            clf.set_params(tol=1e-5, max_iter=10000, random_state=0)
        clf.fit(X, y)
        assert_allclose(clf.coef_, clf_lbfgs.coef_, rtol=1e-3)
def test_logistic_regression_sample_weights():
    X, y = make_classification(
        n_samples=20, n_features=5, n_informative=3, n_classes=2, random_state=0
    )
    sample_weight = y + 1
    for LR in [LogisticRegression, LogisticRegressionCV]:
        kw = {"random_state": 42, "fit_intercept": False, "multi_class": "ovr"}
        if LR is LogisticRegressionCV:
            kw.update({"Cs": 3, "cv": 3})
        # Test that passing sample_weight as ones is the same as
        # not passing it at all (default None)
        for solver in ["lbfgs", "liblinear"]:
            clf_sw_none = LR(solver=solver, **kw)
            clf_sw_ones = LR(solver=solver, **kw)
            clf_sw_none.fit(X, y)
            clf_sw_ones.fit(X, y, sample_weight=np.ones(y.shape[0]))
            assert_allclose(clf_sw_none.coef_, clf_sw_ones.coef_, rtol=1e-4)
        # Test that sample weights work the same with the lbfgs,
        # newton-cg, newton-cholesky and 'sag' solvers
        clf_sw_lbfgs = LR(**kw)
        clf_sw_lbfgs.fit(X, y, sample_weight=sample_weight)
        for solver in set(SOLVERS) - set(("lbfgs", "saga")):
            clf_sw = LR(solver=solver, tol=1e-10 if solver == "sag" else 1e-5, **kw)
            # ignore convergence warning due to small dataset with sag
            with ignore_warnings():
                clf_sw.fit(X, y, sample_weight=sample_weight)
            assert_allclose(clf_sw_lbfgs.coef_, clf_sw.coef_, rtol=1e-4)
        # Test that passing class_weight as [1,2] is the same as
        # passing class weight = [1,1] but adjusting sample weights
        # to be 2 for all instances of class 1
        for solver in ["lbfgs", "liblinear"]:
            clf_cw_12 = LR(solver=solver, class_weight={0: 1, 1: 2}, **kw)
            clf_cw_12.fit(X, y)
            clf_sw_12 = LR(solver=solver, **kw)
            clf_sw_12.fit(X, y, sample_weight=sample_weight)
            assert_allclose(clf_cw_12.coef_, clf_sw_12.coef_, rtol=1e-4)
    # Test the above for l1 penalty and l2 penalty with dual=True,
    # since the patched liblinear code is different.
    clf_cw = LogisticRegression(
        solver="liblinear",
        fit_intercept=False,
        class_weight={0: 1, 1: 2},
        penalty="l1",
        tol=1e-5,
        random_state=42,
        multi_class="ovr",
    )
    clf_cw.fit(X, y)
    clf_sw = LogisticRegression(
        solver="liblinear",
        fit_intercept=False,
        penalty="l1",
        tol=1e-5,
        random_state=42,
        multi_class="ovr",
    )
    clf_sw.fit(X, y, sample_weight)
    assert_array_almost_equal(clf_cw.coef_, clf_sw.coef_, decimal=4)
    clf_cw = LogisticRegression(
        solver="liblinear",
        fit_intercept=False,
        class_weight={0: 1, 1: 2},
        penalty="l2",
        dual=True,
        random_state=42,
        multi_class="ovr",
    )
    clf_cw.fit(X, y)
    clf_sw = LogisticRegression(
        solver="liblinear",
        fit_intercept=False,
        penalty="l2",
        dual=True,
        random_state=42,
        multi_class="ovr",
    )
    clf_sw.fit(X, y, sample_weight)
    assert_array_almost_equal(clf_cw.coef_, clf_sw.coef_, decimal=4)
def _compute_class_weight_dictionary(y):
    # helper for returning a dictionary instead of an array
    classes = np.unique(y)
    class_weight = compute_class_weight("balanced", classes=classes, y=y)
    class_weight_dict = dict(zip(classes, class_weight))
    return class_weight_dict
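# Illustrative note (not part of the original test file): with
# class_weight="balanced", compute_class_weight returns weights of
# n_samples / (n_classes * np.bincount(y)), so for y = [0, 0, 0, 1] the helper
# above would return {0: 4 / (2 * 3), 1: 4 / (2 * 1)}, i.e. {0: 0.666..., 1: 2.0}.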
def test_logistic_regression_class_weights():
    # Scale data to avoid convergence warnings with the lbfgs solver
    X_iris = scale(iris.data)
    # Multinomial case: remove 90% of class 0
    X = X_iris[45:, :]
    y = iris.target[45:]
    solvers = ("lbfgs", "newton-cg")
    class_weight_dict = _compute_class_weight_dictionary(y)
    for solver in solvers:
        clf1 = LogisticRegression(
            solver=solver, multi_class="multinomial", class_weight="balanced"
        )
        clf2 = LogisticRegression(
            solver=solver, multi_class="multinomial", class_weight=class_weight_dict
        )
        clf1.fit(X, y)
        clf2.fit(X, y)
        assert_array_almost_equal(clf1.coef_, clf2.coef_, decimal=4)
    # Binary case: remove 90% of class 0 and 100% of class 2
    X = X_iris[45:100, :]
    y = iris.target[45:100]
    class_weight_dict = _compute_class_weight_dictionary(y)
    for solver in set(SOLVERS) - set(("sag", "saga")):
        clf1 = LogisticRegression(
            solver=solver, multi_class="ovr", class_weight="balanced"
        )
        clf2 = LogisticRegression(
            solver=solver, multi_class="ovr", class_weight=class_weight_dict
        )
        clf1.fit(X, y)
        clf2.fit(X, y)
        assert_array_almost_equal(clf1.coef_, clf2.coef_, decimal=6)
def test_logistic_regression_multinomial():
    # Tests for the multinomial option in logistic regression
    # Some basic attributes of Logistic Regression
    n_samples, n_features, n_classes = 50, 20, 3
    X, y = make_classification(
        n_samples=n_samples,
        n_features=n_features,
        n_informative=10,
        n_classes=n_classes,
        random_state=0,
    )
    X = StandardScaler(with_mean=False).fit_transform(X)
    # 'lbfgs' is used as the reference solver
    solver = "lbfgs"
    ref_i = LogisticRegression(solver=solver, multi_class="multinomial")
    ref_w = LogisticRegression(
        solver=solver, multi_class="multinomial", fit_intercept=False
    )
    ref_i.fit(X, y)
    ref_w.fit(X, y)
    assert ref_i.coef_.shape == (n_classes, n_features)
    assert ref_w.coef_.shape == (n_classes, n_features)
    for solver in ["sag", "saga", "newton-cg"]:
        clf_i = LogisticRegression(
            solver=solver,
            multi_class="multinomial",
            random_state=42,
            max_iter=2000,
            tol=1e-7,
        )
        clf_w = LogisticRegression(
            solver=solver,
            multi_class="multinomial",
            random_state=42,
            max_iter=2000,
            tol=1e-7,
            fit_intercept=False,
        )
        clf_i.fit(X, y)
        clf_w.fit(X, y)
        assert clf_i.coef_.shape == (n_classes, n_features)
        assert clf_w.coef_.shape == (n_classes, n_features)
        # Compare solutions between lbfgs and the other solvers
        assert_allclose(ref_i.coef_, clf_i.coef_, rtol=1e-2)
        assert_allclose(ref_w.coef_, clf_w.coef_, rtol=1e-2)
        assert_allclose(ref_i.intercept_, clf_i.intercept_, rtol=1e-2)
    # Test that the path gives almost the same results. However, since in this
    # case we take the average of the coefs after fitting across all the
    # folds, it need not be exactly the same.
    for solver in ["lbfgs", "newton-cg", "sag", "saga"]:
        clf_path = LogisticRegressionCV(
            solver=solver, max_iter=2000, tol=1e-6, multi_class="multinomial", Cs=[1.0]
        )
        clf_path.fit(X, y)
        assert_allclose(clf_path.coef_, ref_i.coef_, rtol=2e-2)
        assert_allclose(clf_path.intercept_, ref_i.intercept_, rtol=2e-2)
def test_liblinear_decision_function_zero():
    # Test negative prediction when decision_function values are zero.
    # Liblinear predicts the positive class when decision_function values
    # are zero. This is a test to verify that we do not do the same.
    # See Issue: https://github.com/scikit-learn/scikit-learn/issues/3600
    # and the PR https://github.com/scikit-learn/scikit-learn/pull/3623
    X, y = make_classification(n_samples=5, n_features=5, random_state=0)
    clf = LogisticRegression(fit_intercept=False, solver="liblinear", multi_class="ovr")
    clf.fit(X, y)
    # Dummy data such that the decision function becomes zero.
    X = np.zeros((5, 5))
    assert_array_equal(clf.predict(X), np.zeros(5))
def test_liblinear_logregcv_sparse():
    # Test LogRegCV with solver='liblinear' works for sparse matrices
    X, y = make_classification(n_samples=10, n_features=5, random_state=0)
    clf = LogisticRegressionCV(solver="liblinear", multi_class="ovr")
    clf.fit(sparse.csr_matrix(X), y)
def test_saga_sparse():
    # Test LogRegCV with solver='saga' works for sparse matrices
    X, y = make_classification(n_samples=10, n_features=5, random_state=0)
    clf = LogisticRegressionCV(solver="saga", tol=1e-2)
    clf.fit(sparse.csr_matrix(X), y)
def test_logreg_intercept_scaling_zero():
    # Test that intercept_scaling is ignored when fit_intercept is False
    clf = LogisticRegression(fit_intercept=False)
    clf.fit(X, Y1)
    assert clf.intercept_ == 0.0
def test_logreg_l1():
    # Because liblinear penalizes the intercept and saga does not, we do not
    # fit the intercept to make it possible to compare the coefficients of
    # the two models at convergence.
    rng = np.random.RandomState(42)
    n_samples = 50
    X, y = make_classification(n_samples=n_samples, n_features=20, random_state=0)
    X_noise = rng.normal(size=(n_samples, 3))
    X_constant = np.ones(shape=(n_samples, 2))
    X = np.concatenate((X, X_noise, X_constant), axis=1)
    lr_liblinear = LogisticRegression(
        penalty="l1",
        C=1.0,
        solver="liblinear",
        fit_intercept=False,
        multi_class="ovr",
        tol=1e-10,
    )
    lr_liblinear.fit(X, y)
    lr_saga = LogisticRegression(
        penalty="l1",
        C=1.0,
        solver="saga",
        fit_intercept=False,
        multi_class="ovr",
        max_iter=1000,
        tol=1e-10,
    )
    lr_saga.fit(X, y)
    assert_array_almost_equal(lr_saga.coef_, lr_liblinear.coef_)
    # Noise and constant features should be regularized to zero by the l1
    # penalty
    assert_array_almost_equal(lr_liblinear.coef_[0, -5:], np.zeros(5))
    assert_array_almost_equal(lr_saga.coef_[0, -5:], np.zeros(5))
def test_logreg_l1_sparse_data():
    # Because liblinear penalizes the intercept and saga does not, we do not
    # fit the intercept to make it possible to compare the coefficients of
    # the two models at convergence.
    rng = np.random.RandomState(42)
    n_samples = 50
    X, y = make_classification(n_samples=n_samples, n_features=20, random_state=0)
    X_noise = rng.normal(scale=0.1, size=(n_samples, 3))
    X_constant = np.zeros(shape=(n_samples, 2))
    X = np.concatenate((X, X_noise, X_constant), axis=1)
    X[X < 1] = 0
    X = sparse.csr_matrix(X)
    lr_liblinear = LogisticRegression(
        penalty="l1",
        C=1.0,
        solver="liblinear",
        fit_intercept=False,
        multi_class="ovr",
        tol=1e-10,
    )
    lr_liblinear.fit(X, y)
    lr_saga = LogisticRegression(
        penalty="l1",
        C=1.0,
        solver="saga",
        fit_intercept=False,
        multi_class="ovr",
        max_iter=1000,
        tol=1e-10,
    )
    lr_saga.fit(X, y)
    assert_array_almost_equal(lr_saga.coef_, lr_liblinear.coef_)
    # Noise and constant features should be regularized to zero by the l1
    # penalty
    assert_array_almost_equal(lr_liblinear.coef_[0, -5:], np.zeros(5))
    assert_array_almost_equal(lr_saga.coef_[0, -5:], np.zeros(5))
    # Check that solving on the sparse and dense data yields the same results
    lr_saga_dense = LogisticRegression(
        penalty="l1",
        C=1.0,
        solver="saga",
        fit_intercept=False,
        multi_class="ovr",
        max_iter=1000,
        tol=1e-10,
    )
    lr_saga_dense.fit(X.toarray(), y)
    assert_array_almost_equal(lr_saga.coef_, lr_saga_dense.coef_)
@pytest.mark.parametrize("random_seed", [42])
@pytest.mark.parametrize("penalty", ["l1", "l2"])
def test_logistic_regression_cv_refit(random_seed, penalty):
    # Test that when refit=True, logistic regression cv with the saga solver
    # converges to the same solution as logistic regression with a fixed
    # regularization parameter.
    # Internally the LogisticRegressionCV model uses a warm start to refit on
    # the full data model with the optimal C found by CV. As the penalized
    # logistic regression loss is convex, we should still recover exactly
    # the same solution as long as the stopping criterion is strict enough (and
    # that there are no exactly duplicated features when penalty='l1').
    X, y = make_classification(n_samples=100, n_features=20, random_state=random_seed)
    common_params = dict(
        solver="saga",
        penalty=penalty,
        random_state=random_seed,
        max_iter=1000,
        tol=1e-12,
    )
    lr_cv = LogisticRegressionCV(Cs=[1.0], refit=True, **common_params)
    lr_cv.fit(X, y)
    lr = LogisticRegression(C=1.0, **common_params)
    lr.fit(X, y)
    assert_array_almost_equal(lr_cv.coef_, lr.coef_)
def test_logreg_predict_proba_multinomial():
    X, y = make_classification(
        n_samples=10, n_features=20, random_state=0, n_classes=3, n_informative=10
    )
    # Predicted probabilities using the multinomial (cross-entropy) loss should
    # give a smaller loss than those using the ovr method.
    clf_multi = LogisticRegression(multi_class="multinomial", solver="lbfgs")
    clf_multi.fit(X, y)
    clf_multi_loss = log_loss(y, clf_multi.predict_proba(X))
    clf_ovr = LogisticRegression(multi_class="ovr", solver="lbfgs")
    clf_ovr.fit(X, y)
    clf_ovr_loss = log_loss(y, clf_ovr.predict_proba(X))
    assert clf_ovr_loss > clf_multi_loss
    # Predicted probabilities using the soft-max function should give a
    # smaller loss than those using the logistic function.
    clf_multi_loss = log_loss(y, clf_multi.predict_proba(X))
    clf_wrong_loss = log_loss(y, clf_multi._predict_proba_lr(X))
    assert clf_wrong_loss > clf_multi_loss
@pytest.mark.parametrize("max_iter", np.arange(1, 5))
@pytest.mark.parametrize("multi_class", ["ovr", "multinomial"])
@pytest.mark.parametrize(
    "solver, message",
    [
        (
            "newton-cg",
            "newton-cg failed to converge. Increase the number of iterations.",
        ),
        (
            "liblinear",
            "Liblinear failed to converge, increase the number of iterations.",
        ),
        ("sag", "The max_iter was reached which means the coef_ did not converge"),
        ("saga", "The max_iter was reached which means the coef_ did not converge"),
        ("lbfgs", "lbfgs failed to converge"),
        ("newton-cholesky", "Newton solver did not converge after [0-9]* iterations"),
    ],
)
def test_max_iter(max_iter, multi_class, solver, message):
    # Test that the maximum number of iterations is reached
    X, y_bin = iris.data, iris.target.copy()
    y_bin[y_bin == 2] = 0
    if solver in ("liblinear", "newton-cholesky") and multi_class == "multinomial":
        pytest.skip("'multinomial' is not supported by liblinear and newton-cholesky")
    if solver == "newton-cholesky" and max_iter > 1:
        pytest.skip("solver newton-cholesky might converge very fast")
    lr = LogisticRegression(
        max_iter=max_iter,
        tol=1e-15,
        multi_class=multi_class,
        random_state=0,
        solver=solver,
    )
    with pytest.warns(ConvergenceWarning, match=message):
        lr.fit(X, y_bin)
    assert lr.n_iter_[0] == max_iter
@pytest.mark.parametrize("solver", SOLVERS)
def test_n_iter(solver):
    # Test that self.n_iter_ has the correct format.
    X, y = iris.data, iris.target
    if solver == "lbfgs":
        # lbfgs requires scaling to avoid convergence warnings
        X = scale(X)
    n_classes = np.unique(y).shape[0]
    assert n_classes == 3
    # Also generate a binary classification sub-problem.
    y_bin = y.copy()
    y_bin[y_bin == 2] = 0
    n_Cs = 4
    n_cv_fold = 2
    # Binary classification case
    clf = LogisticRegression(tol=1e-2, C=1.0, solver=solver, random_state=42)
    clf.fit(X, y_bin)
    assert clf.n_iter_.shape == (1,)
    clf_cv = LogisticRegressionCV(
        tol=1e-2, solver=solver, Cs=n_Cs, cv=n_cv_fold, random_state=42
    )
    clf_cv.fit(X, y_bin)
    assert clf_cv.n_iter_.shape == (1, n_cv_fold, n_Cs)
    # OvR case
    clf.set_params(multi_class="ovr").fit(X, y)
    assert clf.n_iter_.shape == (n_classes,)
    clf_cv.set_params(multi_class="ovr").fit(X, y)
    assert clf_cv.n_iter_.shape == (n_classes, n_cv_fold, n_Cs)
    # multinomial case
    if solver in ("liblinear", "newton-cholesky"):
        # These solvers only support one-vs-rest multiclass classification.
        return
    # When using the multinomial objective function, there is a single
    # optimization problem to solve for all classes at once:
    clf.set_params(multi_class="multinomial").fit(X, y)
    assert clf.n_iter_.shape == (1,)
    clf_cv.set_params(multi_class="multinomial").fit(X, y)
    assert clf_cv.n_iter_.shape == (1, n_cv_fold, n_Cs)
@pytest.mark.parametrize("solver", sorted(set(SOLVERS) - set(["liblinear"])))
@pytest.mark.parametrize("warm_start", (True, False))
@pytest.mark.parametrize("fit_intercept", (True, False))
@pytest.mark.parametrize("multi_class", ["ovr", "multinomial"])
def test_warm_start(solver, warm_start, fit_intercept, multi_class):
    # A 1-iteration second fit on the same data should give almost the same
    # result with warm starting, and a quite different result without warm
    # starting. Warm starting does not work with the liblinear solver.
    X, y = iris.data, iris.target
    if solver == "newton-cholesky" and multi_class == "multinomial":
        # the solver only supports OvR
        return
    clf = LogisticRegression(
        tol=1e-4,
        multi_class=multi_class,
        warm_start=warm_start,
        solver=solver,
        random_state=42,
        fit_intercept=fit_intercept,
    )
    with ignore_warnings(category=ConvergenceWarning):
        clf.fit(X, y)
        coef_1 = clf.coef_
        clf.max_iter = 1
        clf.fit(X, y)
    cum_diff = np.sum(np.abs(coef_1 - clf.coef_))
    msg = (
        "Warm starting issue with %s solver in %s mode "
        "with fit_intercept=%s and warm_start=%s"
        % (solver, multi_class, str(fit_intercept), str(warm_start))
    )
    if warm_start:
        assert 2.0 > cum_diff, msg
    else:
        assert cum_diff > 2.0, msg
def test_saga_vs_liblinear():
    iris = load_iris()
    X, y = iris.data, iris.target
    X = np.concatenate([X] * 3)
    y = np.concatenate([y] * 3)
    X_bin = X[y <= 1]
    y_bin = y[y <= 1] * 2 - 1
    X_sparse, y_sparse = make_classification(
        n_samples=50, n_features=20, random_state=0
    )
    X_sparse = sparse.csr_matrix(X_sparse)
    for X, y in ((X_bin, y_bin), (X_sparse, y_sparse)):
        for penalty in ["l1", "l2"]:
            n_samples = X.shape[0]
            # alpha=1e-3 is time consuming
            for alpha in np.logspace(-1, 1, 3):
                saga = LogisticRegression(
                    C=1.0 / (n_samples * alpha),
                    solver="saga",
                    multi_class="ovr",
                    max_iter=200,
                    fit_intercept=False,
                    penalty=penalty,
                    random_state=0,
                    tol=1e-6,
                )
                liblinear = LogisticRegression(
                    C=1.0 / (n_samples * alpha),
                    solver="liblinear",
                    multi_class="ovr",
                    max_iter=200,
                    fit_intercept=False,
                    penalty=penalty,
                    random_state=0,
                    tol=1e-6,
                )
                saga.fit(X, y)
                liblinear.fit(X, y)
                # Convergence for alpha=1e-3 is very slow
                assert_array_almost_equal(saga.coef_, liblinear.coef_, 3)
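# Illustrative note (not part of the original test file): the loop above
# converts between the two common regularization parametrizations. A
# per-sample penalty strength alpha corresponds to C = 1 / (n_samples * alpha)
# in LogisticRegression, e.g. alpha = 0.1 on the 300-sample binary iris subset
# used above gives C = 1 / 30.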
@pytest.mark.parametrize("multi_class", ["ovr", "multinomial"])
@pytest.mark.parametrize(
    "solver", ["liblinear", "newton-cg", "newton-cholesky", "saga"]
)
@pytest.mark.parametrize("fit_intercept", [False, True])
def test_dtype_match(solver, multi_class, fit_intercept):
    # Test that np.float32 input data is not cast to np.float64 when possible
    # and that the output is approximately the same no matter the input format.
    if solver in ("liblinear", "newton-cholesky") and multi_class == "multinomial":
        pytest.skip(f"Solver={solver} does not support multinomial logistic.")
    out32_type = np.float64 if solver == "liblinear" else np.float32
    X_32 = np.array(X).astype(np.float32)
    y_32 = np.array(Y1).astype(np.float32)
    X_64 = np.array(X).astype(np.float64)
    y_64 = np.array(Y1).astype(np.float64)
    X_sparse_32 = sparse.csr_matrix(X, dtype=np.float32)
    X_sparse_64 = sparse.csr_matrix(X, dtype=np.float64)
    solver_tol = 5e-4
    lr_templ = LogisticRegression(
        solver=solver,
        multi_class=multi_class,
        random_state=42,
        tol=solver_tol,
        fit_intercept=fit_intercept,
    )
    # Check 32-bit type consistency
    lr_32 = clone(lr_templ)
    lr_32.fit(X_32, y_32)
    assert lr_32.coef_.dtype == out32_type
    # Check 32-bit type consistency with sparsity
    lr_32_sparse = clone(lr_templ)
    lr_32_sparse.fit(X_sparse_32, y_32)
    assert lr_32_sparse.coef_.dtype == out32_type
    # Check 64-bit type consistency
    lr_64 = clone(lr_templ)
    lr_64.fit(X_64, y_64)
    assert lr_64.coef_.dtype == np.float64
    # Check 64-bit type consistency with sparsity
    lr_64_sparse = clone(lr_templ)
    lr_64_sparse.fit(X_sparse_64, y_64)
    assert lr_64_sparse.coef_.dtype == np.float64
    # solver_tol bounds the norm of the loss gradient
    # dw ~= inv(H)*grad ==> |dw| ~= |inv(H)| * solver_tol, where H - hessian
    #
    # See https://github.com/scikit-learn/scikit-learn/pull/13645
    #
    # with Z = np.hstack((np.ones((3,1)), np.array(X)))
    # In [8]: np.linalg.norm(np.diag([0,2,2]) + np.linalg.inv((Z.T @ Z)/4))
    # Out[8]: 1.7193336918135917
    # factor of 2 to get the ball diameter
    atol = 2 * 1.72 * solver_tol
    if os.name == "nt" and _IS_32BIT:
        # FIXME
        atol = 1e-2
    # Check accuracy consistency
    assert_allclose(lr_32.coef_, lr_64.coef_.astype(np.float32), atol=atol)
    if solver == "saga" and fit_intercept:
        # FIXME: SAGA on sparse data fits the intercept inaccurately with the
        # default tol and max_iter parameters.
        atol = 1e-1
    assert_allclose(lr_32.coef_, lr_32_sparse.coef_, atol=atol)
    assert_allclose(lr_64.coef_, lr_64_sparse.coef_, atol=atol)
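# Illustrative note (not part of the original test file): with
# solver_tol = 5e-4, the bound above evaluates to
# atol = 2 * 1.72 * 5e-4 = 1.72e-3, i.e. coefficients fitted in float32 and
# float64 are expected to agree to roughly three decimal places.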


def test_warm_start_converge_LR():
    # Test that the logistic regression converges on warm start, with
    # multi_class='multinomial'. Non-regression test for #10836
    rng = np.random.RandomState(0)
    X = np.concatenate((rng.randn(100, 2) + [1, 1], rng.randn(100, 2)))
    y = np.array([1] * 100 + [-1] * 100)
    lr_no_ws = LogisticRegression(
        multi_class="multinomial", solver="sag", warm_start=False, random_state=0
    )
    lr_ws = LogisticRegression(
        multi_class="multinomial", solver="sag", warm_start=True, random_state=0
    )
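
    # With warm_start=True each successive fit starts from the previous
    # coefficients; after a few refits the loss should match a cold-start fit.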
    lr_no_ws_loss = log_loss(y, lr_no_ws.fit(X, y).predict_proba(X))
    for i in range(5):
        lr_ws.fit(X, y)
    lr_ws_loss = log_loss(y, lr_ws.predict_proba(X))
    assert_allclose(lr_no_ws_loss, lr_ws_loss, rtol=1e-5)


def test_elastic_net_coeffs():
    # make sure elasticnet penalty gives different coefficients from l1 and l2
    # with saga solver (l1_ratio different from 0 or 1)
    X, y = make_classification(random_state=0)

    C = 2.0
    l1_ratio = 0.5
    coeffs = list()
    for penalty, ratio in (("elasticnet", l1_ratio), ("l1", None), ("l2", None)):
        lr = LogisticRegression(
            penalty=penalty,
            C=C,
            solver="saga",
            random_state=0,
            l1_ratio=ratio,
            tol=1e-3,
            max_iter=200,
        )
        lr.fit(X, y)
        coeffs.append(lr.coef_)

    elastic_net_coeffs, l1_coeffs, l2_coeffs = coeffs
    # make sure coeffs differ by at least .1
    assert not np.allclose(elastic_net_coeffs, l1_coeffs, rtol=0, atol=0.1)
    assert not np.allclose(elastic_net_coeffs, l2_coeffs, rtol=0, atol=0.1)
    assert not np.allclose(l2_coeffs, l1_coeffs, rtol=0, atol=0.1)


@pytest.mark.parametrize("C", [0.001, 0.1, 1, 10, 100, 1000, 1e6])
@pytest.mark.parametrize("penalty, l1_ratio", [("l1", 1), ("l2", 0)])
def test_elastic_net_l1_l2_equivalence(C, penalty, l1_ratio):
    # Make sure elasticnet is equivalent to l1 when l1_ratio=1 and to l2 when
    # l1_ratio=0.
    X, y = make_classification(random_state=0)
    lr_enet = LogisticRegression(
        penalty="elasticnet",
        C=C,
        l1_ratio=l1_ratio,
        solver="saga",
        random_state=0,
        tol=1e-2,
    )
    lr_expected = LogisticRegression(
        penalty=penalty, C=C, solver="saga", random_state=0, tol=1e-2
    )
    lr_enet.fit(X, y)
    lr_expected.fit(X, y)

    assert_array_almost_equal(lr_enet.coef_, lr_expected.coef_)


@pytest.mark.parametrize("C", [0.001, 1, 100, 1e6])
def test_elastic_net_vs_l1_l2(C):
    # Make sure that elasticnet with grid search on l1_ratio gives same or
    # better results than just l1 or just l2.
    X, y = make_classification(500, random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    param_grid = {"l1_ratio": np.linspace(0, 1, 5)}

    enet_clf = LogisticRegression(
        penalty="elasticnet", C=C, solver="saga", random_state=0, tol=1e-2
    )
    gs = GridSearchCV(enet_clf, param_grid, refit=True)

    l1_clf = LogisticRegression(
        penalty="l1", C=C, solver="saga", random_state=0, tol=1e-2
    )
    l2_clf = LogisticRegression(
        penalty="l2", C=C, solver="saga", random_state=0, tol=1e-2
    )

    for clf in (gs, l1_clf, l2_clf):
        clf.fit(X_train, y_train)

    assert gs.score(X_test, y_test) >= l1_clf.score(X_test, y_test)
    assert gs.score(X_test, y_test) >= l2_clf.score(X_test, y_test)


@pytest.mark.parametrize("C", np.logspace(-3, 2, 4))
@pytest.mark.parametrize("l1_ratio", [0.1, 0.5, 0.9])
def test_LogisticRegression_elastic_net_objective(C, l1_ratio):
    # Check that training with a penalty matching the objective leads
    # to a lower objective.
    # Here we train a logistic regression with l2 (a) and elasticnet (b)
    # penalties, and compute the elasticnet objective. That of a should be
    # greater than that of b (both objectives are convex).
    X, y = make_classification(
        n_samples=1000,
        n_classes=2,
        n_features=20,
        n_informative=10,
        n_redundant=0,
        n_repeated=0,
        random_state=0,
    )
    X = scale(X)

    lr_enet = LogisticRegression(
        penalty="elasticnet",
        solver="saga",
        random_state=0,
        C=C,
        l1_ratio=l1_ratio,
        fit_intercept=False,
    )
    lr_l2 = LogisticRegression(
        penalty="l2", solver="saga", random_state=0, C=C, fit_intercept=False
    )
    lr_enet.fit(X, y)
    lr_l2.fit(X, y)
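
    # Elastic-net objective evaluated for both fits:
    # C * mean log-loss + l1_ratio * ||w||_1 + (1 - l1_ratio) / 2 * ||w||_2^2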
    def enet_objective(lr):
        coef = lr.coef_.ravel()
        obj = C * log_loss(y, lr.predict_proba(X))
        obj += l1_ratio * np.sum(np.abs(coef))
        obj += (1.0 - l1_ratio) * 0.5 * np.dot(coef, coef)
        return obj

    assert enet_objective(lr_enet) < enet_objective(lr_l2)


@pytest.mark.parametrize("multi_class", ("ovr", "multinomial"))
def test_LogisticRegressionCV_GridSearchCV_elastic_net(multi_class):
    # make sure LogisticRegressionCV gives same best params (l1 and C) as
    # GridSearchCV when penalty is elasticnet

    if multi_class == "ovr":
        # This is actually binary classification, ovr multiclass is treated in
        # test_LogisticRegressionCV_GridSearchCV_elastic_net_ovr
        X, y = make_classification(random_state=0)
    else:
        X, y = make_classification(
            n_samples=100, n_classes=3, n_informative=3, random_state=0
        )

    cv = StratifiedKFold(5)

    l1_ratios = np.linspace(0, 1, 3)
    Cs = np.logspace(-4, 4, 3)

    lrcv = LogisticRegressionCV(
        penalty="elasticnet",
        Cs=Cs,
        solver="saga",
        cv=cv,
        l1_ratios=l1_ratios,
        random_state=0,
        multi_class=multi_class,
        tol=1e-2,
    )
    lrcv.fit(X, y)

    param_grid = {"C": Cs, "l1_ratio": l1_ratios}
    lr = LogisticRegression(
        penalty="elasticnet",
        solver="saga",
        random_state=0,
        multi_class=multi_class,
        tol=1e-2,
    )
    gs = GridSearchCV(lr, param_grid, cv=cv)
    gs.fit(X, y)

    assert gs.best_params_["l1_ratio"] == lrcv.l1_ratio_[0]
    assert gs.best_params_["C"] == lrcv.C_[0]


def test_LogisticRegressionCV_GridSearchCV_elastic_net_ovr():
    # make sure LogisticRegressionCV gives same best params (l1 and C) as
    # GridSearchCV when penalty is elasticnet and multiclass is ovr. We can't
    # compare best_params like in the previous test because
    # LogisticRegressionCV with multi_class='ovr' will have one C and one
    # l1_param for each class, while LogisticRegression will share the
    # parameters over the *n_classes* classifiers.
    X, y = make_classification(
        n_samples=100, n_classes=3, n_informative=3, random_state=0
    )
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    cv = StratifiedKFold(5)

    l1_ratios = np.linspace(0, 1, 3)
    Cs = np.logspace(-4, 4, 3)

    lrcv = LogisticRegressionCV(
        penalty="elasticnet",
        Cs=Cs,
        solver="saga",
        cv=cv,
        l1_ratios=l1_ratios,
        random_state=0,
        multi_class="ovr",
        tol=1e-2,
    )
    lrcv.fit(X_train, y_train)

    param_grid = {"C": Cs, "l1_ratio": l1_ratios}
    lr = LogisticRegression(
        penalty="elasticnet",
        solver="saga",
        random_state=0,
        multi_class="ovr",
        tol=1e-2,
    )
    gs = GridSearchCV(lr, param_grid, cv=cv)
    gs.fit(X_train, y_train)

    # Check that predictions are 80% the same
    assert (lrcv.predict(X_train) == gs.predict(X_train)).mean() >= 0.8
    assert (lrcv.predict(X_test) == gs.predict(X_test)).mean() >= 0.8


@pytest.mark.parametrize("penalty", ("l2", "elasticnet"))
@pytest.mark.parametrize("multi_class", ("ovr", "multinomial", "auto"))
def test_LogisticRegressionCV_no_refit(penalty, multi_class):
    # Test LogisticRegressionCV attribute shapes when refit is False
    n_classes = 3
    n_features = 20
    X, y = make_classification(
        n_samples=200,
        n_classes=n_classes,
        n_informative=n_classes,
        n_features=n_features,
        random_state=0,
    )

    Cs = np.logspace(-4, 4, 3)
    if penalty == "elasticnet":
        l1_ratios = np.linspace(0, 1, 2)
    else:
        l1_ratios = None

    lrcv = LogisticRegressionCV(
        penalty=penalty,
        Cs=Cs,
        solver="saga",
        l1_ratios=l1_ratios,
        random_state=0,
        multi_class=multi_class,
        tol=1e-2,
        refit=False,
    )
    lrcv.fit(X, y)
    assert lrcv.C_.shape == (n_classes,)
    assert lrcv.l1_ratio_.shape == (n_classes,)
    assert lrcv.coef_.shape == (n_classes, n_features)


def test_LogisticRegressionCV_elasticnet_attribute_shapes():
    # Make sure the shapes of scores_ and coefs_paths_ attributes are correct
    # when using elasticnet (added one dimension for l1_ratios)
    n_classes = 3
    n_features = 20
    X, y = make_classification(
        n_samples=200,
        n_classes=n_classes,
        n_informative=n_classes,
        n_features=n_features,
        random_state=0,
    )

    Cs = np.logspace(-4, 4, 3)
    l1_ratios = np.linspace(0, 1, 2)

    n_folds = 2
    lrcv = LogisticRegressionCV(
        penalty="elasticnet",
        Cs=Cs,
        solver="saga",
        cv=n_folds,
        l1_ratios=l1_ratios,
        multi_class="ovr",
        random_state=0,
        tol=1e-2,
    )
    lrcv.fit(X, y)

    coefs_paths = np.asarray(list(lrcv.coefs_paths_.values()))
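    # The last axis is n_features + 1 because the intercept is appended to each
    # coefficient vector along the path (fit_intercept defaults to True).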
    assert coefs_paths.shape == (
        n_classes,
        n_folds,
        Cs.size,
        l1_ratios.size,
        n_features + 1,
    )

    scores = np.asarray(list(lrcv.scores_.values()))
    assert scores.shape == (n_classes, n_folds, Cs.size, l1_ratios.size)

    assert lrcv.n_iter_.shape == (n_classes, n_folds, Cs.size, l1_ratios.size)


def test_l1_ratio_non_elasticnet():
    msg = (
        r"l1_ratio parameter is only used when penalty is"
        r" 'elasticnet'\. Got \(penalty=l1\)"
    )
    with pytest.warns(UserWarning, match=msg):
        LogisticRegression(penalty="l1", solver="saga", l1_ratio=0.5).fit(X, Y1)


@pytest.mark.parametrize("C", np.logspace(-3, 2, 4))
@pytest.mark.parametrize("l1_ratio", [0.1, 0.5, 0.9])
def test_elastic_net_versus_sgd(C, l1_ratio):
    # Compare elasticnet penalty in LogisticRegression() and SGD(loss='log')
    n_samples = 500
    X, y = make_classification(
        n_samples=n_samples,
        n_classes=2,
        n_features=5,
        n_informative=5,
        n_redundant=0,
        n_repeated=0,
        random_state=1,
    )
    X = scale(X)
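
    # SGDClassifier expresses regularization as a per-sample alpha while
    # LogisticRegression uses C; alpha = 1 / (C * n_samples) makes the two
    # penalized objectives comparable.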
    sgd = SGDClassifier(
        penalty="elasticnet",
        random_state=1,
        fit_intercept=False,
        tol=None,
        max_iter=2000,
        l1_ratio=l1_ratio,
        alpha=1.0 / C / n_samples,
        loss="log_loss",
    )
    log = LogisticRegression(
        penalty="elasticnet",
        random_state=1,
        fit_intercept=False,
        tol=1e-5,
        max_iter=1000,
        l1_ratio=l1_ratio,
        C=C,
        solver="saga",
    )

    sgd.fit(X, y)
    log.fit(X, y)

    assert_array_almost_equal(sgd.coef_, log.coef_, decimal=1)


def test_logistic_regression_path_coefs_multinomial():
    # Make sure that the coefs returned by logistic_regression_path when
    # multi_class='multinomial' don't override each other (used to be a
    # bug).
    X, y = make_classification(
        n_samples=200,
        n_classes=3,
        n_informative=2,
        n_redundant=0,
        n_clusters_per_class=1,
        random_state=0,
        n_features=2,
    )
    Cs = [0.00001, 1, 10000]
    coefs, _, _ = _logistic_regression_path(
        X,
        y,
        penalty="l1",
        Cs=Cs,
        solver="saga",
        random_state=0,
        multi_class="multinomial",
    )
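
    # The path coefficients for very different Cs must differ; asserting that
    # the "almost equal" comparison raises checks they were not overwritten.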
    with pytest.raises(AssertionError):
        assert_array_almost_equal(coefs[0], coefs[1], decimal=1)
    with pytest.raises(AssertionError):
        assert_array_almost_equal(coefs[0], coefs[2], decimal=1)
    with pytest.raises(AssertionError):
        assert_array_almost_equal(coefs[1], coefs[2], decimal=1)


@pytest.mark.parametrize(
    "est",
    [
        LogisticRegression(random_state=0, max_iter=500),
        LogisticRegressionCV(random_state=0, cv=3, Cs=3, tol=1e-3, max_iter=500),
    ],
    ids=lambda x: x.__class__.__name__,
)
@pytest.mark.parametrize("solver", SOLVERS)
def test_logistic_regression_multi_class_auto(est, solver):
    # check multi_class='auto' => multi_class='ovr'
    # iff binary y or liblinear or newton-cholesky

    def fit(X, y, **kw):
        return clone(est).set_params(**kw).fit(X, y)

    scaled_data = scale(iris.data)
    X = scaled_data[::10]
    X2 = scaled_data[1::10]
    y_multi = iris.target[::10]
    y_bin = y_multi == 0

    est_auto_bin = fit(X, y_bin, multi_class="auto", solver=solver)
    est_ovr_bin = fit(X, y_bin, multi_class="ovr", solver=solver)
    assert_allclose(est_auto_bin.coef_, est_ovr_bin.coef_)
    assert_allclose(est_auto_bin.predict_proba(X2), est_ovr_bin.predict_proba(X2))

    est_auto_multi = fit(X, y_multi, multi_class="auto", solver=solver)
    if solver in ("liblinear", "newton-cholesky"):
        est_ovr_multi = fit(X, y_multi, multi_class="ovr", solver=solver)
        assert_allclose(est_auto_multi.coef_, est_ovr_multi.coef_)
        assert_allclose(
            est_auto_multi.predict_proba(X2), est_ovr_multi.predict_proba(X2)
        )
    else:
        est_multi_multi = fit(X, y_multi, multi_class="multinomial", solver=solver)
        assert_allclose(est_auto_multi.coef_, est_multi_multi.coef_)
        assert_allclose(
            est_auto_multi.predict_proba(X2), est_multi_multi.predict_proba(X2)
        )

        # Make sure multi_class='ovr' is distinct from ='multinomial'
        assert not np.allclose(
            est_auto_bin.coef_,
            fit(X, y_bin, multi_class="multinomial", solver=solver).coef_,
        )
        assert not np.allclose(
            est_auto_bin.coef_,
            fit(X, y_multi, multi_class="multinomial", solver=solver).coef_,
        )


@pytest.mark.parametrize("solver", sorted(set(SOLVERS) - set(["liblinear"])))
def test_penalty_none(solver):
    # - Make sure warning is raised if penalty=None and C is set to a
    #   non-default value.
    # - Make sure setting penalty=None is equivalent to setting C=np.inf with
    #   l2 penalty.
    X, y = make_classification(n_samples=1000, n_redundant=0, random_state=0)

    msg = "Setting penalty=None will ignore the C"
    lr = LogisticRegression(penalty=None, solver=solver, C=4)
    with pytest.warns(UserWarning, match=msg):
        lr.fit(X, y)

    lr_none = LogisticRegression(penalty=None, solver=solver, random_state=0)
    lr_l2_C_inf = LogisticRegression(
        penalty="l2", C=np.inf, solver=solver, random_state=0
    )
    pred_none = lr_none.fit(X, y).predict(X)
    pred_l2_C_inf = lr_l2_C_inf.fit(X, y).predict(X)
    assert_array_equal(pred_none, pred_l2_C_inf)


@pytest.mark.parametrize(
    "params",
    [
        {"penalty": "l1", "dual": False, "tol": 1e-6, "max_iter": 1000},
        {"penalty": "l2", "dual": True, "tol": 1e-12, "max_iter": 1000},
        {"penalty": "l2", "dual": False, "tol": 1e-12, "max_iter": 1000},
    ],
)
def test_logisticregression_liblinear_sample_weight(params):
    # check that we support sample_weight with liblinear in all possible cases:
    # l1-primal, l2-primal, l2-dual
    X = np.array(
        [
            [1, 3],
            [1, 3],
            [1, 3],
            [1, 3],
            [2, 1],
            [2, 1],
            [2, 1],
            [2, 1],
            [3, 3],
            [3, 3],
            [3, 3],
            [3, 3],
            [4, 1],
            [4, 1],
            [4, 1],
            [4, 1],
        ],
        dtype=np.dtype("float"),
    )
    y = np.array(
        [1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2], dtype=np.dtype("int")
    )
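
    # Duplicate the data with flipped labels but give the duplicates zero
    # weight: the weighted fit on (X2, y2) must match the plain fit on (X, y).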
    X2 = np.vstack([X, X])
    y2 = np.hstack([y, 3 - y])
    sample_weight = np.ones(shape=len(y) * 2)
    sample_weight[len(y) :] = 0
    X2, y2, sample_weight = shuffle(X2, y2, sample_weight, random_state=0)

    base_clf = LogisticRegression(solver="liblinear", random_state=42)
    base_clf.set_params(**params)
    clf_no_weight = clone(base_clf).fit(X, y)
    clf_with_weight = clone(base_clf).fit(X2, y2, sample_weight=sample_weight)

    for method in ("predict", "predict_proba", "decision_function"):
        X_clf_no_weight = getattr(clf_no_weight, method)(X)
        X_clf_with_weight = getattr(clf_with_weight, method)(X)
        assert_allclose(X_clf_no_weight, X_clf_with_weight)


def test_scores_attribute_layout_elasticnet():
    # Non-regression test for issue #14955.
    # when penalty is elastic net the scores_ attribute has shape
    # (n_classes, n_Cs, n_l1_ratios)
    # We here make sure that the second dimension indeed corresponds to Cs and
    # the third dimension corresponds to l1_ratios.
    X, y = make_classification(n_samples=1000, random_state=0)
    cv = StratifiedKFold(n_splits=5)

    l1_ratios = [0.1, 0.9]
    Cs = [0.1, 1, 10]

    lrcv = LogisticRegressionCV(
        penalty="elasticnet",
        solver="saga",
        l1_ratios=l1_ratios,
        Cs=Cs,
        cv=cv,
        random_state=0,
        max_iter=250,
        tol=1e-3,
    )
    lrcv.fit(X, y)

    avg_scores_lrcv = lrcv.scores_[1].mean(axis=0)  # average over folds
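
    # lrcv.scores_[1] has shape (n_folds, n_Cs, n_l1_ratios); averaging over
    # folds lets entry (i, j) be compared against a direct cross_val_score run
    # with the matching (C, l1_ratio).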
    for i, C in enumerate(Cs):
        for j, l1_ratio in enumerate(l1_ratios):
            lr = LogisticRegression(
                penalty="elasticnet",
                solver="saga",
                C=C,
                l1_ratio=l1_ratio,
                random_state=0,
                max_iter=250,
                tol=1e-3,
            )

            avg_score_lr = cross_val_score(lr, X, y, cv=cv).mean()
            assert avg_scores_lrcv[i, j] == pytest.approx(avg_score_lr)


@pytest.mark.parametrize("fit_intercept", [False, True])
def test_multinomial_identifiability_on_iris(fit_intercept):
    """Test that the multinomial classification is identifiable.

    A multinomial with c classes can be modeled with
    probability_k = exp(X@coef_k) / sum(exp(X@coef_l), l=1..c) for k=1..c.
    This is not identifiable, unless one chooses a further constraint.
    According to [1], the maximum of the L2 penalized likelihood automatically
    satisfies the symmetric constraint:
    sum(coef_k, k=1..c) = 0

    Further details can be found in [2].

    Reference
    ---------
    .. [1] :doi:`Zhu, Ji and Trevor J. Hastie. "Classification of gene microarrays by
        penalized logistic regression". Biostatistics 5 3 (2004): 427-43.
        <10.1093/biostatistics/kxg046>`

    .. [2] :arxiv:`Noah Simon and Jerome Friedman and Trevor Hastie. (2013)
        "A Blockwise Descent Algorithm for Group-penalized Multiresponse and
        Multinomial Regression". <1311.6529>`
    """
    # Test logistic regression with the iris dataset
    n_samples, n_features = iris.data.shape
    target = iris.target_names[iris.target]
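
    # Any strictly positive L2 penalty yields the symmetric constraint above;
    # C = n_samples keeps the regularization mild relative to the likelihood.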
    clf = LogisticRegression(
        C=len(iris.data),
        solver="lbfgs",
        multi_class="multinomial",
        fit_intercept=fit_intercept,
    )
    # Scaling X to ease convergence.
    X_scaled = scale(iris.data)
    clf.fit(X_scaled, target)

    # axis=0 is sum over classes
    assert_allclose(clf.coef_.sum(axis=0), 0, atol=1e-10)
    if fit_intercept:
        assert clf.intercept_.sum(axis=0) == pytest.approx(0, abs=1e-15)


@pytest.mark.parametrize("multi_class", ["ovr", "multinomial", "auto"])
@pytest.mark.parametrize("class_weight", [{0: 1.0, 1: 10.0, 2: 1.0}, "balanced"])
def test_sample_weight_not_modified(multi_class, class_weight):
    X, y = load_iris(return_X_y=True)
    n_samples = len(X)
    W = np.ones(n_samples)
    W[: n_samples // 2] = 2
    expected = W.copy()

    clf = LogisticRegression(
        random_state=0, class_weight=class_weight, max_iter=200, multi_class=multi_class
    )
    clf.fit(X, y, sample_weight=W)
    assert_allclose(expected, W)


@pytest.mark.parametrize("solver", SOLVERS)
def test_large_sparse_matrix(solver, global_random_seed):
    # Solvers either accept large sparse matrices, or raise a helpful error.
    # Non-regression test for pull-request #21093.

    # generate sparse matrix with int64 indices
    X = sparse.rand(20, 10, format="csr", random_state=global_random_seed)
    for attr in ["indices", "indptr"]:
        setattr(X, attr, getattr(X, attr).astype("int64"))

    rng = np.random.RandomState(global_random_seed)
    y = rng.randint(2, size=X.shape[0])

    if solver in ["liblinear", "sag", "saga"]:
        msg = "Only sparse matrices with 32-bit integer indices"
        with pytest.raises(ValueError, match=msg):
            LogisticRegression(solver=solver).fit(X, y)
    else:
        LogisticRegression(solver=solver).fit(X, y)


def test_single_feature_newton_cg():
    # Test that Newton-CG works with a single feature and intercept.
    # Non-regression test for issue #23605.
    X = np.array([[0.5, 0.65, 1.1, 1.25, 0.8, 0.54, 0.95, 0.7]]).T
    y = np.array([1, 1, 0, 0, 1, 1, 0, 1])
    assert X.shape[1] == 1
    LogisticRegression(solver="newton-cg", fit_intercept=True).fit(X, y)


# TODO(1.4): Remove
def test_warning_on_penalty_string_none():
    # Test that a warning message is shown when penalty='none'
    target = iris.target_names[iris.target]
    lr = LogisticRegression(penalty="none")

    warning_message = (
        "`penalty='none'`has been deprecated in 1.2 and will be removed in 1.4."
        " To keep the past behaviour, set `penalty=None`."
    )
    with pytest.warns(FutureWarning, match=warning_message):
        lr.fit(iris.data, target)


def test_liblinear_not_stuck():
    # Non-regression test for
    # https://github.com/scikit-learn/scikit-learn/issues/18264
    X = iris.data.copy()
    y = iris.target.copy()
    X = X[y != 2]
    y = y[y != 2]
    X_prep = StandardScaler().fit_transform(X)
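
    # l1_min_c returns the threshold below which the l1-penalized model has
    # all-zero coefficients; the factor 10 ** (10 / 29) picks a C just above
    # that bound, as in the linked issue.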
    C = l1_min_c(X, y, loss="log") * 10 ** (10 / 29)

    clf = LogisticRegression(
        penalty="l1",
        solver="liblinear",
        tol=1e-6,
        max_iter=100,
        intercept_scaling=10000.0,
        random_state=0,
        C=C,
    )

    # test that the fit does not raise a ConvergenceWarning
    with warnings.catch_warnings():
        warnings.simplefilter("error", ConvergenceWarning)
        clf.fit(X_prep, y)


@pytest.mark.parametrize("solver", SOLVERS)
def test_zero_max_iter(solver):
    # Make sure we can inspect the state of LogisticRegression right after
    # initialization (before the first weight update).
    X, y = load_iris(return_X_y=True)
    y = y == 2
    with ignore_warnings(category=ConvergenceWarning):
        clf = LogisticRegression(solver=solver, max_iter=0).fit(X, y)
    if solver not in ["saga", "sag"]:
        # XXX: sag and saga have n_iter_ = [1]...
        assert clf.n_iter_ == 0

    if solver != "lbfgs":
        # XXX: lbfgs has already started to update the coefficients...
        assert_allclose(clf.coef_, np.zeros_like(clf.coef_))
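        # With all-zero coefficients the decision function reduces to the
        # intercept (also still zero here), so predict_proba is exactly 0.5.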
        assert_allclose(
            clf.decision_function(X),
            np.full(shape=X.shape[0], fill_value=clf.intercept_),
        )
        assert_allclose(
            clf.predict_proba(X),
            np.full(shape=(X.shape[0], 2), fill_value=0.5),
        )
    assert clf.score(X, y) < 0.7