# Authors: Danny Sullivan <dbsullivan23@gmail.com>
#          Tom Dupre la Tour <tom.dupre-la-tour@m4x.org>
#
# License: BSD 3 clause

import math
import re

import numpy as np
import pytest
import scipy.sparse as sp
from scipy.special import logsumexp

from sklearn._loss.loss import HalfMultinomialLoss
from sklearn.base import clone
from sklearn.datasets import load_iris, make_blobs, make_classification
from sklearn.linear_model import LogisticRegression, Ridge
from sklearn.linear_model._base import make_dataset
from sklearn.linear_model._linear_loss import LinearModelLoss
from sklearn.linear_model._sag import get_auto_step_size
from sklearn.linear_model._sag_fast import _multinomial_grad_loss_all_samples
from sklearn.preprocessing import LabelBinarizer, LabelEncoder
from sklearn.utils import check_random_state, compute_class_weight
from sklearn.utils._testing import (
    assert_allclose,
    assert_almost_equal,
    assert_array_almost_equal,
)
from sklearn.utils.extmath import row_norms

iris = load_iris()


# this is used for sag classification
def log_dloss(p, y):
    z = p * y
    # approximately equal and saves the computation of the log
    if z > 18.0:
        return math.exp(-z) * -y
    if z < -18.0:
        return -y
    return -y / (math.exp(z) + 1.0)


def log_loss(p, y):
    return np.mean(np.log(1.0 + np.exp(-y * p)))


# this is used for sag regression
def squared_dloss(p, y):
    return p - y


def squared_loss(p, y):
    return np.mean(0.5 * (p - y) * (p - y))


# function for measuring the penalized objective (loss + squared L2 penalty)
def get_pobj(w, alpha, myX, myy, loss):
    w = w.ravel()
    pred = np.dot(myX, w)
    p = loss(pred, myy)
    p += alpha * w.dot(w) / 2.0
    return p
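

# Rough outline of the SAG/SAGA update performed by the pure-Python reference
# implementations below, for a randomly drawn sample ``idx`` after ``len(seen)``
# distinct samples have been visited:
#
#     update = dloss(x_idx . w, y_idx) * x_idx + alpha * w
#     sum_gradient += update - gradient_memory[idx]
#     gradient_memory[idx] = update
#     w -= step_size * sum_gradient / len(seen)        # SAG step
#
# SAGA additionally applies the correction term (the difference between the
# fresh gradient and the stored one) to ``w`` immediately, scaled by
# step_size * (1 - 1 / len(seen)).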
def sag(
    X,
    y,
    step_size,
    alpha,
    n_iter=1,
    dloss=None,
    sparse=False,
    sample_weight=None,
    fit_intercept=True,
    saga=False,
):
    n_samples, n_features = X.shape[0], X.shape[1]

    weights = np.zeros(X.shape[1])
    sum_gradient = np.zeros(X.shape[1])
    gradient_memory = np.zeros((n_samples, n_features))

    intercept = 0.0
    intercept_sum_gradient = 0.0
    intercept_gradient_memory = np.zeros(n_samples)

    rng = np.random.RandomState(77)
    decay = 1.0
    seen = set()

    # sparse data has a fixed decay of .01
    if sparse:
        decay = 0.01

    for epoch in range(n_iter):
        for k in range(n_samples):
            idx = int(rng.rand() * n_samples)
            # idx = k
            entry = X[idx]
            seen.add(idx)
            p = np.dot(entry, weights) + intercept
            gradient = dloss(p, y[idx])
            if sample_weight is not None:
                gradient *= sample_weight[idx]
            update = entry * gradient + alpha * weights
            gradient_correction = update - gradient_memory[idx]
            sum_gradient += gradient_correction
            gradient_memory[idx] = update
            if saga:
                weights -= gradient_correction * step_size * (1 - 1.0 / len(seen))

            if fit_intercept:
                gradient_correction = gradient - intercept_gradient_memory[idx]
                intercept_gradient_memory[idx] = gradient
                intercept_sum_gradient += gradient_correction
                gradient_correction *= step_size * (1.0 - 1.0 / len(seen))
                if saga:
                    intercept -= (
                        step_size * intercept_sum_gradient / len(seen) * decay
                    ) + gradient_correction
                else:
                    intercept -= step_size * intercept_sum_gradient / len(seen) * decay

            weights -= step_size * sum_gradient / len(seen)

    return weights, intercept
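

# ``sag_sparse`` performs the same updates with lazy (just-in-time) weight
# maintenance: the L2 penalty is folded into a running scale ``wscale`` on the
# weights, and the cumulative step sizes stored in ``c_sum`` together with
# ``last_updated`` let each coordinate catch up on skipped ``sum_gradient``
# contributions only when it is touched again (plus one final catch-up pass
# before the rescaled weights are returned).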
def sag_sparse(
    X,
    y,
    step_size,
    alpha,
    n_iter=1,
    dloss=None,
    sample_weight=None,
    sparse=False,
    fit_intercept=True,
    saga=False,
    random_state=0,
):
    if step_size * alpha == 1.0:
        raise ZeroDivisionError(
            "Sparse sag does not handle the case step_size * alpha == 1"
        )
    n_samples, n_features = X.shape[0], X.shape[1]

    weights = np.zeros(n_features)
    sum_gradient = np.zeros(n_features)
    last_updated = np.zeros(n_features, dtype=int)
    gradient_memory = np.zeros(n_samples)
    rng = check_random_state(random_state)
    intercept = 0.0
    intercept_sum_gradient = 0.0
    wscale = 1.0
    decay = 1.0
    seen = set()

    c_sum = np.zeros(n_iter * n_samples)

    # sparse data has a fixed decay of .01
    if sparse:
        decay = 0.01

    counter = 0
    for epoch in range(n_iter):
        for k in range(n_samples):
            # idx = k
            idx = int(rng.rand() * n_samples)
            entry = X[idx]
            seen.add(idx)

            if counter >= 1:
                for j in range(n_features):
                    if last_updated[j] == 0:
                        weights[j] -= c_sum[counter - 1] * sum_gradient[j]
                    else:
                        weights[j] -= (
                            c_sum[counter - 1] - c_sum[last_updated[j] - 1]
                        ) * sum_gradient[j]
                    last_updated[j] = counter

            p = (wscale * np.dot(entry, weights)) + intercept
            gradient = dloss(p, y[idx])

            if sample_weight is not None:
                gradient *= sample_weight[idx]

            update = entry * gradient
            gradient_correction = update - (gradient_memory[idx] * entry)
            sum_gradient += gradient_correction
            if saga:
                for j in range(n_features):
                    weights[j] -= (
                        gradient_correction[j]
                        * step_size
                        * (1 - 1.0 / len(seen))
                        / wscale
                    )

            if fit_intercept:
                gradient_correction = gradient - gradient_memory[idx]
                intercept_sum_gradient += gradient_correction
                gradient_correction *= step_size * (1.0 - 1.0 / len(seen))
                if saga:
                    intercept -= (
                        step_size * intercept_sum_gradient / len(seen) * decay
                    ) + gradient_correction
                else:
                    intercept -= step_size * intercept_sum_gradient / len(seen) * decay

            gradient_memory[idx] = gradient

            wscale *= 1.0 - alpha * step_size
            if counter == 0:
                c_sum[0] = step_size / (wscale * len(seen))
            else:
                c_sum[counter] = c_sum[counter - 1] + step_size / (wscale * len(seen))

            if counter >= 1 and wscale < 1e-9:
                for j in range(n_features):
                    if last_updated[j] == 0:
                        weights[j] -= c_sum[counter] * sum_gradient[j]
                    else:
                        weights[j] -= (
                            c_sum[counter] - c_sum[last_updated[j] - 1]
                        ) * sum_gradient[j]
                    last_updated[j] = counter + 1
                c_sum[counter] = 0
                weights *= wscale
                wscale = 1.0

            counter += 1

    for j in range(n_features):
        if last_updated[j] == 0:
            weights[j] -= c_sum[counter - 1] * sum_gradient[j]
        else:
            weights[j] -= (
                c_sum[counter - 1] - c_sum[last_updated[j] - 1]
            ) * sum_gradient[j]
    weights *= wscale
    return weights, intercept
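

# The step size below is 1 / L, where L bounds the per-sample Lipschitz constant
# of the regularized gradient: max_i ||x_i||^2 + fit_intercept + alpha for the
# squared loss, and (max_i ||x_i||^2 + fit_intercept) / 4 + alpha for the log
# loss, hence the factor 4 in the classification branch.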
def get_step_size(X, alpha, fit_intercept, classification=True):
    if classification:
        return 4.0 / (np.max(np.sum(X * X, axis=1)) + fit_intercept + 4.0 * alpha)
    else:
        return 1.0 / (np.max(np.sum(X * X, axis=1)) + fit_intercept + alpha)


def test_classifier_matching():
    n_samples = 20
    X, y = make_blobs(n_samples=n_samples, centers=2, random_state=0, cluster_std=0.1)
    y[y == 0] = -1
    alpha = 1.1
    fit_intercept = True
    step_size = get_step_size(X, alpha, fit_intercept)
    for solver in ["sag", "saga"]:
        if solver == "sag":
            n_iter = 80
        else:
            # SAGA variance w.r.t. stream order is higher
            n_iter = 300
        clf = LogisticRegression(
            solver=solver,
            fit_intercept=fit_intercept,
            tol=1e-11,
            C=1.0 / alpha / n_samples,
            max_iter=n_iter,
            random_state=10,
            multi_class="ovr",
        )
        clf.fit(X, y)

        weights, intercept = sag_sparse(
            X,
            y,
            step_size,
            alpha,
            n_iter=n_iter,
            dloss=log_dloss,
            fit_intercept=fit_intercept,
            saga=solver == "saga",
        )
        weights2, intercept2 = sag(
            X,
            y,
            step_size,
            alpha,
            n_iter=n_iter,
            dloss=log_dloss,
            fit_intercept=fit_intercept,
            saga=solver == "saga",
        )
        weights = np.atleast_2d(weights)
        intercept = np.atleast_1d(intercept)
        weights2 = np.atleast_2d(weights2)
        intercept2 = np.atleast_1d(intercept2)

        assert_array_almost_equal(weights, clf.coef_, decimal=9)
        assert_array_almost_equal(intercept, clf.intercept_, decimal=9)
        assert_array_almost_equal(weights2, clf.coef_, decimal=9)
        assert_array_almost_equal(intercept2, clf.intercept_, decimal=9)


def test_regressor_matching():
    n_samples = 10
    n_features = 5

    rng = np.random.RandomState(10)
    X = rng.normal(size=(n_samples, n_features))
    true_w = rng.normal(size=n_features)
    y = X.dot(true_w)

    alpha = 1.0
    n_iter = 100
    fit_intercept = True

    step_size = get_step_size(X, alpha, fit_intercept, classification=False)
    clf = Ridge(
        fit_intercept=fit_intercept,
        tol=0.00000000001,
        solver="sag",
        alpha=alpha * n_samples,
        max_iter=n_iter,
    )
    clf.fit(X, y)

    weights1, intercept1 = sag_sparse(
        X,
        y,
        step_size,
        alpha,
        n_iter=n_iter,
        dloss=squared_dloss,
        fit_intercept=fit_intercept,
    )
    weights2, intercept2 = sag(
        X,
        y,
        step_size,
        alpha,
        n_iter=n_iter,
        dloss=squared_dloss,
        fit_intercept=fit_intercept,
    )

    assert_allclose(weights1, clf.coef_)
    assert_allclose(intercept1, clf.intercept_)
    assert_allclose(weights2, clf.coef_)
    assert_allclose(intercept2, clf.intercept_)


@pytest.mark.filterwarnings("ignore:The max_iter was reached")
def test_sag_pobj_matches_logistic_regression():
    """tests if the sag pobj matches log reg"""
    n_samples = 100
    alpha = 1.0
    max_iter = 20
    X, y = make_blobs(n_samples=n_samples, centers=2, random_state=0, cluster_std=0.1)

    clf1 = LogisticRegression(
        solver="sag",
        fit_intercept=False,
        tol=0.0000001,
        C=1.0 / alpha / n_samples,
        max_iter=max_iter,
        random_state=10,
        multi_class="ovr",
    )
    clf2 = clone(clf1)
    clf3 = LogisticRegression(
        fit_intercept=False,
        tol=0.0000001,
        C=1.0 / alpha / n_samples,
        max_iter=max_iter,
        random_state=10,
        multi_class="ovr",
    )

    clf1.fit(X, y)
    clf2.fit(sp.csr_matrix(X), y)
    clf3.fit(X, y)

    pobj1 = get_pobj(clf1.coef_, alpha, X, y, log_loss)
    pobj2 = get_pobj(clf2.coef_, alpha, X, y, log_loss)
    pobj3 = get_pobj(clf3.coef_, alpha, X, y, log_loss)

    assert_array_almost_equal(pobj1, pobj2, decimal=4)
    assert_array_almost_equal(pobj2, pobj3, decimal=4)
    assert_array_almost_equal(pobj3, pobj1, decimal=4)


@pytest.mark.filterwarnings("ignore:The max_iter was reached")
def test_sag_pobj_matches_ridge_regression():
    """tests if the sag pobj matches ridge reg"""
    n_samples = 100
    n_features = 10
    alpha = 1.0
    n_iter = 100
    fit_intercept = False
    rng = np.random.RandomState(10)
    X = rng.normal(size=(n_samples, n_features))
    true_w = rng.normal(size=n_features)
    y = X.dot(true_w)

    clf1 = Ridge(
        fit_intercept=fit_intercept,
        tol=0.00000000001,
        solver="sag",
        alpha=alpha,
        max_iter=n_iter,
        random_state=42,
    )
    clf2 = clone(clf1)
    clf3 = Ridge(
        fit_intercept=fit_intercept,
        tol=0.00001,
        solver="lsqr",
        alpha=alpha,
        max_iter=n_iter,
        random_state=42,
    )

    clf1.fit(X, y)
    clf2.fit(sp.csr_matrix(X), y)
    clf3.fit(X, y)

    pobj1 = get_pobj(clf1.coef_, alpha, X, y, squared_loss)
    pobj2 = get_pobj(clf2.coef_, alpha, X, y, squared_loss)
    pobj3 = get_pobj(clf3.coef_, alpha, X, y, squared_loss)

    assert_array_almost_equal(pobj1, pobj2, decimal=4)
    assert_array_almost_equal(pobj1, pobj3, decimal=4)
    assert_array_almost_equal(pobj3, pobj2, decimal=4)


@pytest.mark.filterwarnings("ignore:The max_iter was reached")
def test_sag_regressor_computed_correctly():
    """tests if the sag regressor is computed correctly"""
    alpha = 0.1
    n_features = 10
    n_samples = 40
    max_iter = 100
    tol = 0.000001
    fit_intercept = True
    rng = np.random.RandomState(0)
    X = rng.normal(size=(n_samples, n_features))
    w = rng.normal(size=n_features)
    y = np.dot(X, w) + 2.0
    step_size = get_step_size(X, alpha, fit_intercept, classification=False)

    clf1 = Ridge(
        fit_intercept=fit_intercept,
        tol=tol,
        solver="sag",
        alpha=alpha * n_samples,
        max_iter=max_iter,
        random_state=rng,
    )
    clf2 = clone(clf1)

    clf1.fit(X, y)
    clf2.fit(sp.csr_matrix(X), y)

    spweights1, spintercept1 = sag_sparse(
        X,
        y,
        step_size,
        alpha,
        n_iter=max_iter,
        dloss=squared_dloss,
        fit_intercept=fit_intercept,
        random_state=rng,
    )
    spweights2, spintercept2 = sag_sparse(
        X,
        y,
        step_size,
        alpha,
        n_iter=max_iter,
        dloss=squared_dloss,
        sparse=True,
        fit_intercept=fit_intercept,
        random_state=rng,
    )

    assert_array_almost_equal(clf1.coef_.ravel(), spweights1.ravel(), decimal=3)
    assert_almost_equal(clf1.intercept_, spintercept1, decimal=1)

    # TODO: uncomment when sparse Ridge with intercept will be fixed (#4710)
    # assert_array_almost_equal(clf2.coef_.ravel(),
    #                           spweights2.ravel(),
    #                           decimal=3)
    # assert_almost_equal(clf2.intercept_, spintercept2, decimal=1)


def test_get_auto_step_size():
    X = np.array([[1, 2, 3], [2, 3, 4], [2, 3, 2]], dtype=np.float64)
    alpha = 1.2
    fit_intercept = False
    # sum the squares of the second sample because that's the largest
    max_squared_sum = 4 + 9 + 16
    max_squared_sum_ = row_norms(X, squared=True).max()
    n_samples = X.shape[0]
    assert_almost_equal(max_squared_sum, max_squared_sum_, decimal=4)

    for saga in [True, False]:
        for fit_intercept in (True, False):
            if saga:
                L_sqr = max_squared_sum + alpha + int(fit_intercept)
                L_log = (max_squared_sum + 4.0 * alpha + int(fit_intercept)) / 4.0

                mun_sqr = min(2 * n_samples * alpha, L_sqr)
                mun_log = min(2 * n_samples * alpha, L_log)

                step_size_sqr = 1 / (2 * L_sqr + mun_sqr)
                step_size_log = 1 / (2 * L_log + mun_log)
            else:
                step_size_sqr = 1.0 / (max_squared_sum + alpha + int(fit_intercept))
                step_size_log = 4.0 / (
                    max_squared_sum + 4.0 * alpha + int(fit_intercept)
                )

            step_size_sqr_ = get_auto_step_size(
                max_squared_sum_,
                alpha,
                "squared",
                fit_intercept,
                n_samples=n_samples,
                is_saga=saga,
            )
            step_size_log_ = get_auto_step_size(
                max_squared_sum_,
                alpha,
                "log",
                fit_intercept,
                n_samples=n_samples,
                is_saga=saga,
            )

            assert_almost_equal(step_size_sqr, step_size_sqr_, decimal=4)
            assert_almost_equal(step_size_log, step_size_log_, decimal=4)

    msg = "Unknown loss function for SAG solver, got wrong instead of"
    with pytest.raises(ValueError, match=msg):
        get_auto_step_size(max_squared_sum_, alpha, "wrong", fit_intercept)


@pytest.mark.parametrize("seed", range(3))  # locally tested with 1000 seeds
def test_sag_regressor(seed):
    """tests if the sag regressor performs well"""
    xmin, xmax = -5, 5
    n_samples = 300
    tol = 0.001
    max_iter = 100
    alpha = 0.1
    rng = np.random.RandomState(seed)
    X = np.linspace(xmin, xmax, n_samples).reshape(n_samples, 1)

    # simple linear function without noise
    y = 0.5 * X.ravel()

    clf1 = Ridge(
        tol=tol,
        solver="sag",
        max_iter=max_iter,
        alpha=alpha * n_samples,
        random_state=rng,
    )
    clf2 = clone(clf1)
    clf1.fit(X, y)
    clf2.fit(sp.csr_matrix(X), y)
    score1 = clf1.score(X, y)
    score2 = clf2.score(X, y)
    assert score1 > 0.98
    assert score2 > 0.98

    # simple linear function with noise
    y = 0.5 * X.ravel() + rng.randn(n_samples, 1).ravel()

    clf1 = Ridge(tol=tol, solver="sag", max_iter=max_iter, alpha=alpha * n_samples)
    clf2 = clone(clf1)
    clf1.fit(X, y)
    clf2.fit(sp.csr_matrix(X), y)
    score1 = clf1.score(X, y)
    score2 = clf2.score(X, y)
    assert score1 > 0.45
    assert score2 > 0.45


@pytest.mark.filterwarnings("ignore:The max_iter was reached")
def test_sag_classifier_computed_correctly():
    """tests if the binary classifier is computed correctly"""
    alpha = 0.1
    n_samples = 50
    n_iter = 50
    tol = 0.00001
    fit_intercept = True
    X, y = make_blobs(n_samples=n_samples, centers=2, random_state=0, cluster_std=0.1)
    step_size = get_step_size(X, alpha, fit_intercept, classification=True)
    classes = np.unique(y)
    y_tmp = np.ones(n_samples)
    y_tmp[y != classes[1]] = -1
    y = y_tmp

    clf1 = LogisticRegression(
        solver="sag",
        C=1.0 / alpha / n_samples,
        max_iter=n_iter,
        tol=tol,
        random_state=77,
        fit_intercept=fit_intercept,
        multi_class="ovr",
    )
    clf2 = clone(clf1)

    clf1.fit(X, y)
    clf2.fit(sp.csr_matrix(X), y)

    spweights, spintercept = sag_sparse(
        X,
        y,
        step_size,
        alpha,
        n_iter=n_iter,
        dloss=log_dloss,
        fit_intercept=fit_intercept,
    )
    spweights2, spintercept2 = sag_sparse(
        X,
        y,
        step_size,
        alpha,
        n_iter=n_iter,
        dloss=log_dloss,
        sparse=True,
        fit_intercept=fit_intercept,
    )

    assert_array_almost_equal(clf1.coef_.ravel(), spweights.ravel(), decimal=2)
    assert_almost_equal(clf1.intercept_, spintercept, decimal=1)

    assert_array_almost_equal(clf2.coef_.ravel(), spweights2.ravel(), decimal=2)
    assert_almost_equal(clf2.intercept_, spintercept2, decimal=1)


@pytest.mark.filterwarnings("ignore:The max_iter was reached")
def test_sag_multiclass_computed_correctly():
    """tests if the multiclass classifier is computed correctly"""
    alpha = 0.1
    n_samples = 20
    tol = 0.00001
    max_iter = 40
    fit_intercept = True
    X, y = make_blobs(n_samples=n_samples, centers=3, random_state=0, cluster_std=0.1)
    step_size = get_step_size(X, alpha, fit_intercept, classification=True)
    classes = np.unique(y)

    clf1 = LogisticRegression(
        solver="sag",
        C=1.0 / alpha / n_samples,
        max_iter=max_iter,
        tol=tol,
        random_state=77,
        fit_intercept=fit_intercept,
        multi_class="ovr",
    )
    clf2 = clone(clf1)

    clf1.fit(X, y)
    clf2.fit(sp.csr_matrix(X), y)

    coef1 = []
    intercept1 = []
    coef2 = []
    intercept2 = []
    for cl in classes:
        y_encoded = np.ones(n_samples)
        y_encoded[y != cl] = -1

        spweights1, spintercept1 = sag_sparse(
            X,
            y_encoded,
            step_size,
            alpha,
            dloss=log_dloss,
            n_iter=max_iter,
            fit_intercept=fit_intercept,
        )
        spweights2, spintercept2 = sag_sparse(
            X,
            y_encoded,
            step_size,
            alpha,
            dloss=log_dloss,
            n_iter=max_iter,
            sparse=True,
            fit_intercept=fit_intercept,
        )
        coef1.append(spweights1)
        intercept1.append(spintercept1)
        coef2.append(spweights2)
        intercept2.append(spintercept2)

    coef1 = np.vstack(coef1)
    intercept1 = np.array(intercept1)
    coef2 = np.vstack(coef2)
    intercept2 = np.array(intercept2)

    for i, cl in enumerate(classes):
        assert_array_almost_equal(clf1.coef_[i].ravel(), coef1[i].ravel(), decimal=2)
        assert_almost_equal(clf1.intercept_[i], intercept1[i], decimal=1)

        assert_array_almost_equal(clf2.coef_[i].ravel(), coef2[i].ravel(), decimal=2)
        assert_almost_equal(clf2.intercept_[i], intercept2[i], decimal=1)


def test_classifier_results():
    """tests if classifier results match target"""
    alpha = 0.1
    n_features = 20
    n_samples = 10
    tol = 0.01
    max_iter = 200

    rng = np.random.RandomState(0)
    X = rng.normal(size=(n_samples, n_features))
    w = rng.normal(size=n_features)
    y = np.dot(X, w)
    y = np.sign(y)

    clf1 = LogisticRegression(
        solver="sag",
        C=1.0 / alpha / n_samples,
        max_iter=max_iter,
        tol=tol,
        random_state=77,
    )
    clf2 = clone(clf1)

    clf1.fit(X, y)
    clf2.fit(sp.csr_matrix(X), y)
    pred1 = clf1.predict(X)
    pred2 = clf2.predict(X)
    assert_almost_equal(pred1, y, decimal=12)
    assert_almost_equal(pred2, y, decimal=12)


@pytest.mark.filterwarnings("ignore:The max_iter was reached")
def test_binary_classifier_class_weight():
    """tests binary classifier with classweights for each class"""
    alpha = 0.1
    n_samples = 50
    n_iter = 20
    tol = 0.00001
    fit_intercept = True
    X, y = make_blobs(n_samples=n_samples, centers=2, random_state=10, cluster_std=0.1)
    step_size = get_step_size(X, alpha, fit_intercept, classification=True)
    classes = np.unique(y)
    y_tmp = np.ones(n_samples)
    y_tmp[y != classes[1]] = -1
    y = y_tmp

    class_weight = {1: 0.45, -1: 0.55}
    clf1 = LogisticRegression(
        solver="sag",
        C=1.0 / alpha / n_samples,
        max_iter=n_iter,
        tol=tol,
        random_state=77,
        fit_intercept=fit_intercept,
        multi_class="ovr",
        class_weight=class_weight,
    )
    clf2 = clone(clf1)

    clf1.fit(X, y)
    clf2.fit(sp.csr_matrix(X), y)

    le = LabelEncoder()
    class_weight_ = compute_class_weight(class_weight, classes=np.unique(y), y=y)
    sample_weight = class_weight_[le.fit_transform(y)]

    spweights, spintercept = sag_sparse(
        X,
        y,
        step_size,
        alpha,
        n_iter=n_iter,
        dloss=log_dloss,
        sample_weight=sample_weight,
        fit_intercept=fit_intercept,
    )
    spweights2, spintercept2 = sag_sparse(
        X,
        y,
        step_size,
        alpha,
        n_iter=n_iter,
        dloss=log_dloss,
        sparse=True,
        sample_weight=sample_weight,
        fit_intercept=fit_intercept,
    )

    assert_array_almost_equal(clf1.coef_.ravel(), spweights.ravel(), decimal=2)
    assert_almost_equal(clf1.intercept_, spintercept, decimal=1)

    assert_array_almost_equal(clf2.coef_.ravel(), spweights2.ravel(), decimal=2)
    assert_almost_equal(clf2.intercept_, spintercept2, decimal=1)


@pytest.mark.filterwarnings("ignore:The max_iter was reached")
def test_multiclass_classifier_class_weight():
    """tests multiclass with classweights for each class"""
    alpha = 0.1
    n_samples = 20
    tol = 0.00001
    max_iter = 50
    class_weight = {0: 0.45, 1: 0.55, 2: 0.75}
    fit_intercept = True
    X, y = make_blobs(n_samples=n_samples, centers=3, random_state=0, cluster_std=0.1)
    step_size = get_step_size(X, alpha, fit_intercept, classification=True)
    classes = np.unique(y)

    clf1 = LogisticRegression(
        solver="sag",
        C=1.0 / alpha / n_samples,
        max_iter=max_iter,
        tol=tol,
        random_state=77,
        fit_intercept=fit_intercept,
        multi_class="ovr",
        class_weight=class_weight,
    )
    clf2 = clone(clf1)

    clf1.fit(X, y)
    clf2.fit(sp.csr_matrix(X), y)

    le = LabelEncoder()
    class_weight_ = compute_class_weight(class_weight, classes=np.unique(y), y=y)
    sample_weight = class_weight_[le.fit_transform(y)]

    coef1 = []
    intercept1 = []
    coef2 = []
    intercept2 = []
    for cl in classes:
        y_encoded = np.ones(n_samples)
        y_encoded[y != cl] = -1

        spweights1, spintercept1 = sag_sparse(
            X,
            y_encoded,
            step_size,
            alpha,
            n_iter=max_iter,
            dloss=log_dloss,
            sample_weight=sample_weight,
        )
        spweights2, spintercept2 = sag_sparse(
            X,
            y_encoded,
            step_size,
            alpha,
            n_iter=max_iter,
            dloss=log_dloss,
            sample_weight=sample_weight,
            sparse=True,
        )
        coef1.append(spweights1)
        intercept1.append(spintercept1)
        coef2.append(spweights2)
        intercept2.append(spintercept2)

    coef1 = np.vstack(coef1)
    intercept1 = np.array(intercept1)
    coef2 = np.vstack(coef2)
    intercept2 = np.array(intercept2)

    for i, cl in enumerate(classes):
        assert_array_almost_equal(clf1.coef_[i].ravel(), coef1[i].ravel(), decimal=2)
        assert_almost_equal(clf1.intercept_[i], intercept1[i], decimal=1)

        assert_array_almost_equal(clf2.coef_[i].ravel(), coef2[i].ravel(), decimal=2)
        assert_almost_equal(clf2.intercept_[i], intercept2[i], decimal=1)


def test_classifier_single_class():
    """tests if ValueError is thrown with only one class"""
    X = [[1, 2], [3, 4]]
    y = [1, 1]

    msg = "This solver needs samples of at least 2 classes in the data"
    with pytest.raises(ValueError, match=msg):
        LogisticRegression(solver="sag").fit(X, y)


def test_step_size_alpha_error():
    # With an all-zero X, the automatic step size becomes 1 / alpha_scaled, which
    # hits the unsupported case step_size * alpha_scaled == 1 for both estimators.
    X = [[0, 0], [0, 0]]
    y = [1, -1]
    fit_intercept = False
    alpha = 1.0
    msg = re.escape(
        "Current sag implementation does not handle the case"
        " step_size * alpha_scaled == 1"
    )

    clf1 = LogisticRegression(solver="sag", C=1.0 / alpha, fit_intercept=fit_intercept)
    with pytest.raises(ZeroDivisionError, match=msg):
        clf1.fit(X, y)

    clf2 = Ridge(fit_intercept=fit_intercept, solver="sag", alpha=alpha)
    with pytest.raises(ZeroDivisionError, match=msg):
        clf2.fit(X, y)


def test_multinomial_loss():
    # test if the multinomial loss and gradient computations are consistent
    X, y = iris.data, iris.target.astype(np.float64)
    n_samples, n_features = X.shape
    n_classes = len(np.unique(y))

    rng = check_random_state(42)
    weights = rng.randn(n_features, n_classes)
    intercept = rng.randn(n_classes)
    sample_weights = rng.randn(n_samples)
    np.abs(sample_weights, sample_weights)  # in-place absolute value

    # compute loss and gradient like in multinomial SAG
    dataset, _ = make_dataset(X, y, sample_weights, random_state=42)
    loss_1, grad_1 = _multinomial_grad_loss_all_samples(
        dataset, weights, intercept, n_samples, n_features, n_classes
    )
    # compute loss and gradient like in multinomial LogisticRegression
    loss = LinearModelLoss(
        base_loss=HalfMultinomialLoss(n_classes=n_classes),
        fit_intercept=True,
    )
    weights_intercept = np.vstack((weights, intercept)).T
    loss_2, grad_2 = loss.loss_gradient(
        weights_intercept, X, y, l2_reg_strength=0.0, sample_weight=sample_weights
    )
    grad_2 = grad_2[:, :-1].T

    # comparison
    assert_array_almost_equal(grad_1, grad_2)
    assert_almost_equal(loss_1, loss_2)


def test_multinomial_loss_ground_truth():
    # n_samples, n_features, n_classes = 4, 2, 3
    n_classes = 3
    X = np.array([[1.1, 2.2], [2.2, -4.4], [3.3, -2.2], [1.1, 1.1]])
    y = np.array([0, 1, 2, 0], dtype=np.float64)
    lbin = LabelBinarizer()
    Y_bin = lbin.fit_transform(y)

    weights = np.array([[0.1, 0.2, 0.3], [1.1, 1.2, -1.3]])
    intercept = np.array([1.0, 0, -0.2])
    sample_weights = np.array([0.8, 1, 1, 0.8])

    prediction = np.dot(X, weights) + intercept
    logsumexp_prediction = logsumexp(prediction, axis=1)
    p = prediction - logsumexp_prediction[:, np.newaxis]
    loss_1 = -(sample_weights[:, np.newaxis] * p * Y_bin).sum()
    diff = sample_weights[:, np.newaxis] * (np.exp(p) - Y_bin)
    grad_1 = np.dot(X.T, diff)

    loss = LinearModelLoss(
        base_loss=HalfMultinomialLoss(n_classes=n_classes),
        fit_intercept=True,
    )
    weights_intercept = np.vstack((weights, intercept)).T
    loss_2, grad_2 = loss.loss_gradient(
        weights_intercept, X, y, l2_reg_strength=0.0, sample_weight=sample_weights
    )
    grad_2 = grad_2[:, :-1].T

    assert_almost_equal(loss_1, loss_2)
    assert_array_almost_equal(grad_1, grad_2)

    # ground truth
    loss_gt = 11.680360354325961
    grad_gt = np.array(
        [[-0.557487, -1.619151, +2.176638], [-0.903942, +5.258745, -4.354803]]
    )
    assert_almost_equal(loss_1, loss_gt)
    assert_array_almost_equal(grad_1, grad_gt)


@pytest.mark.parametrize("solver", ["sag", "saga"])
def test_sag_classifier_raises_error(solver):
    # Following #13316, the error handling behavior changed in cython sag. This
    # is simply a non-regression test to make sure numerical errors are
    # properly raised.

    # Train a classifier on a simple problem
    rng = np.random.RandomState(42)
    X, y = make_classification(random_state=rng)
    clf = LogisticRegression(solver=solver, random_state=rng, warm_start=True)
    clf.fit(X, y)

    # Trigger a numerical error by:
    # - corrupting the fitted coefficients of the classifier
    # - fit it again starting from its current state thanks to warm_start
    clf.coef_[:] = np.nan

    with pytest.raises(ValueError, match="Floating-point under-/overflow"):
        clf.fit(X, y)