test_sgd.py

import pickle
from unittest.mock import Mock

import joblib
import numpy as np
import pytest
import scipy.sparse as sp

from sklearn import datasets, linear_model, metrics
from sklearn.base import clone, is_classifier
from sklearn.exceptions import ConvergenceWarning
from sklearn.kernel_approximation import Nystroem
from sklearn.linear_model import _sgd_fast as sgd_fast
from sklearn.linear_model import _stochastic_gradient
from sklearn.model_selection import (
    RandomizedSearchCV,
    ShuffleSplit,
    StratifiedShuffleSplit,
)
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import LabelEncoder, MinMaxScaler, StandardScaler, scale
from sklearn.svm import OneClassSVM
from sklearn.utils._testing import (
    assert_allclose,
    assert_almost_equal,
    assert_array_almost_equal,
    assert_array_equal,
    ignore_warnings,
)
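
# The tests below build estimators through small factory helpers
# (SGDClassifier, SGDRegressor, SGDOneClassSVM and their Sparse* variants).
# The helpers fill in random_state=42, tol=None and max_iter=5 unless a test
# overrides them, so the tests are deterministic and fast, and the _Sparse*
# subclasses convert the input to CSR so that the same test bodies exercise
# both the dense and the sparse code paths.
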

def _update_kwargs(kwargs):
    if "random_state" not in kwargs:
        kwargs["random_state"] = 42

    if "tol" not in kwargs:
        kwargs["tol"] = None
    if "max_iter" not in kwargs:
        kwargs["max_iter"] = 5


class _SparseSGDClassifier(linear_model.SGDClassifier):
    def fit(self, X, y, *args, **kw):
        X = sp.csr_matrix(X)
        return super().fit(X, y, *args, **kw)

    def partial_fit(self, X, y, *args, **kw):
        X = sp.csr_matrix(X)
        return super().partial_fit(X, y, *args, **kw)

    def decision_function(self, X):
        X = sp.csr_matrix(X)
        return super().decision_function(X)

    def predict_proba(self, X):
        X = sp.csr_matrix(X)
        return super().predict_proba(X)


class _SparseSGDRegressor(linear_model.SGDRegressor):
    def fit(self, X, y, *args, **kw):
        X = sp.csr_matrix(X)
        return linear_model.SGDRegressor.fit(self, X, y, *args, **kw)

    def partial_fit(self, X, y, *args, **kw):
        X = sp.csr_matrix(X)
        return linear_model.SGDRegressor.partial_fit(self, X, y, *args, **kw)

    def decision_function(self, X, *args, **kw):
        # XXX untested as of v0.22
        X = sp.csr_matrix(X)
        return linear_model.SGDRegressor.decision_function(self, X, *args, **kw)


class _SparseSGDOneClassSVM(linear_model.SGDOneClassSVM):
    def fit(self, X, *args, **kw):
        X = sp.csr_matrix(X)
        return linear_model.SGDOneClassSVM.fit(self, X, *args, **kw)

    def partial_fit(self, X, *args, **kw):
        X = sp.csr_matrix(X)
        return linear_model.SGDOneClassSVM.partial_fit(self, X, *args, **kw)

    def decision_function(self, X, *args, **kw):
        X = sp.csr_matrix(X)
        return linear_model.SGDOneClassSVM.decision_function(self, X, *args, **kw)


def SGDClassifier(**kwargs):
    _update_kwargs(kwargs)
    return linear_model.SGDClassifier(**kwargs)


def SGDRegressor(**kwargs):
    _update_kwargs(kwargs)
    return linear_model.SGDRegressor(**kwargs)


def SGDOneClassSVM(**kwargs):
    _update_kwargs(kwargs)
    return linear_model.SGDOneClassSVM(**kwargs)


def SparseSGDClassifier(**kwargs):
    _update_kwargs(kwargs)
    return _SparseSGDClassifier(**kwargs)


def SparseSGDRegressor(**kwargs):
    _update_kwargs(kwargs)
    return _SparseSGDRegressor(**kwargs)


def SparseSGDOneClassSVM(**kwargs):
    _update_kwargs(kwargs)
    return _SparseSGDOneClassSVM(**kwargs)

# Test Data

# test sample 1
X = np.array([[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]])
Y = [1, 1, 1, 2, 2, 2]
T = np.array([[-1, -1], [2, 2], [3, 2]])
true_result = [1, 2, 2]

# test sample 2; string class labels
X2 = np.array(
    [
        [-1, 1],
        [-0.75, 0.5],
        [-1.5, 1.5],
        [1, 1],
        [0.75, 0.5],
        [1.5, 1.5],
        [-1, -1],
        [0, -0.5],
        [1, -1],
    ]
)
Y2 = ["one"] * 3 + ["two"] * 3 + ["three"] * 3
T2 = np.array([[-1.5, 0.5], [1, 2], [0, -2]])
true_result2 = ["one", "two", "three"]

# test sample 3
X3 = np.array(
    [
        [1, 1, 0, 0, 0, 0],
        [1, 1, 0, 0, 0, 0],
        [0, 0, 1, 0, 0, 0],
        [0, 0, 1, 0, 0, 0],
        [0, 0, 0, 0, 1, 1],
        [0, 0, 0, 0, 1, 1],
        [0, 0, 0, 1, 0, 0],
        [0, 0, 0, 1, 0, 0],
    ]
)
Y3 = np.array([1, 1, 1, 1, 2, 2, 2, 2])

# test sample 4 - two more or less redundant feature groups
X4 = np.array(
    [
        [1, 0.9, 0.8, 0, 0, 0],
        [1, 0.84, 0.98, 0, 0, 0],
        [1, 0.96, 0.88, 0, 0, 0],
        [1, 0.91, 0.99, 0, 0, 0],
        [0, 0, 0, 0.89, 0.91, 1],
        [0, 0, 0, 0.79, 0.84, 1],
        [0, 0, 0, 0.91, 0.95, 1],
        [0, 0, 0, 0.93, 1, 1],
    ]
)
Y4 = np.array([1, 1, 1, 1, 2, 2, 2, 2])

iris = datasets.load_iris()

# test sample 5 - test sample 1 as binary classification problem
X5 = np.array([[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]])
Y5 = [1, 1, 1, 2, 2, 2]
true_result5 = [0, 1, 1]

###############################################################################
# Common Test Case for classification and regression


# a simple implementation of ASGD to use for testing
# uses squared loss to find the gradient
def asgd(klass, X, y, eta, alpha, weight_init=None, intercept_init=0.0):
    if weight_init is None:
        weights = np.zeros(X.shape[1])
    else:
        weights = weight_init

    average_weights = np.zeros(X.shape[1])
    intercept = intercept_init
    average_intercept = 0.0
    decay = 1.0

    # sparse data has a fixed decay of .01
    if klass in (SparseSGDClassifier, SparseSGDRegressor):
        decay = 0.01

    for i, entry in enumerate(X):
        p = np.dot(entry, weights)
        p += intercept
        gradient = p - y[i]
        weights *= 1.0 - (eta * alpha)
        weights += -(eta * gradient * entry)
        intercept += -(eta * gradient) * decay

        average_weights *= i
        average_weights += weights
        average_weights /= i + 1.0

        average_intercept *= i
        average_intercept += intercept
        average_intercept /= i + 1.0

    return average_weights, average_intercept
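
# For reference, each iteration of the loop above performs the plain SGD
# update for the squared loss,
#   p = w . x_i + b
#   g = p - y_i                      (gradient of 0.5 * (p - y_i)^2 w.r.t. p)
#   w <- (1 - eta * alpha) * w - eta * g * x_i
#   b <- b - eta * g * decay
# and keeps the running (uniform) average of w and b seen so far, which is
# what estimators fitted with ``average=True`` are expected to reproduce.
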

def _test_warm_start(klass, X, Y, lr):
    # Test that explicit warm restart...
    clf = klass(alpha=0.01, eta0=0.01, shuffle=False, learning_rate=lr)
    clf.fit(X, Y)

    clf2 = klass(alpha=0.001, eta0=0.01, shuffle=False, learning_rate=lr)
    clf2.fit(X, Y, coef_init=clf.coef_.copy(), intercept_init=clf.intercept_.copy())

    # ... and implicit warm restart are equivalent.
    clf3 = klass(
        alpha=0.01, eta0=0.01, shuffle=False, warm_start=True, learning_rate=lr
    )
    clf3.fit(X, Y)

    assert clf3.t_ == clf.t_
    assert_array_almost_equal(clf3.coef_, clf.coef_)

    clf3.set_params(alpha=0.001)
    clf3.fit(X, Y)

    assert clf3.t_ == clf2.t_
    assert_array_almost_equal(clf3.coef_, clf2.coef_)


@pytest.mark.parametrize(
    "klass", [SGDClassifier, SparseSGDClassifier, SGDRegressor, SparseSGDRegressor]
)
@pytest.mark.parametrize("lr", ["constant", "optimal", "invscaling", "adaptive"])
def test_warm_start(klass, lr):
    _test_warm_start(klass, X, Y, lr)


@pytest.mark.parametrize(
    "klass", [SGDClassifier, SparseSGDClassifier, SGDRegressor, SparseSGDRegressor]
)
def test_input_format(klass):
    # Input format tests.
    clf = klass(alpha=0.01, shuffle=False)
    clf.fit(X, Y)
    Y_ = np.array(Y)[:, np.newaxis]

    Y_ = np.c_[Y_, Y_]
    with pytest.raises(ValueError):
        clf.fit(X, Y_)


@pytest.mark.parametrize(
    "klass", [SGDClassifier, SparseSGDClassifier, SGDRegressor, SparseSGDRegressor]
)
def test_clone(klass):
    # Test whether clone works ok.
    clf = klass(alpha=0.01, penalty="l1")
    clf = clone(clf)
    clf.set_params(penalty="l2")
    clf.fit(X, Y)

    clf2 = klass(alpha=0.01, penalty="l2")
    clf2.fit(X, Y)

    assert_array_equal(clf.coef_, clf2.coef_)


@pytest.mark.parametrize(
    "klass",
    [
        SGDClassifier,
        SparseSGDClassifier,
        SGDRegressor,
        SparseSGDRegressor,
        SGDOneClassSVM,
        SparseSGDOneClassSVM,
    ],
)
def test_plain_has_no_average_attr(klass):
    clf = klass(average=True, eta0=0.01)
    clf.fit(X, Y)

    assert hasattr(clf, "_average_coef")
    assert hasattr(clf, "_average_intercept")
    assert hasattr(clf, "_standard_intercept")
    assert hasattr(clf, "_standard_coef")

    clf = klass()
    clf.fit(X, Y)

    assert not hasattr(clf, "_average_coef")
    assert not hasattr(clf, "_average_intercept")
    assert not hasattr(clf, "_standard_intercept")
    assert not hasattr(clf, "_standard_coef")


@pytest.mark.parametrize(
    "klass",
    [
        SGDClassifier,
        SparseSGDClassifier,
        SGDRegressor,
        SparseSGDRegressor,
        SGDOneClassSVM,
        SparseSGDOneClassSVM,
    ],
)
def test_late_onset_averaging_not_reached(klass):
    clf1 = klass(average=600)
    clf2 = klass()
    for _ in range(100):
        if is_classifier(clf1):
            clf1.partial_fit(X, Y, classes=np.unique(Y))
            clf2.partial_fit(X, Y, classes=np.unique(Y))
        else:
            clf1.partial_fit(X, Y)
            clf2.partial_fit(X, Y)

    assert_array_almost_equal(clf1.coef_, clf2.coef_, decimal=16)
    if klass in [SGDClassifier, SparseSGDClassifier, SGDRegressor, SparseSGDRegressor]:
        assert_almost_equal(clf1.intercept_, clf2.intercept_, decimal=16)
    elif klass in [SGDOneClassSVM, SparseSGDOneClassSVM]:
        assert_allclose(clf1.offset_, clf2.offset_)


@pytest.mark.parametrize(
    "klass", [SGDClassifier, SparseSGDClassifier, SGDRegressor, SparseSGDRegressor]
)
def test_late_onset_averaging_reached(klass):
    eta0 = 0.001
    alpha = 0.0001
    Y_encode = np.array(Y)
    Y_encode[Y_encode == 1] = -1.0
    Y_encode[Y_encode == 2] = 1.0
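
    # clf1: two passes over the training set (max_iter=2), but with average=7
    # averaging only starts during the second pass over the 6 training samples.
    # clf2: a single plain pass with no averaging.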
    clf1 = klass(
        average=7,
        learning_rate="constant",
        loss="squared_error",
        eta0=eta0,
        alpha=alpha,
        max_iter=2,
        shuffle=False,
    )
    clf2 = klass(
        average=0,
        learning_rate="constant",
        loss="squared_error",
        eta0=eta0,
        alpha=alpha,
        max_iter=1,
        shuffle=False,
    )
    clf1.fit(X, Y_encode)
    clf2.fit(X, Y_encode)
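
    # Start from clf2's un-averaged solution and replay one averaged pass with
    # the reference asgd implementation; the result should match clf1.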
    average_weights, average_intercept = asgd(
        klass,
        X,
        Y_encode,
        eta0,
        alpha,
        weight_init=clf2.coef_.ravel(),
        intercept_init=clf2.intercept_,
    )

    assert_array_almost_equal(clf1.coef_.ravel(), average_weights.ravel(), decimal=16)
    assert_almost_equal(clf1.intercept_, average_intercept, decimal=16)


@pytest.mark.parametrize(
    "klass", [SGDClassifier, SparseSGDClassifier, SGDRegressor, SparseSGDRegressor]
)
def test_early_stopping(klass):
    X = iris.data[iris.target > 0]
    Y = iris.target[iris.target > 0]
    for early_stopping in [True, False]:
        max_iter = 1000
        clf = klass(early_stopping=early_stopping, tol=1e-3, max_iter=max_iter).fit(
            X, Y
        )
        assert clf.n_iter_ < max_iter


@pytest.mark.parametrize(
    "klass", [SGDClassifier, SparseSGDClassifier, SGDRegressor, SparseSGDRegressor]
)
def test_adaptive_longer_than_constant(klass):
    clf1 = klass(learning_rate="adaptive", eta0=0.01, tol=1e-3, max_iter=100)
    clf1.fit(iris.data, iris.target)
    clf2 = klass(learning_rate="constant", eta0=0.01, tol=1e-3, max_iter=100)
    clf2.fit(iris.data, iris.target)
    assert clf1.n_iter_ > clf2.n_iter_


@pytest.mark.parametrize(
    "klass", [SGDClassifier, SparseSGDClassifier, SGDRegressor, SparseSGDRegressor]
)
def test_validation_set_not_used_for_training(klass):
    X, Y = iris.data, iris.target
    validation_fraction = 0.4
    seed = 42
    shuffle = False
    max_iter = 10
    clf1 = klass(
        early_stopping=True,
        random_state=np.random.RandomState(seed),
        validation_fraction=validation_fraction,
        learning_rate="constant",
        eta0=0.01,
        tol=None,
        max_iter=max_iter,
        shuffle=shuffle,
    )
    clf1.fit(X, Y)
    assert clf1.n_iter_ == max_iter

    clf2 = klass(
        early_stopping=False,
        random_state=np.random.RandomState(seed),
        learning_rate="constant",
        eta0=0.01,
        tol=None,
        max_iter=max_iter,
        shuffle=shuffle,
    )

    if is_classifier(clf2):
        cv = StratifiedShuffleSplit(test_size=validation_fraction, random_state=seed)
    else:
        cv = ShuffleSplit(test_size=validation_fraction, random_state=seed)
    idx_train, idx_val = next(cv.split(X, Y))
    idx_train = np.sort(idx_train)  # remove shuffling
    clf2.fit(X[idx_train], Y[idx_train])
    assert clf2.n_iter_ == max_iter

    assert_array_equal(clf1.coef_, clf2.coef_)


@pytest.mark.parametrize(
    "klass", [SGDClassifier, SparseSGDClassifier, SGDRegressor, SparseSGDRegressor]
)
def test_n_iter_no_change(klass):
    X, Y = iris.data, iris.target
    # test that n_iter_ increases monotonically with n_iter_no_change
    for early_stopping in [True, False]:
        n_iter_list = [
            klass(
                early_stopping=early_stopping,
                n_iter_no_change=n_iter_no_change,
                tol=1e-4,
                max_iter=1000,
            )
            .fit(X, Y)
            .n_iter_
            for n_iter_no_change in [2, 3, 10]
        ]
        assert_array_equal(n_iter_list, sorted(n_iter_list))


@pytest.mark.parametrize(
    "klass", [SGDClassifier, SparseSGDClassifier, SGDRegressor, SparseSGDRegressor]
)
def test_not_enough_sample_for_early_stopping(klass):
    # test an error is raised if the training or validation set is empty
    clf = klass(early_stopping=True, validation_fraction=0.99)
    with pytest.raises(ValueError):
        clf.fit(X3, Y3)


###############################################################################
# Classification Test Case

@pytest.mark.parametrize("klass", [SGDClassifier, SparseSGDClassifier])
def test_sgd_clf(klass):
    # Check that SGD gives any results :-)
    for loss in ("hinge", "squared_hinge", "log_loss", "modified_huber"):
        clf = klass(
            penalty="l2",
            alpha=0.01,
            fit_intercept=True,
            loss=loss,
            max_iter=10,
            shuffle=True,
        )
        clf.fit(X, Y)
        # assert_almost_equal(clf.coef_[0], clf.coef_[1], decimal=7)
        assert_array_equal(clf.predict(T), true_result)


@pytest.mark.parametrize(
    "klass", [SGDClassifier, SparseSGDClassifier, SGDOneClassSVM, SparseSGDOneClassSVM]
)
def test_provide_coef(klass):
    """Check that the shape of `coef_init` is validated."""
    with pytest.raises(ValueError, match="Provided coef_init does not match dataset"):
        klass().fit(X, Y, coef_init=np.zeros((3,)))


@pytest.mark.parametrize(
    "klass, fit_params",
    [
        (SGDClassifier, {"intercept_init": np.zeros((3,))}),
        (SparseSGDClassifier, {"intercept_init": np.zeros((3,))}),
        (SGDOneClassSVM, {"offset_init": np.zeros((3,))}),
        (SparseSGDOneClassSVM, {"offset_init": np.zeros((3,))}),
    ],
)
def test_set_intercept_offset(klass, fit_params):
    """Check that `intercept_init` or `offset_init` is validated."""
    sgd_estimator = klass()
    with pytest.raises(ValueError, match="does not match dataset"):
        sgd_estimator.fit(X, Y, **fit_params)


@pytest.mark.parametrize(
    "klass", [SGDClassifier, SparseSGDClassifier, SGDRegressor, SparseSGDRegressor]
)
def test_sgd_early_stopping_with_partial_fit(klass):
    """Check that we raise an error for `early_stopping` used with
    `partial_fit`.
    """
    err_msg = "early_stopping should be False with partial_fit"
    with pytest.raises(ValueError, match=err_msg):
        klass(early_stopping=True).partial_fit(X, Y)


@pytest.mark.parametrize(
    "klass, fit_params",
    [
        (SGDClassifier, {"intercept_init": 0}),
        (SparseSGDClassifier, {"intercept_init": 0}),
        (SGDOneClassSVM, {"offset_init": 0}),
        (SparseSGDOneClassSVM, {"offset_init": 0}),
    ],
)
def test_set_intercept_offset_binary(klass, fit_params):
    """Check that we can pass a scalar to `intercept_init` or `offset_init`
    with binary classification."""
    klass().fit(X5, Y5, **fit_params)

@pytest.mark.parametrize("klass", [SGDClassifier, SparseSGDClassifier])
def test_average_binary_computed_correctly(klass):
    # Checks that SGDClassifier correctly computes the average weights
    eta = 0.1
    alpha = 2.0
    n_samples = 20
    n_features = 10
    rng = np.random.RandomState(0)
    X = rng.normal(size=(n_samples, n_features))
    w = rng.normal(size=n_features)

    clf = klass(
        loss="squared_error",
        learning_rate="constant",
        eta0=eta,
        alpha=alpha,
        fit_intercept=True,
        max_iter=1,
        average=True,
        shuffle=False,
    )

    # simple linear function without noise
    y = np.dot(X, w)
    y = np.sign(y)

    clf.fit(X, y)

    average_weights, average_intercept = asgd(klass, X, y, eta, alpha)
    average_weights = average_weights.reshape(1, -1)
    assert_array_almost_equal(clf.coef_, average_weights, decimal=14)
    assert_almost_equal(clf.intercept_, average_intercept, decimal=14)


@pytest.mark.parametrize("klass", [SGDClassifier, SparseSGDClassifier])
def test_set_intercept_to_intercept(klass):
    # Checks intercept_ shape consistency for warm starts
    # Inconsistent intercept_ shape.
    clf = klass().fit(X5, Y5)
    klass().fit(X5, Y5, intercept_init=clf.intercept_)
    clf = klass().fit(X, Y)
    klass().fit(X, Y, intercept_init=clf.intercept_)


@pytest.mark.parametrize("klass", [SGDClassifier, SparseSGDClassifier])
def test_sgd_at_least_two_labels(klass):
    # Target must have at least two labels
    clf = klass(alpha=0.01, max_iter=20)
    with pytest.raises(ValueError):
        clf.fit(X2, np.ones(9))


@pytest.mark.parametrize("klass", [SGDClassifier, SparseSGDClassifier])
def test_partial_fit_weight_class_balanced(klass):
    # partial_fit with class_weight='balanced' is not supported
    regex = (
        r"class_weight 'balanced' is not supported for "
        r"partial_fit\. In order to use 'balanced' weights, "
        r"use compute_class_weight\('balanced', classes=classes, y=y\). "
        r"In place of y you can use a large enough sample "
        r"of the full training set target to properly "
        r"estimate the class frequency distributions\. "
        r"Pass the resulting weights as the class_weight "
        r"parameter\."
    )
    with pytest.raises(ValueError, match=regex):
        klass(class_weight="balanced").partial_fit(X, Y, classes=np.unique(Y))

@pytest.mark.parametrize("klass", [SGDClassifier, SparseSGDClassifier])
def test_sgd_multiclass(klass):
    # Multi-class test case
    clf = klass(alpha=0.01, max_iter=20).fit(X2, Y2)
    assert clf.coef_.shape == (3, 2)
    assert clf.intercept_.shape == (3,)
    assert clf.decision_function([[0, 0]]).shape == (1, 3)
    pred = clf.predict(T2)
    assert_array_equal(pred, true_result2)


@pytest.mark.parametrize("klass", [SGDClassifier, SparseSGDClassifier])
def test_sgd_multiclass_average(klass):
    eta = 0.001
    alpha = 0.01
    # Multi-class average test case
    clf = klass(
        loss="squared_error",
        learning_rate="constant",
        eta0=eta,
        alpha=alpha,
        fit_intercept=True,
        max_iter=1,
        average=True,
        shuffle=False,
    )

    np_Y2 = np.array(Y2)
    clf.fit(X2, np_Y2)
    classes = np.unique(np_Y2)

    for i, cl in enumerate(classes):
        y_i = np.ones(np_Y2.shape[0])
        y_i[np_Y2 != cl] = -1
        average_coef, average_intercept = asgd(klass, X2, y_i, eta, alpha)
        assert_array_almost_equal(average_coef, clf.coef_[i], decimal=16)
        assert_almost_equal(average_intercept, clf.intercept_[i], decimal=16)


@pytest.mark.parametrize("klass", [SGDClassifier, SparseSGDClassifier])
def test_sgd_multiclass_with_init_coef(klass):
    # Multi-class test case
    clf = klass(alpha=0.01, max_iter=20)
    clf.fit(X2, Y2, coef_init=np.zeros((3, 2)), intercept_init=np.zeros(3))
    assert clf.coef_.shape == (3, 2)
    assert clf.intercept_.shape == (3,)
    pred = clf.predict(T2)
    assert_array_equal(pred, true_result2)

@pytest.mark.parametrize("klass", [SGDClassifier, SparseSGDClassifier])
def test_sgd_multiclass_njobs(klass):
    # Multi-class test case with multi-core support
    clf = klass(alpha=0.01, max_iter=20, n_jobs=2).fit(X2, Y2)
    assert clf.coef_.shape == (3, 2)
    assert clf.intercept_.shape == (3,)
    assert clf.decision_function([[0, 0]]).shape == (1, 3)
    pred = clf.predict(T2)
    assert_array_equal(pred, true_result2)


@pytest.mark.parametrize("klass", [SGDClassifier, SparseSGDClassifier])
def test_set_coef_multiclass(klass):
    # Checks coef_init and intercept_init shape for multi-class
    # problems
    # Provided coef_ does not match dataset
    clf = klass()
    with pytest.raises(ValueError):
        clf.fit(X2, Y2, coef_init=np.zeros((2, 2)))

    # Provided coef_ does match dataset
    clf = klass().fit(X2, Y2, coef_init=np.zeros((3, 2)))

    # Provided intercept_ does not match dataset
    clf = klass()
    with pytest.raises(ValueError):
        clf.fit(X2, Y2, intercept_init=np.zeros((1,)))

    # Provided intercept_ does match dataset.
    clf = klass().fit(X2, Y2, intercept_init=np.zeros((3,)))


@pytest.mark.parametrize("klass", [SGDClassifier, SparseSGDClassifier])
def test_sgd_predict_proba_method_access(klass):
    # Checks that SGDClassifier predict_proba and predict_log_proba methods
    # can either be accessed or raise an appropriate error message
    # otherwise. See
    # https://github.com/scikit-learn/scikit-learn/issues/10938 for more
    # details.
    for loss in linear_model.SGDClassifier.loss_functions:
        clf = SGDClassifier(loss=loss)
        if loss in ("log_loss", "modified_huber"):
            assert hasattr(clf, "predict_proba")
            assert hasattr(clf, "predict_log_proba")
        else:
            message = "probability estimates are not available for loss={!r}".format(
                loss
            )
            assert not hasattr(clf, "predict_proba")
            assert not hasattr(clf, "predict_log_proba")
            with pytest.raises(AttributeError, match=message):
                clf.predict_proba
            with pytest.raises(AttributeError, match=message):
                clf.predict_log_proba


@pytest.mark.parametrize("klass", [SGDClassifier, SparseSGDClassifier])
def test_sgd_proba(klass):
    # Check SGD.predict_proba

    # Hinge loss does not allow for conditional prob estimate.
    # We cannot use the factory here, because it defines predict_proba
    # anyway.
    clf = SGDClassifier(loss="hinge", alpha=0.01, max_iter=10, tol=None).fit(X, Y)
    assert not hasattr(clf, "predict_proba")
    assert not hasattr(clf, "predict_log_proba")

    # log and modified_huber losses can output probability estimates
    # binary case
    for loss in ["log_loss", "modified_huber"]:
        clf = klass(loss=loss, alpha=0.01, max_iter=10)
        clf.fit(X, Y)
        p = clf.predict_proba([[3, 2]])
        assert p[0, 1] > 0.5
        p = clf.predict_proba([[-1, -1]])
        assert p[0, 1] < 0.5

        p = clf.predict_log_proba([[3, 2]])
        assert p[0, 1] > p[0, 0]
        p = clf.predict_log_proba([[-1, -1]])
        assert p[0, 1] < p[0, 0]

    # log loss multiclass probability estimates
    clf = klass(loss="log_loss", alpha=0.01, max_iter=10).fit(X2, Y2)

    d = clf.decision_function([[0.1, -0.1], [0.3, 0.2]])
    p = clf.predict_proba([[0.1, -0.1], [0.3, 0.2]])
    assert_array_equal(np.argmax(p, axis=1), np.argmax(d, axis=1))
    assert_almost_equal(p[0].sum(), 1)
    assert np.all(p[0] >= 0)

    p = clf.predict_proba([[-1, -1]])
    d = clf.decision_function([[-1, -1]])
    assert_array_equal(np.argsort(p[0]), np.argsort(d[0]))

    lp = clf.predict_log_proba([[3, 2]])
    p = clf.predict_proba([[3, 2]])
    assert_array_almost_equal(np.log(p), lp)

    lp = clf.predict_log_proba([[-1, -1]])
    p = clf.predict_proba([[-1, -1]])
    assert_array_almost_equal(np.log(p), lp)

    # Modified Huber multiclass probability estimates; requires a separate
    # test because the hard zero/one probabilities may destroy the
    # ordering present in decision_function output.
    clf = klass(loss="modified_huber", alpha=0.01, max_iter=10)
    clf.fit(X2, Y2)
    d = clf.decision_function([[3, 2]])
    p = clf.predict_proba([[3, 2]])
    if klass != SparseSGDClassifier:
        assert np.argmax(d, axis=1) == np.argmax(p, axis=1)
    else:  # XXX the sparse test gets a different X2 (?)
        assert np.argmin(d, axis=1) == np.argmin(p, axis=1)

    # the following sample produces decision_function values < -1,
    # which would cause naive normalization to fail (see comment
    # in SGDClassifier.predict_proba)
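    # When every decision value is below -1, modified_huber's mapping
    # (clip(d, -1, 1) + 1) / 2 gives zero probability to every class, so
    # predict_proba falls back to the uniform distribution, hence the
    # [1/3, 1/3, 1/3] expectation below.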
    x = X.mean(axis=0)
    d = clf.decision_function([x])
    if np.all(d < -1):  # XXX not true in sparse test case (why?)
        p = clf.predict_proba([x])
        assert_array_almost_equal(p[0], [1 / 3.0] * 3)


@pytest.mark.parametrize("klass", [SGDClassifier, SparseSGDClassifier])
def test_sgd_l1(klass):
    # Test L1 regularization
    n = len(X4)
    rng = np.random.RandomState(13)
    idx = np.arange(n)
    rng.shuffle(idx)

    X = X4[idx, :]
    Y = Y4[idx]

    clf = klass(
        penalty="l1",
        alpha=0.2,
        fit_intercept=False,
        max_iter=2000,
        tol=None,
        shuffle=False,
    )
    clf.fit(X, Y)
    assert_array_equal(clf.coef_[0, 1:-1], np.zeros((4,)))
    pred = clf.predict(X)
    assert_array_equal(pred, Y)

    # test sparsify with dense inputs
    clf.sparsify()
    assert sp.issparse(clf.coef_)
    pred = clf.predict(X)
    assert_array_equal(pred, Y)

    # pickle and unpickle with sparse coef_
    clf = pickle.loads(pickle.dumps(clf))
    assert sp.issparse(clf.coef_)
    pred = clf.predict(X)
    assert_array_equal(pred, Y)


@pytest.mark.parametrize("klass", [SGDClassifier, SparseSGDClassifier])
def test_class_weights(klass):
    # Test class weights.
    X = np.array([[-1.0, -1.0], [-1.0, 0], [-0.8, -1.0], [1.0, 1.0], [1.0, 0.0]])
    y = [1, 1, 1, -1, -1]

    clf = klass(alpha=0.1, max_iter=1000, fit_intercept=False, class_weight=None)
    clf.fit(X, y)
    assert_array_equal(clf.predict([[0.2, -1.0]]), np.array([1]))

    # we give a small weight to class 1
    clf = klass(alpha=0.1, max_iter=1000, fit_intercept=False, class_weight={1: 0.001})
    clf.fit(X, y)

    # now the hyperplane should rotate clock-wise and
    # the prediction on this point should shift
    assert_array_equal(clf.predict([[0.2, -1.0]]), np.array([-1]))


@pytest.mark.parametrize("klass", [SGDClassifier, SparseSGDClassifier])
def test_equal_class_weight(klass):
    # Test if equal class weights approx. equals no class weights.
    X = [[1, 0], [1, 0], [0, 1], [0, 1]]
    y = [0, 0, 1, 1]
    clf = klass(alpha=0.1, max_iter=1000, class_weight=None)
    clf.fit(X, y)

    X = [[1, 0], [0, 1]]
    y = [0, 1]
    clf_weighted = klass(alpha=0.1, max_iter=1000, class_weight={0: 0.5, 1: 0.5})
    clf_weighted.fit(X, y)

    # should be similar up to some epsilon due to learning rate schedule
    assert_almost_equal(clf.coef_, clf_weighted.coef_, decimal=2)


@pytest.mark.parametrize("klass", [SGDClassifier, SparseSGDClassifier])
def test_wrong_class_weight_label(klass):
    # ValueError due to not existing class label.
    clf = klass(alpha=0.1, max_iter=1000, class_weight={0: 0.5})
    with pytest.raises(ValueError):
        clf.fit(X, Y)


@pytest.mark.parametrize("klass", [SGDClassifier, SparseSGDClassifier])
def test_weights_multiplied(klass):
    # Tests that class_weight and sample_weight are multiplicative
    class_weights = {1: 0.6, 2: 0.3}
    rng = np.random.RandomState(0)
    sample_weights = rng.random_sample(Y4.shape[0])
    multiplied_together = np.copy(sample_weights)
    multiplied_together[Y4 == 1] *= class_weights[1]
    multiplied_together[Y4 == 2] *= class_weights[2]

    clf1 = klass(alpha=0.1, max_iter=20, class_weight=class_weights)
    clf2 = klass(alpha=0.1, max_iter=20)

    clf1.fit(X4, Y4, sample_weight=sample_weights)
    clf2.fit(X4, Y4, sample_weight=multiplied_together)

    assert_almost_equal(clf1.coef_, clf2.coef_)


@pytest.mark.parametrize("klass", [SGDClassifier, SparseSGDClassifier])
def test_balanced_weight(klass):
    # Test class weights for imbalanced data
    # compute reference metrics on iris dataset that is quite balanced by
    # default
    X, y = iris.data, iris.target
    X = scale(X)
    idx = np.arange(X.shape[0])
    rng = np.random.RandomState(6)
    rng.shuffle(idx)
    X = X[idx]
    y = y[idx]
    clf = klass(alpha=0.0001, max_iter=1000, class_weight=None, shuffle=False).fit(X, y)
    f1 = metrics.f1_score(y, clf.predict(X), average="weighted")
    assert_almost_equal(f1, 0.96, decimal=1)

    # make the same prediction using balanced class_weight
    clf_balanced = klass(
        alpha=0.0001, max_iter=1000, class_weight="balanced", shuffle=False
    ).fit(X, y)
    f1 = metrics.f1_score(y, clf_balanced.predict(X), average="weighted")
    assert_almost_equal(f1, 0.96, decimal=1)

    # Make sure that in the balanced case it does not change anything
    # to use "balanced"
    assert_array_almost_equal(clf.coef_, clf_balanced.coef_, 6)

    # build a very imbalanced dataset out of iris data
    X_0 = X[y == 0, :]
    y_0 = y[y == 0]

    X_imbalanced = np.vstack([X] + [X_0] * 10)
    y_imbalanced = np.concatenate([y] + [y_0] * 10)

    # fit a model on the imbalanced data without class weight info
    clf = klass(max_iter=1000, class_weight=None, shuffle=False)
    clf.fit(X_imbalanced, y_imbalanced)
    y_pred = clf.predict(X)
    assert metrics.f1_score(y, y_pred, average="weighted") < 0.96

    # fit a model with balanced class_weight enabled
    clf = klass(max_iter=1000, class_weight="balanced", shuffle=False)
    clf.fit(X_imbalanced, y_imbalanced)
    y_pred = clf.predict(X)
    assert metrics.f1_score(y, y_pred, average="weighted") > 0.96

@pytest.mark.parametrize("klass", [SGDClassifier, SparseSGDClassifier])
def test_sample_weights(klass):
    # Test weights on individual samples
    X = np.array([[-1.0, -1.0], [-1.0, 0], [-0.8, -1.0], [1.0, 1.0], [1.0, 0.0]])
    y = [1, 1, 1, -1, -1]

    clf = klass(alpha=0.1, max_iter=1000, fit_intercept=False)
    clf.fit(X, y)
    assert_array_equal(clf.predict([[0.2, -1.0]]), np.array([1]))

    # we give a small weight to class 1
    clf.fit(X, y, sample_weight=[0.001] * 3 + [1] * 2)

    # now the hyperplane should rotate clock-wise and
    # the prediction on this point should shift
    assert_array_equal(clf.predict([[0.2, -1.0]]), np.array([-1]))


@pytest.mark.parametrize(
    "klass", [SGDClassifier, SparseSGDClassifier, SGDOneClassSVM, SparseSGDOneClassSVM]
)
def test_wrong_sample_weights(klass):
    # Test if ValueError is raised if sample_weight has wrong shape
    if klass in [SGDClassifier, SparseSGDClassifier]:
        clf = klass(alpha=0.1, max_iter=1000, fit_intercept=False)
    elif klass in [SGDOneClassSVM, SparseSGDOneClassSVM]:
        clf = klass(nu=0.1, max_iter=1000, fit_intercept=False)
    # provided sample_weight too long
    with pytest.raises(ValueError):
        clf.fit(X, Y, sample_weight=np.arange(7))


@pytest.mark.parametrize("klass", [SGDClassifier, SparseSGDClassifier])
def test_partial_fit_exception(klass):
    clf = klass(alpha=0.01)
    # classes was not specified
    with pytest.raises(ValueError):
        clf.partial_fit(X3, Y3)


@pytest.mark.parametrize("klass", [SGDClassifier, SparseSGDClassifier])
def test_partial_fit_binary(klass):
    third = X.shape[0] // 3
    clf = klass(alpha=0.01)
    classes = np.unique(Y)

    clf.partial_fit(X[:third], Y[:third], classes=classes)
    assert clf.coef_.shape == (1, X.shape[1])
    assert clf.intercept_.shape == (1,)
    assert clf.decision_function([[0, 0]]).shape == (1,)
    id1 = id(clf.coef_.data)

    clf.partial_fit(X[third:], Y[third:])
    id2 = id(clf.coef_.data)
    # check that coef_ hasn't been re-allocated
    assert id1 == id2

    y_pred = clf.predict(T)
    assert_array_equal(y_pred, true_result)

@pytest.mark.parametrize("klass", [SGDClassifier, SparseSGDClassifier])
def test_partial_fit_multiclass(klass):
    third = X2.shape[0] // 3
    clf = klass(alpha=0.01)
    classes = np.unique(Y2)

    clf.partial_fit(X2[:third], Y2[:third], classes=classes)
    assert clf.coef_.shape == (3, X2.shape[1])
    assert clf.intercept_.shape == (3,)
    assert clf.decision_function([[0, 0]]).shape == (1, 3)
    id1 = id(clf.coef_.data)

    clf.partial_fit(X2[third:], Y2[third:])
    id2 = id(clf.coef_.data)
    # check that coef_ hasn't been re-allocated
    assert id1 == id2

@pytest.mark.parametrize("klass", [SGDClassifier, SparseSGDClassifier])
def test_partial_fit_multiclass_average(klass):
    third = X2.shape[0] // 3
    clf = klass(alpha=0.01, average=X2.shape[0])
    classes = np.unique(Y2)

    clf.partial_fit(X2[:third], Y2[:third], classes=classes)
    assert clf.coef_.shape == (3, X2.shape[1])
    assert clf.intercept_.shape == (3,)

    clf.partial_fit(X2[third:], Y2[third:])
    assert clf.coef_.shape == (3, X2.shape[1])
    assert clf.intercept_.shape == (3,)


@pytest.mark.parametrize("klass", [SGDClassifier, SparseSGDClassifier])
def test_fit_then_partial_fit(klass):
    # Partial_fit should work after initial fit in the multiclass case.
    # Non-regression test for #2496; fit would previously produce a
    # Fortran-ordered coef_ that subsequent partial_fit couldn't handle.
    clf = klass()
    clf.fit(X2, Y2)
    clf.partial_fit(X2, Y2)  # no exception here


@pytest.mark.parametrize("klass", [SGDClassifier, SparseSGDClassifier])
@pytest.mark.parametrize("lr", ["constant", "optimal", "invscaling", "adaptive"])
def test_partial_fit_equal_fit_classif(klass, lr):
    for X_, Y_, T_ in ((X, Y, T), (X2, Y2, T2)):
        clf = klass(alpha=0.01, eta0=0.01, max_iter=2, learning_rate=lr, shuffle=False)
        clf.fit(X_, Y_)
        y_pred = clf.decision_function(T_)
        t = clf.t_

        classes = np.unique(Y_)
        clf = klass(alpha=0.01, eta0=0.01, learning_rate=lr, shuffle=False)
        for i in range(2):
            clf.partial_fit(X_, Y_, classes=classes)
        y_pred2 = clf.decision_function(T_)

        assert clf.t_ == t
        assert_array_almost_equal(y_pred, y_pred2, decimal=2)


@pytest.mark.parametrize("klass", [SGDClassifier, SparseSGDClassifier])
def test_regression_losses(klass):
    random_state = np.random.RandomState(1)
    clf = klass(
        alpha=0.01,
        learning_rate="constant",
        eta0=0.1,
        loss="epsilon_insensitive",
        random_state=random_state,
    )
    clf.fit(X, Y)
    assert 1.0 == np.mean(clf.predict(X) == Y)

    clf = klass(
        alpha=0.01,
        learning_rate="constant",
        eta0=0.1,
        loss="squared_epsilon_insensitive",
        random_state=random_state,
    )
    clf.fit(X, Y)
    assert 1.0 == np.mean(clf.predict(X) == Y)

    clf = klass(alpha=0.01, loss="huber", random_state=random_state)
    clf.fit(X, Y)
    assert 1.0 == np.mean(clf.predict(X) == Y)

    clf = klass(
        alpha=0.01,
        learning_rate="constant",
        eta0=0.01,
        loss="squared_error",
        random_state=random_state,
    )
    clf.fit(X, Y)
    assert 1.0 == np.mean(clf.predict(X) == Y)


@pytest.mark.parametrize("klass", [SGDClassifier, SparseSGDClassifier])
def test_warm_start_multiclass(klass):
    _test_warm_start(klass, X2, Y2, "optimal")


@pytest.mark.parametrize("klass", [SGDClassifier, SparseSGDClassifier])
def test_multiple_fit(klass):
    # Test multiple calls of fit w/ different shaped inputs.
    clf = klass(alpha=0.01, shuffle=False)
    clf.fit(X, Y)
    assert hasattr(clf, "coef_")

    # Non-regression test: try fitting with a different label set.
    y = [["ham", "spam"][i] for i in LabelEncoder().fit_transform(Y)]
    clf.fit(X[:, :-1], y)


###############################################################################
# Regression Test Case

@pytest.mark.parametrize("klass", [SGDRegressor, SparseSGDRegressor])
def test_sgd_reg(klass):
    # Check that SGD gives any results.
    clf = klass(alpha=0.1, max_iter=2, fit_intercept=False)
    clf.fit([[0, 0], [1, 1], [2, 2]], [0, 1, 2])
    assert clf.coef_[0] == clf.coef_[1]


@pytest.mark.parametrize("klass", [SGDRegressor, SparseSGDRegressor])
def test_sgd_averaged_computed_correctly(klass):
    # Tests that the average regressor matches the naive implementation
    eta = 0.001
    alpha = 0.01
    n_samples = 20
    n_features = 10
    rng = np.random.RandomState(0)
    X = rng.normal(size=(n_samples, n_features))
    w = rng.normal(size=n_features)

    # simple linear function without noise
    y = np.dot(X, w)

    clf = klass(
        loss="squared_error",
        learning_rate="constant",
        eta0=eta,
        alpha=alpha,
        fit_intercept=True,
        max_iter=1,
        average=True,
        shuffle=False,
    )

    clf.fit(X, y)
    average_weights, average_intercept = asgd(klass, X, y, eta, alpha)

    assert_array_almost_equal(clf.coef_, average_weights, decimal=16)
    assert_almost_equal(clf.intercept_, average_intercept, decimal=16)


@pytest.mark.parametrize("klass", [SGDRegressor, SparseSGDRegressor])
def test_sgd_averaged_partial_fit(klass):
    # Tests whether the partial fit yields the same average as the fit
    eta = 0.001
    alpha = 0.01
    n_samples = 20
    n_features = 10
    rng = np.random.RandomState(0)
    X = rng.normal(size=(n_samples, n_features))
    w = rng.normal(size=n_features)

    # simple linear function without noise
    y = np.dot(X, w)

    clf = klass(
        loss="squared_error",
        learning_rate="constant",
        eta0=eta,
        alpha=alpha,
        fit_intercept=True,
        max_iter=1,
        average=True,
        shuffle=False,
    )

    clf.partial_fit(X[: int(n_samples / 2)][:], y[: int(n_samples / 2)])
    clf.partial_fit(X[int(n_samples / 2) :][:], y[int(n_samples / 2) :])
    average_weights, average_intercept = asgd(klass, X, y, eta, alpha)

    assert_array_almost_equal(clf.coef_, average_weights, decimal=16)
    assert_almost_equal(clf.intercept_[0], average_intercept, decimal=16)


@pytest.mark.parametrize("klass", [SGDRegressor, SparseSGDRegressor])
def test_average_sparse(klass):
    # Checks the average weights on data with 0s
    eta = 0.001
    alpha = 0.01
    clf = klass(
        loss="squared_error",
        learning_rate="constant",
        eta0=eta,
        alpha=alpha,
        fit_intercept=True,
        max_iter=1,
        average=True,
        shuffle=False,
    )

    n_samples = Y3.shape[0]

    clf.partial_fit(X3[: int(n_samples / 2)][:], Y3[: int(n_samples / 2)])
    clf.partial_fit(X3[int(n_samples / 2) :][:], Y3[int(n_samples / 2) :])
    average_weights, average_intercept = asgd(klass, X3, Y3, eta, alpha)

    assert_array_almost_equal(clf.coef_, average_weights, decimal=16)
    assert_almost_equal(clf.intercept_, average_intercept, decimal=16)

@pytest.mark.parametrize("klass", [SGDRegressor, SparseSGDRegressor])
def test_sgd_least_squares_fit(klass):
    xmin, xmax = -5, 5
    n_samples = 100
    rng = np.random.RandomState(0)
    X = np.linspace(xmin, xmax, n_samples).reshape(n_samples, 1)

    # simple linear function without noise
    y = 0.5 * X.ravel()

    clf = klass(loss="squared_error", alpha=0.1, max_iter=20, fit_intercept=False)
    clf.fit(X, y)
    score = clf.score(X, y)
    assert score > 0.99

    # simple linear function with noise
    y = 0.5 * X.ravel() + rng.randn(n_samples, 1).ravel()

    clf = klass(loss="squared_error", alpha=0.1, max_iter=20, fit_intercept=False)
    clf.fit(X, y)
    score = clf.score(X, y)
    assert score > 0.5


@pytest.mark.parametrize("klass", [SGDRegressor, SparseSGDRegressor])
def test_sgd_epsilon_insensitive(klass):
    xmin, xmax = -5, 5
    n_samples = 100
    rng = np.random.RandomState(0)
    X = np.linspace(xmin, xmax, n_samples).reshape(n_samples, 1)

    # simple linear function without noise
    y = 0.5 * X.ravel()

    clf = klass(
        loss="epsilon_insensitive",
        epsilon=0.01,
        alpha=0.1,
        max_iter=20,
        fit_intercept=False,
    )
    clf.fit(X, y)
    score = clf.score(X, y)
    assert score > 0.99

    # simple linear function with noise
    y = 0.5 * X.ravel() + rng.randn(n_samples, 1).ravel()

    clf = klass(
        loss="epsilon_insensitive",
        epsilon=0.01,
        alpha=0.1,
        max_iter=20,
        fit_intercept=False,
    )
    clf.fit(X, y)
    score = clf.score(X, y)
    assert score > 0.5


@pytest.mark.parametrize("klass", [SGDRegressor, SparseSGDRegressor])
def test_sgd_huber_fit(klass):
    xmin, xmax = -5, 5
    n_samples = 100
    rng = np.random.RandomState(0)
    X = np.linspace(xmin, xmax, n_samples).reshape(n_samples, 1)

    # simple linear function without noise
    y = 0.5 * X.ravel()

    clf = klass(loss="huber", epsilon=0.1, alpha=0.1, max_iter=20, fit_intercept=False)
    clf.fit(X, y)
    score = clf.score(X, y)
    assert score > 0.99

    # simple linear function with noise
    y = 0.5 * X.ravel() + rng.randn(n_samples, 1).ravel()

    clf = klass(loss="huber", epsilon=0.1, alpha=0.1, max_iter=20, fit_intercept=False)
    clf.fit(X, y)
    score = clf.score(X, y)
    assert score > 0.5


@pytest.mark.parametrize("klass", [SGDRegressor, SparseSGDRegressor])
def test_elasticnet_convergence(klass):
    # Check that the SGD output is consistent with coordinate descent
    n_samples, n_features = 1000, 5
    rng = np.random.RandomState(0)
    X = rng.randn(n_samples, n_features)
    # ground_truth linear model that generates y from X and to which the
    # models should converge if the regularizer were set to 0.0
    ground_truth_coef = rng.randn(n_features)
    y = np.dot(X, ground_truth_coef)

    # XXX: alpha = 0.1 seems to cause convergence problems
    for alpha in [0.01, 0.001]:
        for l1_ratio in [0.5, 0.8, 1.0]:
            cd = linear_model.ElasticNet(
                alpha=alpha, l1_ratio=l1_ratio, fit_intercept=False
            )
            cd.fit(X, y)
            sgd = klass(
                penalty="elasticnet",
                max_iter=50,
                alpha=alpha,
                l1_ratio=l1_ratio,
                fit_intercept=False,
            )
            sgd.fit(X, y)
            err_msg = (
                "cd and sgd did not converge to comparable "
                "results for alpha=%f and l1_ratio=%f" % (alpha, l1_ratio)
            )
            assert_almost_equal(cd.coef_, sgd.coef_, decimal=2, err_msg=err_msg)


@ignore_warnings
@pytest.mark.parametrize("klass", [SGDRegressor, SparseSGDRegressor])
def test_partial_fit(klass):
    third = X.shape[0] // 3
    clf = klass(alpha=0.01)

    clf.partial_fit(X[:third], Y[:third])
    assert clf.coef_.shape == (X.shape[1],)
    assert clf.intercept_.shape == (1,)
    assert clf.predict([[0, 0]]).shape == (1,)
    id1 = id(clf.coef_.data)

    clf.partial_fit(X[third:], Y[third:])
    id2 = id(clf.coef_.data)
    # check that coef_ hasn't been re-allocated
    assert id1 == id2

@pytest.mark.parametrize("klass", [SGDRegressor, SparseSGDRegressor])
@pytest.mark.parametrize("lr", ["constant", "optimal", "invscaling", "adaptive"])
def test_partial_fit_equal_fit(klass, lr):
    clf = klass(alpha=0.01, max_iter=2, eta0=0.01, learning_rate=lr, shuffle=False)
    clf.fit(X, Y)
    y_pred = clf.predict(T)
    t = clf.t_

    clf = klass(alpha=0.01, eta0=0.01, learning_rate=lr, shuffle=False)
    for i in range(2):
        clf.partial_fit(X, Y)
    y_pred2 = clf.predict(T)

    assert clf.t_ == t
    assert_array_almost_equal(y_pred, y_pred2, decimal=2)


@pytest.mark.parametrize("klass", [SGDRegressor, SparseSGDRegressor])
def test_loss_function_epsilon(klass):
    clf = klass(epsilon=0.9)
    clf.set_params(epsilon=0.1)
    assert clf.loss_functions["huber"][1] == 0.1


###############################################################################
# SGD One Class SVM Test Case


# a simple implementation of ASGD to use for testing SGDOneClassSVM
def asgd_oneclass(klass, X, eta, nu, coef_init=None, offset_init=0.0):
    if coef_init is None:
        coef = np.zeros(X.shape[1])
    else:
        coef = coef_init

    average_coef = np.zeros(X.shape[1])
    offset = offset_init
    intercept = 1 - offset
    average_intercept = 0.0
    decay = 1.0

    # sparse data has a fixed decay of .01
    if klass == SparseSGDOneClassSVM:
        decay = 0.01

    for i, entry in enumerate(X):
        p = np.dot(entry, coef)
        p += intercept
        if p <= 1.0:
            gradient = -1
        else:
            gradient = 0
        coef *= max(0, 1.0 - (eta * nu / 2))
        coef += -(eta * gradient * entry)
        intercept += -(eta * (nu + gradient)) * decay

        average_coef *= i
        average_coef += coef
        average_coef /= i + 1.0

        average_intercept *= i
        average_intercept += intercept
        average_intercept /= i + 1.0

    return average_coef, 1 - average_intercept
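
# For reference, each iteration of the loop above applies the SGD update used
# for the linear One-Class SVM with intercept b = 1 - offset:
#   p = w . x_i + b
#   g = -1 if p <= 1 else 0          (subgradient of the hinge term max(0, 1 - p))
#   w <- max(0, 1 - eta * nu / 2) * w - eta * g * x_i
#   b <- b - eta * (nu + g) * decay
# and keeps running (uniform) averages of w and b; the averaged offset that is
# returned is 1 minus the averaged intercept.
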
  1160. @pytest.mark.parametrize("klass", [SGDOneClassSVM, SparseSGDOneClassSVM])
  1161. def _test_warm_start_oneclass(klass, X, lr):
  1162. # Test that explicit warm restart...
  1163. clf = klass(nu=0.5, eta0=0.01, shuffle=False, learning_rate=lr)
  1164. clf.fit(X)
  1165. clf2 = klass(nu=0.1, eta0=0.01, shuffle=False, learning_rate=lr)
  1166. clf2.fit(X, coef_init=clf.coef_.copy(), offset_init=clf.offset_.copy())
  1167. # ... and implicit warm restart are equivalent.
  1168. clf3 = klass(nu=0.5, eta0=0.01, shuffle=False, warm_start=True, learning_rate=lr)
  1169. clf3.fit(X)
  1170. assert clf3.t_ == clf.t_
  1171. assert_allclose(clf3.coef_, clf.coef_)
  1172. clf3.set_params(nu=0.1)
  1173. clf3.fit(X)
  1174. assert clf3.t_ == clf2.t_
  1175. assert_allclose(clf3.coef_, clf2.coef_)
  1176. @pytest.mark.parametrize("klass", [SGDOneClassSVM, SparseSGDOneClassSVM])
  1177. @pytest.mark.parametrize("lr", ["constant", "optimal", "invscaling", "adaptive"])
  1178. def test_warm_start_oneclass(klass, lr):
  1179. _test_warm_start_oneclass(klass, X, lr)
  1180. @pytest.mark.parametrize("klass", [SGDOneClassSVM, SparseSGDOneClassSVM])
  1181. def test_clone_oneclass(klass):
  1182. # Test whether clone works ok.
  1183. clf = klass(nu=0.5)
  1184. clf = clone(clf)
  1185. clf.set_params(nu=0.1)
  1186. clf.fit(X)
  1187. clf2 = klass(nu=0.1)
  1188. clf2.fit(X)
  1189. assert_array_equal(clf.coef_, clf2.coef_)
@pytest.mark.parametrize("klass", [SGDOneClassSVM, SparseSGDOneClassSVM])
def test_partial_fit_oneclass(klass):
    third = X.shape[0] // 3
    clf = klass(nu=0.1)

    clf.partial_fit(X[:third])
    assert clf.coef_.shape == (X.shape[1],)
    assert clf.offset_.shape == (1,)
    assert clf.predict([[0, 0]]).shape == (1,)
    previous_coefs = clf.coef_

    clf.partial_fit(X[third:])
    # check that coef_ hasn't been re-allocated
    assert clf.coef_ is previous_coefs

    # raises ValueError if the number of features does not match previous data
    with pytest.raises(ValueError):
        clf.partial_fit(X[:, 1])


@pytest.mark.parametrize("klass", [SGDOneClassSVM, SparseSGDOneClassSVM])
@pytest.mark.parametrize("lr", ["constant", "optimal", "invscaling", "adaptive"])
def test_partial_fit_equal_fit_oneclass(klass, lr):
    clf = klass(nu=0.05, max_iter=2, eta0=0.01, learning_rate=lr, shuffle=False)
    clf.fit(X)
    y_scores = clf.decision_function(T)
    t = clf.t_
    coef = clf.coef_
    offset = clf.offset_

    clf = klass(nu=0.05, eta0=0.01, max_iter=1, learning_rate=lr, shuffle=False)
    for _ in range(2):
        clf.partial_fit(X)
    y_scores2 = clf.decision_function(T)

    assert clf.t_ == t
    assert_allclose(y_scores, y_scores2)
    assert_allclose(clf.coef_, coef)
    assert_allclose(clf.offset_, offset)


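# The averaging tests below compare against asgd_oneclass above, which keeps a
# running mean of the SGD iterates via the incremental identity
#   avg_i = (i * avg_{i-1} + w_i) / (i + 1),
# i.e. the plain average of all iterates seen so far. They also rely on the
# documented meaning of an integer `average=k`: averaging only starts once k
# samples have been seen, which is what the late-onset test checks.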
@pytest.mark.parametrize("klass", [SGDOneClassSVM, SparseSGDOneClassSVM])
def test_late_onset_averaging_reached_oneclass(klass):
    # Test that averaging is only applied once the late-onset threshold is reached
    eta0 = 0.001
    nu = 0.05

    # 2 passes over the training set but average only at second pass
    clf1 = klass(
        average=7, learning_rate="constant", eta0=eta0, nu=nu, max_iter=2, shuffle=False
    )
    # 1 pass over the training set with no averaging
    clf2 = klass(
        average=0, learning_rate="constant", eta0=eta0, nu=nu, max_iter=1, shuffle=False
    )

    clf1.fit(X)
    clf2.fit(X)

    # Start from clf2 solution, compute averaging using asgd function and
    # compare with clf1 solution
    average_coef, average_offset = asgd_oneclass(
        klass, X, eta0, nu, coef_init=clf2.coef_.ravel(), offset_init=clf2.offset_
    )

    assert_allclose(clf1.coef_.ravel(), average_coef.ravel())
    assert_allclose(clf1.offset_, average_offset)


@pytest.mark.parametrize("klass", [SGDOneClassSVM, SparseSGDOneClassSVM])
def test_sgd_averaged_computed_correctly_oneclass(klass):
    # Tests that the averaged SGD One-Class SVM matches the naive implementation
    eta = 0.001
    nu = 0.05
    n_samples = 20
    n_features = 10
    rng = np.random.RandomState(0)
    X = rng.normal(size=(n_samples, n_features))

    clf = klass(
        learning_rate="constant",
        eta0=eta,
        nu=nu,
        fit_intercept=True,
        max_iter=1,
        average=True,
        shuffle=False,
    )
    clf.fit(X)

    average_coef, average_offset = asgd_oneclass(klass, X, eta, nu)

    assert_allclose(clf.coef_, average_coef)
    assert_allclose(clf.offset_, average_offset)


@pytest.mark.parametrize("klass", [SGDOneClassSVM, SparseSGDOneClassSVM])
def test_sgd_averaged_partial_fit_oneclass(klass):
    # Tests whether the partial fit yields the same average as the fit
    eta = 0.001
    nu = 0.05
    n_samples = 20
    n_features = 10
    rng = np.random.RandomState(0)
    X = rng.normal(size=(n_samples, n_features))

    clf = klass(
        learning_rate="constant",
        eta0=eta,
        nu=nu,
        fit_intercept=True,
        max_iter=1,
        average=True,
        shuffle=False,
    )
    clf.partial_fit(X[: int(n_samples / 2)])
    clf.partial_fit(X[int(n_samples / 2) :])

    average_coef, average_offset = asgd_oneclass(klass, X, eta, nu)

    assert_allclose(clf.coef_, average_coef)
    assert_allclose(clf.offset_, average_offset)


@pytest.mark.parametrize("klass", [SGDOneClassSVM, SparseSGDOneClassSVM])
def test_average_sparse_oneclass(klass):
    # Checks the average coef on data with 0s
    eta = 0.001
    nu = 0.01
    clf = klass(
        learning_rate="constant",
        eta0=eta,
        nu=nu,
        fit_intercept=True,
        max_iter=1,
        average=True,
        shuffle=False,
    )

    n_samples = X3.shape[0]
    clf.partial_fit(X3[: int(n_samples / 2)])
    clf.partial_fit(X3[int(n_samples / 2) :])

    average_coef, average_offset = asgd_oneclass(klass, X3, eta, nu)

    assert_allclose(clf.coef_, average_coef)
    assert_allclose(clf.offset_, average_offset)


def test_sgd_oneclass():
    # Test fit, decision_function, predict and score_samples on a toy
    # dataset
    X_train = np.array([[-2, -1], [-1, -1], [1, 1]])
    X_test = np.array([[0.5, -2], [2, 2]])
    clf = SGDOneClassSVM(
        nu=0.5, eta0=1, learning_rate="constant", shuffle=False, max_iter=1
    )
    clf.fit(X_train)
    assert_allclose(clf.coef_, np.array([-0.125, 0.4375]))
    assert clf.offset_[0] == -0.5

    scores = clf.score_samples(X_test)
    assert_allclose(scores, np.array([-0.9375, 0.625]))

    dec = clf.score_samples(X_test) - clf.offset_
    assert_allclose(clf.decision_function(X_test), dec)

    pred = clf.predict(X_test)
    assert_array_equal(pred, np.array([-1, 1]))


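# The next test relies on the standard kernel-approximation trick: Nystroem
# builds an explicit (approximate) feature map for the RBF kernel, so a linear
# SGDOneClassSVM fitted in that transformed space should behave like a
# kernelized OneClassSVM fitted on the raw data. Agreement is only expected to
# be approximate, hence the thresholds on label agreement and on the
# correlation between decision values.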
def test_ocsvm_vs_sgdocsvm():
    # Checks that SGDOneClassSVM gives a good approximation of a kernelized
    # One-Class SVM
    nu = 0.05
    gamma = 2.0
    random_state = 42

    # Generate train and test data
    rng = np.random.RandomState(random_state)
    X = 0.3 * rng.randn(500, 2)
    X_train = np.r_[X + 2, X - 2]
    X = 0.3 * rng.randn(100, 2)
    X_test = np.r_[X + 2, X - 2]

    # One-Class SVM
    clf = OneClassSVM(gamma=gamma, kernel="rbf", nu=nu)
    clf.fit(X_train)
    y_pred_ocsvm = clf.predict(X_test)
    dec_ocsvm = clf.decision_function(X_test).reshape(1, -1)

    # SGDOneClassSVM using kernel approximation
    max_iter = 15
    transform = Nystroem(gamma=gamma, random_state=random_state)
    clf_sgd = SGDOneClassSVM(
        nu=nu,
        shuffle=True,
        fit_intercept=True,
        max_iter=max_iter,
        random_state=random_state,
        tol=None,
    )
    pipe_sgd = make_pipeline(transform, clf_sgd)
    pipe_sgd.fit(X_train)
    y_pred_sgdocsvm = pipe_sgd.predict(X_test)
    dec_sgdocsvm = pipe_sgd.decision_function(X_test).reshape(1, -1)

    assert np.mean(y_pred_sgdocsvm == y_pred_ocsvm) >= 0.99
    corrcoef = np.corrcoef(np.concatenate((dec_ocsvm, dec_sgdocsvm)))[0, 1]
    assert corrcoef >= 0.9


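# Reminder for the test below: the elastic net penalty in SGD is a convex
# combination of the two norms, roughly
#   l1_ratio * ||w||_1 + (1 - l1_ratio) * 0.5 * ||w||_2^2,
# so an l1_ratio very close to 1 should reproduce penalty="l1" and an l1_ratio
# very close to 0 should reproduce penalty="l2".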
def test_l1_ratio():
    # Test if l1 ratio extremes match L1 and L2 penalty settings.
    X, y = datasets.make_classification(
        n_samples=1000, n_features=100, n_informative=20, random_state=1234
    )

    # test if elasticnet with l1_ratio near 1 gives same result as pure l1
    est_en = SGDClassifier(
        alpha=0.001,
        penalty="elasticnet",
        tol=None,
        max_iter=6,
        l1_ratio=0.9999999999,
        random_state=42,
    ).fit(X, y)
    est_l1 = SGDClassifier(
        alpha=0.001, penalty="l1", max_iter=6, random_state=42, tol=None
    ).fit(X, y)
    assert_array_almost_equal(est_en.coef_, est_l1.coef_)

    # test if elasticnet with l1_ratio near 0 gives same result as pure l2
    est_en = SGDClassifier(
        alpha=0.001,
        penalty="elasticnet",
        tol=None,
        max_iter=6,
        l1_ratio=0.0000000001,
        random_state=42,
    ).fit(X, y)
    est_l2 = SGDClassifier(
        alpha=0.001, penalty="l2", max_iter=6, random_state=42, tol=None
    ).fit(X, y)
    assert_array_almost_equal(est_en.coef_, est_l2.coef_)


def test_underflow_or_overflow():
    with np.errstate(all="raise"):
        # Generate some weird data with hugely unscaled features
        rng = np.random.RandomState(0)
        n_samples = 100
        n_features = 10
        X = rng.normal(size=(n_samples, n_features))
        X[:, :2] *= 1e300
        assert np.isfinite(X).all()

        # Use MinMaxScaler to scale the data without introducing a numerical
        # instability (computing the standard deviation naively is not possible
        # on this data)
        X_scaled = MinMaxScaler().fit_transform(X)
        assert np.isfinite(X_scaled).all()

        # Define a ground truth on the scaled data
        ground_truth = rng.normal(size=n_features)
        y = (np.dot(X_scaled, ground_truth) > 0.0).astype(np.int32)
        assert_array_equal(np.unique(y), [0, 1])

        model = SGDClassifier(alpha=0.1, loss="squared_hinge", max_iter=500)

        # smoke test: model is stable on scaled data
        model.fit(X_scaled, y)
        assert np.isfinite(model.coef_).all()

        # model is numerically unstable on unscaled data
        msg_regxp = (
            r"Floating-point under-/overflow occurred at epoch #.*"
            " Scaling input data with StandardScaler or MinMaxScaler"
            " might help."
        )
        with pytest.raises(ValueError, match=msg_regxp):
            model.fit(X, y)


def test_numerical_stability_large_gradient():
    # Non-regression test case for numerical stability on scaled problems
    # where the gradient can still explode with some losses
    model = SGDClassifier(
        loss="squared_hinge",
        max_iter=10,
        shuffle=True,
        penalty="elasticnet",
        l1_ratio=0.3,
        alpha=0.01,
        eta0=0.001,
        random_state=0,
        tol=None,
    )
    with np.errstate(all="raise"):
        model.fit(iris.data, iris.target)
    assert np.isfinite(model.coef_).all()


@pytest.mark.parametrize("penalty", ["l2", "l1", "elasticnet"])
def test_large_regularization(penalty):
    # Non-regression test for numerical stability issues caused by large
    # regularization parameters
    model = SGDClassifier(
        alpha=1e5,
        learning_rate="constant",
        eta0=0.1,
        penalty=penalty,
        shuffle=False,
        tol=None,
        max_iter=6,
    )
    with np.errstate(all="raise"):
        model.fit(iris.data, iris.target)
    assert_array_almost_equal(model.coef_, np.zeros_like(model.coef_))


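# The tol-based stopping rule checked below: with tol set to None, SGD always
# runs for max_iter epochs; otherwise training stops early once the epoch loss
# fails to improve on the best loss by more than tol for n_iter_no_change
# consecutive epochs, so a larger tol should stop sooner.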
def test_tol_parameter():
    # Test that the tol parameter behaves as expected
    X = StandardScaler().fit_transform(iris.data)
    y = iris.target == 1

    # With tol set to None, the number of iterations should be equal to max_iter
    max_iter = 42
    model_0 = SGDClassifier(tol=None, random_state=0, max_iter=max_iter)
    model_0.fit(X, y)
    assert max_iter == model_0.n_iter_

    # If tol is not None, the number of iterations should be less than max_iter
    max_iter = 2000
    model_1 = SGDClassifier(tol=0, random_state=0, max_iter=max_iter)
    model_1.fit(X, y)
    assert max_iter > model_1.n_iter_
    assert model_1.n_iter_ > 5

    # A larger tol should yield a smaller number of iterations
    model_2 = SGDClassifier(tol=0.1, random_state=0, max_iter=max_iter)
    model_2.fit(X, y)
    assert model_1.n_iter_ > model_2.n_iter_
    assert model_2.n_iter_ > 3

    # Strict tolerance and small max_iter should trigger a warning
    model_3 = SGDClassifier(max_iter=3, tol=1e-3, random_state=0)
    warning_message = (
        "Maximum number of iteration reached before "
        "convergence. Consider increasing max_iter to "
        "improve the fit."
    )
    with pytest.warns(ConvergenceWarning, match=warning_message):
        model_3.fit(X, y)
    assert model_3.n_iter_ == 3


def _test_loss_common(loss_function, cases):
    # Test the different loss functions
    # cases is a list of (p, y, expected_loss, expected_dloss)
    for p, y, expected_loss, expected_dloss in cases:
        assert_almost_equal(loss_function.py_loss(p, y), expected_loss)
        assert_almost_equal(loss_function.py_dloss(p, y), expected_dloss)


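# Expected values below assume the usual hinge loss with margin `threshold`:
# loss(p, y) = max(0, threshold - p * y), with (sub)gradient w.r.t. p equal to
# -y when p * y <= threshold and 0 otherwise. threshold=1.0 is the SVM hinge,
# threshold=0.0 the perceptron loss.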
def test_loss_hinge():
    # Test Hinge (hinge / perceptron)
    # hinge
    loss = sgd_fast.Hinge(1.0)
    cases = [
        # (p, y, expected_loss, expected_dloss)
        (1.1, 1.0, 0.0, 0.0),
        (-2.0, -1.0, 0.0, 0.0),
        (1.0, 1.0, 0.0, -1.0),
        (-1.0, -1.0, 0.0, 1.0),
        (0.5, 1.0, 0.5, -1.0),
        (2.0, -1.0, 3.0, 1.0),
        (-0.5, -1.0, 0.5, 1.0),
        (0.0, 1.0, 1, -1.0),
    ]
    _test_loss_common(loss, cases)

    # perceptron
    loss = sgd_fast.Hinge(0.0)
    cases = [
        # (p, y, expected_loss, expected_dloss)
        (1.0, 1.0, 0.0, 0.0),
        (-0.1, -1.0, 0.0, 0.0),
        (0.0, 1.0, 0.0, -1.0),
        (0.0, -1.0, 0.0, 1.0),
        (0.5, -1.0, 0.5, 1.0),
        (2.0, -1.0, 2.0, 1.0),
        (-0.5, 1.0, 0.5, -1.0),
        (-1.0, 1.0, 1.0, -1.0),
    ]
    _test_loss_common(loss, cases)


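# The squared hinge cases assume loss(p, y) = max(0, threshold - p * y) ** 2,
# whose derivative w.r.t. p is -2 * y * max(0, threshold - p * y).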
def test_gradient_squared_hinge():
    # Test SquaredHinge
    loss = sgd_fast.SquaredHinge(1.0)
    cases = [
        # (p, y, expected_loss, expected_dloss)
        (1.0, 1.0, 0.0, 0.0),
        (-2.0, -1.0, 0.0, 0.0),
        (1.0, -1.0, 4.0, 4.0),
        (-1.0, 1.0, 4.0, -4.0),
        (0.5, 1.0, 0.25, -1.0),
        (0.5, -1.0, 2.25, 3.0),
    ]
    _test_loss_common(loss, cases)


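# The logistic loss cases assume loss(p, y) = log(1 + exp(-p * y)) with
# derivative -y / (1 + exp(p * y)). The extra assertions at |p * y| > 18 cover
# the asymptotic branches the Cython implementation appears to use to avoid
# overflow: loss ~= exp(-p * y) for large positive margins and ~= -p * y for
# large negative ones.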
def test_loss_log():
    # Test Log (logistic loss)
    loss = sgd_fast.Log()
    cases = [
        # (p, y, expected_loss, expected_dloss)
        (1.0, 1.0, np.log(1.0 + np.exp(-1.0)), -1.0 / (np.exp(1.0) + 1.0)),
        (1.0, -1.0, np.log(1.0 + np.exp(1.0)), 1.0 / (np.exp(-1.0) + 1.0)),
        (-1.0, -1.0, np.log(1.0 + np.exp(-1.0)), 1.0 / (np.exp(1.0) + 1.0)),
        (-1.0, 1.0, np.log(1.0 + np.exp(1.0)), -1.0 / (np.exp(-1.0) + 1.0)),
        (0.0, 1.0, np.log(2), -0.5),
        (0.0, -1.0, np.log(2), 0.5),
        (17.9, -1.0, 17.9, 1.0),
        (-17.9, 1.0, 17.9, -1.0),
    ]
    _test_loss_common(loss, cases)
    assert_almost_equal(loss.py_dloss(18.1, 1.0), np.exp(-18.1) * -1.0, 16)
    assert_almost_equal(loss.py_loss(18.1, 1.0), np.exp(-18.1), 16)
    assert_almost_equal(loss.py_dloss(-18.1, -1.0), np.exp(-18.1) * 1.0, 16)
    assert_almost_equal(loss.py_loss(-18.1, 1.0), 18.1, 16)


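# The squared loss cases assume loss(p, y) = 0.5 * (p - y) ** 2 with derivative
# p - y, so loss and gradient vanish exactly when the prediction is perfect.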
def test_loss_squared_loss():
    # Test SquaredLoss
    loss = sgd_fast.SquaredLoss()
    cases = [
        # (p, y, expected_loss, expected_dloss)
        (0.0, 0.0, 0.0, 0.0),
        (1.0, 1.0, 0.0, 0.0),
        (1.0, 0.0, 0.5, 1.0),
        (0.5, -1.0, 1.125, 1.5),
        (-2.5, 2.0, 10.125, -4.5),
    ]
    _test_loss_common(loss, cases)


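# The Huber cases assume, with residual r = p - y and delta c (here c=0.1):
# loss = 0.5 * r ** 2 if |r| <= c else c * |r| - 0.5 * c ** 2, with derivative
# r inside the quadratic region and c * sign(r) in the linear tails.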
def test_loss_huber():
    # Test Huber
    loss = sgd_fast.Huber(0.1)
    cases = [
        # (p, y, expected_loss, expected_dloss)
        (0.0, 0.0, 0.0, 0.0),
        (0.1, 0.0, 0.005, 0.1),
        (0.0, 0.1, 0.005, -0.1),
        (3.95, 4.0, 0.00125, -0.05),
        (5.0, 2.0, 0.295, 0.1),
        (-1.0, 5.0, 0.595, -0.1),
    ]
    _test_loss_common(loss, cases)


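# The modified Huber cases assume, with margin z = p * y:
# loss = 0 if z >= 1, (1 - z) ** 2 if -1 <= z < 1, and -4 * z if z < -1;
# the corresponding derivatives w.r.t. p are 0, -2 * y * (1 - z), and -4 * y.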
def test_loss_modified_huber():
    # Test ModifiedHuber
    loss = sgd_fast.ModifiedHuber()
    cases = [
        # (p, y, expected_loss, expected_dloss)
        (1.0, 1.0, 0.0, 0.0),
        (-1.0, -1.0, 0.0, 0.0),
        (2.0, 1.0, 0.0, 0.0),
        (0.0, 1.0, 1.0, -2.0),
        (-1.0, 1.0, 4.0, -4.0),
        (0.5, -1.0, 2.25, 3.0),
        (-2.0, 1.0, 8, -4.0),
        (-3.0, 1.0, 12, -4.0),
    ]
    _test_loss_common(loss, cases)


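# The epsilon-insensitive cases assume loss(p, y) = max(0, |p - y| - epsilon),
# with derivative sign(p - y) once the residual leaves the epsilon tube and 0
# inside it.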
def test_loss_epsilon_insensitive():
    # Test EpsilonInsensitive
    loss = sgd_fast.EpsilonInsensitive(0.1)
    cases = [
        # (p, y, expected_loss, expected_dloss)
        (0.0, 0.0, 0.0, 0.0),
        (0.1, 0.0, 0.0, 0.0),
        (-2.05, -2.0, 0.0, 0.0),
        (3.05, 3.0, 0.0, 0.0),
        (2.2, 2.0, 0.1, 1.0),
        (2.0, -1.0, 2.9, 1.0),
        (2.0, 2.2, 0.1, -1.0),
        (-2.0, 1.0, 2.9, -1.0),
    ]
    _test_loss_common(loss, cases)


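# The squared epsilon-insensitive cases assume
# loss(p, y) = max(0, |p - y| - epsilon) ** 2, with derivative
# 2 * sign(p - y) * max(0, |p - y| - epsilon).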
def test_loss_squared_epsilon_insensitive():
    # Test SquaredEpsilonInsensitive
    loss = sgd_fast.SquaredEpsilonInsensitive(0.1)
    cases = [
        # (p, y, expected_loss, expected_dloss)
        (0.0, 0.0, 0.0, 0.0),
        (0.1, 0.0, 0.0, 0.0),
        (-2.05, -2.0, 0.0, 0.0),
        (3.05, 3.0, 0.0, 0.0),
        (2.2, 2.0, 0.01, 0.2),
        (2.0, -1.0, 8.41, 5.8),
        (2.0, 2.2, 0.01, -0.2),
        (-2.0, 1.0, 8.41, -5.8),
    ]
    _test_loss_common(loss, cases)


def test_multi_thread_multi_class_and_early_stopping():
    # This is a non-regression test for a bad interaction between
    # early stopping internal attribute and thread-based parallelism.
    clf = SGDClassifier(
        alpha=1e-3,
        tol=1e-3,
        max_iter=1000,
        early_stopping=True,
        n_iter_no_change=100,
        random_state=0,
        n_jobs=2,
    )
    clf.fit(iris.data, iris.target)
    assert clf.n_iter_ > clf.n_iter_no_change
    assert clf.n_iter_ < clf.n_iter_no_change + 20
    assert clf.score(iris.data, iris.target) > 0.8


def test_multi_core_gridsearch_and_early_stopping():
    # This is a non-regression test for a bad interaction between
    # early stopping internal attribute and process-based multi-core
    # parallelism.
    param_grid = {
        "alpha": np.logspace(-4, 4, 9),
        "n_iter_no_change": [5, 10, 50],
    }
    clf = SGDClassifier(tol=1e-2, max_iter=1000, early_stopping=True, random_state=0)
    search = RandomizedSearchCV(clf, param_grid, n_iter=5, n_jobs=2, random_state=0)
    search.fit(iris.data, iris.target)
    assert search.best_score_ > 0.8


@pytest.mark.parametrize("backend", ["loky", "multiprocessing", "threading"])
def test_SGDClassifier_fit_for_all_backends(backend):
    # This is a non-regression smoke test. In the multi-class case,
    # SGDClassifier.fit fits each class in a one-versus-all fashion using
    # joblib.Parallel. However, each OvA step updates the coef_ attribute of
    # the estimator in-place. Internally, SGDClassifier calls Parallel using
    # require='sharedmem'. This test makes sure SGDClassifier.fit works
    # consistently even when the user asks for a backend that does not provide
    # sharedmem semantics.

    # We further test a case where memmapping would have been used if
    # SGDClassifier.fit was called from a loky or multiprocessing backend. In
    # this specific case, in-place modification of clf.coef_ would have caused
    # a segmentation fault when trying to write in a readonly memory mapped
    # buffer.
    random_state = np.random.RandomState(42)

    # Create a classification problem with 50000 features and 20 classes. Using
    # loky or multiprocessing this makes clf.coef_ exceed the threshold
    # above which memmapping is used in joblib and loky (1MB as of 2018/11/1).
    X = sp.random(500, 2000, density=0.02, format="csr", random_state=random_state)
    y = random_state.choice(20, 500)

    # Begin by fitting an SGD classifier sequentially
    clf_sequential = SGDClassifier(max_iter=1000, n_jobs=1, random_state=42)
    clf_sequential.fit(X, y)

    # Fit an SGDClassifier using the specified backend, and make sure the
    # coefficients are equal to those obtained using a sequential fit
    clf_parallel = SGDClassifier(max_iter=1000, n_jobs=4, random_state=42)
    with joblib.parallel_backend(backend=backend):
        clf_parallel.fit(X, y)
    assert_array_almost_equal(clf_sequential.coef_, clf_parallel.coef_)


@pytest.mark.parametrize(
    "Estimator", [linear_model.SGDClassifier, linear_model.SGDRegressor]
)
def test_sgd_random_state(Estimator, global_random_seed):
    # Train the same model on the same data without converging and check that we
    # get reproducible results by fixing the random seed.
    if Estimator == linear_model.SGDRegressor:
        X, y = datasets.make_regression(random_state=global_random_seed)
    else:
        X, y = datasets.make_classification(random_state=global_random_seed)

    # Fitting twice a model with the same hyper-parameters on the same training
    # set with the same seed leads to the same results deterministically.
    est = Estimator(random_state=global_random_seed, max_iter=1)
    with pytest.warns(ConvergenceWarning):
        coef_same_seed_a = est.fit(X, y).coef_
        assert est.n_iter_ == 1

    est = Estimator(random_state=global_random_seed, max_iter=1)
    with pytest.warns(ConvergenceWarning):
        coef_same_seed_b = est.fit(X, y).coef_
        assert est.n_iter_ == 1
    assert_allclose(coef_same_seed_a, coef_same_seed_b)

    # Fitting twice a model with the same hyper-parameters on the same training
    # set but with different random seed leads to different results after one
    # epoch because of the random shuffling of the dataset.
    est = Estimator(random_state=global_random_seed + 1, max_iter=1)
    with pytest.warns(ConvergenceWarning):
        coef_other_seed = est.fit(X, y).coef_
        assert est.n_iter_ == 1
    assert np.abs(coef_same_seed_a - coef_other_seed).max() > 1.0


def test_validation_mask_correctly_subsets(monkeypatch):
    """Test that the data passed to the validation callback is correctly subset.

    Non-regression test for #23255.
    """
    X, Y = iris.data, iris.target
    n_samples = X.shape[0]
    validation_fraction = 0.2
    clf = linear_model.SGDClassifier(
        early_stopping=True,
        tol=1e-3,
        max_iter=1000,
        validation_fraction=validation_fraction,
    )

    mock = Mock(side_effect=_stochastic_gradient._ValidationScoreCallback)
    monkeypatch.setattr(_stochastic_gradient, "_ValidationScoreCallback", mock)
    clf.fit(X, Y)

    X_val, y_val = mock.call_args[0][1:3]
    assert X_val.shape[0] == int(n_samples * validation_fraction)
    assert y_val.shape[0] == int(n_samples * validation_fraction)


def test_sgd_error_on_zero_validation_weight():
    # Test that SGDClassifier raises an error when all the validation samples
    # have zero sample_weight. Non-regression test for #17229.
    X, Y = iris.data, iris.target
    sample_weight = np.zeros_like(Y)
    validation_fraction = 0.4

    clf = linear_model.SGDClassifier(
        early_stopping=True, validation_fraction=validation_fraction, random_state=0
    )
    error_message = (
        "The sample weights for validation set are all zero, consider using a"
        " different random state."
    )
    with pytest.raises(ValueError, match=error_message):
        clf.fit(X, Y, sample_weight=sample_weight)


@pytest.mark.parametrize("Estimator", [SGDClassifier, SGDRegressor])
def test_sgd_verbose(Estimator):
    """Non-regression test for gh #25249."""
    Estimator(verbose=1).fit(X, Y)


@pytest.mark.parametrize(
    "SGDEstimator",
    [
        SGDClassifier,
        SparseSGDClassifier,
        SGDRegressor,
        SparseSGDRegressor,
        SGDOneClassSVM,
        SparseSGDOneClassSVM,
    ],
)
@pytest.mark.parametrize("data_type", (np.float32, np.float64))
def test_sgd_dtype_match(SGDEstimator, data_type):
    _X = X.astype(data_type)
    _Y = np.array(Y, dtype=data_type)
    sgd_model = SGDEstimator()
    sgd_model.fit(_X, _Y)
    assert sgd_model.coef_.dtype == data_type


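# The consistency check below fits the same estimator on float32 and float64
# copies of the data; the two precisions accumulate different rounding errors,
# so the coefficients are only expected to agree approximately, with the
# assert_allclose tolerance defining how close is close enough.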
@pytest.mark.parametrize(
    "SGDEstimator",
    [
        SGDClassifier,
        SparseSGDClassifier,
        SGDRegressor,
        SparseSGDRegressor,
        SGDOneClassSVM,
        SparseSGDOneClassSVM,
    ],
)
def test_sgd_numerical_consistency(SGDEstimator):
    X_64 = X.astype(dtype=np.float64)
    Y_64 = np.array(Y, dtype=np.float64)
    X_32 = X.astype(dtype=np.float32)
    Y_32 = np.array(Y, dtype=np.float32)

    sgd_64 = SGDEstimator(max_iter=20)
    sgd_64.fit(X_64, Y_64)

    sgd_32 = SGDEstimator(max_iter=20)
    sgd_32.fit(X_32, Y_32)

    assert_allclose(sgd_64.coef_, sgd_32.coef_)