import warnings
from abc import ABCMeta, abstractmethod
from numbers import Integral, Real

import numpy as np
import scipy.sparse as sp

from ..base import BaseEstimator, ClassifierMixin, _fit_context
from ..exceptions import ConvergenceWarning, NotFittedError
from ..preprocessing import LabelEncoder
from ..utils import check_array, check_random_state, column_or_1d, compute_class_weight
from ..utils._param_validation import Interval, StrOptions
from ..utils.extmath import safe_sparse_dot
from ..utils.metaestimators import available_if
from ..utils.multiclass import _ovr_decision_function, check_classification_targets
from ..utils.validation import (
    _check_large_sparse,
    _check_sample_weight,
    _num_samples,
    check_consistent_length,
    check_is_fitted,
)
from . import _liblinear as liblinear  # type: ignore

# mypy error: error: Module 'sklearn.svm' has no attribute '_libsvm'
# (and same for other imports)
from . import _libsvm as libsvm  # type: ignore
from . import _libsvm_sparse as libsvm_sparse  # type: ignore

LIBSVM_IMPL = ["c_svc", "nu_svc", "one_class", "epsilon_svr", "nu_svr"]


def _one_vs_one_coef(dual_coef, n_support, support_vectors):
    """Generate primal coefficients from dual coefficients
    for the one-vs-one multi-class LibSVM in the case
    of a linear kernel."""

    # get 1vs1 weights for all n * (n - 1) / 2 classifiers.
    # this is somewhat messy.
    # shape of dual_coef_ is nSV * (n_classes - 1)
    # see docs for details
    n_class = dual_coef.shape[0] + 1

    # XXX we could do preallocation of coef but
    # would have to take care in the sparse case
    coef = []
    sv_locs = np.cumsum(np.hstack([[0], n_support]))
    for class1 in range(n_class):
        # SVs for class1:
        sv1 = support_vectors[sv_locs[class1] : sv_locs[class1 + 1], :]
        for class2 in range(class1 + 1, n_class):
            # SVs for class2:
            sv2 = support_vectors[sv_locs[class2] : sv_locs[class2 + 1], :]

            # dual coef for class1 SVs:
            alpha1 = dual_coef[class2 - 1, sv_locs[class1] : sv_locs[class1 + 1]]
            # dual coef for class2 SVs:
            alpha2 = dual_coef[class1, sv_locs[class2] : sv_locs[class2 + 1]]
            # build weight for class1 vs class2
            coef.append(safe_sparse_dot(alpha1, sv1) + safe_sparse_dot(alpha2, sv2))
    return coef
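
# Illustrative shape sketch (hypothetical values, not used by the module): for
# a 3-class linear problem with n_support = [2, 1, 2], `dual_coef` has shape
# (n_classes - 1, n_SV) = (2, 5) and the helper returns the
# n_classes * (n_classes - 1) / 2 = 3 pairwise weight vectors:
#
#     dual_coef = np.ones((2, 5))
#     support_vectors = np.eye(5)
#     coefs = _one_vs_one_coef(dual_coef, [2, 1, 2], support_vectors)
#     len(coefs)  # -> 3, one weight vector per (class1, class2) pair

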
class BaseLibSVM(BaseEstimator, metaclass=ABCMeta):
    """Base class for estimators that use libsvm as backing library.

    This implements support vector machine classification and regression.

    Parameter documentation is in the derived `SVC` class.
    """

    _parameter_constraints: dict = {
        "kernel": [
            StrOptions({"linear", "poly", "rbf", "sigmoid", "precomputed"}),
            callable,
        ],
        "degree": [Interval(Integral, 0, None, closed="left")],
        "gamma": [
            StrOptions({"scale", "auto"}),
            Interval(Real, 0.0, None, closed="left"),
        ],
        "coef0": [Interval(Real, None, None, closed="neither")],
        "tol": [Interval(Real, 0.0, None, closed="neither")],
        "C": [Interval(Real, 0.0, None, closed="neither")],
        "nu": [Interval(Real, 0.0, 1.0, closed="right")],
        "epsilon": [Interval(Real, 0.0, None, closed="left")],
        "shrinking": ["boolean"],
        "probability": ["boolean"],
        "cache_size": [Interval(Real, 0, None, closed="neither")],
        "class_weight": [StrOptions({"balanced"}), dict, None],
        "verbose": ["verbose"],
        "max_iter": [Interval(Integral, -1, None, closed="left")],
        "random_state": ["random_state"],
    }

    # The order of these must match the integer values in LibSVM.
    # XXX These are actually the same in the dense case. Need to factor
    # this out.
    _sparse_kernels = ["linear", "poly", "rbf", "sigmoid", "precomputed"]

    @abstractmethod
    def __init__(
        self,
        kernel,
        degree,
        gamma,
        coef0,
        tol,
        C,
        nu,
        epsilon,
        shrinking,
        probability,
        cache_size,
        class_weight,
        verbose,
        max_iter,
        random_state,
    ):
        if self._impl not in LIBSVM_IMPL:
            raise ValueError(
                "impl should be one of %s, %s was given" % (LIBSVM_IMPL, self._impl)
            )

        self.kernel = kernel
        self.degree = degree
        self.gamma = gamma
        self.coef0 = coef0
        self.tol = tol
        self.C = C
        self.nu = nu
        self.epsilon = epsilon
        self.shrinking = shrinking
        self.probability = probability
        self.cache_size = cache_size
        self.class_weight = class_weight
        self.verbose = verbose
        self.max_iter = max_iter
        self.random_state = random_state

    def _more_tags(self):
        # Used by cross_val_score.
        return {"pairwise": self.kernel == "precomputed"}

    @_fit_context(prefer_skip_nested_validation=True)
    def fit(self, X, y, sample_weight=None):
        """Fit the SVM model according to the given training data.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features) \
                or (n_samples, n_samples)
            Training vectors, where `n_samples` is the number of samples
            and `n_features` is the number of features.
            For kernel="precomputed", the expected shape of X is
            (n_samples, n_samples).

        y : array-like of shape (n_samples,)
            Target values (class labels in classification, real numbers in
            regression).

        sample_weight : array-like of shape (n_samples,), default=None
            Per-sample weights. Rescale C per sample. Higher weights
            force the classifier to put more emphasis on these points.

        Returns
        -------
        self : object
            Fitted estimator.

        Notes
        -----
        If X and y are not C-ordered and contiguous arrays of np.float64 and
        X is not a scipy.sparse.csr_matrix, X and/or y may be copied.

        If X is a dense array, then the other methods will not support sparse
        matrices as input.
        """
        rnd = check_random_state(self.random_state)

        sparse = sp.issparse(X)
        if sparse and self.kernel == "precomputed":
            raise TypeError("Sparse precomputed kernels are not supported.")
        self._sparse = sparse and not callable(self.kernel)

        if callable(self.kernel):
            check_consistent_length(X, y)
        else:
            X, y = self._validate_data(
                X,
                y,
                dtype=np.float64,
                order="C",
                accept_sparse="csr",
                accept_large_sparse=False,
            )

        y = self._validate_targets(y)

        sample_weight = np.asarray(
            [] if sample_weight is None else sample_weight, dtype=np.float64
        )
        solver_type = LIBSVM_IMPL.index(self._impl)

        # input validation
        n_samples = _num_samples(X)
        if solver_type != 2 and n_samples != y.shape[0]:
            raise ValueError(
                "X and y have incompatible shapes.\n"
                + "X has %s samples, but y has %s." % (n_samples, y.shape[0])
            )

        if self.kernel == "precomputed" and n_samples != X.shape[1]:
            raise ValueError(
                "Precomputed matrix must be a square matrix."
                " Input is a {}x{} matrix.".format(X.shape[0], X.shape[1])
            )

        if sample_weight.shape[0] > 0 and sample_weight.shape[0] != n_samples:
            raise ValueError(
                "sample_weight and X have incompatible shapes: "
                "%r vs %r\n"
                "Note: Sparse matrices cannot be indexed w/"
                "boolean masks (use `indices=True` in CV)."
                % (sample_weight.shape, X.shape)
            )

        kernel = "precomputed" if callable(self.kernel) else self.kernel

        if kernel == "precomputed":
            # unused but needs to be a float for cython code that ignores
            # it anyway
            self._gamma = 0.0
        elif isinstance(self.gamma, str):
            if self.gamma == "scale":
                # var = E[X^2] - E[X]^2 if sparse
                X_var = (X.multiply(X)).mean() - (X.mean()) ** 2 if sparse else X.var()
                self._gamma = 1.0 / (X.shape[1] * X_var) if X_var != 0 else 1.0
            elif self.gamma == "auto":
                self._gamma = 1.0 / X.shape[1]
        elif isinstance(self.gamma, Real):
            self._gamma = self.gamma

        fit = self._sparse_fit if self._sparse else self._dense_fit
        if self.verbose:
            print("[LibSVM]", end="")

        seed = rnd.randint(np.iinfo("i").max)
        fit(X, y, sample_weight, solver_type, kernel, random_seed=seed)
        # see comment on the other call to np.iinfo in this file

        self.shape_fit_ = X.shape if hasattr(X, "shape") else (n_samples,)

        # In binary case, we need to flip the sign of coef, intercept and
        # decision function. Use self._intercept_ and self._dual_coef_
        # internally.
        self._intercept_ = self.intercept_.copy()
        self._dual_coef_ = self.dual_coef_
        if self._impl in ["c_svc", "nu_svc"] and len(self.classes_) == 2:
            self.intercept_ *= -1
            self.dual_coef_ = -self.dual_coef_

        dual_coef = self._dual_coef_.data if self._sparse else self._dual_coef_
        intercept_finiteness = np.isfinite(self._intercept_).all()
        dual_coef_finiteness = np.isfinite(dual_coef).all()
        if not (intercept_finiteness and dual_coef_finiteness):
            raise ValueError(
                "The dual coefficients or intercepts are not finite."
                " The input data may contain large values and need to be"
                " preprocessed."
            )

        # Since, in the case of SVC and NuSVC, the number of models optimized by
        # libSVM could be greater than one (depending on the input), `n_iter_`
        # stores an ndarray.
        # For the other sub-classes (SVR, NuSVR, and OneClassSVM), the number of
        # models optimized by libSVM is always one, so `n_iter_` stores an
        # integer.
        if self._impl in ["c_svc", "nu_svc"]:
            self.n_iter_ = self._num_iter
        else:
            self.n_iter_ = self._num_iter.item()

        return self

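    # A minimal usage sketch (hedged; `SVC` is a derived class, and `X`, `y`
    # stand for any standard classification dataset):
    #
    #     clf = SVC(kernel="linear").fit(X, y)
    #
    # With kernel="precomputed" the training input must be the square Gram
    # matrix, e.g. for a linear kernel:
    #
    #     clf = SVC(kernel="precomputed").fit(X @ X.T, y)
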
    def _validate_targets(self, y):
        """Validation of y and class_weight.

        Default implementation for SVR and one-class; overridden in BaseSVC.
        """
        return column_or_1d(y, warn=True).astype(np.float64, copy=False)

    def _warn_from_fit_status(self):
        assert self.fit_status_ in (0, 1)
        if self.fit_status_ == 1:
            warnings.warn(
                "Solver terminated early (max_iter=%i)."
                " Consider pre-processing your data with"
                " StandardScaler or MinMaxScaler."
                % self.max_iter,
                ConvergenceWarning,
            )

    def _dense_fit(self, X, y, sample_weight, solver_type, kernel, random_seed):
        if callable(self.kernel):
            # you must store a reference to X to compute the kernel in predict
            # TODO: add keyword copy to copy on demand
            self.__Xfit = X
            X = self._compute_kernel(X)

            if X.shape[0] != X.shape[1]:
                raise ValueError("X.shape[0] should be equal to X.shape[1]")

        libsvm.set_verbosity_wrap(self.verbose)

        # we don't pass **self.get_params() to allow subclasses to
        # add other parameters to __init__
        (
            self.support_,
            self.support_vectors_,
            self._n_support,
            self.dual_coef_,
            self.intercept_,
            self._probA,
            self._probB,
            self.fit_status_,
            self._num_iter,
        ) = libsvm.fit(
            X,
            y,
            svm_type=solver_type,
            sample_weight=sample_weight,
            # TODO(1.4): Replace "_class_weight" with "class_weight_"
            class_weight=getattr(self, "_class_weight", np.empty(0)),
            kernel=kernel,
            C=self.C,
            nu=self.nu,
            probability=self.probability,
            degree=self.degree,
            shrinking=self.shrinking,
            tol=self.tol,
            cache_size=self.cache_size,
            coef0=self.coef0,
            gamma=self._gamma,
            epsilon=self.epsilon,
            max_iter=self.max_iter,
            random_seed=random_seed,
        )

        self._warn_from_fit_status()

    def _sparse_fit(self, X, y, sample_weight, solver_type, kernel, random_seed):
        X.data = np.asarray(X.data, dtype=np.float64, order="C")
        X.sort_indices()

        kernel_type = self._sparse_kernels.index(kernel)

        libsvm_sparse.set_verbosity_wrap(self.verbose)

        (
            self.support_,
            self.support_vectors_,
            dual_coef_data,
            self.intercept_,
            self._n_support,
            self._probA,
            self._probB,
            self.fit_status_,
            self._num_iter,
        ) = libsvm_sparse.libsvm_sparse_train(
            X.shape[1],
            X.data,
            X.indices,
            X.indptr,
            y,
            solver_type,
            kernel_type,
            self.degree,
            self._gamma,
            self.coef0,
            self.tol,
            self.C,
            # TODO(1.4): Replace "_class_weight" with "class_weight_"
            getattr(self, "_class_weight", np.empty(0)),
            sample_weight,
            self.nu,
            self.cache_size,
            self.epsilon,
            int(self.shrinking),
            int(self.probability),
            self.max_iter,
            random_seed,
        )

        self._warn_from_fit_status()

        if hasattr(self, "classes_"):
            n_class = len(self.classes_) - 1
        else:  # regression
            n_class = 1
        n_SV = self.support_vectors_.shape[0]

        dual_coef_indices = np.tile(np.arange(n_SV), n_class)
        if not n_SV:
            self.dual_coef_ = sp.csr_matrix([])
        else:
            dual_coef_indptr = np.arange(
                0, dual_coef_indices.size + 1, dual_coef_indices.size / n_class
            )
            self.dual_coef_ = sp.csr_matrix(
                (dual_coef_data, dual_coef_indices, dual_coef_indptr), (n_class, n_SV)
            )

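    # Layout sketch for the CSR `dual_coef_` built above (assumed example
    # values): with n_class = 2 and n_SV = 3, dual_coef_indices tiles to
    # [0, 1, 2, 0, 1, 2] and dual_coef_indptr is [0, 3, 6], so each of the
    # n_class rows spans the dual coefficients of all n_SV support vectors.
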
    def predict(self, X):
        """Perform regression on samples in X.

        For a one-class model, +1 (inlier) or -1 (outlier) is returned.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            For kernel="precomputed", the expected shape of X is
            (n_samples_test, n_samples_train).

        Returns
        -------
        y_pred : ndarray of shape (n_samples,)
            The predicted values.
        """
        X = self._validate_for_predict(X)
        predict = self._sparse_predict if self._sparse else self._dense_predict
        return predict(X)

    def _dense_predict(self, X):
        X = self._compute_kernel(X)
        if X.ndim == 1:
            X = check_array(X, order="C", accept_large_sparse=False)

        kernel = self.kernel
        if callable(self.kernel):
            kernel = "precomputed"
            if X.shape[1] != self.shape_fit_[0]:
                raise ValueError(
                    "X.shape[1] = %d should be equal to %d, "
                    "the number of samples at training time"
                    % (X.shape[1], self.shape_fit_[0])
                )

        svm_type = LIBSVM_IMPL.index(self._impl)

        return libsvm.predict(
            X,
            self.support_,
            self.support_vectors_,
            self._n_support,
            self._dual_coef_,
            self._intercept_,
            self._probA,
            self._probB,
            svm_type=svm_type,
            kernel=kernel,
            degree=self.degree,
            coef0=self.coef0,
            gamma=self._gamma,
            cache_size=self.cache_size,
        )

    def _sparse_predict(self, X):
        # Precondition: X is a csr_matrix of dtype np.float64.
        kernel = self.kernel
        if callable(kernel):
            kernel = "precomputed"

        kernel_type = self._sparse_kernels.index(kernel)

        C = 0.0  # C is not useful here

        return libsvm_sparse.libsvm_sparse_predict(
            X.data,
            X.indices,
            X.indptr,
            self.support_vectors_.data,
            self.support_vectors_.indices,
            self.support_vectors_.indptr,
            self._dual_coef_.data,
            self._intercept_,
            LIBSVM_IMPL.index(self._impl),
            kernel_type,
            self.degree,
            self._gamma,
            self.coef0,
            self.tol,
            C,
            # TODO(1.4): Replace "_class_weight" with "class_weight_"
            getattr(self, "_class_weight", np.empty(0)),
            self.nu,
            self.epsilon,
            self.shrinking,
            self.probability,
            self._n_support,
            self._probA,
            self._probB,
        )

    def _compute_kernel(self, X):
        """Return the data transformed by a callable kernel."""
        if callable(self.kernel):
            # in the case of precomputed kernel given as a function, we
            # have to compute the kernel matrix explicitly
            kernel = self.kernel(X, self.__Xfit)
            if sp.issparse(kernel):
                kernel = kernel.toarray()
            X = np.asarray(kernel, dtype=np.float64, order="C")
        return X

    def _decision_function(self, X):
        """Evaluates the decision function for the samples in X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        dec : array-like of shape (n_samples, n_classes * (n_classes - 1) / 2)
            Returns the decision function of the sample for each class
            in the model.
        """
        # NOTE: _validate_for_predict contains check for is_fitted
        # hence must be placed before any other attributes are used.
        X = self._validate_for_predict(X)
        X = self._compute_kernel(X)

        if self._sparse:
            dec_func = self._sparse_decision_function(X)
        else:
            dec_func = self._dense_decision_function(X)

        # In binary case, we need to flip the sign of coef, intercept and
        # decision function.
        if self._impl in ["c_svc", "nu_svc"] and len(self.classes_) == 2:
            return -dec_func.ravel()

        return dec_func

    def _dense_decision_function(self, X):
        X = check_array(X, dtype=np.float64, order="C", accept_large_sparse=False)

        kernel = self.kernel
        if callable(kernel):
            kernel = "precomputed"

        return libsvm.decision_function(
            X,
            self.support_,
            self.support_vectors_,
            self._n_support,
            self._dual_coef_,
            self._intercept_,
            self._probA,
            self._probB,
            svm_type=LIBSVM_IMPL.index(self._impl),
            kernel=kernel,
            degree=self.degree,
            cache_size=self.cache_size,
            coef0=self.coef0,
            gamma=self._gamma,
        )

    def _sparse_decision_function(self, X):
        X.data = np.asarray(X.data, dtype=np.float64, order="C")

        kernel = self.kernel
        if callable(kernel):
            kernel = "precomputed"

        kernel_type = self._sparse_kernels.index(kernel)

        return libsvm_sparse.libsvm_sparse_decision_function(
            X.data,
            X.indices,
            X.indptr,
            self.support_vectors_.data,
            self.support_vectors_.indices,
            self.support_vectors_.indptr,
            self._dual_coef_.data,
            self._intercept_,
            LIBSVM_IMPL.index(self._impl),
            kernel_type,
            self.degree,
            self._gamma,
            self.coef0,
            self.tol,
            self.C,
            # TODO(1.4): Replace "_class_weight" with "class_weight_"
            getattr(self, "_class_weight", np.empty(0)),
            self.nu,
            self.epsilon,
            self.shrinking,
            self.probability,
            self._n_support,
            self._probA,
            self._probB,
        )

    def _validate_for_predict(self, X):
        check_is_fitted(self)

        if not callable(self.kernel):
            X = self._validate_data(
                X,
                accept_sparse="csr",
                dtype=np.float64,
                order="C",
                accept_large_sparse=False,
                reset=False,
            )

        if self._sparse and not sp.issparse(X):
            X = sp.csr_matrix(X)
        if self._sparse:
            X.sort_indices()

        if sp.issparse(X) and not self._sparse and not callable(self.kernel):
            raise ValueError(
                "cannot use sparse input in %r trained on dense data"
                % type(self).__name__
            )

        if self.kernel == "precomputed":
            if X.shape[1] != self.shape_fit_[0]:
                raise ValueError(
                    "X.shape[1] = %d should be equal to %d, "
                    "the number of samples at training time"
                    % (X.shape[1], self.shape_fit_[0])
                )

        # Fixes https://nvd.nist.gov/vuln/detail/CVE-2020-28975
        # Check that _n_support is consistent with support_vectors
        sv = self.support_vectors_
        if not self._sparse and sv.size > 0 and self.n_support_.sum() != sv.shape[0]:
            raise ValueError(
                f"The internal representation of {self.__class__.__name__} was altered"
            )
        return X

    @property
    def coef_(self):
        """Weights assigned to the features when `kernel="linear"`.

        Returns
        -------
        ndarray of shape (n_classes * (n_classes - 1) / 2, n_features)
            For classifiers; regressors and one-class models return a single
            row of shape (1, n_features).
        """
        if self.kernel != "linear":
            raise AttributeError("coef_ is only available when using a linear kernel")

        coef = self._get_coef()

        # coef_ being a read-only property, it's better to mark the value as
        # immutable to avoid hiding potential bugs for the unsuspecting user.
        if sp.issparse(coef):
            # sparse matrices do not have a global writeable flag
            coef.data.flags.writeable = False
        else:
            # regular dense array
            coef.flags.writeable = False
        return coef

    def _get_coef(self):
        return safe_sparse_dot(self._dual_coef_, self.support_vectors_)

    @property
    def n_support_(self):
        """Number of support vectors for each class."""
        try:
            check_is_fitted(self)
        except NotFittedError:
            raise AttributeError

        svm_type = LIBSVM_IMPL.index(self._impl)
        if svm_type in (0, 1):
            return self._n_support
        else:
            # SVR and OneClass
            # _n_support has size 2, we make it size 1
            return np.array([self._n_support[0]])


class BaseSVC(ClassifierMixin, BaseLibSVM, metaclass=ABCMeta):
    """ABC for LibSVM-based classifiers."""

    _parameter_constraints: dict = {
        **BaseLibSVM._parameter_constraints,
        "decision_function_shape": [StrOptions({"ovr", "ovo"})],
        "break_ties": ["boolean"],
    }

    for unused_param in ["epsilon", "nu"]:
        _parameter_constraints.pop(unused_param)

    @abstractmethod
    def __init__(
        self,
        kernel,
        degree,
        gamma,
        coef0,
        tol,
        C,
        nu,
        shrinking,
        probability,
        cache_size,
        class_weight,
        verbose,
        max_iter,
        decision_function_shape,
        random_state,
        break_ties,
    ):
        self.decision_function_shape = decision_function_shape
        self.break_ties = break_ties
        super().__init__(
            kernel=kernel,
            degree=degree,
            gamma=gamma,
            coef0=coef0,
            tol=tol,
            C=C,
            nu=nu,
            epsilon=0.0,
            shrinking=shrinking,
            probability=probability,
            cache_size=cache_size,
            class_weight=class_weight,
            verbose=verbose,
            max_iter=max_iter,
            random_state=random_state,
        )

    def _validate_targets(self, y):
        y_ = column_or_1d(y, warn=True)
        check_classification_targets(y)
        cls, y = np.unique(y_, return_inverse=True)
        self.class_weight_ = compute_class_weight(self.class_weight, classes=cls, y=y_)
        if len(cls) < 2:
            raise ValueError(
                "The number of classes has to be greater than one; got %d class"
                % len(cls)
            )

        self.classes_ = cls

        return np.asarray(y, dtype=np.float64, order="C")

    def decision_function(self, X):
        """Evaluate the decision function for the samples in X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The input samples.

        Returns
        -------
        dec : ndarray of shape (n_samples, n_classes * (n_classes - 1) / 2)
            Returns the decision function of the sample for each class
            in the model.
            If decision_function_shape='ovr', the shape is (n_samples,
            n_classes).

        Notes
        -----
        If decision_function_shape='ovo', the function values are proportional
        to the distance of the samples X to the separating hyperplane. If the
        exact distances are required, divide the function values by the norm of
        the weight vector (``coef_``). See also `this question
        <https://stats.stackexchange.com/questions/14876/
        interpreting-distance-from-hyperplane-in-svm>`_ for further details.
        If decision_function_shape='ovr', the decision function is a monotonic
        transformation of the ovo decision function.
        """
        dec = self._decision_function(X)
        if self.decision_function_shape == "ovr" and len(self.classes_) > 2:
            return _ovr_decision_function(dec < 0, -dec, len(self.classes_))
        return dec

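    # Shape sketch (hypothetical 4-class fitted estimator `clf`):
    #
    #     clf.decision_function(X).shape
    #     # -> (n_samples, 6) with decision_function_shape="ovo",
    #     #    i.e. n_classes * (n_classes - 1) / 2 pairwise values
    #     # -> (n_samples, 4) with decision_function_shape="ovr"
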
    def predict(self, X):
        """Perform classification on samples in X.

        For a one-class model, +1 or -1 is returned.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features) or \
                (n_samples_test, n_samples_train)
            For kernel="precomputed", the expected shape of X is
            (n_samples_test, n_samples_train).

        Returns
        -------
        y_pred : ndarray of shape (n_samples,)
            Class labels for samples in X.
        """
        check_is_fitted(self)
        if self.break_ties and self.decision_function_shape == "ovo":
            raise ValueError(
                "break_ties must be False when decision_function_shape is 'ovo'"
            )

        if (
            self.break_ties
            and self.decision_function_shape == "ovr"
            and len(self.classes_) > 2
        ):
            y = np.argmax(self.decision_function(X), axis=1)
        else:
            y = super().predict(X)
        return self.classes_.take(np.asarray(y, dtype=np.intp))

    # Hacky way of getting predict_proba to raise an AttributeError when
    # probability=False using properties. Do not use this in new code; when
    # probabilities are not available depending on a setting, introduce two
    # estimators.
    def _check_proba(self):
        if not self.probability:
            raise AttributeError(
                "predict_proba is not available when probability=False"
            )
        if self._impl not in ("c_svc", "nu_svc"):
            raise AttributeError("predict_proba only implemented for SVC and NuSVC")
        return True

    @available_if(_check_proba)
    def predict_proba(self, X):
        """Compute probabilities of possible outcomes for samples in X.

        The model needs to have probability information computed at training
        time: fit with attribute `probability` set to True.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            For kernel="precomputed", the expected shape of X is
            (n_samples_test, n_samples_train).

        Returns
        -------
        T : ndarray of shape (n_samples, n_classes)
            Returns the probability of the sample for each class in
            the model. The columns correspond to the classes in sorted
            order, as they appear in the attribute :term:`classes_`.

        Notes
        -----
        The probability model is created using cross validation, so
        the results can be slightly different than those obtained by
        predict. Also, it will produce meaningless results on very small
        datasets.
        """
        X = self._validate_for_predict(X)
        if self.probA_.size == 0 or self.probB_.size == 0:
            raise NotFittedError(
                "predict_proba is not available when fitted with probability=False"
            )
        pred_proba = (
            self._sparse_predict_proba if self._sparse else self._dense_predict_proba
        )
        return pred_proba(X)

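    # Hedged usage sketch: probabilities require `probability=True` at fit
    # time, e.g. `SVC(probability=True).fit(X, y).predict_proba(X)`; because
    # they come from the internal cross-validated Platt scaling described
    # above, the argmax of `predict_proba` may occasionally disagree with
    # `predict`.
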
    @available_if(_check_proba)
    def predict_log_proba(self, X):
        """Compute log probabilities of possible outcomes for samples in X.

        The model needs to have probability information computed at training
        time: fit with attribute `probability` set to True.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features) or \
                (n_samples_test, n_samples_train)
            For kernel="precomputed", the expected shape of X is
            (n_samples_test, n_samples_train).

        Returns
        -------
        T : ndarray of shape (n_samples, n_classes)
            Returns the log-probabilities of the sample for each class in
            the model. The columns correspond to the classes in sorted
            order, as they appear in the attribute :term:`classes_`.

        Notes
        -----
        The probability model is created using cross validation, so
        the results can be slightly different than those obtained by
        predict. Also, it will produce meaningless results on very small
        datasets.
        """
        return np.log(self.predict_proba(X))

    def _dense_predict_proba(self, X):
        X = self._compute_kernel(X)

        kernel = self.kernel
        if callable(kernel):
            kernel = "precomputed"

        svm_type = LIBSVM_IMPL.index(self._impl)
        pprob = libsvm.predict_proba(
            X,
            self.support_,
            self.support_vectors_,
            self._n_support,
            self._dual_coef_,
            self._intercept_,
            self._probA,
            self._probB,
            svm_type=svm_type,
            kernel=kernel,
            degree=self.degree,
            cache_size=self.cache_size,
            coef0=self.coef0,
            gamma=self._gamma,
        )

        return pprob

    def _sparse_predict_proba(self, X):
        X.data = np.asarray(X.data, dtype=np.float64, order="C")

        kernel = self.kernel
        if callable(kernel):
            kernel = "precomputed"

        kernel_type = self._sparse_kernels.index(kernel)

        return libsvm_sparse.libsvm_sparse_predict_proba(
            X.data,
            X.indices,
            X.indptr,
            self.support_vectors_.data,
            self.support_vectors_.indices,
            self.support_vectors_.indptr,
            self._dual_coef_.data,
            self._intercept_,
            LIBSVM_IMPL.index(self._impl),
            kernel_type,
            self.degree,
            self._gamma,
            self.coef0,
            self.tol,
            self.C,
            # TODO(1.4): Replace "_class_weight" with "class_weight_"
            getattr(self, "_class_weight", np.empty(0)),
            self.nu,
            self.epsilon,
            self.shrinking,
            self.probability,
            self._n_support,
            self._probA,
            self._probB,
        )

    def _get_coef(self):
        if self.dual_coef_.shape[0] == 1:
            # binary classifier
            coef = safe_sparse_dot(self.dual_coef_, self.support_vectors_)
        else:
            # 1vs1 classifier
            coef = _one_vs_one_coef(
                self.dual_coef_, self._n_support, self.support_vectors_
            )
            if sp.issparse(coef[0]):
                coef = sp.vstack(coef).tocsr()
            else:
                coef = np.vstack(coef)

        return coef

    @property
    def probA_(self):
        """Parameter learned in Platt scaling when `probability=True`.

        Returns
        -------
        ndarray of shape (n_classes * (n_classes - 1) / 2)
        """
        return self._probA

    @property
    def probB_(self):
        """Parameter learned in Platt scaling when `probability=True`.

        Returns
        -------
        ndarray of shape (n_classes * (n_classes - 1) / 2)
        """
        return self._probB

    # TODO(1.4): Remove
    @property
    def _class_weight(self):
        """Weights per class."""
        # Class weights are defined for classifiers during fit.
        return self.class_weight_


def _get_liblinear_solver_type(multi_class, penalty, loss, dual):
    """Find the liblinear magic number for the solver.

    This number depends on the values of the following attributes:
      - multi_class
      - penalty
      - loss
      - dual

    The same number is also internally used by LibLinear to determine
    which solver to use.
    """
    # nested dicts containing
    # level 1: available loss functions,
    # level 2: available penalties for the given loss function,
    # level 3: whether the dual solver is available for the specified
    #          combination of loss function and penalty
    _solver_type_dict = {
        "logistic_regression": {"l1": {False: 6}, "l2": {False: 0, True: 7}},
        "hinge": {"l2": {True: 3}},
        "squared_hinge": {"l1": {False: 5}, "l2": {False: 2, True: 1}},
        "epsilon_insensitive": {"l2": {True: 13}},
        "squared_epsilon_insensitive": {"l2": {False: 11, True: 12}},
        "crammer_singer": 4,
    }

    if multi_class == "crammer_singer":
        return _solver_type_dict[multi_class]
    elif multi_class != "ovr":
        raise ValueError(
            "`multi_class` must be one of `ovr`, `crammer_singer`, got %r" % multi_class
        )

    _solver_pen = _solver_type_dict.get(loss, None)
    if _solver_pen is None:
        error_string = "loss='%s' is not supported" % loss
    else:
        _solver_dual = _solver_pen.get(penalty, None)
        if _solver_dual is None:
            error_string = (
                "The combination of penalty='%s' and loss='%s' is not supported"
                % (penalty, loss)
            )
        else:
            solver_num = _solver_dual.get(dual, None)
            if solver_num is None:
                error_string = (
                    "The combination of penalty='%s' and "
                    "loss='%s' is not supported when dual=%s" % (penalty, loss, dual)
                )
            else:
                return solver_num
    raise ValueError(
        "Unsupported set of arguments: %s, Parameters: penalty=%r, loss=%r, dual=%r"
        % (error_string, penalty, loss, dual)
    )
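
# Lookup sketch (values read off `_solver_type_dict` above): an L2-penalized
# squared hinge loss solved in the dual maps to liblinear solver number 1.
#
#     _get_liblinear_solver_type("ovr", "l2", "squared_hinge", True)  # -> 1

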
def _fit_liblinear(
    X,
    y,
    C,
    fit_intercept,
    intercept_scaling,
    class_weight,
    penalty,
    dual,
    verbose,
    max_iter,
    tol,
    random_state=None,
    multi_class="ovr",
    loss="logistic_regression",
    epsilon=0.1,
    sample_weight=None,
):
    """Used by Logistic Regression (and CV) and LinearSVC/LinearSVR.

    Preprocessing is done in this function before supplying it to liblinear.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        Training vector, where `n_samples` is the number of samples and
        `n_features` is the number of features.

    y : array-like of shape (n_samples,)
        Target vector relative to X.

    C : float
        Inverse of regularization strength. The lower the C, the higher
        the penalization.

    fit_intercept : bool
        Whether or not to fit an intercept. If set to True, the feature vector
        is extended to include an intercept term: ``[x_1, ..., x_n, 1]``, where
        1 corresponds to the intercept. If set to False, no intercept will be
        used in calculations (i.e. data is expected to be already centered).

    intercept_scaling : float
        Liblinear internally penalizes the intercept, treating it like any
        other term in the feature vector. To reduce the impact of the
        regularization on the intercept, the `intercept_scaling` parameter can
        be set to a value greater than 1; the higher the value of
        `intercept_scaling`, the lower the impact of regularization on it.
        Then, the weights become `[w_x_1, ..., w_x_n,
        w_intercept*intercept_scaling]`, where `w_x_1, ..., w_x_n` represent
        the feature weights and the intercept weight is scaled by
        `intercept_scaling`. This scaling allows the intercept term to have a
        different regularization behavior compared to the other features.

    class_weight : dict or 'balanced', default=None
        Weights associated with classes in the form ``{class_label: weight}``.
        If not given, all classes are supposed to have weight one. For
        multi-output problems, a list of dicts can be provided in the same
        order as the columns of y.

        The "balanced" mode uses the values of y to automatically adjust
        weights inversely proportional to class frequencies in the input data
        as ``n_samples / (n_classes * np.bincount(y))``.

    penalty : {'l1', 'l2'}
        The norm of the penalty used in regularization.

    dual : bool
        Dual or primal formulation.

    verbose : int
        Set verbose to any positive number for verbosity.

    max_iter : int
        Number of iterations.

    tol : float
        Stopping condition.

    random_state : int, RandomState instance or None, default=None
        Controls the pseudo random number generation for shuffling the data.
        Pass an int for reproducible output across multiple function calls.
        See :term:`Glossary <random_state>`.

    multi_class : {'ovr', 'crammer_singer'}, default='ovr'
        `ovr` trains n_classes one-vs-rest classifiers, while `crammer_singer`
        optimizes a joint objective over all classes.
        While `crammer_singer` is interesting from a theoretical perspective
        as it is consistent, it is seldom used in practice, rarely leads to
        better accuracy, and is more expensive to compute.
        If `crammer_singer` is chosen, the options loss, penalty and dual will
        be ignored.

    loss : {'logistic_regression', 'hinge', 'squared_hinge', \
            'epsilon_insensitive', 'squared_epsilon_insensitive'}, \
            default='logistic_regression'
        The loss function used to fit the model.

    epsilon : float, default=0.1
        Epsilon parameter in the epsilon-insensitive loss function. Note
        that the value of this parameter depends on the scale of the target
        variable y. If unsure, set epsilon=0.

    sample_weight : array-like of shape (n_samples,), default=None
        Weights assigned to each sample.

    Returns
    -------
    coef_ : ndarray of shape (n_classes, n_features)
        The coefficient matrix obtained by minimizing the objective function
        (one row per one-vs-rest problem; a single row in the binary and
        regression cases).

    intercept_ : ndarray of shape (n_classes,) or float
        The intercept term. Set to 0.0 when `fit_intercept` is False.

    n_iter_ : array of int
        Number of iterations run for each class.
    """
    if loss not in ["epsilon_insensitive", "squared_epsilon_insensitive"]:
        enc = LabelEncoder()
        y_ind = enc.fit_transform(y)
        classes_ = enc.classes_
        if len(classes_) < 2:
            raise ValueError(
                "This solver needs samples of at least 2 classes"
                " in the data, but the data contains only one"
                " class: %r" % classes_[0]
            )

        class_weight_ = compute_class_weight(class_weight, classes=classes_, y=y)
    else:
        class_weight_ = np.empty(0, dtype=np.float64)
        y_ind = y
    liblinear.set_verbosity_wrap(verbose)
    rnd = check_random_state(random_state)
    if verbose:
        print("[LibLinear]", end="")

    # LinearSVC breaks when intercept_scaling is <= 0
    bias = -1.0
    if fit_intercept:
        if intercept_scaling <= 0:
            raise ValueError(
                "Intercept scaling is %r but needs to be greater "
                "than 0. To disable fitting an intercept,"
                " set fit_intercept=False." % intercept_scaling
            )
        else:
            bias = intercept_scaling

    libsvm.set_verbosity_wrap(verbose)
    libsvm_sparse.set_verbosity_wrap(verbose)
    liblinear.set_verbosity_wrap(verbose)

    # Liblinear doesn't support 64bit sparse matrix indices yet
    if sp.issparse(X):
        _check_large_sparse(X)

    # LibLinear wants targets as doubles, even for classification
    y_ind = np.asarray(y_ind, dtype=np.float64).ravel()
    y_ind = np.require(y_ind, requirements="W")

    sample_weight = _check_sample_weight(sample_weight, X, dtype=np.float64)

    solver_type = _get_liblinear_solver_type(multi_class, penalty, loss, dual)
    raw_coef_, n_iter_ = liblinear.train_wrap(
        X,
        y_ind,
        sp.issparse(X),
        solver_type,
        tol,
        bias,
        C,
        class_weight_,
        max_iter,
        rnd.randint(np.iinfo("i").max),
        epsilon,
        sample_weight,
    )
    # Regarding rnd.randint(..) in the above signature:
    # seed for srand in range [0..INT_MAX); due to limitations in Numpy
    # on 32-bit platforms, we can't get to the UINT_MAX limit that
    # srand supports
    n_iter_max = max(n_iter_)
    if n_iter_max >= max_iter:
        warnings.warn(
            "Liblinear failed to converge, increase the number of iterations.",
            ConvergenceWarning,
        )

    if fit_intercept:
        coef_ = raw_coef_[:, :-1]
        intercept_ = intercept_scaling * raw_coef_[:, -1]
    else:
        coef_ = raw_coef_
        intercept_ = 0.0

    return coef_, intercept_, n_iter_
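
# Hedged call sketch for the helper above (`X`, `y` stand for a dense binary
# classification dataset; the keyword values mirror LinearSVC-style defaults):
#
#     coef_, intercept_, n_iter_ = _fit_liblinear(
#         X, y, C=1.0, fit_intercept=True, intercept_scaling=1.0,
#         class_weight=None, penalty="l2", dual=True, verbose=0,
#         max_iter=1000, tol=1e-4, loss="squared_hinge",
#     )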