- """
- Logistic Regression
- """
- # Author: Gael Varoquaux <gael.varoquaux@normalesup.org>
- # Fabian Pedregosa <f@bianp.net>
- # Alexandre Gramfort <alexandre.gramfort@telecom-paristech.fr>
- # Manoj Kumar <manojkumarsivaraj334@gmail.com>
- # Lars Buitinck
- # Simon Wu <s8wu@uwaterloo.ca>
- # Arthur Mensch <arthur.mensch@m4x.org>
- import numbers
- import warnings
- from numbers import Integral, Real
- import numpy as np
- from joblib import effective_n_jobs
- from scipy import optimize
- from sklearn.metrics import get_scorer_names
- from .._loss.loss import HalfBinomialLoss, HalfMultinomialLoss
- from ..base import _fit_context
- from ..metrics import get_scorer
- from ..model_selection import check_cv
- from ..preprocessing import LabelBinarizer, LabelEncoder
- from ..svm._base import _fit_liblinear
- from ..utils import (
- check_array,
- check_consistent_length,
- check_random_state,
- compute_class_weight,
- )
- from ..utils._param_validation import Interval, StrOptions
- from ..utils.extmath import row_norms, softmax
- from ..utils.multiclass import check_classification_targets
- from ..utils.optimize import _check_optimize_result, _newton_cg
- from ..utils.parallel import Parallel, delayed
- from ..utils.validation import _check_sample_weight, check_is_fitted
- from ._base import BaseEstimator, LinearClassifierMixin, SparseCoefMixin
- from ._glm.glm import NewtonCholeskySolver
- from ._linear_loss import LinearModelLoss
- from ._sag import sag_solver
- _LOGISTIC_SOLVER_CONVERGENCE_MSG = (
- "Please also refer to the documentation for alternative solver options:\n"
- " https://scikit-learn.org/stable/modules/linear_model.html"
- "#logistic-regression"
- )
- def _check_solver(solver, penalty, dual):
- # TODO(1.4): Remove "none" option
- if solver not in ["liblinear", "saga"] and penalty not in ("l2", "none", None):
- raise ValueError(
- "Solver %s supports only 'l2' or 'none' penalties, got %s penalty."
- % (solver, penalty)
- )
- if solver != "liblinear" and dual:
- raise ValueError(
- "Solver %s supports only dual=False, got dual=%s" % (solver, dual)
- )
- if penalty == "elasticnet" and solver != "saga":
- raise ValueError(
- "Only 'saga' solver supports elasticnet penalty, got solver={}.".format(
- solver
- )
- )
- if solver == "liblinear" and penalty == "none":
- raise ValueError("penalty='none' is not supported for the liblinear solver")
- return solver
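- # Illustrative sketch (not executed) of the combinations accepted by
- # _check_solver above:
- #   _check_solver("lbfgs", "l2", dual=False)        -> "lbfgs"
- #   _check_solver("lbfgs", "l1", dual=False)        -> ValueError (l1 needs liblinear or saga)
- #   _check_solver("liblinear", "l2", dual=True)     -> "liblinear" (only solver supporting dual)
- #   _check_solver("saga", "elasticnet", dual=False) -> "saga" (only solver supporting elasticnet)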
- def _check_multi_class(multi_class, solver, n_classes):
- """Computes the multi class type, either "multinomial" or "ovr".
- For `n_classes` > 2 and a solver that supports it, returns "multinomial".
- For all other cases, in particular binary classification, return "ovr".
- """
- if multi_class == "auto":
- if solver in ("liblinear", "newton-cholesky"):
- multi_class = "ovr"
- elif n_classes > 2:
- multi_class = "multinomial"
- else:
- multi_class = "ovr"
- if multi_class == "multinomial" and solver in ("liblinear", "newton-cholesky"):
- raise ValueError("Solver %s does not support a multinomial backend." % solver)
- return multi_class
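- # Illustrative sketch (not executed) of the resolution performed above when
- # multi_class="auto":
- #   _check_multi_class("auto", "lbfgs", n_classes=3)            -> "multinomial"
- #   _check_multi_class("auto", "lbfgs", n_classes=2)            -> "ovr"
- #   _check_multi_class("auto", "liblinear", n_classes=3)        -> "ovr"
- #   _check_multi_class("multinomial", "liblinear", n_classes=3) -> ValueError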
- def _logistic_regression_path(
- X,
- y,
- pos_class=None,
- Cs=10,
- fit_intercept=True,
- max_iter=100,
- tol=1e-4,
- verbose=0,
- solver="lbfgs",
- coef=None,
- class_weight=None,
- dual=False,
- penalty="l2",
- intercept_scaling=1.0,
- multi_class="auto",
- random_state=None,
- check_input=True,
- max_squared_sum=None,
- sample_weight=None,
- l1_ratio=None,
- n_threads=1,
- ):
- """Compute a Logistic Regression model for a list of regularization
- parameters.
- This is an implementation that uses the result of the previous model
- to speed up computations along the set of solutions, making it faster
- than sequentially calling LogisticRegression for the different parameters.
- Note that there will be no speedup with liblinear solver, since it does
- not handle warm-starting.
- Read more in the :ref:`User Guide <logistic_regression>`.
- Parameters
- ----------
- X : {array-like, sparse matrix} of shape (n_samples, n_features)
- Input data.
- y : array-like of shape (n_samples,) or (n_samples, n_targets)
- Input data, target values.
- pos_class : int, default=None
- The class with respect to which we perform a one-vs-all fit.
- If None, then it is assumed that the given problem is binary.
- Cs : int or array-like of shape (n_cs,), default=10
- List of values for the regularization parameter or integer specifying
- the number of regularization parameters that should be used. In this
- case, the parameters will be chosen in a logarithmic scale between
- 1e-4 and 1e4.
- fit_intercept : bool, default=True
- Whether to fit an intercept for the model. In this case the shape of
- the returned array is (n_cs, n_features + 1).
- max_iter : int, default=100
- Maximum number of iterations for the solver.
- tol : float, default=1e-4
- Stopping criterion. For the newton-cg and lbfgs solvers, the iteration
- will stop when ``max{|g_i | i = 1, ..., n} <= tol``
- where ``g_i`` is the i-th component of the gradient.
- verbose : int, default=0
- For the liblinear and lbfgs solvers set verbose to any positive
- number for verbosity.
- solver : {'lbfgs', 'liblinear', 'newton-cg', 'newton-cholesky', 'sag', 'saga'}, \
- default='lbfgs'
- Numerical solver to use.
- coef : array-like of shape (n_features,), default=None
- Initialization value for coefficients of logistic regression.
- Useless for liblinear solver.
- class_weight : dict or 'balanced', default=None
- Weights associated with classes in the form ``{class_label: weight}``.
- If not given, all classes are supposed to have weight one.
- The "balanced" mode uses the values of y to automatically adjust
- weights inversely proportional to class frequencies in the input data
- as ``n_samples / (n_classes * np.bincount(y))``.
- Note that these weights will be multiplied with sample_weight (passed
- through the fit method) if sample_weight is specified.
- dual : bool, default=False
- Dual or primal formulation. Dual formulation is only implemented for
- l2 penalty with liblinear solver. Prefer dual=False when
- n_samples > n_features.
- penalty : {'l1', 'l2', 'elasticnet'}, default='l2'
- Used to specify the norm used in the penalization. The 'newton-cg',
- 'sag' and 'lbfgs' solvers support only l2 penalties. 'elasticnet' is
- only supported by the 'saga' solver.
- intercept_scaling : float, default=1.
- Useful only when the solver 'liblinear' is used
- and self.fit_intercept is set to True. In this case, x becomes
- [x, self.intercept_scaling],
- i.e. a "synthetic" feature with constant value equal to
- intercept_scaling is appended to the instance vector.
- The intercept becomes ``intercept_scaling * synthetic_feature_weight``.
- Note! the synthetic feature weight is subject to l1/l2 regularization
- as all other features.
- To lessen the effect of regularization on synthetic feature weight
- (and therefore on the intercept) intercept_scaling has to be increased.
- multi_class : {'ovr', 'multinomial', 'auto'}, default='auto'
- If the option chosen is 'ovr', then a binary problem is fit for each
- label. For 'multinomial' the loss minimised is the multinomial loss fit
- across the entire probability distribution, *even when the data is
- binary*. 'multinomial' is unavailable when solver='liblinear'.
- 'auto' selects 'ovr' if the data is binary, or if solver='liblinear',
- and otherwise selects 'multinomial'.
- .. versionadded:: 0.18
- Stochastic Average Gradient descent solver for 'multinomial' case.
- .. versionchanged:: 0.22
- Default changed from 'ovr' to 'auto' in 0.22.
- random_state : int, RandomState instance, default=None
- Used when ``solver`` == 'sag', 'saga' or 'liblinear' to shuffle the
- data. See :term:`Glossary <random_state>` for details.
- check_input : bool, default=True
- If False, the input arrays X and y will not be checked.
- max_squared_sum : float, default=None
- Maximum squared sum of X over samples. Used only in SAG solver.
- If None, it will be computed, going through all the samples.
- The value should be precomputed to speed up cross validation.
- sample_weight : array-like of shape (n_samples,), default=None
- Array of weights that are assigned to individual samples.
- If not provided, then each sample is given unit weight.
- l1_ratio : float, default=None
- The Elastic-Net mixing parameter, with ``0 <= l1_ratio <= 1``. Only
- used if ``penalty='elasticnet'``. Setting ``l1_ratio=0`` is equivalent
- to using ``penalty='l2'``, while setting ``l1_ratio=1`` is equivalent
- to using ``penalty='l1'``. For ``0 < l1_ratio < 1``, the penalty is a
- combination of L1 and L2.
- n_threads : int, default=1
- Number of OpenMP threads to use.
- Returns
- -------
- coefs : ndarray of shape (n_cs, n_features) or (n_cs, n_features + 1)
- List of coefficients for the Logistic Regression model. If
- fit_intercept is set to True then the second dimension will be
- n_features + 1, where the last item represents the intercept. For
- ``multi_class='multinomial'``, the shape is (n_classes, n_cs,
- n_features) or (n_classes, n_cs, n_features + 1).
- Cs : ndarray
- Grid of Cs used for cross-validation.
- n_iter : array of shape (n_cs,)
- Actual number of iteration for each Cs.
- Notes
- -----
- You might get slightly different results with the solver liblinear than
- with the others since this uses LIBLINEAR which penalizes the intercept.
- .. versionchanged:: 0.19
- The "copy" parameter was removed.
- """
- if isinstance(Cs, numbers.Integral):
- Cs = np.logspace(-4, 4, Cs)
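- # e.g. an integer Cs=10 expands to np.logspace(-4, 4, 10): ten values spaced
- # logarithmically between 1e-4 and 1e4, matching the grid documented above.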
- solver = _check_solver(solver, penalty, dual)
- # Preprocessing.
- if check_input:
- X = check_array(
- X,
- accept_sparse="csr",
- dtype=np.float64,
- accept_large_sparse=solver not in ["liblinear", "sag", "saga"],
- )
- y = check_array(y, ensure_2d=False, dtype=None)
- check_consistent_length(X, y)
- n_samples, n_features = X.shape
- classes = np.unique(y)
- random_state = check_random_state(random_state)
- multi_class = _check_multi_class(multi_class, solver, len(classes))
- if pos_class is None and multi_class != "multinomial":
- if classes.size > 2:
- raise ValueError("To fit OvR, use the pos_class argument")
- # np.unique(y) gives labels in sorted order.
- pos_class = classes[1]
- # If sample weights exist, convert them to array (support for lists)
- # and check length
- # Otherwise set them to 1 for all examples
- sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype, copy=True)
- if solver == "newton-cholesky":
- # IMPORTANT NOTE: Rescaling of sample_weight:
- # Same as in _GeneralizedLinearRegressor.fit().
- # We want to minimize
- # obj = 1/(2*sum(sample_weight)) * sum(sample_weight * deviance)
- # + 1/2 * alpha * L2,
- # with
- # deviance = 2 * log_loss.
- # The objective is invariant to multiplying sample_weight by a constant. We
- # choose this constant such that sum(sample_weight) = 1. Thus, we end up with
- # obj = sum(sample_weight * loss) + 1/2 * alpha * L2.
- # Note that LinearModelLoss.loss() computes sum(sample_weight * loss).
- #
- # This rescaling has to be done before multiplying by class_weights.
- sw_sum = sample_weight.sum() # needed to rescale penalty, nasty matter!
- sample_weight = sample_weight / sw_sum
- # If class_weights is a dict (provided by the user), the weights
- # are assigned to the original labels. If it is "balanced", then
- # the class_weights are assigned after masking the labels with a OvR.
- le = LabelEncoder()
- if isinstance(class_weight, dict) or multi_class == "multinomial":
- class_weight_ = compute_class_weight(class_weight, classes=classes, y=y)
- sample_weight *= class_weight_[le.fit_transform(y)]
- # For doing a ovr, we need to mask the labels first. For the
- # multinomial case this is not necessary.
- if multi_class == "ovr":
- w0 = np.zeros(n_features + int(fit_intercept), dtype=X.dtype)
- mask = y == pos_class
- y_bin = np.ones(y.shape, dtype=X.dtype)
- if solver in ["lbfgs", "newton-cg", "newton-cholesky"]:
- # HalfBinomialLoss, used for those solvers, represents y in [0, 1] instead
- # of in [-1, 1].
- mask_classes = np.array([0, 1])
- y_bin[~mask] = 0.0
- else:
- mask_classes = np.array([-1, 1])
- y_bin[~mask] = -1.0
- # for compute_class_weight
- if class_weight == "balanced":
- class_weight_ = compute_class_weight(
- class_weight, classes=mask_classes, y=y_bin
- )
- sample_weight *= class_weight_[le.fit_transform(y_bin)]
- else:
- if solver in ["sag", "saga", "lbfgs", "newton-cg"]:
- # SAG, lbfgs and newton-cg multinomial solvers need LabelEncoder,
- # not LabelBinarizer, i.e. y as a 1d-array of integers.
- # LabelEncoder also saves memory compared to LabelBinarizer, especially
- # when n_classes is large.
- le = LabelEncoder()
- Y_multi = le.fit_transform(y).astype(X.dtype, copy=False)
- else:
- # For liblinear solver, apply LabelBinarizer, i.e. y is one-hot encoded.
- lbin = LabelBinarizer()
- Y_multi = lbin.fit_transform(y)
- if Y_multi.shape[1] == 1:
- Y_multi = np.hstack([1 - Y_multi, Y_multi])
- w0 = np.zeros(
- (classes.size, n_features + int(fit_intercept)), order="F", dtype=X.dtype
- )
- if coef is not None:
- # it must work both giving the bias term and not
- if multi_class == "ovr":
- if coef.size not in (n_features, w0.size):
- raise ValueError(
- "Initialization coef is of shape %d, expected shape %d or %d"
- % (coef.size, n_features, w0.size)
- )
- w0[: coef.size] = coef
- else:
- # For binary problems coef.shape[0] should be 1, otherwise it
- # should be classes.size.
- n_classes = classes.size
- if n_classes == 2:
- n_classes = 1
- if coef.shape[0] != n_classes or coef.shape[1] not in (
- n_features,
- n_features + 1,
- ):
- raise ValueError(
- "Initialization coef is of shape (%d, %d), expected "
- "shape (%d, %d) or (%d, %d)"
- % (
- coef.shape[0],
- coef.shape[1],
- classes.size,
- n_features,
- classes.size,
- n_features + 1,
- )
- )
- if n_classes == 1:
- w0[0, : coef.shape[1]] = -coef
- w0[1, : coef.shape[1]] = coef
- else:
- w0[:, : coef.shape[1]] = coef
- if multi_class == "multinomial":
- if solver in ["lbfgs", "newton-cg"]:
- # scipy.optimize.minimize and newton-cg accept only ravelled parameters,
- # i.e. 1d-arrays. LinearModelLoss expects classes to be contiguous and
- # reconstructs the 2d-array via w0.reshape((n_classes, -1), order="F").
- # As w0 is F-contiguous, ravel(order="F") also avoids a copy.
- w0 = w0.ravel(order="F")
- loss = LinearModelLoss(
- base_loss=HalfMultinomialLoss(n_classes=classes.size),
- fit_intercept=fit_intercept,
- )
- target = Y_multi
- if solver in "lbfgs":
- func = loss.loss_gradient
- elif solver == "newton-cg":
- func = loss.loss
- grad = loss.gradient
- hess = loss.gradient_hessian_product # hess = [gradient, hessp]
- warm_start_sag = {"coef": w0.T}
- else:
- target = y_bin
- if solver == "lbfgs":
- loss = LinearModelLoss(
- base_loss=HalfBinomialLoss(), fit_intercept=fit_intercept
- )
- func = loss.loss_gradient
- elif solver == "newton-cg":
- loss = LinearModelLoss(
- base_loss=HalfBinomialLoss(), fit_intercept=fit_intercept
- )
- func = loss.loss
- grad = loss.gradient
- hess = loss.gradient_hessian_product # hess = [gradient, hessp]
- elif solver == "newton-cholesky":
- loss = LinearModelLoss(
- base_loss=HalfBinomialLoss(), fit_intercept=fit_intercept
- )
- warm_start_sag = {"coef": np.expand_dims(w0, axis=1)}
- coefs = list()
- n_iter = np.zeros(len(Cs), dtype=np.int32)
- for i, C in enumerate(Cs):
- if solver == "lbfgs":
- l2_reg_strength = 1.0 / C
- iprint = [-1, 50, 1, 100, 101][
- np.searchsorted(np.array([0, 1, 2, 3]), verbose)
- ]
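- # The lookup above maps verbose 0/1/2/3/>=4 to L-BFGS-B iprint values
- # -1 (silent) / 50 / 1 / 100 / 101 respectively.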
- opt_res = optimize.minimize(
- func,
- w0,
- method="L-BFGS-B",
- jac=True,
- args=(X, target, sample_weight, l2_reg_strength, n_threads),
- options={"iprint": iprint, "gtol": tol, "maxiter": max_iter},
- )
- n_iter_i = _check_optimize_result(
- solver,
- opt_res,
- max_iter,
- extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG,
- )
- w0, loss = opt_res.x, opt_res.fun
- elif solver == "newton-cg":
- l2_reg_strength = 1.0 / C
- args = (X, target, sample_weight, l2_reg_strength, n_threads)
- w0, n_iter_i = _newton_cg(
- hess, func, grad, w0, args=args, maxiter=max_iter, tol=tol
- )
- elif solver == "newton-cholesky":
- # The division by sw_sum is a consequence of the rescaling of
- # sample_weight, see comment above.
- l2_reg_strength = 1.0 / C / sw_sum
- sol = NewtonCholeskySolver(
- coef=w0,
- linear_loss=loss,
- l2_reg_strength=l2_reg_strength,
- tol=tol,
- max_iter=max_iter,
- n_threads=n_threads,
- verbose=verbose,
- )
- w0 = sol.solve(X=X, y=target, sample_weight=sample_weight)
- n_iter_i = sol.iteration
- elif solver == "liblinear":
- (
- coef_,
- intercept_,
- n_iter_i,
- ) = _fit_liblinear(
- X,
- target,
- C,
- fit_intercept,
- intercept_scaling,
- None,
- penalty,
- dual,
- verbose,
- max_iter,
- tol,
- random_state,
- sample_weight=sample_weight,
- )
- if fit_intercept:
- w0 = np.concatenate([coef_.ravel(), intercept_])
- else:
- w0 = coef_.ravel()
- # n_iter_i is an array for each class. However, `target` is always encoded
- # in {-1, 1}, so we only take the first element of n_iter_i.
- n_iter_i = n_iter_i.item()
- elif solver in ["sag", "saga"]:
- if multi_class == "multinomial":
- target = target.astype(X.dtype, copy=False)
- loss = "multinomial"
- else:
- loss = "log"
- # alpha is for L2-norm, beta is for L1-norm
- if penalty == "l1":
- alpha = 0.0
- beta = 1.0 / C
- elif penalty == "l2":
- alpha = 1.0 / C
- beta = 0.0
- else: # Elastic-Net penalty
- alpha = (1.0 / C) * (1 - l1_ratio)
- beta = (1.0 / C) * l1_ratio
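- # e.g. C=10, penalty="elasticnet", l1_ratio=0.5 gives alpha = beta = 0.05,
- # i.e. equal L2 (alpha) and L1 (beta) regularization strengths.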
- w0, n_iter_i, warm_start_sag = sag_solver(
- X,
- target,
- sample_weight,
- loss,
- alpha,
- beta,
- max_iter,
- tol,
- verbose,
- random_state,
- False,
- max_squared_sum,
- warm_start_sag,
- is_saga=(solver == "saga"),
- )
- else:
- raise ValueError(
- "solver must be one of {'liblinear', 'lbfgs', "
- "'newton-cg', 'sag'}, got '%s' instead" % solver
- )
- if multi_class == "multinomial":
- n_classes = max(2, classes.size)
- if solver in ["lbfgs", "newton-cg"]:
- multi_w0 = np.reshape(w0, (n_classes, -1), order="F")
- else:
- multi_w0 = w0
- if n_classes == 2:
- multi_w0 = multi_w0[1][np.newaxis, :]
- coefs.append(multi_w0.copy())
- else:
- coefs.append(w0.copy())
- n_iter[i] = n_iter_i
- return np.array(coefs), np.array(Cs), n_iter
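- # Minimal usage sketch for the private helper above (illustration only; the
- # toy data and parameter values are assumed, not part of this module):
- #
- #   from sklearn.datasets import make_classification
- #   X, y = make_classification(n_samples=50, n_features=4, random_state=0)
- #   coefs, Cs, n_iter = _logistic_regression_path(X, y, Cs=3, solver="lbfgs")
- #   # Binary/ovr case: coefs.shape == (3, 5), one row per C and
- #   # n_features + 1 columns because fit_intercept=True by default.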
- # helper function for LogisticCV
- def _log_reg_scoring_path(
- X,
- y,
- train,
- test,
- pos_class=None,
- Cs=10,
- scoring=None,
- fit_intercept=False,
- max_iter=100,
- tol=1e-4,
- class_weight=None,
- verbose=0,
- solver="lbfgs",
- penalty="l2",
- dual=False,
- intercept_scaling=1.0,
- multi_class="auto",
- random_state=None,
- max_squared_sum=None,
- sample_weight=None,
- l1_ratio=None,
- ):
- """Computes scores across logistic_regression_path
- Parameters
- ----------
- X : {array-like, sparse matrix} of shape (n_samples, n_features)
- Training data.
- y : array-like of shape (n_samples,) or (n_samples, n_targets)
- Target labels.
- train : list of indices
- The indices of the train set.
- test : list of indices
- The indices of the test set.
- pos_class : int, default=None
- The class with respect to which we perform a one-vs-all fit.
- If None, then it is assumed that the given problem is binary.
- Cs : int or list of floats, default=10
- Each of the values in Cs describes the inverse of
- regularization strength. If Cs is an int, then a grid of Cs
- values is chosen in a logarithmic scale between 1e-4 and 1e4.
- If not provided, then a fixed set of values for Cs is used.
- scoring : str or callable, default=None
- A string (see model evaluation documentation) or
- a scorer callable object / function with signature
- ``scorer(estimator, X, y)``. For a list of scoring functions
- that can be used, look at :mod:`sklearn.metrics`. The
- default scoring option used is accuracy_score.
- fit_intercept : bool, default=False
- If False, then the bias term is set to zero. Else the last
- term of each coef_ gives us the intercept.
- max_iter : int, default=100
- Maximum number of iterations for the solver.
- tol : float, default=1e-4
- Tolerance for stopping criteria.
- class_weight : dict or 'balanced', default=None
- Weights associated with classes in the form ``{class_label: weight}``.
- If not given, all classes are supposed to have weight one.
- The "balanced" mode uses the values of y to automatically adjust
- weights inversely proportional to class frequencies in the input data
- as ``n_samples / (n_classes * np.bincount(y))``
- Note that these weights will be multiplied with sample_weight (passed
- through the fit method) if sample_weight is specified.
- verbose : int, default=0
- For the liblinear and lbfgs solvers set verbose to any positive
- number for verbosity.
- solver : {'lbfgs', 'liblinear', 'newton-cg', 'newton-cholesky', 'sag', 'saga'}, \
- default='lbfgs'
- Decides which solver to use.
- penalty : {'l1', 'l2', 'elasticnet'}, default='l2'
- Used to specify the norm used in the penalization. The 'newton-cg',
- 'sag' and 'lbfgs' solvers support only l2 penalties. 'elasticnet' is
- only supported by the 'saga' solver.
- dual : bool, default=False
- Dual or primal formulation. Dual formulation is only implemented for
- l2 penalty with liblinear solver. Prefer dual=False when
- n_samples > n_features.
- intercept_scaling : float, default=1.
- Useful only when the solver 'liblinear' is used
- and self.fit_intercept is set to True. In this case, x becomes
- [x, self.intercept_scaling],
- i.e. a "synthetic" feature with constant value equals to
- intercept_scaling is appended to the instance vector.
- The intercept becomes intercept_scaling * synthetic feature weight
- Note! the synthetic feature weight is subject to l1/l2 regularization
- as all other features.
- To lessen the effect of regularization on synthetic feature weight
- (and therefore on the intercept) intercept_scaling has to be increased.
- multi_class : {'auto', 'ovr', 'multinomial'}, default='auto'
- If the option chosen is 'ovr', then a binary problem is fit for each
- label. For 'multinomial' the loss minimised is the multinomial loss fit
- across the entire probability distribution, *even when the data is
- binary*. 'multinomial' is unavailable when solver='liblinear'.
- random_state : int, RandomState instance, default=None
- Used when ``solver`` == 'sag', 'saga' or 'liblinear' to shuffle the
- data. See :term:`Glossary <random_state>` for details.
- max_squared_sum : float, default=None
- Maximum squared sum of X over samples. Used only in SAG solver.
- If None, it will be computed, going through all the samples.
- The value should be precomputed to speed up cross validation.
- sample_weight : array-like of shape (n_samples,), default=None
- Array of weights that are assigned to individual samples.
- If not provided, then each sample is given unit weight.
- l1_ratio : float, default=None
- The Elastic-Net mixing parameter, with ``0 <= l1_ratio <= 1``. Only
- used if ``penalty='elasticnet'``. Setting ``l1_ratio=0`` is equivalent
- to using ``penalty='l2'``, while setting ``l1_ratio=1`` is equivalent
- to using ``penalty='l1'``. For ``0 < l1_ratio < 1``, the penalty is a
- combination of L1 and L2.
- Returns
- -------
- coefs : ndarray of shape (n_cs, n_features) or (n_cs, n_features + 1)
- List of coefficients for the Logistic Regression model. If
- fit_intercept is set to True then the second dimension will be
- n_features + 1, where the last item represents the intercept.
- Cs : ndarray
- Grid of Cs used for cross-validation.
- scores : ndarray of shape (n_cs,)
- Scores obtained for each Cs.
- n_iter : ndarray of shape (n_cs,)
- Actual number of iteration for each Cs.
- """
- X_train = X[train]
- X_test = X[test]
- y_train = y[train]
- y_test = y[test]
- if sample_weight is not None:
- sample_weight = _check_sample_weight(sample_weight, X)
- sample_weight = sample_weight[train]
- coefs, Cs, n_iter = _logistic_regression_path(
- X_train,
- y_train,
- Cs=Cs,
- l1_ratio=l1_ratio,
- fit_intercept=fit_intercept,
- solver=solver,
- max_iter=max_iter,
- class_weight=class_weight,
- pos_class=pos_class,
- multi_class=multi_class,
- tol=tol,
- verbose=verbose,
- dual=dual,
- penalty=penalty,
- intercept_scaling=intercept_scaling,
- random_state=random_state,
- check_input=False,
- max_squared_sum=max_squared_sum,
- sample_weight=sample_weight,
- )
- log_reg = LogisticRegression(solver=solver, multi_class=multi_class)
- # The score method of Logistic Regression has a classes_ attribute.
- if multi_class == "ovr":
- log_reg.classes_ = np.array([-1, 1])
- elif multi_class == "multinomial":
- log_reg.classes_ = np.unique(y_train)
- else:
- raise ValueError(
- "multi_class should be either multinomial or ovr, got %d" % multi_class
- )
- if pos_class is not None:
- mask = y_test == pos_class
- y_test = np.ones(y_test.shape, dtype=np.float64)
- y_test[~mask] = -1.0
- scores = list()
- scoring = get_scorer(scoring)
- for w in coefs:
- if multi_class == "ovr":
- w = w[np.newaxis, :]
- if fit_intercept:
- log_reg.coef_ = w[:, :-1]
- log_reg.intercept_ = w[:, -1]
- else:
- log_reg.coef_ = w
- log_reg.intercept_ = 0.0
- if scoring is None:
- scores.append(log_reg.score(X_test, y_test))
- else:
- scores.append(scoring(log_reg, X_test, y_test))
- return coefs, Cs, np.array(scores), n_iter
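- # Commented sketch of scoring one CV split with the helper above; `train` and
- # `test` are assumed to come from a cross-validation splitter:
- #
- #   from sklearn.model_selection import StratifiedKFold
- #   train, test = next(StratifiedKFold(n_splits=5).split(X, y))
- #   coefs, Cs, scores, n_iter = _log_reg_scoring_path(
- #       X, y, train, test, Cs=3, scoring="accuracy", fit_intercept=True
- #   )
- #   # `scores` holds one accuracy value per C in the grid.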
- class LogisticRegression(LinearClassifierMixin, SparseCoefMixin, BaseEstimator):
- """
- Logistic Regression (aka logit, MaxEnt) classifier.
- In the multiclass case, the training algorithm uses the one-vs-rest (OvR)
- scheme if the 'multi_class' option is set to 'ovr', and uses the
- cross-entropy loss if the 'multi_class' option is set to 'multinomial'.
- (Currently the 'multinomial' option is supported only by the 'lbfgs',
- 'sag', 'saga' and 'newton-cg' solvers.)
- This class implements regularized logistic regression using the
- 'liblinear' library, 'newton-cg', 'sag', 'saga' and 'lbfgs' solvers. **Note
- that regularization is applied by default**. It can handle both dense
- and sparse input. Use C-ordered arrays or CSR matrices containing 64-bit
- floats for optimal performance; any other input format will be converted
- (and copied).
- The 'newton-cg', 'sag', and 'lbfgs' solvers support only L2 regularization
- with primal formulation, or no regularization. The 'liblinear' solver
- supports both L1 and L2 regularization, with a dual formulation only for
- the L2 penalty. The Elastic-Net regularization is only supported by the
- 'saga' solver.
- Read more in the :ref:`User Guide <logistic_regression>`.
- Parameters
- ----------
- penalty : {'l1', 'l2', 'elasticnet', None}, default='l2'
- Specify the norm of the penalty:
- - `None`: no penalty is added;
- - `'l2'`: add a L2 penalty term and it is the default choice;
- - `'l1'`: add a L1 penalty term;
- - `'elasticnet'`: both L1 and L2 penalty terms are added.
- .. warning::
- Some penalties may not work with some solvers. See the parameter
- `solver` below, to know the compatibility between the penalty and
- solver.
- .. versionadded:: 0.19
- l1 penalty with SAGA solver (allowing 'multinomial' + L1)
- .. deprecated:: 1.2
- The 'none' option was deprecated in version 1.2, and will be removed
- in 1.4. Use `None` instead.
- dual : bool, default=False
- Dual (constrained) or primal (regularized, see also
- :ref:`this equation <regularized-logistic-loss>`) formulation. Dual formulation
- is only implemented for l2 penalty with liblinear solver. Prefer dual=False when
- n_samples > n_features.
- tol : float, default=1e-4
- Tolerance for stopping criteria.
- C : float, default=1.0
- Inverse of regularization strength; must be a positive float.
- Like in support vector machines, smaller values specify stronger
- regularization.
- fit_intercept : bool, default=True
- Specifies if a constant (a.k.a. bias or intercept) should be
- added to the decision function.
- intercept_scaling : float, default=1
- Useful only when the solver 'liblinear' is used
- and self.fit_intercept is set to True. In this case, x becomes
- [x, self.intercept_scaling],
- i.e. a "synthetic" feature with constant value equal to
- intercept_scaling is appended to the instance vector.
- The intercept becomes ``intercept_scaling * synthetic_feature_weight``.
- Note! the synthetic feature weight is subject to l1/l2 regularization
- as all other features.
- To lessen the effect of regularization on synthetic feature weight
- (and therefore on the intercept) intercept_scaling has to be increased.
- class_weight : dict or 'balanced', default=None
- Weights associated with classes in the form ``{class_label: weight}``.
- If not given, all classes are supposed to have weight one.
- The "balanced" mode uses the values of y to automatically adjust
- weights inversely proportional to class frequencies in the input data
- as ``n_samples / (n_classes * np.bincount(y))``.
- Note that these weights will be multiplied with sample_weight (passed
- through the fit method) if sample_weight is specified.
- .. versionadded:: 0.17
- *class_weight='balanced'*
- random_state : int, RandomState instance, default=None
- Used when ``solver`` == 'sag', 'saga' or 'liblinear' to shuffle the
- data. See :term:`Glossary <random_state>` for details.
- solver : {'lbfgs', 'liblinear', 'newton-cg', 'newton-cholesky', 'sag', 'saga'}, \
- default='lbfgs'
- Algorithm to use in the optimization problem. Default is 'lbfgs'.
- To choose a solver, you might want to consider the following aspects:
- - For small datasets, 'liblinear' is a good choice, whereas 'sag'
- and 'saga' are faster for large ones;
- - For multiclass problems, only 'newton-cg', 'sag', 'saga' and
- 'lbfgs' handle multinomial loss;
- - 'liblinear' is limited to one-versus-rest schemes.
- - 'newton-cholesky' is a good choice for `n_samples` >> `n_features`,
- especially with one-hot encoded categorical features with rare
- categories. Note that it is limited to binary classification and the
- one-versus-rest reduction for multiclass classification. Be aware that
- the memory usage of this solver has a quadratic dependency on
- `n_features` because it explicitly computes the Hessian matrix.
- .. warning::
- The choice of the algorithm depends on the penalty chosen.
- Supported penalties by solver:
- - 'lbfgs' - ['l2', None]
- - 'liblinear' - ['l1', 'l2']
- - 'newton-cg' - ['l2', None]
- - 'newton-cholesky' - ['l2', None]
- - 'sag' - ['l2', None]
- - 'saga' - ['elasticnet', 'l1', 'l2', None]
- .. note::
- 'sag' and 'saga' fast convergence is only guaranteed on features
- with approximately the same scale. You can preprocess the data with
- a scaler from :mod:`sklearn.preprocessing`.
- .. seealso::
- Refer to the User Guide for more information regarding
- :class:`LogisticRegression` and more specifically the
- :ref:`Table <Logistic_regression>`
- summarizing solver/penalty supports.
- .. versionadded:: 0.17
- Stochastic Average Gradient descent solver.
- .. versionadded:: 0.19
- SAGA solver.
- .. versionchanged:: 0.22
- The default solver changed from 'liblinear' to 'lbfgs' in 0.22.
- .. versionadded:: 1.2
- newton-cholesky solver.
- max_iter : int, default=100
- Maximum number of iterations taken for the solvers to converge.
- multi_class : {'auto', 'ovr', 'multinomial'}, default='auto'
- If the option chosen is 'ovr', then a binary problem is fit for each
- label. For 'multinomial' the loss minimised is the multinomial loss fit
- across the entire probability distribution, *even when the data is
- binary*. 'multinomial' is unavailable when solver='liblinear'.
- 'auto' selects 'ovr' if the data is binary, or if solver='liblinear',
- and otherwise selects 'multinomial'.
- .. versionadded:: 0.18
- Stochastic Average Gradient descent solver for 'multinomial' case.
- .. versionchanged:: 0.22
- Default changed from 'ovr' to 'auto' in 0.22.
- verbose : int, default=0
- For the liblinear and lbfgs solvers set verbose to any positive
- number for verbosity.
- warm_start : bool, default=False
- When set to True, reuse the solution of the previous call to fit as
- initialization, otherwise, just erase the previous solution.
- Useless for liblinear solver. See :term:`the Glossary <warm_start>`.
- .. versionadded:: 0.17
- *warm_start* to support *lbfgs*, *newton-cg*, *sag*, *saga* solvers.
- n_jobs : int, default=None
- Number of CPU cores used when parallelizing over classes if
- multi_class='ovr'". This parameter is ignored when the ``solver`` is
- set to 'liblinear' regardless of whether 'multi_class' is specified or
- not. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`
- context. ``-1`` means using all processors.
- See :term:`Glossary <n_jobs>` for more details.
- l1_ratio : float, default=None
- The Elastic-Net mixing parameter, with ``0 <= l1_ratio <= 1``. Only
- used if ``penalty='elasticnet'``. Setting ``l1_ratio=0`` is equivalent
- to using ``penalty='l2'``, while setting ``l1_ratio=1`` is equivalent
- to using ``penalty='l1'``. For ``0 < l1_ratio < 1``, the penalty is a
- combination of L1 and L2.
- Attributes
- ----------
- classes_ : ndarray of shape (n_classes, )
- A list of class labels known to the classifier.
- coef_ : ndarray of shape (1, n_features) or (n_classes, n_features)
- Coefficient of the features in the decision function.
- `coef_` is of shape (1, n_features) when the given problem is binary.
- In particular, when `multi_class='multinomial'`, `coef_` corresponds
- to outcome 1 (True) and `-coef_` corresponds to outcome 0 (False).
- intercept_ : ndarray of shape (1,) or (n_classes,)
- Intercept (a.k.a. bias) added to the decision function.
- If `fit_intercept` is set to False, the intercept is set to zero.
- `intercept_` is of shape (1,) when the given problem is binary.
- In particular, when `multi_class='multinomial'`, `intercept_`
- corresponds to outcome 1 (True) and `-intercept_` corresponds to
- outcome 0 (False).
- n_features_in_ : int
- Number of features seen during :term:`fit`.
- .. versionadded:: 0.24
- feature_names_in_ : ndarray of shape (`n_features_in_`,)
- Names of features seen during :term:`fit`. Defined only when `X`
- has feature names that are all strings.
- .. versionadded:: 1.0
- n_iter_ : ndarray of shape (n_classes,) or (1, )
- Actual number of iterations for all classes. If binary or multinomial,
- it returns only 1 element. For liblinear solver, only the maximum
- number of iteration across all classes is given.
- .. versionchanged:: 0.20
- In SciPy <= 1.0.0 the number of lbfgs iterations may exceed
- ``max_iter``. ``n_iter_`` will now report at most ``max_iter``.
- See Also
- --------
- SGDClassifier : Incrementally trained logistic regression (when given
- the parameter ``loss="log_loss"``).
- LogisticRegressionCV : Logistic regression with built-in cross validation.
- Notes
- -----
- The underlying C implementation uses a random number generator to
- select features when fitting the model. It is thus not uncommon
- to have slightly different results for the same input data. If
- that happens, try with a smaller tol parameter.
- Predict output may not match that of standalone liblinear in certain
- cases. See :ref:`differences from liblinear <liblinear_differences>`
- in the narrative documentation.
- References
- ----------
- L-BFGS-B -- Software for Large-scale Bound-constrained Optimization
- Ciyou Zhu, Richard Byrd, Jorge Nocedal and Jose Luis Morales.
- http://users.iems.northwestern.edu/~nocedal/lbfgsb.html
- LIBLINEAR -- A Library for Large Linear Classification
- https://www.csie.ntu.edu.tw/~cjlin/liblinear/
- SAG -- Mark Schmidt, Nicolas Le Roux, and Francis Bach
- Minimizing Finite Sums with the Stochastic Average Gradient
- https://hal.inria.fr/hal-00860051/document
- SAGA -- Defazio, A., Bach F. & Lacoste-Julien S. (2014).
- :arxiv:`"SAGA: A Fast Incremental Gradient Method With Support
- for Non-Strongly Convex Composite Objectives" <1407.0202>`
- Hsiang-Fu Yu, Fang-Lan Huang, Chih-Jen Lin (2011). Dual coordinate descent
- methods for logistic regression and maximum entropy models.
- Machine Learning 85(1-2):41-75.
- https://www.csie.ntu.edu.tw/~cjlin/papers/maxent_dual.pdf
- Examples
- --------
- >>> from sklearn.datasets import load_iris
- >>> from sklearn.linear_model import LogisticRegression
- >>> X, y = load_iris(return_X_y=True)
- >>> clf = LogisticRegression(random_state=0).fit(X, y)
- >>> clf.predict(X[:2, :])
- array([0, 0])
- >>> clf.predict_proba(X[:2, :])
- array([[9.8...e-01, 1.8...e-02, 1.4...e-08],
- [9.7...e-01, 2.8...e-02, ...e-08]])
- >>> clf.score(X, y)
- 0.97...
- """
- _parameter_constraints: dict = {
- # TODO(1.4): Remove "none" option
- "penalty": [
- StrOptions({"l1", "l2", "elasticnet", "none"}, deprecated={"none"}),
- None,
- ],
- "dual": ["boolean"],
- "tol": [Interval(Real, 0, None, closed="left")],
- "C": [Interval(Real, 0, None, closed="right")],
- "fit_intercept": ["boolean"],
- "intercept_scaling": [Interval(Real, 0, None, closed="neither")],
- "class_weight": [dict, StrOptions({"balanced"}), None],
- "random_state": ["random_state"],
- "solver": [
- StrOptions(
- {"lbfgs", "liblinear", "newton-cg", "newton-cholesky", "sag", "saga"}
- )
- ],
- "max_iter": [Interval(Integral, 0, None, closed="left")],
- "multi_class": [StrOptions({"auto", "ovr", "multinomial"})],
- "verbose": ["verbose"],
- "warm_start": ["boolean"],
- "n_jobs": [None, Integral],
- "l1_ratio": [Interval(Real, 0, 1, closed="both"), None],
- }
- def __init__(
- self,
- penalty="l2",
- *,
- dual=False,
- tol=1e-4,
- C=1.0,
- fit_intercept=True,
- intercept_scaling=1,
- class_weight=None,
- random_state=None,
- solver="lbfgs",
- max_iter=100,
- multi_class="auto",
- verbose=0,
- warm_start=False,
- n_jobs=None,
- l1_ratio=None,
- ):
- self.penalty = penalty
- self.dual = dual
- self.tol = tol
- self.C = C
- self.fit_intercept = fit_intercept
- self.intercept_scaling = intercept_scaling
- self.class_weight = class_weight
- self.random_state = random_state
- self.solver = solver
- self.max_iter = max_iter
- self.multi_class = multi_class
- self.verbose = verbose
- self.warm_start = warm_start
- self.n_jobs = n_jobs
- self.l1_ratio = l1_ratio
- @_fit_context(prefer_skip_nested_validation=True)
- def fit(self, X, y, sample_weight=None):
- """
- Fit the model according to the given training data.
- Parameters
- ----------
- X : {array-like, sparse matrix} of shape (n_samples, n_features)
- Training vector, where `n_samples` is the number of samples and
- `n_features` is the number of features.
- y : array-like of shape (n_samples,)
- Target vector relative to X.
- sample_weight : array-like of shape (n_samples,) default=None
- Array of weights that are assigned to individual samples.
- If not provided, then each sample is given unit weight.
- .. versionadded:: 0.17
- *sample_weight* support to LogisticRegression.
- Returns
- -------
- self
- Fitted estimator.
- Notes
- -----
- The SAGA solver supports both float64 and float32 bit arrays.
- """
- solver = _check_solver(self.solver, self.penalty, self.dual)
- if self.penalty != "elasticnet" and self.l1_ratio is not None:
- warnings.warn(
- "l1_ratio parameter is only used when penalty is "
- "'elasticnet'. Got "
- "(penalty={})".format(self.penalty)
- )
- if self.penalty == "elasticnet" and self.l1_ratio is None:
- raise ValueError("l1_ratio must be specified when penalty is elasticnet.")
- # TODO(1.4): Remove "none" option
- if self.penalty == "none":
- warnings.warn(
- (
- "`penalty='none'`has been deprecated in 1.2 and will be removed in"
- " 1.4. To keep the past behaviour, set `penalty=None`."
- ),
- FutureWarning,
- )
- if self.penalty is None or self.penalty == "none":
- if self.C != 1.0: # default values
- warnings.warn(
- "Setting penalty=None will ignore the C and l1_ratio parameters"
- )
- # Note that check for l1_ratio is done right above
- C_ = np.inf
- penalty = "l2"
- else:
- C_ = self.C
- penalty = self.penalty
- if solver == "lbfgs":
- _dtype = np.float64
- else:
- _dtype = [np.float64, np.float32]
- X, y = self._validate_data(
- X,
- y,
- accept_sparse="csr",
- dtype=_dtype,
- order="C",
- accept_large_sparse=solver not in ["liblinear", "sag", "saga"],
- )
- check_classification_targets(y)
- self.classes_ = np.unique(y)
- multi_class = _check_multi_class(self.multi_class, solver, len(self.classes_))
- if solver == "liblinear":
- if effective_n_jobs(self.n_jobs) != 1:
- warnings.warn(
- "'n_jobs' > 1 does not have any effect when"
- " 'solver' is set to 'liblinear'. Got 'n_jobs'"
- " = {}.".format(effective_n_jobs(self.n_jobs))
- )
- self.coef_, self.intercept_, self.n_iter_ = _fit_liblinear(
- X,
- y,
- self.C,
- self.fit_intercept,
- self.intercept_scaling,
- self.class_weight,
- self.penalty,
- self.dual,
- self.verbose,
- self.max_iter,
- self.tol,
- self.random_state,
- sample_weight=sample_weight,
- )
- return self
- if solver in ["sag", "saga"]:
- max_squared_sum = row_norms(X, squared=True).max()
- else:
- max_squared_sum = None
- n_classes = len(self.classes_)
- classes_ = self.classes_
- if n_classes < 2:
- raise ValueError(
- "This solver needs samples of at least 2 classes"
- " in the data, but the data contains only one"
- " class: %r"
- % classes_[0]
- )
- if len(self.classes_) == 2:
- n_classes = 1
- classes_ = classes_[1:]
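- # Binary case: a single set of coefficients is fit for the positive class
- # classes_[1]; coef_ then has shape (1, n_features) as documented above.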
- if self.warm_start:
- warm_start_coef = getattr(self, "coef_", None)
- else:
- warm_start_coef = None
- if warm_start_coef is not None and self.fit_intercept:
- warm_start_coef = np.append(
- warm_start_coef, self.intercept_[:, np.newaxis], axis=1
- )
- # Hack so that we iterate only once for the multinomial case.
- if multi_class == "multinomial":
- classes_ = [None]
- warm_start_coef = [warm_start_coef]
- if warm_start_coef is None:
- warm_start_coef = [None] * n_classes
- path_func = delayed(_logistic_regression_path)
- # The SAG solver releases the GIL so it's more efficient to use
- # threads for this solver.
- if solver in ["sag", "saga"]:
- prefer = "threads"
- else:
- prefer = "processes"
- # TODO: Refactor this to avoid joblib parallelism entirely when doing binary
- # and multinomial multiclass classification and use joblib only for the
- # one-vs-rest multiclass case.
- if (
- solver in ["lbfgs", "newton-cg", "newton-cholesky"]
- and len(classes_) == 1
- and effective_n_jobs(self.n_jobs) == 1
- ):
- # In the future, we would like n_threads = _openmp_effective_n_threads()
- # For the time being, we just do
- n_threads = 1
- else:
- n_threads = 1
- fold_coefs_ = Parallel(n_jobs=self.n_jobs, verbose=self.verbose, prefer=prefer)(
- path_func(
- X,
- y,
- pos_class=class_,
- Cs=[C_],
- l1_ratio=self.l1_ratio,
- fit_intercept=self.fit_intercept,
- tol=self.tol,
- verbose=self.verbose,
- solver=solver,
- multi_class=multi_class,
- max_iter=self.max_iter,
- class_weight=self.class_weight,
- check_input=False,
- random_state=self.random_state,
- coef=warm_start_coef_,
- penalty=penalty,
- max_squared_sum=max_squared_sum,
- sample_weight=sample_weight,
- n_threads=n_threads,
- )
- for class_, warm_start_coef_ in zip(classes_, warm_start_coef)
- )
- fold_coefs_, _, n_iter_ = zip(*fold_coefs_)
- self.n_iter_ = np.asarray(n_iter_, dtype=np.int32)[:, 0]
- n_features = X.shape[1]
- if multi_class == "multinomial":
- self.coef_ = fold_coefs_[0][0]
- else:
- self.coef_ = np.asarray(fold_coefs_)
- self.coef_ = self.coef_.reshape(
- n_classes, n_features + int(self.fit_intercept)
- )
- if self.fit_intercept:
- self.intercept_ = self.coef_[:, -1]
- self.coef_ = self.coef_[:, :-1]
- else:
- self.intercept_ = np.zeros(n_classes)
- return self
- def predict_proba(self, X):
- """
- Probability estimates.
- The returned estimates for all classes are ordered by the
- label of classes.
- For a multi_class problem, if multi_class is set to be "multinomial"
- the softmax function is used to find the predicted probability of
- each class.
- Else use a one-vs-rest approach, i.e. calculate the probability
- of each class assuming it to be positive using the logistic function,
- and normalize these values across all the classes.
- Parameters
- ----------
- X : array-like of shape (n_samples, n_features)
- Vector to be scored, where `n_samples` is the number of samples and
- `n_features` is the number of features.
- Returns
- -------
- T : array-like of shape (n_samples, n_classes)
- Returns the probability of the sample for each class in the model,
- where classes are ordered as they are in ``self.classes_``.
- """
- check_is_fitted(self)
- ovr = self.multi_class in ["ovr", "warn"] or (
- self.multi_class == "auto"
- and (
- self.classes_.size <= 2
- or self.solver in ("liblinear", "newton-cholesky")
- )
- )
- if ovr:
- return super()._predict_proba_lr(X)
- else:
- decision = self.decision_function(X)
- if decision.ndim == 1:
- # Workaround for multi_class="multinomial" and binary outcomes
- # which requires softmax prediction with only a 1D decision.
- decision_2d = np.c_[-decision, decision]
- else:
- decision_2d = decision
- return softmax(decision_2d, copy=False)
- def predict_log_proba(self, X):
- """
- Predict logarithm of probability estimates.
- The returned estimates for all classes are ordered by the
- label of classes.
- Parameters
- ----------
- X : array-like of shape (n_samples, n_features)
- Vector to be scored, where `n_samples` is the number of samples and
- `n_features` is the number of features.
- Returns
- -------
- T : array-like of shape (n_samples, n_classes)
- Returns the log-probability of the sample for each class in the
- model, where classes are ordered as they are in ``self.classes_``.
- """
- return np.log(self.predict_proba(X))
- class LogisticRegressionCV(LogisticRegression, LinearClassifierMixin, BaseEstimator):
- """Logistic Regression CV (aka logit, MaxEnt) classifier.
- See glossary entry for :term:`cross-validation estimator`.
- This class implements logistic regression using liblinear, newton-cg, sag
- or lbfgs optimizer. The newton-cg, sag and lbfgs solvers support only L2
- regularization with primal formulation. The liblinear solver supports both
- L1 and L2 regularization, with a dual formulation only for the L2 penalty.
- Elastic-Net penalty is only supported by the saga solver.
- For the grid of `Cs` values and `l1_ratios` values, the best hyperparameter
- is selected by the cross-validator
- :class:`~sklearn.model_selection.StratifiedKFold`, but it can be changed
- using the :term:`cv` parameter. The 'newton-cg', 'sag', 'saga' and 'lbfgs'
- solvers can warm-start the coefficients (see :term:`Glossary<warm_start>`).
- Read more in the :ref:`User Guide <logistic_regression>`.
- Parameters
- ----------
- Cs : int or list of floats, default=10
- Each of the values in Cs describes the inverse of regularization
- strength. If Cs is an int, then a grid of Cs values is chosen
- in a logarithmic scale between 1e-4 and 1e4.
- Like in support vector machines, smaller values specify stronger
- regularization.
- fit_intercept : bool, default=True
- Specifies if a constant (a.k.a. bias or intercept) should be
- added to the decision function.
- cv : int or cross-validation generator, default=None
- The default cross-validation generator used is Stratified K-Folds.
- If an integer is provided, then it is the number of folds used.
- See the :mod:`sklearn.model_selection` module for the
- list of possible cross-validation objects.
- .. versionchanged:: 0.22
- ``cv`` default value if None changed from 3-fold to 5-fold.
- dual : bool, default=False
- Dual (constrained) or primal (regularized, see also
- :ref:`this equation <regularized-logistic-loss>`) formulation. Dual formulation
- is only implemented for l2 penalty with liblinear solver. Prefer dual=False when
- n_samples > n_features.
- penalty : {'l1', 'l2', 'elasticnet'}, default='l2'
- Specify the norm of the penalty:
- - `'l2'`: add a L2 penalty term (used by default);
- - `'l1'`: add a L1 penalty term;
- - `'elasticnet'`: both L1 and L2 penalty terms are added.
- .. warning::
- Some penalties may not work with some solvers. See the parameter
- `solver` below, to know the compatibility between the penalty and
- solver.
- scoring : str or callable, default=None
- A string (see model evaluation documentation) or
- a scorer callable object / function with signature
- ``scorer(estimator, X, y)``. For a list of scoring functions
- that can be used, look at :mod:`sklearn.metrics`. The
- default scoring option used is 'accuracy'.
- solver : {'lbfgs', 'liblinear', 'newton-cg', 'newton-cholesky', 'sag', 'saga'}, \
- default='lbfgs'
- Algorithm to use in the optimization problem. Default is 'lbfgs'.
- To choose a solver, you might want to consider the following aspects:
- - For small datasets, 'liblinear' is a good choice, whereas 'sag'
- and 'saga' are faster for large ones;
- - For multiclass problems, only 'newton-cg', 'sag', 'saga' and
- 'lbfgs' handle multinomial loss;
- - 'liblinear' might be slower in :class:`LogisticRegressionCV`
- because it does not handle warm-starting. 'liblinear' is
- limited to one-versus-rest schemes.
- - 'newton-cholesky' is a good choice for `n_samples` >> `n_features`,
- especially with one-hot encoded categorical features with rare
- categories. Note that it is limited to binary classification and the
- one-versus-rest reduction for multiclass classification. Be aware that
- the memory usage of this solver has a quadratic dependency on
- `n_features` because it explicitly computes the Hessian matrix.
- .. warning::
- The choice of the algorithm depends on the penalty chosen.
- Supported penalties by solver:
- - 'lbfgs' - ['l2']
- - 'liblinear' - ['l1', 'l2']
- - 'newton-cg' - ['l2']
- - 'newton-cholesky' - ['l2']
- - 'sag' - ['l2']
- - 'saga' - ['elasticnet', 'l1', 'l2']
- .. note::
- The fast convergence of 'sag' and 'saga' is only guaranteed on
- features with approximately the same scale. You can preprocess the data with
- a scaler from :mod:`sklearn.preprocessing`.
- .. versionadded:: 0.17
- Stochastic Average Gradient descent solver.
- .. versionadded:: 0.19
- SAGA solver.
- .. versionadded:: 1.2
- newton-cholesky solver.
- tol : float, default=1e-4
- Tolerance for stopping criteria.
- max_iter : int, default=100
- Maximum number of iterations of the optimization algorithm.
- class_weight : dict or 'balanced', default=None
- Weights associated with classes in the form ``{class_label: weight}``.
- If not given, all classes are supposed to have weight one.
- The "balanced" mode uses the values of y to automatically adjust
- weights inversely proportional to class frequencies in the input data
- as ``n_samples / (n_classes * np.bincount(y))``.
- Note that these weights will be multiplied with sample_weight (passed
- through the fit method) if sample_weight is specified.
- .. versionadded:: 0.17
- class_weight == 'balanced'
- n_jobs : int, default=None
- Number of CPU cores used during the cross-validation loop.
- ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
- ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
- for more details.
- verbose : int, default=0
- For the 'liblinear', 'sag' and 'lbfgs' solvers, set verbose to any
- positive number for verbosity.
- refit : bool, default=True
- If set to True, the scores are averaged across all folds, the
- coefs and the C that correspond to the best score are taken, and a
- final refit is done using these parameters.
- Otherwise the coefs, intercepts and C that correspond to the
- best scores across folds are averaged.
- intercept_scaling : float, default=1
- Useful only when the solver 'liblinear' is used
- and self.fit_intercept is set to True. In this case, x becomes
- [x, self.intercept_scaling],
- i.e. a "synthetic" feature with constant value equal to
- intercept_scaling is appended to the instance vector.
- The intercept becomes ``intercept_scaling * synthetic_feature_weight``.
- Note that the synthetic feature weight is subject to L1/L2
- regularization, like all other features.
- To lessen the effect of regularization on synthetic feature weight
- (and therefore on the intercept) intercept_scaling has to be increased.
- multi_class : {'auto', 'ovr', 'multinomial'}, default='auto'
- If the option chosen is 'ovr', then a binary problem is fit for each
- label. For 'multinomial' the loss minimised is the multinomial loss fit
- across the entire probability distribution, *even when the data is
- binary*. 'multinomial' is unavailable when solver='liblinear'.
- 'auto' selects 'ovr' if the data is binary, or if solver='liblinear',
- and otherwise selects 'multinomial'.
- .. versionadded:: 0.18
- Stochastic Average Gradient descent solver for the 'multinomial' case.
- .. versionchanged:: 0.22
- Default changed from 'ovr' to 'auto' in 0.22.
- random_state : int, RandomState instance, default=None
- Used when `solver='sag'`, 'saga' or 'liblinear' to shuffle the data.
- Note that this only applies to the solver and not the cross-validation
- generator. See :term:`Glossary <random_state>` for details.
- l1_ratios : list of float, default=None
- The list of Elastic-Net mixing parameters, with ``0 <= l1_ratio <= 1``.
- Only used if ``penalty='elasticnet'``. A value of 0 is equivalent to
- using ``penalty='l2'``, while 1 is equivalent to using
- ``penalty='l1'``. For ``0 < l1_ratio < 1``, the penalty is a combination
- of L1 and L2.
- Attributes
- ----------
- classes_ : ndarray of shape (n_classes, )
- A list of class labels known to the classifier.
- coef_ : ndarray of shape (1, n_features) or (n_classes, n_features)
- Coefficient of the features in the decision function.
- `coef_` is of shape (1, n_features) when the given problem
- is binary.
- intercept_ : ndarray of shape (1,) or (n_classes,)
- Intercept (a.k.a. bias) added to the decision function.
- If `fit_intercept` is set to False, the intercept is set to zero.
- `intercept_` is of shape (1,) when the problem is binary.
- Cs_ : ndarray of shape (n_cs)
- Array of C i.e. inverse of regularization parameter values used
- for cross-validation.
- l1_ratios_ : ndarray of shape (n_l1_ratios)
- Array of l1_ratios used for cross-validation. If no l1_ratio is used
- (i.e. penalty is not 'elasticnet'), this is set to ``[None]``.
- coefs_paths_ : ndarray of shape (n_folds, n_cs, n_features) or \
- (n_folds, n_cs, n_features + 1)
- dict with classes as the keys, and the path of coefficients obtained
- during cross-validating across each fold and then across each Cs
- after doing an OvR for the corresponding class as values.
- If the 'multi_class' option is set to 'multinomial', then
- the coefs_paths are the coefficients corresponding to each class.
- Each dict value has shape ``(n_folds, n_cs, n_features)`` or
- ``(n_folds, n_cs, n_features + 1)`` depending on whether the
- intercept is fit or not. If ``penalty='elasticnet'``, the shape is
- ``(n_folds, n_cs, n_l1_ratios_, n_features)`` or
- ``(n_folds, n_cs, n_l1_ratios_, n_features + 1)``.
- scores_ : dict
- dict with classes as the keys, and the values as the
- grid of scores obtained during cross-validating each fold, after doing
- an OvR for the corresponding class. If the 'multi_class' option
- given is 'multinomial', then the same scores are repeated across
- all classes, since the multinomial loss is shared by all classes. Each dict value
- has shape ``(n_folds, n_cs)`` or ``(n_folds, n_cs, n_l1_ratios)`` if
- ``penalty='elasticnet'``.
- C_ : ndarray of shape (n_classes,) or (n_classes - 1,)
- Array of C that maps to the best scores across every class. If refit is
- set to False, then for each class, the best C is the average of the
- C's that correspond to the best scores for each fold.
- `C_` is of shape (n_classes,) when the problem is binary.
- l1_ratio_ : ndarray of shape (n_classes,) or (n_classes - 1,)
- Array of l1_ratio that maps to the best scores across every class. If
- refit is set to False, then for each class, the best l1_ratio is the
- average of the l1_ratio's that correspond to the best scores for each
- fold. `l1_ratio_` is of shape (n_classes,) when the problem is binary.
- n_iter_ : ndarray of shape (n_classes, n_folds, n_cs) or (1, n_folds, n_cs)
- Actual number of iterations for all classes, folds and Cs.
- In the binary or multinomial cases, the first dimension is equal to 1.
- If ``penalty='elasticnet'``, the shape is ``(n_classes, n_folds,
- n_cs, n_l1_ratios)`` or ``(1, n_folds, n_cs, n_l1_ratios)``.
- n_features_in_ : int
- Number of features seen during :term:`fit`.
- .. versionadded:: 0.24
- feature_names_in_ : ndarray of shape (`n_features_in_`,)
- Names of features seen during :term:`fit`. Defined only when `X`
- has feature names that are all strings.
- .. versionadded:: 1.0
- See Also
- --------
- LogisticRegression : Logistic regression without tuning the
- hyperparameter `C`.
- Examples
- --------
- >>> from sklearn.datasets import load_iris
- >>> from sklearn.linear_model import LogisticRegressionCV
- >>> X, y = load_iris(return_X_y=True)
- >>> clf = LogisticRegressionCV(cv=5, random_state=0).fit(X, y)
- >>> clf.predict(X[:2, :])
- array([0, 0])
- >>> clf.predict_proba(X[:2, :]).shape
- (2, 3)
- >>> clf.score(X, y)
- 0.98...
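- The elastic-net penalty can be tuned over a grid of mixing parameters;
- it is only supported by the 'saga' solver, and the scores obtained depend
- on convergence (scaling the features helps, see the note under `solver`).
- The grid values below are only illustrative:
- >>> clf_en = LogisticRegressionCV(
- ...     Cs=3, penalty="elasticnet", solver="saga",
- ...     l1_ratios=[0.1, 0.5, 0.9], max_iter=1000, random_state=0
- ... ).fit(X, y)
- >>> clf_en.l1_ratios_
- array([0.1, 0.5, 0.9])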
- """
- _parameter_constraints: dict = {**LogisticRegression._parameter_constraints}
- for param in ["C", "warm_start", "l1_ratio"]:
- _parameter_constraints.pop(param)
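- # `C`, `warm_start` and `l1_ratio` are not exposed by this estimator:
- # `C` and `l1_ratio` are replaced by the search grids `Cs` and
- # `l1_ratios`, and warm-starting along the regularization path is
- # handled internally.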
- _parameter_constraints.update(
- {
- "Cs": [Interval(Integral, 1, None, closed="left"), "array-like"],
- "cv": ["cv_object"],
- "scoring": [StrOptions(set(get_scorer_names())), callable, None],
- "l1_ratios": ["array-like", None],
- "refit": ["boolean"],
- "penalty": [StrOptions({"l1", "l2", "elasticnet"})],
- }
- )
- def __init__(
- self,
- *,
- Cs=10,
- fit_intercept=True,
- cv=None,
- dual=False,
- penalty="l2",
- scoring=None,
- solver="lbfgs",
- tol=1e-4,
- max_iter=100,
- class_weight=None,
- n_jobs=None,
- verbose=0,
- refit=True,
- intercept_scaling=1.0,
- multi_class="auto",
- random_state=None,
- l1_ratios=None,
- ):
- self.Cs = Cs
- self.fit_intercept = fit_intercept
- self.cv = cv
- self.dual = dual
- self.penalty = penalty
- self.scoring = scoring
- self.tol = tol
- self.max_iter = max_iter
- self.class_weight = class_weight
- self.n_jobs = n_jobs
- self.verbose = verbose
- self.solver = solver
- self.refit = refit
- self.intercept_scaling = intercept_scaling
- self.multi_class = multi_class
- self.random_state = random_state
- self.l1_ratios = l1_ratios
- @_fit_context(prefer_skip_nested_validation=True)
- def fit(self, X, y, sample_weight=None):
- """Fit the model according to the given training data.
- Parameters
- ----------
- X : {array-like, sparse matrix} of shape (n_samples, n_features)
- Training vector, where `n_samples` is the number of samples and
- `n_features` is the number of features.
- y : array-like of shape (n_samples,)
- Target vector relative to X.
- sample_weight : array-like of shape (n_samples,), default=None
- Array of weights that are assigned to individual samples.
- If not provided, then each sample is given unit weight.
- Returns
- -------
- self : object
- Fitted LogisticRegressionCV estimator.
- """
- solver = _check_solver(self.solver, self.penalty, self.dual)
- if self.penalty == "elasticnet":
- if (
- self.l1_ratios is None
- or len(self.l1_ratios) == 0
- or any(
- (
- not isinstance(l1_ratio, numbers.Number)
- or l1_ratio < 0
- or l1_ratio > 1
- )
- for l1_ratio in self.l1_ratios
- )
- ):
- raise ValueError(
- "l1_ratios must be a list of numbers between "
- "0 and 1; got (l1_ratios=%r)"
- % self.l1_ratios
- )
- l1_ratios_ = self.l1_ratios
- else:
- if self.l1_ratios is not None:
- warnings.warn(
- "l1_ratios parameter is only used when penalty "
- "is 'elasticnet'. Got (penalty={})".format(self.penalty)
- )
- l1_ratios_ = [None]
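- # With a penalty other than 'elasticnet', a single [None] entry keeps
- # the (Cs x l1_ratios) bookkeeping below uniform, so every reshape can
- # assume at least one l1_ratio value.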
- X, y = self._validate_data(
- X,
- y,
- accept_sparse="csr",
- dtype=np.float64,
- order="C",
- accept_large_sparse=solver not in ["liblinear", "sag", "saga"],
- )
- check_classification_targets(y)
- class_weight = self.class_weight
- # Encode for string labels
- label_encoder = LabelEncoder().fit(y)
- y = label_encoder.transform(y)
- if isinstance(class_weight, dict):
- class_weight = {
- label_encoder.transform([cls])[0]: v for cls, v in class_weight.items()
- }
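- # A user-supplied class_weight dict is keyed by the original labels, so
- # its keys are remapped to the encoded labels used from here on.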
- # The original class labels
- classes = self.classes_ = label_encoder.classes_
- encoded_labels = label_encoder.transform(label_encoder.classes_)
- multi_class = _check_multi_class(self.multi_class, solver, len(classes))
- if solver in ["sag", "saga"]:
- max_squared_sum = row_norms(X, squared=True).max()
- else:
- max_squared_sum = None
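- # The sag/saga step size is derived from the maximum squared row norm
- # of X; the other solvers do not need it.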
- # init cross-validation generator
- cv = check_cv(self.cv, y, classifier=True)
- folds = list(cv.split(X, y))
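- # Materialize the splits once so that the same folds are reused for
- # every class / C / l1_ratio combination below.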
- # Use the label encoded classes
- n_classes = len(encoded_labels)
- if n_classes < 2:
- raise ValueError(
- "This solver needs samples of at least 2 classes"
- " in the data, but the data contains only one"
- " class: %r"
- % classes[0]
- )
- if n_classes == 2:
- # OvR in case of binary problems is as good as fitting
- # the higher label
- n_classes = 1
- encoded_labels = encoded_labels[1:]
- classes = classes[1:]
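- # As a result, scores_ and coefs_paths_ are keyed by the positive class
- # label only in the binary case.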
- # We need this hack to iterate only once over labels, in the case of
- # multi_class = multinomial, without changing the value of the labels.
- if multi_class == "multinomial":
- iter_encoded_labels = iter_classes = [None]
- else:
- iter_encoded_labels = encoded_labels
- iter_classes = classes
- # compute the class weights for the entire dataset y
- if class_weight == "balanced":
- class_weight = compute_class_weight(
- class_weight, classes=np.arange(len(self.classes_)), y=y
- )
- class_weight = dict(enumerate(class_weight))
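- # Note: 'balanced' weights are computed once on the full dataset (not
- # per training fold) and are keyed by the encoded labels.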
- path_func = delayed(_log_reg_scoring_path)
- # The SAG solver releases the GIL so it's more efficient to use
- # threads for this solver.
- if self.solver in ["sag", "saga"]:
- prefer = "threads"
- else:
- prefer = "processes"
- fold_coefs_ = Parallel(n_jobs=self.n_jobs, verbose=self.verbose, prefer=prefer)(
- path_func(
- X,
- y,
- train,
- test,
- pos_class=label,
- Cs=self.Cs,
- fit_intercept=self.fit_intercept,
- penalty=self.penalty,
- dual=self.dual,
- solver=solver,
- tol=self.tol,
- max_iter=self.max_iter,
- verbose=self.verbose,
- class_weight=class_weight,
- scoring=self.scoring,
- multi_class=multi_class,
- intercept_scaling=self.intercept_scaling,
- random_state=self.random_state,
- max_squared_sum=max_squared_sum,
- sample_weight=sample_weight,
- l1_ratio=l1_ratio,
- )
- for label in iter_encoded_labels
- for train, test in folds
- for l1_ratio in l1_ratios_
- )
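- # fold_coefs_ is ordered label-major: for each class label, all
- # (fold, l1_ratio) combinations appear consecutively, which the
- # reshapes below rely on.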
- # _log_reg_scoring_path will output different shapes depending on the
- # multi_class param, so we need to reshape the outputs accordingly.
- # Cs is of shape (n_classes . n_folds . n_l1_ratios, n_Cs) and all the
- # rows are equal, so we just take the first one.
- # After reshaping,
- # - scores is of shape (n_classes, n_folds, n_Cs . n_l1_ratios)
- # - coefs_paths is of shape
- # (n_classes, n_folds, n_Cs . n_l1_ratios, n_features)
- # - n_iter is of shape
- # (n_classes, n_folds, n_Cs . n_l1_ratios) or
- # (1, n_folds, n_Cs . n_l1_ratios)
- coefs_paths, Cs, scores, n_iter_ = zip(*fold_coefs_)
- self.Cs_ = Cs[0]
- if multi_class == "multinomial":
- coefs_paths = np.reshape(
- coefs_paths,
- (len(folds), len(l1_ratios_) * len(self.Cs_), n_classes, -1),
- )
- # equiv to coefs_paths = np.moveaxis(coefs_paths, (0, 1, 2, 3),
- # (1, 2, 0, 3))
- coefs_paths = np.swapaxes(coefs_paths, 0, 1)
- coefs_paths = np.swapaxes(coefs_paths, 0, 2)
- self.n_iter_ = np.reshape(
- n_iter_, (1, len(folds), len(self.Cs_) * len(l1_ratios_))
- )
- # repeat same scores across all classes
- scores = np.tile(scores, (n_classes, 1, 1))
- else:
- coefs_paths = np.reshape(
- coefs_paths,
- (n_classes, len(folds), len(self.Cs_) * len(l1_ratios_), -1),
- )
- self.n_iter_ = np.reshape(
- n_iter_, (n_classes, len(folds), len(self.Cs_) * len(l1_ratios_))
- )
- scores = np.reshape(scores, (n_classes, len(folds), -1))
- self.scores_ = dict(zip(classes, scores))
- self.coefs_paths_ = dict(zip(classes, coefs_paths))
- self.C_ = list()
- self.l1_ratio_ = list()
- self.coef_ = np.empty((n_classes, X.shape[1]))
- self.intercept_ = np.zeros(n_classes)
- for index, (cls, encoded_label) in enumerate(
- zip(iter_classes, iter_encoded_labels)
- ):
- if multi_class == "ovr":
- scores = self.scores_[cls]
- coefs_paths = self.coefs_paths_[cls]
- else:
- # For multinomial, all scores are the same across classes
- scores = scores[0]
- # coefs_paths will keep its original shape because
- # logistic_regression_path expects it this way
- if self.refit:
- # best_index is between 0 and (n_Cs . n_l1_ratios - 1)
- # for example, with n_cs=2 and n_l1_ratios=3
- # the layout of scores is
- # [c1, c2, c1, c2, c1, c2]
- # l1_1 , l1_2 , l1_3
- best_index = scores.sum(axis=0).argmax()
- best_index_C = best_index % len(self.Cs_)
- C_ = self.Cs_[best_index_C]
- self.C_.append(C_)
- best_index_l1 = best_index // len(self.Cs_)
- l1_ratio_ = l1_ratios_[best_index_l1]
- self.l1_ratio_.append(l1_ratio_)
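- # Worked example: with n_Cs=2 and n_l1_ratios=3, best_index=4 points at
- # [c1, c2, c1, c2, c1, c2][4], i.e. C index 4 % 2 = 0 (c1) and l1_ratio
- # index 4 // 2 = 2 (the third l1_ratio).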
- if multi_class == "multinomial":
- coef_init = np.mean(coefs_paths[:, :, best_index, :], axis=1)
- else:
- coef_init = np.mean(coefs_paths[:, best_index, :], axis=0)
- # Note that y is label encoded and hence pos_class must be
- # the encoded label / None (for 'multinomial')
- w, _, _ = _logistic_regression_path(
- X,
- y,
- pos_class=encoded_label,
- Cs=[C_],
- solver=solver,
- fit_intercept=self.fit_intercept,
- coef=coef_init,
- max_iter=self.max_iter,
- tol=self.tol,
- penalty=self.penalty,
- class_weight=class_weight,
- multi_class=multi_class,
- verbose=max(0, self.verbose - 1),
- random_state=self.random_state,
- check_input=False,
- max_squared_sum=max_squared_sum,
- sample_weight=sample_weight,
- l1_ratio=l1_ratio_,
- )
- w = w[0]
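- # _logistic_regression_path returns one coefficient array per value in
- # `Cs`; since the refit uses a single best C, keep the only entry.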
- else:
- # Take the best scores across every fold and the average of
- # all coefficients corresponding to the best scores.
- best_indices = np.argmax(scores, axis=1)
- if multi_class == "ovr":
- w = np.mean(
- [coefs_paths[i, best_indices[i], :] for i in range(len(folds))],
- axis=0,
- )
- else:
- w = np.mean(
- [
- coefs_paths[:, i, best_indices[i], :]
- for i in range(len(folds))
- ],
- axis=0,
- )
- best_indices_C = best_indices % len(self.Cs_)
- self.C_.append(np.mean(self.Cs_[best_indices_C]))
- if self.penalty == "elasticnet":
- best_indices_l1 = best_indices // len(self.Cs_)
- self.l1_ratio_.append(np.mean(l1_ratios_[best_indices_l1]))
- else:
- self.l1_ratio_.append(None)
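- # Note that averaging the per-fold best C (and l1_ratio) values can
- # yield a value that is not itself part of the searched grid.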
- if multi_class == "multinomial":
- self.C_ = np.tile(self.C_, n_classes)
- self.l1_ratio_ = np.tile(self.l1_ratio_, n_classes)
- self.coef_ = w[:, : X.shape[1]]
- if self.fit_intercept:
- self.intercept_ = w[:, -1]
- else:
- self.coef_[index] = w[: X.shape[1]]
- if self.fit_intercept:
- self.intercept_[index] = w[-1]
- self.C_ = np.asarray(self.C_)
- self.l1_ratio_ = np.asarray(self.l1_ratio_)
- self.l1_ratios_ = np.asarray(l1_ratios_)
- # if elasticnet was used, add the l1_ratios dimension to some
- # attributes
- if self.l1_ratios is not None:
- # with n_cs=2 and n_l1_ratios=3
- # the layout of scores is
- # [c1, c2, c1, c2, c1, c2]
- # l1_1 , l1_2 , l1_3
- # To get a 2d array with the following layout
- # l1_1, l1_2, l1_3
- # c1 [[ . , . , . ],
- # c2 [ . , . , . ]]
- # We need to first reshape and then transpose.
- # The same goes for the other arrays
- for cls, coefs_path in self.coefs_paths_.items():
- self.coefs_paths_[cls] = coefs_path.reshape(
- (len(folds), self.l1_ratios_.size, self.Cs_.size, -1)
- )
- self.coefs_paths_[cls] = np.transpose(
- self.coefs_paths_[cls], (0, 2, 1, 3)
- )
- for cls, score in self.scores_.items():
- self.scores_[cls] = score.reshape(
- (len(folds), self.l1_ratios_.size, self.Cs_.size)
- )
- self.scores_[cls] = np.transpose(self.scores_[cls], (0, 2, 1))
- self.n_iter_ = self.n_iter_.reshape(
- (-1, len(folds), self.l1_ratios_.size, self.Cs_.size)
- )
- self.n_iter_ = np.transpose(self.n_iter_, (0, 1, 3, 2))
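- # After these transposes, coefs_paths_, scores_ and n_iter_ expose the
- # (..., n_Cs, n_l1_ratios, ...) axis order documented in the class
- # docstring.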
- return self
- def score(self, X, y, sample_weight=None):
- """Score using the `scoring` option on the given test data and labels.
- Parameters
- ----------
- X : array-like of shape (n_samples, n_features)
- Test samples.
- y : array-like of shape (n_samples,)
- True labels for X.
- sample_weight : array-like of shape (n_samples,), default=None
- Sample weights.
- Returns
- -------
- score : float
- Score of self.predict(X) w.r.t. y.
- """
- scoring = self.scoring or "accuracy"
- scoring = get_scorer(scoring)
- return scoring(self, X, y, sample_weight=sample_weight)
- def _more_tags(self):
- return {
- "_xfail_checks": {
- "check_sample_weights_invariance": (
- "zero sample_weight is not equivalent to removing samples"
- ),
- }
- }