# _bounds.py
"""Determination of parameter bounds"""

# Author: Paolo Losi
# License: BSD 3 clause

from numbers import Real

import numpy as np

from ..preprocessing import LabelBinarizer
from ..utils._param_validation import Interval, StrOptions, validate_params
from ..utils.extmath import safe_sparse_dot
from ..utils.validation import check_array, check_consistent_length
  10. @validate_params(
  11. {
  12. "X": ["array-like", "sparse matrix"],
  13. "y": ["array-like"],
  14. "loss": [StrOptions({"squared_hinge", "log"})],
  15. "fit_intercept": ["boolean"],
  16. "intercept_scaling": [Interval(Real, 0, None, closed="neither")],
  17. },
  18. prefer_skip_nested_validation=True,
  19. )
  20. def l1_min_c(X, y, *, loss="squared_hinge", fit_intercept=True, intercept_scaling=1.0):
  21. """Return the lowest bound for C.
  22. The lower bound for C is computed such that for C in (l1_min_C, infinity)
  23. the model is guaranteed not to be empty. This applies to l1 penalized
  24. classifiers, such as LinearSVC with penalty='l1' and
  25. linear_model.LogisticRegression with penalty='l1'.
  26. This value is valid if class_weight parameter in fit() is not set.
  27. Parameters
  28. ----------
  29. X : {array-like, sparse matrix} of shape (n_samples, n_features)
  30. Training vector, where `n_samples` is the number of samples and
  31. `n_features` is the number of features.
  32. y : array-like of shape (n_samples,)
  33. Target vector relative to X.
  34. loss : {'squared_hinge', 'log'}, default='squared_hinge'
  35. Specifies the loss function.
  36. With 'squared_hinge' it is the squared hinge loss (a.k.a. L2 loss).
  37. With 'log' it is the loss of logistic regression models.
  38. fit_intercept : bool, default=True
  39. Specifies if the intercept should be fitted by the model.
  40. It must match the fit() method parameter.
  41. intercept_scaling : float, default=1.0
  42. When fit_intercept is True, instance vector x becomes
  43. [x, intercept_scaling],
  44. i.e. a "synthetic" feature with constant value equals to
  45. intercept_scaling is appended to the instance vector.
  46. It must match the fit() method parameter.
  47. Returns
  48. -------
  49. l1_min_c : float
  50. Minimum value for C.
  51. """
  52. X = check_array(X, accept_sparse="csc")
  53. check_consistent_length(X, y)
  54. Y = LabelBinarizer(neg_label=-1).fit_transform(y).T
  55. # maximum absolute value over classes and features
  56. den = np.max(np.abs(safe_sparse_dot(Y, X)))
  57. if fit_intercept:
  58. bias = np.full(
  59. (np.size(y), 1), intercept_scaling, dtype=np.array(intercept_scaling).dtype
  60. )
  61. den = max(den, abs(np.dot(Y, bias)).max())
  62. if den == 0.0:
  63. raise ValueError(
  64. "Ill-posed l1_min_c calculation: l1 will always "
  65. "select zero coefficients for this data"
  66. )
  67. if loss == "squared_hinge":
  68. return 0.5 / den
  69. else: # loss == 'log':
  70. return 2.0 / den