  1. """Kernels for Gaussian process regression and classification.
  2. The kernels in this module allow kernel-engineering, i.e., they can be
  3. combined via the "+" and "*" operators or be exponentiated with a scalar
  4. via "**". These sum and product expressions can also contain scalar values,
  5. which are automatically converted to a constant kernel.
  6. All kernels allow (analytic) gradient-based hyperparameter optimization.
  7. The space of hyperparameters can be specified by giving lower und upper
  8. boundaries for the value of each hyperparameter (the search space is thus
  9. rectangular). Instead of specifying bounds, hyperparameters can also be
  10. declared to be "fixed", which causes these hyperparameters to be excluded from
  11. optimization.
  12. """
  13. # Author: Jan Hendrik Metzen <jhm@informatik.uni-bremen.de>
  14. # License: BSD 3 clause
  15. # Note: this module is strongly inspired by the kernel module of the george
  16. # package.
  17. import math
  18. import warnings
  19. from abc import ABCMeta, abstractmethod
  20. from collections import namedtuple
  21. from inspect import signature
  22. import numpy as np
  23. from scipy.spatial.distance import cdist, pdist, squareform
  24. from scipy.special import gamma, kv
  25. from ..base import clone
  26. from ..exceptions import ConvergenceWarning
  27. from ..metrics.pairwise import pairwise_kernels
  28. from ..utils.validation import _num_samples
  29. def _check_length_scale(X, length_scale):
  30. length_scale = np.squeeze(length_scale).astype(float)
  31. if np.ndim(length_scale) > 1:
  32. raise ValueError("length_scale cannot be of dimension greater than 1")
  33. if np.ndim(length_scale) == 1 and X.shape[1] != length_scale.shape[0]:
  34. raise ValueError(
  35. "Anisotropic kernel must have the same number of "
  36. "dimensions as data (%d!=%d)" % (length_scale.shape[0], X.shape[1])
  37. )
  38. return length_scale
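

# A minimal usage sketch of the helper above (illustrative only; the
# `_sketch_check_length_scale` name is hypothetical and not part of the module):
# an isotropic scalar passes through unchanged, while an anisotropic vector must
# match the number of feature dimensions of X.
def _sketch_check_length_scale():
    X = np.zeros((5, 3))  # 5 samples, 3 features
    assert _check_length_scale(X, 2.0) == 2.0  # isotropic: scalar is accepted
    assert _check_length_scale(X, [1.0, 2.0, 3.0]).shape == (3,)  # anisotropic
    try:
        _check_length_scale(X, [1.0, 2.0])  # wrong length -> ValueError
    except ValueError:
        pass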


class Hyperparameter(
    namedtuple(
        "Hyperparameter", ("name", "value_type", "bounds", "n_elements", "fixed")
    )
):
    """A kernel hyperparameter's specification in form of a namedtuple.

    .. versionadded:: 0.18

    Attributes
    ----------
    name : str
        The name of the hyperparameter. Note that a kernel using a
        hyperparameter with name "x" must have the attributes self.x and
        self.x_bounds

    value_type : str
        The type of the hyperparameter. Currently, only "numeric"
        hyperparameters are supported.

    bounds : pair of floats >= 0 or "fixed"
        The lower and upper bound on the parameter. If n_elements>1, a pair
        of 1d arrays with n_elements each may be given alternatively. If
        the string "fixed" is passed as bounds, the hyperparameter's value
        cannot be changed.

    n_elements : int, default=1
        The number of elements of the hyperparameter value. Defaults to 1,
        which corresponds to a scalar hyperparameter. n_elements > 1
        corresponds to a hyperparameter which is vector-valued,
        such as, e.g., anisotropic length-scales.

    fixed : bool, default=None
        Whether the value of this hyperparameter is fixed, i.e., cannot be
        changed during hyperparameter tuning. If None is passed, "fixed" is
        derived from the given bounds.

    Examples
    --------
    >>> from sklearn.gaussian_process.kernels import ConstantKernel
    >>> from sklearn.datasets import make_friedman2
    >>> from sklearn.gaussian_process import GaussianProcessRegressor
    >>> from sklearn.gaussian_process.kernels import Hyperparameter
    >>> X, y = make_friedman2(n_samples=50, noise=0, random_state=0)
    >>> kernel = ConstantKernel(constant_value=1.0,
    ...    constant_value_bounds=(0.0, 10.0))

    We can access each hyperparameter:

    >>> for hyperparameter in kernel.hyperparameters:
    ...    print(hyperparameter)
    Hyperparameter(name='constant_value', value_type='numeric',
    bounds=array([[ 0., 10.]]), n_elements=1, fixed=False)

    >>> params = kernel.get_params()
    >>> for key in sorted(params): print(f"{key} : {params[key]}")
    constant_value : 1.0
    constant_value_bounds : (0.0, 10.0)
    """

    # A raw namedtuple is very memory efficient as it packs the attributes
    # in a struct to get rid of the __dict__ of attributes; in particular it
    # does not copy the string for the keys on each instance.
    # By deriving a namedtuple class just to introduce the __init__ method we
    # would also reintroduce the __dict__ on the instance. We therefore tell the
    # Python interpreter that this subclass uses static __slots__ instead of
    # dynamic attributes. Furthermore we don't need any additional slot in the
    # subclass so we set __slots__ to the empty tuple.
    __slots__ = ()

    def __new__(cls, name, value_type, bounds, n_elements=1, fixed=None):
        if not isinstance(bounds, str) or bounds != "fixed":
            bounds = np.atleast_2d(bounds)
            if n_elements > 1:  # vector-valued parameter
                if bounds.shape[0] == 1:
                    bounds = np.repeat(bounds, n_elements, 0)
                elif bounds.shape[0] != n_elements:
                    raise ValueError(
                        "Bounds on %s should have either 1 or "
                        "%d dimensions. Given are %d"
                        % (name, n_elements, bounds.shape[0])
                    )

        if fixed is None:
            fixed = isinstance(bounds, str) and bounds == "fixed"
        return super(Hyperparameter, cls).__new__(
            cls, name, value_type, bounds, n_elements, fixed
        )

    # This is mainly a testing utility to check that two hyperparameters
    # are equal.
    def __eq__(self, other):
        return (
            self.name == other.name
            and self.value_type == other.value_type
            and np.all(self.bounds == other.bounds)
            and self.n_elements == other.n_elements
            and self.fixed == other.fixed
        )
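

# A small sketch of the Hyperparameter semantics documented above (illustrative
# only; `_sketch_hyperparameter` is a hypothetical helper, not part of the API):
# scalar bounds broadcast to n_elements rows, and passing "fixed" as bounds
# implies fixed=True unless overridden explicitly.
def _sketch_hyperparameter():
    h = Hyperparameter("length_scale", "numeric", (1e-5, 1e5), n_elements=3)
    assert h.bounds.shape == (3, 2)  # the single (low, high) pair was repeated
    assert h.fixed is False          # derived from the numeric bounds

    h_fixed = Hyperparameter("noise_level", "numeric", "fixed")
    assert h_fixed.fixed is True     # derived from the string "fixed"
    return h, h_fixed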


class Kernel(metaclass=ABCMeta):
    """Base class for all kernels.

    .. versionadded:: 0.18
    """

    def get_params(self, deep=True):
        """Get parameters of this kernel.

        Parameters
        ----------
        deep : bool, default=True
            If True, will return the parameters for this estimator and
            contained subobjects that are estimators.

        Returns
        -------
        params : dict
            Parameter names mapped to their values.
        """
        params = dict()

        # introspect the constructor arguments to find the model parameters
        # to represent
        cls = self.__class__
        init = getattr(cls.__init__, "deprecated_original", cls.__init__)
        init_sign = signature(init)
        args, varargs = [], []
        for parameter in init_sign.parameters.values():
            if parameter.kind != parameter.VAR_KEYWORD and parameter.name != "self":
                args.append(parameter.name)
            if parameter.kind == parameter.VAR_POSITIONAL:
                varargs.append(parameter.name)

        if len(varargs) != 0:
            raise RuntimeError(
                "scikit-learn kernels should always "
                "specify their parameters in the signature"
                " of their __init__ (no varargs)."
                " %s doesn't follow this convention." % (cls,)
            )

        for arg in args:
            params[arg] = getattr(self, arg)
        return params

    def set_params(self, **params):
        """Set the parameters of this kernel.

        The method works on simple kernels as well as on nested kernels.
        The latter have parameters of the form ``<component>__<parameter>``
        so that it's possible to update each component of a nested object.

        Returns
        -------
        self
        """
        if not params:
            # Simple optimisation to gain speed (inspect is slow)
            return self
        valid_params = self.get_params(deep=True)
        for key, value in params.items():
            split = key.split("__", 1)
            if len(split) > 1:
                # nested objects case
                name, sub_name = split
                if name not in valid_params:
                    raise ValueError(
                        "Invalid parameter %s for kernel %s. "
                        "Check the list of available parameters "
                        "with `kernel.get_params().keys()`." % (name, self)
                    )
                sub_object = valid_params[name]
                sub_object.set_params(**{sub_name: value})
            else:
                # simple objects case
                if key not in valid_params:
                    raise ValueError(
                        "Invalid parameter %s for kernel %s. "
                        "Check the list of available parameters "
                        "with `kernel.get_params().keys()`."
                        % (key, self.__class__.__name__)
                    )
                setattr(self, key, value)
        return self

    def clone_with_theta(self, theta):
        """Returns a clone of self with given hyperparameters theta.

        Parameters
        ----------
        theta : ndarray of shape (n_dims,)
            The hyperparameters
        """
        cloned = clone(self)
        cloned.theta = theta
        return cloned

    @property
    def n_dims(self):
        """Returns the number of non-fixed hyperparameters of the kernel."""
        return self.theta.shape[0]

    @property
    def hyperparameters(self):
        """Returns a list of all hyperparameter specifications."""
        r = [
            getattr(self, attr)
            for attr in dir(self)
            if attr.startswith("hyperparameter_")
        ]
        return r

    @property
    def theta(self):
        """Returns the (flattened, log-transformed) non-fixed hyperparameters.

        Note that theta are typically the log-transformed values of the
        kernel's hyperparameters as this representation of the search space
        is more amenable for hyperparameter search, as hyperparameters like
        length-scales naturally live on a log-scale.

        Returns
        -------
        theta : ndarray of shape (n_dims,)
            The non-fixed, log-transformed hyperparameters of the kernel
        """
        theta = []
        params = self.get_params()
        for hyperparameter in self.hyperparameters:
            if not hyperparameter.fixed:
                theta.append(params[hyperparameter.name])
        if len(theta) > 0:
            return np.log(np.hstack(theta))
        else:
            return np.array([])

    @theta.setter
    def theta(self, theta):
        """Sets the (flattened, log-transformed) non-fixed hyperparameters.

        Parameters
        ----------
        theta : ndarray of shape (n_dims,)
            The non-fixed, log-transformed hyperparameters of the kernel
        """
        params = self.get_params()
        i = 0
        for hyperparameter in self.hyperparameters:
            if hyperparameter.fixed:
                continue
            if hyperparameter.n_elements > 1:
                # vector-valued parameter
                params[hyperparameter.name] = np.exp(
                    theta[i : i + hyperparameter.n_elements]
                )
                i += hyperparameter.n_elements
            else:
                params[hyperparameter.name] = np.exp(theta[i])
                i += 1

        if i != len(theta):
            raise ValueError(
                "theta has not the correct number of entries."
                " Should be %d; given are %d" % (i, len(theta))
            )
        self.set_params(**params)

    @property
    def bounds(self):
        """Returns the log-transformed bounds on the theta.

        Returns
        -------
        bounds : ndarray of shape (n_dims, 2)
            The log-transformed bounds on the kernel's hyperparameters theta
        """
        bounds = [
            hyperparameter.bounds
            for hyperparameter in self.hyperparameters
            if not hyperparameter.fixed
        ]
        if len(bounds) > 0:
            return np.log(np.vstack(bounds))
        else:
            return np.array([])

    def __add__(self, b):
        if not isinstance(b, Kernel):
            return Sum(self, ConstantKernel(b))
        return Sum(self, b)

    def __radd__(self, b):
        if not isinstance(b, Kernel):
            return Sum(ConstantKernel(b), self)
        return Sum(b, self)

    def __mul__(self, b):
        if not isinstance(b, Kernel):
            return Product(self, ConstantKernel(b))
        return Product(self, b)

    def __rmul__(self, b):
        if not isinstance(b, Kernel):
            return Product(ConstantKernel(b), self)
        return Product(b, self)

    def __pow__(self, b):
        return Exponentiation(self, b)

    def __eq__(self, b):
        if type(self) != type(b):
            return False
        params_a = self.get_params()
        params_b = b.get_params()
        for key in set(list(params_a.keys()) + list(params_b.keys())):
            if np.any(params_a.get(key, None) != params_b.get(key, None)):
                return False
        return True

    def __repr__(self):
        return "{0}({1})".format(
            self.__class__.__name__, ", ".join(map("{0:.3g}".format, self.theta))
        )

    @abstractmethod
    def __call__(self, X, Y=None, eval_gradient=False):
        """Evaluate the kernel."""

    @abstractmethod
    def diag(self, X):
        """Returns the diagonal of the kernel k(X, X).

        The result of this method is identical to np.diag(self(X)); however,
        it can be evaluated more efficiently since only the diagonal is
        evaluated.

        Parameters
        ----------
        X : array-like of shape (n_samples_X, n_features)
            Left argument of the returned kernel k(X, Y)

        Returns
        -------
        K_diag : ndarray of shape (n_samples_X,)
            Diagonal of kernel k(X, X)
        """

    @abstractmethod
    def is_stationary(self):
        """Returns whether the kernel is stationary."""

    @property
    def requires_vector_input(self):
        """Returns whether the kernel is defined on fixed-length feature
        vectors or generic objects. Defaults to True for backward
        compatibility."""
        return True

    def _check_bounds_params(self):
        """Called after fitting to warn if bounds may have been too tight."""
        list_close = np.isclose(self.bounds, np.atleast_2d(self.theta).T)
        idx = 0
        for hyp in self.hyperparameters:
            if hyp.fixed:
                continue
            for dim in range(hyp.n_elements):
                if list_close[idx, 0]:
                    warnings.warn(
                        "The optimal value found for "
                        "dimension %s of parameter %s is "
                        "close to the specified lower "
                        "bound %s. Decreasing the bound and"
                        " calling fit again may find a "
                        "better value." % (dim, hyp.name, hyp.bounds[dim][0]),
                        ConvergenceWarning,
                    )
                elif list_close[idx, 1]:
                    warnings.warn(
                        "The optimal value found for "
                        "dimension %s of parameter %s is "
                        "close to the specified upper "
                        "bound %s. Increasing the bound and"
                        " calling fit again may find a "
                        "better value." % (dim, hyp.name, hyp.bounds[dim][1]),
                        ConvergenceWarning,
                    )
                idx += 1
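

# A short sketch of how the base-class machinery above composes in practice
# (illustrative only; `_sketch_kernel_algebra` is hypothetical, not public API):
# "+" and "*" wrap scalars in ConstantKernel, and `theta` exposes the non-fixed
# hyperparameters on a log scale, so assigning it round-trips through exp/log.
def _sketch_kernel_algebra():
    from sklearn.gaussian_process.kernels import RBF  # noqa: avoid forward refs

    kernel = 2.0 * RBF(length_scale=1.5) + 3.0  # Sum(Product(C(2), RBF), C(3))
    assert kernel.n_dims == 3                   # three non-fixed hyperparameters
    assert np.allclose(kernel.theta, np.log([2.0, 1.5, 3.0]))

    # Assigning theta updates the underlying parameters via exp(theta).
    kernel.theta = np.log([4.0, 0.5, 1.0])
    assert np.isclose(kernel.k1.k2.length_scale, 0.5)
    return kernel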


class NormalizedKernelMixin:
    """Mixin for kernels which are normalized: k(X, X) = 1.

    .. versionadded:: 0.18
    """

    def diag(self, X):
        """Returns the diagonal of the kernel k(X, X).

        The result of this method is identical to np.diag(self(X)); however,
        it can be evaluated more efficiently since only the diagonal is
        evaluated.

        Parameters
        ----------
        X : ndarray of shape (n_samples_X, n_features)
            Left argument of the returned kernel k(X, Y)

        Returns
        -------
        K_diag : ndarray of shape (n_samples_X,)
            Diagonal of kernel k(X, X)
        """
        return np.ones(X.shape[0])


class StationaryKernelMixin:
    """Mixin for kernels which are stationary: k(X, Y) = f(X - Y).

    .. versionadded:: 0.18
    """

    def is_stationary(self):
        """Returns whether the kernel is stationary."""
        return True


class GenericKernelMixin:
    """Mixin for kernels which operate on generic objects such as variable-
    length sequences, trees, and graphs.

    .. versionadded:: 0.22
    """

    @property
    def requires_vector_input(self):
        """Whether the kernel works only on fixed-length feature vectors."""
        return False
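

# A minimal sketch of how the mixins above are meant to be combined in a custom
# kernel (illustrative only; `_SketchUnitRBF` is hypothetical and not part of
# the module). Stationarity and the unit diagonal come from the mixins, so only
# __call__ has to be implemented.
class _SketchUnitRBF(StationaryKernelMixin, NormalizedKernelMixin, Kernel):
    """Hypothetical RBF with a hard-coded unit length scale (sketch only)."""

    def __call__(self, X, Y=None, eval_gradient=False):
        X = np.atleast_2d(X)
        Y = X if Y is None else np.atleast_2d(Y)
        dists = cdist(X, Y, metric="sqeuclidean")
        K = np.exp(-0.5 * dists)
        if eval_gradient:
            # No tunable hyperparameters, hence an empty gradient.
            return K, np.empty((X.shape[0], X.shape[0], 0))
        return K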


class CompoundKernel(Kernel):
    """Kernel which is composed of a set of other kernels.

    .. versionadded:: 0.18

    Parameters
    ----------
    kernels : list of Kernels
        The other kernels

    Examples
    --------
    >>> from sklearn.gaussian_process.kernels import WhiteKernel
    >>> from sklearn.gaussian_process.kernels import RBF
    >>> from sklearn.gaussian_process.kernels import CompoundKernel
    >>> kernel = CompoundKernel(
    ...     [WhiteKernel(noise_level=3.0), RBF(length_scale=2.0)])
    >>> print(kernel.bounds)
    [[-11.51292546  11.51292546]
     [-11.51292546  11.51292546]]
    >>> print(kernel.n_dims)
    2
    >>> print(kernel.theta)
    [1.09861229 0.69314718]
    """

    def __init__(self, kernels):
        self.kernels = kernels

    def get_params(self, deep=True):
        """Get parameters of this kernel.

        Parameters
        ----------
        deep : bool, default=True
            If True, will return the parameters for this estimator and
            contained subobjects that are estimators.

        Returns
        -------
        params : dict
            Parameter names mapped to their values.
        """
        return dict(kernels=self.kernels)

    @property
    def theta(self):
        """Returns the (flattened, log-transformed) non-fixed hyperparameters.

        Note that theta are typically the log-transformed values of the
        kernel's hyperparameters as this representation of the search space
        is more amenable for hyperparameter search, as hyperparameters like
        length-scales naturally live on a log-scale.

        Returns
        -------
        theta : ndarray of shape (n_dims,)
            The non-fixed, log-transformed hyperparameters of the kernel
        """
        return np.hstack([kernel.theta for kernel in self.kernels])

    @theta.setter
    def theta(self, theta):
        """Sets the (flattened, log-transformed) non-fixed hyperparameters.

        Parameters
        ----------
        theta : array of shape (n_dims,)
            The non-fixed, log-transformed hyperparameters of the kernel
        """
        # All contained kernels are assumed to have the same number of
        # non-fixed hyperparameters.
        k_dims = self.kernels[0].n_dims
        for i, kernel in enumerate(self.kernels):
            kernel.theta = theta[i * k_dims : (i + 1) * k_dims]

    @property
    def bounds(self):
        """Returns the log-transformed bounds on the theta.

        Returns
        -------
        bounds : array of shape (n_dims, 2)
            The log-transformed bounds on the kernel's hyperparameters theta
        """
        return np.vstack([kernel.bounds for kernel in self.kernels])

    def __call__(self, X, Y=None, eval_gradient=False):
        """Return the kernel k(X, Y) and optionally its gradient.

        Note that this compound kernel returns the results of all simple kernels
        stacked along an additional axis.

        Parameters
        ----------
        X : array-like of shape (n_samples_X, n_features) or list of object
            Left argument of the returned kernel k(X, Y)

        Y : array-like of shape (n_samples_Y, n_features) or list of object, \
                default=None
            Right argument of the returned kernel k(X, Y). If None, k(X, X)
            is evaluated instead.

        eval_gradient : bool, default=False
            Determines whether the gradient with respect to the log of the
            kernel hyperparameter is computed.

        Returns
        -------
        K : ndarray of shape (n_samples_X, n_samples_Y, n_kernels)
            Kernel k(X, Y)

        K_gradient : ndarray of shape \
                (n_samples_X, n_samples_X, n_dims, n_kernels), optional
            The gradient of the kernel k(X, X) with respect to the log of the
            hyperparameter of the kernel. Only returned when `eval_gradient`
            is True.
        """
        if eval_gradient:
            K = []
            K_grad = []
            for kernel in self.kernels:
                K_single, K_grad_single = kernel(X, Y, eval_gradient)
                K.append(K_single)
                K_grad.append(K_grad_single[..., np.newaxis])
            return np.dstack(K), np.concatenate(K_grad, 3)
        else:
            return np.dstack([kernel(X, Y, eval_gradient) for kernel in self.kernels])

    def __eq__(self, b):
        if type(self) != type(b) or len(self.kernels) != len(b.kernels):
            return False
        return np.all(
            [self.kernels[i] == b.kernels[i] for i in range(len(self.kernels))]
        )

    def is_stationary(self):
        """Returns whether the kernel is stationary."""
        return np.all([kernel.is_stationary() for kernel in self.kernels])

    @property
    def requires_vector_input(self):
        """Returns whether the kernel is defined on fixed-length feature
        vectors or generic objects."""
        return np.any([kernel.requires_vector_input for kernel in self.kernels])

    def diag(self, X):
        """Returns the diagonal of the kernel k(X, X).

        The result of this method is identical to `np.diag(self(X))`; however,
        it can be evaluated more efficiently since only the diagonal is
        evaluated.

        Parameters
        ----------
        X : array-like of shape (n_samples_X, n_features) or list of object
            Argument to the kernel.

        Returns
        -------
        K_diag : ndarray of shape (n_samples_X, n_kernels)
            Diagonal of kernel k(X, X)
        """
        return np.vstack([kernel.diag(X) for kernel in self.kernels]).T
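

# A quick shape sketch for CompoundKernel (illustrative only; the
# `_sketch_compound_shapes` helper is hypothetical): the simple kernels are
# evaluated side by side and stacked along a trailing "kernel" axis.
def _sketch_compound_shapes():
    from sklearn.gaussian_process.kernels import RBF, WhiteKernel, CompoundKernel

    X = np.random.RandomState(0).rand(6, 2)
    kernel = CompoundKernel([WhiteKernel(1.0), RBF(1.0)])

    K = kernel(X)                        # (6, 6, 2): one slice per simple kernel
    _, K_grad = kernel(X, eval_gradient=True)
    assert K.shape == (6, 6, 2)
    assert K_grad.shape == (6, 6, 1, 2)  # one log-hyperparameter per kernel
    assert kernel.diag(X).shape == (6, 2)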


class KernelOperator(Kernel):
    """Base class for all kernel operators.

    .. versionadded:: 0.18
    """

    def __init__(self, k1, k2):
        self.k1 = k1
        self.k2 = k2

    def get_params(self, deep=True):
        """Get parameters of this kernel.

        Parameters
        ----------
        deep : bool, default=True
            If True, will return the parameters for this estimator and
            contained subobjects that are estimators.

        Returns
        -------
        params : dict
            Parameter names mapped to their values.
        """
        params = dict(k1=self.k1, k2=self.k2)
        if deep:
            deep_items = self.k1.get_params().items()
            params.update(("k1__" + k, val) for k, val in deep_items)
            deep_items = self.k2.get_params().items()
            params.update(("k2__" + k, val) for k, val in deep_items)

        return params

    @property
    def hyperparameters(self):
        """Returns a list of all hyperparameter specifications."""
        r = [
            Hyperparameter(
                "k1__" + hyperparameter.name,
                hyperparameter.value_type,
                hyperparameter.bounds,
                hyperparameter.n_elements,
            )
            for hyperparameter in self.k1.hyperparameters
        ]
        for hyperparameter in self.k2.hyperparameters:
            r.append(
                Hyperparameter(
                    "k2__" + hyperparameter.name,
                    hyperparameter.value_type,
                    hyperparameter.bounds,
                    hyperparameter.n_elements,
                )
            )
        return r

    @property
    def theta(self):
        """Returns the (flattened, log-transformed) non-fixed hyperparameters.

        Note that theta are typically the log-transformed values of the
        kernel's hyperparameters as this representation of the search space
        is more amenable for hyperparameter search, as hyperparameters like
        length-scales naturally live on a log-scale.

        Returns
        -------
        theta : ndarray of shape (n_dims,)
            The non-fixed, log-transformed hyperparameters of the kernel
        """
        return np.append(self.k1.theta, self.k2.theta)

    @theta.setter
    def theta(self, theta):
        """Sets the (flattened, log-transformed) non-fixed hyperparameters.

        Parameters
        ----------
        theta : ndarray of shape (n_dims,)
            The non-fixed, log-transformed hyperparameters of the kernel
        """
        k1_dims = self.k1.n_dims
        self.k1.theta = theta[:k1_dims]
        self.k2.theta = theta[k1_dims:]

    @property
    def bounds(self):
        """Returns the log-transformed bounds on the theta.

        Returns
        -------
        bounds : ndarray of shape (n_dims, 2)
            The log-transformed bounds on the kernel's hyperparameters theta
        """
        if self.k1.bounds.size == 0:
            return self.k2.bounds
        if self.k2.bounds.size == 0:
            return self.k1.bounds
        return np.vstack((self.k1.bounds, self.k2.bounds))

    def __eq__(self, b):
        if type(self) != type(b):
            return False
        return (self.k1 == b.k1 and self.k2 == b.k2) or (
            self.k1 == b.k2 and self.k2 == b.k1
        )

    def is_stationary(self):
        """Returns whether the kernel is stationary."""
        return self.k1.is_stationary() and self.k2.is_stationary()

    @property
    def requires_vector_input(self):
        """Returns whether the kernel is defined on fixed-length feature
        vectors or generic objects."""
        return self.k1.requires_vector_input or self.k2.requires_vector_input
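

# A small sketch of the k1__ / k2__ prefixing used by KernelOperator above
# (illustrative only; `_sketch_nested_params` is a hypothetical helper): nested
# parameters can be read and written through the double-underscore names.
def _sketch_nested_params():
    from sklearn.gaussian_process.kernels import RBF, ConstantKernel

    kernel = ConstantKernel(2.0) * RBF(length_scale=1.5)
    params = kernel.get_params(deep=True)
    assert params["k1__constant_value"] == 2.0
    assert params["k2__length_scale"] == 1.5

    kernel.set_params(k2__length_scale=3.0)  # updates the nested RBF in place
    assert kernel.k2.length_scale == 3.0
    return kernel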


class Sum(KernelOperator):
    """The `Sum` kernel takes two kernels :math:`k_1` and :math:`k_2`
    and combines them via

    .. math::
        k_{sum}(X, Y) = k_1(X, Y) + k_2(X, Y)

    Note that the `__add__` magic method is overridden, so
    `Sum(RBF(), RBF())` is equivalent to using the + operator
    with `RBF() + RBF()`.

    Read more in the :ref:`User Guide <gp_kernels>`.

    .. versionadded:: 0.18

    Parameters
    ----------
    k1 : Kernel
        The first base-kernel of the sum-kernel

    k2 : Kernel
        The second base-kernel of the sum-kernel

    Examples
    --------
    >>> from sklearn.datasets import make_friedman2
    >>> from sklearn.gaussian_process import GaussianProcessRegressor
    >>> from sklearn.gaussian_process.kernels import RBF, Sum, ConstantKernel
    >>> X, y = make_friedman2(n_samples=500, noise=0, random_state=0)
    >>> kernel = Sum(ConstantKernel(2), RBF())
    >>> gpr = GaussianProcessRegressor(kernel=kernel,
    ...         random_state=0).fit(X, y)
    >>> gpr.score(X, y)
    1.0
    >>> kernel
    1.41**2 + RBF(length_scale=1)
    """

    def __call__(self, X, Y=None, eval_gradient=False):
        """Return the kernel k(X, Y) and optionally its gradient.

        Parameters
        ----------
        X : array-like of shape (n_samples_X, n_features) or list of object
            Left argument of the returned kernel k(X, Y)

        Y : array-like of shape (n_samples_Y, n_features) or list of object,\
                default=None
            Right argument of the returned kernel k(X, Y). If None, k(X, X)
            is evaluated instead.

        eval_gradient : bool, default=False
            Determines whether the gradient with respect to the log of
            the kernel hyperparameter is computed.

        Returns
        -------
        K : ndarray of shape (n_samples_X, n_samples_Y)
            Kernel k(X, Y)

        K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims),\
                optional
            The gradient of the kernel k(X, X) with respect to the log of the
            hyperparameter of the kernel. Only returned when `eval_gradient`
            is True.
        """
        if eval_gradient:
            K1, K1_gradient = self.k1(X, Y, eval_gradient=True)
            K2, K2_gradient = self.k2(X, Y, eval_gradient=True)
            return K1 + K2, np.dstack((K1_gradient, K2_gradient))
        else:
            return self.k1(X, Y) + self.k2(X, Y)

    def diag(self, X):
        """Returns the diagonal of the kernel k(X, X).

        The result of this method is identical to `np.diag(self(X))`; however,
        it can be evaluated more efficiently since only the diagonal is
        evaluated.

        Parameters
        ----------
        X : array-like of shape (n_samples_X, n_features) or list of object
            Argument to the kernel.

        Returns
        -------
        K_diag : ndarray of shape (n_samples_X,)
            Diagonal of kernel k(X, X)
        """
        return self.k1.diag(X) + self.k2.diag(X)

    def __repr__(self):
        return "{0} + {1}".format(self.k1, self.k2)


class Product(KernelOperator):
    """The `Product` kernel takes two kernels :math:`k_1` and :math:`k_2`
    and combines them via

    .. math::
        k_{prod}(X, Y) = k_1(X, Y) * k_2(X, Y)

    Note that the `__mul__` magic method is overridden, so
    `Product(RBF(), RBF())` is equivalent to using the * operator
    with `RBF() * RBF()`.

    Read more in the :ref:`User Guide <gp_kernels>`.

    .. versionadded:: 0.18

    Parameters
    ----------
    k1 : Kernel
        The first base-kernel of the product-kernel

    k2 : Kernel
        The second base-kernel of the product-kernel

    Examples
    --------
    >>> from sklearn.datasets import make_friedman2
    >>> from sklearn.gaussian_process import GaussianProcessRegressor
    >>> from sklearn.gaussian_process.kernels import (RBF, Product,
    ...            ConstantKernel)
    >>> X, y = make_friedman2(n_samples=500, noise=0, random_state=0)
    >>> kernel = Product(ConstantKernel(2), RBF())
    >>> gpr = GaussianProcessRegressor(kernel=kernel,
    ...         random_state=0).fit(X, y)
    >>> gpr.score(X, y)
    1.0
    >>> kernel
    1.41**2 * RBF(length_scale=1)
    """

    def __call__(self, X, Y=None, eval_gradient=False):
        """Return the kernel k(X, Y) and optionally its gradient.

        Parameters
        ----------
        X : array-like of shape (n_samples_X, n_features) or list of object
            Left argument of the returned kernel k(X, Y)

        Y : array-like of shape (n_samples_Y, n_features) or list of object,\
                default=None
            Right argument of the returned kernel k(X, Y). If None, k(X, X)
            is evaluated instead.

        eval_gradient : bool, default=False
            Determines whether the gradient with respect to the log of
            the kernel hyperparameter is computed.

        Returns
        -------
        K : ndarray of shape (n_samples_X, n_samples_Y)
            Kernel k(X, Y)

        K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims), \
                optional
            The gradient of the kernel k(X, X) with respect to the log of the
            hyperparameter of the kernel. Only returned when `eval_gradient`
            is True.
        """
        if eval_gradient:
            K1, K1_gradient = self.k1(X, Y, eval_gradient=True)
            K2, K2_gradient = self.k2(X, Y, eval_gradient=True)
            return K1 * K2, np.dstack(
                (K1_gradient * K2[:, :, np.newaxis], K2_gradient * K1[:, :, np.newaxis])
            )
        else:
            return self.k1(X, Y) * self.k2(X, Y)

    def diag(self, X):
        """Returns the diagonal of the kernel k(X, X).

        The result of this method is identical to np.diag(self(X)); however,
        it can be evaluated more efficiently since only the diagonal is
        evaluated.

        Parameters
        ----------
        X : array-like of shape (n_samples_X, n_features) or list of object
            Argument to the kernel.

        Returns
        -------
        K_diag : ndarray of shape (n_samples_X,)
            Diagonal of kernel k(X, X)
        """
        return self.k1.diag(X) * self.k2.diag(X)

    def __repr__(self):
        return "{0} * {1}".format(self.k1, self.k2)


class Exponentiation(Kernel):
    """The Exponentiation kernel takes one base kernel and a scalar parameter
    :math:`p` and combines them via

    .. math::
        k_{exp}(X, Y) = k(X, Y)^p

    Note that the `__pow__` magic method is overridden, so
    `Exponentiation(RBF(), 2)` is equivalent to using the ** operator
    with `RBF() ** 2`.

    Read more in the :ref:`User Guide <gp_kernels>`.

    .. versionadded:: 0.18

    Parameters
    ----------
    kernel : Kernel
        The base kernel

    exponent : float
        The exponent for the base kernel

    Examples
    --------
    >>> from sklearn.datasets import make_friedman2
    >>> from sklearn.gaussian_process import GaussianProcessRegressor
    >>> from sklearn.gaussian_process.kernels import (RationalQuadratic,
    ...            Exponentiation)
    >>> X, y = make_friedman2(n_samples=500, noise=0, random_state=0)
    >>> kernel = Exponentiation(RationalQuadratic(), exponent=2)
    >>> gpr = GaussianProcessRegressor(kernel=kernel, alpha=5,
    ...         random_state=0).fit(X, y)
    >>> gpr.score(X, y)
    0.419...
    >>> gpr.predict(X[:1,:], return_std=True)
    (array([635.5...]), array([0.559...]))
    """

    def __init__(self, kernel, exponent):
        self.kernel = kernel
        self.exponent = exponent

    def get_params(self, deep=True):
        """Get parameters of this kernel.

        Parameters
        ----------
        deep : bool, default=True
            If True, will return the parameters for this estimator and
            contained subobjects that are estimators.

        Returns
        -------
        params : dict
            Parameter names mapped to their values.
        """
        params = dict(kernel=self.kernel, exponent=self.exponent)
        if deep:
            deep_items = self.kernel.get_params().items()
            params.update(("kernel__" + k, val) for k, val in deep_items)
        return params

    @property
    def hyperparameters(self):
        """Returns a list of all hyperparameter specifications."""
        r = []
        for hyperparameter in self.kernel.hyperparameters:
            r.append(
                Hyperparameter(
                    "kernel__" + hyperparameter.name,
                    hyperparameter.value_type,
                    hyperparameter.bounds,
                    hyperparameter.n_elements,
                )
            )
        return r

    @property
    def theta(self):
        """Returns the (flattened, log-transformed) non-fixed hyperparameters.

        Note that theta are typically the log-transformed values of the
        kernel's hyperparameters as this representation of the search space
        is more amenable for hyperparameter search, as hyperparameters like
        length-scales naturally live on a log-scale.

        Returns
        -------
        theta : ndarray of shape (n_dims,)
            The non-fixed, log-transformed hyperparameters of the kernel
        """
        return self.kernel.theta

    @theta.setter
    def theta(self, theta):
        """Sets the (flattened, log-transformed) non-fixed hyperparameters.

        Parameters
        ----------
        theta : ndarray of shape (n_dims,)
            The non-fixed, log-transformed hyperparameters of the kernel
        """
        self.kernel.theta = theta

    @property
    def bounds(self):
        """Returns the log-transformed bounds on the theta.

        Returns
        -------
        bounds : ndarray of shape (n_dims, 2)
            The log-transformed bounds on the kernel's hyperparameters theta
        """
        return self.kernel.bounds

    def __eq__(self, b):
        if type(self) != type(b):
            return False
        return self.kernel == b.kernel and self.exponent == b.exponent

    def __call__(self, X, Y=None, eval_gradient=False):
        """Return the kernel k(X, Y) and optionally its gradient.

        Parameters
        ----------
        X : array-like of shape (n_samples_X, n_features) or list of object
            Left argument of the returned kernel k(X, Y)

        Y : array-like of shape (n_samples_Y, n_features) or list of object,\
                default=None
            Right argument of the returned kernel k(X, Y). If None, k(X, X)
            is evaluated instead.

        eval_gradient : bool, default=False
            Determines whether the gradient with respect to the log of
            the kernel hyperparameter is computed.

        Returns
        -------
        K : ndarray of shape (n_samples_X, n_samples_Y)
            Kernel k(X, Y)

        K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims),\
                optional
            The gradient of the kernel k(X, X) with respect to the log of the
            hyperparameter of the kernel. Only returned when `eval_gradient`
            is True.
        """
        if eval_gradient:
            K, K_gradient = self.kernel(X, Y, eval_gradient=True)
            K_gradient *= self.exponent * K[:, :, np.newaxis] ** (self.exponent - 1)
            return K**self.exponent, K_gradient
        else:
            K = self.kernel(X, Y, eval_gradient=False)
            return K**self.exponent

    def diag(self, X):
        """Returns the diagonal of the kernel k(X, X).

        The result of this method is identical to np.diag(self(X)); however,
        it can be evaluated more efficiently since only the diagonal is
        evaluated.

        Parameters
        ----------
        X : array-like of shape (n_samples_X, n_features) or list of object
            Argument to the kernel.

        Returns
        -------
        K_diag : ndarray of shape (n_samples_X,)
            Diagonal of kernel k(X, X)
        """
        return self.kernel.diag(X) ** self.exponent

    def __repr__(self):
        return "{0} ** {1}".format(self.kernel, self.exponent)

    def is_stationary(self):
        """Returns whether the kernel is stationary."""
        return self.kernel.is_stationary()

    @property
    def requires_vector_input(self):
        """Returns whether the kernel is defined on fixed-length feature
        vectors or generic objects."""
        return self.kernel.requires_vector_input
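

# A small numeric sketch of the chain rule used in Exponentiation.__call__ above
# (illustrative only; `_sketch_exponentiation` is hypothetical): the kernel value
# is the elementwise power of the base kernel, and the log-gradient picks up the
# factor p * k^(p - 1).
def _sketch_exponentiation():
    from sklearn.gaussian_process.kernels import RBF

    X = np.random.RandomState(0).rand(5, 2)
    base = RBF(length_scale=1.3)
    powed = base**2  # Exponentiation(RBF(1.3), 2)

    K_base, G_base = base(X, eval_gradient=True)
    K_pow, G_pow = powed(X, eval_gradient=True)
    assert np.allclose(K_pow, K_base**2)
    assert np.allclose(G_pow, 2 * K_base[:, :, np.newaxis] * G_base)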


class ConstantKernel(StationaryKernelMixin, GenericKernelMixin, Kernel):
    """Constant kernel.

    Can be used as part of a product-kernel where it scales the magnitude of
    the other factor (kernel) or as part of a sum-kernel, where it modifies
    the mean of the Gaussian process.

    .. math::
        k(x_1, x_2) = constant\\_value \\;\\forall\\; x_1, x_2

    Adding a constant kernel is equivalent to adding a constant::

            kernel = RBF() + ConstantKernel(constant_value=2)

    is the same as::

            kernel = RBF() + 2

    Read more in the :ref:`User Guide <gp_kernels>`.

    .. versionadded:: 0.18

    Parameters
    ----------
    constant_value : float, default=1.0
        The constant value which defines the covariance:
        k(x_1, x_2) = constant_value

    constant_value_bounds : pair of floats >= 0 or "fixed", default=(1e-5, 1e5)
        The lower and upper bound on `constant_value`.
        If set to "fixed", `constant_value` cannot be changed during
        hyperparameter tuning.

    Examples
    --------
    >>> from sklearn.datasets import make_friedman2
    >>> from sklearn.gaussian_process import GaussianProcessRegressor
    >>> from sklearn.gaussian_process.kernels import RBF, ConstantKernel
    >>> X, y = make_friedman2(n_samples=500, noise=0, random_state=0)
    >>> kernel = RBF() + ConstantKernel(constant_value=2)
    >>> gpr = GaussianProcessRegressor(kernel=kernel, alpha=5,
    ...         random_state=0).fit(X, y)
    >>> gpr.score(X, y)
    0.3696...
    >>> gpr.predict(X[:1,:], return_std=True)
    (array([606.1...]), array([0.24...]))
    """

    def __init__(self, constant_value=1.0, constant_value_bounds=(1e-5, 1e5)):
        self.constant_value = constant_value
        self.constant_value_bounds = constant_value_bounds

    @property
    def hyperparameter_constant_value(self):
        return Hyperparameter("constant_value", "numeric", self.constant_value_bounds)

    def __call__(self, X, Y=None, eval_gradient=False):
        """Return the kernel k(X, Y) and optionally its gradient.

        Parameters
        ----------
        X : array-like of shape (n_samples_X, n_features) or list of object
            Left argument of the returned kernel k(X, Y)

        Y : array-like of shape (n_samples_Y, n_features) or list of object, \
                default=None
            Right argument of the returned kernel k(X, Y). If None, k(X, X)
            is evaluated instead.

        eval_gradient : bool, default=False
            Determines whether the gradient with respect to the log of
            the kernel hyperparameter is computed.
            Only supported when Y is None.

        Returns
        -------
        K : ndarray of shape (n_samples_X, n_samples_Y)
            Kernel k(X, Y)

        K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims), \
                optional
            The gradient of the kernel k(X, X) with respect to the log of the
            hyperparameter of the kernel. Only returned when eval_gradient
            is True.
        """
        if Y is None:
            Y = X
        elif eval_gradient:
            raise ValueError("Gradient can only be evaluated when Y is None.")

        K = np.full(
            (_num_samples(X), _num_samples(Y)),
            self.constant_value,
            dtype=np.array(self.constant_value).dtype,
        )
        if eval_gradient:
            if not self.hyperparameter_constant_value.fixed:
                return (
                    K,
                    np.full(
                        (_num_samples(X), _num_samples(X), 1),
                        self.constant_value,
                        dtype=np.array(self.constant_value).dtype,
                    ),
                )
            else:
                return K, np.empty((_num_samples(X), _num_samples(X), 0))
        else:
            return K

    def diag(self, X):
        """Returns the diagonal of the kernel k(X, X).

        The result of this method is identical to np.diag(self(X)); however,
        it can be evaluated more efficiently since only the diagonal is
        evaluated.

        Parameters
        ----------
        X : array-like of shape (n_samples_X, n_features) or list of object
            Argument to the kernel.

        Returns
        -------
        K_diag : ndarray of shape (n_samples_X,)
            Diagonal of kernel k(X, X)
        """
        return np.full(
            _num_samples(X),
            self.constant_value,
            dtype=np.array(self.constant_value).dtype,
        )

    def __repr__(self):
        return "{0:.3g}**2".format(np.sqrt(self.constant_value))


class WhiteKernel(StationaryKernelMixin, GenericKernelMixin, Kernel):
    """White kernel.

    The main use-case of this kernel is as part of a sum-kernel where it
    explains the noise of the signal as independently and identically
    normally-distributed. The parameter noise_level equals the variance of this
    noise.

    .. math::
        k(x_1, x_2) = noise\\_level \\text{ if } x_1 == x_2 \\text{ else } 0

    Read more in the :ref:`User Guide <gp_kernels>`.

    .. versionadded:: 0.18

    Parameters
    ----------
    noise_level : float, default=1.0
        Parameter controlling the noise level (variance)

    noise_level_bounds : pair of floats >= 0 or "fixed", default=(1e-5, 1e5)
        The lower and upper bound on 'noise_level'.
        If set to "fixed", 'noise_level' cannot be changed during
        hyperparameter tuning.

    Examples
    --------
    >>> from sklearn.datasets import make_friedman2
    >>> from sklearn.gaussian_process import GaussianProcessRegressor
    >>> from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel
    >>> X, y = make_friedman2(n_samples=500, noise=0, random_state=0)
    >>> kernel = DotProduct() + WhiteKernel(noise_level=0.5)
    >>> gpr = GaussianProcessRegressor(kernel=kernel,
    ...         random_state=0).fit(X, y)
    >>> gpr.score(X, y)
    0.3680...
    >>> gpr.predict(X[:2,:], return_std=True)
    (array([653.0..., 592.1...]), array([316.6..., 316.6...]))
    """

    def __init__(self, noise_level=1.0, noise_level_bounds=(1e-5, 1e5)):
        self.noise_level = noise_level
        self.noise_level_bounds = noise_level_bounds

    @property
    def hyperparameter_noise_level(self):
        return Hyperparameter("noise_level", "numeric", self.noise_level_bounds)

    def __call__(self, X, Y=None, eval_gradient=False):
        """Return the kernel k(X, Y) and optionally its gradient.

        Parameters
        ----------
        X : array-like of shape (n_samples_X, n_features) or list of object
            Left argument of the returned kernel k(X, Y)

        Y : array-like of shape (n_samples_Y, n_features) or list of object,\
                default=None
            Right argument of the returned kernel k(X, Y). If None, k(X, X)
            is evaluated instead.

        eval_gradient : bool, default=False
            Determines whether the gradient with respect to the log of
            the kernel hyperparameter is computed.
            Only supported when Y is None.

        Returns
        -------
        K : ndarray of shape (n_samples_X, n_samples_Y)
            Kernel k(X, Y)

        K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims),\
                optional
            The gradient of the kernel k(X, X) with respect to the log of the
            hyperparameter of the kernel. Only returned when eval_gradient
            is True.
        """
        if Y is not None and eval_gradient:
            raise ValueError("Gradient can only be evaluated when Y is None.")

        if Y is None:
            K = self.noise_level * np.eye(_num_samples(X))
            if eval_gradient:
                if not self.hyperparameter_noise_level.fixed:
                    return (
                        K,
                        self.noise_level * np.eye(_num_samples(X))[:, :, np.newaxis],
                    )
                else:
                    return K, np.empty((_num_samples(X), _num_samples(X), 0))
            else:
                return K
        else:
            return np.zeros((_num_samples(X), _num_samples(Y)))

    def diag(self, X):
        """Returns the diagonal of the kernel k(X, X).

        The result of this method is identical to np.diag(self(X)); however,
        it can be evaluated more efficiently since only the diagonal is
        evaluated.

        Parameters
        ----------
        X : array-like of shape (n_samples_X, n_features) or list of object
            Argument to the kernel.

        Returns
        -------
        K_diag : ndarray of shape (n_samples_X,)
            Diagonal of kernel k(X, X)
        """
        return np.full(
            _num_samples(X), self.noise_level, dtype=np.array(self.noise_level).dtype
        )

    def __repr__(self):
        return "{0}(noise_level={1:.3g})".format(
            self.__class__.__name__, self.noise_level
        )
  1160. class RBF(StationaryKernelMixin, NormalizedKernelMixin, Kernel):
  1161. """Radial basis function kernel (aka squared-exponential kernel).
  1162. The RBF kernel is a stationary kernel. It is also known as the
  1163. "squared exponential" kernel. It is parameterized by a length scale
  1164. parameter :math:`l>0`, which can either be a scalar (isotropic variant
  1165. of the kernel) or a vector with the same number of dimensions as the inputs
  1166. X (anisotropic variant of the kernel). The kernel is given by:
  1167. .. math::
  1168. k(x_i, x_j) = \\exp\\left(- \\frac{d(x_i, x_j)^2}{2l^2} \\right)
  1169. where :math:`l` is the length scale of the kernel and
  1170. :math:`d(\\cdot,\\cdot)` is the Euclidean distance.
  1171. For advice on how to set the length scale parameter, see e.g. [1]_.
  1172. This kernel is infinitely differentiable, which implies that GPs with this
  1173. kernel as covariance function have mean square derivatives of all orders,
  1174. and are thus very smooth.
  1175. See [2]_, Chapter 4, Section 4.2, for further details of the RBF kernel.
  1176. Read more in the :ref:`User Guide <gp_kernels>`.
  1177. .. versionadded:: 0.18
  1178. Parameters
  1179. ----------
  1180. length_scale : float or ndarray of shape (n_features,), default=1.0
  1181. The length scale of the kernel. If a float, an isotropic kernel is
  1182. used. If an array, an anisotropic kernel is used where each dimension
  1183. of l defines the length-scale of the respective feature dimension.
  1184. length_scale_bounds : pair of floats >= 0 or "fixed", default=(1e-5, 1e5)
  1185. The lower and upper bound on 'length_scale'.
  1186. If set to "fixed", 'length_scale' cannot be changed during
  1187. hyperparameter tuning.
  1188. References
  1189. ----------
  1190. .. [1] `David Duvenaud (2014). "The Kernel Cookbook:
  1191. Advice on Covariance functions".
  1192. <https://www.cs.toronto.edu/~duvenaud/cookbook/>`_
  1193. .. [2] `Carl Edward Rasmussen, Christopher K. I. Williams (2006).
  1194. "Gaussian Processes for Machine Learning". The MIT Press.
  1195. <http://www.gaussianprocess.org/gpml/>`_
  1196. Examples
  1197. --------
  1198. >>> from sklearn.datasets import load_iris
  1199. >>> from sklearn.gaussian_process import GaussianProcessClassifier
  1200. >>> from sklearn.gaussian_process.kernels import RBF
  1201. >>> X, y = load_iris(return_X_y=True)
  1202. >>> kernel = 1.0 * RBF(1.0)
  1203. >>> gpc = GaussianProcessClassifier(kernel=kernel,
  1204. ... random_state=0).fit(X, y)
  1205. >>> gpc.score(X, y)
  1206. 0.9866...
  1207. >>> gpc.predict_proba(X[:2,:])
  1208. array([[0.8354..., 0.03228..., 0.1322...],
  1209. [0.7906..., 0.0652..., 0.1441...]])
  1210. """
  1211. def __init__(self, length_scale=1.0, length_scale_bounds=(1e-5, 1e5)):
  1212. self.length_scale = length_scale
  1213. self.length_scale_bounds = length_scale_bounds
  1214. @property
  1215. def anisotropic(self):
  1216. return np.iterable(self.length_scale) and len(self.length_scale) > 1
  1217. @property
  1218. def hyperparameter_length_scale(self):
  1219. if self.anisotropic:
  1220. return Hyperparameter(
  1221. "length_scale",
  1222. "numeric",
  1223. self.length_scale_bounds,
  1224. len(self.length_scale),
  1225. )
  1226. return Hyperparameter("length_scale", "numeric", self.length_scale_bounds)
  1227. def __call__(self, X, Y=None, eval_gradient=False):
  1228. """Return the kernel k(X, Y) and optionally its gradient.
  1229. Parameters
  1230. ----------
  1231. X : ndarray of shape (n_samples_X, n_features)
  1232. Left argument of the returned kernel k(X, Y)
  1233. Y : ndarray of shape (n_samples_Y, n_features), default=None
  1234. Right argument of the returned kernel k(X, Y). If None, k(X, X)
1235. is evaluated instead.
  1236. eval_gradient : bool, default=False
  1237. Determines whether the gradient with respect to the log of
  1238. the kernel hyperparameter is computed.
  1239. Only supported when Y is None.
  1240. Returns
  1241. -------
  1242. K : ndarray of shape (n_samples_X, n_samples_Y)
  1243. Kernel k(X, Y)
  1244. K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims), \
  1245. optional
  1246. The gradient of the kernel k(X, X) with respect to the log of the
  1247. hyperparameter of the kernel. Only returned when `eval_gradient`
  1248. is True.
  1249. """
  1250. X = np.atleast_2d(X)
  1251. length_scale = _check_length_scale(X, self.length_scale)
  1252. if Y is None:
  1253. dists = pdist(X / length_scale, metric="sqeuclidean")
  1254. K = np.exp(-0.5 * dists)
  1255. # convert from upper-triangular matrix to square matrix
  1256. K = squareform(K)
  1257. np.fill_diagonal(K, 1)
  1258. else:
  1259. if eval_gradient:
  1260. raise ValueError("Gradient can only be evaluated when Y is None.")
  1261. dists = cdist(X / length_scale, Y / length_scale, metric="sqeuclidean")
  1262. K = np.exp(-0.5 * dists)
  1263. if eval_gradient:
  1264. if self.hyperparameter_length_scale.fixed:
  1265. # Hyperparameter l kept fixed
  1266. return K, np.empty((X.shape[0], X.shape[0], 0))
  1267. elif not self.anisotropic or length_scale.shape[0] == 1:
  1268. K_gradient = (K * squareform(dists))[:, :, np.newaxis]
  1269. return K, K_gradient
  1270. elif self.anisotropic:
  1271. # We need to recompute the pairwise dimension-wise distances
  1272. K_gradient = (X[:, np.newaxis, :] - X[np.newaxis, :, :]) ** 2 / (
  1273. length_scale**2
  1274. )
  1275. K_gradient *= K[..., np.newaxis]
  1276. return K, K_gradient
  1277. else:
  1278. return K
  1279. def __repr__(self):
  1280. if self.anisotropic:
  1281. return "{0}(length_scale=[{1}])".format(
  1282. self.__class__.__name__,
  1283. ", ".join(map("{0:.3g}".format, self.length_scale)),
  1284. )
  1285. else: # isotropic
  1286. return "{0}(length_scale={1:.3g})".format(
  1287. self.__class__.__name__, np.ravel(self.length_scale)[0]
  1288. )
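# Illustrative sketch (not part of the library's API): the RBF kernel matrix
# equals exp(-d(x_i, x_j)^2 / (2 l^2)), the formula given in the class
# docstring above. The helper name `_demo_rbf_formula` is hypothetical; it
# reuses the module's np and cdist imports.
def _demo_rbf_formula():
    rng = np.random.RandomState(0)
    X = rng.normal(size=(4, 2))
    length_scale = 1.5
    K = RBF(length_scale=length_scale)(X)
    # Direct evaluation of the squared-exponential formula.
    K_manual = np.exp(-cdist(X, X, metric="sqeuclidean") / (2 * length_scale**2))
    assert np.allclose(K, K_manual)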
  1289. class Matern(RBF):
  1290. """Matern kernel.
  1291. The class of Matern kernels is a generalization of the :class:`RBF`.
  1292. It has an additional parameter :math:`\\nu` which controls the
  1293. smoothness of the resulting function. The smaller :math:`\\nu`,
  1294. the less smooth the approximated function is.
  1295. As :math:`\\nu\\rightarrow\\infty`, the kernel becomes equivalent to
  1296. the :class:`RBF` kernel. When :math:`\\nu = 1/2`, the Matérn kernel
  1297. becomes identical to the absolute exponential kernel.
  1298. Important intermediate values are
  1299. :math:`\\nu=1.5` (once differentiable functions)
  1300. and :math:`\\nu=2.5` (twice differentiable functions).
  1301. The kernel is given by:
  1302. .. math::
  1303. k(x_i, x_j) = \\frac{1}{\\Gamma(\\nu)2^{\\nu-1}}\\Bigg(
  1304. \\frac{\\sqrt{2\\nu}}{l} d(x_i , x_j )
  1305. \\Bigg)^\\nu K_\\nu\\Bigg(
  1306. \\frac{\\sqrt{2\\nu}}{l} d(x_i , x_j )\\Bigg)
  1307. where :math:`d(\\cdot,\\cdot)` is the Euclidean distance,
  1308. :math:`K_{\\nu}(\\cdot)` is a modified Bessel function and
  1309. :math:`\\Gamma(\\cdot)` is the gamma function.
  1310. See [1]_, Chapter 4, Section 4.2, for details regarding the different
  1311. variants of the Matern kernel.
  1312. Read more in the :ref:`User Guide <gp_kernels>`.
  1313. .. versionadded:: 0.18
  1314. Parameters
  1315. ----------
  1316. length_scale : float or ndarray of shape (n_features,), default=1.0
  1317. The length scale of the kernel. If a float, an isotropic kernel is
  1318. used. If an array, an anisotropic kernel is used where each dimension
  1319. of l defines the length-scale of the respective feature dimension.
  1320. length_scale_bounds : pair of floats >= 0 or "fixed", default=(1e-5, 1e5)
  1321. The lower and upper bound on 'length_scale'.
  1322. If set to "fixed", 'length_scale' cannot be changed during
  1323. hyperparameter tuning.
  1324. nu : float, default=1.5
  1325. The parameter nu controlling the smoothness of the learned function.
  1326. The smaller nu, the less smooth the approximated function is.
  1327. For nu=inf, the kernel becomes equivalent to the RBF kernel and for
  1328. nu=0.5 to the absolute exponential kernel. Important intermediate
  1329. values are nu=1.5 (once differentiable functions) and nu=2.5
  1330. (twice differentiable functions). Note that values of nu not in
  1331. [0.5, 1.5, 2.5, inf] incur a considerably higher computational cost
1332. (approx. 10 times higher) since they require evaluating the modified
  1333. Bessel function. Furthermore, in contrast to l, nu is kept fixed to
  1334. its initial value and not optimized.
  1335. References
  1336. ----------
  1337. .. [1] `Carl Edward Rasmussen, Christopher K. I. Williams (2006).
  1338. "Gaussian Processes for Machine Learning". The MIT Press.
  1339. <http://www.gaussianprocess.org/gpml/>`_
  1340. Examples
  1341. --------
  1342. >>> from sklearn.datasets import load_iris
  1343. >>> from sklearn.gaussian_process import GaussianProcessClassifier
  1344. >>> from sklearn.gaussian_process.kernels import Matern
  1345. >>> X, y = load_iris(return_X_y=True)
  1346. >>> kernel = 1.0 * Matern(length_scale=1.0, nu=1.5)
  1347. >>> gpc = GaussianProcessClassifier(kernel=kernel,
  1348. ... random_state=0).fit(X, y)
  1349. >>> gpc.score(X, y)
  1350. 0.9866...
  1351. >>> gpc.predict_proba(X[:2,:])
  1352. array([[0.8513..., 0.0368..., 0.1117...],
  1353. [0.8086..., 0.0693..., 0.1220...]])
  1354. """
  1355. def __init__(self, length_scale=1.0, length_scale_bounds=(1e-5, 1e5), nu=1.5):
  1356. super().__init__(length_scale, length_scale_bounds)
  1357. self.nu = nu
  1358. def __call__(self, X, Y=None, eval_gradient=False):
  1359. """Return the kernel k(X, Y) and optionally its gradient.
  1360. Parameters
  1361. ----------
  1362. X : ndarray of shape (n_samples_X, n_features)
  1363. Left argument of the returned kernel k(X, Y)
  1364. Y : ndarray of shape (n_samples_Y, n_features), default=None
  1365. Right argument of the returned kernel k(X, Y). If None, k(X, X)
1366. is evaluated instead.
  1367. eval_gradient : bool, default=False
  1368. Determines whether the gradient with respect to the log of
  1369. the kernel hyperparameter is computed.
  1370. Only supported when Y is None.
  1371. Returns
  1372. -------
  1373. K : ndarray of shape (n_samples_X, n_samples_Y)
  1374. Kernel k(X, Y)
  1375. K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims), \
  1376. optional
  1377. The gradient of the kernel k(X, X) with respect to the log of the
  1378. hyperparameter of the kernel. Only returned when `eval_gradient`
  1379. is True.
  1380. """
  1381. X = np.atleast_2d(X)
  1382. length_scale = _check_length_scale(X, self.length_scale)
  1383. if Y is None:
  1384. dists = pdist(X / length_scale, metric="euclidean")
  1385. else:
  1386. if eval_gradient:
  1387. raise ValueError("Gradient can only be evaluated when Y is None.")
  1388. dists = cdist(X / length_scale, Y / length_scale, metric="euclidean")
  1389. if self.nu == 0.5:
  1390. K = np.exp(-dists)
  1391. elif self.nu == 1.5:
  1392. K = dists * math.sqrt(3)
  1393. K = (1.0 + K) * np.exp(-K)
  1394. elif self.nu == 2.5:
  1395. K = dists * math.sqrt(5)
  1396. K = (1.0 + K + K**2 / 3.0) * np.exp(-K)
  1397. elif self.nu == np.inf:
  1398. K = np.exp(-(dists**2) / 2.0)
  1399. else: # general case; expensive to evaluate
  1400. K = dists
  1401. K[K == 0.0] += np.finfo(float).eps # strict zeros result in nan
  1402. tmp = math.sqrt(2 * self.nu) * K
  1403. K.fill((2 ** (1.0 - self.nu)) / gamma(self.nu))
  1404. K *= tmp**self.nu
  1405. K *= kv(self.nu, tmp)
  1406. if Y is None:
  1407. # convert from upper-triangular matrix to square matrix
  1408. K = squareform(K)
  1409. np.fill_diagonal(K, 1)
  1410. if eval_gradient:
  1411. if self.hyperparameter_length_scale.fixed:
  1412. # Hyperparameter l kept fixed
  1413. K_gradient = np.empty((X.shape[0], X.shape[0], 0))
  1414. return K, K_gradient
  1415. # We need to recompute the pairwise dimension-wise distances
  1416. if self.anisotropic:
  1417. D = (X[:, np.newaxis, :] - X[np.newaxis, :, :]) ** 2 / (
  1418. length_scale**2
  1419. )
  1420. else:
  1421. D = squareform(dists**2)[:, :, np.newaxis]
  1422. if self.nu == 0.5:
  1423. denominator = np.sqrt(D.sum(axis=2))[:, :, np.newaxis]
  1424. divide_result = np.zeros_like(D)
  1425. np.divide(
  1426. D,
  1427. denominator,
  1428. out=divide_result,
  1429. where=denominator != 0,
  1430. )
  1431. K_gradient = K[..., np.newaxis] * divide_result
  1432. elif self.nu == 1.5:
  1433. K_gradient = 3 * D * np.exp(-np.sqrt(3 * D.sum(-1)))[..., np.newaxis]
  1434. elif self.nu == 2.5:
  1435. tmp = np.sqrt(5 * D.sum(-1))[..., np.newaxis]
  1436. K_gradient = 5.0 / 3.0 * D * (tmp + 1) * np.exp(-tmp)
  1437. elif self.nu == np.inf:
  1438. K_gradient = D * K[..., np.newaxis]
  1439. else:
  1440. # approximate gradient numerically
  1441. def f(theta): # helper function
  1442. return self.clone_with_theta(theta)(X, Y)
  1443. return K, _approx_fprime(self.theta, f, 1e-10)
  1444. if not self.anisotropic:
  1445. return K, K_gradient[:, :].sum(-1)[:, :, np.newaxis]
  1446. else:
  1447. return K, K_gradient
  1448. else:
  1449. return K
  1450. def __repr__(self):
  1451. if self.anisotropic:
  1452. return "{0}(length_scale=[{1}], nu={2:.3g})".format(
  1453. self.__class__.__name__,
  1454. ", ".join(map("{0:.3g}".format, self.length_scale)),
  1455. self.nu,
  1456. )
  1457. else:
  1458. return "{0}(length_scale={1:.3g}, nu={2:.3g})".format(
  1459. self.__class__.__name__, np.ravel(self.length_scale)[0], self.nu
  1460. )
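# Illustrative sketch (not part of the library's API): the closed-form special
# cases mentioned in the Matern docstring -- nu=0.5 gives the absolute
# exponential kernel and nu=inf recovers the RBF kernel. The helper name
# `_demo_matern_special_cases` is hypothetical; it reuses the module's np and
# cdist imports.
def _demo_matern_special_cases():
    rng = np.random.RandomState(0)
    X = rng.normal(size=(5, 2))
    # nu=0.5: k(x_i, x_j) = exp(-d(x_i, x_j) / l)
    K_abs_exp = np.exp(-cdist(X, X, metric="euclidean"))
    assert np.allclose(Matern(length_scale=1.0, nu=0.5)(X), K_abs_exp)
    # nu=inf: identical to the squared-exponential (RBF) kernel.
    assert np.allclose(Matern(length_scale=1.0, nu=np.inf)(X), RBF(length_scale=1.0)(X))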
  1461. class RationalQuadratic(StationaryKernelMixin, NormalizedKernelMixin, Kernel):
  1462. """Rational Quadratic kernel.
  1463. The RationalQuadratic kernel can be seen as a scale mixture (an infinite
  1464. sum) of RBF kernels with different characteristic length scales. It is
  1465. parameterized by a length scale parameter :math:`l>0` and a scale
  1466. mixture parameter :math:`\\alpha>0`. Only the isotropic variant
  1467. where length_scale :math:`l` is a scalar is supported at the moment.
  1468. The kernel is given by:
  1469. .. math::
  1470. k(x_i, x_j) = \\left(
  1471. 1 + \\frac{d(x_i, x_j)^2 }{ 2\\alpha l^2}\\right)^{-\\alpha}
  1472. where :math:`\\alpha` is the scale mixture parameter, :math:`l` is
  1473. the length scale of the kernel and :math:`d(\\cdot,\\cdot)` is the
  1474. Euclidean distance.
  1475. For advice on how to set the parameters, see e.g. [1]_.
  1476. Read more in the :ref:`User Guide <gp_kernels>`.
  1477. .. versionadded:: 0.18
  1478. Parameters
  1479. ----------
  1480. length_scale : float > 0, default=1.0
  1481. The length scale of the kernel.
  1482. alpha : float > 0, default=1.0
  1483. Scale mixture parameter
  1484. length_scale_bounds : pair of floats >= 0 or "fixed", default=(1e-5, 1e5)
  1485. The lower and upper bound on 'length_scale'.
  1486. If set to "fixed", 'length_scale' cannot be changed during
  1487. hyperparameter tuning.
  1488. alpha_bounds : pair of floats >= 0 or "fixed", default=(1e-5, 1e5)
  1489. The lower and upper bound on 'alpha'.
  1490. If set to "fixed", 'alpha' cannot be changed during
  1491. hyperparameter tuning.
  1492. References
  1493. ----------
  1494. .. [1] `David Duvenaud (2014). "The Kernel Cookbook:
  1495. Advice on Covariance functions".
  1496. <https://www.cs.toronto.edu/~duvenaud/cookbook/>`_
  1497. Examples
  1498. --------
  1499. >>> from sklearn.datasets import load_iris
  1500. >>> from sklearn.gaussian_process import GaussianProcessClassifier
  1501. >>> from sklearn.gaussian_process.kernels import RationalQuadratic
  1502. >>> X, y = load_iris(return_X_y=True)
  1503. >>> kernel = RationalQuadratic(length_scale=1.0, alpha=1.5)
  1504. >>> gpc = GaussianProcessClassifier(kernel=kernel,
  1505. ... random_state=0).fit(X, y)
  1506. >>> gpc.score(X, y)
  1507. 0.9733...
  1508. >>> gpc.predict_proba(X[:2,:])
  1509. array([[0.8881..., 0.0566..., 0.05518...],
  1510. [0.8678..., 0.0707... , 0.0614...]])
  1511. """
  1512. def __init__(
  1513. self,
  1514. length_scale=1.0,
  1515. alpha=1.0,
  1516. length_scale_bounds=(1e-5, 1e5),
  1517. alpha_bounds=(1e-5, 1e5),
  1518. ):
  1519. self.length_scale = length_scale
  1520. self.alpha = alpha
  1521. self.length_scale_bounds = length_scale_bounds
  1522. self.alpha_bounds = alpha_bounds
  1523. @property
  1524. def hyperparameter_length_scale(self):
  1525. return Hyperparameter("length_scale", "numeric", self.length_scale_bounds)
  1526. @property
  1527. def hyperparameter_alpha(self):
  1528. return Hyperparameter("alpha", "numeric", self.alpha_bounds)
  1529. def __call__(self, X, Y=None, eval_gradient=False):
  1530. """Return the kernel k(X, Y) and optionally its gradient.
  1531. Parameters
  1532. ----------
  1533. X : ndarray of shape (n_samples_X, n_features)
  1534. Left argument of the returned kernel k(X, Y)
  1535. Y : ndarray of shape (n_samples_Y, n_features), default=None
  1536. Right argument of the returned kernel k(X, Y). If None, k(X, X)
1537. is evaluated instead.
  1538. eval_gradient : bool, default=False
  1539. Determines whether the gradient with respect to the log of
  1540. the kernel hyperparameter is computed.
  1541. Only supported when Y is None.
  1542. Returns
  1543. -------
  1544. K : ndarray of shape (n_samples_X, n_samples_Y)
  1545. Kernel k(X, Y)
  1546. K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims)
  1547. The gradient of the kernel k(X, X) with respect to the log of the
  1548. hyperparameter of the kernel. Only returned when eval_gradient
  1549. is True.
  1550. """
  1551. if len(np.atleast_1d(self.length_scale)) > 1:
  1552. raise AttributeError(
  1553. "RationalQuadratic kernel only supports isotropic version, "
  1554. "please use a single scalar for length_scale"
  1555. )
  1556. X = np.atleast_2d(X)
  1557. if Y is None:
  1558. dists = squareform(pdist(X, metric="sqeuclidean"))
  1559. tmp = dists / (2 * self.alpha * self.length_scale**2)
  1560. base = 1 + tmp
  1561. K = base**-self.alpha
  1562. np.fill_diagonal(K, 1)
  1563. else:
  1564. if eval_gradient:
  1565. raise ValueError("Gradient can only be evaluated when Y is None.")
  1566. dists = cdist(X, Y, metric="sqeuclidean")
  1567. K = (1 + dists / (2 * self.alpha * self.length_scale**2)) ** -self.alpha
  1568. if eval_gradient:
  1569. # gradient with respect to length_scale
  1570. if not self.hyperparameter_length_scale.fixed:
  1571. length_scale_gradient = dists * K / (self.length_scale**2 * base)
  1572. length_scale_gradient = length_scale_gradient[:, :, np.newaxis]
  1573. else: # l is kept fixed
  1574. length_scale_gradient = np.empty((K.shape[0], K.shape[1], 0))
  1575. # gradient with respect to alpha
  1576. if not self.hyperparameter_alpha.fixed:
  1577. alpha_gradient = K * (
  1578. -self.alpha * np.log(base)
  1579. + dists / (2 * self.length_scale**2 * base)
  1580. )
  1581. alpha_gradient = alpha_gradient[:, :, np.newaxis]
  1582. else: # alpha is kept fixed
  1583. alpha_gradient = np.empty((K.shape[0], K.shape[1], 0))
  1584. return K, np.dstack((alpha_gradient, length_scale_gradient))
  1585. else:
  1586. return K
  1587. def __repr__(self):
  1588. return "{0}(alpha={1:.3g}, length_scale={2:.3g})".format(
  1589. self.__class__.__name__, self.alpha, self.length_scale
  1590. )
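# Illustrative sketch (not part of the library's API): direct evaluation of the
# RationalQuadratic formula (1 + d^2 / (2 alpha l^2))^(-alpha) from the
# docstring above. The helper name `_demo_rational_quadratic_formula` is
# hypothetical; it reuses the module's np and cdist imports.
def _demo_rational_quadratic_formula():
    rng = np.random.RandomState(0)
    X = rng.normal(size=(5, 2))
    length_scale, alpha = 1.2, 0.7
    K = RationalQuadratic(length_scale=length_scale, alpha=alpha)(X)
    d2 = cdist(X, X, metric="sqeuclidean")
    K_manual = (1.0 + d2 / (2 * alpha * length_scale**2)) ** -alpha
    assert np.allclose(K, K_manual)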
  1591. class ExpSineSquared(StationaryKernelMixin, NormalizedKernelMixin, Kernel):
  1592. r"""Exp-Sine-Squared kernel (aka periodic kernel).
  1593. The ExpSineSquared kernel allows one to model functions which repeat
  1594. themselves exactly. It is parameterized by a length scale
  1595. parameter :math:`l>0` and a periodicity parameter :math:`p>0`.
  1596. Only the isotropic variant where :math:`l` is a scalar is
  1597. supported at the moment. The kernel is given by:
  1598. .. math::
  1599. k(x_i, x_j) = \text{exp}\left(-
1600. \frac{ 2\sin^2(\pi d(x_i, x_j)/p) }{ l^2} \right)
  1601. where :math:`l` is the length scale of the kernel, :math:`p` the
1602. periodicity of the kernel and :math:`d(\cdot,\cdot)` is the
  1603. Euclidean distance.
  1604. Read more in the :ref:`User Guide <gp_kernels>`.
  1605. .. versionadded:: 0.18
  1606. Parameters
  1607. ----------
  1608. length_scale : float > 0, default=1.0
  1609. The length scale of the kernel.
  1610. periodicity : float > 0, default=1.0
  1611. The periodicity of the kernel.
  1612. length_scale_bounds : pair of floats >= 0 or "fixed", default=(1e-5, 1e5)
  1613. The lower and upper bound on 'length_scale'.
  1614. If set to "fixed", 'length_scale' cannot be changed during
  1615. hyperparameter tuning.
  1616. periodicity_bounds : pair of floats >= 0 or "fixed", default=(1e-5, 1e5)
  1617. The lower and upper bound on 'periodicity'.
  1618. If set to "fixed", 'periodicity' cannot be changed during
  1619. hyperparameter tuning.
  1620. Examples
  1621. --------
  1622. >>> from sklearn.datasets import make_friedman2
  1623. >>> from sklearn.gaussian_process import GaussianProcessRegressor
  1624. >>> from sklearn.gaussian_process.kernels import ExpSineSquared
  1625. >>> X, y = make_friedman2(n_samples=50, noise=0, random_state=0)
  1626. >>> kernel = ExpSineSquared(length_scale=1, periodicity=1)
  1627. >>> gpr = GaussianProcessRegressor(kernel=kernel, alpha=5,
  1628. ... random_state=0).fit(X, y)
  1629. >>> gpr.score(X, y)
  1630. 0.0144...
  1631. >>> gpr.predict(X[:2,:], return_std=True)
  1632. (array([425.6..., 457.5...]), array([0.3894..., 0.3467...]))
  1633. """
  1634. def __init__(
  1635. self,
  1636. length_scale=1.0,
  1637. periodicity=1.0,
  1638. length_scale_bounds=(1e-5, 1e5),
  1639. periodicity_bounds=(1e-5, 1e5),
  1640. ):
  1641. self.length_scale = length_scale
  1642. self.periodicity = periodicity
  1643. self.length_scale_bounds = length_scale_bounds
  1644. self.periodicity_bounds = periodicity_bounds
  1645. @property
  1646. def hyperparameter_length_scale(self):
  1647. """Returns the length scale"""
  1648. return Hyperparameter("length_scale", "numeric", self.length_scale_bounds)
  1649. @property
  1650. def hyperparameter_periodicity(self):
  1651. return Hyperparameter("periodicity", "numeric", self.periodicity_bounds)
  1652. def __call__(self, X, Y=None, eval_gradient=False):
  1653. """Return the kernel k(X, Y) and optionally its gradient.
  1654. Parameters
  1655. ----------
  1656. X : ndarray of shape (n_samples_X, n_features)
  1657. Left argument of the returned kernel k(X, Y)
  1658. Y : ndarray of shape (n_samples_Y, n_features), default=None
  1659. Right argument of the returned kernel k(X, Y). If None, k(X, X)
1660. is evaluated instead.
  1661. eval_gradient : bool, default=False
  1662. Determines whether the gradient with respect to the log of
  1663. the kernel hyperparameter is computed.
  1664. Only supported when Y is None.
  1665. Returns
  1666. -------
  1667. K : ndarray of shape (n_samples_X, n_samples_Y)
  1668. Kernel k(X, Y)
  1669. K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims), \
  1670. optional
  1671. The gradient of the kernel k(X, X) with respect to the log of the
  1672. hyperparameter of the kernel. Only returned when `eval_gradient`
  1673. is True.
  1674. """
  1675. X = np.atleast_2d(X)
  1676. if Y is None:
  1677. dists = squareform(pdist(X, metric="euclidean"))
  1678. arg = np.pi * dists / self.periodicity
  1679. sin_of_arg = np.sin(arg)
  1680. K = np.exp(-2 * (sin_of_arg / self.length_scale) ** 2)
  1681. else:
  1682. if eval_gradient:
  1683. raise ValueError("Gradient can only be evaluated when Y is None.")
  1684. dists = cdist(X, Y, metric="euclidean")
  1685. K = np.exp(
  1686. -2 * (np.sin(np.pi / self.periodicity * dists) / self.length_scale) ** 2
  1687. )
  1688. if eval_gradient:
  1689. cos_of_arg = np.cos(arg)
  1690. # gradient with respect to length_scale
  1691. if not self.hyperparameter_length_scale.fixed:
  1692. length_scale_gradient = 4 / self.length_scale**2 * sin_of_arg**2 * K
  1693. length_scale_gradient = length_scale_gradient[:, :, np.newaxis]
  1694. else: # length_scale is kept fixed
  1695. length_scale_gradient = np.empty((K.shape[0], K.shape[1], 0))
  1696. # gradient with respect to p
  1697. if not self.hyperparameter_periodicity.fixed:
  1698. periodicity_gradient = (
  1699. 4 * arg / self.length_scale**2 * cos_of_arg * sin_of_arg * K
  1700. )
  1701. periodicity_gradient = periodicity_gradient[:, :, np.newaxis]
  1702. else: # p is kept fixed
  1703. periodicity_gradient = np.empty((K.shape[0], K.shape[1], 0))
  1704. return K, np.dstack((length_scale_gradient, periodicity_gradient))
  1705. else:
  1706. return K
  1707. def __repr__(self):
  1708. return "{0}(length_scale={1:.3g}, periodicity={2:.3g})".format(
  1709. self.__class__.__name__, self.length_scale, self.periodicity
  1710. )
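# Illustrative sketch (not part of the library's API): the ExpSineSquared
# kernel treats inputs that differ by a full period as (numerically)
# identical, since sin(pi * d / p) vanishes whenever d is a multiple of p. The
# helper name `_demo_exp_sine_squared_periodicity` is hypothetical.
def _demo_exp_sine_squared_periodicity():
    kernel = ExpSineSquared(length_scale=1.0, periodicity=2.5)
    x = np.array([[0.3]])
    shifted = x + 2.5  # shift by exactly one period
    assert np.allclose(kernel(x, shifted), kernel(x, x))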
  1711. class DotProduct(Kernel):
  1712. r"""Dot-Product kernel.
  1713. The DotProduct kernel is non-stationary and can be obtained from linear
  1714. regression by putting :math:`N(0, 1)` priors on the coefficients
  1715. of :math:`x_d (d = 1, . . . , D)` and a prior of :math:`N(0, \sigma_0^2)`
  1716. on the bias. The DotProduct kernel is invariant to a rotation of
  1717. the coordinates about the origin, but not translations.
1718. It is parameterized by a parameter sigma_0 :math:`\sigma_0`
1719. which controls the inhomogeneity of the kernel. For :math:`\sigma_0^2 = 0`,
  1720. the kernel is called the homogeneous linear kernel, otherwise
  1721. it is inhomogeneous. The kernel is given by
  1722. .. math::
  1723. k(x_i, x_j) = \sigma_0 ^ 2 + x_i \cdot x_j
  1724. The DotProduct kernel is commonly combined with exponentiation.
  1725. See [1]_, Chapter 4, Section 4.2, for further details regarding the
  1726. DotProduct kernel.
  1727. Read more in the :ref:`User Guide <gp_kernels>`.
  1728. .. versionadded:: 0.18
  1729. Parameters
  1730. ----------
  1731. sigma_0 : float >= 0, default=1.0
1732. Parameter controlling the inhomogeneity of the kernel. If sigma_0=0,
  1733. the kernel is homogeneous.
  1734. sigma_0_bounds : pair of floats >= 0 or "fixed", default=(1e-5, 1e5)
  1735. The lower and upper bound on 'sigma_0'.
  1736. If set to "fixed", 'sigma_0' cannot be changed during
  1737. hyperparameter tuning.
  1738. References
  1739. ----------
  1740. .. [1] `Carl Edward Rasmussen, Christopher K. I. Williams (2006).
  1741. "Gaussian Processes for Machine Learning". The MIT Press.
  1742. <http://www.gaussianprocess.org/gpml/>`_
  1743. Examples
  1744. --------
  1745. >>> from sklearn.datasets import make_friedman2
  1746. >>> from sklearn.gaussian_process import GaussianProcessRegressor
  1747. >>> from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel
  1748. >>> X, y = make_friedman2(n_samples=500, noise=0, random_state=0)
  1749. >>> kernel = DotProduct() + WhiteKernel()
  1750. >>> gpr = GaussianProcessRegressor(kernel=kernel,
  1751. ... random_state=0).fit(X, y)
  1752. >>> gpr.score(X, y)
  1753. 0.3680...
  1754. >>> gpr.predict(X[:2,:], return_std=True)
  1755. (array([653.0..., 592.1...]), array([316.6..., 316.6...]))
  1756. """
  1757. def __init__(self, sigma_0=1.0, sigma_0_bounds=(1e-5, 1e5)):
  1758. self.sigma_0 = sigma_0
  1759. self.sigma_0_bounds = sigma_0_bounds
  1760. @property
  1761. def hyperparameter_sigma_0(self):
  1762. return Hyperparameter("sigma_0", "numeric", self.sigma_0_bounds)
  1763. def __call__(self, X, Y=None, eval_gradient=False):
  1764. """Return the kernel k(X, Y) and optionally its gradient.
  1765. Parameters
  1766. ----------
  1767. X : ndarray of shape (n_samples_X, n_features)
  1768. Left argument of the returned kernel k(X, Y)
  1769. Y : ndarray of shape (n_samples_Y, n_features), default=None
  1770. Right argument of the returned kernel k(X, Y). If None, k(X, X)
1771. is evaluated instead.
  1772. eval_gradient : bool, default=False
  1773. Determines whether the gradient with respect to the log of
  1774. the kernel hyperparameter is computed.
  1775. Only supported when Y is None.
  1776. Returns
  1777. -------
  1778. K : ndarray of shape (n_samples_X, n_samples_Y)
  1779. Kernel k(X, Y)
  1780. K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims),\
  1781. optional
  1782. The gradient of the kernel k(X, X) with respect to the log of the
  1783. hyperparameter of the kernel. Only returned when `eval_gradient`
  1784. is True.
  1785. """
  1786. X = np.atleast_2d(X)
  1787. if Y is None:
  1788. K = np.inner(X, X) + self.sigma_0**2
  1789. else:
  1790. if eval_gradient:
  1791. raise ValueError("Gradient can only be evaluated when Y is None.")
  1792. K = np.inner(X, Y) + self.sigma_0**2
  1793. if eval_gradient:
  1794. if not self.hyperparameter_sigma_0.fixed:
  1795. K_gradient = np.empty((K.shape[0], K.shape[1], 1))
  1796. K_gradient[..., 0] = 2 * self.sigma_0**2
  1797. return K, K_gradient
  1798. else:
  1799. return K, np.empty((X.shape[0], X.shape[0], 0))
  1800. else:
  1801. return K
  1802. def diag(self, X):
  1803. """Returns the diagonal of the kernel k(X, X).
  1804. The result of this method is identical to np.diag(self(X)); however,
  1805. it can be evaluated more efficiently since only the diagonal is
  1806. evaluated.
  1807. Parameters
  1808. ----------
  1809. X : ndarray of shape (n_samples_X, n_features)
  1810. Left argument of the returned kernel k(X, Y).
  1811. Returns
  1812. -------
  1813. K_diag : ndarray of shape (n_samples_X,)
  1814. Diagonal of kernel k(X, X).
  1815. """
  1816. return np.einsum("ij,ij->i", X, X) + self.sigma_0**2
  1817. def is_stationary(self):
  1818. """Returns whether the kernel is stationary."""
  1819. return False
  1820. def __repr__(self):
  1821. return "{0}(sigma_0={1:.3g})".format(self.__class__.__name__, self.sigma_0)
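# Illustrative sketch (not part of the library's API): the DotProduct kernel
# is simply sigma_0^2 + X @ Y.T, matching the formula in the docstring above.
# The helper name `_demo_dot_product_formula` is hypothetical.
def _demo_dot_product_formula():
    rng = np.random.RandomState(0)
    X = rng.normal(size=(4, 3))
    Y = rng.normal(size=(6, 3))
    sigma_0 = 0.8
    K = DotProduct(sigma_0=sigma_0)(X, Y)
    assert np.allclose(K, sigma_0**2 + X @ Y.T)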
  1822. # adapted from scipy/optimize/optimize.py for functions with 2d output
  1823. def _approx_fprime(xk, f, epsilon, args=()):
  1824. f0 = f(*((xk,) + args))
  1825. grad = np.zeros((f0.shape[0], f0.shape[1], len(xk)), float)
  1826. ei = np.zeros((len(xk),), float)
  1827. for k in range(len(xk)):
  1828. ei[k] = 1.0
  1829. d = epsilon * ei
  1830. grad[:, :, k] = (f(*((xk + d,) + args)) - f0) / d[k]
  1831. ei[k] = 0.0
  1832. return grad
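# Illustrative sketch (not part of the library's API): _approx_fprime can be
# used to cross-check an analytic kernel gradient by finite differences in
# log-hyperparameter space, which is how the Matern general case and
# PairwiseKernel fall back to numeric gradients. The helper name
# `_demo_gradient_check` and the tolerance are hypothetical.
def _demo_gradient_check():
    rng = np.random.RandomState(0)
    X = rng.normal(size=(5, 2))
    kernel = RBF(length_scale=1.3)
    K, K_gradient = kernel(X, eval_gradient=True)

    def f(theta):  # kernel matrix as a function of the log-hyperparameters
        return kernel.clone_with_theta(theta)(X)

    K_gradient_approx = _approx_fprime(kernel.theta, f, 1e-8)
    assert np.allclose(K_gradient, K_gradient_approx, atol=1e-5)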
  1833. class PairwiseKernel(Kernel):
  1834. """Wrapper for kernels in sklearn.metrics.pairwise.
  1835. A thin wrapper around the functionality of the kernels in
  1836. sklearn.metrics.pairwise.
  1837. Note: Evaluation of eval_gradient is not analytic but numeric and all
  1838. kernels support only isotropic distances. The parameter gamma is
  1839. considered to be a hyperparameter and may be optimized. The other
  1840. kernel parameters are set directly at initialization and are kept
  1841. fixed.
  1842. .. versionadded:: 0.18
  1843. Parameters
  1844. ----------
  1845. gamma : float, default=1.0
  1846. Parameter gamma of the pairwise kernel specified by metric. It should
  1847. be positive.
  1848. gamma_bounds : pair of floats >= 0 or "fixed", default=(1e-5, 1e5)
  1849. The lower and upper bound on 'gamma'.
  1850. If set to "fixed", 'gamma' cannot be changed during
  1851. hyperparameter tuning.
  1852. metric : {"linear", "additive_chi2", "chi2", "poly", "polynomial", \
  1853. "rbf", "laplacian", "sigmoid", "cosine"} or callable, \
  1854. default="linear"
  1855. The metric to use when calculating kernel between instances in a
  1856. feature array. If metric is a string, it must be one of the metrics
  1857. in pairwise.PAIRWISE_KERNEL_FUNCTIONS.
  1858. If metric is "precomputed", X is assumed to be a kernel matrix.
  1859. Alternatively, if metric is a callable function, it is called on each
  1860. pair of instances (rows) and the resulting value recorded. The callable
  1861. should take two arrays from X as input and return a value indicating
  1862. the distance between them.
  1863. pairwise_kernels_kwargs : dict, default=None
  1864. All entries of this dict (if any) are passed as keyword arguments to
  1865. the pairwise kernel function.
  1866. Examples
  1867. --------
  1868. >>> from sklearn.datasets import load_iris
  1869. >>> from sklearn.gaussian_process import GaussianProcessClassifier
  1870. >>> from sklearn.gaussian_process.kernels import PairwiseKernel
  1871. >>> X, y = load_iris(return_X_y=True)
  1872. >>> kernel = PairwiseKernel(metric='rbf')
  1873. >>> gpc = GaussianProcessClassifier(kernel=kernel,
  1874. ... random_state=0).fit(X, y)
  1875. >>> gpc.score(X, y)
  1876. 0.9733...
  1877. >>> gpc.predict_proba(X[:2,:])
  1878. array([[0.8880..., 0.05663..., 0.05532...],
  1879. [0.8676..., 0.07073..., 0.06165...]])
  1880. """
  1881. def __init__(
  1882. self,
  1883. gamma=1.0,
  1884. gamma_bounds=(1e-5, 1e5),
  1885. metric="linear",
  1886. pairwise_kernels_kwargs=None,
  1887. ):
  1888. self.gamma = gamma
  1889. self.gamma_bounds = gamma_bounds
  1890. self.metric = metric
  1891. self.pairwise_kernels_kwargs = pairwise_kernels_kwargs
  1892. @property
  1893. def hyperparameter_gamma(self):
  1894. return Hyperparameter("gamma", "numeric", self.gamma_bounds)
  1895. def __call__(self, X, Y=None, eval_gradient=False):
  1896. """Return the kernel k(X, Y) and optionally its gradient.
  1897. Parameters
  1898. ----------
  1899. X : ndarray of shape (n_samples_X, n_features)
  1900. Left argument of the returned kernel k(X, Y)
  1901. Y : ndarray of shape (n_samples_Y, n_features), default=None
  1902. Right argument of the returned kernel k(X, Y). If None, k(X, X)
1903. is evaluated instead.
  1904. eval_gradient : bool, default=False
  1905. Determines whether the gradient with respect to the log of
  1906. the kernel hyperparameter is computed.
  1907. Only supported when Y is None.
  1908. Returns
  1909. -------
  1910. K : ndarray of shape (n_samples_X, n_samples_Y)
  1911. Kernel k(X, Y)
  1912. K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims),\
  1913. optional
  1914. The gradient of the kernel k(X, X) with respect to the log of the
  1915. hyperparameter of the kernel. Only returned when `eval_gradient`
  1916. is True.
  1917. """
  1918. pairwise_kernels_kwargs = self.pairwise_kernels_kwargs
  1919. if self.pairwise_kernels_kwargs is None:
  1920. pairwise_kernels_kwargs = {}
  1921. X = np.atleast_2d(X)
  1922. K = pairwise_kernels(
  1923. X,
  1924. Y,
  1925. metric=self.metric,
  1926. gamma=self.gamma,
  1927. filter_params=True,
  1928. **pairwise_kernels_kwargs,
  1929. )
  1930. if eval_gradient:
  1931. if self.hyperparameter_gamma.fixed:
  1932. return K, np.empty((X.shape[0], X.shape[0], 0))
  1933. else:
  1934. # approximate gradient numerically
  1935. def f(gamma): # helper function
  1936. return pairwise_kernels(
  1937. X,
  1938. Y,
  1939. metric=self.metric,
  1940. gamma=np.exp(gamma),
  1941. filter_params=True,
  1942. **pairwise_kernels_kwargs,
  1943. )
  1944. return K, _approx_fprime(self.theta, f, 1e-10)
  1945. else:
  1946. return K
  1947. def diag(self, X):
  1948. """Returns the diagonal of the kernel k(X, X).
  1949. The result of this method is identical to np.diag(self(X)); however,
  1950. it can be evaluated more efficiently since only the diagonal is
  1951. evaluated.
  1952. Parameters
  1953. ----------
  1954. X : ndarray of shape (n_samples_X, n_features)
  1955. Left argument of the returned kernel k(X, Y)
  1956. Returns
  1957. -------
  1958. K_diag : ndarray of shape (n_samples_X,)
  1959. Diagonal of kernel k(X, X)
  1960. """
  1961. # We have to fall back to slow way of computing diagonal
  1962. return np.apply_along_axis(self, 1, X).ravel()
  1963. def is_stationary(self):
  1964. """Returns whether the kernel is stationary."""
  1965. return self.metric in ["rbf"]
  1966. def __repr__(self):
  1967. return "{0}(gamma={1}, metric={2})".format(
  1968. self.__class__.__name__, self.gamma, self.metric
  1969. )
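# Illustrative sketch (not part of the library's API): with metric="rbf" the
# PairwiseKernel wraps sklearn.metrics.pairwise.rbf_kernel, exp(-gamma * d^2),
# so choosing gamma = 1 / (2 * length_scale**2) reproduces the RBF kernel
# defined above. The helper name `_demo_pairwise_rbf_equivalence` is
# hypothetical.
def _demo_pairwise_rbf_equivalence():
    rng = np.random.RandomState(0)
    X = rng.normal(size=(5, 3))
    length_scale = 0.9
    gamma = 1.0 / (2 * length_scale**2)
    K_pairwise = PairwiseKernel(gamma=gamma, metric="rbf")(X)
    assert np.allclose(K_pairwise, RBF(length_scale=length_scale)(X))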