- """Kernels for Gaussian process regression and classification.
- The kernels in this module allow kernel-engineering, i.e., they can be
- combined via the "+" and "*" operators or be exponentiated with a scalar
- via "**". These sum and product expressions can also contain scalar values,
- which are automatically converted to a constant kernel.
- All kernels allow (analytic) gradient-based hyperparameter optimization.
- The space of hyperparameters can be specified by giving lower and upper
- boundaries for the value of each hyperparameter (the search space is thus
- rectangular). Instead of specifying bounds, hyperparameters can also be
- declared to be "fixed", which causes these hyperparameters to be excluded from
- optimization.
- """
- # Author: Jan Hendrik Metzen <jhm@informatik.uni-bremen.de>
- # License: BSD 3 clause
- # Note: this module is strongly inspired by the kernel module of the george
- # package.
- import math
- import warnings
- from abc import ABCMeta, abstractmethod
- from collections import namedtuple
- from inspect import signature
- import numpy as np
- from scipy.spatial.distance import cdist, pdist, squareform
- from scipy.special import gamma, kv
- from ..base import clone
- from ..exceptions import ConvergenceWarning
- from ..metrics.pairwise import pairwise_kernels
- from ..utils.validation import _num_samples
- def _check_length_scale(X, length_scale):
- length_scale = np.squeeze(length_scale).astype(float)
- if np.ndim(length_scale) > 1:
- raise ValueError("length_scale cannot be of dimension greater than 1")
- if np.ndim(length_scale) == 1 and X.shape[1] != length_scale.shape[0]:
- raise ValueError(
- "Anisotropic kernel must have the same number of "
- "dimensions as data (%d!=%d)" % (length_scale.shape[0], X.shape[1])
- )
- return length_scale
- class Hyperparameter(
- namedtuple(
- "Hyperparameter", ("name", "value_type", "bounds", "n_elements", "fixed")
- )
- ):
- """A kernel hyperparameter's specification in form of a namedtuple.
- .. versionadded:: 0.18
- Attributes
- ----------
- name : str
- The name of the hyperparameter. Note that a kernel using a
- hyperparameter with name "x" must have the attributes self.x and
- self.x_bounds
- value_type : str
- The type of the hyperparameter. Currently, only "numeric"
- hyperparameters are supported.
- bounds : pair of floats >= 0 or "fixed"
- The lower and upper bound on the parameter. If n_elements>1, a pair
- of 1d arrays with n_elements each may be given alternatively. If
- the string "fixed" is passed as bounds, the hyperparameter's value
- cannot be changed.
- n_elements : int, default=1
- The number of elements of the hyperparameter value. Defaults to 1,
- which corresponds to a scalar hyperparameter. n_elements > 1
- corresponds to a hyperparameter which is vector-valued,
- such as anisotropic length-scales.
- fixed : bool, default=None
- Whether the value of this hyperparameter is fixed, i.e., cannot be
- changed during hyperparameter tuning. If None is passed, "fixed" is
- derived from the given bounds.
- Examples
- --------
- >>> from sklearn.gaussian_process.kernels import ConstantKernel
- >>> from sklearn.datasets import make_friedman2
- >>> from sklearn.gaussian_process import GaussianProcessRegressor
- >>> from sklearn.gaussian_process.kernels import Hyperparameter
- >>> X, y = make_friedman2(n_samples=50, noise=0, random_state=0)
- >>> kernel = ConstantKernel(constant_value=1.0,
- ... constant_value_bounds=(0.0, 10.0))
- We can access each hyperparameter:
- >>> for hyperparameter in kernel.hyperparameters:
- ... print(hyperparameter)
- Hyperparameter(name='constant_value', value_type='numeric',
- bounds=array([[ 0., 10.]]), n_elements=1, fixed=False)
- >>> params = kernel.get_params()
- >>> for key in sorted(params): print(f"{key} : {params[key]}")
- constant_value : 1.0
- constant_value_bounds : (0.0, 10.0)
- """
- # A raw namedtuple is very memory efficient as it packs the attributes
- # in a struct to get rid of the __dict__ of attributes; in particular, it
- # does not copy the string for the keys on each instance.
- # Deriving a namedtuple subclass just to introduce an __init__ method would
- # reintroduce the __dict__ on the instance, so we tell the Python
- # interpreter that this subclass uses static __slots__ instead of dynamic
- # attributes. Furthermore, we don't need any additional slot in the
- # subclass, so we set __slots__ to the empty tuple.
- __slots__ = ()
- def __new__(cls, name, value_type, bounds, n_elements=1, fixed=None):
- if not isinstance(bounds, str) or bounds != "fixed":
- bounds = np.atleast_2d(bounds)
- if n_elements > 1: # vector-valued parameter
- if bounds.shape[0] == 1:
- bounds = np.repeat(bounds, n_elements, 0)
- elif bounds.shape[0] != n_elements:
- raise ValueError(
- "Bounds on %s should have either 1 or "
- "%d dimensions. Given are %d"
- % (name, n_elements, bounds.shape[0])
- )
- if fixed is None:
- fixed = isinstance(bounds, str) and bounds == "fixed"
- return super(Hyperparameter, cls).__new__(
- cls, name, value_type, bounds, n_elements, fixed
- )
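- # For illustration: Hyperparameter("length_scale", "numeric", (1e-5, 1e5),
- # n_elements=3) broadcasts the single (lower, upper) pair to a bounds array
- # of shape (3, 2), one row per vector element.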
- # This is mainly a testing utility to check that two hyperparameters
- # are equal.
- def __eq__(self, other):
- return (
- self.name == other.name
- and self.value_type == other.value_type
- and np.all(self.bounds == other.bounds)
- and self.n_elements == other.n_elements
- and self.fixed == other.fixed
- )
- class Kernel(metaclass=ABCMeta):
- """Base class for all kernels.
- .. versionadded:: 0.18
- """
- def get_params(self, deep=True):
- """Get parameters of this kernel.
- Parameters
- ----------
- deep : bool, default=True
- If True, will return the parameters for this estimator and
- contained subobjects that are estimators.
- Returns
- -------
- params : dict
- Parameter names mapped to their values.
- """
- params = dict()
- # introspect the constructor arguments to find the model parameters
- # to represent
- cls = self.__class__
- init = getattr(cls.__init__, "deprecated_original", cls.__init__)
- init_sign = signature(init)
- args, varargs = [], []
- for parameter in init_sign.parameters.values():
- if parameter.kind != parameter.VAR_KEYWORD and parameter.name != "self":
- args.append(parameter.name)
- if parameter.kind == parameter.VAR_POSITIONAL:
- varargs.append(parameter.name)
- if len(varargs) != 0:
- raise RuntimeError(
- "scikit-learn kernels should always "
- "specify their parameters in the signature"
- " of their __init__ (no varargs)."
- " %s doesn't follow this convention." % (cls,)
- )
- for arg in args:
- params[arg] = getattr(self, arg)
- return params
- def set_params(self, **params):
- """Set the parameters of this kernel.
- The method works on simple kernels as well as on nested kernels.
- The latter have parameters of the form ``<component>__<parameter>``
- so that it's possible to update each component of a nested object.
- Returns
- -------
- self
- """
- if not params:
- # Simple optimisation to gain speed (inspect is slow)
- return self
- valid_params = self.get_params(deep=True)
- for key, value in params.items():
- split = key.split("__", 1)
- if len(split) > 1:
- # nested objects case
- name, sub_name = split
- if name not in valid_params:
- raise ValueError(
- "Invalid parameter %s for kernel %s. "
- "Check the list of available parameters "
- "with `kernel.get_params().keys()`." % (name, self)
- )
- sub_object = valid_params[name]
- sub_object.set_params(**{sub_name: value})
- else:
- # simple objects case
- if key not in valid_params:
- raise ValueError(
- "Invalid parameter %s for kernel %s. "
- "Check the list of available parameters "
- "with `kernel.get_params().keys()`."
- % (key, self.__class__.__name__)
- )
- setattr(self, key, value)
- return self
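- # For illustration: on Sum(RBF(), WhiteKernel()), calling
- # kernel.set_params(k1__length_scale=0.5) reaches the RBF component
- # through the nested "k1__" prefix and updates its length_scale.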
- def clone_with_theta(self, theta):
- """Returns a clone of self with given hyperparameters theta.
- Parameters
- ----------
- theta : ndarray of shape (n_dims,)
- The hyperparameters
- """
- cloned = clone(self)
- cloned.theta = theta
- return cloned
- @property
- def n_dims(self):
- """Returns the number of non-fixed hyperparameters of the kernel."""
- return self.theta.shape[0]
- @property
- def hyperparameters(self):
- """Returns a list of all hyperparameter specifications."""
- r = [
- getattr(self, attr)
- for attr in dir(self)
- if attr.startswith("hyperparameter_")
- ]
- return r
- @property
- def theta(self):
- """Returns the (flattened, log-transformed) non-fixed hyperparameters.
- Note that theta are typically the log-transformed values of the
- kernel's hyperparameters as this representation of the search space
- is more amenable for hyperparameter search, as hyperparameters like
- length-scales naturally live on a log-scale.
- Returns
- -------
- theta : ndarray of shape (n_dims,)
- The non-fixed, log-transformed hyperparameters of the kernel
- """
- theta = []
- params = self.get_params()
- for hyperparameter in self.hyperparameters:
- if not hyperparameter.fixed:
- theta.append(params[hyperparameter.name])
- if len(theta) > 0:
- return np.log(np.hstack(theta))
- else:
- return np.array([])
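- # For illustration: RBF(length_scale=2.0).theta equals np.log([2.0])
- # (about [0.693]); assigning theta applies np.exp, so the round trip
- # recovers length_scale == 2.0.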
- @theta.setter
- def theta(self, theta):
- """Sets the (flattened, log-transformed) non-fixed hyperparameters.
- Parameters
- ----------
- theta : ndarray of shape (n_dims,)
- The non-fixed, log-transformed hyperparameters of the kernel
- """
- params = self.get_params()
- i = 0
- for hyperparameter in self.hyperparameters:
- if hyperparameter.fixed:
- continue
- if hyperparameter.n_elements > 1:
- # vector-valued parameter
- params[hyperparameter.name] = np.exp(
- theta[i : i + hyperparameter.n_elements]
- )
- i += hyperparameter.n_elements
- else:
- params[hyperparameter.name] = np.exp(theta[i])
- i += 1
- if i != len(theta):
- raise ValueError(
- "theta has not the correct number of entries."
- " Should be %d; given are %d" % (i, len(theta))
- )
- self.set_params(**params)
- @property
- def bounds(self):
- """Returns the log-transformed bounds on the theta.
- Returns
- -------
- bounds : ndarray of shape (n_dims, 2)
- The log-transformed bounds on the kernel's hyperparameters theta
- """
- bounds = [
- hyperparameter.bounds
- for hyperparameter in self.hyperparameters
- if not hyperparameter.fixed
- ]
- if len(bounds) > 0:
- return np.log(np.vstack(bounds))
- else:
- return np.array([])
- def __add__(self, b):
- if not isinstance(b, Kernel):
- return Sum(self, ConstantKernel(b))
- return Sum(self, b)
- def __radd__(self, b):
- if not isinstance(b, Kernel):
- return Sum(ConstantKernel(b), self)
- return Sum(b, self)
- def __mul__(self, b):
- if not isinstance(b, Kernel):
- return Product(self, ConstantKernel(b))
- return Product(self, b)
- def __rmul__(self, b):
- if not isinstance(b, Kernel):
- return Product(ConstantKernel(b), self)
- return Product(b, self)
- def __pow__(self, b):
- return Exponentiation(self, b)
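- # For illustration: 2.0 * RBF() + 3.0 builds
- # Sum(Product(ConstantKernel(2.0), RBF()), ConstantKernel(3.0)) because
- # scalars are wrapped in a ConstantKernel, and RBF() ** 2 builds
- # Exponentiation(RBF(), 2).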
- def __eq__(self, b):
- if type(self) != type(b):
- return False
- params_a = self.get_params()
- params_b = b.get_params()
- for key in set(list(params_a.keys()) + list(params_b.keys())):
- if np.any(params_a.get(key, None) != params_b.get(key, None)):
- return False
- return True
- def __repr__(self):
- return "{0}({1})".format(
- self.__class__.__name__, ", ".join(map("{0:.3g}".format, self.theta))
- )
- @abstractmethod
- def __call__(self, X, Y=None, eval_gradient=False):
- """Evaluate the kernel."""
- @abstractmethod
- def diag(self, X):
- """Returns the diagonal of the kernel k(X, X).
- The result of this method is identical to np.diag(self(X)); however,
- it can be evaluated more efficiently since only the diagonal is
- evaluated.
- Parameters
- ----------
- X : array-like of shape (n_samples_X, n_features) or list of object
- Left argument of the returned kernel k(X, Y)
- Returns
- -------
- K_diag : ndarray of shape (n_samples_X,)
- Diagonal of kernel k(X, X)
- """
- @abstractmethod
- def is_stationary(self):
- """Returns whether the kernel is stationary."""
- @property
- def requires_vector_input(self):
- """Returns whether the kernel is defined on fixed-length feature
- vectors or generic objects. Defaults to True for backward
- compatibility."""
- return True
- def _check_bounds_params(self):
- """Called after fitting to warn if bounds may have been too tight."""
- list_close = np.isclose(self.bounds, np.atleast_2d(self.theta).T)
- idx = 0
- for hyp in self.hyperparameters:
- if hyp.fixed:
- continue
- for dim in range(hyp.n_elements):
- if list_close[idx, 0]:
- warnings.warn(
- "The optimal value found for "
- "dimension %s of parameter %s is "
- "close to the specified lower "
- "bound %s. Decreasing the bound and"
- " calling fit again may find a "
- "better value." % (dim, hyp.name, hyp.bounds[dim][0]),
- ConvergenceWarning,
- )
- elif list_close[idx, 1]:
- warnings.warn(
- "The optimal value found for "
- "dimension %s of parameter %s is "
- "close to the specified upper "
- "bound %s. Increasing the bound and"
- " calling fit again may find a "
- "better value." % (dim, hyp.name, hyp.bounds[dim][1]),
- ConvergenceWarning,
- )
- idx += 1
- class NormalizedKernelMixin:
- """Mixin for kernels which are normalized: k(X, X)=1.
- .. versionadded:: 0.18
- """
- def diag(self, X):
- """Returns the diagonal of the kernel k(X, X).
- The result of this method is identical to np.diag(self(X)); however,
- it can be evaluated more efficiently since only the diagonal is
- evaluated.
- Parameters
- ----------
- X : ndarray of shape (n_samples_X, n_features)
- Left argument of the returned kernel k(X, Y)
- Returns
- -------
- K_diag : ndarray of shape (n_samples_X,)
- Diagonal of kernel k(X, X)
- """
- return np.ones(X.shape[0])
- class StationaryKernelMixin:
- """Mixin for kernels which are stationary: k(X, Y)= f(X-Y).
- .. versionadded:: 0.18
- """
- def is_stationary(self):
- """Returns whether the kernel is stationary."""
- return True
- class GenericKernelMixin:
- """Mixin for kernels which operate on generic objects such as variable-
- length sequences, trees, and graphs.
- .. versionadded:: 0.22
- """
- @property
- def requires_vector_input(self):
- """Whether the kernel works only on fixed-length feature vectors."""
- return False
- class CompoundKernel(Kernel):
- """Kernel which is composed of a set of other kernels.
- .. versionadded:: 0.18
- Parameters
- ----------
- kernels : list of Kernels
- The other kernels
- Examples
- --------
- >>> from sklearn.gaussian_process.kernels import WhiteKernel
- >>> from sklearn.gaussian_process.kernels import RBF
- >>> from sklearn.gaussian_process.kernels import CompoundKernel
- >>> kernel = CompoundKernel(
- ... [WhiteKernel(noise_level=3.0), RBF(length_scale=2.0)])
- >>> print(kernel.bounds)
- [[-11.51292546 11.51292546]
- [-11.51292546 11.51292546]]
- >>> print(kernel.n_dims)
- 2
- >>> print(kernel.theta)
- [1.09861229 0.69314718]
- """
- def __init__(self, kernels):
- self.kernels = kernels
- def get_params(self, deep=True):
- """Get parameters of this kernel.
- Parameters
- ----------
- deep : bool, default=True
- If True, will return the parameters for this estimator and
- contained subobjects that are estimators.
- Returns
- -------
- params : dict
- Parameter names mapped to their values.
- """
- return dict(kernels=self.kernels)
- @property
- def theta(self):
- """Returns the (flattened, log-transformed) non-fixed hyperparameters.
- Note that theta are typically the log-transformed values of the
- kernel's hyperparameters as this representation of the search space
- is more amenable for hyperparameter search, as hyperparameters like
- length-scales naturally live on a log-scale.
- Returns
- -------
- theta : ndarray of shape (n_dims,)
- The non-fixed, log-transformed hyperparameters of the kernel
- """
- return np.hstack([kernel.theta for kernel in self.kernels])
- @theta.setter
- def theta(self, theta):
- """Sets the (flattened, log-transformed) non-fixed hyperparameters.
- Parameters
- ----------
- theta : array of shape (n_dims,)
- The non-fixed, log-transformed hyperparameters of the kernel
- """
- k_dims = self.kernels[0].n_dims
- for i, kernel in enumerate(self.kernels):
- kernel.theta = theta[i * k_dims : (i + 1) * k_dims]
- @property
- def bounds(self):
- """Returns the log-transformed bounds on the theta.
- Returns
- -------
- bounds : array of shape (n_dims, 2)
- The log-transformed bounds on the kernel's hyperparameters theta
- """
- return np.vstack([kernel.bounds for kernel in self.kernels])
- def __call__(self, X, Y=None, eval_gradient=False):
- """Return the kernel k(X, Y) and optionally its gradient.
- Note that this compound kernel returns the results of all simple kernels
- stacked along an additional axis.
- Parameters
- ----------
- X : array-like of shape (n_samples_X, n_features) or list of object
- Left argument of the returned kernel k(X, Y)
- Y : array-like of shape (n_samples_Y, n_features) or list of object, \
- default=None
- Right argument of the returned kernel k(X, Y). If None, k(X, X)
- is evaluated instead.
- eval_gradient : bool, default=False
- Determines whether the gradient with respect to the log of the
- kernel hyperparameter is computed.
- Returns
- -------
- K : ndarray of shape (n_samples_X, n_samples_Y, n_kernels)
- Kernel k(X, Y)
- K_gradient : ndarray of shape \
- (n_samples_X, n_samples_X, n_dims, n_kernels), optional
- The gradient of the kernel k(X, X) with respect to the log of the
- hyperparameter of the kernel. Only returned when `eval_gradient`
- is True.
- """
- if eval_gradient:
- K = []
- K_grad = []
- for kernel in self.kernels:
- K_single, K_grad_single = kernel(X, Y, eval_gradient)
- K.append(K_single)
- K_grad.append(K_grad_single[..., np.newaxis])
- return np.dstack(K), np.concatenate(K_grad, 3)
- else:
- return np.dstack([kernel(X, Y, eval_gradient) for kernel in self.kernels])
- def __eq__(self, b):
- if type(self) != type(b) or len(self.kernels) != len(b.kernels):
- return False
- return np.all(
- [self.kernels[i] == b.kernels[i] for i in range(len(self.kernels))]
- )
- def is_stationary(self):
- """Returns whether the kernel is stationary."""
- return np.all([kernel.is_stationary() for kernel in self.kernels])
- @property
- def requires_vector_input(self):
- """Returns whether the kernel is defined on discrete structures."""
- return np.any([kernel.requires_vector_input for kernel in self.kernels])
- def diag(self, X):
- """Returns the diagonal of the kernel k(X, X).
- The result of this method is identical to `np.diag(self(X))`; however,
- it can be evaluated more efficiently since only the diagonal is
- evaluated.
- Parameters
- ----------
- X : array-like of shape (n_samples_X, n_features) or list of object
- Argument to the kernel.
- Returns
- -------
- K_diag : ndarray of shape (n_samples_X, n_kernels)
- Diagonal of kernel k(X, X)
- """
- return np.vstack([kernel.diag(X) for kernel in self.kernels]).T
- class KernelOperator(Kernel):
- """Base class for all kernel operators.
- .. versionadded:: 0.18
- """
- def __init__(self, k1, k2):
- self.k1 = k1
- self.k2 = k2
- def get_params(self, deep=True):
- """Get parameters of this kernel.
- Parameters
- ----------
- deep : bool, default=True
- If True, will return the parameters for this estimator and
- contained subobjects that are estimators.
- Returns
- -------
- params : dict
- Parameter names mapped to their values.
- """
- params = dict(k1=self.k1, k2=self.k2)
- if deep:
- deep_items = self.k1.get_params().items()
- params.update(("k1__" + k, val) for k, val in deep_items)
- deep_items = self.k2.get_params().items()
- params.update(("k2__" + k, val) for k, val in deep_items)
- return params
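- # For illustration: Sum(ConstantKernel(1.0), RBF(2.0)).get_params(deep=True)
- # contains "k1", "k2" and the prefixed entries "k1__constant_value",
- # "k1__constant_value_bounds", "k2__length_scale" and
- # "k2__length_scale_bounds".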
- @property
- def hyperparameters(self):
- """Returns a list of all hyperparameter."""
- r = [
- Hyperparameter(
- "k1__" + hyperparameter.name,
- hyperparameter.value_type,
- hyperparameter.bounds,
- hyperparameter.n_elements,
- )
- for hyperparameter in self.k1.hyperparameters
- ]
- for hyperparameter in self.k2.hyperparameters:
- r.append(
- Hyperparameter(
- "k2__" + hyperparameter.name,
- hyperparameter.value_type,
- hyperparameter.bounds,
- hyperparameter.n_elements,
- )
- )
- return r
- @property
- def theta(self):
- """Returns the (flattened, log-transformed) non-fixed hyperparameters.
- Note that theta are typically the log-transformed values of the
- kernel's hyperparameters as this representation of the search space
- is more amenable for hyperparameter search, as hyperparameters like
- length-scales naturally live on a log-scale.
- Returns
- -------
- theta : ndarray of shape (n_dims,)
- The non-fixed, log-transformed hyperparameters of the kernel
- """
- return np.append(self.k1.theta, self.k2.theta)
- @theta.setter
- def theta(self, theta):
- """Sets the (flattened, log-transformed) non-fixed hyperparameters.
- Parameters
- ----------
- theta : ndarray of shape (n_dims,)
- The non-fixed, log-transformed hyperparameters of the kernel
- """
- k1_dims = self.k1.n_dims
- self.k1.theta = theta[:k1_dims]
- self.k2.theta = theta[k1_dims:]
- @property
- def bounds(self):
- """Returns the log-transformed bounds on the theta.
- Returns
- -------
- bounds : ndarray of shape (n_dims, 2)
- The log-transformed bounds on the kernel's hyperparameters theta
- """
- if self.k1.bounds.size == 0:
- return self.k2.bounds
- if self.k2.bounds.size == 0:
- return self.k1.bounds
- return np.vstack((self.k1.bounds, self.k2.bounds))
- def __eq__(self, b):
- if type(self) != type(b):
- return False
- return (self.k1 == b.k1 and self.k2 == b.k2) or (
- self.k1 == b.k2 and self.k2 == b.k1
- )
- def is_stationary(self):
- """Returns whether the kernel is stationary."""
- return self.k1.is_stationary() and self.k2.is_stationary()
- @property
- def requires_vector_input(self):
- """Returns whether the kernel is stationary."""
- return self.k1.requires_vector_input or self.k2.requires_vector_input
- class Sum(KernelOperator):
- """The `Sum` kernel takes two kernels :math:`k_1` and :math:`k_2`
- and combines them via
- .. math::
- k_{sum}(X, Y) = k_1(X, Y) + k_2(X, Y)
- Note that the `__add__` magic method is overridden, so
- `Sum(RBF(), RBF())` is equivalent to using the + operator
- with `RBF() + RBF()`.
- Read more in the :ref:`User Guide <gp_kernels>`.
- .. versionadded:: 0.18
- Parameters
- ----------
- k1 : Kernel
- The first base-kernel of the sum-kernel
- k2 : Kernel
- The second base-kernel of the sum-kernel
- Examples
- --------
- >>> from sklearn.datasets import make_friedman2
- >>> from sklearn.gaussian_process import GaussianProcessRegressor
- >>> from sklearn.gaussian_process.kernels import RBF, Sum, ConstantKernel
- >>> X, y = make_friedman2(n_samples=500, noise=0, random_state=0)
- >>> kernel = Sum(ConstantKernel(2), RBF())
- >>> gpr = GaussianProcessRegressor(kernel=kernel,
- ... random_state=0).fit(X, y)
- >>> gpr.score(X, y)
- 1.0
- >>> kernel
- 1.41**2 + RBF(length_scale=1)
- """
- def __call__(self, X, Y=None, eval_gradient=False):
- """Return the kernel k(X, Y) and optionally its gradient.
- Parameters
- ----------
- X : array-like of shape (n_samples_X, n_features) or list of object
- Left argument of the returned kernel k(X, Y)
- Y : array-like of shape (n_samples_Y, n_features) or list of object,\
- default=None
- Right argument of the returned kernel k(X, Y). If None, k(X, X)
- is evaluated instead.
- eval_gradient : bool, default=False
- Determines whether the gradient with respect to the log of
- the kernel hyperparameter is computed.
- Returns
- -------
- K : ndarray of shape (n_samples_X, n_samples_Y)
- Kernel k(X, Y)
- K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims),\
- optional
- The gradient of the kernel k(X, X) with respect to the log of the
- hyperparameter of the kernel. Only returned when `eval_gradient`
- is True.
- """
- if eval_gradient:
- K1, K1_gradient = self.k1(X, Y, eval_gradient=True)
- K2, K2_gradient = self.k2(X, Y, eval_gradient=True)
- return K1 + K2, np.dstack((K1_gradient, K2_gradient))
- else:
- return self.k1(X, Y) + self.k2(X, Y)
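- # Note: the gradient of a sum kernel is the gradients of k1 and k2 stacked
- # along the last axis; e.g. for Sum(ConstantKernel(), RBF()) on n samples,
- # K_gradient has shape (n, n, 2), one slice per log-hyperparameter.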
- def diag(self, X):
- """Returns the diagonal of the kernel k(X, X).
- The result of this method is identical to `np.diag(self(X))`; however,
- it can be evaluated more efficiently since only the diagonal is
- evaluated.
- Parameters
- ----------
- X : array-like of shape (n_samples_X, n_features) or list of object
- Argument to the kernel.
- Returns
- -------
- K_diag : ndarray of shape (n_samples_X,)
- Diagonal of kernel k(X, X)
- """
- return self.k1.diag(X) + self.k2.diag(X)
- def __repr__(self):
- return "{0} + {1}".format(self.k1, self.k2)
- class Product(KernelOperator):
- """The `Product` kernel takes two kernels :math:`k_1` and :math:`k_2`
- and combines them via
- .. math::
- k_{prod}(X, Y) = k_1(X, Y) * k_2(X, Y)
- Note that the `__mul__` magic method is overridden, so
- `Product(RBF(), RBF())` is equivalent to using the * operator
- with `RBF() * RBF()`.
- Read more in the :ref:`User Guide <gp_kernels>`.
- .. versionadded:: 0.18
- Parameters
- ----------
- k1 : Kernel
- The first base-kernel of the product-kernel
- k2 : Kernel
- The second base-kernel of the product-kernel
- Examples
- --------
- >>> from sklearn.datasets import make_friedman2
- >>> from sklearn.gaussian_process import GaussianProcessRegressor
- >>> from sklearn.gaussian_process.kernels import (RBF, Product,
- ... ConstantKernel)
- >>> X, y = make_friedman2(n_samples=500, noise=0, random_state=0)
- >>> kernel = Product(ConstantKernel(2), RBF())
- >>> gpr = GaussianProcessRegressor(kernel=kernel,
- ... random_state=0).fit(X, y)
- >>> gpr.score(X, y)
- 1.0
- >>> kernel
- 1.41**2 * RBF(length_scale=1)
- """
- def __call__(self, X, Y=None, eval_gradient=False):
- """Return the kernel k(X, Y) and optionally its gradient.
- Parameters
- ----------
- X : array-like of shape (n_samples_X, n_features) or list of object
- Left argument of the returned kernel k(X, Y)
- Y : array-like of shape (n_samples_Y, n_features) or list of object,\
- default=None
- Right argument of the returned kernel k(X, Y). If None, k(X, X)
- is evaluated instead.
- eval_gradient : bool, default=False
- Determines whether the gradient with respect to the log of
- the kernel hyperparameter is computed.
- Returns
- -------
- K : ndarray of shape (n_samples_X, n_samples_Y)
- Kernel k(X, Y)
- K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims), \
- optional
- The gradient of the kernel k(X, X) with respect to the log of the
- hyperparameter of the kernel. Only returned when `eval_gradient`
- is True.
- """
- if eval_gradient:
- K1, K1_gradient = self.k1(X, Y, eval_gradient=True)
- K2, K2_gradient = self.k2(X, Y, eval_gradient=True)
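- # Product rule: each gradient slice of one factor is multiplied by the
- # value of the other factor before the slices are stacked.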
- return K1 * K2, np.dstack(
- (K1_gradient * K2[:, :, np.newaxis], K2_gradient * K1[:, :, np.newaxis])
- )
- else:
- return self.k1(X, Y) * self.k2(X, Y)
- def diag(self, X):
- """Returns the diagonal of the kernel k(X, X).
- The result of this method is identical to np.diag(self(X)); however,
- it can be evaluated more efficiently since only the diagonal is
- evaluated.
- Parameters
- ----------
- X : array-like of shape (n_samples_X, n_features) or list of object
- Argument to the kernel.
- Returns
- -------
- K_diag : ndarray of shape (n_samples_X,)
- Diagonal of kernel k(X, X)
- """
- return self.k1.diag(X) * self.k2.diag(X)
- def __repr__(self):
- return "{0} * {1}".format(self.k1, self.k2)
- class Exponentiation(Kernel):
- """The Exponentiation kernel takes one base kernel and a scalar parameter
- :math:`p` and combines them via
- .. math::
- k_{exp}(X, Y) = k(X, Y)^p
- Note that the `__pow__` magic method is overridden, so
- `Exponentiation(RBF(), 2)` is equivalent to using the ** operator
- with `RBF() ** 2`.
- Read more in the :ref:`User Guide <gp_kernels>`.
- .. versionadded:: 0.18
- Parameters
- ----------
- kernel : Kernel
- The base kernel
- exponent : float
- The exponent for the base kernel
- Examples
- --------
- >>> from sklearn.datasets import make_friedman2
- >>> from sklearn.gaussian_process import GaussianProcessRegressor
- >>> from sklearn.gaussian_process.kernels import (RationalQuadratic,
- ... Exponentiation)
- >>> X, y = make_friedman2(n_samples=500, noise=0, random_state=0)
- >>> kernel = Exponentiation(RationalQuadratic(), exponent=2)
- >>> gpr = GaussianProcessRegressor(kernel=kernel, alpha=5,
- ... random_state=0).fit(X, y)
- >>> gpr.score(X, y)
- 0.419...
- >>> gpr.predict(X[:1,:], return_std=True)
- (array([635.5...]), array([0.559...]))
- """
- def __init__(self, kernel, exponent):
- self.kernel = kernel
- self.exponent = exponent
- def get_params(self, deep=True):
- """Get parameters of this kernel.
- Parameters
- ----------
- deep : bool, default=True
- If True, will return the parameters for this estimator and
- contained subobjects that are estimators.
- Returns
- -------
- params : dict
- Parameter names mapped to their values.
- """
- params = dict(kernel=self.kernel, exponent=self.exponent)
- if deep:
- deep_items = self.kernel.get_params().items()
- params.update(("kernel__" + k, val) for k, val in deep_items)
- return params
- @property
- def hyperparameters(self):
- """Returns a list of all hyperparameter."""
- r = []
- for hyperparameter in self.kernel.hyperparameters:
- r.append(
- Hyperparameter(
- "kernel__" + hyperparameter.name,
- hyperparameter.value_type,
- hyperparameter.bounds,
- hyperparameter.n_elements,
- )
- )
- return r
- @property
- def theta(self):
- """Returns the (flattened, log-transformed) non-fixed hyperparameters.
- Note that theta are typically the log-transformed values of the
- kernel's hyperparameters as this representation of the search space
- is more amenable for hyperparameter search, as hyperparameters like
- length-scales naturally live on a log-scale.
- Returns
- -------
- theta : ndarray of shape (n_dims,)
- The non-fixed, log-transformed hyperparameters of the kernel
- """
- return self.kernel.theta
- @theta.setter
- def theta(self, theta):
- """Sets the (flattened, log-transformed) non-fixed hyperparameters.
- Parameters
- ----------
- theta : ndarray of shape (n_dims,)
- The non-fixed, log-transformed hyperparameters of the kernel
- """
- self.kernel.theta = theta
- @property
- def bounds(self):
- """Returns the log-transformed bounds on the theta.
- Returns
- -------
- bounds : ndarray of shape (n_dims, 2)
- The log-transformed bounds on the kernel's hyperparameters theta
- """
- return self.kernel.bounds
- def __eq__(self, b):
- if type(self) != type(b):
- return False
- return self.kernel == b.kernel and self.exponent == b.exponent
- def __call__(self, X, Y=None, eval_gradient=False):
- """Return the kernel k(X, Y) and optionally its gradient.
- Parameters
- ----------
- X : array-like of shape (n_samples_X, n_features) or list of object
- Left argument of the returned kernel k(X, Y)
- Y : array-like of shape (n_samples_Y, n_features) or list of object,\
- default=None
- Right argument of the returned kernel k(X, Y). If None, k(X, X)
- is evaluated instead.
- eval_gradient : bool, default=False
- Determines whether the gradient with respect to the log of
- the kernel hyperparameter is computed.
- Returns
- -------
- K : ndarray of shape (n_samples_X, n_samples_Y)
- Kernel k(X, Y)
- K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims),\
- optional
- The gradient of the kernel k(X, X) with respect to the log of the
- hyperparameter of the kernel. Only returned when `eval_gradient`
- is True.
- """
- if eval_gradient:
- K, K_gradient = self.kernel(X, Y, eval_gradient=True)
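- # Chain rule: d(K**p)/dtheta = p * K**(p - 1) * dK/dtheta, applied
- # slice-wise to the gradient tensor of the base kernel.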
- K_gradient *= self.exponent * K[:, :, np.newaxis] ** (self.exponent - 1)
- return K**self.exponent, K_gradient
- else:
- K = self.kernel(X, Y, eval_gradient=False)
- return K**self.exponent
- def diag(self, X):
- """Returns the diagonal of the kernel k(X, X).
- The result of this method is identical to np.diag(self(X)); however,
- it can be evaluated more efficiently since only the diagonal is
- evaluated.
- Parameters
- ----------
- X : array-like of shape (n_samples_X, n_features) or list of object
- Argument to the kernel.
- Returns
- -------
- K_diag : ndarray of shape (n_samples_X,)
- Diagonal of kernel k(X, X)
- """
- return self.kernel.diag(X) ** self.exponent
- def __repr__(self):
- return "{0} ** {1}".format(self.kernel, self.exponent)
- def is_stationary(self):
- """Returns whether the kernel is stationary."""
- return self.kernel.is_stationary()
- @property
- def requires_vector_input(self):
- """Returns whether the kernel is defined on discrete structures."""
- return self.kernel.requires_vector_input
- class ConstantKernel(StationaryKernelMixin, GenericKernelMixin, Kernel):
- """Constant kernel.
- Can be used as part of a product-kernel where it scales the magnitude of
- the other factor (kernel) or as part of a sum-kernel, where it modifies
- the mean of the Gaussian process.
- .. math::
- k(x_1, x_2) = constant\\_value \\;\\forall\\; x_1, x_2
- Adding a constant kernel is equivalent to adding a constant::
- kernel = RBF() + ConstantKernel(constant_value=2)
- is the same as::
- kernel = RBF() + 2
- Read more in the :ref:`User Guide <gp_kernels>`.
- .. versionadded:: 0.18
- Parameters
- ----------
- constant_value : float, default=1.0
- The constant value which defines the covariance:
- k(x_1, x_2) = constant_value
- constant_value_bounds : pair of floats >= 0 or "fixed", default=(1e-5, 1e5)
- The lower and upper bound on `constant_value`.
- If set to "fixed", `constant_value` cannot be changed during
- hyperparameter tuning.
- Examples
- --------
- >>> from sklearn.datasets import make_friedman2
- >>> from sklearn.gaussian_process import GaussianProcessRegressor
- >>> from sklearn.gaussian_process.kernels import RBF, ConstantKernel
- >>> X, y = make_friedman2(n_samples=500, noise=0, random_state=0)
- >>> kernel = RBF() + ConstantKernel(constant_value=2)
- >>> gpr = GaussianProcessRegressor(kernel=kernel, alpha=5,
- ... random_state=0).fit(X, y)
- >>> gpr.score(X, y)
- 0.3696...
- >>> gpr.predict(X[:1,:], return_std=True)
- (array([606.1...]), array([0.24...]))
- """
- def __init__(self, constant_value=1.0, constant_value_bounds=(1e-5, 1e5)):
- self.constant_value = constant_value
- self.constant_value_bounds = constant_value_bounds
- @property
- def hyperparameter_constant_value(self):
- return Hyperparameter("constant_value", "numeric", self.constant_value_bounds)
- def __call__(self, X, Y=None, eval_gradient=False):
- """Return the kernel k(X, Y) and optionally its gradient.
- Parameters
- ----------
- X : array-like of shape (n_samples_X, n_features) or list of object
- Left argument of the returned kernel k(X, Y)
- Y : array-like of shape (n_samples_Y, n_features) or list of object, \
- default=None
- Right argument of the returned kernel k(X, Y). If None, k(X, X)
- is evaluated instead.
- eval_gradient : bool, default=False
- Determines whether the gradient with respect to the log of
- the kernel hyperparameter is computed.
- Only supported when Y is None.
- Returns
- -------
- K : ndarray of shape (n_samples_X, n_samples_Y)
- Kernel k(X, Y)
- K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims), \
- optional
- The gradient of the kernel k(X, X) with respect to the log of the
- hyperparameter of the kernel. Only returned when eval_gradient
- is True.
- """
- if Y is None:
- Y = X
- elif eval_gradient:
- raise ValueError("Gradient can only be evaluated when Y is None.")
- K = np.full(
- (_num_samples(X), _num_samples(Y)),
- self.constant_value,
- dtype=np.array(self.constant_value).dtype,
- )
- if eval_gradient:
- if not self.hyperparameter_constant_value.fixed:
- return (
- K,
- np.full(
- (_num_samples(X), _num_samples(X), 1),
- self.constant_value,
- dtype=np.array(self.constant_value).dtype,
- ),
- )
- else:
- return K, np.empty((_num_samples(X), _num_samples(X), 0))
- else:
- return K
- def diag(self, X):
- """Returns the diagonal of the kernel k(X, X).
- The result of this method is identical to np.diag(self(X)); however,
- it can be evaluated more efficiently since only the diagonal is
- evaluated.
- Parameters
- ----------
- X : array-like of shape (n_samples_X, n_features) or list of object
- Argument to the kernel.
- Returns
- -------
- K_diag : ndarray of shape (n_samples_X,)
- Diagonal of kernel k(X, X)
- """
- return np.full(
- _num_samples(X),
- self.constant_value,
- dtype=np.array(self.constant_value).dtype,
- )
- def __repr__(self):
- return "{0:.3g}**2".format(np.sqrt(self.constant_value))
- class WhiteKernel(StationaryKernelMixin, GenericKernelMixin, Kernel):
- """White kernel.
- The main use-case of this kernel is as part of a sum-kernel where it
- explains the noise of the signal as independently and identically
- normally-distributed. The parameter noise_level equals the variance of this
- noise.
- .. math::
- k(x_1, x_2) = noise\\_level \\text{ if } x_1 == x_2 \\text{ else } 0
- Read more in the :ref:`User Guide <gp_kernels>`.
- .. versionadded:: 0.18
- Parameters
- ----------
- noise_level : float, default=1.0
- Parameter controlling the noise level (variance)
- noise_level_bounds : pair of floats >= 0 or "fixed", default=(1e-5, 1e5)
- The lower and upper bound on 'noise_level'.
- If set to "fixed", 'noise_level' cannot be changed during
- hyperparameter tuning.
- Examples
- --------
- >>> from sklearn.datasets import make_friedman2
- >>> from sklearn.gaussian_process import GaussianProcessRegressor
- >>> from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel
- >>> X, y = make_friedman2(n_samples=500, noise=0, random_state=0)
- >>> kernel = DotProduct() + WhiteKernel(noise_level=0.5)
- >>> gpr = GaussianProcessRegressor(kernel=kernel,
- ... random_state=0).fit(X, y)
- >>> gpr.score(X, y)
- 0.3680...
- >>> gpr.predict(X[:2,:], return_std=True)
- (array([653.0..., 592.1...]), array([316.6..., 316.6...]))
- """
- def __init__(self, noise_level=1.0, noise_level_bounds=(1e-5, 1e5)):
- self.noise_level = noise_level
- self.noise_level_bounds = noise_level_bounds
- @property
- def hyperparameter_noise_level(self):
- return Hyperparameter("noise_level", "numeric", self.noise_level_bounds)
- def __call__(self, X, Y=None, eval_gradient=False):
- """Return the kernel k(X, Y) and optionally its gradient.
- Parameters
- ----------
- X : array-like of shape (n_samples_X, n_features) or list of object
- Left argument of the returned kernel k(X, Y)
- Y : array-like of shape (n_samples_Y, n_features) or list of object,\
- default=None
- Right argument of the returned kernel k(X, Y). If None, k(X, X)
- is evaluated instead.
- eval_gradient : bool, default=False
- Determines whether the gradient with respect to the log of
- the kernel hyperparameter is computed.
- Only supported when Y is None.
- Returns
- -------
- K : ndarray of shape (n_samples_X, n_samples_Y)
- Kernel k(X, Y)
- K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims),\
- optional
- The gradient of the kernel k(X, X) with respect to the log of the
- hyperparameter of the kernel. Only returned when eval_gradient
- is True.
- """
- if Y is not None and eval_gradient:
- raise ValueError("Gradient can only be evaluated when Y is None.")
- if Y is None:
- K = self.noise_level * np.eye(_num_samples(X))
- if eval_gradient:
- if not self.hyperparameter_noise_level.fixed:
- return (
- K,
- self.noise_level * np.eye(_num_samples(X))[:, :, np.newaxis],
- )
- else:
- return K, np.empty((_num_samples(X), _num_samples(X), 0))
- else:
- return K
- else:
- return np.zeros((_num_samples(X), _num_samples(Y)))
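- # Note: the white kernel contributes noise_level * I only when evaluated on
- # a single argument (Y is None); cross-covariances k(X, Y) are zero, so the
- # noise term affects only the diagonal of the training covariance.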
- def diag(self, X):
- """Returns the diagonal of the kernel k(X, X).
- The result of this method is identical to np.diag(self(X)); however,
- it can be evaluated more efficiently since only the diagonal is
- evaluated.
- Parameters
- ----------
- X : array-like of shape (n_samples_X, n_features) or list of object
- Argument to the kernel.
- Returns
- -------
- K_diag : ndarray of shape (n_samples_X,)
- Diagonal of kernel k(X, X)
- """
- return np.full(
- _num_samples(X), self.noise_level, dtype=np.array(self.noise_level).dtype
- )
- def __repr__(self):
- return "{0}(noise_level={1:.3g})".format(
- self.__class__.__name__, self.noise_level
- )
- class RBF(StationaryKernelMixin, NormalizedKernelMixin, Kernel):
- """Radial basis function kernel (aka squared-exponential kernel).
- The RBF kernel is a stationary kernel. It is also known as the
- "squared exponential" kernel. It is parameterized by a length scale
- parameter :math:`l>0`, which can either be a scalar (isotropic variant
- of the kernel) or a vector with the same number of dimensions as the inputs
- X (anisotropic variant of the kernel). The kernel is given by:
- .. math::
- k(x_i, x_j) = \\exp\\left(- \\frac{d(x_i, x_j)^2}{2l^2} \\right)
- where :math:`l` is the length scale of the kernel and
- :math:`d(\\cdot,\\cdot)` is the Euclidean distance.
- For advice on how to set the length scale parameter, see e.g. [1]_.
- This kernel is infinitely differentiable, which implies that GPs with this
- kernel as covariance function have mean square derivatives of all orders,
- and are thus very smooth.
- See [2]_, Chapter 4, Section 4.2, for further details of the RBF kernel.
- Read more in the :ref:`User Guide <gp_kernels>`.
- .. versionadded:: 0.18
- Parameters
- ----------
- length_scale : float or ndarray of shape (n_features,), default=1.0
- The length scale of the kernel. If a float, an isotropic kernel is
- used. If an array, an anisotropic kernel is used where each dimension
- of l defines the length-scale of the respective feature dimension.
- length_scale_bounds : pair of floats >= 0 or "fixed", default=(1e-5, 1e5)
- The lower and upper bound on 'length_scale'.
- If set to "fixed", 'length_scale' cannot be changed during
- hyperparameter tuning.
- References
- ----------
- .. [1] `David Duvenaud (2014). "The Kernel Cookbook:
- Advice on Covariance functions".
- <https://www.cs.toronto.edu/~duvenaud/cookbook/>`_
- .. [2] `Carl Edward Rasmussen, Christopher K. I. Williams (2006).
- "Gaussian Processes for Machine Learning". The MIT Press.
- <http://www.gaussianprocess.org/gpml/>`_
- Examples
- --------
- >>> from sklearn.datasets import load_iris
- >>> from sklearn.gaussian_process import GaussianProcessClassifier
- >>> from sklearn.gaussian_process.kernels import RBF
- >>> X, y = load_iris(return_X_y=True)
- >>> kernel = 1.0 * RBF(1.0)
- >>> gpc = GaussianProcessClassifier(kernel=kernel,
- ... random_state=0).fit(X, y)
- >>> gpc.score(X, y)
- 0.9866...
- >>> gpc.predict_proba(X[:2,:])
- array([[0.8354..., 0.03228..., 0.1322...],
- [0.7906..., 0.0652..., 0.1441...]])
- """
- def __init__(self, length_scale=1.0, length_scale_bounds=(1e-5, 1e5)):
- self.length_scale = length_scale
- self.length_scale_bounds = length_scale_bounds
- @property
- def anisotropic(self):
- return np.iterable(self.length_scale) and len(self.length_scale) > 1
- @property
- def hyperparameter_length_scale(self):
- if self.anisotropic:
- return Hyperparameter(
- "length_scale",
- "numeric",
- self.length_scale_bounds,
- len(self.length_scale),
- )
- return Hyperparameter("length_scale", "numeric", self.length_scale_bounds)
- def __call__(self, X, Y=None, eval_gradient=False):
- """Return the kernel k(X, Y) and optionally its gradient.
- Parameters
- ----------
- X : ndarray of shape (n_samples_X, n_features)
- Left argument of the returned kernel k(X, Y)
- Y : ndarray of shape (n_samples_Y, n_features), default=None
- Right argument of the returned kernel k(X, Y). If None, k(X, X)
- is evaluated instead.
- eval_gradient : bool, default=False
- Determines whether the gradient with respect to the log of
- the kernel hyperparameter is computed.
- Only supported when Y is None.
- Returns
- -------
- K : ndarray of shape (n_samples_X, n_samples_Y)
- Kernel k(X, Y)
- K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims), \
- optional
- The gradient of the kernel k(X, X) with respect to the log of the
- hyperparameter of the kernel. Only returned when `eval_gradient`
- is True.
- """
- X = np.atleast_2d(X)
- length_scale = _check_length_scale(X, self.length_scale)
- if Y is None:
- dists = pdist(X / length_scale, metric="sqeuclidean")
- K = np.exp(-0.5 * dists)
- # convert from upper-triangular matrix to square matrix
- K = squareform(K)
- np.fill_diagonal(K, 1)
- else:
- if eval_gradient:
- raise ValueError("Gradient can only be evaluated when Y is None.")
- dists = cdist(X / length_scale, Y / length_scale, metric="sqeuclidean")
- K = np.exp(-0.5 * dists)
- if eval_gradient:
- if self.hyperparameter_length_scale.fixed:
- # Hyperparameter l kept fixed
- return K, np.empty((X.shape[0], X.shape[0], 0))
- elif not self.anisotropic or length_scale.shape[0] == 1:
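- # dK/d(log l) = K * d(x, y)^2 / l^2; `dists` already holds the squared
- # distances divided by l^2.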
- K_gradient = (K * squareform(dists))[:, :, np.newaxis]
- return K, K_gradient
- elif self.anisotropic:
- # We need to recompute the pairwise dimension-wise distances
- K_gradient = (X[:, np.newaxis, :] - X[np.newaxis, :, :]) ** 2 / (
- length_scale**2
- )
- K_gradient *= K[..., np.newaxis]
- return K, K_gradient
- else:
- return K
- def __repr__(self):
- if self.anisotropic:
- return "{0}(length_scale=[{1}])".format(
- self.__class__.__name__,
- ", ".join(map("{0:.3g}".format, self.length_scale)),
- )
- else: # isotropic
- return "{0}(length_scale={1:.3g})".format(
- self.__class__.__name__, np.ravel(self.length_scale)[0]
- )
- class Matern(RBF):
- """Matern kernel.
- The class of Matern kernels is a generalization of the :class:`RBF`.
- It has an additional parameter :math:`\\nu` which controls the
- smoothness of the resulting function. The smaller :math:`\\nu`,
- the less smooth the approximated function is.
- As :math:`\\nu\\rightarrow\\infty`, the kernel becomes equivalent to
- the :class:`RBF` kernel. When :math:`\\nu = 1/2`, the Matérn kernel
- becomes identical to the absolute exponential kernel.
- Important intermediate values are
- :math:`\\nu=1.5` (once differentiable functions)
- and :math:`\\nu=2.5` (twice differentiable functions).
- The kernel is given by:
- .. math::
- k(x_i, x_j) = \\frac{1}{\\Gamma(\\nu)2^{\\nu-1}}\\Bigg(
- \\frac{\\sqrt{2\\nu}}{l} d(x_i , x_j )
- \\Bigg)^\\nu K_\\nu\\Bigg(
- \\frac{\\sqrt{2\\nu}}{l} d(x_i , x_j )\\Bigg)
- where :math:`d(\\cdot,\\cdot)` is the Euclidean distance,
- :math:`K_{\\nu}(\\cdot)` is a modified Bessel function and
- :math:`\\Gamma(\\cdot)` is the gamma function.
- See [1]_, Chapter 4, Section 4.2, for details regarding the different
- variants of the Matern kernel.
- Read more in the :ref:`User Guide <gp_kernels>`.
- .. versionadded:: 0.18
- Parameters
- ----------
- length_scale : float or ndarray of shape (n_features,), default=1.0
- The length scale of the kernel. If a float, an isotropic kernel is
- used. If an array, an anisotropic kernel is used where each dimension
- of l defines the length-scale of the respective feature dimension.
- length_scale_bounds : pair of floats >= 0 or "fixed", default=(1e-5, 1e5)
- The lower and upper bound on 'length_scale'.
- If set to "fixed", 'length_scale' cannot be changed during
- hyperparameter tuning.
- nu : float, default=1.5
- The parameter nu controlling the smoothness of the learned function.
- The smaller nu, the less smooth the approximated function is.
- For nu=inf, the kernel becomes equivalent to the RBF kernel and for
- nu=0.5 to the absolute exponential kernel. Important intermediate
- values are nu=1.5 (once differentiable functions) and nu=2.5
- (twice differentiable functions). Note that values of nu not in
- [0.5, 1.5, 2.5, inf] incur a considerably higher computational cost
- (approx. 10 times higher) since they require evaluating the modified
- Bessel function. Furthermore, in contrast to l, nu is kept fixed to
- its initial value and not optimized.
- References
- ----------
- .. [1] `Carl Edward Rasmussen, Christopher K. I. Williams (2006).
- "Gaussian Processes for Machine Learning". The MIT Press.
- <http://www.gaussianprocess.org/gpml/>`_
- Examples
- --------
- >>> from sklearn.datasets import load_iris
- >>> from sklearn.gaussian_process import GaussianProcessClassifier
- >>> from sklearn.gaussian_process.kernels import Matern
- >>> X, y = load_iris(return_X_y=True)
- >>> kernel = 1.0 * Matern(length_scale=1.0, nu=1.5)
- >>> gpc = GaussianProcessClassifier(kernel=kernel,
- ... random_state=0).fit(X, y)
- >>> gpc.score(X, y)
- 0.9866...
- >>> gpc.predict_proba(X[:2,:])
- array([[0.8513..., 0.0368..., 0.1117...],
- [0.8086..., 0.0693..., 0.1220...]])
- """
- def __init__(self, length_scale=1.0, length_scale_bounds=(1e-5, 1e5), nu=1.5):
- super().__init__(length_scale, length_scale_bounds)
- self.nu = nu
- def __call__(self, X, Y=None, eval_gradient=False):
- """Return the kernel k(X, Y) and optionally its gradient.
- Parameters
- ----------
- X : ndarray of shape (n_samples_X, n_features)
- Left argument of the returned kernel k(X, Y)
- Y : ndarray of shape (n_samples_Y, n_features), default=None
- Right argument of the returned kernel k(X, Y). If None, k(X, X)
- is evaluated instead.
- eval_gradient : bool, default=False
- Determines whether the gradient with respect to the log of
- the kernel hyperparameter is computed.
- Only supported when Y is None.
- Returns
- -------
- K : ndarray of shape (n_samples_X, n_samples_Y)
- Kernel k(X, Y)
- K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims), \
- optional
- The gradient of the kernel k(X, X) with respect to the log of the
- hyperparameter of the kernel. Only returned when `eval_gradient`
- is True.
- """
- X = np.atleast_2d(X)
- length_scale = _check_length_scale(X, self.length_scale)
- if Y is None:
- dists = pdist(X / length_scale, metric="euclidean")
- else:
- if eval_gradient:
- raise ValueError("Gradient can only be evaluated when Y is None.")
- dists = cdist(X / length_scale, Y / length_scale, metric="euclidean")
- if self.nu == 0.5:
- K = np.exp(-dists)
- elif self.nu == 1.5:
- K = dists * math.sqrt(3)
- K = (1.0 + K) * np.exp(-K)
- elif self.nu == 2.5:
- K = dists * math.sqrt(5)
- K = (1.0 + K + K**2 / 3.0) * np.exp(-K)
- elif self.nu == np.inf:
- K = np.exp(-(dists**2) / 2.0)
- else: # general case; expensive to evaluate
- K = dists
- K[K == 0.0] += np.finfo(float).eps # strict zeros result in nan
- tmp = math.sqrt(2 * self.nu) * K
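- # reuse K as an in-place output buffer: fill it with the constant prefactor
- # 2**(1 - nu) / Gamma(nu), then multiply by tmp**nu and by the modified
- # Bessel function K_nu(tmp), matching the docstring formula above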
- K.fill((2 ** (1.0 - self.nu)) / gamma(self.nu))
- K *= tmp**self.nu
- K *= kv(self.nu, tmp)
- if Y is None:
- # convert from upper-triangular matrix to square matrix
- K = squareform(K)
- np.fill_diagonal(K, 1)
- if eval_gradient:
- if self.hyperparameter_length_scale.fixed:
- # Hyperparameter l kept fixed
- K_gradient = np.empty((X.shape[0], X.shape[0], 0))
- return K, K_gradient
- # We need to recompute the pairwise dimension-wise distances
- if self.anisotropic:
- D = (X[:, np.newaxis, :] - X[np.newaxis, :, :]) ** 2 / (
- length_scale**2
- )
- else:
- D = squareform(dists**2)[:, :, np.newaxis]
- if self.nu == 0.5:
- denominator = np.sqrt(D.sum(axis=2))[:, :, np.newaxis]
- divide_result = np.zeros_like(D)
- np.divide(
- D,
- denominator,
- out=divide_result,
- where=denominator != 0,
- )
- K_gradient = K[..., np.newaxis] * divide_result
- elif self.nu == 1.5:
- K_gradient = 3 * D * np.exp(-np.sqrt(3 * D.sum(-1)))[..., np.newaxis]
- elif self.nu == 2.5:
- tmp = np.sqrt(5 * D.sum(-1))[..., np.newaxis]
- K_gradient = 5.0 / 3.0 * D * (tmp + 1) * np.exp(-tmp)
- elif self.nu == np.inf:
- K_gradient = D * K[..., np.newaxis]
- else:
- # approximate gradient numerically
- def f(theta): # helper function
- return self.clone_with_theta(theta)(X, Y)
- return K, _approx_fprime(self.theta, f, 1e-10)
- if not self.anisotropic:
- return K, K_gradient[:, :].sum(-1)[:, :, np.newaxis]
- else:
- return K, K_gradient
- else:
- return K
- def __repr__(self):
- if self.anisotropic:
- return "{0}(length_scale=[{1}], nu={2:.3g})".format(
- self.__class__.__name__,
- ", ".join(map("{0:.3g}".format, self.length_scale)),
- self.nu,
- )
- else:
- return "{0}(length_scale={1:.3g}, nu={2:.3g})".format(
- self.__class__.__name__, np.ravel(self.length_scale)[0], self.nu
- )
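- # --- Illustrative sketch (not part of the library code above) ---
- # Cross-check that the nu=1.5 closed form used in Matern.__call__ agrees with
- # the general Bessel-function expression from the class docstring,
- # k(d) = 2**(1-nu)/Gamma(nu) * (sqrt(2*nu)*d/l)**nu * K_nu(sqrt(2*nu)*d/l).
- # `X_demo` is an arbitrary toy input; assumes scikit-learn and SciPy are installed.
- import numpy as np
- from scipy.special import gamma, kv
- from sklearn.gaussian_process.kernels import Matern
- nu, rng = 1.5, np.random.RandomState(0)
- X_demo = rng.rand(5, 2)
- K_closed = Matern(length_scale=1.0, nu=nu)(X_demo)
- d = np.linalg.norm(X_demo[:, None, :] - X_demo[None, :, :], axis=-1)
- arg = np.sqrt(2 * nu) * np.where(d == 0.0, np.finfo(float).eps, d)  # length_scale = 1
- K_general = (2 ** (1.0 - nu)) / gamma(nu) * arg**nu * kv(nu, arg)
- np.fill_diagonal(K_general, 1.0)  # exact value at zero distance
- print(np.allclose(K_closed, K_general))  # expected: True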
- class RationalQuadratic(StationaryKernelMixin, NormalizedKernelMixin, Kernel):
- """Rational Quadratic kernel.
- The RationalQuadratic kernel can be seen as a scale mixture (an infinite
- sum) of RBF kernels with different characteristic length scales. It is
- parameterized by a length scale parameter :math:`l>0` and a scale
- mixture parameter :math:`\\alpha>0`. Only the isotropic variant
- where length_scale :math:`l` is a scalar is supported at the moment.
- The kernel is given by:
- .. math::
- k(x_i, x_j) = \\left(
- 1 + \\frac{d(x_i, x_j)^2 }{ 2\\alpha l^2}\\right)^{-\\alpha}
- where :math:`\\alpha` is the scale mixture parameter, :math:`l` is
- the length scale of the kernel and :math:`d(\\cdot,\\cdot)` is the
- Euclidean distance.
- For advice on how to set the parameters, see e.g. [1]_.
- Read more in the :ref:`User Guide <gp_kernels>`.
- .. versionadded:: 0.18
- Parameters
- ----------
- length_scale : float > 0, default=1.0
- The length scale of the kernel.
- alpha : float > 0, default=1.0
- Scale mixture parameter
- length_scale_bounds : pair of floats >= 0 or "fixed", default=(1e-5, 1e5)
- The lower and upper bound on 'length_scale'.
- If set to "fixed", 'length_scale' cannot be changed during
- hyperparameter tuning.
- alpha_bounds : pair of floats >= 0 or "fixed", default=(1e-5, 1e5)
- The lower and upper bound on 'alpha'.
- If set to "fixed", 'alpha' cannot be changed during
- hyperparameter tuning.
- References
- ----------
- .. [1] `David Duvenaud (2014). "The Kernel Cookbook:
- Advice on Covariance functions".
- <https://www.cs.toronto.edu/~duvenaud/cookbook/>`_
- Examples
- --------
- >>> from sklearn.datasets import load_iris
- >>> from sklearn.gaussian_process import GaussianProcessClassifier
- >>> from sklearn.gaussian_process.kernels import RationalQuadratic
- >>> X, y = load_iris(return_X_y=True)
- >>> kernel = RationalQuadratic(length_scale=1.0, alpha=1.5)
- >>> gpc = GaussianProcessClassifier(kernel=kernel,
- ... random_state=0).fit(X, y)
- >>> gpc.score(X, y)
- 0.9733...
- >>> gpc.predict_proba(X[:2,:])
- array([[0.8881..., 0.0566..., 0.05518...],
- [0.8678..., 0.0707... , 0.0614...]])
- """
- def __init__(
- self,
- length_scale=1.0,
- alpha=1.0,
- length_scale_bounds=(1e-5, 1e5),
- alpha_bounds=(1e-5, 1e5),
- ):
- self.length_scale = length_scale
- self.alpha = alpha
- self.length_scale_bounds = length_scale_bounds
- self.alpha_bounds = alpha_bounds
- @property
- def hyperparameter_length_scale(self):
- return Hyperparameter("length_scale", "numeric", self.length_scale_bounds)
- @property
- def hyperparameter_alpha(self):
- return Hyperparameter("alpha", "numeric", self.alpha_bounds)
- def __call__(self, X, Y=None, eval_gradient=False):
- """Return the kernel k(X, Y) and optionally its gradient.
- Parameters
- ----------
- X : ndarray of shape (n_samples_X, n_features)
- Left argument of the returned kernel k(X, Y)
- Y : ndarray of shape (n_samples_Y, n_features), default=None
- Right argument of the returned kernel k(X, Y). If None, k(X, X)
- is evaluated instead.
- eval_gradient : bool, default=False
- Determines whether the gradient with respect to the log of
- the kernel hyperparameter is computed.
- Only supported when Y is None.
- Returns
- -------
- K : ndarray of shape (n_samples_X, n_samples_Y)
- Kernel k(X, Y)
- K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims)
- The gradient of the kernel k(X, X) with respect to the log of the
- hyperparameter of the kernel. Only returned when eval_gradient
- is True.
- """
- if len(np.atleast_1d(self.length_scale)) > 1:
- raise AttributeError(
- "RationalQuadratic kernel only supports isotropic version, "
- "please use a single scalar for length_scale"
- )
- X = np.atleast_2d(X)
- if Y is None:
- dists = squareform(pdist(X, metric="sqeuclidean"))
- tmp = dists / (2 * self.alpha * self.length_scale**2)
- base = 1 + tmp
- K = base**-self.alpha
- np.fill_diagonal(K, 1)
- else:
- if eval_gradient:
- raise ValueError("Gradient can only be evaluated when Y is None.")
- dists = cdist(X, Y, metric="sqeuclidean")
- K = (1 + dists / (2 * self.alpha * self.length_scale**2)) ** -self.alpha
- if eval_gradient:
- # gradient with respect to length_scale
- if not self.hyperparameter_length_scale.fixed:
- length_scale_gradient = dists * K / (self.length_scale**2 * base)
- length_scale_gradient = length_scale_gradient[:, :, np.newaxis]
- else: # l is kept fixed
- length_scale_gradient = np.empty((K.shape[0], K.shape[1], 0))
- # gradient with respect to alpha
- if not self.hyperparameter_alpha.fixed:
- alpha_gradient = K * (
- -self.alpha * np.log(base)
- + dists / (2 * self.length_scale**2 * base)
- )
- alpha_gradient = alpha_gradient[:, :, np.newaxis]
- else: # alpha is kept fixed
- alpha_gradient = np.empty((K.shape[0], K.shape[1], 0))
- return K, np.dstack((alpha_gradient, length_scale_gradient))
- else:
- return K
- def __repr__(self):
- return "{0}(alpha={1:.3g}, length_scale={2:.3g})".format(
- self.__class__.__name__, self.alpha, self.length_scale
- )
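- # --- Illustrative sketch (not part of the library code above) ---
- # The docstring describes RationalQuadratic as a scale mixture of RBF kernels;
- # as alpha grows, (1 + d**2/(2*alpha*l**2))**(-alpha) approaches exp(-d**2/(2*l**2)),
- # i.e. an RBF kernel with the same length scale. `X_demo` and the parameter
- # values below are arbitrary demo choices; assumes scikit-learn is installed.
- import numpy as np
- from sklearn.gaussian_process.kernels import RBF, RationalQuadratic
- rng = np.random.RandomState(0)
- X_demo = rng.rand(6, 3)
- K_rq = RationalQuadratic(length_scale=0.7, alpha=1e5)(X_demo)
- K_rbf = RBF(length_scale=0.7)(X_demo)
- print(np.max(np.abs(K_rq - K_rbf)))  # expected: a very small value (roughly 1e-5 or below)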
- class ExpSineSquared(StationaryKernelMixin, NormalizedKernelMixin, Kernel):
- r"""Exp-Sine-Squared kernel (aka periodic kernel).
- The ExpSineSquared kernel allows one to model functions which repeat
- themselves exactly. It is parameterized by a length scale
- parameter :math:`l>0` and a periodicity parameter :math:`p>0`.
- Only the isotropic variant where :math:`l` is a scalar is
- supported at the moment. The kernel is given by:
- .. math::
- k(x_i, x_j) = \text{exp}\left(-
- \frac{ 2\sin^2(\pi d(x_i, x_j)/p) }{ l^ 2} \right)
- where :math:`l` is the length scale of the kernel, :math:`p` the
- periodicity of the kernel and :math:`d(\cdot,\cdot)` is the
- Euclidean distance.
- Read more in the :ref:`User Guide <gp_kernels>`.
- .. versionadded:: 0.18
- Parameters
- ----------
- length_scale : float > 0, default=1.0
- The length scale of the kernel.
- periodicity : float > 0, default=1.0
- The periodicity of the kernel.
- length_scale_bounds : pair of floats >= 0 or "fixed", default=(1e-5, 1e5)
- The lower and upper bound on 'length_scale'.
- If set to "fixed", 'length_scale' cannot be changed during
- hyperparameter tuning.
- periodicity_bounds : pair of floats >= 0 or "fixed", default=(1e-5, 1e5)
- The lower and upper bound on 'periodicity'.
- If set to "fixed", 'periodicity' cannot be changed during
- hyperparameter tuning.
- Examples
- --------
- >>> from sklearn.datasets import make_friedman2
- >>> from sklearn.gaussian_process import GaussianProcessRegressor
- >>> from sklearn.gaussian_process.kernels import ExpSineSquared
- >>> X, y = make_friedman2(n_samples=50, noise=0, random_state=0)
- >>> kernel = ExpSineSquared(length_scale=1, periodicity=1)
- >>> gpr = GaussianProcessRegressor(kernel=kernel, alpha=5,
- ... random_state=0).fit(X, y)
- >>> gpr.score(X, y)
- 0.0144...
- >>> gpr.predict(X[:2,:], return_std=True)
- (array([425.6..., 457.5...]), array([0.3894..., 0.3467...]))
- """
- def __init__(
- self,
- length_scale=1.0,
- periodicity=1.0,
- length_scale_bounds=(1e-5, 1e5),
- periodicity_bounds=(1e-5, 1e5),
- ):
- self.length_scale = length_scale
- self.periodicity = periodicity
- self.length_scale_bounds = length_scale_bounds
- self.periodicity_bounds = periodicity_bounds
- @property
- def hyperparameter_length_scale(self):
- """Returns the length scale"""
- return Hyperparameter("length_scale", "numeric", self.length_scale_bounds)
- @property
- def hyperparameter_periodicity(self):
- return Hyperparameter("periodicity", "numeric", self.periodicity_bounds)
- def __call__(self, X, Y=None, eval_gradient=False):
- """Return the kernel k(X, Y) and optionally its gradient.
- Parameters
- ----------
- X : ndarray of shape (n_samples_X, n_features)
- Left argument of the returned kernel k(X, Y)
- Y : ndarray of shape (n_samples_Y, n_features), default=None
- Right argument of the returned kernel k(X, Y). If None, k(X, X)
- is evaluated instead.
- eval_gradient : bool, default=False
- Determines whether the gradient with respect to the log of
- the kernel hyperparameter is computed.
- Only supported when Y is None.
- Returns
- -------
- K : ndarray of shape (n_samples_X, n_samples_Y)
- Kernel k(X, Y)
- K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims), \
- optional
- The gradient of the kernel k(X, X) with respect to the log of the
- hyperparameter of the kernel. Only returned when `eval_gradient`
- is True.
- """
- X = np.atleast_2d(X)
- if Y is None:
- dists = squareform(pdist(X, metric="euclidean"))
- arg = np.pi * dists / self.periodicity
- sin_of_arg = np.sin(arg)
- K = np.exp(-2 * (sin_of_arg / self.length_scale) ** 2)
- else:
- if eval_gradient:
- raise ValueError("Gradient can only be evaluated when Y is None.")
- dists = cdist(X, Y, metric="euclidean")
- K = np.exp(
- -2 * (np.sin(np.pi / self.periodicity * dists) / self.length_scale) ** 2
- )
- if eval_gradient:
- cos_of_arg = np.cos(arg)
- # gradient with respect to length_scale
- if not self.hyperparameter_length_scale.fixed:
- length_scale_gradient = 4 / self.length_scale**2 * sin_of_arg**2 * K
- length_scale_gradient = length_scale_gradient[:, :, np.newaxis]
- else: # length_scale is kept fixed
- length_scale_gradient = np.empty((K.shape[0], K.shape[1], 0))
- # gradient with respect to p
- if not self.hyperparameter_periodicity.fixed:
- periodicity_gradient = (
- 4 * arg / self.length_scale**2 * cos_of_arg * sin_of_arg * K
- )
- periodicity_gradient = periodicity_gradient[:, :, np.newaxis]
- else: # p is kept fixed
- periodicity_gradient = np.empty((K.shape[0], K.shape[1], 0))
- return K, np.dstack((length_scale_gradient, periodicity_gradient))
- else:
- return K
- def __repr__(self):
- return "{0}(length_scale={1:.3g}, periodicity={2:.3g})".format(
- self.__class__.__name__, self.length_scale, self.periodicity
- )
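- # --- Illustrative sketch (not part of the library code above) ---
- # ExpSineSquared is exactly periodic: comparing each point with its copy
- # shifted by one full period p gives sin(pi * p / p) = 0 and hence a kernel
- # value of 1, the same as k(x, x). The 1-D grid and the periodicity value are
- # arbitrary demo choices; assumes scikit-learn is installed.
- import numpy as np
- from sklearn.gaussian_process.kernels import ExpSineSquared
- p = 2.5
- k_periodic = ExpSineSquared(length_scale=1.0, periodicity=p)
- x = np.linspace(0.0, 5.0, 6).reshape(-1, 1)
- K_shift = k_periodic(x, x + p)
- print(np.allclose(np.diag(K_shift), 1.0))  # expected: True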
- class DotProduct(Kernel):
- r"""Dot-Product kernel.
- The DotProduct kernel is non-stationary and can be obtained from linear
- regression by putting :math:`N(0, 1)` priors on the coefficients
- of :math:`x_d (d = 1, . . . , D)` and a prior of :math:`N(0, \sigma_0^2)`
- on the bias. The DotProduct kernel is invariant to a rotation of
- the coordinates about the origin, but not translations.
- It is parameterized by a parameter sigma_0 :math:`\sigma_0`
- which controls the inhomogeneity of the kernel. For :math:`\sigma_0^2 = 0`,
- the kernel is called the homogeneous linear kernel, otherwise
- it is inhomogeneous. The kernel is given by
- .. math::
- k(x_i, x_j) = \sigma_0 ^ 2 + x_i \cdot x_j
- The DotProduct kernel is commonly combined with exponentiation.
- See [1]_, Chapter 4, Section 4.2, for further details regarding the
- DotProduct kernel.
- Read more in the :ref:`User Guide <gp_kernels>`.
- .. versionadded:: 0.18
- Parameters
- ----------
- sigma_0 : float >= 0, default=1.0
- Parameter controlling the inhomogeneity of the kernel. If sigma_0=0,
- the kernel is homogeneous.
- sigma_0_bounds : pair of floats >= 0 or "fixed", default=(1e-5, 1e5)
- The lower and upper bound on 'sigma_0'.
- If set to "fixed", 'sigma_0' cannot be changed during
- hyperparameter tuning.
- References
- ----------
- .. [1] `Carl Edward Rasmussen, Christopher K. I. Williams (2006).
- "Gaussian Processes for Machine Learning". The MIT Press.
- <http://www.gaussianprocess.org/gpml/>`_
- Examples
- --------
- >>> from sklearn.datasets import make_friedman2
- >>> from sklearn.gaussian_process import GaussianProcessRegressor
- >>> from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel
- >>> X, y = make_friedman2(n_samples=500, noise=0, random_state=0)
- >>> kernel = DotProduct() + WhiteKernel()
- >>> gpr = GaussianProcessRegressor(kernel=kernel,
- ... random_state=0).fit(X, y)
- >>> gpr.score(X, y)
- 0.3680...
- >>> gpr.predict(X[:2,:], return_std=True)
- (array([653.0..., 592.1...]), array([316.6..., 316.6...]))
- """
- def __init__(self, sigma_0=1.0, sigma_0_bounds=(1e-5, 1e5)):
- self.sigma_0 = sigma_0
- self.sigma_0_bounds = sigma_0_bounds
- @property
- def hyperparameter_sigma_0(self):
- return Hyperparameter("sigma_0", "numeric", self.sigma_0_bounds)
- def __call__(self, X, Y=None, eval_gradient=False):
- """Return the kernel k(X, Y) and optionally its gradient.
- Parameters
- ----------
- X : ndarray of shape (n_samples_X, n_features)
- Left argument of the returned kernel k(X, Y)
- Y : ndarray of shape (n_samples_Y, n_features), default=None
- Right argument of the returned kernel k(X, Y). If None, k(X, X)
- is evaluated instead.
- eval_gradient : bool, default=False
- Determines whether the gradient with respect to the log of
- the kernel hyperparameter is computed.
- Only supported when Y is None.
- Returns
- -------
- K : ndarray of shape (n_samples_X, n_samples_Y)
- Kernel k(X, Y)
- K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims),\
- optional
- The gradient of the kernel k(X, X) with respect to the log of the
- hyperparameter of the kernel. Only returned when `eval_gradient`
- is True.
- """
- X = np.atleast_2d(X)
- if Y is None:
- K = np.inner(X, X) + self.sigma_0**2
- else:
- if eval_gradient:
- raise ValueError("Gradient can only be evaluated when Y is None.")
- K = np.inner(X, Y) + self.sigma_0**2
- if eval_gradient:
- if not self.hyperparameter_sigma_0.fixed:
- K_gradient = np.empty((K.shape[0], K.shape[1], 1))
- K_gradient[..., 0] = 2 * self.sigma_0**2
- return K, K_gradient
- else:
- return K, np.empty((X.shape[0], X.shape[0], 0))
- else:
- return K
- def diag(self, X):
- """Returns the diagonal of the kernel k(X, X).
- The result of this method is identical to np.diag(self(X)); however,
- it can be evaluated more efficiently since only the diagonal is
- evaluated.
- Parameters
- ----------
- X : ndarray of shape (n_samples_X, n_features)
- Left argument of the returned kernel k(X, Y).
- Returns
- -------
- K_diag : ndarray of shape (n_samples_X,)
- Diagonal of kernel k(X, X).
- """
- return np.einsum("ij,ij->i", X, X) + self.sigma_0**2
- def is_stationary(self):
- """Returns whether the kernel is stationary."""
- return False
- def __repr__(self):
- return "{0}(sigma_0={1:.3g})".format(self.__class__.__name__, self.sigma_0)
- # adapted from scipy/optimize/optimize.py for functions with 2d output
- def _approx_fprime(xk, f, epsilon, args=()):
- f0 = f(*((xk,) + args))
- grad = np.zeros((f0.shape[0], f0.shape[1], len(xk)), float)
- ei = np.zeros((len(xk),), float)
- for k in range(len(xk)):
- ei[k] = 1.0
- d = epsilon * ei
- grad[:, :, k] = (f(*((xk + d,) + args)) - f0) / d[k]
- ei[k] = 0.0
- return grad
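- # --- Illustrative sketch (not part of the library code above) ---
- # _approx_fprime (the private helper defined directly above) forward-differences
- # a function returning a 2-D array and stacks the partial derivatives along a
- # third axis. Here it is checked against the analytic derivative of the
- # hypothetical demo function f_demo(theta) = exp(theta[0]) * A_demo.
- import numpy as np
- A_demo = np.array([[1.0, 2.0], [3.0, 4.0]])
- def f_demo(theta):
-     return np.exp(theta[0]) * A_demo
- theta0 = np.array([0.3])
- numeric = _approx_fprime(theta0, f_demo, 1e-8)
- analytic = np.exp(theta0[0]) * A_demo[..., np.newaxis]
- print(np.allclose(numeric, analytic, rtol=1e-5))  # expected: True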
- class PairwiseKernel(Kernel):
- """Wrapper for kernels in sklearn.metrics.pairwise.
- A thin wrapper around the functionality of the kernels in
- sklearn.metrics.pairwise.
- Note: Evaluation of eval_gradient is not analytic but numeric and all
- kernels support only isotropic distances. The parameter gamma is
- considered to be a hyperparameter and may be optimized. The other
- kernel parameters are set directly at initialization and are kept
- fixed.
- .. versionadded:: 0.18
- Parameters
- ----------
- gamma : float, default=1.0
- Parameter gamma of the pairwise kernel specified by metric. It should
- be positive.
- gamma_bounds : pair of floats >= 0 or "fixed", default=(1e-5, 1e5)
- The lower and upper bound on 'gamma'.
- If set to "fixed", 'gamma' cannot be changed during
- hyperparameter tuning.
- metric : {"linear", "additive_chi2", "chi2", "poly", "polynomial", \
- "rbf", "laplacian", "sigmoid", "cosine"} or callable, \
- default="linear"
- The metric to use when calculating kernel between instances in a
- feature array. If metric is a string, it must be one of the metrics
- in pairwise.PAIRWISE_KERNEL_FUNCTIONS.
- If metric is "precomputed", X is assumed to be a kernel matrix.
- Alternatively, if metric is a callable function, it is called on each
- pair of instances (rows) and the resulting value recorded. The callable
- should take two arrays from X as input and return a value indicating
- the kernel value (similarity) between them.
- pairwise_kernels_kwargs : dict, default=None
- All entries of this dict (if any) are passed as keyword arguments to
- the pairwise kernel function.
- Examples
- --------
- >>> from sklearn.datasets import load_iris
- >>> from sklearn.gaussian_process import GaussianProcessClassifier
- >>> from sklearn.gaussian_process.kernels import PairwiseKernel
- >>> X, y = load_iris(return_X_y=True)
- >>> kernel = PairwiseKernel(metric='rbf')
- >>> gpc = GaussianProcessClassifier(kernel=kernel,
- ... random_state=0).fit(X, y)
- >>> gpc.score(X, y)
- 0.9733...
- >>> gpc.predict_proba(X[:2,:])
- array([[0.8880..., 0.05663..., 0.05532...],
- [0.8676..., 0.07073..., 0.06165...]])
- """
- def __init__(
- self,
- gamma=1.0,
- gamma_bounds=(1e-5, 1e5),
- metric="linear",
- pairwise_kernels_kwargs=None,
- ):
- self.gamma = gamma
- self.gamma_bounds = gamma_bounds
- self.metric = metric
- self.pairwise_kernels_kwargs = pairwise_kernels_kwargs
- @property
- def hyperparameter_gamma(self):
- return Hyperparameter("gamma", "numeric", self.gamma_bounds)
- def __call__(self, X, Y=None, eval_gradient=False):
- """Return the kernel k(X, Y) and optionally its gradient.
- Parameters
- ----------
- X : ndarray of shape (n_samples_X, n_features)
- Left argument of the returned kernel k(X, Y)
- Y : ndarray of shape (n_samples_Y, n_features), default=None
- Right argument of the returned kernel k(X, Y). If None, k(X, X)
- is evaluated instead.
- eval_gradient : bool, default=False
- Determines whether the gradient with respect to the log of
- the kernel hyperparameter is computed.
- Only supported when Y is None.
- Returns
- -------
- K : ndarray of shape (n_samples_X, n_samples_Y)
- Kernel k(X, Y)
- K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims),\
- optional
- The gradient of the kernel k(X, X) with respect to the log of the
- hyperparameter of the kernel. Only returned when `eval_gradient`
- is True.
- """
- pairwise_kernels_kwargs = self.pairwise_kernels_kwargs
- if self.pairwise_kernels_kwargs is None:
- pairwise_kernels_kwargs = {}
- X = np.atleast_2d(X)
- K = pairwise_kernels(
- X,
- Y,
- metric=self.metric,
- gamma=self.gamma,
- filter_params=True,
- **pairwise_kernels_kwargs,
- )
- if eval_gradient:
- if self.hyperparameter_gamma.fixed:
- return K, np.empty((X.shape[0], X.shape[0], 0))
- else:
- # approximate gradient numerically
- def f(gamma): # helper function
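- # the value received here is theta = log(gamma), hence the np.exp below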
- return pairwise_kernels(
- X,
- Y,
- metric=self.metric,
- gamma=np.exp(gamma),
- filter_params=True,
- **pairwise_kernels_kwargs,
- )
- return K, _approx_fprime(self.theta, f, 1e-10)
- else:
- return K
- def diag(self, X):
- """Returns the diagonal of the kernel k(X, X).
- The result of this method is identical to np.diag(self(X)); however,
- it can be evaluated more efficiently since only the diagonal is
- evaluated.
- Parameters
- ----------
- X : ndarray of shape (n_samples_X, n_features)
- Left argument of the returned kernel k(X, Y)
- Returns
- -------
- K_diag : ndarray of shape (n_samples_X,)
- Diagonal of kernel k(X, X)
- """
- # We have to fall back to slow way of computing diagonal
- return np.apply_along_axis(self, 1, X).ravel()
- def is_stationary(self):
- """Returns whether the kernel is stationary."""
- return self.metric in ["rbf"]
- def __repr__(self):
- return "{0}(gamma={1}, metric={2})".format(
- self.__class__.__name__, self.gamma, self.metric
- )
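- # --- Illustrative sketch (not part of the library code above) ---
- # PairwiseKernel(metric="rbf") wraps sklearn.metrics.pairwise.rbf_kernel,
- # parameterized as exp(-gamma * d**2), while the RBF kernel above uses
- # exp(-d**2 / (2 * length_scale**2)); the two coincide for
- # gamma = 1 / (2 * length_scale**2). `X_demo` is an arbitrary toy input.
- import numpy as np
- from sklearn.gaussian_process.kernels import RBF, PairwiseKernel
- rng = np.random.RandomState(0)
- X_demo = rng.rand(5, 2)
- length_scale = 0.8
- K_wrapped = PairwiseKernel(metric="rbf", gamma=1.0 / (2.0 * length_scale**2))(X_demo)
- K_rbf = RBF(length_scale=length_scale)(X_demo)
- print(np.allclose(K_wrapped, K_rbf))  # expected: True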