- """Bayesian Gaussian Mixture Model."""
- # Author: Wei Xue <xuewei4d@gmail.com>
- # Thierry Guillemot <thierry.guillemot.work@gmail.com>
- # License: BSD 3 clause
- import math
- from numbers import Real
- import numpy as np
- from scipy.special import betaln, digamma, gammaln
- from ..utils import check_array
- from ..utils._param_validation import Interval, StrOptions
- from ._base import BaseMixture, _check_shape
- from ._gaussian_mixture import (
- _check_precision_matrix,
- _check_precision_positivity,
- _compute_log_det_cholesky,
- _compute_precision_cholesky,
- _estimate_gaussian_parameters,
- _estimate_log_gaussian_prob,
- )


def _log_dirichlet_norm(dirichlet_concentration):
    """Compute the log of the Dirichlet distribution normalization term.

    Parameters
    ----------
    dirichlet_concentration : array-like of shape (n_components,)
        The parameter values of the Dirichlet distribution.

    Returns
    -------
    log_dirichlet_norm : float
        The log normalization of the Dirichlet distribution.
    """
    return gammaln(np.sum(dirichlet_concentration)) - np.sum(
        gammaln(dirichlet_concentration)
    )
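
# Illustrative note (not used by the estimator): for a symmetric
# concentration over three components the identity reads
#     _log_dirichlet_norm(np.array([0.5, 0.5, 0.5]))
#     == gammaln(1.5) - 3 * gammaln(0.5)
# i.e. log Gamma(3/2) - 3 * log Gamma(1/2).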


def _log_wishart_norm(degrees_of_freedom, log_det_precisions_chol, n_features):
    """Compute the log of the Wishart distribution normalization term.

    Parameters
    ----------
    degrees_of_freedom : array-like of shape (n_components,)
        The number of degrees of freedom on the covariance Wishart
        distributions.

    log_det_precisions_chol : array-like of shape (n_components,)
        The log-determinant of the Cholesky factor of the precision matrix
        for each component.

    n_features : int
        The number of features.

    Returns
    -------
    log_wishart_norm : array-like of shape (n_components,)
        The log normalization of the Wishart distribution.
    """
    # To simplify the computation we have removed the np.log(np.pi) term.
    return -(
        degrees_of_freedom * log_det_precisions_chol
        + degrees_of_freedom * n_features * 0.5 * math.log(2.0)
        + np.sum(
            gammaln(0.5 * (degrees_of_freedom - np.arange(n_features)[:, np.newaxis])),
            0,
        )
    )
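
# For reference, the full Wishart log-normalizer (Bishop, 2006, eq. B.79) is
#     log B(W, nu) = -(nu / 2) * log|W| - (nu * d / 2) * log 2
#                    - (d * (d - 1) / 4) * log pi
#                    - sum_{i=1}^{d} log Gamma((nu + 1 - i) / 2).
# Above, `degrees_of_freedom * log_det_precisions_chol` supplies the
# (nu / 2) * log|W| term, because the determinant of a Cholesky factor is the
# square root of the matrix determinant; the log pi term is the constant
# dropped in the comment above.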


class BayesianGaussianMixture(BaseMixture):
    """Variational Bayesian estimation of a Gaussian mixture.

    This class allows inference of an approximate posterior distribution
    over the parameters of a Gaussian mixture distribution. The effective
    number of components can be inferred from the data.

    This class implements two types of prior for the weights distribution: a
    finite mixture model with Dirichlet distribution and an infinite mixture
    model with the Dirichlet Process. In practice the Dirichlet Process
    inference algorithm is approximated and uses a truncated distribution
    with a fixed maximum number of components (called the stick-breaking
    representation). The number of components actually used almost always
    depends on the data.

    .. versionadded:: 0.18

    Read more in the :ref:`User Guide <bgmm>`.

    Parameters
    ----------
    n_components : int, default=1
        The number of mixture components. Depending on the data and the value
        of the `weight_concentration_prior` the model can decide to not use
        all the components by setting some component `weights_` to values very
        close to zero. The number of effective components can therefore be
        smaller than `n_components`.

    covariance_type : {'full', 'tied', 'diag', 'spherical'}, default='full'
        String describing the type of covariance parameters to use.
        Must be one of::

            'full' (each component has its own general covariance matrix),
            'tied' (all components share the same general covariance matrix),
            'diag' (each component has its own diagonal covariance matrix),
            'spherical' (each component has its own single variance).

    tol : float, default=1e-3
        The convergence threshold. EM iterations will stop when the
        lower bound average gain on the likelihood (of the training data with
        respect to the model) is below this threshold.

    reg_covar : float, default=1e-6
        Non-negative regularization added to the diagonal of covariance.
        Ensures that the covariance matrices are all positive.

    max_iter : int, default=100
        The number of EM iterations to perform.

    n_init : int, default=1
        The number of initializations to perform. The result with the highest
        lower bound value on the likelihood is kept.

    init_params : {'kmeans', 'k-means++', 'random', 'random_from_data'}, \
            default='kmeans'
        The method used to initialize the weights, the means and the
        covariances.
        String must be one of:

            'kmeans' : responsibilities are initialized using kmeans.
            'k-means++' : use the k-means++ method to initialize.
            'random' : responsibilities are initialized randomly.
            'random_from_data' : initial means are randomly selected data points.

        .. versionchanged:: v1.1
            `init_params` now accepts 'random_from_data' and 'k-means++' as
            initialization methods.

    weight_concentration_prior_type : {'dirichlet_process', 'dirichlet_distribution'}, \
            default='dirichlet_process'
        String describing the type of the weight concentration prior.

    weight_concentration_prior : float or None, default=None
        The Dirichlet concentration of each component on the weight
        distribution (Dirichlet). This is commonly called gamma in the
        literature. A higher concentration puts more mass in the center and
        will lead to more components being active, while a lower
        concentration parameter will lead to more mass at the edge of the
        mixture weights simplex. The value of the parameter must be greater
        than 0. If it is None, it is set to ``1. / n_components``.

    mean_precision_prior : float or None, default=None
        The precision prior on the mean distribution (Gaussian).
        Controls the extent of where means can be placed. Larger
        values concentrate the cluster means around `mean_prior`.
        The value of the parameter must be greater than 0.
        If it is None, it is set to 1.

    mean_prior : array-like of shape (n_features,), default=None
        The prior on the mean distribution (Gaussian).
        If it is None, it is set to the mean of X.

    degrees_of_freedom_prior : float or None, default=None
        The prior of the number of degrees of freedom on the covariance
        distributions (Wishart). If it is None, it is set to `n_features`.

    covariance_prior : float or array-like, default=None
        The prior on the covariance distribution (Wishart).
        If it is None, the empirical covariance prior is initialized using the
        covariance of X. The shape depends on `covariance_type`::

            (n_features, n_features) if 'full',
            (n_features, n_features) if 'tied',
            (n_features,)            if 'diag',
            float                    if 'spherical'

    random_state : int, RandomState instance or None, default=None
        Controls the random seed given to the method chosen to initialize the
        parameters (see `init_params`).
        In addition, it controls the generation of random samples from the
        fitted distribution (see the method `sample`).
        Pass an int for reproducible output across multiple function calls.
        See :term:`Glossary <random_state>`.

    warm_start : bool, default=False
        If `warm_start` is True, the solution of the last fitting is used as
        initialization for the next call of fit(). This can speed up
        convergence when fit is called several times on similar problems.
        See :term:`the Glossary <warm_start>`.

    verbose : int, default=0
        Enable verbose output. If 1 then it prints the current
        initialization and each iteration step. If greater than 1 then
        it prints also the log probability and the time needed
        for each step.

    verbose_interval : int, default=10
        Number of iterations done before the next print.

    Attributes
    ----------
    weights_ : array-like of shape (n_components,)
        The weights of each mixture component.

    means_ : array-like of shape (n_components, n_features)
        The mean of each mixture component.

    covariances_ : array-like
        The covariance of each mixture component.
        The shape depends on `covariance_type`::

            (n_components,)                        if 'spherical',
            (n_features, n_features)               if 'tied',
            (n_components, n_features)             if 'diag',
            (n_components, n_features, n_features) if 'full'

    precisions_ : array-like
        The precision matrices for each component in the mixture. A precision
        matrix is the inverse of a covariance matrix. A covariance matrix is
        symmetric positive definite so the mixture of Gaussians can be
        equivalently parameterized by the precision matrices. Storing the
        precision matrices instead of the covariance matrices makes it more
        efficient to compute the log-likelihood of new samples at test time.
        The shape depends on ``covariance_type``::

            (n_components,)                        if 'spherical',
            (n_features, n_features)               if 'tied',
            (n_components, n_features)             if 'diag',
            (n_components, n_features, n_features) if 'full'

    precisions_cholesky_ : array-like
        The Cholesky decomposition of the precision matrices of each mixture
        component. A precision matrix is the inverse of a covariance matrix.
        A covariance matrix is symmetric positive definite so the mixture of
        Gaussians can be equivalently parameterized by the precision
        matrices. Storing the precision matrices instead of the covariance
        matrices makes it more efficient to compute the log-likelihood of new
        samples at test time. The shape depends on ``covariance_type``::

            (n_components,)                        if 'spherical',
            (n_features, n_features)               if 'tied',
            (n_components, n_features)             if 'diag',
            (n_components, n_features, n_features) if 'full'

    converged_ : bool
        True when convergence was reached in fit(), False otherwise.

    n_iter_ : int
        Number of iterations used by the best fit of inference to reach
        convergence.

    lower_bound_ : float
        Lower bound value on the model evidence (of the training data) of the
        best fit of inference.

    weight_concentration_prior_ : tuple or float
        The Dirichlet concentration of each component on the weight
        distribution (Dirichlet). The type depends on
        ``weight_concentration_prior_type``::

            (float, float) if 'dirichlet_process' (Beta parameters),
            float          if 'dirichlet_distribution' (Dirichlet parameters).

        A higher concentration puts more mass in the center and will lead to
        more components being active, while a lower concentration parameter
        will lead to more mass at the edge of the simplex.

    weight_concentration_ : array-like of shape (n_components,)
        The Dirichlet concentration of each component on the weight
        distribution (Dirichlet).

    mean_precision_prior_ : float
        The precision prior on the mean distribution (Gaussian).
        Controls the extent of where means can be placed.
        Larger values concentrate the cluster means around `mean_prior`.
        If mean_precision_prior is set to None, `mean_precision_prior_` is set
        to 1.

    mean_precision_ : array-like of shape (n_components,)
        The precision of each component on the mean distribution (Gaussian).

    mean_prior_ : array-like of shape (n_features,)
        The prior on the mean distribution (Gaussian).

    degrees_of_freedom_prior_ : float
        The prior of the number of degrees of freedom on the covariance
        distributions (Wishart).

    degrees_of_freedom_ : array-like of shape (n_components,)
        The number of degrees of freedom of each component in the model.

    covariance_prior_ : float or array-like
        The prior on the covariance distribution (Wishart).
        The shape depends on `covariance_type`::

            (n_features, n_features) if 'full',
            (n_features, n_features) if 'tied',
            (n_features,)            if 'diag',
            float                    if 'spherical'

    n_features_in_ : int
        Number of features seen during :term:`fit`.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    See Also
    --------
    GaussianMixture : Finite Gaussian mixture fit with EM.

    References
    ----------
    .. [1] `Bishop, Christopher M. (2006). "Pattern recognition and machine
       learning". Vol. 4 No. 4. New York: Springer.
       <https://www.springer.com/kr/book/9780387310732>`_

    .. [2] `Hagai Attias. (2000). "A Variational Bayesian Framework for
       Graphical Models". In Advances in Neural Information Processing
       Systems 12.
       <https://citeseerx.ist.psu.edu/doc_view/pid/ee844fd96db7041a9681b5a18bff008912052c7e>`_

    .. [3] `Blei, David M. and Michael I. Jordan. (2006). "Variational
       inference for Dirichlet process mixtures". Bayesian analysis 1.1
       <https://www.cs.princeton.edu/courses/archive/fall11/cos597C/reading/BleiJordan2005.pdf>`_

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.mixture import BayesianGaussianMixture
    >>> X = np.array([[1, 2], [1, 4], [1, 0], [4, 2], [12, 4], [10, 7]])
    >>> bgm = BayesianGaussianMixture(n_components=2, random_state=42).fit(X)
    >>> bgm.means_
    array([[2.49..., 2.29...],
           [8.45..., 4.52...]])
    >>> bgm.predict([[0, 0], [9, 3]])
    array([0, 1])
    """

    _parameter_constraints: dict = {
        **BaseMixture._parameter_constraints,
        "covariance_type": [StrOptions({"spherical", "tied", "diag", "full"})],
        "weight_concentration_prior_type": [
            StrOptions({"dirichlet_process", "dirichlet_distribution"})
        ],
        "weight_concentration_prior": [
            None,
            Interval(Real, 0.0, None, closed="neither"),
        ],
        "mean_precision_prior": [None, Interval(Real, 0.0, None, closed="neither")],
        "mean_prior": [None, "array-like"],
        "degrees_of_freedom_prior": [None, Interval(Real, 0.0, None, closed="neither")],
        "covariance_prior": [
            None,
            "array-like",
            Interval(Real, 0.0, None, closed="neither"),
        ],
    }

    def __init__(
        self,
        *,
        n_components=1,
        covariance_type="full",
        tol=1e-3,
        reg_covar=1e-6,
        max_iter=100,
        n_init=1,
        init_params="kmeans",
        weight_concentration_prior_type="dirichlet_process",
        weight_concentration_prior=None,
        mean_precision_prior=None,
        mean_prior=None,
        degrees_of_freedom_prior=None,
        covariance_prior=None,
        random_state=None,
        warm_start=False,
        verbose=0,
        verbose_interval=10,
    ):
        super().__init__(
            n_components=n_components,
            tol=tol,
            reg_covar=reg_covar,
            max_iter=max_iter,
            n_init=n_init,
            init_params=init_params,
            random_state=random_state,
            warm_start=warm_start,
            verbose=verbose,
            verbose_interval=verbose_interval,
        )

        self.covariance_type = covariance_type
        self.weight_concentration_prior_type = weight_concentration_prior_type
        self.weight_concentration_prior = weight_concentration_prior
        self.mean_precision_prior = mean_precision_prior
        self.mean_prior = mean_prior
        self.degrees_of_freedom_prior = degrees_of_freedom_prior
        self.covariance_prior = covariance_prior

    def _check_parameters(self, X):
        """Check that the parameters are well defined.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        """
        self._check_weights_parameters()
        self._check_means_parameters(X)
        self._check_precision_parameters(X)
        self._check_covariance_prior_parameter(X)

    def _check_weights_parameters(self):
        """Check the parameter of the Dirichlet distribution."""
        if self.weight_concentration_prior is None:
            self.weight_concentration_prior_ = 1.0 / self.n_components
        else:
            self.weight_concentration_prior_ = self.weight_concentration_prior

    def _check_means_parameters(self, X):
        """Check the parameters of the Gaussian distribution.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        """
        _, n_features = X.shape

        if self.mean_precision_prior is None:
            self.mean_precision_prior_ = 1.0
        else:
            self.mean_precision_prior_ = self.mean_precision_prior

        if self.mean_prior is None:
            self.mean_prior_ = X.mean(axis=0)
        else:
            self.mean_prior_ = check_array(
                self.mean_prior, dtype=[np.float64, np.float32], ensure_2d=False
            )
            _check_shape(self.mean_prior_, (n_features,), "means")

    def _check_precision_parameters(self, X):
        """Check the prior parameters of the precision distribution.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        """
        _, n_features = X.shape

        if self.degrees_of_freedom_prior is None:
            self.degrees_of_freedom_prior_ = n_features
        elif self.degrees_of_freedom_prior > n_features - 1.0:
            self.degrees_of_freedom_prior_ = self.degrees_of_freedom_prior
        else:
            raise ValueError(
                "The parameter 'degrees_of_freedom_prior' "
                "should be greater than %d, but got %.3f."
                % (n_features - 1, self.degrees_of_freedom_prior)
            )

    def _check_covariance_prior_parameter(self, X):
        """Check the `covariance_prior_`.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        """
        _, n_features = X.shape

        if self.covariance_prior is None:
            self.covariance_prior_ = {
                "full": np.atleast_2d(np.cov(X.T)),
                "tied": np.atleast_2d(np.cov(X.T)),
                "diag": np.var(X, axis=0, ddof=1),
                "spherical": np.var(X, axis=0, ddof=1).mean(),
            }[self.covariance_type]
        elif self.covariance_type in ["full", "tied"]:
            self.covariance_prior_ = check_array(
                self.covariance_prior, dtype=[np.float64, np.float32], ensure_2d=False
            )
            _check_shape(
                self.covariance_prior_,
                (n_features, n_features),
                "%s covariance_prior" % self.covariance_type,
            )
            _check_precision_matrix(self.covariance_prior_, self.covariance_type)
        elif self.covariance_type == "diag":
            self.covariance_prior_ = check_array(
                self.covariance_prior, dtype=[np.float64, np.float32], ensure_2d=False
            )
            _check_shape(
                self.covariance_prior_,
                (n_features,),
                "%s covariance_prior" % self.covariance_type,
            )
            _check_precision_positivity(self.covariance_prior_, self.covariance_type)
        # spherical case
        else:
            self.covariance_prior_ = self.covariance_prior

    def _initialize(self, X, resp):
        """Initialization of the mixture parameters.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        resp : array-like of shape (n_samples, n_components)
        """
        nk, xk, sk = _estimate_gaussian_parameters(
            X, resp, self.reg_covar, self.covariance_type
        )

        self._estimate_weights(nk)
        self._estimate_means(nk, xk)
        self._estimate_precisions(nk, xk, sk)

    def _estimate_weights(self, nk):
        """Estimate the parameters of the Dirichlet distribution.

        Parameters
        ----------
        nk : array-like of shape (n_components,)
        """
        if self.weight_concentration_prior_type == "dirichlet_process":
            # For the Dirichlet process, weight_concentration will be a tuple
            # containing the two parameters of the Beta distribution.
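            # Illustrative note: the second Beta parameter of component k is
            # gamma_0 + sum_{j > k} N_j. For nk = [3., 2., 1.] the expression
            # np.hstack((np.cumsum(nk[::-1])[-2::-1], 0)) evaluates to
            # [3., 1., 0.], i.e. the total responsibility of the components
            # to the right of each component in the stick-breaking order.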
            self.weight_concentration_ = (
                1.0 + nk,
                (
                    self.weight_concentration_prior_
                    + np.hstack((np.cumsum(nk[::-1])[-2::-1], 0))
                ),
            )
        else:
            # case Variational Gaussian mixture with Dirichlet distribution
            self.weight_concentration_ = self.weight_concentration_prior_ + nk

    def _estimate_means(self, nk, xk):
        """Estimate the parameters of the Gaussian distribution.

        Parameters
        ----------
        nk : array-like of shape (n_components,)

        xk : array-like of shape (n_components, n_features)
        """
        self.mean_precision_ = self.mean_precision_prior_ + nk
        self.means_ = (
            self.mean_precision_prior_ * self.mean_prior_ + nk[:, np.newaxis] * xk
        ) / self.mean_precision_[:, np.newaxis]

    def _estimate_precisions(self, nk, xk, sk):
        """Estimate the precisions parameters of the precision distribution.

        Parameters
        ----------
        nk : array-like of shape (n_components,)

        xk : array-like of shape (n_components, n_features)

        sk : array-like
            The shape depends on `covariance_type`:
                'full' : (n_components, n_features, n_features)
                'tied' : (n_features, n_features)
                'diag' : (n_components, n_features)
                'spherical' : (n_components,)
        """
        {
            "full": self._estimate_wishart_full,
            "tied": self._estimate_wishart_tied,
            "diag": self._estimate_wishart_diag,
            "spherical": self._estimate_wishart_spherical,
        }[self.covariance_type](nk, xk, sk)

        self.precisions_cholesky_ = _compute_precision_cholesky(
            self.covariances_, self.covariance_type
        )

    def _estimate_wishart_full(self, nk, xk, sk):
        """Estimate the full Wishart distribution parameters.

        Parameters
        ----------
        nk : array-like of shape (n_components,)

        xk : array-like of shape (n_components, n_features)

        sk : array-like of shape (n_components, n_features, n_features)
        """
        _, n_features = xk.shape

        # Warning: in some editions of Bishop's book, there is a typo in
        # formula 10.63; `degrees_of_freedom_k = degrees_of_freedom_0 + Nk`
        # is the correct formula.
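        # The scale update below follows Bishop (2006), eq. 10.62:
        #   W_k^{-1} = W_0^{-1} + N_k * S_k
        #              + (beta_0 * N_k / (beta_0 + N_k))
        #                * (xbar_k - m_0)(xbar_k - m_0)^T,
        # with `covariance_prior_` playing the role of W_0^{-1}. Note that
        # N_k * beta_0 / beta_k equals beta_0 * N_k / (beta_0 + N_k) since
        # beta_k = beta_0 + N_k.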
        self.degrees_of_freedom_ = self.degrees_of_freedom_prior_ + nk

        self.covariances_ = np.empty((self.n_components, n_features, n_features))
        for k in range(self.n_components):
            diff = xk[k] - self.mean_prior_
            self.covariances_[k] = (
                self.covariance_prior_
                + nk[k] * sk[k]
                + nk[k]
                * self.mean_precision_prior_
                / self.mean_precision_[k]
                * np.outer(diff, diff)
            )

        # Contrary to the original Bishop book, we normalize the covariances
        self.covariances_ /= self.degrees_of_freedom_[:, np.newaxis, np.newaxis]

    def _estimate_wishart_tied(self, nk, xk, sk):
        """Estimate the tied Wishart distribution parameters.

        Parameters
        ----------
        nk : array-like of shape (n_components,)

        xk : array-like of shape (n_components, n_features)

        sk : array-like of shape (n_features, n_features)
        """
        _, n_features = xk.shape

        # Warning: in some editions of Bishop's book, there is a typo in
        # formula 10.63; `degrees_of_freedom_k = degrees_of_freedom_0 + Nk`
        # is the correct formula.
        self.degrees_of_freedom_ = (
            self.degrees_of_freedom_prior_ + nk.sum() / self.n_components
        )

        diff = xk - self.mean_prior_
        self.covariances_ = (
            self.covariance_prior_
            + sk * nk.sum() / self.n_components
            + self.mean_precision_prior_
            / self.n_components
            * np.dot((nk / self.mean_precision_) * diff.T, diff)
        )

        # Contrary to the original Bishop book, we normalize the covariances
        self.covariances_ /= self.degrees_of_freedom_

    def _estimate_wishart_diag(self, nk, xk, sk):
        """Estimate the diag Wishart distribution parameters.

        Parameters
        ----------
        nk : array-like of shape (n_components,)

        xk : array-like of shape (n_components, n_features)

        sk : array-like of shape (n_components, n_features)
        """
        _, n_features = xk.shape

        # Warning: in some editions of Bishop's book, there is a typo in
        # formula 10.63; `degrees_of_freedom_k = degrees_of_freedom_0 + Nk`
        # is the correct formula.
        self.degrees_of_freedom_ = self.degrees_of_freedom_prior_ + nk

        diff = xk - self.mean_prior_
        self.covariances_ = self.covariance_prior_ + nk[:, np.newaxis] * (
            sk
            + (self.mean_precision_prior_ / self.mean_precision_)[:, np.newaxis]
            * np.square(diff)
        )

        # Contrary to the original Bishop book, we normalize the covariances
        self.covariances_ /= self.degrees_of_freedom_[:, np.newaxis]

    def _estimate_wishart_spherical(self, nk, xk, sk):
        """Estimate the spherical Wishart distribution parameters.

        Parameters
        ----------
        nk : array-like of shape (n_components,)

        xk : array-like of shape (n_components, n_features)

        sk : array-like of shape (n_components,)
        """
        _, n_features = xk.shape

        # Warning: in some editions of Bishop's book, there is a typo in
        # formula 10.63; `degrees_of_freedom_k = degrees_of_freedom_0 + Nk`
        # is the correct formula.
        self.degrees_of_freedom_ = self.degrees_of_freedom_prior_ + nk

        diff = xk - self.mean_prior_
        self.covariances_ = self.covariance_prior_ + nk * (
            sk
            + self.mean_precision_prior_
            / self.mean_precision_
            * np.mean(np.square(diff), 1)
        )

        # Contrary to the original Bishop book, we normalize the covariances
        self.covariances_ /= self.degrees_of_freedom_

    def _m_step(self, X, log_resp):
        """M step.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        log_resp : array-like of shape (n_samples, n_components)
            Logarithm of the posterior probabilities (or responsibilities)
            of each sample in X.
        """
        n_samples, _ = X.shape

        nk, xk, sk = _estimate_gaussian_parameters(
            X, np.exp(log_resp), self.reg_covar, self.covariance_type
        )
        self._estimate_weights(nk)
        self._estimate_means(nk, xk)
        self._estimate_precisions(nk, xk, sk)

    def _estimate_log_weights(self):
        if self.weight_concentration_prior_type == "dirichlet_process":
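            # Stick-breaking expectations (Blei & Jordan, 2006):
            #   E[log pi_k] = E[log v_k] + sum_{j < k} E[log(1 - v_j)],
            # where E[log v_k] = psi(a_k) - psi(a_k + b_k) and
            # E[log(1 - v_k)] = psi(b_k) - psi(a_k + b_k).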
            digamma_sum = digamma(
                self.weight_concentration_[0] + self.weight_concentration_[1]
            )
            digamma_a = digamma(self.weight_concentration_[0])
            digamma_b = digamma(self.weight_concentration_[1])
            return (
                digamma_a
                - digamma_sum
                + np.hstack((0, np.cumsum(digamma_b - digamma_sum)[:-1]))
            )
        else:
            # case Variational Gaussian mixture with Dirichlet distribution
            return digamma(self.weight_concentration_) - digamma(
                np.sum(self.weight_concentration_)
            )

    def _estimate_log_prob(self, X):
        _, n_features = X.shape
        # We remove `n_features * np.log(self.degrees_of_freedom_)` because
        # the precision matrix is normalized
        log_gauss = _estimate_log_gaussian_prob(
            X, self.means_, self.precisions_cholesky_, self.covariance_type
        ) - 0.5 * n_features * np.log(self.degrees_of_freedom_)
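        # cf. Bishop (2006), eq. 10.65: E[log det Lambda_k] equals
        # sum_i psi((nu_k + 1 - i) / 2) + n_features * log 2 + log det W_k;
        # the log det W_k contribution already enters `log_gauss` through the
        # Cholesky factors, so only the first two terms appear below.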
        log_lambda = n_features * np.log(2.0) + np.sum(
            digamma(
                0.5
                * (self.degrees_of_freedom_ - np.arange(0, n_features)[:, np.newaxis])
            ),
            0,
        )

        return log_gauss + 0.5 * (log_lambda - n_features / self.mean_precision_)

    def _compute_lower_bound(self, log_resp, log_prob_norm):
        """Estimate the lower bound of the model.

        The lower bound on the likelihood (of the training data with respect
        to the model) is used to detect convergence and has to increase at
        each iteration.

        Parameters
        ----------
        log_resp : array, shape (n_samples, n_components)
            Logarithm of the posterior probabilities (or responsibilities)
            of each sample in X.

        log_prob_norm : float
            Logarithm of the probability of each sample in X.

        Returns
        -------
        lower_bound : float
        """
        # Contrary to the original formula, we have done some simplification
        # and removed all the constant terms.
        (n_features,) = self.mean_prior_.shape

        # We removed `.5 * n_features * np.log(self.degrees_of_freedom_)`
        # because the precision matrix is normalized.
        log_det_precisions_chol = _compute_log_det_cholesky(
            self.precisions_cholesky_, self.covariance_type, n_features
        ) - 0.5 * n_features * np.log(self.degrees_of_freedom_)
- if self.covariance_type == "tied":
- log_wishart = self.n_components * np.float64(
- _log_wishart_norm(
- self.degrees_of_freedom_, log_det_precisions_chol, n_features
- )
- )
- else:
- log_wishart = np.sum(
- _log_wishart_norm(
- self.degrees_of_freedom_, log_det_precisions_chol, n_features
- )
- )
- if self.weight_concentration_prior_type == "dirichlet_process":
- log_norm_weight = -np.sum(
- betaln(self.weight_concentration_[0], self.weight_concentration_[1])
- )
- else:
- log_norm_weight = _log_dirichlet_norm(self.weight_concentration_)
- return (
- -np.sum(np.exp(log_resp) * log_resp)
- - log_wishart
- - log_norm_weight
- - 0.5 * n_features * np.sum(np.log(self.mean_precision_))
- )

    def _get_parameters(self):
        return (
            self.weight_concentration_,
            self.mean_precision_,
            self.means_,
            self.degrees_of_freedom_,
            self.covariances_,
            self.precisions_cholesky_,
        )

    def _set_parameters(self, params):
        (
            self.weight_concentration_,
            self.mean_precision_,
            self.means_,
            self.degrees_of_freedom_,
            self.covariances_,
            self.precisions_cholesky_,
        ) = params

        # Weights computation
        if self.weight_concentration_prior_type == "dirichlet_process":
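            # Under the truncated stick-breaking representation, the expected
            # weight of component k is E[v_k] * prod_{j < k} (1 - E[v_j])
            # with E[v_k] = a_k / (a_k + b_k); the weights are renormalized
            # afterwards because truncation leaves some residual stick mass.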
            weight_dirichlet_sum = (
                self.weight_concentration_[0] + self.weight_concentration_[1]
            )
            tmp = self.weight_concentration_[1] / weight_dirichlet_sum
            self.weights_ = (
                self.weight_concentration_[0]
                / weight_dirichlet_sum
                * np.hstack((1, np.cumprod(tmp[:-1])))
            )
            self.weights_ /= np.sum(self.weights_)
        else:
            self.weights_ = self.weight_concentration_ / np.sum(
                self.weight_concentration_
            )

        # Precisions matrices computation
        if self.covariance_type == "full":
            self.precisions_ = np.array(
                [
                    np.dot(prec_chol, prec_chol.T)
                    for prec_chol in self.precisions_cholesky_
                ]
            )
        elif self.covariance_type == "tied":
            self.precisions_ = np.dot(
                self.precisions_cholesky_, self.precisions_cholesky_.T
            )
        else:
            self.precisions_ = self.precisions_cholesky_**2