_affinity_propagation.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590
  1. """Affinity Propagation clustering algorithm."""
  2. # Author: Alexandre Gramfort alexandre.gramfort@inria.fr
  3. # Gael Varoquaux gael.varoquaux@normalesup.org
  4. # License: BSD 3 clause
  5. import warnings
  6. from numbers import Integral, Real
  7. import numpy as np
  8. from .._config import config_context
  9. from ..base import BaseEstimator, ClusterMixin, _fit_context
  10. from ..exceptions import ConvergenceWarning
  11. from ..metrics import euclidean_distances, pairwise_distances_argmin
  12. from ..utils import check_random_state
  13. from ..utils._param_validation import Interval, StrOptions, validate_params
  14. from ..utils.validation import check_is_fitted
  15. def _equal_similarities_and_preferences(S, preference):
  16. def all_equal_preferences():
  17. return np.all(preference == preference.flat[0])
  18. def all_equal_similarities():
  19. # Create mask to ignore diagonal of S
  20. mask = np.ones(S.shape, dtype=bool)
  21. np.fill_diagonal(mask, 0)
  22. return np.all(S[mask].flat == S[mask].flat[0])
  23. return all_equal_preferences() and all_equal_similarities()
  24. def _affinity_propagation(
  25. S,
  26. *,
  27. preference,
  28. convergence_iter,
  29. max_iter,
  30. damping,
  31. verbose,
  32. return_n_iter,
  33. random_state,
  34. ):
  35. """Main affinity propagation algorithm."""
  36. n_samples = S.shape[0]
  37. if n_samples == 1 or _equal_similarities_and_preferences(S, preference):
  38. # It makes no sense to run the algorithm in this case, so return 1 or
  39. # n_samples clusters, depending on preferences
  40. warnings.warn(
  41. "All samples have mutually equal similarities. "
  42. "Returning arbitrary cluster center(s)."
  43. )
  44. if preference.flat[0] >= S.flat[n_samples - 1]:
  45. return (
  46. (np.arange(n_samples), np.arange(n_samples), 0)
  47. if return_n_iter
  48. else (np.arange(n_samples), np.arange(n_samples))
  49. )
  50. else:
  51. return (
  52. (np.array([0]), np.array([0] * n_samples), 0)
  53. if return_n_iter
  54. else (np.array([0]), np.array([0] * n_samples))
  55. )
  56. # Place preference on the diagonal of S
  57. S.flat[:: (n_samples + 1)] = preference
  58. A = np.zeros((n_samples, n_samples))
  59. R = np.zeros((n_samples, n_samples)) # Initialize messages
  60. # Intermediate results
  61. tmp = np.zeros((n_samples, n_samples))
  62. # Remove degeneracies
  63. S += (
  64. np.finfo(S.dtype).eps * S + np.finfo(S.dtype).tiny * 100
  65. ) * random_state.standard_normal(size=(n_samples, n_samples))
  66. # Execute parallel affinity propagation updates
  67. e = np.zeros((n_samples, convergence_iter))
  68. ind = np.arange(n_samples)
  69. for it in range(max_iter):
  70. # tmp = A + S; compute responsibilities
  71. np.add(A, S, tmp)
  72. I = np.argmax(tmp, axis=1)
  73. Y = tmp[ind, I] # np.max(A + S, axis=1)
  74. tmp[ind, I] = -np.inf
  75. Y2 = np.max(tmp, axis=1)
  76. # tmp = Rnew
  77. np.subtract(S, Y[:, None], tmp)
  78. tmp[ind, I] = S[ind, I] - Y2
  79. # Damping
  80. tmp *= 1 - damping
  81. R *= damping
  82. R += tmp
  83. # tmp = Rp; compute availabilities
  84. np.maximum(R, 0, tmp)
  85. tmp.flat[:: n_samples + 1] = R.flat[:: n_samples + 1]
  86. # tmp = -Anew
  87. tmp -= np.sum(tmp, axis=0)
  88. dA = np.diag(tmp).copy()
  89. tmp.clip(0, np.inf, tmp)
  90. tmp.flat[:: n_samples + 1] = dA
  91. # Damping
  92. tmp *= 1 - damping
  93. A *= damping
  94. A -= tmp
  95. # Check for convergence
  96. E = (np.diag(A) + np.diag(R)) > 0
  97. e[:, it % convergence_iter] = E
  98. K = np.sum(E, axis=0)
  99. if it >= convergence_iter:
  100. se = np.sum(e, axis=1)
  101. unconverged = np.sum((se == convergence_iter) + (se == 0)) != n_samples
  102. if (not unconverged and (K > 0)) or (it == max_iter):
  103. never_converged = False
  104. if verbose:
  105. print("Converged after %d iterations." % it)
  106. break
  107. else:
  108. never_converged = True
  109. if verbose:
  110. print("Did not converge")
  111. I = np.flatnonzero(E)
  112. K = I.size # Identify exemplars
  113. if K > 0:
  114. if never_converged:
  115. warnings.warn(
  116. (
  117. "Affinity propagation did not converge, this model "
  118. "may return degenerate cluster centers and labels."
  119. ),
  120. ConvergenceWarning,
  121. )
  122. c = np.argmax(S[:, I], axis=1)
  123. c[I] = np.arange(K) # Identify clusters
  124. # Refine the final set of exemplars and clusters and return results
  125. for k in range(K):
  126. ii = np.where(c == k)[0]
  127. j = np.argmax(np.sum(S[ii[:, np.newaxis], ii], axis=0))
  128. I[k] = ii[j]
  129. c = np.argmax(S[:, I], axis=1)
  130. c[I] = np.arange(K)
  131. labels = I[c]
  132. # Reduce labels to a sorted, gapless, list
  133. cluster_centers_indices = np.unique(labels)
  134. labels = np.searchsorted(cluster_centers_indices, labels)
  135. else:
  136. warnings.warn(
  137. (
  138. "Affinity propagation did not converge and this model "
  139. "will not have any cluster centers."
  140. ),
  141. ConvergenceWarning,
  142. )
  143. labels = np.array([-1] * n_samples)
  144. cluster_centers_indices = []
  145. if return_n_iter:
  146. return cluster_centers_indices, labels, it + 1
  147. else:
  148. return cluster_centers_indices, labels
  149. ###############################################################################
  150. # Public API
  151. @validate_params(
  152. {
  153. "S": ["array-like"],
  154. "return_n_iter": ["boolean"],
  155. },
  156. prefer_skip_nested_validation=False,
  157. )
  158. def affinity_propagation(
  159. S,
  160. *,
  161. preference=None,
  162. convergence_iter=15,
  163. max_iter=200,
  164. damping=0.5,
  165. copy=True,
  166. verbose=False,
  167. return_n_iter=False,
  168. random_state=None,
  169. ):
  170. """Perform Affinity Propagation Clustering of data.
  171. Read more in the :ref:`User Guide <affinity_propagation>`.
  172. Parameters
  173. ----------
  174. S : array-like of shape (n_samples, n_samples)
  175. Matrix of similarities between points.
  176. preference : array-like of shape (n_samples,) or float, default=None
  177. Preferences for each point - points with larger values of
  178. preferences are more likely to be chosen as exemplars. The number of
  179. exemplars, i.e. of clusters, is influenced by the input preferences
  180. value. If the preferences are not passed as arguments, they will be
  181. set to the median of the input similarities (resulting in a moderate
  182. number of clusters). For a smaller amount of clusters, this can be set
  183. to the minimum value of the similarities.
  184. convergence_iter : int, default=15
  185. Number of iterations with no change in the number
  186. of estimated clusters that stops the convergence.
  187. max_iter : int, default=200
  188. Maximum number of iterations.
  189. damping : float, default=0.5
  190. Damping factor between 0.5 and 1.
  191. copy : bool, default=True
  192. If copy is False, the affinity matrix is modified inplace by the
  193. algorithm, for memory efficiency.
  194. verbose : bool, default=False
  195. The verbosity level.
  196. return_n_iter : bool, default=False
  197. Whether or not to return the number of iterations.
  198. random_state : int, RandomState instance or None, default=None
  199. Pseudo-random number generator to control the starting state.
  200. Use an int for reproducible results across function calls.
  201. See the :term:`Glossary <random_state>`.
  202. .. versionadded:: 0.23
  203. this parameter was previously hardcoded as 0.
  204. Returns
  205. -------
  206. cluster_centers_indices : ndarray of shape (n_clusters,)
  207. Index of clusters centers.
  208. labels : ndarray of shape (n_samples,)
  209. Cluster labels for each point.
  210. n_iter : int
  211. Number of iterations run. Returned only if `return_n_iter` is
  212. set to True.
  213. Notes
  214. -----
  215. For an example, see :ref:`examples/cluster/plot_affinity_propagation.py
  216. <sphx_glr_auto_examples_cluster_plot_affinity_propagation.py>`.
  217. When the algorithm does not converge, it will still return a arrays of
  218. ``cluster_center_indices`` and labels if there are any exemplars/clusters,
  219. however they may be degenerate and should be used with caution.
  220. When all training samples have equal similarities and equal preferences,
  221. the assignment of cluster centers and labels depends on the preference.
  222. If the preference is smaller than the similarities, a single cluster center
  223. and label ``0`` for every sample will be returned. Otherwise, every
  224. training sample becomes its own cluster center and is assigned a unique
  225. label.
  226. References
  227. ----------
  228. Brendan J. Frey and Delbert Dueck, "Clustering by Passing Messages
  229. Between Data Points", Science Feb. 2007
  230. """
  231. estimator = AffinityPropagation(
  232. damping=damping,
  233. max_iter=max_iter,
  234. convergence_iter=convergence_iter,
  235. copy=copy,
  236. preference=preference,
  237. affinity="precomputed",
  238. verbose=verbose,
  239. random_state=random_state,
  240. ).fit(S)
  241. if return_n_iter:
  242. return estimator.cluster_centers_indices_, estimator.labels_, estimator.n_iter_
  243. return estimator.cluster_centers_indices_, estimator.labels_
  244. class AffinityPropagation(ClusterMixin, BaseEstimator):
  245. """Perform Affinity Propagation Clustering of data.
  246. Read more in the :ref:`User Guide <affinity_propagation>`.
  247. Parameters
  248. ----------
  249. damping : float, default=0.5
  250. Damping factor in the range `[0.5, 1.0)` is the extent to
  251. which the current value is maintained relative to
  252. incoming values (weighted 1 - damping). This in order
  253. to avoid numerical oscillations when updating these
  254. values (messages).
  255. max_iter : int, default=200
  256. Maximum number of iterations.
  257. convergence_iter : int, default=15
  258. Number of iterations with no change in the number
  259. of estimated clusters that stops the convergence.
  260. copy : bool, default=True
  261. Make a copy of input data.
  262. preference : array-like of shape (n_samples,) or float, default=None
  263. Preferences for each point - points with larger values of
  264. preferences are more likely to be chosen as exemplars. The number
  265. of exemplars, ie of clusters, is influenced by the input
  266. preferences value. If the preferences are not passed as arguments,
  267. they will be set to the median of the input similarities.
  268. affinity : {'euclidean', 'precomputed'}, default='euclidean'
  269. Which affinity to use. At the moment 'precomputed' and
  270. ``euclidean`` are supported. 'euclidean' uses the
  271. negative squared euclidean distance between points.
  272. verbose : bool, default=False
  273. Whether to be verbose.
  274. random_state : int, RandomState instance or None, default=None
  275. Pseudo-random number generator to control the starting state.
  276. Use an int for reproducible results across function calls.
  277. See the :term:`Glossary <random_state>`.
  278. .. versionadded:: 0.23
  279. this parameter was previously hardcoded as 0.
  280. Attributes
  281. ----------
  282. cluster_centers_indices_ : ndarray of shape (n_clusters,)
  283. Indices of cluster centers.
  284. cluster_centers_ : ndarray of shape (n_clusters, n_features)
  285. Cluster centers (if affinity != ``precomputed``).
  286. labels_ : ndarray of shape (n_samples,)
  287. Labels of each point.
  288. affinity_matrix_ : ndarray of shape (n_samples, n_samples)
  289. Stores the affinity matrix used in ``fit``.
  290. n_iter_ : int
  291. Number of iterations taken to converge.
  292. n_features_in_ : int
  293. Number of features seen during :term:`fit`.
  294. .. versionadded:: 0.24
  295. feature_names_in_ : ndarray of shape (`n_features_in_`,)
  296. Names of features seen during :term:`fit`. Defined only when `X`
  297. has feature names that are all strings.
  298. .. versionadded:: 1.0
  299. See Also
  300. --------
  301. AgglomerativeClustering : Recursively merges the pair of
  302. clusters that minimally increases a given linkage distance.
  303. FeatureAgglomeration : Similar to AgglomerativeClustering,
  304. but recursively merges features instead of samples.
  305. KMeans : K-Means clustering.
  306. MiniBatchKMeans : Mini-Batch K-Means clustering.
  307. MeanShift : Mean shift clustering using a flat kernel.
  308. SpectralClustering : Apply clustering to a projection
  309. of the normalized Laplacian.
  310. Notes
  311. -----
  312. For an example, see :ref:`examples/cluster/plot_affinity_propagation.py
  313. <sphx_glr_auto_examples_cluster_plot_affinity_propagation.py>`.
  314. The algorithmic complexity of affinity propagation is quadratic
  315. in the number of points.
  316. When the algorithm does not converge, it will still return a arrays of
  317. ``cluster_center_indices`` and labels if there are any exemplars/clusters,
  318. however they may be degenerate and should be used with caution.
  319. When ``fit`` does not converge, ``cluster_centers_`` is still populated
  320. however it may be degenerate. In such a case, proceed with caution.
  321. If ``fit`` does not converge and fails to produce any ``cluster_centers_``
  322. then ``predict`` will label every sample as ``-1``.
  323. When all training samples have equal similarities and equal preferences,
  324. the assignment of cluster centers and labels depends on the preference.
  325. If the preference is smaller than the similarities, ``fit`` will result in
  326. a single cluster center and label ``0`` for every sample. Otherwise, every
  327. training sample becomes its own cluster center and is assigned a unique
  328. label.
  329. References
  330. ----------
  331. Brendan J. Frey and Delbert Dueck, "Clustering by Passing Messages
  332. Between Data Points", Science Feb. 2007
  333. Examples
  334. --------
  335. >>> from sklearn.cluster import AffinityPropagation
  336. >>> import numpy as np
  337. >>> X = np.array([[1, 2], [1, 4], [1, 0],
  338. ... [4, 2], [4, 4], [4, 0]])
  339. >>> clustering = AffinityPropagation(random_state=5).fit(X)
  340. >>> clustering
  341. AffinityPropagation(random_state=5)
  342. >>> clustering.labels_
  343. array([0, 0, 0, 1, 1, 1])
  344. >>> clustering.predict([[0, 0], [4, 4]])
  345. array([0, 1])
  346. >>> clustering.cluster_centers_
  347. array([[1, 2],
  348. [4, 2]])
  349. """
  350. _parameter_constraints: dict = {
  351. "damping": [Interval(Real, 0.5, 1.0, closed="left")],
  352. "max_iter": [Interval(Integral, 1, None, closed="left")],
  353. "convergence_iter": [Interval(Integral, 1, None, closed="left")],
  354. "copy": ["boolean"],
  355. "preference": [
  356. "array-like",
  357. Interval(Real, None, None, closed="neither"),
  358. None,
  359. ],
  360. "affinity": [StrOptions({"euclidean", "precomputed"})],
  361. "verbose": ["verbose"],
  362. "random_state": ["random_state"],
  363. }
  364. def __init__(
  365. self,
  366. *,
  367. damping=0.5,
  368. max_iter=200,
  369. convergence_iter=15,
  370. copy=True,
  371. preference=None,
  372. affinity="euclidean",
  373. verbose=False,
  374. random_state=None,
  375. ):
  376. self.damping = damping
  377. self.max_iter = max_iter
  378. self.convergence_iter = convergence_iter
  379. self.copy = copy
  380. self.verbose = verbose
  381. self.preference = preference
  382. self.affinity = affinity
  383. self.random_state = random_state
  384. def _more_tags(self):
  385. return {"pairwise": self.affinity == "precomputed"}
  386. @_fit_context(prefer_skip_nested_validation=True)
  387. def fit(self, X, y=None):
  388. """Fit the clustering from features, or affinity matrix.
  389. Parameters
  390. ----------
  391. X : {array-like, sparse matrix} of shape (n_samples, n_features), or \
  392. array-like of shape (n_samples, n_samples)
  393. Training instances to cluster, or similarities / affinities between
  394. instances if ``affinity='precomputed'``. If a sparse feature matrix
  395. is provided, it will be converted into a sparse ``csr_matrix``.
  396. y : Ignored
  397. Not used, present here for API consistency by convention.
  398. Returns
  399. -------
  400. self
  401. Returns the instance itself.
  402. """
  403. if self.affinity == "precomputed":
  404. accept_sparse = False
  405. else:
  406. accept_sparse = "csr"
  407. X = self._validate_data(X, accept_sparse=accept_sparse)
  408. if self.affinity == "precomputed":
  409. self.affinity_matrix_ = X.copy() if self.copy else X
  410. else: # self.affinity == "euclidean"
  411. self.affinity_matrix_ = -euclidean_distances(X, squared=True)
  412. if self.affinity_matrix_.shape[0] != self.affinity_matrix_.shape[1]:
  413. raise ValueError(
  414. "The matrix of similarities must be a square array. "
  415. f"Got {self.affinity_matrix_.shape} instead."
  416. )
  417. if self.preference is None:
  418. preference = np.median(self.affinity_matrix_)
  419. else:
  420. preference = self.preference
  421. preference = np.array(preference, copy=False)
  422. random_state = check_random_state(self.random_state)
  423. (
  424. self.cluster_centers_indices_,
  425. self.labels_,
  426. self.n_iter_,
  427. ) = _affinity_propagation(
  428. self.affinity_matrix_,
  429. max_iter=self.max_iter,
  430. convergence_iter=self.convergence_iter,
  431. preference=preference,
  432. damping=self.damping,
  433. verbose=self.verbose,
  434. return_n_iter=True,
  435. random_state=random_state,
  436. )
  437. if self.affinity != "precomputed":
  438. self.cluster_centers_ = X[self.cluster_centers_indices_].copy()
  439. return self
  440. def predict(self, X):
  441. """Predict the closest cluster each sample in X belongs to.
  442. Parameters
  443. ----------
  444. X : {array-like, sparse matrix} of shape (n_samples, n_features)
  445. New data to predict. If a sparse matrix is provided, it will be
  446. converted into a sparse ``csr_matrix``.
  447. Returns
  448. -------
  449. labels : ndarray of shape (n_samples,)
  450. Cluster labels.
  451. """
  452. check_is_fitted(self)
  453. X = self._validate_data(X, reset=False, accept_sparse="csr")
  454. if not hasattr(self, "cluster_centers_"):
  455. raise ValueError(
  456. "Predict method is not supported when affinity='precomputed'."
  457. )
  458. if self.cluster_centers_.shape[0] > 0:
  459. with config_context(assume_finite=True):
  460. return pairwise_distances_argmin(X, self.cluster_centers_)
  461. else:
  462. warnings.warn(
  463. (
  464. "This model does not have any cluster centers "
  465. "because affinity propagation did not converge. "
  466. "Labeling every sample as '-1'."
  467. ),
  468. ConvergenceWarning,
  469. )
  470. return np.array([-1] * X.shape[0])
  471. def fit_predict(self, X, y=None):
  472. """Fit clustering from features/affinity matrix; return cluster labels.
  473. Parameters
  474. ----------
  475. X : {array-like, sparse matrix} of shape (n_samples, n_features), or \
  476. array-like of shape (n_samples, n_samples)
  477. Training instances to cluster, or similarities / affinities between
  478. instances if ``affinity='precomputed'``. If a sparse feature matrix
  479. is provided, it will be converted into a sparse ``csr_matrix``.
  480. y : Ignored
  481. Not used, present here for API consistency by convention.
  482. Returns
  483. -------
  484. labels : ndarray of shape (n_samples,)
  485. Cluster labels.
  486. """
  487. return super().fit_predict(X, y)