| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160 |
- """
- Graph utilities and algorithms
- Graphs are represented with their adjacency matrices, preferably using
- sparse matrices.
- """
- # Authors: Aric Hagberg <hagberg@lanl.gov>
- # Gael Varoquaux <gael.varoquaux@normalesup.org>
- # Jake Vanderplas <vanderplas@astro.washington.edu>
- # License: BSD 3 clause
- import numpy as np
- from scipy import sparse
- from ..metrics.pairwise import pairwise_distances
- ###############################################################################
- # Path and connected component analysis.
- # Code adapted from networkx
- def single_source_shortest_path_length(graph, source, *, cutoff=None):
- """Return the length of the shortest path from source to all reachable nodes.
- Parameters
- ----------
- graph : {sparse matrix, ndarray} of shape (n_nodes, n_nodes)
- Adjacency matrix of the graph. Sparse matrix of format LIL is
- preferred.
- source : int
- Start node for path.
- cutoff : int, default=None
- Depth to stop the search - only paths of length <= cutoff are returned.
- Returns
- -------
- paths : dict
- Reachable end nodes mapped to length of path from source,
- i.e. `{end: path_length}`.
- Examples
- --------
- >>> from sklearn.utils.graph import single_source_shortest_path_length
- >>> import numpy as np
- >>> graph = np.array([[ 0, 1, 0, 0],
- ... [ 1, 0, 1, 0],
- ... [ 0, 1, 0, 0],
- ... [ 0, 0, 0, 0]])
- >>> single_source_shortest_path_length(graph, 0)
- {0: 0, 1: 1, 2: 2}
- >>> graph = np.ones((6, 6))
- >>> sorted(single_source_shortest_path_length(graph, 2).items())
- [(0, 1), (1, 1), (2, 0), (3, 1), (4, 1), (5, 1)]
- """
- if sparse.issparse(graph):
- graph = graph.tolil()
- else:
- graph = sparse.lil_matrix(graph)
- seen = {} # level (number of hops) when seen in BFS
- level = 0 # the current level
- next_level = [source] # dict of nodes to check at next level
- while next_level:
- this_level = next_level # advance to next level
- next_level = set() # and start a new list (fringe)
- for v in this_level:
- if v not in seen:
- seen[v] = level # set the level of vertex v
- next_level.update(graph.rows[v])
- if cutoff is not None and cutoff <= level:
- break
- level += 1
- return seen # return all path lengths as dictionary
- def _fix_connected_components(
- X,
- graph,
- n_connected_components,
- component_labels,
- mode="distance",
- metric="euclidean",
- **kwargs,
- ):
- """Add connections to sparse graph to connect unconnected components.
- For each pair of unconnected components, compute all pairwise distances
- from one component to the other, and add a connection on the closest pair
- of samples. This is a hacky way to get a graph with a single connected
- component, which is necessary for example to compute a shortest path
- between all pairs of samples in the graph.
- Parameters
- ----------
- X : array of shape (n_samples, n_features) or (n_samples, n_samples)
- Features to compute the pairwise distances. If `metric =
- "precomputed"`, X is the matrix of pairwise distances.
- graph : sparse matrix of shape (n_samples, n_samples)
- Graph of connection between samples.
- n_connected_components : int
- Number of connected components, as computed by
- `scipy.sparse.csgraph.connected_components`.
- component_labels : array of shape (n_samples)
- Labels of connected components, as computed by
- `scipy.sparse.csgraph.connected_components`.
- mode : {'connectivity', 'distance'}, default='distance'
- Type of graph matrix: 'connectivity' corresponds to the connectivity
- matrix with ones and zeros, and 'distance' corresponds to the distances
- between neighbors according to the given metric.
- metric : str
- Metric used in `sklearn.metrics.pairwise.pairwise_distances`.
- kwargs : kwargs
- Keyword arguments passed to
- `sklearn.metrics.pairwise.pairwise_distances`.
- Returns
- -------
- graph : sparse matrix of shape (n_samples, n_samples)
- Graph of connection between samples, with a single connected component.
- """
- if metric == "precomputed" and sparse.issparse(X):
- raise RuntimeError(
- "_fix_connected_components with metric='precomputed' requires the "
- "full distance matrix in X, and does not work with a sparse "
- "neighbors graph."
- )
- for i in range(n_connected_components):
- idx_i = np.flatnonzero(component_labels == i)
- Xi = X[idx_i]
- for j in range(i):
- idx_j = np.flatnonzero(component_labels == j)
- Xj = X[idx_j]
- if metric == "precomputed":
- D = X[np.ix_(idx_i, idx_j)]
- else:
- D = pairwise_distances(Xi, Xj, metric=metric, **kwargs)
- ii, jj = np.unravel_index(D.argmin(axis=None), D.shape)
- if mode == "connectivity":
- graph[idx_i[ii], idx_j[jj]] = 1
- graph[idx_j[jj], idx_i[ii]] = 1
- elif mode == "distance":
- graph[idx_i[ii], idx_j[jj]] = D[ii, jj]
- graph[idx_j[jj], idx_i[ii]] = D[ii, jj]
- else:
- raise ValueError(
- "Unknown mode=%r, should be one of ['connectivity', 'distance']."
- % mode
- )
- return graph
|