| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788 |
- """
- ==================
- Words/Ladder Graph
- ==================
- Generate an undirected graph over the 5757 5-letter words in the datafile
- `words_dat.txt.gz`. Two words are connected by an edge if they differ in one
- letter, resulting in 14,135 edges. This example is described in Section 1.1 of
- Donald E. Knuth, "The Stanford GraphBase: A Platform for Combinatorial
- Computing", ACM Press, New York, 1993.
- http://www-cs-faculty.stanford.edu/~knuth/sgb.html
- The data file can be found at:
- - https://github.com/networkx/networkx/blob/main/examples/graph/words_dat.txt.gz
- """
- import gzip
- from string import ascii_lowercase as lowercase
- import matplotlib.pyplot as plt
- import networkx as nx
- def generate_graph(words):
- G = nx.Graph(name="words")
- lookup = {c: lowercase.index(c) for c in lowercase}
- def edit_distance_one(word):
- for i in range(len(word)):
- left, c, right = word[0:i], word[i], word[i + 1 :]
- j = lookup[c] # lowercase.index(c)
- for cc in lowercase[j + 1 :]:
- yield left + cc + right
- candgen = (
- (word, cand)
- for word in sorted(words)
- for cand in edit_distance_one(word)
- if cand in words
- )
- G.add_nodes_from(words)
- for word, cand in candgen:
- G.add_edge(word, cand)
- return G
- def words_graph():
- """Return the words example graph from the Stanford GraphBase"""
- fh = gzip.open("words_dat.txt.gz", "r")
- words = set()
- for line in fh.readlines():
- line = line.decode()
- if line.startswith("*"):
- continue
- w = str(line[0:5])
- words.add(w)
- return generate_graph(words)
- G = words_graph()
- print("Loaded words_dat.txt containing 5757 five-letter English words.")
- print("Two words are connected if they differ in one letter.")
- print(G)
- print(f"{nx.number_connected_components(G)} connected components")
- for source, target in [("chaos", "order"), ("nodes", "graph"), ("pound", "marks")]:
- print(f"Shortest path between {source} and {target} is")
- try:
- shortest_path = nx.shortest_path(G, source, target)
- for n in shortest_path:
- print(n)
- except nx.NetworkXNoPath:
- print("None")
- # draw a subset of the graph
- boundary = list(nx.node_boundary(G, shortest_path))
- G.add_nodes_from(shortest_path, color="red")
- G.add_nodes_from(boundary, color="blue")
- H = G.subgraph(shortest_path + boundary)
- colors = nx.get_node_attributes(H, "color")
- options = {"node_size": 1500, "alpha": 0.3, "node_color": colors.values()}
- pos = nx.kamada_kawai_layout(H)
- nx.draw(H, pos, **options)
- nx.draw_networkx_labels(H, pos, font_weight="bold")
- plt.show()
|