plot_words.py 2.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788
  1. """
  2. ==================
  3. Words/Ladder Graph
  4. ==================
  5. Generate an undirected graph over the 5757 5-letter words in the datafile
  6. `words_dat.txt.gz`. Two words are connected by an edge if they differ in one
  7. letter, resulting in 14,135 edges. This example is described in Section 1.1 of
  8. Donald E. Knuth, "The Stanford GraphBase: A Platform for Combinatorial
  9. Computing", ACM Press, New York, 1993.
  10. http://www-cs-faculty.stanford.edu/~knuth/sgb.html
  11. The data file can be found at:
  12. - https://github.com/networkx/networkx/blob/main/examples/graph/words_dat.txt.gz
  13. """
  14. import gzip
  15. from string import ascii_lowercase as lowercase
  16. import matplotlib.pyplot as plt
  17. import networkx as nx
  18. def generate_graph(words):
  19. G = nx.Graph(name="words")
  20. lookup = {c: lowercase.index(c) for c in lowercase}
  21. def edit_distance_one(word):
  22. for i in range(len(word)):
  23. left, c, right = word[0:i], word[i], word[i + 1 :]
  24. j = lookup[c] # lowercase.index(c)
  25. for cc in lowercase[j + 1 :]:
  26. yield left + cc + right
  27. candgen = (
  28. (word, cand)
  29. for word in sorted(words)
  30. for cand in edit_distance_one(word)
  31. if cand in words
  32. )
  33. G.add_nodes_from(words)
  34. for word, cand in candgen:
  35. G.add_edge(word, cand)
  36. return G
  37. def words_graph():
  38. """Return the words example graph from the Stanford GraphBase"""
  39. fh = gzip.open("words_dat.txt.gz", "r")
  40. words = set()
  41. for line in fh.readlines():
  42. line = line.decode()
  43. if line.startswith("*"):
  44. continue
  45. w = str(line[0:5])
  46. words.add(w)
  47. return generate_graph(words)
  48. G = words_graph()
  49. print("Loaded words_dat.txt containing 5757 five-letter English words.")
  50. print("Two words are connected if they differ in one letter.")
  51. print(G)
  52. print(f"{nx.number_connected_components(G)} connected components")
  53. for source, target in [("chaos", "order"), ("nodes", "graph"), ("pound", "marks")]:
  54. print(f"Shortest path between {source} and {target} is")
  55. try:
  56. shortest_path = nx.shortest_path(G, source, target)
  57. for n in shortest_path:
  58. print(n)
  59. except nx.NetworkXNoPath:
  60. print("None")
  61. # draw a subset of the graph
  62. boundary = list(nx.node_boundary(G, shortest_path))
  63. G.add_nodes_from(shortest_path, color="red")
  64. G.add_nodes_from(boundary, color="blue")
  65. H = G.subgraph(shortest_path + boundary)
  66. colors = nx.get_node_attributes(H, "color")
  67. options = {"node_size": 1500, "alpha": 0.3, "node_color": colors.values()}
  68. pos = nx.kamada_kawai_layout(H)
  69. nx.draw(H, pos, **options)
  70. nx.draw_networkx_labels(H, pos, font_weight="bold")
  71. plt.show()