token_set_test.js 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327
  1. suite('lunr.TokenSet', function () {
  2. suite('#toString', function () {
  3. test('includes node finality', function () {
  4. var nonFinal = new lunr.TokenSet,
  5. final = new lunr.TokenSet,
  6. otherFinal = new lunr.TokenSet
  7. final.final = true
  8. otherFinal.final = true
  9. assert.notEqual(nonFinal.toString(), final.toString())
  10. assert.equal(otherFinal.toString(), final.toString())
  11. })
  12. test('includes all edges', function () {
  13. var zeroEdges = new lunr.TokenSet,
  14. oneEdge = new lunr.TokenSet,
  15. twoEdges = new lunr.TokenSet
  16. oneEdge.edges['a'] = 1
  17. twoEdges.edges['a'] = 1
  18. twoEdges.edges['b'] = 1
  19. assert.notEqual(zeroEdges.toString(), oneEdge.toString())
  20. assert.notEqual(twoEdges.toString(), oneEdge.toString())
  21. assert.notEqual(twoEdges.toString(), zeroEdges.toString())
  22. })
  23. test('includes edge id', function () {
  24. var childA = new lunr.TokenSet,
  25. childB = new lunr.TokenSet,
  26. parentA = new lunr.TokenSet,
  27. parentB = new lunr.TokenSet,
  28. parentC = new lunr.TokenSet
  29. parentA.edges['a'] = childA
  30. parentB.edges['a'] = childB
  31. parentC.edges['a'] = childB
  32. assert.equal(parentB.toString(), parentC.toString())
  33. assert.notEqual(parentA.toString(), parentC.toString())
  34. assert.notEqual(parentA.toString(), parentB.toString())
  35. })
  36. })
  37. suite('.fromString', function () {
  38. test('without wildcard', function () {
  39. lunr.TokenSet._nextId = 1
  40. var x = lunr.TokenSet.fromString('a')
  41. assert.equal(x.toString(), '0a2')
  42. assert.isOk(x.edges['a'].final)
  43. })
  44. test('with trailing wildcard', function () {
  45. var x = lunr.TokenSet.fromString('a*'),
  46. wild = x.edges['a'].edges['*']
  47. // a state reached by a wildcard has
  48. // an edge with a wildcard to itself.
  49. // the resulting automota is
  50. // non-determenistic
  51. assert.equal(wild, wild.edges['*'])
  52. assert.isOk(wild.final)
  53. })
  54. })
  55. suite('.fromArray', function () {
  56. test('with unsorted array', function () {
  57. assert.throws(function () {
  58. lunr.TokenSet.fromArray(['z', 'a'])
  59. })
  60. })
  61. test('with sorted array', function () {
  62. var tokenSet = lunr.TokenSet.fromArray(['a', 'z'])
  63. assert.deepEqual(['a', 'z'], tokenSet.toArray().sort())
  64. })
  65. test('is minimal', function () {
  66. var tokenSet = lunr.TokenSet.fromArray(['ac', 'dc']),
  67. acNode = tokenSet.edges['a'].edges['c'],
  68. dcNode = tokenSet.edges['d'].edges['c']
  69. assert.deepEqual(acNode, dcNode)
  70. })
  71. })
  72. suite('#toArray', function () {
  73. test('includes all words', function () {
  74. var words = ['bat', 'cat'],
  75. tokenSet = lunr.TokenSet.fromArray(words)
  76. assert.sameMembers(words, tokenSet.toArray())
  77. })
  78. test('includes single words', function () {
  79. var word = 'bat',
  80. tokenSet = lunr.TokenSet.fromString(word)
  81. assert.sameMembers([word], tokenSet.toArray())
  82. })
  83. })
  84. suite('#intersect', function () {
  85. test('no intersection', function () {
  86. var x = lunr.TokenSet.fromString('cat'),
  87. y = lunr.TokenSet.fromString('bar'),
  88. z = x.intersect(y)
  89. assert.equal(0, z.toArray().length)
  90. })
  91. test('simple intersection', function () {
  92. var x = lunr.TokenSet.fromString('cat'),
  93. y = lunr.TokenSet.fromString('cat'),
  94. z = x.intersect(y)
  95. assert.sameMembers(['cat'], z.toArray())
  96. })
  97. test('trailing wildcard intersection', function () {
  98. var x = lunr.TokenSet.fromString('cat'),
  99. y = lunr.TokenSet.fromString('c*'),
  100. z = x.intersect(y)
  101. assert.sameMembers(['cat'], z.toArray())
  102. })
  103. test('trailing wildcard no intersection', function () {
  104. var x = lunr.TokenSet.fromString('cat'),
  105. y = lunr.TokenSet.fromString('b*'),
  106. z = x.intersect(y)
  107. assert.equal(0, z.toArray().length)
  108. })
  109. test('leading wildcard intersection', function () {
  110. var x = lunr.TokenSet.fromString('cat'),
  111. y = lunr.TokenSet.fromString('*t'),
  112. z = x.intersect(y)
  113. assert.sameMembers(['cat'], z.toArray())
  114. })
  115. test('leading wildcard backtracking intersection', function () {
  116. var x = lunr.TokenSet.fromString('aaacbab'),
  117. y = lunr.TokenSet.fromString('*ab'),
  118. z = x.intersect(y)
  119. assert.sameMembers(['aaacbab'], z.toArray())
  120. })
  121. test('leading wildcard no intersection', function () {
  122. var x = lunr.TokenSet.fromString('cat'),
  123. y = lunr.TokenSet.fromString('*r'),
  124. z = x.intersect(y)
  125. assert.equal(0, z.toArray().length)
  126. })
  127. test('leading wildcard backtracking no intersection', function () {
  128. var x = lunr.TokenSet.fromString('aaabdcbc'),
  129. y = lunr.TokenSet.fromString('*abc'),
  130. z = x.intersect(y)
  131. assert.equal(0, z.toArray().length)
  132. })
  133. test('contained wildcard intersection', function () {
  134. var x = lunr.TokenSet.fromString('foo'),
  135. y = lunr.TokenSet.fromString('f*o'),
  136. z = x.intersect(y)
  137. assert.sameMembers(['foo'], z.toArray())
  138. })
  139. test('contained wildcard backtracking intersection', function () {
  140. var x = lunr.TokenSet.fromString('ababc'),
  141. y = lunr.TokenSet.fromString('a*bc'),
  142. z = x.intersect(y)
  143. assert.sameMembers(['ababc'], z.toArray())
  144. })
  145. test('contained wildcard no intersection', function () {
  146. var x = lunr.TokenSet.fromString('foo'),
  147. y = lunr.TokenSet.fromString('b*r'),
  148. z = x.intersect(y)
  149. assert.equal(0, z.toArray().length)
  150. })
  151. test('contained wildcard backtracking no intersection', function () {
  152. var x = lunr.TokenSet.fromString('ababc'),
  153. y = lunr.TokenSet.fromString('a*ac'),
  154. z = x.intersect(y)
  155. assert.equal(0, z.toArray().length)
  156. })
  157. test('wildcard matches zero or more characters', function () {
  158. var x = lunr.TokenSet.fromString('foo'),
  159. y = lunr.TokenSet.fromString('foo*'),
  160. z = x.intersect(y)
  161. assert.sameMembers(['foo'], z.toArray())
  162. })
  163. // This test is intended to prevent 'bugs' that have lead to these
  164. // kind of intersections taking a _very_ long time. The assertion
  165. // is not of interest, just that the test does not timeout.
  166. test('catastrophic backtracking with leading characters', function () {
  167. var x = lunr.TokenSet.fromString('fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff'),
  168. y = lunr.TokenSet.fromString('*ff'),
  169. z = x.intersect(y)
  170. assert.equal(1, z.toArray().length)
  171. })
  172. test('leading and trailing backtracking intersection', function () {
  173. var x = lunr.TokenSet.fromString('acbaabab'),
  174. y = lunr.TokenSet.fromString('*ab*'),
  175. z = x.intersect(y)
  176. assert.sameMembers(['acbaabab'], z.toArray())
  177. })
  178. test('multiple contained wildcard backtracking', function () {
  179. var x = lunr.TokenSet.fromString('acbaabab'),
  180. y = lunr.TokenSet.fromString('a*ba*b'),
  181. z = x.intersect(y)
  182. assert.sameMembers(['acbaabab'], z.toArray())
  183. })
  184. test('intersect with fuzzy string substitution', function () {
  185. var x1 = lunr.TokenSet.fromString('bar'),
  186. x2 = lunr.TokenSet.fromString('cur'),
  187. x3 = lunr.TokenSet.fromString('cat'),
  188. x4 = lunr.TokenSet.fromString('car'),
  189. x5 = lunr.TokenSet.fromString('foo'),
  190. y = lunr.TokenSet.fromFuzzyString('car', 1)
  191. assert.sameMembers(x1.intersect(y).toArray(), ["bar"])
  192. assert.sameMembers(x2.intersect(y).toArray(), ["cur"])
  193. assert.sameMembers(x3.intersect(y).toArray(), ["cat"])
  194. assert.sameMembers(x4.intersect(y).toArray(), ["car"])
  195. assert.equal(x5.intersect(y).toArray().length, 0)
  196. })
  197. test('intersect with fuzzy string deletion', function () {
  198. var x1 = lunr.TokenSet.fromString('ar'),
  199. x2 = lunr.TokenSet.fromString('br'),
  200. x3 = lunr.TokenSet.fromString('ba'),
  201. x4 = lunr.TokenSet.fromString('bar'),
  202. x5 = lunr.TokenSet.fromString('foo'),
  203. y = lunr.TokenSet.fromFuzzyString('bar', 1)
  204. assert.sameMembers(x1.intersect(y).toArray(), ["ar"])
  205. assert.sameMembers(x2.intersect(y).toArray(), ["br"])
  206. assert.sameMembers(x3.intersect(y).toArray(), ["ba"])
  207. assert.sameMembers(x4.intersect(y).toArray(), ["bar"])
  208. assert.equal(x5.intersect(y).toArray().length, 0)
  209. })
  210. test('intersect with fuzzy string insertion', function () {
  211. var x1 = lunr.TokenSet.fromString('bbar'),
  212. x2 = lunr.TokenSet.fromString('baar'),
  213. x3 = lunr.TokenSet.fromString('barr'),
  214. x4 = lunr.TokenSet.fromString('bar'),
  215. x5 = lunr.TokenSet.fromString('ba'),
  216. x6 = lunr.TokenSet.fromString('foo'),
  217. x7 = lunr.TokenSet.fromString('bara'),
  218. y = lunr.TokenSet.fromFuzzyString('bar', 1)
  219. assert.sameMembers(x1.intersect(y).toArray(), ["bbar"])
  220. assert.sameMembers(x2.intersect(y).toArray(), ["baar"])
  221. assert.sameMembers(x3.intersect(y).toArray(), ["barr"])
  222. assert.sameMembers(x4.intersect(y).toArray(), ["bar"])
  223. assert.sameMembers(x5.intersect(y).toArray(), ["ba"])
  224. assert.equal(x6.intersect(y).toArray().length, 0)
  225. assert.sameMembers(x7.intersect(y).toArray(), ["bara"])
  226. })
  227. test('intersect with fuzzy string transpose', function () {
  228. var x1 = lunr.TokenSet.fromString('abr'),
  229. x2 = lunr.TokenSet.fromString('bra'),
  230. x3 = lunr.TokenSet.fromString('foo'),
  231. y = lunr.TokenSet.fromFuzzyString('bar', 1)
  232. assert.sameMembers(x1.intersect(y).toArray(), ["abr"])
  233. assert.sameMembers(x2.intersect(y).toArray(), ["bra"])
  234. assert.equal(x3.intersect(y).toArray().length, 0)
  235. })
  236. test('fuzzy string insertion', function () {
  237. var x = lunr.TokenSet.fromString('abcxx'),
  238. y = lunr.TokenSet.fromFuzzyString('abc', 2)
  239. assert.sameMembers(x.intersect(y).toArray(), ['abcxx'])
  240. })
  241. test('fuzzy string substitution', function () {
  242. var x = lunr.TokenSet.fromString('axx'),
  243. y = lunr.TokenSet.fromFuzzyString('abc', 2)
  244. assert.sameMembers(x.intersect(y).toArray(), ['axx'])
  245. })
  246. test('fuzzy string deletion', function () {
  247. var x = lunr.TokenSet.fromString('a'),
  248. y = lunr.TokenSet.fromFuzzyString('abc', 2)
  249. assert.sameMembers(x.intersect(y).toArray(), ['a'])
  250. })
  251. test('fuzzy string transpose', function () {
  252. var x = lunr.TokenSet.fromString('bca'),
  253. y = lunr.TokenSet.fromFuzzyString('abc', 2)
  254. assert.sameMembers(x.intersect(y).toArray(), ['bca'])
  255. })
  256. })
  257. })