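// tokenizer_test.js
//
// Unit tests for lunr.tokenizer: token splitting and downcasing, handling of
// non-string and null-like inputs, and per-token metadata (index, position).
// Assumes `suite`/`test` (mocha's TDD interface), `assert` (chai), and `lunr`
// itself are made available as globals by the test runner setup.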
suite('lunr.tokenizer', function () {
  var toString = function (o) { return o.toString() }

  test('splitting into tokens', function () {
    var tokens = lunr.tokenizer('foo bar baz')
      .map(toString)

    assert.sameMembers(['foo', 'bar', 'baz'], tokens)
  })

  test('downcases tokens', function () {
    var tokens = lunr.tokenizer('Foo BAR BAZ')
      .map(toString)

    assert.sameMembers(['foo', 'bar', 'baz'], tokens)
  })

  test('array of strings', function () {
    var tokens = lunr.tokenizer(['foo', 'bar', 'baz'])
      .map(toString)

    assert.sameMembers(['foo', 'bar', 'baz'], tokens)
  })

  test('undefined is converted to empty string', function () {
    var tokens = lunr.tokenizer(['foo', undefined, 'baz'])
      .map(toString)

    assert.sameMembers(['foo', '', 'baz'], tokens)
  })

  test('null is converted to empty string', function () {
    var tokens = lunr.tokenizer(['foo', null, 'baz'])
      .map(toString)

    assert.sameMembers(['foo', '', 'baz'], tokens)
  })

  test('multiple white space is stripped', function () {
    var tokens = lunr.tokenizer('  foo    bar  baz  ')
      .map(toString)

    assert.sameMembers(['foo', 'bar', 'baz'], tokens)
  })

  test('handling null-like arguments', function () {
    assert.lengthOf(lunr.tokenizer(), 0)
    assert.lengthOf(lunr.tokenizer(undefined), 0)
    assert.lengthOf(lunr.tokenizer(null), 0)
  })

  test('converting a date to tokens', function () {
    var date = new Date(Date.UTC(2013, 0, 1, 12))

    // NOTE: slicing here to prevent asserting on parts of the date
    // that might be affected by the timezone the test is running in.
    assert.sameMembers(['tue', 'jan', '01', '2013'], lunr.tokenizer(date).slice(0, 4).map(toString))
  })

  test('converting a number to tokens', function () {
    assert.equal('41', lunr.tokenizer(41).map(toString))
  })

  test('converting a boolean to tokens', function () {
    assert.equal('false', lunr.tokenizer(false).map(toString))
  })

  test('converting an object to tokens', function () {
    var obj = {
      toString: function () { return 'custom object' }
    }

    assert.sameMembers(lunr.tokenizer(obj).map(toString), ['custom', 'object'])
  })

  test('splits strings with hyphens', function () {
    assert.sameMembers(lunr.tokenizer('foo-bar').map(toString), ['foo', 'bar'])
  })

  test('splits strings with hyphens and spaces', function () {
    assert.sameMembers(lunr.tokenizer('foo - bar').map(toString), ['foo', 'bar'])
  })

  test('tracking the token index', function () {
    var tokens = lunr.tokenizer('foo bar')

    assert.equal(tokens[0].metadata.index, 0)
    assert.equal(tokens[1].metadata.index, 1)
  })

  test('tracking the token position', function () {
    var tokens = lunr.tokenizer('foo bar')

    assert.deepEqual(tokens[0].metadata.position, [0, 3])
    assert.deepEqual(tokens[1].metadata.position, [4, 3])
  })

  test('tracking the token position with additional left-hand whitespace', function () {
    var tokens = lunr.tokenizer(' foo bar')

    assert.deepEqual(tokens[0].metadata.position, [1, 3])
    assert.deepEqual(tokens[1].metadata.position, [5, 3])
  })

  test('tracking the token position with additional right-hand whitespace', function () {
    var tokens = lunr.tokenizer('foo bar ')

    assert.deepEqual(tokens[0].metadata.position, [0, 3])
    assert.deepEqual(tokens[1].metadata.position, [4, 3])
  })

  test('providing additional metadata', function () {
    var tokens = lunr.tokenizer('foo bar', { 'hurp': 'durp' })

    assert.deepEqual(tokens[0].metadata.hurp, 'durp')
    assert.deepEqual(tokens[1].metadata.hurp, 'durp')
  })
})
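
// A minimal usage sketch of the behaviour exercised above, kept as a comment
// since it is not part of the suite. Note that lunr.tokenizer returns
// lunr.Token instances, so toString() is needed to get the raw string values:
//
//   var tokens = lunr.tokenizer('Hello World')
//   tokens.map(function (t) { return t.toString() }) // => ['hello', 'world']
//   tokens[0].metadata.position                      // => [0, 5] (start, length)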