// tokenizer_test.js — QUnit tests for lunr.tokenizer
  1. module('lunr.tokenizer')
  2. test("splitting simple strings into tokens", function () {
  3. var simpleString = "this is a simple string",
  4. tokens = lunr.tokenizer(simpleString)
  5. deepEqual(tokens, ['this', 'is', 'a', 'simple', 'string'])
  6. })
  7. test('downcasing tokens', function () {
  8. var simpleString = 'FOO BAR',
  9. tags = ['Foo', 'BAR']
  10. deepEqual(lunr.tokenizer(simpleString), ['foo', 'bar'])
  11. deepEqual(lunr.tokenizer(tags), ['foo', 'bar'])
  12. })
  13. test('handling arrays of strings', function () {
  14. var tags = ['foo', 'bar'],
  15. tokens = lunr.tokenizer(tags)
  16. deepEqual(tokens, tags)
  17. })
  18. test('handling arrays with undefined or null values', function () {
  19. var arr = ['foo', undefined, null, 'bar'],
  20. tokens = lunr.tokenizer(arr)
  21. deepEqual(tokens, ['foo', '', '', 'bar'])
  22. })
  23. test('handling multiple white spaces', function () {
  24. var testString = ' foo bar ',
  25. tokens = lunr.tokenizer(testString)
  26. deepEqual(tokens, ['foo', 'bar'])
  27. })
  28. test('handling null-like arguments', function () {
  29. deepEqual(lunr.tokenizer(), [])
  30. deepEqual(lunr.tokenizer(null), [])
  31. deepEqual(lunr.tokenizer(undefined), [])
  32. })
  33. test('calling to string on passed val', function () {
  34. var date = new Date (Date.UTC(2013, 0, 1, 12)),
  35. obj = {
  36. toString: function () { return 'custom object' }
  37. }
  38. equal(lunr.tokenizer(41), '41')
  39. equal(lunr.tokenizer(false), 'false')
  40. deepEqual(lunr.tokenizer(obj), ['custom', 'object'])
  41. // slicing here to avoid asserting on the timezone part of the date
  42. // that will be different whereever the test is run.
  43. deepEqual(lunr.tokenizer(date).slice(0, 4), ['tue', 'jan', '01', '2013'])
  44. })
  45. test("splitting strings with hyphens", function () {
  46. var simpleString = "take the New York-San Francisco flight",
  47. tokens = lunr.tokenizer(simpleString)
  48. deepEqual(tokens, ['take', 'the', 'new', 'york', 'san', 'francisco', 'flight'])
  49. })
  50. test("splitting strings with hyphens and spaces", function () {
  51. var simpleString = "Solve for A - B",
  52. tokens = lunr.tokenizer(simpleString)
  53. deepEqual(tokens, ['solve', 'for', 'a', 'b'])
  54. })
  55. test("registering a tokenizer function", function () {
  56. var fn = function () {}
  57. lunr.tokenizer.registerFunction(fn, 'test')
  58. equal(fn.label, 'test')
  59. equal(lunr.tokenizer.registeredFunctions['test'], fn)
  60. delete lunr.tokenizer.registerFunction['test'] // resetting the state after the test
  61. })
  62. test("loading a registered tokenizer", function () {
  63. var serialized = 'default', // default tokenizer is already registered
  64. tokenizerFn = lunr.tokenizer.load(serialized)
  65. equal(tokenizerFn, lunr.tokenizer)
  66. })
  67. test("loading an un-registered tokenizer", function () {
  68. var serialized = 'un-registered' // default tokenizer is already registered
  69. throws(function () {
  70. lunr.tokenizer.load(serialized)
  71. })
  72. })
  73. test('custom separator', function () {
  74. try {
  75. var defaultSeparator = lunr.tokenizer.separator,
  76. str = 'foo|bar|baz'
  77. lunr.tokenizer.separator = '|'
  78. deepEqual(lunr.tokenizer(str), ['foo', 'bar', 'baz'])
  79. } finally {
  80. lunr.tokenizer.separator = defaultSeparator
  81. }
  82. })