// Test suite for lunr.tokenizer. The `module` call labels the tests that
// follow with this name (presumably the QUnit global; confirm against the
// test harness configuration).
module('lunr.tokenizer')

// A plain sentence is expected to yield one token per space-separated word.
test("splitting simple strings into tokens", function () {
  var expected = ['this', 'is', 'a', 'simple', 'string']

  deepEqual(lunr.tokenizer("this is a simple string"), expected)
})

// Tokens are expected to come back lower-cased, whether the input is a
// single string or an array of strings.
test('downcasing tokens', function () {
  var lowered = ['foo', 'bar']

  deepEqual(lunr.tokenizer('FOO BAR'), lowered)
  deepEqual(lunr.tokenizer(['Foo', 'BAR']), lowered)
})

// An array of already lower-case strings should tokenize to an array that is
// deep-equal to the input.
test('handling arrays of strings', function () {
  var input = ['foo', 'bar']
  var result = lunr.tokenizer(input)

  deepEqual(result, input)
})

// undefined and null array entries are expected to become empty-string
// tokens while keeping their positions in the array.
test('handling arrays with undefined or null values', function () {
  var sparseInput = ['foo', undefined, null, 'bar']

  deepEqual(lunr.tokenizer(sparseInput), ['foo', '', '', 'bar'])
})

// Leading, trailing and repeated spaces must not produce extra tokens.
test('handling multiple white spaces', function () {
  var padded = '  foo    bar  '

  deepEqual(lunr.tokenizer(padded), ['foo', 'bar'])
})

// A missing, null or undefined argument is expected to produce no tokens.
// The three call forms are kept separate because an omitted argument and an
// explicit undefined are different calls.
test('handling null-like arguments', function () {
  var noTokens = []

  deepEqual(lunr.tokenizer(), noTokens)
  deepEqual(lunr.tokenizer(null), noTokens)
  deepEqual(lunr.tokenizer(undefined), noTokens)
})

// Non-string values are expected to be converted with toString() before
// being tokenized.
test('calling to string on passed val', function () {
  var date = new Date (Date.UTC(2013, 0, 1, 12)),
      obj = {
        toString: function () { return 'custom object' }
      }

  // Compare against arrays with deepEqual: a loose `equal` against a string
  // only passes because the token array is coerced to a string by `==`, so it
  // never checks that an array of tokens is actually returned.
  deepEqual(lunr.tokenizer(41), ['41'])
  deepEqual(lunr.tokenizer(false), ['false'])
  deepEqual(lunr.tokenizer(obj), ['custom', 'object'])

  // slicing here to avoid asserting on the timezone part of the date
  // that will be different wherever the test is run.
  deepEqual(lunr.tokenizer(date).slice(0, 4), ['tue', 'jan', '01', '2013'])
})

// A hyphen joining two words is expected to act as a token boundary, just
// like a space.
test("splitting strings with hyphens", function () {
  var expected = ['take', 'the', 'new', 'york', 'san', 'francisco', 'flight']

  deepEqual(lunr.tokenizer("take the New York-San Francisco flight"), expected)
})

// A hyphen surrounded by spaces must not leave an empty or '-' token behind.
test("splitting strings with hyphens and spaces", function () {
  var expression = "Solve for A - B"

  deepEqual(lunr.tokenizer(expression), ['solve', 'for', 'a', 'b'])
})

// Registering a function should label it and store it in the
// lunr.tokenizer.registeredFunctions lookup under that label.
test("registering a tokenizer function", function () {
  var fn = function () {}
  lunr.tokenizer.registerFunction(fn, 'test')

  equal(fn.label, 'test')
  equal(lunr.tokenizer.registeredFunctions['test'], fn)

  // Reset the shared state after the test. The entry lives in
  // `registeredFunctions` (the registry asserted on above), not on the
  // `registerFunction` function itself, so it must be deleted from there or
  // the 'test' registration leaks into later tests.
  delete lunr.tokenizer.registeredFunctions['test']
})

// Loading by a registered label should hand back the registered function.
test("loading a registered tokenizer", function () {
  // 'default' is the label the default tokenizer is already registered under
  var loaded = lunr.tokenizer.load('default')

  equal(loaded, lunr.tokenizer)
})

// Loading a label that was never registered is expected to throw.
test("loading an un-registered tokenizer", function () {
  var unknownLabel = 'un-registered' // no tokenizer is registered under this label

  var loadUnknown = function () {
    lunr.tokenizer.load(unknownLabel)
  }

  throws(loadUnknown)
})

// Overriding lunr.tokenizer.separator should change what strings are split on.
test('custom separator', function () {
  var savedSeparator

  try {
    savedSeparator = lunr.tokenizer.separator
    lunr.tokenizer.separator = '|'

    deepEqual(lunr.tokenizer('foo|bar|baz'), ['foo', 'bar', 'baz'])
  } finally {
    // the separator is shared state: restore it even if the assertion throws
    lunr.tokenizer.separator = savedSeparator
  }
})