123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111 |
// Groups every test below under the 'lunr.tokenizer' module name.
module('lunr.tokenizer')
// A plain sentence is expected to yield one token per space-separated word.
test("splitting simple strings into tokens", function () {
  var expectedTokens = ['this', 'is', 'a', 'simple', 'string']

  deepEqual(lunr.tokenizer("this is a simple string"), expectedTokens)
})
// Tokens are expected to come back lower-cased for both string and array input.
test('downcasing tokens', function () {
  var fromString = lunr.tokenizer('FOO BAR')
  deepEqual(fromString, ['foo', 'bar'])

  var fromArray = lunr.tokenizer(['Foo', 'BAR'])
  deepEqual(fromArray, ['foo', 'bar'])
})
// An array of already lower-case strings is expected to tokenize to equal contents.
test('handling arrays of strings', function () {
  var input = ['foo', 'bar']

  deepEqual(lunr.tokenizer(input), input)
})
// undefined and null array entries are expected to become empty-string tokens.
test('handling arrays with undefined or null values', function () {
  var result = lunr.tokenizer(['foo', undefined, null, 'bar'])

  deepEqual(result, ['foo', '', '', 'bar'])
})
// Leading and trailing white space must not produce empty tokens.
test('handling multiple white spaces', function () {
  var padded = ' foo bar '

  deepEqual(lunr.tokenizer(padded), ['foo', 'bar'])
})
// A missing, null or undefined argument is expected to yield an empty token list.
test('handling null-like arguments', function () {
  var fromNoArgument = lunr.tokenizer()
  deepEqual(fromNoArgument, [])

  var fromNull = lunr.tokenizer(null)
  deepEqual(fromNull, [])

  var fromUndefined = lunr.tokenizer(undefined)
  deepEqual(fromUndefined, [])
})
// Values that are neither strings nor arrays are expected to be tokenized
// from their string representation.
test('calling to string on passed val', function () {
  var date = new Date (Date.UTC(2013, 0, 1, 12)),
      obj = {
        toString: function () { return 'custom object' }
      }

  // Compare against arrays with deepEqual: a loose `equal` against a string
  // only passes through array-to-string coercion and would not pin the
  // return shape of the tokenizer.
  deepEqual(lunr.tokenizer(41), ['41'])
  deepEqual(lunr.tokenizer(false), ['false'])
  deepEqual(lunr.tokenizer(obj), ['custom', 'object'])

  // slicing here to avoid asserting on the timezone part of the date
  // that will be different wherever the test is run.
  deepEqual(lunr.tokenizer(date).slice(0, 4), ['tue', 'jan', '01', '2013'])
})
// A hyphen between two words is expected to act as a token boundary.
test("splitting strings with hyphens", function () {
  var expectedTokens = ['take', 'the', 'new', 'york', 'san', 'francisco', 'flight']

  deepEqual(lunr.tokenizer("take the New York-San Francisco flight"), expectedTokens)
})
// A hyphen surrounded by spaces must not leave an empty or '-' token behind.
test("splitting strings with hyphens and spaces", function () {
  var result = lunr.tokenizer("Solve for A - B")

  deepEqual(result, ['solve', 'for', 'a', 'b'])
})
// Registering a function is expected to label it and to store it in
// lunr.tokenizer.registeredFunctions under that label.
test("registering a tokenizer function", function () {
  var fn = function () {}

  try {
    lunr.tokenizer.registerFunction(fn, 'test')

    equal(fn.label, 'test')
    equal(lunr.tokenizer.registeredFunctions['test'], fn)
  } finally {
    // Reset the shared registry after the test. The entry lives on
    // registeredFunctions (the object asserted on above), not on the
    // registerFunction method.
    delete lunr.tokenizer.registeredFunctions['test']
  }
})
// Loading by a registered label is expected to return the registered function.
test("loading a registered tokenizer", function () {
  // the default tokenizer is already registered under the label 'default'
  var loaded = lunr.tokenizer.load('default')

  equal(loaded, lunr.tokenizer)
})
// Loading by a label that was never registered is expected to throw.
test("loading an un-registered tokenizer", function () {
  var unknownLabel = 'un-registered' // no tokenizer is expected under this label

  throws(function () { lunr.tokenizer.load(unknownLabel) })
})
// Overriding lunr.tokenizer.separator is expected to change where strings are split.
test('custom separator', function () {
  try {
    var originalSeparator = lunr.tokenizer.separator

    lunr.tokenizer.separator = '|'
    deepEqual(lunr.tokenizer('foo|bar|baz'), ['foo', 'bar', 'baz'])
  } finally {
    // always restore the shared separator so later tests see the default
    lunr.tokenizer.separator = originalSeparator
  }
})
|