12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576 |
- /*!
- * lunr.tokenizer
- * Copyright (C) @YEAR Oliver Nightingale
- */
/**
 * A function for splitting a string into tokens ready to be inserted into
 * the search index. Uses `lunr.tokenizer.separator` to split strings, change
 * the value of this property to change how strings are split into tokens.
 *
 * - `null`, `undefined` or a missing argument yields an empty array.
 * - An array is treated as already tokenized: each element is converted with
 *   `lunr.utils.asString` and lower-cased, nothing is split.
 * - Anything else is converted with `toString()`, trimmed, lower-cased and
 *   split on the separator. Empty tokens are discarded, so empty input and
 *   leading/trailing separators never produce `''` tokens.
 *
 * @module
 * @param {String} obj The string to convert into tokens
 * @see lunr.tokenizer.separator
 * @returns {Array}
 */
lunr.tokenizer = function (obj) {
  // `== null` matches both null and undefined, which also covers a call
  // made with no arguments at all.
  if (obj == null) return []

  if (Array.isArray(obj)) {
    return obj.map(function (t) { return lunr.utils.asString(t).toLowerCase() })
  }

  // trim() only strips whitespace, so input such as '-foo-' (or an empty
  // string) makes split() emit '' entries; those are not useful tokens.
  return obj.toString().trim().toLowerCase()
    .split(lunr.tokenizer.separator)
    .filter(function (token) { return token !== '' })
}
/**
 * The separator used to split a string into tokens. Override this property
 * to change how `lunr.tokenizer` breaks strings apart. The default matches
 * one or more consecutive whitespace or hyphen characters.
 *
 * @static
 * @see lunr.tokenizer
 */
lunr.tokenizer.separator = /[\s\-]+/
/**
 * Loads a previously serialised tokenizer.
 *
 * A tokenizer function to be loaded must already be registered with lunr.tokenizer.
 * If the serialised tokenizer has not been registered then an error will be thrown.
 *
 * @param {String} label The label of the serialised tokenizer.
 * @returns {Function}
 * @throws {Error} If no tokenizer has been registered under `label`.
 * @memberOf tokenizer
 */
lunr.tokenizer.load = function (label) {
  var registry = this.registeredFunctions

  // Only own properties count as registrations. A bare lookup would also
  // resolve names inherited from Object.prototype (e.g. 'toString' or
  // 'constructor') and hand back a built-in instead of throwing.
  var isRegistered = Object.prototype.hasOwnProperty.call(registry, label)
  var fn = isRegistered ? registry[label] : undefined

  if (!fn) {
    throw new Error('Cannot load un-registered function: ' + label)
  }

  return fn
}
// Registry of tokenizer functions keyed by label. It starts out holding only
// the built-in tokenizer under the label 'default'.
lunr.tokenizer.registeredFunctions = { 'default': lunr.tokenizer }

// The built-in tokenizer carries the same label it is registered under.
lunr.tokenizer.label = 'default'
/**
 * Register a tokenizer function.
 *
 * Functions that are used as tokenizers should be registered if they are to be used with a serialised index.
 *
 * Registering a function does not add it to an index, functions must still be associated with a specific index for them to be used when indexing and searching documents.
 *
 * The label is also stored on the function itself as `fn.label`. Registering
 * under a label that is already in use replaces the previous function after
 * emitting a warning through `lunr.utils.warn`.
 *
 * @param {Function} fn The function to register.
 * @param {String} label The label to register this function with
 * @memberOf tokenizer
 */
lunr.tokenizer.registerFunction = function (fn, label) {
  // Own-property check: the `in` operator would also match keys inherited
  // from Object.prototype (e.g. 'toString') and warn about an overwrite
  // that is not actually happening.
  if (Object.prototype.hasOwnProperty.call(this.registeredFunctions, label)) {
    lunr.utils.warn('Overwriting existing tokenizer: ' + label)
  }

  fn.label = label
  this.registeredFunctions[label] = fn
}
|