/*!
 * lunr.tokenizer
 * Copyright (C) @YEAR Oliver Nightingale
 */

/**
 * A function for splitting a string into tokens ready to be inserted into
 * the search index. Uses `lunr.tokenizer.separator` to split strings; change
 * the value of this property to change how strings are split into tokens.
 *
 * A `null` or `undefined` argument yields an empty array.
 *
 * Any other non-array argument is converted to a string by calling its
 * `toString` method, lower-cased, and then split at every character that
 * matches `lunr.tokenizer.separator`. Runs of adjacent separators never
 * produce empty tokens. Every token created this way gets two metadata
 * entries in addition to the caller-supplied metadata:
 *
 *   - `position`: `[start, length]` of the token, measured in the lower-cased
 *     string that was actually split
 *   - `index`: the ordinal of the token within the returned array
 *
 * Arrays are not split: each element is converted to a string with
 * `lunr.utils.asString`, lower-cased and wrapped in a lunr.Token. Tokens
 * built from array elements receive no `position` or `index` metadata.
 *
 * Optional metadata can be passed to the tokenizer; it is passed through
 * `lunr.utils.clone` once per token so that tokens do not share one object.
 *
 * @static
 * @param {?(string|object|object[])} obj - The object to convert into tokens
 * @param {?object} metadata - Optional metadata to associate with every token
 * @returns {lunr.Token[]}
 * @see {@link lunr.Pipeline}
 */
lunr.tokenizer = function (obj, metadata) {
  // Loose `== null` is intentional: it matches both null and undefined.
  if (obj == null) {
    return [];
  }

  if (Array.isArray(obj)) {
    return obj.map(function (t) {
      return new lunr.Token(
        lunr.utils.asString(t).toLowerCase(),
        lunr.utils.clone(metadata)
      );
    });
  }

  var str = obj.toString().toLowerCase(),
      len = str.length,
      tokens = [];

  // sliceEnd deliberately runs one step past the last character
  // (sliceEnd === len) so that a token ending at the end of the string is
  // flushed by the same branch that handles separators.
  for (var sliceEnd = 0, sliceStart = 0; sliceEnd <= len; sliceEnd++) {
    var char = str.charAt(sliceEnd),
        sliceLength = sliceEnd - sliceStart;

    if (char.match(lunr.tokenizer.separator) || sliceEnd === len) {
      // A zero-length slice means two separators in a row (or a leading
      // separator); skip it rather than emit an empty token.
      if (sliceLength > 0) {
        var tokenMetadata = lunr.utils.clone(metadata) || {};
        tokenMetadata["position"] = [sliceStart, sliceLength];
        tokenMetadata["index"] = tokens.length;

        tokens.push(
          new lunr.Token(
            str.slice(sliceStart, sliceEnd),
            tokenMetadata
          )
        );
      }

      // The next token can start no earlier than the character after this
      // separator.
      sliceStart = sliceEnd + 1;
    }
  }

  return tokens;
};

/**
 * The separator used to split a string into tokens. Override this property to
 * change the behaviour of `lunr.tokenizer` when tokenizing strings. It is
 * matched against one character at a time. By default this splits on
 * whitespace and hyphens.
 *
 * @static
 * @see lunr.tokenizer
 */
lunr.tokenizer.separator = /[\s\-]+/;