123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209 |
/**
 * Lexer that splits a query string into lexemes.
 *
 * State kept on the instance:
 *  - lexemes: the lexemes emitted so far
 *  - str / length: the input string and its length
 *  - start: offset where the lexeme currently being scanned begins
 *  - pos: offset of the next character to read
 *  - escapeCharPositions: offsets of escape characters seen in the current
 *    lexeme, so they can be removed when the lexeme text is sliced out
 *
 * @constructor
 * @param {string} str - The query string to lex.
 */
lunr.QueryLexer = function (str) {
  this.lexemes = []
  this.escapeCharPositions = []

  this.str = str
  this.length = str.length

  this.start = 0
  this.pos = 0
}
/**
 * Drives the lexer state machine. Each state is a function that takes the
 * lexer and returns the next state; lexing begins in lexText and finishes
 * as soon as a state returns a falsy value.
 */
lunr.QueryLexer.prototype.run = function () {
  for (var state = lunr.QueryLexer.lexText; state; state = state(this)) {
    // All the work happens inside the state functions themselves.
  }
}
/**
 * Returns the text between `start` and `pos` with every recorded escape
 * character removed, and clears the recorded escape positions.
 *
 * @returns {string} The text of the current lexeme without escape characters.
 */
lunr.QueryLexer.prototype.sliceString = function () {
  var escapes = this.escapeCharPositions,
      pieces = [],
      cursor = this.start

  // Copy the text up to each escape character, then resume just after it.
  for (var i = 0; i < escapes.length; i++) {
    var escapeAt = escapes[i]
    pieces.push(this.str.slice(cursor, escapeAt))
    cursor = escapeAt + 1
  }

  pieces.push(this.str.slice(cursor, this.pos))

  // The recorded positions belong to this lexeme only.
  escapes.length = 0

  return pieces.join('')
}
/**
 * Appends a lexeme covering `start`..`pos` to the lexeme list, then starts
 * the next lexeme at the current position.
 *
 * @param {string} type - The lexeme type, e.g. lunr.QueryLexer.TERM.
 */
lunr.QueryLexer.prototype.emit = function (type) {
  var lexeme = {
    type: type,
    str: this.sliceString(),
    start: this.start,
    end: this.pos
  }

  this.lexemes.push(lexeme)
  this.start = this.pos
}
/**
 * Records the escape character that has just been read (at `pos - 1`) so
 * that sliceString can drop it, and skips over the character it escapes so
 * that character is treated as plain text.
 *
 * If the escape character is the last character of the input there is
 * nothing to skip; `pos` is left at the end of the string so that lexeme
 * offsets never point past the input.
 */
lunr.QueryLexer.prototype.escapeCharacter = function () {
  this.escapeCharPositions.push(this.pos - 1)

  if (this.pos < this.length) {
    this.pos += 1
  }
}
/**
 * Reads the character at `pos` and advances past it.
 *
 * @returns {string} The character read, or lunr.QueryLexer.EOS when the
 * whole input has already been consumed (in which case `pos` is unchanged).
 */
lunr.QueryLexer.prototype.next = function () {
  if (this.pos < this.length) {
    var current = this.str.charAt(this.pos)
    this.pos += 1
    return current
  }

  return lunr.QueryLexer.EOS
}
/**
 * @returns {number} How many characters have been consumed since the start
 * of the current lexeme (`pos - start`).
 */
lunr.QueryLexer.prototype.width = function () {
  var consumed = this.pos - this.start
  return consumed
}
/**
 * Discards the characters consumed so far by moving `start` up to `pos`.
 * When nothing has been consumed yet, one character is skipped first.
 */
lunr.QueryLexer.prototype.ignore = function () {
  var nothingConsumed = this.start == this.pos

  if (nothingConsumed) {
    this.pos += 1
  }

  this.start = this.pos
}
/**
 * Steps the read position back by one character.
 */
lunr.QueryLexer.prototype.backup = function () {
  this.pos = this.pos - 1
}
/**
 * Consumes consecutive ASCII digits ('0'-'9', char codes 48-57) starting at
 * the current position. The first non-digit character read is pushed back,
 * unless the end of the input was reached.
 */
lunr.QueryLexer.prototype.acceptDigitRun = function () {
  var current, code

  for (;;) {
    current = this.next()
    code = current.charCodeAt(0)

    if (!(code > 47 && code < 58)) {
      break
    }
  }

  // next() does not advance at end of input, so there is nothing to undo.
  if (current != lunr.QueryLexer.EOS) {
    this.backup()
  }
}
/**
 * @returns {boolean} True while there are unread characters in the input.
 */
lunr.QueryLexer.prototype.more = function () {
  var hasUnread = this.pos < this.length
  return hasUnread
}
// Sentinel returned by next() once the input is exhausted.
lunr.QueryLexer.EOS = 'EOS'

// Lexeme types passed to emit().

// Text preceding a ':'.
lunr.QueryLexer.FIELD = 'FIELD'
// A run of query text.
lunr.QueryLexer.TERM = 'TERM'
// The digits following a '~'.
lunr.QueryLexer.EDIT_DISTANCE = 'EDIT_DISTANCE'
// The digits following a '^'.
lunr.QueryLexer.BOOST = 'BOOST'
// A leading '+' or '-'.
lunr.QueryLexer.PRESENCE = 'PRESENCE'
/**
 * State entered right after a ':' has been read. Emits the text before the
 * ':' as a FIELD lexeme and skips the ':' itself.
 *
 * @param {lunr.QueryLexer} lexer - The lexer being driven.
 * @returns {Function} The next state, lexText.
 */
lunr.QueryLexer.lexField = function (lexer) {
  var QueryLexer = lunr.QueryLexer

  // Un-read the ':' so it is not part of the field name.
  lexer.backup()
  lexer.emit(QueryLexer.FIELD)

  // Nothing is pending after the emit, so this skips exactly the ':'.
  lexer.ignore()

  return QueryLexer.lexText
}
/**
 * State entered right after a term separator character has been read.
 * Emits any text before the separator as a TERM lexeme and discards the
 * separator.
 *
 * @param {lunr.QueryLexer} lexer - The lexer being driven.
 * @returns {Function|undefined} lexText while input remains, otherwise
 * undefined, which stops the lexer.
 */
lunr.QueryLexer.lexTerm = function (lexer) {
  // A width of 1 means only the separator itself has been consumed.
  var hasTermText = lexer.width() > 1

  if (hasTermText) {
    lexer.backup()
    lexer.emit(lunr.QueryLexer.TERM)
  }

  lexer.ignore()

  return lexer.more() ? lunr.QueryLexer.lexText : undefined
}
/**
 * State entered when the lexer is positioned on a '~'. Skips the '~' and
 * emits the run of digits after it as an EDIT_DISTANCE lexeme.
 *
 * @param {lunr.QueryLexer} lexer - The lexer being driven.
 * @returns {Function} The next state, lexText.
 */
lunr.QueryLexer.lexEditDistance = function (lexer) {
  var QueryLexer = lunr.QueryLexer

  lexer.ignore()
  lexer.acceptDigitRun()
  lexer.emit(QueryLexer.EDIT_DISTANCE)

  return QueryLexer.lexText
}
/**
 * State entered when the lexer is positioned on a '^'. Skips the '^' and
 * emits the run of digits after it as a BOOST lexeme.
 *
 * @param {lunr.QueryLexer} lexer - The lexer being driven.
 * @returns {Function} The next state, lexText.
 */
lunr.QueryLexer.lexBoost = function (lexer) {
  var QueryLexer = lunr.QueryLexer

  lexer.ignore()
  lexer.acceptDigitRun()
  lexer.emit(QueryLexer.BOOST)

  return QueryLexer.lexText
}
/**
 * Final state, entered when the end of the input is reached. Emits any
 * pending text as a TERM lexeme. Returns nothing, which stops the lexer.
 *
 * @param {lunr.QueryLexer} lexer - The lexer being driven.
 */
lunr.QueryLexer.lexEOS = function (lexer) {
  var hasPendingText = lexer.width() > 0

  if (!hasPendingText) {
    return
  }

  lexer.emit(lunr.QueryLexer.TERM)
}
// Query terms are split on the same separator that the tokenizer uses
// when splitting document fields. The two have to agree, otherwise some
// tokens within a document cannot be searched for.
//
// The tokenizer separator can be changed by the user, so it _might_
// clash with one of the special characters of the search string
// syntax, e.g. ':'.
//
// Changing the separator can therefore make some words unsearchable
// using a search string.
lunr.QueryLexer.termSeparator = lunr.tokenizer.separator
/**
 * Main lexer state. Reads characters until one with special meaning is
 * found and returns the state that handles it:
 *
 *  - end of input          -> lexEOS
 *  - '\' (escape)          -> recorded via escapeCharacter, scanning continues
 *  - ':'                   -> lexField
 *  - '~'                   -> pending text emitted as TERM, then lexEditDistance
 *  - '^'                   -> pending text emitted as TERM, then lexBoost
 *  - leading '+' or '-'    -> emitted as PRESENCE, then lexText again
 *  - term separator        -> lexTerm
 *
 * @param {lunr.QueryLexer} lexer - The lexer being driven.
 * @returns {Function} The next state.
 */
lunr.QueryLexer.lexText = function (lexer) {
  for (;;) {
    var ch = lexer.next()

    if (ch == lunr.QueryLexer.EOS) {
      return lunr.QueryLexer.lexEOS
    }

    switch (ch) {
      case "\\":
        // Escape character: the character after it is taken literally.
        lexer.escapeCharacter()
        continue

      case ":":
        return lunr.QueryLexer.lexField

      case "~":
      case "^":
        // Un-read the modifier and flush the term it applies to, if any.
        lexer.backup()
        if (lexer.width() > 0) {
          lexer.emit(lunr.QueryLexer.TERM)
        }
        return ch == "~" ? lunr.QueryLexer.lexEditDistance : lunr.QueryLexer.lexBoost

      case "+":
      case "-":
        // "+" marks a term as required and "-" as prohibited, but only
        // when it is the first character of the lexeme; anywhere else it
        // falls through to the separator check below.
        if (lexer.width() === 1) {
          lexer.emit(lunr.QueryLexer.PRESENCE)
          return lunr.QueryLexer.lexText
        }
        break
    }

    if (ch.match(lunr.QueryLexer.termSeparator)) {
      return lunr.QueryLexer.lexTerm
    }
  }
}
|