123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492 |
/**
 * Builds an index instance out of already-prepared parts. Every listed
 * property is copied from `attrs` onto the instance unchanged; nothing is
 * validated or cloned here.
 *
 * @constructor
 * @param {Object} attrs - Holder of the parts that make up the index.
 * @param {Object} attrs.invertedIndex - Postings, looked up by term when querying.
 * @param {Object} attrs.fieldVectors - Vectors, looked up by field ref when scoring.
 * @param {Object} attrs.tokenSet - Token set intersected with each query clause.
 * @param {string[]} attrs.fields - Names of the fields known to this index.
 * @param {Object} attrs.pipeline - Pipeline applied to query terms.
 */
lunr.Index = function (attrs) {
  // Copied in this fixed order so the instance is populated exactly as before.
  var partNames = ["invertedIndex", "fieldVectors", "tokenSet", "fields", "pipeline"]

  for (var p = 0; p < partNames.length; p++) {
    this[partNames[p]] = attrs[partNames[p]]
  }
}
/**
 * Searches the index with a query given as a string.
 *
 * The string is handed to a lunr.QueryParser together with the query object
 * supplied by `query`; the parser populates that query and `query` then
 * executes it.
 *
 * @param {string} queryString - Textual query to parse.
 * @returns {Object[]} Whatever `lunr.Index.prototype.query` returns.
 */
lunr.Index.prototype.search = function (queryString) {
  var parseInto = function (query) {
    new lunr.QueryParser(queryString, query).parse()
  }

  return this.query(parseInto)
}
/**
 * Runs a programmatically built query against the index.
 *
 * A fresh lunr.Query is created for this index's fields and handed to `fn`
 * (both as `this` and as the first argument) so the caller can add clauses.
 * Each clause is then:
 *   1. optionally run through the pipeline to obtain its terms,
 *   2. expanded against the index token set,
 *   3. used to collect matching documents, match metadata and query-vector
 *      weights per field.
 * Finally every matching field ref is filtered by the required/prohibited
 * sets, scored against the per-field query vector and merged per document.
 *
 * @param {function(lunr.Query)} fn - Callback that populates the query.
 * @returns {Object[]} Objects of the form `{ ref, score, matchData }`, one per
 *   matching document, sorted by descending score.
 */
lunr.Index.prototype.query = function (fn) {
  var query = new lunr.Query(this.fields),
      matchingFields = Object.create(null),
      queryVectors = Object.create(null),
      termFieldCache = Object.create(null),
      requiredMatches = Object.create(null),
      prohibitedMatches = Object.create(null),
      // Combines boosts when the same term index is upserted more than once.
      // Created once instead of once per inner-loop iteration.
      sumBoosts = function (a, b) { return a + b },
      field,
      i, j, k, l, m

  // One query vector per field, created up front so that every field name
  // has a vector available at scoring time even if no clause touches it.
  for (i = 0; i < this.fields.length; i++) {
    queryVectors[this.fields[i]] = new lunr.Vector()
  }

  fn.call(query, query)

  for (i = 0; i < query.clauses.length; i++) {
    var clause = query.clauses[i],
        terms = null,
        clauseMatches = lunr.Set.empty,
        // Presence is fixed for the lifetime of the clause (only `term` is
        // rewritten below), so it is classified once, with strict equality.
        isRequired = clause.presence === lunr.Query.presence.REQUIRED,
        isProhibited = clause.presence === lunr.Query.presence.PROHIBITED

    if (clause.usePipeline) {
      terms = this.pipeline.runString(clause.term, {
        fields: clause.fields
      })
    } else {
      terms = [clause.term]
    }

    for (m = 0; m < terms.length; m++) {
      var term = terms[m]

      // fromClause reads the clause object, so the current term is written
      // back onto it. Note that this mutates the clause held by the query.
      clause.term = term

      var termTokenSet = lunr.TokenSet.fromClause(clause),
          expandedTerms = this.tokenSet.intersect(termTokenSet).toArray()

      // A required term with no expansion in the index cannot be satisfied:
      // mark every field of the clause as matching nothing and stop
      // processing the remaining terms of this clause.
      if (expandedTerms.length === 0 && isRequired) {
        for (k = 0; k < clause.fields.length; k++) {
          requiredMatches[clause.fields[k]] = lunr.Set.empty
        }

        break
      }

      for (j = 0; j < expandedTerms.length; j++) {
        var expandedTerm = expandedTerms[j],
            posting = this.invertedIndex[expandedTerm],
            termIndex = posting._index

        for (k = 0; k < clause.fields.length; k++) {
          field = clause.fields[k]

          var fieldPosting = posting[field],
              matchingDocumentRefs = Object.keys(fieldPosting),
              termField = expandedTerm + "/" + field,
              matchingDocumentsSet = new lunr.Set(matchingDocumentRefs)

          if (isRequired) {
            // Documents matching any expansion in any field satisfy the clause.
            clauseMatches = clauseMatches.union(matchingDocumentsSet)

            if (requiredMatches[field] === undefined) {
              requiredMatches[field] = lunr.Set.complete
            }
          }

          if (isProhibited) {
            if (prohibitedMatches[field] === undefined) {
              prohibitedMatches[field] = lunr.Set.empty
            }

            prohibitedMatches[field] = prohibitedMatches[field].union(matchingDocumentsSet)

            // Prohibited terms only exclude documents; they contribute
            // neither to the query vector nor to the match metadata.
            continue
          }

          queryVectors[field].upsert(termIndex, clause.boost, sumBoosts)

          // Metadata for this term/field pair was already collected by an
          // earlier clause or term; only the boost above needed adding.
          if (termFieldCache[termField]) {
            continue
          }

          for (l = 0; l < matchingDocumentRefs.length; l++) {
            var matchingDocumentRef = matchingDocumentRefs[l],
                matchingFieldRef = new lunr.FieldRef(matchingDocumentRef, field),
                metadata = fieldPosting[matchingDocumentRef],
                fieldMatch

            // matchingFieldRef is used as an object key, i.e. via its
            // string conversion.
            if ((fieldMatch = matchingFields[matchingFieldRef]) === undefined) {
              matchingFields[matchingFieldRef] = new lunr.MatchData(expandedTerm, field, metadata)
            } else {
              fieldMatch.add(expandedTerm, field, metadata)
            }
          }

          termFieldCache[termField] = true
        }
      }
    }

    if (isRequired) {
      for (k = 0; k < clause.fields.length; k++) {
        field = clause.fields[k]

        // When the pipeline produced no terms at all for this clause, the
        // loops above never ran and no entry exists for the field yet.
        // Seed it with the same lunr.Set.complete used above so `.intersect`
        // is never called on undefined; clauseMatches is still
        // lunr.Set.empty in that situation.
        var seenSoFar = requiredMatches[field] === undefined
          ? lunr.Set.complete
          : requiredMatches[field]

        requiredMatches[field] = seenSoFar.intersect(clauseMatches)
      }
    }
  }

  // Fold the per-field sets into one required set (intersection) and one
  // prohibited set (union) covering all fields of the index.
  var allRequiredMatches = lunr.Set.complete,
      allProhibitedMatches = lunr.Set.empty

  for (i = 0; i < this.fields.length; i++) {
    field = this.fields[i]

    if (requiredMatches[field]) {
      allRequiredMatches = allRequiredMatches.intersect(requiredMatches[field])
    }

    if (prohibitedMatches[field]) {
      allProhibitedMatches = allProhibitedMatches.union(prohibitedMatches[field])
    }
  }

  var matchingFieldRefs = Object.keys(matchingFields),
      results = [],
      matches = Object.create(null)

  // For a negated query every field ref in the index becomes a candidate,
  // each with empty match data; the prohibited set filters them below.
  if (query.isNegated()) {
    matchingFieldRefs = Object.keys(this.fieldVectors)

    for (i = 0; i < matchingFieldRefs.length; i++) {
      matchingFields[matchingFieldRefs[i]] = new lunr.MatchData()
    }
  }

  for (i = 0; i < matchingFieldRefs.length; i++) {
    var fieldRef = lunr.FieldRef.fromString(matchingFieldRefs[i]),
        docRef = fieldRef.docRef

    if (!allRequiredMatches.contains(docRef)) {
      continue
    }

    if (allProhibitedMatches.contains(docRef)) {
      continue
    }

    var fieldVector = this.fieldVectors[fieldRef],
        score = queryVectors[fieldRef.fieldName].similarity(fieldVector),
        docMatch

    if ((docMatch = matches[docRef]) !== undefined) {
      // The document already matched in another field: accumulate.
      docMatch.score += score
      docMatch.matchData.combine(matchingFields[fieldRef])
    } else {
      var match = {
        ref: docRef,
        score: score,
        matchData: matchingFields[fieldRef]
      }

      matches[docRef] = match
      results.push(match)
    }
  }

  return results.sort(function (a, b) {
    return b.score - a.score
  })
}
/**
 * Produces a plain-object representation of the index.
 *
 * The inverted index is emitted as `[term, posting]` pairs ordered by the
 * default sort of the terms; the field vectors as `[ref, vector.toJSON()]`
 * pairs in key order. The current `lunr.version`, the field list and the
 * pipeline's own `toJSON()` output are included alongside.
 *
 * @returns {Object} Object with `version`, `fields`, `fieldVectors`,
 *   `invertedIndex` and `pipeline` properties.
 */
lunr.Index.prototype.toJSON = function () {
  var postingPairs = [],
      vectorPairs = [],
      n

  var sortedTerms = Object.keys(this.invertedIndex).sort()
  for (n = 0; n < sortedTerms.length; n++) {
    postingPairs.push([sortedTerms[n], this.invertedIndex[sortedTerms[n]]])
  }

  var vectorRefs = Object.keys(this.fieldVectors)
  for (n = 0; n < vectorRefs.length; n++) {
    vectorPairs.push([vectorRefs[n], this.fieldVectors[vectorRefs[n]].toJSON()])
  }

  return {
    version: lunr.version,
    fields: this.fields,
    fieldVectors: vectorPairs,
    invertedIndex: postingPairs,
    pipeline: this.pipeline.toJSON()
  }
}
/**
 * Rebuilds an index from the structure produced by `toJSON`.
 *
 * The pipeline is restored through lunr.Pipeline.load, each serialized
 * vector is wrapped in a lunr.Vector, and every term of the serialized
 * inverted index is both stored and inserted into a lunr.TokenSet.Builder
 * whose root becomes the index token set. A warning is emitted through
 * lunr.utils.warn when the serialized version differs from `lunr.version`;
 * loading continues regardless.
 *
 * @param {Object} serializedIndex - Object with `version`, `fields`,
 *   `fieldVectors`, `invertedIndex` and `pipeline` properties.
 * @returns {lunr.Index} The reconstructed index.
 */
lunr.Index.load = function (serializedIndex) {
  var vectorTuples = serializedIndex.fieldVectors
  var postingTuples = serializedIndex.invertedIndex
  var builder = new lunr.TokenSet.Builder
  var pipeline = lunr.Pipeline.load(serializedIndex.pipeline)
  var fieldVectors = {}
  var invertedIndex = Object.create(null)
  var n

  if (serializedIndex.version != lunr.version) {
    lunr.utils.warn("Version mismatch when loading serialised index. Current version of lunr '" + lunr.version + "' does not match serialized index '" + serializedIndex.version + "'")
  }

  for (n = 0; n < vectorTuples.length; n++) {
    var vectorTuple = vectorTuples[n]
    fieldVectors[vectorTuple[0]] = new lunr.Vector(vectorTuple[1])
  }

  for (n = 0; n < postingTuples.length; n++) {
    var postingTuple = postingTuples[n],
        term = postingTuple[0]

    builder.insert(term)
    invertedIndex[term] = postingTuple[1]
  }

  // The builder's root is only read after finish() has been called.
  builder.finish()

  return new lunr.Index({
    fields: serializedIndex.fields,
    fieldVectors: fieldVectors,
    invertedIndex: invertedIndex,
    tokenSet: builder.root,
    pipeline: pipeline
  })
}
|