lunr.js 52 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053
  1. /**
  2. * lunr - http://lunrjs.com - A bit like Solr, but much smaller and not as bright - 1.0.0
  3. * Copyright (C) 2017 Oliver Nightingale
  4. * @license MIT
  5. */
  6. ;(function(){
  /**
   * Factory for a ready-to-use search index.
   *
   * Creates a new lunr.Index, appends the library's default processors to its
   * pipeline and then, if a config function was supplied, lets that function
   * customise the index before it is returned.
   *
   * The default pipeline is, in this order:
   *
   * lunr.trimmer - added first (defined elsewhere in the library)
   *
   * lunr.stopWordFilter - filters out any stop words before they enter the
   * index
   *
   * lunr.stemmer - stems the tokens before entering the index.
   *
   * Example:
   *
   *     var idx = lunr(function () {
   *       this.field('title', 10)
   *       this.field('tags', 100)
   *       this.field('body')
   *
   *       this.ref('cid')
   *
   *       this.pipeline.add(function () {
   *         // some custom pipeline function
   *       })
   *     })
   *
   * @param {Function} config Optional. Called with the new lunr.Index as both
   * its `this` value and its first argument; skipped when falsy.
   * @namespace
   * @module
   * @returns {lunr.Index} The newly created (and possibly customised) index.
   */
  var lunr = function (config) {
    var index = new lunr.Index()
    var defaultProcessors = [lunr.trimmer, lunr.stopWordFilter, lunr.stemmer]

    // Same as index.pipeline.add(trimmer, stopWordFilter, stemmer).
    index.pipeline.add.apply(index.pipeline, defaultProcessors)

    if (config) {
      config.call(index, index)
    }

    return index
  }

  /** Version string of this build of the library. */
  lunr.version = "1.0.0"
  53. /*!
  54. * lunr.utils
  55. * Copyright (C) 2017 Oliver Nightingale
  56. */
  /**
   * A namespace containing utils for the rest of the lunr library
   */
  lunr.utils = {}

  /**
   * Print a warning message to the console.
   *
   * The console is looked up at call time with a `typeof` guard, so this is a
   * silent no-op in environments without `console.warn` and never throws when
   * the surrounding code runs in strict mode (where the outer `this` is
   * undefined). Every argument is forwarded to `console.warn`, so callers may
   * pass extra values to be logged after the message.
   *
   * @param {String} message The message to be printed.
   * @memberOf Utils
   */
  lunr.utils.warn = function (message) {
    if (typeof console !== "undefined" && console && typeof console.warn === "function") {
      console.warn.apply(console, arguments)
    }
  }

  /**
   * Convert an object to a string.
   *
   * In the case of `null` and `undefined` the function returns
   * the empty string, in all other cases the result of calling
   * `toString` on the passed object is returned.
   *
   * @param {Any} obj The object to convert to a string.
   * @return {String} string representation of the passed object.
   * @memberOf Utils
   */
  lunr.utils.asString = function (obj) {
    if (obj === void 0 || obj === null) {
      return ""
    }

    return obj.toString()
  }
  92. /*!
  93. * lunr.EventEmitter
  94. * Copyright (C) 2017 Oliver Nightingale
  95. */
  /**
   * lunr.EventEmitter is an event emitter for lunr. It manages adding and
   * removing event handlers and triggering events and their handlers.
   *
   * Handlers are kept in `this.events`, an object mapping an event name to the
   * array of functions bound to it.
   *
   * @constructor
   */
  lunr.EventEmitter = function () {
    this.events = {}
  }

  /**
   * Binds a handler function to a specific event(s).
   *
   * Can bind a single function to many different events in one call: every
   * argument before the last is treated as an event name.
   *
   * @param {String} [eventName] The name(s) of events to bind this function to.
   * @param {Function} fn The function to call when an event is fired.
   * @throws {TypeError} If the last argument is not a function.
   * @memberOf EventEmitter
   */
  lunr.EventEmitter.prototype.addListener = function () {
    var names = Array.prototype.slice.call(arguments),
        fn = names.pop()

    if (typeof fn !== "function") throw new TypeError ("last argument must be a function")

    names.forEach(function (name) {
      if (!this.hasHandler(name)) this.events[name] = []
      this.events[name].push(fn)
    }, this)
  }

  /**
   * Removes a handler function from a specific event.
   *
   * Does nothing when the event has no handlers or when `fn` is not bound to
   * it. Once the last handler is removed the event's entry is deleted.
   *
   * @param {String} eventName The name of the event to remove this function from.
   * @param {Function} fn The function to remove from an event.
   * @memberOf EventEmitter
   */
  lunr.EventEmitter.prototype.removeListener = function (name, fn) {
    if (!this.hasHandler(name)) return

    var handlers = this.events[name],
        fnIndex = handlers.indexOf(fn)

    // splice(-1, 1) would remove the last handler, so an unknown fn must bail.
    if (fnIndex === -1) return

    handlers.splice(fnIndex, 1)

    if (!handlers.length) delete this.events[name]
  }

  /**
   * Calls all functions bound to the given event.
   *
   * Additional data can be passed to the event handler as arguments to `emit`
   * after the event name. Handlers are invoked with an undefined `this`.
   *
   * @param {String} eventName The name of the event to emit.
   * @memberOf EventEmitter
   */
  lunr.EventEmitter.prototype.emit = function (name) {
    if (!this.hasHandler(name)) return

    var args = Array.prototype.slice.call(arguments, 1)

    // Iterate over a snapshot so a handler that removes itself (or another
    // handler) while running cannot cause a later handler to be skipped.
    this.events[name].slice().forEach(function (fn) {
      fn.apply(undefined, args)
    })
  }

  /**
   * Checks whether at least one handler is currently stored against an event.
   *
   * Only own entries of `this.events` count, so names that happen to exist on
   * Object.prototype (e.g. "toString") are not reported as bound.
   *
   * @param {String} eventName The name of the event to check.
   * @returns {Boolean}
   * @private
   * @memberOf EventEmitter
   */
  lunr.EventEmitter.prototype.hasHandler = function (name) {
    return Object.prototype.hasOwnProperty.call(this.events, name)
  }
  162. /*!
  163. * lunr.tokenizer
  164. * Copyright (C) 2017 Oliver Nightingale
  165. */
  /**
   * A function for splitting a string into tokens ready to be inserted into
   * the search index. Uses `lunr.tokenizer.separator` to split strings, change
   * the value of this property to change how strings are split into tokens.
   *
   * `null`, `undefined` and a missing argument yield an empty array. An array
   * is not split: each element is converted with `lunr.utils.asString` and
   * lower-cased. Anything else is converted with `toString`, trimmed,
   * lower-cased and split on the separator.
   *
   * @module
   * @param {String} obj The string to convert into tokens
   * @see lunr.tokenizer.separator
   * @returns {Array}
   */
  lunr.tokenizer = function (obj) {
    // `== null` matches both null and undefined (including a missing argument).
    if (obj == null) return []

    if (Array.isArray(obj)) {
      return obj.map(function (t) { return lunr.utils.asString(t).toLowerCase() })
    }

    return obj.toString().trim().toLowerCase().split(lunr.tokenizer.separator)
  }

  /**
   * The separator used to split a string into tokens. Override this property
   * to change the behaviour of `lunr.tokenizer` when tokenizing strings. By
   * default this splits on whitespace and hyphens.
   *
   * @static
   * @see lunr.tokenizer
   */
  lunr.tokenizer.separator = /[\s\-]+/

  /**
   * Loads a previously serialised tokenizer.
   *
   * A tokenizer function to be loaded must already be registered with lunr.tokenizer.
   * If the serialised tokenizer has not been registered then an error will be thrown.
   * Only labels stored directly in the registry count; names inherited from
   * Object.prototype (e.g. "toString") are treated as un-registered.
   *
   * @param {String} label The label of the serialised tokenizer.
   * @returns {Function}
   * @throws {Error} If no function is registered under `label`.
   * @memberOf tokenizer
   */
  lunr.tokenizer.load = function (label) {
    var registry = this.registeredFunctions,
        fn = Object.prototype.hasOwnProperty.call(registry, label) ? registry[label] : undefined

    if (!fn) {
      throw new Error('Cannot load un-registered function: ' + label)
    }

    return fn
  }

  /** Label under which the built-in tokenizer is registered. */
  lunr.tokenizer.label = 'default'

  /** Registry mapping a label to its tokenizer function. */
  lunr.tokenizer.registeredFunctions = {
    'default': lunr.tokenizer
  }

  /**
   * Register a tokenizer function.
   *
   * Functions that are used as tokenizers should be registered if they are to
   * be used with a serialised index.
   *
   * Registering a function does not add it to an index, functions must still
   * be associated with a specific index for them to be used when indexing and
   * searching documents.
   *
   * Sets `fn.label` to `label` and warns (via `lunr.utils.warn`) when a
   * function is already registered under that label.
   *
   * @param {Function} fn The function to register.
   * @param {String} label The label to register this function with
   * @memberOf tokenizer
   */
  lunr.tokenizer.registerFunction = function (fn, label) {
    if (Object.prototype.hasOwnProperty.call(this.registeredFunctions, label)) {
      lunr.utils.warn('Overwriting existing tokenizer: ' + label)
    }

    fn.label = label
    this.registeredFunctions[label] = fn
  }
  228. /*!
  229. * lunr.Pipeline
  230. * Copyright (C) 2017 Oliver Nightingale
  231. */
  /**
   * lunr.Pipelines maintain an ordered list of functions to be applied to all
   * tokens in documents entering the search index and queries being ran against
   * the index.
   *
   * An instance of lunr.Index created with the lunr shortcut will contain a
   * pipeline with a trimmer, a stop word filter and an English language
   * stemmer. Extra functions can be added before or after any of these
   * functions or these default functions can be removed.
   *
   * When run the pipeline will call each function in turn, passing a token, the
   * index of that token in the original list of all tokens and finally a list of
   * all the original tokens.
   *
   * The output of functions in the pipeline will be passed to the next function
   * in the pipeline. To exclude a token from entering the index the function
   * should return undefined (or the empty string), the rest of the pipeline
   * will not be called with this token.
   *
   * For serialisation of pipelines to work, all functions used in an instance of
   * a pipeline should be registered with lunr.Pipeline. Registered functions can
   * then be loaded. If trying to load a serialised pipeline that uses functions
   * that are not registered an error will be thrown.
   *
   * If not planning on serialising the pipeline then registering pipeline functions
   * is not necessary.
   *
   * @constructor
   */
  lunr.Pipeline = function () {
    this._stack = []
  }

  /** Registry mapping a label to its registered pipeline function. */
  lunr.Pipeline.registeredFunctions = {}

  /**
   * Register a function with the pipeline.
   *
   * Functions that are used in the pipeline should be registered if the pipeline
   * needs to be serialised, or a serialised pipeline needs to be loaded.
   *
   * Registering a function does not add it to a pipeline, functions must still be
   * added to instances of the pipeline for them to be used when running a pipeline.
   *
   * Sets `fn.label` to `label` and warns when a function is already registered
   * under that label. Only labels stored directly in the registry count as
   * existing; names inherited from Object.prototype do not.
   *
   * @param {Function} fn The function to register.
   * @param {String} label The label to register this function with
   * @memberOf Pipeline
   */
  lunr.Pipeline.registerFunction = function (fn, label) {
    if (Object.prototype.hasOwnProperty.call(lunr.Pipeline.registeredFunctions, label)) {
      lunr.utils.warn('Overwriting existing registered function: ' + label)
    }

    fn.label = label
    lunr.Pipeline.registeredFunctions[fn.label] = fn
  }

  /**
   * Warns if the function is not registered as a Pipeline function.
   *
   * A function counts as registered when it has a truthy `label` that is an
   * own key of the registry. The function itself is passed to
   * `lunr.utils.warn` as a second argument.
   *
   * @param {Function} fn The function to check for.
   * @private
   * @memberOf Pipeline
   */
  lunr.Pipeline.warnIfFunctionNotRegistered = function (fn) {
    var isRegistered = fn.label &&
      Object.prototype.hasOwnProperty.call(lunr.Pipeline.registeredFunctions, fn.label)

    if (!isRegistered) {
      lunr.utils.warn('Function is not registered with pipeline. This may cause problems when serialising the index.\n', fn)
    }
  }

  /**
   * Loads a previously serialised pipeline.
   *
   * All functions to be loaded must already be registered with lunr.Pipeline.
   * If any function from the serialised data has not been registered then an
   * error will be thrown.
   *
   * @param {Object} serialised The serialised pipeline to load (an array of labels).
   * @returns {lunr.Pipeline}
   * @throws {Error} If a label has no registered function.
   * @memberOf Pipeline
   */
  lunr.Pipeline.load = function (serialised) {
    var pipeline = new lunr.Pipeline,
        registry = lunr.Pipeline.registeredFunctions

    serialised.forEach(function (fnName) {
      // Own-key check keeps labels such as "toString" from resolving to
      // members of Object.prototype.
      var fn = Object.prototype.hasOwnProperty.call(registry, fnName) ? registry[fnName] : undefined

      if (!fn) {
        throw new Error('Cannot load un-registered function: ' + fnName)
      }

      pipeline.add(fn)
    })

    return pipeline
  }

  /**
   * Adds new functions to the end of the pipeline.
   *
   * Logs a warning if the function has not been registered.
   *
   * @param {Function} functions Any number of functions to add to the pipeline.
   * @memberOf Pipeline
   */
  lunr.Pipeline.prototype.add = function () {
    var fns = Array.prototype.slice.call(arguments)

    fns.forEach(function (fn) {
      lunr.Pipeline.warnIfFunctionNotRegistered(fn)
      this._stack.push(fn)
    }, this)
  }

  /**
   * Adds a single function after a function that already exists in the
   * pipeline.
   *
   * Logs a warning if the function has not been registered.
   *
   * @param {Function} existingFn A function that already exists in the pipeline.
   * @param {Function} newFn The new function to add to the pipeline.
   * @throws {Error} If `existingFn` is not in the pipeline.
   * @memberOf Pipeline
   */
  lunr.Pipeline.prototype.after = function (existingFn, newFn) {
    lunr.Pipeline.warnIfFunctionNotRegistered(newFn)

    var pos = this._stack.indexOf(existingFn)
    if (pos === -1) {
      throw new Error('Cannot find existingFn')
    }

    this._stack.splice(pos + 1, 0, newFn)
  }

  /**
   * Adds a single function before a function that already exists in the
   * pipeline.
   *
   * Logs a warning if the function has not been registered.
   *
   * @param {Function} existingFn A function that already exists in the pipeline.
   * @param {Function} newFn The new function to add to the pipeline.
   * @throws {Error} If `existingFn` is not in the pipeline.
   * @memberOf Pipeline
   */
  lunr.Pipeline.prototype.before = function (existingFn, newFn) {
    lunr.Pipeline.warnIfFunctionNotRegistered(newFn)

    var pos = this._stack.indexOf(existingFn)
    if (pos === -1) {
      throw new Error('Cannot find existingFn')
    }

    this._stack.splice(pos, 0, newFn)
  }

  /**
   * Removes a function from the pipeline.
   *
   * Does nothing when the function is not in the pipeline.
   *
   * @param {Function} fn The function to remove from the pipeline.
   * @memberOf Pipeline
   */
  lunr.Pipeline.prototype.remove = function (fn) {
    var pos = this._stack.indexOf(fn)
    if (pos === -1) {
      return
    }

    this._stack.splice(pos, 1)
  }

  /**
   * Runs the current list of functions that make up the pipeline against the
   * passed tokens.
   *
   * Each function receives the current token, its index and the original
   * token list. A token is dropped as soon as a function returns undefined
   * or the empty string.
   *
   * @param {Array} tokens The tokens to run through the pipeline.
   * @returns {Array} The tokens that survived every function.
   * @memberOf Pipeline
   */
  lunr.Pipeline.prototype.run = function (tokens) {
    var out = [],
        tokenLength = tokens.length,
        stackLength = this._stack.length

    for (var i = 0; i < tokenLength; i++) {
      var token = tokens[i]

      for (var j = 0; j < stackLength; j++) {
        token = this._stack[j](token, i, tokens)
        if (token === void 0 || token === '') break
      }

      if (token !== void 0 && token !== '') out.push(token)
    }

    return out
  }

  /**
   * Resets the pipeline by removing any existing processors.
   *
   * @memberOf Pipeline
   */
  lunr.Pipeline.prototype.reset = function () {
    this._stack = []
  }

  /**
   * Returns a representation of the pipeline ready for serialisation: the
   * `label` of every function, in pipeline order.
   *
   * Logs a warning if the function has not been registered.
   *
   * @returns {Array}
   * @memberOf Pipeline
   */
  lunr.Pipeline.prototype.toJSON = function () {
    return this._stack.map(function (fn) {
      lunr.Pipeline.warnIfFunctionNotRegistered(fn)

      return fn.label
    })
  }
  430. /*!
  431. * lunr.Vector
  432. * Copyright (C) 2017 Oliver Nightingale
  433. */
  /**
   * lunr.Vectors implement vector related operations for
   * a series of elements.
   *
   * Elements are stored as a singly linked list of lunr.Vector.Node objects
   * (`this.list` is the head) kept in ascending index order; `this.length`
   * counts the nodes and `this._magnitude` caches the last computed magnitude
   * (null when it has to be recomputed).
   *
   * @constructor
   */
  lunr.Vector = function () {
    this._magnitude = null
    this.list = undefined
    this.length = 0
  }

  /**
   * lunr.Vector.Node is a simple struct for each node
   * in a lunr.Vector.
   *
   * @private
   * @param {Number} idx The index of the node in the vector.
   * @param {Object} val The data at this node in the vector.
   * @param {lunr.Vector.Node} next The node directly after this node in the vector.
   * @constructor
   * @memberOf Vector
   */
  lunr.Vector.Node = function (idx, val, next) {
    this.idx = idx
    this.val = val
    this.next = next
  }

  /**
   * Inserts a new value at a position in a vector.
   *
   * The new node is placed before the first node with a strictly greater
   * index, so the list stays sorted and a node whose index equals an existing
   * one ends up after it. Invalidates the cached magnitude.
   *
   * @param {Number} idx The index at which to insert a value.
   * @param {Object} val The object to insert in the vector.
   * @returns {Number} The length of the vector before the insertion.
   * @memberOf Vector.
   */
  lunr.Vector.prototype.insert = function (idx, val) {
    this._magnitude = null

    var list = this.list

    // Empty vector, or the new node sorts before the current head.
    if (!list || idx < list.idx) {
      this.list = new lunr.Vector.Node (idx, val, list)
      return this.length++
    }

    var prev = list,
        next = list.next

    // Walk until `next` is the first node with a greater index (or the end).
    while (next != undefined && !(idx < next.idx)) {
      prev = next
      next = next.next
    }

    prev.next = new lunr.Vector.Node (idx, val, next)
    return this.length++
  }

  /**
   * Calculates the magnitude of this vector.
   *
   * The result is cached until the next call to `insert`.
   *
   * @returns {Number}
   * @memberOf Vector
   */
  lunr.Vector.prototype.magnitude = function () {
    // `!= null` rather than truthiness so a magnitude of 0 is cached too.
    if (this._magnitude != null) return this._magnitude

    var node = this.list,
        sumOfSquares = 0,
        val

    while (node) {
      val = node.val
      sumOfSquares += val * val
      node = node.next
    }

    return this._magnitude = Math.sqrt(sumOfSquares)
  }

  /**
   * Calculates the dot product of this vector and another vector.
   *
   * Both sorted lists are walked in step; only indexes present in both
   * vectors contribute to the sum.
   *
   * @param {lunr.Vector} otherVector The vector to compute the dot product with.
   * @returns {Number}
   * @memberOf Vector
   */
  lunr.Vector.prototype.dot = function (otherVector) {
    var node = this.list,
        otherNode = otherVector.list,
        dotProduct = 0

    while (node && otherNode) {
      if (node.idx < otherNode.idx) {
        node = node.next
      } else if (node.idx > otherNode.idx) {
        otherNode = otherNode.next
      } else {
        dotProduct += node.val * otherNode.val
        node = node.next
        otherNode = otherNode.next
      }
    }

    return dotProduct
  }

  /**
   * Calculates the cosine similarity between this vector and another
   * vector.
   *
   * When either vector has zero magnitude (for example an empty vector) the
   * similarity is defined as 0 instead of the NaN that 0 / 0 would produce.
   *
   * @param {lunr.Vector} otherVector The other vector to calculate the
   * similarity with.
   * @returns {Number}
   * @memberOf Vector
   */
  lunr.Vector.prototype.similarity = function (otherVector) {
    var denominator = this.magnitude() * otherVector.magnitude()

    if (denominator === 0) return 0

    return this.dot(otherVector) / denominator
  }
  545. /*!
  546. * lunr.SortedSet
  547. * Copyright (C) 2017 Oliver Nightingale
  548. */
  549. /**
  550. * lunr.SortedSets are used to maintain an array of uniq values in a sorted
  551. * order.
  552. *
  553. * @constructor
  554. */
  555. lunr.SortedSet = function () {
  556. this.length = 0
  557. this.elements = []
  558. }
  559. /**
  560. * Loads a previously serialised sorted set.
  561. *
  562. * @param {Array} serialisedData The serialised set to load.
  563. * @returns {lunr.SortedSet}
  564. * @memberOf SortedSet
  565. */
  566. lunr.SortedSet.load = function (serialisedData) {
  567. var set = new this
  568. set.elements = serialisedData
  569. set.length = serialisedData.length
  570. return set
  571. }
  572. /**
  573. * Inserts new items into the set in the correct position to maintain the
  574. * order.
  575. *
  576. * @param {Object} The objects to add to this set.
  577. * @memberOf SortedSet
  578. */
  579. lunr.SortedSet.prototype.add = function () {
  580. var i, element
  581. for (i = 0; i < arguments.length; i++) {
  582. element = arguments[i]
  583. if (~this.indexOf(element)) continue
  584. this.elements.splice(this.locationFor(element), 0, element)
  585. }
  586. this.length = this.elements.length
  587. }
  588. /**
  589. * Converts this sorted set into an array.
  590. *
  591. * @returns {Array}
  592. * @memberOf SortedSet
  593. */
  594. lunr.SortedSet.prototype.toArray = function () {
  595. return this.elements.slice()
  596. }
  597. /**
  598. * Creates a new array with the results of calling a provided function on every
  599. * element in this sorted set.
  600. *
  601. * Delegates to Array.prototype.map and has the same signature.
  602. *
  603. * @param {Function} fn The function that is called on each element of the
  604. * set.
  605. * @param {Object} ctx An optional object that can be used as the context
  606. * for the function fn.
  607. * @returns {Array}
  608. * @memberOf SortedSet
  609. */
  610. lunr.SortedSet.prototype.map = function (fn, ctx) {
  611. return this.elements.map(fn, ctx)
  612. }
  613. /**
  614. * Executes a provided function once per sorted set element.
  615. *
  616. * Delegates to Array.prototype.forEach and has the same signature.
  617. *
  618. * @param {Function} fn The function that is called on each element of the
  619. * set.
  620. * @param {Object} ctx An optional object that can be used as the context
  621. * @memberOf SortedSet
  622. * for the function fn.
  623. */
  624. lunr.SortedSet.prototype.forEach = function (fn, ctx) {
  625. return this.elements.forEach(fn, ctx)
  626. }
  627. /**
  628. * Returns the index at which a given element can be found in the
  629. * sorted set, or -1 if it is not present.
  630. *
  631. * @param {Object} elem The object to locate in the sorted set.
  632. * @returns {Number}
  633. * @memberOf SortedSet
  634. */
  635. lunr.SortedSet.prototype.indexOf = function (elem) {
  636. var start = 0,
  637. end = this.elements.length,
  638. sectionLength = end - start,
  639. pivot = start + Math.floor(sectionLength / 2),
  640. pivotElem = this.elements[pivot]
  641. while (sectionLength > 1) {
  642. if (pivotElem === elem) return pivot
  643. if (pivotElem < elem) start = pivot
  644. if (pivotElem > elem) end = pivot
  645. sectionLength = end - start
  646. pivot = start + Math.floor(sectionLength / 2)
  647. pivotElem = this.elements[pivot]
  648. }
  649. if (pivotElem === elem) return pivot
  650. return -1
  651. }
  652. /**
  653. * Returns the position within the sorted set that an element should be
  654. * inserted at to maintain the current order of the set.
  655. *
  656. * This function assumes that the element to search for does not already exist
  657. * in the sorted set.
  658. *
  659. * @param {Object} elem The elem to find the position for in the set
  660. * @returns {Number}
  661. * @memberOf SortedSet
  662. */
  663. lunr.SortedSet.prototype.locationFor = function (elem) {
  664. var start = 0,
  665. end = this.elements.length,
  666. sectionLength = end - start,
  667. pivot = start + Math.floor(sectionLength / 2),
  668. pivotElem = this.elements[pivot]
  669. while (sectionLength > 1) {
  670. if (pivotElem < elem) start = pivot
  671. if (pivotElem > elem) end = pivot
  672. sectionLength = end - start
  673. pivot = start + Math.floor(sectionLength / 2)
  674. pivotElem = this.elements[pivot]
  675. }
  676. if (pivotElem > elem) return pivot
  677. if (pivotElem < elem) return pivot + 1
  678. }
  679. /**
  680. * Creates a new lunr.SortedSet that contains the elements in the intersection
  681. * of this set and the passed set.
  682. *
  683. * @param {lunr.SortedSet} otherSet The set to intersect with this set.
  684. * @returns {lunr.SortedSet}
  685. * @memberOf SortedSet
  686. */
  687. lunr.SortedSet.prototype.intersect = function (otherSet) {
  688. var intersectSet = new lunr.SortedSet,
  689. i = 0, j = 0,
  690. a_len = this.length, b_len = otherSet.length,
  691. a = this.elements, b = otherSet.elements
  692. while (true) {
  693. if (i > a_len - 1 || j > b_len - 1) break
  694. if (a[i] === b[j]) {
  695. intersectSet.add(a[i])
  696. i++, j++
  697. continue
  698. }
  699. if (a[i] < b[j]) {
  700. i++
  701. continue
  702. }
  703. if (a[i] > b[j]) {
  704. j++
  705. continue
  706. }
  707. };
  708. return intersectSet
  709. }
  710. /**
  711. * Makes a copy of this set
  712. *
  713. * @returns {lunr.SortedSet}
  714. * @memberOf SortedSet
  715. */
  716. lunr.SortedSet.prototype.clone = function () {
  717. var clone = new lunr.SortedSet
  718. clone.elements = this.toArray()
  719. clone.length = clone.elements.length
  720. return clone
  721. }
  /**
   * Builds a new lunr.SortedSet containing every element found in this set,
   * the passed set, or both.
   *
   * The longer of the two sets is cloned and the elements of the shorter one
   * are then added to that clone one by one.
   *
   * @param {lunr.SortedSet} otherSet The set to union with this set.
   * @returns {lunr.SortedSet}
   * @memberOf SortedSet
   */
  lunr.SortedSet.prototype.union = function (otherSet) {
    var thisIsLonger = this.length >= otherSet.length,
        base = thisIsLonger ? this : otherSet,
        other = thisIsLonger ? otherSet : this,
        merged = base.clone(),
        extras = other.toArray(),
        idx = 0

    while (idx < extras.length) {
      merged.add(extras[idx])
      idx += 1
    }

    return merged
  }
  /**
   * Serialisation hook: the set is represented by the array that toArray()
   * returns.
   *
   * @returns {Array}
   * @memberOf SortedSet
   */
  lunr.SortedSet.prototype.toJSON = function () {
    var serialised = this.toArray()
    return serialised
  }
  752. /*!
  753. * lunr.Index
  754. * Copyright (C) 2017 Oliver Nightingale
  755. */
  /**
   * lunr.Index is the object that manages a search index. It owns the
   * pipeline, the document and token stores, the set of corpus tokens and the
   * event emitter, and it provides the main user facing API for the library.
   *
   * A fresh index has no fields, uses 'id' as the document ref property and
   * lunr.tokenizer as its tokenizer. Its idf cache is thrown away whenever an
   * 'add', 'remove' or 'update' event is emitted.
   *
   * @constructor
   */
  lunr.Index = function () {
    var index = this

    this._fields = []
    this._ref = 'id'
    this.pipeline = new lunr.Pipeline
    this.documentStore = new lunr.Store
    this.tokenStore = new lunr.TokenStore
    this.corpusTokens = new lunr.SortedSet
    this.eventEmitter = new lunr.EventEmitter
    this.tokenizerFn = lunr.tokenizer
    this._idfCache = {}

    // Any change to the indexed documents invalidates the cached idf values.
    this.on('add', 'remove', 'update', function () {
      index._idfCache = {}
    })
  }
  /**
   * Bind a handler to events being emitted by the index.
   *
   * The handler can be bound to many events at the same time: pass the event
   * names first and the handler as the last argument, e.g.
   * `idx.on('add', 'remove', fn)`. All arguments are forwarded unchanged to
   * the index's event emitter.
   *
   * @param {...String} eventName The name(s) of events to bind the function to.
   * @param {Function} fn The handler to bind to the named events.
   * @returns {*} Whatever the event emitter's addListener returns.
   * @memberOf Index
   */
  lunr.Index.prototype.on = function () {
    var args = Array.prototype.slice.call(arguments)
    return this.eventEmitter.addListener.apply(this.eventEmitter, args)
  }
  /**
   * Removes a handler from an event being emitted by the index.
   *
   * The call is delegated to the index's event emitter.
   *
   * @param {String} name The name of the event to remove the function from.
   * @param {Function} fn The handler to remove.
   * @returns {*} Whatever the event emitter's removeListener returns.
   * @memberOf Index
   */
  lunr.Index.prototype.off = function (name, fn) {
    return this.eventEmitter.removeListener(name, fn)
  }
  /**
   * Rebuilds an index from previously serialised data.
   *
   * A warning is issued through lunr.utils.warn when the data was serialised
   * by a different version of lunr; loading continues regardless.
   *
   * @param {Object} serialisedData The serialised index to load.
   * @returns {lunr.Index}
   * @memberOf Index
   */
  lunr.Index.load = function (serialisedData) {
    var versionsMatch = serialisedData.version === lunr.version

    if (!versionsMatch) {
      lunr.utils.warn('version mismatch: current ' + lunr.version + ' importing ' + serialisedData.version)
    }

    var restored = new this()

    restored._fields = serialisedData.fields
    restored._ref = serialisedData.ref

    // Go through the public setter so the tokenizer registration check runs.
    restored.tokenizer(lunr.tokenizer.load(serialisedData.tokenizer))

    restored.documentStore = lunr.Store.load(serialisedData.documentStore)
    restored.tokenStore = lunr.TokenStore.load(serialisedData.tokenStore)
    restored.corpusTokens = lunr.SortedSet.load(serialisedData.corpusTokens)
    restored.pipeline = lunr.Pipeline.load(serialisedData.pipeline)

    return restored
  }
  /**
   * Adds a field to the list of fields that will be searchable within documents
   * in the index.
   *
   * An optional boost can be passed in the options object to affect how much
   * tokens in this field rank in search results, by default the boost value
   * is 1.
   *
   * Fields should be added before any documents are added to the index, fields
   * that are added after documents are added to the index will only apply to new
   * documents added to the index.
   *
   * @param {String} fieldName The name of the field within the document that
   * should be indexed
   * @param {Object} [opts] Optional settings for the field.
   * @param {Number} [opts.boost=1] Boost applied to terms in this field. A
   * missing or falsy value (including 0) falls back to 1.
   * @returns {lunr.Index}
   * @memberOf Index
   */
  lunr.Index.prototype.field = function (fieldName, opts) {
    var boost = (opts && opts.boost) || 1

    this._fields.push({ name: fieldName, boost: boost })

    return this
  }
  /**
   * Sets the property used to uniquely identify documents added to the index,
   * by default this property is 'id'.
   *
   * This should only be changed before adding documents to the index, changing
   * the ref property without resetting the index can lead to unexpected results.
   *
   * The value of ref can be of any type but it _must_ be stably comparable and
   * orderable.
   *
   * @param {String} refName The property to use to uniquely identify the
   * documents in the index.
   * @returns {lunr.Index}
   * @memberOf Index
   */
  lunr.Index.prototype.ref = function (refName) {
    this._ref = refName
    return this
  }
  /**
   * Replaces the tokenizer used by this index.
   *
   * A new index uses lunr.tokenizer. The tokenizer should only be changed
   * before documents are added; changing it without re-building the index can
   * lead to unexpected results.
   *
   * A warning is issued when the function has no label, or its label is not a
   * key of lunr.tokenizer.registeredFunctions. The function is installed
   * either way.
   *
   * @param {Function} fn The function to use as a tokenizer.
   * @returns {lunr.Index}
   * @memberOf Index
   */
  lunr.Index.prototype.tokenizer = function (fn) {
    var label = fn.label,
        known = label && (label in lunr.tokenizer.registeredFunctions)

    if (!known) {
      lunr.utils.warn('Function is not a registered tokenizer. This may cause problems when serialising the index')
    }

    this.tokenizerFn = fn

    return this
  }
  /**
   * Add a document to the index.
   *
   * Every configured field of the document is tokenized and run through the
   * index's pipeline. The resulting tokens are added to the corpus token set,
   * the document's unique tokens are stored in the document store under the
   * document's ref, and each unique token is written to the token store
   * together with its term frequency for this document.
   *
   * The term frequency of a token is the sum, over all fields, of the share of
   * that field's tokens that equal the token, multiplied by the field's boost.
   * Fields that produced no tokens contribute nothing.
   *
   * An 'add' event is emitted with the document that has been added and the
   * index it was added to. This event can be silenced by passing false as the
   * second argument to add.
   *
   * @param {Object} doc The document to add to the index.
   * @param {Boolean} emitEvent Whether or not to emit events, default true.
   * @memberOf Index
   */
  lunr.Index.prototype.add = function (doc, emitEvent) {
    var self = this,
        tokensByField = {},
        uniqueTokens = new lunr.SortedSet,
        ref = doc[this._ref],
        shouldEmit = emitEvent === undefined ? true : emitEvent

    // Number of entries in `tokens` that are strictly equal to `wanted`.
    var occurrences = function (tokens, wanted) {
      var total = 0
      for (var n = 0, len = tokens.length; n < len; n++) {
        if (tokens[n] === wanted) total++
      }
      return total
    }

    // Boost-weighted term frequency of `token`, summed across all fields.
    var termFrequency = function (token) {
      var sum = 0
      for (var f = 0; f < self._fields.length; f++) {
        var field = self._fields[f],
            tokens = tokensByField[field.name],
            size = tokens.length

        // An empty field would otherwise divide by zero.
        if (!size) continue

        sum += (occurrences(tokens, token) / size * field.boost)
      }
      return sum
    }

    this._fields.forEach(function (field) {
      var tokens = self.pipeline.run(self.tokenizerFn(doc[field.name]))

      tokensByField[field.name] = tokens

      for (var t = 0; t < tokens.length; t++) {
        uniqueTokens.add(tokens[t])
        self.corpusTokens.add(tokens[t])
      }
    })

    this.documentStore.set(ref, uniqueTokens)

    for (var i = 0; i < uniqueTokens.length; i++) {
      var token = uniqueTokens.elements[i]
      this.tokenStore.add(token, { ref: ref, tf: termFrequency(token) })
    }

    if (shouldEmit) this.eventEmitter.emit('add', doc, this)
  }
  /**
   * Removes a document from the index so that it no longer shows up in search
   * results.
   *
   * Only the ref property of the passed document is used, so it may be a
   * completely different object from the one that was added. When the document
   * store has no entry for that ref, nothing happens and no event is emitted.
   *
   * A 'remove' event is emitted with the document that has been removed and
   * the index it was removed from. This event can be silenced by passing false
   * as the second argument to remove.
   *
   * @param {Object} doc The document to remove from the index.
   * @param {Boolean} emitEvent Whether to emit remove events, defaults to true
   * @memberOf Index
   */
  lunr.Index.prototype.remove = function (doc, emitEvent) {
    var ref = doc[this._ref],
        shouldEmit = emitEvent === undefined ? true : emitEvent

    if (this.documentStore.has(ref)) {
      var tokens = this.documentStore.get(ref)

      this.documentStore.remove(ref)

      // Detach the document from every token it was stored under.
      tokens.forEach(function (token) {
        this.tokenStore.remove(token, ref)
      }, this)

      if (shouldEmit) this.eventEmitter.emit('remove', doc, this)
    }
  }
  /**
   * Updates a document in the index.
   *
   * When a document that is already in the index has fields changed, added or
   * removed, it should be updated so that it is matched correctly against
   * search queries.
   *
   * This method is just a wrapper around `remove` and `add`, both called with
   * their own events silenced.
   *
   * An 'update' event is emitted with the document that has been updated and
   * the index. This event can be silenced by passing false as the second
   * argument to update.
   *
   * @param {Object} doc The document to update in the index.
   * @param {Boolean} emitEvent Whether to emit update events, defaults to true
   * @see Index.prototype.remove
   * @see Index.prototype.add
   * @memberOf Index
   */
  lunr.Index.prototype.update = function (doc, emitEvent) {
    this.remove(doc, false)
    this.add(doc, false)

    // An omitted flag means "emit".
    if (emitEvent === undefined || emitEvent) {
      this.eventEmitter.emit('update', doc, this)
    }
  }
  /**
   * Calculates the inverse document frequency for a term within the index.
   *
   * The value is `1 + ln(documents in the index / documents containing the
   * term)`, or 1 when the token store reports no documents for the term.
   * Results are memoised in the index's idf cache, which the constructor
   * resets whenever an 'add', 'remove' or 'update' event is emitted.
   *
   * @param {String} term The term to calculate the idf of.
   * @returns {Number} The inverse document frequency of the term.
   * @private
   * @memberOf Index
   */
  lunr.Index.prototype.idf = function (term) {
    // NOTE(review): the "@" prefix presumably keeps cache keys from clashing
    // with built-in object property names; confirm before changing it.
    var cacheKey = "@" + term

    if (Object.prototype.hasOwnProperty.call(this._idfCache, cacheKey)) {
      return this._idfCache[cacheKey]
    }

    var documentFrequency = this.tokenStore.count(term),
        idf = 1

    if (documentFrequency > 0) {
      idf = 1 + Math.log(this.documentStore.length / documentFrequency)
    }

    this._idfCache[cacheKey] = idf

    return idf
  }
  /**
   * Searches the index using the passed query.
   *
   * The query is tokenized and passed through the same pipeline as document
   * tokens, so any language processing involved is run on every query term.
   * Multiple words lead to an AND based query, e.g. `idx.search('foo bar')`
   * returns documents matching both 'foo' and 'bar'.
   *
   * Each query term is expanded through the token store, so that the term
   * 'he' might be expanded to 'hello' and 'help' if those terms were already
   * included in the index. Expansions that differ from the query term are
   * weighted lower than exact matches.
   *
   * When the token store knows none of the query terms an empty array is
   * returned.
   *
   * @param {String} query The query to search the index with.
   * @returns {Object[]} One `{ ref, score }` object per matching document,
   * ordered by descending score.
   * @see Index.prototype.idf
   * @see Index.prototype.documentVector
   * @memberOf Index
   */
  lunr.Index.prototype.search = function (query) {
    var index = this,
        queryTokens = this.pipeline.run(this.tokenizerFn(query)),
        queryVector = new lunr.Vector,
        documentSets = [],
        fieldBoosts = this._fields.reduce(function (sum, field) {
          return sum + field.boost
        }, 0)

    var anyKnown = queryTokens.some(function (token) {
      return index.tokenStore.has(token)
    })

    if (!anyKnown) return []

    // Collects, into a fresh set, the ref of every document stored under `key`.
    var refsFor = function (key) {
      var docs = index.tokenStore.get(key),
          names = Object.keys(docs),
          refs = new lunr.SortedSet

      for (var n = 0; n < names.length; n++) {
        refs.add(docs[names[n]].ref)
      }

      return refs
    }

    queryTokens.forEach(function (token, position, tokens) {
      var tf = 1 / tokens.length * index._fields.length * fieldBoosts

      var matches = index.tokenStore.expand(token).reduce(function (found, key) {
        var pos = index.corpusTokens.indexOf(key),
            idf = index.idf(key),
            similarityBoost = 1

        // An expanded key that is not an exact match for the token is
        // penalised by how much longer it is than the token.
        if (key !== token) {
          var diff = Math.max(3, key.length - token.length)
          similarityBoost = 1 / Math.log(diff)
        }

        // Record the query's tf-idf weight for this key; the similarity
        // boost makes exact matches rank higher than expanded terms.
        if (pos > -1) queryVector.insert(pos, tf * idf * similarityBoost)

        // Documents matching any expansion of the token count as matches.
        return found.union(refsFor(key))
      }, new lunr.SortedSet)

      documentSets.push(matches)
    })

    // AND semantics: keep only documents matched by every query token.
    var commonDocs = documentSets.reduce(function (shared, set) {
      return shared.intersect(set)
    })

    var results = commonDocs.map(function (ref) {
      return { ref: ref, score: queryVector.similarity(this.documentVector(ref)) }
    }, this)

    return results.sort(function (a, b) {
      return b.score - a.score
    })
  }
  /**
   * Generates a vector describing the document stored under the passed
   * documentRef.
   *
   * For every token of the document, the token's tf-idf score is inserted
   * into the vector at the token's position within the corpus token set.
   *
   * @param {Object} documentRef The ref to find the document with.
   * @returns {lunr.Vector}
   * @private
   * @memberOf Index
   */
  lunr.Index.prototype.documentVector = function (documentRef) {
    var tokens = this.documentStore.get(documentRef),
        count = tokens.length,
        vector = new lunr.Vector,
        n = 0,
        token,
        tf,
        idf

    while (n < count) {
      token = tokens.elements[n]
      tf = this.tokenStore.get(token)[documentRef].tf
      idf = this.idf(token)

      vector.insert(this.corpusTokens.indexOf(token), tf * idf)

      n += 1
    }

    return vector
  }
  /**
   * Returns a plain object describing the index, ready for serialisation.
   *
   * The object carries the lunr version, the field list, the ref property
   * name, the tokenizer's label and the serialised forms of the document
   * store, token store, corpus tokens and pipeline.
   *
   * @returns {Object}
   * @memberOf Index
   */
  lunr.Index.prototype.toJSON = function () {
    var serialised = {}

    serialised.version = lunr.version
    serialised.fields = this._fields
    serialised.ref = this._ref
    serialised.tokenizer = this.tokenizerFn.label
    serialised.documentStore = this.documentStore.toJSON()
    serialised.tokenStore = this.tokenStore.toJSON()
    serialised.corpusTokens = this.corpusTokens.toJSON()
    serialised.pipeline = this.pipeline.toJSON()

    return serialised
  }
  /**
   * Applies a plugin to the current index.
   *
   * A plugin is just a function that encapsulates some customisation or
   * extension of the index. It is called with the index both as its context
   * (`this`) and as its first argument; any additional arguments passed to
   * use are handed on after the index.
   *
   * Example:
   *
   *     var myPlugin = function (idx, arg1, arg2) {
   *       // `this` is the index to be extended
   *       // apply any extensions etc here.
   *     }
   *
   *     var idx = lunr(function () {
   *       this.use(myPlugin, 'arg1', 'arg2')
   *     })
   *
   * @param {Function} plugin The plugin to apply.
   * @memberOf Index
   */
  lunr.Index.prototype.use = function (plugin) {
    var extras = Array.prototype.slice.call(arguments, 1),
        pluginArgs = [this].concat(extras)

    plugin.apply(this, pluginArgs)
  }
  1161. /*!
  1162. * lunr.Store
  1163. * Copyright (C) 2017 Oliver Nightingale
  1164. */
  /**
   * lunr.Store is a simple key-value store used for storing sets of tokens for
   * documents stored in index.
   *
   * `store` holds the entries and `length` tracks how many keys are present.
   *
   * @constructor
   * @module
   */
  lunr.Store = function () {
    this.store = {}
    this.length = 0
  }

  /**
   * Loads a previously serialised store.
   *
   * Every value in the serialised data is revived with lunr.SortedSet.load;
   * the length is taken from the serialised data as is.
   *
   * @param {Object} serialisedData The serialised store to load.
   * @returns {lunr.Store}
   * @memberOf Store
   */
  lunr.Store.load = function (serialisedData) {
    var store = new this

    store.length = serialisedData.length
    store.store = Object.keys(serialisedData.store).reduce(function (memo, key) {
      memo[key] = lunr.SortedSet.load(serialisedData.store[key])
      return memo
    }, {})

    return store
  }

  /**
   * Stores the given tokens in the store against the given id.
   *
   * The length only grows when the id was not already present; storing under
   * an existing id replaces its tokens.
   *
   * @param {Object} id The key used to store the tokens against.
   * @param {Object} tokens The tokens to store against the key.
   * @memberOf Store
   */
  lunr.Store.prototype.set = function (id, tokens) {
    if (!this.has(id)) this.length++
    this.store[id] = tokens
  }

  /**
   * Retrieves the tokens from the store for a given key.
   *
   * @param {Object} id The key to lookup and retrieve from the store.
   * @returns {Object}
   * @memberOf Store
   */
  lunr.Store.prototype.get = function (id) {
    return this.store[id]
  }

  /**
   * Checks whether the store contains a key.
   *
   * Only keys that were actually stored count: names inherited from
   * Object.prototype (such as 'toString' or 'constructor') are not reported
   * as present, which keeps `length` accurate in set and remove.
   *
   * @param {Object} id The id to look up in the store.
   * @returns {Boolean}
   * @memberOf Store
   */
  lunr.Store.prototype.has = function (id) {
    return Object.prototype.hasOwnProperty.call(this.store, id)
  }

  /**
   * Removes the value for a key in the store.
   *
   * Removing a key that is not present leaves the store untouched.
   *
   * @param {Object} id The id to remove from the store.
   * @memberOf Store
   */
  lunr.Store.prototype.remove = function (id) {
    if (!this.has(id)) return

    delete this.store[id]
    this.length--
  }

  /**
   * Returns a representation of the store ready for serialisation.
   *
   * @returns {Object}
   * @memberOf Store
   */
  lunr.Store.prototype.toJSON = function () {
    return {
      store: this.store,
      length: this.length
    }
  }
  1246. /*!
  1247. * lunr.stemmer
  1248. * Copyright (C) 2017 Oliver Nightingale
  1249. * Includes code from - http://tartarus.org/~martin/PorterStemmer/js.txt
  1250. */
  /**
   * lunr.stemmer is an english language stemmer, this is a JavaScript
   * implementation of the PorterStemmer taken from http://tartarus.org/~martin
   *
   * Words shorter than three characters are returned unchanged. Longer words
   * go through the numbered suffix-stripping steps below, in order.
   *
   * @module
   * @param {String} str The string to stem
   * @returns {String}
   * @see lunr.Pipeline
   */
  lunr.stemmer = (function(){
    // Suffix replacements applied in step 2 and step 3 respectively.
    var step2list = {
          "ational" : "ate",
          "tional" : "tion",
          "enci" : "ence",
          "anci" : "ance",
          "izer" : "ize",
          "bli" : "ble",
          "alli" : "al",
          "entli" : "ent",
          "eli" : "e",
          "ousli" : "ous",
          "ization" : "ize",
          "ation" : "ate",
          "ator" : "ate",
          "alism" : "al",
          "iveness" : "ive",
          "fulness" : "ful",
          "ousness" : "ous",
          "aliti" : "al",
          "iviti" : "ive",
          "biliti" : "ble",
          "logi" : "log"
        },

        step3list = {
          "icate" : "ic",
          "ative" : "",
          "alize" : "al",
          "iciti" : "ic",
          "ical" : "ic",
          "ful" : "",
          "ness" : ""
        },

        c = "[^aeiou]",          // consonant
        v = "[aeiouy]",          // vowel
        C = c + "[^aeiouy]*",    // consonant sequence
        V = v + "[aeiou]*",      // vowel sequence

        mgr0 = "^(" + C + ")?" + V + C,               // [C]VC... is m>0
        meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$",  // [C]VC[V] is m=1
        mgr1 = "^(" + C + ")?" + V + C + V + C,       // [C]VCVC... is m>1
        s_v = "^(" + C + ")?" + v;                   // vowel in stem

    // None of these expressions is global or sticky, so test/exec carry no
    // state between calls and the objects can be shared by every invocation.
    var re_mgr0 = new RegExp(mgr0);
    var re_mgr1 = new RegExp(mgr1);
    var re_meq1 = new RegExp(meq1);
    var re_s_v = new RegExp(s_v);

    var re_1a = /^(.+?)(ss|i)es$/;
    var re2_1a = /^(.+?)([^s])s$/;
    var re_1b = /^(.+?)eed$/;
    var re2_1b = /^(.+?)(ed|ing)$/;
    var re_1b_2 = /.$/;
    var re2_1b_2 = /(at|bl|iz)$/;
    var re3_1b_2 = new RegExp("([^aeiouylsz])\\1$");
    var re4_1b_2 = new RegExp("^" + C + v + "[^aeiouwxy]$");

    var re_1c = /^(.+?[^aeiou])y$/;
    var re_2 = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/;

    var re_3 = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/;

    var re_4 = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/;
    var re2_4 = /^(.+?)(s|t)(ion)$/;

    var re_5 = /^(.+?)e$/;
    var re_5_1 = /ll$/;
    var re3_5 = new RegExp("^" + C + v + "[^aeiouwxy]$");

    var porterStemmer = function porterStemmer(w) {
      var stem,
          match,
          startsWithY;

      if (w.length < 3) { return w; }

      // Upper-case a leading "y" for the duration of the algorithm: the vowel
      // class above only contains the lower-case letter, so "Y" is matched as
      // a consonant. It is turned back at the very end.
      startsWithY = w.charAt(0) === "y";
      if (startsWithY) {
        w = "Y" + w.slice(1);
      }

      // Step 1a
      if (re_1a.test(w)) {
        w = w.replace(re_1a, "$1$2");
      } else if (re2_1a.test(w)) {
        w = w.replace(re2_1a, "$1$2");
      }

      // Step 1b
      match = re_1b.exec(w);
      if (match) {
        // "...eed": drop the final letter when the stem has m>0.
        if (re_mgr0.test(match[1])) {
          w = w.replace(re_1b_2, "");
        }
      } else {
        match = re2_1b.exec(w);
        if (match) {
          stem = match[1];
          // "...ed" / "...ing" is only removed when the stem has a vowel.
          if (re_s_v.test(stem)) {
            w = stem;
            if (re2_1b_2.test(w)) {
              w = w + "e";
            } else if (re3_1b_2.test(w)) {
              w = w.replace(re_1b_2, "");
            } else if (re4_1b_2.test(w)) {
              w = w + "e";
            }
          }
        }
      }

      // Step 1c - replace suffix y or Y by i if preceded by a non-vowel which is not the first letter of the word (so cry -> cri, by -> by, say -> say)
      match = re_1c.exec(w);
      if (match) {
        w = match[1] + "i";
      }

      // Step 2
      match = re_2.exec(w);
      if (match && re_mgr0.test(match[1])) {
        w = match[1] + step2list[match[2]];
      }

      // Step 3
      match = re_3.exec(w);
      if (match && re_mgr0.test(match[1])) {
        w = match[1] + step3list[match[2]];
      }

      // Step 4
      match = re_4.exec(w);
      if (match) {
        if (re_mgr1.test(match[1])) {
          w = match[1];
        }
      } else {
        match = re2_4.exec(w);
        if (match) {
          // Keep the s/t that precedes "ion" as part of the stem.
          stem = match[1] + match[2];
          if (re_mgr1.test(stem)) {
            w = stem;
          }
        }
      }

      // Step 5
      match = re_5.exec(w);
      if (match) {
        stem = match[1];
        if (re_mgr1.test(stem) || (re_meq1.test(stem) && !re3_5.test(stem))) {
          w = stem;
        }
      }

      if (re_5_1.test(w) && re_mgr1.test(w)) {
        w = w.replace(re_1b_2, "");
      }

      // and turn initial Y back to y
      if (startsWithY) {
        w = "y" + w.slice(1);
      }

      return w;
    };

    return porterStemmer;
  })();
  // Register the stemmer with lunr.Pipeline under the label 'stemmer'.
  lunr.Pipeline.registerFunction(lunr.stemmer, 'stemmer')
  1437. /*!
  1438. * lunr.stopWordFilter
  1439. * Copyright (C) 2017 Oliver Nightingale
  1440. */
  /**
   * lunr.generateStopWordFilter builds a stopWordFilter function from the provided
   * list of stop words.
   *
   * The built in lunr.stopWordFilter is built using this generator and can be used
   * to generate custom stopWordFilters for applications or non English languages.
   *
   * The returned filter takes a token and returns it unchanged unless the
   * token is empty or is one of the stop words, in which case it returns
   * undefined.
   *
   * @module
   * @param {Array} stopWords The list of stop words the generated filter
   * should reject.
   * @returns {Function}
   * @see lunr.Pipeline
   * @see lunr.stopWordFilter
   */
  lunr.generateStopWordFilter = function (stopWords) {
    // The lookup table has no prototype, so every stop word (even one named
    // '__proto__') becomes an ordinary own property of the table.
    var words = stopWords.reduce(function (memo, stopWord) {
      memo[stopWord] = stopWord
      return memo
    }, Object.create(null))

    return function (token) {
      // Comparing the stored value with the token itself keeps the match
      // strict: a token only counts as a stop word when it equals the entry.
      if (token && words[token] !== token) return token
    }
  }
  /**
   * lunr.stopWordFilter is the English language stop word filter: a token
   * that appears in the list below does not pass the filter.
   *
   * It is intended to be used in the Pipeline. When a token does not pass the
   * filter, undefined is returned instead of the token.
   *
   * @module
   * @param {String} token The token to pass through the filter
   * @returns {String}
   * @see lunr.Pipeline
   */
  lunr.stopWordFilter = lunr.generateStopWordFilter([
    'a', 'able', 'about', 'across', 'after', 'all', 'almost', 'also', 'am',
    'among', 'an', 'and', 'any', 'are', 'as', 'at',
    'be', 'because', 'been', 'but', 'by',
    'can', 'cannot', 'could',
    'dear', 'did', 'do', 'does',
    'either', 'else', 'ever', 'every',
    'for', 'from',
    'get', 'got',
    'had', 'has', 'have', 'he', 'her', 'hers', 'him', 'his', 'how', 'however',
    'i', 'if', 'in', 'into', 'is', 'it', 'its',
    'just',
    'least', 'let', 'like', 'likely',
    'may', 'me', 'might', 'most', 'must', 'my',
    'neither', 'no', 'nor', 'not',
    'of', 'off', 'often', 'on', 'only', 'or', 'other', 'our', 'own',
    'rather',
    'said', 'say', 'says', 'she', 'should', 'since', 'so', 'some',
    'than', 'that', 'the', 'their', 'them', 'then', 'there', 'these', 'they',
    'this', 'tis', 'to', 'too', 'twas',
    'us',
    'wants', 'was', 'we', 'were', 'what', 'when', 'where', 'which', 'while',
    'who', 'whom', 'why', 'will', 'with', 'would',
    'yet', 'you', 'your'
  ])

  // Register the filter with lunr.Pipeline under the label 'stopWordFilter'.
  lunr.Pipeline.registerFunction(lunr.stopWordFilter, 'stopWordFilter')
  1597. /*!
  1598. * lunr.trimmer
  1599. * Copyright (C) 2017 Oliver Nightingale
  1600. */
  /**
   * lunr.trimmer is a pipeline function that strips non word characters from
   * the beginning and the end of a token before it enters the index.
   *
   * "Non word" is the regular expression class \W, so this implementation may
   * not work correctly for non latin characters and should either be removed
   * or adapted for use with languages with non-latin characters.
   *
   * @module
   * @param {String} token The token to pass through the filter
   * @returns {String}
   * @see lunr.Pipeline
   */
  lunr.trimmer = function (token) {
    var withoutLeading = token.replace(/^\W+/, '')

    return withoutLeading.replace(/\W+$/, '')
  }
  // Register the trimmer with lunr.Pipeline under the label 'trimmer'.
  lunr.Pipeline.registerFunction(lunr.trimmer, 'trimmer')
  1619. /*!
  1620. * lunr.TokenStore
  1621. * Copyright (C) 2017 Oliver Nightingale
  1622. * Includes code from - http://tartarus.org/~martin/PorterStemmer/js.txt
  1623. */
  /**
   * lunr.TokenStore holds the reverse index that maps tokens to document
   * refs, for efficient storing and lookup.
   *
   * A new store starts with a root node that has an empty `docs` map and a
   * length of 0.
   *
   * @constructor
   */
  lunr.TokenStore = function () {
    var emptyRoot = { docs: {} }

    this.root = emptyRoot
    this.length = 0
  }
  /**
   * Rebuilds a token store from previously serialised data.
   *
   * The root node and the length are taken over from the serialised data as
   * they are.
   *
   * @param {Object} serialisedData The serialised token store to load.
   * @returns {lunr.TokenStore}
   * @memberOf TokenStore
   */
  lunr.TokenStore.load = function (serialisedData) {
    var restored = new this()

    restored.root = serialisedData.root
    restored.length = serialisedData.length

    return restored
  }
  /**
   * Adds a new token doc pair to the store.
   *
   * The token is followed one character at a time from the starting node,
   * creating any missing child node on the way. The doc is then stored in the
   * `docs` map of the node reached by the last character, keyed by the doc's
   * ref, and the store's length is increased by one.
   *
   * By default the walk starts at the root of the current store, however it
   * can start at any node of any token store if required.
   *
   * @param {String} token The token to store the doc under
   * @param {Object} doc The doc to store against the token
   * @param {Object} root An optional node at which to start looking for the
   * correct place to enter the doc, by default the root of this lunr.TokenStore
   * is used.
   * @memberOf TokenStore
   */
  lunr.TokenStore.prototype.add = function (token, doc, root) {
    var node = root || this.root,
        position = 0,
        key

    // A do/while so that the first character slot is always visited, even
    // when the token has no characters.
    do {
      key = token.charAt(position)
      if (!(key in node)) node[key] = {docs: {}}
      node = node[key]
      position += 1
    } while (position < token.length)

    node.docs[doc.ref] = doc
    this.length += 1
  }
  1673. /**
  1674. * Checks whether this key is contained within this lunr.TokenStore.
  1675. *
  1676. * By default this function starts at the root of the current store, however
  1677. * it can start at any node of any token store if required.
  1678. *
  1679. * @param {String} token The token to check for
  1680. * @param {Object} root An optional node at which to start
  1681. * @memberOf TokenStore
  1682. */
  1683. lunr.TokenStore.prototype.has = function (token) {
  1684. if (!token) return false
  1685. var node = this.root
  1686. for (var i = 0; i < token.length; i++) {
  1687. if (!node[token.charAt(i)]) return false
  1688. node = node[token.charAt(i)]
  1689. }
  1690. return true
  1691. }
  1692. /**
  1693. * Retrieve a node from the token store for a given token.
  1694. *
  1695. * By default this function starts at the root of the current store, however
  1696. * it can start at any node of any token store if required.
  1697. *
  1698. * @param {String} token The token to get the node for.
  1699. * @param {Object} root An optional node at which to start.
  1700. * @returns {Object}
  1701. * @see TokenStore.prototype.get
  1702. * @memberOf TokenStore
  1703. */
  1704. lunr.TokenStore.prototype.getNode = function (token) {
  1705. if (!token) return {}
  1706. var node = this.root
  1707. for (var i = 0; i < token.length; i++) {
  1708. if (!node[token.charAt(i)]) return {}
  1709. node = node[token.charAt(i)]
  1710. }
  1711. return node
  1712. }
  1713. /**
  1714. * Retrieve the documents for a node for the given token.
  1715. *
  1716. * By default this function starts at the root of the current store, however
  1717. * it can start at any node of any token store if required.
  1718. *
  1719. * @param {String} token The token to get the documents for.
  1720. * @param {Object} root An optional node at which to start.
  1721. * @returns {Object}
  1722. * @memberOf TokenStore
  1723. */
  1724. lunr.TokenStore.prototype.get = function (token, root) {
  1725. return this.getNode(token, root).docs || {}
  1726. }
  1727. lunr.TokenStore.prototype.count = function (token, root) {
  1728. return Object.keys(this.get(token, root)).length
  1729. }
  1730. /**
  1731. * Remove the document identified by ref from the token in the store.
  1732. *
  1733. * By default this function starts at the root of the current store, however
  1734. * it can start at any node of any token store if required.
  1735. *
  1736. * @param {String} token The token to get the documents for.
  1737. * @param {String} ref The ref of the document to remove from this token.
  1738. * @param {Object} root An optional node at which to start.
  1739. * @returns {Object}
  1740. * @memberOf TokenStore
  1741. */
  1742. lunr.TokenStore.prototype.remove = function (token, ref) {
  1743. if (!token) return
  1744. var node = this.root
  1745. for (var i = 0; i < token.length; i++) {
  1746. if (!(token.charAt(i) in node)) return
  1747. node = node[token.charAt(i)]
  1748. }
  1749. delete node.docs[ref]
  1750. }
  1751. /**
  1752. * Find all the possible suffixes of the passed token using tokens
  1753. * currently in the store.
  1754. *
  1755. * @param {String} token The token to expand.
  1756. * @returns {Array}
  1757. * @memberOf TokenStore
  1758. */
  1759. lunr.TokenStore.prototype.expand = function (token, memo) {
  1760. var root = this.getNode(token),
  1761. docs = root.docs || {},
  1762. memo = memo || []
  1763. if (Object.keys(docs).length) memo.push(token)
  1764. Object.keys(root)
  1765. .forEach(function (key) {
  1766. if (key === 'docs') return
  1767. memo.concat(this.expand(token + key, memo))
  1768. }, this)
  1769. return memo
  1770. }
  1771. /**
  1772. * Returns a representation of the token store ready for serialisation.
  1773. *
  1774. * @returns {Object}
  1775. * @memberOf TokenStore
  1776. */
  1777. lunr.TokenStore.prototype.toJSON = function () {
  1778. return {
  1779. root: this.root,
  1780. length: this.length
  1781. }
  1782. }
  /**
   * Export the module via AMD, CommonJS or as a browser global.
   * Export code from https://github.com/umdjs/umd/blob/master/returnExports.js
   */
  ;(function (root, factory) {
    if (typeof define === 'function' && define.amd) {
      // AMD. Register as an anonymous module.
      define(factory)
    } else if (typeof exports === 'object') {
      /**
       * Node. Does not work with strict CommonJS, but
       * only CommonJS-like environments that support module.exports,
       * like Node.
       */
      module.exports = factory()
    } else {
      // Browser globals (root is window). `root` is the caller's `this`, which
      // is undefined when this code is evaluated in strict mode or as an ES
      // module; fall back to the global object in that case instead of
      // throwing on `undefined.lunr`.
      var globalObject = root ||
        (typeof self !== 'undefined' ? self
          : typeof global !== 'undefined' ? global
            : undefined)

      globalObject.lunr = factory()
    }
  }(this, function () {
    /**
     * Just return a value to define the module export.
     * Here the export is the lunr namespace built above.
     */
    return lunr
  }))
  1810. })();