pipeline.js 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267
  1. /*!
  2. * lunr.Pipeline
  3. * Copyright (C) @YEAR Oliver Nightingale
  4. */
  5. /**
  6. * lunr.Pipelines maintain an ordered list of functions to be applied to all
  7. * tokens in documents entering the search index and queries being ran against
  8. * the index.
  9. *
  10. * An instance of lunr.Index created with the lunr shortcut will contain a
  11. * pipeline with a stop word filter and an English language stemmer. Extra
  12. * functions can be added before or after either of these functions or these
  13. * default functions can be removed.
  14. *
  15. * When run the pipeline will call each function in turn, passing a token, the
  16. * index of that token in the original list of all tokens and finally a list of
  17. * all the original tokens.
  18. *
  19. * The output of functions in the pipeline will be passed to the next function
  20. * in the pipeline. To exclude a token from entering the index the function
  21. * should return undefined, the rest of the pipeline will not be called with
  22. * this token.
  23. *
  24. * For serialisation of pipelines to work, all functions used in an instance of
  25. * a pipeline should be registered with lunr.Pipeline. Registered functions can
  26. * then be loaded. If trying to load a serialised pipeline that uses functions
  27. * that are not registered an error will be thrown.
  28. *
  29. * If not planning on serialising the pipeline then registering pipeline functions
  30. * is not necessary.
  31. *
  32. * @constructor
  33. */
  34. lunr.Pipeline = function () {
  35. this._stack = []
  36. }
  37. lunr.Pipeline.registeredFunctions = Object.create(null)
  38. /**
  39. * A pipeline function maps lunr.Token to lunr.Token. A lunr.Token contains the token
  40. * string as well as all known metadata. A pipeline function can mutate the token string
  41. * or mutate (or add) metadata for a given token.
  42. *
  43. * A pipeline function can indicate that the passed token should be discarded by returning
  44. * null, undefined or an empty string. This token will not be passed to any downstream pipeline
  45. * functions and will not be added to the index.
  46. *
  47. * Multiple tokens can be returned by returning an array of tokens. Each token will be passed
  48. * to any downstream pipeline functions and all returned tokens will be added to the index.
  49. *
  50. * Any number of pipeline functions may be chained together using a lunr.Pipeline.
  51. *
  52. * @interface lunr.PipelineFunction
  53. * @param {lunr.Token} token - A token from the document being processed.
  54. * @param {number} i - The index of this token in the complete list of tokens for this document/field.
  55. * @param {lunr.Token[]} tokens - All tokens for this document/field.
  56. * @returns {(?lunr.Token|lunr.Token[])}
  57. */
  58. /**
  59. * Register a function with the pipeline.
  60. *
  61. * Functions that are used in the pipeline should be registered if the pipeline
  62. * needs to be serialised, or a serialised pipeline needs to be loaded.
  63. *
  64. * Registering a function does not add it to a pipeline, functions must still be
  65. * added to instances of the pipeline for them to be used when running a pipeline.
  66. *
  67. * @param {lunr.PipelineFunction} fn - The function to check for.
  68. * @param {String} label - The label to register this function with
  69. */
  70. lunr.Pipeline.registerFunction = function (fn, label) {
  71. if (label in this.registeredFunctions) {
  72. lunr.utils.warn('Overwriting existing registered function: ' + label)
  73. }
  74. fn.label = label
  75. lunr.Pipeline.registeredFunctions[fn.label] = fn
  76. }
  77. /**
  78. * Warns if the function is not registered as a Pipeline function.
  79. *
  80. * @param {lunr.PipelineFunction} fn - The function to check for.
  81. * @private
  82. */
  83. lunr.Pipeline.warnIfFunctionNotRegistered = function (fn) {
  84. var isRegistered = fn.label && (fn.label in this.registeredFunctions)
  85. if (!isRegistered) {
  86. lunr.utils.warn('Function is not registered with pipeline. This may cause problems when serialising the index.\n', fn)
  87. }
  88. }
  89. /**
  90. * Loads a previously serialised pipeline.
  91. *
  92. * All functions to be loaded must already be registered with lunr.Pipeline.
  93. * If any function from the serialised data has not been registered then an
  94. * error will be thrown.
  95. *
  96. * @param {Object} serialised - The serialised pipeline to load.
  97. * @returns {lunr.Pipeline}
  98. */
  99. lunr.Pipeline.load = function (serialised) {
  100. var pipeline = new lunr.Pipeline
  101. serialised.forEach(function (fnName) {
  102. var fn = lunr.Pipeline.registeredFunctions[fnName]
  103. if (fn) {
  104. pipeline.add(fn)
  105. } else {
  106. throw new Error('Cannot load unregistered function: ' + fnName)
  107. }
  108. })
  109. return pipeline
  110. }
  111. /**
  112. * Adds new functions to the end of the pipeline.
  113. *
  114. * Logs a warning if the function has not been registered.
  115. *
  116. * @param {lunr.PipelineFunction[]} functions - Any number of functions to add to the pipeline.
  117. */
  118. lunr.Pipeline.prototype.add = function () {
  119. var fns = Array.prototype.slice.call(arguments)
  120. fns.forEach(function (fn) {
  121. lunr.Pipeline.warnIfFunctionNotRegistered(fn)
  122. this._stack.push(fn)
  123. }, this)
  124. }
  125. /**
  126. * Adds a single function after a function that already exists in the
  127. * pipeline.
  128. *
  129. * Logs a warning if the function has not been registered.
  130. *
  131. * @param {lunr.PipelineFunction} existingFn - A function that already exists in the pipeline.
  132. * @param {lunr.PipelineFunction} newFn - The new function to add to the pipeline.
  133. */
  134. lunr.Pipeline.prototype.after = function (existingFn, newFn) {
  135. lunr.Pipeline.warnIfFunctionNotRegistered(newFn)
  136. var pos = this._stack.indexOf(existingFn)
  137. if (pos == -1) {
  138. throw new Error('Cannot find existingFn')
  139. }
  140. pos = pos + 1
  141. this._stack.splice(pos, 0, newFn)
  142. }
  143. /**
  144. * Adds a single function before a function that already exists in the
  145. * pipeline.
  146. *
  147. * Logs a warning if the function has not been registered.
  148. *
  149. * @param {lunr.PipelineFunction} existingFn - A function that already exists in the pipeline.
  150. * @param {lunr.PipelineFunction} newFn - The new function to add to the pipeline.
  151. */
  152. lunr.Pipeline.prototype.before = function (existingFn, newFn) {
  153. lunr.Pipeline.warnIfFunctionNotRegistered(newFn)
  154. var pos = this._stack.indexOf(existingFn)
  155. if (pos == -1) {
  156. throw new Error('Cannot find existingFn')
  157. }
  158. this._stack.splice(pos, 0, newFn)
  159. }
  160. /**
  161. * Removes a function from the pipeline.
  162. *
  163. * @param {lunr.PipelineFunction} fn The function to remove from the pipeline.
  164. */
  165. lunr.Pipeline.prototype.remove = function (fn) {
  166. var pos = this._stack.indexOf(fn)
  167. if (pos == -1) {
  168. return
  169. }
  170. this._stack.splice(pos, 1)
  171. }
  172. /**
  173. * Runs the current list of functions that make up the pipeline against the
  174. * passed tokens.
  175. *
  176. * @param {Array} tokens The tokens to run through the pipeline.
  177. * @returns {Array}
  178. */
  179. lunr.Pipeline.prototype.run = function (tokens) {
  180. var stackLength = this._stack.length
  181. for (var i = 0; i < stackLength; i++) {
  182. var fn = this._stack[i]
  183. var memo = []
  184. for (var j = 0; j < tokens.length; j++) {
  185. var result = fn(tokens[j], j, tokens)
  186. if (result === null || result === void 0 || result === '') continue
  187. if (Array.isArray(result)) {
  188. for (var k = 0; k < result.length; k++) {
  189. memo.push(result[k])
  190. }
  191. } else {
  192. memo.push(result)
  193. }
  194. }
  195. tokens = memo
  196. }
  197. return tokens
  198. }
  199. /**
  200. * Convenience method for passing a string through a pipeline and getting
  201. * strings out. This method takes care of wrapping the passed string in a
  202. * token and mapping the resulting tokens back to strings.
  203. *
  204. * @param {string} str - The string to pass through the pipeline.
  205. * @param {?object} metadata - Optional metadata to associate with the token
  206. * passed to the pipeline.
  207. * @returns {string[]}
  208. */
  209. lunr.Pipeline.prototype.runString = function (str, metadata) {
  210. var token = new lunr.Token (str, metadata)
  211. return this.run([token]).map(function (t) {
  212. return t.toString()
  213. })
  214. }
  215. /**
  216. * Resets the pipeline by removing any existing processors.
  217. *
  218. */
  219. lunr.Pipeline.prototype.reset = function () {
  220. this._stack = []
  221. }
  222. /**
  223. * Returns a representation of the pipeline ready for serialisation.
  224. *
  225. * Logs a warning if the function has not been registered.
  226. *
  227. * @returns {Array}
  228. */
  229. lunr.Pipeline.prototype.toJSON = function () {
  230. return this._stack.map(function (fn) {
  231. lunr.Pipeline.warnIfFunctionNotRegistered(fn)
  232. return fn.label
  233. })
  234. }