vector.js 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196
  1. /*!
  2. * lunr.Vector
  3. * Copyright (C) @YEAR Oliver Nightingale
  4. */
  5. /**
  6. * A vector is used to construct the vector space of documents and queries. These
  7. * vectors support operations to determine the similarity between two documents or
  8. * a document and a query.
  9. *
  10. * Normally no parameters are required for initializing a vector, but in the case of
  11. * loading a previously dumped vector the raw elements can be provided to the constructor.
  12. *
  13. * For performance reasons vectors are implemented with a flat array, where an elements
  14. * index is immediately followed by its value. E.g. [index, value, index, value]. This
  15. * allows the underlying array to be as sparse as possible and still offer decent
  16. * performance when being used for vector calculations.
  17. *
  18. * @constructor
  19. * @param {Number[]} [elements] - The flat list of element index and element value pairs.
  20. */
  21. lunr.Vector = function (elements) {
  22. this._magnitude = 0
  23. this.elements = elements || []
  24. }
  25. /**
  26. * Calculates the position within the vector to insert a given index.
  27. *
  28. * This is used internally by insert and upsert. If there are duplicate indexes then
  29. * the position is returned as if the value for that index were to be updated, but it
  30. * is the callers responsibility to check whether there is a duplicate at that index
  31. *
  32. * @param {Number} insertIdx - The index at which the element should be inserted.
  33. * @returns {Number}
  34. */
  35. lunr.Vector.prototype.positionForIndex = function (index) {
  36. // For an empty vector the tuple can be inserted at the beginning
  37. if (this.elements.length == 0) {
  38. return 0
  39. }
  40. var start = 0,
  41. end = this.elements.length / 2,
  42. sliceLength = end - start,
  43. pivotPoint = Math.floor(sliceLength / 2),
  44. pivotIndex = this.elements[pivotPoint * 2]
  45. while (sliceLength > 1) {
  46. if (pivotIndex < index) {
  47. start = pivotPoint
  48. }
  49. if (pivotIndex > index) {
  50. end = pivotPoint
  51. }
  52. if (pivotIndex == index) {
  53. break
  54. }
  55. sliceLength = end - start
  56. pivotPoint = start + Math.floor(sliceLength / 2)
  57. pivotIndex = this.elements[pivotPoint * 2]
  58. }
  59. if (pivotIndex == index) {
  60. return pivotPoint * 2
  61. }
  62. if (pivotIndex > index) {
  63. return pivotPoint * 2
  64. }
  65. if (pivotIndex < index) {
  66. return (pivotPoint + 1) * 2
  67. }
  68. }
  69. /**
  70. * Inserts an element at an index within the vector.
  71. *
  72. * Does not allow duplicates, will throw an error if there is already an entry
  73. * for this index.
  74. *
  75. * @param {Number} insertIdx - The index at which the element should be inserted.
  76. * @param {Number} val - The value to be inserted into the vector.
  77. */
  78. lunr.Vector.prototype.insert = function (insertIdx, val) {
  79. this.upsert(insertIdx, val, function () {
  80. throw "duplicate index"
  81. })
  82. }
  83. /**
  84. * Inserts or updates an existing index within the vector.
  85. *
  86. * @param {Number} insertIdx - The index at which the element should be inserted.
  87. * @param {Number} val - The value to be inserted into the vector.
  88. * @param {function} fn - A function that is called for updates, the existing value and the
  89. * requested value are passed as arguments
  90. */
  91. lunr.Vector.prototype.upsert = function (insertIdx, val, fn) {
  92. this._magnitude = 0
  93. var position = this.positionForIndex(insertIdx)
  94. if (this.elements[position] == insertIdx) {
  95. this.elements[position + 1] = fn(this.elements[position + 1], val)
  96. } else {
  97. this.elements.splice(position, 0, insertIdx, val)
  98. }
  99. }
  100. /**
  101. * Calculates the magnitude of this vector.
  102. *
  103. * @returns {Number}
  104. */
  105. lunr.Vector.prototype.magnitude = function () {
  106. if (this._magnitude) return this._magnitude
  107. var sumOfSquares = 0,
  108. elementsLength = this.elements.length
  109. for (var i = 1; i < elementsLength; i += 2) {
  110. var val = this.elements[i]
  111. sumOfSquares += val * val
  112. }
  113. return this._magnitude = Math.sqrt(sumOfSquares)
  114. }
  115. /**
  116. * Calculates the dot product of this vector and another vector.
  117. *
  118. * @param {lunr.Vector} otherVector - The vector to compute the dot product with.
  119. * @returns {Number}
  120. */
  121. lunr.Vector.prototype.dot = function (otherVector) {
  122. var dotProduct = 0,
  123. a = this.elements, b = otherVector.elements,
  124. aLen = a.length, bLen = b.length,
  125. aVal = 0, bVal = 0,
  126. i = 0, j = 0
  127. while (i < aLen && j < bLen) {
  128. aVal = a[i], bVal = b[j]
  129. if (aVal < bVal) {
  130. i += 2
  131. } else if (aVal > bVal) {
  132. j += 2
  133. } else if (aVal == bVal) {
  134. dotProduct += a[i + 1] * b[j + 1]
  135. i += 2
  136. j += 2
  137. }
  138. }
  139. return dotProduct
  140. }
  141. /**
  142. * Calculates the similarity between this vector and another vector.
  143. *
  144. * @param {lunr.Vector} otherVector - The other vector to calculate the
  145. * similarity with.
  146. * @returns {Number}
  147. */
  148. lunr.Vector.prototype.similarity = function (otherVector) {
  149. return this.dot(otherVector) / this.magnitude() || 0
  150. }
  151. /**
  152. * Converts the vector to an array of the elements within the vector.
  153. *
  154. * @returns {Number[]}
  155. */
  156. lunr.Vector.prototype.toArray = function () {
  157. var output = new Array (this.elements.length / 2)
  158. for (var i = 1, j = 0; i < this.elements.length; i += 2, j++) {
  159. output[j] = this.elements[i]
  160. }
  161. return output
  162. }
  163. /**
  164. * A JSON serializable representation of the vector.
  165. *
  166. * @returns {Number[]}
  167. */
  168. lunr.Vector.prototype.toJSON = function () {
  169. return this.elements
  170. }