math.cjs 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127
  1. "use strict";
  2. Object.defineProperty(exports, "__esModule", { value: true });
  3. exports.maximalMarginalRelevance = exports.euclideanDistance = exports.innerProduct = exports.cosineSimilarity = exports.normalize = exports.matrixFunc = void 0;
  4. const similarities_js_1 = require("./ml-distance/similarities.cjs");
  5. const distances_js_1 = require("./ml-distance/distances.cjs");
  6. const euclidean_js_1 = require("./ml-distance-euclidean/euclidean.cjs");
  /**
   * Build the pairwise result matrix of a vector function over the rows of two matrices.
   *
   * Entry `[i][j]` of the result is `func(X[i], Y[j])`; any NaN produced by `func`
   * is stored as 0.
   *
   * @param {number[][]} X - The first matrix; each row is one vector.
   * @param {number[][]} Y - The second matrix; each row is one vector.
   * @param {VectorFunction} func - Called as `func(xRow, yRow)` for every pair of rows.
   *
   * @throws {Error} If the first row of X and the first row of Y differ in length.
   *
   * @returns {number[][] | [[]]} One row per row of X and one column per row of Y,
   * or `[[]]` when either matrix has no rows or an empty first row.
   */
  function matrixFunc(X, Y, func) {
    // Only the first row is inspected to decide whether a matrix is empty.
    const hasNoData = (matrix) => matrix.length === 0 || matrix[0].length === 0;
    if (hasNoData(X) || hasNoData(Y)) {
      return [[]];
    }
    const xColumns = X[0].length;
    const yColumns = Y[0].length;
    if (xColumns !== yColumns) {
      throw new Error(`Number of columns in X and Y must be the same. X has shape ${X.length},${xColumns} and Y has shape ${Y.length},${yColumns}.`);
    }
    const scoreAgainstY = (xVector) => Y.map((yVector) => {
      const value = func(xVector, yVector);
      // NaN results are reported as 0.
      return Number.isNaN(value) ? 0 : value;
    });
    return X.map((xVector) => scoreAgainstY(xVector));
  }
  33. exports.matrixFunc = matrixFunc;
  /**
   * Rescale every entry of a matrix by the matrix-wide maximum from `matrixMaxVal`.
   *
   * @param {number[][]} M - The matrix to rescale.
   * @param {boolean} [similarity=false] - When truthy each entry becomes
   * `1 - value / max`; otherwise it becomes `value / max`.
   *
   * @returns {number[][]} A new matrix of the same shape; `M` is not modified.
   * No guard is applied for a maximum of 0, so the plain division result
   * (NaN or an infinity) is returned in that case.
   */
  function normalize(M, similarity = false) {
    const largest = matrixMaxVal(M);
    const rescale = similarity
      ? (entry) => 1 - entry / largest
      : (entry) => entry / largest;
    return M.map((row) => row.map((entry) => rescale(entry)));
  }
  38. exports.normalize = normalize;
  /**
   * Compute the cosine similarity between every row of X and every row of Y.
   *
   * Delegates to `matrixFunc` with the `cosine` function from
   * `./ml-distance/similarities.cjs`.
   *
   * @param {number[][]} X - The first matrix.
   * @param {number[][]} Y - The second matrix.
   *
   * @throws {Error} If the number of columns in X and Y are not the same.
   *
   * @returns {number[][] | [[]]} A matrix whose entry `[i][j]` is the value for
   * row `i` of X and row `j` of Y.
   */
  function cosineSimilarity(X, Y) {
    const { cosine } = similarities_js_1;
    return matrixFunc(X, Y, cosine);
  }
  52. exports.cosineSimilarity = cosineSimilarity;
  /**
   * Apply the `innerProduct` function from `./ml-distance/distances.cjs`
   * to every pair of rows taken from X and Y.
   *
   * @param {number[][]} X - The first matrix.
   * @param {number[][]} Y - The second matrix.
   *
   * @throws {Error} If the number of columns in X and Y are not the same.
   *
   * @returns {number[][] | [[]]} A matrix whose entry `[i][j]` is the value for
   * row `i` of X and row `j` of Y.
   */
  function innerProduct(X, Y) {
    const { innerProduct: rowInnerProduct } = distances_js_1;
    return matrixFunc(X, Y, rowInnerProduct);
  }
  56. exports.innerProduct = innerProduct;
  /**
   * Apply the `euclidean` function from `./ml-distance-euclidean/euclidean.cjs`
   * to every pair of rows taken from X and Y.
   *
   * @param {number[][]} X - The first matrix.
   * @param {number[][]} Y - The second matrix.
   *
   * @throws {Error} If the number of columns in X and Y are not the same.
   *
   * @returns {number[][] | [[]]} A matrix whose entry `[i][j]` is the value for
   * row `i` of X and row `j` of Y.
   */
  function euclideanDistance(X, Y) {
    const { euclidean } = euclidean_js_1;
    return matrixFunc(X, Y, euclidean);
  }
  60. exports.euclideanDistance = euclideanDistance;
  /**
   * Select embeddings with the Maximal Marginal Relevance (MMR) heuristic.
   *
   * The first pick is the embedding with the highest cosine similarity to the
   * query. Every following pick maximizes
   * `lambda * similarityToQuery - (1 - lambda) * maxSimilarityToAlreadySelected`,
   * so a larger `lambda` favors relevance and a smaller one favors diversity.
   *
   * @param {number[]|number[][]} queryEmbedding - The query embedding. When a
   * matrix is passed, only its first row is used.
   * @param {number[][]} embeddingList - The list of embeddings to select from.
   * @param {number} [lambda=0.5] - The trade-off parameter between relevance and diversity.
   * @param {number} [k=4] - The maximum number of embeddings to select.
   *
   * @returns {number[]} The indexes of the selected embeddings in the embeddingList,
   * in selection order. Empty when nothing can be selected (non-positive `k`,
   * empty list, or no similarity scores available).
   */
  function maximalMarginalRelevance(queryEmbedding, embeddingList, lambda = 0.5, k = 4) {
    const selectionLimit = Math.min(k, embeddingList.length);
    if (selectionLimit <= 0) {
      return [];
    }
    const queryEmbeddingExpanded = (Array.isArray(queryEmbedding[0]) ? queryEmbedding : [queryEmbedding]);
    const similarityToQuery = cosineSimilarity(queryEmbeddingExpanded, embeddingList)[0];
    const mostSimilarEmbeddingIndex = argMax(similarityToQuery).maxIndex;
    if (mostSimilarEmbeddingIndex === -1) {
      // No similarity scores were produced (e.g. an empty query vector or
      // zero-dimensional embeddings); there is no valid index to return.
      return [];
    }
    const selectedEmbeddings = [embeddingList[mostSimilarEmbeddingIndex]];
    const selectedEmbeddingsIndexes = [mostSimilarEmbeddingIndex];
    // Mirror of selectedEmbeddingsIndexes for constant-time membership checks.
    const alreadySelected = new Set(selectedEmbeddingsIndexes);
    while (selectedEmbeddingsIndexes.length < selectionLimit) {
      let bestScore = -Infinity;
      let bestIndex = -1;
      const similarityToSelected = cosineSimilarity(embeddingList, selectedEmbeddings);
      similarityToQuery.forEach((queryScore, queryScoreIndex) => {
        if (alreadySelected.has(queryScoreIndex)) {
          return;
        }
        // Penalize candidates by their closest match among the picks so far.
        const maxSimilarityToSelected = Math.max(...similarityToSelected[queryScoreIndex]);
        const score = lambda * queryScore - (1 - lambda) * maxSimilarityToSelected;
        if (score > bestScore) {
          bestScore = score;
          bestIndex = queryScoreIndex;
        }
      });
      if (bestIndex === -1) {
        // No remaining candidate produced a comparable score (e.g. NaN scores);
        // stop instead of recording an invalid index and an undefined embedding.
        break;
      }
      selectedEmbeddings.push(embeddingList[bestIndex]);
      selectedEmbeddingsIndexes.push(bestIndex);
      alreadySelected.add(bestIndex);
    }
    return selectedEmbeddingsIndexes;
  }
  101. exports.maximalMarginalRelevance = maximalMarginalRelevance;
  /**
   * Locate the largest value in an array.
   *
   * @param {number[]} array - The input array.
   *
   * @returns {{maxIndex: number, maxValue: number}} The position and value of the
   * largest element; the earliest position wins on ties. For an empty array the
   * result is `{ maxIndex: -1, maxValue: NaN }`.
   */
  function argMax(array) {
    const { length } = array;
    if (length === 0) {
      return { maxIndex: -1, maxValue: NaN };
    }
    let winner = 0;
    let cursor = 1;
    while (cursor < length) {
      // Strict comparison keeps the first occurrence of the maximum.
      if (array[cursor] > array[winner]) {
        winner = cursor;
      }
      cursor += 1;
    }
    return { maxIndex: winner, maxValue: array[winner] };
  }
  /**
   * Find the largest value across all rows of a matrix, with a floor of 0.
   *
   * @param {number[][]} arrays - The rows to scan.
   *
   * @returns {number} The largest row maximum, or 0 when there are no rows or
   * no row maximum exceeds 0. Empty rows are ignored.
   */
  function matrixMaxVal(arrays) {
    return arrays.reduce((acc, array) => {
      // argMax reports NaN for an empty row, and Math.max(acc, NaN) is NaN,
      // so an empty row must be skipped rather than folded in.
      if (array.length === 0) {
        return acc;
      }
      return Math.max(acc, argMax(array).maxValue);
    }, 0);
  }