math.js 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118
  1. import { cosine } from "./ml-distance/similarities.js";
  2. import { innerProduct as innerProductDistance } from "./ml-distance/distances.js";
  3. import { euclidean } from "./ml-distance-euclidean/euclidean.js";
  4. /**
  5. * Apply a row-wise function between two matrices with the same number of columns.
  6. *
  7. * @param {number[][]} X - The first matrix.
  8. * @param {number[][]} Y - The second matrix.
  9. * @param {VectorFunction} func - The function to apply.
  10. *
  11. * @throws {Error} If the number of columns in X and Y are not the same.
  12. *
  13. * @returns {number[][] | [[]]} A matrix where each row represents the result of applying the function between the corresponding rows of X and Y.
  14. */
  15. export function matrixFunc(X, Y, func) {
  16. if (X.length === 0 ||
  17. X[0].length === 0 ||
  18. Y.length === 0 ||
  19. Y[0].length === 0) {
  20. return [[]];
  21. }
  22. if (X[0].length !== Y[0].length) {
  23. throw new Error(`Number of columns in X and Y must be the same. X has shape ${[
  24. X.length,
  25. X[0].length,
  26. ]} and Y has shape ${[Y.length, Y[0].length]}.`);
  27. }
  28. return X.map((xVector) => Y.map((yVector) => func(xVector, yVector)).map((similarity) => Number.isNaN(similarity) ? 0 : similarity));
  29. }
  30. export function normalize(M, similarity = false) {
  31. const max = matrixMaxVal(M);
  32. return M.map((row) => row.map((val) => (similarity ? 1 - val / max : val / max)));
  33. }
  34. /**
  35. * This function calculates the row-wise cosine similarity between two matrices with the same number of columns.
  36. *
  37. * @param {number[][]} X - The first matrix.
  38. * @param {number[][]} Y - The second matrix.
  39. *
  40. * @throws {Error} If the number of columns in X and Y are not the same.
  41. *
  42. * @returns {number[][] | [[]]} A matrix where each row represents the cosine similarity values between the corresponding rows of X and Y.
  43. */
  44. export function cosineSimilarity(X, Y) {
  45. return matrixFunc(X, Y, cosine);
  46. }
  47. export function innerProduct(X, Y) {
  48. return matrixFunc(X, Y, innerProductDistance);
  49. }
  50. export function euclideanDistance(X, Y) {
  51. return matrixFunc(X, Y, euclidean);
  52. }
  53. /**
  54. * This function implements the Maximal Marginal Relevance algorithm
  55. * to select a set of embeddings that maximizes the diversity and relevance to a query embedding.
  56. *
  57. * @param {number[]|number[][]} queryEmbedding - The query embedding.
  58. * @param {number[][]} embeddingList - The list of embeddings to select from.
  59. * @param {number} [lambda=0.5] - The trade-off parameter between relevance and diversity.
  60. * @param {number} [k=4] - The maximum number of embeddings to select.
  61. *
  62. * @returns {number[]} The indexes of the selected embeddings in the embeddingList.
  63. */
  64. export function maximalMarginalRelevance(queryEmbedding, embeddingList, lambda = 0.5, k = 4) {
  65. if (Math.min(k, embeddingList.length) <= 0) {
  66. return [];
  67. }
  68. const queryEmbeddingExpanded = (Array.isArray(queryEmbedding[0]) ? queryEmbedding : [queryEmbedding]);
  69. const similarityToQuery = cosineSimilarity(queryEmbeddingExpanded, embeddingList)[0];
  70. const mostSimilarEmbeddingIndex = argMax(similarityToQuery).maxIndex;
  71. const selectedEmbeddings = [embeddingList[mostSimilarEmbeddingIndex]];
  72. const selectedEmbeddingsIndexes = [mostSimilarEmbeddingIndex];
  73. while (selectedEmbeddingsIndexes.length < Math.min(k, embeddingList.length)) {
  74. let bestScore = -Infinity;
  75. let bestIndex = -1;
  76. const similarityToSelected = cosineSimilarity(embeddingList, selectedEmbeddings);
  77. similarityToQuery.forEach((queryScore, queryScoreIndex) => {
  78. if (selectedEmbeddingsIndexes.includes(queryScoreIndex)) {
  79. return;
  80. }
  81. const maxSimilarityToSelected = Math.max(...similarityToSelected[queryScoreIndex]);
  82. const score = lambda * queryScore - (1 - lambda) * maxSimilarityToSelected;
  83. if (score > bestScore) {
  84. bestScore = score;
  85. bestIndex = queryScoreIndex;
  86. }
  87. });
  88. selectedEmbeddings.push(embeddingList[bestIndex]);
  89. selectedEmbeddingsIndexes.push(bestIndex);
  90. }
  91. return selectedEmbeddingsIndexes;
  92. }
  93. /**
  94. * Finds the index of the maximum value in the given array.
  95. * @param {number[]} array - The input array.
  96. *
  97. * @returns {number} The index of the maximum value in the array. If the array is empty, returns -1.
  98. */
  99. function argMax(array) {
  100. if (array.length === 0) {
  101. return {
  102. maxIndex: -1,
  103. maxValue: NaN,
  104. };
  105. }
  106. let maxValue = array[0];
  107. let maxIndex = 0;
  108. for (let i = 1; i < array.length; i += 1) {
  109. if (array[i] > maxValue) {
  110. maxIndex = i;
  111. maxValue = array[i];
  112. }
  113. }
  114. return { maxIndex, maxValue };
  115. }
  116. function matrixMaxVal(arrays) {
  117. return arrays.reduce((acc, array) => Math.max(acc, argMax(array).maxValue), 0);
  118. }