evaluator.js 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107
  1. import { v4 as uuidv4 } from "uuid";
  2. import { traceable } from "../traceable.js";
  3. /**
  4. * Wraps an evaluator function + implements the RunEvaluator interface.
  5. */
  6. export class DynamicRunEvaluator {
  7. constructor(evaluator) {
  8. Object.defineProperty(this, "func", {
  9. enumerable: true,
  10. configurable: true,
  11. writable: true,
  12. value: void 0
  13. });
  14. this.func = ((input) => {
  15. const { run, example } = input.langSmithRunAndExample;
  16. return evaluator({
  17. ...run,
  18. run,
  19. example,
  20. inputs: example?.inputs,
  21. outputs: run?.outputs,
  22. referenceOutputs: example?.outputs,
  23. attachments: example?.attachments,
  24. }, example);
  25. });
  26. }
  27. isEvaluationResults(x) {
  28. return (typeof x === "object" &&
  29. x != null &&
  30. "results" in x &&
  31. Array.isArray(x.results) &&
  32. x.results.length > 0);
  33. }
  34. coerceEvaluationResults(results, sourceRunId) {
  35. if (this.isEvaluationResults(results)) {
  36. return {
  37. results: results.results.map((r) => this.coerceEvaluationResult(r, sourceRunId, false)),
  38. };
  39. }
  40. return this.coerceEvaluationResult(results, sourceRunId, true);
  41. }
  42. coerceEvaluationResult(result, sourceRunId, allowNoKey = false) {
  43. if ("key" in result) {
  44. if (!result.sourceRunId) {
  45. result.sourceRunId = sourceRunId;
  46. }
  47. return result;
  48. }
  49. if (!("key" in result)) {
  50. if (allowNoKey) {
  51. result["key"] = this.func.name;
  52. }
  53. }
  54. return {
  55. sourceRunId,
  56. ...result,
  57. };
  58. }
  59. /**
  60. * Evaluates a run with an optional example and returns the evaluation result.
  61. * @param run The run to evaluate.
  62. * @param example The optional example to use for evaluation.
  63. * @returns A promise that extracts to the evaluation result.
  64. */
  65. async evaluateRun(run, example, options) {
  66. const sourceRunId = uuidv4();
  67. const metadata = {
  68. targetRunId: run.id,
  69. };
  70. if ("session_id" in run) {
  71. metadata["experiment"] = run.session_id;
  72. }
  73. if (typeof this.func !== "function") {
  74. throw new Error("Target must be runnable function");
  75. }
  76. const wrappedTraceableFunc = traceable(this.func, {
  77. project_name: "evaluators",
  78. name: "evaluator",
  79. id: sourceRunId,
  80. ...options,
  81. });
  82. const result = await wrappedTraceableFunc(
  83. // Pass data via `langSmithRunAndExample` key to avoid conflicts with other
  84. // inputs. This key is extracted in the wrapped function, with `run` and
  85. // `example` passed to evaluator function as arguments.
  86. { langSmithRunAndExample: { run, example } }, { metadata });
  87. // Check the one required property of EvaluationResult since 'instanceof' is not possible
  88. if ("key" in result) {
  89. if (!result.sourceRunId) {
  90. result.sourceRunId = sourceRunId;
  91. }
  92. return result;
  93. }
  94. if (Array.isArray(result)) {
  95. return {
  96. results: result.map((r) => this.coerceEvaluationResult(r, sourceRunId, false)),
  97. };
  98. }
  99. if (typeof result !== "object") {
  100. throw new Error("Evaluator function must return an object.");
  101. }
  102. return this.coerceEvaluationResults(result, sourceRunId);
  103. }
  104. }
  105. export function runEvaluator(func) {
  106. return new DynamicRunEvaluator(func);
  107. }