// evaluator.d.ts (3.5 KB)
import { Example, FeedbackConfig, Run, ScoreType, ValueType } from "../schemas.js";
import { RunTreeConfig } from "../run_trees.js";
  3. /**
  4. * Represents a categorical class.
  5. */
  6. export type Category = {
  7. /**
  8. * The value of the category.
  9. */
  10. value?: number;
  11. /**
  12. * The label of the category.
  13. */
  14. label: string;
  15. };
  16. /**
  17. * Represents the result of an evaluation.
  18. */
  19. export type EvaluationResult = {
  20. /**
  21. * The key associated with the evaluation result.
  22. */
  23. key: string;
  24. /**
  25. * The score of the evaluation result.
  26. */
  27. score?: ScoreType;
  28. /**
  29. * The value of the evaluation result.
  30. */
  31. value?: ValueType;
  32. /**
  33. * A comment associated with the evaluation result.
  34. */
  35. comment?: string;
  36. /**
  37. * A correction record associated with the evaluation result.
  38. */
  39. correction?: Record<string, unknown>;
  40. /**
  41. * Information about the evaluator.
  42. */
  43. evaluatorInfo?: Record<string, unknown>;
  44. /**
  45. * The source run ID of the evaluation result.
  46. * If set, a link to the source run will be available in the UI.
  47. */
  48. sourceRunId?: string;
  49. /**
  50. * The target run ID of the evaluation result.
  51. * If this is not set, the target run ID is assumed to be
  52. * the root of the trace.
  53. */
  54. targetRunId?: string;
  55. /**
  56. * The feedback config associated with the evaluation result.
  57. * If set, this will be used to define how a feedback key
  58. * should be interpreted.
  59. */
  60. feedbackConfig?: FeedbackConfig;
  61. };
  62. /**
  63. * Batch evaluation results, if your evaluator wishes
  64. * to return multiple scores.
  65. */
  66. export type EvaluationResults = {
  67. /**
  68. * The evaluation results.
  69. */
  70. results: Array<EvaluationResult>;
  71. };
  72. export interface RunEvaluator {
  73. evaluateRun(run: Run, example?: Example, options?: Partial<RunTreeConfig>): Promise<EvaluationResult | EvaluationResults>;
  74. }
  75. export type RunEvaluatorLike = ((run: Run, example?: Example) => Promise<EvaluationResult | EvaluationResult[] | EvaluationResults>) | ((run: Run, example?: Example) => EvaluationResult | EvaluationResult[] | EvaluationResults) | ((run: Run, example: Example) => Promise<EvaluationResult | EvaluationResult[] | EvaluationResults>) | ((run: Run, example: Example) => EvaluationResult | EvaluationResult[] | EvaluationResults) | ((args: {
  76. run: Run;
  77. example: Example;
  78. inputs: Record<string, any>;
  79. outputs: Record<string, any>;
  80. referenceOutputs?: Record<string, any>;
  81. }) => EvaluationResult | EvaluationResult[] | EvaluationResults) | ((args: {
  82. run: Run;
  83. example: Example;
  84. inputs: Record<string, any>;
  85. outputs: Record<string, any>;
  86. referenceOutputs?: Record<string, any>;
  87. }) => Promise<EvaluationResult | EvaluationResult[] | EvaluationResults>);
  88. /**
  89. * Wraps an evaluator function + implements the RunEvaluator interface.
  90. */
  91. export declare class DynamicRunEvaluator<Func extends (...args: any[]) => any> implements RunEvaluator {
  92. func: Func;
  93. constructor(evaluator: Func);
  94. private isEvaluationResults;
  95. private coerceEvaluationResults;
  96. private coerceEvaluationResult;
  97. /**
  98. * Evaluates a run with an optional example and returns the evaluation result.
  99. * @param run The run to evaluate.
  100. * @param example The optional example to use for evaluation.
  101. * @returns A promise that extracts to the evaluation result.
  102. */
  103. evaluateRun(run: Run, example?: Example, options?: Partial<RunTreeConfig>): Promise<EvaluationResult | EvaluationResults>;
  104. }
  105. export declare function runEvaluator(func: RunEvaluatorLike): RunEvaluator;