evaluate_comparative.d.ts 2.2 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061
  1. import { Client } from "../index.js";
  2. import { ComparisonEvaluationResult as ComparisonEvaluationResultRow, Example, Run } from "../schemas.js";
  3. import { evaluate } from "./index.js";
  /**
   * The awaited (promise-unwrapped) return type of `evaluate`.
   * Used by `evaluateComparative` as the in-memory form of an experiment.
   */
  type ExperimentResults = Awaited<
    ReturnType<typeof evaluate>
  >;
  /**
   * Legacy comparative evaluator signature that takes positional arguments
   * `(runs, example)`. The result row may be returned directly or as a promise.
   *
   * @deprecated Use the single-object-argument form {@link _ComparativeEvaluator}
   * instead: `(args: { runs, example, inputs, outputs, referenceOutputs }) => ...`
   */
  export type _ComparativeEvaluatorLegacy = (
    runs: Run[],
    example: Example
  ) => ComparisonEvaluationResultRow | Promise<ComparisonEvaluationResultRow>;
  /**
   * Comparative evaluator that receives all of its arguments as one object.
   * The result row may be produced synchronously or through a promise.
   */
  export type _ComparativeEvaluator = (args: {
    /** Runs handed to the evaluator. */
    runs: Array<Run>;
    /** Example handed to the evaluator alongside `runs`. */
    example: Example;
    /** Free-form key/value inputs. NOTE(review): presumably the example's inputs; confirm. */
    inputs: Record<string, any>;
    /** List of free-form key/value output records. NOTE(review): presumably one per run; confirm. */
    outputs: Array<Record<string, any>>;
    /** Optional free-form key/value reference outputs; may be omitted. */
    referenceOutputs?: Record<string, any>;
  }) => Promise<ComparisonEvaluationResultRow> | ComparisonEvaluationResultRow;
  /**
   * Any accepted comparative evaluator: either the object-argument form or
   * the deprecated positional `(runs, example)` form.
   */
  export type ComparativeEvaluator =
    | _ComparativeEvaluator
    | _ComparativeEvaluatorLegacy;
  /** Options accepted by `evaluateComparative`. Only `evaluators` is required. */
  export interface EvaluateComparativeOptions {
    /** The evaluators to apply during comparative evaluation. */
    evaluators: ComparativeEvaluator[];

    /**
     * When set, the order of outputs is randomized for each evaluation.
     * @default false
     */
    randomizeOrder?: boolean;

    /**
     * LangSmith client to use.
     * @default undefined
     */
    client?: Client;

    /**
     * Metadata that gets attached to the experiment.
     * @default undefined
     */
    metadata?: Record<string, unknown>;

    /**
     * Prefix applied to the experiment name.
     * @default undefined
     */
    experimentPrefix?: string;

    /**
     * Free-form description of the experiment.
     * @default undefined
     */
    description?: string;

    /**
     * Whether all child runs of the experiment should be loaded.
     * @default false
     */
    loadNested?: boolean;

    /**
     * Upper bound on the number of evaluators run concurrently.
     * @default undefined
     */
    maxConcurrency?: number;
  }
  /** Shape of the value that `evaluateComparative` resolves to. */
  export interface ComparisonEvaluationResults {
    /** Name of the experiment the results belong to. */
    experimentName: string;
    /** The comparison result rows. */
    results: Array<ComparisonEvaluationResultRow>;
  }
  /**
   * Runs a comparative evaluation over the given experiments.
   *
   * @param experiments Either a list of strings or a list of experiment
   * results (each given directly or as a promise). NOTE(review): the strings
   * are presumably experiment names or IDs; confirm against the implementation.
   * @param options Evaluators plus optional settings; see
   * {@link EvaluateComparativeOptions}.
   * @returns A promise of the experiment name together with its comparison
   * result rows.
   */
  export declare function evaluateComparative(
    experiments: string[] | Array<ExperimentResults | Promise<ExperimentResults>>,
    options: EvaluateComparativeOptions
  ): Promise<ComparisonEvaluationResults>;
  // Empty export: keeps this declaration file treated as a module.
  export {};