123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236 |
- import { Client } from "../index.js";
- import { AttachmentInfo, Example, KVMap, Run, TracerSession } from "../schemas.js";
- import { EvaluationResult, EvaluationResults, RunEvaluator } from "./evaluator.js";
- import { ComparisonEvaluationResults, ComparativeEvaluator } from "./evaluate_comparative.js";
/**
 * Configuration object accepted as the optional second argument of a
 * standard target (function or `invoke` method).
 *
 * It is an arbitrary key/value map (`KVMap`) that may additionally carry the
 * two optional fields declared below.
 */
export type TargetConfigT = KVMap & {
  /** Attachment descriptors, keyed by string. */
  attachments?: Record<string, AttachmentInfo>;
  /**
   * Left untyped by this declaration.
   * NOTE(review): presumably tracing/run callbacks — confirm against the implementation.
   */
  callbacks?: any;
};
/**
 * A target that is run against one input at a time.
 *
 * Four shapes are accepted, each taking the input plus an optional
 * `TargetConfigT`:
 * - an async function,
 * - a sync function,
 * - an object with a sync `invoke` method,
 * - an object with an async `invoke` method.
 *
 * @typeParam TInput - Type of the input handed to the target (defaults to `any`).
 * @typeParam TOutput - Type the target produces (defaults to `KVMap`).
 */
type StandardTargetT<TInput = any, TOutput = KVMap> =
  | ((input: TInput, config?: TargetConfigT) => Promise<TOutput>)
  | ((input: TInput, config?: TargetConfigT) => TOutput)
  | {
      invoke: (input: TInput, config?: TargetConfigT) => TOutput;
    }
  | {
      invoke: (input: TInput, config?: TargetConfigT) => Promise<TOutput>;
    };
/**
 * Target accepted by the comparative overload of `evaluate`: either an array
 * of strings or an array of `ExperimentResults` (which may still be pending
 * promises).
 * NOTE(review): the strings presumably identify existing experiments (names
 * or IDs) — confirm against the implementation.
 */
type ComparativeTargetT =
  | Array<string>
  | Array<Promise<ExperimentResults> | ExperimentResults>;

/**
 * Any target shape: a standard single-input target or a comparative target.
 */
export type TargetT<TInput = any, TOutput = KVMap> =
  | StandardTargetT<TInput, TOutput>
  | ComparativeTargetT;
/**
 * The dataset to evaluate on: a dataset name (string), an async iterable of
 * examples, or an in-memory array of examples.
 */
export type DataT = string | AsyncIterable<Example> | Example[];
/**
 * Positional-argument summary evaluator (synchronous): receives all runs and
 * all examples and returns one or more evaluation results.
 *
 * @deprecated Use object parameter version instead: (args: { runs, examples, inputs, outputs, referenceOutputs }) => ...
 */
type DeprecatedSyncSummaryEvaluator = (
  runs: Array<Run>,
  examples: Array<Example>
) => EvaluationResult | EvaluationResult[] | EvaluationResults;

/**
 * Positional-argument summary evaluator (asynchronous): same inputs as the
 * synchronous form, with the result wrapped in a promise.
 *
 * @deprecated Use object parameter version instead: (args: { runs, examples, inputs, outputs, referenceOutputs }) => ...
 */
type DeprecatedAsyncSummaryEvaluator = (
  runs: Array<Run>,
  examples: Array<Example>
) => Promise<EvaluationResult | EvaluationResult[] | EvaluationResults>;
/**
 * A summary evaluator, i.e. one that is run on the entire dataset.
 *
 * Accepted forms:
 * - the deprecated positional `(runs, examples)` functions (sync or async);
 * - a function taking a single `args` object (sync or async). The `args`
 *   object carries parallel arrays of `runs`, `examples`, `inputs` and
 *   `outputs`, plus optional `referenceOutputs`.
 *
 * Every form returns one result, an array of results, or an
 * `EvaluationResults` value (optionally wrapped in a promise).
 */
export type SummaryEvaluatorT =
  | DeprecatedSyncSummaryEvaluator
  | DeprecatedAsyncSummaryEvaluator
  | ((args: {
      runs: Array<Run>;
      examples: Array<Example>;
      inputs: Array<Record<string, any>>;
      outputs: Array<Record<string, any>>;
      referenceOutputs?: Array<Record<string, any>>;
    }) => EvaluationResult | EvaluationResult[] | EvaluationResults)
  | ((args: {
      runs: Array<Run>;
      examples: Array<Example>;
      inputs: Array<Record<string, any>>;
      outputs: Array<Record<string, any>>;
      referenceOutputs?: Array<Record<string, any>>;
    }) => Promise<EvaluationResult | EvaluationResult[] | EvaluationResults>);
/**
 * Evaluator supplied as a `RunEvaluator` instance.
 *
 * @deprecated Use object parameter version instead: (args: { run, example, inputs, outputs, referenceOutputs }) => ...
 */
type DeprecatedRunEvaluator = RunEvaluator;

/**
 * Positional-argument evaluator (synchronous): receives a run and,
 * optionally, its example.
 *
 * @deprecated Use object parameter version instead: (args: { run, example, inputs, outputs, referenceOutputs }) => ...
 */
type DeprecatedFunctionEvaluator = (
  run: Run,
  example?: Example
) => EvaluationResult | EvaluationResult[] | EvaluationResults;

/**
 * Positional-argument evaluator (asynchronous): same inputs as the
 * synchronous form, with the result wrapped in a promise.
 *
 * @deprecated Use object parameter version instead: (args: { run, example, inputs, outputs, referenceOutputs }) => ...
 */
type DeprecatedAsyncFunctionEvaluator = (
  run: Run,
  example?: Example
) => Promise<EvaluationResult | EvaluationResult[] | EvaluationResults>;
/**
 * A per-example evaluator.
 *
 * Accepted forms:
 * - the deprecated `RunEvaluator` instance or positional `(run, example?)`
 *   functions (sync or async);
 * - a function taking a single `args` object (sync or async). The `args`
 *   object carries the `run`, its `example`, the `inputs` and `outputs`
 *   records, and optionally `referenceOutputs` and `attachments`.
 *
 * Every function form returns one result, an array of results, or an
 * `EvaluationResults` value (optionally wrapped in a promise).
 */
export type EvaluatorT =
  | DeprecatedRunEvaluator
  | DeprecatedFunctionEvaluator
  | DeprecatedAsyncFunctionEvaluator
  | ((args: {
      run: Run;
      example: Example;
      inputs: Record<string, any>;
      outputs: Record<string, any>;
      referenceOutputs?: Record<string, any>;
      attachments?: Record<string, any>;
    }) => EvaluationResult | EvaluationResult[] | EvaluationResults)
  | ((args: {
      run: Run;
      example: Example;
      inputs: Record<string, any>;
      outputs: Record<string, any>;
      referenceOutputs?: Record<string, any>;
      attachments?: Record<string, any>;
    }) => Promise<EvaluationResult | EvaluationResult[] | EvaluationResults>);
/**
 * One item yielded by `_ExperimentManager._predict`: a run paired with an
 * example.
 */
interface _ForwardResults {
  run: Run;
  example: Example;
}
/**
 * Constructor arguments for `_ExperimentManager`. Every field is optional.
 */
interface _ExperimentManagerArgs {
  /** Dataset name, async iterable of examples, or array of examples. */
  data?: DataT;
  /** An existing `TracerSession`, or a string. NOTE(review): presumably the experiment name — confirm. */
  experiment?: TracerSession | string;
  /** Arbitrary key/value metadata. */
  metadata?: KVMap;
  /** Client instance to use. */
  client?: Client;
  /** Stream of runs. */
  runs?: AsyncGenerator<Run>;
  /** Stream of per-run evaluation results. */
  evaluationResults?: AsyncGenerator<EvaluationResults>;
  /**
   * Stream of functions; each takes the array of runs and returns a
   * generator of summary evaluation results.
   */
  summaryResults?: AsyncGenerator<
    (runsArray: Run[]) => AsyncGenerator<EvaluationResults, any, unknown>,
    any,
    unknown
  >;
  /** Examples already held in memory. */
  examples?: Example[];
  /** Number of repetitions. */
  numRepetitions?: number;
  /** Runs already held in memory. */
  _runsArray?: Run[];
  /** Whether attachments are included. */
  includeAttachments?: boolean;
}
/**
 * Options shared by the standard and the comparative forms of `evaluate`.
 */
type BaseEvaluateOptions = {
  /**
   * Metadata to attach to the experiment.
   * @default undefined
   */
  metadata?: KVMap;
  /**
   * A prefix to provide for your experiment name.
   * @default undefined
   */
  experimentPrefix?: string;
  /**
   * A free-form description of the experiment.
   */
  description?: string;
  /**
   * The maximum number of concurrent evaluations to run.
   * @default undefined
   */
  maxConcurrency?: number;
  /**
   * The LangSmith client to use.
   * @default undefined
   */
  client?: Client;
  /**
   * The number of repetitions to perform. Each example
   * will be run this many times.
   * @default 1
   */
  numRepetitions?: number;
};
/**
 * Options for the standard (single-target) form of `evaluate`.
 * Extends `BaseEvaluateOptions`; `data` is the only required field.
 */
export interface EvaluateOptions extends BaseEvaluateOptions {
  /**
   * A list of evaluators to run on each example.
   * @default undefined
   */
  evaluators?: Array<EvaluatorT>;
  /**
   * A list of summary evaluators to run on the entire dataset.
   * @default undefined
   */
  summaryEvaluators?: Array<SummaryEvaluatorT>;
  /**
   * The dataset to evaluate on. Can be a dataset name, a list of
   * examples, or a generator of examples.
   */
  data: DataT;
  /**
   * Whether to use attachments for the experiment.
   * @default false
   */
  includeAttachments?: boolean;
}
/**
 * Options for the comparative form of `evaluate`.
 * Extends `BaseEvaluateOptions`; `evaluators` is required here.
 */
export interface ComparativeEvaluateOptions extends BaseEvaluateOptions {
  /**
   * A list of evaluators to run on each example.
   */
  evaluators: Array<ComparativeEvaluator>;
  /**
   * Whether to load all child runs for the experiment.
   * @default false
   */
  loadNested?: boolean;
  /**
   * Randomize the order of outputs for each evaluation.
   * @default false
   */
  randomizeOrder?: boolean;
}
/**
 * Comparative overload: takes a `ComparativeTargetT` (an array of strings or
 * of `ExperimentResults`) together with comparative options.
 *
 * @param target - The comparative target.
 * @param options - Comparative evaluation options; `evaluators` is required.
 * @returns A promise of the comparison evaluation results.
 */
export declare function evaluate(
  target: ComparativeTargetT,
  options: ComparativeEvaluateOptions
): Promise<ComparisonEvaluationResults>;
/**
 * Standard overload: takes a single function or `invoke`-style target
 * together with standard options.
 *
 * @param target - The target function or object with an `invoke` method.
 * @param options - Evaluation options; `data` is required.
 * @returns A promise of the experiment results.
 */
export declare function evaluate(
  target: StandardTargetT,
  options: EvaluateOptions
): Promise<ExperimentResults>;
/**
 * One row of experiment output: a run, an example, and the evaluation
 * results associated with them.
 */
export interface ExperimentResultRow {
  run: Run;
  example: Example;
  evaluationResults: EvaluationResults;
}
/**
 * Manage the execution of experiments.
 *
 * Supports lazily running predictions and evaluations in parallel to facilitate
 * result streaming and early debugging.
 */
export declare class _ExperimentManager {
  // State fields. Most mirror the optional fields of `_ExperimentManagerArgs`.
  _data?: DataT;
  _runs?: AsyncGenerator<Run>;
  _evaluationResults?: AsyncGenerator<EvaluationResults>;
  _summaryResults?: AsyncGenerator<
    (runsArray: Run[]) => AsyncGenerator<EvaluationResults, any, unknown>,
    any,
    unknown
  >;
  _examples?: Example[];
  _numRepetitions?: number;
  _runsArray?: Run[];
  client: Client;
  _experiment?: TracerSession;
  _experimentName: string;
  _metadata: KVMap;
  _description?: string;
  _includeAttachments?: boolean;

  // Accessors.
  get experimentName(): string;
  getExamples(): Promise<Array<Example>>;
  setExamples(examples: Example[]): void;
  get datasetId(): Promise<string>;
  get evaluationResults(): AsyncGenerator<EvaluationResults>;
  get runs(): AsyncGenerator<Run>;

  /**
   * @param args - Constructor arguments; every field is optional.
   */
  constructor(args: _ExperimentManagerArgs);

  // Experiment / project helpers.
  _getExperiment(): TracerSession;
  _getExperimentMetadata(): Promise<KVMap>;
  _createProject(firstExample: Example, projectMetadata: KVMap): Promise<TracerSession>;
  _getProject(firstExample: Example): Promise<TracerSession>;
  protected _printExperimentStart(): Promise<void>;

  // Pipeline steps. Each resolves to an `_ExperimentManager`.
  start(): Promise<_ExperimentManager>;
  withPredictions(
    target: StandardTargetT,
    options?: {
      maxConcurrency?: number;
    }
  ): Promise<_ExperimentManager>;
  withEvaluators(
    evaluators: Array<EvaluatorT | RunEvaluator>,
    options?: {
      maxConcurrency?: number;
    }
  ): Promise<_ExperimentManager>;
  withSummaryEvaluators(summaryEvaluators: Array<SummaryEvaluatorT>): Promise<_ExperimentManager>;

  // Result access.
  getResults(): AsyncGenerator<ExperimentResultRow>;
  getSummaryScores(): Promise<EvaluationResults>;

  /**
   * Run the target function or runnable on the examples.
   * @param target - The target function or runnable to evaluate.
   * @param options - Optional settings; `maxConcurrency` is the only declared field.
   * @returns An async generator of the results.
   */
  _predict(
    target: StandardTargetT,
    options?: {
      maxConcurrency?: number;
    }
  ): AsyncGenerator<_ForwardResults>;
  _runEvaluators(
    evaluators: Array<RunEvaluator>,
    currentResults: ExperimentResultRow,
    fields: {
      client: Client;
    }
  ): Promise<ExperimentResultRow>;
  /**
   * Run the evaluators on the prediction stream.
   * Expects runs to be available in the manager.
   * (e.g. from a previous prediction step)
   * @param evaluators - The run evaluators to apply.
   * @param options - Optional settings; `maxConcurrency` is the only declared field.
   */
  _score(
    evaluators: Array<RunEvaluator>,
    options?: {
      maxConcurrency?: number;
    }
  ): AsyncGenerator<ExperimentResultRow>;
  _applySummaryEvaluators(
    summaryEvaluators: Array<SummaryEvaluatorT>
  ): AsyncGenerator<(runsArray: Run[]) => AsyncGenerator<EvaluationResults>>;
  _getDatasetVersion(): Promise<string | undefined>;
  _getDatasetSplits(): Promise<string[] | undefined>;
  _end(): Promise<void>;
}
/**
 * Represents the results of an evaluate() call.
 * This class provides an iterator interface to iterate over the experiment results
 * as they become available. It also provides methods to access the experiment name,
 * the number of results, and to wait for the results to be processed.
 */
declare class ExperimentResults implements AsyncIterableIterator<ExperimentResultRow> {
  private manager;
  /** Result rows. */
  results: ExperimentResultRow[];
  /** Counter of processed items. */
  processedCount: number;
  /** Summary-level evaluation results. */
  summaryResults: EvaluationResults;
  /**
   * @param experimentManager - The experiment manager backing these results.
   */
  constructor(experimentManager: _ExperimentManager);
  get experimentName(): string;
  [Symbol.asyncIterator](): AsyncIterableIterator<ExperimentResultRow>;
  next(): Promise<IteratorResult<ExperimentResultRow>>;
  processData(manager: _ExperimentManager): Promise<void>;
  get length(): number;
}
// Empty export: keeps this file a module, so the non-exported declarations
// above stay module-scoped rather than becoming globals.
export {};
|