import { Client } from "../index.js";
import { AttachmentInfo, Example, KVMap, Run, TracerSession } from "../schemas.js";
import { EvaluationResult, EvaluationResults, RunEvaluator } from "./evaluator.js";
import { ComparisonEvaluationResults, ComparativeEvaluator } from "./evaluate_comparative.js";
export type TargetConfigT = KVMap & {
    attachments?: Record<string, AttachmentInfo>;
    callbacks?: any;
};
type StandardTargetT<TInput = any, TOutput = KVMap> = ((input: TInput, config?: TargetConfigT) => Promise<TOutput>) | ((input: TInput, config?: TargetConfigT) => TOutput) | {
    invoke: (input: TInput, config?: TargetConfigT) => TOutput;
} | {
    invoke: (input: TInput, config?: TargetConfigT) => Promise<TOutput>;
};
type ComparativeTargetT = Array<string> | Array<Promise<ExperimentResults> | ExperimentResults>;
export type TargetT<TInput = any, TOutput = KVMap> = StandardTargetT<TInput, TOutput> | ComparativeTargetT;
export type DataT = string | AsyncIterable<Example> | Example[];
/** @deprecated Use object parameter version instead: (args: { runs, examples, inputs, outputs, referenceOutputs }) => ... */
type DeprecatedSyncSummaryEvaluator = (runs: Array<Run>, examples: Array<Example>) => EvaluationResult | EvaluationResult[] | EvaluationResults;
/** @deprecated Use object parameter version instead: (args: { runs, examples, inputs, outputs, referenceOutputs }) => ... */
type DeprecatedAsyncSummaryEvaluator = (runs: Array<Run>, examples: Array<Example>) => Promise<EvaluationResult | EvaluationResult[] | EvaluationResults>;
export type SummaryEvaluatorT = DeprecatedSyncSummaryEvaluator | DeprecatedAsyncSummaryEvaluator | ((args: {
    runs: Array<Run>;
    examples: Array<Example>;
    inputs: Array<Record<string, any>>;
    outputs: Array<Record<string, any>>;
    referenceOutputs?: Array<Record<string, any>>;
}) => EvaluationResult | EvaluationResult[] | EvaluationResults) | ((args: {
    runs: Array<Run>;
    examples: Array<Example>;
    inputs: Array<Record<string, any>>;
    outputs: Array<Record<string, any>>;
    referenceOutputs?: Array<Record<string, any>>;
}) => Promise<EvaluationResult | EvaluationResult[] | EvaluationResults>);
/** @deprecated Use object parameter version instead: (args: { run, example, inputs, outputs, referenceOutputs }) => ... */
type DeprecatedRunEvaluator = RunEvaluator;
/** @deprecated Use object parameter version instead: (args: { run, example, inputs, outputs, referenceOutputs }) => ... */
type DeprecatedFunctionEvaluator = (run: Run, example?: Example) => EvaluationResult | EvaluationResult[] | EvaluationResults;
/** @deprecated Use object parameter version instead: (args: { run, example, inputs, outputs, referenceOutputs }) => ... */
type DeprecatedAsyncFunctionEvaluator = (run: Run, example?: Example) => Promise<EvaluationResult | EvaluationResult[] | EvaluationResults>;
export type EvaluatorT = DeprecatedRunEvaluator | DeprecatedFunctionEvaluator | DeprecatedAsyncFunctionEvaluator | ((args: {
    run: Run;
    example: Example;
    inputs: Record<string, any>;
    outputs: Record<string, any>;
    referenceOutputs?: Record<string, any>;
    attachments?: Record<string, any>;
}) => EvaluationResult | EvaluationResult[] | EvaluationResults) | ((args: {
    run: Run;
    example: Example;
    inputs: Record<string, any>;
    outputs: Record<string, any>;
    referenceOutputs?: Record<string, any>;
    attachments?: Record<string, any>;
}) => Promise<EvaluationResult | EvaluationResult[] | EvaluationResults>);
interface _ForwardResults {
    run: Run;
    example: Example;
}
interface _ExperimentManagerArgs {
    data?: DataT;
    experiment?: TracerSession | string;
    metadata?: KVMap;
    client?: Client;
    runs?: AsyncGenerator<Run>;
    evaluationResults?: AsyncGenerator<EvaluationResults>;
    summaryResults?: AsyncGenerator<(runsArray: Run[]) => AsyncGenerator<EvaluationResults>, any, unknown>;
    examples?: Example[];
    numRepetitions?: number;
    _runsArray?: Run[];
    includeAttachments?: boolean;
}
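/*
 * Usage sketch (illustrative only, not part of these declarations): a per-example
 * evaluator and a summary evaluator compatible with EvaluatorT and SummaryEvaluatorT.
 * The feedback keys ("correctness", "pass_rate") and the `answer` field are assumed
 * example shapes, not anything this module defines.
 *
 *   const correctness = async ({ outputs, referenceOutputs }: {
 *       outputs: Record<string, any>;
 *       referenceOutputs?: Record<string, any>;
 *   }) => ({
 *       key: "correctness",
 *       score: outputs.answer === referenceOutputs?.answer ? 1 : 0,
 *   });
 *
 *   const passRate = ({ outputs, referenceOutputs }: {
 *       outputs: Array<Record<string, any>>;
 *       referenceOutputs?: Array<Record<string, any>>;
 *   }) => ({
 *       key: "pass_rate",
 *       score: outputs.filter((o, i) => o.answer === referenceOutputs?.[i]?.answer).length / outputs.length,
 *   });
 */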
type BaseEvaluateOptions = {
    /**
     * Metadata to attach to the experiment.
     * @default undefined
     */
    metadata?: KVMap;
    /**
     * A prefix to provide for your experiment name.
     * @default undefined
     */
    experimentPrefix?: string;
    /**
     * A free-form description of the experiment.
     */
    description?: string;
    /**
     * The maximum number of concurrent evaluations to run.
     * @default undefined
     */
    maxConcurrency?: number;
    /**
     * The LangSmith client to use.
     * @default undefined
     */
    client?: Client;
    /**
     * The number of repetitions to perform. Each example
     * will be run this many times.
     * @default 1
     */
    numRepetitions?: number;
};
export interface EvaluateOptions extends BaseEvaluateOptions {
    /**
     * A list of evaluators to run on each example.
     * @default undefined
     */
    evaluators?: Array<EvaluatorT>;
    /**
     * A list of summary evaluators to run on the entire dataset.
     * @default undefined
     */
    summaryEvaluators?: Array<SummaryEvaluatorT>;
    /**
     * The dataset to evaluate on. Can be a dataset name, a list of
     * examples, or a generator of examples.
     */
    data: DataT;
    /**
     * Whether to use attachments for the experiment.
     * @default false
     */
    includeAttachments?: boolean;
}
export interface ComparativeEvaluateOptions extends BaseEvaluateOptions {
    /**
     * A list of evaluators to run on each example.
     */
    evaluators: Array<ComparativeEvaluator>;
    /**
     * Whether to load all child runs for the experiment.
     * @default false
     */
    loadNested?: boolean;
    /**
     * Randomize the order of outputs for each evaluation.
     * @default false
     */
    randomizeOrder?: boolean;
}
export declare function evaluate(target: ComparativeTargetT, options: ComparativeEvaluateOptions): Promise<ComparisonEvaluationResults>;
export declare function evaluate(target: StandardTargetT, options: EvaluateOptions): Promise<ExperimentResults>;
export interface ExperimentResultRow {
    run: Run;
    example: Example;
    evaluationResults: EvaluationResults;
}
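/*
 * Usage sketch for the standard evaluate() overload above. A minimal sketch,
 * assuming a dataset named "my-dataset" exists in LangSmith, that its examples
 * have a `question` input and an `answer` reference output, and that `myModel`
 * is a hypothetical application function; none of these are defined by this
 * module. `correctness` and `passRate` refer to the evaluator sketch above.
 *
 *   import { evaluate } from "langsmith/evaluation";
 *
 *   const results = await evaluate(
 *       async (inputs) => ({ answer: await myModel(inputs.question) }),
 *       {
 *           data: "my-dataset",
 *           evaluators: [correctness],
 *           summaryEvaluators: [passRate],
 *           experimentPrefix: "baseline",
 *           maxConcurrency: 4,
 *       }
 *   );
 */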
/**
 * Manage the execution of experiments.
 *
 * Supports lazily running predictions and evaluations in parallel to facilitate
 * result streaming and early debugging.
 */
export declare class _ExperimentManager {
    _data?: DataT;
    _runs?: AsyncGenerator<Run>;
    _evaluationResults?: AsyncGenerator<EvaluationResults>;
    _summaryResults?: AsyncGenerator<(runsArray: Run[]) => AsyncGenerator<EvaluationResults>, any, unknown>;
    _examples?: Example[];
    _numRepetitions?: number;
    _runsArray?: Run[];
    client: Client;
    _experiment?: TracerSession;
    _experimentName: string;
    _metadata: KVMap;
    _description?: string;
    _includeAttachments?: boolean;
    get experimentName(): string;
    getExamples(): Promise<Array<Example>>;
    setExamples(examples: Example[]): void;
    get datasetId(): Promise<string>;
    get evaluationResults(): AsyncGenerator<EvaluationResults>;
    get runs(): AsyncGenerator<Run>;
    constructor(args: _ExperimentManagerArgs);
    _getExperiment(): TracerSession;
    _getExperimentMetadata(): Promise<KVMap>;
    _createProject(firstExample: Example, projectMetadata: KVMap): Promise<TracerSession>;
    _getProject(firstExample: Example): Promise<TracerSession>;
    protected _printExperimentStart(): Promise<void>;
    start(): Promise<_ExperimentManager>;
    withPredictions(target: StandardTargetT, options?: {
        maxConcurrency?: number;
    }): Promise<_ExperimentManager>;
    withEvaluators(evaluators: Array<EvaluatorT | RunEvaluator>, options?: {
        maxConcurrency?: number;
    }): Promise<_ExperimentManager>;
    withSummaryEvaluators(summaryEvaluators: Array<SummaryEvaluatorT>): Promise<_ExperimentManager>;
    getResults(): AsyncGenerator<ExperimentResultRow>;
    getSummaryScores(): Promise<EvaluationResults>;
    /**
     * Run the target function or runnable on the examples.
     * @param {StandardTargetT} target The target function or runnable to evaluate.
     * @param options
     * @returns {AsyncGenerator<_ForwardResults>} An async generator of the results.
     */
    _predict(target: StandardTargetT, options?: {
        maxConcurrency?: number;
    }): AsyncGenerator<_ForwardResults>;
    _runEvaluators(evaluators: Array<RunEvaluator>, currentResults: ExperimentResultRow, fields: {
        client: Client;
    }): Promise<ExperimentResultRow>;
    /**
     * Run the evaluators on the prediction stream.
     * Expects runs to be available in the manager.
     * (e.g. from a previous prediction step)
     * @param {Array<RunEvaluator>} evaluators
     * @param {number} maxConcurrency
     */
    _score(evaluators: Array<RunEvaluator>, options?: {
        maxConcurrency?: number;
    }): AsyncGenerator<ExperimentResultRow>;
    _applySummaryEvaluators(summaryEvaluators: Array<SummaryEvaluatorT>): AsyncGenerator<(runsArray: Run[]) => AsyncGenerator<EvaluationResults>>;
    _getDatasetVersion(): Promise<string | undefined>;
    _getDatasetSplits(): Promise<string[] | undefined>;
    _end(): Promise<void>;
}
/**
 * Represents the results of an evaluate() call.
 * This class provides an iterator interface to iterate over the experiment results
 * as they become available. It also provides methods to access the experiment name,
 * the number of results, and to wait for the results to be processed.
 */
declare class ExperimentResults implements AsyncIterableIterator<ExperimentResultRow> {
    private manager;
    results: ExperimentResultRow[];
    processedCount: number;
    summaryResults: EvaluationResults;
    constructor(experimentManager: _ExperimentManager);
    get experimentName(): string;
    [Symbol.asyncIterator](): AsyncIterableIterator<ExperimentResultRow>;
    next(): Promise<IteratorResult<ExperimentResultRow>>;
    processData(manager: _ExperimentManager): Promise<void>;
    get length(): number;
}
export {};
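/*
 * A minimal consumption sketch: ExperimentResults is async-iterable, so rows can
 * be read one by one once evaluate() resolves. `results` refers to the hypothetical
 * value from the evaluate() sketch above; the field accesses below use only members
 * declared in ExperimentResultRow and ExperimentResults.
 *
 *   for await (const row of results) {
 *       console.log(row.example.id, row.evaluationResults.results);
 *   }
 *   console.log(results.experimentName, results.length);
 */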