_runner.d.ts

import { Client } from "../index.js";
import { AttachmentInfo, Example, KVMap, Run, TracerSession } from "../schemas.js";
import { EvaluationResult, EvaluationResults, RunEvaluator } from "./evaluator.js";
import { ComparisonEvaluationResults, ComparativeEvaluator } from "./evaluate_comparative.js";
export type TargetConfigT = KVMap & {
    attachments?: Record<string, AttachmentInfo>;
    callbacks?: any;
};
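/**
 * A standard target is either a plain function (sync or async) or an object
 * exposing `invoke`. Illustrative sketch only; the names and input/output
 * shapes below are hypothetical, not part of this API:
 * @example
 * const fnTarget = async (input: { question: string }) => ({
 *   answer: `echo: ${input.question}`,
 * });
 * const invokeTarget = {
 *   invoke: (input: { question: string }) => ({ answer: input.question }),
 * };
 */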
type StandardTargetT<TInput = any, TOutput = KVMap> = ((input: TInput, config?: TargetConfigT) => Promise<TOutput>) | ((input: TInput, config?: TargetConfigT) => TOutput) | {
    invoke: (input: TInput, config?: TargetConfigT) => TOutput;
} | {
    invoke: (input: TInput, config?: TargetConfigT) => Promise<TOutput>;
};
type ComparativeTargetT = Array<string> | Array<Promise<ExperimentResults> | ExperimentResults>;
export type TargetT<TInput = any, TOutput = KVMap> = StandardTargetT<TInput, TOutput> | ComparativeTargetT;
export type DataT = string | AsyncIterable<Example> | Example[];
/** @deprecated Use object parameter version instead: (args: { runs, examples, inputs, outputs, referenceOutputs }) => ... */
type DeprecatedSyncSummaryEvaluator = (runs: Array<Run>, examples: Array<Example>) => EvaluationResult | EvaluationResult[] | EvaluationResults;
/** @deprecated Use object parameter version instead: (args: { runs, examples, inputs, outputs, referenceOutputs }) => ... */
type DeprecatedAsyncSummaryEvaluator = (runs: Array<Run>, examples: Array<Example>) => Promise<EvaluationResult | EvaluationResult[] | EvaluationResults>;
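/**
 * A summary evaluator scores the experiment as a whole rather than a single
 * run. Hedged sketch of the non-deprecated object-parameter form; the
 * `pass_rate` key is a hypothetical feedback name:
 * @example
 * const passRate: SummaryEvaluatorT = ({ runs, examples }) => ({
 *   key: "pass_rate",
 *   score: runs.filter((r) => !r.error).length / examples.length,
 * });
 */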
export type SummaryEvaluatorT = DeprecatedSyncSummaryEvaluator | DeprecatedAsyncSummaryEvaluator | ((args: {
    runs: Array<Run>;
    examples: Array<Example>;
    inputs: Array<Record<string, any>>;
    outputs: Array<Record<string, any>>;
    referenceOutputs?: Array<Record<string, any>>;
}) => EvaluationResult | EvaluationResult[] | EvaluationResults) | ((args: {
    runs: Array<Run>;
    examples: Array<Example>;
    inputs: Array<Record<string, any>>;
    outputs: Array<Record<string, any>>;
    referenceOutputs?: Array<Record<string, any>>;
}) => Promise<EvaluationResult | EvaluationResult[] | EvaluationResults>);
/** @deprecated Use object parameter version instead: (args: { run, example, inputs, outputs, referenceOutputs }) => ... */
type DeprecatedRunEvaluator = RunEvaluator;
/** @deprecated Use object parameter version instead: (args: { run, example, inputs, outputs, referenceOutputs }) => ... */
type DeprecatedFunctionEvaluator = (run: Run, example?: Example) => EvaluationResult | EvaluationResult[] | EvaluationResults;
/** @deprecated Use object parameter version instead: (args: { run, example, inputs, outputs, referenceOutputs }) => ... */
type DeprecatedAsyncFunctionEvaluator = (run: Run, example?: Example) => Promise<EvaluationResult | EvaluationResult[] | EvaluationResults>;
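/**
 * Hedged sketch of the non-deprecated object-parameter evaluator form; the
 * `answer` field and the `exact_match` feedback key are hypothetical:
 * @example
 * const exactMatch: EvaluatorT = ({ outputs, referenceOutputs }) => ({
 *   key: "exact_match",
 *   score: outputs.answer === referenceOutputs?.answer,
 * });
 */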
export type EvaluatorT = DeprecatedRunEvaluator | DeprecatedFunctionEvaluator | DeprecatedAsyncFunctionEvaluator | ((args: {
    run: Run;
    example: Example;
    inputs: Record<string, any>;
    outputs: Record<string, any>;
    referenceOutputs?: Record<string, any>;
    attachments?: Record<string, any>;
}) => EvaluationResult | EvaluationResult[] | EvaluationResults) | ((args: {
    run: Run;
    example: Example;
    inputs: Record<string, any>;
    outputs: Record<string, any>;
    referenceOutputs?: Record<string, any>;
    attachments?: Record<string, any>;
}) => Promise<EvaluationResult | EvaluationResult[] | EvaluationResults>);
interface _ForwardResults {
    run: Run;
    example: Example;
}
interface _ExperimentManagerArgs {
    data?: DataT;
    experiment?: TracerSession | string;
    metadata?: KVMap;
    client?: Client;
    runs?: AsyncGenerator<Run>;
    evaluationResults?: AsyncGenerator<EvaluationResults>;
    summaryResults?: AsyncGenerator<(runsArray: Run[]) => AsyncGenerator<EvaluationResults, any, unknown>, any, unknown>;
    examples?: Example[];
    numRepetitions?: number;
    _runsArray?: Run[];
    includeAttachments?: boolean;
}
type BaseEvaluateOptions = {
    /**
     * Metadata to attach to the experiment.
     * @default undefined
     */
    metadata?: KVMap;
    /**
     * A prefix to provide for your experiment name.
     * @default undefined
     */
    experimentPrefix?: string;
    /**
     * A free-form description of the experiment.
     */
    description?: string;
    /**
     * The maximum number of concurrent evaluations to run.
     * @default undefined
     */
    maxConcurrency?: number;
    /**
     * The LangSmith client to use.
     * @default undefined
     */
    client?: Client;
    /**
     * The number of repetitions to perform. Each example
     * will be run this many times.
     * @default 1
     */
    numRepetitions?: number;
};
export interface EvaluateOptions extends BaseEvaluateOptions {
    /**
     * A list of evaluators to run on each example.
     * @default undefined
     */
    evaluators?: Array<EvaluatorT>;
    /**
     * A list of summary evaluators to run on the entire dataset.
     * @default undefined
     */
    summaryEvaluators?: Array<SummaryEvaluatorT>;
    /**
     * The dataset to evaluate on. Can be a dataset name, a list of
     * examples, or a generator of examples.
     */
    data: DataT;
    /**
     * Whether to use attachments for the experiment.
     * @default false
     */
    includeAttachments?: boolean;
}
export interface ComparativeEvaluateOptions extends BaseEvaluateOptions {
    /**
     * A list of evaluators to run on each example.
     */
    evaluators: Array<ComparativeEvaluator>;
    /**
     * Whether to load all child runs for the experiment.
     * @default false
     */
    loadNested?: boolean;
    /**
     * Whether to randomize the order of outputs for each evaluation.
     * @default false
     */
    randomizeOrder?: boolean;
}
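/**
 * Comparative mode: pass two or more completed experiments (by name/ID, or as
 * `ExperimentResults`) to score their outputs against each other. Hedged
 * sketch; the experiment names and evaluator are hypothetical:
 * @example
 * const comparison = await evaluate(["experiment-a", "experiment-b"], {
 *   evaluators: [myComparativeEvaluator],
 *   randomizeOrder: true,
 * });
 */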
export declare function evaluate(target: ComparativeTargetT, options: ComparativeEvaluateOptions): Promise<ComparisonEvaluationResults>;
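/**
 * Standard mode: run a target function or runnable over a dataset. Hedged
 * sketch; the dataset name and the `exactMatch` evaluator are hypothetical:
 * @example
 * const results = await evaluate(
 *   async (input: { question: string }) => ({ answer: input.question }),
 *   { data: "my-dataset", evaluators: [exactMatch], maxConcurrency: 4 }
 * );
 */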
export declare function evaluate(target: StandardTargetT, options: EvaluateOptions): Promise<ExperimentResults>;
export interface ExperimentResultRow {
    run: Run;
    example: Example;
    evaluationResults: EvaluationResults;
}
/**
 * Manage the execution of experiments.
 *
 * Supports lazily running predictions and evaluations in parallel to facilitate
 * result streaming and early debugging.
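 *
 * Hedged usage sketch of the lazy pipeline (internal API; `target` and
 * `evaluators` below are hypothetical placeholders):
 * @example
 * let manager = await new _ExperimentManager({ data: "my-dataset" }).start();
 * manager = await manager.withPredictions(target);
 * manager = await manager.withEvaluators(evaluators);
 * for await (const row of manager.getResults()) {
 *   console.log(row.evaluationResults);
 * }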
 */
export declare class _ExperimentManager {
    _data?: DataT;
    _runs?: AsyncGenerator<Run>;
    _evaluationResults?: AsyncGenerator<EvaluationResults>;
    _summaryResults?: AsyncGenerator<(runsArray: Run[]) => AsyncGenerator<EvaluationResults, any, unknown>, any, unknown>;
    _examples?: Example[];
    _numRepetitions?: number;
    _runsArray?: Run[];
    client: Client;
    _experiment?: TracerSession;
    _experimentName: string;
    _metadata: KVMap;
    _description?: string;
    _includeAttachments?: boolean;
    get experimentName(): string;
    getExamples(): Promise<Array<Example>>;
    setExamples(examples: Example[]): void;
    get datasetId(): Promise<string>;
    get evaluationResults(): AsyncGenerator<EvaluationResults>;
    get runs(): AsyncGenerator<Run>;
    constructor(args: _ExperimentManagerArgs);
    _getExperiment(): TracerSession;
    _getExperimentMetadata(): Promise<KVMap>;
    _createProject(firstExample: Example, projectMetadata: KVMap): Promise<TracerSession>;
    _getProject(firstExample: Example): Promise<TracerSession>;
    protected _printExperimentStart(): Promise<void>;
    start(): Promise<_ExperimentManager>;
    withPredictions(target: StandardTargetT, options?: {
        maxConcurrency?: number;
    }): Promise<_ExperimentManager>;
    withEvaluators(evaluators: Array<EvaluatorT | RunEvaluator>, options?: {
        maxConcurrency?: number;
    }): Promise<_ExperimentManager>;
    withSummaryEvaluators(summaryEvaluators: Array<SummaryEvaluatorT>): Promise<_ExperimentManager>;
    getResults(): AsyncGenerator<ExperimentResultRow>;
    getSummaryScores(): Promise<EvaluationResults>;
    /**
     * Run the target function or runnable on the examples.
     * @param {StandardTargetT} target The target function or runnable to evaluate.
     * @param options
     * @returns {AsyncGenerator<_ForwardResults>} An async generator of the results.
     */
    _predict(target: StandardTargetT, options?: {
        maxConcurrency?: number;
    }): AsyncGenerator<_ForwardResults>;
    _runEvaluators(evaluators: Array<RunEvaluator>, currentResults: ExperimentResultRow, fields: {
        client: Client;
    }): Promise<ExperimentResultRow>;
    /**
     * Run the evaluators on the prediction stream.
     * Expects runs to be available in the manager
     * (e.g. from a previous prediction step).
     * @param {Array<RunEvaluator>} evaluators
     * @param {number} maxConcurrency
     */
    _score(evaluators: Array<RunEvaluator>, options?: {
        maxConcurrency?: number;
    }): AsyncGenerator<ExperimentResultRow>;
    _applySummaryEvaluators(summaryEvaluators: Array<SummaryEvaluatorT>): AsyncGenerator<(runsArray: Run[]) => AsyncGenerator<EvaluationResults>>;
    _getDatasetVersion(): Promise<string | undefined>;
    _getDatasetSplits(): Promise<string[] | undefined>;
    _end(): Promise<void>;
}
/**
 * Represents the results of an evaluate() call.
 * This class provides an iterator interface to iterate over the experiment results
 * as they become available. It also provides methods to access the experiment name,
 * the number of results, and to wait for the results to be processed.
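 *
 * Streaming consumption sketch (`target` and the dataset name are
 * hypothetical):
 * @example
 * const results = await evaluate(target, { data: "my-dataset" });
 * for await (const row of results) {
 *   console.log(row.run.id, row.evaluationResults);
 * }
 * console.log(results.length, results.summaryResults);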
 */
declare class ExperimentResults implements AsyncIterableIterator<ExperimentResultRow> {
    private manager;
    results: ExperimentResultRow[];
    processedCount: number;
    summaryResults: EvaluationResults;
    constructor(experimentManager: _ExperimentManager);
    get experimentName(): string;
    [Symbol.asyncIterator](): AsyncIterableIterator<ExperimentResultRow>;
    next(): Promise<IteratorResult<ExperimentResultRow>>;
    processData(manager: _ExperimentManager): Promise<void>;
    get length(): number;
}
export {};