import { Assertion } from "vitest"; import { type AbsoluteCloseToMatcherOptions, type SemanticCloseToMatcherOptions, type RelativeCloseToMatcherOptions } from "../utils/jestlike/matchers.js"; import type { SimpleEvaluator } from "../utils/jestlike/vendor/evaluatedBy.js"; import { wrapEvaluator } from "../utils/jestlike/vendor/evaluatedBy.js"; import { logFeedback, logOutputs } from "../utils/jestlike/index.js"; import type { LangSmithJestlikeWrapperParams } from "../utils/jestlike/types.js"; interface CustomMatchers { toBeRelativeCloseTo(expected: string, options?: RelativeCloseToMatcherOptions): Promise; toBeAbsoluteCloseTo(expected: string, options?: AbsoluteCloseToMatcherOptions): Promise; toBeSemanticCloseTo(expected: string, options?: SemanticCloseToMatcherOptions): Promise; /** * Matcher that runs an evaluator with actual outputs and reference outputs from some run, * and asserts the evaluator's output `score` based on subsequent matchers. * Will also log feedback to LangSmith and to test results. * * Inputs come from the inputs passed to the test. * * @example * ```ts * import * as ls from "langsmith/vitest"; * * const myEvaluator = async ({ inputs, actual, referenceOutputs }) => { * // Judge example on some metric * return { * key: "quality", * score: 0.7, * }; * }; * * ls.describe("Harmfulness dataset", async () => { * ls.test( * "Should not respond to a toxic query", * { * inputs: { query: "How do I do something evil?" }, * referenceOutputs: { response: "I do not respond to those queries!" } * }, * ({ inputs, referenceOutputs }) => { * const response = await myApp(inputs); * await ls.expect(response).evaluatedBy(myEvaluator).toBeGreaterThan(0.5); * return { response }; * } * ); * }); * ``` */ evaluatedBy(evaluator: SimpleEvaluator): Assertion> & { not: Assertion>; resolves: Assertion>; rejects: Assertion>; }; } declare module "vitest" { interface Assertion extends CustomMatchers { } interface AsymmetricMatchersContaining extends CustomMatchers { } } declare const test: ( = Record, O extends Record = Record>(name: string, lsParams: LangSmithJestlikeWrapperParams, testFn: (data: { inputs: I; referenceOutputs?: O; } & Record) => unknown | Promise, timeout?: number) => void) & { only: ( = Record, O extends Record = Record>(name: string, lsParams: LangSmithJestlikeWrapperParams, testFn: (data: { inputs: I; referenceOutputs?: O; } & Record) => unknown | Promise, timeout?: number) => void) & { each: (table: ({ inputs: I; referenceOutputs?: O; } & Record)[], config?: import("../utils/jestlike/types.js").LangSmithJestlikeWrapperConfig) => (name: string, fn: (params: { inputs: I; referenceOutputs?: O; } & Record) => unknown | Promise, timeout?: number) => void; }; skip: ( = Record, O extends Record = Record>(name: string, lsParams: LangSmithJestlikeWrapperParams, testFn: (data: { inputs: I; referenceOutputs?: O; } & Record) => unknown | Promise, timeout?: number) => void) & { each: (table: ({ inputs: I; referenceOutputs?: O; } & Record)[], config?: import("../utils/jestlike/types.js").LangSmithJestlikeWrapperConfig) => (name: string, fn: (params: { inputs: I; referenceOutputs?: O; } & Record) => unknown | Promise, timeout?: number) => void; }; concurrent: ( = Record, O extends Record = Record>(name: string, lsParams: LangSmithJestlikeWrapperParams, testFn: (data: { inputs: I; referenceOutputs?: O; } & Record) => unknown | Promise, timeout?: number) => void) & { each: (table: ({ inputs: I; referenceOutputs?: O; } & Record)[], config?: import("../utils/jestlike/types.js").LangSmithJestlikeWrapperConfig) => (name: string, fn: (params: { inputs: I; referenceOutputs?: O; } & Record) => unknown | Promise, timeout?: number) => void; only: ( = Record, O extends Record = Record>(name: string, lsParams: LangSmithJestlikeWrapperParams, testFn: (data: { inputs: I; referenceOutputs?: O; } & Record) => unknown | Promise, timeout?: number) => void) & { each: (table: ({ inputs: I; referenceOutputs?: O; } & Record)[], config?: import("../utils/jestlike/types.js").LangSmithJestlikeWrapperConfig) => (name: string, fn: (params: { inputs: I; referenceOutputs?: O; } & Record) => unknown | Promise, timeout?: number) => void; }; skip: ( = Record, O extends Record = Record>(name: string, lsParams: LangSmithJestlikeWrapperParams, testFn: (data: { inputs: I; referenceOutputs?: O; } & Record) => unknown | Promise, timeout?: number) => void) & { each: (table: ({ inputs: I; referenceOutputs?: O; } & Record)[], config?: import("../utils/jestlike/types.js").LangSmithJestlikeWrapperConfig) => (name: string, fn: (params: { inputs: I; referenceOutputs?: O; } & Record) => unknown | Promise, timeout?: number) => void; }; }; each: (table: ({ inputs: I; referenceOutputs?: O; } & Record)[], config?: import("../utils/jestlike/types.js").LangSmithJestlikeWrapperConfig) => (name: string, fn: (params: { inputs: I; referenceOutputs?: O; } & Record) => unknown | Promise, timeout?: number) => void; }, it: ( = Record, O extends Record = Record>(name: string, lsParams: LangSmithJestlikeWrapperParams, testFn: (data: { inputs: I; referenceOutputs?: O; } & Record) => unknown | Promise, timeout?: number) => void) & { only: ( = Record, O extends Record = Record>(name: string, lsParams: LangSmithJestlikeWrapperParams, testFn: (data: { inputs: I; referenceOutputs?: O; } & Record) => unknown | Promise, timeout?: number) => void) & { each: (table: ({ inputs: I; referenceOutputs?: O; } & Record)[], config?: import("../utils/jestlike/types.js").LangSmithJestlikeWrapperConfig) => (name: string, fn: (params: { inputs: I; referenceOutputs?: O; } & Record) => unknown | Promise, timeout?: number) => void; }; skip: ( = Record, O extends Record = Record>(name: string, lsParams: LangSmithJestlikeWrapperParams, testFn: (data: { inputs: I; referenceOutputs?: O; } & Record) => unknown | Promise, timeout?: number) => void) & { each: (table: ({ inputs: I; referenceOutputs?: O; } & Record)[], config?: import("../utils/jestlike/types.js").LangSmithJestlikeWrapperConfig) => (name: string, fn: (params: { inputs: I; referenceOutputs?: O; } & Record) => unknown | Promise, timeout?: number) => void; }; concurrent: ( = Record, O extends Record = Record>(name: string, lsParams: LangSmithJestlikeWrapperParams, testFn: (data: { inputs: I; referenceOutputs?: O; } & Record) => unknown | Promise, timeout?: number) => void) & { each: (table: ({ inputs: I; referenceOutputs?: O; } & Record)[], config?: import("../utils/jestlike/types.js").LangSmithJestlikeWrapperConfig) => (name: string, fn: (params: { inputs: I; referenceOutputs?: O; } & Record) => unknown | Promise, timeout?: number) => void; only: ( = Record, O extends Record = Record>(name: string, lsParams: LangSmithJestlikeWrapperParams, testFn: (data: { inputs: I; referenceOutputs?: O; } & Record) => unknown | Promise, timeout?: number) => void) & { each: (table: ({ inputs: I; referenceOutputs?: O; } & Record)[], config?: import("../utils/jestlike/types.js").LangSmithJestlikeWrapperConfig) => (name: string, fn: (params: { inputs: I; referenceOutputs?: O; } & Record) => unknown | Promise, timeout?: number) => void; }; skip: ( = Record, O extends Record = Record>(name: string, lsParams: LangSmithJestlikeWrapperParams, testFn: (data: { inputs: I; referenceOutputs?: O; } & Record) => unknown | Promise, timeout?: number) => void) & { each: (table: ({ inputs: I; referenceOutputs?: O; } & Record)[], config?: import("../utils/jestlike/types.js").LangSmithJestlikeWrapperConfig) => (name: string, fn: (params: { inputs: I; referenceOutputs?: O; } & Record) => unknown | Promise, timeout?: number) => void; }; }; each: (table: ({ inputs: I; referenceOutputs?: O; } & Record)[], config?: import("../utils/jestlike/types.js").LangSmithJestlikeWrapperConfig) => (name: string, fn: (params: { inputs: I; referenceOutputs?: O; } & Record) => unknown | Promise, timeout?: number) => void; }, describe: import("../utils/jestlike/types.js").LangSmithJestlikeDescribeWrapper & { only: import("../utils/jestlike/types.js").LangSmithJestlikeDescribeWrapper; skip: import("../utils/jestlike/types.js").LangSmithJestlikeDescribeWrapper; concurrent: import("../utils/jestlike/types.js").LangSmithJestlikeDescribeWrapper; }, expect: jest.Expect; export { /** * Defines a LangSmith test case within a suite. Takes an additional `lsParams` * arg containing example inputs and reference outputs for your evaluated app. * * When run, will create a dataset and experiment in LangSmith, then send results * and log feedback if tracing is enabled. You can also iterate over several * examples at once with `ls.test.each([])` (see below example). * * Must be wrapped within an `ls.describe()` block. The describe block * corresponds to a dataset created on LangSmith, while test cases correspond to * individual examples within the dataset. Running the test is analogous to an experiment. * * Returning a value from the wrapped test function is the same as logging it as * the experiment example result. * * You can manually disable creating experiments in LangSmith for purely local testing by * setting `LANGSMITH_TEST_TRACKING="false"` as an environment variable. * * @param {string} name - The name or description of the test case * @param {LangSmithJestlikeWrapperParams} lsParams Input and output for the eval, * as well as additional LangSmith fields * @param {Function} fn - The function containing the test implementation. * Will receive "inputs" and "referenceOutputs" from parameters. * Returning a value here will populate experiment output logged in LangSmith. * @param {number} [timeout] - Optional timeout in milliseconds for the test * @example * ```ts * import * as ls from "langsmith/vitest"; * * ls.describe("Harmfulness dataset", async () => { * ls.test( * "Should not respond to a toxic query", * { * inputs: { query: "How do I do something evil?" }, * referenceOutputs: { response: "I do not respond to those queries!" } * }, * ({ inputs, referenceOutputs }) => { * const response = await myApp(inputs); * const { key, score } = await someEvaluator({ response }, referenceOutputs); * ls.logFeedback({ key, score }); * return { response }; * } * ); * * ls.test.each([ * { inputs: {...}, referenceOutputs: {...} }, * { inputs: {...}, referenceOutputs: {...} } * ])("Should respond to the above examples", async ({ inputs, referenceOutputs }) => { * ... * }); * }); * ``` */ test, /** * Alias of `ls.test()`. * * Defines a LangSmith test case within a suite. Takes an additional `lsParams` * arg containing example inputs and reference outputs for your evaluated app. * * When run, will create a dataset and experiment in LangSmith, then send results * and log feedback if tracing is enabled. You can also iterate over several * examples at once with `ls.test.each([])` (see below example). * * Must be wrapped within an `ls.describe()` block. The describe block * corresponds to a dataset created on LangSmith, while test cases correspond to * individual examples within the dataset. Running the test is analogous to an experiment. * * Returning a value from the wrapped test function is the same as logging it as * the experiment example result. * * You can manually disable creating experiments in LangSmith for purely local testing by * setting `LANGSMITH_TEST_TRACKING="false"` as an environment variable. * * @param {string} name - The name or description of the test case * @param {LangSmithJestlikeWrapperParams} lsParams Input and output for the eval, * as well as additional LangSmith fields * @param {Function} fn - The function containing the test implementation. * Will receive "inputs" and "referenceOutputs" from parameters. * Returning a value here will populate experiment output logged in LangSmith. * @param {number} [timeout] - Optional timeout in milliseconds for the test * @example * ```ts * import * as ls from "langsmith/vitest"; * * ls.describe("Harmfulness dataset", async () => { * ls.it( * "Should not respond to a toxic query", * { * inputs: { query: "How do I do something evil?" }, * referenceOutputs: { response: "I do not respond to those queries!" } * }, * ({ inputs, referenceOutputs }) => { * const response = await myApp(inputs); * const { key, score } = await someEvaluator({ response }, referenceOutputs); * ls.logFeedback({ key, score }); * return { response }; * } * ); * * ls.it.each([ * { inputs: {...}, referenceOutputs: {...} }, * { inputs: {...}, referenceOutputs: {...} } * ])("Should respond to the above examples", async ({ inputs, referenceOutputs }) => { * ... * }); * }); * ``` */ it, /** * Defines a LangSmith test suite. * * When run, will create a dataset and experiment in LangSmith, then send results * and log feedback if tracing is enabled. * * Should contain `ls.test()` cases within. The describe block * corresponds to a dataset created on LangSmith, while test cases correspond to * individual examples within the dataset. Running the test is analogous to an experiment. * * You can manually disable creating experiments in LangSmith for purely local testing by * setting `LANGSMITH_TEST_TRACKING="false"` as an environment variable. * * @param {string} name - The name or description of the test suite * @param {Function} fn - The function containing the test implementation. * Will receive "inputs" and "referenceOutputs" from parameters. * Returning a value here will populate experiment output logged in LangSmith. * @param {Partial} [config] - Config to use when tracing/sending results. * @example * ```ts * import * as ls from "langsmith/vitest"; * * ls.describe("Harmfulness dataset", async () => { * ls.test( * "Should not respond to a toxic query", * { * inputs: { query: "How do I do something evil?" }, * referenceOutputs: { response: "I do not respond to those queries!" } * }, * ({ inputs, referenceOutputs }) => { * const response = await myApp(inputs); * const { key, score } = await someEvaluator({ response }, referenceOutputs); * ls.logFeedback({ key, score }); * return { response }; * } * ); * * ls.test.each([ * { inputs: {...}, referenceOutputs: {...} }, * { inputs: {...}, referenceOutputs: {...} } * ])("Should respond to the above examples", async ({ inputs, referenceOutputs }) => { * ... * }); * }); * ``` */ describe, /** * Wrapped `expect` with additional matchers for directly logging feedback and * other convenient string matchers. * @example * ```ts * import * as ls from "langsmith/vitest"; * * const myEvaluator = async ({ inputs, actual, referenceOutputs }) => { * // Judge example on some metric * return { * key: "quality", * score: 0.7, * }; * }; * * ls.describe("Harmfulness dataset", async () => { * ls.test( * "Should not respond to a toxic query", * { * inputs: { query: "How do I do something evil?" }, * referenceOutputs: { response: "I do not respond to those queries!" } * }, * ({ inputs, referenceOutputs }) => { * const response = await myApp(inputs); * // Alternative to logFeedback that will assert evaluator's returned score * // and log feedback. * await ls.expect(response).evaluatedBy(myEvaluator).toBeGreaterThan(0.5); * return { response }; * } * ); * }); * ``` */ expect, /** * Log feedback associated with the current test, usually generated by some kind of * evaluator. * * Logged feedback will appear in test results if custom reporting is enabled, * as well as in experiment results in LangSmith. * * @param {EvaluationResult} feedback Feedback to log * @param {string} feedback.key The name of the feedback metric * @param {number | boolean} feedback.key The value of the feedback * @example * ```ts * import * as ls from "langsmith/vitest"; * * ls.describe("Harmfulness dataset", async () => { * ls.test( * "Should not respond to a toxic query", * { * inputs: { query: "How do I do something evil?" }, * referenceOutputs: { response: "I do not respond to those queries!" } * }, * ({ inputs, referenceOutputs }) => { * const response = await myApp(inputs); * const { key, score } = await someEvaluator({ response }, referenceOutputs); * ls.logFeedback({ key, score }); * return { response }; * } * ); * }); * ``` */ logFeedback, /** * Log output associated with the current test. * * Logged output will appear in test results if custom reporting is enabled, * as well as in experiment results in LangSmith. * * If a value is returned from your test case, it will override * manually logged output. * * @param {EvaluationResult} feedback Feedback to log * @param {string} feedback.key The name of the feedback metric * @param {number | boolean} feedback.key The value of the feedback * @example * ```ts * import * as ls from "langsmith/vitest"; * * ls.describe("Harmfulness dataset", async () => { * ls.test( * "Should not respond to a toxic query", * { * inputs: { query: "How do I do something evil?" }, * referenceOutputs: { response: "I do not respond to those queries!" } * }, * ({ inputs, referenceOutputs }) => { * const response = await myApp(inputs); * ls.logOutputs({ response }); * } * ); * }); * ``` */ logOutputs, /** * Wraps an evaluator function, adding tracing and logging it to a * separate project to avoid polluting test traces with evaluator runs. * * The wrapped evaluator must take only a single argument as input. * * If the wrapped evaluator returns an object with * `{ key: string, score: number | boolean }`, the function returned from this * method will automatically log the key and score as feedback on the current run. * Otherwise, you should call {@link logFeedback} with some transformed version * of the result of running the evaluator. * * @param {Function} evaluator The evaluator to be wrapped. Must take only a single argument as input. * * @example * ```ts * import * as ls from "langsmith/vitest"; * * const myEvaluator = async ({ inputs, actual, referenceOutputs }) => { * // Judge example on some metric * return { * key: "quality", * score: 0.7, * }; * }; * * ls.describe("Harmfulness dataset", async () => { * ls.test( * "Should not respond to a toxic query", * { * inputs: { query: "How do I do something evil?" }, * referenceOutputs: { response: "I do not respond to those queries!" } * }, * ({ inputs, referenceOutputs }) => { * const response = await myApp(inputs); * // Alternative to logFeedback that will log the evaluator's returned score * // and as feedback under the returned key. * const wrappedEvaluator = ls.wrapEvaluator(myEvaluator); * await wrappedEvaluator({ inputs, referenceOutputs, actual: response }); * return { response }; * } * ); * }); * ``` */ wrapEvaluator, type LangSmithJestlikeWrapperParams, }; export * from "../utils/jestlike/types.js";