index.d.ts 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507
  1. import { Assertion } from "vitest";
  2. import { type AbsoluteCloseToMatcherOptions, type SemanticCloseToMatcherOptions, type RelativeCloseToMatcherOptions } from "../utils/jestlike/matchers.js";
  3. import type { SimpleEvaluator } from "../utils/jestlike/vendor/evaluatedBy.js";
  4. import { wrapEvaluator } from "../utils/jestlike/vendor/evaluatedBy.js";
  5. import { logFeedback, logOutputs } from "../utils/jestlike/index.js";
  6. import type { LangSmithJestlikeWrapperParams } from "../utils/jestlike/types.js";
  /**
   * Extra matchers that LangSmith layers on top of the stock Vitest assertions.
   *
   * The three string matchers resolve asynchronously (they return a promise),
   * while `evaluatedBy` returns a further assertion chain over the evaluator's
   * score.
   */
  interface CustomMatchers<R = unknown> {
    /**
     * String closeness matcher configured through `RelativeCloseToMatcherOptions`.
     * NOTE(review): presumably compares a relative (length-normalised) distance
     * between the received string and `expected` — confirm against matchers.js.
     *
     * @param expected The string the received value is compared against.
     * @param options Optional matcher settings.
     */
    toBeRelativeCloseTo(
      expected: string,
      options?: RelativeCloseToMatcherOptions
    ): Promise<R>;
    /**
     * String closeness matcher configured through `AbsoluteCloseToMatcherOptions`.
     * NOTE(review): presumably compares an absolute distance between the
     * received string and `expected` — confirm against matchers.js.
     *
     * @param expected The string the received value is compared against.
     * @param options Optional matcher settings.
     */
    toBeAbsoluteCloseTo(
      expected: string,
      options?: AbsoluteCloseToMatcherOptions
    ): Promise<R>;
    /**
     * String closeness matcher configured through `SemanticCloseToMatcherOptions`.
     * NOTE(review): presumably compares the meaning of the two strings rather
     * than their characters — confirm against matchers.js.
     *
     * @param expected The string the received value is compared against.
     * @param options Optional matcher settings.
     */
    toBeSemanticCloseTo(
      expected: string,
      options?: SemanticCloseToMatcherOptions
    ): Promise<R>;
    /**
     * Matcher that runs an evaluator with actual outputs and reference outputs from some run,
     * and asserts the evaluator's output `score` based on subsequent matchers.
     * Will also log feedback to LangSmith and to test results.
     *
     * Inputs come from the inputs passed to the test.
     *
     * The returned assertion chain also exposes `not`, `resolves` and `rejects`.
     *
     * @param evaluator The evaluator whose returned `score` is asserted on.
     *
     * @example
     * ```ts
     * import * as ls from "langsmith/vitest";
     *
     * const myEvaluator = async ({ inputs, actual, referenceOutputs }) => {
     *   // Judge example on some metric
     *   return {
     *     key: "quality",
     *     score: 0.7,
     *   };
     * };
     *
     * ls.describe("Harmfulness dataset", async () => {
     *   ls.test(
     *     "Should not respond to a toxic query",
     *     {
     *       inputs: { query: "How do I do something evil?" },
     *       referenceOutputs: { response: "I do not respond to those queries!" }
     *     },
     *     async ({ inputs, referenceOutputs }) => {
     *       const response = await myApp(inputs);
     *       await ls.expect(response).evaluatedBy(myEvaluator).toBeGreaterThan(0.5);
     *       return { response };
     *     }
     *   );
     * });
     * ```
     */
    evaluatedBy(evaluator: SimpleEvaluator): Assertion<Promise<R>> & {
      not: Assertion<Promise<R>>;
      resolves: Assertion<Promise<R>>;
      rejects: Assertion<Promise<R>>;
    };
  }
  // Module augmentation: merges the LangSmith matchers declared in
  // `CustomMatchers` into Vitest's own assertion interfaces.
  declare module "vitest" {
    /** Vitest assertions additionally carry every `CustomMatchers` member. */
    interface Assertion<T = any> extends CustomMatchers<T> {}
    /** The asymmetric-matcher container gains the same members (with the default `R`). */
    interface AsymmetricMatchersContaining extends CustomMatchers {}
  }
  /**
   * Argument handed to a test callback: the example's `inputs`, its optional
   * `referenceOutputs`, plus any additional string-keyed fields.
   */
  type LangSmithTestCaseData<I, O> = {
    inputs: I;
    referenceOutputs?: O;
  } & Record<string, any>;

  /**
   * Call signature shared by `test`, `it` and each of their `only` / `skip` /
   * `concurrent` variants.
   */
  type LangSmithTestFn = <
    I extends Record<string, any> = Record<string, any>,
    O extends Record<string, any> = Record<string, any>
  >(
    name: string,
    lsParams: LangSmithJestlikeWrapperParams<I, O>,
    testFn: (data: LangSmithTestCaseData<I, O>) => unknown | Promise<unknown>,
    timeout?: number
  ) => void;

  /**
   * Signature of `.each`: takes a table of examples (and an optional wrapper
   * config) and returns the function that registers a test against that table.
   */
  type LangSmithTestEachFn = <
    I extends import("../schemas.js").KVMap,
    O extends import("../schemas.js").KVMap
  >(
    table: LangSmithTestCaseData<I, O>[],
    config?: import("../utils/jestlike/types.js").LangSmithJestlikeWrapperConfig
  ) => (
    name: string,
    fn: (params: LangSmithTestCaseData<I, O>) => unknown | Promise<unknown>,
    timeout?: number
  ) => void;

  /** A test function that additionally exposes `.each`. */
  type LangSmithTestFnWithEach = LangSmithTestFn & {
    each: LangSmithTestEachFn;
  };

  /**
   * Full shape of `test` / `it`: callable directly, with `only`, `skip`,
   * `concurrent` (which itself has `only` and `skip`) and `each` members.
   */
  type LangSmithTestApi = LangSmithTestFn & {
    only: LangSmithTestFnWithEach;
    skip: LangSmithTestFnWithEach;
    concurrent: LangSmithTestFnWithEach & {
      only: LangSmithTestFnWithEach;
      skip: LangSmithTestFnWithEach;
    };
    each: LangSmithTestEachFn;
  };

  // `test` and `it` share one type; `describe` is the suite wrapper with
  // `only` / `skip` / `concurrent` variants of the same wrapper type.
  // NOTE(review): `expect` is typed as `jest.Expect` although this is the vitest
  // entrypoint and nothing in the visible imports brings a `jest` namespace into
  // scope — confirm that the global `jest` types are expected to be available.
  declare const test: LangSmithTestApi,
    it: LangSmithTestApi,
    describe: import("../utils/jestlike/types.js").LangSmithJestlikeDescribeWrapper & {
      only: import("../utils/jestlike/types.js").LangSmithJestlikeDescribeWrapper;
      skip: import("../utils/jestlike/types.js").LangSmithJestlikeDescribeWrapper;
      concurrent: import("../utils/jestlike/types.js").LangSmithJestlikeDescribeWrapper;
    },
    expect: jest.Expect;
  export {
    /**
     * Defines a LangSmith test case within a suite. Takes an additional `lsParams`
     * arg containing example inputs and reference outputs for your evaluated app.
     *
     * When run, will create a dataset and experiment in LangSmith, then send results
     * and log feedback if tracing is enabled. You can also iterate over several
     * examples at once with `ls.test.each([])` (see below example).
     *
     * Must be wrapped within an `ls.describe()` block. The describe block
     * corresponds to a dataset created on LangSmith, while test cases correspond to
     * individual examples within the dataset. Running the test is analogous to an experiment.
     *
     * Returning a value from the wrapped test function is the same as logging it as
     * the experiment example result.
     *
     * You can manually disable creating experiments in LangSmith for purely local testing by
     * setting `LANGSMITH_TEST_TRACKING="false"` as an environment variable.
     *
     * @param {string} name - The name or description of the test case
     * @param {LangSmithJestlikeWrapperParams<I, O>} lsParams Input and output for the eval,
     * as well as additional LangSmith fields
     * @param {Function} testFn - The function containing the test implementation.
     * Will receive "inputs" and "referenceOutputs" from parameters.
     * Returning a value here will populate experiment output logged in LangSmith.
     * @param {number} [timeout] - Optional timeout in milliseconds for the test
     * @example
     * ```ts
     * import * as ls from "langsmith/vitest";
     *
     * ls.describe("Harmfulness dataset", async () => {
     *   ls.test(
     *     "Should not respond to a toxic query",
     *     {
     *       inputs: { query: "How do I do something evil?" },
     *       referenceOutputs: { response: "I do not respond to those queries!" }
     *     },
     *     async ({ inputs, referenceOutputs }) => {
     *       const response = await myApp(inputs);
     *       const { key, score } = await someEvaluator({ response }, referenceOutputs);
     *       ls.logFeedback({ key, score });
     *       return { response };
     *     }
     *   );
     *
     *   ls.test.each([
     *     { inputs: {...}, referenceOutputs: {...} },
     *     { inputs: {...}, referenceOutputs: {...} }
     *   ])("Should respond to the above examples", async ({ inputs, referenceOutputs }) => {
     *     ...
     *   });
     * });
     * ```
     */
    test,
    /**
     * Alias of `ls.test()`.
     *
     * Defines a LangSmith test case within a suite. Takes an additional `lsParams`
     * arg containing example inputs and reference outputs for your evaluated app.
     *
     * When run, will create a dataset and experiment in LangSmith, then send results
     * and log feedback if tracing is enabled. You can also iterate over several
     * examples at once with `ls.it.each([])` (see below example).
     *
     * Must be wrapped within an `ls.describe()` block. The describe block
     * corresponds to a dataset created on LangSmith, while test cases correspond to
     * individual examples within the dataset. Running the test is analogous to an experiment.
     *
     * Returning a value from the wrapped test function is the same as logging it as
     * the experiment example result.
     *
     * You can manually disable creating experiments in LangSmith for purely local testing by
     * setting `LANGSMITH_TEST_TRACKING="false"` as an environment variable.
     *
     * @param {string} name - The name or description of the test case
     * @param {LangSmithJestlikeWrapperParams<I, O>} lsParams Input and output for the eval,
     * as well as additional LangSmith fields
     * @param {Function} testFn - The function containing the test implementation.
     * Will receive "inputs" and "referenceOutputs" from parameters.
     * Returning a value here will populate experiment output logged in LangSmith.
     * @param {number} [timeout] - Optional timeout in milliseconds for the test
     * @example
     * ```ts
     * import * as ls from "langsmith/vitest";
     *
     * ls.describe("Harmfulness dataset", async () => {
     *   ls.it(
     *     "Should not respond to a toxic query",
     *     {
     *       inputs: { query: "How do I do something evil?" },
     *       referenceOutputs: { response: "I do not respond to those queries!" }
     *     },
     *     async ({ inputs, referenceOutputs }) => {
     *       const response = await myApp(inputs);
     *       const { key, score } = await someEvaluator({ response }, referenceOutputs);
     *       ls.logFeedback({ key, score });
     *       return { response };
     *     }
     *   );
     *
     *   ls.it.each([
     *     { inputs: {...}, referenceOutputs: {...} },
     *     { inputs: {...}, referenceOutputs: {...} }
     *   ])("Should respond to the above examples", async ({ inputs, referenceOutputs }) => {
     *     ...
     *   });
     * });
     * ```
     */
    it,
    /**
     * Defines a LangSmith test suite.
     *
     * When run, will create a dataset and experiment in LangSmith, then send results
     * and log feedback if tracing is enabled.
     *
     * Should contain `ls.test()` cases within. The describe block
     * corresponds to a dataset created on LangSmith, while test cases correspond to
     * individual examples within the dataset. Running the test is analogous to an experiment.
     *
     * You can manually disable creating experiments in LangSmith for purely local testing by
     * setting `LANGSMITH_TEST_TRACKING="false"` as an environment variable.
     *
     * @param {string} name - The name or description of the test suite
     * @param {Function} fn - The function that defines the suite; it should
     * register the suite's `ls.test()` cases (see the example below).
     * @param {Partial<RunTreeConfig>} [config] - Config to use when tracing/sending results.
     * @example
     * ```ts
     * import * as ls from "langsmith/vitest";
     *
     * ls.describe("Harmfulness dataset", async () => {
     *   ls.test(
     *     "Should not respond to a toxic query",
     *     {
     *       inputs: { query: "How do I do something evil?" },
     *       referenceOutputs: { response: "I do not respond to those queries!" }
     *     },
     *     async ({ inputs, referenceOutputs }) => {
     *       const response = await myApp(inputs);
     *       const { key, score } = await someEvaluator({ response }, referenceOutputs);
     *       ls.logFeedback({ key, score });
     *       return { response };
     *     }
     *   );
     *
     *   ls.test.each([
     *     { inputs: {...}, referenceOutputs: {...} },
     *     { inputs: {...}, referenceOutputs: {...} }
     *   ])("Should respond to the above examples", async ({ inputs, referenceOutputs }) => {
     *     ...
     *   });
     * });
     * ```
     */
    describe,
    /**
     * Wrapped `expect` with additional matchers for directly logging feedback and
     * other convenient string matchers.
     * @example
     * ```ts
     * import * as ls from "langsmith/vitest";
     *
     * const myEvaluator = async ({ inputs, actual, referenceOutputs }) => {
     *   // Judge example on some metric
     *   return {
     *     key: "quality",
     *     score: 0.7,
     *   };
     * };
     *
     * ls.describe("Harmfulness dataset", async () => {
     *   ls.test(
     *     "Should not respond to a toxic query",
     *     {
     *       inputs: { query: "How do I do something evil?" },
     *       referenceOutputs: { response: "I do not respond to those queries!" }
     *     },
     *     async ({ inputs, referenceOutputs }) => {
     *       const response = await myApp(inputs);
     *       // Alternative to logFeedback that will assert evaluator's returned score
     *       // and log feedback.
     *       await ls.expect(response).evaluatedBy(myEvaluator).toBeGreaterThan(0.5);
     *       return { response };
     *     }
     *   );
     * });
     * ```
     */
    expect,
    /**
     * Log feedback associated with the current test, usually generated by some kind of
     * evaluator.
     *
     * Logged feedback will appear in test results if custom reporting is enabled,
     * as well as in experiment results in LangSmith.
     *
     * @param {EvaluationResult} feedback Feedback to log
     * @param {string} feedback.key The name of the feedback metric
     * @param {number | boolean} feedback.score The value of the feedback
     * @example
     * ```ts
     * import * as ls from "langsmith/vitest";
     *
     * ls.describe("Harmfulness dataset", async () => {
     *   ls.test(
     *     "Should not respond to a toxic query",
     *     {
     *       inputs: { query: "How do I do something evil?" },
     *       referenceOutputs: { response: "I do not respond to those queries!" }
     *     },
     *     async ({ inputs, referenceOutputs }) => {
     *       const response = await myApp(inputs);
     *       const { key, score } = await someEvaluator({ response }, referenceOutputs);
     *       ls.logFeedback({ key, score });
     *       return { response };
     *     }
     *   );
     * });
     * ```
     */
    logFeedback,
    /**
     * Log output associated with the current test.
     *
     * Logged output will appear in test results if custom reporting is enabled,
     * as well as in experiment results in LangSmith.
     *
     * If a value is returned from your test case, it will override
     * manually logged output.
     *
     * @param output - The output object to log for the current test
     * (e.g. `{ response }`, as in the example below)
     * @example
     * ```ts
     * import * as ls from "langsmith/vitest";
     *
     * ls.describe("Harmfulness dataset", async () => {
     *   ls.test(
     *     "Should not respond to a toxic query",
     *     {
     *       inputs: { query: "How do I do something evil?" },
     *       referenceOutputs: { response: "I do not respond to those queries!" }
     *     },
     *     async ({ inputs, referenceOutputs }) => {
     *       const response = await myApp(inputs);
     *       ls.logOutputs({ response });
     *     }
     *   );
     * });
     * ```
     */
    logOutputs,
    /**
     * Wraps an evaluator function, adding tracing and logging it to a
     * separate project to avoid polluting test traces with evaluator runs.
     *
     * The wrapped evaluator must take only a single argument as input.
     *
     * If the wrapped evaluator returns an object with
     * `{ key: string, score: number | boolean }`, the function returned from this
     * method will automatically log the key and score as feedback on the current run.
     * Otherwise, you should call {@link logFeedback} with some transformed version
     * of the result of running the evaluator.
     *
     * @param {Function} evaluator The evaluator to be wrapped. Must take only a single argument as input.
     *
     * @example
     * ```ts
     * import * as ls from "langsmith/vitest";
     *
     * const myEvaluator = async ({ inputs, actual, referenceOutputs }) => {
     *   // Judge example on some metric
     *   return {
     *     key: "quality",
     *     score: 0.7,
     *   };
     * };
     *
     * ls.describe("Harmfulness dataset", async () => {
     *   ls.test(
     *     "Should not respond to a toxic query",
     *     {
     *       inputs: { query: "How do I do something evil?" },
     *       referenceOutputs: { response: "I do not respond to those queries!" }
     *     },
     *     async ({ inputs, referenceOutputs }) => {
     *       const response = await myApp(inputs);
     *       // Alternative to logFeedback that will log the evaluator's returned
     *       // score as feedback under the returned key.
     *       const wrappedEvaluator = ls.wrapEvaluator(myEvaluator);
     *       await wrappedEvaluator({ inputs, referenceOutputs, actual: response });
     *       return { response };
     *     }
     *   );
     * });
     * ```
     */
    wrapEvaluator,
    type LangSmithJestlikeWrapperParams,
  };
  // Re-export everything from the shared jest-like wrapper types module.
  export * from "../utils/jestlike/types.js";