index.d.ts 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509
  1. import { type AbsoluteCloseToMatcherOptions, type SemanticCloseToMatcherOptions, type RelativeCloseToMatcherOptions } from "../utils/jestlike/matchers.js";
  2. import type { SimpleEvaluator } from "../utils/jestlike/vendor/evaluatedBy.js";
  3. import { wrapEvaluator } from "../utils/jestlike/vendor/evaluatedBy.js";
  4. import { logFeedback, logOutputs } from "../utils/jestlike/index.js";
  5. import type { LangSmithJestlikeWrapperParams } from "../utils/jestlike/types.js";
  // Global augmentation of the `jest` namespace: adds the LangSmith matchers declared
  // below to both `jest.AsymmetricMatchers` and `jest.Matchers<R>`.
  6. declare global {
  7. namespace jest {
  // Asymmetric variants of the three string matchers; each takes an expected string
  // plus optional matcher-specific options and is typed to return `Promise<void>`.
  8. interface AsymmetricMatchers {
  9. toBeRelativeCloseTo(expected: string, options?: RelativeCloseToMatcherOptions): Promise<void>;
  10. toBeAbsoluteCloseTo(expected: string, options?: AbsoluteCloseToMatcherOptions): Promise<void>;
  11. toBeSemanticCloseTo(expected: string, options?: SemanticCloseToMatcherOptions): Promise<void>;
  12. }
  // Regular matchers: same three string matchers typed to return `Promise<R>`, plus
  // `evaluatedBy`. The comparison semantics of the *CloseTo matchers are defined by the
  // options types imported from "../utils/jestlike/matchers.js" (not visible in this file).
  13. interface Matchers<R> {
  14. toBeRelativeCloseTo(expected: string, options?: RelativeCloseToMatcherOptions): Promise<R>;
  15. toBeAbsoluteCloseTo(expected: string, options?: AbsoluteCloseToMatcherOptions): Promise<R>;
  16. toBeSemanticCloseTo(expected: string, options?: SemanticCloseToMatcherOptions): Promise<R>;
  17. /**
  18. * Matcher that runs an evaluator with actual outputs and referenceOutputs from some run,
  19. * and asserts the evaluator's output `score` based on subsequent matchers.
  20. * Will also log feedback to LangSmith and to test results.
  21. *
  22. * Inputs come from the inputs passed to the test.
  23. *
  24. * @example
  25. * ```ts
  26. * import * as ls from "langsmith/jest";
  27. *
  28. * const myEvaluator = async ({ inputs, outputs, referenceOutputs }) => {
  29. * // Judge example on some metric
  30. * return {
  31. * key: "quality",
  32. * score: 0.7,
  33. * };
  34. * };
  35. *
  36. * ls.describe("Harmfulness dataset", async () => {
  37. * ls.test(
  38. * "Should not respond to a toxic query",
  39. * {
  40. * inputs: { query: "How do I do something evil?" },
  41. * referenceOutputs: { response: "I do not respond to those queries!" }
  42. * },
  43. * async ({ inputs, referenceOutputs }) => {
  44. * const response = await myApp(inputs);
  45. * await ls.expect(response).evaluatedBy(myEvaluator).toBeGreaterThan(0.5);
  46. * return { response };
  47. * }
  48. * );
  49. * });
  50. * ```
  51. */
  52. evaluatedBy(evaluator: SimpleEvaluator): jest.Matchers<Promise<R>> & {
  53. not: jest.Matchers<Promise<R>>;
  54. resolves: jest.Matchers<Promise<R>>;
  55. rejects: jest.Matchers<Promise<R>>;
  56. };
  57. }
  58. }
  59. }
  // Declares the four wrapped entry points exported below: `test`, `it`, `describe`, `expect`.
  // `test` and `it` have the same shape: a callable `(name, lsParams, testFn, timeout?)` that
  // also carries `only` and `skip` (each with `.each`), `concurrent` (with `.each`, `.only`,
  // `.skip`, the latter two again with `.each`) and a top-level `.each`.
  // `.each(table, config?)` returns a `(name, fn, timeout?)` registrar; its `I`/`O` type
  // parameters are constrained to `KVMap` and have no defaults, unlike the direct call form.
  // `describe` adds `only`/`skip`/`concurrent` variants; `expect` is typed as `jest.Expect`.
  60. declare const test: (<I extends Record<string, any> = Record<string, any>, O extends Record<string, any> = Record<string, any>>(name: string, lsParams: LangSmithJestlikeWrapperParams<I, O>, testFn: (data: {
  61. inputs: I;
  62. referenceOutputs?: O;
  63. } & Record<string, any>) => unknown | Promise<unknown>, timeout?: number) => void) & {
  64. only: (<I extends Record<string, any> = Record<string, any>, O extends Record<string, any> = Record<string, any>>(name: string, lsParams: LangSmithJestlikeWrapperParams<I, O>, testFn: (data: {
  65. inputs: I;
  66. referenceOutputs?: O;
  67. } & Record<string, any>) => unknown | Promise<unknown>, timeout?: number) => void) & {
  68. each: <I extends import("../schemas.js").KVMap, O extends import("../schemas.js").KVMap>(table: ({
  69. inputs: I;
  70. referenceOutputs?: O;
  71. } & Record<string, any>)[], config?: import("../utils/jestlike/types.js").LangSmithJestlikeWrapperConfig) => (name: string, fn: (params: {
  72. inputs: I;
  73. referenceOutputs?: O;
  74. } & Record<string, any>) => unknown | Promise<unknown>, timeout?: number) => void;
  75. };
  76. skip: (<I extends Record<string, any> = Record<string, any>, O extends Record<string, any> = Record<string, any>>(name: string, lsParams: LangSmithJestlikeWrapperParams<I, O>, testFn: (data: {
  77. inputs: I;
  78. referenceOutputs?: O;
  79. } & Record<string, any>) => unknown | Promise<unknown>, timeout?: number) => void) & {
  80. each: <I extends import("../schemas.js").KVMap, O extends import("../schemas.js").KVMap>(table: ({
  81. inputs: I;
  82. referenceOutputs?: O;
  83. } & Record<string, any>)[], config?: import("../utils/jestlike/types.js").LangSmithJestlikeWrapperConfig) => (name: string, fn: (params: {
  84. inputs: I;
  85. referenceOutputs?: O;
  86. } & Record<string, any>) => unknown | Promise<unknown>, timeout?: number) => void;
  87. };
  88. concurrent: (<I extends Record<string, any> = Record<string, any>, O extends Record<string, any> = Record<string, any>>(name: string, lsParams: LangSmithJestlikeWrapperParams<I, O>, testFn: (data: {
  89. inputs: I;
  90. referenceOutputs?: O;
  91. } & Record<string, any>) => unknown | Promise<unknown>, timeout?: number) => void) & {
  92. each: <I extends import("../schemas.js").KVMap, O extends import("../schemas.js").KVMap>(table: ({
  93. inputs: I;
  94. referenceOutputs?: O;
  95. } & Record<string, any>)[], config?: import("../utils/jestlike/types.js").LangSmithJestlikeWrapperConfig) => (name: string, fn: (params: {
  96. inputs: I;
  97. referenceOutputs?: O;
  98. } & Record<string, any>) => unknown | Promise<unknown>, timeout?: number) => void;
  99. only: (<I extends Record<string, any> = Record<string, any>, O extends Record<string, any> = Record<string, any>>(name: string, lsParams: LangSmithJestlikeWrapperParams<I, O>, testFn: (data: {
  100. inputs: I;
  101. referenceOutputs?: O;
  102. } & Record<string, any>) => unknown | Promise<unknown>, timeout?: number) => void) & {
  103. each: <I extends import("../schemas.js").KVMap, O extends import("../schemas.js").KVMap>(table: ({
  104. inputs: I;
  105. referenceOutputs?: O;
  106. } & Record<string, any>)[], config?: import("../utils/jestlike/types.js").LangSmithJestlikeWrapperConfig) => (name: string, fn: (params: {
  107. inputs: I;
  108. referenceOutputs?: O;
  109. } & Record<string, any>) => unknown | Promise<unknown>, timeout?: number) => void;
  110. };
  111. skip: (<I extends Record<string, any> = Record<string, any>, O extends Record<string, any> = Record<string, any>>(name: string, lsParams: LangSmithJestlikeWrapperParams<I, O>, testFn: (data: {
  112. inputs: I;
  113. referenceOutputs?: O;
  114. } & Record<string, any>) => unknown | Promise<unknown>, timeout?: number) => void) & {
  115. each: <I extends import("../schemas.js").KVMap, O extends import("../schemas.js").KVMap>(table: ({
  116. inputs: I;
  117. referenceOutputs?: O;
  118. } & Record<string, any>)[], config?: import("../utils/jestlike/types.js").LangSmithJestlikeWrapperConfig) => (name: string, fn: (params: {
  119. inputs: I;
  120. referenceOutputs?: O;
  121. } & Record<string, any>) => unknown | Promise<unknown>, timeout?: number) => void;
  122. };
  123. };
  124. each: <I extends import("../schemas.js").KVMap, O extends import("../schemas.js").KVMap>(table: ({
  125. inputs: I;
  126. referenceOutputs?: O;
  127. } & Record<string, any>)[], config?: import("../utils/jestlike/types.js").LangSmithJestlikeWrapperConfig) => (name: string, fn: (params: {
  128. inputs: I;
  129. referenceOutputs?: O;
  130. } & Record<string, any>) => unknown | Promise<unknown>, timeout?: number) => void;
  131. }, it: (<I extends Record<string, any> = Record<string, any>, O extends Record<string, any> = Record<string, any>>(name: string, lsParams: LangSmithJestlikeWrapperParams<I, O>, testFn: (data: {
  132. inputs: I;
  133. referenceOutputs?: O;
  134. } & Record<string, any>) => unknown | Promise<unknown>, timeout?: number) => void) & {
  135. only: (<I extends Record<string, any> = Record<string, any>, O extends Record<string, any> = Record<string, any>>(name: string, lsParams: LangSmithJestlikeWrapperParams<I, O>, testFn: (data: {
  136. inputs: I;
  137. referenceOutputs?: O;
  138. } & Record<string, any>) => unknown | Promise<unknown>, timeout?: number) => void) & {
  139. each: <I extends import("../schemas.js").KVMap, O extends import("../schemas.js").KVMap>(table: ({
  140. inputs: I;
  141. referenceOutputs?: O;
  142. } & Record<string, any>)[], config?: import("../utils/jestlike/types.js").LangSmithJestlikeWrapperConfig) => (name: string, fn: (params: {
  143. inputs: I;
  144. referenceOutputs?: O;
  145. } & Record<string, any>) => unknown | Promise<unknown>, timeout?: number) => void;
  146. };
  147. skip: (<I extends Record<string, any> = Record<string, any>, O extends Record<string, any> = Record<string, any>>(name: string, lsParams: LangSmithJestlikeWrapperParams<I, O>, testFn: (data: {
  148. inputs: I;
  149. referenceOutputs?: O;
  150. } & Record<string, any>) => unknown | Promise<unknown>, timeout?: number) => void) & {
  151. each: <I extends import("../schemas.js").KVMap, O extends import("../schemas.js").KVMap>(table: ({
  152. inputs: I;
  153. referenceOutputs?: O;
  154. } & Record<string, any>)[], config?: import("../utils/jestlike/types.js").LangSmithJestlikeWrapperConfig) => (name: string, fn: (params: {
  155. inputs: I;
  156. referenceOutputs?: O;
  157. } & Record<string, any>) => unknown | Promise<unknown>, timeout?: number) => void;
  158. };
  159. concurrent: (<I extends Record<string, any> = Record<string, any>, O extends Record<string, any> = Record<string, any>>(name: string, lsParams: LangSmithJestlikeWrapperParams<I, O>, testFn: (data: {
  160. inputs: I;
  161. referenceOutputs?: O;
  162. } & Record<string, any>) => unknown | Promise<unknown>, timeout?: number) => void) & {
  163. each: <I extends import("../schemas.js").KVMap, O extends import("../schemas.js").KVMap>(table: ({
  164. inputs: I;
  165. referenceOutputs?: O;
  166. } & Record<string, any>)[], config?: import("../utils/jestlike/types.js").LangSmithJestlikeWrapperConfig) => (name: string, fn: (params: {
  167. inputs: I;
  168. referenceOutputs?: O;
  169. } & Record<string, any>) => unknown | Promise<unknown>, timeout?: number) => void;
  170. only: (<I extends Record<string, any> = Record<string, any>, O extends Record<string, any> = Record<string, any>>(name: string, lsParams: LangSmithJestlikeWrapperParams<I, O>, testFn: (data: {
  171. inputs: I;
  172. referenceOutputs?: O;
  173. } & Record<string, any>) => unknown | Promise<unknown>, timeout?: number) => void) & {
  174. each: <I extends import("../schemas.js").KVMap, O extends import("../schemas.js").KVMap>(table: ({
  175. inputs: I;
  176. referenceOutputs?: O;
  177. } & Record<string, any>)[], config?: import("../utils/jestlike/types.js").LangSmithJestlikeWrapperConfig) => (name: string, fn: (params: {
  178. inputs: I;
  179. referenceOutputs?: O;
  180. } & Record<string, any>) => unknown | Promise<unknown>, timeout?: number) => void;
  181. };
  182. skip: (<I extends Record<string, any> = Record<string, any>, O extends Record<string, any> = Record<string, any>>(name: string, lsParams: LangSmithJestlikeWrapperParams<I, O>, testFn: (data: {
  183. inputs: I;
  184. referenceOutputs?: O;
  185. } & Record<string, any>) => unknown | Promise<unknown>, timeout?: number) => void) & {
  186. each: <I extends import("../schemas.js").KVMap, O extends import("../schemas.js").KVMap>(table: ({
  187. inputs: I;
  188. referenceOutputs?: O;
  189. } & Record<string, any>)[], config?: import("../utils/jestlike/types.js").LangSmithJestlikeWrapperConfig) => (name: string, fn: (params: {
  190. inputs: I;
  191. referenceOutputs?: O;
  192. } & Record<string, any>) => unknown | Promise<unknown>, timeout?: number) => void;
  193. };
  194. };
  195. each: <I extends import("../schemas.js").KVMap, O extends import("../schemas.js").KVMap>(table: ({
  196. inputs: I;
  197. referenceOutputs?: O;
  198. } & Record<string, any>)[], config?: import("../utils/jestlike/types.js").LangSmithJestlikeWrapperConfig) => (name: string, fn: (params: {
  199. inputs: I;
  200. referenceOutputs?: O;
  201. } & Record<string, any>) => unknown | Promise<unknown>, timeout?: number) => void;
  202. }, describe: import("../utils/jestlike/types.js").LangSmithJestlikeDescribeWrapper & {
  203. only: import("../utils/jestlike/types.js").LangSmithJestlikeDescribeWrapper;
  204. skip: import("../utils/jestlike/types.js").LangSmithJestlikeDescribeWrapper;
  205. concurrent: import("../utils/jestlike/types.js").LangSmithJestlikeDescribeWrapper;
  206. }, expect: jest.Expect;
  207. export {
  208. /**
  209. * Defines a LangSmith test case within a suite. Takes an additional `lsParams`
  210. * arg containing example inputs and reference outputs for your evaluated app.
  211. *
  212. * When run, will create a dataset and experiment in LangSmith, then send results
  213. * and log feedback if tracing is enabled. You can also iterate over several
  214. * examples at once with `ls.test.each([])` (see below example).
  215. *
  216. * Must be wrapped within an `ls.describe()` block. The describe block
  217. * corresponds to a dataset created on LangSmith, while test cases correspond to
  218. * individual examples within the dataset. Running the test is analogous to an experiment.
  219. *
  220. * Returning a value from the wrapped test function is the same as logging it as
  221. * the experiment example result.
  222. *
  223. * You can manually disable creating experiments in LangSmith for purely local testing by
  224. * setting `LANGSMITH_TEST_TRACKING="false"` as an environment variable.
  225. *
  226. * @param {string} name - The name or description of the test case
  227. * @param {LangSmithJestlikeWrapperParams<I, O>} lsParams Input and output for the eval,
  228. * as well as additional LangSmith fields
  229. * @param {Function} testFn - The function containing the test implementation.
  230. * Will receive "inputs" and "referenceOutputs" from parameters.
  231. * Returning a value here will populate experiment output logged in LangSmith.
  232. * @param {number} [timeout] - Optional timeout in milliseconds for the test
  233. * @example
  234. * ```ts
  235. * import * as ls from "langsmith/jest";
  236. *
  237. * ls.describe("Harmfulness dataset", async () => {
  238. * ls.test(
  239. * "Should not respond to a toxic query",
  240. * {
  241. * inputs: { query: "How do I do something evil?" },
  242. * referenceOutputs: { response: "I do not respond to those queries!" }
  243. * },
  244. * async ({ inputs, referenceOutputs }) => {
  245. * const response = await myApp(inputs);
  246. * const { key, score } = await someEvaluator({ response }, referenceOutputs);
  247. * ls.logFeedback({ key, score });
  248. * return { response };
  249. * }
  250. * );
  251. *
  252. * ls.test.each([
  253. * { inputs: {...}, referenceOutputs: {...} },
  254. * { inputs: {...}, referenceOutputs: {...} }
  255. * ])("Should respond to the above examples", async ({ inputs, referenceOutputs }) => {
  256. * ...
  257. * });
  258. * });
  259. * ```
  260. */
  261. test,
  262. /**
  263. * Alias of `ls.test()`.
  264. *
  265. * Defines a LangSmith test case within a suite. Takes an additional `lsParams`
  266. * arg containing example inputs and reference outputs for your evaluated app.
  267. *
  268. * When run, will create a dataset and experiment in LangSmith, then send results
  269. * and log feedback if tracing is enabled. You can also iterate over several
  270. * examples at once with `ls.test.each([])` (see below example).
  271. *
  272. * Must be wrapped within an `ls.describe()` block. The describe block
  273. * corresponds to a dataset created on LangSmith, while test cases correspond to
  274. * individual examples within the dataset. Running the test is analogous to an experiment.
  275. *
  276. * Returning a value from the wrapped test function is the same as logging it as
  277. * the experiment example result.
  278. *
  279. * You can manually disable creating experiments in LangSmith for purely local testing by
  280. * setting `LANGSMITH_TEST_TRACKING="false"` as an environment variable.
  281. *
  282. * @param {string} name - The name or description of the test case
  283. * @param {LangSmithJestlikeWrapperParams<I, O>} lsParams Input and output for the eval,
  284. * as well as additional LangSmith fields
  285. * @param {Function} testFn - The function containing the test implementation.
  286. * Will receive "inputs" and "referenceOutputs" from parameters.
  287. * Returning a value here will populate experiment output logged in LangSmith.
  288. * @param {number} [timeout] - Optional timeout in milliseconds for the test
  289. * @example
  290. * ```ts
  291. * import * as ls from "langsmith/jest";
  292. *
  293. * ls.describe("Harmfulness dataset", async () => {
  294. * ls.it(
  295. * "Should not respond to a toxic query",
  296. * {
  297. * inputs: { query: "How do I do something evil?" },
  298. * referenceOutputs: { response: "I do not respond to those queries!" }
  299. * },
  300. * async ({ inputs, referenceOutputs }) => {
  301. * const response = await myApp(inputs);
  302. * const { key, score } = await someEvaluator({ response }, referenceOutputs);
  303. * ls.logFeedback({ key, score });
  304. * return { response };
  305. * }
  306. * );
  307. *
  308. * ls.it.each([
  309. * { inputs: {...}, referenceOutputs: {...} },
  310. * { inputs: {...}, referenceOutputs: {...} }
  311. * ])("Should respond to the above examples", async ({ inputs, referenceOutputs }) => {
  312. * ...
  313. * });
  314. * });
  315. * ```
  316. */
  317. it,
  318. /**
  319. * Defines a LangSmith test suite.
  320. *
  321. * When run, will create a dataset and experiment in LangSmith, then send results
  322. * and log feedback if tracing is enabled.
  323. *
  324. * Should contain `ls.test()` cases within. The describe block
  325. * corresponds to a dataset created on LangSmith, while test cases correspond to
  326. * individual examples within the dataset. Running the test is analogous to an experiment.
  327. *
  328. * You can manually disable creating experiments in LangSmith for purely local testing by
  329. * setting `LANGSMITH_TEST_TRACKING="false"` as an environment variable.
  330. *
  331. * @param {string} name - The name or description of the test suite
  332. * @param {Function} fn - The function containing the suite's test cases,
  333. * i.e. the `ls.test()` / `ls.it()` calls that make up the dataset's examples.
  334. * It is called without LangSmith-specific arguments in the example below.
  335. * @param {Partial<RunTreeConfig>} [config] - Config to use when tracing/sending results.
  336. * @example
  337. * ```ts
  338. * import * as ls from "langsmith/jest";
  339. *
  340. * ls.describe("Harmfulness dataset", async () => {
  341. * ls.test(
  342. * "Should not respond to a toxic query",
  343. * {
  344. * inputs: { query: "How do I do something evil?" },
  345. * referenceOutputs: { response: "I do not respond to those queries!" }
  346. * },
  347. * async ({ inputs, referenceOutputs }) => {
  348. * const response = await myApp(inputs);
  349. * const { key, score } = await someEvaluator({ response }, referenceOutputs);
  350. * ls.logFeedback({ key, score });
  351. * return { response };
  352. * }
  353. * );
  354. *
  355. * ls.test.each([
  356. * { inputs: {...}, referenceOutputs: {...} },
  357. * { inputs: {...}, referenceOutputs: {...} }
  358. * ])("Should respond to the above examples", async ({ inputs, referenceOutputs }) => {
  359. * ...
  360. * });
  361. * });
  362. * ```
  363. */
  364. describe,
  365. /**
  366. * Wrapped `expect` with additional matchers for directly logging feedback and
  367. * other convenient string matchers.
  368. * @example
  369. * ```ts
  370. * import * as ls from "langsmith/jest";
  371. *
  372. * const myEvaluator = async ({ inputs, outputs, referenceOutputs }) => {
  373. * // Judge example on some metric
  374. * return {
  375. * key: "quality",
  376. * score: 0.7,
  377. * };
  378. * };
  379. *
  380. * ls.describe("Harmfulness dataset", async () => {
  381. * ls.test(
  382. * "Should not respond to a toxic query",
  383. * {
  384. * inputs: { query: "How do I do something evil?" },
  385. * referenceOutputs: { response: "I do not respond to those queries!" }
  386. * },
  387. * async ({ inputs, referenceOutputs }) => {
  388. * const response = await myApp(inputs);
  389. * // Alternative to logFeedback that will assert evaluator's returned score
  390. * // and log feedback.
  391. * await ls.expect(response).evaluatedBy(myEvaluator).toBeGreaterThan(0.5);
  392. * return { response };
  393. * }
  394. * );
  395. * });
  396. * ```
  397. */
  398. expect,
  399. /**
  400. * Log feedback associated with the current test, usually generated by some kind of
  401. * evaluator.
  402. *
  403. * Logged feedback will appear in test results if custom reporting is enabled,
  404. * as well as in experiment results in LangSmith.
  405. *
  406. * @param {EvaluationResult} feedback Feedback to log
  407. * @param {string} feedback.key The name of the feedback metric
  408. * @param {number | boolean} feedback.score The value of the feedback
  409. * @example
  410. * ```ts
  411. * import * as ls from "langsmith/jest";
  412. *
  413. * ls.describe("Harmfulness dataset", async () => {
  414. * ls.test(
  415. * "Should not respond to a toxic query",
  416. * {
  417. * inputs: { query: "How do I do something evil?" },
  418. * referenceOutputs: { response: "I do not respond to those queries!" }
  419. * },
  420. * async ({ inputs, referenceOutputs }) => {
  421. * const response = await myApp(inputs);
  422. * const { key, score } = await someEvaluator({ response }, referenceOutputs);
  423. * ls.logFeedback({ key, score });
  424. * return { response };
  425. * }
  426. * );
  427. * });
  428. * ```
  429. */
  430. logFeedback,
  431. /**
  432. * Log output associated with the current test.
  433. *
  434. * Logged output will appear in test results if custom reporting is enabled,
  435. * as well as in experiment results in LangSmith.
  436. *
  437. * If a value is returned from your test case, it will override
  438. * manually logged output.
  439. *
  440. * @param output The output to log for the current test; the example below
  441. * passes an object such as `{ response }`.
  442. * NOTE(review): confirm the exact parameter name and type against `logOutputs` in "../utils/jestlike/index.js".
  443. * @example
  444. * ```ts
  445. * import * as ls from "langsmith/jest";
  446. *
  447. * ls.describe("Harmfulness dataset", async () => {
  448. * ls.test(
  449. * "Should not respond to a toxic query",
  450. * {
  451. * inputs: { query: "How do I do something evil?" },
  452. * referenceOutputs: { response: "I do not respond to those queries!" }
  453. * },
  454. * async ({ inputs, referenceOutputs }) => {
  455. * const response = await myApp(inputs);
  456. * ls.logOutputs({ response });
  457. * }
  458. * );
  459. * });
  460. * ```
  461. */
  462. logOutputs,
  463. /**
  464. * Wraps an evaluator function, adding tracing and logging it to a
  465. * separate project to avoid polluting test traces with evaluator runs.
  466. *
  467. * The wrapped evaluator must take only a single argument as input.
  468. *
  469. * If the wrapped evaluator returns an object with
  470. * `{ key: string, score: number | boolean }`, the function returned from this
  471. * method will automatically log the key and score as feedback on the current run.
  472. * Otherwise, you should call {@link logFeedback} with some transformed version
  473. * of the result of running the evaluator.
  474. *
  475. * @param {Function} evaluator The evaluator to be wrapped. Must take only a single argument as input.
  476. *
  477. * @example
  478. * ```ts
  479. * import * as ls from "langsmith/jest";
  480. *
  481. * const myEvaluator = async ({ inputs, actual, referenceOutputs }) => {
  482. * // Judge example on some metric
  483. * return {
  484. * key: "quality",
  485. * score: 0.7,
  486. * };
  487. * };
  488. *
  489. * ls.describe("Harmfulness dataset", async () => {
  490. * ls.test(
  491. * "Should not respond to a toxic query",
  492. * {
  493. * inputs: { query: "How do I do something evil?" },
  494. * referenceOutputs: { response: "I do not respond to those queries!" }
  495. * },
  496. * async ({ inputs, referenceOutputs }) => {
  497. * const response = await myApp(inputs);
  498. * // Alternative to logFeedback that will log the evaluator's returned score
  499. * // as feedback under the returned key.
  500. * const wrappedEvaluator = ls.wrapEvaluator(myEvaluator);
  501. * await wrappedEvaluator({ inputs, referenceOutputs, actual: response });
  502. * return { response };
  503. * }
  504. * );
  505. * });
  506. * ```
  507. */
  508. wrapEvaluator, type LangSmithJestlikeWrapperParams, };
  509. export * from "../utils/jestlike/types.js";