// _runner.js
  1. import { Client } from "../index.js";
  2. import { traceable } from "../traceable.js";
  3. import { getDefaultRevisionId, getGitInfo } from "../utils/_git.js";
  4. import { assertUuid } from "../utils/_uuid.js";
  5. import { AsyncCaller } from "../utils/async_caller.js";
  6. import { atee } from "../utils/atee.js";
  7. import { getLangChainEnvVarsMetadata } from "../utils/env.js";
  8. import { printErrorStackTrace } from "../utils/error.js";
  9. import { randomName } from "./_random_name.js";
  10. import { runEvaluator, } from "./evaluator.js";
  11. import { v4 as uuidv4 } from "uuid";
  12. import { evaluateComparative, } from "./evaluate_comparative.js";
  /**
   * Public entry point for running an evaluation.
   *
   * Both arguments are handed to `_evaluate` untouched and its return value
   * (a promise) is returned as-is.
   *
   * @param target What to evaluate; interpreted by `_evaluate`.
   * @param options Evaluation options; interpreted by `_evaluate`.
   * @returns Whatever `_evaluate(target, options)` returns.
   */
  export function evaluate(target, options) {
    const pendingResults = _evaluate(target, options);
    return pendingResults;
  }
  /**
   * Manage the execution of experiments.
   *
   * Supports lazily running predictions and evaluations in parallel to facilitate
   * result streaming and early debugging.
   *
   * Managers are treated as immutable stages: `start`, `withPredictions`,
   * `withEvaluators` and `withSummaryEvaluators` each return a NEW manager that
   * carries the state accumulated so far.
   */
  export class _ExperimentManager {
    /**
     * Name of the experiment.
     * @throws {Error} If no name has been assigned.
     */
    get experimentName() {
      if (!this._experimentName) {
        throw new Error("Experiment name not provided, and experiment not yet started.");
      }
      return this._experimentName;
    }
    /**
     * Resolve (and cache) the examples for this experiment.
     *
     * When examples were not supplied up front they are loaded from `_data`
     * and repeated `_numRepetitions` times.
     *
     * @returns {Promise<Array>} The cached example list.
     * @throws {Error} If neither examples nor data were provided.
     */
    async getExamples() {
      if (this._examples) {
        return this._examples;
      }
      if (!this._data) {
        throw new Error("Data not provided in this experiment.");
      }
      const unresolvedData = _resolveData(this._data, {
        client: this.client,
        includeAttachments: this._includeAttachments,
      });
      // NOTE: `_examples` is deliberately left unset while loading, so that a
      // concurrent caller never observes a half-initialised (empty) list.
      const loaded = [];
      for await (const example of unresolvedData) {
        loaded.push(example);
      }
      if (this._numRepetitions && this._numRepetitions > 0) {
        const repeated = [];
        for (let i = 0; i < this._numRepetitions; i++) {
          // Push one by one: spreading a very large array into push() can
          // exceed the engine's argument limit.
          for (const example of loaded) {
            repeated.push(example);
          }
        }
        this.setExamples(repeated);
      }
      else {
        this.setExamples(loaded);
      }
      return this._examples;
    }
    /** Replace the cached example list. */
    setExamples(examples) {
      this._examples = examples;
    }
    /**
     * Promise of the dataset id: the experiment's `reference_dataset_id` when
     * present, otherwise the `dataset_id` of the first example.
     * Rejects when there are no examples.
     */
    get datasetId() {
      return this.getExamples().then((examples) => {
        if (examples.length === 0) {
          throw new Error("No examples found in the dataset.");
        }
        if (this._experiment && this._experiment.reference_dataset_id) {
          return this._experiment.reference_dataset_id;
        }
        return examples[0].dataset_id;
      });
    }
    /**
     * Async iterable of per-example evaluation results. When no evaluators
     * have run, yields one empty `{ results: [] }` per example.
     */
    get evaluationResults() {
      if (this._evaluationResults !== undefined) {
        return this._evaluationResults;
      }
      return async function* () {
        for (const _ of await this.getExamples()) {
          yield { results: [] };
        }
      }.call(this);
    }
    /**
     * Async iterable of runs.
     * @throws {Error} If runs were already materialised into an array, or if
     * no runs are available yet.
     */
    get runs() {
      if (this._runsArray && this._runsArray.length > 0) {
        throw new Error("Runs already provided as an array.");
      }
      if (this._runs === undefined) {
        throw new Error("Runs not provided in this experiment. Please predict first.");
      }
      return this._runs;
    }
    /**
     * @param args Manager state. Reads `client`, `experiment` (string prefix or
     * object with a `name`), `metadata`, `examples`, `data`, `runs`,
     * `_runsArray`, `evaluationResults`, `summaryResults`, `numRepetitions`,
     * `includeAttachments` and `description`.
     * @throws {Error} If `args.experiment` is an object without a name.
     */
    constructor(args) {
      // Declare every instance field up front, in a fixed order, as a plain
      // enumerable/configurable/writable data property initialised to undefined.
      const fieldNames = [
        "_data",
        "_runs",
        "_evaluationResults",
        "_summaryResults",
        "_examples",
        "_numRepetitions",
        "_runsArray",
        "client",
        "_experiment",
        "_experimentName",
        "_metadata",
        "_description",
        "_includeAttachments",
      ];
      for (const fieldName of fieldNames) {
        Object.defineProperty(this, fieldName, {
          enumerable: true,
          configurable: true,
          writable: true,
          value: void 0
        });
      }
      this.client = args.client ?? new Client();
      if (!args.experiment) {
        this._experimentName = randomName();
      }
      else if (typeof args.experiment === "string") {
        this._experimentName = `${args.experiment}-${uuidv4().slice(0, 8)}`;
      }
      else {
        if (!args.experiment.name) {
          throw new Error("Experiment must have a name");
        }
        this._experimentName = args.experiment.name;
        this._experiment = args.experiment;
      }
      // Always work on a private copy: `start()` later writes
      // `num_repetitions` into this object and must not touch the caller's.
      const providedMetadata = args.metadata || {};
      this._metadata = "revision_id" in providedMetadata
        ? { ...providedMetadata }
        : {
          revision_id: getLangChainEnvVarsMetadata().revision_id,
          ...providedMetadata,
        };
      if (args.examples && args.examples.length) {
        this.setExamples(args.examples);
      }
      this._data = args.data;
      if (args._runsArray && args._runsArray.length) {
        this._runsArray = args._runsArray;
      }
      this._runs = args.runs;
      this._evaluationResults = args.evaluationResults;
      this._summaryResults = args.summaryResults;
      this._numRepetitions = args.numRepetitions;
      this._includeAttachments = args.includeAttachments;
      // Used by `_createProject`; stays undefined when not supplied.
      this._description = args.description;
    }
    /**
     * @returns The experiment (project) object.
     * @throws {Error} If the experiment has not been started.
     */
    _getExperiment() {
      if (!this._experiment) {
        throw new Error("Experiment not yet started.");
      }
      return this._experiment;
    }
    /**
     * Build the project metadata: the experiment's existing `extra.metadata`
     * (if any), overridden by this manager's metadata plus git info.
     */
    async _getExperimentMetadata() {
      let projectMetadata = this._metadata ?? {};
      const gitInfo = await getGitInfo();
      if (gitInfo) {
        projectMetadata = {
          ...projectMetadata,
          git: gitInfo,
        };
      }
      if (this._experiment) {
        const extra = this._experiment.extra;
        const experimentMetadata = extra && "metadata" in extra ? extra.metadata : {};
        projectMetadata = {
          ...experimentMetadata,
          ...projectMetadata,
        };
      }
      return projectMetadata;
    }
    /**
     * Create the project, retrying with a random suffix on naming conflicts.
     *
     * @param firstExample Example whose `dataset_id` becomes the reference dataset.
     * @param projectMetadata Metadata stored on the new project.
     * @returns The created project.
     * @throws {Error} If there is no example, or no unique name is found
     * within 10 attempts. Non-conflict errors are rethrown untouched.
     */
    async _createProject(firstExample, projectMetadata) {
      if (!firstExample) {
        // Fail with a clear message instead of a TypeError on `.dataset_id`.
        throw new Error("No examples found in the dataset.");
      }
      const originalExperimentName = this._experimentName;
      for (let attempt = 0; attempt < 10; attempt++) {
        try {
          return await this.client.createProject({
            projectName: this._experimentName,
            referenceDatasetId: firstExample.dataset_id,
            metadata: projectMetadata,
            description: this._description,
          });
        }
        catch (e) {
          if (e?.name !== "LangSmithConflictError") {
            throw e;
          }
          // Naming collision: try again under a suffixed name.
          const suffix = uuidv4().slice(0, 6);
          this._experimentName = `${originalExperimentName}-${suffix}`;
        }
      }
      throw new Error("Could not generate a unique experiment name within 10 attempts." +
        " Please try again with a different name.");
    }
    /** Return the existing experiment, creating the project first if needed. */
    async _getProject(firstExample) {
      if (!this._experiment) {
        const projectMetadata = await this._getExperimentMetadata();
        this._experiment = await this._createProject(firstExample, projectMetadata);
      }
      return this._experiment;
    }
    /** Log the experiment name and, when resolvable, the comparison URL. */
    async _printExperimentStart() {
      console.log(`Starting evaluation of experiment: ${this.experimentName}`);
      const datasetId = this._examples?.[0]?.dataset_id;
      if (!datasetId || !this._experiment) {
        return;
      }
      const datasetUrl = await this.client.getDatasetUrl({ datasetId });
      const compareUrl = `${datasetUrl}/compare?selectedSessions=${this._experiment.id}`;
      console.log(`View results at ${compareUrl}`);
    }
    /**
     * Resolve examples, ensure the project exists and return a started manager.
     * Runs supplied to this manager are carried over to the returned one.
     */
    async start() {
      const examples = await this.getExamples();
      const project = await this._getProject(examples[0]);
      await this._printExperimentStart();
      this._metadata["num_repetitions"] = this._numRepetitions;
      return new _ExperimentManager({
        examples,
        experiment: project,
        metadata: this._metadata,
        client: this.client,
        // Keep pre-existing runs available to later stages.
        runs: this._runs,
        _runsArray: this._runsArray,
        evaluationResults: this._evaluationResults,
        summaryResults: this._summaryResults,
        includeAttachments: this._includeAttachments,
        description: this._description,
      });
    }
    /** Return a manager whose runs are lazily produced by running `target`. */
    async withPredictions(target, options) {
      const predictions = this._predict(target, options);
      return new _ExperimentManager({
        examples: await this.getExamples(),
        experiment: this._experiment,
        metadata: this._metadata,
        client: this.client,
        runs: (async function* () {
          for await (const prediction of predictions) {
            yield prediction.run;
          }
        })(),
        includeAttachments: this._includeAttachments,
      });
    }
    /** Return a manager whose runs and evaluation results come from scoring. */
    async withEvaluators(evaluators, options) {
      const resolvedEvaluators = _resolveEvaluators(evaluators);
      const scored = this._score(resolvedEvaluators, options);
      // The scored stream feeds two consumers: runs and evaluation results.
      const [forRuns, forEvaluations] = atee(scored);
      return new _ExperimentManager({
        examples: await this.getExamples(),
        experiment: this._experiment,
        metadata: this._metadata,
        client: this.client,
        runs: (async function* () {
          for await (const result of forRuns) {
            yield result.run;
          }
        })(),
        evaluationResults: (async function* () {
          for await (const result of forEvaluations) {
            yield result.evaluationResults;
          }
        })(),
        summaryResults: this._summaryResults,
        includeAttachments: this._includeAttachments,
      });
    }
    /** Return a manager that will also compute summary (aggregate) feedback. */
    async withSummaryEvaluators(summaryEvaluators) {
      const aggregateFeedbackGen = this._applySummaryEvaluators(summaryEvaluators);
      return new _ExperimentManager({
        examples: await this.getExamples(),
        experiment: this._experiment,
        metadata: this._metadata,
        client: this.client,
        runs: this.runs,
        _runsArray: this._runsArray,
        evaluationResults: this._evaluationResults,
        summaryResults: aggregateFeedbackGen,
        includeAttachments: this._includeAttachments,
      });
    }
    /**
     * Drain runs and evaluation results, then yield one
     * `{ run, example, evaluationResults }` per run, matched by index.
     */
    async *getResults() {
      const examples = await this.getExamples();
      if (!this._runsArray) {
        this._runsArray = [];
        for await (const run of this.runs) {
          this._runsArray.push(run);
        }
      }
      const evaluationResults = [];
      for await (const evaluationResult of this.evaluationResults) {
        evaluationResults.push(evaluationResult);
      }
      for (let i = 0; i < this._runsArray.length; i++) {
        yield {
          run: this._runsArray[i],
          example: examples[i],
          evaluationResults: evaluationResults[i],
        };
      }
    }
    /** Collect all summary evaluation results into `{ results }`. */
    async getSummaryScores() {
      if (!this._summaryResults) {
        return { results: [] };
      }
      const results = [];
      for await (const evaluationResultsGenerator of this._summaryResults) {
        if (typeof evaluationResultsGenerator === "function") {
          // This is because runs array is not available until after this generator
          // is set, so we need to pass it like so.
          for await (const evaluationResults of evaluationResultsGenerator(this._runsArray ?? [])) {
            results.push(...evaluationResults.results);
          }
        }
      }
      return { results };
    }
    // Private methods
    /**
     * Run the target function or runnable on the examples.
     * @param {StandardTargetT} target The target function or runnable to evaluate.
     * @param options Reads `maxConcurrency`; 0 (the default) runs sequentially.
     * @returns {AsyncGenerator<_ForwardResults>} An async generator of the results.
     */
    async *_predict(target, options) {
      const maxConcurrency = options?.maxConcurrency ?? 0;
      const examples = await this.getExamples();
      if (maxConcurrency === 0) {
        for (const example of examples) {
          yield await _forward(target, example, this.experimentName, this._metadata, this.client, this._includeAttachments);
        }
      }
      else {
        const caller = new AsyncCaller({
          maxConcurrency,
          debug: this.client.debug,
        });
        const futures = [];
        for await (const example of examples) {
          futures.push(caller.call(_forward, target, example, this.experimentName, this._metadata, this.client, this._includeAttachments));
        }
        for await (const future of futures) {
          yield future;
        }
      }
      // Close out the project.
      await this._end();
    }
    /**
     * Run every evaluator against one result, logging feedback through
     * `fields.client`. Evaluator failures are logged and skipped.
     * @returns The same `{ run, example, evaluationResults }`, with
     * `evaluationResults.results` extended in place.
     */
    async _runEvaluators(evaluators, currentResults, fields) {
      const { run, example, evaluationResults } = currentResults;
      for (const evaluator of evaluators) {
        try {
          const options = {
            reference_example_id: example.id,
            project_name: "evaluators",
            metadata: {
              example_version: example.modified_at
                ? new Date(example.modified_at).toISOString()
                : new Date(example.created_at).toISOString(),
            },
            client: fields.client,
            tracingEnabled: true,
          };
          const evaluatorResponse = await evaluator.evaluateRun(run, example, options);
          evaluationResults.results.push(...(await fields.client.logEvaluationFeedback(evaluatorResponse, run)));
        }
        catch (e) {
          console.error(`Error running evaluator ${evaluator.evaluateRun.name} on run ${run.id}: ${e}`);
          printErrorStackTrace(e);
        }
      }
      return {
        run,
        example,
        evaluationResults,
      };
    }
    /**
     * Run the evaluators on the prediction stream.
     * Expects runs to be available in the manager.
     * (e.g. from a previous prediction step)
     * @param {Array<RunEvaluator>} evaluators
     * @param options Reads `maxConcurrency`; 0 (the default) runs sequentially.
     */
    async *_score(evaluators, options) {
      const { maxConcurrency = 0 } = options || {};
      if (maxConcurrency === 0) {
        for await (const currentResults of this.getResults()) {
          yield this._runEvaluators(evaluators, currentResults, {
            client: this.client,
          });
        }
        return;
      }
      const caller = new AsyncCaller({
        maxConcurrency,
        debug: this.client.debug,
      });
      // Bind so the method keeps working if it ever starts reading `this`.
      const runEvaluators = this._runEvaluators.bind(this);
      const futures = [];
      for await (const currentResults of this.getResults()) {
        futures.push(caller.call(runEvaluators, evaluators, currentResults, {
          client: this.client,
        }));
      }
      for (const result of futures) {
        yield result;
      }
    }
    /**
     * Yield a single generator function which, given the runs array, runs every
     * summary evaluator, records feedback on the project and yields
     * `{ results }`. Evaluator failures are logged and skipped.
     */
    async *_applySummaryEvaluators(summaryEvaluators) {
      const projectId = this._getExperiment().id;
      const examples = await this.getExamples();
      const client = this.client;
      const options = Array.from({ length: summaryEvaluators.length }).map(() => ({
        project_name: "evaluators",
        experiment: this.experimentName,
        projectId: projectId,
      }));
      const wrappedEvaluators = await wrapSummaryEvaluators(summaryEvaluators, options);
      yield async function* (runsArray) {
        const aggregateFeedback = [];
        for (const evaluator of wrappedEvaluators) {
          try {
            const summaryEvalResult = await evaluator(runsArray, examples);
            const flattenedResults = client._selectEvalResults(summaryEvalResult);
            aggregateFeedback.push(...flattenedResults);
            for (const result of flattenedResults) {
              // eslint-disable-next-line @typescript-eslint/no-unused-vars
              const { targetRunId, key, evaluatorInfo, ...feedback } = result;
              await client.createFeedback(null, key, {
                ...feedback,
                projectId: projectId,
                sourceInfo: evaluatorInfo,
              });
            }
          }
          catch (e) {
            // `${e}` rather than JSON.stringify: Error instances serialise to "{}".
            console.error(`Error running summary evaluator ${evaluator.name}: ${e}`);
            printErrorStackTrace(e);
          }
        }
        yield {
          results: aggregateFeedback,
        };
      };
    }
    /**
     * Find the most recent `modified_at` among the examples.
     *
     * Timestamps may carry more fractional digits than a JS Date keeps
     * (Python might return microseconds), so the sub-millisecond remainder is
     * added back before comparing. Examples without `modified_at` are ignored.
     *
     * @returns The winning `modified_at` value exactly as stored on the
     * example, or undefined when no example has one.
     */
    async _getDatasetVersion() {
      const examples = await this.getExamples();
      // The dot is escaped so a timezone offset ("+05:30") directly after the
      // seconds is not mistaken for fractional digits.
      const fractionPattern = /[0-9]{2}:[0-9]{2}:[0-9]{2}\.([0-9]+)/;
      const fractionDigits = (isoString) => {
        const timePart = isoString.split("T").at(1);
        if (!timePart) {
          return "";
        }
        return timePart.match(fractionPattern)?.[1] ?? "";
      };
      const candidates = examples
        .map((ex) => ex.modified_at)
        .filter(Boolean)
        .map((date) => {
          const jsDate = new Date(date);
          let source = fractionDigits(date);
          let parsed = fractionDigits(jsDate.toISOString());
          const length = Math.max(source.length, parsed.length);
          source = source.padEnd(length, "0");
          parsed = parsed.padEnd(length, "0");
          const microseconds = (Number.parseInt(source, 10) - Number.parseInt(parsed, 10)) / 1000;
          return { date, time: jsDate.getTime() + microseconds };
        });
      if (candidates.length === 0) {
        return undefined;
      }
      return candidates.reduce((max, current) => (current.time > max.time ? current : max), candidates[0]).date;
    }
    /**
     * Collect the distinct `metadata.dataset_split` values (string or array)
     * across all examples; undefined when there are none.
     */
    async _getDatasetSplits() {
      const examples = await this.getExamples();
      const allSplits = new Set();
      for (const ex of examples) {
        const split = ex.metadata && ex.metadata.dataset_split;
        if (!split) {
          continue;
        }
        if (Array.isArray(split)) {
          split.forEach((name) => allSplits.add(name));
        }
        else if (typeof split === "string") {
          allSplits.add(split);
        }
      }
      return allSplits.size ? Array.from(allSplits) : undefined;
    }
    /**
     * Finalise the project: write dataset version, dataset splits and (when
     * missing) a default revision id into the project metadata.
     * @throws {Error} If the experiment has not been started.
     */
    async _end() {
      const experiment = this._experiment;
      if (!experiment) {
        throw new Error("Experiment not yet started.");
      }
      const projectMetadata = await this._getExperimentMetadata();
      projectMetadata["dataset_version"] = await this._getDatasetVersion();
      projectMetadata["dataset_splits"] = await this._getDatasetSplits();
      // Update revision_id if not already set
      if (!projectMetadata["revision_id"]) {
        projectMetadata["revision_id"] = await getDefaultRevisionId();
      }
      await this.client.updateProject(experiment.id, {
        metadata: projectMetadata,
      });
    }
  }
  /**
   * Represents the results of an evaluate() call.
   * This class provides an iterator interface to iterate over the experiment results
   * as they become available. It also provides methods to access the experiment name,
   * the number of results, and to wait for the results to be processed.
   *
   * `processedCount` counts results collected by `processData`; iteration uses
   * its own cursor so that every collected result can still be iterated after
   * processing has finished.
   */
  class ExperimentResults {
    /**
     * @param experimentManager Manager the results belong to; only its
     * `experimentName` is read by this class directly.
     */
    constructor(experimentManager) {
      Object.defineProperty(this, "manager", {
        enumerable: true,
        configurable: true,
        writable: true,
        value: void 0
      });
      Object.defineProperty(this, "results", {
        enumerable: true,
        configurable: true,
        writable: true,
        value: []
      });
      Object.defineProperty(this, "processedCount", {
        enumerable: true,
        configurable: true,
        writable: true,
        value: 0
      });
      Object.defineProperty(this, "summaryResults", {
        enumerable: true,
        configurable: true,
        writable: true,
        value: void 0
      });
      // Read position of the async iterator. Kept non-enumerable so the
      // object's visible shape is unchanged.
      Object.defineProperty(this, "_iterationIndex", {
        enumerable: false,
        configurable: true,
        writable: true,
        value: 0
      });
      this.manager = experimentManager;
    }
    /** Name of the underlying experiment. */
    get experimentName() {
      return this.manager.experimentName;
    }
    /** This object is its own async iterator. */
    [Symbol.asyncIterator]() {
      return this;
    }
    /**
     * Async-iterator step: return the next collected result, or `done` once
     * every collected result has been handed out.
     */
    async next() {
      if (this._iterationIndex < this.results.length) {
        const value = this.results[this._iterationIndex];
        this._iterationIndex += 1;
        return { value, done: false };
      }
      return { value: undefined, done: true };
    }
    /**
     * Drain `manager.getResults()` into `results`, then fetch summary scores.
     * @param manager Object exposing `getResults()` (async iterable) and
     * `getSummaryScores()`.
     */
    async processData(manager) {
      for await (const item of manager.getResults()) {
        this.results.push(item);
        this.processedCount++;
      }
      this.summaryResults = await manager.getSummaryScores();
    }
    /** Number of results collected so far. */
    get length() {
      return this.results.length;
    }
  }
  /**
   * Implementation behind `evaluate`.
   *
   * - When `target` is an array, delegates to `evaluateComparative`.
   * - Otherwise builds an `_ExperimentManager`, starts it, attaches
   *   predictions (when `target` is callable), evaluators and summary
   *   evaluators as configured, then collects everything into an
   *   `ExperimentResults`.
   *
   * @param target Array (comparative evaluation), a callable / object with
   * `invoke`, or otherwise treated as pre-existing runs.
   * @param fields Evaluation options.
   * @returns {Promise} Comparative results, or a fully processed `ExperimentResults`.
   * @throws {Error} If comparative evaluation is requested without evaluators.
   */
  async function _evaluate(target, fields) {
    // Add check for comparative evaluation
    if (Array.isArray(target)) {
      const comparativeOptions = fields;
      if (!comparativeOptions.evaluators) {
        throw new Error("Evaluators are required for comparative evaluation");
      }
      return evaluateComparative(target, {
        evaluators: comparativeOptions.evaluators,
        client: comparativeOptions.client,
        metadata: comparativeOptions.metadata,
        experimentPrefix: comparativeOptions.experimentPrefix,
        description: comparativeOptions.description,
        maxConcurrency: comparativeOptions.maxConcurrency,
        loadNested: comparativeOptions.loadNested ?? false,
        randomizeOrder: comparativeOptions.randomizeOrder ?? false,
      });
    }
    const client = fields.client ?? new Client();
    const targetIsCallable = _isCallable(target);
    const runs = targetIsCallable ? null : target;
    const standardFields = fields;
    const [experiment_, newRuns] = await _resolveExperiment(fields.experiment ?? null, runs, client);
    // An array of data is taken as ready-made examples; anything else is
    // resolved lazily by the manager.
    const dataIsExampleList = Array.isArray(standardFields.data);
    let manager = await new _ExperimentManager({
      data: dataIsExampleList ? undefined : standardFields.data,
      examples: dataIsExampleList ? standardFields.data : undefined,
      client,
      metadata: fields.metadata,
      experiment: experiment_ ?? fields.experimentPrefix,
      runs: newRuns ?? undefined,
      numRepetitions: fields.numRepetitions ?? 1,
      includeAttachments: standardFields.includeAttachments,
      // Forwarded for parity with the comparative path above.
      description: standardFields.description,
    }).start();
    if (targetIsCallable) {
      manager = await manager.withPredictions(target, {
        maxConcurrency: fields.maxConcurrency,
      });
    }
    if (standardFields.evaluators) {
      manager = await manager.withEvaluators(standardFields.evaluators, {
        maxConcurrency: fields.maxConcurrency,
      });
    }
    if (standardFields.summaryEvaluators) {
      manager = await manager.withSummaryEvaluators(standardFields.summaryEvaluators);
    }
    // Start consuming the results.
    const results = new ExperimentResults(manager);
    await results.processData(manager);
    return results;
  }
  /**
   * Execute the target on a single example under tracing and return the
   * captured run.
   *
   * The target is wrapped with `traceable`; the run is captured through the
   * `on_end` hook. Errors thrown by the target are logged, not rethrown.
   *
   * @param fn Target: a function, or an object exposing `invoke`.
   * @param example Example providing `id`, `inputs`, `modified_at` /
   * `created_at` and (optionally) `attachments`.
   * @param experimentName Used as the tracing `project_name`.
   * @param metadata Extra metadata merged into the run metadata.
   * @param client Client handed to the tracing wrapper.
   * @param includeAttachments When truthy, `example.attachments` is passed to the target.
   * @returns {Promise<{run, example}>}
   * @throws {Error} If no run was reported by the tracing wrapper.
   */
  async function _forward(fn, example, experimentName, metadata, client, includeAttachments) {
    let capturedRun = null;
    const versionSource = example.modified_at ? example.modified_at : example.created_at;
    const traceConfig = {
      reference_example_id: example.id,
      on_end: (finishedRun) => {
        capturedRun = finishedRun;
      },
      project_name: experimentName,
      metadata: {
        ...metadata,
        example_version: new Date(versionSource).toISOString(),
      },
      client,
      tracingEnabled: true,
    };
    const hasInvoke = "invoke" in fn;
    const invokeWithConfig = async (inputs) => {
      let langChainCallbacks;
      try {
        // TODO: Deprecate this and rely on interop on 0.2 minor bump.
        const { getLangchainCallbacks } = await import("../langchain.js");
        langChainCallbacks = await getLangchainCallbacks();
      }
      catch {
        // no-op
      }
      // Only add the keys that apply; with no keys at all, invoke is called
      // with a single argument (callbacks unavailable => rely on interop).
      const invokeConfig = {};
      if (includeAttachments) {
        invokeConfig.attachments = example.attachments;
      }
      if (langChainCallbacks !== undefined) {
        invokeConfig.callbacks = langChainCallbacks;
      }
      if (Object.keys(invokeConfig).length === 0) {
        return await fn.invoke(inputs);
      }
      return await fn.invoke(inputs, invokeConfig);
    };
    const wrappedFn = hasInvoke
      ? traceable(invokeWithConfig, traceConfig)
      : traceable(fn, traceConfig);
    try {
      if (includeAttachments && !hasInvoke) {
        await wrappedFn(example.inputs, { attachments: example.attachments });
      }
      else {
        await wrappedFn(example.inputs);
      }
    }
    catch (e) {
      console.error(`Error running target function: ${e}`);
      printErrorStackTrace(e);
    }
    if (!capturedRun) {
      throw new Error("Run not created by target function.\nThis is most likely due to tracing not being enabled.\n\nTry setting \"LANGSMITH_TRACING=true\" in your environment.");
    }
    return {
      run: capturedRun,
      example,
    };
  }
  /**
   * Turn the `data` option into an iterable of examples.
   *
   * - A string that passes `assertUuid` is used as a dataset id.
   * - Any other string is used as a dataset name.
   * - Non-string values are returned untouched.
   *
   * @param data Dataset id, dataset name, or an already-iterable source.
   * @param options Provides `client` and `includeAttachments`.
   * @returns The result of `client.listExamples(...)`, or `data` itself.
   */
  function _resolveData(data, options) {
    if (typeof data !== "string") {
      return data;
    }
    let looksLikeUuid = true;
    try {
      assertUuid(data);
    }
    catch {
      looksLikeUuid = false;
    }
    const { client, includeAttachments } = options;
    return looksLikeUuid
      ? client.listExamples({ datasetId: data, includeAttachments })
      : client.listExamples({ datasetName: data, includeAttachments });
  }
  /**
   * Wrap each summary evaluator so that every invocation is traced.
   *
   * Each wrapper accepts `(runs, examples)` and resolves to the evaluator's
   * result. Evaluators declaring exactly one parameter receive a single
   * object `{ runs, examples, inputs, outputs, referenceOutputs }`; all others
   * are called with the traditional `(runs, examples)` signature.
   *
   * @param evaluators Summary evaluator functions.
   * @param optionsArray Optional per-evaluator tracing options; the entry at
   * index `i` is applied to `evaluators[i]`.
   * @returns {Promise<Array<Function>>} Wrapped evaluators, in input order.
   */
  async function wrapSummaryEvaluators(evaluators, optionsArray) {
    function _wrap(evaluator, evaluatorOptions) {
      const evalName = evaluator.name || "BatchEvaluator";
      return (runs, examples) => {
        const traced = traceable((_runs_, _examples_) => {
          // Check if the evaluator expects an object parameter
          if (evaluator.length === 1) {
            const inputs = examples.map((ex) => ex.inputs);
            const outputs = runs.map((run) => run.outputs || {});
            const referenceOutputs = examples.map((ex) => ex.outputs || {});
            return Promise.resolve(evaluator({
              runs,
              examples,
              inputs,
              outputs,
              referenceOutputs,
            }));
          }
          // Otherwise use the traditional (runs, examples) signature
          return Promise.resolve(evaluator(runs, examples));
        }, { ...evaluatorOptions, name: evalName });
        // The traced function only receives short descriptions as its inputs;
        // the real arrays are captured from the enclosing scope.
        return Promise.resolve(traced(`Runs[] (Length=${runs.length})`, `Examples[] (Length=${examples.length})`));
      };
    }
    // Apply the options that belong to each evaluator instead of spreading
    // the whole options array into the config.
    return evaluators.map((evaluator, index) => _wrap(evaluator, optionsArray?.[index]));
  }
  /**
   * Normalise a mixed list of evaluators.
   *
   * Entries that already expose `evaluateRun` are kept as they are; any other
   * entry is wrapped with `runEvaluator`.
   *
   * @param evaluators Evaluator objects and/or plain functions.
   * @returns {Array} Evaluators, in input order.
   * @throws {Error} For an entry named "LangChainStringEvaluator".
   */
  function _resolveEvaluators(evaluators) {
    const resolved = [];
    for (const candidate of evaluators) {
      if ("evaluateRun" in candidate) {
        resolved.push(candidate);
        continue;
      }
      // todo fix this by porting LangChainStringEvaluator to langsmith sdk
      if (candidate.name === "LangChainStringEvaluator") {
        throw new Error("Not yet implemented");
      }
      resolved.push(runEvaluator(candidate));
    }
    return resolved;
  }
  /**
   * Work out which experiment an evaluation belongs to.
   *
   * - An explicit experiment is returned as-is (it must have a name).
   * - Otherwise, when runs are given, the experiment is looked up from the
   *   first run's session id and a fresh copy of the runs stream is returned.
   * - Otherwise nothing is resolved.
   *
   * @param experiment Experiment object, or null.
   * @param runs Async iterable of runs, or null.
   * @param client Client used for `readProject`.
   * @returns {Promise<[experiment|undefined, runs|undefined]>}
   * @throws {Error} If the experiment has no name, if `runs` is empty, or if
   * the retrieved experiment has no name.
   */
  async function _resolveExperiment(experiment, runs, client) {
    // TODO: Remove this, handle outside the manager
    if (experiment !== null) {
      if (!experiment.name) {
        throw new Error("Experiment name must be defined if provided.");
      }
      return [experiment, undefined];
    }
    if (runs === null) {
      return [undefined, undefined];
    }
    // If we have runs, that means the experiment was already started.
    // Split the stream: one copy to peek at the first run, one for the caller.
    const [runsClone, runsOriginal] = atee(runs);
    const { value: firstRun } = await runsClone[Symbol.asyncIterator]().next();
    if (!firstRun) {
      // Fail clearly rather than with a TypeError on property access below.
      throw new Error("No runs provided; cannot determine the experiment.");
    }
    // NOTE(review): `sessionId` is what this code historically read; runs are
    // presumably keyed by `session_id` — accept either, confirm against the Run schema.
    const sessionId = firstRun.session_id ?? firstRun.sessionId;
    const retrievedExperiment = await client.readProject(sessionId);
    if (!retrievedExperiment.name) {
      throw new Error("Experiment name not found for provided runs.");
    }
    return [retrievedExperiment, runsOriginal];
  }
  /**
   * Tell whether `target` can be executed as an evaluation target: either a
   * function, or an object exposing an `invoke` function.
   *
   * @param target Any value.
   * @returns {boolean} False for null, undefined and other non-object values
   * (the `in` operator would throw on those).
   */
  function _isCallable(target) {
    if (typeof target === "function") {
      return true;
    }
    if (target === null || typeof target !== "object") {
      return false;
    }
    return "invoke" in target && typeof target.invoke === "function";
  }