_runner.cjs 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871
  1. "use strict";
  2. Object.defineProperty(exports, "__esModule", { value: true });
  3. exports._ExperimentManager = void 0;
  4. exports.evaluate = evaluate;
  5. const index_js_1 = require("../index.cjs");
  6. const traceable_js_1 = require("../traceable.cjs");
  7. const _git_js_1 = require("../utils/_git.cjs");
  8. const _uuid_js_1 = require("../utils/_uuid.cjs");
  9. const async_caller_js_1 = require("../utils/async_caller.cjs");
  10. const atee_js_1 = require("../utils/atee.cjs");
  11. const env_js_1 = require("../utils/env.cjs");
  12. const error_js_1 = require("../utils/error.cjs");
  13. const _random_name_js_1 = require("./_random_name.cjs");
  14. const evaluator_js_1 = require("./evaluator.cjs");
  15. const uuid_1 = require("uuid");
  16. const evaluate_comparative_js_1 = require("./evaluate_comparative.cjs");
  /**
   * Public entry point for running an evaluation.
   *
   * Hands both arguments, untouched, to the internal `_evaluate` implementation
   * and returns whatever it returns (`_evaluate` is an async function, so the
   * caller receives a promise).
   *
   * @param target The system under evaluation, forwarded as-is.
   * @param options Evaluation options, forwarded as-is.
   * @returns The pending result produced by `_evaluate`.
   */
  function evaluate(target, options) {
    const pendingResults = _evaluate(target, options);
    return pendingResults;
  }
  /**
   * Manage the execution of experiments.
   *
   * Supports lazily running predictions and evaluations in parallel to facilitate
   * result streaming and early debugging.
   *
   * The `start` / `with*` methods do not mutate the manager they are called on;
   * each returns a new manager carrying the additional lazy stream.
   */
  class _ExperimentManager {
    /**
     * Name of the experiment.
     * @throws {Error} If no name has been assigned.
     */
    get experimentName() {
      if (this._experimentName) {
        return this._experimentName;
      }
      throw new Error("Experiment name not provided, and experiment not yet started.");
    }

    /**
     * Resolve the examples for this experiment, loading them from `_data` on
     * first use and caching the result.
     *
     * Concurrent callers share one in-flight load, so none of them can observe
     * a partially populated list and a single-use data iterable is only
     * consumed once.
     *
     * @returns {Promise<Array>} The examples, repeated `_numRepetitions` times.
     * @throws {Error} If neither examples nor data were provided.
     */
    async getExamples() {
      if (this._examples) {
        return this._examples;
      }
      if (!this._examplesLoading) {
        this._examplesLoading = this._loadExamples().finally(() => {
          // Allow a retry after a failed load; after success `_examples` is set.
          this._examplesLoading = undefined;
        });
      }
      return this._examplesLoading;
    }

    /** Drain `_data` into an array, apply repetitions and cache the result. */
    async _loadExamples() {
      if (!this._data) {
        throw new Error("Data not provided in this experiment.");
      }
      const unresolvedData = _resolveData(this._data, {
        client: this.client,
        includeAttachments: this._includeAttachments,
      });
      const loaded = [];
      for await (const example of unresolvedData) {
        loaded.push(example);
      }
      if (this._numRepetitions && this._numRepetitions > 0) {
        const repeated = [];
        for (let i = 0; i < this._numRepetitions; i++) {
          repeated.push(...loaded);
        }
        this.setExamples(repeated);
      } else {
        this.setExamples(loaded);
      }
      return this._examples;
    }

    /** Replace the cached example list. */
    setExamples(examples) {
      this._examples = examples;
    }

    /**
     * Promise of the dataset id: the experiment's `reference_dataset_id` when
     * present, otherwise the `dataset_id` of the first example.
     * Rejects when there are no examples.
     */
    get datasetId() {
      return this.getExamples().then((examples) => {
        if (examples.length === 0) {
          throw new Error("No examples found in the dataset.");
        }
        if (this._experiment && this._experiment.reference_dataset_id) {
          return this._experiment.reference_dataset_id;
        }
        return examples[0].dataset_id;
      });
    }

    /**
     * Async stream of evaluation results. When no evaluators have been applied
     * this yields one empty `{ results: [] }` entry per example.
     */
    get evaluationResults() {
      if (this._evaluationResults !== undefined) {
        return this._evaluationResults;
      }
      const manager = this;
      return (async function* () {
        for (const _ of await manager.getExamples()) {
          yield { results: [] };
        }
      })();
    }

    /**
     * Async stream of runs.
     * @throws {Error} If the runs were already materialised into an array, or
     *   were never provided.
     */
    get runs() {
      if (this._runsArray && this._runsArray.length > 0) {
        throw new Error("Runs already provided as an array.");
      }
      if (this._runs === undefined) {
        throw new Error("Runs not provided in this experiment. Please predict first.");
      }
      return this._runs;
    }

    /**
     * @param args Manager configuration. Fields read here: `client`,
     *   `experiment` (name prefix string or project object), `metadata`,
     *   `examples`, `data`, `_runsArray`, `runs`, `evaluationResults`,
     *   `summaryResults`, `numRepetitions`, `includeAttachments` and
     *   `description`.
     * @throws {Error} If an experiment object without a name is supplied.
     */
    constructor(args) {
      // Declare every instance field up front (same order and descriptors as
      // the compiled class-field output) so the object shape is stable.
      const fieldNames = [
        "_data",
        "_runs",
        "_evaluationResults",
        "_summaryResults",
        "_examples",
        "_numRepetitions",
        "_runsArray",
        "client",
        "_experiment",
        "_experimentName",
        "_metadata",
        "_description",
        "_includeAttachments",
      ];
      for (const fieldName of fieldNames) {
        Object.defineProperty(this, fieldName, {
          enumerable: true,
          configurable: true,
          writable: true,
          value: void 0,
        });
      }
      // Internal bookkeeping for getExamples(); kept out of enumeration.
      Object.defineProperty(this, "_examplesLoading", {
        enumerable: false,
        configurable: true,
        writable: true,
        value: void 0,
      });

      this.client = args.client ?? new index_js_1.Client();
      if (!args.experiment) {
        this._experimentName = (0, _random_name_js_1.randomName)();
      } else if (typeof args.experiment === "string") {
        // A string is treated as a prefix; a short random suffix is appended.
        this._experimentName = `${args.experiment}-${(0, uuid_1.v4)().slice(0, 8)}`;
      } else {
        if (!args.experiment.name) {
          throw new Error("Experiment must have a name");
        }
        this._experimentName = args.experiment.name;
        this._experiment = args.experiment;
      }

      // Always work on a copy: start() later writes `num_repetitions` into the
      // metadata and must not modify the caller's object.
      const providedMetadata = args.metadata || {};
      this._metadata =
        "revision_id" in providedMetadata
          ? { ...providedMetadata }
          : {
              revision_id: (0, env_js_1.getLangChainEnvVarsMetadata)().revision_id,
              ...providedMetadata,
            };

      if (args.examples && args.examples.length) {
        this.setExamples(args.examples);
      }
      this._data = args.data;
      if (args._runsArray && args._runsArray.length) {
        this._runsArray = args._runsArray;
      }
      this._runs = args.runs;
      this._evaluationResults = args.evaluationResults;
      this._summaryResults = args.summaryResults;
      this._numRepetitions = args.numRepetitions;
      this._includeAttachments = args.includeAttachments;
      // Used as the project description when the project is created.
      this._description = args.description;
    }

    /**
     * @returns The experiment (project) object.
     * @throws {Error} If the experiment has not been started.
     */
    _getExperiment() {
      if (!this._experiment) {
        throw new Error("Experiment not yet started.");
      }
      return this._experiment;
    }

    /**
     * Build the project metadata: manager metadata, plus git info when
     * available, layered over any metadata already stored on the experiment
     * (manager values win on key conflicts).
     */
    async _getExperimentMetadata() {
      let projectMetadata = this._metadata ?? {};
      const gitInfo = await (0, _git_js_1.getGitInfo)();
      if (gitInfo) {
        projectMetadata = {
          ...projectMetadata,
          git: gitInfo,
        };
      }
      if (this._experiment) {
        const experimentMetadata =
          this._experiment.extra && "metadata" in this._experiment.extra
            ? this._experiment.extra.metadata
            : {};
        projectMetadata = {
          ...experimentMetadata,
          ...projectMetadata,
        };
      }
      return projectMetadata;
    }

    /**
     * Create the project, retrying with a fresh random suffix on a naming
     * conflict (up to 10 attempts). Any other error is rethrown unchanged.
     *
     * @param firstExample Example whose `dataset_id` becomes the reference dataset.
     * @param projectMetadata Metadata stored on the project.
     * @throws {Error} If no unique name is found within 10 attempts.
     */
    async _createProject(firstExample, projectMetadata) {
      const originalExperimentName = this._experimentName;
      for (let attempt = 0; attempt < 10; attempt++) {
        try {
          return await this.client.createProject({
            projectName: this._experimentName,
            referenceDatasetId: firstExample.dataset_id,
            metadata: projectMetadata,
            description: this._description,
          });
        } catch (e) {
          if (e?.name !== "LangSmithConflictError") {
            throw e;
          }
          // Naming collision: try again under a new suffixed name.
          const suffix = (0, uuid_1.v4)().slice(0, 6);
          this._experimentName = `${originalExperimentName}-${suffix}`;
        }
      }
      throw new Error("Could not generate a unique experiment name within 10 attempts." +
        " Please try again with a different name.");
    }

    /**
     * Return the experiment project, creating it first if this manager does
     * not have one yet.
     *
     * @throws {Error} If a project must be created but there is no example to
     *   take the reference dataset from.
     */
    async _getProject(firstExample) {
      if (!this._experiment) {
        if (!firstExample) {
          throw new Error("No examples found in the dataset.");
        }
        const projectMetadata = await this._getExperimentMetadata();
        this._experiment = await this._createProject(firstExample, projectMetadata);
      }
      return this._experiment;
    }

    /** Log the experiment name and, when it can be derived, the comparison URL. */
    async _printExperimentStart() {
      console.log(`Starting evaluation of experiment: ${this.experimentName}`);
      const firstExample = this._examples?.[0];
      const datasetId = firstExample?.dataset_id;
      if (!datasetId || !this._experiment)
        return;
      const datasetUrl = await this.client.getDatasetUrl({ datasetId });
      const compareUrl = `${datasetUrl}/compare?selectedSessions=${this._experiment.id}`;
      console.log(`View results at ${compareUrl}`);
    }

    /**
     * Resolve the examples, make sure the project exists and return a new
     * manager bound to both.
     */
    async start() {
      const examples = await this.getExamples();
      const firstExample = examples[0];
      const project = await this._getProject(firstExample);
      await this._printExperimentStart();
      this._metadata["num_repetitions"] = this._numRepetitions;
      return new _ExperimentManager({
        examples,
        experiment: project,
        metadata: this._metadata,
        client: this.client,
        evaluationResults: this._evaluationResults,
        summaryResults: this._summaryResults,
        includeAttachments: this._includeAttachments,
      });
    }

    /**
     * Return a new manager whose `runs` stream lazily executes `target` on
     * every example.
     */
    async withPredictions(target, options) {
      const experimentResults = this._predict(target, options);
      return new _ExperimentManager({
        examples: await this.getExamples(),
        experiment: this._experiment,
        metadata: this._metadata,
        client: this.client,
        runs: (async function* () {
          for await (const pred of experimentResults) {
            yield pred.run;
          }
        })(),
        includeAttachments: this._includeAttachments,
      });
    }

    /**
     * Return a new manager whose `runs` and `evaluationResults` streams are fed
     * by lazily scoring the current runs with `evaluators`.
     */
    async withEvaluators(evaluators, options) {
      const resolvedEvaluators = _resolveEvaluators(evaluators);
      const experimentResults = this._score(resolvedEvaluators, options);
      // Split the scored stream so runs and results can be consumed separately.
      const [r1, r2] = (0, atee_js_1.atee)(experimentResults);
      return new _ExperimentManager({
        examples: await this.getExamples(),
        experiment: this._experiment,
        metadata: this._metadata,
        client: this.client,
        runs: (async function* () {
          for await (const result of r1) {
            yield result.run;
          }
        })(),
        evaluationResults: (async function* () {
          for await (const result of r2) {
            yield result.evaluationResults;
          }
        })(),
        summaryResults: this._summaryResults,
        includeAttachments: this._includeAttachments,
      });
    }

    /** Return a new manager that additionally carries summary-evaluator results. */
    async withSummaryEvaluators(summaryEvaluators) {
      const aggregateFeedbackGen = this._applySummaryEvaluators(summaryEvaluators);
      return new _ExperimentManager({
        examples: await this.getExamples(),
        experiment: this._experiment,
        metadata: this._metadata,
        client: this.client,
        runs: this.runs,
        _runsArray: this._runsArray,
        evaluationResults: this._evaluationResults,
        summaryResults: aggregateFeedbackGen,
        includeAttachments: this._includeAttachments,
      });
    }

    /**
     * Drain the run and evaluation streams, then yield one
     * `{ run, example, evaluationResults }` row per run, matched by position.
     * The drained runs are kept in `_runsArray`.
     */
    async *getResults() {
      const examples = await this.getExamples();
      const evaluationResults = [];
      if (!this._runsArray) {
        this._runsArray = [];
        for await (const run of this.runs) {
          this._runsArray.push(run);
        }
      }
      for await (const evaluationResult of this.evaluationResults) {
        evaluationResults.push(evaluationResult);
      }
      for (let i = 0; i < this._runsArray.length; i++) {
        yield {
          run: this._runsArray[i],
          example: examples[i],
          evaluationResults: evaluationResults[i],
        };
      }
    }

    /**
     * Collect the summary-evaluator results.
     * @returns {Promise<{results: Array}>} Empty when no summary evaluators were set.
     */
    async getSummaryScores() {
      if (!this._summaryResults) {
        return { results: [] };
      }
      const results = [];
      for await (const evaluationResultsGenerator of this._summaryResults) {
        if (typeof evaluationResultsGenerator === "function") {
          // This is because runs array is not available until after this generator
          // is set, so we need to pass it like so.
          for await (const evaluationResults of evaluationResultsGenerator(this._runsArray ?? [])) {
            results.push(...evaluationResults.results);
          }
        }
      }
      return { results };
    }

    // Private methods

    /**
     * Run the target function or runnable on the examples.
     * @param {StandardTargetT} target The target function or runnable to evaluate.
     * @param options `maxConcurrency` of 0 (the default) runs examples one at
     *   a time; any other value schedules them through an AsyncCaller.
     * @returns {AsyncGenerator<_ForwardResults>} An async generator of the results.
     */
    async *_predict(target, options) {
      const maxConcurrency = options?.maxConcurrency ?? 0;
      const examples = await this.getExamples();
      if (maxConcurrency === 0) {
        for (const example of examples) {
          yield await _forward(target, example, this.experimentName, this._metadata, this.client, this._includeAttachments);
        }
      } else {
        const caller = new async_caller_js_1.AsyncCaller({
          maxConcurrency,
          debug: this.client.debug,
        });
        const futures = [];
        for await (const example of examples) {
          futures.push(caller.call(_forward, target, example, this.experimentName, this._metadata, this.client, this._includeAttachments));
        }
        // Yield in submission order regardless of completion order.
        for await (const future of futures) {
          yield future;
        }
      }
      // Close out the project.
      await this._end();
    }

    /**
     * Apply every evaluator to one result row, logging feedback through
     * `fields.client`. A failing evaluator is reported and skipped so the
     * remaining evaluators still run.
     *
     * @returns The same row, with new feedback appended to `evaluationResults.results`.
     */
    async _runEvaluators(evaluators, currentResults, fields) {
      const { run, example, evaluationResults } = currentResults;
      for (const evaluator of evaluators) {
        try {
          const options = {
            reference_example_id: example.id,
            project_name: "evaluators",
            metadata: {
              example_version: example.modified_at
                ? new Date(example.modified_at).toISOString()
                : new Date(example.created_at).toISOString(),
            },
            client: fields.client,
            tracingEnabled: true,
          };
          const evaluatorResponse = await evaluator.evaluateRun(run, example, options);
          evaluationResults.results.push(...(await fields.client.logEvaluationFeedback(evaluatorResponse, run)));
        } catch (e) {
          console.error(`Error running evaluator ${evaluator.evaluateRun.name} on run ${run.id}: ${e}`);
          (0, error_js_1.printErrorStackTrace)(e);
        }
      }
      return {
        run,
        example,
        evaluationResults,
      };
    }

    /**
     * Run the evaluators on the prediction stream.
     * Expects runs to be available in the manager.
     * (e.g. from a previous prediction step)
     * @param {Array<RunEvaluator>} evaluators
     * @param options `maxConcurrency` of 0 (the default) scores rows one at a time.
     */
    async *_score(evaluators, options) {
      const { maxConcurrency = 0 } = options || {};
      if (maxConcurrency === 0) {
        for await (const currentResults of this.getResults()) {
          yield this._runEvaluators(evaluators, currentResults, {
            client: this.client,
          });
        }
      } else {
        const caller = new async_caller_js_1.AsyncCaller({
          maxConcurrency,
          debug: this.client.debug,
        });
        // Bind so the method keeps its receiver when invoked by the caller.
        const runEvaluators = this._runEvaluators.bind(this);
        const futures = [];
        for await (const currentResults of this.getResults()) {
          futures.push(caller.call(runEvaluators, evaluators, currentResults, {
            client: this.client,
          }));
        }
        for (const result of futures) {
          yield result;
        }
      }
    }

    /**
     * Yield a single async-generator function that, given the runs array,
     * executes every summary evaluator, records each result as project-level
     * feedback and yields `{ results }` with the flattened feedback.
     *
     * A failing evaluator is logged and skipped.
     *
     * @throws {Error} If the experiment has not been started.
     */
    async *_applySummaryEvaluators(summaryEvaluators) {
      const projectId = this._getExperiment().id;
      const examples = await this.getExamples();
      const options = Array.from({ length: summaryEvaluators.length }).map(() => ({
        project_name: "evaluators",
        experiment: this.experimentName,
        projectId: projectId,
      }));
      const wrappedEvaluators = await wrapSummaryEvaluators(summaryEvaluators, options);
      yield async function* (runsArray) {
        const aggregateFeedback = [];
        for (const evaluator of wrappedEvaluators) {
          try {
            const summaryEvalResult = await evaluator(runsArray, examples);
            const flattenedResults = this.client._selectEvalResults(summaryEvalResult);
            aggregateFeedback.push(...flattenedResults);
            for (const result of flattenedResults) {
              // eslint-disable-next-line @typescript-eslint/no-unused-vars
              const { targetRunId, key, ...feedback } = result;
              const evaluatorInfo = feedback.evaluatorInfo;
              delete feedback.evaluatorInfo;
              await this.client.createFeedback(null, key, {
                ...feedback,
                projectId: projectId,
                sourceInfo: evaluatorInfo,
              });
            }
          } catch (e) {
            // Interpolate the error directly: JSON.stringify on an Error
            // instance produces "{}" and would hide the message.
            console.error(`Error running summary evaluator ${evaluator.name}: ${e}`);
            (0, error_js_1.printErrorStackTrace)(e);
          }
        }
        yield {
          results: aggregateFeedback,
        };
      }.bind(this);
    }

    /**
     * Find the most recent `modified_at` among the examples, comparing at
     * sub-millisecond precision. Examples without `modified_at` are ignored.
     *
     * @returns The winning `modified_at` value exactly as stored on the
     *   example, or `undefined` when no example carries one.
     */
    async _getDatasetVersion() {
      const examples = await this.getExamples();
      // Extract the fractional-seconds digits of an ISO timestamp ("" if none).
      // The dot is escaped so a UTC offset such as "+05:30" is not mistaken
      // for a fraction.
      const fractionDigits = (isoString) => {
        const timePart = isoString.split("T").at(1);
        if (!timePart)
          return "";
        return timePart.match(/[0-9]{2}:[0-9]{2}:[0-9]{2}\.([0-9]+)/)?.[1] ?? "";
      };
      let latest;
      for (const { modified_at: date } of examples) {
        if (!date)
          continue;
        // Python might return microseconds, which we need
        // to account for when comparing dates.
        const jsDate = new Date(date);
        let source = fractionDigits(date);
        let parsed = fractionDigits(jsDate.toISOString());
        const width = Math.max(source.length, parsed.length);
        source = source.padEnd(width, "0");
        parsed = parsed.padEnd(width, "0");
        // Digits the Date object dropped, added back as a fraction of a millisecond.
        const microseconds = (Number.parseInt(source, 10) - Number.parseInt(parsed, 10)) / 1000;
        const time = jsDate.getTime() + microseconds;
        // Strict comparison keeps the earliest example on ties.
        if (latest === undefined || time > latest.time) {
          latest = { date, time };
        }
      }
      return latest?.date;
    }

    /**
     * Collect the distinct `metadata.dataset_split` values (string or array of
     * strings) across the examples.
     * @returns An array of split names, or `undefined` when there are none.
     */
    async _getDatasetSplits() {
      const examples = await this.getExamples();
      const allSplits = new Set();
      for (const ex of examples) {
        const split = ex.metadata && ex.metadata.dataset_split;
        if (!split)
          continue;
        if (Array.isArray(split)) {
          split.forEach((name) => allSplits.add(name));
        } else if (typeof split === "string") {
          allSplits.add(split);
        }
      }
      return allSplits.size ? Array.from(allSplits) : undefined;
    }

    /**
     * Close out the project by writing the final metadata (dataset version,
     * dataset splits and, if still missing, a default revision id).
     * @throws {Error} If the experiment has not been started.
     */
    async _end() {
      const experiment = this._experiment;
      if (!experiment) {
        throw new Error("Experiment not yet started.");
      }
      const projectMetadata = await this._getExperimentMetadata();
      projectMetadata["dataset_version"] = await this._getDatasetVersion();
      projectMetadata["dataset_splits"] = await this._getDatasetSplits();
      // Update revision_id if not already set
      if (!projectMetadata["revision_id"]) {
        projectMetadata["revision_id"] = await (0, _git_js_1.getDefaultRevisionId)();
      }
      await this.client.updateProject(experiment.id, {
        metadata: projectMetadata,
      });
    }
  }
  583. exports._ExperimentManager = _ExperimentManager;
  /**
   * Represents the results of an evaluate() call.
   * This class provides an iterator interface to iterate over the experiment results
   * once they have been collected by `processData`. It also provides access to the
   * experiment name, the number of results, and the summary results.
   *
   * The object is its own async iterator, so the collected rows can be
   * iterated once; `results` keeps all rows for repeated access.
   */
  class ExperimentResults {
    /**
     * @param experimentManager Manager the results belong to; only its
     *   `experimentName` is read by this class outside of `processData`.
     */
    constructor(experimentManager) {
      const publicFields = [
        ["manager", void 0],
        ["results", []],
        ["processedCount", 0],
        ["summaryResults", void 0],
      ];
      for (const [fieldName, initialValue] of publicFields) {
        Object.defineProperty(this, fieldName, {
          enumerable: true,
          configurable: true,
          writable: true,
          value: initialValue,
        });
      }
      // Read position used by next(). Kept separate from `processedCount`
      // (which counts rows ingested by processData) so that iterating after
      // processing actually yields the collected rows.
      Object.defineProperty(this, "_readIndex", {
        enumerable: false,
        configurable: true,
        writable: true,
        value: 0,
      });
      this.manager = experimentManager;
    }

    /** Name of the underlying experiment. */
    get experimentName() {
      return this.manager.experimentName;
    }

    [Symbol.asyncIterator]() {
      return this;
    }

    /**
     * Return the next collected row, or `{ done: true }` once every row in
     * `results` has been handed out.
     */
    async next() {
      if (this._readIndex < this.results.length) {
        const value = this.results[this._readIndex];
        this._readIndex++;
        return { value, done: false };
      }
      return { value: undefined, done: true };
    }

    /**
     * Drain `manager.getResults()` into `results`, then fetch the summary
     * scores into `summaryResults`.
     */
    async processData(manager) {
      for await (const item of manager.getResults()) {
        this.results.push(item);
        this.processedCount++;
      }
      this.summaryResults = await manager.getSummaryScores();
    }

    /** Number of rows collected so far. */
    get length() {
      return this.results.length;
    }
  }
  /**
   * Implementation behind `evaluate`.
   *
   * When `target` is an array the call is delegated to `evaluateComparative`.
   * Otherwise an experiment manager is created and started, and the optional
   * stages are chained in order: predictions (when `target` is callable),
   * evaluators, summary evaluators. All results are collected before returning.
   *
   * @param target An array (comparative evaluation), a callable target, or a
   *   value that is passed to `_resolveExperiment` as existing runs.
   * @param fields Evaluation options. Fields read on the standard path:
   *   `client`, `experiment`, `data`, `metadata`, `experimentPrefix`,
   *   `numRepetitions`, `includeAttachments`, `description`, `maxConcurrency`,
   *   `evaluators`, `summaryEvaluators`.
   * @returns The comparative result, or an `ExperimentResults` whose data has
   *   already been processed.
   * @throws {Error} If comparative evaluation is requested without evaluators.
   */
  async function _evaluate(target, fields) {
    // Add check for comparative evaluation
    if (Array.isArray(target)) {
      const comparativeOptions = fields;
      if (!comparativeOptions.evaluators) {
        throw new Error("Evaluators are required for comparative evaluation");
      }
      return (0, evaluate_comparative_js_1.evaluateComparative)(target, {
        evaluators: comparativeOptions.evaluators,
        client: comparativeOptions.client,
        metadata: comparativeOptions.metadata,
        experimentPrefix: comparativeOptions.experimentPrefix,
        description: comparativeOptions.description,
        maxConcurrency: comparativeOptions.maxConcurrency,
        loadNested: comparativeOptions.loadNested ?? false,
        randomizeOrder: comparativeOptions.randomizeOrder ?? false,
      });
    }

    const client = fields.client ?? new index_js_1.Client();
    const targetIsCallable = _isCallable(target);
    // A non-callable target is handed over as already-recorded runs.
    const runs = targetIsCallable ? null : target;
    const [experiment_, newRuns] = await _resolveExperiment(fields.experiment ?? null, runs, client);

    // An in-memory array is used as the examples directly; anything else is
    // left for the manager to resolve as a data source.
    const dataIsArray = Array.isArray(fields.data);
    let manager = await new _ExperimentManager({
      data: dataIsArray ? undefined : fields.data,
      examples: dataIsArray ? fields.data : undefined,
      client,
      metadata: fields.metadata,
      experiment: experiment_ ?? fields.experimentPrefix,
      runs: newRuns ?? undefined,
      numRepetitions: fields.numRepetitions ?? 1,
      includeAttachments: fields.includeAttachments,
      // Forwarded on this path too (the comparative path above already does),
      // so the caller's description is no longer dropped here.
      description: fields.description,
    }).start();

    if (targetIsCallable) {
      manager = await manager.withPredictions(target, {
        maxConcurrency: fields.maxConcurrency,
      });
    }
    if (fields.evaluators) {
      manager = await manager.withEvaluators(fields.evaluators, {
        maxConcurrency: fields.maxConcurrency,
      });
    }
    if (fields.summaryEvaluators) {
      manager = await manager.withSummaryEvaluators(fields.summaryEvaluators);
    }

    // Start consuming the results.
    const results = new ExperimentResults(manager);
    await results.processData(manager);
    return results;
  }
  /**
   * Execute the target on a single example under tracing and return the run
   * that the tracing layer reports through its `on_end` hook.
   *
   * Targets exposing `invoke` are called through `invoke`; LangChain callbacks
   * are attached when they can be loaded, and the example's attachments are
   * passed when `includeAttachments` is truthy. Any other target is traced
   * directly and receives the attachments as a second argument when requested.
   *
   * Errors thrown by the target are logged, not rethrown.
   *
   * @param fn Target function, or object with an `invoke` method.
   * @param example Example supplying `id`, `inputs`, `attachments` and timestamps.
   * @param experimentName Project name the trace is recorded under.
   * @param metadata Metadata merged into the trace metadata.
   * @param client Client handed to the tracing layer.
   * @param includeAttachments Whether to forward `example.attachments`.
   * @returns {Promise<{run, example}>}
   * @throws {Error} If no run was reported once the call finished.
   */
  async function _forward(fn, example, experimentName, metadata, client, includeAttachments) {
    let capturedRun = null;
    const traceConfig = {
      reference_example_id: example.id,
      on_end: (finishedRun) => {
        capturedRun = finishedRun;
      },
      project_name: experimentName,
      metadata: {
        ...metadata,
        // Falls back to the creation time when the example was never modified.
        example_version: new Date(example.modified_at ? example.modified_at : example.created_at).toISOString(),
      },
      client,
      tracingEnabled: true,
    };
    const targetHasInvoke = "invoke" in fn;

    const invokeTarget = async (inputs) => {
      let langChainCallbacks;
      try {
        // TODO: Deprecate this and rely on interop on 0.2 minor bump.
        const { getLangchainCallbacks } = await import("../langchain.js");
        langChainCallbacks = await getLangchainCallbacks();
      }
      catch {
        // no-op
      }
      const hasCallbacks = langChainCallbacks !== undefined;
      // Issue with retrieving LangChain callbacks, rely on interop
      if (!hasCallbacks && !includeAttachments) {
        return await fn.invoke(inputs);
      }
      const invokeConfig = {};
      if (includeAttachments) {
        invokeConfig.attachments = example.attachments;
      }
      if (hasCallbacks) {
        invokeConfig.callbacks = langChainCallbacks;
      }
      return await fn.invoke(inputs, invokeConfig);
    };

    const tracedTarget = targetHasInvoke
      ? (0, traceable_js_1.traceable)(invokeTarget, traceConfig)
      : (0, traceable_js_1.traceable)(fn, traceConfig);

    try {
      if (!targetHasInvoke && includeAttachments) {
        await tracedTarget(example.inputs, { attachments: example.attachments });
      }
      else {
        await tracedTarget(example.inputs);
      }
    }
    catch (e) {
      console.error(`Error running target function: ${e}`);
      (0, error_js_1.printErrorStackTrace)(e);
    }

    if (!capturedRun) {
      throw new Error(`Run not created by target function.
This is most likely due to tracing not being enabled.\n
Try setting "LANGSMITH_TRACING=true" in your environment.`);
    }
    return {
      run: capturedRun,
      example,
    };
  }
  /**
   * Turn the `data` option into something iterable over examples.
   *
   * A string is looked up through `options.client.listExamples`: by dataset id
   * when it passes `assertUuid`, by dataset name otherwise. Any non-string
   * value is returned unchanged.
   *
   * @param data Dataset id, dataset name, or an already usable value.
   * @param options `client` used for the lookup and the `includeAttachments` flag.
   */
  function _resolveData(data, options) {
    if (typeof data !== "string") {
      return data;
    }
    let selector;
    try {
      (0, _uuid_js_1.assertUuid)(data);
      selector = { datasetId: data };
    }
    catch (_) {
      // Not a UUID: treat the string as a dataset name.
      selector = { datasetName: data };
    }
    return options.client.listExamples({
      ...selector,
      includeAttachments: options.includeAttachments,
    });
  }
  /**
   * Wrap each summary evaluator in a traced function with the signature
   * `(runs, examples) => Promise<result>`.
   *
   * Evaluators declaring exactly one parameter are called with a single object
   * `{ runs, examples, inputs, outputs, referenceOutputs }`; all others are
   * called as `evaluator(runs, examples)`.
   *
   * @param evaluators Summary evaluator functions.
   * @param optionsArray Trace options, one entry per evaluator (matched by
   *   index). Each evaluator's entry is merged into its trace config; the
   *   trace name is the evaluator's function name, or "BatchEvaluator".
   * @returns {Promise<Array<Function>>} Wrapped evaluators, in input order.
   */
  async function wrapSummaryEvaluators(evaluators, optionsArray) {
    const wrapOne = (evaluator, evaluatorOptions) => {
      const evalName = evaluator.name || "BatchEvaluator";
      return (runs, examples) => {
        const traced = (0, traceable_js_1.traceable)((_runs_, _examples_) => {
          // Check if the evaluator expects an object parameter
          if (evaluator.length === 1) {
            const inputs = examples.map((ex) => ex.inputs);
            const outputs = runs.map((run) => run.outputs || {});
            const referenceOutputs = examples.map((ex) => ex.outputs || {});
            return Promise.resolve(evaluator({
              runs,
              examples,
              inputs,
              outputs,
              referenceOutputs,
            }));
          }
          // Otherwise use the traditional (runs, examples) signature
          return Promise.resolve(evaluator(runs, examples));
          // Use this evaluator's own options entry; spreading the whole array
          // would only contribute numeric keys ("0", "1", ...).
        }, { ...optionsArray?.[evaluatorOptions], name: evalName });
        // The trace records compact summaries instead of the full arrays.
        return Promise.resolve(traced(`Runs[] (Length=${runs.length})`, `Examples[] (Length=${examples.length})`));
      };
    };
    return evaluators.map((evaluator, index) => wrapOne(evaluator, index));
  }
  /**
   * Normalise a list of evaluators.
   *
   * Entries that already expose `evaluateRun` are kept as they are; every
   * other entry is passed through `runEvaluator`. An entry named
   * "LangChainStringEvaluator" is rejected.
   *
   * @param evaluators Evaluator objects and/or functions.
   * @returns {Array} The normalised evaluators, in input order.
   * @throws {Error} "Not yet implemented" for a LangChainStringEvaluator entry.
   */
  function _resolveEvaluators(evaluators) {
    return [...evaluators].map((candidate) => {
      if ("evaluateRun" in candidate) {
        return candidate;
      }
      // todo fix this by porting LangChainStringEvaluator to langsmith sdk
      if (candidate.name === "LangChainStringEvaluator") {
        throw new Error("Not yet implemented");
      }
      return (0, evaluator_js_1.runEvaluator)(candidate);
    });
  }
  /**
   * Work out which experiment (project) an evaluation belongs to.
   *
   * - An explicit `experiment` is returned as-is (it must have a name).
   * - Otherwise, when `runs` are given, the project is read from the first run
   *   and returned together with an untouched copy of the run stream.
   * - Otherwise both entries are `undefined`.
   *
   * @param experiment Experiment object, or `null`.
   * @param runs Async-iterable of existing runs, or `null`.
   * @param client Client used to read the project.
   * @returns {Promise<[experiment, runs]>}
   * @throws {Error} If the experiment has no name, the run stream is empty, or
   *   the retrieved project has no name.
   */
  async function _resolveExperiment(experiment, runs, client) {
    // TODO: Remove this, handle outside the manager
    if (experiment !== null) {
      if (!experiment.name) {
        throw new Error("Experiment name must be defined if provided.");
      }
      return [experiment, undefined];
    }
    // If we have runs, that means the experiment was already started.
    if (runs !== null) {
      // Split the stream: one copy is peeked at here, the other is returned
      // unconsumed to the caller.
      const [runsClone, runsOriginal] = (0, atee_js_1.atee)(runs);
      const { value: firstRun } = await runsClone[Symbol.asyncIterator]().next();
      if (firstRun === undefined || firstRun === null) {
        throw new Error("No runs provided; cannot determine the experiment for an empty set of runs.");
      }
      // NOTE(review): LangSmith run records appear to expose the owning
      // project as `session_id`; the camel-cased field is kept as a fallback.
      // Also confirm that `readProject` accepts a bare id here.
      const projectId = firstRun.session_id ?? firstRun.sessionId;
      const retrievedExperiment = await client.readProject(projectId);
      if (!retrievedExperiment.name) {
        throw new Error("Experiment name not found for provided runs.");
      }
      return [retrievedExperiment, runsOriginal];
    }
    return [undefined, undefined];
  }
  /**
   * Report whether `target` can be executed as an evaluation target: either a
   * function, or an object with an `invoke` method.
   *
   * Returns `false` for `null`, `undefined` and primitives instead of throwing
   * (the `in` operator is only applied to objects).
   *
   * @param target Any value.
   * @returns {boolean}
   */
  function _isCallable(target) {
    if (typeof target === "function") {
      return true;
    }
    return (typeof target === "object" &&
      target !== null &&
      "invoke" in target &&
      typeof target.invoke === "function");
  }