// reporter.cjs (11 KB)
  "use strict";
  /**
   * TypeScript-emitted interop helper.
   *
   * Exposes property `k` of module object `m` on object `o` under the name
   * `k2` (which defaults to `k`). When `Object.create` exists the property is
   * installed with `Object.defineProperty`; if the source descriptor is missing,
   * is a plain writable/configurable data property, or is an accessor on a
   * non-ES module, it is replaced by an enumerable getter that reads `m[k]` on
   * every access. Otherwise a plain assignment copies the current value.
   *
   * An already-present `this.__createBinding` takes precedence.
   */
  var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
      if (k2 === undefined) k2 = k;
      var desc = Object.getOwnPropertyDescriptor(m, k);
      if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
          // Re-read from the source on each access instead of snapshotting.
          desc = { enumerable: true, get: function() { return m[k]; } };
      }
      Object.defineProperty(o, k2, desc);
  }) : (function(o, m, k, k2) {
      if (k2 === undefined) k2 = k;
      o[k2] = m[k];
  }));
  /**
   * TypeScript-emitted interop helper.
   *
   * Stores `v` as the `default` property of `o`. When `Object.create` exists the
   * property is defined as enumerable (and, per `Object.defineProperty`
   * defaults, non-writable); otherwise it is a plain assignment.
   *
   * An already-present `this.__setModuleDefault` takes precedence.
   */
  var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
      Object.defineProperty(o, "default", { enumerable: true, value: v });
  }) : function(o, v) {
      o["default"] = v;
  });
  /**
   * TypeScript-emitted interop helper implementing `import * as ns from "..."`.
   *
   * - A module flagged with `__esModule` is returned unchanged.
   * - Otherwise a fresh namespace object is built: every own property of `mod`
   *   except `default` is bound onto it via `__createBinding`, and `mod` itself
   *   becomes the namespace's `default` via `__setModuleDefault`.
   * - A nullish `mod` yields a namespace that only has `default`.
   *
   * An already-present `this.__importStar` takes precedence.
   */
  var __importStar = (this && this.__importStar) || (function () {
      // Resolves the key-listing strategy on first use, then replaces itself
      // so later calls go straight to the chosen implementation.
      var ownKeys = function(o) {
          ownKeys = Object.getOwnPropertyNames || function (o) {
              var ar = [];
              for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
              return ar;
          };
          return ownKeys(o);
      };
      return function (mod) {
          if (mod && mod.__esModule) return mod;
          var result = {};
          if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
          __setModuleDefault(result, mod);
          return result;
      };
  })();
  /**
   * TypeScript-emitted interop helper implementing `import x from "..."`.
   *
   * Returns `mod` unchanged when it is flagged with `__esModule`; otherwise
   * wraps it as `{ default: mod }`.
   *
   * An already-present `this.__importDefault` takes precedence.
   */
  var __importDefault = (this && this.__importDefault) || function (mod) {
      return (mod && mod.__esModule) ? mod : { "default": mod };
  };
  // Mark this CommonJS module as transpiled from an ES module and expose its
  // single public function.
  Object.defineProperty(exports, "__esModule", { value: true });
  exports.printReporterTable = printReporterTable;
  // Third-party and Node built-in dependencies, loaded through the interop
  // helpers declared at the top of this file.
  const console_table_printer_1 = require("console-table-printer");
  const chalk_1 = __importDefault(require("chalk"));
  const os = __importStar(require("node:os"));
  const path = __importStar(require("node:path"));
  const fs = __importStar(require("node:fs/promises"));
  const constants_js_1 = require("./constants.cjs");
  // When the combined length of all feedback key names exceeds this many
  // characters, per-key columns are collapsed into a single "Feedback" column.
  const FEEDBACK_COLLAPSE_THRESHOLD = 48;
  // Maximum rendered length of one `key: value` entry in the Inputs / Outputs
  // cells; also used as the minimum width of those columns.
  const MAX_TEST_PARAMS_LENGTH = 18;
  // Feedback keys that must never become table columns. "Test" and "Status"
  // are the column names the reporter itself writes into every row: a feedback
  // entry with one of those keys would overwrite that cell and drag the
  // formatted status strings into the per-key score statistics.
  const RESERVED_KEYS = [
      "Name",
      "Result",
      "Test",
      "Status",
      "Inputs",
      "Reference Outputs",
      "Outputs",
      "pass",
  ];
  /**
   * Builds the label shown in the "Test" column.
   *
   * @param {string} name - Test name.
   * @param {number | null | undefined} duration - Run time in milliseconds.
   * @returns {string} `name (<duration>ms)` when a duration is given (including
   *   `0`), otherwise just `name`.
   */
  function formatTestName(name, duration) {
      return duration == null ? name : `${name} (${duration}ms)`;
  }
  /**
   * Turns a test status into the colored label shown in the "Status" column.
   *
   * Matching is case-insensitive: "pending"/"skipped" (exact) render as a
   * yellow "Skipped" label, anything containing "pass" as green "Passed",
   * anything containing "fail" as red "Failed". Unrecognized strings are
   * returned unchanged.
   *
   * @param {string} status - Status reported for the test.
   * @returns {string} Label for the table cell. A nullish status yields an
   *   empty string and any other non-string is stringified, rather than
   *   throwing on `toLowerCase`.
   */
  function getFormattedStatus(status) {
      if (typeof status !== "string") {
          return status == null ? "" : String(status);
      }
      const normalized = status.toLowerCase();
      if (normalized === "pending" || normalized === "skipped") {
          return chalk_1.default.yellow("○ Skipped");
      }
      if (normalized.includes("pass")) {
          return chalk_1.default.green("✓ Passed");
      }
      if (normalized.includes("fail")) {
          return chalk_1.default.red("✕ Failed");
      }
      return status;
  }
  /**
   * Chooses the row color option passed to the table for a test status.
   *
   * Matching is case-insensitive and mirrors the status label logic:
   * "pending"/"skipped" (exact) -> yellow, contains "pass" -> grey,
   * contains "fail" -> red.
   *
   * @param {string} status - Status reported for the test.
   * @returns {{ color?: string }} Row options; an empty object for an
   *   unrecognized or non-string status (no explicit row color).
   */
  function getColorParam(status) {
      if (typeof status !== "string") {
          return {};
      }
      const normalized = status.toLowerCase();
      if (normalized === "pending" || normalized === "skipped") {
          return { color: "yellow" };
      }
      if (normalized.includes("pass")) {
          return { color: "grey" };
      }
      if (normalized.includes("fail")) {
          return { color: "red" };
      }
      return {};
  }
  /**
   * Renders an inputs / outputs value for display in a table cell.
   *
   * - `null` / `undefined` -> `undefined` (the cell is left empty).
   * - Objects and arrays -> one `key: value` line per own enumerable entry.
   *   String values are used as-is, everything else is JSON-stringified. Each
   *   line longer than `MAX_TEST_PARAMS_LENGTH` is cut to fit and suffixed
   *   with "...".
   * - Any other primitive -> `String(value)`.
   *
   * @param {unknown} value - Value to render.
   * @returns {string | undefined} Cell text.
   */
  function formatValue(value) {
      if (value == null) {
          return undefined;
      }
      if (typeof value !== "object") {
          return String(value);
      }
      return Object.entries(value)
          .map(([k, v]) => {
              const rawValue = typeof v === "string" ? v : JSON.stringify(v);
              // Measure and cut by code point so a surrogate pair (e.g. an
              // emoji) is never split into an unprintable lone surrogate.
              const chars = Array.from(`${k}: ${rawValue}`);
              return chars.length > MAX_TEST_PARAMS_LENGTH
                  ? `${chars.slice(0, MAX_TEST_PARAMS_LENGTH - 3).join("")}...`
                  : chars.join("");
          })
          .join("\n");
  }
  /**
   * Loads and removes the evaluation results file written for one test.
   *
   * The file is expected at `<tmpdir>/langsmith_test_results/<testId>.json`.
   *
   * @param {string} testId - Identifier parsed from the test title.
   * @returns {Promise<object | undefined>} The parsed JSON object, or
   *   `undefined` when the file cannot be read, is not valid JSON, or does not
   *   contain an object.
   */
  async function readEvaluationResults(testId) {
      const resultsPath = path.join(os.tmpdir(), "langsmith_test_results", `${testId}.json`);
      let parsed;
      try {
          parsed = JSON.parse(await fs.readFile(resultsPath, "utf-8"));
      }
      catch {
          return undefined;
      }
      try {
          await fs.unlink(resultsPath);
      }
      catch {
          // Cleanup is best-effort: results that were read successfully must
          // not be thrown away just because the temp file could not be removed.
      }
      return parsed !== null && typeof parsed === "object" ? parsed : undefined;
  }
  /**
   * Picks the chalk style for a score from its distance to the mean, measured
   * in standard deviations. `NaN` (e.g. zero spread) and exactly `0` are
   * neutral white; negative deviations go yellow -> red -> bright red and
   * positive ones green -> bright green.
   *
   * @param {number} deviation - `(score - mean) / stdDev`.
   * @returns {(text: unknown) => string} Chalk styling function.
   */
  function pickDeviationStyle(deviation) {
      if (Number.isNaN(deviation)) {
          return chalk_1.default.white;
      }
      if (deviation <= -1) {
          return chalk_1.default.redBright;
      }
      if (deviation < -0.5) {
          return chalk_1.default.red;
      }
      if (deviation < 0) {
          return chalk_1.default.yellow;
      }
      if (deviation === 0) {
          return chalk_1.default.white;
      }
      if (deviation <= 0.5) {
          return chalk_1.default.green;
      }
      return chalk_1.default.greenBright;
  }
  /**
   * Prints a summary table for one test suite to the console.
   *
   * Each result becomes one row. When the test title ends with a test id
   * (separated by `TEST_ID_DELIMITER`) and the test was not skipped, the
   * matching evaluation results file is read from the temp directory and its
   * inputs, reference outputs, outputs and feedback scores are added to the
   * row. Feedback scores are colored by how far they are from the mean of
   * that feedback key across all rows. If the feedback key names are too long
   * in total, they are folded into a single "Feedback" column.
   *
   * @param {string} testSuiteName - Suite heading; omitted when falsy.
   * @param {Array<{ title: string, duration?: number, status: string }>} results
   *   Per-test results.
   * @param {string} testStatus - Overall suite status; selects the heading
   *   color (contains "pass" -> green, contains "fail" -> red, else yellow).
   * @param {string} [failureMessage] - Printed above the table when truthy.
   * @returns {Promise<void>}
   */
  async function printReporterTable(testSuiteName, results, testStatus, failureMessage) {
      const rows = [];
      const feedbackKeys = new Set();
      let experimentUrl;
      for (const result of results) {
          const { title, duration, status } = result;
          const titleComponents = title.split(constants_js_1.TEST_ID_DELIMITER);
          // The id is whatever follows the last delimiter; no delimiter means
          // this is not a LangSmith-tracked test.
          const testId = titleComponents.length > 1 ? titleComponents.at(-1) : undefined;
          const testName = testId !== undefined
              ? titleComponents.slice(0, -1).join(constants_js_1.TEST_ID_DELIMITER).trim()
              : title;
          const basicRow = [
              {
                  Test: formatTestName(testName, duration),
                  Status: getFormattedStatus(status),
              },
              getColorParam(status),
          ];
          // Non-LangSmith and skipped tests have no evaluation results to load.
          if (testId === undefined || status === "pending" || status === "skipped") {
              rows.push(basicRow);
              continue;
          }
          const fileContent = await readEvaluationResults(testId);
          if (fileContent === undefined) {
              console.log("[LANGSMITH]: Failed to read custom evaluation results. Please contact us for help.");
              rows.push(basicRow);
              continue;
          }
          const feedbackEntries = Array.isArray(fileContent.feedback)
              ? fileContent.feedback
              : [];
          const feedback = {};
          for (const entry of feedbackEntries) {
              // Entries without a key cannot be turned into a column.
              if (entry?.key == null) {
                  continue;
              }
              if (!RESERVED_KEYS.includes(entry.key) && entry.score !== undefined) {
                  feedbackKeys.add(entry.key);
                  feedback[entry.key] = entry.score;
              }
          }
          experimentUrl = experimentUrl ?? fileContent.experimentUrl;
          rows.push([
              {
                  Test: formatTestName(testName, duration),
                  Inputs: formatValue(fileContent.inputs),
                  "Reference Outputs": formatValue(fileContent.referenceOutputs),
                  Outputs: formatValue(fileContent.outputs),
                  Status: getFormattedStatus(status),
                  ...feedback,
              },
              getColorParam(status),
          ]);
      }
      const feedbackKeysTotalLength = [...feedbackKeys].reduce((total, key) => total + String(key).length, 0);
      const collapseFeedbackColumn = feedbackKeysTotalLength > FEEDBACK_COLLAPSE_THRESHOLD;
      for (const key of feedbackKeys) {
          const scores = rows
              .map(([row]) => row[key])
              .filter((score) => score !== undefined);
          if (scores.length === 0) {
              continue;
          }
          const mean = scores.reduce((a, b) => a + b, 0) / scores.length;
          const stdDev = Math.sqrt(scores.reduce((sq, n) => sq + Math.pow(n - mean, 2), 0) / scores.length);
          for (const [row] of rows) {
              const score = row[key];
              if (score === undefined) {
                  continue;
              }
              const style = pickDeviationStyle((score - mean) / stdDev);
              const coloredScore = style(score);
              if (collapseFeedbackColumn) {
                  // Move the score out of its own column and append it as a
                  // "key: score" line in the shared Feedback cell.
                  delete row[key];
                  const line = `${style(`${key}:`)} ${coloredScore}`;
                  row.Feedback = row.Feedback === undefined
                      ? line
                      : `${row.Feedback}\n${line}`;
              }
              else {
                  row[key] = coloredScore;
              }
          }
      }
      const defaultColumns = [
          { name: "Test", alignment: "left", maxLen: 36 },
          { name: "Inputs", alignment: "left", minLen: MAX_TEST_PARAMS_LENGTH },
          {
              name: "Reference Outputs",
              alignment: "left",
              minLen: MAX_TEST_PARAMS_LENGTH,
          },
          { name: "Outputs", alignment: "left", minLen: MAX_TEST_PARAMS_LENGTH },
          { name: "Status", alignment: "left" },
      ];
      if (collapseFeedbackColumn) {
          // Width of the longest feedback line, ignoring ANSI color codes.
          const feedbackColumnLength = rows.reduce((widest, [row]) => {
              const lines = row.Feedback?.split("\n") ?? [];
              return lines.reduce((w, line) => Math.max(w, line.replace(constants_js_1.STRIP_ANSI_REGEX, "").length), widest);
          }, 0);
          defaultColumns.push({
              name: "Feedback",
              alignment: "left",
              minLen: feedbackColumnLength + 8,
          });
      }
      console.log();
      const table = new console_table_printer_1.Table({
          columns: defaultColumns,
          colorMap: {
              grey: "\x1b[90m",
          },
      });
      for (const [row, options] of rows) {
          table.addRow(row, options);
      }
      const suiteStatus = typeof testStatus === "string" ? testStatus : "";
      const testStatusColor = suiteStatus.includes("pass")
          ? chalk_1.default.green
          : suiteStatus.includes("fail")
              ? chalk_1.default.red
              : chalk_1.default.yellow;
      if (testSuiteName) {
          console.log(testStatusColor(`› ${testSuiteName}`));
      }
      if (failureMessage) {
          console.log(failureMessage);
      }
      table.printTable();
      if (experimentUrl) {
          console.log();
          console.log(` [LANGSMITH]: View full results in LangSmith at ${experimentUrl}`);
          console.log();
      }
  }