123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256 |
- import { Table } from "console-table-printer";
- import chalk from "chalk";
- import * as os from "node:os";
- import * as path from "node:path";
- import * as fs from "node:fs/promises";
- import { STRIP_ANSI_REGEX, TEST_ID_DELIMITER } from "./constants.js";
- const FEEDBACK_COLLAPSE_THRESHOLD = 48;
- const MAX_TEST_PARAMS_LENGTH = 18;
- const RESERVED_KEYS = [
- "Name",
- "Result",
- "Inputs",
- "Reference Outputs",
- "Outputs",
- "pass",
- ];
- function formatTestName(name, duration) {
- if (duration != null) {
- return `${name} (${duration}ms)`;
- }
- else {
- return name;
- }
- }
- function getFormattedStatus(status) {
- const s = status.toLowerCase();
- if (s === "pending" || s === "skipped") {
- return chalk.yellow("○ Skipped");
- }
- else if (s.includes("pass")) {
- return chalk.green("✓ Passed");
- }
- else if (s.includes("fail")) {
- return chalk.red("✕ Failed");
- }
- else {
- return status;
- }
- }
- function getColorParam(status) {
- const s = status.toLowerCase();
- if (s === "pending" || s === "skipped") {
- return { color: "yellow" };
- }
- else if (s.includes("pass")) {
- return { color: "grey" };
- }
- else if (s.includes("fail")) {
- return { color: "red" };
- }
- else {
- return {};
- }
- }
- function formatValue(value) {
- if (typeof value === "object" && value !== null) {
- return Object.entries(value)
- .map(([k, v]) => {
- const rawValue = typeof v === "string" ? v : JSON.stringify(v);
- const rawEntry = `${k}: ${rawValue}`;
- const entry = rawEntry.length > MAX_TEST_PARAMS_LENGTH
- ? rawEntry.slice(0, MAX_TEST_PARAMS_LENGTH - 3) + "..."
- : rawEntry;
- return entry;
- })
- .join("\n");
- }
- if (value == null) {
- return;
- }
- return String(value);
- }
- export async function printReporterTable(testSuiteName, results, testStatus, failureMessage) {
- const rows = [];
- const feedbackKeys = new Set();
- let experimentUrl;
- for (const result of results) {
- const { title, duration, status } = result;
- const titleComponents = title.split(TEST_ID_DELIMITER);
- const testId = titleComponents.length > 1 && titleComponents.at(-1) !== undefined
- ? titleComponents.at(-1)
- : undefined;
- const testName = testId !== undefined
- ? titleComponents.slice(0, -1).join(TEST_ID_DELIMITER).trim()
- : titleComponents.join(TEST_ID_DELIMITER);
- // Non-LangSmith test
- if (testId === undefined) {
- rows.push([
- {
- Test: formatTestName(testName, duration),
- Status: getFormattedStatus(status),
- },
- getColorParam(status),
- ]);
- }
- else if (status === "pending" || status === "skipped") {
- // Skipped
- rows.push([
- {
- Test: formatTestName(testName, duration),
- Status: getFormattedStatus(status),
- },
- getColorParam(status),
- ]);
- }
- else {
- const resultsPath = path.join(os.tmpdir(), "langsmith_test_results", `${testId}.json`);
- let fileContent;
- try {
- fileContent = JSON.parse(await fs.readFile(resultsPath, "utf-8"));
- await fs.unlink(resultsPath);
- }
- catch (e) {
- console.log("[LANGSMITH]: Failed to read custom evaluation results. Please contact us for help.");
- rows.push([
- {
- Test: formatTestName(testName, duration),
- Status: getFormattedStatus(status),
- },
- getColorParam(status),
- ]);
- continue;
- }
- const feedback = fileContent.feedback.reduce((acc, current) => {
- if (!RESERVED_KEYS.includes(current.key) &&
- current.score !== undefined) {
- feedbackKeys.add(current.key);
- acc[current.key] = current.score;
- }
- return acc;
- }, {});
- experimentUrl = experimentUrl ?? fileContent.experimentUrl;
- rows.push([
- {
- Test: formatTestName(testName, duration),
- Inputs: formatValue(fileContent.inputs),
- "Reference Outputs": formatValue(fileContent.referenceOutputs),
- Outputs: formatValue(fileContent.outputs),
- Status: getFormattedStatus(status),
- ...feedback,
- },
- getColorParam(status),
- ]);
- }
- }
- const feedbackKeysTotalLength = [...feedbackKeys].reduce((l, key) => l + key.length, 0);
- const collapseFeedbackColumn = feedbackKeysTotalLength > FEEDBACK_COLLAPSE_THRESHOLD;
- for (const key of feedbackKeys) {
- const scores = rows
- .map(([row]) => row[key])
- .filter((score) => score !== undefined);
- if (scores.length > 0) {
- const mean = scores.reduce((a, b) => a + b, 0) / scores.length;
- const stdDev = Math.sqrt(scores.reduce((sq, n) => sq + Math.pow(n - mean, 2), 0) / scores.length);
- for (const row of rows) {
- const score = row[0][key];
- if (score !== undefined) {
- const deviation = (score - mean) / stdDev;
- let coloredKey;
- let coloredScore;
- if (isNaN(deviation)) {
- coloredKey = chalk.white(`${key}:`);
- coloredScore = chalk.white(score);
- }
- else if (deviation <= -1) {
- coloredKey = chalk.redBright(`${key}:`);
- coloredScore = chalk.redBright(score);
- }
- else if (deviation < -0.5) {
- coloredKey = chalk.red(`${key}:`);
- coloredScore = chalk.red(score);
- }
- else if (deviation < 0) {
- coloredKey = chalk.yellow(`${key}:`);
- coloredScore = chalk.yellow(score);
- }
- else if (deviation === 0) {
- coloredKey = chalk.white(`${key}:`);
- coloredScore = chalk.white(score);
- }
- else if (deviation <= 0.5) {
- coloredKey = chalk.green(`${key}:`);
- coloredScore = chalk.green(score);
- }
- else {
- coloredKey = chalk.greenBright(`${key}:`);
- coloredScore = chalk.greenBright(score);
- }
- if (collapseFeedbackColumn) {
- delete row[0][key];
- if (row[0].Feedback === undefined) {
- row[0].Feedback = `${coloredKey} ${coloredScore}`;
- }
- else {
- row[0].Feedback = `${row[0].Feedback}\n${coloredKey} ${coloredScore}`;
- }
- }
- else {
- row[0][key] = coloredScore;
- }
- }
- }
- }
- }
- const defaultColumns = [
- { name: "Test", alignment: "left", maxLen: 36 },
- { name: "Inputs", alignment: "left", minLen: MAX_TEST_PARAMS_LENGTH },
- {
- name: "Reference Outputs",
- alignment: "left",
- minLen: MAX_TEST_PARAMS_LENGTH,
- },
- { name: "Outputs", alignment: "left", minLen: MAX_TEST_PARAMS_LENGTH },
- { name: "Status", alignment: "left" },
- ];
- if (collapseFeedbackColumn) {
- const feedbackColumnLength = rows.reduce((max, [row]) => {
- const maxFeedbackLineLength = row.Feedback?.split("\n").reduce((max, feedbackLine) => {
- return Math.max(max, feedbackLine.replace(STRIP_ANSI_REGEX, "").length);
- }, 0) ?? 0;
- return Math.max(max, maxFeedbackLineLength);
- }, 0);
- defaultColumns.push({
- name: "Feedback",
- alignment: "left",
- minLen: feedbackColumnLength + 8,
- });
- }
- console.log();
- const table = new Table({
- columns: defaultColumns,
- colorMap: {
- grey: "\x1b[90m",
- },
- });
- for (const row of rows) {
- table.addRow(row[0], row[1]);
- }
- const testStatusColor = testStatus.includes("pass")
- ? chalk.green
- : testStatus.includes("fail")
- ? chalk.red
- : chalk.yellow;
- if (testSuiteName) {
- console.log(testStatusColor(`› ${testSuiteName}`));
- }
- if (failureMessage) {
- console.log(failureMessage);
- }
- table.printTable();
- if (experimentUrl) {
- console.log();
- console.log(` [LANGSMITH]: View full results in LangSmith at ${experimentUrl}`);
- console.log();
- }
- }
|