pull: initial commit
n8n-n8n-1.109.2/packages/@n8n/ai-workflow-builder.ee/evaluations/cli/display.ts (new executable file, 102 lines)
@@ -0,0 +1,102 @@
import cliProgress from 'cli-progress';
import pc from 'picocolors';

import type { TestCase } from '../types/evaluation.js';
import type { TestResult } from '../types/test-result.js';
import {
	calculateTestMetrics,
	calculateCategoryAverages,
	countViolationsByType,
} from '../utils/evaluation-calculator.js';
import {
	displayTestResults,
	displaySummaryTable,
	displayViolationsDetail,
} from '../utils/evaluation-reporter.js';

/**
 * Creates a progress bar for test execution
 * @param total - Total number of tests
 * @returns Progress bar instance
 */
export function createProgressBar(total: number): cliProgress.SingleBar {
	const progressBar = new cliProgress.SingleBar(
		{
			format: 'Progress |{bar}| {percentage}% | {value}/{total} Tests | {status}',
			barCompleteChar: '█',
			barIncompleteChar: '░',
			hideCursor: true,
		},
		cliProgress.Presets.shades_classic,
	);
	progressBar.start(total, 0, { status: 'Starting...' });
	return progressBar;
}

/**
 * Updates progress bar with current status
 * @param progressBar - Progress bar instance
 * @param completed - Number of completed tests
 * @param total - Total number of tests
 * @param status - Optional status message
 */
export function updateProgress(
	progressBar: cliProgress.SingleBar,
	completed: number,
	total: number,
	status?: string,
): void {
	progressBar.update(completed, {
		status: status ?? `${completed}/${total} completed`,
	});
}

/**
 * Displays evaluation results in the console
 * @param testCases - Array of test cases
 * @param results - Array of test results
 * @param totalTime - Total execution time in milliseconds
 */
export function displayResults(
	testCases: TestCase[],
	results: TestResult[],
	totalTime: number,
): void {
	// Display test results
	displayTestResults(testCases, results);

	console.log();
	console.log(pc.green(`✓ All tests completed in ${(totalTime / 1000).toFixed(1)}s`));

	// Calculate metrics
	const metrics = calculateTestMetrics(results);
	const categoryAverages = calculateCategoryAverages(results);
	const violationCounts = countViolationsByType(results);

	const combinedMetrics = {
		...metrics,
		categoryAverages,
		violationCounts,
	};

	// Display summary
	displaySummaryTable(results, combinedMetrics);

	// Display violations if any exist
	if (violationCounts.critical > 0 || violationCounts.major > 0 || violationCounts.minor > 0) {
		displayViolationsDetail(results);
	}
}

/**
 * Displays error message and exits
 * @param message - Error message
 * @param error - Optional error object
 */
export function displayError(message: string, error?: unknown): void {
	console.error(pc.red(`✗ ${message}`));
	if (error) {
		console.error(error);
	}
	process.exit(1);
}
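For orientation, here is a minimal sketch of how these display helpers fit together in a simple sequential run. The runSequentially function and the runOne callback are illustrative assumptions, not part of this commit:

// Hypothetical demo of the display helpers above; runOne stands in for a real test executor.
import type { TestCase } from '../types/evaluation.js';
import type { TestResult } from '../types/test-result.js';
import { createProgressBar, updateProgress, displayResults, displayError } from './display.js';

async function runSequentially(
	testCases: TestCase[],
	runOne: (testCase: TestCase) => Promise<TestResult>,
): Promise<void> {
	const progressBar = createProgressBar(testCases.length);
	const startTime = Date.now();
	const results: TestResult[] = [];
	try {
		for (const [index, testCase] of testCases.entries()) {
			updateProgress(progressBar, index, testCases.length, `Running: ${testCase.name}`);
			results.push(await runOne(testCase));
		}
		progressBar.stop();
		displayResults(testCases, results, Date.now() - startTime);
	} catch (error) {
		progressBar.stop();
		displayError('Sequential run failed', error);
	}
}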
n8n-n8n-1.109.2/packages/@n8n/ai-workflow-builder.ee/evaluations/cli/runner.ts (new executable file, 104 lines)
@@ -0,0 +1,104 @@
import pLimit from 'p-limit';
import pc from 'picocolors';

import { createProgressBar, updateProgress, displayResults, displayError } from './display.js';
import { basicTestCases, generateTestCases } from '../chains/test-case-generator.js';
import {
	setupTestEnvironment,
	createAgent,
	getConcurrencyLimit,
	shouldGenerateTestCases,
	howManyTestCasesToGenerate,
} from '../core/environment.js';
import { runSingleTest, initializeTestTracking } from '../core/test-runner.js';
import type { TestCase } from '../types/evaluation.js';
import {
	calculateTestMetrics,
	calculateCategoryAverages,
	countViolationsByType,
} from '../utils/evaluation-calculator.js';
import { formatHeader, saveEvaluationResults } from '../utils/evaluation-helpers.js';
import { generateMarkdownReport } from '../utils/evaluation-reporter.js';

/**
 * Main CLI evaluation runner that executes all test cases in parallel
 * Supports concurrency control via EVALUATION_CONCURRENCY environment variable
 */
export async function runCliEvaluation(): Promise<void> {
	console.log(formatHeader('AI Workflow Builder Full Evaluation', 70));
	console.log();
	try {
		// Setup test environment
		const { parsedNodeTypes, llm, tracer } = await setupTestEnvironment();

		// Determine test cases to run
		let testCases: TestCase[] = basicTestCases;

		// Optionally generate additional test cases
		if (shouldGenerateTestCases()) {
			console.log(pc.blue('➔ Generating additional test cases...'));
			const generatedCases = await generateTestCases(llm, howManyTestCasesToGenerate());
			testCases = [...testCases, ...generatedCases];
		}

		// Get concurrency from environment
		const concurrency = getConcurrencyLimit();
		console.log(pc.dim(`Running ${testCases.length} test cases with concurrency=${concurrency}`));
		console.log();

		// Create progress bar
		const progressBar = createProgressBar(testCases.length);

		// Create concurrency limiter
		const limit = pLimit(concurrency);

		// Track progress
		let completed = 0;
		const startTime = Date.now();
		const testResults = initializeTestTracking(testCases);

		// Run all test cases in parallel with concurrency limit
		const promises = testCases.map(
			async (testCase) =>
				await limit(async () => {
					updateProgress(progressBar, completed, testCases.length, `Running: ${testCase.name}`);

					// Create a dedicated agent for this test to avoid state conflicts
					const testAgent = createAgent(parsedNodeTypes, llm, tracer);
					const result = await runSingleTest(testAgent, llm, testCase);

					testResults[testCase.id] = result.error ? 'fail' : 'pass';
					completed++;
					updateProgress(progressBar, completed, testCases.length);
					return result;
				}),
		);

		const results = await Promise.all(promises);
		const totalTime = Date.now() - startTime;
		progressBar.stop();

		// Display results
		displayResults(testCases, results, totalTime);

		// Calculate metrics for report
		const metrics = calculateTestMetrics(results);
		const categoryAverages = calculateCategoryAverages(results);
		const violationCounts = countViolationsByType(results);

		const combinedMetrics = {
			...metrics,
			categoryAverages,
			violationCounts,
		};

		// Generate and save results
		const report = generateMarkdownReport(results, combinedMetrics);
		const { reportPath, resultsPath } = saveEvaluationResults(results, report);

		console.log(`\nReport saved to: ${reportPath}`);
		console.log(`Detailed results saved to: ${resultsPath}`);
	} catch (error) {
		displayError('Evaluation failed', error);
	}
}
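For reference, a minimal entry-point sketch for invoking the runner. The file name and top-level call are assumptions; this commit only defines runCliEvaluation:

// Hypothetical entry point (e.g. evaluations/cli.ts); only runCliEvaluation is defined in this commit.
import { runCliEvaluation } from './cli/runner.js';

// Concurrency is read inside getConcurrencyLimit() from EVALUATION_CONCURRENCY, e.g.:
//   EVALUATION_CONCURRENCY=4 node dist/evaluations/cli.js
await runCliEvaluation();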