pull: initial commit
This commit is contained in:
@@ -0,0 +1,132 @@
|
||||
import type { IExecuteFunctions, INodeExecutionData, IDataObject } from 'n8n-workflow';
|
||||
|
||||
import { readSheet } from '../../Google/Sheet/v2/actions/utils/readOperation';
|
||||
import { GoogleSheet } from '../../Google/Sheet/v2/helpers/GoogleSheet';
|
||||
import type { ResourceLocator } from '../../Google/Sheet/v2/helpers/GoogleSheets.types';
|
||||
import { getSpreadsheetId } from '../../Google/Sheet/v2/helpers/GoogleSheets.utils';
|
||||
|
||||
export async function getSheet(
|
||||
this: IExecuteFunctions,
|
||||
googleSheet: GoogleSheet,
|
||||
): Promise<{
|
||||
title: string;
|
||||
sheetId: number;
|
||||
}> {
|
||||
const sheetWithinDocument = this.getNodeParameter('sheetName', 0, undefined, {
|
||||
extractValue: true,
|
||||
}) as string;
|
||||
const { mode: sheetMode } = this.getNodeParameter('sheetName', 0) as {
|
||||
mode: ResourceLocator;
|
||||
};
|
||||
|
||||
return await googleSheet.spreadsheetGetSheet(this.getNode(), sheetMode, sheetWithinDocument);
|
||||
}
|
||||
|
||||
export function getGoogleSheet(this: IExecuteFunctions) {
|
||||
const { mode, value } = this.getNodeParameter('documentId', 0) as IDataObject;
|
||||
const spreadsheetId = getSpreadsheetId(this.getNode(), mode as ResourceLocator, value as string);
|
||||
|
||||
const googleSheet = new GoogleSheet(spreadsheetId, this);
|
||||
|
||||
return googleSheet;
|
||||
}
|
||||
|
||||
export async function getFilteredResults(
|
||||
this: IExecuteFunctions,
|
||||
operationResult: INodeExecutionData[],
|
||||
googleSheet: GoogleSheet,
|
||||
result: { title: string; sheetId: number },
|
||||
startingRow: number,
|
||||
endingRow: number,
|
||||
): Promise<INodeExecutionData[]> {
|
||||
const sheetName = result.title;
|
||||
|
||||
operationResult = await readSheet.call(
|
||||
this,
|
||||
googleSheet,
|
||||
sheetName,
|
||||
0,
|
||||
operationResult,
|
||||
this.getNode().typeVersion,
|
||||
[],
|
||||
undefined,
|
||||
{
|
||||
rangeDefinition: 'specifyRange',
|
||||
headerRow: 1,
|
||||
firstDataRow: startingRow,
|
||||
includeHeadersWithEmptyCells: true,
|
||||
},
|
||||
);
|
||||
|
||||
return operationResult.filter((row) => (row?.json?.row_number as number) <= endingRow);
|
||||
}
|
||||
|
||||
export async function getNumberOfRowsLeftFiltered(
|
||||
this: IExecuteFunctions,
|
||||
googleSheet: GoogleSheet,
|
||||
sheetName: string,
|
||||
startingRow: number,
|
||||
endingRow: number,
|
||||
) {
|
||||
const remainderSheet: INodeExecutionData[] = await readSheet.call(
|
||||
this,
|
||||
googleSheet,
|
||||
sheetName,
|
||||
0,
|
||||
[],
|
||||
this.getNode().typeVersion,
|
||||
[],
|
||||
undefined,
|
||||
{
|
||||
rangeDefinition: 'specifyRange',
|
||||
headerRow: 1,
|
||||
firstDataRow: startingRow,
|
||||
},
|
||||
);
|
||||
|
||||
return remainderSheet.filter((row) => (row?.json?.row_number as number) <= endingRow).length;
|
||||
}
|
||||
|
||||
export async function getResults(
|
||||
this: IExecuteFunctions,
|
||||
operationResult: INodeExecutionData[],
|
||||
googleSheet: GoogleSheet,
|
||||
result: { title: string; sheetId: number },
|
||||
rangeOptions: IDataObject,
|
||||
): Promise<INodeExecutionData[]> {
|
||||
const sheetName = result.title;
|
||||
|
||||
operationResult = await readSheet.call(
|
||||
this,
|
||||
googleSheet,
|
||||
sheetName,
|
||||
0,
|
||||
operationResult,
|
||||
this.getNode().typeVersion,
|
||||
[],
|
||||
undefined,
|
||||
{ ...rangeOptions, includeHeadersWithEmptyCells: true },
|
||||
);
|
||||
|
||||
return operationResult;
|
||||
}
|
||||
|
||||
export async function getRowsLeft(
|
||||
this: IExecuteFunctions,
|
||||
googleSheet: GoogleSheet,
|
||||
sheetName: string,
|
||||
rangeString: string,
|
||||
) {
|
||||
const remainderSheet: INodeExecutionData[] = await readSheet.call(
|
||||
this,
|
||||
googleSheet,
|
||||
sheetName,
|
||||
0,
|
||||
[],
|
||||
this.getNode().typeVersion,
|
||||
[],
|
||||
rangeString,
|
||||
);
|
||||
|
||||
return remainderSheet.length;
|
||||
}
|
||||
258
n8n-n8n-1.109.2/packages/nodes-base/nodes/Evaluation/utils/evaluationUtils.ts
Executable file
258
n8n-n8n-1.109.2/packages/nodes-base/nodes/Evaluation/utils/evaluationUtils.ts
Executable file
@@ -0,0 +1,258 @@
|
||||
import { UserError, NodeOperationError, EVALUATION_TRIGGER_NODE_TYPE } from 'n8n-workflow';
|
||||
import type {
|
||||
INodeParameters,
|
||||
IDataObject,
|
||||
IExecuteFunctions,
|
||||
INodeExecutionData,
|
||||
JsonObject,
|
||||
JsonValue,
|
||||
} from 'n8n-workflow';
|
||||
|
||||
import { getGoogleSheet, getSheet } from './evaluationTriggerUtils';
|
||||
import { metricHandlers } from './metricHandlers';
|
||||
import { composeReturnItem } from '../../Set/v2/helpers/utils';
|
||||
import assert from 'node:assert';
|
||||
|
||||
function withEvaluationData(this: IExecuteFunctions, data: JsonObject): INodeExecutionData[] {
|
||||
const inputData = this.getInputData();
|
||||
if (!inputData.length) {
|
||||
return inputData;
|
||||
}
|
||||
|
||||
const isEvaluationMode = this.getMode() === 'evaluation';
|
||||
return [
|
||||
{
|
||||
...inputData[0],
|
||||
// test-runner only looks at first item. Don't need to duplicate the data for each item
|
||||
evaluationData: isEvaluationMode ? data : undefined,
|
||||
},
|
||||
...inputData.slice(1),
|
||||
];
|
||||
}
|
||||
|
||||
function isOutputsArray(
|
||||
value: unknown,
|
||||
): value is Array<{ outputName: string; outputValue: JsonValue }> {
|
||||
return (
|
||||
Array.isArray(value) &&
|
||||
value.every(
|
||||
(item) =>
|
||||
typeof item === 'object' &&
|
||||
item !== null &&
|
||||
'outputName' in item &&
|
||||
'outputValue' in item &&
|
||||
typeof item.outputName === 'string',
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Writes the node's configured `outputs.values` back to the Google Sheet row
 * that the evaluation trigger is currently iterating, then mirrors the same
 * values onto the first output item as `evaluationData`.
 *
 * If the execution did not start from an evaluation trigger, a hint is shown
 * and the input is passed through unchanged.
 *
 * @throws UserError when no outputs are configured.
 */
export async function setOutputs(this: IExecuteFunctions): Promise<INodeExecutionData[][]> {
	const evaluationNode = this.getNode();
	const parentNodes = this.getParentNodes(evaluationNode.name);

	// Only meaningful when an upstream evaluation trigger actually ran.
	const evalTrigger = parentNodes.find((node) => node.type === EVALUATION_TRIGGER_NODE_TYPE);
	const isEvalTriggerExecuted = evalTrigger
		? this.evaluateExpression(`{{ $('${evalTrigger?.name}').isExecuted }}`, 0)
		: false;

	if (!evalTrigger || !isEvalTriggerExecuted) {
		this.addExecutionHints({
			message: "No outputs were set since the execution didn't start from an evaluation trigger",
			location: 'outputPane',
		});
		return [this.getInputData()];
	}

	const outputFields = this.getNodeParameter('outputs.values', 0, []);
	assert(
		isOutputsArray(outputFields),
		'Invalid output fields format. Expected an array of objects with outputName and outputValue properties.',
	);

	if (outputFields.length === 0) {
		throw new UserError('No outputs to set', {
			description: 'Add outputs to write back to the Google Sheet using the ‘Add Output’ button',
		});
	}

	const googleSheetInstance = getGoogleSheet.call(this);
	const googleSheet = await getSheet.call(this, googleSheetInstance);

	// The row currently being evaluated, as emitted by the trigger node.
	const evaluationTrigger = this.evaluateExpression(
		`{{ $('${evalTrigger.name}').first().json }}`,
		0,
	) as IDataObject;

	// NOTE(review): when row_number holds the literal header marker
	// 'row_number' the header row (1) is targeted — presumably the trigger
	// emits this for the header; confirm against the trigger implementation.
	const rowNumber =
		evaluationTrigger.row_number === 'row_number' ? 1 : evaluationTrigger.row_number;

	// Existing data columns, excluding the bookkeeping fields.
	const columnNames = Object.keys(evaluationTrigger).filter(
		(key) => key !== 'row_number' && key !== '_rowsLeft',
	);

	// Append any configured output columns that don't exist yet.
	outputFields.forEach(({ outputName }) => {
		if (!columnNames.includes(outputName)) {
			columnNames.push(outputName);
		}
	});

	// Rewrite the header row so new output columns appear in the sheet.
	await googleSheetInstance.updateRows(
		googleSheet.title,
		[columnNames],
		'RAW', // default value for Value Input Mode
		1, // header row
	);

	// Collapse the name/value pairs into a single object.
	const outputs = outputFields.reduce<JsonObject>((acc, { outputName, outputValue }) => {
		acc[outputName] = outputValue;
		return acc;
	}, {});

	const preparedData = googleSheetInstance.prepareDataForUpdatingByRowNumber(
		[
			{
				row_number: rowNumber,
				...outputs,
			},
		],
		`${googleSheet.title}!A:Z`,
		[columnNames],
	);

	await googleSheetInstance.batchUpdate(
		preparedData.updateData,
		'RAW', // default value for Value Input Mode
	);

	return [withEvaluationData.call(this, outputs)];
}
|
||||
|
||||
function isInputsArray(
|
||||
value: unknown,
|
||||
): value is Array<{ inputName: string; inputValue: JsonValue }> {
|
||||
return (
|
||||
Array.isArray(value) &&
|
||||
value.every(
|
||||
(item) =>
|
||||
typeof item === 'object' &&
|
||||
item !== null &&
|
||||
'inputName' in item &&
|
||||
'inputValue' in item &&
|
||||
typeof item.inputName === 'string',
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
export function setInputs(this: IExecuteFunctions): INodeExecutionData[][] {
|
||||
const evaluationNode = this.getNode();
|
||||
const parentNodes = this.getParentNodes(evaluationNode.name);
|
||||
|
||||
const evalTrigger = parentNodes.find((node) => node.type === 'n8n-nodes-base.evaluationTrigger');
|
||||
const isEvalTriggerExecuted = evalTrigger
|
||||
? this.evaluateExpression(`{{ $('${evalTrigger?.name}').isExecuted }}`, 0)
|
||||
: false;
|
||||
|
||||
if (!evalTrigger || !isEvalTriggerExecuted) {
|
||||
this.addExecutionHints({
|
||||
message: "No inputs were set since the execution didn't start from an evaluation trigger",
|
||||
location: 'outputPane',
|
||||
});
|
||||
return [this.getInputData()];
|
||||
}
|
||||
|
||||
const inputFields = this.getNodeParameter('inputs.values', 0, []);
|
||||
assert(
|
||||
isInputsArray(inputFields),
|
||||
'Invalid input fields format. Expected an array of objects with inputName and inputValue properties.',
|
||||
);
|
||||
|
||||
if (inputFields.length === 0) {
|
||||
throw new UserError('No inputs to set', {
|
||||
description: 'Add inputs using the ‘Add Input’ button',
|
||||
});
|
||||
}
|
||||
|
||||
const inputs = inputFields.reduce<JsonObject>((acc, { inputName, inputValue }) => {
|
||||
acc[inputName] = inputValue;
|
||||
return acc;
|
||||
}, {});
|
||||
|
||||
return [withEvaluationData.call(this, inputs)];
|
||||
}
|
||||
|
||||
export async function setMetrics(this: IExecuteFunctions): Promise<INodeExecutionData[][]> {
|
||||
const items = this.getInputData();
|
||||
const metrics: INodeExecutionData[] = [];
|
||||
|
||||
for (let i = 0; i < items.length; i++) {
|
||||
const metric = this.getNodeParameter('metric', i, {}) as keyof typeof metricHandlers;
|
||||
if (!metricHandlers.hasOwnProperty(metric)) {
|
||||
throw new NodeOperationError(this.getNode(), 'Unknown metric');
|
||||
}
|
||||
const newData = await metricHandlers[metric].call(this, i);
|
||||
|
||||
const newItem: INodeExecutionData = {
|
||||
json: {},
|
||||
pairedItem: { item: i },
|
||||
};
|
||||
|
||||
const returnItem = composeReturnItem.call(
|
||||
this,
|
||||
i,
|
||||
newItem,
|
||||
newData,
|
||||
{ dotNotation: false, include: 'none' },
|
||||
1,
|
||||
);
|
||||
metrics.push(returnItem);
|
||||
}
|
||||
|
||||
return [metrics];
|
||||
}
|
||||
|
||||
export async function checkIfEvaluating(this: IExecuteFunctions): Promise<INodeExecutionData[][]> {
|
||||
const evaluationExecutionResult: INodeExecutionData[] = [];
|
||||
const normalExecutionResult: INodeExecutionData[] = [];
|
||||
|
||||
const evaluationNode = this.getNode();
|
||||
const parentNodes = this.getParentNodes(evaluationNode.name);
|
||||
|
||||
const evalTrigger = parentNodes.find((node) => node.type === 'n8n-nodes-base.evaluationTrigger');
|
||||
const isEvalTriggerExecuted = evalTrigger
|
||||
? this.evaluateExpression(`{{ $('${evalTrigger?.name}').isExecuted }}`, 0)
|
||||
: false;
|
||||
|
||||
if (isEvalTriggerExecuted) {
|
||||
return [this.getInputData(), normalExecutionResult];
|
||||
} else {
|
||||
return [evaluationExecutionResult, this.getInputData()];
|
||||
}
|
||||
}
|
||||
|
||||
export function getOutputConnectionTypes(parameters: INodeParameters) {
|
||||
if (parameters.operation === 'checkIfEvaluating') {
|
||||
return [
|
||||
{ type: 'main', displayName: 'Evaluation' },
|
||||
{ type: 'main', displayName: 'Normal' },
|
||||
];
|
||||
}
|
||||
|
||||
return [{ type: 'main' }];
|
||||
}
|
||||
|
||||
export function getInputConnectionTypes(
|
||||
parameters: INodeParameters,
|
||||
metricRequiresModelConnectionFn: (metric: string) => boolean,
|
||||
) {
|
||||
if (
|
||||
parameters.operation === 'setMetrics' &&
|
||||
metricRequiresModelConnectionFn(parameters.metric as string)
|
||||
) {
|
||||
return [
|
||||
{ type: 'main' },
|
||||
{ type: 'ai_languageModel', displayName: 'Model', maxConnections: 1 },
|
||||
];
|
||||
}
|
||||
|
||||
return [{ type: 'main' }];
|
||||
}
|
||||
357
n8n-n8n-1.109.2/packages/nodes-base/nodes/Evaluation/utils/metricHandlers.ts
Executable file
357
n8n-n8n-1.109.2/packages/nodes-base/nodes/Evaluation/utils/metricHandlers.ts
Executable file
@@ -0,0 +1,357 @@
|
||||
import {
|
||||
ChatPromptTemplate,
|
||||
SystemMessagePromptTemplate,
|
||||
HumanMessagePromptTemplate,
|
||||
} from '@langchain/core/prompts';
|
||||
import type { BaseLanguageModel } from '@langchain/core/language_models/base';
|
||||
import { distance } from 'fastest-levenshtein';
|
||||
import { NodeOperationError, nodeNameToToolName } from 'n8n-workflow';
|
||||
import type {
|
||||
FieldType,
|
||||
AssignmentCollectionValue,
|
||||
IDataObject,
|
||||
IExecuteFunctions,
|
||||
} from 'n8n-workflow';
|
||||
import { z } from 'zod';
|
||||
|
||||
import { validateEntry } from '../../Set/v2/helpers/utils';
|
||||
import {
|
||||
CORRECTNESS_PROMPT,
|
||||
CORRECTNESS_INPUT_PROMPT,
|
||||
HELPFULNESS_PROMPT,
|
||||
HELPFULNESS_INPUT_PROMPT,
|
||||
} from '../Evaluation/CannedMetricPrompts.ee';
|
||||
|
||||
/**
 * One handler per selectable metric. Each handler is invoked with the node's
 * execution context as `this` and the item index `i`, and returns an object
 * mapping metric name(s) to numeric score(s).
 */
export const metricHandlers = {
	/**
	 * User-defined numeric metrics from the `metrics` assignment collection.
	 * Every assignment must have a name and a numeric value.
	 */
	async customMetrics(this: IExecuteFunctions, i: number): Promise<IDataObject> {
		const dataToSave = this.getNodeParameter('metrics', i, {}) as AssignmentCollectionValue;

		return Object.fromEntries(
			(dataToSave?.assignments ?? []).map((assignment) => {
				// Coerce string inputs to numbers; metrics must be numeric.
				const assignmentValue =
					typeof assignment.value === 'number' ? assignment.value : Number(assignment.value);

				if (isNaN(assignmentValue)) {
					throw new NodeOperationError(
						this.getNode(),
						`Value for '${assignment.name}' isn't a number`,
						{
							description: `It's currently '${assignment.value}'. Metrics must be numeric.`,
						},
					);
				}

				// NOTE(review): the isNaN(assignmentValue) part of this condition is
				// unreachable — the NaN case already threw above. Only the missing
				// name check is effective here.
				if (!assignment.name || isNaN(assignmentValue)) {
					throw new NodeOperationError(this.getNode(), 'Metric name missing', {
						description: 'Make sure each metric you define has a name',
					});
				}

				const { name, value } = validateEntry(
					assignment.name,
					assignment.type as FieldType,
					assignmentValue,
					this.getNode(),
					i,
					false,
					1,
				);

				return [name, value];
			}),
		);
	},

	/**
	 * Fraction of the expected tools (comma-separated parameter) that appear
	 * in the agent's intermediate steps. Returns a score in [0, 1].
	 */
	async toolsUsed(this: IExecuteFunctions, i: number): Promise<IDataObject> {
		const expectedToolsParam = this.getNodeParameter('expectedTools', i, '');
		const expectedToolsString = (expectedToolsParam as string)?.trim() || '';
		// Parse the comma-separated list, ignoring empty entries.
		const expectedTools: string[] = expectedToolsString
			? expectedToolsString
					.split(',')
					.map((tool) => tool.trim())
					.filter((tool) => tool !== '')
			: [];

		const intermediateSteps = this.getNodeParameter('intermediateSteps', i, {}) as Array<{
			action: { tool: string };
		}>;

		if (!expectedTools || expectedTools.length === 0) {
			throw new NodeOperationError(this.getNode(), 'Expected tool name missing', {
				description:
					'Make sure you add at least one expected tool name (comma-separated if multiple)',
			});
		}
		if (!intermediateSteps || !Array.isArray(intermediateSteps)) {
			throw new NodeOperationError(this.getNode(), 'Intermediate steps missing', {
				description:
					"Make sure to enable returning intermediate steps in your agent node's options, then map them in here",
			});
		}

		// Convert user-entered tool names to the format used in intermediate steps (case-insensitive)
		const normalizedExpectedTools = expectedTools.map((tool) =>
			nodeNameToToolName(tool).toLowerCase(),
		);

		// Calculate individual tool usage (1 if used, 0 if not used)
		const toolUsageScores = normalizedExpectedTools.map((normalizedTool) => {
			return intermediateSteps.some((step) => {
				// Handle malformed intermediate steps gracefully
				if (!step || !step.action || typeof step.action.tool !== 'string') {
					return false;
				}
				return step.action.tool.toLowerCase() === normalizedTool;
			})
				? 1
				: 0;
		});

		// Calculate the average of all tool usage scores
		const averageScore =
			toolUsageScores.reduce((sum: number, score: number) => sum + score, 0) /
			toolUsageScores.length;

		const metricName = this.getNodeParameter('options.metricName', i, 'Tools Used') as string;

		return {
			[metricName]: averageScore,
		};
	},

	/**
	 * Exact-match comparison of expected vs actual answer: 1 on equality
	 * (after trimming), 0 otherwise.
	 */
	async categorization(this: IExecuteFunctions, i: number): Promise<IDataObject> {
		const expectedAnswer = (this.getNodeParameter('expectedAnswer', i, '') as string)
			.toString()
			.trim();
		const actualAnswer = (this.getNodeParameter('actualAnswer', i, '') as string).toString().trim();

		if (!expectedAnswer) {
			throw new NodeOperationError(this.getNode(), 'Expected answer is missing', {
				description: 'Make sure to fill in an expected answer',
			});
		}
		if (!actualAnswer) {
			throw new NodeOperationError(this.getNode(), 'Actual answer is missing', {
				description: 'Make sure to fill in an actual answer',
			});
		}

		const metricName = this.getNodeParameter('options.metricName', i, 'Categorization') as string;

		return {
			[metricName]: expectedAnswer === actualAnswer ? 1 : 0,
		};
	},

	/**
	 * Normalized Levenshtein similarity between expected and actual answers:
	 * 1 - editDistance / longerLength, so 1 means identical strings.
	 */
	async stringSimilarity(this: IExecuteFunctions, i: number): Promise<IDataObject> {
		const expectedAnswer = (this.getNodeParameter('expectedAnswer', i, '') as string)
			.toString()
			.trim();
		const actualAnswer = (this.getNodeParameter('actualAnswer', i, '') as string).toString().trim();

		if (!expectedAnswer) {
			throw new NodeOperationError(this.getNode(), 'Expected answer is missing', {
				description: 'Make sure to fill in an expected answer',
			});
		}
		if (!actualAnswer) {
			throw new NodeOperationError(this.getNode(), 'Actual answer is missing', {
				description: 'Make sure to fill in an actual answer',
			});
		}

		const metricName = this.getNodeParameter(
			'options.metricName',
			i,
			'String similarity',
		) as string;

		const editDistance = distance(expectedAnswer, actualAnswer);
		const longerStringLength = Math.max(expectedAnswer.length, actualAnswer.length);
		// Both strings are non-empty here, but guard the division anyway.
		const similarity = longerStringLength === 0 ? 1 : 1 - editDistance / longerStringLength;

		return {
			[metricName]: similarity,
		};
	},

	/**
	 * LLM-as-judge helpfulness score (integer 1-5) of a response to a user
	 * query, produced by the connected language model via structured output.
	 */
	async helpfulness(this: IExecuteFunctions, i: number): Promise<IDataObject> {
		const userQuery = (this.getNodeParameter('userQuery', i, '') as string).toString().trim();
		const actualAnswer = (this.getNodeParameter('actualAnswer', i, '') as string).toString().trim();

		if (!userQuery) {
			throw new NodeOperationError(this.getNode(), 'User query is missing', {
				description: 'Make sure to fill in the user query in the User Query field',
			});
		}
		if (!actualAnswer) {
			throw new NodeOperationError(this.getNode(), 'Response is missing', {
				description: 'Make sure to fill in the response to evaluate in the Response field',
			});
		}

		// Get the connected LLM model
		const llm = (await this.getInputConnectionData('ai_languageModel', 0)) as BaseLanguageModel;

		if (!llm) {
			throw new NodeOperationError(this.getNode(), 'No language model connected', {
				description: 'Connect a language model to the Model input to use the helpfulness metric',
			});
		}

		// Get the system prompt and input prompt template, using defaults if not provided
		const systemPrompt = this.getNodeParameter('prompt', i, HELPFULNESS_PROMPT) as string;
		const inputPromptTemplate = this.getNodeParameter(
			'options.inputPrompt',
			i,
			HELPFULNESS_INPUT_PROMPT[0],
		) as string;

		// Define the expected response schema
		const responseSchema = z.object({
			extended_reasoning: z
				.string()
				.describe('detailed step-by-step analysis of the response helpfulness'),
			reasoning_summary: z.string().describe('one sentence summary of the response helpfulness'),
			score: z
				.number()
				.int()
				.min(1)
				.max(5)
				.describe('integer from 1 to 5 representing the helpfulness score'),
		});

		// Create LangChain prompt templates
		const systemMessageTemplate = SystemMessagePromptTemplate.fromTemplate('{systemPrompt}');
		const humanMessageTemplate = HumanMessagePromptTemplate.fromTemplate(inputPromptTemplate);

		// Create the chat prompt template
		const chatPrompt = ChatPromptTemplate.fromMessages([
			systemMessageTemplate,
			humanMessageTemplate,
		]);

		// Create chain with structured output
		if (!llm.withStructuredOutput) {
			throw new NodeOperationError(
				this.getNode(),
				'Language model does not support structured output',
				{
					description:
						'The connected language model does not support structured output. Please use a compatible model.',
				},
			);
		}
		const chain = chatPrompt.pipe(llm.withStructuredOutput(responseSchema));

		try {
			const response = await chain.invoke({
				systemPrompt,
				user_query: userQuery,
				actual_answer: actualAnswer,
			});

			const metricName = this.getNodeParameter('options.metricName', i, 'Helpfulness') as string;

			// Return the score as the main metric
			return {
				[metricName]: response.score,
			};
		} catch (error) {
			throw new NodeOperationError(this.getNode(), 'Failed to evaluate helpfulness', {
				description: `Error from language model: ${error instanceof Error ? error.message : String(error)}`,
			});
		}
	},

	/**
	 * LLM-as-judge correctness score (integer 1-5) comparing the actual
	 * answer against an expected answer via structured output.
	 */
	async correctness(this: IExecuteFunctions, i: number): Promise<IDataObject> {
		const expectedAnswer = (this.getNodeParameter('expectedAnswer', i, '') as string)
			.toString()
			.trim();
		const actualAnswer = (this.getNodeParameter('actualAnswer', i, '') as string).toString().trim();

		if (!expectedAnswer) {
			throw new NodeOperationError(this.getNode(), 'Expected answer is missing', {
				description: 'Make sure to fill in an expected answer',
			});
		}
		if (!actualAnswer) {
			throw new NodeOperationError(this.getNode(), 'Actual answer is missing', {
				description: 'Make sure to fill in an actual answer',
			});
		}

		// Get the connected LLM model
		const llm = (await this.getInputConnectionData('ai_languageModel', 0)) as BaseLanguageModel;

		if (!llm) {
			throw new NodeOperationError(this.getNode(), 'No language model connected', {
				description: 'Connect a language model to the Model input to use the correctness metric',
			});
		}

		// Get the system prompt and input prompt template, using defaults if not provided
		const systemPrompt = this.getNodeParameter('prompt', i, CORRECTNESS_PROMPT) as string;
		const inputPromptTemplate = this.getNodeParameter(
			'options.inputPrompt',
			i,
			CORRECTNESS_INPUT_PROMPT[0],
		) as string;

		// Define the expected response schema
		const responseSchema = z.object({
			extended_reasoning: z
				.string()
				.describe('detailed step-by-step analysis of factual accuracy and similarity'),
			reasoning_summary: z.string().describe('one sentence summary focusing on key differences'),
			score: z
				.number()
				.int()
				.min(1)
				.max(5)
				.describe('integer from 1 to 5 representing the similarity score'),
		});

		// Create LangChain prompt templates
		const systemMessageTemplate = SystemMessagePromptTemplate.fromTemplate('{systemPrompt}');
		const humanMessageTemplate = HumanMessagePromptTemplate.fromTemplate(inputPromptTemplate);

		// Create the chat prompt template
		const chatPrompt = ChatPromptTemplate.fromMessages([
			systemMessageTemplate,
			humanMessageTemplate,
		]);

		// Create chain with structured output
		if (!llm.withStructuredOutput) {
			throw new NodeOperationError(
				this.getNode(),
				'Language model does not support structured output',
				{
					description:
						'The connected language model does not support structured output. Please use a compatible model.',
				},
			);
		}
		const chain = chatPrompt.pipe(llm.withStructuredOutput(responseSchema));

		try {
			const response = await chain.invoke({
				systemPrompt,
				actual_answer: actualAnswer,
				expected_answer: expectedAnswer,
			});

			const metricName = this.getNodeParameter('options.metricName', i, 'Correctness') as string;

			// Return the score as the main metric
			return {
				[metricName]: response.score,
			};
		} catch (error) {
			throw new NodeOperationError(this.getNode(), 'Failed to evaluate correctness', {
				description: `Error from language model: ${error instanceof Error ? error.message : String(error)}`,
			});
		}
	},
};
|
||||
Reference in New Issue
Block a user