import cloneDeep from 'lodash/cloneDeep';
import escapeRegExp from 'lodash/escapeRegExp';
import isEqual from 'lodash/isEqual';
import mapValues from 'lodash/mapValues';

import { OperationalError } from './errors';
import type { INode, INodeParameters, NodeParameterValueType } from './interfaces';

class LazyRegExp {
	private regExp?: RegExp;

	constructor(
		private pattern: () => string,
		private flags?: string,
	) {}

	get(): RegExp {
		if (!this.regExp) this.regExp = new RegExp(this.pattern(), this.flags);
		return this.regExp;
	}
}

type ExpressionMapping = {
	nodeNameInExpression: null | string; // 'abc';
	originalExpression: string; // "$('abc').first().def.ghi";
	replacementPrefix: string; // "$('Start').first()";
	replacementName: string; // "def_ghi";
};

type ParameterMapping<T> = undefined | T[] | { [key: PropertyKey]: ParameterMapping<T> };
type ParameterExtractMapping = ParameterMapping<ExpressionMapping>;

const DOT_REFERENCEABLE_JS_VARIABLE = /\w[\w\d_\$]*/;
const INVALID_JS_DOT_PATH = /[^\.\w\d_\$]/;
const INVALID_JS_DOT_NAME = /[^\w\d_\$]/;

// These are the keys that are followed by one of DATA_ACCESSORS
const ITEM_TO_DATA_ACCESSORS = [
	/^first\(\)/,
	/^last\(\)/,
	/^all\(\)/,
	// The order here is relevant because `item` would match occurrences of `itemMatching`
	/^itemMatching\(\d+\)/, // We only support trivial itemMatching arguments
	/^item/,
];

// These we can safely convert to a normal argument
const ITEM_ACCESSORS = ['params', 'isExecuted'];
const DATA_ACCESSORS = ['json', 'binary'];

export function hasDotNotationBannedChar(nodeName: string) {
	const DOT_NOTATION_BANNED_CHARS = /^(\d)|[\\ `!@#$%^&*()_+\-=[\]{};':"\\|,.<>?~]/g;
	return DOT_NOTATION_BANNED_CHARS.test(nodeName);
}

export function backslashEscape(nodeName: string) {
	const BACKSLASH_ESCAPABLE_CHARS = /[.*+?^${}()|[\]\\]/g;
	return nodeName.replace(BACKSLASH_ESCAPABLE_CHARS, (char) => `\\${char}`);
}

export function dollarEscape(nodeName: string) {
	return nodeName.replace(new RegExp('\\$', 'g'), '$$$$');
}

type AccessPattern = {
	checkPattern: string;
	replacePattern: (name: string) => string;
	customCallback?: (expression: string, newName: string, escapedNewName: string) => string;
};

const ACCESS_PATTERNS: AccessPattern[] = [
	{
		checkPattern: '$(',
		replacePattern: (s) => String.raw`(\$\(['"])${s}(['"]\))`,
	},
	{
		checkPattern: '$node[',
		replacePattern: (s) => String.raw`(\$node\[['"])${s}(['"]\])`,
	},
	{
		checkPattern: '$node.',
		replacePattern: (s) => String.raw`(\$node\.)${s}(\.?)`,
		customCallback: (expression: string, newName: string, escapedNewName: string) => {
			if (hasDotNotationBannedChar(newName)) {
				const regex = new RegExp(`.${backslashEscape(newName)}( |\\.)`, 'g');
				return expression.replace(regex, `["${escapedNewName}"]$1`);
			}
			return expression;
		},
	},
	{
		checkPattern: '$items(',
		replacePattern: (s) => String.raw`(\$items\(['"])${s}(['"],|['"]\))`,
	},
];

export function applyAccessPatterns(expression: string, previousName: string, newName: string) {
	// To avoid running the "expensive" regex logic when it is not needed,
	// first do a simple check whether the expression contains the node name at all
	if (!expression.includes(previousName)) return expression;

	// Really contains the node name (even though we do not know yet whether it is actually a $node-style reference)
	const escapedOldName = backslashEscape(previousName); // for match
	const escapedNewName = dollarEscape(newName); // for replacement

	for (const pattern of ACCESS_PATTERNS) {
		if (expression.includes(pattern.checkPattern)) {
			expression = expression.replace(
				new RegExp(pattern.replacePattern(escapedOldName), 'g'),
				`$1${escapedNewName}$2`,
			);
			if (pattern.customCallback) {
				expression = pattern.customCallback(expression, newName, escapedNewName);
			}
		}
	}
	return expression;
}
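// Illustrative sketch (not from the original source): the expected effect of
// `applyAccessPatterns` when renaming a node inside an expression. The node
// names and the expressions below are made-up examples.
//
//   applyAccessPatterns("={{ $('Old Node').item.json.id }}", 'Old Node', 'New Node');
//   // -> "={{ $('New Node').item.json.id }}"
//
//   applyAccessPatterns('={{ $node["Old Node"].json.id }}', 'Old Node', 'New Node');
//   // -> '={{ $node["New Node"].json.id }}'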
function convertToUniqueJsDotName(nodeName: string, allNodeNames: string[]) {
	let jsLegal = nodeName
		.replaceAll(' ', '_')
		.split('')
		.filter((x) => !INVALID_JS_DOT_NAME.test(x))
		.join('');
	if (nodeName === jsLegal) return jsLegal;

	// This accounts for theoretical cases where we collide with other reduced names.
	// By adding our own index in the array we also avoid running into theoretical cases
	// where a node with the name 'ourName_27' exists for our reduced name 'ourName',
	// because we must have a different index, so only one of us can be `ourName_27_27`.
	//
	// The underscore prevents colliding e.g. index 1 with 11
	while (allNodeNames.includes(jsLegal)) jsLegal += `_${allNodeNames.indexOf(nodeName)}`;

	return jsLegal;
}

function convertDataAccessorName(name: string): string {
	const [fnName, maybeDigits] = name.split('(');
	switch (fnName.toLowerCase()) {
		case 'item':
			return fnName;
		case 'first':
		case 'last':
			return `${fnName}Item`;
		case 'all':
			return `${fnName}Items`;
	}
	// use the digits without the )
	return `${fnName}_${maybeDigits?.slice(0, -1) ?? 'unknown'}`;
}
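// Illustrative sketch (not from the original source): how the two helpers above
// are expected to behave, using made-up node names.
//
//   convertToUniqueJsDotName('My Node!', ['My Node!', 'Other']); // -> 'My_Node'
//   convertDataAccessorName('first()');                          // -> 'firstItem'
//   convertDataAccessorName('all()');                            // -> 'allItems'
//   convertDataAccessorName('itemMatching(20)');                 // -> 'itemMatching_20'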
function parseExpressionMapping(
	isolatedExpression: string,
	nodeNameInExpression: string | null,
	nodeNamePlainJs: string | null,
	startNodeName: string,
): ExpressionMapping | null {
	const splitExpr = isolatedExpression.split('.');
	// This supports a literal . used in the node name
	const dotsInName = nodeNameInExpression?.split('').filter((x) => x === '.').length ?? 0;
	const dotInAccessorsOffset = isolatedExpression.startsWith('$node.') ? 1 : 0;
	const exprStart = splitExpr.slice(0, dotInAccessorsOffset + dotsInName + 1).join('.');
	const parts = splitExpr.slice(dotInAccessorsOffset + dotsInName + 1);

	// The calling code is expected to only handle $json expressions for the root node,
	// as these are invalid conversions for inner nodes
	if (exprStart === '$json') {
		let partsIdx = 0;
		for (; partsIdx < parts.length; ++partsIdx) {
			if (!DOT_REFERENCEABLE_JS_VARIABLE.test(parts[partsIdx])) break;
		}
		return {
			nodeNameInExpression: null,
			originalExpression: `${exprStart}.${parts.slice(0, partsIdx + 1).join('.')}`, // $json.valid.until, but not ['x'] after
			replacementPrefix: `${exprStart}`, // $json
			replacementName: `${parts.slice(0, partsIdx).join('_')}`, // valid_until
		};
	}

	if (parts.length === 0) {
		// If a node is referenced by name without any accessor we return a proxy that stringifies as an empty object.
		// It can still be validly passed to other functions,
		// but when passed to a sub-workflow it collapses into a true empty object.
		// So let's just abort porting this and not touch it
		return null;
	}

	// Handling `all()` is very awkward since we need to pass the value as a single parameter but
	// can't do `$('Start').all()` since it would be a different node's `all`
	const accessorPrefix = parts[0] === 'all()' ? 'first()' : parts[0];

	if (ITEM_TO_DATA_ACCESSORS.some((x) => parts[0].match(x))) {
		if (parts.length === 1) {
			// This case is a literal use of the return value of `$('nodeName').first()`.
			// Note that it's safe to rename to first, even if there is a variable of the same name,
			// since we resolve duplicate names later in the process
			const originalName = parts[0];
			return {
				nodeNameInExpression,
				originalExpression: `${exprStart}.${parts[0]}`, // $('abc').first()
				replacementPrefix: `$('${startNodeName}').${accessorPrefix}.json`, // $('Start').first().json
				replacementName: `${nodeNamePlainJs}_${convertDataAccessorName(originalName)}`, // nodeName_firstItem, nodeName_itemMatching_20
			};
		} else {
			if (DATA_ACCESSORS.some((x) => parts[1] === x)) {
				let partsIdx = 2;
				for (; partsIdx < parts.length; ++partsIdx) {
					if (!DOT_REFERENCEABLE_JS_VARIABLE.test(parts[partsIdx])) break;
				}
				// Use a separate name for anything except `item` to avoid users confusing e.g. their first() variables
				const replacementPostfix =
					parts[0] === 'item' ? '' : `_${convertDataAccessorName(parts[0])}`;
				return {
					nodeNameInExpression,
					originalExpression: `${exprStart}.${parts.slice(0, partsIdx + 1).join('.')}`, // $('abc').item.json.valid.until, but not ['x'] after
					replacementPrefix: `$('${startNodeName}').${accessorPrefix}.${parts[1]}`, // $('Start').item.json
					replacementName: parts.slice(2, partsIdx).join('_') + replacementPostfix, // valid_until, or valid_until_firstItem
				};
			} else {
				// This case covers any normal ObjectExtensions functions called on the ITEM_TO_DATA_ACCESSORS entry,
				// e.g. $('nodeName').first().toJsonObject().randomJSFunction() or $('nodeName').all().map(x => ({...x, a: 3 }))
				return {
					nodeNameInExpression,
					originalExpression: `${exprStart}.${parts[0]}`, // $('abc').first()
					replacementPrefix: `$('${startNodeName}').${accessorPrefix}.json`, // $('Start').first().json
					replacementName: `${nodeNamePlainJs}_${convertDataAccessorName(parts[0])}`, // nodeName_firstItem
				};
			}
		}
	}

	// This covers specific metadata accessors available on nodes
	const itemAccessorMatch = ITEM_ACCESSORS.flatMap((x) => (x === parts[0] ? x : []))[0];
	if (itemAccessorMatch !== undefined) {
		return {
			nodeNameInExpression,
			originalExpression: `${exprStart}.${parts[0]}`, // $('abc').isExecuted
			replacementPrefix: `$('${startNodeName}').first().json`, // $('Start').first().json
			replacementName: `${nodeNamePlainJs}_${parts[0]}`, // nodeName_isExecuted
		};
	}

	// If we end up here it means that:
	// - we have a complex `itemMatching()` case, or
	// - the expression should be invalid, or
	// - a new function was added that we're not aware of.
	//
	// In these cases let's just not touch it and keep it as is
	return null;
}
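// Illustrative sketch (not from the original source): the mapping expected for a
// typical reference, using the same made-up names as the comments above
// (start node 'Start', referenced node 'abc'):
//
//   parseExpressionMapping("$('abc').item.json.valid.until", 'abc', 'abc', 'Start');
//   // -> {
//   //      nodeNameInExpression: 'abc',
//   //      originalExpression: "$('abc').item.json.valid.until",
//   //      replacementPrefix: "$('Start').item.json",
//   //      replacementName: 'valid_until',
//   //    }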
// Find `$('NodeName').item.json.path.to.x` in `{{ $('NodeName').item.json.path.to.x[someFunction()] }}`
function extractExpressionCandidate(expression: string, startIndex: number, endIndex: number) {
	const firstPartException = ITEM_TO_DATA_ACCESSORS.map((x) =>
		x.exec(expression.slice(endIndex)),
	).filter((x) => x !== null);
	// Note that by choosing match 0 we prefer `itemMatching` matches over `item`
	// matches by relying on the order in ITEM_TO_DATA_ACCESSORS
	let after_accessor_idx = endIndex + (firstPartException[0]?.[0].length ?? -1);
	// skip `.` to continue, but halt before other symbols like `[` in `all()[0]`
	if (expression[after_accessor_idx + 1] === '.') after_accessor_idx += 1;
	const after_accessor = expression.slice(after_accessor_idx);
	const firstInvalidCharMatch = INVALID_JS_DOT_PATH.exec(after_accessor);
	// we should at least find the }} closing the JS expression in valid cases
	if (!firstInvalidCharMatch) return null;

	return expression.slice(startIndex, after_accessor_idx + firstInvalidCharMatch.index);
}

// Parse a given regex accessor match (e.g. `$('nodeName')`, `$node['nodeName']`)
// and extract a potential ExpressionMapping
function parseCandidateMatch(
	match: RegExpExecArray,
	expression: string,
	nodeNames: string[],
	startNodeName: string,
): ExpressionMapping | null {
	const startIndex = match.index;
	const endIndex = startIndex + match[0].length + 1;
	// this works because all access patterns define match groups
	// [fullMatch, "$('", "nodeName", "')"]
	const nodeNameInExpression = match[2];
	// This should be invalid in theory, since the regex matches should only act
	// on known node names
	if (!nodeNames.includes(nodeNameInExpression)) return null;

	const candidate = extractExpressionCandidate(expression, startIndex, endIndex);
	if (candidate === null) return null;

	return parseExpressionMapping(
		candidate,
		nodeNameInExpression,
		convertToUniqueJsDotName(nodeNameInExpression, nodeNames),
		startNodeName,
	);
}

// Handle matches of the form `$json.path.to.value`, which is necessary for the selection input node
function parse$jsonMatch(match: RegExpExecArray, expression: string, startNodeName: string) {
	const candidate = extractExpressionCandidate(
		expression,
		match.index,
		match.index + match[0].length + 1,
	);
	if (candidate === null) return null;

	return parseExpressionMapping(candidate, null, null, startNodeName);
}

// Parse all references to other nodes in `expression` and return them as `ExpressionMappings`
function parseReferencingExpressions(
	expression: string,
	nodeRegexps: Array<readonly [string, LazyRegExp]>,
	nodeNames: string[],
	startNodeName: string,
	parse$json: boolean,
): ExpressionMapping[] {
	const result: ExpressionMapping[] = [];
	for (const [pattern, regexp] of nodeRegexps) {
		if (!expression.includes(pattern)) continue;

		const matches = [...expression.matchAll(regexp.get())];
		result.push(
			...matches
				.map((x) => parseCandidateMatch(x, expression, nodeNames, startNodeName))
				.filter((x) => x !== null),
		);
	}
	if (parse$json && expression.includes('$json')) {
		for (const match of expression.matchAll(/\$json/gi)) {
			const res = parse$jsonMatch(match, expression, startNodeName);
			if (res) result.push(res);
		}
	}
	return result;
}

// Recursively apply `mapper` to all expressions in `parameterValue`
function applyParameterMapping(
	parameterValue: NodeParameterValueType,
	mapper: (s: string) => ExpressionMapping[],
	keyOfValue?: string,
): [ParameterExtractMapping, ExpressionMapping[]] {
	const result: ParameterExtractMapping = {};
	if (typeof parameterValue !== 'object' || parameterValue === null) {
		if (
			typeof parameterValue === 'string' &&
			(parameterValue.charAt(0) === '=' || keyOfValue === 'jsCode')
		) {
			const mapping = mapper(parameterValue);
			return [mapping, mapping];
		}
		return [undefined, []];
	}

	const allMappings = [];
	for (const [key, value] of Object.entries(parameterValue)) {
		const [mapping, all] = applyParameterMapping(value as NodeParameterValueType, mapper, key);
		result[key] = mapping;
		allMappings.push(...all);
	}
	return [result, allMappings];
}
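// Illustrative sketch (not from the original source): the shape returned by
// `applyParameterMapping` for a made-up parameters object. Leaves holding
// expressions map to the `ExpressionMapping[]` produced by `mapper`; everything
// else maps to `undefined`, so the mapping tree mirrors the parameters structure.
//
//   applyParameterMapping(
//     { url: "={{ $('abc').item.json.valid.until }}", options: { retries: 3 } },
//     mapper,
//   );
//   // -> [
//   //      { url: [/* ExpressionMapping for $('abc').item.json.valid.until */], options: { retries: undefined } },
//   //      [/* the same ExpressionMappings, flattened */],
//   //    ]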
// Ensure all expressions have a unique variable name
function resolveDuplicates(data: ExpressionMapping[], allNodeNames: string[]) {
	// Map from candidate variableName to its expressionData
	const triggerArgumentMap = new Map<string, ExpressionMapping>();
	const originalExpressionMap = new Map<string, string>();
	for (const mapping of data) {
		const { nodeNameInExpression, originalExpression, replacementPrefix } = mapping;
		let { replacementName } = mapping;

		const hasKeyAndCollides = (key: string) => {
			const value = triggerArgumentMap.get(key);
			if (!value) return false;
			return !isEqual(value, mapping);
		};

		// We need both parts in the key as we may need to pass e.g. `.first()` and `.item` separately,
		// since we cannot pass the node itself as its proxy reduces it to an empty object
		const key = () => `${replacementPrefix}.${replacementName}`;

		// This covers a realistic case where two nodes have the same path, e.g.
		// $('original input').item.json.path.to.url
		// $('some time later in the workflow').item.json.path.to.url
		if (hasKeyAndCollides(key()) && nodeNameInExpression) {
			replacementName = `${convertToUniqueJsDotName(nodeNameInExpression, allNodeNames)}_${replacementName}`;
		}

		// This covers all other theoretical cases, like where `${nodeName}_${variable}` might clash with another variable name
		while (hasKeyAndCollides(key())) replacementName += '_1';

		triggerArgumentMap.set(key(), {
			originalExpression,
			nodeNameInExpression,
			replacementName,
			replacementPrefix,
		});
		originalExpressionMap.set(originalExpression, key());
	}

	return {
		triggerArgumentMap,
		originalExpressionMap,
	};
}

// Recursively loop through the nodeProperties and apply `parameterExtractMapping` where defined
function applyExtractMappingToNode(node: INode, parameterExtractMapping: ParameterExtractMapping) {
	const usedMappings: ExpressionMapping[] = [];
	const applyMapping = (
		parameters: NodeParameterValueType,
		mapping: ParameterExtractMapping,
	): NodeParameterValueType => {
		if (!mapping) return parameters;

		if (typeof parameters !== 'object' || parameters === null) {
			if (Array.isArray(mapping) && typeof parameters === 'string') {
				for (const mapper of mapping) {
					if (!parameters.includes(mapper.originalExpression)) continue;

					parameters = parameters.replaceAll(
						mapper.originalExpression,
						`${mapper.replacementPrefix}.${mapper.replacementName}`,
					);
					usedMappings.push(mapper);
				}
			}
			return parameters;
		}

		// This should be an invalid state, though an explicit check makes typings easier
		if (Array.isArray(mapping)) {
			return parameters;
		}

		if (Array.isArray(parameters) && typeof mapping === 'object' && !Array.isArray(mapping)) {
			return parameters.map((x, i) => applyMapping(x, mapping[i]) as INodeParameters);
		}

		return mapValues(parameters, (v, k) => applyMapping(v, mapping[k])) as NodeParameterValueType;
	};

	const parameters = applyMapping(node.parameters, parameterExtractMapping);
	return { result: { ...node, parameters } as INode, usedMappings };
}

// Recursively find the finalized mapping for provisional mappings
function applyCanonicalMapping(
	mapping: ParameterExtractMapping,
	getCanonicalData: (m: ExpressionMapping) => ExpressionMapping | undefined,
): ParameterExtractMapping {
	if (!mapping) return;

	if (Array.isArray(mapping)) {
		// Sort by longest so that we don't accidentally replace part of a longer expression
		return mapping
			.map(getCanonicalData)
			.filter((x) => x !== undefined)
			.sort((a, b) => b.originalExpression.length - a.originalExpression.length);
	}

	return mapValues(mapping, (v) => applyCanonicalMapping(v, getCanonicalData));
}
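// Illustrative sketch (not from the original source): the collision case described
// in resolveDuplicates above, with made-up node names. Both references resolve to the
// same candidate key "$('Start').item.json.path_to_url", so the second mapping gets
// prefixed with its JS-legalised node name (assuming that name is not already taken):
//
//   first mapping  -> replacementName 'path_to_url'
//   second mapping -> replacementName 'some_time_later_in_the_workflow_path_to_url'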
/**
 * Extracts references to nodes in `nodeNames` from the nodes in `subGraph`.
 *
 * @returns an object with two keys:
 * - nodes: Transformed copies of nodes in `subGraph`, ready for use in a sub-workflow
 * - variables: A map from variable name in the sub-workflow to the replaced expression
 *
 * @throws if `insertedStartName` already exists among the node names in `subGraph`
 * @throws if `nodeNames` does not include all node names in `subGraph`
 */
export function extractReferencesInNodeExpressions(
	subGraph: INode[],
	nodeNames: string[],
	insertedStartName: string,
	graphInputNodeNames?: string[],
) {
	////
	// STEP 1 - Validate input invariants
	////
	const subGraphNames = subGraph.map((x) => x.name);
	if (subGraphNames.includes(insertedStartName))
		throw new OperationalError(
			`StartNodeName ${insertedStartName} already exists in nodeNames: ${JSON.stringify(subGraphNames)}`,
		);

	if (subGraphNames.some((x) => !nodeNames.includes(x))) {
		throw new OperationalError(
			`extractReferencesInNodeExpressions called with node in subGraph ${JSON.stringify(subGraphNames)} whose name is not in provided 'nodeNames' list ${JSON.stringify(nodeNames)}.`,
		);
	}

	////
	// STEP 2 - Compile all candidate regexp patterns
	////

	// This looks scary for large workflows, but RegExp should support >1 million characters and
	// it's a very linear pattern.
	const namesRegexp = '(' + nodeNames.map(escapeRegExp).join('|') + ')';
	const nodeRegexps = ACCESS_PATTERNS.map(
		(pattern) =>
			[
				pattern.checkPattern,
				// avoid compiling the expensive regex for rare legacy ways of accessing nodes
				new LazyRegExp(() => pattern.replacePattern(namesRegexp), 'g'),
			] as const,
	);

	////
	// STEP 3 - Parse expressions used in parameters and build mappings
	////

	// This map is used to change the actual expressions once resolved.
	// The value represents fields in the actual parameters object which require change
	const parameterTreeMappingByNode = new Map<string, ParameterExtractMapping>();
	// This is used to track all candidates for change, necessary for deduplication
	const allData = [];
	for (const node of subGraph) {
		const [parameterMapping, allMappings] = applyParameterMapping(node.parameters, (s) =>
			parseReferencingExpressions(
				s,
				nodeRegexps,
				nodeNames,
				insertedStartName,
				graphInputNodeNames?.includes(node.name) ?? false,
			),
		);
		parameterTreeMappingByNode.set(node.name, parameterMapping);
		allData.push(...allMappings);
	}

	////
	// STEP 4 - Filter out nodes in subGraph and handle name clashes
	////
	const subGraphNodeNames = new Set(subGraphNames);
	const dataFromOutsideSubgraph = allData.filter(
		// `nodeNameInExpression` being absent implies direct access via `$json` or `$binary`
		(x) => !x.nodeNameInExpression || !subGraphNodeNames.has(x.nodeNameInExpression),
	);

	const { originalExpressionMap, triggerArgumentMap } = resolveDuplicates(
		dataFromOutsideSubgraph,
		nodeNames,
	);

	////
	// STEP 5 - Apply canonical mappings to nodes and track created variables
	////

	// triggerArgumentMap[originalExpressionMap[originalExpression]] returns its canonical object
	// These should never be undefined at this stage
	const getCanonicalData = (e: ExpressionMapping) => {
		const key = originalExpressionMap.get(e.originalExpression);
		if (!key) return undefined;
		return triggerArgumentMap.get(key);
	};
	for (const [key, value] of parameterTreeMappingByNode.entries()) {
		parameterTreeMappingByNode.set(key, applyCanonicalMapping(value, getCanonicalData));
	}

	const allUsedMappings = [];
	const output = [];
	for (const node of subGraph) {
		const { result, usedMappings } = applyExtractMappingToNode(
			cloneDeep(node),
			parameterTreeMappingByNode.get(node.name),
		);
		allUsedMappings.push(...usedMappings);
		output.push(result);
	}

	const variables = new Map(allUsedMappings.map((m) => [m.replacementName, m.originalExpression]));
	return { nodes: output, variables };
}
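// Illustrative sketch (not from the original source): a minimal end-to-end use of
// `extractReferencesInNodeExpressions`. The node object is reduced to the fields
// relevant here; real INode values carry more required fields (id, type, typeVersion,
// position, ...), and all names below are made up.
//
//   const workNode = {
//     name: 'Work',
//     parameters: { url: "={{ $('Trigger').item.json.email }}" },
//   } as unknown as INode;
//
//   const { nodes, variables } = extractReferencesInNodeExpressions(
//     [workNode],
//     ['Trigger', 'Work'],
//     'Start',
//   );
//   // nodes[0].parameters.url === "={{ $('Start').item.json.email }}"
//   // variables.get('email')  === "$('Trigger').item.json.email"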