From f4d096d837760d7aa3e3edd03590dda1e5412a56 Mon Sep 17 00:00:00 2001 From: seanmcguire12 Date: Mon, 3 Feb 2025 13:46:07 -0800 Subject: [PATCH 01/12] include backendDOMNodeId --- lib/a11y/utils.ts | 4 ++++ types/context.ts | 2 ++ 2 files changed, 6 insertions(+) diff --git a/lib/a11y/utils.ts b/lib/a11y/utils.ts index d3fa88e0..32a5c74e 100644 --- a/lib/a11y/utils.ts +++ b/lib/a11y/utils.ts @@ -94,6 +94,9 @@ export function buildHierarchicalTree(nodes: AccessibilityNode[]): TreeResult { ...(hasValidName && { name: node.name }), // Only include name if it exists and isn't empty ...(node.description && { description: node.description }), ...(node.value && { value: node.value }), + ...(node.backendDOMNodeId !== undefined && { + backendDOMNodeId: node.backendDOMNodeId, + }), }); }); @@ -150,6 +153,7 @@ export async function getAccessibilityTree( description: node.description?.value, value: node.value?.value, nodeId: node.nodeId, + backendDOMNodeId: node.backendDOMNodeId, parentId: node.parentId, childIds: node.childIds, })); diff --git a/types/context.ts b/types/context.ts index af7defcb..1383bf20 100644 --- a/types/context.ts +++ b/types/context.ts @@ -4,6 +4,7 @@ export interface AXNode { description?: { value: string }; value?: { value: string }; nodeId: string; + backendDOMNodeId?: number; parentId?: string; childIds?: string[]; } @@ -17,6 +18,7 @@ export type AccessibilityNode = { childIds?: string[]; parentId?: string; nodeId?: string; + backendDOMNodeId?: number; }; export interface TreeResult { From 40862fd0f24bc5c31bdd55640c001d278ab1c98a Mon Sep 17 00:00:00 2001 From: seanmcguire12 Date: Mon, 3 Feb 2025 13:51:16 -0800 Subject: [PATCH 02/12] skip ax nodeId if negative --- lib/a11y/utils.ts | 42 ++++++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/lib/a11y/utils.ts b/lib/a11y/utils.ts index 32a5c74e..89c0878f 100644 --- a/lib/a11y/utils.ts +++ b/lib/a11y/utils.ts @@ -13,7 +13,9 @@ export function formatSimplifiedTree( level = 0, ): string { const indent = " ".repeat(level); - let result = `${indent}[${node.nodeId}] ${node.role}${node.name ? `: ${node.name}` : ""}\n`; + let result = `${indent}[${node.nodeId}] [${node.backendDOMNodeId}] ${node.role}${ + node.name ? `: ${node.name}` : "" + }\n`; if (node.children?.length) { result += node.children @@ -24,10 +26,9 @@ export function formatSimplifiedTree( } /** - * Helper function to remove or collapse unnecessary structural nodes - * Handles three cases: + * Helper function to remove or collapse unnecessary structural nodes: * 1. Removes generic/none nodes with no children - * 2. Collapses generic/none nodes with single child + * 2. Collapses generic/none nodes with a single child * 3. Keeps generic/none nodes with multiple children but cleans their subtrees */ function cleanStructuralNodes( @@ -35,6 +36,7 @@ function cleanStructuralNodes( ): AccessibilityNode | null { // Base case: leaf node if (!node.children) { + // Remove if role is generic/none return node.role === "generic" || node.role === "none" ? null : node; } @@ -46,17 +48,17 @@ function cleanStructuralNodes( // Handle generic/none nodes specially if (node.role === "generic" || node.role === "none") { if (cleanedChildren.length === 1) { - // Collapse single-child generic nodes + // Collapse single-child generic/none nodes return cleanedChildren[0]; } else if (cleanedChildren.length > 1) { - // Keep generic nodes with multiple children + // Keep generic/none nodes with multiple children return { ...node, children: cleanedChildren }; } - // Remove generic nodes with no children + // Remove generic/none node with no children return null; } - // For non-generic nodes, keep them if they have children after cleaning + // For non-generic nodes, keep them if they still have children return cleanedChildren.length > 0 ? { ...node, children: cleanedChildren } : node; @@ -65,22 +67,25 @@ function cleanStructuralNodes( /** * Builds a hierarchical tree structure from a flat array of accessibility nodes. * The function processes nodes in multiple passes to create a clean, meaningful tree. - * @param nodes - Flat array of accessibility nodes from the CDP - * @returns Object containing both the tree structure and a simplified string representation */ export function buildHierarchicalTree(nodes: AccessibilityNode[]): TreeResult { // Map to store processed nodes for quick lookup const nodeMap = new Map(); - // First pass: Create nodes that are meaningful - // We only keep nodes that either have a name or children to avoid cluttering the tree + // --- First pass: Create nodes that are meaningful (and skip negative IDs) --- nodes.forEach((node) => { + // Skip node if its ID is negative (e.g., "-1000002014") + const nodeIdValue = parseInt(node.nodeId, 10); + if (nodeIdValue < 0) { + return; + } + const hasChildren = node.childIds && node.childIds.length > 0; const hasValidName = node.name && node.name.trim() !== ""; const isInteractive = node.role !== "none" && node.role !== "generic" && - node.role !== "InlineTextBox"; //add other interactive roles here + node.role !== "InlineTextBox"; // Add other interactive roles here as needed // Include nodes that are either named, have children, or are interactive if (!hasValidName && !hasChildren && !isInteractive) { @@ -91,7 +96,7 @@ export function buildHierarchicalTree(nodes: AccessibilityNode[]): TreeResult { nodeMap.set(node.nodeId, { role: node.role, nodeId: node.nodeId, - ...(hasValidName && { name: node.name }), // Only include name if it exists and isn't empty + ...(hasValidName && { name: node.name }), ...(node.description && { description: node.description }), ...(node.value && { value: node.value }), ...(node.backendDOMNodeId !== undefined && { @@ -121,7 +126,7 @@ export function buildHierarchicalTree(nodes: AccessibilityNode[]): TreeResult { .filter((node) => !node.parentId && nodeMap.has(node.nodeId)) // Get root nodes .map((node) => nodeMap.get(node.nodeId)) .filter(Boolean) - .map((node) => cleanStructuralNodes(node)) + .map((node) => cleanStructuralNodes(node!)) .filter(Boolean) as AccessibilityNode[]; // Generate a simplified string representation of the tree @@ -135,6 +140,9 @@ export function buildHierarchicalTree(nodes: AccessibilityNode[]): TreeResult { }; } +/** + * Retrieves the full accessibility tree via CDP and transforms it into a hierarchical structure. + */ export async function getAccessibilityTree( page: StagehandPage, logger: (logLine: LogLine) => void, @@ -142,11 +150,12 @@ export async function getAccessibilityTree( await page.enableCDP("Accessibility"); try { + // Fetch the full accessibility tree from Chrome DevTools Protocol const { nodes } = await page.sendCDP<{ nodes: AXNode[] }>( "Accessibility.getFullAXTree", ); - // Extract specific sources + // Extract specific sources (including backendDOMNodeId) const sources = nodes.map((node) => ({ role: node.role?.value, name: node.name?.value, @@ -157,6 +166,7 @@ export async function getAccessibilityTree( parentId: node.parentId, childIds: node.childIds, })); + // Transform into hierarchical structure const hierarchicalTree = buildHierarchicalTree(sources); From 79a2b1f180bad088733aa0ebad10b5e0d5e982ac Mon Sep 17 00:00:00 2001 From: seanmcguire12 Date: Mon, 3 Feb 2025 19:00:29 -0800 Subject: [PATCH 03/12] replace role with dom tag name if none or generic --- lib/a11y/utils.ts | 98 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 95 insertions(+), 3 deletions(-) diff --git a/lib/a11y/utils.ts b/lib/a11y/utils.ts index 89c0878f..24b03383 100644 --- a/lib/a11y/utils.ts +++ b/lib/a11y/utils.ts @@ -13,7 +13,7 @@ export function formatSimplifiedTree( level = 0, ): string { const indent = " ".repeat(level); - let result = `${indent}[${node.nodeId}] [${node.backendDOMNodeId}] ${node.role}${ + let result = `${indent}[${node.nodeId}] ${node.role}${ node.name ? `: ${node.name}` : "" }\n`; @@ -102,6 +102,7 @@ export function buildHierarchicalTree(nodes: AccessibilityNode[]): TreeResult { ...(node.backendDOMNodeId !== undefined && { backendDOMNodeId: node.backendDOMNodeId, }), + ...(node.xpath && { xpath: node.xpath }), }); }); @@ -146,7 +147,7 @@ export function buildHierarchicalTree(nodes: AccessibilityNode[]): TreeResult { export async function getAccessibilityTree( page: StagehandPage, logger: (logLine: LogLine) => void, -) { +): Promise { await page.enableCDP("Accessibility"); try { @@ -155,7 +156,97 @@ export async function getAccessibilityTree( "Accessibility.getFullAXTree", ); - // Extract specific sources (including backendDOMNodeId) + // For each node with a backendDOMNodeId, resolve it + for (const node of nodes) { + // Convert the AX role to a plain string (since .value is optional) + const role = node.role?.value; + + if (node.backendDOMNodeId !== undefined) { + try { + // 1) Resolve the node to a Runtime object + const { object } = await page.sendCDP<{ + object: { objectId?: string }; + }>("DOM.resolveNode", { + backendNodeId: node.backendDOMNodeId, + }); + + if (object && object.objectId) { + // 2) If valid, fetch the XPath (optional) + try { + const xpath = await getXPathByResolvedObjectId( + await page.getCDPClient(), + object.objectId, + ); + node.xpath = xpath; + } catch (xpathError) { + logger({ + category: "observation", + message: `Error fetching XPath for node ${node.backendDOMNodeId}`, + level: 2, + auxiliary: { + error: { + value: xpathError.message, + type: "string", + }, + }, + }); + } + + // 3) If role is 'generic' or 'none' (or name is missing), + // we call a function on the element to get its tagName. + if (role === "generic" || role === "none") { + try { + const { result } = await page.sendCDP<{ + result: { type: string; value?: string }; + }>("Runtime.callFunctionOn", { + objectId: object.objectId, + functionDeclaration: ` + function() { + // "this" is the DOM element. Return its tagName in lowercase + return this.tagName ? this.tagName.toLowerCase() : ""; + } + `, + returnByValue: true, + }); + + // If we got a tagName, store it in node.name + if (result?.value) { + // Overwrite node.role, so it won't be "generic" or empty + node.role = { value: result.value }; + } + } catch (tagNameError) { + // If we can't resolve the tagName, log and skip + logger({ + category: "observation", + message: `Could not fetch tagName for node ${node.backendDOMNodeId}`, + level: 2, + auxiliary: { + error: { + value: tagNameError.message, + type: "string", + }, + }, + }); + } + } + } + } catch (resolveError) { + logger({ + category: "observation", + message: `Could not resolve DOM node ID ${node.backendDOMNodeId}`, + level: 2, + auxiliary: { + error: { + value: resolveError.message, + type: "string", + }, + }, + }); + } + } + } + + // Now build the final hierarchical structure (including updated .name if replaced by tagName) const sources = nodes.map((node) => ({ role: node.role?.value, name: node.name?.value, @@ -165,6 +256,7 @@ export async function getAccessibilityTree( backendDOMNodeId: node.backendDOMNodeId, parentId: node.parentId, childIds: node.childIds, + xpath: node.xpath, })); // Transform into hierarchical structure From 8212480f95095733e0368a2672c8c47ee4a4050a Mon Sep 17 00:00:00 2001 From: seanmcguire12 Date: Mon, 3 Feb 2025 19:07:07 -0800 Subject: [PATCH 04/12] add xpath to AXNode type --- types/context.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/types/context.ts b/types/context.ts index 1383bf20..a1177a10 100644 --- a/types/context.ts +++ b/types/context.ts @@ -7,6 +7,7 @@ export interface AXNode { backendDOMNodeId?: number; parentId?: string; childIds?: string[]; + xpath?: string; } export type AccessibilityNode = { @@ -19,6 +20,7 @@ export type AccessibilityNode = { parentId?: string; nodeId?: string; backendDOMNodeId?: number; + xpath?: string; }; export interface TreeResult { From 8ec593b5e85d3582e85e9ac46acbfc6ed5d57c61 Mon Sep 17 00:00:00 2001 From: seanmcguire12 Date: Mon, 3 Feb 2025 19:22:58 -0800 Subject: [PATCH 05/12] revert unnecessary changed lines --- lib/a11y/utils.ts | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/lib/a11y/utils.ts b/lib/a11y/utils.ts index 24b03383..87ddb1ea 100644 --- a/lib/a11y/utils.ts +++ b/lib/a11y/utils.ts @@ -26,9 +26,10 @@ export function formatSimplifiedTree( } /** - * Helper function to remove or collapse unnecessary structural nodes: + * Helper function to remove or collapse unnecessary structural nodes + * Handles three cases: * 1. Removes generic/none nodes with no children - * 2. Collapses generic/none nodes with a single child + * 2. Collapses generic/none nodes with single child * 3. Keeps generic/none nodes with multiple children but cleans their subtrees */ function cleanStructuralNodes( @@ -36,7 +37,6 @@ function cleanStructuralNodes( ): AccessibilityNode | null { // Base case: leaf node if (!node.children) { - // Remove if role is generic/none return node.role === "generic" || node.role === "none" ? null : node; } @@ -48,13 +48,13 @@ function cleanStructuralNodes( // Handle generic/none nodes specially if (node.role === "generic" || node.role === "none") { if (cleanedChildren.length === 1) { - // Collapse single-child generic/none nodes + // Collapse single-child generic nodes return cleanedChildren[0]; } else if (cleanedChildren.length > 1) { - // Keep generic/none nodes with multiple children + // Keep generic nodes with multiple children return { ...node, children: cleanedChildren }; } - // Remove generic/none node with no children + // Remove generic nodes with no children return null; } @@ -67,12 +67,15 @@ function cleanStructuralNodes( /** * Builds a hierarchical tree structure from a flat array of accessibility nodes. * The function processes nodes in multiple passes to create a clean, meaningful tree. + * @param nodes - Flat array of accessibility nodes from the CDP + * @returns Object containing both the tree structure and a simplified string representation */ export function buildHierarchicalTree(nodes: AccessibilityNode[]): TreeResult { // Map to store processed nodes for quick lookup const nodeMap = new Map(); - // --- First pass: Create nodes that are meaningful (and skip negative IDs) --- + // First pass: Create nodes that are meaningful + // We only keep nodes that either have a name or children to avoid cluttering the tree nodes.forEach((node) => { // Skip node if its ID is negative (e.g., "-1000002014") const nodeIdValue = parseInt(node.nodeId, 10); @@ -85,7 +88,7 @@ export function buildHierarchicalTree(nodes: AccessibilityNode[]): TreeResult { const isInteractive = node.role !== "none" && node.role !== "generic" && - node.role !== "InlineTextBox"; // Add other interactive roles here as needed + node.role !== "InlineTextBox"; //add other interactive roles here // Include nodes that are either named, have children, or are interactive if (!hasValidName && !hasChildren && !isInteractive) { @@ -96,7 +99,7 @@ export function buildHierarchicalTree(nodes: AccessibilityNode[]): TreeResult { nodeMap.set(node.nodeId, { role: node.role, nodeId: node.nodeId, - ...(hasValidName && { name: node.name }), + ...(hasValidName && { name: node.name }), // Only include name if it exists and isn't empty ...(node.description && { description: node.description }), ...(node.value && { value: node.value }), ...(node.backendDOMNodeId !== undefined && { @@ -127,7 +130,7 @@ export function buildHierarchicalTree(nodes: AccessibilityNode[]): TreeResult { .filter((node) => !node.parentId && nodeMap.has(node.nodeId)) // Get root nodes .map((node) => nodeMap.get(node.nodeId)) .filter(Boolean) - .map((node) => cleanStructuralNodes(node!)) + .map((node) => cleanStructuralNodes(node)) .filter(Boolean) as AccessibilityNode[]; // Generate a simplified string representation of the tree @@ -258,7 +261,6 @@ export async function getAccessibilityTree( childIds: node.childIds, xpath: node.xpath, })); - // Transform into hierarchical structure const hierarchicalTree = buildHierarchicalTree(sources); From 22aee72a23edeb16bbab676d0c9d71132823ead8 Mon Sep 17 00:00:00 2001 From: seanmcguire12 Date: Mon, 3 Feb 2025 19:24:29 -0800 Subject: [PATCH 06/12] revert more unnecessary changed lines --- lib/a11y/utils.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/a11y/utils.ts b/lib/a11y/utils.ts index 87ddb1ea..b105dc0b 100644 --- a/lib/a11y/utils.ts +++ b/lib/a11y/utils.ts @@ -58,7 +58,7 @@ function cleanStructuralNodes( return null; } - // For non-generic nodes, keep them if they still have children + // For non-generic nodes, keep them if they have children after cleaning return cleanedChildren.length > 0 ? { ...node, children: cleanedChildren } : node; From b04d7c1afeea7eafe1b512a7769d1dd527a36b71 Mon Sep 17 00:00:00 2001 From: seanmcguire12 Date: Mon, 3 Feb 2025 19:25:35 -0800 Subject: [PATCH 07/12] changeset --- .changeset/chilled-apes-sneeze.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .changeset/chilled-apes-sneeze.md diff --git a/.changeset/chilled-apes-sneeze.md b/.changeset/chilled-apes-sneeze.md new file mode 100644 index 00000000..8373d35e --- /dev/null +++ b/.changeset/chilled-apes-sneeze.md @@ -0,0 +1,5 @@ +--- +"@browserbasehq/stagehand": patch +--- + +create a11y + dom hybrid input for observe From 90e645cc22e8c96b996fdc1bf5012d941211b719 Mon Sep 17 00:00:00 2001 From: Miguel Date: Wed, 5 Feb 2025 13:28:52 -0800 Subject: [PATCH 08/12] speedup --- lib/a11y/utils.ts | 221 ++++++++++++++++----------------- lib/handlers/observeHandler.ts | 3 +- lib/prompt.ts | 2 +- 3 files changed, 112 insertions(+), 114 deletions(-) diff --git a/lib/a11y/utils.ts b/lib/a11y/utils.ts index 5f5f347e..991fdf36 100644 --- a/lib/a11y/utils.ts +++ b/lib/a11y/utils.ts @@ -32,23 +32,92 @@ export function formatSimplifiedTree( * 2. Collapses generic/none nodes with single child * 3. Keeps generic/none nodes with multiple children but cleans their subtrees */ -function cleanStructuralNodes( +async function cleanStructuralNodes( node: AccessibilityNode, -): AccessibilityNode | null { + page?: StagehandPage, + logger?: (logLine: LogLine) => void, +): Promise { // Filter out nodes with negative IDs if (node.nodeId && parseInt(node.nodeId) < 0) { return null; } + // If we have a backendDOMNodeId and the node is generic/none, try to get its tagName + if ( + page && + logger && + node.backendDOMNodeId !== undefined && + (node.role === "generic" || node.role === "none") + ) { + try { + // 1) Resolve the node to a Runtime object + const { object } = await page.sendCDP<{ + object: { objectId?: string }; + }>("DOM.resolveNode", { + backendNodeId: node.backendDOMNodeId, + }); + + if (object && object.objectId) { + try { + // Get the tagName for the node + const { result } = await page.sendCDP<{ + result: { type: string; value?: string }; + }>("Runtime.callFunctionOn", { + objectId: object.objectId, + functionDeclaration: ` + function() { + return this.tagName ? this.tagName.toLowerCase() : ""; + } + `, + returnByValue: true, + }); + + // If we got a tagName, update the node's role + if (result?.value) { + node.role = result.value; + } + } catch (tagNameError) { + logger({ + category: "observation", + message: `Could not fetch tagName for node ${node.backendDOMNodeId}`, + level: 2, + auxiliary: { + error: { + value: tagNameError.message, + type: "string", + }, + }, + }); + } + } + } catch (resolveError) { + logger({ + category: "observation", + message: `Could not resolve DOM node ID ${node.backendDOMNodeId}`, + level: 2, + auxiliary: { + error: { + value: resolveError.message, + type: "string", + }, + }, + }); + } + } + // Base case: leaf node if (!node.children) { return node.role === "generic" || node.role === "none" ? null : node; } // Recursively clean children - const cleanedChildren = node.children - .map((child) => cleanStructuralNodes(child)) - .filter(Boolean) as AccessibilityNode[]; + const cleanedChildrenPromises = node.children.map((child) => + cleanStructuralNodes(child, page, logger), + ); + const resolvedChildren = await Promise.all(cleanedChildrenPromises); + const cleanedChildren = resolvedChildren.filter( + (child): child is AccessibilityNode => child !== null, + ); // Handle generic/none nodes specially if (node.role === "generic" || node.role === "none") { @@ -75,7 +144,11 @@ function cleanStructuralNodes( * @param nodes - Flat array of accessibility nodes from the CDP * @returns Object containing both the tree structure and a simplified string representation */ -export function buildHierarchicalTree(nodes: AccessibilityNode[]): TreeResult { +export async function buildHierarchicalTree( + nodes: AccessibilityNode[], + page?: StagehandPage, + logger?: (logLine: LogLine) => void, +): Promise { // Map to store processed nodes for quick lookup const nodeMap = new Map(); @@ -131,13 +204,18 @@ export function buildHierarchicalTree(nodes: AccessibilityNode[]): TreeResult { }); // Final pass: Build the root-level tree and clean up structural nodes - const finalTree = nodes + const rootNodes = nodes .filter((node) => !node.parentId && nodeMap.has(node.nodeId)) // Get root nodes .map((node) => nodeMap.get(node.nodeId)) - .filter(Boolean) - .map((node) => cleanStructuralNodes(node)) .filter(Boolean) as AccessibilityNode[]; + const cleanedTreePromises = rootNodes.map((node) => + cleanStructuralNodes(node, page, logger), + ); + const finalTree = (await Promise.all(cleanedTreePromises)).filter( + Boolean, + ) as AccessibilityNode[]; + // Generate a simplified string representation of the tree const simplifiedFormat = finalTree .map((node) => formatSimplifiedTree(node)) @@ -163,111 +241,32 @@ export async function getAccessibilityTree( const { nodes } = await page.sendCDP<{ nodes: AXNode[] }>( "Accessibility.getFullAXTree", ); + const startTime = Date.now(); - // For each node with a backendDOMNodeId, resolve it - for (const node of nodes) { - // Convert the AX role to a plain string (since .value is optional) - const role = node.role?.value; - - if (node.backendDOMNodeId !== undefined) { - try { - // 1) Resolve the node to a Runtime object - const { object } = await page.sendCDP<{ - object: { objectId?: string }; - }>("DOM.resolveNode", { - backendNodeId: node.backendDOMNodeId, - }); + // Transform into hierarchical structure + const hierarchicalTree = await buildHierarchicalTree( + nodes.map((node) => ({ + role: node.role?.value, + name: node.name?.value, + description: node.description?.value, + value: node.value?.value, + nodeId: node.nodeId, + backendDOMNodeId: node.backendDOMNodeId, + parentId: node.parentId, + childIds: node.childIds, + xpath: node.xpath, + })), + page, + logger, + ); - if (object && object.objectId) { - // 2) If valid, fetch the XPath (optional) - try { - const xpath = await getXPathByResolvedObjectId( - await page.getCDPClient(), - object.objectId, - ); - node.xpath = xpath; - } catch (xpathError) { - logger({ - category: "observation", - message: `Error fetching XPath for node ${node.backendDOMNodeId}`, - level: 2, - auxiliary: { - error: { - value: xpathError.message, - type: "string", - }, - }, - }); - } - - // 3) If role is 'generic' or 'none' (or name is missing), - // we call a function on the element to get its tagName. - if (role === "generic" || role === "none") { - try { - const { result } = await page.sendCDP<{ - result: { type: string; value?: string }; - }>("Runtime.callFunctionOn", { - objectId: object.objectId, - functionDeclaration: ` - function() { - // "this" is the DOM element. Return its tagName in lowercase - return this.tagName ? this.tagName.toLowerCase() : ""; - } - `, - returnByValue: true, - }); - - // If we got a tagName, store it in node.name - if (result?.value) { - // Overwrite node.role, so it won't be "generic" or empty - node.role = { value: result.value }; - } - } catch (tagNameError) { - // If we can't resolve the tagName, log and skip - logger({ - category: "observation", - message: `Could not fetch tagName for node ${node.backendDOMNodeId}`, - level: 2, - auxiliary: { - error: { - value: tagNameError.message, - type: "string", - }, - }, - }); - } - } - } - } catch (resolveError) { - logger({ - category: "observation", - message: `Could not resolve DOM node ID ${node.backendDOMNodeId}`, - level: 2, - auxiliary: { - error: { - value: resolveError.message, - type: "string", - }, - }, - }); - } - } - } + logger({ + category: "observation", + message: `got accessibility tree in ${Date.now() - startTime}ms`, + level: 1, + }); - // Now build the final hierarchical structure (including updated .name if replaced by tagName) - const sources = nodes.map((node) => ({ - role: node.role?.value, - name: node.name?.value, - description: node.description?.value, - value: node.value?.value, - nodeId: node.nodeId, - backendDOMNodeId: node.backendDOMNodeId, - parentId: node.parentId, - childIds: node.childIds, - xpath: node.xpath, - })); - // Transform into hierarchical structure - const hierarchicalTree = buildHierarchicalTree(sources); + // fs.writeFileSync("../hybrid_tree.txt", hierarchicalTree.simplified); return hierarchicalTree; } catch (error) { diff --git a/lib/handlers/observeHandler.ts b/lib/handlers/observeHandler.ts index 9f73caa3..65defd5b 100644 --- a/lib/handlers/observeHandler.ts +++ b/lib/handlers/observeHandler.ts @@ -108,6 +108,7 @@ export class StagehandObserveHandler { isUsingAccessibilityTree: useAccessibilityTree, returnAction, }); + const elementsWithSelectors = await Promise.all( observationResponse.elements.map(async (element) => { const { elementId, ...rest } = element; @@ -137,7 +138,6 @@ export class StagehandObserveHandler { message: `Invalid object ID returned for element: ${elementId}`, level: 1, }); - return null; } const xpath = await getXPathByResolvedObjectId( @@ -151,7 +151,6 @@ export class StagehandObserveHandler { message: `Empty xpath returned for element: ${elementId}`, level: 1, }); - return null; } return { diff --git a/lib/prompt.ts b/lib/prompt.ts index 3dc7eabb..1ce63581 100644 --- a/lib/prompt.ts +++ b/lib/prompt.ts @@ -361,7 +361,7 @@ You will be given: 1. a instruction of elements to observe 2. ${ isUsingAccessibilityTree - ? "a hierarchical accessibility tree showing the semantic structure of the page" + ? "a hierarchical accessibility tree showing the semantic structure of the page. The tree is a hybrid of the DOM and the accessibility tree." : "a numbered list of possible elements" } From 8b6021229a913eced5c50a8c5dacffd02b888321 Mon Sep 17 00:00:00 2001 From: Miguel Date: Wed, 5 Feb 2025 13:31:28 -0800 Subject: [PATCH 09/12] prettier --- lib/handlers/observeHandler.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/handlers/observeHandler.ts b/lib/handlers/observeHandler.ts index 65defd5b..696165bd 100644 --- a/lib/handlers/observeHandler.ts +++ b/lib/handlers/observeHandler.ts @@ -108,7 +108,7 @@ export class StagehandObserveHandler { isUsingAccessibilityTree: useAccessibilityTree, returnAction, }); - + const elementsWithSelectors = await Promise.all( observationResponse.elements.map(async (element) => { const { elementId, ...rest } = element; From b2fbf4f7681d28683c649106ef5161e49ffea6d7 Mon Sep 17 00:00:00 2001 From: seanmcguire12 Date: Wed, 5 Feb 2025 14:38:03 -0800 Subject: [PATCH 10/12] prune before updating roles --- lib/a11y/utils.ts | 67 +++++++++++++++++++++++++---------------------- 1 file changed, 36 insertions(+), 31 deletions(-) diff --git a/lib/a11y/utils.ts b/lib/a11y/utils.ts index 991fdf36..49451f2d 100644 --- a/lib/a11y/utils.ts +++ b/lib/a11y/utils.ts @@ -31,18 +31,50 @@ export function formatSimplifiedTree( * 1. Removes generic/none nodes with no children * 2. Collapses generic/none nodes with single child * 3. Keeps generic/none nodes with multiple children but cleans their subtrees + * and attempts to resolve their role to a DOM tag name */ async function cleanStructuralNodes( node: AccessibilityNode, page?: StagehandPage, logger?: (logLine: LogLine) => void, ): Promise { - // Filter out nodes with negative IDs + // 1) Filter out nodes with negative IDs if (node.nodeId && parseInt(node.nodeId) < 0) { return null; } - // If we have a backendDOMNodeId and the node is generic/none, try to get its tagName + // 2) Base case: if no children exist, this is effectively a leaf. + // If it's "generic" or "none", we remove it; otherwise, keep it. + if (!node.children || node.children.length === 0) { + return node.role === "generic" || node.role === "none" ? null : node; + } + + // 3) Recursively clean children + const cleanedChildrenPromises = node.children.map((child) => + cleanStructuralNodes(child, page, logger), + ); + const resolvedChildren = await Promise.all(cleanedChildrenPromises); + const cleanedChildren = resolvedChildren.filter( + (child): child is AccessibilityNode => child !== null, + ); + + // 4) **Prune** "generic" or "none" nodes first, + // before resolving them to their tag names. + if (node.role === "generic" || node.role === "none") { + if (cleanedChildren.length === 1) { + // Collapse single-child structural node + return cleanedChildren[0]; + } else if (cleanedChildren.length === 0) { + // Remove empty structural node + return null; + } + // If we have multiple children, we keep this node as a container. + // We'll update role below if needed. + } + + // 5) If we still have a "generic"/"none" node after pruning + // (i.e., because it had multiple children), now we try + // to resolve and replace its role with the DOM tag name. if ( page && logger && @@ -50,7 +82,6 @@ async function cleanStructuralNodes( (node.role === "generic" || node.role === "none") ) { try { - // 1) Resolve the node to a Runtime object const { object } = await page.sendCDP<{ object: { objectId?: string }; }>("DOM.resolveNode", { @@ -105,34 +136,8 @@ async function cleanStructuralNodes( } } - // Base case: leaf node - if (!node.children) { - return node.role === "generic" || node.role === "none" ? null : node; - } - - // Recursively clean children - const cleanedChildrenPromises = node.children.map((child) => - cleanStructuralNodes(child, page, logger), - ); - const resolvedChildren = await Promise.all(cleanedChildrenPromises); - const cleanedChildren = resolvedChildren.filter( - (child): child is AccessibilityNode => child !== null, - ); - - // Handle generic/none nodes specially - if (node.role === "generic" || node.role === "none") { - if (cleanedChildren.length === 1) { - // Collapse single-child generic nodes - return cleanedChildren[0]; - } else if (cleanedChildren.length > 1) { - // Keep generic nodes with multiple children - return { ...node, children: cleanedChildren }; - } - // Remove generic nodes with no children - return null; - } - - // For non-generic nodes, keep them if they have children after cleaning + // 6) Return the updated node. + // If it has children, update them; otherwise keep it as-is. return cleanedChildren.length > 0 ? { ...node, children: cleanedChildren } : node; From cbd9eb1ab021fbe33857841235da01689ca253ef Mon Sep 17 00:00:00 2001 From: seanmcguire12 Date: Wed, 5 Feb 2025 18:07:11 -0800 Subject: [PATCH 11/12] take xpath out of AXnode type --- lib/a11y/utils.ts | 2 -- types/context.ts | 2 -- 2 files changed, 4 deletions(-) diff --git a/lib/a11y/utils.ts b/lib/a11y/utils.ts index 49451f2d..f9dd4ef7 100644 --- a/lib/a11y/utils.ts +++ b/lib/a11y/utils.ts @@ -188,7 +188,6 @@ export async function buildHierarchicalTree( ...(node.backendDOMNodeId !== undefined && { backendDOMNodeId: node.backendDOMNodeId, }), - ...(node.xpath && { xpath: node.xpath }), }); }); @@ -259,7 +258,6 @@ export async function getAccessibilityTree( backendDOMNodeId: node.backendDOMNodeId, parentId: node.parentId, childIds: node.childIds, - xpath: node.xpath, })), page, logger, diff --git a/types/context.ts b/types/context.ts index a1177a10..1383bf20 100644 --- a/types/context.ts +++ b/types/context.ts @@ -7,7 +7,6 @@ export interface AXNode { backendDOMNodeId?: number; parentId?: string; childIds?: string[]; - xpath?: string; } export type AccessibilityNode = { @@ -20,7 +19,6 @@ export type AccessibilityNode = { parentId?: string; nodeId?: string; backendDOMNodeId?: number; - xpath?: string; }; export interface TreeResult { From 5a7bd4959a03fee8a9eae9d394c7c8f73457e623 Mon Sep 17 00:00:00 2001 From: seanmcguire12 Date: Thu, 6 Feb 2025 14:18:34 -0800 Subject: [PATCH 12/12] rm commented code --- lib/a11y/utils.ts | 6 ------ 1 file changed, 6 deletions(-) diff --git a/lib/a11y/utils.ts b/lib/a11y/utils.ts index f9dd4ef7..a8345cb4 100644 --- a/lib/a11y/utils.ts +++ b/lib/a11y/utils.ts @@ -269,8 +269,6 @@ export async function getAccessibilityTree( level: 1, }); - // fs.writeFileSync("../hybrid_tree.txt", hierarchicalTree.simplified); - return hierarchicalTree; } catch (error) { logger({ @@ -368,7 +366,6 @@ export async function performPlaywrightMethod( method: string, args: unknown[], xpath: string, - // domSettleTimeoutMs?: number, ) { const locator = stagehandPage.locator(`xpath=${xpath}`).first(); const initialUrl = stagehandPage.url(); @@ -613,7 +610,6 @@ export async function performPlaywrightMethod( await newOpenedTab.close(); await stagehandPage.goto(newOpenedTab.url()); await stagehandPage.waitForLoadState("domcontentloaded"); - // await stagehandPage._waitForSettledDom(domSettleTimeoutMs); } await Promise.race([ @@ -674,6 +670,4 @@ export async function performPlaywrightMethod( `Method ${method} not supported`, ); } - - // await stagehandPage._waitForSettledDom(domSettleTimeoutMs); }