-
Notifications
You must be signed in to change notification settings - Fork 364
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Hybrid of a11y tree & DOM for input to observe #459
Changes from 8 commits
f4d096d
40862fd
79a2b1f
8212480
8ec593b
22aee72
b04d7c1
332b864
7c46416
90e645c
8b60212
b2fbf4f
cbd9eb1
5a7bd49
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
--- | ||
"@browserbasehq/stagehand": patch | ||
--- | ||
|
||
Create an a11y + DOM hybrid input for observe |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,7 +13,9 @@ export function formatSimplifiedTree( | |
level = 0, | ||
): string { | ||
const indent = " ".repeat(level); | ||
let result = `${indent}[${node.nodeId}] ${node.role}${node.name ? `: ${node.name}` : ""}\n`; | ||
let result = `${indent}[${node.nodeId}] ${node.role}${ | ||
node.name ? `: ${node.name}` : "" | ||
}\n`; | ||
|
||
if (node.children?.length) { | ||
result += node.children | ||
|
@@ -80,6 +82,12 @@ export function buildHierarchicalTree(nodes: AccessibilityNode[]): TreeResult { | |
// First pass: Create nodes that are meaningful | ||
// We only keep nodes that either have a name or children to avoid cluttering the tree | ||
nodes.forEach((node) => { | ||
// Skip node if its ID is negative (e.g., "-1000002014") | ||
const nodeIdValue = parseInt(node.nodeId, 10); | ||
if (nodeIdValue < 0) { | ||
return; | ||
} | ||
|
||
const hasChildren = node.childIds && node.childIds.length > 0; | ||
const hasValidName = node.name && node.name.trim() !== ""; | ||
const isInteractive = | ||
|
@@ -99,6 +107,10 @@ export function buildHierarchicalTree(nodes: AccessibilityNode[]): TreeResult { | |
...(hasValidName && { name: node.name }), // Only include name if it exists and isn't empty | ||
...(node.description && { description: node.description }), | ||
...(node.value && { value: node.value }), | ||
...(node.backendDOMNodeId !== undefined && { | ||
backendDOMNodeId: node.backendDOMNodeId, | ||
}), | ||
...(node.xpath && { xpath: node.xpath }), | ||
}); | ||
}); | ||
|
||
|
@@ -137,26 +149,122 @@ export function buildHierarchicalTree(nodes: AccessibilityNode[]): TreeResult { | |
}; | ||
} | ||
|
||
/** | ||
* Retrieves the full accessibility tree via CDP and transforms it into a hierarchical structure. | ||
*/ | ||
export async function getAccessibilityTree( | ||
page: StagehandPage, | ||
logger: (logLine: LogLine) => void, | ||
) { | ||
): Promise<TreeResult> { | ||
await page.enableCDP("Accessibility"); | ||
|
||
try { | ||
// Fetch the full accessibility tree from Chrome DevTools Protocol | ||
const { nodes } = await page.sendCDP<{ nodes: AXNode[] }>( | ||
"Accessibility.getFullAXTree", | ||
); | ||
|
||
// Extract specific sources | ||
// For each node with a backendDOMNodeId, resolve it | ||
for (const node of nodes) { | ||
// Convert the AX role to a plain string (since .value is optional) | ||
const role = node.role?.value; | ||
|
||
if (node.backendDOMNodeId !== undefined) { | ||
try { | ||
// 1) Resolve the node to a Runtime object | ||
const { object } = await page.sendCDP<{ | ||
seanmcguire12 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
object: { objectId?: string }; | ||
}>("DOM.resolveNode", { | ||
backendNodeId: node.backendDOMNodeId, | ||
}); | ||
|
||
if (object && object.objectId) { | ||
// 2) If valid, fetch the XPath (optional) | ||
try { | ||
const xpath = await getXPathByResolvedObjectId( | ||
await page.getCDPClient(), | ||
object.objectId, | ||
); | ||
node.xpath = xpath; | ||
} catch (xpathError) { | ||
logger({ | ||
category: "observation", | ||
message: `Error fetching XPath for node ${node.backendDOMNodeId}`, | ||
level: 2, | ||
auxiliary: { | ||
error: { | ||
value: xpathError.message, | ||
type: "string", | ||
}, | ||
}, | ||
}); | ||
} | ||
|
||
// 3) If role is 'generic' or 'none', | ||
// we call a function on the element to get its tagName. | ||
if (role === "generic" || role === "none") { | ||
try { | ||
const { result } = await page.sendCDP<{ | ||
result: { type: string; value?: string }; | ||
}>("Runtime.callFunctionOn", { | ||
objectId: object.objectId, | ||
functionDeclaration: ` | ||
function() { | ||
// "this" is the DOM element. Return its tagName in lowercase | ||
return this.tagName ? this.tagName.toLowerCase() : ""; | ||
} | ||
`, | ||
returnByValue: true, | ||
}); | ||
|
||
// If we got a tagName, store it in node.role | ||
if (result?.value) { | ||
// Overwrite node.role, so it won't be "generic" or empty | ||
node.role = { value: result.value }; | ||
} | ||
} catch (tagNameError) { | ||
// If we can't resolve the tagName, log and skip | ||
logger({ | ||
category: "observation", | ||
message: `Could not fetch tagName for node ${node.backendDOMNodeId}`, | ||
level: 2, | ||
auxiliary: { | ||
error: { | ||
value: tagNameError.message, | ||
type: "string", | ||
}, | ||
}, | ||
}); | ||
} | ||
} | ||
} | ||
} catch (resolveError) { | ||
logger({ | ||
category: "observation", | ||
message: `Could not resolve DOM node ID ${node.backendDOMNodeId}`, | ||
level: 2, | ||
auxiliary: { | ||
error: { | ||
value: resolveError.message, | ||
type: "string", | ||
}, | ||
}, | ||
}); | ||
} | ||
} | ||
} | ||
|
||
// Now build the final hierarchical structure (including updated .role if replaced by tagName) | ||
const sources = nodes.map((node) => ({ | ||
role: node.role?.value, | ||
name: node.name?.value, | ||
description: node.description?.value, | ||
value: node.value?.value, | ||
nodeId: node.nodeId, | ||
backendDOMNodeId: node.backendDOMNodeId, | ||
parentId: node.parentId, | ||
childIds: node.childIds, | ||
xpath: node.xpath, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. For large websites, it makes sense to compute the XPaths at runtime for the candidates selected by observe. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah, nice catch. We aren't precomputing them; I'll take them out of the AXNode type since they are unused. |
||
})); | ||
// Transform into hierarchical structure | ||
const hierarchicalTree = buildHierarchicalTree(sources); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Isn't backendDOMNodeId === nodeId?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No, nodeId is a11y-specific (remember, we used to have negative values for nodeIds).