Skip to content

Commit

Permalink
drawOverlay observe option
Browse files Browse the repository at this point in the history
  • Loading branch information
sameelarif committed Feb 9, 2025
1 parent 853e131 commit ab51a5c
Show file tree
Hide file tree
Showing 4 changed files with 75 additions and 3 deletions.
9 changes: 9 additions & 0 deletions lib/StagehandPage.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import {
import { z } from "zod";
import { StagehandExtractHandler } from "./handlers/extractHandler";
import { StagehandObserveHandler } from "./handlers/observeHandler";
import { clearOverlays } from "./utils";

export class StagehandPage {
private stagehand: Stagehand;
Expand Down Expand Up @@ -292,6 +293,8 @@ export class StagehandPage {
throw new Error("Act handler not initialized");
}

await clearOverlays(this.page);

// If actionOrOptions is an ObserveResult, we call actFromObserveResult.
// We need to ensure there is both a selector and a method in the ObserveResult.
if (typeof actionOrOptions === "object" && actionOrOptions !== null) {
Expand Down Expand Up @@ -408,6 +411,8 @@ export class StagehandPage {
throw new Error("Extract handler not initialized");
}

await clearOverlays(this.page);

const options: ExtractOptions<T> =
typeof instructionOrOptions === "string"
? {
Expand Down Expand Up @@ -491,6 +496,8 @@ export class StagehandPage {
throw new Error("Observe handler not initialized");
}

await clearOverlays(this.page);

const options: ObserveOptions =
typeof instructionOrOptions === "string"
? { instruction: instructionOrOptions }
Expand All @@ -505,6 +512,7 @@ export class StagehandPage {
returnAction = false,
onlyVisible = false,
useAccessibilityTree,
drawOverlay,
} = options;

if (useAccessibilityTree !== undefined) {
Expand Down Expand Up @@ -568,6 +576,7 @@ export class StagehandPage {
domSettleTimeoutMs,
returnAction,
onlyVisible,
drawOverlay,
})
.catch((e) => {
this.stagehand.log({
Expand Down
10 changes: 8 additions & 2 deletions lib/handlers/observeHandler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { Stagehand } from "../index";
import { observe } from "../inference";
import { LLMClient } from "../llm/LLMClient";
import { StagehandPage } from "../StagehandPage";
import { generateId } from "../utils";
import { generateId, drawObserveOverlay } from "../utils";
import {
getAccessibilityTree,
getXPathByResolvedObjectId,
Expand Down Expand Up @@ -55,17 +55,20 @@ export class StagehandObserveHandler {
requestId,
returnAction,
onlyVisible,
drawOverlay,
}: {
instruction: string;
llmClient: LLMClient;
requestId: string;
domSettleTimeoutMs?: number;
returnAction?: boolean;
onlyVisible?: boolean;
drawOverlay?: boolean;
}) {
if (!instruction) {
instruction = `Find elements that can be used for any future actions in the page. These may be navigation links, related pages, section/subsection links, buttons, or other interactive elements. Be comprehensive: if there are multiple elements that may be relevant for future actions, return all of them.`;
}

this.logger({
category: "observation",
message: "starting observation",
Expand Down Expand Up @@ -108,7 +111,6 @@ export class StagehandObserveHandler {
isUsingAccessibilityTree: useAccessibilityTree,
returnAction,
});

const elementsWithSelectors = await Promise.all(
observationResponse.elements.map(async (element) => {
const { elementId, ...rest } = element;
Expand Down Expand Up @@ -182,6 +184,10 @@ export class StagehandObserveHandler {
},
});

if (drawOverlay) {
await drawObserveOverlay(this.stagehandPage.page, elementsWithSelectors);
}

await this._recordObservation(instruction, elementsWithSelectors);
return elementsWithSelectors;
}
Expand Down
58 changes: 57 additions & 1 deletion lib/utils.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import crypto from "crypto";
import { z } from "zod";
import { ObserveResult, Page } from ".";
import { LogLine } from "../types/log";
import { TextAnnotation } from "../types/textannotation";
import { z } from "zod";

// This is a heuristic for the width of a character in pixels. It seems to work
// better than attempting to calculate character widths dynamically, which sometimes
Expand Down Expand Up @@ -374,3 +375,58 @@ export function validateZodSchema(schema: z.ZodTypeAny, data: unknown) {
return false;
}
}

/**
 * Draws a translucent yellow highlight over every element referenced by the
 * given observe results.
 *
 * Each overlay is a `div` tagged with the `stagehandObserve="true"` attribute
 * and appended to `document.body`. Overlays ignore pointer events.
 *
 * @param page - Page in whose browser context the overlays are created.
 * @param results - Observe results; each `selector` is either prefixed with
 *   `xpath=` (resolved via `document.evaluate`) or passed to
 *   `document.querySelector` as-is.
 */
export async function drawObserveOverlay(page: Page, results: ObserveResult[]) {
  // Collect the selector of every result, dropping bare "xpath=" entries
  // that carry no actual path.
  const validXpaths = results
    .map((result) => result.selector)
    .filter((xpath) => xpath !== "xpath=");

  await page.evaluate((selectors) => {
    selectors.forEach((selector) => {
      let element: Node | null;
      if (selector.startsWith("xpath=")) {
        // Strip the 6-character "xpath=" prefix before evaluating.
        const xpath = selector.substring(6);
        element = document.evaluate(
          xpath,
          document,
          null,
          XPathResult.FIRST_ORDERED_NODE_TYPE,
          null,
        ).singleNodeValue;
      } else {
        element = document.querySelector(selector);
      }

      // Selectors that resolve to nothing (or to a non-HTML node) are skipped.
      if (element instanceof HTMLElement) {
        const overlay = document.createElement("div");
        overlay.setAttribute("stagehandObserve", "true");
        const rect = element.getBoundingClientRect();
        overlay.style.position = "absolute";
        // getBoundingClientRect() is viewport-relative, while an absolutely
        // positioned overlay is laid out in page coordinates; add the current
        // scroll offset so the highlight lines up on a scrolled page.
        overlay.style.left = rect.left + window.scrollX + "px";
        overlay.style.top = rect.top + window.scrollY + "px";
        overlay.style.width = rect.width + "px";
        overlay.style.height = rect.height + "px";
        overlay.style.backgroundColor = "rgba(255, 255, 0, 0.3)";
        // Let clicks pass through to the highlighted element.
        overlay.style.pointerEvents = "none";
        overlay.style.zIndex = "10000";
        document.body.appendChild(overlay);
      }
    });
  }, validXpaths);
}

/**
 * Removes every element tagged with `stagehandObserve="true"` from the page.
 *
 * Before a tagged element is detached, any child nodes it holds are moved
 * into its parent, directly in front of it.
 *
 * @param page - Page in whose browser context the cleanup runs.
 */
export async function clearOverlays(page: Page) {
  await page.evaluate(() => {
    const overlays = Array.from(
      document.querySelectorAll('[stagehandObserve="true"]'),
    );
    for (const overlay of overlays) {
      const container = overlay.parentNode;
      // Hoist the children out one at a time, preserving their order.
      for (
        let child = overlay.firstChild;
        child;
        child = overlay.firstChild
      ) {
        container?.insertBefore(child, overlay);
      }
      container?.removeChild(overlay);
    }
  });
}
1 change: 1 addition & 0 deletions types/stagehand.ts
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ export interface ObserveOptions {
onlyVisible?: boolean;
/** @deprecated `useAccessibilityTree` is now deprecated. Use `onlyVisible` instead. */
useAccessibilityTree?: boolean;
drawOverlay?: boolean;
}

export interface ObserveResult {
Expand Down

0 comments on commit ab51a5c

Please sign in to comment.