Skip to content

Commit

Permalink
Sidepanel: enable user-defined action sequences to run in the dynamic…
Browse files Browse the repository at this point in the history
… webAgent (#797)

- Save discovered action sequences in extension local storage
- Add handler to run the user-defined intents based on the inferred plan
- Add helper ui automation to handle dropdowns
- Always run schema inference when the user saves a new recorded action.
  • Loading branch information
hillary-mutisya authored Mar 7, 2025
1 parent 133861e commit c23b33d
Show file tree
Hide file tree
Showing 8 changed files with 522 additions and 163 deletions.
11 changes: 11 additions & 0 deletions ts/packages/agents/browser/src/agent/browserConnector.mts
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,17 @@ export class BrowserConnector {
return this.sendActionToBrowser(clickAction);
}

// Sends a "setDropdownValue" action to the browser, carrying the CSS selector
// of the target element and the label of the option to pick. Resolves with
// whatever sendActionToBrowser returns for that action.
async setDropdown(cssSelector: string, optionLabel: string) {
    const parameters = { cssSelector, optionLabel };
    const dropdownAction = { actionName: "setDropdownValue", parameters };

    return this.sendActionToBrowser(dropdownAction);
}

async enterTextIn(textValue: string, cssSelector?: string) {
let actionName = cssSelector ? "enterTextInElement" : "enterTextOnPage";

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -149,23 +149,23 @@ export async function handleSchemaDiscoveryAction(

async function handleRegisterSiteSchema(action: any) {
const url = await browser.getPageUrl();
let detectedActions = new Map(
const detectedActions = new Map(
Object.entries(
(await browser.getCurrentPageStoredProperty(
url!,
"detectedActionDefinitions",
)) ?? {},
),
);
let authoredActions = new Map(
const authoredActions = new Map(
Object.entries(
(await browser.getCurrentPageStoredProperty(
url!,
"authoredActionDefinitions",
)) ?? {},
),
);
let typeDefinitions: ActionSchemaTypeDefinition[] = [
const typeDefinitions: ActionSchemaTypeDefinition[] = [
...detectedActions.values(),
...authoredActions.values(),
];
Expand All @@ -184,7 +184,7 @@ export async function handleSchemaDiscoveryAction(

const hostName = new URL(url!).hostname.replace(/\./g, "_");
const agentName = `temp_${hostName}`;
let schemaDescription = `A schema that enables interactions with the ${hostName} page`;
const schemaDescription = `A schema that enables interactions with the ${hostName} page`;

const manifest: AppAgentManifest = {
emojiChar: "🚧",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,36 @@ export type LocationInStore = {
};

// Describes a navigation link found on a page.
export type NavigationLink = {
// Human-readable title of the link (presumably its visible text - confirm)
title: string;
// CSS Selector for the link
linkCssSelector: string;
};

// Describes a dropdown (selection) control found on a page, together with
// the options it offers.
export type DropdownControl = {
// Human-readable name of the dropdown; used when reporting errors about it
title: string;
// CSS Selector for the selection element
cssSelector: string;

// The options available in the dropdown
values: {
// The option's label; this is what a user-provided value is matched against
text: string;
// The option's underlying value (presumably the HTML value attribute - confirm)
value: string;
}[];
};

// Describes a text input area found on a page.
export type TextInput = {
// Human-readable title of the input (presumably its label - confirm)
title: string;
// CSS Selector for the text input area
cssSelector: string;
};

// Describes a button found on a page.
export type Button = {
// Human-readable title of the button (presumably its visible text - confirm)
title: string;
// CSS Selector for the button
cssSelector: string;
};

// Describes a link found on a page. Has the same fields as NavigationLink.
export type Link = {
// Human-readable title of the link (presumably its visible text - confirm)
title: string;
// CSS Selector for the link
linkCssSelector: string;
};
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,26 @@
export type UserIntentParameter = {
// a concise name for the parameter, in camelCase. This should only contain alphanumeric characters
shortName: string;

// a longer, descriptive name for the parameter. This value can contain non-alphanumeric characters
name: string;

// The valid values are "string", "number" and "boolean"
type: string;

// The default value for the parameter. If this value is set based on a HTML
// page, check whether the target element has a default value. For dropdown elements, use the
// selected value for this entry
defaultValue?: any;

// The text for the various options for this control. This is useful for HTML elements
// that only accept a fixed set of values e.g. dropdown elements, radio-button lists etc.
valueOptions?: string[];

// The description for this parameter. Always include a list of Options as part of the description if the
// HTML control only accepts a fixed set of values e.g. dropdown elements, radio-button lists etc.
description: string;

// Indicates whether a parameter is required. If a parameter has a default value
// then it is not required.
required: boolean;
Expand Down Expand Up @@ -48,7 +59,7 @@ export type EnterText = {
actionName: "enterText";
parameters: {
// the shortName of the UserIntentParameter to use for this value
text: string;
textParameter: string;
};
};

Expand All @@ -64,7 +75,7 @@ export type ClickOnButton = {
actionName: "clickOnButton";
parameters: {
// the shortName of the UserIntentParameter to use for this value
buttonText: string;
buttonTextParameter: string;
};
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,16 @@ import {
NavigateToPage,
} from "./schema/userActionsPool.mjs";
import { handleCommerceAction } from "../commerce/actionHandler.mjs";
import { NavigationLink } from "./schema/pageComponents.mjs";
import {
Button,
DropdownControl,
NavigationLink,
} from "./schema/pageComponents.mjs";
import {
PageManipulationActions,
PageManipulationActionsList,
UserIntent,
} from "./schema/recordedActions.mjs";

export function createTempAgentForSchema(
browser: BrowserConnector,
Expand Down Expand Up @@ -41,6 +50,9 @@ export function createTempAgentForSchema(
break;
case "signUpForNewsletterAction":
break;
default:
handleUserDefinedAction(action);
break;
}
},
};
Expand Down Expand Up @@ -102,4 +114,133 @@ export function createTempAgentForSchema(

await followLink(link?.linkCssSelector);
}

/**
 * Runs a user-defined (recorded) action against the current page.
 *
 * The intent and the step plan for `action.actionName` are read from the
 * page's stored properties ("authoredIntentJson" and "authoredActionsJson").
 * The plan's steps are then executed strictly one after another, so that each
 * UI interaction completes before the next one starts.
 *
 * If the action is unknown, or a page component needed by a step cannot be
 * resolved, a message is logged and the remaining steps are not executed.
 *
 * @param action - The action to run. `action.actionName` selects the stored
 *   intent/plan; `action.parameters` holds the user-supplied values keyed by
 *   the intent parameter's shortName.
 */
async function handleUserDefinedAction(action: any) {
    const url = await browser.getPageUrl();
    const intentJson = new Map(
        Object.entries(
            (await browser.getCurrentPageStoredProperty(
                url!,
                "authoredIntentJson",
            )) ?? {},
        ),
    );

    const actionsJson = new Map(
        Object.entries(
            (await browser.getCurrentPageStoredProperty(
                url!,
                "authoredActionsJson",
            )) ?? {},
        ),
    );

    if (
        !intentJson.has(action.actionName) ||
        !actionsJson.has(action.actionName)
    ) {
        // Report the requested action name (not the Map, which would print
        // as "[object Map]").
        console.log(
            `Action ${action.actionName} was not found on the list of user-defined actions`,
        );
        return;
    }

    const targetIntent = intentJson.get(action.actionName) as UserIntent;
    const targetPlan = actionsJson.get(
        action.actionName,
    ) as PageManipulationActionsList;

    // Resolves an intent parameter from the shortName referenced by a step.
    const findParameter = (shortName: string) =>
        targetIntent.parameters.find((param) => param.shortName === shortName);

    console.log(`Running ${targetPlan.planName}`);

    // A for...of loop (rather than forEach with an async callback) is required
    // here: forEach does not wait for the returned promises, which would start
    // every step at once and let this function return before any finished.
    for (const step of targetPlan.steps) {
        switch (step.actionName) {
            case "ClickOnLink": {
                const linkParameter = findParameter(
                    step.parameters.linkTextParameter,
                );
                const link = (await getComponentFromPage(
                    "NavigationLink",
                    `link text ${linkParameter?.name}`,
                )) as NavigationLink;

                await followLink(link?.linkCssSelector);
                break;
            }
            case "clickOnButton": {
                const buttonParameter = findParameter(
                    step.parameters.buttonTextParameter,
                );
                const button = (await getComponentFromPage(
                    "Button",
                    `button text ${buttonParameter?.name}`,
                )) as Button | undefined;

                if (!button) {
                    console.error(
                        `Could not find a button for parameter ${step.parameters.buttonTextParameter}`,
                    );
                    return;
                }

                await browser.clickOn(button.cssSelector);
                await browser.awaitPageInteraction();
                await browser.awaitPageLoad();
                break;
            }
            case "enterText": {
                const textParameter = findParameter(
                    step.parameters.textParameter,
                );
                // NOTE(review): the component requested is a "TextInput" but it
                // is typed as Button because only Button is imported here; both
                // expose `cssSelector`, which is the only field read below.
                const textElement = (await getComponentFromPage(
                    "TextInput",
                    `input label ${textParameter?.name}`,
                )) as Button | undefined;

                const userProvidedTextValue =
                    action.parameters?.[step.parameters.textParameter];

                // Nothing to type when the user did not supply this value.
                if (userProvidedTextValue !== undefined) {
                    if (!textElement) {
                        console.error(
                            `Could not find a text input for parameter ${step.parameters.textParameter}`,
                        );
                        return;
                    }
                    await browser.enterTextIn(
                        userProvidedTextValue,
                        textElement.cssSelector,
                    );
                }
                break;
            }
            case "selectElementByText":
                // No handling implemented for this step type.
                break;
            case "selectValueFromDropdown": {
                const selectParameter = findParameter(
                    step.parameters.valueTextParameter,
                );

                const userProvidedValue =
                    action.parameters?.[step.parameters.valueTextParameter];

                // Leave the dropdown untouched when no value was supplied.
                if (userProvidedValue !== undefined) {
                    const selectElement = (await getComponentFromPage(
                        "DropdownControl",
                        `text ${selectParameter?.name}`,
                    )) as DropdownControl | undefined;

                    if (!selectElement) {
                        console.error(
                            `Could not find a dropdown for parameter ${step.parameters.valueTextParameter}`,
                        );
                        return;
                    }

                    await browser.clickOn(selectElement.cssSelector);

                    // Options are matched on their label text.
                    const selectValue = selectElement.values.find(
                        (value) => value.text === userProvidedValue,
                    );
                    if (selectValue) {
                        await browser.setDropdown(
                            selectElement.cssSelector,
                            selectValue.text,
                        );
                    } else {
                        console.error(
                            `Could not find a dropdown option with text ${userProvidedValue}\non the ${selectElement.title} dropdown.`,
                        );
                    }
                }
                break;
            }
        }
    }
}
}
9 changes: 9 additions & 0 deletions ts/packages/agents/browser/src/extension/serviceWorker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1205,6 +1205,14 @@ async function runBrowserAction(action: any) {
});
break;
}
case "setDropdownValue": {
const targetTab = await getActiveTab();
const response = await chrome.tabs.sendMessage(targetTab.id!, {
type: "run_ui_event",
action: action,
});
break;
}
case "getPageSchema": {
const targetTab = await getActiveTab();
const key = action.parameters.url ?? targetTab.url;
Expand Down Expand Up @@ -1569,6 +1577,7 @@ chrome.runtime.onMessage.addListener(

sendResponse({
intent: schemaResult.intent,
intentJson: schemaResult.intentJson,
actions: schemaResult.actions,
intentTypeDefinition: schemaResult.intentTypeDefinition,
});
Expand Down
Loading

0 comments on commit c23b33d

Please sign in to comment.