diff --git a/ts/packages/agents/browser/src/agent/browserConnector.mts b/ts/packages/agents/browser/src/agent/browserConnector.mts
index 701b1c85f..8a57e9cb8 100644
--- a/ts/packages/agents/browser/src/agent/browserConnector.mts
+++ b/ts/packages/agents/browser/src/agent/browserConnector.mts
@@ -225,6 +225,22 @@ export class BrowserConnector {
         return this.sendActionToBrowser(clickAction);
     }
 
+    /**
+     * Sends a "setDropdownValue" action to the browser.
+     *
+     * @param cssSelector forwarded as the action's `cssSelector` parameter.
+     * @param optionLabel forwarded as the action's `optionLabel` parameter.
+     * @returns the result of `sendActionToBrowser` for that action.
+     */
+    async setDropdown(cssSelector: string, optionLabel: string) {
+        const parameters = { cssSelector, optionLabel };
+        const dropdownAction = {
+            actionName: "setDropdownValue",
+            parameters,
+        };
+        return this.sendActionToBrowser(dropdownAction);
+    }
+
     async enterTextIn(textValue: string, cssSelector?: string) {
         let actionName = cssSelector ? "enterTextInElement" : "enterTextOnPage";
 
diff --git a/ts/packages/agents/browser/src/agent/discovery/actionHandler.mts b/ts/packages/agents/browser/src/agent/discovery/actionHandler.mts
index a5c1d8c45..6d5275014 100644
--- a/ts/packages/agents/browser/src/agent/discovery/actionHandler.mts
+++ b/ts/packages/agents/browser/src/agent/discovery/actionHandler.mts
@@ -149,7 +149,7 @@ export async function handleSchemaDiscoveryAction(
 
     async function handleRegisterSiteSchema(action: any) {
         const url = await browser.getPageUrl();
-        let detectedActions = new Map(
+        const detectedActions = new Map(
             Object.entries(
                 (await browser.getCurrentPageStoredProperty(
                     url!,
@@ -157,7 +157,7 @@ export async function handleSchemaDiscoveryAction(
                 )) ?? {},
             ),
         );
-        let authoredActions = new Map(
+        const authoredActions = new Map(
             Object.entries(
                 (await browser.getCurrentPageStoredProperty(
                     url!,
@@ -165,7 +165,7 @@ export async function handleSchemaDiscoveryAction(
                 )) ?? 
{}, ), ); - let typeDefinitions: ActionSchemaTypeDefinition[] = [ + const typeDefinitions: ActionSchemaTypeDefinition[] = [ ...detectedActions.values(), ...authoredActions.values(), ]; @@ -184,7 +184,7 @@ export async function handleSchemaDiscoveryAction( const hostName = new URL(url!).hostname.replace(/\./g, "_"); const agentName = `temp_${hostName}`; - let schemaDescription = `A schema that enables interactions with the ${hostName} page`; + const schemaDescription = `A schema that enables interactions with the ${hostName} page`; const manifest: AppAgentManifest = { emojiChar: "🚧", diff --git a/ts/packages/agents/browser/src/agent/discovery/schema/pageComponents.mts b/ts/packages/agents/browser/src/agent/discovery/schema/pageComponents.mts index b8cbca2c3..3210bfb66 100644 --- a/ts/packages/agents/browser/src/agent/discovery/schema/pageComponents.mts +++ b/ts/packages/agents/browser/src/agent/discovery/schema/pageComponents.mts @@ -55,7 +55,36 @@ export type LocationInStore = { }; export type NavigationLink = { + title: string; // CSS Selector for the link + linkCssSelector: string; +}; + +export type DropdownControl = { + title: string; + // CSS Selector for the selection element + cssSelector: string; + + values: { + text: string; + value: string; + }[]; +}; + +export type TextInput = { title: string; + // CSS Selector for the text input area + cssSelector: string; +}; + +export type Button = { + title: string; + // CSS Selector for the button + cssSelector: string; +}; + +export type Link = { + title: string; + // CSS Selector for the link linkCssSelector: string; }; diff --git a/ts/packages/agents/browser/src/agent/discovery/schema/recordedActions.mts b/ts/packages/agents/browser/src/agent/discovery/schema/recordedActions.mts index 203b9532d..833b53807 100644 --- a/ts/packages/agents/browser/src/agent/discovery/schema/recordedActions.mts +++ b/ts/packages/agents/browser/src/agent/discovery/schema/recordedActions.mts @@ -5,15 +5,26 @@ export type 
UserIntentParameter = { // a concise name for the parameter, in camelCase. This should only contain alphanumeric characters shortName: string; + // a longer, descriptive name for the parameter. This value can contain non-alphanumeric characters name: string; + // The valid values are "string", "number" and "boolean" type: string; + // The default value for the parameter. If this value is set based on a HTML // page, check whether the target element has a default value. For dropdown elements, use the // selected value for this entry defaultValue?: any; + + // The text for the various options for this control. This is useful for HTML elements + // that only accept a fixed set of values e.g. dropdown elements, radio-button lists etc. + valueOptions?: string[]; + + // The description for this parameter. Always include a list of Options as part of the description if the + // HTML control only accepts a fixed set of values e.g. dropdown elements, radio-button lists etc. description: string; + // Indicates whether a parameter is required. If a parameter has a default value // then it is not required. 
required: boolean;
@@ -48,7 +59,7 @@ export type EnterText = {
     actionName: "enterText";
     parameters: {
         // the shortName of the UserIntentParameter to use for this value
-        text: string;
+        textParameter: string;
     };
 };
 
@@ -64,7 +75,7 @@ export type ClickOnButton = {
     actionName: "clickOnButton";
     parameters: {
         // the shortName of the UserIntentParameter to use for this value
-        buttonText: string;
+        buttonTextParameter: string;
     };
 };
 
diff --git a/ts/packages/agents/browser/src/agent/discovery/tempAgentActionHandler.mts b/ts/packages/agents/browser/src/agent/discovery/tempAgentActionHandler.mts
index c11d32b0b..1aa3b7b3a 100644
--- a/ts/packages/agents/browser/src/agent/discovery/tempAgentActionHandler.mts
+++ b/ts/packages/agents/browser/src/agent/discovery/tempAgentActionHandler.mts
@@ -8,7 +8,17 @@ import {
     NavigateToPage,
 } from "./schema/userActionsPool.mjs";
 import { handleCommerceAction } from "../commerce/actionHandler.mjs";
-import { NavigationLink } from "./schema/pageComponents.mjs";
+import {
+    Button,
+    DropdownControl,
+    NavigationLink,
+    TextInput,
+} from "./schema/pageComponents.mjs";
+import {
+    PageManipulationActions,
+    PageManipulationActionsList,
+    UserIntent,
+} from "./schema/recordedActions.mjs";
 
 export function createTempAgentForSchema(
     browser: BrowserConnector,
@@ -41,6 +51,9 @@ export function createTempAgentForSchema(
                     break;
                 case "signUpForNewsletterAction":
                     break;
+                default:
+                    handleUserDefinedAction(action);
+                    break;
             }
         },
     };
@@ -102,4 +115,168 @@ export function createTempAgentForSchema(
 
         await followLink(link?.linkCssSelector);
     }
+
+    /**
+     * Replays a user-authored action recorded for the current page.
+     *
+     * The intent and its step plan are looked up by `action.actionName` in
+     * the page's stored "authoredIntentJson" and "authoredActionsJson"
+     * properties. Steps are awaited one at a time, in plan order, and the
+     * returned promise settles only after the last step has finished.
+     *
+     * @param action the action to run; `action.actionName` selects the stored
+     *     plan and `action.parameters` is indexed by the parameter short names
+     *     that the plan's steps refer to.
+     */
+    async function handleUserDefinedAction(action: any) {
+        const url = await browser.getPageUrl();
+        const intentJson = new Map(
+            Object.entries(
+                (await browser.getCurrentPageStoredProperty(
+                    url!,
+                    "authoredIntentJson",
+                )) ?? {},
+            ),
+        );
+
+        const actionsJson = new Map(
+            Object.entries(
+                (await browser.getCurrentPageStoredProperty(
+                    url!,
+                    "authoredActionsJson",
+                )) ?? {},
+            ),
+        );
+
+        if (
+            !intentJson.has(action.actionName) ||
+            !actionsJson.has(action.actionName)
+        ) {
+            // Name the requested action; interpolating the Map itself only
+            // prints "[object Map]".
+            console.log(
+                `Action ${action.actionName} was not found on the list of user-defined actions`,
+            );
+            return;
+        }
+
+        const targetIntent = intentJson.get(action.actionName) as UserIntent;
+        const targetPlan = actionsJson.get(
+            action.actionName,
+        ) as PageManipulationActionsList;
+
+        console.log(`Running ${targetPlan.planName}`);
+
+        // Looks up the intent parameter that a step references by short name.
+        const findParameter = (shortName: string) =>
+            targetIntent.parameters.find(
+                (param) => param.shortName === shortName,
+            );
+
+        // for...of instead of forEach(async ...): forEach ignores the promises
+        // returned by its callback, which started every step at once and let
+        // this function return before any of them had completed.
+        const steps: PageManipulationActions[] = targetPlan.steps;
+        for (const step of steps) {
+            switch (step.actionName) {
+                case "ClickOnLink": {
+                    const linkParameter = findParameter(
+                        step.parameters.linkTextParameter,
+                    );
+                    const link = (await getComponentFromPage(
+                        "NavigationLink",
+                        `link text ${linkParameter?.name}`,
+                    )) as NavigationLink;
+
+                    await followLink(link?.linkCssSelector);
+                    break;
+                }
+                case "clickOnButton": {
+                    const buttonParameter = findParameter(
+                        step.parameters.buttonTextParameter,
+                    );
+                    const button = (await getComponentFromPage(
+                        "Button",
+                        `button text ${buttonParameter?.name}`,
+                    )) as Button | undefined;
+                    if (!button) {
+                        console.error(
+                            `Could not find a button for ${buttonParameter?.name}; stopping ${targetPlan.planName}.`,
+                        );
+                        return;
+                    }
+
+                    await browser.clickOn(button.cssSelector);
+                    await browser.awaitPageInteraction();
+                    await browser.awaitPageLoad();
+                    break;
+                }
+                case "enterText": {
+                    const textParameter = findParameter(
+                        step.parameters.textParameter,
+                    );
+                    const textElement = (await getComponentFromPage(
+                        "TextInput",
+                        `input label ${textParameter?.name}`,
+                    )) as TextInput | undefined;
+
+                    const userProvidedTextValue =
+                        action.parameters[step.parameters.textParameter];
+
+                    if (userProvidedTextValue !== undefined) {
+                        if (!textElement) {
+                            console.error(
+                                `Could not find a text input for ${textParameter?.name}; stopping ${targetPlan.planName}.`,
+                            );
+                            return;
+                        }
+                        await browser.enterTextIn(
+                            userProvidedTextValue,
+                            textElement.cssSelector,
+                        );
+                    }
+                    break;
+                }
+                case "selectElementByText":
+                    break;
+                case "selectValueFromDropdown": {
+                    const selectParameter = findParameter(
+                        step.parameters.valueTextParameter,
+                    );
+                    const userProvidedValue =
+                        action.parameters[step.parameters.valueTextParameter];
+                    if (userProvidedValue === undefined) {
+                        break;
+                    }
+
+                    const selectElement = (await getComponentFromPage(
+                        "DropdownControl",
+                        `text ${selectParameter?.name}`,
+                    )) as DropdownControl | undefined;
+                    if (!selectElement) {
+                        console.error(
+                            `Could not find a dropdown for ${selectParameter?.name}; stopping ${targetPlan.planName}.`,
+                        );
+                        return;
+                    }
+
+                    await browser.clickOn(selectElement.cssSelector);
+                    const selectValue = selectElement.values.find(
+                        (value) => value.text === userProvidedValue,
+                    );
+                    if (selectValue) {
+                        await browser.setDropdown(
+                            selectElement.cssSelector,
+                            selectValue.text,
+                        );
+                    } else {
+                        console.error(
+                            `Could not find a dropdown option with text ${userProvidedValue} on the ${selectElement.title} dropdown.`,
+                        );
+                    }
+                    break;
+                }
+            }
+        }
+    }
 }
diff --git a/ts/packages/agents/browser/src/extension/serviceWorker.ts b/ts/packages/agents/browser/src/extension/serviceWorker.ts
index 6692f67c5..e54a0d760 100644
--- a/ts/packages/agents/browser/src/extension/serviceWorker.ts
+++ b/ts/packages/agents/browser/src/extension/serviceWorker.ts
@@ -1205,6 +1205,14 @@ async function runBrowserAction(action: any) {
             });
             break;
         }
+        case "setDropdownValue": {
+            const targetTab = await getActiveTab();
+            const response = await chrome.tabs.sendMessage(targetTab.id!, {
+                type: "run_ui_event",
+                action: action,
+            });
+            break;
+        }
         case "getPageSchema": {
             const targetTab = await getActiveTab();
             const key = action.parameters.url ?? 
targetTab.url;
@@ -1569,6 +1577,7 @@ chrome.runtime.onMessage.addListener(
 
                 sendResponse({
                     intent: schemaResult.intent,
+                    intentJson: schemaResult.intentJson,
                     actions: schemaResult.actions,
                     intentTypeDefinition: schemaResult.intentTypeDefinition,
                 });
diff --git a/ts/packages/agents/browser/src/extension/sidepanel.ts b/ts/packages/agents/browser/src/extension/sidepanel.ts
index eb3e76214..4affc725c 100644
--- a/ts/packages/agents/browser/src/extension/sidepanel.ts
+++ b/ts/packages/agents/browser/src/extension/sidepanel.ts
@@ -176,25 +176,83 @@ async function saveUserAction() {
     const screenshot = JSON.parse(stepsContainer.dataset.screenshot || "");
     const html = JSON.parse(stepsContainer.dataset.html || "");
 
-    // Retrieve existing actions from localStorage
-    const storedActions = localStorage.getItem("userActions");
-    const actions = storedActions ? JSON.parse(storedActions) : [];
-
-    // Add new action
-    actions.push({
-        name: actionName,
-        description: actionDescription,
-        steps,
+    const button = document.getElementById("saveAction") as HTMLButtonElement;
+    const originalContent = button.innerHTML;
+    const originalClass = button.className;
+
+    function showTemporaryStatus(text: string, newClass: string) {
+        button.innerHTML = text;
+        button.className = `btn btn-sm ${newClass}`;
+
+        setTimeout(() => {
+            button.innerHTML = originalContent;
+            button.className = originalClass;
+            button.disabled = false;
+        }, 5000);
+    }
+
+    button.innerHTML = ` Processing...`;
+    button.disabled = true;
+
+    // Get schema based on the recorded action info
+    const response = await chrome.runtime.sendMessage({
+        type: "getIntentFromRecording",
+        html: [{ content: html, frameId: 0 }],
         screenshot,
-        html,
+        actionName,
+        actionDescription,
+        steps: JSON.stringify(steps),
     });
+    if (chrome.runtime.lastError) {
+        console.error("Error fetching schema:", chrome.runtime.lastError);
+        showTemporaryStatus("✖ Failed", "btn-outline-danger");
+    } else {
+        await addEntryToStoredPageProperties(actionName!, "userActions", {
+            name: actionName,
+            description: actionDescription,
+            steps,
+            screenshot,
+            html,
+            intentSchema: response.intent,
+            actionsJson: response.actions,
+        });
 
-    // Save back to localStorage
-    localStorage.setItem("userActions", JSON.stringify(actions));
+        await addEntryToStoredPageProperties(
+            actionName!,
+            "authoredActionDefinitions",
+            response.intentTypeDefinition,
+        );
+        await addEntryToStoredPageProperties(
+            actionName!,
+            "authoredActionsJson",
+            response.actions,
+        );
+        await addEntryToStoredPageProperties(
+            actionName!,
+            "authoredIntentJson",
+            response.intentJson,
+        );
+        showTemporaryStatus("✔ Succeeded", "btn-outline-success");
+    }
 
-    // Update UI
+    toggleActionForm();
     await updateUserActionsUI();
-    toggleActionForm(); // Hide form after saving
+}
+
+// Adds or replaces a single entry in one of the stored page properties.
+// Loads the object saved under `key` for `launchUrl` (a missing value is
+// treated as an empty object), sets its `actionName` entry to `value`, and
+// saves the result back under the same key. Entries stored for other
+// action names are left as they were.
+async function addEntryToStoredPageProperties(
+    actionName: string,
+    key: string,
+    value: any,
+) {
+    const stored = (await getStoredPageProperty(launchUrl!, key)) ?? {};
+    // Spread first so the new value wins when `actionName` already exists.
+    const merged = { ...stored, [actionName]: value };
+    await setStoredPageProperty(launchUrl!, key, merged);
 }
 
 // Function to update user actions display
@@ -239,13 +297,10 @@ async function stopRecording() {
     ).value.trim();
 
     renderTimelineSteps(
-        actionName,
-        actionDescription,
         response.recordedActions,
         stepsContainer,
         response.recordedActionScreenshot,
         response.recordedActionHtml,
-        true,
     );
 }
 
@@ -266,13 +321,11 @@ async function cancelRecording() {
 }
 
 async function clearRecordedUserAction() {
-    if (localStorage.getItem("userActions")) {
-        localStorage.removeItem("userActions");
-    }
-
     await chrome.runtime.sendMessage({ type: "clearRecordedActions" });
-
+    await setStoredPageProperty(launchUrl!, "userActions", null);
     await setStoredPageProperty(launchUrl!, "authoredActionDefinitions", null);
+    await setStoredPageProperty(launchUrl!, "authoredActionsJson", null);
+    await setStoredPageProperty(launchUrl!, "authoredIntentJson", null);
     // Update UI 
await updateUserActionsUI(); } @@ -283,8 +336,13 @@ async function showUserDefinedActionsList() { ) as HTMLDivElement; // Fetch recorded actions - const storedActions = localStorage.getItem("userActions"); - const actions = storedActions ? JSON.parse(storedActions) : []; + const storedActions = new Map( + Object.entries( + (await getStoredPageProperty(launchUrl!, "userActions")) ?? {}, + ), + ); + + const actions = Array.from(storedActions.values()); userActionsListContainer.innerHTML = ""; @@ -332,9 +390,6 @@ function renderTimeline(action: any, index: number) { Actions - @@ -364,98 +419,46 @@ function renderTimeline(action: any, index: number) { "#Stepscontent", )! as HTMLElement; renderTimelineSteps( - action.name, - action.description, action.steps, stepsContainer, action.screenshot, action.html, ); - const processActionButton = timelineHeader.querySelector( - "#processAction", - )! as HTMLElement; - - const intentViewContainer = timelineHeader.querySelector( - "#intentContent", - )! as HTMLElement; - - const actionsViewContainer = timelineHeader.querySelector( - "#planContent", - )! as HTMLElement; - - processActionButton.style.display = "block"; - processActionButton.addEventListener("click", () => - getIntentFromRecording( - action.html, - action.screenshot, - action.name, - action.description, - action.steps, - ), - ); - - async function getIntentFromRecording( - html: string, - screenshot: string, - actionName: string, - description: string, - steps: any[], - ) { - const response = await chrome.runtime.sendMessage({ - type: "getIntentFromRecording", - html: [{ content: html, frameId: 0 }], - screenshot, - actionName, - description, - steps: JSON.stringify(steps), - }); - if (chrome.runtime.lastError) { - console.error("Error fetching schema:", chrome.runtime.lastError); - return; - } - + if (action.intentSchema !== undefined) { const card = document.createElement("div"); card.innerHTML = ` -
${response.intent}
+ ${action.intentSchema}
`;
+ const intentViewContainer = timelineHeader.querySelector(
+ "#intentContent",
+ )! as HTMLElement;
+
intentViewContainer.replaceChildren(card);
+ }
+
+ if (action.actionsJson !== undefined) {
+ const actionsViewContainer = timelineHeader.querySelector(
+ "#planContent",
+ )! as HTMLElement;
const actionsCard = document.createElement("div");
actionsCard.innerHTML = `
- ${JSON.stringify(response.actions, null, 2)}
+ ${JSON.stringify(action.actionsJson, null, 2)}
`;
actionsViewContainer.replaceChildren(actionsCard);
-
- let currentTypeDefinitions = new Map(
- Object.entries(
- (await getStoredPageProperty(
- launchUrl!,
- "authoredActionDefinitions",
- )) ?? {},
- ),
- );
- currentTypeDefinitions.set(actionName, response.intentTypeDefinition);
- await setStoredPageProperty(
- launchUrl!,
- "authoredActionDefinitions",
- Object.fromEntries(currentTypeDefinitions),
- );
}
userActionsListContainer.appendChild(timelineHeader);
}
function renderTimelineSteps(
- actionName: string,
- actionDescription: string,
steps: any[],
userActionsListContainer: HTMLElement,
screenshotData: string,
htmlData: string,
- isEditingMode?: boolean,
) {
userActionsListContainer.innerHTML = `