Skip to content

Commit

Permalink
Sidepanel: persisting type definitions in local storage (#793)
Browse files Browse the repository at this point in the history
- Enable saving type definitions in chrome local storage, per URL
- include user-authored actions in the registered dynamic agent schema
- simplify schema registration
  • Loading branch information
hillary-mutisya authored Mar 6, 2025
1 parent d0d0890 commit b3a5385
Show file tree
Hide file tree
Showing 7 changed files with 316 additions and 60 deletions.
3 changes: 2 additions & 1 deletion ts/packages/agents/browser/src/agent/actionHandler.mts
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,8 @@ async function updateBrowserContext(
break;
}

case "initializePageSchema":
case "detectPageActions":
case "registerPageDynamicAgent":
case "getIntentFromRecording": {
const discoveryResult =
await handleSchemaDiscoveryAction(
Expand Down
27 changes: 27 additions & 0 deletions ts/packages/agents/browser/src/agent/browserConnector.mts
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,33 @@ export class BrowserConnector {
return this.sendActionToBrowser(schemaAction, "browser");
}

/**
 * Asks the browser for a property stored for a page URL.
 *
 * Sends a "getPageStoredProperty" action through `getPageDataFromBrowser`
 * and races it against a 3 second timer.
 *
 * @param url - URL passed as the `url` parameter of the action.
 * @param key - Property name passed as the `key` parameter of the action.
 * @returns Whatever `getPageDataFromBrowser` resolves with, or `undefined`
 *          when the timer fires first.
 */
async getCurrentPageStoredProperty(url: string, key: string) {
    const action = {
        actionName: "getPageStoredProperty",
        parameters: {
            url: url,
            key: key,
        },
    };

    let timer: ReturnType<typeof setTimeout> | undefined;
    const timeoutPromise = new Promise<undefined>((resolve) => {
        timer = setTimeout(() => resolve(undefined), 3000);
    });

    try {
        return await Promise.race([
            this.getPageDataFromBrowser(action),
            timeoutPromise,
        ]);
    } finally {
        // Always cancel the timer so a fast response does not leave a
        // pending 3 second timeout behind on every call.
        clearTimeout(timer);
    }
}

/**
 * Sends a "setPageStoredProperty" action to the browser.
 *
 * @param url - URL passed as the `url` parameter of the action.
 * @param key - Property name passed as the `key` parameter of the action.
 * @param value - Value passed as the `value` parameter of the action.
 * @returns The result of `sendActionToBrowser` for the "browser" schema.
 */
async setCurrentPageStoredProperty(url: string, key: string, value: any) {
    const parameters = { url, key, value };
    return this.sendActionToBrowser(
        { actionName: "setPageStoredProperty", parameters },
        "browser",
    );
}

async getPageUrl() {
const action = {
actionName: "getPageUrl",
Expand Down
125 changes: 101 additions & 24 deletions ts/packages/agents/browser/src/agent/discovery/actionHandler.mts
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,7 @@ export async function handleSchemaDiscoveryAction(
const agent = await createDiscoveryPageTranslator("GPT_4_O");

switch (action.actionName) {
case "initializePageSchema":
case "findUserActions":
case "detectPageActions":
actionData = await handleFindUserActions(action);
break;
case "summarizePage":
Expand All @@ -58,6 +57,9 @@ export async function handleSchemaDiscoveryAction(
case "getIntentFromRecording":
actionData = await handleGetIntentFromReccording(action);
break;
case "registerPageDynamicAgent":
actionData = await handleRegisterSiteSchema(action);
break;
}

async function handleFindUserActions(action: any) {
Expand Down Expand Up @@ -110,14 +112,14 @@ export async function handleSchemaDiscoveryAction(
...new Set(selected.actions.map((action) => action.actionName)),
];

const schema = await getDynamicSchema(actionNames);
const { schema, typeDefinitions } = await getDynamicSchema(actionNames);
message += `\n =========== \n Discovered actions schema: \n ${schema} `;

const url = await browser.getPageUrl();
const hostName = new URL(url!).hostname.replace(/\./g, "_");
const agentName = `temp_${hostName}`;

if (action.parameters.registerAgent) {
if (action.parameters?.registerAgent) {
const manifest: AppAgentManifest = {
emojiChar: "🚧",
description: schemaDescription,
Expand All @@ -142,7 +144,72 @@ export async function handleSchemaDiscoveryAction(
}, 500);
}

return response.data;
return { schema: response.data, typeDefinitions: typeDefinitions };
}

/**
 * Builds a "DynamicUserPageActions" schema from the action definitions
 * stored for the current page and schedules registration of a temporary
 * dynamic agent for it.
 *
 * Reads the "detectedActionDefinitions" and "authoredActionDefinitions"
 * stored properties for the current page URL, combines their values into
 * one union type, and generates the schema text. The agent is named
 * `temp_<hostname>` with dots in the hostname replaced by underscores.
 *
 * @param action - The incoming action; not read by this handler.
 * @returns The generated schema text and the list of type definitions
 *          that went into it.
 */
async function handleRegisterSiteSchema(action: any) {
    const url = await browser.getPageUrl();

    // A missing (nullish) stored property falls back to an empty object,
    // so each map may legitimately be empty.
    const detectedActions = new Map(
        Object.entries(
            (await browser.getCurrentPageStoredProperty(
                url!,
                "detectedActionDefinitions",
            )) ?? {},
        ),
    );
    const authoredActions = new Map(
        Object.entries(
            (await browser.getCurrentPageStoredProperty(
                url!,
                "authoredActionDefinitions",
            )) ?? {},
        ),
    );
    const typeDefinitions: ActionSchemaTypeDefinition[] = [
        ...detectedActions.values(),
        ...authoredActions.values(),
    ];

    const union = sc.union(
        typeDefinitions.map((definition) => sc.ref(definition)),
    );
    const entry = sc.type("DynamicUserPageActions", union);
    entry.exported = true;
    const actionSchemas = new Map<string, ActionSchemaTypeDefinition>();
    const order = new Map<string, number>();
    const schema = await generateActionSchema(
        { entry, actionSchemas, order },
        { exact: true },
    );

    const hostName = new URL(url!).hostname.replace(/\./g, "_");
    const agentName = `temp_${hostName}`;
    const schemaDescription = `A schema that enables interactions with the ${hostName} page`;

    const manifest: AppAgentManifest = {
        emojiChar: "🚧",
        description: schemaDescription,
        schema: {
            description: schemaDescription,
            schemaType: "DynamicUserPageActions",
            schemaFile: { content: schema, type: "ts" },
        },
    };

    // register agent after request is processed to avoid a deadlock
    setTimeout(async () => {
        try {
            await context.removeDynamicAgent(agentName);
        } catch {
            // Best effort: removal errors are ignored so registration
            // below is always attempted.
        }

        try {
            await context.addDynamicAgent(
                agentName,
                manifest,
                createTempAgentForSchema(browser, agent, context),
            );
        } catch (error) {
            // Nothing awaits this timer callback, so a rejection here would
            // otherwise surface as an unhandled promise rejection.
            console.error(
                `Failed to register dynamic agent ${agentName}:`,
                error,
            );
        }
    }, 500);

    return { schema: schema, typeDefinitions: typeDefinitions };
}

async function getDynamicSchema(actionNames: string[]) {
Expand All @@ -167,15 +234,17 @@ export async function handleSchemaDiscoveryAction(
"UserPageActions",
);

let typeDefinitions: ActionSchemaTypeDefinition[] = [];
let typeDefinitions = new Map<string, ActionSchemaTypeDefinition>();
actionNames.forEach((name) => {
if (parsed.actionSchemas.has(name)) {
typeDefinitions.push(parsed.actionSchemas.get(name)!);
typeDefinitions.set(name, parsed.actionSchemas.get(name)!);
}
});

const union = sc.union(
typeDefinitions.map((definition) => sc.ref(definition)),
Array.from(typeDefinitions.values()).map((definition) =>
sc.ref(definition),
),
);
const entry = sc.type("DynamicUserPageActions", union);
entry.exported = true;
Expand All @@ -186,7 +255,7 @@ export async function handleSchemaDiscoveryAction(
{ exact: true },
);

return schema;
return { schema, typeDefinitions: Object.fromEntries(typeDefinitions) };
}

async function handleGetPageSummary(action: any) {
Expand Down Expand Up @@ -281,24 +350,30 @@ export async function handleSchemaDiscoveryAction(
) {
let fields: Map<string, any> = new Map<string, any>();

userIntentJson.parameters.forEach((p) => {
let t: ActionParamType = sc.string();
switch (p.type) {
userIntentJson.parameters.forEach((param) => {
let paramType: ActionParamType = sc.string();
switch (param.type) {
case "string":
t = sc.string();
paramType = sc.string();
break;
case "number":
t = sc.number();
paramType = sc.number();
break;
case "boolean":
t = sc.number();
paramType = sc.number();
break;
}

if (p.required && !p.defaultValue) {
fields.set(p.shortName, sc.field(t, p.description));
if (param.required && !param.defaultValue) {
fields.set(
param.shortName,
sc.field(paramType, param.description),
);
} else {
fields.set(p.shortName, sc.optional(t, p.description));
fields.set(
param.shortName,
sc.optional(paramType, param.description),
);
}
});

Expand All @@ -314,7 +389,10 @@ export async function handleSchemaDiscoveryAction(
true,
);

return await generateSchemaTypeDefinition(schema, { exact: true });
return {
actionSchema: generateSchemaTypeDefinition(schema, { exact: true }),
typeDefinition: schema,
};
}

async function handleGetIntentFromReccording(action: any) {
Expand All @@ -338,12 +416,12 @@ export async function handleSchemaDiscoveryAction(

console.timeEnd(timerName);

const intentSchema = await getIntentSchemaFromJSON(
const { actionSchema, typeDefinition } = await getIntentSchemaFromJSON(
intentResponse.data as UserIntent,
action.parameters.recordedActionDescription,
);

message = "Intent schema: \n" + intentSchema;
message = "Intent schema: \n" + actionSchema;

const timerName2 = `Getting action schema`;
console.time(timerName2);
Expand All @@ -369,14 +447,13 @@ export async function handleSchemaDiscoveryAction(
console.timeEnd(timerName2);

return {
intent: intentSchema,
intent: actionSchema,
intentJson: intentResponse.data,
intentTypeDefinition: typeDefinition,
actions: stepsResponse.data,
};
}

//

return {
displayText: message,
data: actionData,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ export type FindPageComponents = {
actionName: "findPageComponents";
};

export type FindUserActions = {
actionName: "findUserActions";
export type DetectPageActions = {
actionName: "detectPageActions";
parameters: {
registerAgent?: boolean;
agentName?: string;
Expand All @@ -25,10 +25,9 @@ export type SummarizePage = {
actionName: "summarizePage";
};

export type InitializePageSchema = {
actionName: "initializePageSchema";
export type RegisterPageDynamicAgent = {
actionName: "registerPageDynamicAgent";
parameters: {
registerAgent?: boolean;
agentName?: string;
};
};
Expand Down Expand Up @@ -70,10 +69,10 @@ export type GetIntentFromRecording = {

export type SchemaDiscoveryActions =
| FindPageComponents
| FindUserActions
| DetectPageActions
| GetSiteType
| GetPageType
| InitializePageSchema
| RegisterPageDynamicAgent
| SummarizePage
| SaveUserActions
| AddUserAction
Expand Down
28 changes: 24 additions & 4 deletions ts/packages/agents/browser/src/extension/serviceWorker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import {
isWebAgentMessageFromDispatcher,
WebAgentDisconnectMessage,
} from "../../dist/common/webAgentMessageTypes.mjs";
import { setStoredPageProperty, getStoredPageProperty } from "./storage";

async function getConfigValues(): Promise<Record<string, string>> {
const envLocation = chrome.runtime.getURL(".env");
Expand Down Expand Up @@ -1243,6 +1244,21 @@ async function runBrowserAction(action: any) {

break;
}
case "getPageStoredProperty": {
responseObject = await getStoredPageProperty(
action.parameters.url,
action.parameters.key,
);
break;
}
case "setPageStoredProperty": {
await setStoredPageProperty(
action.parameters.url,
action.parameters.key,
action.parameters.value,
);
break;
}
case "getConfiguration": {
responseObject = await getConfigValues();
break;
Expand Down Expand Up @@ -1516,20 +1532,23 @@ chrome.runtime.onMessage.addListener(
}
case "refreshSchema": {
const schemaResult = await sendActionToAgent({
actionName: "initializePageSchema",
actionName: "detectPageActions",
parameters: {
registerAgent: false,
},
});

sendResponse({ schema: schemaResult });
sendResponse({
schema: schemaResult.schema,
actionDefinitions: schemaResult.typeDefinitions,
});
break;
}
case "registerTempSchema": {
const schemaResult = await sendActionToAgent({
actionName: "initializePageSchema",
actionName: "registerPageDynamicAgent",
parameters: {
registerAgent: true,
agentName: message.agentName,
},
});

Expand All @@ -1551,6 +1570,7 @@ chrome.runtime.onMessage.addListener(
sendResponse({
intent: schemaResult.intent,
actions: schemaResult.actions,
intentTypeDefinition: schemaResult.intentTypeDefinition,
});
break;
}
Expand Down
Loading

0 comments on commit b3a5385

Please sign in to comment.