diff --git a/docs/tool-reference.md b/docs/tool-reference.md index aae061da9..a8605b837 100644 --- a/docs/tool-reference.md +++ b/docs/tool-reference.md @@ -45,6 +45,7 @@ - **uid** (string) **(required)**: The uid of an element on the page from the page content snapshot - **dblClick** (boolean) _(optional)_: Set to true for double clicks. Default is false. +- **includeSnapshot** (boolean) _(optional)_: Whether to include a snapshot in the response. Default is false. --- @@ -56,6 +57,7 @@ - **from_uid** (string) **(required)**: The uid of the element to [`drag`](#drag) - **to_uid** (string) **(required)**: The uid of the element to drop into +- **includeSnapshot** (boolean) _(optional)_: Whether to include a snapshot in the response. Default is false. --- @@ -67,6 +69,7 @@ - **uid** (string) **(required)**: The uid of an element on the page from the page content snapshot - **value** (string) **(required)**: The value to [`fill`](#fill) in +- **includeSnapshot** (boolean) _(optional)_: Whether to include a snapshot in the response. Default is false. --- @@ -77,6 +80,7 @@ **Parameters:** - **elements** (array) **(required)**: Elements from snapshot to [`fill`](#fill) out. +- **includeSnapshot** (boolean) _(optional)_: Whether to include a snapshot in the response. Default is false. --- @@ -98,6 +102,7 @@ **Parameters:** - **uid** (string) **(required)**: The uid of an element on the page from the page content snapshot +- **includeSnapshot** (boolean) _(optional)_: Whether to include a snapshot in the response. Default is false. --- @@ -108,6 +113,7 @@ **Parameters:** - **key** (string) **(required)**: A key or a combination (e.g., "Enter", "Control+A", "Control++", "Control+Shift+R"). Modifiers: Control, Shift, Alt, Meta +- **includeSnapshot** (boolean) _(optional)_: Whether to include a snapshot in the response. Default is false. --- @@ -119,6 +125,7 @@ - **filePath** (string) **(required)**: The local path of the file to upload - **uid** (string) **(required)**: The uid of the file input element or an element that will open file chooser on the page from the page content snapshot +- **includeSnapshot** (boolean) _(optional)_: Whether to include a snapshot in the response. Default is false. --- diff --git a/scripts/eval_scenarios/input_parallel_test.ts b/scripts/eval_scenarios/input_parallel_test.ts new file mode 100644 index 000000000..e93148036 --- /dev/null +++ b/scripts/eval_scenarios/input_parallel_test.ts @@ -0,0 +1,34 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import assert from 'node:assert'; + +import type {TestScenario} from '../eval_gemini.ts'; + +export const scenario: TestScenario = { + prompt: + 'Go to , fill the input with "hello world" and click the button five times in parallel.', + maxTurns: 10, + htmlRoute: { + path: '/input_test.html', + htmlContent: ` + + + `, + }, + expectations: calls => { + assert.strictEqual(calls.length, 8); + assert.ok( + calls[0].name === 'navigate_page' || calls[0].name === 'new_page', + ); + assert.ok(calls[1].name === 'take_snapshot'); + assert.ok(calls[2].name === 'fill'); + for (let i = 3; i < 8; i++) { + assert.ok(calls[i].name === 'click'); + assert.strictEqual(Boolean(calls[i].args.includeSnapshot), false); + } + }, +}; diff --git a/src/tools/input.ts b/src/tools/input.ts index 46e68fcdd..f5d789187 100644 --- a/src/tools/input.ts +++ b/src/tools/input.ts @@ -17,6 +17,11 @@ const dblClickSchema = zod .optional() .describe('Set to true for double clicks. Default is false.'); +const includeSnapshotSchema = zod + .boolean() + .optional() + .describe('Whether to include a snapshot in the response. Default is false.'); + export const click = defineTool({ name: 'click', description: `Clicks on the provided element`, @@ -31,6 +36,7 @@ export const click = defineTool({ 'The uid of an element on the page from the page content snapshot', ), dblClick: dblClickSchema, + includeSnapshot: includeSnapshotSchema, }, handler: async (request, response, context) => { const uid = request.params.uid; @@ -46,7 +52,9 @@ export const click = defineTool({ ? `Successfully double clicked on the element` : `Successfully clicked on the element`, ); - response.includeSnapshot(); + if (request.params.includeSnapshot) { + response.includeSnapshot(); + } } finally { void handle.dispose(); } @@ -65,6 +73,7 @@ export const clickAt = defineTool({ x: zod.number().describe('The x coordinate'), y: zod.number().describe('The y coordinate'), dblClick: dblClickSchema, + includeSnapshot: includeSnapshotSchema, }, handler: async (request, response, context) => { const page = context.getSelectedPage(); @@ -78,7 +87,9 @@ export const clickAt = defineTool({ ? `Successfully double clicked at the coordinates` : `Successfully clicked at the coordinates`, ); - response.includeSnapshot(); + if (request.params.includeSnapshot) { + response.includeSnapshot(); + } }, }); @@ -95,6 +106,7 @@ export const hover = defineTool({ .describe( 'The uid of an element on the page from the page content snapshot', ), + includeSnapshot: includeSnapshotSchema, }, handler: async (request, response, context) => { const uid = request.params.uid; @@ -104,7 +116,9 @@ export const hover = defineTool({ await handle.asLocator().hover(); }); response.appendResponseLine(`Successfully hovered over the element`); - response.includeSnapshot(); + if (request.params.includeSnapshot) { + response.includeSnapshot(); + } } finally { void handle.dispose(); } @@ -185,6 +199,7 @@ export const fill = defineTool({ 'The uid of an element on the page from the page content snapshot', ), value: zod.string().describe('The value to fill in'), + includeSnapshot: includeSnapshotSchema, }, handler: async (request, response, context) => { await context.waitForEventsAfterAction(async () => { @@ -196,7 +211,9 @@ export const fill = defineTool({ ); }); response.appendResponseLine(`Successfully filled out the element`); - response.includeSnapshot(); + if (request.params.includeSnapshot) { + response.includeSnapshot(); + } }, }); @@ -210,6 +227,7 @@ export const drag = defineTool({ schema: { from_uid: zod.string().describe('The uid of the element to drag'), to_uid: zod.string().describe('The uid of the element to drop into'), + includeSnapshot: includeSnapshotSchema, }, handler: async (request, response, context) => { const fromHandle = await context.getElementByUid(request.params.from_uid); @@ -221,7 +239,9 @@ export const drag = defineTool({ await toHandle.drop(fromHandle); }); response.appendResponseLine(`Successfully dragged an element`); - response.includeSnapshot(); + if (request.params.includeSnapshot) { + response.includeSnapshot(); + } } finally { void fromHandle.dispose(); void toHandle.dispose(); @@ -245,6 +265,7 @@ export const fillForm = defineTool({ }), ) .describe('Elements from snapshot to fill out.'), + includeSnapshot: includeSnapshotSchema, }, handler: async (request, response, context) => { for (const element of request.params.elements) { @@ -257,7 +278,9 @@ export const fillForm = defineTool({ }); } response.appendResponseLine(`Successfully filled out the form`); - response.includeSnapshot(); + if (request.params.includeSnapshot) { + response.includeSnapshot(); + } }, }); @@ -275,6 +298,7 @@ export const uploadFile = defineTool({ 'The uid of the file input element or an element that will open file chooser on the page from the page content snapshot', ), filePath: zod.string().describe('The local path of the file to upload'), + includeSnapshot: includeSnapshotSchema, }, handler: async (request, response, context) => { const {uid, filePath} = request.params; @@ -301,7 +325,9 @@ export const uploadFile = defineTool({ ); } } - response.includeSnapshot(); + if (request.params.includeSnapshot) { + response.includeSnapshot(); + } response.appendResponseLine(`File uploaded from ${filePath}.`); } finally { void handle.dispose(); @@ -322,6 +348,7 @@ export const pressKey = defineTool({ .describe( 'A key or a combination (e.g., "Enter", "Control+A", "Control++", "Control+Shift+R"). Modifiers: Control, Shift, Alt, Meta', ), + includeSnapshot: includeSnapshotSchema, }, handler: async (request, response, context) => { const page = context.getSelectedPage(); @@ -341,6 +368,8 @@ export const pressKey = defineTool({ response.appendResponseLine( `Successfully pressed key: ${request.params.key}`, ); - response.includeSnapshot(); + if (request.params.includeSnapshot) { + response.includeSnapshot(); + } }, }); diff --git a/tests/tools/input.test.ts b/tests/tools/input.test.ts index 0150671c3..316c8d76f 100644 --- a/tests/tools/input.test.ts +++ b/tests/tools/input.test.ts @@ -155,6 +155,55 @@ describe('input', () => { assert(handlerResolveTime > buttonChangeTime, 'Waited for navigation'); }); }); + + it('does not include snapshot by default', async () => { + await withMcpContext(async (response, context) => { + const page = context.getSelectedPage(); + await page.setContent( + html``, + ); + await context.createTextSnapshot(); + await click.handler( + { + params: { + uid: '1_1', + }, + }, + response, + context, + ); + assert.strictEqual( + response.responseLines[0], + 'Successfully clicked on the element', + ); + assert.strictEqual(response.snapshotParams, undefined); + }); + }); + + it('includes snapshot if includeSnapshot is true', async () => { + await withMcpContext(async (response, context) => { + const page = context.getSelectedPage(); + await page.setContent( + html``, + ); + await context.createTextSnapshot(); + await click.handler( + { + params: { + uid: '1_1', + includeSnapshot: true, + }, + }, + response, + context, + ); + assert.strictEqual( + response.responseLines[0], + 'Successfully clicked on the element', + ); + assert.notStrictEqual(response.snapshotParams, undefined); + }); + }); }); describe('hover', () => {