Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions docs/tool-reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@

- **uid** (string) **(required)**: The uid of an element on the page from the page content snapshot
- **dblClick** (boolean) _(optional)_: Set to true for double clicks. Default is false.
- **includeSnapshot** (boolean) _(optional)_: Whether to include a snapshot in the response. Default is false.

---

Expand All @@ -56,6 +57,7 @@

- **from_uid** (string) **(required)**: The uid of the element to [`drag`](#drag)
- **to_uid** (string) **(required)**: The uid of the element to drop into
- **includeSnapshot** (boolean) _(optional)_: Whether to include a snapshot in the response. Default is false.

---

Expand All @@ -67,6 +69,7 @@

- **uid** (string) **(required)**: The uid of an element on the page from the page content snapshot
- **value** (string) **(required)**: The value to [`fill`](#fill) in
- **includeSnapshot** (boolean) _(optional)_: Whether to include a snapshot in the response. Default is false.

---

Expand All @@ -77,6 +80,7 @@
**Parameters:**

- **elements** (array) **(required)**: Elements from snapshot to [`fill`](#fill) out.
- **includeSnapshot** (boolean) _(optional)_: Whether to include a snapshot in the response. Default is false.

---

Expand All @@ -98,6 +102,7 @@
**Parameters:**

- **uid** (string) **(required)**: The uid of an element on the page from the page content snapshot
- **includeSnapshot** (boolean) _(optional)_: Whether to include a snapshot in the response. Default is false.

---

Expand All @@ -108,6 +113,7 @@
**Parameters:**

- **key** (string) **(required)**: A key or a combination (e.g., "Enter", "Control+A", "Control++", "Control+Shift+R"). Modifiers: Control, Shift, Alt, Meta
- **includeSnapshot** (boolean) _(optional)_: Whether to include a snapshot in the response. Default is false.

---

Expand All @@ -119,6 +125,7 @@

- **filePath** (string) **(required)**: The local path of the file to upload
- **uid** (string) **(required)**: The uid of the file input element or an element that will open file chooser on the page from the page content snapshot
- **includeSnapshot** (boolean) _(optional)_: Whether to include a snapshot in the response. Default is false.

---

Expand Down
34 changes: 34 additions & 0 deletions scripts/eval_scenarios/input_parallel_test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/

import assert from 'node:assert';

import type {TestScenario} from '../eval_gemini.ts';

/**
 * Eval scenario: open a page with one text input and one button, fill the
 * input, then click the button five times.
 *
 * The expected tool-call trace is exactly eight calls, in this order:
 *   1. `navigate_page` or `new_page` (either way of opening the test URL),
 *   2. `take_snapshot`,
 *   3. `fill`,
 *   4-8. five `click` calls, none of which request a snapshot
 *        (`includeSnapshot` is absent or falsy).
 */
export const scenario: TestScenario = {
  prompt:
    'Go to <TEST_URL>, fill the input with "hello world" and click the button five times in parallel.',
  maxTurns: 10,
  htmlRoute: {
    path: '/input_test.html',
    htmlContent: `
<input type="text" id="test-input" />
<button id="test-button">Submit</button>
`,
  },
  expectations: calls => {
    assert.strictEqual(calls.length, 8);
    // Two tool names are acceptable for the first step, so this stays an
    // `ok` check; the message surfaces the actual name on failure.
    assert.ok(
      calls[0].name === 'navigate_page' || calls[0].name === 'new_page',
      `expected first call to be navigate_page or new_page, got ${calls[0].name}`,
    );
    // strictEqual reports actual vs. expected, unlike `assert.ok(a === b)`.
    assert.strictEqual(calls[1].name, 'take_snapshot');
    assert.strictEqual(calls[2].name, 'fill');
    // The length check above guarantees this covers exactly calls 3..7.
    for (const call of calls.slice(3)) {
      assert.strictEqual(call.name, 'click');
      // An omitted flag and an explicit `false` are both acceptable.
      assert.strictEqual(Boolean(call.args.includeSnapshot), false);
    }
  },
};
45 changes: 37 additions & 8 deletions src/tools/input.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@ const dblClickSchema = zod
.optional()
.describe('Set to true for double clicks. Default is false.');

/**
 * Shared schema for the optional boolean `includeSnapshot` parameter that the
 * input tools in this file accept. Handlers only call
 * `response.includeSnapshot()` when the parameter is truthy, so omitting it
 * yields a response without a snapshot.
 */
const includeSnapshotSchema = zod
.boolean()
.optional()
.describe('Whether to include a snapshot in the response. Default is false.');

export const click = defineTool({
name: 'click',
description: `Clicks on the provided element`,
Expand All @@ -31,6 +36,7 @@ export const click = defineTool({
'The uid of an element on the page from the page content snapshot',
),
dblClick: dblClickSchema,
includeSnapshot: includeSnapshotSchema,
},
handler: async (request, response, context) => {
const uid = request.params.uid;
Expand All @@ -46,7 +52,9 @@ export const click = defineTool({
? `Successfully double clicked on the element`
: `Successfully clicked on the element`,
);
response.includeSnapshot();
if (request.params.includeSnapshot) {
response.includeSnapshot();
}
} finally {
void handle.dispose();
}
Expand All @@ -65,6 +73,7 @@ export const clickAt = defineTool({
x: zod.number().describe('The x coordinate'),
y: zod.number().describe('The y coordinate'),
dblClick: dblClickSchema,
includeSnapshot: includeSnapshotSchema,
},
handler: async (request, response, context) => {
const page = context.getSelectedPage();
Expand All @@ -78,7 +87,9 @@ export const clickAt = defineTool({
? `Successfully double clicked at the coordinates`
: `Successfully clicked at the coordinates`,
);
response.includeSnapshot();
if (request.params.includeSnapshot) {
response.includeSnapshot();
}
},
});

Expand All @@ -95,6 +106,7 @@ export const hover = defineTool({
.describe(
'The uid of an element on the page from the page content snapshot',
),
includeSnapshot: includeSnapshotSchema,
},
handler: async (request, response, context) => {
const uid = request.params.uid;
Expand All @@ -104,7 +116,9 @@ export const hover = defineTool({
await handle.asLocator().hover();
});
response.appendResponseLine(`Successfully hovered over the element`);
response.includeSnapshot();
if (request.params.includeSnapshot) {
response.includeSnapshot();
}
} finally {
void handle.dispose();
}
Expand Down Expand Up @@ -185,6 +199,7 @@ export const fill = defineTool({
'The uid of an element on the page from the page content snapshot',
),
value: zod.string().describe('The value to fill in'),
includeSnapshot: includeSnapshotSchema,
},
handler: async (request, response, context) => {
await context.waitForEventsAfterAction(async () => {
Expand All @@ -196,7 +211,9 @@ export const fill = defineTool({
);
});
response.appendResponseLine(`Successfully filled out the element`);
response.includeSnapshot();
if (request.params.includeSnapshot) {
response.includeSnapshot();
}
},
});

Expand All @@ -210,6 +227,7 @@ export const drag = defineTool({
schema: {
from_uid: zod.string().describe('The uid of the element to drag'),
to_uid: zod.string().describe('The uid of the element to drop into'),
includeSnapshot: includeSnapshotSchema,
},
handler: async (request, response, context) => {
const fromHandle = await context.getElementByUid(request.params.from_uid);
Expand All @@ -221,7 +239,9 @@ export const drag = defineTool({
await toHandle.drop(fromHandle);
});
response.appendResponseLine(`Successfully dragged an element`);
response.includeSnapshot();
if (request.params.includeSnapshot) {
response.includeSnapshot();
}
} finally {
void fromHandle.dispose();
void toHandle.dispose();
Expand All @@ -245,6 +265,7 @@ export const fillForm = defineTool({
}),
)
.describe('Elements from snapshot to fill out.'),
includeSnapshot: includeSnapshotSchema,
},
handler: async (request, response, context) => {
for (const element of request.params.elements) {
Expand All @@ -257,7 +278,9 @@ export const fillForm = defineTool({
});
}
response.appendResponseLine(`Successfully filled out the form`);
response.includeSnapshot();
if (request.params.includeSnapshot) {
response.includeSnapshot();
}
},
});

Expand All @@ -275,6 +298,7 @@ export const uploadFile = defineTool({
'The uid of the file input element or an element that will open file chooser on the page from the page content snapshot',
),
filePath: zod.string().describe('The local path of the file to upload'),
includeSnapshot: includeSnapshotSchema,
},
handler: async (request, response, context) => {
const {uid, filePath} = request.params;
Expand All @@ -301,7 +325,9 @@ export const uploadFile = defineTool({
);
}
}
response.includeSnapshot();
if (request.params.includeSnapshot) {
response.includeSnapshot();
}
response.appendResponseLine(`File uploaded from ${filePath}.`);
} finally {
void handle.dispose();
Expand All @@ -322,6 +348,7 @@ export const pressKey = defineTool({
.describe(
'A key or a combination (e.g., "Enter", "Control+A", "Control++", "Control+Shift+R"). Modifiers: Control, Shift, Alt, Meta',
),
includeSnapshot: includeSnapshotSchema,
},
handler: async (request, response, context) => {
const page = context.getSelectedPage();
Expand All @@ -341,6 +368,8 @@ export const pressKey = defineTool({
response.appendResponseLine(
`Successfully pressed key: ${request.params.key}`,
);
response.includeSnapshot();
if (request.params.includeSnapshot) {
response.includeSnapshot();
}
},
});
49 changes: 49 additions & 0 deletions tests/tools/input.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,55 @@ describe('input', () => {
assert(handlerResolveTime > buttonChangeTime, 'Waited for navigation');
});
});

it('does not include snapshot by default', async () => {
  await withMcpContext(async (response, context) => {
    // Render a single button on the selected page.
    await context
      .getSelectedPage()
      .setContent(
        html`<button onclick="this.innerText = 'clicked';">test</button>`,
      );
    // A text snapshot is taken first; '1_1' below is presumably the uid the
    // button receives in that snapshot.
    await context.createTextSnapshot();

    // No `includeSnapshot` in the params: exercise the default path.
    const request = {params: {uid: '1_1'}};
    await click.handler(request, response, context);

    const [firstLine] = response.responseLines;
    assert.strictEqual(firstLine, 'Successfully clicked on the element');
    // The handler must not have requested a snapshot.
    assert.strictEqual(response.snapshotParams, undefined);
  });
});

it('includes snapshot if includeSnapshot is true', async () => {
  await withMcpContext(async (response, context) => {
    // Render a single button on the selected page.
    await context
      .getSelectedPage()
      .setContent(
        html`<button onclick="this.innerText = 'clicked';">test</button>`,
      );
    // A text snapshot is taken first; '1_1' below is presumably the uid the
    // button receives in that snapshot.
    await context.createTextSnapshot();

    // Explicitly opt in to a snapshot in the response.
    const request = {params: {uid: '1_1', includeSnapshot: true}};
    await click.handler(request, response, context);

    const [firstLine] = response.responseLines;
    assert.strictEqual(firstLine, 'Successfully clicked on the element');
    // Opting in must leave snapshot parameters set on the response.
    assert.notStrictEqual(response.snapshotParams, undefined);
  });
});
});

describe('hover', () => {
Expand Down