diff --git a/agents/README.md b/agents/README.md new file mode 100644 index 0000000000..6ac204478e --- /dev/null +++ b/agents/README.md @@ -0,0 +1,110 @@ +# Hackbot Agents + +Each subdirectory is a single, self-contained agent — its logic, its entrypoint, +and its deployment all live together, so you can understand one agent without +hunting around the repo. + +New here? The best way to start is to read through **`bug-fix/`** — it's our reference +agent, and the fastest path to your own is to copy it and adapt. + +## How an agent works (the important part) + +When the platform runs your agent, it calls `python -m hackbot_agents.<name>`, which runs +your `__main__.py`. Your job is to fill in three small pieces: + +```python +class AgentInputs(BaseSettings): # per-run inputs, read from env (bug_id -> BUG_ID) + bug_id: int + +async def main(ctx: HackbotContext) -> BugFixResult: + inputs = AgentInputs() + return await run_bug_fix(bug=inputs.bug_id, ...) # your real logic lives in agent.py + +run_async(main) # finds hackbot.toml, runs main, exits the process +``` + +Three rules are worth remembering: + +- **To report success,** return a `HackbotAgentResult` (subclass it with your own fields). + The runtime saves it to `summary.json` under `findings`. +- **To report failure,** just raise. Use `AgentError("…")` when it's an expected, explainable + failure; let any other exception bubble up for an unexpected crash. +- **`ctx` is your window to the platform** — everything it prepared for you hangs off it: + `ctx.source_repo`, `ctx.firefox`, `ctx.anthropic.api_key`, `ctx.actions`, + `ctx.publish_file`, `ctx.publish_json`. You never wire these up yourself. 
+ +## What's in an agent folder (`agents/<name>/`) + +``` +agents/<name>/ + pyproject.toml # the distribution "hackbot-agent-<name>" and its dependencies + hackbot.toml # what you need the platform to prepare: [source], [firefox] + Dockerfile # how it ships + compose.yml # how to run it locally + hackbot_agents/ # a shared namespace package — please don't add __init__.py here! + <name>/ # your agent's package (e.g. bug_fix) + __init__.py # empty + __main__.py # AgentInputs + main(ctx) + run_async(main) + agent.py # entrypoint: your prompts, logic, and HackbotAgentResult subclass +``` + +One thing to watch: **never create `hackbot_agents/__init__.py`.** Leaving it out is what +lets several agents live side by side in one environment without overwriting each other (PEP 420). +It's an easy mistake to make, and a confusing one to debug. + +## Telling the platform what you need (`hackbot.toml`) + +Think of `hackbot.toml` as your request to the platform: "please have these ready for me." +Everything is optional — only list what you actually use. + +```toml +[source] # the platform shallow-clones and refreshes this for you +repo_url = "https://github.com/mozilla-firefox/firefox.git" + +[firefox] # Firefox build paths, derived from the checkout +enabled = true +objdir = "objdir-ff-asan" +``` + +Everything else has a natural home: your agent's name and description go in `pyproject.toml`, +model and tool choices stay in code, and secrets and per-run inputs come from the environment. + +## Building blocks you can reuse + +Please reach for these instead of rolling your own — they're shared on purpose. + +From **`hackbot-runtime`**: + +- `HackbotContext, AgentError, HackbotAgentResult, run_async` — the pieces from the contract above. +- `from hackbot_runtime.claude import Reporter` — pretty-prints the agent's streamed messages + to stdout and your log (call `reporter.header(...)` per work item, `reporter.message(msg)` per message). 
+- `from hackbot_runtime.actions.claude_sdk import actions_server_for` — gives you + `(recorder, mcp_server)` so write-actions get recorded into `summary.json` rather than + silently mutating the world. + +Your actual **tools** (the things the model can call) come from **`agent-tools`**, each behind +its own extra (`[bugzilla]`, `[firefox]`): + +```python +from agent_tools import bugzilla +from agent_tools.claude_sdk import build_sdk_server +server = build_sdk_server("bugzilla", BugzillaContext(client=...), bugzilla.TOOLS) +``` + +From there, you assemble your own `ClaudeAgentOptions` and drive the `ClaudeSDKClient` loop — +that part stays in your hands, where you want it. + +## Creating your own agent + +1. **Copy `bug-fix/`** as your starting point. Rename the folder, the distribution name in + `pyproject.toml`, and the commands in `Dockerfile`/`compose.yml` (`python -m hackbot_agents.<name>`). +2. **Trim `hackbot.toml`** to just the `[source]`/`[firefox]` tables you need. +3. **Write your two modules:** `__main__.py` (`AgentInputs` + `main`) and `agent.py` (your logic + plus a `HackbotAgentResult` subclass). Keep `<name>/__init__.py` empty. +4. **Register it** in `services/hackbot-api/`: add a Pydantic input model in `app/schemas.py`, + and a single `AGENT_REGISTRY` entry in `app/agents.py` (`name`/`description`/`job_name`/ + `input_schema`). Env vars are derived from your schema automatically (`bug_id` → `BUG_ID`), + so there's no `build_env` to write — put deploy-time constants like broker URLs in the Job's + static env instead. + +And that's the whole recipe: one folder, one schema, one registry line. Welcome aboard! 
diff --git a/agents/bug-fix/Dockerfile b/agents/bug-fix/Dockerfile index 347a411b8e..15264a7153 100644 --- a/agents/bug-fix/Dockerfile +++ b/agents/bug-fix/Dockerfile @@ -2,46 +2,43 @@ FROM python:3.12 AS builder COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/ -WORKDIR /app +ENV UV_PROJECT_ENVIRONMENT=/opt/venv -# Workspace metadata first so the dep-download layer caches independently -# of source changes. -COPY pyproject.toml uv.lock VERSION ./ -COPY http_service/pyproject.toml ./http_service/ -COPY services/hackbot-api/pyproject.toml ./services/hackbot-api/ -COPY agents/bug-fix/pyproject.toml ./agents/bug-fix/ -COPY libs/hackbot-runtime/pyproject.toml ./libs/hackbot-runtime/ +WORKDIR /app # Install external deps without building workspace members. RUN --mount=type=cache,target=/root/.cache/uv \ - uv sync --locked --no-dev --no-install-workspace --package hackbot-agent-bug-fix - -# Workspace members the agent image actually needs (source included). -COPY agents/bug-fix ./agents/bug-fix -COPY bugbug ./bugbug -COPY libs/hackbot-runtime ./libs/hackbot-runtime + --mount=type=bind,source=pyproject.toml,target=pyproject.toml \ + --mount=type=bind,source=uv.lock,target=uv.lock \ + --mount=type=bind,source=VERSION,target=VERSION \ + uv sync --frozen --no-dev --no-install-workspace --package hackbot-agent-bug-fix RUN --mount=type=cache,target=/root/.cache/uv \ - uv sync --locked --no-dev --package hackbot-agent-bug-fix + --mount=type=bind,target=/app,rw \ + uv sync --locked --no-dev --no-editable --package hackbot-agent-bug-fix FROM python:3.12 AS base -COPY --from=builder /app /app -WORKDIR /app/agents/bug-fix +COPY --from=builder /opt/venv /opt/venv +WORKDIR /app ENV PYTHONUNBUFFERED=1 ENV PYTHONDONTWRITEBYTECODE=1 -ENV PATH="/app/.venv/bin:$PATH" +ENV PATH="/opt/venv/bin:$PATH" FROM base AS agent +# hackbot.toml lives at the agent root (not inside the package), so copy it into +# the working dir; the runtime discovers it there (cwd) at startup. 
+COPY agents/bug-fix/hackbot.toml /app/hackbot.toml + RUN useradd --create-home --shell /bin/bash agent \ && mkdir -p /workspace \ && chown agent:agent /workspace USER agent -CMD ["python", "-m", "agent_runner"] +CMD ["python", "-m", "hackbot_agents.bug_fix"] FROM base AS broker @@ -51,4 +48,4 @@ USER broker EXPOSE 8765 -CMD ["python", "-m", "broker"] +CMD ["python", "-m", "hackbot_agents.bug_fix.broker"] diff --git a/agents/bug-fix/agent_runner/__main__.py b/agents/bug-fix/agent_runner/__main__.py deleted file mode 100644 index 0979e9d9d3..0000000000 --- a/agents/bug-fix/agent_runner/__main__.py +++ /dev/null @@ -1,118 +0,0 @@ -import logging -import subprocess -import sys -import tempfile -from pathlib import Path - -from hackbot_runtime import AgentResult, Context, run_async -from pydantic_settings import BaseSettings, SettingsConfigDict - -log = logging.getLogger("bug-fix-agent") - -FIREFOX_REPO_URL = "https://github.com/mozilla-firefox/firefox.git" - - -class AgentInputs(BaseSettings): - bug_id: int - bugzilla_mcp_url: str - source_repo: Path = Path("/workspace/firefox") - model: str | None = None - max_turns: int | None = None - effort: str | None = None - - model_config = SettingsConfigDict(extra="ignore") - - -def ensure_firefox_source(source_repo: Path) -> None: - """Shallow-clone the Firefox source tree if it isn't already present. - - Idempotent and recovers from a partial checkout left by an earlier - failed run (e.g. clone succeeded but checkout ran out of disk). - """ - if (source_repo / ".git").exists(): - status = subprocess.run( - ["git", "-C", str(source_repo), "status", "--porcelain"], - check=True, - capture_output=True, - text=True, - ) - # A healthy fresh shallow clone has an empty status; a broken - # checkout shows thousands of missing-file "D" entries. 
- if status.stdout.strip(): - log.warning( - "firefox source at %s is incomplete; restoring working tree", - source_repo, - ) - subprocess.run( - ["git", "-C", str(source_repo), "restore", "--source=HEAD", ":/"], - check=True, - stdout=sys.stderr, - stderr=sys.stderr, - ) - log.info("updating firefox source at %s (shallow fetch)", source_repo) - subprocess.run( - ["git", "-C", str(source_repo), "fetch", "--depth=1", "origin", "HEAD"], - check=True, - stdout=sys.stderr, - stderr=sys.stderr, - ) - subprocess.run( - ["git", "-C", str(source_repo), "reset", "--hard", "FETCH_HEAD"], - check=True, - stdout=sys.stderr, - stderr=sys.stderr, - ) - return - source_repo.mkdir(parents=True, exist_ok=True) - log.info("cloning firefox source (shallow) to %s", source_repo) - subprocess.run( - ["git", "clone", "--depth=1", FIREFOX_REPO_URL, str(source_repo)], - check=True, - stdout=sys.stderr, - stderr=sys.stderr, - ) - log.info("firefox shallow clone complete") - - -async def main(ctx: Context) -> AgentResult: - from bugbug.tools.bug_fix.agent import BugFixTool - - inputs = AgentInputs() - ensure_firefox_source(inputs.source_repo) - - log_path = Path(tempfile.mkdtemp(prefix="bug-fix-log-")) / "agent.log" - - tool = BugFixTool.create() - result = await tool.run( - task="Triage and fix the bug, and verify the fix", - bugzilla_mcp_server={ - "type": "http", - "url": inputs.bugzilla_mcp_url, - }, - source_repo=inputs.source_repo, - bugs=[inputs.bug_id], - model=inputs.model, - max_turns=inputs.max_turns, - effort=inputs.effort, - log=log_path, - verbose=True, - actions_recorder=ctx.actions, - ) - - if log_path.exists(): - # Uploaded when a signed policy is set, else copied into ./artifacts. 
- ctx.publish_file("logs/agent.log", log_path, "text/plain") - - return AgentResult( - status="ok" if result.exit_code == 0 else "error", - error=None if result.exit_code == 0 else f"exit_code={result.exit_code}", - findings={ - "exit_code": result.exit_code, - "bugs_processed": result.bugs_processed, - }, - exit_code=result.exit_code, - ) - - -if __name__ == "__main__": - raise SystemExit(run_async(main)) diff --git a/agents/bug-fix/broker/__init__.py b/agents/bug-fix/broker/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/agents/bug-fix/hackbot.toml b/agents/bug-fix/hackbot.toml new file mode 100644 index 0000000000..227e28b529 --- /dev/null +++ b/agents/bug-fix/hackbot.toml @@ -0,0 +1,7 @@ +[source] +repo_url = "https://github.com/mozilla-firefox/firefox.git" +checkout_path = "/workspace/firefox" + +[firefox] +enabled = true +objdir = "objdir-ff-asan" diff --git a/agents/bug-fix/agent_runner/__init__.py b/agents/bug-fix/hackbot_agents/bug_fix/__init__.py similarity index 100% rename from agents/bug-fix/agent_runner/__init__.py rename to agents/bug-fix/hackbot_agents/bug_fix/__init__.py diff --git a/agents/bug-fix/hackbot_agents/bug_fix/__main__.py b/agents/bug-fix/hackbot_agents/bug_fix/__main__.py new file mode 100644 index 0000000000..c1423e3d67 --- /dev/null +++ b/agents/bug-fix/hackbot_agents/bug_fix/__main__.py @@ -0,0 +1,39 @@ +from hackbot_runtime import HackbotContext, run_async +from pydantic_settings import BaseSettings, SettingsConfigDict + +from .agent import BugFixResult, run_bug_fix + + +class AgentInputs(BaseSettings): + bug_id: int + bugzilla_mcp_url: str + model: str | None = None + max_turns: int | None = None + effort: str | None = None + + model_config = SettingsConfigDict(extra="ignore") + + +async def main(ctx: HackbotContext) -> BugFixResult: + inputs = AgentInputs() + + return await run_bug_fix( + task="Triage and fix the bug, and verify the fix", + bugzilla_mcp_server={ + "type": "http", + "url": 
inputs.bugzilla_mcp_url, + }, + source_repo=ctx.source_repo, + fx_ctx=ctx.firefox, + bug=inputs.bug_id, + model=inputs.model, + max_turns=inputs.max_turns, + effort=inputs.effort, + log=ctx.log_path, + verbose=True, + actions_recorder=ctx.actions, + ) + + +if __name__ == "__main__": + run_async(main) diff --git a/agents/bug-fix/hackbot_agents/bug_fix/agent.py b/agents/bug-fix/hackbot_agents/bug_fix/agent.py new file mode 100644 index 0000000000..d6868a92b0 --- /dev/null +++ b/agents/bug-fix/hackbot_agents/bug_fix/agent.py @@ -0,0 +1,185 @@ +"""Bug fix triage tool -- a Bugzilla triage agent. + +Orchestrates a Claude agent that triages bugs according to rulesets +in the rules/ directory. The agent has access to a source repository +and reaches Bugzilla via an out-of-process MCP broker (HTTP transport) +that holds the Bugzilla token — the agent process itself never sees it. +""" + +from __future__ import annotations + +import sys +from pathlib import Path + +from agent_tools import firefox +from agent_tools.claude_sdk import build_sdk_server +from agent_tools.firefox import FirefoxContext +from claude_agent_sdk import ( + AgentDefinition, + ClaudeAgentOptions, + ClaudeSDKClient, + McpServerConfig, + ResultMessage, +) +from hackbot_runtime import ActionsRecorder, AgentError, HackbotAgentResult +from hackbot_runtime.actions import ACTIONS_SERVER_NAME +from hackbot_runtime.actions.claude_sdk import actions_server_for, actions_to_tool_names +from hackbot_runtime.claude import Reporter + +from .config import ( + BUGZILLA_READ_TOOLS, + ENABLED_ACTION_TYPES, + FIREFOX_TOOLS, + SOURCE_WRITE_TOOLS, +) + +HERE = Path(__file__).resolve().parent + + +class BugFixResult(HackbotAgentResult): + bug_id: int + result: str | None = None + + +def load_system_prompt(rules_dir: Path, extra: str) -> str: + tmpl = (HERE / "prompts" / "system.md").read_text() + + return tmpl.format( + rules_dir=str(rules_dir.resolve()), + extra_instructions=extra or "(none)", + ) + + +def make_investigator() 
-> AgentDefinition: + """Create a single generic investigator subagent definition.""" + return AgentDefinition( + description=( + "Focused investigator for answering a specific question about " + "a bug or the source tree. The main agent writes your complete " + "instructions at spawn time — follow them precisely and return " + "only what was asked for." + ), + prompt=( + "You are a focused investigator subagent. You will be given a " + "self-contained task by the triage agent. Complete it and return " + "a concise answer. Do not make Bugzilla modifications — you have " + "read-only access. Do not speculate beyond what you can verify." + ), + tools=[ + "Read", + "Grep", + "Glob", + "Bash", + *BUGZILLA_READ_TOOLS, + *FIREFOX_TOOLS, + ], + model="inherit", + ) + + +async def run_bug_fix( + *, + bugzilla_mcp_server: McpServerConfig, + source_repo: Path, + fx_ctx: FirefoxContext, + bug: int, + instructions: str = "", + task: str | None = None, + rules_dir: Path | None = None, + model: str | None = None, + max_turns: int | None = None, + effort: str | None = None, + verbose: bool = False, + log: Path | None = None, + actions_recorder: ActionsRecorder | None = None, +) -> BugFixResult: + """Triage and fix a single Bugzilla bug with a claude-agent-sdk agent. + + Returns a :class:`BugFixResult` on success; raises :class:`AgentError` if the + agent ends in an error. + """ + if rules_dir is None: + rules_dir = HERE / "rules" + + print(f"[bug_fix] triaging bug {bug}", file=sys.stderr) + + # Firefox build/eval MCP server (in-process; no tokens). The runtime + # derives fx_ctx from the prepared source checkout and the agent's + # hackbot.toml; here we only wrap its tools as an MCP server. + firefox_server = build_sdk_server("firefox", fx_ctx, firefox.TOOLS) + + # Action-recording MCP server (in-process). Standalone/script runs pass + # actions_recorder=None and get a local recorder that copies attachments + # under ./artifacts (no uploader). 
+ actions_recorder, actions_server = actions_server_for( + actions_recorder, types=ENABLED_ACTION_TYPES + ) + enabled_action_tools = actions_to_tool_names(ENABLED_ACTION_TYPES) + + system_prompt = load_system_prompt(rules_dir, instructions) + + options = ClaudeAgentOptions( + system_prompt=system_prompt, + mcp_servers={ + "bugzilla": bugzilla_mcp_server, + "firefox": firefox_server, + ACTIONS_SERVER_NAME: actions_server, + }, + agents={"investigator": make_investigator()}, + cwd=str(source_repo.resolve()), + add_dirs=[str(rules_dir.resolve())], + permission_mode="bypassPermissions", + allowed_tools=[ + "Read", + "Grep", + "Glob", + "Bash", + "Task", + *SOURCE_WRITE_TOOLS, + *BUGZILLA_READ_TOOLS, + *enabled_action_tools, + *FIREFOX_TOOLS, + ], + model=model, + max_turns=max_turns, + **({"effort": effort} if effort else {}), + setting_sources=[], + ) + + rules_path = rules_dir.resolve() + if task: + user_prompt = ( + f"Bug to work on: {bug}\n\n" + f"Task: {task}\n\n" + f"The rules in {rules_path} are available if the task " + f"calls for them, but the task above is your primary " + f"directive — it overrides the default triage workflow." + ) + else: + user_prompt = ( + f"Triage bug {bug}.\n\nConsult the relevant rules in {rules_path}." 
+ ) + + result_msg: ResultMessage | None = None + with Reporter(verbose=verbose, log_path=log) as reporter: + reporter.header(f"bug {bug}") + async with ClaudeSDKClient(options=options) as client: + await client.query(user_prompt) + async for msg in client.receive_response(): + reporter.message(msg) + if isinstance(msg, ResultMessage): + result_msg = msg + + if result_msg is None: + raise AgentError(f"bug {bug}: agent produced no result message") + if result_msg.is_error: + raise AgentError( + f"bug {bug} triage failed: {result_msg.result or result_msg.subtype}" + ) + + return BugFixResult( + bug_id=bug, + result=result_msg.result, + num_turns=result_msg.num_turns, + total_cost_usd=result_msg.total_cost_usd, + ) diff --git a/agents/bug-fix/broker/__main__.py b/agents/bug-fix/hackbot_agents/bug_fix/broker.py similarity index 90% rename from agents/bug-fix/broker/__main__.py rename to agents/bug-fix/hackbot_agents/bug_fix/broker.py index 89b328b96b..f10e69a637 100644 --- a/agents/bug-fix/broker/__main__.py +++ b/agents/bug-fix/hackbot_agents/bug_fix/broker.py @@ -11,14 +11,14 @@ import bugsy import uvicorn +from agent_tools import bugzilla +from agent_tools.bugzilla import BugzillaContext +from agent_tools.claude_sdk import build_sdk_server from mcp.server.streamable_http_manager import StreamableHTTPSessionManager from pydantic_settings import BaseSettings, SettingsConfigDict from starlette.applications import Starlette from starlette.routing import Mount -from bugbug.tools.bug_fix.bugzilla_mcp import BugzillaContext -from bugbug.tools.bug_fix.bugzilla_mcp import build_server as build_bugzilla_server - log = logging.getLogger("bugzilla-broker") @@ -36,7 +36,7 @@ def build_app(inputs: BrokerInputs) -> Starlette: api_key=inputs.bugzilla_api_key, bugzilla_url=inputs.bugzilla_api_url ) ctx = BugzillaContext(client=client) - sdk_config = build_bugzilla_server(ctx) + sdk_config = build_sdk_server("bugzilla", ctx, bugzilla.TOOLS) mcp_server = sdk_config["instance"] manager 
= StreamableHTTPSessionManager(app=mcp_server, stateless=True) diff --git a/agents/bug-fix/hackbot_agents/bug_fix/config.py b/agents/bug-fix/hackbot_agents/bug_fix/config.py new file mode 100644 index 0000000000..61524f36e2 --- /dev/null +++ b/agents/bug-fix/hackbot_agents/bug_fix/config.py @@ -0,0 +1,28 @@ +# Tools that can modify the source repo — blocked under dry-run. +SOURCE_WRITE_TOOLS = {"Write", "Edit", "MultiEdit", "NotebookEdit"} + +# Bugzilla MCP tool names as exposed to the agent (mcp__<server>__<tool>). +BUGZILLA_READ_TOOLS = [ + "mcp__bugzilla__search_bugs", + "mcp__bugzilla__get_bugs", + "mcp__bugzilla__get_bug_comments", + "mcp__bugzilla__get_bug_attachments", + "mcp__bugzilla__download_attachment", +] + + +# Recordable action types the agent may take, by dotted id. +ENABLED_ACTION_TYPES = [ + "bugzilla.update_bug", + "bugzilla.add_comment", + "bugzilla.add_attachment", + "bugzilla.create_bug", +] + +# Firefox build/test tools. +FIREFOX_TOOLS = [ + "mcp__firefox__evaluate_testcase", + "mcp__firefox__build_firefox", + "mcp__firefox__evaluate_js_shell", + "mcp__firefox__bootstrap_firefox", +] diff --git a/bugbug/tools/bug_fix/prompts/system.md b/agents/bug-fix/hackbot_agents/bug_fix/prompts/system.md similarity index 100% rename from bugbug/tools/bug_fix/prompts/system.md rename to agents/bug-fix/hackbot_agents/bug_fix/prompts/system.md diff --git a/bugbug/tools/bug_fix/rules/README.md b/agents/bug-fix/hackbot_agents/bug_fix/rules/README.md similarity index 100% rename from bugbug/tools/bug_fix/rules/README.md rename to agents/bug-fix/hackbot_agents/bug_fix/rules/README.md diff --git a/bugbug/tools/bug_fix/rules/unsupported-config.md b/agents/bug-fix/hackbot_agents/bug_fix/rules/unsupported-config.md similarity index 100% rename from bugbug/tools/bug_fix/rules/unsupported-config.md rename to agents/bug-fix/hackbot_agents/bug_fix/rules/unsupported-config.md diff --git a/agents/bug-fix/pyproject.toml b/agents/bug-fix/pyproject.toml index 6b84e190b5..492df37f36 100644 
--- a/agents/bug-fix/pyproject.toml +++ b/agents/bug-fix/pyproject.toml @@ -4,11 +4,9 @@ version = "0.1.0" description = "Cloud Run Job image that runs the bug-fix agent for hackbot-api" requires-python = ">=3.12" dependencies = [ - "bugbug", "hackbot-runtime[claude-sdk]", + "agent-tools[bugzilla,firefox]", "bugsy", - "grizzly-framework", - "prefpicker", "claude-agent-sdk>=0.1.30", "mcp>=1.0.0", "starlette>=0.36.0", @@ -16,5 +14,12 @@ dependencies = [ ] [tool.uv.sources] -bugbug = { workspace = true } hackbot-runtime = { workspace = true } +agent-tools = { workspace = true } + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["hackbot_agents"] diff --git a/bugbug/tools/bug_fix/__init__.py b/bugbug/tools/bug_fix/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/bugbug/tools/bug_fix/agent.py b/bugbug/tools/bug_fix/agent.py deleted file mode 100644 index 169db340be..0000000000 --- a/bugbug/tools/bug_fix/agent.py +++ /dev/null @@ -1,318 +0,0 @@ -"""Bug fix triage tool -- a Bugzilla triage agent. - -Orchestrates a Claude agent that triages bugs according to rulesets -in the rules/ directory. The agent has access to a source repository -and reaches Bugzilla via an out-of-process MCP broker (HTTP transport) -that holds the Bugzilla token — the agent process itself never sees it. 
-""" - -from __future__ import annotations - -import json -import sys -from dataclasses import dataclass -from pathlib import Path - -from claude_agent_sdk import ( - AgentDefinition, - AssistantMessage, - ClaudeAgentOptions, - ClaudeSDKClient, - McpServerConfig, - ResultMessage, - SystemMessage, - TextBlock, - ThinkingBlock, - ToolResultBlock, - ToolUseBlock, - UserMessage, -) -from hackbot_runtime import ActionsRecorder -from hackbot_runtime.actions.claude_sdk import build_actions_sdk_server -from hackbot_runtime.actions.naming import ACTIONS_SERVER_NAME - -from bugbug.tools.base import GenerativeModelTool -from bugbug.tools.bug_fix.config import ( - BUGZILLA_READ_TOOLS, - ENABLED_ACTION_TOOLS, - ENABLED_ACTION_TYPES, - FIREFOX_TOOLS, - SOURCE_WRITE_TOOLS, -) -from bugbug.tools.bug_fix.firefox_mcp import FirefoxContext -from bugbug.tools.bug_fix.firefox_mcp import build_server as build_firefox_server - -HERE = Path(__file__).resolve().parent - - -# --------------------------------------------------------------------------- # -# Result type -# --------------------------------------------------------------------------- # - - -@dataclass -class BugFixResult: - exit_code: int = 0 - bugs_processed: int = 0 - - -# --------------------------------------------------------------------------- # -# Prompts & agents -# --------------------------------------------------------------------------- # - - -def load_system_prompt(rules_dir: Path, extra: str) -> str: - tmpl = (HERE / "prompts" / "system.md").read_text() - - return tmpl.format( - rules_dir=str(rules_dir.resolve()), - extra_instructions=extra or "(none)", - ) - - -def make_investigator() -> AgentDefinition: - """Create a single generic investigator subagent definition.""" - return AgentDefinition( - description=( - "Focused investigator for answering a specific question about " - "a bug or the source tree. 
The main agent writes your complete " - "instructions at spawn time — follow them precisely and return " - "only what was asked for." - ), - prompt=( - "You are a focused investigator subagent. You will be given a " - "self-contained task by the triage agent. Complete it and return " - "a concise answer. Do not make Bugzilla modifications — you have " - "read-only access. Do not speculate beyond what you can verify." - ), - tools=[ - "Read", - "Grep", - "Glob", - "Bash", - *BUGZILLA_READ_TOOLS, - *FIREFOX_TOOLS, - ], - model="inherit", - ) - - -# --------------------------------------------------------------------------- # -# Output streaming -# --------------------------------------------------------------------------- # - - -def _truncate(s: str, n: int = 500) -> str: - return s if len(s) <= n else s[:n] + f"... [{len(s) - n} more chars]" - - -class Reporter: - """Routes streamed agent messages to stdout and/or a log file.""" - - def __init__(self, verbose: bool, log_path: Path | None): - self.verbose = verbose - self._log = log_path.open("w", encoding="utf-8") if log_path else None - self._turn = 0 - - def __enter__(self): - return self - - def __exit__(self, *exc): - if self._log: - self._log.close() - - def start_bug(self, bug_id: int) -> None: - self._turn = 0 - header = f"\n{'#' * 60}\n# bug {bug_id}\n{'#' * 60}" - self._emit(header, always=True) - - def _emit(self, line: str, *, always: bool = False, full: str | None = None): - if self._log: - self._log.write((full if full is not None else line) + "\n") - self._log.flush() - if always or self.verbose: - print(line) - - def message(self, msg) -> None: - if isinstance(msg, AssistantMessage): - is_main = msg.parent_tool_use_id is None - label = "agent" if is_main else "subagent" - if is_main: - self._turn += 1 - self._emit(f"\n--- turn {self._turn} ---") - for block in msg.content: - if isinstance(block, TextBlock): - self._emit(f"\n[{label}] {block.text}", always=is_main) - elif isinstance(block, 
ThinkingBlock): - thinking = block.thinking.strip() - snippet = thinking.split("\n", 1)[0] - self._emit( - f"[{label}:thinking] {_truncate(snippet, 120)}", - full=f"[{label}:thinking]\n{thinking}", - ) - elif isinstance(block, ToolUseBlock): - inp = json.dumps(block.input, default=str) - inp_full = json.dumps(block.input, indent=2, default=str) - self._emit( - f"[{label}→tool] {block.name}({_truncate(inp, 300)})", - full=f"[{label}→tool] {block.name}\n{inp_full}", - ) - - elif isinstance(msg, UserMessage): - if isinstance(msg.content, list): - for block in msg.content: - if isinstance(block, ToolResultBlock): - marker = "ERROR" if block.is_error else "ok" - if isinstance(block.content, str): - text = block.content - elif isinstance(block.content, list): - parts = [ - c.get("text", "") - for c in block.content - if isinstance(c, dict) and c.get("type") == "text" - ] - text = "\n".join(parts) - else: - text = str(block.content) - self._emit( - f" [tool←{marker}] {_truncate(text, 400)}", - full=f" [tool←{marker}]\n{text}", - ) - - elif isinstance(msg, SystemMessage): - if msg.subtype == "init": - model = msg.data.get("model", "?") - self._emit(f"[system] session started (model={model})") - else: - data = json.dumps(msg.data, default=str) - self._emit( - f"[system:{msg.subtype}] {_truncate(data, 200)}", - full=f"[system:{msg.subtype}] {data}", - ) - - elif isinstance(msg, ResultMessage): - self._emit(f"\n{'=' * 60}", always=True) - if msg.total_cost_usd: - line = f"[done] turns={msg.num_turns} cost=${msg.total_cost_usd:.4f}" - else: - line = f"[done] turns={msg.num_turns}" - self._emit(line, always=True) - if msg.is_error: - self._emit(f"[done] ERROR: {msg.result}", always=True) - - -# --------------------------------------------------------------------------- # -# Tool class -# --------------------------------------------------------------------------- # - - -class BugFixTool(GenerativeModelTool): - """Bugzilla triage agent using claude-agent-sdk.""" - - @classmethod 
- def create(cls, **kwargs): - return cls() - - async def run( - self, - *, - bugzilla_mcp_server: McpServerConfig, - source_repo: Path, - bugs: list[int], - instructions: str = "", - task: str | None = None, - rules_dir: Path | None = None, - newest_first: bool = False, - model: str | None = None, - max_turns: int | None = None, - effort: str | None = None, - verbose: bool = False, - log: Path | None = None, - actions_recorder: ActionsRecorder | None = None, - ) -> BugFixResult: - if rules_dir is None: - rules_dir = HERE / "rules" - - if not bugs: - print("[bug_fix] no bug ids supplied — nothing to do", file=sys.stderr) - return BugFixResult(exit_code=0) - - selected = sorted(bugs, reverse=newest_first) - print(f"[bug_fix] triaging {len(selected)} bug(s): {selected}", file=sys.stderr) - - # --- Firefox build/eval MCP server (in-process; no tokens) -------- # - fx_ctx = FirefoxContext.from_source_repo(source_repo) - firefox_server = build_firefox_server(fx_ctx) - - # --- Action-recording MCP server (in-process) --------------------- # - if actions_recorder is None: - # Standalone/script runs have no uploader; copy attachments locally. 
- actions_recorder = ActionsRecorder(artifacts_dir=Path("artifacts")) - actions_server = build_actions_sdk_server( - actions_recorder, types=ENABLED_ACTION_TYPES - ) - - # --- Build agent options ------------------------------------------ # - system_prompt = load_system_prompt(rules_dir, instructions) - - options = ClaudeAgentOptions( - system_prompt=system_prompt, - mcp_servers={ - "bugzilla": bugzilla_mcp_server, - "firefox": firefox_server, - ACTIONS_SERVER_NAME: actions_server, - }, - agents={"investigator": make_investigator()}, - cwd=str(source_repo.resolve()), - add_dirs=[str(rules_dir.resolve())], - permission_mode="bypassPermissions", - allowed_tools=[ - "Read", - "Grep", - "Glob", - "Bash", - "Task", - *SOURCE_WRITE_TOOLS, - *BUGZILLA_READ_TOOLS, - *ENABLED_ACTION_TOOLS, - *FIREFOX_TOOLS, - ], - model=model, - max_turns=max_turns, - **({"effort": effort} if effort else {}), - setting_sources=[], - ) - - # --- Run: one fresh agent context per bug ------------------------- # - exit_code = 0 - rules_path = rules_dir.resolve() - with Reporter(verbose=verbose, log_path=log) as reporter: - for i, bug_id in enumerate(selected, 1): - print(f"[bug_fix] bug {i}/{len(selected)}: {bug_id}", file=sys.stderr) - reporter.start_bug(bug_id) - - if task: - user_prompt = ( - f"Bug to work on: {bug_id}\n\n" - f"Task: {task}\n\n" - f"The rules in {rules_path} are available if the task " - f"calls for them, but the task above is your primary " - f"directive — it overrides the default triage workflow." - ) - else: - user_prompt = ( - f"Triage bug {bug_id}.\n\n" - f"Consult the relevant rules in {rules_path}." 
- ) - - async with ClaudeSDKClient(options=options) as client: - await client.query(user_prompt) - async for msg in client.receive_response(): - reporter.message(msg) - if isinstance(msg, ResultMessage) and msg.is_error: - exit_code = 1 - - return BugFixResult( - exit_code=exit_code, - bugs_processed=len(selected), - ) diff --git a/bugbug/tools/bug_fix/bugzilla_mcp.py b/bugbug/tools/bug_fix/bugzilla_mcp.py deleted file mode 100644 index 71aeb888f3..0000000000 --- a/bugbug/tools/bug_fix/bugzilla_mcp.py +++ /dev/null @@ -1,322 +0,0 @@ -"""In-process MCP server wrapping bugsy for Bugzilla REST access. - -Exposes read-only tools to a Claude agent. Write actions are recorded -via the in-process ``actions`` MCP server built from the framework-agnostic -registry in ``hackbot_runtime.actions`` (see -``hackbot_runtime/actions/claude_sdk.py``), so the broker holds the Bugzilla -API key but has no write capability at all. -All tools gracefully handle proxy-level restrictions (code 101: -endpoint not exposed, code 102: access denied). -""" - -from __future__ import annotations - -import base64 -import json -from dataclasses import dataclass - -import bugsy -from claude_agent_sdk import create_sdk_mcp_server, tool - -# --------------------------------------------------------------------------- # -# Shared context -# --------------------------------------------------------------------------- # - - -@dataclass -class BugzillaContext: - """Holds the live bugsy client. - - The MCP tool functions close over a single instance of this class so - they share auth and one TCP connection pool. 
- """ - - client: bugsy.Bugsy - - -def _text(content: str) -> dict: - """Wrap plain text in MCP content format.""" - return {"content": [{"type": "text", "text": content}]} - - -def _jtext(obj) -> dict: - """Serialise an object to pretty JSON inside MCP text content.""" - return _text(json.dumps(obj, indent=2, default=str)) - - -def _handle_bugsy_error(e: bugsy.BugsyException) -> dict: - """Turn a bugsy exception into a structured tool error response. - - We deliberately return ``is_error: True`` but with a friendly, - machine-parseable payload so the agent can decide what to do - (skip the bug, try a different endpoint, etc) rather than just - seeing a stack trace. - """ - code = getattr(e, "code", None) - msg = getattr(e, "msg", str(e)) - if code == 101: - kind = "endpoint_not_exposed" - hint = "This Bugzilla proxy does not expose this endpoint." - elif code == 102: - kind = "access_denied" - hint = "Your API key cannot access this bug. Skip it." - else: - kind = "bugzilla_error" - hint = None - payload = {"error": kind, "code": code, "message": msg} - if hint: - payload["hint"] = hint - return { - "content": [{"type": "text", "text": json.dumps(payload, indent=2)}], - "is_error": True, - } - - -# --------------------------------------------------------------------------- # -# Server factory -# --------------------------------------------------------------------------- # - - -def build_server(ctx: BugzillaContext): - """Create and return the in-process MCP server bound to ``ctx``. - - All tool functions are closures over ``ctx`` so they share the same - bugsy session (one TCP connection pool, one auth header). - """ - # ----- READ TOOLS -------------------------------------------------- # - - @tool( - "search_bugs", - "Search Bugzilla using raw REST query parameters. Returns matching " - "bugs in one bulk request. Parameters are ANDed together (intersect). 
" - "IMPORTANT: this proxy drops 'whiteboard' and 'keywords' from _all / " - "_default field sets — list them explicitly in include_fields if you " - "need them. Common params: id, keywords, blocks, depends_on, product, " - "component, status, resolution, priority, severity, assigned_to, " - "whiteboard, include_fields, limit.", - { - "type": "object", - "properties": { - "params": { - "type": "object", - "description": ( - "Bugzilla REST /bug query parameters. Values may be " - "strings, ints, or comma-separated lists. Example: " - '{"blocks": 12345, "keywords": "sec-low", ' - '"include_fields": "id,summary,status,whiteboard,keywords"}' - ), - "additionalProperties": True, - } - }, - "required": ["params"], - }, - ) - async def search_bugs(args): - params = args["params"] - try: - result = ctx.client.request("bug", params=params) - except bugsy.BugsyException as e: - return _handle_bugsy_error(e) - bugs = result.get("bugs", []) - return _jtext({"count": len(bugs), "bugs": bugs}) - - @tool( - "get_bugs", - "Fetch one or more bugs by ID in a single bulk request. " - "Inaccessible bugs are silently dropped by the proxy — this tool " - "diffs requested vs returned and reports them under 'inaccessible'. " - "Remember: request 'whiteboard' and 'keywords' explicitly in " - "include_fields if you need them.", - { - "type": "object", - "properties": { - "ids": { - "type": "array", - "items": {"type": "integer"}, - "description": "Bug IDs to fetch.", - }, - "include_fields": { - "type": "string", - "description": ( - "Comma-separated field list, or '_default'/'_all'. " - "Defaults to a sensible triage set." - ), - }, - "include_comments": { - "type": "boolean", - "description": ( - "If true, also bulk-fetch comments (one extra request " - "total, not one per bug)." 
- ), - }, - }, - "required": ["ids"], - }, - ) - async def get_bugs(args): - ids = args["ids"] - if not ids: - return _jtext({"count": 0, "bugs": [], "inaccessible": []}) - include = args.get("include_fields") or ( - "id,summary,status,resolution,product,component,priority," - "severity,keywords,whiteboard,assigned_to,creator," - "creation_time,last_change_time,blocks,depends_on,see_also," - "cf_crash_signature,url,version,op_sys,platform" - ) - id_csv = ",".join(str(i) for i in ids) - try: - result = ctx.client.request( - "bug", params={"id": id_csv, "include_fields": include} - ) - except bugsy.BugsyException as e: - return _handle_bugsy_error(e) - bugs = result.get("bugs", []) - returned = {b["id"] for b in bugs} - inaccessible = [i for i in ids if i not in returned] - - payload = { - "count": len(bugs), - "bugs": bugs, - "inaccessible": inaccessible, - } - - if args.get("include_comments") and bugs: - # Bugzilla lets us fetch comments for many bugs in one call by - # hitting /bug/{first}/comment?ids=rest. One extra round trip - # total regardless of bug count. - first, *rest = [b["id"] for b in bugs] - cparams = {"ids": ",".join(str(i) for i in rest)} if rest else {} - try: - cres = ctx.client.request(f"bug/{first}/comment", params=cparams) - # Response keys bugs by string ID. 
- comments_by_bug = { - int(bid): data["comments"] - for bid, data in cres.get("bugs", {}).items() - } - for b in bugs: - b["comments"] = comments_by_bug.get(b["id"], []) - except bugsy.BugsyException as e: - payload["comments_error"] = { - "code": getattr(e, "code", None), - "message": getattr(e, "msg", str(e)), - } - - return _jtext(payload) - - @tool( - "get_bug_comments", - "Fetch all comments for a single bug.", - {"bug_id": int}, - ) - async def get_bug_comments(args): - bug_id = args["bug_id"] - try: - result = ctx.client.request(f"bug/{bug_id}/comment") - except bugsy.BugsyException as e: - return _handle_bugsy_error(e) - comments = result.get("bugs", {}).get(str(bug_id), {}).get("comments", []) - return _jtext({"bug_id": bug_id, "count": len(comments), "comments": comments}) - - @tool( - "get_bug_attachments", - "Fetch attachments for a bug. By default returns metadata only " - "(cheap, safe for large binaries). Set include_data=true to also " - "download the content — Bugzilla returns it base64-encoded in the " - "'data' field of each attachment.", - { - "type": "object", - "properties": { - "bug_id": {"type": "integer"}, - "include_data": { - "type": "boolean", - "description": ( - "If true, include base64-encoded attachment content. " - "Default false. Use sparingly — attachments can be large." - ), - }, - }, - "required": ["bug_id"], - }, - ) - async def get_bug_attachments(args): - bug_id = args["bug_id"] - params = {} if args.get("include_data") else {"exclude_fields": "data"} - try: - result = ctx.client.request(f"bug/{bug_id}/attachment", params=params) - except bugsy.BugsyException as e: - return _handle_bugsy_error(e) - atts = result.get("bugs", {}).get(str(bug_id), []) - return _jtext({"bug_id": bug_id, "count": len(atts), "attachments": atts}) - - @tool( - "download_attachment", - "Fetch a single Bugzilla attachment by ID and write its decoded " - "content to a local file. 
This is the inverse of add_attachment: " - "it handles the base64 decode server-side so the agent never has " - "to round-trip the blob through its own context. Use " - "get_bug_attachments first to discover attachment IDs. Returns " - "the written path, size, and content_type.", - { - "type": "object", - "properties": { - "attachment_id": {"type": "integer"}, - "dest_path": { - "type": "string", - "description": "Local filesystem path to write the " - "decoded attachment to. Parent directory " - "must already exist. Overwrites if present.", - }, - }, - "required": ["attachment_id", "dest_path"], - }, - ) - async def download_attachment(args): - attachment_id = args["attachment_id"] - dest_path = args["dest_path"] - try: - result = ctx.client.request(f"bug/attachment/{attachment_id}") - except bugsy.BugsyException as e: - return _handle_bugsy_error(e) - - att = result.get("attachments", {}).get(str(attachment_id)) - if att is None: - return { - "content": [ - { - "type": "text", - "text": json.dumps( - { - "error": "attachment_not_found", - "attachment_id": attachment_id, - } - ), - } - ], - "is_error": True, - } - - raw = base64.b64decode(att["data"]) - with open(dest_path, "wb") as fp: - fp.write(raw) - - return _jtext( - { - "attachment_id": attachment_id, - "dest_path": dest_path, - "size_bytes": len(raw), - "file_name": att.get("file_name"), - "content_type": att.get("content_type"), - } - ) - - return create_sdk_mcp_server( - name="bugzilla", - version="0.1.0", - tools=[ - search_bugs, - get_bugs, - get_bug_comments, - get_bug_attachments, - download_attachment, - ], - ) diff --git a/bugbug/tools/bug_fix/config.py b/bugbug/tools/bug_fix/config.py deleted file mode 100644 index e03b637154..0000000000 --- a/bugbug/tools/bug_fix/config.py +++ /dev/null @@ -1,56 +0,0 @@ -from __future__ import annotations - -from pathlib import Path - -import yaml -from hackbot_runtime.actions.naming import ACTIONS_SERVER_NAME, tool_name_for - -# Tools that can modify the source 
repo — blocked under dry-run. -SOURCE_WRITE_TOOLS = {"Write", "Edit", "MultiEdit", "NotebookEdit"} - -# Bugzilla MCP tool names as exposed to the agent (mcp____). -BUGZILLA_READ_TOOLS = [ - "mcp__bugzilla__search_bugs", - "mcp__bugzilla__get_bugs", - "mcp__bugzilla__get_bug_comments", - "mcp__bugzilla__get_bug_attachments", - "mcp__bugzilla__download_attachment", -] -# Recording action types this agent enables. Served by the in-process -# `actions` MCP server (hackbot_runtime.actions.claude_sdk). Tool calls land -# in summary.json's `actions` array instead of mutating any external system. -# New domains (phabricator, treeherder, ...) just append to this list. -ENABLED_ACTION_TYPES = [ - "bugzilla.update_bug", - "bugzilla.add_comment", - "bugzilla.add_attachment", - "bugzilla.create_bug", -] -# claude-agent-sdk tool identifiers derived from the above, using the shared -# server name and tool-name helper so they stay in sync with the adapter. -ENABLED_ACTION_TOOLS = [ - f"mcp__{ACTIONS_SERVER_NAME}__{tool_name_for(t)}" for t in ENABLED_ACTION_TYPES -] - -# Firefox build/test tools. -FIREFOX_TOOLS = [ - "mcp__firefox__evaluate_testcase", - "mcp__firefox__build_firefox", - "mcp__firefox__evaluate_js_shell", - "mcp__firefox__bootstrap_firefox", -] - -# Deployment-stable settings that may be supplied via config YAML. 
-_CONFIG_KEYS = {"base_url", "source_repo", "rules_dir", "model", "max_turns", "effort"} - - -def load_config(path: Path) -> dict: - with path.open() as f: - data = yaml.safe_load(f) or {} - unknown = set(data) - _CONFIG_KEYS - if unknown: - raise ValueError( - f"unknown config key(s) in {path}: {sorted(unknown)}\n" - f"allowed: {sorted(_CONFIG_KEYS)}" - ) - return data diff --git a/bugbug/tools/bug_fix/firefox_mcp.py b/bugbug/tools/bug_fix/firefox_mcp.py deleted file mode 100644 index 08a553a585..0000000000 --- a/bugbug/tools/bug_fix/firefox_mcp.py +++ /dev/null @@ -1,250 +0,0 @@ -"""In-process MCP server for Firefox build + testcase evaluation.""" - -from __future__ import annotations - -import json -from dataclasses import dataclass -from pathlib import Path - -from claude_agent_sdk import create_sdk_mcp_server, tool - -from bugbug.tools.bug_fix.firefox_tools import ( - bootstrap_firefox, - build_firefox, - evaluate_testcase, - js_shell_evaluator, -) - - -@dataclass -class FirefoxContext: - """Firefox-related paths, derived from --source-repo at startup. - - Defaults follow: mozconfig at the source root, objdir-ff-asan/ under it. The - agent can still override firefox_binary per-call if it wants to test a - different build. - """ - - source_dir: Path - mozconfig: Path - objdir: Path - binary: Path - js_binary: Path - - @classmethod - def from_source_repo(cls, source_repo: Path) -> "FirefoxContext": - src = source_repo.resolve() - objdir = src / "objdir-ff-asan" - return cls( - source_dir=src, - mozconfig=src / ".mozconfig", - objdir=objdir, - binary=objdir / "dist" / "bin" / "firefox", - js_binary=objdir / "dist" / "bin" / "js", - ) - - -def _jtext(obj) -> dict: - return {"content": [{"type": "text", "text": json.dumps(obj, indent=2)}]} - - -def build_server(ctx: FirefoxContext): - """Create the in-process Firefox MCP server bound to ``ctx``.""" - - @tool( - "evaluate_testcase", - "Run a testcase in Firefox under xvfb and capture crash output via " - "grizzly. 
The build's sanitizer configuration (ASAN, TSAN, plain " - "debug, etc.) is whatever the configured mozconfig produces. " - "Returns JSON: " - "crashed (bool) — whether Firefox crashed; " - "crashed_parent (bool) — parent process vs content process crash; " - "logs (dict) — stderr/stdout and, if crashed, crashdata (crash/" - "sanitizer report); " - "files (dict) — the testcase bundle that triggered the crash; " - "message (str) — human-readable summary. " - "When crashed=false, logs.stderr/stdout often reveal why the trigger " - "missed (JS exception, wrong pref, feature gated off).", - { - "type": "object", - "properties": { - "content": { - "type": "string", - "description": "Testcase file content (HTML, JS, SVG, etc.)", - }, - "filename": { - "type": "string", - "description": ( - "Name for the testcase entry point, e.g. 'test.html'. " - "Extension matters: grizzly serves it with the matching " - "MIME type." - ), - }, - "firefox_binary": { - "type": "string", - "description": ( - f"Path to Firefox binary. Optional — defaults to {ctx.binary}" - ), - }, - "timeout": { - "type": "integer", - "description": "Seconds to wait for a crash (default: 30)", - }, - "prefs": { - "type": "object", - "description": ( - "Firefox about:config prefs to set before launch, e.g. " - '{"dom.webgpu.enabled": true}. Use this to unlock ' - "gated features your testcase needs." - ), - "additionalProperties": { - "oneOf": [ - {"type": "string"}, - {"type": "integer"}, - {"type": "boolean"}, - ] - }, - }, - }, - "required": ["content", "filename"], - }, - ) - async def evaluate_testcase_tool(args): - binary = Path(args.get("firefox_binary") or ctx.binary) - crash_info = await evaluate_testcase( - content=args["content"], - filename=args["filename"], - firefox_binary=binary, - timeout=args.get("timeout", 30), - prefs=args.get("prefs") or {}, - ) - return _jtext(crash_info) - - @tool( - "build_firefox", - "Build Firefox using the configured mozconfig. 
Slow (tens of " - "minutes on a cold build, faster incremental). Returns JSON: " - "success (bool), build_dir (str), message (str), stdout/stderr. " - "Only call this if you've changed source or the binary is missing — " - "check if the binary exists first.", - { - "type": "object", - "properties": { - "firefox_dir": { - "type": "string", - "description": ( - "Firefox source directory. Optional — defaults to " - f"{ctx.source_dir}" - ), - }, - "mozconfig_path": { - "type": "string", - "description": ( - f"MOZCONFIG to use. Optional — defaults to {ctx.mozconfig}" - ), - }, - }, - }, - ) - async def build_firefox_tool(args): - firefox_dir = ( - Path(args["firefox_dir"]) if "firefox_dir" in args else ctx.source_dir - ) - mozconfig = ( - Path(args["mozconfig_path"]) if "mozconfig_path" in args else ctx.mozconfig - ) - result = await build_firefox(firefox_dir, mozconfig, ctx.objdir) - return _jtext(result) - - @tool( - "evaluate_js_shell", - "Run a JS testcase in the SpiderMonkey shell and capture crash " - "output. The shell's sanitizer configuration is whatever the " - "configured mozconfig produces. Much faster than full-browser " - "evaluate_testcase — use this for engine-level bugs (JIT, GC, " - "TypedArrays, WASM) that don't need a DOM. Returns JSON: " - "crashed (bool) — whether the shell crashed (signal or sanitizer); " - "message (str) — human-readable summary, includes signal name if killed; " - "logs (dict) — stderr/stdout (tail-truncated to 1 MB) and, if crashed, " - "crashdata (crash/sanitizer report); " - "files (dict) — the .js testcase that triggered the crash. " - "A nonzero exit without a signal is a JS exception, NOT a crash — " - "check logs.stderr for the syntax/runtime error.", - { - "type": "object", - "properties": { - "content": { - "type": "string", - "description": "JavaScript testcase source", - }, - "js_binary": { - "type": "string", - "description": ( - "Path to the SpiderMonkey js binary. 
Optional — " - f"defaults to {ctx.js_binary}" - ), - }, - "timeout": { - "type": "integer", - "description": "Seconds to wait before killing the shell (default: 30)", - }, - "flags": { - "type": "array", - "items": {"type": "string"}, - "description": ( - 'Extra shell flags, e.g. ["--no-threads", ' - '"--ion-eager"]. --fuzzing-safe is always prepended.' - ), - }, - }, - "required": ["content"], - }, - ) - async def evaluate_js_shell_tool(args): - binary = Path(args.get("js_binary") or ctx.js_binary) - crash_info = await js_shell_evaluator( - content=args["content"], - js_binary=binary, - timeout=args.get("timeout", 30), - flags=args.get("flags"), - ) - return _jtext(crash_info) - - @tool( - "bootstrap_firefox", - "Run `./mach bootstrap` to install the Firefox build toolchain " - "(rust, clang, cbindgen) under the running user's ~/.mozbuild/. " - "Required before a full (non-artifact) build. Slow — ~10-15 min on a " - "fresh image, fast on re-runs. Returns JSON: success, message, " - "stdout, stderr. Only call this if you intend to do a full build; " - "artifact builds don't need bootstrap.", - { - "type": "object", - "properties": { - "firefox_dir": { - "type": "string", - "description": ( - "Firefox source directory. 
Optional — defaults to " - f"{ctx.source_dir}" - ), - }, - }, - }, - ) - async def bootstrap_firefox_tool(args): - firefox_dir = ( - Path(args["firefox_dir"]) if "firefox_dir" in args else ctx.source_dir - ) - result = await bootstrap_firefox(firefox_dir) - return _jtext(result) - - return create_sdk_mcp_server( - name="firefox", - version="0.1.0", - tools=[ - evaluate_testcase_tool, - build_firefox_tool, - evaluate_js_shell_tool, - bootstrap_firefox_tool, - ], - ) diff --git a/bugbug/tools/duplicate_bugs/__init__.py b/bugbug/tools/duplicate_bugs/__init__.py deleted file mode 100644 index 3d38945862..0000000000 --- a/bugbug/tools/duplicate_bugs/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from bugbug.tools.duplicate_bugs.agent import DuplicateBugsTool - -__all__ = ["DuplicateBugsTool"] diff --git a/bugbug/tools/duplicate_bugs/agent.py b/bugbug/tools/duplicate_bugs/agent.py deleted file mode 100644 index 752c01ac4e..0000000000 --- a/bugbug/tools/duplicate_bugs/agent.py +++ /dev/null @@ -1,531 +0,0 @@ -r"""Duplicate bug detector -- find duplicate bugs, three ways. - -mode="local" One crash per sub-directory. For each, decide - whether it is already filed as a blocker of - meta_bug on Bugzilla. - -mode="bugs" Already-filed bugs. For each, decide whether some - *other* blocker of meta_bug covers the same crash. - -mode="local_to_local" One crash per sub-directory, but the directory - still contains internal duplicates. Groups the - sub-directories by crash and copies one - representative per group into results_dir. 
-""" - -from __future__ import annotations - -import json -import shutil -import sys -from dataclasses import dataclass, field -from pathlib import Path - -import bugsy -from claude_agent_sdk import ( - AssistantMessage, - ClaudeAgentOptions, - ClaudeSDKClient, - ResultMessage, - SystemMessage, - TextBlock, - ThinkingBlock, - ToolResultBlock, - ToolUseBlock, - UserMessage, -) - -from bugbug.tools.base import GenerativeModelTool -from bugbug.tools.bug_fix.bugzilla_mcp import BugzillaContext -from bugbug.tools.bug_fix.bugzilla_mcp import build_server as build_bugzilla_server -from bugbug.tools.duplicate_bugs.config import ( - BUGZILLA_READ_TOOLS, - parse_dir_verdict, - parse_verdict, -) - -HERE = Path(__file__).resolve().parent - - -# --------------------------------------------------------------------------- # -# Result type -# --------------------------------------------------------------------------- # - - -@dataclass -class DuplicateResult: - exit_code: int = 0 - results: list[tuple[str, str]] = field(default_factory=list) - - -# --------------------------------------------------------------------------- # -# Transcript streaming -# --------------------------------------------------------------------------- # - - -def _truncate(s: str, n: int = 400) -> str: - return s if len(s) <= n else s[:n] + f"... 
[{len(s) - n} more chars]" - - -class Reporter: - def __init__(self, verbose: bool, log_path: Path | None): - self.verbose = verbose - self._log = log_path.open("w", encoding="utf-8") if log_path else None - - def __enter__(self): - return self - - def __exit__(self, *exc): - if self._log: - self._log.close() - - def start_item(self, label: str) -> None: - header = f"\n{'#' * 60}\n# {label}\n{'#' * 60}" - self._emit(header, always=True) - - def _emit(self, line: str, *, always: bool = False, full: str | None = None): - if self._log: - self._log.write((full if full is not None else line) + "\n") - self._log.flush() - if always or self.verbose: - print(line, file=sys.stderr) - - def message(self, msg) -> None: - if isinstance(msg, AssistantMessage): - for block in msg.content: - if isinstance(block, TextBlock): - self._emit(f"[agent] {block.text}", always=True) - elif isinstance(block, ThinkingBlock): - thinking = block.thinking.strip() - self._emit( - f"[thinking] {_truncate(thinking.split(chr(10), 1)[0], 120)}", - full=f"[thinking]\n{thinking}", - ) - elif isinstance(block, ToolUseBlock): - inp = json.dumps(block.input, default=str) - self._emit( - f"[→tool] {block.name}({_truncate(inp, 200)})", - full=f"[→tool] {block.name}\n" - f"{json.dumps(block.input, indent=2, default=str)}", - ) - elif isinstance(msg, UserMessage) and isinstance(msg.content, list): - for block in msg.content: - if isinstance(block, ToolResultBlock): - marker = "ERR" if block.is_error else "ok" - if isinstance(block.content, str): - text = block.content - elif isinstance(block.content, list): - text = "\n".join( - c.get("text", "") - for c in block.content - if isinstance(c, dict) and c.get("type") == "text" - ) - else: - text = str(block.content) - self._emit( - f" [tool←{marker}] {_truncate(text, 300)}", - full=f" [tool←{marker}]\n{text}", - ) - elif isinstance(msg, SystemMessage): - if msg.subtype == "init": - self._emit( - f"[system] session started (model={msg.data.get('model', '?')})" - 
) - elif isinstance(msg, ResultMessage): - cost = f" cost=${msg.total_cost_usd:.4f}" if msg.total_cost_usd else "" - self._emit(f"[done] turns={msg.num_turns}{cost}") - if msg.is_error: - self._emit(f"[done] ERROR: {msg.result}", always=True) - - -# --------------------------------------------------------------------------- # -# Agent sessions -# --------------------------------------------------------------------------- # - - -async def _run_session( - options: ClaudeAgentOptions, - prompt: str, - reporter: Reporter, -) -> str: - """Run one agent session to completion and extract its verdict.""" - final_text = "" - errored = False - async with ClaudeSDKClient(options=options) as client: - await client.query(prompt) - async for msg in client.receive_response(): - reporter.message(msg) - if isinstance(msg, AssistantMessage): - for block in msg.content: - if isinstance(block, TextBlock): - final_text = block.text - elif isinstance(msg, ResultMessage): - errored = msg.is_error - - if errored: - return "ERROR" - return parse_verdict(final_text) or "UNKNOWN" - - -async def match_local_crash( - crash_path: Path, - meta_bug: int, - base_options: ClaudeAgentOptions, - reporter: Reporter, -) -> str: - opts = ClaudeAgentOptions(**{**base_options.__dict__, "cwd": str(crash_path)}) - - contents = sorted( - p.name + ("/" if p.is_dir() else "") for p in crash_path.iterdir() - ) - prompt = ( - f"Crash directory: {crash_path}\n" - f"Meta bug: {meta_bug}\n" - f"Directory contents: {', '.join(contents) or '(empty)'}\n\n" - f"Determine whether this crash is already filed as a blocker of " - f"bug {meta_bug}. End your final response with the VERDICT: line." 
- ) - return await _run_session(opts, prompt, reporter) - - -async def match_local_to_local( - subject: Path, - candidates: list[str], - base_options: ClaudeAgentOptions, - reporter: Reporter, -) -> str: - contents = sorted(p.name + ("/" if p.is_dir() else "") for p in subject.iterdir()) - cand_lines = "\n".join(f" - {c}" for c in candidates) - prompt = ( - f"Subject directory: {subject.name}\n" - f"Subject contents: {', '.join(contents) or '(empty)'}\n\n" - f"Candidate directories ({len(candidates)}):\n{cand_lines}\n\n" - f"Determine whether the subject crash matches any candidate. " - f"End your final response with the VERDICT: line — either NEW " - f"or exactly one of the candidate names above." - ) - - final_text = "" - errored = False - async with ClaudeSDKClient(options=base_options) as client: - await client.query(prompt) - async for msg in client.receive_response(): - reporter.message(msg) - if isinstance(msg, AssistantMessage): - for block in msg.content: - if isinstance(block, TextBlock): - final_text = block.text - elif isinstance(msg, ResultMessage): - errored = msg.is_error - - if errored: - return "ERROR" - return parse_dir_verdict(final_text, set(candidates)) or "UNKNOWN" - - -async def match_filed_bug( - subject: int, - meta_bug: int, - options: ClaudeAgentOptions, - reporter: Reporter, -) -> str: - prompt = ( - f"Subject bug: {subject}\n" - f"Meta bug: {meta_bug}\n\n" - f"Determine whether bug {subject} has a duplicate among the " - f"blockers of bug {meta_bug}. End your final response with the " - f"VERDICT: line." 
- ) - return await _run_session(options, prompt, reporter) - - -# --------------------------------------------------------------------------- # -# Mode runners -# --------------------------------------------------------------------------- # - - -def _build_options( - system_prompt: str, - bugzilla_server, - *, - allow_local_fs: bool, - model: str | None = None, - max_turns: int | None = None, -) -> ClaudeAgentOptions: - tools = list(BUGZILLA_READ_TOOLS) - if allow_local_fs: - tools = ["Read", "Glob", "Grep", *tools] - return ClaudeAgentOptions( - system_prompt=system_prompt, - mcp_servers={"bugzilla": bugzilla_server}, - permission_mode="bypassPermissions", - allowed_tools=tools, - model=model, - max_turns=max_turns, - setting_sources=[], - ) - - -async def _run_local( - *, - local_dir: Path, - meta_bug: int, - bugzilla_server, - model: str | None, - max_turns: int | None, - verbose: bool, - log: Path | None, -) -> DuplicateResult: - system_prompt = ( - (HERE / "prompts" / "dupdetector_local.md") - .read_text() - .format(meta_bug=meta_bug) - ) - base_options = _build_options( - system_prompt, - bugzilla_server, - allow_local_fs=True, - model=model, - max_turns=max_turns, - ) - - crash_subdirs = sorted(d for d in local_dir.iterdir() if d.is_dir()) - if not crash_subdirs: - print( - f"[duplicate_bugs] no sub-directories found in {local_dir}", file=sys.stderr - ) - return DuplicateResult() - - print( - f"[duplicate_bugs] matching {len(crash_subdirs)} crash(es) against " - f"meta bug {meta_bug}", - file=sys.stderr, - ) - - results: list[tuple[str, str]] = [] - exit_code = 0 - with Reporter(verbose=verbose, log_path=log) as reporter: - for i, subdir in enumerate(crash_subdirs, 1): - print( - f"[duplicate_bugs] {i}/{len(crash_subdirs)}: {subdir.name}", - file=sys.stderr, - ) - reporter.start_item(f"crash: {subdir.name}") - verdict = await match_local_crash(subdir, meta_bug, base_options, reporter) - results.append((subdir.name, verdict)) - if verdict in ("ERROR", 
"UNKNOWN"): - exit_code = 1 - - return DuplicateResult(exit_code=exit_code, results=results) - - -async def _run_bugs( - *, - bug_ids: list[int], - meta_bug: int, - bugzilla_server, - model: str | None, - max_turns: int | None, - verbose: bool, - log: Path | None, -) -> DuplicateResult: - print( - f"[duplicate_bugs] checking {len(bug_ids)} bug(s) against blockers " - f"of meta bug {meta_bug}", - file=sys.stderr, - ) - - prompt_tmpl = (HERE / "prompts" / "dupdetector_bugs.md").read_text() - - results: list[tuple[str, str]] = [] - exit_code = 0 - with Reporter(verbose=verbose, log_path=log) as reporter: - for i, subject in enumerate(bug_ids, 1): - print( - f"[duplicate_bugs] {i}/{len(bug_ids)}: bug {subject}", file=sys.stderr - ) - reporter.start_item(f"bug {subject}") - - system_prompt = prompt_tmpl.format(subject=subject, meta_bug=meta_bug) - options = _build_options( - system_prompt, - bugzilla_server, - allow_local_fs=False, - model=model, - max_turns=max_turns, - ) - - verdict = await match_filed_bug(subject, meta_bug, options, reporter) - if verdict.isdigit() and int(verdict) == subject: - reporter._emit( - f"[duplicate_bugs] bug {subject}: verdict was itself — " - f"demoting to NEW", - always=True, - ) - verdict = "NEW" - results.append((str(subject), verdict)) - if verdict in ("ERROR", "UNKNOWN"): - exit_code = 1 - - return DuplicateResult(exit_code=exit_code, results=results) - - -async def _run_local_to_local( - *, - local_dir: Path, - results_dir: Path, - model: str | None, - max_turns: int | None, - verbose: bool, - log: Path | None, -) -> DuplicateResult: - crash_subdirs = sorted(d for d in local_dir.iterdir() if d.is_dir()) - if not crash_subdirs: - print( - f"[duplicate_bugs] no sub-directories found in {local_dir}", file=sys.stderr - ) - return DuplicateResult() - - print( - f"[duplicate_bugs] deduplicating {len(crash_subdirs)} crash(es) locally", - file=sys.stderr, - ) - - system_prompt = (HERE / "prompts" / 
"dupdetector_local_to_local.md").read_text() - options = ClaudeAgentOptions( - system_prompt=system_prompt, - permission_mode="bypassPermissions", - allowed_tools=["Read", "Glob", "Grep"], - model=model, - max_turns=max_turns, - setting_sources=[], - cwd=str(local_dir), - ) - - groups: dict[str, list[str]] = {} - results: list[tuple[str, str]] = [] - exit_code = 0 - - with Reporter(verbose=verbose, log_path=log) as reporter: - for i, subdir in enumerate(crash_subdirs, 1): - print( - f"[duplicate_bugs] {i}/{len(crash_subdirs)}: {subdir.name}", - file=sys.stderr, - ) - - representatives = list(groups.keys()) - if not representatives: - verdict = "NEW" - else: - reporter.start_item(f"crash: {subdir.name}") - verdict = await match_local_to_local( - subdir, representatives, options, reporter - ) - if verdict == subdir.name: - verdict = "NEW" - - if verdict == "NEW": - groups[subdir.name] = [subdir.name] - rep = subdir.name - elif verdict in ("ERROR", "UNKNOWN"): - groups[subdir.name] = [subdir.name] - rep = verdict - exit_code = 1 - else: - groups[verdict].append(subdir.name) - rep = verdict - - results.append((subdir.name, rep)) - - if results_dir is not None: - results_dir.mkdir(parents=True) - for rep_name in groups: - shutil.copytree(local_dir / rep_name, results_dir / rep_name) - - print( - f"[duplicate_bugs] {len(groups)} unique crash(es) copied to {results_dir}", - file=sys.stderr, - ) - - return DuplicateResult(exit_code=exit_code, results=results) - - -# --------------------------------------------------------------------------- # -# Tool class -# --------------------------------------------------------------------------- # - - -class DuplicateBugsTool(GenerativeModelTool): - """Duplicate bug detector using claude-agent-sdk.""" - - @classmethod - def create(cls, **kwargs): - return cls() - - async def run( - self, - *, - mode: str, - base_url: str | None = None, - api_key: str | None = None, - meta_bug: int | None = None, - bug_ids: list[int] | None = None, - 
local_dir: Path | None = None, - results_dir: Path | None = None, - model: str | None = None, - max_turns: int | None = None, - verbose: bool = False, - log: Path | None = None, - ) -> DuplicateResult: - if mode == "local_to_local": - if local_dir is None: - raise ValueError("local_dir is required for local_to_local mode") - if results_dir is None: - raise ValueError("results_dir is required for local_to_local mode") - return await _run_local_to_local( - local_dir=local_dir, - results_dir=results_dir, - model=model, - max_turns=max_turns, - verbose=verbose, - log=log, - ) - - # Modes that need Bugzilla - if not base_url or not api_key: - raise ValueError("base_url and api_key are required for local/bugs modes") - if meta_bug is None: - raise ValueError("meta_bug is required for local/bugs modes") - - bz = bugsy.Bugsy(api_key=api_key, bugzilla_url=base_url) - bz_ctx = BugzillaContext(client=bz) - bugzilla_server = build_bugzilla_server(bz_ctx) - - if mode == "local": - if local_dir is None: - raise ValueError("local_dir is required for local mode") - return await _run_local( - local_dir=local_dir, - meta_bug=meta_bug, - bugzilla_server=bugzilla_server, - model=model, - max_turns=max_turns, - verbose=verbose, - log=log, - ) - elif mode == "bugs": - if not bug_ids: - raise ValueError("bug_ids is required for bugs mode") - return await _run_bugs( - bug_ids=bug_ids, - meta_bug=meta_bug, - bugzilla_server=bugzilla_server, - model=model, - max_turns=max_turns, - verbose=verbose, - log=log, - ) - else: - raise ValueError( - f"Unknown mode: {mode}. Must be 'local', 'bugs', or 'local_to_local'" - ) diff --git a/bugbug/tools/duplicate_bugs/config.py b/bugbug/tools/duplicate_bugs/config.py deleted file mode 100644 index 1ffe6d8f55..0000000000 --- a/bugbug/tools/duplicate_bugs/config.py +++ /dev/null @@ -1,53 +0,0 @@ -from __future__ import annotations - -import re -from pathlib import Path - -import yaml - -# Read-only Bugzilla surface. 
-BUGZILLA_READ_TOOLS = [ - "mcp__bugzilla__search_bugs", - "mcp__bugzilla__get_bugs", - "mcp__bugzilla__get_bug_comments", - "mcp__bugzilla__get_bug_attachments", -] - -# The VERDICT: line the agent is told to emit. -_VERDICT_RE = re.compile( - r"^VERDICT:\s*" - r"(?:bug\s*)?" - r"(?:https?://\S+?id=)?" - r"(NEW|\d+)\b", - re.IGNORECASE | re.MULTILINE, -) - -# --local-to-local verdicts name a directory, not a bug ID. -_VERDICT_LINE_RE = re.compile(r"^VERDICT:\s*(.+?)\s*$", re.MULTILINE) - -_CONFIG_KEYS = {"base_url", "model", "max_turns"} - - -def load_config(path: Path) -> dict: - with path.open() as f: - data = yaml.safe_load(f) or {} - return {k: v for k, v in data.items() if k in _CONFIG_KEYS} - - -def parse_verdict(text: str) -> str | None: - matches = _VERDICT_RE.findall(text) - if not matches: - return None - v = matches[-1].upper() - return "NEW" if v == "NEW" else v - - -def parse_dir_verdict(text: str, candidates: set[str]) -> str | None: - matches = _VERDICT_LINE_RE.findall(text) - if not matches: - return None - v = matches[-1] - if v.upper() == "NEW": - return "NEW" - v = v.rstrip("/") - return v if v in candidates else None diff --git a/bugbug/tools/duplicate_bugs/prompts/dupdetector_bugs.md b/bugbug/tools/duplicate_bugs/prompts/dupdetector_bugs.md deleted file mode 100644 index e984191603..0000000000 --- a/bugbug/tools/duplicate_bugs/prompts/dupdetector_bugs.md +++ /dev/null @@ -1,38 +0,0 @@ -You are a duplicate detector. Your sole job: decide whether **bug {subject}** is already covered by some _other_ bug blocking meta bug **{meta_bug}**. - -# Inputs - -- **Subject bug**: {subject} — the one you are evaluating. It may or may not already block {meta_bug}; doesn't matter. -- **Meta bug**: {meta_bug} — the search scope. Only its blockers are valid matches. - -# Approach - -1. **Read the subject.** `get_bugs` with `ids=[{subject}]`, `include_comments=true`, and `include_fields=id,summary,status,resolution,cf_crash_signature,product,component`. 
Extract the discriminating signal from summary / comment 0 / `cf_crash_signature`: the top stack frame, the assertion text, a fuzzer hash. Pick the fragment that would have to appear in a true duplicate. - -2. **Search the blockers.** `search_bugs` with `blocks={meta_bug}` plus your best term. Request `include_fields=id,summary,status,resolution,cf_crash_signature`. If {subject} itself shows up, that's just the subject blocking the meta — ignore it, you're looking for _different_ bugs. - -3. **Widen if empty.** Drop the term constraint, try your second-best signal, still scoped to `blocks={meta_bug}`. Stop after ~3 attempts. - -4. **Verify.** Pull comment 0 on your best candidate. Same component + same rough area is not enough — a match needs the _same_ crash: same assertion, same top frames, or same `cf_crash_signature`. Different crashing function in the same file → not a match. - -# Edge cases - -- {subject} is `RESOLVED DUPLICATE` → if the dupe target blocks {meta_bug}, report the target; otherwise keep searching normally. -- Two candidates both match → pick the older (lower ID). -- {subject} inaccessible → report `VERDICT: NEW` with a note that you couldn't read it. - -# Output - -Your **final message** must end with exactly one line: - -``` -VERDICT: -``` - -or - -``` -VERDICT: NEW -``` - -One or two sentences of justification above the line. Keep it tight. diff --git a/bugbug/tools/duplicate_bugs/prompts/dupdetector_local.md b/bugbug/tools/duplicate_bugs/prompts/dupdetector_local.md deleted file mode 100644 index 7f5dd736f8..0000000000 --- a/bugbug/tools/duplicate_bugs/prompts/dupdetector_local.md +++ /dev/null @@ -1,49 +0,0 @@ -You are a crash-to-bug matcher. Your sole job: decide whether the crash in your current working directory already has a bug filed on Bugzilla that blocks meta bug **{meta_bug}**. - -# Your working directory - -Your cwd is a single crash sub-directory. 
It typically contains things like an ASAN log, a minidump, a testcase, a `crash_info.json`, or similar. Start by reading whatever is there — there is no fixed schema. - -From those files, extract the **distinctive signals** you'll search for: - -- The crash signature / top-of-stack function name -- The assertion or ASAN error message (the short, greppable part — not the full trace) -- Any hash or ID the fuzzer embedded -- Source file + line of the crashing frame - -Pick the one or two fragments most likely to appear verbatim in a bug summary or comment 0. Prefer specific over generic: a mangled symbol beats "heap-buffer-overflow". - -# How to search - -You only have **read-only** Bugzilla tools. No writes, no Firefox tools. - -1. **Get the candidate set once.** Use `search_bugs` with `blocks={meta_bug}` plus your best discriminating term (e.g. `short_desc` / `cf_crash_signature`). Request `include_fields=id,summary,status,resolution,cf_crash_signature`. Don't omit the search term — pulling every blocker of a busy meta bug wastes turns. - -2. **If that's empty, widen**: drop the field constraint and try a quicksearch/content match, or try your second-best term, still scoped to `blocks={meta_bug}`. - -3. **Verify the best candidate.** Summaries lie. Use `get_bug_comments` (or `get_bugs` with `include_comments=true`) on your top one or two hits and check comment 0 for the same stack / assertion / testcase shape you see locally. - -4. **Stop after ~3 search attempts.** Diminishing returns. If you haven't found it by then, it's probably not filed. - -# Deciding - -- **Match**: comment 0 or the crash signature clearly shows the _same_ crash — same assertion or same top frames. A duplicate that was resolved DUPLICATE still counts; report the dupe target if obvious, otherwise the dupe itself. -- **No match**: nothing in the meta bug's dependency tree lines up. - -Same component + same rough area but a _different_ crashing function → **not** a match. 
- -# Output - -Your **final message** must end with exactly one line in this form (no markdown, no trailing punctuation): - -``` -VERDICT: -``` - -or - -``` -VERDICT: NEW -``` - -Before that line, give one or two sentences of justification so a human skimming the transcript can see why. Keep it short — the orchestrator only parses the `VERDICT:` line. diff --git a/bugbug/tools/duplicate_bugs/prompts/dupdetector_local_to_local.md b/bugbug/tools/duplicate_bugs/prompts/dupdetector_local_to_local.md deleted file mode 100644 index e9b56d7a7b..0000000000 --- a/bugbug/tools/duplicate_bugs/prompts/dupdetector_local_to_local.md +++ /dev/null @@ -1,37 +0,0 @@ -You are a crash deduplicator. Your sole job: decide whether the **subject** crash directory represents the same crash as any of the **candidate** directories. - -# Setup - -Your cwd is the parent directory containing all crash sub-directories. You have Read/Glob/Grep — no Bugzilla, no network, no writes. - -The user message names one **subject** directory and a list of **candidate** directories. All paths are relative to your cwd. - -# Approach - -1. **Read the subject.** Look at whatever is in the subject directory — ASAN log, minidump, `crash_info.json`, testcase. There is no fixed schema. Extract the discriminating signal: top-of-stack function, assertion text, ASAN error line, source file + crashing line. - -2. **Scan the candidates.** For each candidate directory, read the corresponding artifact and compare. You don't have to read every file in every candidate — once you find the stack/assertion in one file, check the same filename in the others. - -3. **Decide.** A match needs the _same_ crash: same assertion string, or same top stack frames, or same fuzzer-assigned signature hash. Same component + same rough area but a _different_ crashing function → **not** a match. Slightly different line numbers on the same function are fine (builds drift). 
- -# Short-circuit - -If you find a clear match, stop — don't keep reading the remaining candidates. Report the first one that matches. - -If two candidates both match, pick the one listed first. - -# Output - -Your **final message** must end with exactly one line (no markdown, no trailing punctuation): - -``` -VERDICT: -``` - -where `` is _exactly_ one of the candidate names you were given — or: - -``` -VERDICT: NEW -``` - -if none of them match. One or two sentences of justification above the line. Keep it tight. diff --git a/http_service/Dockerfile b/http_service/Dockerfile index 13c8c12d17..66e89e8b3f 100644 --- a/http_service/Dockerfile +++ b/http_service/Dockerfile @@ -6,11 +6,7 @@ FROM mozilla/bugbug-base:$BUGBUG_VERSION RUN --mount=type=bind,source=pyproject.toml,target=/tmp/workspace/pyproject.toml \ --mount=type=bind,source=uv.lock,target=/tmp/workspace/uv.lock \ --mount=type=bind,source=VERSION,target=/tmp/workspace/VERSION \ - --mount=type=bind,source=http_service/pyproject.toml,target=/tmp/workspace/http_service/pyproject.toml \ - --mount=type=bind,source=services/hackbot-api/pyproject.toml,target=/tmp/workspace/services/hackbot-api/pyproject.toml \ - --mount=type=bind,source=agents/bug-fix/pyproject.toml,target=/tmp/workspace/agents/bug-fix/pyproject.toml \ - --mount=type=bind,source=libs/hackbot-runtime/pyproject.toml,target=/tmp/workspace/libs/hackbot-runtime/pyproject.toml \ - cd /tmp/workspace && uv sync --locked --no-dev --package bugbug-http-service --no-install-project + cd /tmp/workspace && uv sync --frozen --no-dev --package bugbug-http-service --no-install-workspace # Setup http service as package RUN --mount=type=bind,target=/tmp/workspace,rw \ diff --git a/http_service/Dockerfile.bg_worker b/http_service/Dockerfile.bg_worker index 630508848e..8ec753aa7b 100644 --- a/http_service/Dockerfile.bg_worker +++ b/http_service/Dockerfile.bg_worker @@ -6,11 +6,7 @@ FROM mozilla/bugbug-commit-retrieval:$BUGBUG_VERSION RUN 
--mount=type=bind,source=pyproject.toml,target=/tmp/workspace/pyproject.toml \ --mount=type=bind,source=uv.lock,target=/tmp/workspace/uv.lock \ --mount=type=bind,source=VERSION,target=/tmp/workspace/VERSION \ - --mount=type=bind,source=http_service/pyproject.toml,target=/tmp/workspace/http_service/pyproject.toml \ - --mount=type=bind,source=services/hackbot-api/pyproject.toml,target=/tmp/workspace/services/hackbot-api/pyproject.toml \ - --mount=type=bind,source=agents/bug-fix/pyproject.toml,target=/tmp/workspace/agents/bug-fix/pyproject.toml \ - --mount=type=bind,source=libs/hackbot-runtime/pyproject.toml,target=/tmp/workspace/libs/hackbot-runtime/pyproject.toml \ - cd /tmp/workspace && uv sync --locked --no-dev --package bugbug-http-service --no-install-project + cd /tmp/workspace && uv sync --frozen --no-dev --package bugbug-http-service --no-install-workspace # Setup http service as package RUN --mount=type=bind,target=/tmp/workspace,rw \ diff --git a/infra/dockerfile.base b/infra/dockerfile.base index 461156dbd4..ba182e4ba2 100644 --- a/infra/dockerfile.base +++ b/infra/dockerfile.base @@ -10,13 +10,9 @@ ENV UV_PROJECT_ENVIRONMENT="/opt/venv" RUN --mount=type=bind,source=pyproject.toml,target=/tmp/bugbug/pyproject.toml \ --mount=type=bind,source=uv.lock,target=/tmp/bugbug/uv.lock \ --mount=type=bind,source=VERSION,target=/tmp/bugbug/VERSION \ - --mount=type=bind,source=http_service/pyproject.toml,target=/tmp/bugbug/http_service/pyproject.toml \ - --mount=type=bind,source=services/hackbot-api/pyproject.toml,target=/tmp/bugbug/services/hackbot-api/pyproject.toml \ - --mount=type=bind,source=agents/bug-fix/pyproject.toml,target=/tmp/bugbug/agents/bug-fix/pyproject.toml \ - --mount=type=bind,source=libs/hackbot-runtime/pyproject.toml,target=/tmp/bugbug/libs/hackbot-runtime/pyproject.toml \ apt-get update && \ apt-get install -y --no-install-recommends gcc g++ libgomp1 libffi-dev libjemalloc2 zstd patch git && \ - cd /tmp/bugbug && uv sync --locked --package bugbug 
--no-dev --no-install-project && \ + cd /tmp/bugbug && uv sync --frozen --package bugbug --no-dev --no-install-workspace && \ apt-get purge -y gcc g++ libffi-dev patch git && \ apt-get autoremove -y && \ rm -rf /var/lib/apt/lists/* diff --git a/infra/dockerfile.spawn_pipeline b/infra/dockerfile.spawn_pipeline index b91998fcfa..97c616e641 100644 --- a/infra/dockerfile.spawn_pipeline +++ b/infra/dockerfile.spawn_pipeline @@ -10,11 +10,7 @@ ENV UV_PROJECT_ENVIRONMENT="/opt/venv" RUN --mount=type=bind,source=pyproject.toml,target=/tmp/bugbug/pyproject.toml \ --mount=type=bind,source=uv.lock,target=/tmp/bugbug/uv.lock \ --mount=type=bind,source=VERSION,target=/tmp/bugbug/VERSION \ - --mount=type=bind,source=http_service/pyproject.toml,target=/tmp/bugbug/http_service/pyproject.toml \ - --mount=type=bind,source=services/hackbot-api/pyproject.toml,target=/tmp/bugbug/services/hackbot-api/pyproject.toml \ - --mount=type=bind,source=agents/bug-fix/pyproject.toml,target=/tmp/bugbug/agents/bug-fix/pyproject.toml \ - --mount=type=bind,source=libs/hackbot-runtime/pyproject.toml,target=/tmp/bugbug/libs/hackbot-runtime/pyproject.toml \ - cd /tmp/bugbug && uv sync --locked --package bugbug --no-dev --only-group spawn-pipeline --no-install-project + cd /tmp/bugbug && uv sync --frozen --package bugbug --no-dev --only-group spawn-pipeline --no-install-workspace ADD infra/spawn_pipeline.py /code/ diff --git a/libs/agent-tools/agent_tools/__init__.py b/libs/agent-tools/agent_tools/__init__.py new file mode 100644 index 0000000000..2b21ddbf6a --- /dev/null +++ b/libs/agent-tools/agent_tools/__init__.py @@ -0,0 +1,14 @@ +"""Reusable, framework-neutral agent tools. + +Each tool is an async handler decorated with :func:`agent_tools.registry.tool`; +the decorator infers its name, namespace, description and argument schema. A +per-framework adapter (``agent_tools.claude_sdk`` today) turns a module's tools +into a runnable server. Import the submodule you need directly (e.g. 
+``from agent_tools import bugzilla``) — this ``__init__`` imports no submodules, +so pulling one tool never drags in another's optional dependencies, and the +base package never imports any agent framework. +""" + +from agent_tools.registry import ToolDefinition, ToolError, tool, tools_in + +__all__ = ["ToolDefinition", "ToolError", "tool", "tools_in"] diff --git a/libs/agent-tools/agent_tools/bugzilla.py b/libs/agent-tools/agent_tools/bugzilla.py new file mode 100644 index 0000000000..03f2bcf8a4 --- /dev/null +++ b/libs/agent-tools/agent_tools/bugzilla.py @@ -0,0 +1,251 @@ +"""Read-only Bugzilla tools backed by bugsy. + +Framework-neutral: each tool is a ``@tool``-decorated handler whose first +parameter is a :class:`BugzillaContext`. Handlers return plain data and surface +proxy-level restrictions (code 101: endpoint not exposed, code 102: access +denied) as a structured :class:`~agent_tools.registry.ToolError`. +""" + +from __future__ import annotations + +import base64 +from dataclasses import dataclass +from typing import Annotated, Any + +import bugsy +from pydantic import Field + +from agent_tools.registry import ToolError, tool, tools_in + + +@dataclass +class BugzillaContext: + """Holds the live bugsy client. + + Every tool receives the same instance, so they share auth and one TCP + connection pool. + """ + + client: bugsy.Bugsy + + +def _bugsy_error(e: bugsy.BugsyException) -> ToolError: + """Turn a bugsy exception into a structured ToolError. + + The payload is friendly and machine-parseable so the agent can decide what + to do (skip the bug, try a different endpoint, ...) rather than just seeing + a stack trace. + """ + code = getattr(e, "code", None) + msg = getattr(e, "msg", str(e)) + if code == 101: + kind = "endpoint_not_exposed" + hint = "This Bugzilla proxy does not expose this endpoint." + elif code == 102: + kind = "access_denied" + hint = "Your API key cannot access this bug. Skip it." 
+ else: + kind = "bugzilla_error" + hint = None + payload: dict[str, Any] = {"error": kind, "code": code, "message": msg} + if hint: + payload["hint"] = hint + return ToolError(msg, payload=payload) + + +@tool +async def search_bugs( + ctx: BugzillaContext, + params: Annotated[ + dict[str, Any], + Field( + description=( + "Bugzilla REST /bug query parameters. Values may be strings, " + "ints, or comma-separated lists. Example: " + '{"blocks": 12345, "keywords": "sec-low", ' + '"include_fields": "id,summary,status,whiteboard,keywords"}' + ) + ), + ], +) -> dict: + """Search Bugzilla using raw REST query parameters. + + Returns matching bugs in one bulk request. Parameters are ANDed together + (intersect). IMPORTANT: this proxy drops 'whiteboard' and 'keywords' from + _all / _default field sets — list them explicitly in include_fields if you + need them. Common params: id, keywords, blocks, depends_on, product, + component, status, resolution, priority, severity, assigned_to, whiteboard, + include_fields, limit. + """ + try: + result = ctx.client.request("bug", params=params) + except bugsy.BugsyException as e: + raise _bugsy_error(e) from e + bugs = result.get("bugs", []) + return {"count": len(bugs), "bugs": bugs} + + +@tool +async def get_bugs( + ctx: BugzillaContext, + ids: Annotated[list[int], Field(description="Bug IDs to fetch.")], + include_fields: Annotated[ + str | None, + Field( + description=( + "Comma-separated field list, or '_default'/'_all'. Defaults to " + "a sensible triage set." + ) + ), + ] = None, + include_comments: Annotated[ + bool, + Field( + description=( + "If true, also bulk-fetch comments (one extra request total, " + "not one per bug)." + ) + ), + ] = False, +) -> dict: + """Fetch one or more bugs by ID in a single bulk request. + + Inaccessible bugs are silently dropped by the proxy — this tool diffs + requested vs returned and reports them under 'inaccessible'. 
Remember: + request 'whiteboard' and 'keywords' explicitly in include_fields if you need + them. + """ + if not ids: + return {"count": 0, "bugs": [], "inaccessible": []} + include = include_fields or ( + "id,summary,status,resolution,product,component,priority," + "severity,keywords,whiteboard,assigned_to,creator," + "creation_time,last_change_time,blocks,depends_on,see_also," + "cf_crash_signature,url,version,op_sys,platform" + ) + id_csv = ",".join(str(i) for i in ids) + try: + result = ctx.client.request( + "bug", params={"id": id_csv, "include_fields": include} + ) + except bugsy.BugsyException as e: + raise _bugsy_error(e) from e + bugs = result.get("bugs", []) + returned = {b["id"] for b in bugs} + inaccessible = [i for i in ids if i not in returned] + + payload = {"count": len(bugs), "bugs": bugs, "inaccessible": inaccessible} + + if include_comments and bugs: + # Bugzilla lets us fetch comments for many bugs in one call by hitting + # /bug/{first}/comment?ids=rest. One extra round trip total. 
+ first, *rest = [b["id"] for b in bugs] + cparams = {"ids": ",".join(str(i) for i in rest)} if rest else {} + try: + cres = ctx.client.request(f"bug/{first}/comment", params=cparams) + comments_by_bug = { + int(bid): data["comments"] for bid, data in cres.get("bugs", {}).items() + } + for b in bugs: + b["comments"] = comments_by_bug.get(b["id"], []) + except bugsy.BugsyException as e: + payload["comments_error"] = { + "code": getattr(e, "code", None), + "message": getattr(e, "msg", str(e)), + } + + return payload + + +@tool +async def get_bug_comments( + ctx: BugzillaContext, + bug_id: Annotated[int, Field(description="Bug ID.")], +) -> dict: + """Fetch all comments for a single bug.""" + try: + result = ctx.client.request(f"bug/{bug_id}/comment") + except bugsy.BugsyException as e: + raise _bugsy_error(e) from e + comments = result.get("bugs", {}).get(str(bug_id), {}).get("comments", []) + return {"bug_id": bug_id, "count": len(comments), "comments": comments} + + +@tool +async def get_bug_attachments( + ctx: BugzillaContext, + bug_id: Annotated[int, Field(description="Bug ID.")], + include_data: Annotated[ + bool, + Field( + description=( + "If true, include base64-encoded attachment content. Default " + "false. Use sparingly — attachments can be large." + ) + ), + ] = False, +) -> dict: + """Fetch attachments for a bug. + + By default returns metadata only (cheap, safe for large binaries). Set + include_data=true to also download the content — Bugzilla returns it + base64-encoded in the 'data' field of each attachment. 
+ """ + params = {} if include_data else {"exclude_fields": "data"} + try: + result = ctx.client.request(f"bug/{bug_id}/attachment", params=params) + except bugsy.BugsyException as e: + raise _bugsy_error(e) from e + atts = result.get("bugs", {}).get(str(bug_id), []) + return {"bug_id": bug_id, "count": len(atts), "attachments": atts} + + +@tool +async def download_attachment( + ctx: BugzillaContext, + attachment_id: Annotated[ + int, Field(description="Attachment ID (discover via get_bug_attachments).") + ], + dest_path: Annotated[ + str, + Field( + description=( + "Local filesystem path to write the decoded attachment to. " + "Parent directory must already exist. Overwrites if present." + ) + ), + ], +) -> dict: + """Fetch a Bugzilla attachment by ID and write its decoded content to a file. + + The inverse of add_attachment: it handles the base64 decode server-side so + the agent never has to round-trip the blob through its own context. Use + get_bug_attachments first to discover attachment IDs. Returns the written + path, size, and content_type. 
+ """ + try: + result = ctx.client.request(f"bug/attachment/{attachment_id}") + except bugsy.BugsyException as e: + raise _bugsy_error(e) from e + + att = result.get("attachments", {}).get(str(attachment_id)) + if att is None: + raise ToolError( + f"attachment {attachment_id} not found", + payload={"error": "attachment_not_found", "attachment_id": attachment_id}, + ) + + raw = base64.b64decode(att["data"]) + with open(dest_path, "wb") as fp: + fp.write(raw) + + return { + "attachment_id": attachment_id, + "dest_path": dest_path, + "size_bytes": len(raw), + "file_name": att.get("file_name"), + "content_type": att.get("content_type"), + } + + +TOOLS = tools_in(__name__) diff --git a/libs/agent-tools/agent_tools/claude_sdk.py b/libs/agent-tools/agent_tools/claude_sdk.py new file mode 100644 index 0000000000..b4fadcede0 --- /dev/null +++ b/libs/agent-tools/agent_tools/claude_sdk.py @@ -0,0 +1,63 @@ +"""claude-agent-sdk adapter for framework-neutral tool definitions. + +The ONLY module in agent-tools that imports claude-agent-sdk. Wraps a list of +:class:`~agent_tools.registry.ToolDefinition` into an in-process MCP server. +Requires the ``claude-sdk`` optional extra. 
+""" + +from __future__ import annotations + +import json + +from claude_agent_sdk import create_sdk_mcp_server +from claude_agent_sdk import tool as sdk_tool + +from agent_tools.registry import ToolDefinition, ToolError, tool_name_for + + +def _text(content: str) -> dict: + """Wrap plain text in the MCP tool-result content shape the SDK expects.""" + return {"content": [{"type": "text", "text": content}]} + + +def _jtext(obj) -> dict: + """Serialise an object to pretty JSON inside MCP text content.""" + return _text(json.dumps(obj, indent=2, default=str)) + + +def _make_tool(defn: ToolDefinition, ctx, prefix_namespace: bool): + mcp_name = tool_name_for(defn.dotted) if prefix_namespace else defn.name + + @sdk_tool(mcp_name, defn.description, defn.input_schema) + async def run(args): + try: + result = await defn.handler(ctx, **args) + except ToolError as e: + payload = e.payload if e.payload is not None else {"error": str(e)} + return {**_jtext(payload), "is_error": True} + # Handlers return plain data; str is shown verbatim, everything else as JSON. + return _text(result) if isinstance(result, str) else _jtext(result) + + return run + + +def build_sdk_server( + name: str, + ctx, + tools: list[ToolDefinition], + *, + version: str = "0.1.0", + prefix_namespace: bool = False, +): + """Build a claude-agent-sdk ``McpSdkServerConfig`` from tool definitions. + + ``ctx`` is passed as each handler's first argument. ``prefix_namespace`` + names the MCP tools ``<namespace>_<name>`` (used by the shared ``actions`` + server, where one server hosts multiple domains); otherwise the tool name is + the function name (per-domain servers like ``bugzilla``/``firefox``). 
+ """ + return create_sdk_mcp_server( + name=name, + version=version, + tools=[_make_tool(d, ctx, prefix_namespace) for d in tools], + ) diff --git a/libs/agent-tools/agent_tools/firefox/__init__.py b/libs/agent-tools/agent_tools/firefox/__init__.py new file mode 100644 index 0000000000..e371a6be2f --- /dev/null +++ b/libs/agent-tools/agent_tools/firefox/__init__.py @@ -0,0 +1,202 @@ +"""Firefox build + testcase-evaluation tools. + +Framework-neutral ``@tool`` handlers over the implementations in ``.tools``; +each takes a :class:`FirefoxContext` (paths derived from the source repo) as its +first parameter and returns plain data. +""" + +from __future__ import annotations + +from dataclasses import dataclass +from pathlib import Path +from typing import Annotated + +from pydantic import Field + +from agent_tools.registry import tool, tools_in + +from .tools import bootstrap_firefox as _bootstrap_firefox +from .tools import build_firefox as _build_firefox +from .tools import evaluate_testcase as _evaluate_testcase +from .tools import js_shell_evaluator as _js_shell_evaluator + + +@dataclass +class FirefoxContext: + """Firefox-related paths, derived from the source repo at startup. + + Defaults follow: mozconfig at the source root, objdir-ff-asan/ under it. The + agent can still override the binary per-call if it wants to test a different + build. 
+ """ + + source_dir: Path + mozconfig: Path + objdir: Path + binary: Path + js_binary: Path + + @classmethod + def from_source_repo( + cls, source_repo: Path, objdir: str = "objdir-ff-asan" + ) -> "FirefoxContext": + src = source_repo.resolve() + objdir_path = src / objdir + return cls( + source_dir=src, + mozconfig=src / ".mozconfig", + objdir=objdir_path, + binary=objdir_path / "dist" / "bin" / "firefox", + js_binary=objdir_path / "dist" / "bin" / "js", + ) + + +@tool +async def evaluate_testcase( + ctx: FirefoxContext, + content: Annotated[ + str, Field(description="Testcase file content (HTML, JS, SVG, etc.)") + ], + filename: Annotated[ + str, + Field( + description=( + "Name for the testcase entry point, e.g. 'test.html'. Extension " + "matters: grizzly serves it with the matching MIME type." + ) + ), + ], + firefox_binary: Annotated[ + str | None, + Field( + description="Path to Firefox binary. Optional — defaults to the configured build's binary." + ), + ] = None, + timeout: Annotated[ + int, Field(description="Seconds to wait for a crash (default: 30)") + ] = 30, + prefs: Annotated[ + dict[str, str | int | bool] | None, + Field( + description=( + "Firefox about:config prefs to set before launch, e.g. " + '{"dom.webgpu.enabled": true}. Use this to unlock gated features ' + "your testcase needs." + ) + ), + ] = None, +) -> dict: + """Run a testcase in Firefox under xvfb and capture crash output via grizzly. + + The build's sanitizer configuration (ASAN, TSAN, plain debug, etc.) is + whatever the configured mozconfig produces. Returns JSON: crashed (bool) — + whether Firefox crashed; crashed_parent (bool) — parent process vs content + process crash; logs (dict) — stderr/stdout and, if crashed, crashdata + (crash/sanitizer report); files (dict) — the testcase bundle that triggered + the crash; message (str) — human-readable summary. When crashed=false, + logs.stderr/stdout often reveal why the trigger missed (JS exception, wrong + pref, feature gated off). 
+ """ + binary = Path(firefox_binary or ctx.binary) + return await _evaluate_testcase( + content=content, + filename=filename, + firefox_binary=binary, + timeout=timeout, + prefs=prefs or {}, + ) + + +@tool +async def build_firefox( + ctx: FirefoxContext, + firefox_dir: Annotated[ + str | None, + Field( + description="Firefox source directory. Optional — defaults to the configured source dir." + ), + ] = None, + mozconfig_path: Annotated[ + str | None, + Field( + description="MOZCONFIG to use. Optional — defaults to the configured mozconfig." + ), + ] = None, +) -> dict: + """Build Firefox using the configured mozconfig. + + Slow (tens of minutes on a cold build, faster incremental). Returns JSON: + success (bool), build_dir (str), message (str), stdout/stderr. Only call this + if you've changed source or the binary is missing — check if the binary + exists first. + """ + firefox_dir_p = Path(firefox_dir) if firefox_dir else ctx.source_dir + mozconfig_p = Path(mozconfig_path) if mozconfig_path else ctx.mozconfig + return await _build_firefox(firefox_dir_p, mozconfig_p, ctx.objdir) + + +@tool +async def evaluate_js_shell( + ctx: FirefoxContext, + content: Annotated[str, Field(description="JavaScript testcase source")], + js_binary: Annotated[ + str | None, + Field( + description="Path to the SpiderMonkey js binary. Optional — defaults to the configured build's js shell." + ), + ] = None, + timeout: Annotated[ + int, + Field(description="Seconds to wait before killing the shell (default: 30)"), + ] = 30, + flags: Annotated[ + list[str] | None, + Field( + description=( + 'Extra shell flags, e.g. ["--no-threads", "--ion-eager"]. ' + "--fuzzing-safe is always prepended." + ) + ), + ] = None, +) -> dict: + """Run a JS testcase in the SpiderMonkey shell and capture crash output. + + The shell's sanitizer configuration is whatever the configured mozconfig + produces. 
Much faster than full-browser evaluate_testcase — use this for + engine-level bugs (JIT, GC, TypedArrays, WASM) that don't need a DOM. Returns + JSON: crashed (bool) — whether the shell crashed (signal or sanitizer); + message (str) — human-readable summary, includes signal name if killed; logs + (dict) — stderr/stdout (tail-truncated to 1 MB) and, if crashed, crashdata + (crash/sanitizer report); files (dict) — the .js testcase that triggered the + crash. A nonzero exit without a signal is a JS exception, NOT a crash — check + logs.stderr for the syntax/runtime error. + """ + binary = Path(js_binary or ctx.js_binary) + return await _js_shell_evaluator( + content=content, js_binary=binary, timeout=timeout, flags=flags + ) + + +@tool +async def bootstrap_firefox( + ctx: FirefoxContext, + firefox_dir: Annotated[ + str | None, + Field( + description="Firefox source directory. Optional — defaults to the configured source dir." + ), + ] = None, +) -> dict: + """Run ``./mach bootstrap`` to install the Firefox build toolchain. + + Installs rust, clang, cbindgen under the running user's ~/.mozbuild/. + Required before a full (non-artifact) build. Slow — ~10-15 min on a fresh + image, fast on re-runs. Returns JSON: success, message, stdout, stderr. Only + call this if you intend to do a full build; artifact builds don't need + bootstrap. 
+ """ + firefox_dir_p = Path(firefox_dir) if firefox_dir else ctx.source_dir + return await _bootstrap_firefox(firefox_dir_p) + + +TOOLS = tools_in(__name__) diff --git a/bugbug/tools/bug_fix/firefox_tools/__init__.py b/libs/agent-tools/agent_tools/firefox/tools/__init__.py similarity index 100% rename from bugbug/tools/bug_fix/firefox_tools/__init__.py rename to libs/agent-tools/agent_tools/firefox/tools/__init__.py diff --git a/bugbug/tools/bug_fix/firefox_tools/bootstrap_firefox.py b/libs/agent-tools/agent_tools/firefox/tools/bootstrap_firefox.py similarity index 100% rename from bugbug/tools/bug_fix/firefox_tools/bootstrap_firefox.py rename to libs/agent-tools/agent_tools/firefox/tools/bootstrap_firefox.py diff --git a/bugbug/tools/bug_fix/firefox_tools/build_firefox.py b/libs/agent-tools/agent_tools/firefox/tools/build_firefox.py similarity index 100% rename from bugbug/tools/bug_fix/firefox_tools/build_firefox.py rename to libs/agent-tools/agent_tools/firefox/tools/build_firefox.py diff --git a/bugbug/tools/bug_fix/firefox_tools/evaluate_testcase.py b/libs/agent-tools/agent_tools/firefox/tools/evaluate_testcase.py similarity index 100% rename from bugbug/tools/bug_fix/firefox_tools/evaluate_testcase.py rename to libs/agent-tools/agent_tools/firefox/tools/evaluate_testcase.py diff --git a/bugbug/tools/bug_fix/firefox_tools/js_shell_evaluator.py b/libs/agent-tools/agent_tools/firefox/tools/js_shell_evaluator.py similarity index 100% rename from bugbug/tools/bug_fix/firefox_tools/js_shell_evaluator.py rename to libs/agent-tools/agent_tools/firefox/tools/js_shell_evaluator.py diff --git a/libs/agent-tools/agent_tools/registry.py b/libs/agent-tools/agent_tools/registry.py new file mode 100644 index 0000000000..2f449b7a61 --- /dev/null +++ b/libs/agent-tools/agent_tools/registry.py @@ -0,0 +1,107 @@ +"""Framework-neutral declaration of agent tools. 
+ +A ``@tool``-decorated handler is the single source of truth for one agent tool: +its name (the function name), namespace (the defining module's basename), +description (the docstring) and argument schema (the typed signature, minus the +first ``ctx`` parameter). Per-framework adapters (claude-agent-sdk today, +LangChain later) consume :class:`ToolDefinition` without the handlers importing +any framework. This module imports no agent framework — only pydantic. +""" + +from __future__ import annotations + +import functools +import inspect +from collections import defaultdict +from collections.abc import Awaitable, Callable +from dataclasses import dataclass + +from pydantic import create_model + + +def tool_name_for(dotted: str) -> str: + """Map a dotted tool id to its MCP tool name: ``bugzilla.update_bug`` -> ``bugzilla_update_bug``.""" + return dotted.replace(".", "_") + + +class ToolError(Exception): + """An agent tool failed in an expected way. + + Raised by handlers; a per-framework adapter renders it as that framework's + tool-error signal. The optional ``payload`` carries a structured error body + (preferred over a bare message when the agent benefits from machine-readable + detail). The tool layer imports no framework error type. + """ + + def __init__(self, message: str, *, payload: dict | None = None) -> None: + super().__init__(message) + self.payload = payload + + +@dataclass +class ToolDefinition: + """Declarative description of one agent tool, derived from a handler. + + ``handler`` is an async function whose **first positional parameter** is the + tool context (e.g. a ``BugzillaContext`` or an actions recorder); the + remaining parameters carry ``Annotated[T, Field(...)]`` annotations that + define the agent-facing schema. 
+ """ + + name: str + namespace: str + description: str + handler: Callable[..., Awaitable] + + @property + def dotted(self) -> str: + return f"{self.namespace}.{self.name}" + + @functools.cached_property + def args_model(self): + """Pydantic model of the agent-facing args (excludes the ``ctx`` param). + + Derived once from the handler signature so every adapter shares one + schema — claude-agent-sdk consumes ``input_schema``; a LangChain adapter + can use this model directly as ``args_schema``. + """ + sig = inspect.signature(self.handler, eval_str=True) + fields = { + name: ( + param.annotation, + ... if param.default is inspect.Parameter.empty else param.default, + ) + for name, param in list(sig.parameters.items())[1:] # skip `ctx` + } + return create_model(f"{self.namespace}_{self.name}_args", **fields) + + @functools.cached_property + def input_schema(self) -> dict: + return self.args_model.model_json_schema() + + +_REGISTRY: dict[str, list[ToolDefinition]] = defaultdict(list) + + +def tool(fn: Callable[..., Awaitable]) -> Callable[..., Awaitable]: + """Register ``fn`` as a tool, inferring name/namespace/description. + + name = function name; namespace = defining module's basename; description = + function docstring. The function is returned unchanged (still callable); + collect a module's tools with :func:`tools_in`. 
+ """ + namespace = fn.__module__.rsplit(".", 1)[-1] + _REGISTRY[fn.__module__].append( + ToolDefinition( + name=fn.__name__, + namespace=namespace, + description=inspect.getdoc(fn) or "", + handler=fn, + ) + ) + return fn + + +def tools_in(module_name: str) -> list[ToolDefinition]: + """Return the tools registered by ``@tool`` in the given module (``__name__``).""" + return list(_REGISTRY[module_name]) diff --git a/libs/agent-tools/pyproject.toml b/libs/agent-tools/pyproject.toml new file mode 100644 index 0000000000..307e733045 --- /dev/null +++ b/libs/agent-tools/pyproject.toml @@ -0,0 +1,24 @@ +[project] +name = "agent-tools" +version = "0.1.0" +description = "Reusable, framework-neutral agent tools (declaration + per-framework adapters)" +requires-python = ">=3.12" +dependencies = [ + "pydantic>=2.6.0", +] + +[project.optional-dependencies] +bugzilla = ["bugsy"] +firefox = ["grizzly-framework", "prefpicker"] +claude-sdk = ["claude-agent-sdk>=0.1.30"] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["agent_tools"] + +[tool.pytest.ini_options] +asyncio_mode = "auto" +testpaths = ["tests"] diff --git a/libs/agent-tools/tests/test_bugzilla.py b/libs/agent-tools/tests/test_bugzilla.py new file mode 100644 index 0000000000..587202c2d2 --- /dev/null +++ b/libs/agent-tools/tests/test_bugzilla.py @@ -0,0 +1,54 @@ +"""Tests for the Bugzilla read tools.""" + +from unittest.mock import MagicMock + +import pytest +from agent_tools import bugzilla +from agent_tools.bugzilla import BugzillaContext +from agent_tools.claude_sdk import build_sdk_server +from agent_tools.registry import ToolError +from mcp.types import ListToolsRequest + + +async def _list(server): + return ( + await server.request_handlers[ListToolsRequest]( + ListToolsRequest(method="tools/list") + ) + ).root.tools + + +async def test_exposes_read_only_tools(): + config = build_sdk_server( + "bugzilla", 
BugzillaContext(client=MagicMock()), bugzilla.TOOLS + ) + assert config["type"] == "sdk" + tools = await _list(config["instance"]) + assert {t.name for t in tools} == { + "search_bugs", + "get_bugs", + "get_bug_comments", + "get_bug_attachments", + "download_attachment", + } + + +async def test_search_bugs_returns_data(): + client = MagicMock() + client.request.return_value = {"bugs": [{"id": 1}, {"id": 2}]} + result = await bugzilla.search_bugs( + BugzillaContext(client=client), params={"id": "1,2"} + ) + assert result == {"count": 2, "bugs": [{"id": 1}, {"id": 2}]} + + +async def test_search_bugs_raises_tool_error_on_bugsy_failure(): + import bugsy + + client = MagicMock() + err = bugsy.BugsyException("nope") + err.code = 102 + client.request.side_effect = err + with pytest.raises(ToolError) as ei: + await bugzilla.search_bugs(BugzillaContext(client=client), params={}) + assert ei.value.payload["error"] == "access_denied" diff --git a/libs/agent-tools/tests/test_firefox.py b/libs/agent-tools/tests/test_firefox.py new file mode 100644 index 0000000000..9de7a3b208 --- /dev/null +++ b/libs/agent-tools/tests/test_firefox.py @@ -0,0 +1,26 @@ +"""Tests for the Firefox tools.""" + +from agent_tools import firefox +from agent_tools.claude_sdk import build_sdk_server +from mcp.types import ListToolsRequest + + +async def _list(server): + return ( + await server.request_handlers[ListToolsRequest]( + ListToolsRequest(method="tools/list") + ) + ).root.tools + + +async def test_exposes_firefox_tools(tmp_path): + ctx = firefox.FirefoxContext.from_source_repo(tmp_path) + config = build_sdk_server("firefox", ctx, firefox.TOOLS) + assert config["type"] == "sdk" + tools = await _list(config["instance"]) + assert {t.name for t in tools} == { + "evaluate_testcase", + "build_firefox", + "evaluate_js_shell", + "bootstrap_firefox", + } diff --git a/libs/agent-tools/tests/test_registry.py b/libs/agent-tools/tests/test_registry.py new file mode 100644 index 0000000000..8ad4659038 --- 
/dev/null +++ b/libs/agent-tools/tests/test_registry.py @@ -0,0 +1,64 @@ +"""Tests for the @tool decorator and signature-derived schema.""" + +from dataclasses import dataclass +from typing import Annotated + +from agent_tools.registry import ToolError, tool, tool_name_for, tools_in +from pydantic import Field + + +@dataclass +class _Ctx: + value: int + + +@tool +async def sample_tool( + ctx: _Ctx, + bug_id: Annotated[int, Field(description="The bug id.")], + note: Annotated[str, Field(description="A note.")] = "x", +) -> dict: + """Sample tool docstring.""" + return {"bug_id": bug_id, "note": note} + + +_DEFN = next(d for d in tools_in(__name__) if d.name == "sample_tool") + + +def test_decorator_infers_identity(): + assert _DEFN.name == "sample_tool" + assert _DEFN.namespace == "test_registry" # module basename + assert _DEFN.description == "Sample tool docstring." + assert _DEFN.dotted == "test_registry.sample_tool" + + +def test_schema_excludes_ctx_and_keeps_descriptions(): + schema = _DEFN.input_schema + props = schema["properties"] + assert "ctx" not in props + assert set(props) == {"bug_id", "note"} + assert props["bug_id"]["description"] == "The bug id." 
+ + +def test_schema_marks_required_vs_optional(): + schema = _DEFN.input_schema + assert "bug_id" in schema.get("required", []) + assert "note" not in schema.get("required", []) # has a default + + +def test_schema_is_cached(): + assert _DEFN.input_schema is _DEFN.input_schema + + +async def test_handler_remains_callable(): + out = await sample_tool(_Ctx(value=1), bug_id=7) + assert out == {"bug_id": 7, "note": "x"} + + +def test_tool_name_for(): + assert tool_name_for("bugzilla.update_bug") == "bugzilla_update_bug" + + +def test_tool_error_carries_payload(): + err = ToolError("bad", payload={"error": "x"}) + assert err.payload == {"error": "x"} diff --git a/libs/hackbot-runtime/hackbot_runtime/__init__.py b/libs/hackbot-runtime/hackbot_runtime/__init__.py index 5bef861fa6..277d2084f0 100644 --- a/libs/hackbot-runtime/hackbot_runtime/__init__.py +++ b/libs/hackbot-runtime/hackbot_runtime/__init__.py @@ -1,24 +1,20 @@ -from hackbot_runtime.actions import ( - ALL_ACTIONS, - ActionDefinition, - ActionInputError, - ActionsRecorder, - get_actions, -) -from hackbot_runtime.context import Context -from hackbot_runtime.result import AgentResult +from hackbot_runtime.actions.recorder import ActionsRecorder +from hackbot_runtime.config import HackbotConfig +from hackbot_runtime.context import HackbotContext +from hackbot_runtime.errors import AgentError +from hackbot_runtime.results import HackbotAgentResult from hackbot_runtime.runtime import run, run_async +from hackbot_runtime.source import ensure_source_repo from hackbot_runtime.uploader import SignedPolicyUploader __all__ = [ - "ALL_ACTIONS", - "ActionDefinition", - "ActionInputError", "ActionsRecorder", - "AgentResult", - "Context", + "AgentError", + "HackbotAgentResult", + "HackbotConfig", + "HackbotContext", "SignedPolicyUploader", - "get_actions", + "ensure_source_repo", "run", "run_async", ] diff --git a/libs/hackbot-runtime/hackbot_runtime/actions/__init__.py 
b/libs/hackbot-runtime/hackbot_runtime/actions/__init__.py index 71e257d003..829cdebeb6 100644 --- a/libs/hackbot-runtime/hackbot_runtime/actions/__init__.py +++ b/libs/hackbot-runtime/hackbot_runtime/actions/__init__.py @@ -1,26 +1,15 @@ """Recordable actions for hackbot agents. -The runtime exposes a generic ``ActionsRecorder`` plus a registry of -domain-grouped declarative actions (``bugzilla.update_bug``, -``bugzilla.add_comment``, ...). Per-framework wrappers (MCP today, -LangChain later) wrap the registry without touching the action -declarations themselves. +``ActionsRecorder`` is the framework-neutral sink whose collected actions the +runtime serialises into ``summary.json``. The action *declarations* live in +domain modules (``bugzilla``, ...) and use the shared ``@tool`` decorator from +agent-tools, so one mechanism backs both read tools and write-actions. The +claude-sdk adapter is ``hackbot_runtime.actions.claude_sdk.actions_server_for``. """ -from hackbot_runtime.actions import bugzilla as _bugzilla +from hackbot_runtime.actions import bugzilla from hackbot_runtime.actions.recorder import ActionsRecorder -from hackbot_runtime.actions.registry import ( - ActionDefinition, - ActionInputError, - get_actions, -) -ALL_ACTIONS: list[ActionDefinition] = [*_bugzilla.DEFINITIONS] +ACTIONS_SERVER_NAME = "actions" -__all__ = [ - "ALL_ACTIONS", - "ActionDefinition", - "ActionInputError", - "ActionsRecorder", - "get_actions", -] +__all__ = ["ACTIONS_SERVER_NAME", "ActionsRecorder", "bugzilla"] diff --git a/libs/hackbot-runtime/hackbot_runtime/actions/bugzilla.py b/libs/hackbot-runtime/hackbot_runtime/actions/bugzilla.py index e1c7a255a3..c8087b298b 100644 --- a/libs/hackbot-runtime/hackbot_runtime/actions/bugzilla.py +++ b/libs/hackbot-runtime/hackbot_runtime/actions/bugzilla.py @@ -1,10 +1,11 @@ """Bugzilla-domain recordable actions. 
-Each handler takes the ``ActionsRecorder`` as its first positional -parameter (excluded from the agent-facing schema) plus the agent-facing -args annotated with ``Annotated[T, Field(...)]`` so any adapter can derive -the JSON Schema from the signature. Handlers return a short confirmation -string and raise ``ActionInputError`` on invalid input. +Each handler takes the ``ActionsRecorder`` as its first positional parameter +(excluded from the agent-facing schema) plus the agent-facing args annotated +with ``Annotated[T, Field(...)]``. Declared with the shared ``@tool`` decorator +from agent-tools so the same mechanism backs read tools and write-actions. +Handlers record an intended change (nothing is mutated) and return a short +confirmation string, raising ``ToolError`` on invalid input. """ from __future__ import annotations @@ -14,10 +15,10 @@ from pathlib import Path from typing import Annotated, Any +from agent_tools.registry import ToolError, tool, tools_in from pydantic import Field from hackbot_runtime.actions.recorder import ActionsRecorder -from hackbot_runtime.actions.registry import ActionDefinition, ActionInputError _COMMENT_FOOTER = ( "*This is an automated analysis result. If this result is incorrect " @@ -34,6 +35,7 @@ def _confirm(recorder: ActionsRecorder, action_type: str) -> str: return f"Recorded {action_type} (#{len(recorder.actions) - 1})." +@tool async def update_bug( recorder: ActionsRecorder, bug_id: Annotated[int, Field(description="Bug ID to change.")], @@ -60,6 +62,10 @@ async def update_bug( ), ], ) -> str: + """Record an intended change to a Bugzilla bug. + + Recorded into the run summary for human review — does not modify Bugzilla. 
+ """ recorder.record( "bugzilla.update_bug", {"bug_id": bug_id, "changes": changes}, @@ -68,6 +74,7 @@ async def update_bug( return _confirm(recorder, "bugzilla.update_bug") +@tool async def add_comment( recorder: ActionsRecorder, bug_id: Annotated[int, Field(description="Bug ID to comment on.")], @@ -83,6 +90,11 @@ async def add_comment( ), ] = False, ) -> str: + """Record an intended comment on a bug. + + Use is_private=true for security-sensitive notes. Recorded into the run + summary for human review — does not post to Bugzilla. + """ text_with_footer = text.rstrip() + "\n\n" + _COMMENT_FOOTER recorder.record( "bugzilla.add_comment", @@ -92,6 +104,7 @@ async def add_comment( return _confirm(recorder, "bugzilla.add_comment") +@tool async def add_attachment( recorder: ActionsRecorder, bug_id: Annotated[int, Field(description="Bug ID to attach to.")], @@ -142,8 +155,15 @@ async def add_attachment( ), ] = None, ) -> str: + """Record an intended file attachment on a bug. + + Pass a local filesystem path — the runtime uploads a copy of the file + alongside summary.json so the apply step can fetch it. For patches, set + is_patch=true and omit content_type. Recorded into the run summary for human + review — does not upload to Bugzilla. + """ if not os.path.isfile(file_path): - raise ActionInputError(f"file not found: {file_path}") + raise ToolError(f"file not found: {file_path}") file_name = os.path.basename(file_path) resolved_summary = summary or file_name @@ -175,6 +195,7 @@ async def add_attachment( return _confirm(recorder, "bugzilla.add_attachment") +@tool async def create_bug( recorder: ActionsRecorder, product: Annotated[str, Field(description="Bugzilla product.")], @@ -199,6 +220,11 @@ async def create_bug( ), ] = None, ) -> str: + """Record an intended new-bug filing. + + The description becomes comment 0 and is rendered as Markdown. Recorded into + the run summary for human review — does not file in Bugzilla. 
+ """ body: dict[str, Any] = { "product": product, "component": component, @@ -214,43 +240,4 @@ async def create_bug( return _confirm(recorder, "bugzilla.create_bug") -DEFINITIONS: list[ActionDefinition] = [ - ActionDefinition( - type="bugzilla.update_bug", - description=( - "Record an intended change to a Bugzilla bug. Recorded into the " - "run summary for human review — does not modify Bugzilla." - ), - handler=update_bug, - ), - ActionDefinition( - type="bugzilla.add_comment", - description=( - "Record an intended comment on a bug. Use is_private=true for " - "security-sensitive notes. Recorded into the run summary for " - "human review — does not post to Bugzilla." - ), - handler=add_comment, - ), - ActionDefinition( - type="bugzilla.add_attachment", - description=( - "Record an intended file attachment on a bug. Pass a local " - "filesystem path — the runtime uploads a copy of the file " - "alongside summary.json so the apply step can fetch it. For " - "patches, set is_patch=true and omit content_type. Recorded " - "into the run summary for human review — does not upload to " - "Bugzilla." - ), - handler=add_attachment, - ), - ActionDefinition( - type="bugzilla.create_bug", - description=( - "Record an intended new-bug filing. The description becomes " - "comment 0 and is rendered as Markdown. Recorded into the run " - "summary for human review — does not file in Bugzilla." - ), - handler=create_bug, - ), -] +TOOLS = tools_in(__name__) diff --git a/libs/hackbot-runtime/hackbot_runtime/actions/claude_sdk.py b/libs/hackbot-runtime/hackbot_runtime/actions/claude_sdk.py index d4cac79e95..a6cafc94d6 100644 --- a/libs/hackbot-runtime/hackbot_runtime/actions/claude_sdk.py +++ b/libs/hackbot-runtime/hackbot_runtime/actions/claude_sdk.py @@ -1,49 +1,51 @@ -"""claude-agent-sdk adapter for runtime-registered actions. +"""Build the claude-agent-sdk ``actions`` MCP server from recordable actions. 
-Exposes the enabled actions as an in-process MCP server built with the -SDK's own ``tool`` + ``create_sdk_mcp_server`` — guaranteed compatible with -claude-agent-sdk. Other frameworks (LangChain, ...) get their own sibling -adapter as needed; the action registry is shared and framework-neutral. - -Requires the ``claude-sdk`` optional extra of hackbot-runtime. +Thin wrapper over agent-tools' generic adapter: the ``ActionsRecorder`` is the +tool context, and tools are namespace-prefixed (one ``actions`` server hosts +every domain). Requires the ``claude-sdk`` optional extra. """ from __future__ import annotations -from claude_agent_sdk import create_sdk_mcp_server, tool - -from hackbot_runtime.actions.naming import ACTIONS_SERVER_NAME, tool_name_for -from hackbot_runtime.actions.recorder import ActionsRecorder -from hackbot_runtime.actions.registry import ActionDefinition, get_actions - - -def _text(message: str) -> dict: - """Wrap a message in the MCP tool-result content shape the SDK expects.""" - return {"content": [{"type": "text", "text": message}]} - +from pathlib import Path -def _make_tool(defn: ActionDefinition, recorder: ActionsRecorder): - @tool(tool_name_for(defn.type), defn.description, defn.input_schema) - async def run(args): - # The handler returns a short confirmation string. An ActionInputError - # raised inside it propagates and is rendered by the SDK as an - # is_error result with the message preserved. 
- return _text(await defn.handler(recorder, **args)) +from agent_tools.claude_sdk import build_sdk_server +from agent_tools.registry import tool_name_for - return run +from hackbot_runtime.actions import ACTIONS_SERVER_NAME +from hackbot_runtime.actions import bugzilla as _bugzilla +from hackbot_runtime.actions.recorder import ActionsRecorder -def build_actions_sdk_server( - recorder: ActionsRecorder, +def actions_server_for( + recorder: ActionsRecorder | None, types: list[str] | None = None, - name: str = ACTIONS_SERVER_NAME, + *, + fallback_artifacts_dir: Path = Path("artifacts"), ): - """Return a claude-agent-sdk ``McpSdkServerConfig`` for the enabled actions. + """Return ``(recorder, sdk_server)`` for the enabled recordable actions. - ``types`` selects a subset of action types; ``None`` exposes all. + ``recorder=None`` creates a local recorder that copies attachments under + ``fallback_artifacts_dir`` (standalone/script runs with no uploader). + ``types`` selects a subset by dotted id (e.g. ``bugzilla.update_bug``); + ``None`` exposes all. """ - return create_sdk_mcp_server( - name=name, - version="0.1.0", - tools=[_make_tool(defn, recorder) for defn in get_actions(types)], + if recorder is None: + recorder = ActionsRecorder(artifacts_dir=fallback_artifacts_dir) + tools = _bugzilla.TOOLS + if types is not None: + wanted = set(types) + tools = [t for t in tools if t.dotted in wanted] + return recorder, build_sdk_server( + ACTIONS_SERVER_NAME, recorder, tools, prefix_namespace=True ) + + +def actions_to_tool_names(types: list[str]) -> list[str]: + """claude-agent-sdk tool ids for the given action types. + + e.g. ``"bugzilla.update_bug"`` -> ``"mcp__actions__bugzilla_update_bug"``. + Kept beside ``actions_server_for`` so the ids stay in sync with the server it + builds (same server name + tool-name mapping). 
+ """ + return [f"mcp__{ACTIONS_SERVER_NAME}__{tool_name_for(t)}" for t in types] diff --git a/libs/hackbot-runtime/hackbot_runtime/actions/naming.py b/libs/hackbot-runtime/hackbot_runtime/actions/naming.py deleted file mode 100644 index fe3b3b44fd..0000000000 --- a/libs/hackbot-runtime/hackbot_runtime/actions/naming.py +++ /dev/null @@ -1,15 +0,0 @@ -"""Shared naming for the actions MCP server. - -Kept dependency-light (no framework imports) so both the runtime adapter -and agent-side config can derive identical tool names from one place. -""" - -ACTIONS_SERVER_NAME = "actions" - - -def tool_name_for(action_type: str) -> str: - """Map an action type to its MCP tool name. - - ``"bugzilla.update_bug"`` -> ``"bugzilla_update_bug"``. - """ - return action_type.replace(".", "_") diff --git a/libs/hackbot-runtime/hackbot_runtime/actions/registry.py b/libs/hackbot-runtime/hackbot_runtime/actions/registry.py deleted file mode 100644 index b9bae37589..0000000000 --- a/libs/hackbot-runtime/hackbot_runtime/actions/registry.py +++ /dev/null @@ -1,63 +0,0 @@ -import functools -import inspect -from collections.abc import Awaitable, Callable -from dataclasses import dataclass - -from pydantic import create_model - - -class ActionInputError(Exception): - """Invalid action input (bad path, etc.). - - Raised by handlers; a per-framework adapter turns it into the - framework's tool-error signal. The action layer imports no framework - error type. - """ - - -@dataclass -class ActionDefinition: - """Declarative description of one recordable action. - - ``handler`` is an async function whose **first positional parameter** is - the ``ActionsRecorder``. The remaining parameters carry typed - annotations (``Annotated[T, Field(...)]``) that double as the - agent-facing schema, exposed framework-neutrally via ``input_schema``. - Handlers return a short confirmation string. 
- """ - - type: str - description: str - handler: Callable[..., Awaitable[str]] - - @functools.cached_property - def input_schema(self) -> dict: - """JSON schema of the agent-facing arguments (excludes ``recorder``). - - Derived once from the handler signature so every adapter (MCP today, - LangChain later) shares one schema. - """ - sig = inspect.signature(self.handler, eval_str=True) - fields = { - name: ( - param.annotation, - ... if param.default is inspect.Parameter.empty else param.default, - ) - for name, param in list(sig.parameters.items())[1:] # skip `recorder` - } - model = create_model(self.type.replace(".", "_") + "_args", **fields) - return model.model_json_schema() - - -def get_actions(types: list[str] | None = None) -> list[ActionDefinition]: - """Return registered actions, optionally filtered by ``type`` list. - - Import is deferred to avoid an import cycle between the registry and - the per-domain modules that register actions. - """ - from hackbot_runtime.actions import ALL_ACTIONS - - if types is None: - return list(ALL_ACTIONS) - wanted = set(types) - return [a for a in ALL_ACTIONS if a.type in wanted] diff --git a/libs/hackbot-runtime/hackbot_runtime/claude.py b/libs/hackbot-runtime/hackbot_runtime/claude.py new file mode 100644 index 0000000000..da1d6c2234 --- /dev/null +++ b/libs/hackbot-runtime/hackbot_runtime/claude.py @@ -0,0 +1,125 @@ +"""Shared claude-agent-sdk helpers for hackbot agents. + +Generic, agent-neutral building blocks that every claude-agent-sdk agent would +otherwise copy verbatim. Agents still assemble their own ``ClaudeAgentOptions`` +and drive the ``ClaudeSDKClient`` loop — these just remove the boilerplate of +rendering the streamed messages. + +Requires the ``claude-sdk`` optional extra of hackbot-runtime. 
+""" + +from __future__ import annotations + +import json +from pathlib import Path + +from claude_agent_sdk import ( + AssistantMessage, + ResultMessage, + SystemMessage, + TextBlock, + ThinkingBlock, + ToolResultBlock, + ToolUseBlock, + UserMessage, +) + + +def _truncate(s: str, n: int = 500) -> str: + return s if len(s) <= n else s[:n] + f"... [{len(s) - n} more chars]" + + +class Reporter: + """Routes streamed claude-agent-sdk messages to stdout and/or a log file.""" + + def __init__(self, verbose: bool, log_path: Path | None): + self.verbose = verbose + self._log = log_path.open("w", encoding="utf-8") if log_path else None + self._turn = 0 + + def __enter__(self): + return self + + def __exit__(self, *exc): + if self._log: + self._log.close() + + def header(self, title: str) -> None: + """Emit a section header (e.g. ``"bug 12345"``) and reset the turn count.""" + self._turn = 0 + banner = f"\n{'#' * 60}\n# {title}\n{'#' * 60}" + self._emit(banner, always=True) + + def _emit(self, line: str, *, always: bool = False, full: str | None = None): + if self._log: + self._log.write((full if full is not None else line) + "\n") + self._log.flush() + if always or self.verbose: + print(line) + + def message(self, msg) -> None: + if isinstance(msg, AssistantMessage): + is_main = msg.parent_tool_use_id is None + label = "agent" if is_main else "subagent" + if is_main: + self._turn += 1 + self._emit(f"\n--- turn {self._turn} ---") + for block in msg.content: + if isinstance(block, TextBlock): + self._emit(f"\n[{label}] {block.text}", always=is_main) + elif isinstance(block, ThinkingBlock): + thinking = block.thinking.strip() + snippet = thinking.split("\n", 1)[0] + self._emit( + f"[{label}:thinking] {_truncate(snippet, 120)}", + full=f"[{label}:thinking]\n{thinking}", + ) + elif isinstance(block, ToolUseBlock): + inp = json.dumps(block.input, default=str) + inp_full = json.dumps(block.input, indent=2, default=str) + self._emit( + f"[{label}→tool] {block.name}({_truncate(inp, 
300)})", + full=f"[{label}→tool] {block.name}\n{inp_full}", + ) + + elif isinstance(msg, UserMessage): + if isinstance(msg.content, list): + for block in msg.content: + if isinstance(block, ToolResultBlock): + marker = "ERROR" if block.is_error else "ok" + if isinstance(block.content, str): + text = block.content + elif isinstance(block.content, list): + parts = [ + c.get("text", "") + for c in block.content + if isinstance(c, dict) and c.get("type") == "text" + ] + text = "\n".join(parts) + else: + text = str(block.content) + self._emit( + f" [tool←{marker}] {_truncate(text, 400)}", + full=f" [tool←{marker}]\n{text}", + ) + + elif isinstance(msg, SystemMessage): + if msg.subtype == "init": + model = msg.data.get("model", "?") + self._emit(f"[system] session started (model={model})") + else: + data = json.dumps(msg.data, default=str) + self._emit( + f"[system:{msg.subtype}] {_truncate(data, 200)}", + full=f"[system:{msg.subtype}] {data}", + ) + + elif isinstance(msg, ResultMessage): + self._emit(f"\n{'=' * 60}", always=True) + if msg.total_cost_usd: + line = f"[done] turns={msg.num_turns} cost=${msg.total_cost_usd:.4f}" + else: + line = f"[done] turns={msg.num_turns}" + self._emit(line, always=True) + if msg.is_error: + self._emit(f"[done] ERROR: {msg.result}", always=True) diff --git a/libs/hackbot-runtime/hackbot_runtime/config.py b/libs/hackbot-runtime/hackbot_runtime/config.py new file mode 100644 index 0000000000..ef1ae6173d --- /dev/null +++ b/libs/hackbot-runtime/hackbot_runtime/config.py @@ -0,0 +1,59 @@ +"""Declarative agent configuration loaded from ``hackbot.toml``. + +Captures the capability declarations that are intrinsic to an agent (which +source repo it operates on, whether it needs a Firefox build) so the runtime +can prepare them on the agent's behalf. Per-run inputs and secrets are NOT here +— they arrive via environment variables. 
+""" + +from __future__ import annotations + +import tomllib +from pathlib import Path + +from pydantic import BaseModel + + +class SourceConfig(BaseModel): + """The source repository an agent operates on (see ``ensure_source_repo``).""" + + repo_url: str + # Where the checkout lands. The env var SOURCE_REPO overrides this at runtime + # (the orchestrator points it at the task-local workspace). + checkout_path: Path = Path("/workspace/source") + + +class FirefoxConfig(BaseModel): + """Firefox build the agent needs (paths derived from the source checkout).""" + + enabled: bool = True + # Object directory name under the source root; matches the agent-tools + # FirefoxContext default. + objdir: str = "objdir-ff-asan" + + +class HackbotConfig(BaseModel): + """Parsed ``hackbot.toml``. Every table is optional. + + An agent that does not operate on a repo omits ``[source]``; one that does + not need Firefox omits ``[firefox]``. A missing file yields an empty config. + """ + + source: SourceConfig | None = None + firefox: FirefoxConfig | None = None + + +def load_config(path: Path) -> HackbotConfig: + """Load and validate ``hackbot.toml`` at ``path``. + + Strict: the file must exist. The "agent declares no capabilities" fallback is + handled by discovery (``_resolve_config`` returns an empty + :class:`HackbotConfig` when no toml is found), which never passes a missing + path here. + """ + if not path.exists(): + raise FileNotFoundError(f"Config file {path} does not exist") + + with path.open("rb") as fh: + data = tomllib.load(fh) + return HackbotConfig.model_validate(data) diff --git a/libs/hackbot-runtime/hackbot_runtime/context.py b/libs/hackbot-runtime/hackbot_runtime/context.py index 5a2c1aef25..6f71c664e1 100644 --- a/libs/hackbot-runtime/hackbot_runtime/context.py +++ b/libs/hackbot-runtime/hackbot_runtime/context.py @@ -1,15 +1,38 @@ +"""The single object an agent's ``main()`` receives from the runtime. + +``HackbotContext`` is what an agent author touches. 
It answers for everything +the platform provides — the prepared source checkout, Firefox build paths, +model-provider credentials — plus the results/artifacts/actions plumbing, so the +author never cares how or from where those come. + +Its platform fields are read from the environment (the orchestrator sets them); +its capability declarations come from the agent's ``hackbot.toml`` +(:class:`HackbotConfig`), attached via :meth:`from_config`. +""" + +from __future__ import annotations + import datetime +import os +import tempfile import uuid from functools import cached_property from pathlib import Path +from typing import TYPE_CHECKING -from pydantic import Field +from pydantic import Field, PrivateAttr from pydantic_settings import BaseSettings, SettingsConfigDict from hackbot_runtime import artifacts -from hackbot_runtime.actions import ActionsRecorder +from hackbot_runtime.actions.recorder import ActionsRecorder +from hackbot_runtime.config import HackbotConfig, load_config +from hackbot_runtime.providers import AnthropicAuth +from hackbot_runtime.source import ensure_source_repo from hackbot_runtime.uploader import SignedPolicyUploader +if TYPE_CHECKING: + from agent_tools.firefox import FirefoxContext + def _default_run_id() -> str: """A unique, sortable id for runs that don't get one from the platform. @@ -22,14 +45,14 @@ def _default_run_id() -> str: return f"local-{stamp}-{uuid.uuid4().hex[:6]}" -class Context(BaseSettings): - """Platform context handed to every agent's main() by the runtime. +class HackbotContext(BaseSettings): + """Platform capabilities + results plumbing handed to every agent's main(). - `run_id` defaults to a generated unique id (the orchestrator overrides it - via ``RUN_ID`` in production). The results-upload fields are optional so - local-dev runs (compose, scripts) can start the agent without a - signed POST policy — in that case the runtime writes results into the - local artifacts dir rather than uploading. 
+ `run_id` defaults to a generated unique id (the orchestrator overrides it via + ``RUN_ID`` in production). The results-upload fields are optional so local-dev + runs (compose, scripts) can start the agent without a signed POST policy — in + that case results are written into the local artifacts dir rather than + uploaded. """ run_id: str = Field(default_factory=_default_run_id) @@ -45,6 +68,71 @@ class Context(BaseSettings): model_config = SettingsConfigDict(extra="ignore") + # Capability declarations from hackbot.toml (not env); attached after + # construction via from_config()/from_config_obj(). + _config: HackbotConfig = PrivateAttr(default_factory=HackbotConfig) + + @classmethod + def from_config(cls, config_path: Path) -> "HackbotContext": + """Build from ``hackbot.toml`` at ``config_path`` plus env-derived fields.""" + return cls.from_config_obj(load_config(config_path)) + + @classmethod + def from_config_obj(cls, config: HackbotConfig) -> "HackbotContext": + """Build from an already-parsed config plus env-derived fields.""" + obj = cls() + obj._config = config + return obj + + @property + def config(self) -> HackbotConfig: + return self._config + + # --- Platform capabilities (declared in hackbot.toml) ------------- # + + @cached_property + def source_repo(self) -> Path: + """The prepared source checkout, cloned/refreshed on first access. + + The path comes from ``SOURCE_REPO`` (set by the orchestrator) or, failing + that, the ``[source].checkout_path`` in ``hackbot.toml``. The checkout is + prepared lazily so agents that never touch source pay no git cost. + """ + if self._config.source is None: + raise RuntimeError( + "This agent did not declare a [source] in hackbot.toml; " + "no source repository is available." 
+ ) + env_path = os.environ.get("SOURCE_REPO") + path = Path(env_path) if env_path else self._config.source.checkout_path + ensure_source_repo(path, self._config.source.repo_url) + return path + + @cached_property + def firefox(self) -> "FirefoxContext": + """Firefox build paths derived from the prepared source checkout. + + Importing ``agent_tools.firefox`` lazily keeps the base runtime free of + the ``agent-tools[firefox]`` extra for agents that don't need it. + """ + if self._config.firefox is None or not self._config.firefox.enabled: + raise RuntimeError( + "This agent did not declare an enabled [firefox] in " + "hackbot.toml; no Firefox build is available." + ) + from agent_tools.firefox import FirefoxContext + + return FirefoxContext.from_source_repo( + self.source_repo, objdir=self._config.firefox.objdir + ) + + @cached_property + def anthropic(self) -> AnthropicAuth: + """Anthropic credentials (validated on first key access).""" + return AnthropicAuth() + + # --- Results / artifacts / actions plumbing ----------------------- # + @cached_property def uploader(self) -> SignedPolicyUploader | None: if not self.results_policy_url: @@ -60,6 +148,16 @@ def run_artifacts_dir(self) -> Path: """Per-run local artifacts directory: ``artifacts_dir / run_id``.""" return self.artifacts_dir / self.run_id + @cached_property + def log_path(self) -> Path: + """A writable path for the agent's run log; published by the runtime. + + The parent dir is created on first access (so a ``Reporter`` can open the + file straight away). Agents that never write a log just leave it absent, + and the runtime's log-publishing step becomes a no-op. 
+ """ + return Path(tempfile.mkdtemp(prefix=f"hackbot-{self.run_id}-")) / "agent.log" + @cached_property def actions(self) -> ActionsRecorder: return ActionsRecorder(self.uploader, artifacts_dir=self.run_artifacts_dir) diff --git a/libs/hackbot-runtime/hackbot_runtime/errors.py b/libs/hackbot-runtime/hackbot_runtime/errors.py new file mode 100644 index 0000000000..5ab01e466d --- /dev/null +++ b/libs/hackbot-runtime/hackbot_runtime/errors.py @@ -0,0 +1,7 @@ +class AgentError(Exception): + """Raise from an agent's ``main()`` to fail the run with a clear message. + + The runtime records the message as the run's ``error`` in ``summary.json`` + and exits non-zero. Any other exception fails the run too — ``AgentError`` + just reads as a deliberate, expected failure rather than a crash. + """ diff --git a/libs/hackbot-runtime/hackbot_runtime/providers.py b/libs/hackbot-runtime/hackbot_runtime/providers.py new file mode 100644 index 0000000000..da618c474e --- /dev/null +++ b/libs/hackbot-runtime/hackbot_runtime/providers.py @@ -0,0 +1,48 @@ +"""Credentials the runtime provides to agents. + +The runtime owns where credentials come from so agents don't reach into the +environment themselves. Today only Anthropic is wired; the :class:`Provider` +protocol leaves room to add others (Vertex, OpenAI, ...) without changing the +agent-facing surface. +""" + +from __future__ import annotations + +import os +from typing import Protocol, runtime_checkable + + +class ProviderError(RuntimeError): + """A required credential for a provider is missing or invalid.""" + + +@runtime_checkable +class Provider(Protocol): + """A credentialed model/service provider the runtime can hand to an agent.""" + + name: str + + @property + def api_key(self) -> str: ... + + +class AnthropicAuth: + """Anthropic credentials, read from the environment and validated on access. 
+ + Exposes the API key explicitly (rather than relying on the SDK implicitly + reading the env) so a missing key fails fast with a clear message instead of + surfacing as an opaque error deep inside a request. + """ + + name = "anthropic" + env_var = "ANTHROPIC_API_KEY" + + @property + def api_key(self) -> str: + key = os.environ.get(self.env_var) + if not key: + raise ProviderError( + f"{self.env_var} is not set; the runtime cannot provide " + "Anthropic credentials to this agent." + ) + return key diff --git a/libs/hackbot-runtime/hackbot_runtime/result.py b/libs/hackbot-runtime/hackbot_runtime/result.py deleted file mode 100644 index 8589aac069..0000000000 --- a/libs/hackbot-runtime/hackbot_runtime/result.py +++ /dev/null @@ -1,18 +0,0 @@ -from dataclasses import dataclass, field -from typing import Any, Literal - - -@dataclass -class AgentResult: - """Outcome reported by an agent's main() to the runtime. - - The runtime serialises this into the summary.json artifact the orchestrator - reads. `status` drives the run's terminal state in hackbot-api; `findings` - is opaque to the platform and surfaced verbatim. Recorded actions are not - carried here — the runtime reads them from `Context.actions`. - """ - - status: Literal["ok", "error"] = "ok" - error: str | None = None - findings: dict[str, Any] = field(default_factory=dict) - exit_code: int = 0 diff --git a/libs/hackbot-runtime/hackbot_runtime/results.py b/libs/hackbot-runtime/hackbot_runtime/results.py new file mode 100644 index 0000000000..a4b3832bd8 --- /dev/null +++ b/libs/hackbot-runtime/hackbot_runtime/results.py @@ -0,0 +1,13 @@ +"""Base result model for hackbot agents. + +An agent's ``main()`` may return a subclass of :class:`HackbotAgentResult`; the +runtime serializes it into ``summary.json``'s ``findings``. Framework-neutral — +plain pydantic, no claude-agent-sdk dependency. 
+""" + +from pydantic import BaseModel + + +class HackbotAgentResult(BaseModel): + num_turns: int + total_cost_usd: float | None = None diff --git a/libs/hackbot-runtime/hackbot_runtime/runtime.py b/libs/hackbot-runtime/hackbot_runtime/runtime.py index 3f832dee95..d60234aac4 100644 --- a/libs/hackbot-runtime/hackbot_runtime/runtime.py +++ b/libs/hackbot-runtime/hackbot_runtime/runtime.py @@ -1,20 +1,35 @@ import asyncio +import inspect import logging import sys import traceback from collections.abc import Awaitable, Callable +from pathlib import Path +from typing import NoReturn from pydantic import ValidationError -from hackbot_runtime.context import Context -from hackbot_runtime.result import AgentResult +from hackbot_runtime.config import HackbotConfig, load_config +from hackbot_runtime.context import HackbotContext +from hackbot_runtime.results import HackbotAgentResult log = logging.getLogger("hackbot_runtime") -AgentMain = Callable[[Context], AgentResult] -AsyncAgentMain = Callable[[Context], Awaitable[AgentResult]] +# An agent's main() returns a HackbotAgentResult on success; to fail the run it +# raises (AgentError, or any exception). The runtime turns that outcome into +# summary.json + an exit code. +Findings = HackbotAgentResult +AgentMain = Callable[[HackbotContext], Findings] +AsyncAgentMain = Callable[[HackbotContext], Awaitable[Findings]] +# What run()/run_async() accept to locate an agent's hackbot.toml: a path to it, +# an already-parsed config, or None to auto-discover ``hackbot.toml`` (in the +# working directory or above the entry point's module). +ConfigArg = Path | HackbotConfig | None + +_CONFIG_NAME = "hackbot.toml" _SUMMARY_NAME = "summary.json" +_AGENT_LOG_KEY = "logs/agent.log" def _configure_logging() -> None: @@ -26,44 +41,113 @@ def _configure_logging() -> None: ) -def _summary_payload_from_result(result: AgentResult, ctx: Context) -> dict: - # Actions are recorded via Context.actions; the result never carries them. 
+def _ok_payload(ctx: HackbotContext, findings: dict) -> dict: + # Actions are recorded via ctx.actions; the agent never carries them. return { - "status": result.status, - "error": result.error, - "findings": result.findings, + "status": "ok", + "error": None, + "findings": findings, "actions": ctx.actions.actions, } -def _summary_payload_from_exception(exc: BaseException, ctx: Context) -> dict: +def _error_payload( + ctx: HackbotContext, error: str, *, traceback_str: str | None = None +) -> dict: return { "status": "error", - "error": f"{type(exc).__name__}: {exc}", - "findings": {"traceback": traceback.format_exc()}, + "error": error, + "findings": {"traceback": traceback_str} if traceback_str else {}, "actions": ctx.actions.actions, } -def _load_context() -> Context | None: +def _discover_config_path(entrypoint: Callable) -> Path | None: + """Locate ``hackbot.toml`` for an agent that didn't pass one explicitly. + + Agents keep ``hackbot.toml`` at their agent root (alongside ``pyproject.toml`` + / ``Dockerfile``), above the ``hackbot_agents`` package. Two layouts to cover: + + - **Deployed image**: the package is installed into site-packages, but the + Dockerfile copies ``hackbot.toml`` into the working directory — so check + the cwd first. + - **Editable checkout / tests**: the entry point's module lives under the + agent root, so walk up from it until the toml turns up. 
+ """ + cwd_candidate = Path.cwd() / _CONFIG_NAME + if cwd_candidate.exists(): + return cwd_candidate + try: + module_file = inspect.getsourcefile(entrypoint) + except TypeError: + module_file = None + if module_file: + for parent in Path(module_file).resolve().parents: + candidate = parent / _CONFIG_NAME + if candidate.exists(): + return candidate + return None + + +def _resolve_config(entrypoint: Callable, config: ConfigArg) -> HackbotConfig: + if isinstance(config, HackbotConfig): + return config + path = config if isinstance(config, Path) else _discover_config_path(entrypoint) + return load_config(path) if path else HackbotConfig() + + +def _load_hackbot(entrypoint: Callable, config: ConfigArg) -> HackbotContext | None: + """Build the HackbotContext from the resolved config plus env-derived fields. + + ``config`` may be a path to a ``hackbot.toml``, an already-parsed + :class:`HackbotConfig`, or ``None`` to auto-discover the toml (cwd or above + the entry point's module), falling back to an empty config when there's none. + """ + parsed = _resolve_config(entrypoint, config) try: - return Context() + return HackbotContext.from_config_obj(parsed) except ValidationError as exc: log.error( - "Failed to load Context from env; no summary can be written.\n%s", + "Failed to load HackbotContext from env; no summary can be written.\n%s", exc, ) return None -def _finish(ctx: Context, result_or_exc: AgentResult | BaseException) -> int: - if isinstance(result_or_exc, AgentResult): - payload = _summary_payload_from_result(result_or_exc, ctx) - exit_code = result_or_exc.exit_code +def _publish_log(ctx: HackbotContext) -> None: + """Publish the run log under the canonical key, if the agent wrote one.""" + if ctx.log_path.exists(): + ctx.publish_file(_AGENT_LOG_KEY, ctx.log_path, "text/plain") + + +def _finish(ctx: HackbotContext, outcome: object) -> int: + """Write summary.json from the agent's outcome and return the exit code. 
+ + ``outcome`` is the agent's :class:`HackbotAgentResult` on success, or the + exception it raised on failure. + """ + if isinstance(outcome, BaseException): + payload = _error_payload( + ctx, + f"{type(outcome).__name__}: {outcome}", + traceback_str="".join(traceback.format_exception(outcome)), + ) + exit_code = 1 + elif isinstance(outcome, HackbotAgentResult): + payload = _ok_payload(ctx, outcome.model_dump()) + exit_code = 0 else: - payload = _summary_payload_from_exception(result_or_exc, ctx) + # Contract violation: not a HackbotAgentResult or an exception. + msg = f"Agent returned {type(outcome).__name__}; expected a HackbotAgentResult" + log.error(msg) + payload = _error_payload(ctx, msg) exit_code = 1 + try: + _publish_log(ctx) + except Exception: + log.exception("Failed to publish agent log") + # Upload when a signed policy is configured, else write into the local # artifacts dir (so local/compose/direct runs leave it on the host). try: @@ -81,46 +165,31 @@ def _finish(ctx: Context, result_or_exc: AgentResult | BaseException) -> int: return exit_code -def _validate_result(result: object) -> AgentResult: - """Coerce arbitrary agent return values into an AgentResult. - - Returning a synthetic AgentResult (rather than letting an exception - object flow into `_finish`) keeps the summary deterministic: the - exception path calls `traceback.format_exc()`, which evaluates to - "NoneType: None" when no exception is active. 
- """ - if isinstance(result, AgentResult): - return result - msg = f"Agent returned {type(result).__name__}; expected AgentResult" - log.error(msg) - return AgentResult(status="error", error=msg, exit_code=1) - - -def run(entrypoint: AgentMain) -> int: +def run(entrypoint: AgentMain, config: ConfigArg = None) -> NoReturn: _configure_logging() - ctx = _load_context() + ctx = _load_hackbot(entrypoint, config) if ctx is None: - return 2 + raise SystemExit(2) try: - result = entrypoint(ctx) + outcome: object = entrypoint(ctx) except Exception as exc: log.exception("Agent raised an exception") - return _finish(ctx, exc) + outcome = exc - return _finish(ctx, _validate_result(result)) + raise SystemExit(_finish(ctx, outcome)) -def run_async(entrypoint: AsyncAgentMain) -> int: +def run_async(entrypoint: AsyncAgentMain, config: ConfigArg = None) -> NoReturn: _configure_logging() - ctx = _load_context() + ctx = _load_hackbot(entrypoint, config) if ctx is None: - return 2 + raise SystemExit(2) try: - result = asyncio.run(entrypoint(ctx)) + outcome: object = asyncio.run(entrypoint(ctx)) except Exception as exc: log.exception("Agent raised an exception") - return _finish(ctx, exc) + outcome = exc - return _finish(ctx, _validate_result(result)) + raise SystemExit(_finish(ctx, outcome)) diff --git a/libs/hackbot-runtime/hackbot_runtime/source.py b/libs/hackbot-runtime/hackbot_runtime/source.py new file mode 100644 index 0000000000..04352876eb --- /dev/null +++ b/libs/hackbot-runtime/hackbot_runtime/source.py @@ -0,0 +1,70 @@ +"""Prepare a source checkout for agents that operate on a code repository.""" + +from __future__ import annotations + +import logging +import subprocess +import sys +from pathlib import Path + +log = logging.getLogger("hackbot_runtime.source") + + +def ensure_source_repo(source_repo: Path, repo_url: str) -> None: + """Ensure a shallow checkout of ``repo_url`` exists at ``source_repo``. 
+ + Idempotent: clones if absent, otherwise shallow-fetches and hard-resets to + the remote HEAD. Recovers from a partial checkout left by an earlier failed + run (e.g. the clone succeeded but the checkout ran out of disk). + """ + git_dir = source_repo / ".git" + if git_dir.exists(): + # An earlier run killed mid-fetch (e.g. the container was stopped) + # leaves stale lock files behind. Since each run drives git + # sequentially, any lock present at startup is stale and safe to + # remove. + for lock in (git_dir / "shallow.lock", git_dir / "index.lock"): + if lock.exists(): + log.warning("removing stale git lock %s", lock) + lock.unlink() + status = subprocess.run( + ["git", "-C", str(source_repo), "status", "--porcelain"], + check=True, + capture_output=True, + text=True, + ) + # A healthy fresh shallow clone has an empty status; a broken + # checkout shows thousands of missing-file "D" entries. + if status.stdout.strip(): + log.warning( + "source at %s is incomplete; restoring working tree", source_repo + ) + subprocess.run( + ["git", "-C", str(source_repo), "restore", "--source=HEAD", ":/"], + check=True, + stdout=sys.stderr, + stderr=sys.stderr, + ) + log.info("updating source at %s (shallow fetch)", source_repo) + subprocess.run( + ["git", "-C", str(source_repo), "fetch", "--depth=1", "origin", "HEAD"], + check=True, + stdout=sys.stderr, + stderr=sys.stderr, + ) + subprocess.run( + ["git", "-C", str(source_repo), "reset", "--hard", "FETCH_HEAD"], + check=True, + stdout=sys.stderr, + stderr=sys.stderr, + ) + return + source_repo.mkdir(parents=True, exist_ok=True) + log.info("cloning %s (shallow) to %s", repo_url, source_repo) + subprocess.run( + ["git", "clone", "--depth=1", repo_url, str(source_repo)], + check=True, + stdout=sys.stderr, + stderr=sys.stderr, + ) + log.info("shallow clone complete") diff --git a/libs/hackbot-runtime/hackbot_runtime/uploader.py b/libs/hackbot-runtime/hackbot_runtime/uploader.py index dc888ba381..038a92c8bd 100644 --- 
a/libs/hackbot-runtime/hackbot_runtime/uploader.py +++ b/libs/hackbot-runtime/hackbot_runtime/uploader.py @@ -8,7 +8,7 @@ class SignedPolicyUploader: """POST artifacts to a GCS V4 signed POST policy. - The orchestrator passes the policy via env vars consumed by `Context`. + The orchestrator passes the policy via env vars consumed by `HackbotContext`. The Job has no GCP identity; this signed policy is its only write capability. """ diff --git a/libs/hackbot-runtime/pyproject.toml b/libs/hackbot-runtime/pyproject.toml index e05c460233..00a4bee4a1 100644 --- a/libs/hackbot-runtime/pyproject.toml +++ b/libs/hackbot-runtime/pyproject.toml @@ -6,13 +6,21 @@ requires-python = ">=3.12" dependencies = [ "requests>=2.32.0", "pydantic-settings>=2.1.0", + "agent-tools", ] [project.optional-dependencies] -# claude-agent-sdk adapter (hackbot_runtime.actions.claude_sdk) that exposes -# the action registry as an in-process MCP server. Not needed by consumers -# that only read the summary contract. -claude-sdk = ["claude-agent-sdk>=0.1.30"] +claude-sdk = ["claude-agent-sdk>=0.1.30", "agent-tools[claude-sdk]"] + +[tool.uv.sources] +agent-tools = { workspace = true } + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["hackbot_runtime"] [tool.pytest.ini_options] asyncio_mode = "auto" diff --git a/libs/hackbot-runtime/tests/test_bugzilla_actions.py b/libs/hackbot-runtime/tests/test_bugzilla_actions.py index 7b0cc2b289..ab27e3447c 100644 --- a/libs/hackbot-runtime/tests/test_bugzilla_actions.py +++ b/libs/hackbot-runtime/tests/test_bugzilla_actions.py @@ -1,7 +1,8 @@ """Tests for the bugzilla action handlers (footers, mime, merge, errors).""" import pytest -from hackbot_runtime.actions import ActionInputError, ActionsRecorder, bugzilla +from agent_tools.registry import ToolError +from hackbot_runtime.actions import ActionsRecorder, bugzilla async def test_add_comment_appends_footer(): @@ -37,7 +38,7 @@ async 
def test_add_attachment_guesses_mime(tmp_path): async def test_add_attachment_missing_file_raises(): rec = ActionsRecorder() - with pytest.raises(ActionInputError): + with pytest.raises(ToolError): await bugzilla.add_attachment( rec, bug_id=1, file_path="/no/such.patch", reasoning="r" ) diff --git a/libs/hackbot-runtime/tests/test_claude.py b/libs/hackbot-runtime/tests/test_claude.py new file mode 100644 index 0000000000..0433437778 --- /dev/null +++ b/libs/hackbot-runtime/tests/test_claude.py @@ -0,0 +1,36 @@ +"""Tests for the shared claude-agent-sdk Reporter (hackbot_runtime.claude).""" + +from hackbot_runtime.claude import Reporter, _truncate + + +def test_truncate_short_string_unchanged(): + assert _truncate("hello", 10) == "hello" + + +def test_truncate_long_string_marks_remainder(): + out = _truncate("x" * 20, 5) + assert out.startswith("xxxxx") + assert "15 more chars" in out + + +def test_header_writes_banner_to_log(tmp_path): + log = tmp_path / "agent.log" + with Reporter(verbose=False, log_path=log) as reporter: + reporter.header("bug 12345") + contents = log.read_text() + assert "# bug 12345" in contents + assert "#" * 60 in contents + + +def test_header_always_prints_even_when_not_verbose(capsys): + with Reporter(verbose=False, log_path=None) as reporter: + reporter.header("bug 999") + out = capsys.readouterr().out + assert "# bug 999" in out + + +def test_no_log_file_when_path_is_none(tmp_path): + # Should not raise and should not create any file. 
+ with Reporter(verbose=True, log_path=None) as reporter: + reporter.header("section") + assert not list(tmp_path.iterdir()) diff --git a/libs/hackbot-runtime/tests/test_claude_sdk.py b/libs/hackbot-runtime/tests/test_claude_sdk.py index 9def348094..7f665ca436 100644 --- a/libs/hackbot-runtime/tests/test_claude_sdk.py +++ b/libs/hackbot-runtime/tests/test_claude_sdk.py @@ -1,8 +1,8 @@ -"""Tests for the claude-agent-sdk actions adapter (guards issue #1).""" +"""Tests for the actions MCP server (built via agent-tools' adapter).""" import mcp.server.lowlevel.server as low from hackbot_runtime.actions import ActionsRecorder -from hackbot_runtime.actions.claude_sdk import build_actions_sdk_server +from hackbot_runtime.actions.claude_sdk import actions_server_for from mcp.types import CallToolRequest, CallToolRequestParams, ListToolsRequest _ALL = [ @@ -14,7 +14,7 @@ def _server(recorder): - config = build_actions_sdk_server(recorder, types=_ALL) + _, config = actions_server_for(recorder, types=_ALL) assert config["type"] == "sdk" return config["instance"] @@ -84,3 +84,24 @@ async def test_missing_file_surfaces_is_error(): assert result.isError is True text = " ".join(getattr(c, "text", "") for c in result.content) assert "file not found" in text + + +def test_actions_server_for_creates_fallback_recorder(tmp_path): + recorder, config = actions_server_for( + None, types=_ALL, fallback_artifacts_dir=tmp_path + ) + assert isinstance(recorder, ActionsRecorder) + assert config["type"] == "sdk" + + +def test_actions_server_for_reuses_given_recorder(): + given = ActionsRecorder() + recorder, config = actions_server_for(given, types=_ALL) + assert recorder is given + assert config["type"] == "sdk" + + +async def test_actions_server_for_exposes_selected_tools(): + _, config = actions_server_for(ActionsRecorder(), types=["bugzilla.update_bug"]) + tools = await _list(config["instance"]) + assert {t.name for t in tools} == {"bugzilla_update_bug"} diff --git 
a/libs/hackbot-runtime/tests/test_config.py b/libs/hackbot-runtime/tests/test_config.py new file mode 100644 index 0000000000..80a82067e5 --- /dev/null +++ b/libs/hackbot-runtime/tests/test_config.py @@ -0,0 +1,60 @@ +"""Tests for hackbot.toml parsing into HackbotConfig.""" + +from pathlib import Path + +import pytest +from hackbot_runtime.config import load_config + +FULL_TOML = """ +[source] +repo_url = "https://example.com/repo.git" +checkout_path = "/workspace/repo" + +[firefox] +enabled = true +objdir = "objdir-custom" +""" + + +def test_load_full_config(tmp_path): + path = tmp_path / "hackbot.toml" + path.write_text(FULL_TOML) + + cfg = load_config(path) + + assert cfg.source is not None + assert cfg.source.repo_url == "https://example.com/repo.git" + assert cfg.source.checkout_path == Path("/workspace/repo") + assert cfg.firefox is not None + assert cfg.firefox.enabled is True + assert cfg.firefox.objdir == "objdir-custom" + + +def test_missing_file_raises(tmp_path): + # load_config is strict; the "no config" fallback lives in discovery + # (_resolve_config), which never hands a missing path to load_config. + with pytest.raises(FileNotFoundError): + load_config(tmp_path / "does-not-exist.toml") + + +def test_missing_tables_default_to_none(tmp_path): + path = tmp_path / "hackbot.toml" + path.write_text('[source]\nrepo_url = "https://example.com/repo.git"\n') + + cfg = load_config(path) + + assert cfg.source is not None + # checkout_path falls back to the SourceConfig default. 
+ assert cfg.source.checkout_path == Path("/workspace/source") + assert cfg.firefox is None + + +def test_firefox_defaults(tmp_path): + path = tmp_path / "hackbot.toml" + path.write_text("[firefox]\n") + + cfg = load_config(path) + + assert cfg.firefox is not None + assert cfg.firefox.enabled is True + assert cfg.firefox.objdir == "objdir-ff-asan" diff --git a/libs/hackbot-runtime/tests/test_context.py b/libs/hackbot-runtime/tests/test_context.py new file mode 100644 index 0000000000..6dea30342a --- /dev/null +++ b/libs/hackbot-runtime/tests/test_context.py @@ -0,0 +1,83 @@ +"""Tests for HackbotContext capabilities and results plumbing.""" + +from pathlib import Path + +import pytest +from hackbot_runtime import HackbotContext +from hackbot_runtime.config import FirefoxConfig, HackbotConfig, SourceConfig + + +def _hb(tmp_path, config: HackbotConfig) -> HackbotContext: + hb = HackbotContext(run_id="local-test", artifacts_dir=tmp_path / "artifacts") + hb._config = config + return hb + + +def test_source_repo_without_declaration_raises(tmp_path): + hb = _hb(tmp_path, HackbotConfig()) + with pytest.raises(RuntimeError, match="\\[source\\]"): + hb.source_repo + + +def test_firefox_without_declaration_raises(tmp_path): + hb = _hb(tmp_path, HackbotConfig()) + with pytest.raises(RuntimeError, match="\\[firefox\\]"): + hb.firefox + + +def test_firefox_disabled_raises(tmp_path): + cfg = HackbotConfig( + source=SourceConfig(repo_url="x"), firefox=FirefoxConfig(enabled=False) + ) + hb = _hb(tmp_path, cfg) + with pytest.raises(RuntimeError, match="\\[firefox\\]"): + hb.firefox + + +def test_source_repo_prepares_and_honors_env_override(tmp_path, monkeypatch): + calls = [] + + def fake_ensure(path: Path, repo_url: str) -> None: + calls.append((path, repo_url)) + + monkeypatch.setattr("hackbot_runtime.context.ensure_source_repo", fake_ensure) + monkeypatch.setenv("SOURCE_REPO", str(tmp_path / "from-env")) + + cfg = HackbotConfig( + source=SourceConfig( + 
repo_url="https://example.com/r.git", + checkout_path=Path("/from/toml"), + ) + ) + hb = _hb(tmp_path, cfg) + + assert hb.source_repo == tmp_path / "from-env" + assert calls == [(tmp_path / "from-env", "https://example.com/r.git")] + + +def test_source_repo_uses_toml_path_without_env(tmp_path, monkeypatch): + monkeypatch.delenv("SOURCE_REPO", raising=False) + monkeypatch.setattr( + "hackbot_runtime.context.ensure_source_repo", lambda *a, **k: None + ) + cfg = HackbotConfig( + source=SourceConfig(repo_url="r", checkout_path=Path("/from/toml")) + ) + hb = _hb(tmp_path, cfg) + assert hb.source_repo == Path("/from/toml") + + +def test_results_plumbing(tmp_path): + hb = _hb(tmp_path, HackbotConfig()) + + assert hb.run_id == "local-test" + + log = tmp_path / "agent.log" + log.write_text("hello") + key = hb.publish_file("logs/agent.log", log) + assert key == "logs/agent.log" + written = tmp_path / "artifacts" / "local-test" / "logs" / "agent.log" + assert written.read_text() == "hello" + + hb.actions.record("bugzilla.update_bug", {"bug_id": 1}, reasoning="r") + assert hb.actions.actions[0]["type"] == "bugzilla.update_bug" diff --git a/libs/hackbot-runtime/tests/test_providers.py b/libs/hackbot-runtime/tests/test_providers.py new file mode 100644 index 0000000000..fae300db25 --- /dev/null +++ b/libs/hackbot-runtime/tests/test_providers.py @@ -0,0 +1,26 @@ +"""Tests for provider credential exposure/validation.""" + +import pytest +from hackbot_runtime.providers import AnthropicAuth, Provider, ProviderError + + +def test_api_key_returned_when_set(monkeypatch): + monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-test") + assert AnthropicAuth().api_key == "sk-test" + + +def test_missing_key_raises_clear_error(monkeypatch): + monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False) + with pytest.raises(ProviderError, match="ANTHROPIC_API_KEY"): + AnthropicAuth().api_key + + +def test_empty_key_treated_as_missing(monkeypatch): + monkeypatch.setenv("ANTHROPIC_API_KEY", "") + with 
pytest.raises(ProviderError): + AnthropicAuth().api_key + + +def test_satisfies_provider_protocol(monkeypatch): + monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-test") + assert isinstance(AnthropicAuth(), Provider) diff --git a/libs/hackbot-runtime/tests/test_registry.py b/libs/hackbot-runtime/tests/test_registry.py deleted file mode 100644 index bc7d055324..0000000000 --- a/libs/hackbot-runtime/tests/test_registry.py +++ /dev/null @@ -1,47 +0,0 @@ -"""Tests for the action registry and schema derivation.""" - -from hackbot_runtime.actions import ActionInputError, get_actions -from hackbot_runtime.actions.registry import ActionDefinition - -_BUGZILLA_TYPES = { - "bugzilla.update_bug", - "bugzilla.add_comment", - "bugzilla.add_attachment", - "bugzilla.create_bug", -} - - -def test_get_actions_returns_all(): - assert {a.type for a in get_actions()} == _BUGZILLA_TYPES - - -def test_get_actions_filtered(): - got = get_actions(["bugzilla.update_bug", "bugzilla.add_comment"]) - assert {a.type for a in got} == {"bugzilla.update_bug", "bugzilla.add_comment"} - - -def test_action_input_error_is_exception(): - assert issubclass(ActionInputError, Exception) - - -def test_input_schema_excludes_recorder_and_keeps_descriptions(): - update = next(a for a in get_actions() if a.type == "bugzilla.update_bug") - schema = update.input_schema - props = schema["properties"] - assert "recorder" not in props - assert set(props) == {"bug_id", "changes", "reasoning"} - assert set(schema["required"]) == {"bug_id", "changes", "reasoning"} - assert props["bug_id"]["description"] - - -def test_input_schema_marks_optional_params(): - comment = next(a for a in get_actions() if a.type == "bugzilla.add_comment") - # is_private has a default -> not required. 
- assert "is_private" not in comment.input_schema.get("required", []) - - -def test_input_schema_is_cached(): - defn = ActionDefinition( - type="x.y", description="d", handler=get_actions()[0].handler - ) - assert defn.input_schema is defn.input_schema diff --git a/libs/hackbot-runtime/tests/test_runtime.py b/libs/hackbot-runtime/tests/test_runtime.py index 9ad5ebef7e..3469283b42 100644 --- a/libs/hackbot-runtime/tests/test_runtime.py +++ b/libs/hackbot-runtime/tests/test_runtime.py @@ -2,25 +2,26 @@ import json -from hackbot_runtime import AgentResult, Context -from hackbot_runtime.runtime import _finish +import pytest +from hackbot_runtime import AgentError, HackbotAgentResult, HackbotContext, run_async +from hackbot_runtime.runtime import _discover_config_path, _finish, _resolve_config def test_run_id_defaults_to_unique_generated_id(monkeypatch): monkeypatch.delenv("RUN_ID", raising=False) - a, b = Context(), Context() + a, b = HackbotContext(), HackbotContext() assert a.run_id != b.run_id assert a.run_id.startswith("local-") def test_run_id_env_overrides_default(monkeypatch): monkeypatch.setenv("RUN_ID", "orchestrator-42") - assert Context().run_id == "orchestrator-42" + assert HackbotContext().run_id == "orchestrator-42" def _ctx(tmp_path, run_id="local-test"): # No results_policy_url -> uploader is None -> local artifacts path. - return Context(run_id=run_id, artifacts_dir=tmp_path / "artifacts") + return HackbotContext(run_id=run_id, artifacts_dir=tmp_path / "artifacts") def test_summary_written_locally_without_uploader(tmp_path): @@ -31,7 +32,7 @@ def test_summary_written_locally_without_uploader(tmp_path): reasoning="rule X", ) - code = _finish(ctx, AgentResult(status="ok", findings={"bugs_processed": 1})) + code = _finish(ctx, HackbotAgentResult(num_turns=1)) assert code == 0 # Written under the per-run subdir: artifacts_dir / run_id. 
@@ -39,7 +40,7 @@ def test_summary_written_locally_without_uploader(tmp_path): (tmp_path / "artifacts" / "local-test" / "summary.json").read_text() ) assert summary["status"] == "ok" - assert summary["findings"] == {"bugs_processed": 1} + assert summary["findings"] == {"num_turns": 1, "total_cost_usd": None} assert summary["actions"][0]["type"] == "bugzilla.update_bug" @@ -55,11 +56,61 @@ def test_summary_written_for_exception(tmp_path): assert "boom" in summary["error"] +def test_non_result_return_is_contract_error(tmp_path): + ctx = _ctx(tmp_path) + # A bare dict (or None) is no longer accepted — only a HackbotAgentResult. + code = _finish(ctx, {"bugs_processed": 1}) + + assert code == 1 + summary = json.loads( + (tmp_path / "artifacts" / "local-test" / "summary.json").read_text() + ) + assert summary["status"] == "error" + assert "expected a HackbotAgentResult" in summary["error"] + + +def test_summary_written_for_agent_result(tmp_path): + class _Result(HackbotAgentResult): + bug_id: int + + ctx = _ctx(tmp_path) + code = _finish(ctx, _Result(bug_id=42, num_turns=3, total_cost_usd=0.12)) + + assert code == 0 + summary = json.loads( + (tmp_path / "artifacts" / "local-test" / "summary.json").read_text() + ) + assert summary["status"] == "ok" + assert summary["findings"] == { + "num_turns": 3, + "total_cost_usd": 0.12, + "bug_id": 42, + } + + +def test_finish_publishes_agent_log_when_written(tmp_path): + ctx = _ctx(tmp_path) + ctx.log_path.write_text("hello from the agent\n") + + _finish(ctx, HackbotAgentResult(num_turns=1)) + + published = tmp_path / "artifacts" / "local-test" / "logs" / "agent.log" + assert published.read_text() == "hello from the agent\n" + + +def test_finish_skips_log_when_none_written(tmp_path): + ctx = _ctx(tmp_path) # never touch ctx.log_path -> no file written + + _finish(ctx, HackbotAgentResult(num_turns=1)) + + assert not (tmp_path / "artifacts" / "local-test" / "logs" / "agent.log").exists() + + def 
test_runs_are_namespaced_by_run_id(tmp_path): ctx_a = _ctx(tmp_path, run_id="run-a") ctx_b = _ctx(tmp_path, run_id="run-b") - _finish(ctx_a, AgentResult(status="ok")) - _finish(ctx_b, AgentResult(status="error", error="x")) + _finish(ctx_a, HackbotAgentResult(num_turns=0)) + _finish(ctx_b, RuntimeError("x")) base = tmp_path / "artifacts" assert json.loads((base / "run-a" / "summary.json").read_text())["status"] == "ok" @@ -68,6 +119,92 @@ def test_runs_are_namespaced_by_run_id(tmp_path): ) +def _dummy_entry(ctx): # stand-in entrypoint for discovery tests + return None + + +def test_config_auto_discovered_from_cwd(tmp_path, monkeypatch): + (tmp_path / "hackbot.toml").write_text('[source]\nrepo_url = "https://x/y.git"\n') + monkeypatch.chdir(tmp_path) + + assert _discover_config_path(_dummy_entry) == tmp_path / "hackbot.toml" + cfg = _resolve_config(_dummy_entry, None) + assert cfg.source is not None + assert cfg.source.repo_url == "https://x/y.git" + + +def test_config_discovered_above_entrypoint_module(tmp_path, monkeypatch): + # Agent root holds hackbot.toml; the entry module lives below it (editable + # checkout). cwd has no toml, so discovery must walk up from the module. 
+ agent_root = tmp_path / "agent" + pkg = agent_root / "mypkg" + pkg.mkdir(parents=True) + (pkg / "__init__.py").write_text("") + (pkg / "agent.py").write_text("def main(ctx):\n return None\n") + (agent_root / "hackbot.toml").write_text('[source]\nrepo_url = "https://a/b.git"\n') + + empty = tmp_path / "elsewhere" + empty.mkdir() + monkeypatch.chdir(empty) + monkeypatch.syspath_prepend(str(agent_root)) + from mypkg.agent import main # type: ignore + + assert _discover_config_path(main) == agent_root / "hackbot.toml" + + +def test_no_config_discovered_yields_empty(tmp_path, monkeypatch): + pkg = tmp_path / "barepkg" + pkg.mkdir() + (pkg / "__init__.py").write_text("") + (pkg / "agent.py").write_text("def main(ctx):\n return None\n") + + monkeypatch.chdir(tmp_path) + monkeypatch.syspath_prepend(str(tmp_path)) + from barepkg.agent import main # type: ignore + + assert _discover_config_path(main) is None + cfg = _resolve_config(main, None) + assert cfg.source is None and cfg.firefox is None + + +def _run_env(tmp_path, monkeypatch): + # Make run_async write into tmp and discover no hackbot.toml. 
+ monkeypatch.setenv("ARTIFACTS_DIR", str(tmp_path)) + monkeypatch.setenv("RUN_ID", "t") + monkeypatch.delenv("RESULTS_POLICY_URL", raising=False) + monkeypatch.chdir(tmp_path) # no hackbot.toml here + + +def test_run_async_exits_zero_and_writes_summary(tmp_path, monkeypatch): + _run_env(tmp_path, monkeypatch) + + async def main(ctx): + return HackbotAgentResult(num_turns=1) + + with pytest.raises(SystemExit) as exc: + run_async(main) + + assert exc.value.code == 0 + summary = json.loads((tmp_path / "t" / "summary.json").read_text()) + assert summary["status"] == "ok" + assert summary["findings"] == {"num_turns": 1, "total_cost_usd": None} + + +def test_run_async_exits_nonzero_when_agent_raises(tmp_path, monkeypatch): + _run_env(tmp_path, monkeypatch) + + async def main(ctx): + raise AgentError("nope") + + with pytest.raises(SystemExit) as exc: + run_async(main) + + assert exc.value.code == 1 + summary = json.loads((tmp_path / "t" / "summary.json").read_text()) + assert summary["status"] == "error" + assert "nope" in summary["error"] + + def test_publish_file_copies_locally_without_uploader(tmp_path): ctx = _ctx(tmp_path) log = tmp_path / "agent.log" diff --git a/libs/hackbot-runtime/tests/test_source.py b/libs/hackbot-runtime/tests/test_source.py new file mode 100644 index 0000000000..4bc83d30e3 --- /dev/null +++ b/libs/hackbot-runtime/tests/test_source.py @@ -0,0 +1,47 @@ +"""Tests for ensure_source_repo (shallow git checkout helper).""" + +import subprocess +from pathlib import Path + +from hackbot_runtime import ensure_source_repo + + +def _make_remote(path: Path) -> None: + subprocess.run(["git", "init", "-q", str(path)], check=True) + (path / "README.md").write_text("hello") + subprocess.run(["git", "-C", str(path), "add", "."], check=True) + subprocess.run( + [ + "git", + "-C", + str(path), + "-c", + "user.email=t@example.com", + "-c", + "user.name=test", + "commit", + "-q", + "-m", + "init", + ], + check=True, + ) + + +def test_clones_when_absent(tmp_path): 
+ remote = tmp_path / "remote" + _make_remote(remote) + dest = tmp_path / "dest" + ensure_source_repo(dest, f"file://{remote}") + assert (dest / ".git").is_dir() + assert (dest / "README.md").read_text() == "hello" + + +def test_idempotent_update_when_present(tmp_path): + remote = tmp_path / "remote" + _make_remote(remote) + dest = tmp_path / "dest" + ensure_source_repo(dest, f"file://{remote}") + # Second call takes the fetch + hard-reset branch and must still succeed. + ensure_source_repo(dest, f"file://{remote}") + assert (dest / "README.md").read_text() == "hello" diff --git a/pyproject.toml b/pyproject.toml index f8b9114397..b33ceef701 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -71,12 +71,6 @@ nlp = [ "spacy==3.8.14", ] nn = [] -# Tooling for the hackbot bug-fix agent (bugbug/tools/bug_fix). Not a base -# dependency: hackbot-runtime is a workspace-only package, so a standalone -# `pip install bugbug` must not require it. -bug-fix = [ - "hackbot-runtime[claude-sdk]", -] [dependency-groups] test = [ @@ -131,18 +125,13 @@ include = ["/bugbug", "/scripts", "/VERSION"] [tool.hatch.build.targets.wheel] packages = ["bugbug", "scripts"] -artifacts = [ - "bugbug/tools/bug_fix/prompts/", - "bugbug/tools/bug_fix/rules/", - "bugbug/tools/bug_fix/memory/", - "bugbug/tools/duplicate_bugs/prompts/", -] [tool.uv.workspace] -members = ["http_service", "services/hackbot-api", "agents/bug-fix", "libs/hackbot-runtime"] +members = ["http_service", "services/hackbot-api", "agents/bug-fix", "libs/hackbot-runtime", "libs/agent-tools"] [tool.uv.sources] hackbot-runtime = { workspace = true } +agent-tools = { workspace = true } [tool.ruff] extend-exclude = ["data"] diff --git a/scripts/run_bug_fix.py b/scripts/run_bug_fix.py deleted file mode 100644 index 9eb9e2961e..0000000000 --- a/scripts/run_bug_fix.py +++ /dev/null @@ -1,55 +0,0 @@ -"""Run the bug_fix tool locally.""" - -import asyncio -from pathlib import Path - -import bugsy -from pydantic_settings import BaseSettings, 
SettingsConfigDict - -from bugbug.tools.bug_fix.agent import BugFixTool -from bugbug.tools.bug_fix.bugzilla_mcp import BugzillaContext, build_server - - -class Settings(BaseSettings): - bug_id: int - bugzilla_api_url: str = "https://bugzilla.mozilla.org/rest" - bugzilla_api_key: str - source_repo: Path - model: str | None = None - max_turns: int | None = None - effort: str | None = None - - model_config = SettingsConfigDict( - cli_parse_args=True, - env_file=".env", - extra="ignore", - ) - - -async def main(): - settings = Settings() - - bugzilla_mcp_server = build_server( - BugzillaContext( - client=bugsy.Bugsy( - api_key=settings.bugzilla_api_key, - bugzilla_url=settings.bugzilla_api_url, - ), - ) - ) - - tool = BugFixTool.create() - result = await tool.run( - bugzilla_mcp_server=bugzilla_mcp_server, - source_repo=settings.source_repo, - model=settings.model, - max_turns=settings.max_turns, - effort=settings.effort, - bugs=[settings.bug_id], - verbose=True, - ) - print(f"\nexit_code={result.exit_code} bugs_processed={result.bugs_processed}") - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/services/hackbot-api/Dockerfile b/services/hackbot-api/Dockerfile index f5a13479c7..4108d20f79 100644 --- a/services/hackbot-api/Dockerfile +++ b/services/hackbot-api/Dockerfile @@ -2,35 +2,30 @@ FROM python:3.12-slim AS builder COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/ -WORKDIR /app +ENV UV_PROJECT_ENVIRONMENT=/opt/venv -# Workspace metadata first so the dep-download layer caches independently -# of source changes. -COPY pyproject.toml uv.lock VERSION ./ -COPY http_service/pyproject.toml ./http_service/ -COPY services/hackbot-api/pyproject.toml ./services/hackbot-api/ -COPY agents/bug-fix/pyproject.toml ./agents/bug-fix/ -COPY libs/hackbot-runtime/pyproject.toml ./libs/hackbot-runtime/ +WORKDIR /app # Install external deps without building workspace members. 
RUN --mount=type=cache,target=/root/.cache/uv \ - uv sync --locked --no-dev --no-install-workspace --package hackbot-api - -# Workspace member source. -COPY services/hackbot-api ./services/hackbot-api + --mount=type=bind,source=pyproject.toml,target=pyproject.toml \ + --mount=type=bind,source=uv.lock,target=uv.lock \ + --mount=type=bind,source=VERSION,target=VERSION \ + uv sync --frozen --no-dev --no-install-workspace --package hackbot-api RUN --mount=type=cache,target=/root/.cache/uv \ - uv sync --locked --no-dev --package hackbot-api + --mount=type=bind,target=/app,rw \ + uv sync --locked --no-dev --no-editable --package hackbot-api FROM python:3.12-slim AS base -COPY --from=builder /app /app -WORKDIR /app/services/hackbot-api +COPY --from=builder /opt/venv /opt/venv +WORKDIR /app ENV PYTHONUNBUFFERED=1 ENV PYTHONDONTWRITEBYTECODE=1 ENV PORT=8080 -ENV PATH="/app/.venv/bin:$PATH" +ENV PATH="/opt/venv/bin:$PATH" RUN useradd --create-home --shell /bin/bash app USER app diff --git a/services/hackbot-api/app/agents.py b/services/hackbot-api/app/agents.py index ad60d0f882..5802f6f511 100644 --- a/services/hackbot-api/app/agents.py +++ b/services/hackbot-api/app/agents.py @@ -1,3 +1,4 @@ +import json from collections.abc import Callable from dataclasses import dataclass @@ -12,26 +13,31 @@ class AgentSpec: description: str job_name: str input_schema: type[BaseModel] - build_env: Callable[[BaseModel], dict[str, str]] - - -def _bug_fix_env(inputs: BaseModel) -> dict[str, str]: - assert isinstance(inputs, BugFixInputs) - # The bug-fix agent's Job is multi-container: an `agent` container - # (no tokens) and a `broker` sidecar (holds BZ_API_KEY at deploy time - # via Secret Manager). The orchestrator only overrides the `agent` - # container's env per execution — the broker is fully configured at - # deploy time. The agent reaches the broker on the task's loopback. 
- env: dict[str, str] = { - "BUG_ID": str(inputs.bug_id), - "BUGZILLA_MCP_URL": "http://127.0.0.1:8765/mcp", - } - if inputs.model is not None: - env["MODEL"] = inputs.model - if inputs.max_turns is not None: - env["MAX_TURNS"] = str(inputs.max_turns) - if inputs.effort is not None: - env["EFFORT"] = inputs.effort + # Optional override for the rare agent whose env vars don't map 1:1 from + # its input schema. Defaults to ``model_to_env`` (field -> UPPER_SNAKE env). + build_env: Callable[[BaseModel], dict[str, str]] | None = None + + +def model_to_env(inputs: BaseModel) -> dict[str, str]: + """Serialise validated inputs into Cloud Run Job env overrides. + + Each schema field maps to an upper-cased env var (``bug_id`` -> ``BUG_ID``); + ``None`` fields are skipped, and the agent reads them back via + ``pydantic_settings.BaseSettings`` (which upper-cases field names by + default). Lists/dicts are JSON-encoded. Deploy-time constants (e.g. the + broker loopback URL) are NOT inputs — they belong in the Job's static env + config, not here. 
+ """ + env: dict[str, str] = {} + for name, value in inputs.model_dump(mode="json").items(): + if value is None: + continue + if isinstance(value, str): + env[name.upper()] = value + elif isinstance(value, (list, dict)): + env[name.upper()] = json.dumps(value) + else: + env[name.upper()] = str(value) return env @@ -41,6 +47,5 @@ def _bug_fix_env(inputs: BaseModel) -> dict[str, str]: description="Investigate a Bugzilla bug and produce a candidate fix patch against the Firefox source tree.", job_name="hackbot-agent-bug-fix", input_schema=BugFixInputs, - build_env=_bug_fix_env, ), } diff --git a/services/hackbot-api/app/routers/runs.py b/services/hackbot-api/app/routers/runs.py index d4c4709924..2a4fd3af53 100644 --- a/services/hackbot-api/app/routers/runs.py +++ b/services/hackbot-api/app/routers/runs.py @@ -6,7 +6,7 @@ from sqlalchemy.ext.asyncio import AsyncSession from app import gcs, jobs -from app.agents import AGENT_REGISTRY, AgentSpec +from app.agents import AGENT_REGISTRY, AgentSpec, model_to_env from app.auth import require_api_key from app.config import settings from app.database.connection import get_db @@ -82,7 +82,7 @@ async def create_run( "RESULTS_PREFIX": results_prefix, "RESULTS_POLICY_URL": policy["url"], "RESULTS_POLICY_FIELDS": json.dumps(policy["fields"]), - **agent.build_env(inputs), + **(agent.build_env or model_to_env)(inputs), } try: diff --git a/services/hackbot-api/pyproject.toml b/services/hackbot-api/pyproject.toml index 14a8a0b5d4..69f555c7e1 100644 --- a/services/hackbot-api/pyproject.toml +++ b/services/hackbot-api/pyproject.toml @@ -20,6 +20,13 @@ dependencies = [ [project.optional-dependencies] dev = ["pytest>=8.0.0", "pytest-asyncio>=0.23.0", "httpx>=0.26.0"] +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["app"] + [tool.pytest.ini_options] asyncio_mode = "auto" testpaths = ["tests"] diff --git a/services/hackbot-api/tests/test_agents.py 
b/services/hackbot-api/tests/test_agents.py new file mode 100644 index 0000000000..c99c9d4689 --- /dev/null +++ b/services/hackbot-api/tests/test_agents.py @@ -0,0 +1,32 @@ +"""Tests for the agent registry and generic env serialization.""" + +from app.agents import AGENT_REGISTRY, model_to_env +from app.schemas import BugFixInputs + + +def test_model_to_env_uppercases_and_stringifies(): + env = model_to_env(BugFixInputs(bug_id=12345, model="claude-opus", max_turns=8)) + assert env["BUG_ID"] == "12345" + assert env["MODEL"] == "claude-opus" + assert env["MAX_TURNS"] == "8" + + +def test_model_to_env_skips_none_fields(): + env = model_to_env(BugFixInputs(bug_id=1)) + assert env == {"BUG_ID": "1"} + # Optional fields left unset must not leak as empty/"None" env vars. + assert "MODEL" not in env + assert "EFFORT" not in env + + +def test_model_to_env_does_not_emit_deploy_constants(): + # The broker loopback URL is static Job config, not a per-run input. + env = model_to_env(BugFixInputs(bug_id=1, model="x", max_turns=2, effort="high")) + assert "BUGZILLA_MCP_URL" not in env + + +def test_bug_fix_registry_uses_default_env_serializer(): + spec = AGENT_REGISTRY["bug-fix"] + # No hand-written build_env: the router falls back to model_to_env. 
+ assert spec.build_env is None + assert spec.input_schema is BugFixInputs diff --git a/uv.lock b/uv.lock index bacaad9662..98b9d7bf13 100644 --- a/uv.lock +++ b/uv.lock @@ -18,6 +18,7 @@ resolution-markers = [ [manifest] members = [ + "agent-tools", "bugbug", "bugbug-http-service", "hackbot-agent-bug-fix", @@ -46,6 +47,36 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/18/a6/907a406bb7d359e6a63f99c313846d9eec4f7e6f7437809e03aa00fa3074/absl_py-2.4.0-py3-none-any.whl", hash = "sha256:88476fd881ca8aab94ffa78b7b6c632a782ab3ba1cd19c9bd423abc4fb4cd28d", size = 135750, upload-time = "2026-01-28T10:17:04.19Z" }, ] +[[package]] +name = "agent-tools" +version = "0.1.0" +source = { editable = "libs/agent-tools" } +dependencies = [ + { name = "pydantic" }, +] + +[package.optional-dependencies] +bugzilla = [ + { name = "bugsy" }, +] +claude-sdk = [ + { name = "claude-agent-sdk" }, +] +firefox = [ + { name = "grizzly-framework" }, + { name = "prefpicker" }, +] + +[package.metadata] +requires-dist = [ + { name = "bugsy", marker = "extra == 'bugzilla'" }, + { name = "claude-agent-sdk", marker = "extra == 'claude-sdk'", specifier = ">=0.1.30" }, + { name = "grizzly-framework", marker = "extra == 'firefox'" }, + { name = "prefpicker", marker = "extra == 'firefox'" }, + { name = "pydantic", specifier = ">=2.6.0" }, +] +provides-extras = ["bugzilla", "firefox", "claude-sdk"] + [[package]] name = "aiofiles" version = "0.8.0" @@ -561,9 +592,6 @@ dependencies = [ ] [package.optional-dependencies] -bug-fix = [ - { name = "hackbot-runtime", extra = ["claude-sdk"] }, -] nlp = [ { name = "spacy" }, ] @@ -595,7 +623,6 @@ requires-dist = [ { name = "beautifulsoup4", specifier = "~=4.14.3" }, { name = "boto3", specifier = ">=1.42.78,<1.44.0" }, { name = "claude-agent-sdk", specifier = ">=0.1.30" }, - { name = "hackbot-runtime", extras = ["claude-sdk"], marker = "extra == 'bug-fix'", editable = "libs/hackbot-runtime" }, { name = "httpx", specifier = "~=0.28.1" }, { name = 
"imbalanced-learn", specifier = "~=0.14.1" }, { name = "langchain", specifier = "~=1.2.13" }, @@ -642,7 +669,7 @@ requires-dist = [ { name = "xgboost", specifier = "~=3.2.0" }, { name = "zstandard", specifier = "~=0.25.0" }, ] -provides-extras = ["bug-fix", "nlp", "nn"] +provides-extras = ["nlp", "nn"] [package.metadata.requires-dev] spawn-pipeline = [ @@ -2107,28 +2134,24 @@ wheels = [ [[package]] name = "hackbot-agent-bug-fix" version = "0.1.0" -source = { virtual = "agents/bug-fix" } +source = { editable = "agents/bug-fix" } dependencies = [ - { name = "bugbug" }, + { name = "agent-tools", extra = ["bugzilla", "firefox"] }, { name = "bugsy" }, { name = "claude-agent-sdk" }, - { name = "grizzly-framework" }, { name = "hackbot-runtime", extra = ["claude-sdk"] }, { name = "mcp" }, - { name = "prefpicker" }, { name = "starlette" }, { name = "uvicorn" }, ] [package.metadata] requires-dist = [ - { name = "bugbug", editable = "." }, + { name = "agent-tools", extras = ["bugzilla", "firefox"], editable = "libs/agent-tools" }, { name = "bugsy" }, { name = "claude-agent-sdk", specifier = ">=0.1.30" }, - { name = "grizzly-framework" }, { name = "hackbot-runtime", extras = ["claude-sdk"], editable = "libs/hackbot-runtime" }, { name = "mcp", specifier = ">=1.0.0" }, - { name = "prefpicker" }, { name = "starlette", specifier = ">=0.36.0" }, { name = "uvicorn", specifier = ">=0.27.0" }, ] @@ -2136,7 +2159,7 @@ requires-dist = [ [[package]] name = "hackbot-api" version = "0.1.0" -source = { virtual = "services/hackbot-api" } +source = { editable = "services/hackbot-api" } dependencies = [ { name = "alembic" }, { name = "asyncpg" }, @@ -2182,17 +2205,21 @@ name = "hackbot-runtime" version = "0.1.0" source = { editable = "libs/hackbot-runtime" } dependencies = [ + { name = "agent-tools" }, { name = "pydantic-settings" }, { name = "requests" }, ] [package.optional-dependencies] claude-sdk = [ + { name = "agent-tools", extra = ["claude-sdk"] }, { name = "claude-agent-sdk" }, ] 
[package.metadata] requires-dist = [ + { name = "agent-tools", editable = "libs/agent-tools" }, + { name = "agent-tools", extras = ["claude-sdk"], marker = "extra == 'claude-sdk'", editable = "libs/agent-tools" }, { name = "claude-agent-sdk", marker = "extra == 'claude-sdk'", specifier = ">=0.1.30" }, { name = "pydantic-settings", specifier = ">=2.1.0" }, { name = "requests", specifier = ">=2.32.0" },