From e76da6a30b5d0e87e58e03b8740d0bb9a26a7849 Mon Sep 17 00:00:00 2001 From: Suhaib Mujahid Date: Tue, 9 Jun 2026 01:22:14 -0400 Subject: [PATCH 01/21] Simplify hackbot agent structure into self-contained agents// folders --- agents/README.md | 50 +++ agents/bug-fix/Dockerfile | 1 - agents/bug-fix/agent/__init__.py | 198 +++++++++++ .../bug-fix/agent}/config.py | 0 .../bug-fix/agent}/firefox_mcp.py | 2 +- .../bug-fix/agent}/firefox_tools/__init__.py | 0 .../agent}/firefox_tools/bootstrap_firefox.py | 0 .../agent}/firefox_tools/build_firefox.py | 0 .../agent}/firefox_tools/evaluate_testcase.py | 0 .../firefox_tools/js_shell_evaluator.py | 0 .../bug-fix/agent}/prompts/system.md | 0 .../bug-fix/agent}/rules/README.md | 0 .../agent}/rules/unsupported-config.md | 0 agents/bug-fix/agent_runner/__main__.py | 5 +- agents/bug-fix/broker/__main__.py | 5 +- agents/bug-fix/pyproject.toml | 2 - .../bug-fix/run_local.py | 19 +- bugbug/tools/bug_fix/__init__.py | 0 bugbug/tools/bug_fix/agent.py | 318 ------------------ bugbug/tools/duplicate_bugs/agent.py | 4 +- .../hackbot_runtime/actions/claude_sdk.py | 21 ++ .../hackbot-runtime/hackbot_runtime/claude.py | 125 +++++++ .../hackbot_runtime/mcp/__init__.py | 1 + .../hackbot_runtime/mcp/bugzilla.py | 0 libs/hackbot-runtime/pyproject.toml | 10 +- libs/hackbot-runtime/tests/test_claude.py | 36 ++ libs/hackbot-runtime/tests/test_claude_sdk.py | 26 +- pyproject.toml | 6 +- services/hackbot-api/app/agents.py | 47 +-- services/hackbot-api/app/routers/runs.py | 4 +- services/hackbot-api/tests/test_agents.py | 32 ++ uv.lock | 4 +- 32 files changed, 547 insertions(+), 369 deletions(-) create mode 100644 agents/README.md create mode 100644 agents/bug-fix/agent/__init__.py rename {bugbug/tools/bug_fix => agents/bug-fix/agent}/config.py (100%) rename {bugbug/tools/bug_fix => agents/bug-fix/agent}/firefox_mcp.py (99%) rename {bugbug/tools/bug_fix => agents/bug-fix/agent}/firefox_tools/__init__.py (100%) rename {bugbug/tools/bug_fix => 
agents/bug-fix/agent}/firefox_tools/bootstrap_firefox.py (100%) rename {bugbug/tools/bug_fix => agents/bug-fix/agent}/firefox_tools/build_firefox.py (100%) rename {bugbug/tools/bug_fix => agents/bug-fix/agent}/firefox_tools/evaluate_testcase.py (100%) rename {bugbug/tools/bug_fix => agents/bug-fix/agent}/firefox_tools/js_shell_evaluator.py (100%) rename {bugbug/tools/bug_fix => agents/bug-fix/agent}/prompts/system.md (100%) rename {bugbug/tools/bug_fix => agents/bug-fix/agent}/rules/README.md (100%) rename {bugbug/tools/bug_fix => agents/bug-fix/agent}/rules/unsupported-config.md (100%) rename scripts/run_bug_fix.py => agents/bug-fix/run_local.py (63%) delete mode 100644 bugbug/tools/bug_fix/__init__.py delete mode 100644 bugbug/tools/bug_fix/agent.py create mode 100644 libs/hackbot-runtime/hackbot_runtime/claude.py create mode 100644 libs/hackbot-runtime/hackbot_runtime/mcp/__init__.py rename bugbug/tools/bug_fix/bugzilla_mcp.py => libs/hackbot-runtime/hackbot_runtime/mcp/bugzilla.py (100%) create mode 100644 libs/hackbot-runtime/tests/test_claude.py create mode 100644 services/hackbot-api/tests/test_agents.py diff --git a/agents/README.md b/agents/README.md new file mode 100644 index 0000000000..0a9bda6112 --- /dev/null +++ b/agents/README.md @@ -0,0 +1,50 @@ +# hackbot agents + +Each subdirectory here is **one self-contained hackbot agent** — its logic, +entrypoint, and deployment live together. `bug-fix/` is the reference example. + +## Anatomy of an agent (`agents/<name>/`) + +``` +agents/<name>/ + pyproject.toml # package "hackbot-agent-<name>"; deps: hackbot-runtime[claude-sdk] + agent-specific + Dockerfile # multi-stage: builder / agent [/ broker] + compose.yml # local run; sets static env (e.g. the broker URL) + agent_runner/ + __main__.py # AgentInputs(BaseSettings); async def main(ctx) -> AgentResult; run_async(main) + agent/ # the agent's brain: run_bug_fix()-style entrypoint + prompts/, rules/, MCP servers + broker/ # OPTIONAL: secret-holding MCP sidecar (e.g.
holds the Bugzilla API key) + run_local.py # OPTIONAL: run without Docker/broker for quick iteration +``` + +`agent_runner` is the thin deployment wrapper the runtime invokes; `agent/` is +the actual logic. The runner does `from agent import run_<name>` and passes +`ctx.actions` (the recorder) plus the validated inputs into it. + +## Shared building blocks (in `hackbot-runtime`) + +Don't re-implement these — import them: + +- `from hackbot_runtime import Context, AgentResult, run_async` — the entrypoint contract. +- `from hackbot_runtime.claude import Reporter` — renders streamed claude-agent-sdk + messages to stdout/log. Call `reporter.header("...")` per work item, `reporter.message(msg)` per message. +- `from hackbot_runtime.actions.claude_sdk import actions_server_for` — returns + `(recorder, mcp_server)`; write actions land in `summary.json` instead of mutating anything. +- `from hackbot_runtime.mcp.bugzilla import BugzillaContext, build_server` — read-only Bugzilla MCP. + +You still assemble your own `ClaudeAgentOptions` and drive the `ClaudeSDKClient` +loop — those stay explicit and in your hands. + +## Adding a new agent + +1. `agents/<name>/agent_runner/__main__.py` — define `AgentInputs(BaseSettings)`, + `async def main(ctx) -> AgentResult`, end with `raise SystemExit(run_async(main))`. +2. `agents/<name>/agent/` — your prompts/logic, exposing an async entrypoint. +3. Copy `pyproject.toml`, `Dockerfile`, `compose.yml` from `bug-fix/` and rename. +4. In `services/hackbot-api/app/schemas.py`, add a Pydantic input model. +5. In `services/hackbot-api/app/agents.py`, add one `AGENT_REGISTRY` entry + (`name` + `description` + `job_name` + `input_schema`). **No `build_env`** — + env vars are derived from the schema by `model_to_env` (field `bug_id` → `BUG_ID`). + Put deploy-time constants (broker URLs, etc.) in the Job's static env config, not the schema. + +That's it: one folder + one schema + one registry line.
diff --git a/agents/bug-fix/Dockerfile b/agents/bug-fix/Dockerfile index 347a411b8e..f69769e71e 100644 --- a/agents/bug-fix/Dockerfile +++ b/agents/bug-fix/Dockerfile @@ -18,7 +18,6 @@ RUN --mount=type=cache,target=/root/.cache/uv \ # Workspace members the agent image actually needs (source included). COPY agents/bug-fix ./agents/bug-fix -COPY bugbug ./bugbug COPY libs/hackbot-runtime ./libs/hackbot-runtime RUN --mount=type=cache,target=/root/.cache/uv \ diff --git a/agents/bug-fix/agent/__init__.py b/agents/bug-fix/agent/__init__.py new file mode 100644 index 0000000000..fa2ed05b96 --- /dev/null +++ b/agents/bug-fix/agent/__init__.py @@ -0,0 +1,198 @@ +"""Bug fix triage tool -- a Bugzilla triage agent. + +Orchestrates a Claude agent that triages bugs according to rulesets +in the rules/ directory. The agent has access to a source repository +and reaches Bugzilla via an out-of-process MCP broker (HTTP transport) +that holds the Bugzilla token — the agent process itself never sees it. +""" + +from __future__ import annotations + +import sys +from dataclasses import dataclass +from pathlib import Path + +from claude_agent_sdk import ( + AgentDefinition, + ClaudeAgentOptions, + ClaudeSDKClient, + McpServerConfig, + ResultMessage, +) +from hackbot_runtime import ActionsRecorder +from hackbot_runtime.actions.claude_sdk import actions_server_for +from hackbot_runtime.actions.naming import ACTIONS_SERVER_NAME +from hackbot_runtime.claude import Reporter + +from .config import ( + BUGZILLA_READ_TOOLS, + ENABLED_ACTION_TOOLS, + ENABLED_ACTION_TYPES, + FIREFOX_TOOLS, + SOURCE_WRITE_TOOLS, +) +from .firefox_mcp import FirefoxContext +from .firefox_mcp import build_server as build_firefox_server + +HERE = Path(__file__).resolve().parent + + +# --------------------------------------------------------------------------- # +# Result type +# --------------------------------------------------------------------------- # + + +@dataclass +class BugFixResult: + exit_code: int = 0 + 
bugs_processed: int = 0 + + +# --------------------------------------------------------------------------- # +# Prompts & agents +# --------------------------------------------------------------------------- # + + +def load_system_prompt(rules_dir: Path, extra: str) -> str: + tmpl = (HERE / "prompts" / "system.md").read_text() + + return tmpl.format( + rules_dir=str(rules_dir.resolve()), + extra_instructions=extra or "(none)", + ) + + +def make_investigator() -> AgentDefinition: + """Create a single generic investigator subagent definition.""" + return AgentDefinition( + description=( + "Focused investigator for answering a specific question about " + "a bug or the source tree. The main agent writes your complete " + "instructions at spawn time — follow them precisely and return " + "only what was asked for." + ), + prompt=( + "You are a focused investigator subagent. You will be given a " + "self-contained task by the triage agent. Complete it and return " + "a concise answer. Do not make Bugzilla modifications — you have " + "read-only access. Do not speculate beyond what you can verify." 
+ ), + tools=[ + "Read", + "Grep", + "Glob", + "Bash", + *BUGZILLA_READ_TOOLS, + *FIREFOX_TOOLS, + ], + model="inherit", + ) + + +# --------------------------------------------------------------------------- # +# Agent entrypoint +# --------------------------------------------------------------------------- # + + +async def run_bug_fix( + *, + bugzilla_mcp_server: McpServerConfig, + source_repo: Path, + bugs: list[int], + instructions: str = "", + task: str | None = None, + rules_dir: Path | None = None, + newest_first: bool = False, + model: str | None = None, + max_turns: int | None = None, + effort: str | None = None, + verbose: bool = False, + log: Path | None = None, + actions_recorder: ActionsRecorder | None = None, +) -> BugFixResult: + """Triage and fix the given Bugzilla bug(s) with a claude-agent-sdk agent.""" + if rules_dir is None: + rules_dir = HERE / "rules" + + if not bugs: + print("[bug_fix] no bug ids supplied — nothing to do", file=sys.stderr) + return BugFixResult(exit_code=0) + + selected = sorted(bugs, reverse=newest_first) + print(f"[bug_fix] triaging {len(selected)} bug(s): {selected}", file=sys.stderr) + + # --- Firefox build/eval MCP server (in-process; no tokens) -------- # + fx_ctx = FirefoxContext.from_source_repo(source_repo) + firefox_server = build_firefox_server(fx_ctx) + + # --- Action-recording MCP server (in-process) --------------------- # + # Standalone/script runs pass actions_recorder=None and get a local + # recorder that copies attachments under ./artifacts (no uploader). 
+ actions_recorder, actions_server = actions_server_for( + actions_recorder, types=ENABLED_ACTION_TYPES + ) + + # --- Build agent options ------------------------------------------ # + system_prompt = load_system_prompt(rules_dir, instructions) + + options = ClaudeAgentOptions( + system_prompt=system_prompt, + mcp_servers={ + "bugzilla": bugzilla_mcp_server, + "firefox": firefox_server, + ACTIONS_SERVER_NAME: actions_server, + }, + agents={"investigator": make_investigator()}, + cwd=str(source_repo.resolve()), + add_dirs=[str(rules_dir.resolve())], + permission_mode="bypassPermissions", + allowed_tools=[ + "Read", + "Grep", + "Glob", + "Bash", + "Task", + *SOURCE_WRITE_TOOLS, + *BUGZILLA_READ_TOOLS, + *ENABLED_ACTION_TOOLS, + *FIREFOX_TOOLS, + ], + model=model, + max_turns=max_turns, + **({"effort": effort} if effort else {}), + setting_sources=[], + ) + + # --- Run: one fresh agent context per bug ------------------------- # + exit_code = 0 + rules_path = rules_dir.resolve() + with Reporter(verbose=verbose, log_path=log) as reporter: + for i, bug_id in enumerate(selected, 1): + print(f"[bug_fix] bug {i}/{len(selected)}: {bug_id}", file=sys.stderr) + reporter.header(f"bug {bug_id}") + + if task: + user_prompt = ( + f"Bug to work on: {bug_id}\n\n" + f"Task: {task}\n\n" + f"The rules in {rules_path} are available if the task " + f"calls for them, but the task above is your primary " + f"directive — it overrides the default triage workflow." + ) + else: + user_prompt = ( + f"Triage bug {bug_id}.\n\n" + f"Consult the relevant rules in {rules_path}." 
+ ) + + async with ClaudeSDKClient(options=options) as client: + await client.query(user_prompt) + async for msg in client.receive_response(): + reporter.message(msg) + if isinstance(msg, ResultMessage) and msg.is_error: + exit_code = 1 + + return BugFixResult( + exit_code=exit_code, + bugs_processed=len(selected), + ) diff --git a/bugbug/tools/bug_fix/config.py b/agents/bug-fix/agent/config.py similarity index 100% rename from bugbug/tools/bug_fix/config.py rename to agents/bug-fix/agent/config.py diff --git a/bugbug/tools/bug_fix/firefox_mcp.py b/agents/bug-fix/agent/firefox_mcp.py similarity index 99% rename from bugbug/tools/bug_fix/firefox_mcp.py rename to agents/bug-fix/agent/firefox_mcp.py index 08a553a585..6a030b291c 100644 --- a/bugbug/tools/bug_fix/firefox_mcp.py +++ b/agents/bug-fix/agent/firefox_mcp.py @@ -8,7 +8,7 @@ from claude_agent_sdk import create_sdk_mcp_server, tool -from bugbug.tools.bug_fix.firefox_tools import ( +from .firefox_tools import ( bootstrap_firefox, build_firefox, evaluate_testcase, diff --git a/bugbug/tools/bug_fix/firefox_tools/__init__.py b/agents/bug-fix/agent/firefox_tools/__init__.py similarity index 100% rename from bugbug/tools/bug_fix/firefox_tools/__init__.py rename to agents/bug-fix/agent/firefox_tools/__init__.py diff --git a/bugbug/tools/bug_fix/firefox_tools/bootstrap_firefox.py b/agents/bug-fix/agent/firefox_tools/bootstrap_firefox.py similarity index 100% rename from bugbug/tools/bug_fix/firefox_tools/bootstrap_firefox.py rename to agents/bug-fix/agent/firefox_tools/bootstrap_firefox.py diff --git a/bugbug/tools/bug_fix/firefox_tools/build_firefox.py b/agents/bug-fix/agent/firefox_tools/build_firefox.py similarity index 100% rename from bugbug/tools/bug_fix/firefox_tools/build_firefox.py rename to agents/bug-fix/agent/firefox_tools/build_firefox.py diff --git a/bugbug/tools/bug_fix/firefox_tools/evaluate_testcase.py b/agents/bug-fix/agent/firefox_tools/evaluate_testcase.py similarity index 100% rename from 
bugbug/tools/bug_fix/firefox_tools/evaluate_testcase.py rename to agents/bug-fix/agent/firefox_tools/evaluate_testcase.py diff --git a/bugbug/tools/bug_fix/firefox_tools/js_shell_evaluator.py b/agents/bug-fix/agent/firefox_tools/js_shell_evaluator.py similarity index 100% rename from bugbug/tools/bug_fix/firefox_tools/js_shell_evaluator.py rename to agents/bug-fix/agent/firefox_tools/js_shell_evaluator.py diff --git a/bugbug/tools/bug_fix/prompts/system.md b/agents/bug-fix/agent/prompts/system.md similarity index 100% rename from bugbug/tools/bug_fix/prompts/system.md rename to agents/bug-fix/agent/prompts/system.md diff --git a/bugbug/tools/bug_fix/rules/README.md b/agents/bug-fix/agent/rules/README.md similarity index 100% rename from bugbug/tools/bug_fix/rules/README.md rename to agents/bug-fix/agent/rules/README.md diff --git a/bugbug/tools/bug_fix/rules/unsupported-config.md b/agents/bug-fix/agent/rules/unsupported-config.md similarity index 100% rename from bugbug/tools/bug_fix/rules/unsupported-config.md rename to agents/bug-fix/agent/rules/unsupported-config.md diff --git a/agents/bug-fix/agent_runner/__main__.py b/agents/bug-fix/agent_runner/__main__.py index 0979e9d9d3..8fc2655795 100644 --- a/agents/bug-fix/agent_runner/__main__.py +++ b/agents/bug-fix/agent_runner/__main__.py @@ -75,15 +75,14 @@ def ensure_firefox_source(source_repo: Path) -> None: async def main(ctx: Context) -> AgentResult: - from bugbug.tools.bug_fix.agent import BugFixTool + from agent import run_bug_fix inputs = AgentInputs() ensure_firefox_source(inputs.source_repo) log_path = Path(tempfile.mkdtemp(prefix="bug-fix-log-")) / "agent.log" - tool = BugFixTool.create() - result = await tool.run( + result = await run_bug_fix( task="Triage and fix the bug, and verify the fix", bugzilla_mcp_server={ "type": "http", diff --git a/agents/bug-fix/broker/__main__.py b/agents/bug-fix/broker/__main__.py index 89b328b96b..58e65c66ad 100644 --- a/agents/bug-fix/broker/__main__.py +++ 
b/agents/bug-fix/broker/__main__.py @@ -11,14 +11,13 @@ import bugsy import uvicorn +from hackbot_runtime.mcp.bugzilla import BugzillaContext +from hackbot_runtime.mcp.bugzilla import build_server as build_bugzilla_server from mcp.server.streamable_http_manager import StreamableHTTPSessionManager from pydantic_settings import BaseSettings, SettingsConfigDict from starlette.applications import Starlette from starlette.routing import Mount -from bugbug.tools.bug_fix.bugzilla_mcp import BugzillaContext -from bugbug.tools.bug_fix.bugzilla_mcp import build_server as build_bugzilla_server - log = logging.getLogger("bugzilla-broker") diff --git a/agents/bug-fix/pyproject.toml b/agents/bug-fix/pyproject.toml index 6b84e190b5..1933cfcd04 100644 --- a/agents/bug-fix/pyproject.toml +++ b/agents/bug-fix/pyproject.toml @@ -4,7 +4,6 @@ version = "0.1.0" description = "Cloud Run Job image that runs the bug-fix agent for hackbot-api" requires-python = ">=3.12" dependencies = [ - "bugbug", "hackbot-runtime[claude-sdk]", "bugsy", "grizzly-framework", @@ -16,5 +15,4 @@ dependencies = [ ] [tool.uv.sources] -bugbug = { workspace = true } hackbot-runtime = { workspace = true } diff --git a/scripts/run_bug_fix.py b/agents/bug-fix/run_local.py similarity index 63% rename from scripts/run_bug_fix.py rename to agents/bug-fix/run_local.py index 9eb9e2961e..ca7149d178 100644 --- a/scripts/run_bug_fix.py +++ b/agents/bug-fix/run_local.py @@ -1,13 +1,23 @@ -"""Run the bug_fix tool locally.""" +"""Run the bug-fix agent locally, without Docker or the broker sidecar. + +Builds the read-only Bugzilla MCP server in-process, so this script sees the +Bugzilla API key directly — unlike the deployed agent, which reaches a broker +sidecar over HTTP and never holds the key. Handy for quick iteration; for a +faithful end-to-end run use ``docker compose -f compose.yml up``. 
+""" import asyncio +import sys from pathlib import Path import bugsy from pydantic_settings import BaseSettings, SettingsConfigDict -from bugbug.tools.bug_fix.agent import BugFixTool -from bugbug.tools.bug_fix.bugzilla_mcp import BugzillaContext, build_server +# Make the co-located `agent` package importable regardless of cwd. +sys.path.insert(0, str(Path(__file__).resolve().parent)) + +from agent import run_bug_fix # noqa: E402 +from hackbot_runtime.mcp.bugzilla import BugzillaContext, build_server # noqa: E402 class Settings(BaseSettings): @@ -38,8 +48,7 @@ async def main(): ) ) - tool = BugFixTool.create() - result = await tool.run( + result = await run_bug_fix( bugzilla_mcp_server=bugzilla_mcp_server, source_repo=settings.source_repo, model=settings.model, diff --git a/bugbug/tools/bug_fix/__init__.py b/bugbug/tools/bug_fix/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/bugbug/tools/bug_fix/agent.py b/bugbug/tools/bug_fix/agent.py deleted file mode 100644 index 169db340be..0000000000 --- a/bugbug/tools/bug_fix/agent.py +++ /dev/null @@ -1,318 +0,0 @@ -"""Bug fix triage tool -- a Bugzilla triage agent. - -Orchestrates a Claude agent that triages bugs according to rulesets -in the rules/ directory. The agent has access to a source repository -and reaches Bugzilla via an out-of-process MCP broker (HTTP transport) -that holds the Bugzilla token — the agent process itself never sees it. 
-""" - -from __future__ import annotations - -import json -import sys -from dataclasses import dataclass -from pathlib import Path - -from claude_agent_sdk import ( - AgentDefinition, - AssistantMessage, - ClaudeAgentOptions, - ClaudeSDKClient, - McpServerConfig, - ResultMessage, - SystemMessage, - TextBlock, - ThinkingBlock, - ToolResultBlock, - ToolUseBlock, - UserMessage, -) -from hackbot_runtime import ActionsRecorder -from hackbot_runtime.actions.claude_sdk import build_actions_sdk_server -from hackbot_runtime.actions.naming import ACTIONS_SERVER_NAME - -from bugbug.tools.base import GenerativeModelTool -from bugbug.tools.bug_fix.config import ( - BUGZILLA_READ_TOOLS, - ENABLED_ACTION_TOOLS, - ENABLED_ACTION_TYPES, - FIREFOX_TOOLS, - SOURCE_WRITE_TOOLS, -) -from bugbug.tools.bug_fix.firefox_mcp import FirefoxContext -from bugbug.tools.bug_fix.firefox_mcp import build_server as build_firefox_server - -HERE = Path(__file__).resolve().parent - - -# --------------------------------------------------------------------------- # -# Result type -# --------------------------------------------------------------------------- # - - -@dataclass -class BugFixResult: - exit_code: int = 0 - bugs_processed: int = 0 - - -# --------------------------------------------------------------------------- # -# Prompts & agents -# --------------------------------------------------------------------------- # - - -def load_system_prompt(rules_dir: Path, extra: str) -> str: - tmpl = (HERE / "prompts" / "system.md").read_text() - - return tmpl.format( - rules_dir=str(rules_dir.resolve()), - extra_instructions=extra or "(none)", - ) - - -def make_investigator() -> AgentDefinition: - """Create a single generic investigator subagent definition.""" - return AgentDefinition( - description=( - "Focused investigator for answering a specific question about " - "a bug or the source tree. 
The main agent writes your complete " - "instructions at spawn time — follow them precisely and return " - "only what was asked for." - ), - prompt=( - "You are a focused investigator subagent. You will be given a " - "self-contained task by the triage agent. Complete it and return " - "a concise answer. Do not make Bugzilla modifications — you have " - "read-only access. Do not speculate beyond what you can verify." - ), - tools=[ - "Read", - "Grep", - "Glob", - "Bash", - *BUGZILLA_READ_TOOLS, - *FIREFOX_TOOLS, - ], - model="inherit", - ) - - -# --------------------------------------------------------------------------- # -# Output streaming -# --------------------------------------------------------------------------- # - - -def _truncate(s: str, n: int = 500) -> str: - return s if len(s) <= n else s[:n] + f"... [{len(s) - n} more chars]" - - -class Reporter: - """Routes streamed agent messages to stdout and/or a log file.""" - - def __init__(self, verbose: bool, log_path: Path | None): - self.verbose = verbose - self._log = log_path.open("w", encoding="utf-8") if log_path else None - self._turn = 0 - - def __enter__(self): - return self - - def __exit__(self, *exc): - if self._log: - self._log.close() - - def start_bug(self, bug_id: int) -> None: - self._turn = 0 - header = f"\n{'#' * 60}\n# bug {bug_id}\n{'#' * 60}" - self._emit(header, always=True) - - def _emit(self, line: str, *, always: bool = False, full: str | None = None): - if self._log: - self._log.write((full if full is not None else line) + "\n") - self._log.flush() - if always or self.verbose: - print(line) - - def message(self, msg) -> None: - if isinstance(msg, AssistantMessage): - is_main = msg.parent_tool_use_id is None - label = "agent" if is_main else "subagent" - if is_main: - self._turn += 1 - self._emit(f"\n--- turn {self._turn} ---") - for block in msg.content: - if isinstance(block, TextBlock): - self._emit(f"\n[{label}] {block.text}", always=is_main) - elif isinstance(block, 
ThinkingBlock): - thinking = block.thinking.strip() - snippet = thinking.split("\n", 1)[0] - self._emit( - f"[{label}:thinking] {_truncate(snippet, 120)}", - full=f"[{label}:thinking]\n{thinking}", - ) - elif isinstance(block, ToolUseBlock): - inp = json.dumps(block.input, default=str) - inp_full = json.dumps(block.input, indent=2, default=str) - self._emit( - f"[{label}→tool] {block.name}({_truncate(inp, 300)})", - full=f"[{label}→tool] {block.name}\n{inp_full}", - ) - - elif isinstance(msg, UserMessage): - if isinstance(msg.content, list): - for block in msg.content: - if isinstance(block, ToolResultBlock): - marker = "ERROR" if block.is_error else "ok" - if isinstance(block.content, str): - text = block.content - elif isinstance(block.content, list): - parts = [ - c.get("text", "") - for c in block.content - if isinstance(c, dict) and c.get("type") == "text" - ] - text = "\n".join(parts) - else: - text = str(block.content) - self._emit( - f" [tool←{marker}] {_truncate(text, 400)}", - full=f" [tool←{marker}]\n{text}", - ) - - elif isinstance(msg, SystemMessage): - if msg.subtype == "init": - model = msg.data.get("model", "?") - self._emit(f"[system] session started (model={model})") - else: - data = json.dumps(msg.data, default=str) - self._emit( - f"[system:{msg.subtype}] {_truncate(data, 200)}", - full=f"[system:{msg.subtype}] {data}", - ) - - elif isinstance(msg, ResultMessage): - self._emit(f"\n{'=' * 60}", always=True) - if msg.total_cost_usd: - line = f"[done] turns={msg.num_turns} cost=${msg.total_cost_usd:.4f}" - else: - line = f"[done] turns={msg.num_turns}" - self._emit(line, always=True) - if msg.is_error: - self._emit(f"[done] ERROR: {msg.result}", always=True) - - -# --------------------------------------------------------------------------- # -# Tool class -# --------------------------------------------------------------------------- # - - -class BugFixTool(GenerativeModelTool): - """Bugzilla triage agent using claude-agent-sdk.""" - - @classmethod 
- def create(cls, **kwargs): - return cls() - - async def run( - self, - *, - bugzilla_mcp_server: McpServerConfig, - source_repo: Path, - bugs: list[int], - instructions: str = "", - task: str | None = None, - rules_dir: Path | None = None, - newest_first: bool = False, - model: str | None = None, - max_turns: int | None = None, - effort: str | None = None, - verbose: bool = False, - log: Path | None = None, - actions_recorder: ActionsRecorder | None = None, - ) -> BugFixResult: - if rules_dir is None: - rules_dir = HERE / "rules" - - if not bugs: - print("[bug_fix] no bug ids supplied — nothing to do", file=sys.stderr) - return BugFixResult(exit_code=0) - - selected = sorted(bugs, reverse=newest_first) - print(f"[bug_fix] triaging {len(selected)} bug(s): {selected}", file=sys.stderr) - - # --- Firefox build/eval MCP server (in-process; no tokens) -------- # - fx_ctx = FirefoxContext.from_source_repo(source_repo) - firefox_server = build_firefox_server(fx_ctx) - - # --- Action-recording MCP server (in-process) --------------------- # - if actions_recorder is None: - # Standalone/script runs have no uploader; copy attachments locally. 
- actions_recorder = ActionsRecorder(artifacts_dir=Path("artifacts")) - actions_server = build_actions_sdk_server( - actions_recorder, types=ENABLED_ACTION_TYPES - ) - - # --- Build agent options ------------------------------------------ # - system_prompt = load_system_prompt(rules_dir, instructions) - - options = ClaudeAgentOptions( - system_prompt=system_prompt, - mcp_servers={ - "bugzilla": bugzilla_mcp_server, - "firefox": firefox_server, - ACTIONS_SERVER_NAME: actions_server, - }, - agents={"investigator": make_investigator()}, - cwd=str(source_repo.resolve()), - add_dirs=[str(rules_dir.resolve())], - permission_mode="bypassPermissions", - allowed_tools=[ - "Read", - "Grep", - "Glob", - "Bash", - "Task", - *SOURCE_WRITE_TOOLS, - *BUGZILLA_READ_TOOLS, - *ENABLED_ACTION_TOOLS, - *FIREFOX_TOOLS, - ], - model=model, - max_turns=max_turns, - **({"effort": effort} if effort else {}), - setting_sources=[], - ) - - # --- Run: one fresh agent context per bug ------------------------- # - exit_code = 0 - rules_path = rules_dir.resolve() - with Reporter(verbose=verbose, log_path=log) as reporter: - for i, bug_id in enumerate(selected, 1): - print(f"[bug_fix] bug {i}/{len(selected)}: {bug_id}", file=sys.stderr) - reporter.start_bug(bug_id) - - if task: - user_prompt = ( - f"Bug to work on: {bug_id}\n\n" - f"Task: {task}\n\n" - f"The rules in {rules_path} are available if the task " - f"calls for them, but the task above is your primary " - f"directive — it overrides the default triage workflow." - ) - else: - user_prompt = ( - f"Triage bug {bug_id}.\n\n" - f"Consult the relevant rules in {rules_path}." 
- ) - - async with ClaudeSDKClient(options=options) as client: - await client.query(user_prompt) - async for msg in client.receive_response(): - reporter.message(msg) - if isinstance(msg, ResultMessage) and msg.is_error: - exit_code = 1 - - return BugFixResult( - exit_code=exit_code, - bugs_processed=len(selected), - ) diff --git a/bugbug/tools/duplicate_bugs/agent.py b/bugbug/tools/duplicate_bugs/agent.py index 752c01ac4e..c1fcb7f34e 100644 --- a/bugbug/tools/duplicate_bugs/agent.py +++ b/bugbug/tools/duplicate_bugs/agent.py @@ -34,10 +34,10 @@ ToolUseBlock, UserMessage, ) +from hackbot_runtime.mcp.bugzilla import BugzillaContext +from hackbot_runtime.mcp.bugzilla import build_server as build_bugzilla_server from bugbug.tools.base import GenerativeModelTool -from bugbug.tools.bug_fix.bugzilla_mcp import BugzillaContext -from bugbug.tools.bug_fix.bugzilla_mcp import build_server as build_bugzilla_server from bugbug.tools.duplicate_bugs.config import ( BUGZILLA_READ_TOOLS, parse_dir_verdict, diff --git a/libs/hackbot-runtime/hackbot_runtime/actions/claude_sdk.py b/libs/hackbot-runtime/hackbot_runtime/actions/claude_sdk.py index d4cac79e95..5312dc99ca 100644 --- a/libs/hackbot-runtime/hackbot_runtime/actions/claude_sdk.py +++ b/libs/hackbot-runtime/hackbot_runtime/actions/claude_sdk.py @@ -10,6 +10,8 @@ from __future__ import annotations +from pathlib import Path + from claude_agent_sdk import create_sdk_mcp_server, tool from hackbot_runtime.actions.naming import ACTIONS_SERVER_NAME, tool_name_for @@ -47,3 +49,22 @@ def build_actions_sdk_server( version="0.1.0", tools=[_make_tool(defn, recorder) for defn in get_actions(types)], ) + + +def actions_server_for( + recorder: ActionsRecorder | None, + types: list[str] | None = None, + *, + fallback_artifacts_dir: Path = Path("artifacts"), +): + """Return ``(recorder, sdk_server)`` ready to plug into ``ClaudeAgentOptions``. 
+ + Convenience around :func:`build_actions_sdk_server` that supplies the common + fallback: standalone/script runs pass ``recorder=None`` and get a local + recorder that copies attachments under ``fallback_artifacts_dir`` (no + uploader). Agents running under the runtime pass ``ctx.actions`` and it is + used as-is. + """ + if recorder is None: + recorder = ActionsRecorder(artifacts_dir=fallback_artifacts_dir) + return recorder, build_actions_sdk_server(recorder, types=types) diff --git a/libs/hackbot-runtime/hackbot_runtime/claude.py b/libs/hackbot-runtime/hackbot_runtime/claude.py new file mode 100644 index 0000000000..da1d6c2234 --- /dev/null +++ b/libs/hackbot-runtime/hackbot_runtime/claude.py @@ -0,0 +1,125 @@ +"""Shared claude-agent-sdk helpers for hackbot agents. + +Generic, agent-neutral building blocks that every claude-agent-sdk agent would +otherwise copy verbatim. Agents still assemble their own ``ClaudeAgentOptions`` +and drive the ``ClaudeSDKClient`` loop — these just remove the boilerplate of +rendering the streamed messages. + +Requires the ``claude-sdk`` optional extra of hackbot-runtime. +""" + +from __future__ import annotations + +import json +from pathlib import Path + +from claude_agent_sdk import ( + AssistantMessage, + ResultMessage, + SystemMessage, + TextBlock, + ThinkingBlock, + ToolResultBlock, + ToolUseBlock, + UserMessage, +) + + +def _truncate(s: str, n: int = 500) -> str: + return s if len(s) <= n else s[:n] + f"... [{len(s) - n} more chars]" + + +class Reporter: + """Routes streamed claude-agent-sdk messages to stdout and/or a log file.""" + + def __init__(self, verbose: bool, log_path: Path | None): + self.verbose = verbose + self._log = log_path.open("w", encoding="utf-8") if log_path else None + self._turn = 0 + + def __enter__(self): + return self + + def __exit__(self, *exc): + if self._log: + self._log.close() + + def header(self, title: str) -> None: + """Emit a section header (e.g. 
``"bug 12345"``) and reset the turn count.""" + self._turn = 0 + banner = f"\n{'#' * 60}\n# {title}\n{'#' * 60}" + self._emit(banner, always=True) + + def _emit(self, line: str, *, always: bool = False, full: str | None = None): + if self._log: + self._log.write((full if full is not None else line) + "\n") + self._log.flush() + if always or self.verbose: + print(line) + + def message(self, msg) -> None: + if isinstance(msg, AssistantMessage): + is_main = msg.parent_tool_use_id is None + label = "agent" if is_main else "subagent" + if is_main: + self._turn += 1 + self._emit(f"\n--- turn {self._turn} ---") + for block in msg.content: + if isinstance(block, TextBlock): + self._emit(f"\n[{label}] {block.text}", always=is_main) + elif isinstance(block, ThinkingBlock): + thinking = block.thinking.strip() + snippet = thinking.split("\n", 1)[0] + self._emit( + f"[{label}:thinking] {_truncate(snippet, 120)}", + full=f"[{label}:thinking]\n{thinking}", + ) + elif isinstance(block, ToolUseBlock): + inp = json.dumps(block.input, default=str) + inp_full = json.dumps(block.input, indent=2, default=str) + self._emit( + f"[{label}→tool] {block.name}({_truncate(inp, 300)})", + full=f"[{label}→tool] {block.name}\n{inp_full}", + ) + + elif isinstance(msg, UserMessage): + if isinstance(msg.content, list): + for block in msg.content: + if isinstance(block, ToolResultBlock): + marker = "ERROR" if block.is_error else "ok" + if isinstance(block.content, str): + text = block.content + elif isinstance(block.content, list): + parts = [ + c.get("text", "") + for c in block.content + if isinstance(c, dict) and c.get("type") == "text" + ] + text = "\n".join(parts) + else: + text = str(block.content) + self._emit( + f" [tool←{marker}] {_truncate(text, 400)}", + full=f" [tool←{marker}]\n{text}", + ) + + elif isinstance(msg, SystemMessage): + if msg.subtype == "init": + model = msg.data.get("model", "?") + self._emit(f"[system] session started (model={model})") + else: + data = json.dumps(msg.data, 
default=str) + self._emit( + f"[system:{msg.subtype}] {_truncate(data, 200)}", + full=f"[system:{msg.subtype}] {data}", + ) + + elif isinstance(msg, ResultMessage): + self._emit(f"\n{'=' * 60}", always=True) + if msg.total_cost_usd: + line = f"[done] turns={msg.num_turns} cost=${msg.total_cost_usd:.4f}" + else: + line = f"[done] turns={msg.num_turns}" + self._emit(line, always=True) + if msg.is_error: + self._emit(f"[done] ERROR: {msg.result}", always=True) diff --git a/libs/hackbot-runtime/hackbot_runtime/mcp/__init__.py b/libs/hackbot-runtime/hackbot_runtime/mcp/__init__.py new file mode 100644 index 0000000000..663427615d --- /dev/null +++ b/libs/hackbot-runtime/hackbot_runtime/mcp/__init__.py @@ -0,0 +1 @@ +"""In-process MCP servers for hackbot agents (claude-sdk extra).""" diff --git a/bugbug/tools/bug_fix/bugzilla_mcp.py b/libs/hackbot-runtime/hackbot_runtime/mcp/bugzilla.py similarity index 100% rename from bugbug/tools/bug_fix/bugzilla_mcp.py rename to libs/hackbot-runtime/hackbot_runtime/mcp/bugzilla.py diff --git a/libs/hackbot-runtime/pyproject.toml b/libs/hackbot-runtime/pyproject.toml index e05c460233..ab0c2e2ad2 100644 --- a/libs/hackbot-runtime/pyproject.toml +++ b/libs/hackbot-runtime/pyproject.toml @@ -9,10 +9,12 @@ dependencies = [ ] [project.optional-dependencies] -# claude-agent-sdk adapter (hackbot_runtime.actions.claude_sdk) that exposes -# the action registry as an in-process MCP server. Not needed by consumers -# that only read the summary contract. -claude-sdk = ["claude-agent-sdk>=0.1.30"] +# claude-agent-sdk building blocks: the action-registry MCP adapter +# (hackbot_runtime.actions.claude_sdk), the streamed-message Reporter +# (hackbot_runtime.claude), and the read-only Bugzilla MCP server +# (hackbot_runtime.mcp.bugzilla, which also needs bugsy). Not needed by +# consumers that only read the summary contract. 
+claude-sdk = ["claude-agent-sdk>=0.1.30", "bugsy"] [tool.pytest.ini_options] asyncio_mode = "auto" diff --git a/libs/hackbot-runtime/tests/test_claude.py b/libs/hackbot-runtime/tests/test_claude.py new file mode 100644 index 0000000000..0433437778 --- /dev/null +++ b/libs/hackbot-runtime/tests/test_claude.py @@ -0,0 +1,36 @@ +"""Tests for the shared claude-agent-sdk Reporter (hackbot_runtime.claude).""" + +from hackbot_runtime.claude import Reporter, _truncate + + +def test_truncate_short_string_unchanged(): + assert _truncate("hello", 10) == "hello" + + +def test_truncate_long_string_marks_remainder(): + out = _truncate("x" * 20, 5) + assert out.startswith("xxxxx") + assert "15 more chars" in out + + +def test_header_writes_banner_to_log(tmp_path): + log = tmp_path / "agent.log" + with Reporter(verbose=False, log_path=log) as reporter: + reporter.header("bug 12345") + contents = log.read_text() + assert "# bug 12345" in contents + assert "#" * 60 in contents + + +def test_header_always_prints_even_when_not_verbose(capsys): + with Reporter(verbose=False, log_path=None) as reporter: + reporter.header("bug 999") + out = capsys.readouterr().out + assert "# bug 999" in out + + +def test_no_log_file_when_path_is_none(tmp_path): + # Should not raise and should not create any file. 
+ with Reporter(verbose=True, log_path=None) as reporter: + reporter.header("section") + assert not list(tmp_path.iterdir()) diff --git a/libs/hackbot-runtime/tests/test_claude_sdk.py b/libs/hackbot-runtime/tests/test_claude_sdk.py index 9def348094..0c30aa336f 100644 --- a/libs/hackbot-runtime/tests/test_claude_sdk.py +++ b/libs/hackbot-runtime/tests/test_claude_sdk.py @@ -2,7 +2,10 @@ import mcp.server.lowlevel.server as low from hackbot_runtime.actions import ActionsRecorder -from hackbot_runtime.actions.claude_sdk import build_actions_sdk_server +from hackbot_runtime.actions.claude_sdk import ( + actions_server_for, + build_actions_sdk_server, +) from mcp.types import CallToolRequest, CallToolRequestParams, ListToolsRequest _ALL = [ @@ -84,3 +87,24 @@ async def test_missing_file_surfaces_is_error(): assert result.isError is True text = " ".join(getattr(c, "text", "") for c in result.content) assert "file not found" in text + + +def test_actions_server_for_creates_fallback_recorder(tmp_path): + recorder, config = actions_server_for( + None, types=_ALL, fallback_artifacts_dir=tmp_path + ) + assert isinstance(recorder, ActionsRecorder) + assert config["type"] == "sdk" + + +def test_actions_server_for_reuses_given_recorder(): + given = ActionsRecorder() + recorder, config = actions_server_for(given, types=_ALL) + assert recorder is given + assert config["type"] == "sdk" + + +async def test_actions_server_for_exposes_selected_tools(): + _, config = actions_server_for(ActionsRecorder(), types=["bugzilla.update_bug"]) + tools = await _list(config["instance"]) + assert {t.name for t in tools} == {"bugzilla_update_bug"} diff --git a/pyproject.toml b/pyproject.toml index f8b9114397..94fdc705e6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -71,7 +71,8 @@ nlp = [ "spacy==3.8.14", ] nn = [] -# Tooling for the hackbot bug-fix agent (bugbug/tools/bug_fix). Not a base +# hackbot runtime for bugbug tools that use it (e.g. 
tools/duplicate_bugs, +# which builds the Bugzilla MCP via hackbot_runtime.mcp.bugzilla). Not a base # dependency: hackbot-runtime is a workspace-only package, so a standalone # `pip install bugbug` must not require it. bug-fix = [ @@ -132,9 +133,6 @@ include = ["/bugbug", "/scripts", "/VERSION"] [tool.hatch.build.targets.wheel] packages = ["bugbug", "scripts"] artifacts = [ - "bugbug/tools/bug_fix/prompts/", - "bugbug/tools/bug_fix/rules/", - "bugbug/tools/bug_fix/memory/", "bugbug/tools/duplicate_bugs/prompts/", ] diff --git a/services/hackbot-api/app/agents.py b/services/hackbot-api/app/agents.py index ad60d0f882..5802f6f511 100644 --- a/services/hackbot-api/app/agents.py +++ b/services/hackbot-api/app/agents.py @@ -1,3 +1,4 @@ +import json from collections.abc import Callable from dataclasses import dataclass @@ -12,26 +13,31 @@ class AgentSpec: description: str job_name: str input_schema: type[BaseModel] - build_env: Callable[[BaseModel], dict[str, str]] - - -def _bug_fix_env(inputs: BaseModel) -> dict[str, str]: - assert isinstance(inputs, BugFixInputs) - # The bug-fix agent's Job is multi-container: an `agent` container - # (no tokens) and a `broker` sidecar (holds BZ_API_KEY at deploy time - # via Secret Manager). The orchestrator only overrides the `agent` - # container's env per execution — the broker is fully configured at - # deploy time. The agent reaches the broker on the task's loopback. - env: dict[str, str] = { - "BUG_ID": str(inputs.bug_id), - "BUGZILLA_MCP_URL": "http://127.0.0.1:8765/mcp", - } - if inputs.model is not None: - env["MODEL"] = inputs.model - if inputs.max_turns is not None: - env["MAX_TURNS"] = str(inputs.max_turns) - if inputs.effort is not None: - env["EFFORT"] = inputs.effort + # Optional override for the rare agent whose env vars don't map 1:1 from + # its input schema. Defaults to ``model_to_env`` (field -> UPPER_SNAKE env). 
+ build_env: Callable[[BaseModel], dict[str, str]] | None = None + + +def model_to_env(inputs: BaseModel) -> dict[str, str]: + """Serialise validated inputs into Cloud Run Job env overrides. + + Each schema field maps to an upper-cased env var (``bug_id`` -> ``BUG_ID``); + ``None`` fields are skipped, and the agent reads them back via + ``pydantic_settings.BaseSettings`` (which upper-cases field names by + default). Lists/dicts are JSON-encoded. Deploy-time constants (e.g. the + broker loopback URL) are NOT inputs — they belong in the Job's static env + config, not here. + """ + env: dict[str, str] = {} + for name, value in inputs.model_dump(mode="json").items(): + if value is None: + continue + if isinstance(value, str): + env[name.upper()] = value + elif isinstance(value, (list, dict)): + env[name.upper()] = json.dumps(value) + else: + env[name.upper()] = str(value) return env @@ -41,6 +47,5 @@ def _bug_fix_env(inputs: BaseModel) -> dict[str, str]: description="Investigate a Bugzilla bug and produce a candidate fix patch against the Firefox source tree.", job_name="hackbot-agent-bug-fix", input_schema=BugFixInputs, - build_env=_bug_fix_env, ), } diff --git a/services/hackbot-api/app/routers/runs.py b/services/hackbot-api/app/routers/runs.py index d4c4709924..2a4fd3af53 100644 --- a/services/hackbot-api/app/routers/runs.py +++ b/services/hackbot-api/app/routers/runs.py @@ -6,7 +6,7 @@ from sqlalchemy.ext.asyncio import AsyncSession from app import gcs, jobs -from app.agents import AGENT_REGISTRY, AgentSpec +from app.agents import AGENT_REGISTRY, AgentSpec, model_to_env from app.auth import require_api_key from app.config import settings from app.database.connection import get_db @@ -82,7 +82,7 @@ async def create_run( "RESULTS_PREFIX": results_prefix, "RESULTS_POLICY_URL": policy["url"], "RESULTS_POLICY_FIELDS": json.dumps(policy["fields"]), - **agent.build_env(inputs), + **(agent.build_env or model_to_env)(inputs), } try: diff --git 
a/services/hackbot-api/tests/test_agents.py b/services/hackbot-api/tests/test_agents.py new file mode 100644 index 0000000000..c99c9d4689 --- /dev/null +++ b/services/hackbot-api/tests/test_agents.py @@ -0,0 +1,32 @@ +"""Tests for the agent registry and generic env serialization.""" + +from app.agents import AGENT_REGISTRY, model_to_env +from app.schemas import BugFixInputs + + +def test_model_to_env_uppercases_and_stringifies(): + env = model_to_env(BugFixInputs(bug_id=12345, model="claude-opus", max_turns=8)) + assert env["BUG_ID"] == "12345" + assert env["MODEL"] == "claude-opus" + assert env["MAX_TURNS"] == "8" + + +def test_model_to_env_skips_none_fields(): + env = model_to_env(BugFixInputs(bug_id=1)) + assert env == {"BUG_ID": "1"} + # Optional fields left unset must not leak as empty/"None" env vars. + assert "MODEL" not in env + assert "EFFORT" not in env + + +def test_model_to_env_does_not_emit_deploy_constants(): + # The broker loopback URL is static Job config, not a per-run input. + env = model_to_env(BugFixInputs(bug_id=1, model="x", max_turns=2, effort="high")) + assert "BUGZILLA_MCP_URL" not in env + + +def test_bug_fix_registry_uses_default_env_serializer(): + spec = AGENT_REGISTRY["bug-fix"] + # No hand-written build_env: the router falls back to model_to_env. + assert spec.build_env is None + assert spec.input_schema is BugFixInputs diff --git a/uv.lock b/uv.lock index bacaad9662..9aebc9b008 100644 --- a/uv.lock +++ b/uv.lock @@ -2109,7 +2109,6 @@ name = "hackbot-agent-bug-fix" version = "0.1.0" source = { virtual = "agents/bug-fix" } dependencies = [ - { name = "bugbug" }, { name = "bugsy" }, { name = "claude-agent-sdk" }, { name = "grizzly-framework" }, @@ -2122,7 +2121,6 @@ dependencies = [ [package.metadata] requires-dist = [ - { name = "bugbug", editable = "." 
}, { name = "bugsy" }, { name = "claude-agent-sdk", specifier = ">=0.1.30" }, { name = "grizzly-framework" }, @@ -2188,11 +2186,13 @@ dependencies = [ [package.optional-dependencies] claude-sdk = [ + { name = "bugsy" }, { name = "claude-agent-sdk" }, ] [package.metadata] requires-dist = [ + { name = "bugsy", marker = "extra == 'claude-sdk'" }, { name = "claude-agent-sdk", marker = "extra == 'claude-sdk'", specifier = ">=0.1.30" }, { name = "pydantic-settings", specifier = ">=2.1.0" }, { name = "requests", specifier = ">=2.32.0" }, From 3a1079dd5ee35e9f8815f0649e23a811da70b0d3 Mon Sep 17 00:00:00 2001 From: Suhaib Mujahid Date: Tue, 9 Jun 2026 15:24:44 -0400 Subject: [PATCH 02/21] Remove unused load_config from bug-fix agent config --- agents/bug-fix/agent/config.py | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/agents/bug-fix/agent/config.py b/agents/bug-fix/agent/config.py index e03b637154..0cc18bfc8a 100644 --- a/agents/bug-fix/agent/config.py +++ b/agents/bug-fix/agent/config.py @@ -1,8 +1,3 @@ -from __future__ import annotations - -from pathlib import Path - -import yaml from hackbot_runtime.actions.naming import ACTIONS_SERVER_NAME, tool_name_for # Tools that can modify the source repo — blocked under dry-run. @@ -39,18 +34,3 @@ "mcp__firefox__evaluate_js_shell", "mcp__firefox__bootstrap_firefox", ] - -# Deployment-stable settings that may be supplied via config YAML. 
-_CONFIG_KEYS = {"base_url", "source_repo", "rules_dir", "model", "max_turns", "effort"} - - -def load_config(path: Path) -> dict: - with path.open() as f: - data = yaml.safe_load(f) or {} - unknown = set(data) - _CONFIG_KEYS - if unknown: - raise ValueError( - f"unknown config key(s) in {path}: {sorted(unknown)}\n" - f"allowed: {sorted(_CONFIG_KEYS)}" - ) - return data From 4f728658aecad08132a39b42b111ee8956e21fbb Mon Sep 17 00:00:00 2001 From: Suhaib Mujahid Date: Tue, 9 Jun 2026 18:40:19 -0400 Subject: [PATCH 03/21] Add framework-neutral agent-tools package with unified @tool declarations --- agents/README.md | 7 +- agents/bug-fix/Dockerfile | 2 + agents/bug-fix/agent/__init__.py | 10 +- agents/bug-fix/agent/config.py | 2 +- agents/bug-fix/agent/firefox_mcp.py | 250 -------------- agents/bug-fix/broker/__main__.py | 7 +- agents/bug-fix/pyproject.toml | 4 +- agents/bug-fix/run_local.py | 10 +- bugbug/tools/duplicate_bugs/agent.py | 7 +- http_service/Dockerfile | 1 + http_service/Dockerfile.bg_worker | 1 + infra/dockerfile.base | 1 + infra/dockerfile.spawn_pipeline | 1 + libs/agent-tools/agent_tools/__init__.py | 14 + libs/agent-tools/agent_tools/bugzilla.py | 251 ++++++++++++++ libs/agent-tools/agent_tools/claude_sdk.py | 63 ++++ .../agent_tools/firefox/__init__.py | 200 +++++++++++ .../agent_tools/firefox/tools}/__init__.py | 0 .../firefox/tools}/bootstrap_firefox.py | 0 .../firefox/tools}/build_firefox.py | 0 .../firefox/tools}/evaluate_testcase.py | 0 .../firefox/tools}/js_shell_evaluator.py | 0 libs/agent-tools/agent_tools/registry.py | 109 ++++++ libs/agent-tools/pyproject.toml | 17 + libs/agent-tools/tests/test_bugzilla.py | 54 +++ libs/agent-tools/tests/test_firefox.py | 26 ++ libs/agent-tools/tests/test_registry.py | 64 ++++ .../hackbot_runtime/__init__.py | 12 +- .../hackbot_runtime/actions/__init__.py | 27 +- .../hackbot_runtime/actions/bugzilla.py | 81 ++--- .../hackbot_runtime/actions/claude_sdk.py | 68 ++-- .../hackbot_runtime/actions/naming.py | 15 
- .../hackbot_runtime/actions/registry.py | 63 ---- .../hackbot_runtime/context.py | 2 +- .../hackbot_runtime/mcp/__init__.py | 1 - .../hackbot_runtime/mcp/bugzilla.py | 322 ------------------ libs/hackbot-runtime/pyproject.toml | 11 +- .../tests/test_bugzilla_actions.py | 5 +- libs/hackbot-runtime/tests/test_claude_sdk.py | 9 +- libs/hackbot-runtime/tests/test_registry.py | 47 --- pyproject.toml | 11 +- services/hackbot-api/Dockerfile | 1 + uv.lock | 47 ++- 43 files changed, 952 insertions(+), 871 deletions(-) delete mode 100644 agents/bug-fix/agent/firefox_mcp.py create mode 100644 libs/agent-tools/agent_tools/__init__.py create mode 100644 libs/agent-tools/agent_tools/bugzilla.py create mode 100644 libs/agent-tools/agent_tools/claude_sdk.py create mode 100644 libs/agent-tools/agent_tools/firefox/__init__.py rename {agents/bug-fix/agent/firefox_tools => libs/agent-tools/agent_tools/firefox/tools}/__init__.py (100%) rename {agents/bug-fix/agent/firefox_tools => libs/agent-tools/agent_tools/firefox/tools}/bootstrap_firefox.py (100%) rename {agents/bug-fix/agent/firefox_tools => libs/agent-tools/agent_tools/firefox/tools}/build_firefox.py (100%) rename {agents/bug-fix/agent/firefox_tools => libs/agent-tools/agent_tools/firefox/tools}/evaluate_testcase.py (100%) rename {agents/bug-fix/agent/firefox_tools => libs/agent-tools/agent_tools/firefox/tools}/js_shell_evaluator.py (100%) create mode 100644 libs/agent-tools/agent_tools/registry.py create mode 100644 libs/agent-tools/pyproject.toml create mode 100644 libs/agent-tools/tests/test_bugzilla.py create mode 100644 libs/agent-tools/tests/test_firefox.py create mode 100644 libs/agent-tools/tests/test_registry.py delete mode 100644 libs/hackbot-runtime/hackbot_runtime/actions/naming.py delete mode 100644 libs/hackbot-runtime/hackbot_runtime/actions/registry.py delete mode 100644 libs/hackbot-runtime/hackbot_runtime/mcp/__init__.py delete mode 100644 libs/hackbot-runtime/hackbot_runtime/mcp/bugzilla.py delete mode 100644 
libs/hackbot-runtime/tests/test_registry.py diff --git a/agents/README.md b/agents/README.md index 0a9bda6112..0b35b57558 100644 --- a/agents/README.md +++ b/agents/README.md @@ -30,7 +30,12 @@ Don't re-implement these — import them: messages to stdout/log. Call `reporter.header("...")` per work item, `reporter.message(msg)` per message. - `from hackbot_runtime.actions.claude_sdk import actions_server_for` — returns `(recorder, mcp_server)`; write actions land in `summary.json` instead of mutating anything. -- `from hackbot_runtime.mcp.bugzilla import BugzillaContext, build_server` — read-only Bugzilla MCP. + +Reusable MCP **tool servers** live in the separate `agent-tools` package, each +behind its own optional extra (`agent-tools[bugzilla]`, `agent-tools[firefox]`): + +- `from agent_tools.bugzilla import BugzillaContext, build_server` — read-only Bugzilla MCP. +- `from agent_tools.firefox import FirefoxContext, build_server` — Firefox build/test MCP. You still assemble your own `ClaudeAgentOptions` and drive the `ClaudeSDKClient` loop — those stay explicit and in your hands. diff --git a/agents/bug-fix/Dockerfile b/agents/bug-fix/Dockerfile index f69769e71e..57c829574a 100644 --- a/agents/bug-fix/Dockerfile +++ b/agents/bug-fix/Dockerfile @@ -11,6 +11,7 @@ COPY http_service/pyproject.toml ./http_service/ COPY services/hackbot-api/pyproject.toml ./services/hackbot-api/ COPY agents/bug-fix/pyproject.toml ./agents/bug-fix/ COPY libs/hackbot-runtime/pyproject.toml ./libs/hackbot-runtime/ +COPY libs/agent-tools/pyproject.toml ./libs/agent-tools/ # Install external deps without building workspace members. RUN --mount=type=cache,target=/root/.cache/uv \ @@ -19,6 +20,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \ # Workspace members the agent image actually needs (source included). 
COPY agents/bug-fix ./agents/bug-fix COPY libs/hackbot-runtime ./libs/hackbot-runtime +COPY libs/agent-tools ./libs/agent-tools RUN --mount=type=cache,target=/root/.cache/uv \ uv sync --locked --no-dev --package hackbot-agent-bug-fix diff --git a/agents/bug-fix/agent/__init__.py b/agents/bug-fix/agent/__init__.py index fa2ed05b96..100a44a7fc 100644 --- a/agents/bug-fix/agent/__init__.py +++ b/agents/bug-fix/agent/__init__.py @@ -12,6 +12,9 @@ from dataclasses import dataclass from pathlib import Path +from agent_tools import firefox +from agent_tools.claude_sdk import build_sdk_server +from agent_tools.registry import ACTIONS_SERVER_NAME from claude_agent_sdk import ( AgentDefinition, ClaudeAgentOptions, @@ -21,7 +24,6 @@ ) from hackbot_runtime import ActionsRecorder from hackbot_runtime.actions.claude_sdk import actions_server_for -from hackbot_runtime.actions.naming import ACTIONS_SERVER_NAME from hackbot_runtime.claude import Reporter from .config import ( @@ -31,8 +33,6 @@ FIREFOX_TOOLS, SOURCE_WRITE_TOOLS, ) -from .firefox_mcp import FirefoxContext -from .firefox_mcp import build_server as build_firefox_server HERE = Path(__file__).resolve().parent @@ -122,8 +122,8 @@ async def run_bug_fix( print(f"[bug_fix] triaging {len(selected)} bug(s): {selected}", file=sys.stderr) # --- Firefox build/eval MCP server (in-process; no tokens) -------- # - fx_ctx = FirefoxContext.from_source_repo(source_repo) - firefox_server = build_firefox_server(fx_ctx) + fx_ctx = firefox.FirefoxContext.from_source_repo(source_repo) + firefox_server = build_sdk_server("firefox", fx_ctx, firefox.TOOLS) # --- Action-recording MCP server (in-process) --------------------- # # Standalone/script runs pass actions_recorder=None and get a local diff --git a/agents/bug-fix/agent/config.py b/agents/bug-fix/agent/config.py index 0cc18bfc8a..948b0001de 100644 --- a/agents/bug-fix/agent/config.py +++ b/agents/bug-fix/agent/config.py @@ -1,4 +1,4 @@ -from hackbot_runtime.actions.naming import 
ACTIONS_SERVER_NAME, tool_name_for +from agent_tools.registry import ACTIONS_SERVER_NAME, tool_name_for # Tools that can modify the source repo — blocked under dry-run. SOURCE_WRITE_TOOLS = {"Write", "Edit", "MultiEdit", "NotebookEdit"} diff --git a/agents/bug-fix/agent/firefox_mcp.py b/agents/bug-fix/agent/firefox_mcp.py deleted file mode 100644 index 6a030b291c..0000000000 --- a/agents/bug-fix/agent/firefox_mcp.py +++ /dev/null @@ -1,250 +0,0 @@ -"""In-process MCP server for Firefox build + testcase evaluation.""" - -from __future__ import annotations - -import json -from dataclasses import dataclass -from pathlib import Path - -from claude_agent_sdk import create_sdk_mcp_server, tool - -from .firefox_tools import ( - bootstrap_firefox, - build_firefox, - evaluate_testcase, - js_shell_evaluator, -) - - -@dataclass -class FirefoxContext: - """Firefox-related paths, derived from --source-repo at startup. - - Defaults follow: mozconfig at the source root, objdir-ff-asan/ under it. The - agent can still override firefox_binary per-call if it wants to test a - different build. - """ - - source_dir: Path - mozconfig: Path - objdir: Path - binary: Path - js_binary: Path - - @classmethod - def from_source_repo(cls, source_repo: Path) -> "FirefoxContext": - src = source_repo.resolve() - objdir = src / "objdir-ff-asan" - return cls( - source_dir=src, - mozconfig=src / ".mozconfig", - objdir=objdir, - binary=objdir / "dist" / "bin" / "firefox", - js_binary=objdir / "dist" / "bin" / "js", - ) - - -def _jtext(obj) -> dict: - return {"content": [{"type": "text", "text": json.dumps(obj, indent=2)}]} - - -def build_server(ctx: FirefoxContext): - """Create the in-process Firefox MCP server bound to ``ctx``.""" - - @tool( - "evaluate_testcase", - "Run a testcase in Firefox under xvfb and capture crash output via " - "grizzly. The build's sanitizer configuration (ASAN, TSAN, plain " - "debug, etc.) is whatever the configured mozconfig produces. 
" - "Returns JSON: " - "crashed (bool) — whether Firefox crashed; " - "crashed_parent (bool) — parent process vs content process crash; " - "logs (dict) — stderr/stdout and, if crashed, crashdata (crash/" - "sanitizer report); " - "files (dict) — the testcase bundle that triggered the crash; " - "message (str) — human-readable summary. " - "When crashed=false, logs.stderr/stdout often reveal why the trigger " - "missed (JS exception, wrong pref, feature gated off).", - { - "type": "object", - "properties": { - "content": { - "type": "string", - "description": "Testcase file content (HTML, JS, SVG, etc.)", - }, - "filename": { - "type": "string", - "description": ( - "Name for the testcase entry point, e.g. 'test.html'. " - "Extension matters: grizzly serves it with the matching " - "MIME type." - ), - }, - "firefox_binary": { - "type": "string", - "description": ( - f"Path to Firefox binary. Optional — defaults to {ctx.binary}" - ), - }, - "timeout": { - "type": "integer", - "description": "Seconds to wait for a crash (default: 30)", - }, - "prefs": { - "type": "object", - "description": ( - "Firefox about:config prefs to set before launch, e.g. " - '{"dom.webgpu.enabled": true}. Use this to unlock ' - "gated features your testcase needs." - ), - "additionalProperties": { - "oneOf": [ - {"type": "string"}, - {"type": "integer"}, - {"type": "boolean"}, - ] - }, - }, - }, - "required": ["content", "filename"], - }, - ) - async def evaluate_testcase_tool(args): - binary = Path(args.get("firefox_binary") or ctx.binary) - crash_info = await evaluate_testcase( - content=args["content"], - filename=args["filename"], - firefox_binary=binary, - timeout=args.get("timeout", 30), - prefs=args.get("prefs") or {}, - ) - return _jtext(crash_info) - - @tool( - "build_firefox", - "Build Firefox using the configured mozconfig. Slow (tens of " - "minutes on a cold build, faster incremental). Returns JSON: " - "success (bool), build_dir (str), message (str), stdout/stderr. 
" - "Only call this if you've changed source or the binary is missing — " - "check if the binary exists first.", - { - "type": "object", - "properties": { - "firefox_dir": { - "type": "string", - "description": ( - "Firefox source directory. Optional — defaults to " - f"{ctx.source_dir}" - ), - }, - "mozconfig_path": { - "type": "string", - "description": ( - f"MOZCONFIG to use. Optional — defaults to {ctx.mozconfig}" - ), - }, - }, - }, - ) - async def build_firefox_tool(args): - firefox_dir = ( - Path(args["firefox_dir"]) if "firefox_dir" in args else ctx.source_dir - ) - mozconfig = ( - Path(args["mozconfig_path"]) if "mozconfig_path" in args else ctx.mozconfig - ) - result = await build_firefox(firefox_dir, mozconfig, ctx.objdir) - return _jtext(result) - - @tool( - "evaluate_js_shell", - "Run a JS testcase in the SpiderMonkey shell and capture crash " - "output. The shell's sanitizer configuration is whatever the " - "configured mozconfig produces. Much faster than full-browser " - "evaluate_testcase — use this for engine-level bugs (JIT, GC, " - "TypedArrays, WASM) that don't need a DOM. Returns JSON: " - "crashed (bool) — whether the shell crashed (signal or sanitizer); " - "message (str) — human-readable summary, includes signal name if killed; " - "logs (dict) — stderr/stdout (tail-truncated to 1 MB) and, if crashed, " - "crashdata (crash/sanitizer report); " - "files (dict) — the .js testcase that triggered the crash. " - "A nonzero exit without a signal is a JS exception, NOT a crash — " - "check logs.stderr for the syntax/runtime error.", - { - "type": "object", - "properties": { - "content": { - "type": "string", - "description": "JavaScript testcase source", - }, - "js_binary": { - "type": "string", - "description": ( - "Path to the SpiderMonkey js binary. 
Optional — " - f"defaults to {ctx.js_binary}" - ), - }, - "timeout": { - "type": "integer", - "description": "Seconds to wait before killing the shell (default: 30)", - }, - "flags": { - "type": "array", - "items": {"type": "string"}, - "description": ( - 'Extra shell flags, e.g. ["--no-threads", ' - '"--ion-eager"]. --fuzzing-safe is always prepended.' - ), - }, - }, - "required": ["content"], - }, - ) - async def evaluate_js_shell_tool(args): - binary = Path(args.get("js_binary") or ctx.js_binary) - crash_info = await js_shell_evaluator( - content=args["content"], - js_binary=binary, - timeout=args.get("timeout", 30), - flags=args.get("flags"), - ) - return _jtext(crash_info) - - @tool( - "bootstrap_firefox", - "Run `./mach bootstrap` to install the Firefox build toolchain " - "(rust, clang, cbindgen) under the running user's ~/.mozbuild/. " - "Required before a full (non-artifact) build. Slow — ~10-15 min on a " - "fresh image, fast on re-runs. Returns JSON: success, message, " - "stdout, stderr. Only call this if you intend to do a full build; " - "artifact builds don't need bootstrap.", - { - "type": "object", - "properties": { - "firefox_dir": { - "type": "string", - "description": ( - "Firefox source directory. 
Optional — defaults to " - f"{ctx.source_dir}" - ), - }, - }, - }, - ) - async def bootstrap_firefox_tool(args): - firefox_dir = ( - Path(args["firefox_dir"]) if "firefox_dir" in args else ctx.source_dir - ) - result = await bootstrap_firefox(firefox_dir) - return _jtext(result) - - return create_sdk_mcp_server( - name="firefox", - version="0.1.0", - tools=[ - evaluate_testcase_tool, - build_firefox_tool, - evaluate_js_shell_tool, - bootstrap_firefox_tool, - ], - ) diff --git a/agents/bug-fix/broker/__main__.py b/agents/bug-fix/broker/__main__.py index 58e65c66ad..f10e69a637 100644 --- a/agents/bug-fix/broker/__main__.py +++ b/agents/bug-fix/broker/__main__.py @@ -11,8 +11,9 @@ import bugsy import uvicorn -from hackbot_runtime.mcp.bugzilla import BugzillaContext -from hackbot_runtime.mcp.bugzilla import build_server as build_bugzilla_server +from agent_tools import bugzilla +from agent_tools.bugzilla import BugzillaContext +from agent_tools.claude_sdk import build_sdk_server from mcp.server.streamable_http_manager import StreamableHTTPSessionManager from pydantic_settings import BaseSettings, SettingsConfigDict from starlette.applications import Starlette @@ -35,7 +36,7 @@ def build_app(inputs: BrokerInputs) -> Starlette: api_key=inputs.bugzilla_api_key, bugzilla_url=inputs.bugzilla_api_url ) ctx = BugzillaContext(client=client) - sdk_config = build_bugzilla_server(ctx) + sdk_config = build_sdk_server("bugzilla", ctx, bugzilla.TOOLS) mcp_server = sdk_config["instance"] manager = StreamableHTTPSessionManager(app=mcp_server, stateless=True) diff --git a/agents/bug-fix/pyproject.toml b/agents/bug-fix/pyproject.toml index 1933cfcd04..93e4ed8f0d 100644 --- a/agents/bug-fix/pyproject.toml +++ b/agents/bug-fix/pyproject.toml @@ -5,9 +5,8 @@ description = "Cloud Run Job image that runs the bug-fix agent for hackbot-api" requires-python = ">=3.12" dependencies = [ "hackbot-runtime[claude-sdk]", + "agent-tools[bugzilla,firefox]", "bugsy", - "grizzly-framework", - 
"prefpicker", "claude-agent-sdk>=0.1.30", "mcp>=1.0.0", "starlette>=0.36.0", @@ -16,3 +15,4 @@ dependencies = [ [tool.uv.sources] hackbot-runtime = { workspace = true } +agent-tools = { workspace = true } diff --git a/agents/bug-fix/run_local.py b/agents/bug-fix/run_local.py index ca7149d178..5627b88aaf 100644 --- a/agents/bug-fix/run_local.py +++ b/agents/bug-fix/run_local.py @@ -17,7 +17,9 @@ sys.path.insert(0, str(Path(__file__).resolve().parent)) from agent import run_bug_fix # noqa: E402 -from hackbot_runtime.mcp.bugzilla import BugzillaContext, build_server # noqa: E402 +from agent_tools import bugzilla # noqa: E402 +from agent_tools.bugzilla import BugzillaContext # noqa: E402 +from agent_tools.claude_sdk import build_sdk_server # noqa: E402 class Settings(BaseSettings): @@ -39,13 +41,15 @@ class Settings(BaseSettings): async def main(): settings = Settings() - bugzilla_mcp_server = build_server( + bugzilla_mcp_server = build_sdk_server( + "bugzilla", BugzillaContext( client=bugsy.Bugsy( api_key=settings.bugzilla_api_key, bugzilla_url=settings.bugzilla_api_url, ), - ) + ), + bugzilla.TOOLS, ) result = await run_bug_fix( diff --git a/bugbug/tools/duplicate_bugs/agent.py b/bugbug/tools/duplicate_bugs/agent.py index c1fcb7f34e..35fd3015d0 100644 --- a/bugbug/tools/duplicate_bugs/agent.py +++ b/bugbug/tools/duplicate_bugs/agent.py @@ -22,6 +22,9 @@ from pathlib import Path import bugsy +from agent_tools import bugzilla as bugzilla_tools +from agent_tools.bugzilla import BugzillaContext +from agent_tools.claude_sdk import build_sdk_server from claude_agent_sdk import ( AssistantMessage, ClaudeAgentOptions, @@ -34,8 +37,6 @@ ToolUseBlock, UserMessage, ) -from hackbot_runtime.mcp.bugzilla import BugzillaContext -from hackbot_runtime.mcp.bugzilla import build_server as build_bugzilla_server from bugbug.tools.base import GenerativeModelTool from bugbug.tools.duplicate_bugs.config import ( @@ -499,7 +500,7 @@ async def run( bz = bugsy.Bugsy(api_key=api_key, 
bugzilla_url=base_url) bz_ctx = BugzillaContext(client=bz) - bugzilla_server = build_bugzilla_server(bz_ctx) + bugzilla_server = build_sdk_server("bugzilla", bz_ctx, bugzilla_tools.TOOLS) if mode == "local": if local_dir is None: diff --git a/http_service/Dockerfile b/http_service/Dockerfile index 13c8c12d17..5f7f312a1a 100644 --- a/http_service/Dockerfile +++ b/http_service/Dockerfile @@ -10,6 +10,7 @@ RUN --mount=type=bind,source=pyproject.toml,target=/tmp/workspace/pyproject.toml --mount=type=bind,source=services/hackbot-api/pyproject.toml,target=/tmp/workspace/services/hackbot-api/pyproject.toml \ --mount=type=bind,source=agents/bug-fix/pyproject.toml,target=/tmp/workspace/agents/bug-fix/pyproject.toml \ --mount=type=bind,source=libs/hackbot-runtime/pyproject.toml,target=/tmp/workspace/libs/hackbot-runtime/pyproject.toml \ + --mount=type=bind,source=libs/agent-tools/pyproject.toml,target=/tmp/workspace/libs/agent-tools/pyproject.toml \ cd /tmp/workspace && uv sync --locked --no-dev --package bugbug-http-service --no-install-project # Setup http service as package diff --git a/http_service/Dockerfile.bg_worker b/http_service/Dockerfile.bg_worker index 630508848e..ebee605271 100644 --- a/http_service/Dockerfile.bg_worker +++ b/http_service/Dockerfile.bg_worker @@ -10,6 +10,7 @@ RUN --mount=type=bind,source=pyproject.toml,target=/tmp/workspace/pyproject.toml --mount=type=bind,source=services/hackbot-api/pyproject.toml,target=/tmp/workspace/services/hackbot-api/pyproject.toml \ --mount=type=bind,source=agents/bug-fix/pyproject.toml,target=/tmp/workspace/agents/bug-fix/pyproject.toml \ --mount=type=bind,source=libs/hackbot-runtime/pyproject.toml,target=/tmp/workspace/libs/hackbot-runtime/pyproject.toml \ + --mount=type=bind,source=libs/agent-tools/pyproject.toml,target=/tmp/workspace/libs/agent-tools/pyproject.toml \ cd /tmp/workspace && uv sync --locked --no-dev --package bugbug-http-service --no-install-project # Setup http service as package diff --git 
a/infra/dockerfile.base b/infra/dockerfile.base index 461156dbd4..57d4bb5a35 100644 --- a/infra/dockerfile.base +++ b/infra/dockerfile.base @@ -14,6 +14,7 @@ RUN --mount=type=bind,source=pyproject.toml,target=/tmp/bugbug/pyproject.toml \ --mount=type=bind,source=services/hackbot-api/pyproject.toml,target=/tmp/bugbug/services/hackbot-api/pyproject.toml \ --mount=type=bind,source=agents/bug-fix/pyproject.toml,target=/tmp/bugbug/agents/bug-fix/pyproject.toml \ --mount=type=bind,source=libs/hackbot-runtime/pyproject.toml,target=/tmp/bugbug/libs/hackbot-runtime/pyproject.toml \ + --mount=type=bind,source=libs/agent-tools/pyproject.toml,target=/tmp/bugbug/libs/agent-tools/pyproject.toml \ apt-get update && \ apt-get install -y --no-install-recommends gcc g++ libgomp1 libffi-dev libjemalloc2 zstd patch git && \ cd /tmp/bugbug && uv sync --locked --package bugbug --no-dev --no-install-project && \ diff --git a/infra/dockerfile.spawn_pipeline b/infra/dockerfile.spawn_pipeline index b91998fcfa..cad78c6a37 100644 --- a/infra/dockerfile.spawn_pipeline +++ b/infra/dockerfile.spawn_pipeline @@ -14,6 +14,7 @@ RUN --mount=type=bind,source=pyproject.toml,target=/tmp/bugbug/pyproject.toml \ --mount=type=bind,source=services/hackbot-api/pyproject.toml,target=/tmp/bugbug/services/hackbot-api/pyproject.toml \ --mount=type=bind,source=agents/bug-fix/pyproject.toml,target=/tmp/bugbug/agents/bug-fix/pyproject.toml \ --mount=type=bind,source=libs/hackbot-runtime/pyproject.toml,target=/tmp/bugbug/libs/hackbot-runtime/pyproject.toml \ + --mount=type=bind,source=libs/agent-tools/pyproject.toml,target=/tmp/bugbug/libs/agent-tools/pyproject.toml \ cd /tmp/bugbug && uv sync --locked --package bugbug --no-dev --only-group spawn-pipeline --no-install-project ADD infra/spawn_pipeline.py /code/ diff --git a/libs/agent-tools/agent_tools/__init__.py b/libs/agent-tools/agent_tools/__init__.py new file mode 100644 index 0000000000..2b21ddbf6a --- /dev/null +++ b/libs/agent-tools/agent_tools/__init__.py 
@@ -0,0 +1,14 @@ +"""Reusable, framework-neutral agent tools. + +Each tool is an async handler decorated with :func:`agent_tools.registry.tool`; +the decorator infers its name, namespace, description and argument schema. A +per-framework adapter (``agent_tools.claude_sdk`` today) turns a module's tools +into a runnable server. Import the submodule you need directly (e.g. +``from agent_tools import bugzilla``) — this ``__init__`` imports no submodules, +so pulling one tool never drags in another's optional dependencies, and the +base package never imports any agent framework. +""" + +from agent_tools.registry import ToolDefinition, ToolError, tool, tools_in + +__all__ = ["ToolDefinition", "ToolError", "tool", "tools_in"] diff --git a/libs/agent-tools/agent_tools/bugzilla.py b/libs/agent-tools/agent_tools/bugzilla.py new file mode 100644 index 0000000000..03f2bcf8a4 --- /dev/null +++ b/libs/agent-tools/agent_tools/bugzilla.py @@ -0,0 +1,251 @@ +"""Read-only Bugzilla tools backed by bugsy. + +Framework-neutral: each tool is a ``@tool``-decorated handler whose first +parameter is a :class:`BugzillaContext`. Handlers return plain data and surface +proxy-level restrictions (code 101: endpoint not exposed, code 102: access +denied) as a structured :class:`~agent_tools.registry.ToolError`. +""" + +from __future__ import annotations + +import base64 +from dataclasses import dataclass +from typing import Annotated, Any + +import bugsy +from pydantic import Field + +from agent_tools.registry import ToolError, tool, tools_in + + +@dataclass +class BugzillaContext: + """Holds the live bugsy client. + + Every tool receives the same instance, so they share auth and one TCP + connection pool. + """ + + client: bugsy.Bugsy + + +def _bugsy_error(e: bugsy.BugsyException) -> ToolError: + """Turn a bugsy exception into a structured ToolError. + + The payload is friendly and machine-parseable so the agent can decide what + to do (skip the bug, try a different endpoint, ...) 
rather than just seeing + a stack trace. + """ + code = getattr(e, "code", None) + msg = getattr(e, "msg", str(e)) + if code == 101: + kind = "endpoint_not_exposed" + hint = "This Bugzilla proxy does not expose this endpoint." + elif code == 102: + kind = "access_denied" + hint = "Your API key cannot access this bug. Skip it." + else: + kind = "bugzilla_error" + hint = None + payload: dict[str, Any] = {"error": kind, "code": code, "message": msg} + if hint: + payload["hint"] = hint + return ToolError(msg, payload=payload) + + +@tool +async def search_bugs( + ctx: BugzillaContext, + params: Annotated[ + dict[str, Any], + Field( + description=( + "Bugzilla REST /bug query parameters. Values may be strings, " + "ints, or comma-separated lists. Example: " + '{"blocks": 12345, "keywords": "sec-low", ' + '"include_fields": "id,summary,status,whiteboard,keywords"}' + ) + ), + ], +) -> dict: + """Search Bugzilla using raw REST query parameters. + + Returns matching bugs in one bulk request. Parameters are ANDed together + (intersect). IMPORTANT: this proxy drops 'whiteboard' and 'keywords' from + _all / _default field sets — list them explicitly in include_fields if you + need them. Common params: id, keywords, blocks, depends_on, product, + component, status, resolution, priority, severity, assigned_to, whiteboard, + include_fields, limit. + """ + try: + result = ctx.client.request("bug", params=params) + except bugsy.BugsyException as e: + raise _bugsy_error(e) from e + bugs = result.get("bugs", []) + return {"count": len(bugs), "bugs": bugs} + + +@tool +async def get_bugs( + ctx: BugzillaContext, + ids: Annotated[list[int], Field(description="Bug IDs to fetch.")], + include_fields: Annotated[ + str | None, + Field( + description=( + "Comma-separated field list, or '_default'/'_all'. Defaults to " + "a sensible triage set." 
+ ) + ), + ] = None, + include_comments: Annotated[ + bool, + Field( + description=( + "If true, also bulk-fetch comments (one extra request total, " + "not one per bug)." + ) + ), + ] = False, +) -> dict: + """Fetch one or more bugs by ID in a single bulk request. + + Inaccessible bugs are silently dropped by the proxy — this tool diffs + requested vs returned and reports them under 'inaccessible'. Remember: + request 'whiteboard' and 'keywords' explicitly in include_fields if you need + them. + """ + if not ids: + return {"count": 0, "bugs": [], "inaccessible": []} + include = include_fields or ( + "id,summary,status,resolution,product,component,priority," + "severity,keywords,whiteboard,assigned_to,creator," + "creation_time,last_change_time,blocks,depends_on,see_also," + "cf_crash_signature,url,version,op_sys,platform" + ) + id_csv = ",".join(str(i) for i in ids) + try: + result = ctx.client.request( + "bug", params={"id": id_csv, "include_fields": include} + ) + except bugsy.BugsyException as e: + raise _bugsy_error(e) from e + bugs = result.get("bugs", []) + returned = {b["id"] for b in bugs} + inaccessible = [i for i in ids if i not in returned] + + payload = {"count": len(bugs), "bugs": bugs, "inaccessible": inaccessible} + + if include_comments and bugs: + # Bugzilla lets us fetch comments for many bugs in one call by hitting + # /bug/{first}/comment?ids=rest. One extra round trip total. 
+ first, *rest = [b["id"] for b in bugs] + cparams = {"ids": ",".join(str(i) for i in rest)} if rest else {} + try: + cres = ctx.client.request(f"bug/{first}/comment", params=cparams) + comments_by_bug = { + int(bid): data["comments"] for bid, data in cres.get("bugs", {}).items() + } + for b in bugs: + b["comments"] = comments_by_bug.get(b["id"], []) + except bugsy.BugsyException as e: + payload["comments_error"] = { + "code": getattr(e, "code", None), + "message": getattr(e, "msg", str(e)), + } + + return payload + + +@tool +async def get_bug_comments( + ctx: BugzillaContext, + bug_id: Annotated[int, Field(description="Bug ID.")], +) -> dict: + """Fetch all comments for a single bug.""" + try: + result = ctx.client.request(f"bug/{bug_id}/comment") + except bugsy.BugsyException as e: + raise _bugsy_error(e) from e + comments = result.get("bugs", {}).get(str(bug_id), {}).get("comments", []) + return {"bug_id": bug_id, "count": len(comments), "comments": comments} + + +@tool +async def get_bug_attachments( + ctx: BugzillaContext, + bug_id: Annotated[int, Field(description="Bug ID.")], + include_data: Annotated[ + bool, + Field( + description=( + "If true, include base64-encoded attachment content. Default " + "false. Use sparingly — attachments can be large." + ) + ), + ] = False, +) -> dict: + """Fetch attachments for a bug. + + By default returns metadata only (cheap, safe for large binaries). Set + include_data=true to also download the content — Bugzilla returns it + base64-encoded in the 'data' field of each attachment. 
+ """ + params = {} if include_data else {"exclude_fields": "data"} + try: + result = ctx.client.request(f"bug/{bug_id}/attachment", params=params) + except bugsy.BugsyException as e: + raise _bugsy_error(e) from e + atts = result.get("bugs", {}).get(str(bug_id), []) + return {"bug_id": bug_id, "count": len(atts), "attachments": atts} + + +@tool +async def download_attachment( + ctx: BugzillaContext, + attachment_id: Annotated[ + int, Field(description="Attachment ID (discover via get_bug_attachments).") + ], + dest_path: Annotated[ + str, + Field( + description=( + "Local filesystem path to write the decoded attachment to. " + "Parent directory must already exist. Overwrites if present." + ) + ), + ], +) -> dict: + """Fetch a Bugzilla attachment by ID and write its decoded content to a file. + + The inverse of add_attachment: it handles the base64 decode server-side so + the agent never has to round-trip the blob through its own context. Use + get_bug_attachments first to discover attachment IDs. Returns the written + path, size, and content_type. 
+ """ + try: + result = ctx.client.request(f"bug/attachment/{attachment_id}") + except bugsy.BugsyException as e: + raise _bugsy_error(e) from e + + att = result.get("attachments", {}).get(str(attachment_id)) + if att is None: + raise ToolError( + f"attachment {attachment_id} not found", + payload={"error": "attachment_not_found", "attachment_id": attachment_id}, + ) + + raw = base64.b64decode(att["data"]) + with open(dest_path, "wb") as fp: + fp.write(raw) + + return { + "attachment_id": attachment_id, + "dest_path": dest_path, + "size_bytes": len(raw), + "file_name": att.get("file_name"), + "content_type": att.get("content_type"), + } + + +TOOLS = tools_in(__name__) diff --git a/libs/agent-tools/agent_tools/claude_sdk.py b/libs/agent-tools/agent_tools/claude_sdk.py new file mode 100644 index 0000000000..b4fadcede0 --- /dev/null +++ b/libs/agent-tools/agent_tools/claude_sdk.py @@ -0,0 +1,63 @@ +"""claude-agent-sdk adapter for framework-neutral tool definitions. + +The ONLY module in agent-tools that imports claude-agent-sdk. Wraps a list of +:class:`~agent_tools.registry.ToolDefinition` into an in-process MCP server. +Requires the ``claude-sdk`` optional extra. 
+""" + +from __future__ import annotations + +import json + +from claude_agent_sdk import create_sdk_mcp_server +from claude_agent_sdk import tool as sdk_tool + +from agent_tools.registry import ToolDefinition, ToolError, tool_name_for + + +def _text(content: str) -> dict: + """Wrap plain text in the MCP tool-result content shape the SDK expects.""" + return {"content": [{"type": "text", "text": content}]} + + +def _jtext(obj) -> dict: + """Serialise an object to pretty JSON inside MCP text content.""" + return _text(json.dumps(obj, indent=2, default=str)) + + +def _make_tool(defn: ToolDefinition, ctx, prefix_namespace: bool): + mcp_name = tool_name_for(defn.dotted) if prefix_namespace else defn.name + + @sdk_tool(mcp_name, defn.description, defn.input_schema) + async def run(args): + try: + result = await defn.handler(ctx, **args) + except ToolError as e: + payload = e.payload if e.payload is not None else {"error": str(e)} + return {**_jtext(payload), "is_error": True} + # Handlers return plain data; str is shown verbatim, everything else as JSON. + return _text(result) if isinstance(result, str) else _jtext(result) + + return run + + +def build_sdk_server( + name: str, + ctx, + tools: list[ToolDefinition], + *, + version: str = "0.1.0", + prefix_namespace: bool = False, +): + """Build a claude-agent-sdk ``McpSdkServerConfig`` from tool definitions. + + ``ctx`` is passed as each handler's first argument. ``prefix_namespace`` + names the MCP tools ``_`` (used by the shared ``actions`` + server, where one server hosts multiple domains); otherwise the tool name is + the function name (per-domain servers like ``bugzilla``/``firefox``). 
+ """ + return create_sdk_mcp_server( + name=name, + version=version, + tools=[_make_tool(d, ctx, prefix_namespace) for d in tools], + ) diff --git a/libs/agent-tools/agent_tools/firefox/__init__.py b/libs/agent-tools/agent_tools/firefox/__init__.py new file mode 100644 index 0000000000..f0fcbb652a --- /dev/null +++ b/libs/agent-tools/agent_tools/firefox/__init__.py @@ -0,0 +1,200 @@ +"""Firefox build + testcase-evaluation tools. + +Framework-neutral ``@tool`` handlers over the implementations in ``.tools``; +each takes a :class:`FirefoxContext` (paths derived from the source repo) as its +first parameter and returns plain data. +""" + +from __future__ import annotations + +from dataclasses import dataclass +from pathlib import Path +from typing import Annotated + +from pydantic import Field + +from agent_tools.registry import tool, tools_in + +from .tools import bootstrap_firefox as _bootstrap_firefox +from .tools import build_firefox as _build_firefox +from .tools import evaluate_testcase as _evaluate_testcase +from .tools import js_shell_evaluator as _js_shell_evaluator + + +@dataclass +class FirefoxContext: + """Firefox-related paths, derived from the source repo at startup. + + Defaults follow: mozconfig at the source root, objdir-ff-asan/ under it. The + agent can still override the binary per-call if it wants to test a different + build. 
+ """ + + source_dir: Path + mozconfig: Path + objdir: Path + binary: Path + js_binary: Path + + @classmethod + def from_source_repo(cls, source_repo: Path) -> "FirefoxContext": + src = source_repo.resolve() + objdir = src / "objdir-ff-asan" + return cls( + source_dir=src, + mozconfig=src / ".mozconfig", + objdir=objdir, + binary=objdir / "dist" / "bin" / "firefox", + js_binary=objdir / "dist" / "bin" / "js", + ) + + +@tool +async def evaluate_testcase( + ctx: FirefoxContext, + content: Annotated[ + str, Field(description="Testcase file content (HTML, JS, SVG, etc.)") + ], + filename: Annotated[ + str, + Field( + description=( + "Name for the testcase entry point, e.g. 'test.html'. Extension " + "matters: grizzly serves it with the matching MIME type." + ) + ), + ], + firefox_binary: Annotated[ + str | None, + Field( + description="Path to Firefox binary. Optional — defaults to the configured build's binary." + ), + ] = None, + timeout: Annotated[ + int, Field(description="Seconds to wait for a crash (default: 30)") + ] = 30, + prefs: Annotated[ + dict[str, str | int | bool] | None, + Field( + description=( + "Firefox about:config prefs to set before launch, e.g. " + '{"dom.webgpu.enabled": true}. Use this to unlock gated features ' + "your testcase needs." + ) + ), + ] = None, +) -> dict: + """Run a testcase in Firefox under xvfb and capture crash output via grizzly. + + The build's sanitizer configuration (ASAN, TSAN, plain debug, etc.) is + whatever the configured mozconfig produces. Returns JSON: crashed (bool) — + whether Firefox crashed; crashed_parent (bool) — parent process vs content + process crash; logs (dict) — stderr/stdout and, if crashed, crashdata + (crash/sanitizer report); files (dict) — the testcase bundle that triggered + the crash; message (str) — human-readable summary. When crashed=false, + logs.stderr/stdout often reveal why the trigger missed (JS exception, wrong + pref, feature gated off). 
+ """ + binary = Path(firefox_binary or ctx.binary) + return await _evaluate_testcase( + content=content, + filename=filename, + firefox_binary=binary, + timeout=timeout, + prefs=prefs or {}, + ) + + +@tool +async def build_firefox( + ctx: FirefoxContext, + firefox_dir: Annotated[ + str | None, + Field( + description="Firefox source directory. Optional — defaults to the configured source dir." + ), + ] = None, + mozconfig_path: Annotated[ + str | None, + Field( + description="MOZCONFIG to use. Optional — defaults to the configured mozconfig." + ), + ] = None, +) -> dict: + """Build Firefox using the configured mozconfig. + + Slow (tens of minutes on a cold build, faster incremental). Returns JSON: + success (bool), build_dir (str), message (str), stdout/stderr. Only call this + if you've changed source or the binary is missing — check if the binary + exists first. + """ + firefox_dir_p = Path(firefox_dir) if firefox_dir else ctx.source_dir + mozconfig_p = Path(mozconfig_path) if mozconfig_path else ctx.mozconfig + return await _build_firefox(firefox_dir_p, mozconfig_p, ctx.objdir) + + +@tool +async def evaluate_js_shell( + ctx: FirefoxContext, + content: Annotated[str, Field(description="JavaScript testcase source")], + js_binary: Annotated[ + str | None, + Field( + description="Path to the SpiderMonkey js binary. Optional — defaults to the configured build's js shell." + ), + ] = None, + timeout: Annotated[ + int, + Field(description="Seconds to wait before killing the shell (default: 30)"), + ] = 30, + flags: Annotated[ + list[str] | None, + Field( + description=( + 'Extra shell flags, e.g. ["--no-threads", "--ion-eager"]. ' + "--fuzzing-safe is always prepended." + ) + ), + ] = None, +) -> dict: + """Run a JS testcase in the SpiderMonkey shell and capture crash output. + + The shell's sanitizer configuration is whatever the configured mozconfig + produces. 
Much faster than full-browser evaluate_testcase — use this for + engine-level bugs (JIT, GC, TypedArrays, WASM) that don't need a DOM. Returns + JSON: crashed (bool) — whether the shell crashed (signal or sanitizer); + message (str) — human-readable summary, includes signal name if killed; logs + (dict) — stderr/stdout (tail-truncated to 1 MB) and, if crashed, crashdata + (crash/sanitizer report); files (dict) — the .js testcase that triggered the + crash. A nonzero exit without a signal is a JS exception, NOT a crash — check + logs.stderr for the syntax/runtime error. + """ + binary = Path(js_binary or ctx.js_binary) + return await _js_shell_evaluator( + content=content, js_binary=binary, timeout=timeout, flags=flags + ) + + +@tool +async def bootstrap_firefox( + ctx: FirefoxContext, + firefox_dir: Annotated[ + str | None, + Field( + description="Firefox source directory. Optional — defaults to the configured source dir." + ), + ] = None, +) -> dict: + """Run ``./mach bootstrap`` to install the Firefox build toolchain. + + Installs rust, clang, cbindgen under the running user's ~/.mozbuild/. + Required before a full (non-artifact) build. Slow — ~10-15 min on a fresh + image, fast on re-runs. Returns JSON: success, message, stdout, stderr. Only + call this if you intend to do a full build; artifact builds don't need + bootstrap. 
+ """ + firefox_dir_p = Path(firefox_dir) if firefox_dir else ctx.source_dir + return await _bootstrap_firefox(firefox_dir_p) + + +TOOLS = tools_in(__name__) diff --git a/agents/bug-fix/agent/firefox_tools/__init__.py b/libs/agent-tools/agent_tools/firefox/tools/__init__.py similarity index 100% rename from agents/bug-fix/agent/firefox_tools/__init__.py rename to libs/agent-tools/agent_tools/firefox/tools/__init__.py diff --git a/agents/bug-fix/agent/firefox_tools/bootstrap_firefox.py b/libs/agent-tools/agent_tools/firefox/tools/bootstrap_firefox.py similarity index 100% rename from agents/bug-fix/agent/firefox_tools/bootstrap_firefox.py rename to libs/agent-tools/agent_tools/firefox/tools/bootstrap_firefox.py diff --git a/agents/bug-fix/agent/firefox_tools/build_firefox.py b/libs/agent-tools/agent_tools/firefox/tools/build_firefox.py similarity index 100% rename from agents/bug-fix/agent/firefox_tools/build_firefox.py rename to libs/agent-tools/agent_tools/firefox/tools/build_firefox.py diff --git a/agents/bug-fix/agent/firefox_tools/evaluate_testcase.py b/libs/agent-tools/agent_tools/firefox/tools/evaluate_testcase.py similarity index 100% rename from agents/bug-fix/agent/firefox_tools/evaluate_testcase.py rename to libs/agent-tools/agent_tools/firefox/tools/evaluate_testcase.py diff --git a/agents/bug-fix/agent/firefox_tools/js_shell_evaluator.py b/libs/agent-tools/agent_tools/firefox/tools/js_shell_evaluator.py similarity index 100% rename from agents/bug-fix/agent/firefox_tools/js_shell_evaluator.py rename to libs/agent-tools/agent_tools/firefox/tools/js_shell_evaluator.py diff --git a/libs/agent-tools/agent_tools/registry.py b/libs/agent-tools/agent_tools/registry.py new file mode 100644 index 0000000000..ce6bc3010c --- /dev/null +++ b/libs/agent-tools/agent_tools/registry.py @@ -0,0 +1,109 @@ +"""Framework-neutral declaration of agent tools. 
+ +A ``@tool``-decorated handler is the single source of truth for one agent tool: +its name (the function name), namespace (the defining module's basename), +description (the docstring) and argument schema (the typed signature, minus the +first ``ctx`` parameter). Per-framework adapters (claude-agent-sdk today, +LangChain later) consume :class:`ToolDefinition` without the handlers importing +any framework. This module imports no agent framework — only pydantic. +""" + +from __future__ import annotations + +import functools +import inspect +from collections import defaultdict +from collections.abc import Awaitable, Callable +from dataclasses import dataclass + +from pydantic import create_model + +ACTIONS_SERVER_NAME = "actions" + + +def tool_name_for(dotted: str) -> str: + """Map a dotted tool id to its MCP tool name: ``bugzilla.update_bug`` -> ``bugzilla_update_bug``.""" + return dotted.replace(".", "_") + + +class ToolError(Exception): + """An agent tool failed in an expected way. + + Raised by handlers; a per-framework adapter renders it as that framework's + tool-error signal. The optional ``payload`` carries a structured error body + (preferred over a bare message when the agent benefits from machine-readable + detail). The tool layer imports no framework error type. + """ + + def __init__(self, message: str, *, payload: dict | None = None) -> None: + super().__init__(message) + self.payload = payload + + +@dataclass +class ToolDefinition: + """Declarative description of one agent tool, derived from a handler. + + ``handler`` is an async function whose **first positional parameter** is the + tool context (e.g. a ``BugzillaContext`` or an actions recorder); the + remaining parameters carry ``Annotated[T, Field(...)]`` annotations that + define the agent-facing schema. 
+ """ + + name: str + namespace: str + description: str + handler: Callable[..., Awaitable] + + @property + def dotted(self) -> str: + return f"{self.namespace}.{self.name}" + + @functools.cached_property + def args_model(self): + """Pydantic model of the agent-facing args (excludes the ``ctx`` param). + + Derived once from the handler signature so every adapter shares one + schema — claude-agent-sdk consumes ``input_schema``; a LangChain adapter + can use this model directly as ``args_schema``. + """ + sig = inspect.signature(self.handler, eval_str=True) + fields = { + name: ( + param.annotation, + ... if param.default is inspect.Parameter.empty else param.default, + ) + for name, param in list(sig.parameters.items())[1:] # skip `ctx` + } + return create_model(f"{self.namespace}_{self.name}_args", **fields) + + @functools.cached_property + def input_schema(self) -> dict: + return self.args_model.model_json_schema() + + +_REGISTRY: dict[str, list[ToolDefinition]] = defaultdict(list) + + +def tool(fn: Callable[..., Awaitable]) -> Callable[..., Awaitable]: + """Register ``fn`` as a tool, inferring name/namespace/description. + + name = function name; namespace = defining module's basename; description = + function docstring. The function is returned unchanged (still callable); + collect a module's tools with :func:`tools_in`. 
+ """ + namespace = fn.__module__.rsplit(".", 1)[-1] + _REGISTRY[fn.__module__].append( + ToolDefinition( + name=fn.__name__, + namespace=namespace, + description=inspect.getdoc(fn) or "", + handler=fn, + ) + ) + return fn + + +def tools_in(module_name: str) -> list[ToolDefinition]: + """Return the tools registered by ``@tool`` in the given module (``__name__``).""" + return list(_REGISTRY[module_name]) diff --git a/libs/agent-tools/pyproject.toml b/libs/agent-tools/pyproject.toml new file mode 100644 index 0000000000..b485e29abd --- /dev/null +++ b/libs/agent-tools/pyproject.toml @@ -0,0 +1,17 @@ +[project] +name = "agent-tools" +version = "0.1.0" +description = "Reusable, framework-neutral agent tools (declaration + per-framework adapters)" +requires-python = ">=3.12" +dependencies = [ + "pydantic>=2.6.0", +] + +[project.optional-dependencies] +bugzilla = ["bugsy"] +firefox = ["grizzly-framework", "prefpicker"] +claude-sdk = ["claude-agent-sdk>=0.1.30"] + +[tool.pytest.ini_options] +asyncio_mode = "auto" +testpaths = ["tests"] diff --git a/libs/agent-tools/tests/test_bugzilla.py b/libs/agent-tools/tests/test_bugzilla.py new file mode 100644 index 0000000000..587202c2d2 --- /dev/null +++ b/libs/agent-tools/tests/test_bugzilla.py @@ -0,0 +1,54 @@ +"""Tests for the Bugzilla read tools.""" + +from unittest.mock import MagicMock + +import pytest +from agent_tools import bugzilla +from agent_tools.bugzilla import BugzillaContext +from agent_tools.claude_sdk import build_sdk_server +from agent_tools.registry import ToolError +from mcp.types import ListToolsRequest + + +async def _list(server): + return ( + await server.request_handlers[ListToolsRequest]( + ListToolsRequest(method="tools/list") + ) + ).root.tools + + +async def test_exposes_read_only_tools(): + config = build_sdk_server( + "bugzilla", BugzillaContext(client=MagicMock()), bugzilla.TOOLS + ) + assert config["type"] == "sdk" + tools = await _list(config["instance"]) + assert {t.name for t in tools} == { + 
"search_bugs", + "get_bugs", + "get_bug_comments", + "get_bug_attachments", + "download_attachment", + } + + +async def test_search_bugs_returns_data(): + client = MagicMock() + client.request.return_value = {"bugs": [{"id": 1}, {"id": 2}]} + result = await bugzilla.search_bugs( + BugzillaContext(client=client), params={"id": "1,2"} + ) + assert result == {"count": 2, "bugs": [{"id": 1}, {"id": 2}]} + + +async def test_search_bugs_raises_tool_error_on_bugsy_failure(): + import bugsy + + client = MagicMock() + err = bugsy.BugsyException("nope") + err.code = 102 + client.request.side_effect = err + with pytest.raises(ToolError) as ei: + await bugzilla.search_bugs(BugzillaContext(client=client), params={}) + assert ei.value.payload["error"] == "access_denied" diff --git a/libs/agent-tools/tests/test_firefox.py b/libs/agent-tools/tests/test_firefox.py new file mode 100644 index 0000000000..9de7a3b208 --- /dev/null +++ b/libs/agent-tools/tests/test_firefox.py @@ -0,0 +1,26 @@ +"""Tests for the Firefox tools.""" + +from agent_tools import firefox +from agent_tools.claude_sdk import build_sdk_server +from mcp.types import ListToolsRequest + + +async def _list(server): + return ( + await server.request_handlers[ListToolsRequest]( + ListToolsRequest(method="tools/list") + ) + ).root.tools + + +async def test_exposes_firefox_tools(tmp_path): + ctx = firefox.FirefoxContext.from_source_repo(tmp_path) + config = build_sdk_server("firefox", ctx, firefox.TOOLS) + assert config["type"] == "sdk" + tools = await _list(config["instance"]) + assert {t.name for t in tools} == { + "evaluate_testcase", + "build_firefox", + "evaluate_js_shell", + "bootstrap_firefox", + } diff --git a/libs/agent-tools/tests/test_registry.py b/libs/agent-tools/tests/test_registry.py new file mode 100644 index 0000000000..8ad4659038 --- /dev/null +++ b/libs/agent-tools/tests/test_registry.py @@ -0,0 +1,64 @@ +"""Tests for the @tool decorator and signature-derived schema.""" + +from dataclasses import 
dataclass +from typing import Annotated + +from agent_tools.registry import ToolError, tool, tool_name_for, tools_in +from pydantic import Field + + +@dataclass +class _Ctx: + value: int + + +@tool +async def sample_tool( + ctx: _Ctx, + bug_id: Annotated[int, Field(description="The bug id.")], + note: Annotated[str, Field(description="A note.")] = "x", +) -> dict: + """Sample tool docstring.""" + return {"bug_id": bug_id, "note": note} + + +_DEFN = next(d for d in tools_in(__name__) if d.name == "sample_tool") + + +def test_decorator_infers_identity(): + assert _DEFN.name == "sample_tool" + assert _DEFN.namespace == "test_registry" # module basename + assert _DEFN.description == "Sample tool docstring." + assert _DEFN.dotted == "test_registry.sample_tool" + + +def test_schema_excludes_ctx_and_keeps_descriptions(): + schema = _DEFN.input_schema + props = schema["properties"] + assert "ctx" not in props + assert set(props) == {"bug_id", "note"} + assert props["bug_id"]["description"] == "The bug id." 
+ + +def test_schema_marks_required_vs_optional(): + schema = _DEFN.input_schema + assert "bug_id" in schema.get("required", []) + assert "note" not in schema.get("required", []) # has a default + + +def test_schema_is_cached(): + assert _DEFN.input_schema is _DEFN.input_schema + + +async def test_handler_remains_callable(): + out = await sample_tool(_Ctx(value=1), bug_id=7) + assert out == {"bug_id": 7, "note": "x"} + + +def test_tool_name_for(): + assert tool_name_for("bugzilla.update_bug") == "bugzilla_update_bug" + + +def test_tool_error_carries_payload(): + err = ToolError("bad", payload={"error": "x"}) + assert err.payload == {"error": "x"} diff --git a/libs/hackbot-runtime/hackbot_runtime/__init__.py b/libs/hackbot-runtime/hackbot_runtime/__init__.py index 5bef861fa6..e54bd1ed6c 100644 --- a/libs/hackbot-runtime/hackbot_runtime/__init__.py +++ b/libs/hackbot-runtime/hackbot_runtime/__init__.py @@ -1,24 +1,14 @@ -from hackbot_runtime.actions import ( - ALL_ACTIONS, - ActionDefinition, - ActionInputError, - ActionsRecorder, - get_actions, -) +from hackbot_runtime.actions.recorder import ActionsRecorder from hackbot_runtime.context import Context from hackbot_runtime.result import AgentResult from hackbot_runtime.runtime import run, run_async from hackbot_runtime.uploader import SignedPolicyUploader __all__ = [ - "ALL_ACTIONS", - "ActionDefinition", - "ActionInputError", "ActionsRecorder", "AgentResult", "Context", "SignedPolicyUploader", - "get_actions", "run", "run_async", ] diff --git a/libs/hackbot-runtime/hackbot_runtime/actions/__init__.py b/libs/hackbot-runtime/hackbot_runtime/actions/__init__.py index 71e257d003..4356804aac 100644 --- a/libs/hackbot-runtime/hackbot_runtime/actions/__init__.py +++ b/libs/hackbot-runtime/hackbot_runtime/actions/__init__.py @@ -1,26 +1,13 @@ """Recordable actions for hackbot agents. 
-The runtime exposes a generic ``ActionsRecorder`` plus a registry of -domain-grouped declarative actions (``bugzilla.update_bug``, -``bugzilla.add_comment``, ...). Per-framework wrappers (MCP today, -LangChain later) wrap the registry without touching the action -declarations themselves. +``ActionsRecorder`` is the framework-neutral sink whose collected actions the +runtime serialises into ``summary.json``. The action *declarations* live in +domain modules (``bugzilla``, ...) and use the shared ``@tool`` decorator from +agent-tools, so one mechanism backs both read tools and write-actions. The +claude-sdk adapter is ``hackbot_runtime.actions.claude_sdk.actions_server_for``. """ -from hackbot_runtime.actions import bugzilla as _bugzilla +from hackbot_runtime.actions import bugzilla from hackbot_runtime.actions.recorder import ActionsRecorder -from hackbot_runtime.actions.registry import ( - ActionDefinition, - ActionInputError, - get_actions, -) -ALL_ACTIONS: list[ActionDefinition] = [*_bugzilla.DEFINITIONS] - -__all__ = [ - "ALL_ACTIONS", - "ActionDefinition", - "ActionInputError", - "ActionsRecorder", - "get_actions", -] +__all__ = ["ActionsRecorder", "bugzilla"] diff --git a/libs/hackbot-runtime/hackbot_runtime/actions/bugzilla.py b/libs/hackbot-runtime/hackbot_runtime/actions/bugzilla.py index e1c7a255a3..c8087b298b 100644 --- a/libs/hackbot-runtime/hackbot_runtime/actions/bugzilla.py +++ b/libs/hackbot-runtime/hackbot_runtime/actions/bugzilla.py @@ -1,10 +1,11 @@ """Bugzilla-domain recordable actions. -Each handler takes the ``ActionsRecorder`` as its first positional -parameter (excluded from the agent-facing schema) plus the agent-facing -args annotated with ``Annotated[T, Field(...)]`` so any adapter can derive -the JSON Schema from the signature. Handlers return a short confirmation -string and raise ``ActionInputError`` on invalid input. 
+Each handler takes the ``ActionsRecorder`` as its first positional parameter +(excluded from the agent-facing schema) plus the agent-facing args annotated +with ``Annotated[T, Field(...)]``. Declared with the shared ``@tool`` decorator +from agent-tools so the same mechanism backs read tools and write-actions. +Handlers record an intended change (nothing is mutated) and return a short +confirmation string, raising ``ToolError`` on invalid input. """ from __future__ import annotations @@ -14,10 +15,10 @@ from pathlib import Path from typing import Annotated, Any +from agent_tools.registry import ToolError, tool, tools_in from pydantic import Field from hackbot_runtime.actions.recorder import ActionsRecorder -from hackbot_runtime.actions.registry import ActionDefinition, ActionInputError _COMMENT_FOOTER = ( "*This is an automated analysis result. If this result is incorrect " @@ -34,6 +35,7 @@ def _confirm(recorder: ActionsRecorder, action_type: str) -> str: return f"Recorded {action_type} (#{len(recorder.actions) - 1})." +@tool async def update_bug( recorder: ActionsRecorder, bug_id: Annotated[int, Field(description="Bug ID to change.")], @@ -60,6 +62,10 @@ async def update_bug( ), ], ) -> str: + """Record an intended change to a Bugzilla bug. + + Recorded into the run summary for human review — does not modify Bugzilla. + """ recorder.record( "bugzilla.update_bug", {"bug_id": bug_id, "changes": changes}, @@ -68,6 +74,7 @@ async def update_bug( return _confirm(recorder, "bugzilla.update_bug") +@tool async def add_comment( recorder: ActionsRecorder, bug_id: Annotated[int, Field(description="Bug ID to comment on.")], @@ -83,6 +90,11 @@ async def add_comment( ), ] = False, ) -> str: + """Record an intended comment on a bug. + + Use is_private=true for security-sensitive notes. Recorded into the run + summary for human review — does not post to Bugzilla. 
+ """ text_with_footer = text.rstrip() + "\n\n" + _COMMENT_FOOTER recorder.record( "bugzilla.add_comment", @@ -92,6 +104,7 @@ async def add_comment( return _confirm(recorder, "bugzilla.add_comment") +@tool async def add_attachment( recorder: ActionsRecorder, bug_id: Annotated[int, Field(description="Bug ID to attach to.")], @@ -142,8 +155,15 @@ async def add_attachment( ), ] = None, ) -> str: + """Record an intended file attachment on a bug. + + Pass a local filesystem path — the runtime uploads a copy of the file + alongside summary.json so the apply step can fetch it. For patches, set + is_patch=true and omit content_type. Recorded into the run summary for human + review — does not upload to Bugzilla. + """ if not os.path.isfile(file_path): - raise ActionInputError(f"file not found: {file_path}") + raise ToolError(f"file not found: {file_path}") file_name = os.path.basename(file_path) resolved_summary = summary or file_name @@ -175,6 +195,7 @@ async def add_attachment( return _confirm(recorder, "bugzilla.add_attachment") +@tool async def create_bug( recorder: ActionsRecorder, product: Annotated[str, Field(description="Bugzilla product.")], @@ -199,6 +220,11 @@ async def create_bug( ), ] = None, ) -> str: + """Record an intended new-bug filing. + + The description becomes comment 0 and is rendered as Markdown. Recorded into + the run summary for human review — does not file in Bugzilla. + """ body: dict[str, Any] = { "product": product, "component": component, @@ -214,43 +240,4 @@ async def create_bug( return _confirm(recorder, "bugzilla.create_bug") -DEFINITIONS: list[ActionDefinition] = [ - ActionDefinition( - type="bugzilla.update_bug", - description=( - "Record an intended change to a Bugzilla bug. Recorded into the " - "run summary for human review — does not modify Bugzilla." - ), - handler=update_bug, - ), - ActionDefinition( - type="bugzilla.add_comment", - description=( - "Record an intended comment on a bug. 
Use is_private=true for " - "security-sensitive notes. Recorded into the run summary for " - "human review — does not post to Bugzilla." - ), - handler=add_comment, - ), - ActionDefinition( - type="bugzilla.add_attachment", - description=( - "Record an intended file attachment on a bug. Pass a local " - "filesystem path — the runtime uploads a copy of the file " - "alongside summary.json so the apply step can fetch it. For " - "patches, set is_patch=true and omit content_type. Recorded " - "into the run summary for human review — does not upload to " - "Bugzilla." - ), - handler=add_attachment, - ), - ActionDefinition( - type="bugzilla.create_bug", - description=( - "Record an intended new-bug filing. The description becomes " - "comment 0 and is rendered as Markdown. Recorded into the run " - "summary for human review — does not file in Bugzilla." - ), - handler=create_bug, - ), -] +TOOLS = tools_in(__name__) diff --git a/libs/hackbot-runtime/hackbot_runtime/actions/claude_sdk.py b/libs/hackbot-runtime/hackbot_runtime/actions/claude_sdk.py index 5312dc99ca..63c3f5bd0c 100644 --- a/libs/hackbot-runtime/hackbot_runtime/actions/claude_sdk.py +++ b/libs/hackbot-runtime/hackbot_runtime/actions/claude_sdk.py @@ -1,54 +1,19 @@ -"""claude-agent-sdk adapter for runtime-registered actions. +"""Build the claude-agent-sdk ``actions`` MCP server from recordable actions. -Exposes the enabled actions as an in-process MCP server built with the -SDK's own ``tool`` + ``create_sdk_mcp_server`` — guaranteed compatible with -claude-agent-sdk. Other frameworks (LangChain, ...) get their own sibling -adapter as needed; the action registry is shared and framework-neutral. - -Requires the ``claude-sdk`` optional extra of hackbot-runtime. +Thin wrapper over agent-tools' generic adapter: the ``ActionsRecorder`` is the +tool context, and tools are namespace-prefixed (one ``actions`` server hosts +every domain). Requires the ``claude-sdk`` optional extra. 
""" from __future__ import annotations from pathlib import Path -from claude_agent_sdk import create_sdk_mcp_server, tool +from agent_tools.claude_sdk import build_sdk_server +from agent_tools.registry import ACTIONS_SERVER_NAME -from hackbot_runtime.actions.naming import ACTIONS_SERVER_NAME, tool_name_for +from hackbot_runtime.actions import bugzilla as _bugzilla from hackbot_runtime.actions.recorder import ActionsRecorder -from hackbot_runtime.actions.registry import ActionDefinition, get_actions - - -def _text(message: str) -> dict: - """Wrap a message in the MCP tool-result content shape the SDK expects.""" - return {"content": [{"type": "text", "text": message}]} - - -def _make_tool(defn: ActionDefinition, recorder: ActionsRecorder): - @tool(tool_name_for(defn.type), defn.description, defn.input_schema) - async def run(args): - # The handler returns a short confirmation string. An ActionInputError - # raised inside it propagates and is rendered by the SDK as an - # is_error result with the message preserved. - return _text(await defn.handler(recorder, **args)) - - return run - - -def build_actions_sdk_server( - recorder: ActionsRecorder, - types: list[str] | None = None, - name: str = ACTIONS_SERVER_NAME, -): - """Return a claude-agent-sdk ``McpSdkServerConfig`` for the enabled actions. - - ``types`` selects a subset of action types; ``None`` exposes all. - """ - return create_sdk_mcp_server( - name=name, - version="0.1.0", - tools=[_make_tool(defn, recorder) for defn in get_actions(types)], - ) def actions_server_for( @@ -57,14 +22,19 @@ def actions_server_for( *, fallback_artifacts_dir: Path = Path("artifacts"), ): - """Return ``(recorder, sdk_server)`` ready to plug into ``ClaudeAgentOptions``. + """Return ``(recorder, sdk_server)`` for the enabled recordable actions. 
- Convenience around :func:`build_actions_sdk_server` that supplies the common - fallback: standalone/script runs pass ``recorder=None`` and get a local - recorder that copies attachments under ``fallback_artifacts_dir`` (no - uploader). Agents running under the runtime pass ``ctx.actions`` and it is - used as-is. + ``recorder=None`` creates a local recorder that copies attachments under + ``fallback_artifacts_dir`` (standalone/script runs with no uploader). + ``types`` selects a subset by dotted id (e.g. ``bugzilla.update_bug``); + ``None`` exposes all. """ if recorder is None: recorder = ActionsRecorder(artifacts_dir=fallback_artifacts_dir) - return recorder, build_actions_sdk_server(recorder, types=types) + tools = _bugzilla.TOOLS + if types is not None: + wanted = set(types) + tools = [t for t in tools if t.dotted in wanted] + return recorder, build_sdk_server( + ACTIONS_SERVER_NAME, recorder, tools, prefix_namespace=True + ) diff --git a/libs/hackbot-runtime/hackbot_runtime/actions/naming.py b/libs/hackbot-runtime/hackbot_runtime/actions/naming.py deleted file mode 100644 index fe3b3b44fd..0000000000 --- a/libs/hackbot-runtime/hackbot_runtime/actions/naming.py +++ /dev/null @@ -1,15 +0,0 @@ -"""Shared naming for the actions MCP server. - -Kept dependency-light (no framework imports) so both the runtime adapter -and agent-side config can derive identical tool names from one place. -""" - -ACTIONS_SERVER_NAME = "actions" - - -def tool_name_for(action_type: str) -> str: - """Map an action type to its MCP tool name. - - ``"bugzilla.update_bug"`` -> ``"bugzilla_update_bug"``. 
- """ - return action_type.replace(".", "_") diff --git a/libs/hackbot-runtime/hackbot_runtime/actions/registry.py b/libs/hackbot-runtime/hackbot_runtime/actions/registry.py deleted file mode 100644 index b9bae37589..0000000000 --- a/libs/hackbot-runtime/hackbot_runtime/actions/registry.py +++ /dev/null @@ -1,63 +0,0 @@ -import functools -import inspect -from collections.abc import Awaitable, Callable -from dataclasses import dataclass - -from pydantic import create_model - - -class ActionInputError(Exception): - """Invalid action input (bad path, etc.). - - Raised by handlers; a per-framework adapter turns it into the - framework's tool-error signal. The action layer imports no framework - error type. - """ - - -@dataclass -class ActionDefinition: - """Declarative description of one recordable action. - - ``handler`` is an async function whose **first positional parameter** is - the ``ActionsRecorder``. The remaining parameters carry typed - annotations (``Annotated[T, Field(...)]``) that double as the - agent-facing schema, exposed framework-neutrally via ``input_schema``. - Handlers return a short confirmation string. - """ - - type: str - description: str - handler: Callable[..., Awaitable[str]] - - @functools.cached_property - def input_schema(self) -> dict: - """JSON schema of the agent-facing arguments (excludes ``recorder``). - - Derived once from the handler signature so every adapter (MCP today, - LangChain later) shares one schema. - """ - sig = inspect.signature(self.handler, eval_str=True) - fields = { - name: ( - param.annotation, - ... if param.default is inspect.Parameter.empty else param.default, - ) - for name, param in list(sig.parameters.items())[1:] # skip `recorder` - } - model = create_model(self.type.replace(".", "_") + "_args", **fields) - return model.model_json_schema() - - -def get_actions(types: list[str] | None = None) -> list[ActionDefinition]: - """Return registered actions, optionally filtered by ``type`` list. 
- - Import is deferred to avoid an import cycle between the registry and - the per-domain modules that register actions. - """ - from hackbot_runtime.actions import ALL_ACTIONS - - if types is None: - return list(ALL_ACTIONS) - wanted = set(types) - return [a for a in ALL_ACTIONS if a.type in wanted] diff --git a/libs/hackbot-runtime/hackbot_runtime/context.py b/libs/hackbot-runtime/hackbot_runtime/context.py index 5a2c1aef25..17d6daa5ad 100644 --- a/libs/hackbot-runtime/hackbot_runtime/context.py +++ b/libs/hackbot-runtime/hackbot_runtime/context.py @@ -7,7 +7,7 @@ from pydantic_settings import BaseSettings, SettingsConfigDict from hackbot_runtime import artifacts -from hackbot_runtime.actions import ActionsRecorder +from hackbot_runtime.actions.recorder import ActionsRecorder from hackbot_runtime.uploader import SignedPolicyUploader diff --git a/libs/hackbot-runtime/hackbot_runtime/mcp/__init__.py b/libs/hackbot-runtime/hackbot_runtime/mcp/__init__.py deleted file mode 100644 index 663427615d..0000000000 --- a/libs/hackbot-runtime/hackbot_runtime/mcp/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""In-process MCP servers for hackbot agents (claude-sdk extra).""" diff --git a/libs/hackbot-runtime/hackbot_runtime/mcp/bugzilla.py b/libs/hackbot-runtime/hackbot_runtime/mcp/bugzilla.py deleted file mode 100644 index 71aeb888f3..0000000000 --- a/libs/hackbot-runtime/hackbot_runtime/mcp/bugzilla.py +++ /dev/null @@ -1,322 +0,0 @@ -"""In-process MCP server wrapping bugsy for Bugzilla REST access. - -Exposes read-only tools to a Claude agent. Write actions are recorded -via the in-process ``actions`` MCP server built from the framework-agnostic -registry in ``hackbot_runtime.actions`` (see -``hackbot_runtime/actions/claude_sdk.py``), so the broker holds the Bugzilla -API key but has no write capability at all. -All tools gracefully handle proxy-level restrictions (code 101: -endpoint not exposed, code 102: access denied). 
-""" - -from __future__ import annotations - -import base64 -import json -from dataclasses import dataclass - -import bugsy -from claude_agent_sdk import create_sdk_mcp_server, tool - -# --------------------------------------------------------------------------- # -# Shared context -# --------------------------------------------------------------------------- # - - -@dataclass -class BugzillaContext: - """Holds the live bugsy client. - - The MCP tool functions close over a single instance of this class so - they share auth and one TCP connection pool. - """ - - client: bugsy.Bugsy - - -def _text(content: str) -> dict: - """Wrap plain text in MCP content format.""" - return {"content": [{"type": "text", "text": content}]} - - -def _jtext(obj) -> dict: - """Serialise an object to pretty JSON inside MCP text content.""" - return _text(json.dumps(obj, indent=2, default=str)) - - -def _handle_bugsy_error(e: bugsy.BugsyException) -> dict: - """Turn a bugsy exception into a structured tool error response. - - We deliberately return ``is_error: True`` but with a friendly, - machine-parseable payload so the agent can decide what to do - (skip the bug, try a different endpoint, etc) rather than just - seeing a stack trace. - """ - code = getattr(e, "code", None) - msg = getattr(e, "msg", str(e)) - if code == 101: - kind = "endpoint_not_exposed" - hint = "This Bugzilla proxy does not expose this endpoint." - elif code == 102: - kind = "access_denied" - hint = "Your API key cannot access this bug. Skip it." 
- else: - kind = "bugzilla_error" - hint = None - payload = {"error": kind, "code": code, "message": msg} - if hint: - payload["hint"] = hint - return { - "content": [{"type": "text", "text": json.dumps(payload, indent=2)}], - "is_error": True, - } - - -# --------------------------------------------------------------------------- # -# Server factory -# --------------------------------------------------------------------------- # - - -def build_server(ctx: BugzillaContext): - """Create and return the in-process MCP server bound to ``ctx``. - - All tool functions are closures over ``ctx`` so they share the same - bugsy session (one TCP connection pool, one auth header). - """ - # ----- READ TOOLS -------------------------------------------------- # - - @tool( - "search_bugs", - "Search Bugzilla using raw REST query parameters. Returns matching " - "bugs in one bulk request. Parameters are ANDed together (intersect). " - "IMPORTANT: this proxy drops 'whiteboard' and 'keywords' from _all / " - "_default field sets — list them explicitly in include_fields if you " - "need them. Common params: id, keywords, blocks, depends_on, product, " - "component, status, resolution, priority, severity, assigned_to, " - "whiteboard, include_fields, limit.", - { - "type": "object", - "properties": { - "params": { - "type": "object", - "description": ( - "Bugzilla REST /bug query parameters. Values may be " - "strings, ints, or comma-separated lists. 
Example: " - '{"blocks": 12345, "keywords": "sec-low", ' - '"include_fields": "id,summary,status,whiteboard,keywords"}' - ), - "additionalProperties": True, - } - }, - "required": ["params"], - }, - ) - async def search_bugs(args): - params = args["params"] - try: - result = ctx.client.request("bug", params=params) - except bugsy.BugsyException as e: - return _handle_bugsy_error(e) - bugs = result.get("bugs", []) - return _jtext({"count": len(bugs), "bugs": bugs}) - - @tool( - "get_bugs", - "Fetch one or more bugs by ID in a single bulk request. " - "Inaccessible bugs are silently dropped by the proxy — this tool " - "diffs requested vs returned and reports them under 'inaccessible'. " - "Remember: request 'whiteboard' and 'keywords' explicitly in " - "include_fields if you need them.", - { - "type": "object", - "properties": { - "ids": { - "type": "array", - "items": {"type": "integer"}, - "description": "Bug IDs to fetch.", - }, - "include_fields": { - "type": "string", - "description": ( - "Comma-separated field list, or '_default'/'_all'. " - "Defaults to a sensible triage set." - ), - }, - "include_comments": { - "type": "boolean", - "description": ( - "If true, also bulk-fetch comments (one extra request " - "total, not one per bug)." 
- ), - }, - }, - "required": ["ids"], - }, - ) - async def get_bugs(args): - ids = args["ids"] - if not ids: - return _jtext({"count": 0, "bugs": [], "inaccessible": []}) - include = args.get("include_fields") or ( - "id,summary,status,resolution,product,component,priority," - "severity,keywords,whiteboard,assigned_to,creator," - "creation_time,last_change_time,blocks,depends_on,see_also," - "cf_crash_signature,url,version,op_sys,platform" - ) - id_csv = ",".join(str(i) for i in ids) - try: - result = ctx.client.request( - "bug", params={"id": id_csv, "include_fields": include} - ) - except bugsy.BugsyException as e: - return _handle_bugsy_error(e) - bugs = result.get("bugs", []) - returned = {b["id"] for b in bugs} - inaccessible = [i for i in ids if i not in returned] - - payload = { - "count": len(bugs), - "bugs": bugs, - "inaccessible": inaccessible, - } - - if args.get("include_comments") and bugs: - # Bugzilla lets us fetch comments for many bugs in one call by - # hitting /bug/{first}/comment?ids=rest. One extra round trip - # total regardless of bug count. - first, *rest = [b["id"] for b in bugs] - cparams = {"ids": ",".join(str(i) for i in rest)} if rest else {} - try: - cres = ctx.client.request(f"bug/{first}/comment", params=cparams) - # Response keys bugs by string ID. 
- comments_by_bug = { - int(bid): data["comments"] - for bid, data in cres.get("bugs", {}).items() - } - for b in bugs: - b["comments"] = comments_by_bug.get(b["id"], []) - except bugsy.BugsyException as e: - payload["comments_error"] = { - "code": getattr(e, "code", None), - "message": getattr(e, "msg", str(e)), - } - - return _jtext(payload) - - @tool( - "get_bug_comments", - "Fetch all comments for a single bug.", - {"bug_id": int}, - ) - async def get_bug_comments(args): - bug_id = args["bug_id"] - try: - result = ctx.client.request(f"bug/{bug_id}/comment") - except bugsy.BugsyException as e: - return _handle_bugsy_error(e) - comments = result.get("bugs", {}).get(str(bug_id), {}).get("comments", []) - return _jtext({"bug_id": bug_id, "count": len(comments), "comments": comments}) - - @tool( - "get_bug_attachments", - "Fetch attachments for a bug. By default returns metadata only " - "(cheap, safe for large binaries). Set include_data=true to also " - "download the content — Bugzilla returns it base64-encoded in the " - "'data' field of each attachment.", - { - "type": "object", - "properties": { - "bug_id": {"type": "integer"}, - "include_data": { - "type": "boolean", - "description": ( - "If true, include base64-encoded attachment content. " - "Default false. Use sparingly — attachments can be large." - ), - }, - }, - "required": ["bug_id"], - }, - ) - async def get_bug_attachments(args): - bug_id = args["bug_id"] - params = {} if args.get("include_data") else {"exclude_fields": "data"} - try: - result = ctx.client.request(f"bug/{bug_id}/attachment", params=params) - except bugsy.BugsyException as e: - return _handle_bugsy_error(e) - atts = result.get("bugs", {}).get(str(bug_id), []) - return _jtext({"bug_id": bug_id, "count": len(atts), "attachments": atts}) - - @tool( - "download_attachment", - "Fetch a single Bugzilla attachment by ID and write its decoded " - "content to a local file. 
This is the inverse of add_attachment: " - "it handles the base64 decode server-side so the agent never has " - "to round-trip the blob through its own context. Use " - "get_bug_attachments first to discover attachment IDs. Returns " - "the written path, size, and content_type.", - { - "type": "object", - "properties": { - "attachment_id": {"type": "integer"}, - "dest_path": { - "type": "string", - "description": "Local filesystem path to write the " - "decoded attachment to. Parent directory " - "must already exist. Overwrites if present.", - }, - }, - "required": ["attachment_id", "dest_path"], - }, - ) - async def download_attachment(args): - attachment_id = args["attachment_id"] - dest_path = args["dest_path"] - try: - result = ctx.client.request(f"bug/attachment/{attachment_id}") - except bugsy.BugsyException as e: - return _handle_bugsy_error(e) - - att = result.get("attachments", {}).get(str(attachment_id)) - if att is None: - return { - "content": [ - { - "type": "text", - "text": json.dumps( - { - "error": "attachment_not_found", - "attachment_id": attachment_id, - } - ), - } - ], - "is_error": True, - } - - raw = base64.b64decode(att["data"]) - with open(dest_path, "wb") as fp: - fp.write(raw) - - return _jtext( - { - "attachment_id": attachment_id, - "dest_path": dest_path, - "size_bytes": len(raw), - "file_name": att.get("file_name"), - "content_type": att.get("content_type"), - } - ) - - return create_sdk_mcp_server( - name="bugzilla", - version="0.1.0", - tools=[ - search_bugs, - get_bugs, - get_bug_comments, - get_bug_attachments, - download_attachment, - ], - ) diff --git a/libs/hackbot-runtime/pyproject.toml b/libs/hackbot-runtime/pyproject.toml index ab0c2e2ad2..e34f2ca1df 100644 --- a/libs/hackbot-runtime/pyproject.toml +++ b/libs/hackbot-runtime/pyproject.toml @@ -6,15 +6,14 @@ requires-python = ">=3.12" dependencies = [ "requests>=2.32.0", "pydantic-settings>=2.1.0", + "agent-tools", ] [project.optional-dependencies] -# claude-agent-sdk 
building blocks: the action-registry MCP adapter -# (hackbot_runtime.actions.claude_sdk), the streamed-message Reporter -# (hackbot_runtime.claude), and the read-only Bugzilla MCP server -# (hackbot_runtime.mcp.bugzilla, which also needs bugsy). Not needed by -# consumers that only read the summary contract. -claude-sdk = ["claude-agent-sdk>=0.1.30", "bugsy"] +claude-sdk = ["claude-agent-sdk>=0.1.30", "agent-tools[claude-sdk]"] + +[tool.uv.sources] +agent-tools = { workspace = true } [tool.pytest.ini_options] asyncio_mode = "auto" diff --git a/libs/hackbot-runtime/tests/test_bugzilla_actions.py b/libs/hackbot-runtime/tests/test_bugzilla_actions.py index 7b0cc2b289..ab27e3447c 100644 --- a/libs/hackbot-runtime/tests/test_bugzilla_actions.py +++ b/libs/hackbot-runtime/tests/test_bugzilla_actions.py @@ -1,7 +1,8 @@ """Tests for the bugzilla action handlers (footers, mime, merge, errors).""" import pytest -from hackbot_runtime.actions import ActionInputError, ActionsRecorder, bugzilla +from agent_tools.registry import ToolError +from hackbot_runtime.actions import ActionsRecorder, bugzilla async def test_add_comment_appends_footer(): @@ -37,7 +38,7 @@ async def test_add_attachment_guesses_mime(tmp_path): async def test_add_attachment_missing_file_raises(): rec = ActionsRecorder() - with pytest.raises(ActionInputError): + with pytest.raises(ToolError): await bugzilla.add_attachment( rec, bug_id=1, file_path="/no/such.patch", reasoning="r" ) diff --git a/libs/hackbot-runtime/tests/test_claude_sdk.py b/libs/hackbot-runtime/tests/test_claude_sdk.py index 0c30aa336f..7f665ca436 100644 --- a/libs/hackbot-runtime/tests/test_claude_sdk.py +++ b/libs/hackbot-runtime/tests/test_claude_sdk.py @@ -1,11 +1,8 @@ -"""Tests for the claude-agent-sdk actions adapter (guards issue #1).""" +"""Tests for the actions MCP server (built via agent-tools' adapter).""" import mcp.server.lowlevel.server as low from hackbot_runtime.actions import ActionsRecorder -from 
hackbot_runtime.actions.claude_sdk import ( - actions_server_for, - build_actions_sdk_server, -) +from hackbot_runtime.actions.claude_sdk import actions_server_for from mcp.types import CallToolRequest, CallToolRequestParams, ListToolsRequest _ALL = [ @@ -17,7 +14,7 @@ def _server(recorder): - config = build_actions_sdk_server(recorder, types=_ALL) + _, config = actions_server_for(recorder, types=_ALL) assert config["type"] == "sdk" return config["instance"] diff --git a/libs/hackbot-runtime/tests/test_registry.py b/libs/hackbot-runtime/tests/test_registry.py deleted file mode 100644 index bc7d055324..0000000000 --- a/libs/hackbot-runtime/tests/test_registry.py +++ /dev/null @@ -1,47 +0,0 @@ -"""Tests for the action registry and schema derivation.""" - -from hackbot_runtime.actions import ActionInputError, get_actions -from hackbot_runtime.actions.registry import ActionDefinition - -_BUGZILLA_TYPES = { - "bugzilla.update_bug", - "bugzilla.add_comment", - "bugzilla.add_attachment", - "bugzilla.create_bug", -} - - -def test_get_actions_returns_all(): - assert {a.type for a in get_actions()} == _BUGZILLA_TYPES - - -def test_get_actions_filtered(): - got = get_actions(["bugzilla.update_bug", "bugzilla.add_comment"]) - assert {a.type for a in got} == {"bugzilla.update_bug", "bugzilla.add_comment"} - - -def test_action_input_error_is_exception(): - assert issubclass(ActionInputError, Exception) - - -def test_input_schema_excludes_recorder_and_keeps_descriptions(): - update = next(a for a in get_actions() if a.type == "bugzilla.update_bug") - schema = update.input_schema - props = schema["properties"] - assert "recorder" not in props - assert set(props) == {"bug_id", "changes", "reasoning"} - assert set(schema["required"]) == {"bug_id", "changes", "reasoning"} - assert props["bug_id"]["description"] - - -def test_input_schema_marks_optional_params(): - comment = next(a for a in get_actions() if a.type == "bugzilla.add_comment") - # is_private has a default -> not 
required. - assert "is_private" not in comment.input_schema.get("required", []) - - -def test_input_schema_is_cached(): - defn = ActionDefinition( - type="x.y", description="d", handler=get_actions()[0].handler - ) - assert defn.input_schema is defn.input_schema diff --git a/pyproject.toml b/pyproject.toml index 94fdc705e6..71749b30c7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -71,12 +71,12 @@ nlp = [ "spacy==3.8.14", ] nn = [] -# hackbot runtime for bugbug tools that use it (e.g. tools/duplicate_bugs, -# which builds the Bugzilla MCP via hackbot_runtime.mcp.bugzilla). Not a base -# dependency: hackbot-runtime is a workspace-only package, so a standalone +# Reusable MCP tool servers for bugbug tools that use them (e.g. +# tools/duplicate_bugs, which builds the Bugzilla MCP via agent_tools.bugzilla). +# Not a base dependency: agent-tools is a workspace-only package, so a standalone # `pip install bugbug` must not require it. bug-fix = [ - "hackbot-runtime[claude-sdk]", + "agent-tools[bugzilla]", ] [dependency-groups] @@ -137,10 +137,11 @@ artifacts = [ ] [tool.uv.workspace] -members = ["http_service", "services/hackbot-api", "agents/bug-fix", "libs/hackbot-runtime"] +members = ["http_service", "services/hackbot-api", "agents/bug-fix", "libs/hackbot-runtime", "libs/agent-tools"] [tool.uv.sources] hackbot-runtime = { workspace = true } +agent-tools = { workspace = true } [tool.ruff] extend-exclude = ["data"] diff --git a/services/hackbot-api/Dockerfile b/services/hackbot-api/Dockerfile index f5a13479c7..2c8001b9a5 100644 --- a/services/hackbot-api/Dockerfile +++ b/services/hackbot-api/Dockerfile @@ -11,6 +11,7 @@ COPY http_service/pyproject.toml ./http_service/ COPY services/hackbot-api/pyproject.toml ./services/hackbot-api/ COPY agents/bug-fix/pyproject.toml ./agents/bug-fix/ COPY libs/hackbot-runtime/pyproject.toml ./libs/hackbot-runtime/ +COPY libs/agent-tools/pyproject.toml ./libs/agent-tools/ # Install external deps without building workspace members. 
RUN --mount=type=cache,target=/root/.cache/uv \ diff --git a/uv.lock b/uv.lock index 9aebc9b008..2c79e5257d 100644 --- a/uv.lock +++ b/uv.lock @@ -18,6 +18,7 @@ resolution-markers = [ [manifest] members = [ + "agent-tools", "bugbug", "bugbug-http-service", "hackbot-agent-bug-fix", @@ -46,6 +47,36 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/18/a6/907a406bb7d359e6a63f99c313846d9eec4f7e6f7437809e03aa00fa3074/absl_py-2.4.0-py3-none-any.whl", hash = "sha256:88476fd881ca8aab94ffa78b7b6c632a782ab3ba1cd19c9bd423abc4fb4cd28d", size = 135750, upload-time = "2026-01-28T10:17:04.19Z" }, ] +[[package]] +name = "agent-tools" +version = "0.1.0" +source = { editable = "libs/agent-tools" } +dependencies = [ + { name = "pydantic" }, +] + +[package.optional-dependencies] +bugzilla = [ + { name = "bugsy" }, +] +claude-sdk = [ + { name = "claude-agent-sdk" }, +] +firefox = [ + { name = "grizzly-framework" }, + { name = "prefpicker" }, +] + +[package.metadata] +requires-dist = [ + { name = "bugsy", marker = "extra == 'bugzilla'" }, + { name = "claude-agent-sdk", marker = "extra == 'claude-sdk'", specifier = ">=0.1.30" }, + { name = "grizzly-framework", marker = "extra == 'firefox'" }, + { name = "prefpicker", marker = "extra == 'firefox'" }, + { name = "pydantic", specifier = ">=2.6.0" }, +] +provides-extras = ["bugzilla", "firefox", "claude-sdk"] + [[package]] name = "aiofiles" version = "0.8.0" @@ -562,7 +593,7 @@ dependencies = [ [package.optional-dependencies] bug-fix = [ - { name = "hackbot-runtime", extra = ["claude-sdk"] }, + { name = "agent-tools", extra = ["bugzilla"] }, ] nlp = [ { name = "spacy" }, @@ -590,12 +621,12 @@ test = [ [package.metadata] requires-dist = [ + { name = "agent-tools", extras = ["bugzilla"], marker = "extra == 'bug-fix'", editable = "libs/agent-tools" }, { name = "amqp", specifier = "~=5.3.1" }, { name = "async-lru", specifier = "~=2.3.0" }, { name = "beautifulsoup4", specifier = "~=4.14.3" }, { name = "boto3", specifier = 
">=1.42.78,<1.44.0" }, { name = "claude-agent-sdk", specifier = ">=0.1.30" }, - { name = "hackbot-runtime", extras = ["claude-sdk"], marker = "extra == 'bug-fix'", editable = "libs/hackbot-runtime" }, { name = "httpx", specifier = "~=0.28.1" }, { name = "imbalanced-learn", specifier = "~=0.14.1" }, { name = "langchain", specifier = "~=1.2.13" }, @@ -2109,24 +2140,22 @@ name = "hackbot-agent-bug-fix" version = "0.1.0" source = { virtual = "agents/bug-fix" } dependencies = [ + { name = "agent-tools", extra = ["bugzilla", "firefox"] }, { name = "bugsy" }, { name = "claude-agent-sdk" }, - { name = "grizzly-framework" }, { name = "hackbot-runtime", extra = ["claude-sdk"] }, { name = "mcp" }, - { name = "prefpicker" }, { name = "starlette" }, { name = "uvicorn" }, ] [package.metadata] requires-dist = [ + { name = "agent-tools", extras = ["bugzilla", "firefox"], editable = "libs/agent-tools" }, { name = "bugsy" }, { name = "claude-agent-sdk", specifier = ">=0.1.30" }, - { name = "grizzly-framework" }, { name = "hackbot-runtime", extras = ["claude-sdk"], editable = "libs/hackbot-runtime" }, { name = "mcp", specifier = ">=1.0.0" }, - { name = "prefpicker" }, { name = "starlette", specifier = ">=0.36.0" }, { name = "uvicorn", specifier = ">=0.27.0" }, ] @@ -2180,19 +2209,21 @@ name = "hackbot-runtime" version = "0.1.0" source = { editable = "libs/hackbot-runtime" } dependencies = [ + { name = "agent-tools" }, { name = "pydantic-settings" }, { name = "requests" }, ] [package.optional-dependencies] claude-sdk = [ - { name = "bugsy" }, + { name = "agent-tools", extra = ["claude-sdk"] }, { name = "claude-agent-sdk" }, ] [package.metadata] requires-dist = [ - { name = "bugsy", marker = "extra == 'claude-sdk'" }, + { name = "agent-tools", editable = "libs/agent-tools" }, + { name = "agent-tools", extras = ["claude-sdk"], marker = "extra == 'claude-sdk'", editable = "libs/agent-tools" }, { name = "claude-agent-sdk", marker = "extra == 'claude-sdk'", specifier = ">=0.1.30" }, { name 
= "pydantic-settings", specifier = ">=2.1.0" }, { name = "requests", specifier = ">=2.32.0" }, From 101c290ca5fc3ce586ff375655c71461a9ee413e Mon Sep 17 00:00:00 2001 From: Suhaib Mujahid Date: Tue, 9 Jun 2026 18:49:02 -0400 Subject: [PATCH 04/21] Drop the duplicate_bugs tool Remove bugbug/tools/duplicate_bugs and the root pyproject bits it was the sole user of: the bug-fix optional extra (agent-tools[bugzilla]) and the duplicate_bugs/prompts wheel artifact. bugbug no longer depends on agent-tools. --- bugbug/tools/duplicate_bugs/__init__.py | 3 - bugbug/tools/duplicate_bugs/agent.py | 532 ------------------ bugbug/tools/duplicate_bugs/config.py | 53 -- .../prompts/dupdetector_bugs.md | 38 -- .../prompts/dupdetector_local.md | 49 -- .../prompts/dupdetector_local_to_local.md | 37 -- pyproject.toml | 10 - uv.lock | 6 +- 8 files changed, 1 insertion(+), 727 deletions(-) delete mode 100644 bugbug/tools/duplicate_bugs/__init__.py delete mode 100644 bugbug/tools/duplicate_bugs/agent.py delete mode 100644 bugbug/tools/duplicate_bugs/config.py delete mode 100644 bugbug/tools/duplicate_bugs/prompts/dupdetector_bugs.md delete mode 100644 bugbug/tools/duplicate_bugs/prompts/dupdetector_local.md delete mode 100644 bugbug/tools/duplicate_bugs/prompts/dupdetector_local_to_local.md diff --git a/bugbug/tools/duplicate_bugs/__init__.py b/bugbug/tools/duplicate_bugs/__init__.py deleted file mode 100644 index 3d38945862..0000000000 --- a/bugbug/tools/duplicate_bugs/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from bugbug.tools.duplicate_bugs.agent import DuplicateBugsTool - -__all__ = ["DuplicateBugsTool"] diff --git a/bugbug/tools/duplicate_bugs/agent.py b/bugbug/tools/duplicate_bugs/agent.py deleted file mode 100644 index 35fd3015d0..0000000000 --- a/bugbug/tools/duplicate_bugs/agent.py +++ /dev/null @@ -1,532 +0,0 @@ -r"""Duplicate bug detector -- find duplicate bugs, three ways. - -mode="local" One crash per sub-directory. 
For each, decide - whether it is already filed as a blocker of - meta_bug on Bugzilla. - -mode="bugs" Already-filed bugs. For each, decide whether some - *other* blocker of meta_bug covers the same crash. - -mode="local_to_local" One crash per sub-directory, but the directory - still contains internal duplicates. Groups the - sub-directories by crash and copies one - representative per group into results_dir. -""" - -from __future__ import annotations - -import json -import shutil -import sys -from dataclasses import dataclass, field -from pathlib import Path - -import bugsy -from agent_tools import bugzilla as bugzilla_tools -from agent_tools.bugzilla import BugzillaContext -from agent_tools.claude_sdk import build_sdk_server -from claude_agent_sdk import ( - AssistantMessage, - ClaudeAgentOptions, - ClaudeSDKClient, - ResultMessage, - SystemMessage, - TextBlock, - ThinkingBlock, - ToolResultBlock, - ToolUseBlock, - UserMessage, -) - -from bugbug.tools.base import GenerativeModelTool -from bugbug.tools.duplicate_bugs.config import ( - BUGZILLA_READ_TOOLS, - parse_dir_verdict, - parse_verdict, -) - -HERE = Path(__file__).resolve().parent - - -# --------------------------------------------------------------------------- # -# Result type -# --------------------------------------------------------------------------- # - - -@dataclass -class DuplicateResult: - exit_code: int = 0 - results: list[tuple[str, str]] = field(default_factory=list) - - -# --------------------------------------------------------------------------- # -# Transcript streaming -# --------------------------------------------------------------------------- # - - -def _truncate(s: str, n: int = 400) -> str: - return s if len(s) <= n else s[:n] + f"... 
[{len(s) - n} more chars]" - - -class Reporter: - def __init__(self, verbose: bool, log_path: Path | None): - self.verbose = verbose - self._log = log_path.open("w", encoding="utf-8") if log_path else None - - def __enter__(self): - return self - - def __exit__(self, *exc): - if self._log: - self._log.close() - - def start_item(self, label: str) -> None: - header = f"\n{'#' * 60}\n# {label}\n{'#' * 60}" - self._emit(header, always=True) - - def _emit(self, line: str, *, always: bool = False, full: str | None = None): - if self._log: - self._log.write((full if full is not None else line) + "\n") - self._log.flush() - if always or self.verbose: - print(line, file=sys.stderr) - - def message(self, msg) -> None: - if isinstance(msg, AssistantMessage): - for block in msg.content: - if isinstance(block, TextBlock): - self._emit(f"[agent] {block.text}", always=True) - elif isinstance(block, ThinkingBlock): - thinking = block.thinking.strip() - self._emit( - f"[thinking] {_truncate(thinking.split(chr(10), 1)[0], 120)}", - full=f"[thinking]\n{thinking}", - ) - elif isinstance(block, ToolUseBlock): - inp = json.dumps(block.input, default=str) - self._emit( - f"[→tool] {block.name}({_truncate(inp, 200)})", - full=f"[→tool] {block.name}\n" - f"{json.dumps(block.input, indent=2, default=str)}", - ) - elif isinstance(msg, UserMessage) and isinstance(msg.content, list): - for block in msg.content: - if isinstance(block, ToolResultBlock): - marker = "ERR" if block.is_error else "ok" - if isinstance(block.content, str): - text = block.content - elif isinstance(block.content, list): - text = "\n".join( - c.get("text", "") - for c in block.content - if isinstance(c, dict) and c.get("type") == "text" - ) - else: - text = str(block.content) - self._emit( - f" [tool←{marker}] {_truncate(text, 300)}", - full=f" [tool←{marker}]\n{text}", - ) - elif isinstance(msg, SystemMessage): - if msg.subtype == "init": - self._emit( - f"[system] session started (model={msg.data.get('model', '?')})" - 
) - elif isinstance(msg, ResultMessage): - cost = f" cost=${msg.total_cost_usd:.4f}" if msg.total_cost_usd else "" - self._emit(f"[done] turns={msg.num_turns}{cost}") - if msg.is_error: - self._emit(f"[done] ERROR: {msg.result}", always=True) - - -# --------------------------------------------------------------------------- # -# Agent sessions -# --------------------------------------------------------------------------- # - - -async def _run_session( - options: ClaudeAgentOptions, - prompt: str, - reporter: Reporter, -) -> str: - """Run one agent session to completion and extract its verdict.""" - final_text = "" - errored = False - async with ClaudeSDKClient(options=options) as client: - await client.query(prompt) - async for msg in client.receive_response(): - reporter.message(msg) - if isinstance(msg, AssistantMessage): - for block in msg.content: - if isinstance(block, TextBlock): - final_text = block.text - elif isinstance(msg, ResultMessage): - errored = msg.is_error - - if errored: - return "ERROR" - return parse_verdict(final_text) or "UNKNOWN" - - -async def match_local_crash( - crash_path: Path, - meta_bug: int, - base_options: ClaudeAgentOptions, - reporter: Reporter, -) -> str: - opts = ClaudeAgentOptions(**{**base_options.__dict__, "cwd": str(crash_path)}) - - contents = sorted( - p.name + ("/" if p.is_dir() else "") for p in crash_path.iterdir() - ) - prompt = ( - f"Crash directory: {crash_path}\n" - f"Meta bug: {meta_bug}\n" - f"Directory contents: {', '.join(contents) or '(empty)'}\n\n" - f"Determine whether this crash is already filed as a blocker of " - f"bug {meta_bug}. End your final response with the VERDICT: line." 
- ) - return await _run_session(opts, prompt, reporter) - - -async def match_local_to_local( - subject: Path, - candidates: list[str], - base_options: ClaudeAgentOptions, - reporter: Reporter, -) -> str: - contents = sorted(p.name + ("/" if p.is_dir() else "") for p in subject.iterdir()) - cand_lines = "\n".join(f" - {c}" for c in candidates) - prompt = ( - f"Subject directory: {subject.name}\n" - f"Subject contents: {', '.join(contents) or '(empty)'}\n\n" - f"Candidate directories ({len(candidates)}):\n{cand_lines}\n\n" - f"Determine whether the subject crash matches any candidate. " - f"End your final response with the VERDICT: line — either NEW " - f"or exactly one of the candidate names above." - ) - - final_text = "" - errored = False - async with ClaudeSDKClient(options=base_options) as client: - await client.query(prompt) - async for msg in client.receive_response(): - reporter.message(msg) - if isinstance(msg, AssistantMessage): - for block in msg.content: - if isinstance(block, TextBlock): - final_text = block.text - elif isinstance(msg, ResultMessage): - errored = msg.is_error - - if errored: - return "ERROR" - return parse_dir_verdict(final_text, set(candidates)) or "UNKNOWN" - - -async def match_filed_bug( - subject: int, - meta_bug: int, - options: ClaudeAgentOptions, - reporter: Reporter, -) -> str: - prompt = ( - f"Subject bug: {subject}\n" - f"Meta bug: {meta_bug}\n\n" - f"Determine whether bug {subject} has a duplicate among the " - f"blockers of bug {meta_bug}. End your final response with the " - f"VERDICT: line." 
- ) - return await _run_session(options, prompt, reporter) - - -# --------------------------------------------------------------------------- # -# Mode runners -# --------------------------------------------------------------------------- # - - -def _build_options( - system_prompt: str, - bugzilla_server, - *, - allow_local_fs: bool, - model: str | None = None, - max_turns: int | None = None, -) -> ClaudeAgentOptions: - tools = list(BUGZILLA_READ_TOOLS) - if allow_local_fs: - tools = ["Read", "Glob", "Grep", *tools] - return ClaudeAgentOptions( - system_prompt=system_prompt, - mcp_servers={"bugzilla": bugzilla_server}, - permission_mode="bypassPermissions", - allowed_tools=tools, - model=model, - max_turns=max_turns, - setting_sources=[], - ) - - -async def _run_local( - *, - local_dir: Path, - meta_bug: int, - bugzilla_server, - model: str | None, - max_turns: int | None, - verbose: bool, - log: Path | None, -) -> DuplicateResult: - system_prompt = ( - (HERE / "prompts" / "dupdetector_local.md") - .read_text() - .format(meta_bug=meta_bug) - ) - base_options = _build_options( - system_prompt, - bugzilla_server, - allow_local_fs=True, - model=model, - max_turns=max_turns, - ) - - crash_subdirs = sorted(d for d in local_dir.iterdir() if d.is_dir()) - if not crash_subdirs: - print( - f"[duplicate_bugs] no sub-directories found in {local_dir}", file=sys.stderr - ) - return DuplicateResult() - - print( - f"[duplicate_bugs] matching {len(crash_subdirs)} crash(es) against " - f"meta bug {meta_bug}", - file=sys.stderr, - ) - - results: list[tuple[str, str]] = [] - exit_code = 0 - with Reporter(verbose=verbose, log_path=log) as reporter: - for i, subdir in enumerate(crash_subdirs, 1): - print( - f"[duplicate_bugs] {i}/{len(crash_subdirs)}: {subdir.name}", - file=sys.stderr, - ) - reporter.start_item(f"crash: {subdir.name}") - verdict = await match_local_crash(subdir, meta_bug, base_options, reporter) - results.append((subdir.name, verdict)) - if verdict in ("ERROR", 
"UNKNOWN"): - exit_code = 1 - - return DuplicateResult(exit_code=exit_code, results=results) - - -async def _run_bugs( - *, - bug_ids: list[int], - meta_bug: int, - bugzilla_server, - model: str | None, - max_turns: int | None, - verbose: bool, - log: Path | None, -) -> DuplicateResult: - print( - f"[duplicate_bugs] checking {len(bug_ids)} bug(s) against blockers " - f"of meta bug {meta_bug}", - file=sys.stderr, - ) - - prompt_tmpl = (HERE / "prompts" / "dupdetector_bugs.md").read_text() - - results: list[tuple[str, str]] = [] - exit_code = 0 - with Reporter(verbose=verbose, log_path=log) as reporter: - for i, subject in enumerate(bug_ids, 1): - print( - f"[duplicate_bugs] {i}/{len(bug_ids)}: bug {subject}", file=sys.stderr - ) - reporter.start_item(f"bug {subject}") - - system_prompt = prompt_tmpl.format(subject=subject, meta_bug=meta_bug) - options = _build_options( - system_prompt, - bugzilla_server, - allow_local_fs=False, - model=model, - max_turns=max_turns, - ) - - verdict = await match_filed_bug(subject, meta_bug, options, reporter) - if verdict.isdigit() and int(verdict) == subject: - reporter._emit( - f"[duplicate_bugs] bug {subject}: verdict was itself — " - f"demoting to NEW", - always=True, - ) - verdict = "NEW" - results.append((str(subject), verdict)) - if verdict in ("ERROR", "UNKNOWN"): - exit_code = 1 - - return DuplicateResult(exit_code=exit_code, results=results) - - -async def _run_local_to_local( - *, - local_dir: Path, - results_dir: Path, - model: str | None, - max_turns: int | None, - verbose: bool, - log: Path | None, -) -> DuplicateResult: - crash_subdirs = sorted(d for d in local_dir.iterdir() if d.is_dir()) - if not crash_subdirs: - print( - f"[duplicate_bugs] no sub-directories found in {local_dir}", file=sys.stderr - ) - return DuplicateResult() - - print( - f"[duplicate_bugs] deduplicating {len(crash_subdirs)} crash(es) locally", - file=sys.stderr, - ) - - system_prompt = (HERE / "prompts" / 
"dupdetector_local_to_local.md").read_text() - options = ClaudeAgentOptions( - system_prompt=system_prompt, - permission_mode="bypassPermissions", - allowed_tools=["Read", "Glob", "Grep"], - model=model, - max_turns=max_turns, - setting_sources=[], - cwd=str(local_dir), - ) - - groups: dict[str, list[str]] = {} - results: list[tuple[str, str]] = [] - exit_code = 0 - - with Reporter(verbose=verbose, log_path=log) as reporter: - for i, subdir in enumerate(crash_subdirs, 1): - print( - f"[duplicate_bugs] {i}/{len(crash_subdirs)}: {subdir.name}", - file=sys.stderr, - ) - - representatives = list(groups.keys()) - if not representatives: - verdict = "NEW" - else: - reporter.start_item(f"crash: {subdir.name}") - verdict = await match_local_to_local( - subdir, representatives, options, reporter - ) - if verdict == subdir.name: - verdict = "NEW" - - if verdict == "NEW": - groups[subdir.name] = [subdir.name] - rep = subdir.name - elif verdict in ("ERROR", "UNKNOWN"): - groups[subdir.name] = [subdir.name] - rep = verdict - exit_code = 1 - else: - groups[verdict].append(subdir.name) - rep = verdict - - results.append((subdir.name, rep)) - - if results_dir is not None: - results_dir.mkdir(parents=True) - for rep_name in groups: - shutil.copytree(local_dir / rep_name, results_dir / rep_name) - - print( - f"[duplicate_bugs] {len(groups)} unique crash(es) copied to {results_dir}", - file=sys.stderr, - ) - - return DuplicateResult(exit_code=exit_code, results=results) - - -# --------------------------------------------------------------------------- # -# Tool class -# --------------------------------------------------------------------------- # - - -class DuplicateBugsTool(GenerativeModelTool): - """Duplicate bug detector using claude-agent-sdk.""" - - @classmethod - def create(cls, **kwargs): - return cls() - - async def run( - self, - *, - mode: str, - base_url: str | None = None, - api_key: str | None = None, - meta_bug: int | None = None, - bug_ids: list[int] | None = None, - 
local_dir: Path | None = None, - results_dir: Path | None = None, - model: str | None = None, - max_turns: int | None = None, - verbose: bool = False, - log: Path | None = None, - ) -> DuplicateResult: - if mode == "local_to_local": - if local_dir is None: - raise ValueError("local_dir is required for local_to_local mode") - if results_dir is None: - raise ValueError("results_dir is required for local_to_local mode") - return await _run_local_to_local( - local_dir=local_dir, - results_dir=results_dir, - model=model, - max_turns=max_turns, - verbose=verbose, - log=log, - ) - - # Modes that need Bugzilla - if not base_url or not api_key: - raise ValueError("base_url and api_key are required for local/bugs modes") - if meta_bug is None: - raise ValueError("meta_bug is required for local/bugs modes") - - bz = bugsy.Bugsy(api_key=api_key, bugzilla_url=base_url) - bz_ctx = BugzillaContext(client=bz) - bugzilla_server = build_sdk_server("bugzilla", bz_ctx, bugzilla_tools.TOOLS) - - if mode == "local": - if local_dir is None: - raise ValueError("local_dir is required for local mode") - return await _run_local( - local_dir=local_dir, - meta_bug=meta_bug, - bugzilla_server=bugzilla_server, - model=model, - max_turns=max_turns, - verbose=verbose, - log=log, - ) - elif mode == "bugs": - if not bug_ids: - raise ValueError("bug_ids is required for bugs mode") - return await _run_bugs( - bug_ids=bug_ids, - meta_bug=meta_bug, - bugzilla_server=bugzilla_server, - model=model, - max_turns=max_turns, - verbose=verbose, - log=log, - ) - else: - raise ValueError( - f"Unknown mode: {mode}. Must be 'local', 'bugs', or 'local_to_local'" - ) diff --git a/bugbug/tools/duplicate_bugs/config.py b/bugbug/tools/duplicate_bugs/config.py deleted file mode 100644 index 1ffe6d8f55..0000000000 --- a/bugbug/tools/duplicate_bugs/config.py +++ /dev/null @@ -1,53 +0,0 @@ -from __future__ import annotations - -import re -from pathlib import Path - -import yaml - -# Read-only Bugzilla surface. 
-BUGZILLA_READ_TOOLS = [ - "mcp__bugzilla__search_bugs", - "mcp__bugzilla__get_bugs", - "mcp__bugzilla__get_bug_comments", - "mcp__bugzilla__get_bug_attachments", -] - -# The VERDICT: line the agent is told to emit. -_VERDICT_RE = re.compile( - r"^VERDICT:\s*" - r"(?:bug\s*)?" - r"(?:https?://\S+?id=)?" - r"(NEW|\d+)\b", - re.IGNORECASE | re.MULTILINE, -) - -# --local-to-local verdicts name a directory, not a bug ID. -_VERDICT_LINE_RE = re.compile(r"^VERDICT:\s*(.+?)\s*$", re.MULTILINE) - -_CONFIG_KEYS = {"base_url", "model", "max_turns"} - - -def load_config(path: Path) -> dict: - with path.open() as f: - data = yaml.safe_load(f) or {} - return {k: v for k, v in data.items() if k in _CONFIG_KEYS} - - -def parse_verdict(text: str) -> str | None: - matches = _VERDICT_RE.findall(text) - if not matches: - return None - v = matches[-1].upper() - return "NEW" if v == "NEW" else v - - -def parse_dir_verdict(text: str, candidates: set[str]) -> str | None: - matches = _VERDICT_LINE_RE.findall(text) - if not matches: - return None - v = matches[-1] - if v.upper() == "NEW": - return "NEW" - v = v.rstrip("/") - return v if v in candidates else None diff --git a/bugbug/tools/duplicate_bugs/prompts/dupdetector_bugs.md b/bugbug/tools/duplicate_bugs/prompts/dupdetector_bugs.md deleted file mode 100644 index e984191603..0000000000 --- a/bugbug/tools/duplicate_bugs/prompts/dupdetector_bugs.md +++ /dev/null @@ -1,38 +0,0 @@ -You are a duplicate detector. Your sole job: decide whether **bug {subject}** is already covered by some _other_ bug blocking meta bug **{meta_bug}**. - -# Inputs - -- **Subject bug**: {subject} — the one you are evaluating. It may or may not already block {meta_bug}; doesn't matter. -- **Meta bug**: {meta_bug} — the search scope. Only its blockers are valid matches. - -# Approach - -1. **Read the subject.** `get_bugs` with `ids=[{subject}]`, `include_comments=true`, and `include_fields=id,summary,status,resolution,cf_crash_signature,product,component`. 
Extract the discriminating signal from summary / comment 0 / `cf_crash_signature`: the top stack frame, the assertion text, a fuzzer hash. Pick the fragment that would have to appear in a true duplicate. - -2. **Search the blockers.** `search_bugs` with `blocks={meta_bug}` plus your best term. Request `include_fields=id,summary,status,resolution,cf_crash_signature`. If {subject} itself shows up, that's just the subject blocking the meta — ignore it, you're looking for _different_ bugs. - -3. **Widen if empty.** Drop the term constraint, try your second-best signal, still scoped to `blocks={meta_bug}`. Stop after ~3 attempts. - -4. **Verify.** Pull comment 0 on your best candidate. Same component + same rough area is not enough — a match needs the _same_ crash: same assertion, same top frames, or same `cf_crash_signature`. Different crashing function in the same file → not a match. - -# Edge cases - -- {subject} is `RESOLVED DUPLICATE` → if the dupe target blocks {meta_bug}, report the target; otherwise keep searching normally. -- Two candidates both match → pick the older (lower ID). -- {subject} inaccessible → report `VERDICT: NEW` with a note that you couldn't read it. - -# Output - -Your **final message** must end with exactly one line: - -``` -VERDICT: -``` - -or - -``` -VERDICT: NEW -``` - -One or two sentences of justification above the line. Keep it tight. diff --git a/bugbug/tools/duplicate_bugs/prompts/dupdetector_local.md b/bugbug/tools/duplicate_bugs/prompts/dupdetector_local.md deleted file mode 100644 index 7f5dd736f8..0000000000 --- a/bugbug/tools/duplicate_bugs/prompts/dupdetector_local.md +++ /dev/null @@ -1,49 +0,0 @@ -You are a crash-to-bug matcher. Your sole job: decide whether the crash in your current working directory already has a bug filed on Bugzilla that blocks meta bug **{meta_bug}**. - -# Your working directory - -Your cwd is a single crash sub-directory. 
It typically contains things like an ASAN log, a minidump, a testcase, a `crash_info.json`, or similar. Start by reading whatever is there — there is no fixed schema. - -From those files, extract the **distinctive signals** you'll search for: - -- The crash signature / top-of-stack function name -- The assertion or ASAN error message (the short, greppable part — not the full trace) -- Any hash or ID the fuzzer embedded -- Source file + line of the crashing frame - -Pick the one or two fragments most likely to appear verbatim in a bug summary or comment 0. Prefer specific over generic: a mangled symbol beats "heap-buffer-overflow". - -# How to search - -You only have **read-only** Bugzilla tools. No writes, no Firefox tools. - -1. **Get the candidate set once.** Use `search_bugs` with `blocks={meta_bug}` plus your best discriminating term (e.g. `short_desc` / `cf_crash_signature`). Request `include_fields=id,summary,status,resolution,cf_crash_signature`. Don't omit the search term — pulling every blocker of a busy meta bug wastes turns. - -2. **If that's empty, widen**: drop the field constraint and try a quicksearch/content match, or try your second-best term, still scoped to `blocks={meta_bug}`. - -3. **Verify the best candidate.** Summaries lie. Use `get_bug_comments` (or `get_bugs` with `include_comments=true`) on your top one or two hits and check comment 0 for the same stack / assertion / testcase shape you see locally. - -4. **Stop after ~3 search attempts.** Diminishing returns. If you haven't found it by then, it's probably not filed. - -# Deciding - -- **Match**: comment 0 or the crash signature clearly shows the _same_ crash — same assertion or same top frames. A duplicate that was resolved DUPLICATE still counts; report the dupe target if obvious, otherwise the dupe itself. -- **No match**: nothing in the meta bug's dependency tree lines up. - -Same component + same rough area but a _different_ crashing function → **not** a match. 
- -# Output - -Your **final message** must end with exactly one line in this form (no markdown, no trailing punctuation): - -``` -VERDICT: -``` - -or - -``` -VERDICT: NEW -``` - -Before that line, give one or two sentences of justification so a human skimming the transcript can see why. Keep it short — the orchestrator only parses the `VERDICT:` line. diff --git a/bugbug/tools/duplicate_bugs/prompts/dupdetector_local_to_local.md b/bugbug/tools/duplicate_bugs/prompts/dupdetector_local_to_local.md deleted file mode 100644 index e9b56d7a7b..0000000000 --- a/bugbug/tools/duplicate_bugs/prompts/dupdetector_local_to_local.md +++ /dev/null @@ -1,37 +0,0 @@ -You are a crash deduplicator. Your sole job: decide whether the **subject** crash directory represents the same crash as any of the **candidate** directories. - -# Setup - -Your cwd is the parent directory containing all crash sub-directories. You have Read/Glob/Grep — no Bugzilla, no network, no writes. - -The user message names one **subject** directory and a list of **candidate** directories. All paths are relative to your cwd. - -# Approach - -1. **Read the subject.** Look at whatever is in the subject directory — ASAN log, minidump, `crash_info.json`, testcase. There is no fixed schema. Extract the discriminating signal: top-of-stack function, assertion text, ASAN error line, source file + crashing line. - -2. **Scan the candidates.** For each candidate directory, read the corresponding artifact and compare. You don't have to read every file in every candidate — once you find the stack/assertion in one file, check the same filename in the others. - -3. **Decide.** A match needs the _same_ crash: same assertion string, or same top stack frames, or same fuzzer-assigned signature hash. Same component + same rough area but a _different_ crashing function → **not** a match. Slightly different line numbers on the same function are fine (builds drift). 
- -# Short-circuit - -If you find a clear match, stop — don't keep reading the remaining candidates. Report the first one that matches. - -If two candidates both match, pick the one listed first. - -# Output - -Your **final message** must end with exactly one line (no markdown, no trailing punctuation): - -``` -VERDICT: -``` - -where `` is _exactly_ one of the candidate names you were given — or: - -``` -VERDICT: NEW -``` - -if none of them match. One or two sentences of justification above the line. Keep it tight. diff --git a/pyproject.toml b/pyproject.toml index 71749b30c7..b33ceef701 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -71,13 +71,6 @@ nlp = [ "spacy==3.8.14", ] nn = [] -# Reusable MCP tool servers for bugbug tools that use them (e.g. -# tools/duplicate_bugs, which builds the Bugzilla MCP via agent_tools.bugzilla). -# Not a base dependency: agent-tools is a workspace-only package, so a standalone -# `pip install bugbug` must not require it. -bug-fix = [ - "agent-tools[bugzilla]", -] [dependency-groups] test = [ @@ -132,9 +125,6 @@ include = ["/bugbug", "/scripts", "/VERSION"] [tool.hatch.build.targets.wheel] packages = ["bugbug", "scripts"] -artifacts = [ - "bugbug/tools/duplicate_bugs/prompts/", -] [tool.uv.workspace] members = ["http_service", "services/hackbot-api", "agents/bug-fix", "libs/hackbot-runtime", "libs/agent-tools"] diff --git a/uv.lock b/uv.lock index 2c79e5257d..ab441b44db 100644 --- a/uv.lock +++ b/uv.lock @@ -592,9 +592,6 @@ dependencies = [ ] [package.optional-dependencies] -bug-fix = [ - { name = "agent-tools", extra = ["bugzilla"] }, -] nlp = [ { name = "spacy" }, ] @@ -621,7 +618,6 @@ test = [ [package.metadata] requires-dist = [ - { name = "agent-tools", extras = ["bugzilla"], marker = "extra == 'bug-fix'", editable = "libs/agent-tools" }, { name = "amqp", specifier = "~=5.3.1" }, { name = "async-lru", specifier = "~=2.3.0" }, { name = "beautifulsoup4", specifier = "~=4.14.3" }, @@ -673,7 +669,7 @@ requires-dist = [ { name 
= "xgboost", specifier = "~=3.2.0" }, { name = "zstandard", specifier = "~=0.25.0" }, ] -provides-extras = ["bug-fix", "nlp", "nn"] +provides-extras = ["nlp", "nn"] [package.metadata.requires-dev] spawn-pipeline = [ From 231867627332e5c6abab6b996ee9109e72d0f5f3 Mon Sep 17 00:00:00 2001 From: Suhaib Mujahid Date: Tue, 9 Jun 2026 22:54:02 -0400 Subject: [PATCH 05/21] Avoid copying per-member pyproject files into build contexts --- agents/bug-fix/Dockerfile | 25 ++++++++++--------------- agents/bug-fix/pyproject.toml | 11 +++++++++++ http_service/Dockerfile | 7 +------ http_service/Dockerfile.bg_worker | 7 +------ infra/dockerfile.base | 7 +------ infra/dockerfile.spawn_pipeline | 7 +------ services/hackbot-api/Dockerfile | 23 ++++++++++------------- services/hackbot-api/pyproject.toml | 7 +++++++ uv.lock | 4 ++-- 9 files changed, 44 insertions(+), 54 deletions(-) diff --git a/agents/bug-fix/Dockerfile b/agents/bug-fix/Dockerfile index 57c829574a..73c6fdd5ce 100644 --- a/agents/bug-fix/Dockerfile +++ b/agents/bug-fix/Dockerfile @@ -4,31 +4,26 @@ COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/ WORKDIR /app -# Workspace metadata first so the dep-download layer caches independently -# of source changes. +# Workspace root metadata first so the dep-download layer caches independently +# of source changes. Member pyproject.toml files aren't needed here: --frozen +# trusts uv.lock as-is, so no per-member lockfile validation is required. COPY pyproject.toml uv.lock VERSION ./ -COPY http_service/pyproject.toml ./http_service/ -COPY services/hackbot-api/pyproject.toml ./services/hackbot-api/ -COPY agents/bug-fix/pyproject.toml ./agents/bug-fix/ -COPY libs/hackbot-runtime/pyproject.toml ./libs/hackbot-runtime/ -COPY libs/agent-tools/pyproject.toml ./libs/agent-tools/ # Install external deps without building workspace members. 
RUN --mount=type=cache,target=/root/.cache/uv \ - uv sync --locked --no-dev --no-install-workspace --package hackbot-agent-bug-fix + uv sync --frozen --no-dev --no-install-workspace --package hackbot-agent-bug-fix -# Workspace members the agent image actually needs (source included). -COPY agents/bug-fix ./agents/bug-fix -COPY libs/hackbot-runtime ./libs/hackbot-runtime -COPY libs/agent-tools ./libs/agent-tools +# Bring in the full workspace and install the package itself non-editable, so +# the build artifact lands in the venv and the final image needs no source tree. +COPY . . RUN --mount=type=cache,target=/root/.cache/uv \ - uv sync --locked --no-dev --package hackbot-agent-bug-fix + uv sync --locked --no-dev --no-editable --package hackbot-agent-bug-fix FROM python:3.12 AS base -COPY --from=builder /app /app -WORKDIR /app/agents/bug-fix +COPY --from=builder /app/.venv /app/.venv +WORKDIR /app ENV PYTHONUNBUFFERED=1 ENV PYTHONDONTWRITEBYTECODE=1 diff --git a/agents/bug-fix/pyproject.toml b/agents/bug-fix/pyproject.toml index 93e4ed8f0d..0c9af581ae 100644 --- a/agents/bug-fix/pyproject.toml +++ b/agents/bug-fix/pyproject.toml @@ -16,3 +16,14 @@ dependencies = [ [tool.uv.sources] hackbot-runtime = { workspace = true } agent-tools = { workspace = true } + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +# The importable packages (`agent_runner`, `broker`) don't match the project +# name, so declare them explicitly. Without this a wheel build (e.g. +# `uv sync --no-editable`) would bundle nothing and `python -m agent_runner` +# would fail in a source-free image. 
+[tool.hatch.build.targets.wheel] +packages = ["agent_runner", "broker"] diff --git a/http_service/Dockerfile b/http_service/Dockerfile index 5f7f312a1a..66e89e8b3f 100644 --- a/http_service/Dockerfile +++ b/http_service/Dockerfile @@ -6,12 +6,7 @@ FROM mozilla/bugbug-base:$BUGBUG_VERSION RUN --mount=type=bind,source=pyproject.toml,target=/tmp/workspace/pyproject.toml \ --mount=type=bind,source=uv.lock,target=/tmp/workspace/uv.lock \ --mount=type=bind,source=VERSION,target=/tmp/workspace/VERSION \ - --mount=type=bind,source=http_service/pyproject.toml,target=/tmp/workspace/http_service/pyproject.toml \ - --mount=type=bind,source=services/hackbot-api/pyproject.toml,target=/tmp/workspace/services/hackbot-api/pyproject.toml \ - --mount=type=bind,source=agents/bug-fix/pyproject.toml,target=/tmp/workspace/agents/bug-fix/pyproject.toml \ - --mount=type=bind,source=libs/hackbot-runtime/pyproject.toml,target=/tmp/workspace/libs/hackbot-runtime/pyproject.toml \ - --mount=type=bind,source=libs/agent-tools/pyproject.toml,target=/tmp/workspace/libs/agent-tools/pyproject.toml \ - cd /tmp/workspace && uv sync --locked --no-dev --package bugbug-http-service --no-install-project + cd /tmp/workspace && uv sync --frozen --no-dev --package bugbug-http-service --no-install-workspace # Setup http service as package RUN --mount=type=bind,target=/tmp/workspace,rw \ diff --git a/http_service/Dockerfile.bg_worker b/http_service/Dockerfile.bg_worker index ebee605271..8ec753aa7b 100644 --- a/http_service/Dockerfile.bg_worker +++ b/http_service/Dockerfile.bg_worker @@ -6,12 +6,7 @@ FROM mozilla/bugbug-commit-retrieval:$BUGBUG_VERSION RUN --mount=type=bind,source=pyproject.toml,target=/tmp/workspace/pyproject.toml \ --mount=type=bind,source=uv.lock,target=/tmp/workspace/uv.lock \ --mount=type=bind,source=VERSION,target=/tmp/workspace/VERSION \ - --mount=type=bind,source=http_service/pyproject.toml,target=/tmp/workspace/http_service/pyproject.toml \ - 
--mount=type=bind,source=services/hackbot-api/pyproject.toml,target=/tmp/workspace/services/hackbot-api/pyproject.toml \ - --mount=type=bind,source=agents/bug-fix/pyproject.toml,target=/tmp/workspace/agents/bug-fix/pyproject.toml \ - --mount=type=bind,source=libs/hackbot-runtime/pyproject.toml,target=/tmp/workspace/libs/hackbot-runtime/pyproject.toml \ - --mount=type=bind,source=libs/agent-tools/pyproject.toml,target=/tmp/workspace/libs/agent-tools/pyproject.toml \ - cd /tmp/workspace && uv sync --locked --no-dev --package bugbug-http-service --no-install-project + cd /tmp/workspace && uv sync --frozen --no-dev --package bugbug-http-service --no-install-workspace # Setup http service as package RUN --mount=type=bind,target=/tmp/workspace,rw \ diff --git a/infra/dockerfile.base b/infra/dockerfile.base index 57d4bb5a35..ba182e4ba2 100644 --- a/infra/dockerfile.base +++ b/infra/dockerfile.base @@ -10,14 +10,9 @@ ENV UV_PROJECT_ENVIRONMENT="/opt/venv" RUN --mount=type=bind,source=pyproject.toml,target=/tmp/bugbug/pyproject.toml \ --mount=type=bind,source=uv.lock,target=/tmp/bugbug/uv.lock \ --mount=type=bind,source=VERSION,target=/tmp/bugbug/VERSION \ - --mount=type=bind,source=http_service/pyproject.toml,target=/tmp/bugbug/http_service/pyproject.toml \ - --mount=type=bind,source=services/hackbot-api/pyproject.toml,target=/tmp/bugbug/services/hackbot-api/pyproject.toml \ - --mount=type=bind,source=agents/bug-fix/pyproject.toml,target=/tmp/bugbug/agents/bug-fix/pyproject.toml \ - --mount=type=bind,source=libs/hackbot-runtime/pyproject.toml,target=/tmp/bugbug/libs/hackbot-runtime/pyproject.toml \ - --mount=type=bind,source=libs/agent-tools/pyproject.toml,target=/tmp/bugbug/libs/agent-tools/pyproject.toml \ apt-get update && \ apt-get install -y --no-install-recommends gcc g++ libgomp1 libffi-dev libjemalloc2 zstd patch git && \ - cd /tmp/bugbug && uv sync --locked --package bugbug --no-dev --no-install-project && \ + cd /tmp/bugbug && uv sync --frozen --package bugbug 
--no-dev --no-install-workspace && \ apt-get purge -y gcc g++ libffi-dev patch git && \ apt-get autoremove -y && \ rm -rf /var/lib/apt/lists/* diff --git a/infra/dockerfile.spawn_pipeline b/infra/dockerfile.spawn_pipeline index cad78c6a37..97c616e641 100644 --- a/infra/dockerfile.spawn_pipeline +++ b/infra/dockerfile.spawn_pipeline @@ -10,12 +10,7 @@ ENV UV_PROJECT_ENVIRONMENT="/opt/venv" RUN --mount=type=bind,source=pyproject.toml,target=/tmp/bugbug/pyproject.toml \ --mount=type=bind,source=uv.lock,target=/tmp/bugbug/uv.lock \ --mount=type=bind,source=VERSION,target=/tmp/bugbug/VERSION \ - --mount=type=bind,source=http_service/pyproject.toml,target=/tmp/bugbug/http_service/pyproject.toml \ - --mount=type=bind,source=services/hackbot-api/pyproject.toml,target=/tmp/bugbug/services/hackbot-api/pyproject.toml \ - --mount=type=bind,source=agents/bug-fix/pyproject.toml,target=/tmp/bugbug/agents/bug-fix/pyproject.toml \ - --mount=type=bind,source=libs/hackbot-runtime/pyproject.toml,target=/tmp/bugbug/libs/hackbot-runtime/pyproject.toml \ - --mount=type=bind,source=libs/agent-tools/pyproject.toml,target=/tmp/bugbug/libs/agent-tools/pyproject.toml \ - cd /tmp/bugbug && uv sync --locked --package bugbug --no-dev --only-group spawn-pipeline --no-install-project + cd /tmp/bugbug && uv sync --frozen --package bugbug --no-dev --only-group spawn-pipeline --no-install-workspace ADD infra/spawn_pipeline.py /code/ diff --git a/services/hackbot-api/Dockerfile b/services/hackbot-api/Dockerfile index 2c8001b9a5..e15d3527f8 100644 --- a/services/hackbot-api/Dockerfile +++ b/services/hackbot-api/Dockerfile @@ -4,29 +4,26 @@ COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/ WORKDIR /app -# Workspace metadata first so the dep-download layer caches independently -# of source changes. +# Workspace root metadata first so the dep-download layer caches independently +# of source changes. 
Member pyproject.toml files aren't needed here: --frozen +# trusts uv.lock as-is, so no per-member lockfile validation is required. COPY pyproject.toml uv.lock VERSION ./ -COPY http_service/pyproject.toml ./http_service/ -COPY services/hackbot-api/pyproject.toml ./services/hackbot-api/ -COPY agents/bug-fix/pyproject.toml ./agents/bug-fix/ -COPY libs/hackbot-runtime/pyproject.toml ./libs/hackbot-runtime/ -COPY libs/agent-tools/pyproject.toml ./libs/agent-tools/ # Install external deps without building workspace members. RUN --mount=type=cache,target=/root/.cache/uv \ - uv sync --locked --no-dev --no-install-workspace --package hackbot-api + uv sync --frozen --no-dev --no-install-workspace --package hackbot-api -# Workspace member source. -COPY services/hackbot-api ./services/hackbot-api +# Bring in the full workspace and install the package itself non-editable, so +# the build artifact lands in the venv and the final image needs no source tree. +COPY . . RUN --mount=type=cache,target=/root/.cache/uv \ - uv sync --locked --no-dev --package hackbot-api + uv sync --locked --no-dev --no-editable --package hackbot-api FROM python:3.12-slim AS base -COPY --from=builder /app /app -WORKDIR /app/services/hackbot-api +COPY --from=builder /app/.venv /app/.venv +WORKDIR /app ENV PYTHONUNBUFFERED=1 ENV PYTHONDONTWRITEBYTECODE=1 diff --git a/services/hackbot-api/pyproject.toml b/services/hackbot-api/pyproject.toml index 14a8a0b5d4..69f555c7e1 100644 --- a/services/hackbot-api/pyproject.toml +++ b/services/hackbot-api/pyproject.toml @@ -20,6 +20,13 @@ dependencies = [ [project.optional-dependencies] dev = ["pytest>=8.0.0", "pytest-asyncio>=0.23.0", "httpx>=0.26.0"] +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["app"] + [tool.pytest.ini_options] asyncio_mode = "auto" testpaths = ["tests"] diff --git a/uv.lock b/uv.lock index ab441b44db..98b9d7bf13 100644 --- a/uv.lock +++ b/uv.lock @@ -2134,7 +2134,7 @@ 
wheels = [ [[package]] name = "hackbot-agent-bug-fix" version = "0.1.0" -source = { virtual = "agents/bug-fix" } +source = { editable = "agents/bug-fix" } dependencies = [ { name = "agent-tools", extra = ["bugzilla", "firefox"] }, { name = "bugsy" }, @@ -2159,7 +2159,7 @@ requires-dist = [ [[package]] name = "hackbot-api" version = "0.1.0" -source = { virtual = "services/hackbot-api" } +source = { editable = "services/hackbot-api" } dependencies = [ { name = "alembic" }, { name = "asyncpg" }, From 61690f5e325a91559ec12d64dff0545db7691bc5 Mon Sep 17 00:00:00 2001 From: Suhaib Mujahid Date: Wed, 10 Jun 2026 01:20:46 -0400 Subject: [PATCH 06/21] Drop the thin agent_runner wrapper and let agent be the single package --- agents/README.md | 16 ++++++++-------- agents/bug-fix/Dockerfile | 2 +- agents/bug-fix/agent/__main__.py | 6 ++++++ .../__main__.py => agent/hackbot.py} | 8 ++------ agents/bug-fix/agent_runner/__init__.py | 0 agents/bug-fix/pyproject.toml | 6 +----- 6 files changed, 18 insertions(+), 20 deletions(-) create mode 100644 agents/bug-fix/agent/__main__.py rename agents/bug-fix/{agent_runner/__main__.py => agent/hackbot.py} (95%) delete mode 100644 agents/bug-fix/agent_runner/__init__.py diff --git a/agents/README.md b/agents/README.md index 0b35b57558..2a757e235c 100644 --- a/agents/README.md +++ b/agents/README.md @@ -10,16 +10,16 @@ agents// pyproject.toml # package "hackbot-agent-"; deps: hackbot-runtime[claude-sdk] + agent-specific Dockerfile # multi-stage: builder / agent [/ broker] compose.yml # local run; sets static env (e.g. 
the broker URL) - agent_runner/ - __main__.py # AgentInputs(BaseSettings); async def main(ctx) -> AgentResult; run_async(main) - agent/ # the agent's brain: run_bug_fix()-style entrypoint + prompts/, rules/, MCP servers + agent/ # the agent: run_() logic + prompts/, rules/, MCP servers + __main__.py # entrypoint: AgentInputs(BaseSettings); async def main(ctx) -> AgentResult; run_async(main) broker/ # OPTIONAL: secret-holding MCP sidecar (e.g. holds the Bugzilla API key) run_local.py # OPTIONAL: run without Docker/broker for quick iteration ``` -`agent_runner` is the thin deployment wrapper the runtime invokes; `agent/` is -the actual logic. The runner does `from agent import run_` and passes -`ctx.actions` (the recorder) plus the validated inputs into it. +The runtime invokes the agent with `python -m agent`. `agent/__main__.py` is the +thin deployment wrapper — it validates inputs, calls the `run_()` logic in +`agent/__init__.py`, and passes `ctx.actions` (the recorder) plus the inputs into +it. ## Shared building blocks (in `hackbot-runtime`) @@ -42,9 +42,9 @@ loop — those stay explicit and in your hands. ## Adding a new agent -1. `agents//agent_runner/__main__.py` — define `AgentInputs(BaseSettings)`, +1. `agents//agent/__main__.py` — define `AgentInputs(BaseSettings)`, `async def main(ctx) -> AgentResult`, end with `raise SystemExit(run_async(main))`. -2. `agents//agent/` — your prompts/logic, exposing an async entrypoint. +2. `agents//agent/__init__.py` — your prompts/logic, exposing an async entrypoint. 3. Copy `pyproject.toml`, `Dockerfile`, `compose.yml` from `bug-fix/` and rename. 4. In `services/hackbot-api/app/schemas.py`, add a Pydantic input model. 5. 
In `services/hackbot-api/app/agents.py`, add one `AGENT_REGISTRY` entry diff --git a/agents/bug-fix/Dockerfile b/agents/bug-fix/Dockerfile index 73c6fdd5ce..2da64d4859 100644 --- a/agents/bug-fix/Dockerfile +++ b/agents/bug-fix/Dockerfile @@ -37,7 +37,7 @@ RUN useradd --create-home --shell /bin/bash agent \ USER agent -CMD ["python", "-m", "agent_runner"] +CMD ["python", "-m", "agent"] FROM base AS broker diff --git a/agents/bug-fix/agent/__main__.py b/agents/bug-fix/agent/__main__.py new file mode 100644 index 0000000000..c9b44b457a --- /dev/null +++ b/agents/bug-fix/agent/__main__.py @@ -0,0 +1,6 @@ +from hackbot_runtime import run_async + +from .hackbot import main + +if __name__ == "__main__": + raise SystemExit(run_async(main)) diff --git a/agents/bug-fix/agent_runner/__main__.py b/agents/bug-fix/agent/hackbot.py similarity index 95% rename from agents/bug-fix/agent_runner/__main__.py rename to agents/bug-fix/agent/hackbot.py index 8fc2655795..5a6e286f21 100644 --- a/agents/bug-fix/agent_runner/__main__.py +++ b/agents/bug-fix/agent/hackbot.py @@ -4,7 +4,7 @@ import tempfile from pathlib import Path -from hackbot_runtime import AgentResult, Context, run_async +from hackbot_runtime import AgentResult, Context from pydantic_settings import BaseSettings, SettingsConfigDict log = logging.getLogger("bug-fix-agent") @@ -75,7 +75,7 @@ def ensure_firefox_source(source_repo: Path) -> None: async def main(ctx: Context) -> AgentResult: - from agent import run_bug_fix + from . 
import run_bug_fix inputs = AgentInputs() ensure_firefox_source(inputs.source_repo) @@ -111,7 +111,3 @@ async def main(ctx: Context) -> AgentResult: }, exit_code=result.exit_code, ) - - -if __name__ == "__main__": - raise SystemExit(run_async(main)) diff --git a/agents/bug-fix/agent_runner/__init__.py b/agents/bug-fix/agent_runner/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/agents/bug-fix/pyproject.toml b/agents/bug-fix/pyproject.toml index 0c9af581ae..d912011b21 100644 --- a/agents/bug-fix/pyproject.toml +++ b/agents/bug-fix/pyproject.toml @@ -21,9 +21,5 @@ agent-tools = { workspace = true } requires = ["hatchling"] build-backend = "hatchling.build" -# The importable packages (`agent_runner`, `broker`) don't match the project -# name, so declare them explicitly. Without this a wheel build (e.g. -# `uv sync --no-editable`) would bundle nothing and `python -m agent_runner` -# would fail in a source-free image. [tool.hatch.build.targets.wheel] -packages = ["agent_runner", "broker"] +packages = ["agent", "broker"] From f37de1f8c9d8d7eb44c0c6b37f918e19c2bcadc1 Mon Sep 17 00:00:00 2001 From: Suhaib Mujahid Date: Wed, 10 Jun 2026 01:30:52 -0400 Subject: [PATCH 07/21] Move source-checkout helper into hackbot-runtime --- agents/bug-fix/agent/hackbot.py | 60 +----------------- .../hackbot_runtime/__init__.py | 2 + .../hackbot-runtime/hackbot_runtime/source.py | 61 +++++++++++++++++++ libs/hackbot-runtime/tests/test_source.py | 47 ++++++++++++++ 4 files changed, 112 insertions(+), 58 deletions(-) create mode 100644 libs/hackbot-runtime/hackbot_runtime/source.py create mode 100644 libs/hackbot-runtime/tests/test_source.py diff --git a/agents/bug-fix/agent/hackbot.py b/agents/bug-fix/agent/hackbot.py index 5a6e286f21..60073be37e 100644 --- a/agents/bug-fix/agent/hackbot.py +++ b/agents/bug-fix/agent/hackbot.py @@ -1,14 +1,9 @@ -import logging -import subprocess -import sys import tempfile from pathlib import Path -from hackbot_runtime import 
AgentResult, Context +from hackbot_runtime import AgentResult, Context, ensure_source_repo from pydantic_settings import BaseSettings, SettingsConfigDict -log = logging.getLogger("bug-fix-agent") - FIREFOX_REPO_URL = "https://github.com/mozilla-firefox/firefox.git" @@ -23,62 +18,11 @@ class AgentInputs(BaseSettings): model_config = SettingsConfigDict(extra="ignore") -def ensure_firefox_source(source_repo: Path) -> None: - """Shallow-clone the Firefox source tree if it isn't already present. - - Idempotent and recovers from a partial checkout left by an earlier - failed run (e.g. clone succeeded but checkout ran out of disk). - """ - if (source_repo / ".git").exists(): - status = subprocess.run( - ["git", "-C", str(source_repo), "status", "--porcelain"], - check=True, - capture_output=True, - text=True, - ) - # A healthy fresh shallow clone has an empty status; a broken - # checkout shows thousands of missing-file "D" entries. - if status.stdout.strip(): - log.warning( - "firefox source at %s is incomplete; restoring working tree", - source_repo, - ) - subprocess.run( - ["git", "-C", str(source_repo), "restore", "--source=HEAD", ":/"], - check=True, - stdout=sys.stderr, - stderr=sys.stderr, - ) - log.info("updating firefox source at %s (shallow fetch)", source_repo) - subprocess.run( - ["git", "-C", str(source_repo), "fetch", "--depth=1", "origin", "HEAD"], - check=True, - stdout=sys.stderr, - stderr=sys.stderr, - ) - subprocess.run( - ["git", "-C", str(source_repo), "reset", "--hard", "FETCH_HEAD"], - check=True, - stdout=sys.stderr, - stderr=sys.stderr, - ) - return - source_repo.mkdir(parents=True, exist_ok=True) - log.info("cloning firefox source (shallow) to %s", source_repo) - subprocess.run( - ["git", "clone", "--depth=1", FIREFOX_REPO_URL, str(source_repo)], - check=True, - stdout=sys.stderr, - stderr=sys.stderr, - ) - log.info("firefox shallow clone complete") - - async def main(ctx: Context) -> AgentResult: from . 
import run_bug_fix inputs = AgentInputs() - ensure_firefox_source(inputs.source_repo) + ensure_source_repo(inputs.source_repo, FIREFOX_REPO_URL) log_path = Path(tempfile.mkdtemp(prefix="bug-fix-log-")) / "agent.log" diff --git a/libs/hackbot-runtime/hackbot_runtime/__init__.py b/libs/hackbot-runtime/hackbot_runtime/__init__.py index e54bd1ed6c..0c0bc24cdb 100644 --- a/libs/hackbot-runtime/hackbot_runtime/__init__.py +++ b/libs/hackbot-runtime/hackbot_runtime/__init__.py @@ -2,6 +2,7 @@ from hackbot_runtime.context import Context from hackbot_runtime.result import AgentResult from hackbot_runtime.runtime import run, run_async +from hackbot_runtime.source import ensure_source_repo from hackbot_runtime.uploader import SignedPolicyUploader __all__ = [ @@ -9,6 +10,7 @@ "AgentResult", "Context", "SignedPolicyUploader", + "ensure_source_repo", "run", "run_async", ] diff --git a/libs/hackbot-runtime/hackbot_runtime/source.py b/libs/hackbot-runtime/hackbot_runtime/source.py new file mode 100644 index 0000000000..67f739e0b4 --- /dev/null +++ b/libs/hackbot-runtime/hackbot_runtime/source.py @@ -0,0 +1,61 @@ +"""Prepare a source checkout for agents that operate on a code repository.""" + +from __future__ import annotations + +import logging +import subprocess +import sys +from pathlib import Path + +log = logging.getLogger("hackbot_runtime.source") + + +def ensure_source_repo(source_repo: Path, repo_url: str) -> None: + """Ensure a shallow checkout of ``repo_url`` exists at ``source_repo``. + + Idempotent: clones if absent, otherwise shallow-fetches and hard-resets to + the remote HEAD. Recovers from a partial checkout left by an earlier failed + run (e.g. the clone succeeded but the checkout ran out of disk). 
+ """ + if (source_repo / ".git").exists(): + status = subprocess.run( + ["git", "-C", str(source_repo), "status", "--porcelain"], + check=True, + capture_output=True, + text=True, + ) + # A healthy fresh shallow clone has an empty status; a broken + # checkout shows thousands of missing-file "D" entries. + if status.stdout.strip(): + log.warning( + "source at %s is incomplete; restoring working tree", source_repo + ) + subprocess.run( + ["git", "-C", str(source_repo), "restore", "--source=HEAD", ":/"], + check=True, + stdout=sys.stderr, + stderr=sys.stderr, + ) + log.info("updating source at %s (shallow fetch)", source_repo) + subprocess.run( + ["git", "-C", str(source_repo), "fetch", "--depth=1", "origin", "HEAD"], + check=True, + stdout=sys.stderr, + stderr=sys.stderr, + ) + subprocess.run( + ["git", "-C", str(source_repo), "reset", "--hard", "FETCH_HEAD"], + check=True, + stdout=sys.stderr, + stderr=sys.stderr, + ) + return + source_repo.mkdir(parents=True, exist_ok=True) + log.info("cloning %s (shallow) to %s", repo_url, source_repo) + subprocess.run( + ["git", "clone", "--depth=1", repo_url, str(source_repo)], + check=True, + stdout=sys.stderr, + stderr=sys.stderr, + ) + log.info("shallow clone complete") diff --git a/libs/hackbot-runtime/tests/test_source.py b/libs/hackbot-runtime/tests/test_source.py new file mode 100644 index 0000000000..4bc83d30e3 --- /dev/null +++ b/libs/hackbot-runtime/tests/test_source.py @@ -0,0 +1,47 @@ +"""Tests for ensure_source_repo (shallow git checkout helper).""" + +import subprocess +from pathlib import Path + +from hackbot_runtime import ensure_source_repo + + +def _make_remote(path: Path) -> None: + subprocess.run(["git", "init", "-q", str(path)], check=True) + (path / "README.md").write_text("hello") + subprocess.run(["git", "-C", str(path), "add", "."], check=True) + subprocess.run( + [ + "git", + "-C", + str(path), + "-c", + "user.email=t@example.com", + "-c", + "user.name=test", + "commit", + "-q", + "-m", + "init", + ], 
+ check=True, + ) + + +def test_clones_when_absent(tmp_path): + remote = tmp_path / "remote" + _make_remote(remote) + dest = tmp_path / "dest" + ensure_source_repo(dest, f"file://{remote}") + assert (dest / ".git").is_dir() + assert (dest / "README.md").read_text() == "hello" + + +def test_idempotent_update_when_present(tmp_path): + remote = tmp_path / "remote" + _make_remote(remote) + dest = tmp_path / "dest" + ensure_source_repo(dest, f"file://{remote}") + # Second call takes the fetch + hard-reset branch and must still succeed. + ensure_source_repo(dest, f"file://{remote}") + assert (dest / "README.md").read_text() == "hello" From c676bba3d3f4b41fc754d09dc921b4850d181496 Mon Sep 17 00:00:00 2001 From: Suhaib Mujahid Date: Wed, 10 Jun 2026 13:50:59 -0400 Subject: [PATCH 08/21] Bind-mount the workspace instead of COPY . . in uv builder images --- agents/bug-fix/Dockerfile | 19 ++++++++----------- services/hackbot-api/Dockerfile | 19 ++++++++----------- 2 files changed, 16 insertions(+), 22 deletions(-) diff --git a/agents/bug-fix/Dockerfile b/agents/bug-fix/Dockerfile index 2da64d4859..d38187ecf1 100644 --- a/agents/bug-fix/Dockerfile +++ b/agents/bug-fix/Dockerfile @@ -2,32 +2,29 @@ FROM python:3.12 AS builder COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/ -WORKDIR /app +ENV UV_PROJECT_ENVIRONMENT=/opt/venv -# Workspace root metadata first so the dep-download layer caches independently -# of source changes. Member pyproject.toml files aren't needed here: --frozen -# trusts uv.lock as-is, so no per-member lockfile validation is required. -COPY pyproject.toml uv.lock VERSION ./ +WORKDIR /app # Install external deps without building workspace members. 
RUN --mount=type=cache,target=/root/.cache/uv \ + --mount=type=bind,source=pyproject.toml,target=pyproject.toml \ + --mount=type=bind,source=uv.lock,target=uv.lock \ + --mount=type=bind,source=VERSION,target=VERSION \ uv sync --frozen --no-dev --no-install-workspace --package hackbot-agent-bug-fix -# Bring in the full workspace and install the package itself non-editable, so -# the build artifact lands in the venv and the final image needs no source tree. -COPY . . - RUN --mount=type=cache,target=/root/.cache/uv \ + --mount=type=bind,target=/app,rw \ uv sync --locked --no-dev --no-editable --package hackbot-agent-bug-fix FROM python:3.12 AS base -COPY --from=builder /app/.venv /app/.venv +COPY --from=builder /opt/venv /opt/venv WORKDIR /app ENV PYTHONUNBUFFERED=1 ENV PYTHONDONTWRITEBYTECODE=1 -ENV PATH="/app/.venv/bin:$PATH" +ENV PATH="/opt/venv/bin:$PATH" FROM base AS agent diff --git a/services/hackbot-api/Dockerfile b/services/hackbot-api/Dockerfile index e15d3527f8..4108d20f79 100644 --- a/services/hackbot-api/Dockerfile +++ b/services/hackbot-api/Dockerfile @@ -2,33 +2,30 @@ FROM python:3.12-slim AS builder COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/ -WORKDIR /app +ENV UV_PROJECT_ENVIRONMENT=/opt/venv -# Workspace root metadata first so the dep-download layer caches independently -# of source changes. Member pyproject.toml files aren't needed here: --frozen -# trusts uv.lock as-is, so no per-member lockfile validation is required. -COPY pyproject.toml uv.lock VERSION ./ +WORKDIR /app # Install external deps without building workspace members. 
RUN --mount=type=cache,target=/root/.cache/uv \ + --mount=type=bind,source=pyproject.toml,target=pyproject.toml \ + --mount=type=bind,source=uv.lock,target=uv.lock \ + --mount=type=bind,source=VERSION,target=VERSION \ uv sync --frozen --no-dev --no-install-workspace --package hackbot-api -# Bring in the full workspace and install the package itself non-editable, so -# the build artifact lands in the venv and the final image needs no source tree. -COPY . . - RUN --mount=type=cache,target=/root/.cache/uv \ + --mount=type=bind,target=/app,rw \ uv sync --locked --no-dev --no-editable --package hackbot-api FROM python:3.12-slim AS base -COPY --from=builder /app/.venv /app/.venv +COPY --from=builder /opt/venv /opt/venv WORKDIR /app ENV PYTHONUNBUFFERED=1 ENV PYTHONDONTWRITEBYTECODE=1 ENV PORT=8080 -ENV PATH="/app/.venv/bin:$PATH" +ENV PATH="/opt/venv/bin:$PATH" RUN useradd --create-home --shell /bin/bash app USER app From b4d9ab7277298c7c883e32a40f65bbbf0d38222c Mon Sep 17 00:00:00 2001 From: Suhaib Mujahid Date: Wed, 10 Jun 2026 18:35:59 -0400 Subject: [PATCH 09/21] Move bug-fix agent under the hackbot_agents PEP 420 namespace The agent installed generic top-level packages (agent, broker), which would collide when multiple agents are installed together (e.g. uv sync --all-packages in the dev workspace). Ship each agent under a shared PEP 420 namespace instead: bug-fix becomes hackbot_agents.bug_fix (broker -> hackbot_agents.bug_fix.broker). hackbot_agents/ has no __init__.py, so future agents merge into the namespace rather than clobbering. Entrypoints, packages, run_local, and the README are updated; internal imports are relative so they're unchanged. 
--- agents/README.md | 49 ++++++++++++------- agents/bug-fix/Dockerfile | 4 +- .../bug_fix}/__init__.py | 0 .../bug_fix}/__main__.py | 0 .../bug_fix}/broker/__init__.py | 0 .../bug_fix}/broker/__main__.py | 0 .../bug_fix}/config.py | 0 .../bug_fix}/hackbot.py | 0 .../bug_fix}/prompts/system.md | 0 .../bug_fix}/rules/README.md | 0 .../bug_fix}/rules/unsupported-config.md | 0 agents/bug-fix/pyproject.toml | 2 +- agents/bug-fix/run_local.py | 4 +- 13 files changed, 37 insertions(+), 22 deletions(-) rename agents/bug-fix/{agent => hackbot_agents/bug_fix}/__init__.py (100%) rename agents/bug-fix/{agent => hackbot_agents/bug_fix}/__main__.py (100%) rename agents/bug-fix/{ => hackbot_agents/bug_fix}/broker/__init__.py (100%) rename agents/bug-fix/{ => hackbot_agents/bug_fix}/broker/__main__.py (100%) rename agents/bug-fix/{agent => hackbot_agents/bug_fix}/config.py (100%) rename agents/bug-fix/{agent => hackbot_agents/bug_fix}/hackbot.py (100%) rename agents/bug-fix/{agent => hackbot_agents/bug_fix}/prompts/system.md (100%) rename agents/bug-fix/{agent => hackbot_agents/bug_fix}/rules/README.md (100%) rename agents/bug-fix/{agent => hackbot_agents/bug_fix}/rules/unsupported-config.md (100%) diff --git a/agents/README.md b/agents/README.md index 2a757e235c..a9056db527 100644 --- a/agents/README.md +++ b/agents/README.md @@ -7,45 +7,60 @@ entrypoint, and deployment live together. `bug-fix/` is the reference example. ``` agents// - pyproject.toml # package "hackbot-agent-"; deps: hackbot-runtime[claude-sdk] + agent-specific + pyproject.toml # dist "hackbot-agent-"; packages = ["hackbot_agents"]; deps: hackbot-runtime[claude-sdk] + agent-specific Dockerfile # multi-stage: builder / agent [/ broker] compose.yml # local run; sets static env (e.g. 
the broker URL) - agent/ # the agent: run_() logic + prompts/, rules/, MCP servers - __main__.py # entrypoint: AgentInputs(BaseSettings); async def main(ctx) -> AgentResult; run_async(main) - broker/ # OPTIONAL: secret-holding MCP sidecar (e.g. holds the Bugzilla API key) + hackbot_agents/ # shared PEP 420 namespace — NO __init__.py here + / # the agent package (e.g. bug_fix) + __init__.py # run_() logic + helpers + __main__.py # entrypoint: AgentInputs(BaseSettings); async def main(ctx) -> AgentResult; run_async(main) + prompts/ rules/ # assets read via Path(__file__).parent + broker/ # OPTIONAL: secret-holding MCP sidecar (python -m hackbot_agents..broker) run_local.py # OPTIONAL: run without Docker/broker for quick iteration ``` -The runtime invokes the agent with `python -m agent`. `agent/__main__.py` is the -thin deployment wrapper — it validates inputs, calls the `run_()` logic in -`agent/__init__.py`, and passes `ctx.actions` (the recorder) plus the inputs into -it. +Every agent ships its package under the shared **`hackbot_agents` PEP 420 namespace** +(`hackbot_agents.`), so multiple agents installed into one environment never +collide. **Never add `hackbot_agents/__init__.py`** — the missing namespace-level +`__init__.py` is what lets the agent distributions merge instead of clobbering each other. + +The runtime invokes the agent with `python -m hackbot_agents.`. +`hackbot_agents//__main__.py` is the thin deployment wrapper — it validates inputs, +calls the `run_()` logic in `__init__.py`, and passes `ctx.actions` (the recorder) +plus the inputs into it. ## Shared building blocks (in `hackbot-runtime`) Don't re-implement these — import them: - `from hackbot_runtime import Context, AgentResult, run_async` — the entrypoint contract. +- `from hackbot_runtime import ensure_source_repo` — shallow-clone/refresh a source repo. - `from hackbot_runtime.claude import Reporter` — renders streamed claude-agent-sdk messages to stdout/log. 
Call `reporter.header("...")` per work item, `reporter.message(msg)` per message. - `from hackbot_runtime.actions.claude_sdk import actions_server_for` — returns `(recorder, mcp_server)`; write actions land in `summary.json` instead of mutating anything. -Reusable MCP **tool servers** live in the separate `agent-tools` package, each -behind its own optional extra (`agent-tools[bugzilla]`, `agent-tools[firefox]`): +Reusable MCP **tool servers** live in the separate `agent-tools` package, each behind its +own optional extra (`agent-tools[bugzilla]`, `agent-tools[firefox]`). Import the domain +module and build the server via the adapter: -- `from agent_tools.bugzilla import BugzillaContext, build_server` — read-only Bugzilla MCP. -- `from agent_tools.firefox import FirefoxContext, build_server` — Firefox build/test MCP. +```python +from agent_tools import bugzilla +from agent_tools.claude_sdk import build_sdk_server +server = build_sdk_server("bugzilla", bugzilla.BugzillaContext(client=...), bugzilla.TOOLS) +``` -You still assemble your own `ClaudeAgentOptions` and drive the `ClaudeSDKClient` -loop — those stay explicit and in your hands. +You still assemble your own `ClaudeAgentOptions` and drive the `ClaudeSDKClient` loop — +those stay explicit and in your hands. ## Adding a new agent -1. `agents//agent/__main__.py` — define `AgentInputs(BaseSettings)`, +1. `agents//hackbot_agents//__main__.py` — define `AgentInputs(BaseSettings)`, `async def main(ctx) -> AgentResult`, end with `raise SystemExit(run_async(main))`. -2. `agents//agent/__init__.py` — your prompts/logic, exposing an async entrypoint. -3. Copy `pyproject.toml`, `Dockerfile`, `compose.yml` from `bug-fix/` and rename. +2. `agents//hackbot_agents//__init__.py` — your prompts/logic, exposing an + async entrypoint. Do **not** create `agents//hackbot_agents/__init__.py`. +3. 
Copy `pyproject.toml`, `Dockerfile`, `compose.yml` from `bug-fix/` and rename (the + Dockerfile CMDs become `python -m hackbot_agents.` / `… .broker`). 4. In `services/hackbot-api/app/schemas.py`, add a Pydantic input model. 5. In `services/hackbot-api/app/agents.py`, add one `AGENT_REGISTRY` entry (`name` + `description` + `job_name` + `input_schema`). **No `build_env`** — diff --git a/agents/bug-fix/Dockerfile b/agents/bug-fix/Dockerfile index d38187ecf1..10d49d378b 100644 --- a/agents/bug-fix/Dockerfile +++ b/agents/bug-fix/Dockerfile @@ -34,7 +34,7 @@ RUN useradd --create-home --shell /bin/bash agent \ USER agent -CMD ["python", "-m", "agent"] +CMD ["python", "-m", "hackbot_agents.bug_fix"] FROM base AS broker @@ -44,4 +44,4 @@ USER broker EXPOSE 8765 -CMD ["python", "-m", "broker"] +CMD ["python", "-m", "hackbot_agents.bug_fix.broker"] diff --git a/agents/bug-fix/agent/__init__.py b/agents/bug-fix/hackbot_agents/bug_fix/__init__.py similarity index 100% rename from agents/bug-fix/agent/__init__.py rename to agents/bug-fix/hackbot_agents/bug_fix/__init__.py diff --git a/agents/bug-fix/agent/__main__.py b/agents/bug-fix/hackbot_agents/bug_fix/__main__.py similarity index 100% rename from agents/bug-fix/agent/__main__.py rename to agents/bug-fix/hackbot_agents/bug_fix/__main__.py diff --git a/agents/bug-fix/broker/__init__.py b/agents/bug-fix/hackbot_agents/bug_fix/broker/__init__.py similarity index 100% rename from agents/bug-fix/broker/__init__.py rename to agents/bug-fix/hackbot_agents/bug_fix/broker/__init__.py diff --git a/agents/bug-fix/broker/__main__.py b/agents/bug-fix/hackbot_agents/bug_fix/broker/__main__.py similarity index 100% rename from agents/bug-fix/broker/__main__.py rename to agents/bug-fix/hackbot_agents/bug_fix/broker/__main__.py diff --git a/agents/bug-fix/agent/config.py b/agents/bug-fix/hackbot_agents/bug_fix/config.py similarity index 100% rename from agents/bug-fix/agent/config.py rename to 
agents/bug-fix/hackbot_agents/bug_fix/config.py diff --git a/agents/bug-fix/agent/hackbot.py b/agents/bug-fix/hackbot_agents/bug_fix/hackbot.py similarity index 100% rename from agents/bug-fix/agent/hackbot.py rename to agents/bug-fix/hackbot_agents/bug_fix/hackbot.py diff --git a/agents/bug-fix/agent/prompts/system.md b/agents/bug-fix/hackbot_agents/bug_fix/prompts/system.md similarity index 100% rename from agents/bug-fix/agent/prompts/system.md rename to agents/bug-fix/hackbot_agents/bug_fix/prompts/system.md diff --git a/agents/bug-fix/agent/rules/README.md b/agents/bug-fix/hackbot_agents/bug_fix/rules/README.md similarity index 100% rename from agents/bug-fix/agent/rules/README.md rename to agents/bug-fix/hackbot_agents/bug_fix/rules/README.md diff --git a/agents/bug-fix/agent/rules/unsupported-config.md b/agents/bug-fix/hackbot_agents/bug_fix/rules/unsupported-config.md similarity index 100% rename from agents/bug-fix/agent/rules/unsupported-config.md rename to agents/bug-fix/hackbot_agents/bug_fix/rules/unsupported-config.md diff --git a/agents/bug-fix/pyproject.toml b/agents/bug-fix/pyproject.toml index d912011b21..492df37f36 100644 --- a/agents/bug-fix/pyproject.toml +++ b/agents/bug-fix/pyproject.toml @@ -22,4 +22,4 @@ requires = ["hatchling"] build-backend = "hatchling.build" [tool.hatch.build.targets.wheel] -packages = ["agent", "broker"] +packages = ["hackbot_agents"] diff --git a/agents/bug-fix/run_local.py b/agents/bug-fix/run_local.py index 5627b88aaf..7560239146 100644 --- a/agents/bug-fix/run_local.py +++ b/agents/bug-fix/run_local.py @@ -13,13 +13,13 @@ import bugsy from pydantic_settings import BaseSettings, SettingsConfigDict -# Make the co-located `agent` package importable regardless of cwd. +# Make the co-located `hackbot_agents` namespace importable regardless of cwd. 
sys.path.insert(0, str(Path(__file__).resolve().parent)) -from agent import run_bug_fix # noqa: E402 from agent_tools import bugzilla # noqa: E402 from agent_tools.bugzilla import BugzillaContext # noqa: E402 from agent_tools.claude_sdk import build_sdk_server # noqa: E402 +from hackbot_agents.bug_fix import run_bug_fix # noqa: E402 class Settings(BaseSettings): From 36f5f1c46fa4b2f61882e25b6ef3d6a71161539c Mon Sep 17 00:00:00 2001 From: Suhaib Mujahid Date: Wed, 10 Jun 2026 19:23:20 -0400 Subject: [PATCH 10/21] Remove stale git lock files before updating the source checkout A run killed mid-fetch (e.g. container stop) leaves shallow.lock or index.lock behind, causing the next run to fail with 'Another git process seems to be running'. Since ensure_source_repo is the sole, sequential git driver, any lock present at startup is stale and safe to remove. --- libs/hackbot-runtime/hackbot_runtime/source.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/libs/hackbot-runtime/hackbot_runtime/source.py b/libs/hackbot-runtime/hackbot_runtime/source.py index 67f739e0b4..04352876eb 100644 --- a/libs/hackbot-runtime/hackbot_runtime/source.py +++ b/libs/hackbot-runtime/hackbot_runtime/source.py @@ -17,7 +17,16 @@ def ensure_source_repo(source_repo: Path, repo_url: str) -> None: the remote HEAD. Recovers from a partial checkout left by an earlier failed run (e.g. the clone succeeded but the checkout ran out of disk). """ - if (source_repo / ".git").exists(): + git_dir = source_repo / ".git" + if git_dir.exists(): + # An earlier run killed mid-fetch (e.g. the container was stopped) + # leaves stale lock files behind. Since each run drives git + # sequentially, any lock present at startup is stale and safe to + # remove. 
+ for lock in (git_dir / "shallow.lock", git_dir / "index.lock"): + if lock.exists(): + log.warning("removing stale git lock %s", lock) + lock.unlink() status = subprocess.run( ["git", "-C", str(source_repo), "status", "--porcelain"], check=True, From adfd22185a34c371fe12386ffcd64fc3ca726a93 Mon Sep 17 00:00:00 2001 From: Suhaib Mujahid Date: Wed, 10 Jun 2026 20:01:17 -0400 Subject: [PATCH 11/21] Convert the bug-fix broker from a package to a single module broker/{__init__,__main__}.py -> broker.py. The empty __init__.py added nothing, and python -m hackbot_agents.bug_fix.broker resolves to the module just as it did to the package's __main__, so the Dockerfile CMD is unchanged. --- .../hackbot_agents/bug_fix/{broker/__main__.py => broker.py} | 0 agents/bug-fix/hackbot_agents/bug_fix/broker/__init__.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename agents/bug-fix/hackbot_agents/bug_fix/{broker/__main__.py => broker.py} (100%) delete mode 100644 agents/bug-fix/hackbot_agents/bug_fix/broker/__init__.py diff --git a/agents/bug-fix/hackbot_agents/bug_fix/broker/__main__.py b/agents/bug-fix/hackbot_agents/bug_fix/broker.py similarity index 100% rename from agents/bug-fix/hackbot_agents/bug_fix/broker/__main__.py rename to agents/bug-fix/hackbot_agents/bug_fix/broker.py diff --git a/agents/bug-fix/hackbot_agents/bug_fix/broker/__init__.py b/agents/bug-fix/hackbot_agents/bug_fix/broker/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 From 4409ab51bb7f97107c042259bd3230b36a20e246 Mon Sep 17 00:00:00 2001 From: Suhaib Mujahid Date: Wed, 10 Jun 2026 20:25:33 -0400 Subject: [PATCH 12/21] Declare hatchling build backend for agent-tools and hackbot-runtime Without a [build-system], uv installs these editable via setuptools' default import-hook editable mode, which writes a .pth that runs a custom MetaPathFinder. Editors' static analysis (Pylance) won't execute .pth code, so it can't locate the packages and flags every import as unresolved. 
Matching the rest of the workspace (hatchling) makes the editable install path-based, which resolves natively in any editor. --- libs/agent-tools/pyproject.toml | 7 +++++++ libs/hackbot-runtime/pyproject.toml | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/libs/agent-tools/pyproject.toml b/libs/agent-tools/pyproject.toml index b485e29abd..307e733045 100644 --- a/libs/agent-tools/pyproject.toml +++ b/libs/agent-tools/pyproject.toml @@ -12,6 +12,13 @@ bugzilla = ["bugsy"] firefox = ["grizzly-framework", "prefpicker"] claude-sdk = ["claude-agent-sdk>=0.1.30"] +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["agent_tools"] + [tool.pytest.ini_options] asyncio_mode = "auto" testpaths = ["tests"] diff --git a/libs/hackbot-runtime/pyproject.toml b/libs/hackbot-runtime/pyproject.toml index e34f2ca1df..00a4bee4a1 100644 --- a/libs/hackbot-runtime/pyproject.toml +++ b/libs/hackbot-runtime/pyproject.toml @@ -15,6 +15,13 @@ claude-sdk = ["claude-agent-sdk>=0.1.30", "agent-tools[claude-sdk]"] [tool.uv.sources] agent-tools = { workspace = true } +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["hackbot_runtime"] + [tool.pytest.ini_options] asyncio_mode = "auto" testpaths = ["tests"] From 2816854be08fcd9ad4f59ab571db86eafe92fb8e Mon Sep 17 00:00:00 2001 From: Suhaib Mujahid Date: Wed, 10 Jun 2026 20:28:28 -0400 Subject: [PATCH 13/21] Add actions_to_tool_names and use in config --- .../bug-fix/hackbot_agents/bug_fix/config.py | 27 ++++++++----------- .../hackbot_runtime/actions/claude_sdk.py | 12 ++++++++- 2 files changed, 22 insertions(+), 17 deletions(-) diff --git a/agents/bug-fix/hackbot_agents/bug_fix/config.py b/agents/bug-fix/hackbot_agents/bug_fix/config.py index 948b0001de..ee5f877ac9 100644 --- a/agents/bug-fix/hackbot_agents/bug_fix/config.py +++ b/agents/bug-fix/hackbot_agents/bug_fix/config.py @@ -1,4 +1,4 
@@ -from agent_tools.registry import ACTIONS_SERVER_NAME, tool_name_for +from hackbot_runtime.actions.claude_sdk import actions_to_tool_names # Tools that can modify the source repo — blocked under dry-run. SOURCE_WRITE_TOOLS = {"Write", "Edit", "MultiEdit", "NotebookEdit"} @@ -11,21 +11,16 @@ "mcp__bugzilla__get_bug_attachments", "mcp__bugzilla__download_attachment", ] -# Recording action types this agent enables. Served by the in-process -# `actions` MCP server (hackbot_runtime.actions.claude_sdk). Tool calls land -# in summary.json's `actions` array instead of mutating any external system. -# New domains (phabricator, treeherder, ...) just append to this list. -ENABLED_ACTION_TYPES = [ - "bugzilla.update_bug", - "bugzilla.add_comment", - "bugzilla.add_attachment", - "bugzilla.create_bug", -] -# claude-agent-sdk tool identifiers derived from the above, using the shared -# server name and tool-name helper so they stay in sync with the adapter. -ENABLED_ACTION_TOOLS = [ - f"mcp__{ACTIONS_SERVER_NAME}__{tool_name_for(t)}" for t in ENABLED_ACTION_TYPES -] + + +ENABLED_ACTION_TOOLS = actions_to_tool_names( + [ + "bugzilla.update_bug", + "bugzilla.add_comment", + "bugzilla.add_attachment", + "bugzilla.create_bug", + ] +) # Firefox build/test tools. 
FIREFOX_TOOLS = [ diff --git a/libs/hackbot-runtime/hackbot_runtime/actions/claude_sdk.py b/libs/hackbot-runtime/hackbot_runtime/actions/claude_sdk.py index 63c3f5bd0c..8fc630f396 100644 --- a/libs/hackbot-runtime/hackbot_runtime/actions/claude_sdk.py +++ b/libs/hackbot-runtime/hackbot_runtime/actions/claude_sdk.py @@ -10,7 +10,7 @@ from pathlib import Path from agent_tools.claude_sdk import build_sdk_server -from agent_tools.registry import ACTIONS_SERVER_NAME +from agent_tools.registry import ACTIONS_SERVER_NAME, tool_name_for from hackbot_runtime.actions import bugzilla as _bugzilla from hackbot_runtime.actions.recorder import ActionsRecorder @@ -38,3 +38,13 @@ def actions_server_for( return recorder, build_sdk_server( ACTIONS_SERVER_NAME, recorder, tools, prefix_namespace=True ) + + +def actions_to_tool_names(types: list[str]) -> list[str]: + """claude-agent-sdk tool ids for the given action types. + + e.g. ``"bugzilla.update_bug"`` -> ``"mcp__actions__bugzilla_update_bug"``. + Kept beside ``actions_server_for`` so the ids stay in sync with the server it + builds (same server name + tool-name mapping). 
+ """ + return [f"mcp__{ACTIONS_SERVER_NAME}__{tool_name_for(t)}" for t in types] From bc982780aa374ca3115a6c473b16eb6a2f5ce602 Mon Sep 17 00:00:00 2001 From: Suhaib Mujahid Date: Wed, 10 Jun 2026 20:41:48 -0400 Subject: [PATCH 14/21] Move ACTIONS_SERVER_NAME into hackbot-runtime --- agents/bug-fix/hackbot_agents/bug_fix/__init__.py | 2 +- libs/agent-tools/agent_tools/registry.py | 2 -- libs/hackbot-runtime/hackbot_runtime/actions/__init__.py | 4 +++- libs/hackbot-runtime/hackbot_runtime/actions/claude_sdk.py | 3 ++- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/agents/bug-fix/hackbot_agents/bug_fix/__init__.py b/agents/bug-fix/hackbot_agents/bug_fix/__init__.py index 100a44a7fc..1f948b6e28 100644 --- a/agents/bug-fix/hackbot_agents/bug_fix/__init__.py +++ b/agents/bug-fix/hackbot_agents/bug_fix/__init__.py @@ -14,7 +14,6 @@ from agent_tools import firefox from agent_tools.claude_sdk import build_sdk_server -from agent_tools.registry import ACTIONS_SERVER_NAME from claude_agent_sdk import ( AgentDefinition, ClaudeAgentOptions, @@ -23,6 +22,7 @@ ResultMessage, ) from hackbot_runtime import ActionsRecorder +from hackbot_runtime.actions import ACTIONS_SERVER_NAME from hackbot_runtime.actions.claude_sdk import actions_server_for from hackbot_runtime.claude import Reporter diff --git a/libs/agent-tools/agent_tools/registry.py b/libs/agent-tools/agent_tools/registry.py index ce6bc3010c..2f449b7a61 100644 --- a/libs/agent-tools/agent_tools/registry.py +++ b/libs/agent-tools/agent_tools/registry.py @@ -18,8 +18,6 @@ from pydantic import create_model -ACTIONS_SERVER_NAME = "actions" - def tool_name_for(dotted: str) -> str: """Map a dotted tool id to its MCP tool name: ``bugzilla.update_bug`` -> ``bugzilla_update_bug``.""" diff --git a/libs/hackbot-runtime/hackbot_runtime/actions/__init__.py b/libs/hackbot-runtime/hackbot_runtime/actions/__init__.py index 4356804aac..829cdebeb6 100644 --- a/libs/hackbot-runtime/hackbot_runtime/actions/__init__.py +++ 
b/libs/hackbot-runtime/hackbot_runtime/actions/__init__.py @@ -10,4 +10,6 @@ from hackbot_runtime.actions import bugzilla from hackbot_runtime.actions.recorder import ActionsRecorder -__all__ = ["ActionsRecorder", "bugzilla"] +ACTIONS_SERVER_NAME = "actions" + +__all__ = ["ACTIONS_SERVER_NAME", "ActionsRecorder", "bugzilla"] diff --git a/libs/hackbot-runtime/hackbot_runtime/actions/claude_sdk.py b/libs/hackbot-runtime/hackbot_runtime/actions/claude_sdk.py index 8fc630f396..a6cafc94d6 100644 --- a/libs/hackbot-runtime/hackbot_runtime/actions/claude_sdk.py +++ b/libs/hackbot-runtime/hackbot_runtime/actions/claude_sdk.py @@ -10,8 +10,9 @@ from pathlib import Path from agent_tools.claude_sdk import build_sdk_server -from agent_tools.registry import ACTIONS_SERVER_NAME, tool_name_for +from agent_tools.registry import tool_name_for +from hackbot_runtime.actions import ACTIONS_SERVER_NAME from hackbot_runtime.actions import bugzilla as _bugzilla from hackbot_runtime.actions.recorder import ActionsRecorder From d86661e8e767f54b9d54052122a6df8cff7945a5 Mon Sep 17 00:00:00 2001 From: Suhaib Mujahid Date: Wed, 10 Jun 2026 22:42:36 -0400 Subject: [PATCH 15/21] Improve hackbot-runtime --- agents/README.md | 76 +++++-- agents/bug-fix/Dockerfile | 4 + agents/bug-fix/hackbot.toml | 7 + .../hackbot_agents/bug_fix/__init__.py | 198 ----------------- .../hackbot_agents/bug_fix/__main__.py | 58 ++++- .../bug-fix/hackbot_agents/bug_fix/agent.py | 201 ++++++++++++++++++ .../bug-fix/hackbot_agents/bug_fix/hackbot.py | 57 ----- agents/bug-fix/run_local.py | 4 +- .../agent_tools/firefox/__init__.py | 12 +- .../hackbot_runtime/__init__.py | 10 +- .../hackbot-runtime/hackbot_runtime/config.py | 59 +++++ .../hackbot_runtime/context.py | 103 ++++++++- .../hackbot-runtime/hackbot_runtime/errors.py | 7 + .../hackbot_runtime/providers.py | 48 +++++ .../hackbot-runtime/hackbot_runtime/result.py | 18 -- .../hackbot_runtime/runtime.py | 152 +++++++++---- .../hackbot_runtime/uploader.py | 2 +- 
libs/hackbot-runtime/tests/test_config.py | 60 ++++++ libs/hackbot-runtime/tests/test_context.py | 83 ++++++++ libs/hackbot-runtime/tests/test_providers.py | 26 +++ libs/hackbot-runtime/tests/test_runtime.py | 115 +++++++++- 21 files changed, 934 insertions(+), 366 deletions(-) create mode 100644 agents/bug-fix/hackbot.toml create mode 100644 agents/bug-fix/hackbot_agents/bug_fix/agent.py delete mode 100644 agents/bug-fix/hackbot_agents/bug_fix/hackbot.py create mode 100644 libs/hackbot-runtime/hackbot_runtime/config.py create mode 100644 libs/hackbot-runtime/hackbot_runtime/errors.py create mode 100644 libs/hackbot-runtime/hackbot_runtime/providers.py delete mode 100644 libs/hackbot-runtime/hackbot_runtime/result.py create mode 100644 libs/hackbot-runtime/tests/test_config.py create mode 100644 libs/hackbot-runtime/tests/test_context.py create mode 100644 libs/hackbot-runtime/tests/test_providers.py diff --git a/agents/README.md b/agents/README.md index a9056db527..e483f32148 100644 --- a/agents/README.md +++ b/agents/README.md @@ -8,33 +8,71 @@ entrypoint, and deployment live together. `bug-fix/` is the reference example. ``` agents// pyproject.toml # dist "hackbot-agent-"; packages = ["hackbot_agents"]; deps: hackbot-runtime[claude-sdk] + agent-specific + hackbot.toml # declares platform capabilities: [source], [firefox] Dockerfile # multi-stage: builder / agent [/ broker] compose.yml # local run; sets static env (e.g. the broker URL) hackbot_agents/ # shared PEP 420 namespace — NO __init__.py here / # the agent package (e.g. 
bug_fix) - __init__.py # run_() logic + helpers - __main__.py # entrypoint: AgentInputs(BaseSettings); async def main(ctx) -> AgentResult; run_async(main) + __init__.py # empty package marker + agent.py # run_() logic + helpers (the reusable agent library) + __main__.py # entrypoint: AgentInputs(BaseSettings) + async def main(ctx) -> dict + run_async(main) prompts/ rules/ # assets read via Path(__file__).parent broker/ # OPTIONAL: secret-holding MCP sidecar (python -m hackbot_agents..broker) run_local.py # OPTIONAL: run without Docker/broker for quick iteration ``` +## `hackbot.toml` — what the platform provides + +Declare the capabilities your agent needs in a `hackbot.toml` at the agent root +(alongside `pyproject.toml` / `Dockerfile`); the runtime prepares them and hands +you a single `HackbotContext`. Every table is optional — omit `[source]` if you +don't operate on a repo, omit `[firefox]` if you don't need a Firefox build. + +```toml +[source] # the runtime clones/refreshes this for you +repo_url = "https://github.com/mozilla-firefox/firefox.git" +checkout_path = "/workspace/firefox" # default; env SOURCE_REPO overrides + +[firefox] # Firefox build paths, derived from the checkout +enabled = true +objdir = "objdir-ff-asan" +``` + +Agent identity (name/description) stays in `pyproject.toml`; model defaults and +tool allowlists stay in code; secrets and per-run inputs stay in the +environment. The toml holds only platform-capability declarations. + Every agent ships its package under the shared **`hackbot_agents` PEP 420 namespace** (`hackbot_agents.`), so multiple agents installed into one environment never collide. **Never add `hackbot_agents/__init__.py`** — the missing namespace-level `__init__.py` is what lets the agent distributions merge instead of clobbering each other. -The runtime invokes the agent with `python -m hackbot_agents.`. 
-`hackbot_agents//__main__.py` is the thin deployment wrapper — it validates inputs, -calls the `run_()` logic in `__init__.py`, and passes `ctx.actions` (the recorder) -plus the inputs into it. +The runtime invokes the agent with `python -m hackbot_agents.`, running +`hackbot_agents//__main__.py`. That module is the thin deployment wrapper: +it defines `AgentInputs(BaseSettings)`, an `async def main(ctx)`, and calls +`run_async(main)`. `run_async` auto-discovers `hackbot.toml` (cwd first — the +Dockerfile copies it into `/app` — then walks up from the entry module to the +agent root in an editable checkout) and exits the process with the run's status. +`main` validates inputs and calls the `run_()` logic in `agent.py`, +reading everything the platform provides off `ctx` (`ctx.source_repo`, +`ctx.firefox`, `ctx.anthropic`, `ctx.actions`, `ctx.publish_file`). ## Shared building blocks (in `hackbot-runtime`) Don't re-implement these — import them: -- `from hackbot_runtime import Context, AgentResult, run_async` — the entrypoint contract. -- `from hackbot_runtime import ensure_source_repo` — shallow-clone/refresh a source repo. +- `from hackbot_runtime import HackbotContext, AgentError, run_async` — the entrypoint + contract. `main(ctx)` **returns a findings dict** (or `None`) on success, and **raises** + to fail — `AgentError("…")` for an expected failure, any exception for a crash. The + runtime turns that into `summary.json` (`status`/`error`/`findings`) and the process + exit code; `run_async(main)` exits the process itself, so the entrypoint is just that + one call. `HackbotContext` is the one object `main()` receives; it answers for the + platform: `ctx.source_repo` (prepared from `[source]` on first access), `ctx.firefox` + (a `FirefoxContext` from `[firefox]`), `ctx.anthropic.api_key` (validated), plus the + results/artifacts/actions plumbing (`ctx.actions`, `ctx.publish_file`, + `ctx.publish_json`). 
+- `from hackbot_runtime import ensure_source_repo` — the lower-level shallow-clone/refresh + primitive (you normally don't call this directly; `ctx.source_repo` does it for you). - `from hackbot_runtime.claude import Reporter` — renders streamed claude-agent-sdk messages to stdout/log. Call `reporter.header("...")` per work item, `reporter.message(msg)` per message. - `from hackbot_runtime.actions.claude_sdk import actions_server_for` — returns @@ -55,14 +93,20 @@ those stay explicit and in your hands. ## Adding a new agent -1. `agents//hackbot_agents//__main__.py` — define `AgentInputs(BaseSettings)`, - `async def main(ctx) -> AgentResult`, end with `raise SystemExit(run_async(main))`. -2. `agents//hackbot_agents//__init__.py` — your prompts/logic, exposing an - async entrypoint. Do **not** create `agents//hackbot_agents/__init__.py`. -3. Copy `pyproject.toml`, `Dockerfile`, `compose.yml` from `bug-fix/` and rename (the - Dockerfile CMDs become `python -m hackbot_agents.` / `… .broker`). -4. In `services/hackbot-api/app/schemas.py`, add a Pydantic input model. -5. In `services/hackbot-api/app/agents.py`, add one `AGENT_REGISTRY` entry +1. `agents//hackbot.toml` — declare `[source]`/`[firefox]` if you need them + (omit either otherwise). +2. `agents//hackbot_agents//__main__.py` — define `AgentInputs(BaseSettings)` + (domain inputs only), an `async def main(ctx: HackbotContext) -> dict` that returns + findings on success and raises `AgentError` to fail, and end with `run_async(main)` (it + discovers `hackbot.toml` — cwd, then up to the agent root — and exits the process itself). +3. `agents//hackbot_agents//agent.py` — your prompts/logic, exposing the + `run_()` entrypoint `main` calls (leave `/__init__.py` empty). Do **not** + create `agents//hackbot_agents/__init__.py`. +4. 
Copy `pyproject.toml`, `Dockerfile`, `compose.yml` from `bug-fix/` and rename (the + Dockerfile CMDs become `python -m hackbot_agents.` / `… .broker`, and it copies + `agents//hackbot.toml` into `/app`). +5. In `services/hackbot-api/app/schemas.py`, add a Pydantic input model. +6. In `services/hackbot-api/app/agents.py`, add one `AGENT_REGISTRY` entry (`name` + `description` + `job_name` + `input_schema`). **No `build_env`** — env vars are derived from the schema by `model_to_env` (field `bug_id` → `BUG_ID`). Put deploy-time constants (broker URLs, etc.) in the Job's static env config, not the schema. diff --git a/agents/bug-fix/Dockerfile b/agents/bug-fix/Dockerfile index 10d49d378b..15264a7153 100644 --- a/agents/bug-fix/Dockerfile +++ b/agents/bug-fix/Dockerfile @@ -28,6 +28,10 @@ ENV PATH="/opt/venv/bin:$PATH" FROM base AS agent +# hackbot.toml lives at the agent root (not inside the package), so copy it into +# the working dir; the runtime discovers it there (cwd) at startup. +COPY agents/bug-fix/hackbot.toml /app/hackbot.toml + RUN useradd --create-home --shell /bin/bash agent \ && mkdir -p /workspace \ && chown agent:agent /workspace diff --git a/agents/bug-fix/hackbot.toml b/agents/bug-fix/hackbot.toml new file mode 100644 index 0000000000..227e28b529 --- /dev/null +++ b/agents/bug-fix/hackbot.toml @@ -0,0 +1,7 @@ +[source] +repo_url = "https://github.com/mozilla-firefox/firefox.git" +checkout_path = "/workspace/firefox" + +[firefox] +enabled = true +objdir = "objdir-ff-asan" diff --git a/agents/bug-fix/hackbot_agents/bug_fix/__init__.py b/agents/bug-fix/hackbot_agents/bug_fix/__init__.py index 1f948b6e28..e69de29bb2 100644 --- a/agents/bug-fix/hackbot_agents/bug_fix/__init__.py +++ b/agents/bug-fix/hackbot_agents/bug_fix/__init__.py @@ -1,198 +0,0 @@ -"""Bug fix triage tool -- a Bugzilla triage agent. - -Orchestrates a Claude agent that triages bugs according to rulesets -in the rules/ directory.
The agent has access to a source repository -and reaches Bugzilla via an out-of-process MCP broker (HTTP transport) -that holds the Bugzilla token — the agent process itself never sees it. -""" - -from __future__ import annotations - -import sys -from dataclasses import dataclass -from pathlib import Path - -from agent_tools import firefox -from agent_tools.claude_sdk import build_sdk_server -from claude_agent_sdk import ( - AgentDefinition, - ClaudeAgentOptions, - ClaudeSDKClient, - McpServerConfig, - ResultMessage, -) -from hackbot_runtime import ActionsRecorder -from hackbot_runtime.actions import ACTIONS_SERVER_NAME -from hackbot_runtime.actions.claude_sdk import actions_server_for -from hackbot_runtime.claude import Reporter - -from .config import ( - BUGZILLA_READ_TOOLS, - ENABLED_ACTION_TOOLS, - ENABLED_ACTION_TYPES, - FIREFOX_TOOLS, - SOURCE_WRITE_TOOLS, -) - -HERE = Path(__file__).resolve().parent - - -# --------------------------------------------------------------------------- # -# Result type -# --------------------------------------------------------------------------- # - - -@dataclass -class BugFixResult: - exit_code: int = 0 - bugs_processed: int = 0 - - -# --------------------------------------------------------------------------- # -# Prompts & agents -# --------------------------------------------------------------------------- # - - -def load_system_prompt(rules_dir: Path, extra: str) -> str: - tmpl = (HERE / "prompts" / "system.md").read_text() - - return tmpl.format( - rules_dir=str(rules_dir.resolve()), - extra_instructions=extra or "(none)", - ) - - -def make_investigator() -> AgentDefinition: - """Create a single generic investigator subagent definition.""" - return AgentDefinition( - description=( - "Focused investigator for answering a specific question about " - "a bug or the source tree. The main agent writes your complete " - "instructions at spawn time — follow them precisely and return " - "only what was asked for." 
- ), - prompt=( - "You are a focused investigator subagent. You will be given a " - "self-contained task by the triage agent. Complete it and return " - "a concise answer. Do not make Bugzilla modifications — you have " - "read-only access. Do not speculate beyond what you can verify." - ), - tools=[ - "Read", - "Grep", - "Glob", - "Bash", - *BUGZILLA_READ_TOOLS, - *FIREFOX_TOOLS, - ], - model="inherit", - ) - - -# --------------------------------------------------------------------------- # -# Agent entrypoint -# --------------------------------------------------------------------------- # - - -async def run_bug_fix( - *, - bugzilla_mcp_server: McpServerConfig, - source_repo: Path, - bugs: list[int], - instructions: str = "", - task: str | None = None, - rules_dir: Path | None = None, - newest_first: bool = False, - model: str | None = None, - max_turns: int | None = None, - effort: str | None = None, - verbose: bool = False, - log: Path | None = None, - actions_recorder: ActionsRecorder | None = None, -) -> BugFixResult: - """Triage and fix the given Bugzilla bug(s) with a claude-agent-sdk agent.""" - if rules_dir is None: - rules_dir = HERE / "rules" - - if not bugs: - print("[bug_fix] no bug ids supplied — nothing to do", file=sys.stderr) - return BugFixResult(exit_code=0) - - selected = sorted(bugs, reverse=newest_first) - print(f"[bug_fix] triaging {len(selected)} bug(s): {selected}", file=sys.stderr) - - # --- Firefox build/eval MCP server (in-process; no tokens) -------- # - fx_ctx = firefox.FirefoxContext.from_source_repo(source_repo) - firefox_server = build_sdk_server("firefox", fx_ctx, firefox.TOOLS) - - # --- Action-recording MCP server (in-process) --------------------- # - # Standalone/script runs pass actions_recorder=None and get a local - # recorder that copies attachments under ./artifacts (no uploader). 
- actions_recorder, actions_server = actions_server_for( - actions_recorder, types=ENABLED_ACTION_TYPES - ) - - # --- Build agent options ------------------------------------------ # - system_prompt = load_system_prompt(rules_dir, instructions) - - options = ClaudeAgentOptions( - system_prompt=system_prompt, - mcp_servers={ - "bugzilla": bugzilla_mcp_server, - "firefox": firefox_server, - ACTIONS_SERVER_NAME: actions_server, - }, - agents={"investigator": make_investigator()}, - cwd=str(source_repo.resolve()), - add_dirs=[str(rules_dir.resolve())], - permission_mode="bypassPermissions", - allowed_tools=[ - "Read", - "Grep", - "Glob", - "Bash", - "Task", - *SOURCE_WRITE_TOOLS, - *BUGZILLA_READ_TOOLS, - *ENABLED_ACTION_TOOLS, - *FIREFOX_TOOLS, - ], - model=model, - max_turns=max_turns, - **({"effort": effort} if effort else {}), - setting_sources=[], - ) - - # --- Run: one fresh agent context per bug ------------------------- # - exit_code = 0 - rules_path = rules_dir.resolve() - with Reporter(verbose=verbose, log_path=log) as reporter: - for i, bug_id in enumerate(selected, 1): - print(f"[bug_fix] bug {i}/{len(selected)}: {bug_id}", file=sys.stderr) - reporter.header(f"bug {bug_id}") - - if task: - user_prompt = ( - f"Bug to work on: {bug_id}\n\n" - f"Task: {task}\n\n" - f"The rules in {rules_path} are available if the task " - f"calls for them, but the task above is your primary " - f"directive — it overrides the default triage workflow." - ) - else: - user_prompt = ( - f"Triage bug {bug_id}.\n\n" - f"Consult the relevant rules in {rules_path}." 
- ) - - async with ClaudeSDKClient(options=options) as client: - await client.query(user_prompt) - async for msg in client.receive_response(): - reporter.message(msg) - if isinstance(msg, ResultMessage) and msg.is_error: - exit_code = 1 - - return BugFixResult( - exit_code=exit_code, - bugs_processed=len(selected), - ) diff --git a/agents/bug-fix/hackbot_agents/bug_fix/__main__.py b/agents/bug-fix/hackbot_agents/bug_fix/__main__.py index c9b44b457a..51dea6a41d 100644 --- a/agents/bug-fix/hackbot_agents/bug_fix/__main__.py +++ b/agents/bug-fix/hackbot_agents/bug_fix/__main__.py @@ -1,6 +1,58 @@ -from hackbot_runtime import run_async +import tempfile +from pathlib import Path + +from hackbot_runtime import AgentError, HackbotContext, run_async +from pydantic_settings import BaseSettings, SettingsConfigDict + +from .agent import run_bug_fix + + +class AgentInputs(BaseSettings): + bug_id: int + bugzilla_mcp_url: str + model: str | None = None + max_turns: int | None = None + effort: str | None = None + + model_config = SettingsConfigDict(extra="ignore") + + +async def main(ctx: HackbotContext) -> dict: + inputs = AgentInputs() + # Fail fast if the platform did not provide Anthropic credentials. + ctx.anthropic.api_key + + log_path = Path(tempfile.mkdtemp(prefix="bug-fix-log-")) / "agent.log" + + result = await run_bug_fix( + task="Triage and fix the bug, and verify the fix", + bugzilla_mcp_server={ + "type": "http", + "url": inputs.bugzilla_mcp_url, + }, + source_repo=ctx.source_repo, + fx_ctx=ctx.firefox, + bugs=[inputs.bug_id], + model=inputs.model, + max_turns=inputs.max_turns, + effort=inputs.effort, + log=log_path, + verbose=True, + actions_recorder=ctx.actions, + ) + + if log_path.exists(): + # Uploaded when a signed policy is set, else copied into ./artifacts. 
+ ctx.publish_file("logs/agent.log", log_path, "text/plain") + + if result.exit_code != 0: + raise AgentError( + f"bug triage failed ({result.bugs_processed} bug(s) processed, " + f"exit_code={result.exit_code})" + ) + + return {"bugs_processed": result.bugs_processed} -from .hackbot import main if __name__ == "__main__": - raise SystemExit(run_async(main)) + run_async(main) diff --git a/agents/bug-fix/hackbot_agents/bug_fix/agent.py b/agents/bug-fix/hackbot_agents/bug_fix/agent.py new file mode 100644 index 0000000000..55be4b4328 --- /dev/null +++ b/agents/bug-fix/hackbot_agents/bug_fix/agent.py @@ -0,0 +1,201 @@ +"""Bug fix triage tool -- a Bugzilla triage agent. + +Orchestrates a Claude agent that triages bugs according to rulesets +in the rules/ directory. The agent has access to a source repository +and reaches Bugzilla via an out-of-process MCP broker (HTTP transport) +that holds the Bugzilla token — the agent process itself never sees it. +""" + +from __future__ import annotations + +import sys +from dataclasses import dataclass +from pathlib import Path + +from agent_tools import firefox +from agent_tools.claude_sdk import build_sdk_server +from agent_tools.firefox import FirefoxContext +from claude_agent_sdk import ( + AgentDefinition, + ClaudeAgentOptions, + ClaudeSDKClient, + McpServerConfig, + ResultMessage, +) +from hackbot_runtime import ActionsRecorder +from hackbot_runtime.actions import ACTIONS_SERVER_NAME +from hackbot_runtime.actions.claude_sdk import actions_server_for +from hackbot_runtime.claude import Reporter + +from .config import ( + BUGZILLA_READ_TOOLS, + ENABLED_ACTION_TOOLS, + ENABLED_ACTION_TYPES, + FIREFOX_TOOLS, + SOURCE_WRITE_TOOLS, +) + +HERE = Path(__file__).resolve().parent + + +# --------------------------------------------------------------------------- # +# Result type +# --------------------------------------------------------------------------- # + + +@dataclass +class BugFixResult: + exit_code: int = 0 + bugs_processed: int 
= 0 + + +# --------------------------------------------------------------------------- # +# Prompts & agents +# --------------------------------------------------------------------------- # + + +def load_system_prompt(rules_dir: Path, extra: str) -> str: + tmpl = (HERE / "prompts" / "system.md").read_text() + + return tmpl.format( + rules_dir=str(rules_dir.resolve()), + extra_instructions=extra or "(none)", + ) + + +def make_investigator() -> AgentDefinition: + """Create a single generic investigator subagent definition.""" + return AgentDefinition( + description=( + "Focused investigator for answering a specific question about " + "a bug or the source tree. The main agent writes your complete " + "instructions at spawn time — follow them precisely and return " + "only what was asked for." + ), + prompt=( + "You are a focused investigator subagent. You will be given a " + "self-contained task by the triage agent. Complete it and return " + "a concise answer. Do not make Bugzilla modifications — you have " + "read-only access. Do not speculate beyond what you can verify." 
+ ), + tools=[ + "Read", + "Grep", + "Glob", + "Bash", + *BUGZILLA_READ_TOOLS, + *FIREFOX_TOOLS, + ], + model="inherit", + ) + + +# --------------------------------------------------------------------------- # +# Agent entrypoint +# --------------------------------------------------------------------------- # + + +async def run_bug_fix( + *, + bugzilla_mcp_server: McpServerConfig, + source_repo: Path, + fx_ctx: FirefoxContext, + bugs: list[int], + instructions: str = "", + task: str | None = None, + rules_dir: Path | None = None, + newest_first: bool = False, + model: str | None = None, + max_turns: int | None = None, + effort: str | None = None, + verbose: bool = False, + log: Path | None = None, + actions_recorder: ActionsRecorder | None = None, +) -> BugFixResult: + """Triage and fix the given Bugzilla bug(s) with a claude-agent-sdk agent.""" + if rules_dir is None: + rules_dir = HERE / "rules" + + if not bugs: + print("[bug_fix] no bug ids supplied — nothing to do", file=sys.stderr) + return BugFixResult(exit_code=0) + + selected = sorted(bugs, reverse=newest_first) + print(f"[bug_fix] triaging {len(selected)} bug(s): {selected}", file=sys.stderr) + + # --- Firefox build/eval MCP server (in-process; no tokens) -------- # + # The runtime derives fx_ctx from the prepared source checkout and the + # agent's hackbot.toml; here we only wrap its tools as an MCP server. + firefox_server = build_sdk_server("firefox", fx_ctx, firefox.TOOLS) + + # --- Action-recording MCP server (in-process) --------------------- # + # Standalone/script runs pass actions_recorder=None and get a local + # recorder that copies attachments under ./artifacts (no uploader). 
+ actions_recorder, actions_server = actions_server_for( + actions_recorder, types=ENABLED_ACTION_TYPES + ) + + # --- Build agent options ------------------------------------------ # + system_prompt = load_system_prompt(rules_dir, instructions) + + options = ClaudeAgentOptions( + system_prompt=system_prompt, + mcp_servers={ + "bugzilla": bugzilla_mcp_server, + "firefox": firefox_server, + ACTIONS_SERVER_NAME: actions_server, + }, + agents={"investigator": make_investigator()}, + cwd=str(source_repo.resolve()), + add_dirs=[str(rules_dir.resolve())], + permission_mode="bypassPermissions", + allowed_tools=[ + "Read", + "Grep", + "Glob", + "Bash", + "Task", + *SOURCE_WRITE_TOOLS, + *BUGZILLA_READ_TOOLS, + *ENABLED_ACTION_TOOLS, + *FIREFOX_TOOLS, + ], + model=model, + max_turns=max_turns, + **({"effort": effort} if effort else {}), + setting_sources=[], + ) + + # --- Run: one fresh agent context per bug ------------------------- # + exit_code = 0 + rules_path = rules_dir.resolve() + with Reporter(verbose=verbose, log_path=log) as reporter: + for i, bug_id in enumerate(selected, 1): + print(f"[bug_fix] bug {i}/{len(selected)}: {bug_id}", file=sys.stderr) + reporter.header(f"bug {bug_id}") + + if task: + user_prompt = ( + f"Bug to work on: {bug_id}\n\n" + f"Task: {task}\n\n" + f"The rules in {rules_path} are available if the task " + f"calls for them, but the task above is your primary " + f"directive — it overrides the default triage workflow." + ) + else: + user_prompt = ( + f"Triage bug {bug_id}.\n\n" + f"Consult the relevant rules in {rules_path}." 
+ ) + + async with ClaudeSDKClient(options=options) as client: + await client.query(user_prompt) + async for msg in client.receive_response(): + reporter.message(msg) + if isinstance(msg, ResultMessage) and msg.is_error: + exit_code = 1 + + return BugFixResult( + exit_code=exit_code, + bugs_processed=len(selected), + ) diff --git a/agents/bug-fix/hackbot_agents/bug_fix/hackbot.py b/agents/bug-fix/hackbot_agents/bug_fix/hackbot.py deleted file mode 100644 index 60073be37e..0000000000 --- a/agents/bug-fix/hackbot_agents/bug_fix/hackbot.py +++ /dev/null @@ -1,57 +0,0 @@ -import tempfile -from pathlib import Path - -from hackbot_runtime import AgentResult, Context, ensure_source_repo -from pydantic_settings import BaseSettings, SettingsConfigDict - -FIREFOX_REPO_URL = "https://github.com/mozilla-firefox/firefox.git" - - -class AgentInputs(BaseSettings): - bug_id: int - bugzilla_mcp_url: str - source_repo: Path = Path("/workspace/firefox") - model: str | None = None - max_turns: int | None = None - effort: str | None = None - - model_config = SettingsConfigDict(extra="ignore") - - -async def main(ctx: Context) -> AgentResult: - from . import run_bug_fix - - inputs = AgentInputs() - ensure_source_repo(inputs.source_repo, FIREFOX_REPO_URL) - - log_path = Path(tempfile.mkdtemp(prefix="bug-fix-log-")) / "agent.log" - - result = await run_bug_fix( - task="Triage and fix the bug, and verify the fix", - bugzilla_mcp_server={ - "type": "http", - "url": inputs.bugzilla_mcp_url, - }, - source_repo=inputs.source_repo, - bugs=[inputs.bug_id], - model=inputs.model, - max_turns=inputs.max_turns, - effort=inputs.effort, - log=log_path, - verbose=True, - actions_recorder=ctx.actions, - ) - - if log_path.exists(): - # Uploaded when a signed policy is set, else copied into ./artifacts. 
- ctx.publish_file("logs/agent.log", log_path, "text/plain") - - return AgentResult( - status="ok" if result.exit_code == 0 else "error", - error=None if result.exit_code == 0 else f"exit_code={result.exit_code}", - findings={ - "exit_code": result.exit_code, - "bugs_processed": result.bugs_processed, - }, - exit_code=result.exit_code, - ) diff --git a/agents/bug-fix/run_local.py b/agents/bug-fix/run_local.py index 7560239146..50236ac7de 100644 --- a/agents/bug-fix/run_local.py +++ b/agents/bug-fix/run_local.py @@ -19,7 +19,8 @@ from agent_tools import bugzilla # noqa: E402 from agent_tools.bugzilla import BugzillaContext # noqa: E402 from agent_tools.claude_sdk import build_sdk_server # noqa: E402 -from hackbot_agents.bug_fix import run_bug_fix # noqa: E402 +from agent_tools.firefox import FirefoxContext # noqa: E402 +from hackbot_agents.bug_fix.agent import run_bug_fix # noqa: E402 class Settings(BaseSettings): @@ -55,6 +56,7 @@ async def main(): result = await run_bug_fix( bugzilla_mcp_server=bugzilla_mcp_server, source_repo=settings.source_repo, + fx_ctx=FirefoxContext.from_source_repo(settings.source_repo), model=settings.model, max_turns=settings.max_turns, effort=settings.effort, diff --git a/libs/agent-tools/agent_tools/firefox/__init__.py b/libs/agent-tools/agent_tools/firefox/__init__.py index f0fcbb652a..e371a6be2f 100644 --- a/libs/agent-tools/agent_tools/firefox/__init__.py +++ b/libs/agent-tools/agent_tools/firefox/__init__.py @@ -37,15 +37,17 @@ class FirefoxContext: js_binary: Path @classmethod - def from_source_repo(cls, source_repo: Path) -> "FirefoxContext": + def from_source_repo( + cls, source_repo: Path, objdir: str = "objdir-ff-asan" + ) -> "FirefoxContext": src = source_repo.resolve() - objdir = src / "objdir-ff-asan" + objdir_path = src / objdir return cls( source_dir=src, mozconfig=src / ".mozconfig", - objdir=objdir, - binary=objdir / "dist" / "bin" / "firefox", - js_binary=objdir / "dist" / "bin" / "js", + objdir=objdir_path, + 
binary=objdir_path / "dist" / "bin" / "firefox", + js_binary=objdir_path / "dist" / "bin" / "js", ) diff --git a/libs/hackbot-runtime/hackbot_runtime/__init__.py b/libs/hackbot-runtime/hackbot_runtime/__init__.py index 0c0bc24cdb..6335ac369c 100644 --- a/libs/hackbot-runtime/hackbot_runtime/__init__.py +++ b/libs/hackbot-runtime/hackbot_runtime/__init__.py @@ -1,14 +1,16 @@ from hackbot_runtime.actions.recorder import ActionsRecorder -from hackbot_runtime.context import Context -from hackbot_runtime.result import AgentResult +from hackbot_runtime.config import HackbotConfig +from hackbot_runtime.context import HackbotContext +from hackbot_runtime.errors import AgentError from hackbot_runtime.runtime import run, run_async from hackbot_runtime.source import ensure_source_repo from hackbot_runtime.uploader import SignedPolicyUploader __all__ = [ "ActionsRecorder", - "AgentResult", - "Context", + "AgentError", + "HackbotConfig", + "HackbotContext", "SignedPolicyUploader", "ensure_source_repo", "run", diff --git a/libs/hackbot-runtime/hackbot_runtime/config.py b/libs/hackbot-runtime/hackbot_runtime/config.py new file mode 100644 index 0000000000..ef1ae6173d --- /dev/null +++ b/libs/hackbot-runtime/hackbot_runtime/config.py @@ -0,0 +1,59 @@ +"""Declarative agent configuration loaded from ``hackbot.toml``. + +Captures the capability declarations that are intrinsic to an agent (which +source repo it operates on, whether it needs a Firefox build) so the runtime +can prepare them on the agent's behalf. Per-run inputs and secrets are NOT here +— they arrive via environment variables. +""" + +from __future__ import annotations + +import tomllib +from pathlib import Path + +from pydantic import BaseModel + + +class SourceConfig(BaseModel): + """The source repository an agent operates on (see ``ensure_source_repo``).""" + + repo_url: str + # Where the checkout lands. The env var SOURCE_REPO overrides this at runtime + # (the orchestrator points it at the task-local workspace). 
+ checkout_path: Path = Path("/workspace/source") + + +class FirefoxConfig(BaseModel): + """Firefox build the agent needs (paths derived from the source checkout).""" + + enabled: bool = True + # Object directory name under the source root; matches the agent-tools + # FirefoxContext default. + objdir: str = "objdir-ff-asan" + + +class HackbotConfig(BaseModel): + """Parsed ``hackbot.toml``. Every table is optional. + + An agent that does not operate on a repo omits ``[source]``; one that does + not need Firefox omits ``[firefox]``. A missing file yields an empty config. + """ + + source: SourceConfig | None = None + firefox: FirefoxConfig | None = None + + +def load_config(path: Path) -> HackbotConfig: + """Load and validate ``hackbot.toml`` at ``path``. + + Strict: the file must exist. The "agent declares no capabilities" fallback is + handled by discovery (``_resolve_config`` returns an empty + :class:`HackbotConfig` when no toml is found), which never passes a missing + path here. + """ + if not path.exists(): + raise FileNotFoundError(f"Config file {path} does not exist") + + with path.open("rb") as fh: + data = tomllib.load(fh) + return HackbotConfig.model_validate(data) diff --git a/libs/hackbot-runtime/hackbot_runtime/context.py b/libs/hackbot-runtime/hackbot_runtime/context.py index 17d6daa5ad..5aa364ca8b 100644 --- a/libs/hackbot-runtime/hackbot_runtime/context.py +++ b/libs/hackbot-runtime/hackbot_runtime/context.py @@ -1,15 +1,37 @@ +"""The single object an agent's ``main()`` receives from the runtime. + +``HackbotContext`` is what an agent author touches. It answers for everything +the platform provides — the prepared source checkout, Firefox build paths, +model-provider credentials — plus the results/artifacts/actions plumbing, so the +author never cares how or from where those come. 
+ +Its platform fields are read from the environment (the orchestrator sets them); +its capability declarations come from the agent's ``hackbot.toml`` +(:class:`HackbotConfig`), attached via :meth:`from_config`. +""" + +from __future__ import annotations + import datetime +import os import uuid from functools import cached_property from pathlib import Path +from typing import TYPE_CHECKING -from pydantic import Field +from pydantic import Field, PrivateAttr from pydantic_settings import BaseSettings, SettingsConfigDict from hackbot_runtime import artifacts from hackbot_runtime.actions.recorder import ActionsRecorder +from hackbot_runtime.config import HackbotConfig, load_config +from hackbot_runtime.providers import AnthropicAuth +from hackbot_runtime.source import ensure_source_repo from hackbot_runtime.uploader import SignedPolicyUploader +if TYPE_CHECKING: + from agent_tools.firefox import FirefoxContext + def _default_run_id() -> str: """A unique, sortable id for runs that don't get one from the platform. @@ -22,14 +44,14 @@ def _default_run_id() -> str: return f"local-{stamp}-{uuid.uuid4().hex[:6]}" -class Context(BaseSettings): - """Platform context handed to every agent's main() by the runtime. +class HackbotContext(BaseSettings): + """Platform capabilities + results plumbing handed to every agent's main(). - `run_id` defaults to a generated unique id (the orchestrator overrides it - via ``RUN_ID`` in production). The results-upload fields are optional so - local-dev runs (compose, scripts) can start the agent without a - signed POST policy — in that case the runtime writes results into the - local artifacts dir rather than uploading. + `run_id` defaults to a generated unique id (the orchestrator overrides it via + ``RUN_ID`` in production). The results-upload fields are optional so local-dev + runs (compose, scripts) can start the agent without a signed POST policy — in + that case results are written into the local artifacts dir rather than + uploaded. 
""" run_id: str = Field(default_factory=_default_run_id) @@ -45,6 +67,71 @@ class Context(BaseSettings): model_config = SettingsConfigDict(extra="ignore") + # Capability declarations from hackbot.toml (not env); attached after + # construction via from_config()/from_config_obj(). + _config: HackbotConfig = PrivateAttr(default_factory=HackbotConfig) + + @classmethod + def from_config(cls, config_path: Path) -> "HackbotContext": + """Build from ``hackbot.toml`` at ``config_path`` plus env-derived fields.""" + return cls.from_config_obj(load_config(config_path)) + + @classmethod + def from_config_obj(cls, config: HackbotConfig) -> "HackbotContext": + """Build from an already-parsed config plus env-derived fields.""" + obj = cls() + obj._config = config + return obj + + @property + def config(self) -> HackbotConfig: + return self._config + + # --- Platform capabilities (declared in hackbot.toml) ------------- # + + @cached_property + def source_repo(self) -> Path: + """The prepared source checkout, cloned/refreshed on first access. + + The path comes from ``SOURCE_REPO`` (set by the orchestrator) or, failing + that, the ``[source].checkout_path`` in ``hackbot.toml``. The checkout is + prepared lazily so agents that never touch source pay no git cost. + """ + if self._config.source is None: + raise RuntimeError( + "This agent did not declare a [source] in hackbot.toml; " + "no source repository is available." + ) + env_path = os.environ.get("SOURCE_REPO") + path = Path(env_path) if env_path else self._config.source.checkout_path + ensure_source_repo(path, self._config.source.repo_url) + return path + + @cached_property + def firefox(self) -> "FirefoxContext": + """Firefox build paths derived from the prepared source checkout. + + Importing ``agent_tools.firefox`` lazily keeps the base runtime free of + the ``agent-tools[firefox]`` extra for agents that don't need it. 
+ """ + if self._config.firefox is None or not self._config.firefox.enabled: + raise RuntimeError( + "This agent did not declare an enabled [firefox] in " + "hackbot.toml; no Firefox build is available." + ) + from agent_tools.firefox import FirefoxContext + + return FirefoxContext.from_source_repo( + self.source_repo, objdir=self._config.firefox.objdir + ) + + @cached_property + def anthropic(self) -> AnthropicAuth: + """Anthropic credentials (validated on first key access).""" + return AnthropicAuth() + + # --- Results / artifacts / actions plumbing ----------------------- # + @cached_property def uploader(self) -> SignedPolicyUploader | None: if not self.results_policy_url: diff --git a/libs/hackbot-runtime/hackbot_runtime/errors.py b/libs/hackbot-runtime/hackbot_runtime/errors.py new file mode 100644 index 0000000000..5ab01e466d --- /dev/null +++ b/libs/hackbot-runtime/hackbot_runtime/errors.py @@ -0,0 +1,7 @@ +class AgentError(Exception): + """Raise from an agent's ``main()`` to fail the run with a clear message. + + The runtime records the message as the run's ``error`` in ``summary.json`` + and exits non-zero. Any other exception fails the run too — ``AgentError`` + just reads as a deliberate, expected failure rather than a crash. + """ diff --git a/libs/hackbot-runtime/hackbot_runtime/providers.py b/libs/hackbot-runtime/hackbot_runtime/providers.py new file mode 100644 index 0000000000..da618c474e --- /dev/null +++ b/libs/hackbot-runtime/hackbot_runtime/providers.py @@ -0,0 +1,48 @@ +"""Credentials the runtime provides to agents. + +The runtime owns where credentials come from so agents don't reach into the +environment themselves. Today only Anthropic is wired; the :class:`Provider` +protocol leaves room to add others (Vertex, OpenAI, ...) without changing the +agent-facing surface. 
+""" + +from __future__ import annotations + +import os +from typing import Protocol, runtime_checkable + + +class ProviderError(RuntimeError): + """A required credential for a provider is missing or invalid.""" + + +@runtime_checkable +class Provider(Protocol): + """A credentialed model/service provider the runtime can hand to an agent.""" + + name: str + + @property + def api_key(self) -> str: ... + + +class AnthropicAuth: + """Anthropic credentials, read from the environment and validated on access. + + Exposes the API key explicitly (rather than relying on the SDK implicitly + reading the env) so a missing key fails fast with a clear message instead of + surfacing as an opaque error deep inside a request. + """ + + name = "anthropic" + env_var = "ANTHROPIC_API_KEY" + + @property + def api_key(self) -> str: + key = os.environ.get(self.env_var) + if not key: + raise ProviderError( + f"{self.env_var} is not set; the runtime cannot provide " + "Anthropic credentials to this agent." + ) + return key diff --git a/libs/hackbot-runtime/hackbot_runtime/result.py b/libs/hackbot-runtime/hackbot_runtime/result.py deleted file mode 100644 index 8589aac069..0000000000 --- a/libs/hackbot-runtime/hackbot_runtime/result.py +++ /dev/null @@ -1,18 +0,0 @@ -from dataclasses import dataclass, field -from typing import Any, Literal - - -@dataclass -class AgentResult: - """Outcome reported by an agent's main() to the runtime. - - The runtime serialises this into the summary.json artifact the orchestrator - reads. `status` drives the run's terminal state in hackbot-api; `findings` - is opaque to the platform and surfaced verbatim. Recorded actions are not - carried here — the runtime reads them from `Context.actions`. 
- """ - - status: Literal["ok", "error"] = "ok" - error: str | None = None - findings: dict[str, Any] = field(default_factory=dict) - exit_code: int = 0 diff --git a/libs/hackbot-runtime/hackbot_runtime/runtime.py b/libs/hackbot-runtime/hackbot_runtime/runtime.py index 3f832dee95..ef67c65146 100644 --- a/libs/hackbot-runtime/hackbot_runtime/runtime.py +++ b/libs/hackbot-runtime/hackbot_runtime/runtime.py @@ -1,19 +1,32 @@ import asyncio +import inspect import logging import sys import traceback from collections.abc import Awaitable, Callable +from pathlib import Path +from typing import Any, NoReturn from pydantic import ValidationError -from hackbot_runtime.context import Context -from hackbot_runtime.result import AgentResult +from hackbot_runtime.config import HackbotConfig, load_config +from hackbot_runtime.context import HackbotContext log = logging.getLogger("hackbot_runtime") -AgentMain = Callable[[Context], AgentResult] -AsyncAgentMain = Callable[[Context], Awaitable[AgentResult]] +# An agent's main() returns its findings (a JSON-able dict) on success, or None +# if it has nothing to report; to fail the run it raises (AgentError, or any +# exception). The runtime turns that outcome into summary.json + an exit code. +Findings = dict[str, Any] | None +AgentMain = Callable[[HackbotContext], Findings] +AsyncAgentMain = Callable[[HackbotContext], Awaitable[Findings]] +# What run()/run_async() accept to locate an agent's hackbot.toml: a path to it, +# an already-parsed config, or None to auto-discover ``hackbot.toml`` (in the +# working directory or above the entry point's module). +ConfigArg = Path | HackbotConfig | None + +_CONFIG_NAME = "hackbot.toml" _SUMMARY_NAME = "summary.json" @@ -26,42 +39,102 @@ def _configure_logging() -> None: ) -def _summary_payload_from_result(result: AgentResult, ctx: Context) -> dict: - # Actions are recorded via Context.actions; the result never carries them. 
+def _ok_payload(ctx: HackbotContext, findings: dict) -> dict: + # Actions are recorded via ctx.actions; the agent never carries them. return { - "status": result.status, - "error": result.error, - "findings": result.findings, + "status": "ok", + "error": None, + "findings": findings, "actions": ctx.actions.actions, } -def _summary_payload_from_exception(exc: BaseException, ctx: Context) -> dict: +def _error_payload( + ctx: HackbotContext, error: str, *, traceback_str: str | None = None +) -> dict: return { "status": "error", - "error": f"{type(exc).__name__}: {exc}", - "findings": {"traceback": traceback.format_exc()}, + "error": error, + "findings": {"traceback": traceback_str} if traceback_str else {}, "actions": ctx.actions.actions, } -def _load_context() -> Context | None: +def _discover_config_path(entrypoint: Callable) -> Path | None: + """Locate ``hackbot.toml`` for an agent that didn't pass one explicitly. + + Agents keep ``hackbot.toml`` at their agent root (alongside ``pyproject.toml`` + / ``Dockerfile``), above the ``hackbot_agents`` package. Two layouts to cover: + + - **Deployed image**: the package is installed into site-packages, but the + Dockerfile copies ``hackbot.toml`` into the working directory — so check + the cwd first. + - **Editable checkout / tests**: the entry point's module lives under the + agent root, so walk up from it until the toml turns up. 
+ """ + cwd_candidate = Path.cwd() / _CONFIG_NAME + if cwd_candidate.exists(): + return cwd_candidate try: - return Context() + module_file = inspect.getsourcefile(entrypoint) + except TypeError: + module_file = None + if module_file: + for parent in Path(module_file).resolve().parents: + candidate = parent / _CONFIG_NAME + if candidate.exists(): + return candidate + return None + + +def _resolve_config(entrypoint: Callable, config: ConfigArg) -> HackbotConfig: + if isinstance(config, HackbotConfig): + return config + path = config if isinstance(config, Path) else _discover_config_path(entrypoint) + return load_config(path) if path else HackbotConfig() + + +def _load_hackbot(entrypoint: Callable, config: ConfigArg) -> HackbotContext | None: + """Build the HackbotContext (and its inner env-derived Context). + + ``config`` may be a path to a ``hackbot.toml``, an already-parsed + :class:`HackbotConfig`, or ``None`` to auto-discover the toml (cwd or above + the entry point's module), falling back to an empty config when there's none. + """ + parsed = _resolve_config(entrypoint, config) + try: + return HackbotContext.from_config_obj(parsed) except ValidationError as exc: log.error( - "Failed to load Context from env; no summary can be written.\n%s", + "Failed to load HackbotContext from env; no summary can be written.\n%s", exc, ) return None -def _finish(ctx: Context, result_or_exc: AgentResult | BaseException) -> int: - if isinstance(result_or_exc, AgentResult): - payload = _summary_payload_from_result(result_or_exc, ctx) - exit_code = result_or_exc.exit_code +def _finish(ctx: HackbotContext, outcome: object) -> int: + """Write summary.json from the agent's outcome and return the exit code. + + ``outcome`` is the agent's return value (findings dict or None) on success, + or the exception it raised on failure. 
+ """ + if isinstance(outcome, BaseException): + payload = _error_payload( + ctx, + f"{type(outcome).__name__}: {outcome}", + traceback_str=traceback.format_exc(), + ) + exit_code = 1 + elif outcome is None or isinstance(outcome, dict): + payload = _ok_payload(ctx, outcome or {}) + exit_code = 0 else: - payload = _summary_payload_from_exception(result_or_exc, ctx) + # Contract violation: not a findings dict, None, or an exception. + msg = ( + f"Agent returned {type(outcome).__name__}; expected a findings dict or None" + ) + log.error(msg) + payload = _error_payload(ctx, msg) exit_code = 1 # Upload when a signed policy is configured, else write into the local @@ -81,46 +154,31 @@ def _finish(ctx: Context, result_or_exc: AgentResult | BaseException) -> int: return exit_code -def _validate_result(result: object) -> AgentResult: - """Coerce arbitrary agent return values into an AgentResult. - - Returning a synthetic AgentResult (rather than letting an exception - object flow into `_finish`) keeps the summary deterministic: the - exception path calls `traceback.format_exc()`, which evaluates to - "NoneType: None" when no exception is active. 
- """ - if isinstance(result, AgentResult): - return result - msg = f"Agent returned {type(result).__name__}; expected AgentResult" - log.error(msg) - return AgentResult(status="error", error=msg, exit_code=1) - - -def run(entrypoint: AgentMain) -> int: +def run(entrypoint: AgentMain, config: ConfigArg = None) -> NoReturn: _configure_logging() - ctx = _load_context() + ctx = _load_hackbot(entrypoint, config) if ctx is None: - return 2 + raise SystemExit(2) try: - result = entrypoint(ctx) + outcome: object = entrypoint(ctx) except Exception as exc: log.exception("Agent raised an exception") - return _finish(ctx, exc) + outcome = exc - return _finish(ctx, _validate_result(result)) + raise SystemExit(_finish(ctx, outcome)) -def run_async(entrypoint: AsyncAgentMain) -> int: +def run_async(entrypoint: AsyncAgentMain, config: ConfigArg = None) -> NoReturn: _configure_logging() - ctx = _load_context() + ctx = _load_hackbot(entrypoint, config) if ctx is None: - return 2 + raise SystemExit(2) try: - result = asyncio.run(entrypoint(ctx)) + outcome: object = asyncio.run(entrypoint(ctx)) except Exception as exc: log.exception("Agent raised an exception") - return _finish(ctx, exc) + outcome = exc - return _finish(ctx, _validate_result(result)) + raise SystemExit(_finish(ctx, outcome)) diff --git a/libs/hackbot-runtime/hackbot_runtime/uploader.py b/libs/hackbot-runtime/hackbot_runtime/uploader.py index dc888ba381..038a92c8bd 100644 --- a/libs/hackbot-runtime/hackbot_runtime/uploader.py +++ b/libs/hackbot-runtime/hackbot_runtime/uploader.py @@ -8,7 +8,7 @@ class SignedPolicyUploader: """POST artifacts to a GCS V4 signed POST policy. - The orchestrator passes the policy via env vars consumed by `Context`. + The orchestrator passes the policy via env vars consumed by `HackbotContext`. The Job has no GCP identity; this signed policy is its only write capability. 
""" diff --git a/libs/hackbot-runtime/tests/test_config.py b/libs/hackbot-runtime/tests/test_config.py new file mode 100644 index 0000000000..80a82067e5 --- /dev/null +++ b/libs/hackbot-runtime/tests/test_config.py @@ -0,0 +1,60 @@ +"""Tests for hackbot.toml parsing into HackbotConfig.""" + +from pathlib import Path + +import pytest +from hackbot_runtime.config import load_config + +FULL_TOML = """ +[source] +repo_url = "https://example.com/repo.git" +checkout_path = "/workspace/repo" + +[firefox] +enabled = true +objdir = "objdir-custom" +""" + + +def test_load_full_config(tmp_path): + path = tmp_path / "hackbot.toml" + path.write_text(FULL_TOML) + + cfg = load_config(path) + + assert cfg.source is not None + assert cfg.source.repo_url == "https://example.com/repo.git" + assert cfg.source.checkout_path == Path("/workspace/repo") + assert cfg.firefox is not None + assert cfg.firefox.enabled is True + assert cfg.firefox.objdir == "objdir-custom" + + +def test_missing_file_raises(tmp_path): + # load_config is strict; the "no config" fallback lives in discovery + # (_resolve_config), which never hands a missing path to load_config. + with pytest.raises(FileNotFoundError): + load_config(tmp_path / "does-not-exist.toml") + + +def test_missing_tables_default_to_none(tmp_path): + path = tmp_path / "hackbot.toml" + path.write_text('[source]\nrepo_url = "https://example.com/repo.git"\n') + + cfg = load_config(path) + + assert cfg.source is not None + # checkout_path falls back to the SourceConfig default. 
+ assert cfg.source.checkout_path == Path("/workspace/source") + assert cfg.firefox is None + + +def test_firefox_defaults(tmp_path): + path = tmp_path / "hackbot.toml" + path.write_text("[firefox]\n") + + cfg = load_config(path) + + assert cfg.firefox is not None + assert cfg.firefox.enabled is True + assert cfg.firefox.objdir == "objdir-ff-asan" diff --git a/libs/hackbot-runtime/tests/test_context.py b/libs/hackbot-runtime/tests/test_context.py new file mode 100644 index 0000000000..6dea30342a --- /dev/null +++ b/libs/hackbot-runtime/tests/test_context.py @@ -0,0 +1,83 @@ +"""Tests for HackbotContext capabilities and results plumbing.""" + +from pathlib import Path + +import pytest +from hackbot_runtime import HackbotContext +from hackbot_runtime.config import FirefoxConfig, HackbotConfig, SourceConfig + + +def _hb(tmp_path, config: HackbotConfig) -> HackbotContext: + hb = HackbotContext(run_id="local-test", artifacts_dir=tmp_path / "artifacts") + hb._config = config + return hb + + +def test_source_repo_without_declaration_raises(tmp_path): + hb = _hb(tmp_path, HackbotConfig()) + with pytest.raises(RuntimeError, match="\\[source\\]"): + hb.source_repo + + +def test_firefox_without_declaration_raises(tmp_path): + hb = _hb(tmp_path, HackbotConfig()) + with pytest.raises(RuntimeError, match="\\[firefox\\]"): + hb.firefox + + +def test_firefox_disabled_raises(tmp_path): + cfg = HackbotConfig( + source=SourceConfig(repo_url="x"), firefox=FirefoxConfig(enabled=False) + ) + hb = _hb(tmp_path, cfg) + with pytest.raises(RuntimeError, match="\\[firefox\\]"): + hb.firefox + + +def test_source_repo_prepares_and_honors_env_override(tmp_path, monkeypatch): + calls = [] + + def fake_ensure(path: Path, repo_url: str) -> None: + calls.append((path, repo_url)) + + monkeypatch.setattr("hackbot_runtime.context.ensure_source_repo", fake_ensure) + monkeypatch.setenv("SOURCE_REPO", str(tmp_path / "from-env")) + + cfg = HackbotConfig( + source=SourceConfig( + 
repo_url="https://example.com/r.git", + checkout_path=Path("/from/toml"), + ) + ) + hb = _hb(tmp_path, cfg) + + assert hb.source_repo == tmp_path / "from-env" + assert calls == [(tmp_path / "from-env", "https://example.com/r.git")] + + +def test_source_repo_uses_toml_path_without_env(tmp_path, monkeypatch): + monkeypatch.delenv("SOURCE_REPO", raising=False) + monkeypatch.setattr( + "hackbot_runtime.context.ensure_source_repo", lambda *a, **k: None + ) + cfg = HackbotConfig( + source=SourceConfig(repo_url="r", checkout_path=Path("/from/toml")) + ) + hb = _hb(tmp_path, cfg) + assert hb.source_repo == Path("/from/toml") + + +def test_results_plumbing(tmp_path): + hb = _hb(tmp_path, HackbotConfig()) + + assert hb.run_id == "local-test" + + log = tmp_path / "agent.log" + log.write_text("hello") + key = hb.publish_file("logs/agent.log", log) + assert key == "logs/agent.log" + written = tmp_path / "artifacts" / "local-test" / "logs" / "agent.log" + assert written.read_text() == "hello" + + hb.actions.record("bugzilla.update_bug", {"bug_id": 1}, reasoning="r") + assert hb.actions.actions[0]["type"] == "bugzilla.update_bug" diff --git a/libs/hackbot-runtime/tests/test_providers.py b/libs/hackbot-runtime/tests/test_providers.py new file mode 100644 index 0000000000..fae300db25 --- /dev/null +++ b/libs/hackbot-runtime/tests/test_providers.py @@ -0,0 +1,26 @@ +"""Tests for provider credential exposure/validation.""" + +import pytest +from hackbot_runtime.providers import AnthropicAuth, Provider, ProviderError + + +def test_api_key_returned_when_set(monkeypatch): + monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-test") + assert AnthropicAuth().api_key == "sk-test" + + +def test_missing_key_raises_clear_error(monkeypatch): + monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False) + with pytest.raises(ProviderError, match="ANTHROPIC_API_KEY"): + AnthropicAuth().api_key + + +def test_empty_key_treated_as_missing(monkeypatch): + monkeypatch.setenv("ANTHROPIC_API_KEY", "") + with 
pytest.raises(ProviderError): + AnthropicAuth().api_key + + +def test_satisfies_provider_protocol(monkeypatch): + monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-test") + assert isinstance(AnthropicAuth(), Provider) diff --git a/libs/hackbot-runtime/tests/test_runtime.py b/libs/hackbot-runtime/tests/test_runtime.py index 9ad5ebef7e..ce0f69ee7b 100644 --- a/libs/hackbot-runtime/tests/test_runtime.py +++ b/libs/hackbot-runtime/tests/test_runtime.py @@ -2,25 +2,26 @@ import json -from hackbot_runtime import AgentResult, Context -from hackbot_runtime.runtime import _finish +import pytest +from hackbot_runtime import AgentError, HackbotContext, run_async +from hackbot_runtime.runtime import _discover_config_path, _finish, _resolve_config def test_run_id_defaults_to_unique_generated_id(monkeypatch): monkeypatch.delenv("RUN_ID", raising=False) - a, b = Context(), Context() + a, b = HackbotContext(), HackbotContext() assert a.run_id != b.run_id assert a.run_id.startswith("local-") def test_run_id_env_overrides_default(monkeypatch): monkeypatch.setenv("RUN_ID", "orchestrator-42") - assert Context().run_id == "orchestrator-42" + assert HackbotContext().run_id == "orchestrator-42" def _ctx(tmp_path, run_id="local-test"): # No results_policy_url -> uploader is None -> local artifacts path. - return Context(run_id=run_id, artifacts_dir=tmp_path / "artifacts") + return HackbotContext(run_id=run_id, artifacts_dir=tmp_path / "artifacts") def test_summary_written_locally_without_uploader(tmp_path): @@ -31,7 +32,7 @@ def test_summary_written_locally_without_uploader(tmp_path): reasoning="rule X", ) - code = _finish(ctx, AgentResult(status="ok", findings={"bugs_processed": 1})) + code = _finish(ctx, {"bugs_processed": 1}) assert code == 0 # Written under the per-run subdir: artifacts_dir / run_id. 
@@ -55,11 +56,23 @@ def test_summary_written_for_exception(tmp_path): assert "boom" in summary["error"] +def test_non_dict_return_is_contract_error(tmp_path): + ctx = _ctx(tmp_path) + code = _finish(ctx, "not a dict") + + assert code == 1 + summary = json.loads( + (tmp_path / "artifacts" / "local-test" / "summary.json").read_text() + ) + assert summary["status"] == "error" + assert "expected a findings dict" in summary["error"] + + def test_runs_are_namespaced_by_run_id(tmp_path): ctx_a = _ctx(tmp_path, run_id="run-a") ctx_b = _ctx(tmp_path, run_id="run-b") - _finish(ctx_a, AgentResult(status="ok")) - _finish(ctx_b, AgentResult(status="error", error="x")) + _finish(ctx_a, None) + _finish(ctx_b, RuntimeError("x")) base = tmp_path / "artifacts" assert json.loads((base / "run-a" / "summary.json").read_text())["status"] == "ok" @@ -68,6 +81,92 @@ def test_runs_are_namespaced_by_run_id(tmp_path): ) +def _dummy_entry(ctx): # stand-in entrypoint for discovery tests + return None + + +def test_config_auto_discovered_from_cwd(tmp_path, monkeypatch): + (tmp_path / "hackbot.toml").write_text('[source]\nrepo_url = "https://x/y.git"\n') + monkeypatch.chdir(tmp_path) + + assert _discover_config_path(_dummy_entry) == tmp_path / "hackbot.toml" + cfg = _resolve_config(_dummy_entry, None) + assert cfg.source is not None + assert cfg.source.repo_url == "https://x/y.git" + + +def test_config_discovered_above_entrypoint_module(tmp_path, monkeypatch): + # Agent root holds hackbot.toml; the entry module lives below it (editable + # checkout). cwd has no toml, so discovery must walk up from the module. 
+ agent_root = tmp_path / "agent" + pkg = agent_root / "mypkg" + pkg.mkdir(parents=True) + (pkg / "__init__.py").write_text("") + (pkg / "agent.py").write_text("def main(ctx):\n return None\n") + (agent_root / "hackbot.toml").write_text('[source]\nrepo_url = "https://a/b.git"\n') + + empty = tmp_path / "elsewhere" + empty.mkdir() + monkeypatch.chdir(empty) + monkeypatch.syspath_prepend(str(agent_root)) + from mypkg.agent import main # type: ignore + + assert _discover_config_path(main) == agent_root / "hackbot.toml" + + +def test_no_config_discovered_yields_empty(tmp_path, monkeypatch): + pkg = tmp_path / "barepkg" + pkg.mkdir() + (pkg / "__init__.py").write_text("") + (pkg / "agent.py").write_text("def main(ctx):\n return None\n") + + monkeypatch.chdir(tmp_path) + monkeypatch.syspath_prepend(str(tmp_path)) + from barepkg.agent import main # type: ignore + + assert _discover_config_path(main) is None + cfg = _resolve_config(main, None) + assert cfg.source is None and cfg.firefox is None + + +def _run_env(tmp_path, monkeypatch): + # Make run_async write into tmp and discover no hackbot.toml. 
+ monkeypatch.setenv("ARTIFACTS_DIR", str(tmp_path)) + monkeypatch.setenv("RUN_ID", "t") + monkeypatch.delenv("RESULTS_POLICY_URL", raising=False) + monkeypatch.chdir(tmp_path) # no hackbot.toml here + + +def test_run_async_exits_zero_and_writes_summary(tmp_path, monkeypatch): + _run_env(tmp_path, monkeypatch) + + async def main(ctx): + return {"did": "work"} + + with pytest.raises(SystemExit) as exc: + run_async(main) + + assert exc.value.code == 0 + summary = json.loads((tmp_path / "t" / "summary.json").read_text()) + assert summary["status"] == "ok" + assert summary["findings"] == {"did": "work"} + + +def test_run_async_exits_nonzero_when_agent_raises(tmp_path, monkeypatch): + _run_env(tmp_path, monkeypatch) + + async def main(ctx): + raise AgentError("nope") + + with pytest.raises(SystemExit) as exc: + run_async(main) + + assert exc.value.code == 1 + summary = json.loads((tmp_path / "t" / "summary.json").read_text()) + assert summary["status"] == "error" + assert "nope" in summary["error"] + + def test_publish_file_copies_locally_without_uploader(tmp_path): ctx = _ctx(tmp_path) log = tmp_path / "agent.log" From 040d895b88d6be441d3aa449fd9231886ca0dc94 Mon Sep 17 00:00:00 2001 From: Suhaib Mujahid Date: Thu, 11 Jun 2026 00:55:02 -0400 Subject: [PATCH 16/21] Drop the run_local.py convenience script --- agents/README.md | 1 - agents/bug-fix/run_local.py | 70 ------------------------------------- 2 files changed, 71 deletions(-) delete mode 100644 agents/bug-fix/run_local.py diff --git a/agents/README.md b/agents/README.md index e483f32148..9483b2b97f 100644 --- a/agents/README.md +++ b/agents/README.md @@ -18,7 +18,6 @@ agents// __main__.py # entrypoint: AgentInputs(BaseSettings) + async def main(ctx) -> dict + run_async(main) prompts/ rules/ # assets read via Path(__file__).parent broker/ # OPTIONAL: secret-holding MCP sidecar (python -m hackbot_agents..broker) - run_local.py # OPTIONAL: run without Docker/broker for quick iteration ``` ## `hackbot.toml` — what 
the platform provides diff --git a/agents/bug-fix/run_local.py b/agents/bug-fix/run_local.py deleted file mode 100644 index 50236ac7de..0000000000 --- a/agents/bug-fix/run_local.py +++ /dev/null @@ -1,70 +0,0 @@ -"""Run the bug-fix agent locally, without Docker or the broker sidecar. - -Builds the read-only Bugzilla MCP server in-process, so this script sees the -Bugzilla API key directly — unlike the deployed agent, which reaches a broker -sidecar over HTTP and never holds the key. Handy for quick iteration; for a -faithful end-to-end run use ``docker compose -f compose.yml up``. -""" - -import asyncio -import sys -from pathlib import Path - -import bugsy -from pydantic_settings import BaseSettings, SettingsConfigDict - -# Make the co-located `hackbot_agents` namespace importable regardless of cwd. -sys.path.insert(0, str(Path(__file__).resolve().parent)) - -from agent_tools import bugzilla # noqa: E402 -from agent_tools.bugzilla import BugzillaContext # noqa: E402 -from agent_tools.claude_sdk import build_sdk_server # noqa: E402 -from agent_tools.firefox import FirefoxContext # noqa: E402 -from hackbot_agents.bug_fix.agent import run_bug_fix # noqa: E402 - - -class Settings(BaseSettings): - bug_id: int - bugzilla_api_url: str = "https://bugzilla.mozilla.org/rest" - bugzilla_api_key: str - source_repo: Path - model: str | None = None - max_turns: int | None = None - effort: str | None = None - - model_config = SettingsConfigDict( - cli_parse_args=True, - env_file=".env", - extra="ignore", - ) - - -async def main(): - settings = Settings() - - bugzilla_mcp_server = build_sdk_server( - "bugzilla", - BugzillaContext( - client=bugsy.Bugsy( - api_key=settings.bugzilla_api_key, - bugzilla_url=settings.bugzilla_api_url, - ), - ), - bugzilla.TOOLS, - ) - - result = await run_bug_fix( - bugzilla_mcp_server=bugzilla_mcp_server, - source_repo=settings.source_repo, - fx_ctx=FirefoxContext.from_source_repo(settings.source_repo), - model=settings.model, - 
max_turns=settings.max_turns, - effort=settings.effort, - bugs=[settings.bug_id], - verbose=True, - ) - print(f"\nexit_code={result.exit_code} bugs_processed={result.bugs_processed}") - - -if __name__ == "__main__": - asyncio.run(main()) From d5054f9110ae80953838c5f26362c382dd363838 Mon Sep 17 00:00:00 2001 From: Suhaib Mujahid Date: Thu, 11 Jun 2026 10:12:30 -0400 Subject: [PATCH 17/21] Cleanup comment separators and reflow comments --- .../bug-fix/hackbot_agents/bug_fix/agent.py | 30 +++++-------------- 1 file changed, 7 insertions(+), 23 deletions(-) diff --git a/agents/bug-fix/hackbot_agents/bug_fix/agent.py b/agents/bug-fix/hackbot_agents/bug_fix/agent.py index 55be4b4328..d24c095288 100644 --- a/agents/bug-fix/hackbot_agents/bug_fix/agent.py +++ b/agents/bug-fix/hackbot_agents/bug_fix/agent.py @@ -38,22 +38,12 @@ HERE = Path(__file__).resolve().parent -# --------------------------------------------------------------------------- # -# Result type -# --------------------------------------------------------------------------- # - - @dataclass class BugFixResult: exit_code: int = 0 bugs_processed: int = 0 -# --------------------------------------------------------------------------- # -# Prompts & agents -# --------------------------------------------------------------------------- # - - def load_system_prompt(rules_dir: Path, extra: str) -> str: tmpl = (HERE / "prompts" / "system.md").read_text() @@ -90,11 +80,6 @@ def make_investigator() -> AgentDefinition: ) -# --------------------------------------------------------------------------- # -# Agent entrypoint -# --------------------------------------------------------------------------- # - - async def run_bug_fix( *, bugzilla_mcp_server: McpServerConfig, @@ -123,19 +108,18 @@ async def run_bug_fix( selected = sorted(bugs, reverse=newest_first) print(f"[bug_fix] triaging {len(selected)} bug(s): {selected}", file=sys.stderr) - # --- Firefox build/eval MCP server (in-process; no tokens) -------- # - # The runtime 
derives fx_ctx from the prepared source checkout and the - # agent's hackbot.toml; here we only wrap its tools as an MCP server. + # Firefox build/eval MCP server (in-process; no tokens). The runtime + # derives fx_ctx from the prepared source checkout and the agent's + # hackbot.toml; here we only wrap its tools as an MCP server. firefox_server = build_sdk_server("firefox", fx_ctx, firefox.TOOLS) - # --- Action-recording MCP server (in-process) --------------------- # - # Standalone/script runs pass actions_recorder=None and get a local - # recorder that copies attachments under ./artifacts (no uploader). + # Action-recording MCP server (in-process). Standalone/script runs pass + # actions_recorder=None and get a local recorder that copies attachments + # under ./artifacts (no uploader). actions_recorder, actions_server = actions_server_for( actions_recorder, types=ENABLED_ACTION_TYPES ) - # --- Build agent options ------------------------------------------ # system_prompt = load_system_prompt(rules_dir, instructions) options = ClaudeAgentOptions( @@ -166,7 +150,7 @@ async def run_bug_fix( setting_sources=[], ) - # --- Run: one fresh agent context per bug ------------------------- # + # Run one fresh agent context per bug. exit_code = 0 rules_path = rules_dir.resolve() with Reporter(verbose=verbose, log_path=log) as reporter: From 6761966c59b3621e56aa580e3f6e5be4def8b757 Mon Sep 17 00:00:00 2001 From: Suhaib Mujahid Date: Thu, 11 Jun 2026 12:04:25 -0400 Subject: [PATCH 18/21] Return a HackbotAgentResult model from agent main() Replace the bug-fix agent's exit_code/bool result handling with a HackbotAgentResult model (num_turns + optional total_cost_usd) that the runtime serializes into summary.json findings. run_bug_fix now raises AgentError on failure instead of flagging it, and handles a single bug. Also drop the script-style banner comments in bug_fix/agent.py and fix a stale ENABLED_ACTION_TYPES import in its config. 
--- .../hackbot_agents/bug_fix/__main__.py | 53 +++++------ .../bug-fix/hackbot_agents/bug_fix/agent.py | 92 +++++++++---------- .../bug-fix/hackbot_agents/bug_fix/config.py | 17 ++-- .../hackbot_runtime/__init__.py | 2 + .../hackbot_runtime/results.py | 13 +++ .../hackbot_runtime/runtime.py | 25 +++-- libs/hackbot-runtime/tests/test_runtime.py | 38 ++++++-- 7 files changed, 133 insertions(+), 107 deletions(-) create mode 100644 libs/hackbot-runtime/hackbot_runtime/results.py diff --git a/agents/bug-fix/hackbot_agents/bug_fix/__main__.py b/agents/bug-fix/hackbot_agents/bug_fix/__main__.py index 51dea6a41d..c4045d755f 100644 --- a/agents/bug-fix/hackbot_agents/bug_fix/__main__.py +++ b/agents/bug-fix/hackbot_agents/bug_fix/__main__.py @@ -1,10 +1,10 @@ import tempfile from pathlib import Path -from hackbot_runtime import AgentError, HackbotContext, run_async +from hackbot_runtime import HackbotContext, run_async from pydantic_settings import BaseSettings, SettingsConfigDict -from .agent import run_bug_fix +from .agent import BugFixResult, run_bug_fix class AgentInputs(BaseSettings): @@ -17,41 +17,36 @@ class AgentInputs(BaseSettings): model_config = SettingsConfigDict(extra="ignore") -async def main(ctx: HackbotContext) -> dict: +async def main(ctx: HackbotContext) -> BugFixResult: inputs = AgentInputs() # Fail fast if the platform did not provide Anthropic credentials. ctx.anthropic.api_key log_path = Path(tempfile.mkdtemp(prefix="bug-fix-log-")) / "agent.log" - result = await run_bug_fix( - task="Triage and fix the bug, and verify the fix", - bugzilla_mcp_server={ - "type": "http", - "url": inputs.bugzilla_mcp_url, - }, - source_repo=ctx.source_repo, - fx_ctx=ctx.firefox, - bugs=[inputs.bug_id], - model=inputs.model, - max_turns=inputs.max_turns, - effort=inputs.effort, - log=log_path, - verbose=True, - actions_recorder=ctx.actions, - ) - - if log_path.exists(): - # Uploaded when a signed policy is set, else copied into ./artifacts. 
- ctx.publish_file("logs/agent.log", log_path, "text/plain") - - if result.exit_code != 0: - raise AgentError( - f"bug triage failed ({result.bugs_processed} bug(s) processed, " - f"exit_code={result.exit_code})" + try: + result = await run_bug_fix( + task="Triage and fix the bug, and verify the fix", + bugzilla_mcp_server={ + "type": "http", + "url": inputs.bugzilla_mcp_url, + }, + source_repo=ctx.source_repo, + fx_ctx=ctx.firefox, + bug=inputs.bug_id, + model=inputs.model, + max_turns=inputs.max_turns, + effort=inputs.effort, + log=log_path, + verbose=True, + actions_recorder=ctx.actions, ) + finally: + if log_path.exists(): + # Uploaded when a signed policy is set, else copied into ./artifacts. + ctx.publish_file("logs/agent.log", log_path, "text/plain") - return {"bugs_processed": result.bugs_processed} + return result if __name__ == "__main__": diff --git a/agents/bug-fix/hackbot_agents/bug_fix/agent.py b/agents/bug-fix/hackbot_agents/bug_fix/agent.py index d24c095288..d6868a92b0 100644 --- a/agents/bug-fix/hackbot_agents/bug_fix/agent.py +++ b/agents/bug-fix/hackbot_agents/bug_fix/agent.py @@ -9,7 +9,6 @@ from __future__ import annotations import sys -from dataclasses import dataclass from pathlib import Path from agent_tools import firefox @@ -22,14 +21,13 @@ McpServerConfig, ResultMessage, ) -from hackbot_runtime import ActionsRecorder +from hackbot_runtime import ActionsRecorder, AgentError, HackbotAgentResult from hackbot_runtime.actions import ACTIONS_SERVER_NAME -from hackbot_runtime.actions.claude_sdk import actions_server_for +from hackbot_runtime.actions.claude_sdk import actions_server_for, actions_to_tool_names from hackbot_runtime.claude import Reporter from .config import ( BUGZILLA_READ_TOOLS, - ENABLED_ACTION_TOOLS, ENABLED_ACTION_TYPES, FIREFOX_TOOLS, SOURCE_WRITE_TOOLS, @@ -38,10 +36,9 @@ HERE = Path(__file__).resolve().parent -@dataclass -class BugFixResult: - exit_code: int = 0 - bugs_processed: int = 0 +class 
BugFixResult(HackbotAgentResult): + bug_id: int + result: str | None = None def load_system_prompt(rules_dir: Path, extra: str) -> str: @@ -85,11 +82,10 @@ async def run_bug_fix( bugzilla_mcp_server: McpServerConfig, source_repo: Path, fx_ctx: FirefoxContext, - bugs: list[int], + bug: int, instructions: str = "", task: str | None = None, rules_dir: Path | None = None, - newest_first: bool = False, model: str | None = None, max_turns: int | None = None, effort: str | None = None, @@ -97,16 +93,15 @@ async def run_bug_fix( log: Path | None = None, actions_recorder: ActionsRecorder | None = None, ) -> BugFixResult: - """Triage and fix the given Bugzilla bug(s) with a claude-agent-sdk agent.""" + """Triage and fix a single Bugzilla bug with a claude-agent-sdk agent. + + Returns a :class:`BugFixResult` on success; raises :class:`AgentError` if the + agent ends in an error. + """ if rules_dir is None: rules_dir = HERE / "rules" - if not bugs: - print("[bug_fix] no bug ids supplied — nothing to do", file=sys.stderr) - return BugFixResult(exit_code=0) - - selected = sorted(bugs, reverse=newest_first) - print(f"[bug_fix] triaging {len(selected)} bug(s): {selected}", file=sys.stderr) + print(f"[bug_fix] triaging bug {bug}", file=sys.stderr) # Firefox build/eval MCP server (in-process; no tokens). The runtime # derives fx_ctx from the prepared source checkout and the agent's @@ -119,6 +114,7 @@ async def run_bug_fix( actions_recorder, actions_server = actions_server_for( actions_recorder, types=ENABLED_ACTION_TYPES ) + enabled_action_tools = actions_to_tool_names(ENABLED_ACTION_TYPES) system_prompt = load_system_prompt(rules_dir, instructions) @@ -141,7 +137,7 @@ async def run_bug_fix( "Task", *SOURCE_WRITE_TOOLS, *BUGZILLA_READ_TOOLS, - *ENABLED_ACTION_TOOLS, + *enabled_action_tools, *FIREFOX_TOOLS, ], model=model, @@ -150,36 +146,40 @@ async def run_bug_fix( setting_sources=[], ) - # Run one fresh agent context per bug. 
- exit_code = 0 rules_path = rules_dir.resolve() + if task: + user_prompt = ( + f"Bug to work on: {bug}\n\n" + f"Task: {task}\n\n" + f"The rules in {rules_path} are available if the task " + f"calls for them, but the task above is your primary " + f"directive — it overrides the default triage workflow." + ) + else: + user_prompt = ( + f"Triage bug {bug}.\n\nConsult the relevant rules in {rules_path}." + ) + + result_msg: ResultMessage | None = None with Reporter(verbose=verbose, log_path=log) as reporter: - for i, bug_id in enumerate(selected, 1): - print(f"[bug_fix] bug {i}/{len(selected)}: {bug_id}", file=sys.stderr) - reporter.header(f"bug {bug_id}") - - if task: - user_prompt = ( - f"Bug to work on: {bug_id}\n\n" - f"Task: {task}\n\n" - f"The rules in {rules_path} are available if the task " - f"calls for them, but the task above is your primary " - f"directive — it overrides the default triage workflow." - ) - else: - user_prompt = ( - f"Triage bug {bug_id}.\n\n" - f"Consult the relevant rules in {rules_path}." 
- ) - - async with ClaudeSDKClient(options=options) as client: - await client.query(user_prompt) - async for msg in client.receive_response(): - reporter.message(msg) - if isinstance(msg, ResultMessage) and msg.is_error: - exit_code = 1 + reporter.header(f"bug {bug}") + async with ClaudeSDKClient(options=options) as client: + await client.query(user_prompt) + async for msg in client.receive_response(): + reporter.message(msg) + if isinstance(msg, ResultMessage): + result_msg = msg + + if result_msg is None: + raise AgentError(f"bug {bug}: agent produced no result message") + if result_msg.is_error: + raise AgentError( + f"bug {bug} triage failed: {result_msg.result or result_msg.subtype}" + ) return BugFixResult( - exit_code=exit_code, - bugs_processed=len(selected), + bug_id=bug, + result=result_msg.result, + num_turns=result_msg.num_turns, + total_cost_usd=result_msg.total_cost_usd, ) diff --git a/agents/bug-fix/hackbot_agents/bug_fix/config.py b/agents/bug-fix/hackbot_agents/bug_fix/config.py index ee5f877ac9..61524f36e2 100644 --- a/agents/bug-fix/hackbot_agents/bug_fix/config.py +++ b/agents/bug-fix/hackbot_agents/bug_fix/config.py @@ -1,5 +1,3 @@ -from hackbot_runtime.actions.claude_sdk import actions_to_tool_names - # Tools that can modify the source repo — blocked under dry-run. SOURCE_WRITE_TOOLS = {"Write", "Edit", "MultiEdit", "NotebookEdit"} @@ -13,14 +11,13 @@ ] -ENABLED_ACTION_TOOLS = actions_to_tool_names( - [ - "bugzilla.update_bug", - "bugzilla.add_comment", - "bugzilla.add_attachment", - "bugzilla.create_bug", - ] -) +# Recordable action types the agent may take, by dotted id. +ENABLED_ACTION_TYPES = [ + "bugzilla.update_bug", + "bugzilla.add_comment", + "bugzilla.add_attachment", + "bugzilla.create_bug", +] # Firefox build/test tools. 
FIREFOX_TOOLS = [ diff --git a/libs/hackbot-runtime/hackbot_runtime/__init__.py b/libs/hackbot-runtime/hackbot_runtime/__init__.py index 6335ac369c..277d2084f0 100644 --- a/libs/hackbot-runtime/hackbot_runtime/__init__.py +++ b/libs/hackbot-runtime/hackbot_runtime/__init__.py @@ -2,6 +2,7 @@ from hackbot_runtime.config import HackbotConfig from hackbot_runtime.context import HackbotContext from hackbot_runtime.errors import AgentError +from hackbot_runtime.results import HackbotAgentResult from hackbot_runtime.runtime import run, run_async from hackbot_runtime.source import ensure_source_repo from hackbot_runtime.uploader import SignedPolicyUploader @@ -9,6 +10,7 @@ __all__ = [ "ActionsRecorder", "AgentError", + "HackbotAgentResult", "HackbotConfig", "HackbotContext", "SignedPolicyUploader", diff --git a/libs/hackbot-runtime/hackbot_runtime/results.py b/libs/hackbot-runtime/hackbot_runtime/results.py new file mode 100644 index 0000000000..a4b3832bd8 --- /dev/null +++ b/libs/hackbot-runtime/hackbot_runtime/results.py @@ -0,0 +1,13 @@ +"""Base result model for hackbot agents. + +An agent's ``main()`` may return a subclass of :class:`HackbotAgentResult`; the +runtime serializes it into ``summary.json``'s ``findings``. Framework-neutral — +plain pydantic, no claude-agent-sdk dependency. 
+""" + +from pydantic import BaseModel + + +class HackbotAgentResult(BaseModel): + num_turns: int + total_cost_usd: float | None = None diff --git a/libs/hackbot-runtime/hackbot_runtime/runtime.py b/libs/hackbot-runtime/hackbot_runtime/runtime.py index ef67c65146..7cecdbbd89 100644 --- a/libs/hackbot-runtime/hackbot_runtime/runtime.py +++ b/libs/hackbot-runtime/hackbot_runtime/runtime.py @@ -5,19 +5,20 @@ import traceback from collections.abc import Awaitable, Callable from pathlib import Path -from typing import Any, NoReturn +from typing import NoReturn from pydantic import ValidationError from hackbot_runtime.config import HackbotConfig, load_config from hackbot_runtime.context import HackbotContext +from hackbot_runtime.results import HackbotAgentResult log = logging.getLogger("hackbot_runtime") -# An agent's main() returns its findings (a JSON-able dict) on success, or None -# if it has nothing to report; to fail the run it raises (AgentError, or any -# exception). The runtime turns that outcome into summary.json + an exit code. -Findings = dict[str, Any] | None +# An agent's main() returns a HackbotAgentResult on success; to fail the run it +# raises (AgentError, or any exception). The runtime turns that outcome into +# summary.json + an exit code. +Findings = HackbotAgentResult AgentMain = Callable[[HackbotContext], Findings] AsyncAgentMain = Callable[[HackbotContext], Awaitable[Findings]] @@ -115,8 +116,8 @@ def _load_hackbot(entrypoint: Callable, config: ConfigArg) -> HackbotContext | N def _finish(ctx: HackbotContext, outcome: object) -> int: """Write summary.json from the agent's outcome and return the exit code. - ``outcome`` is the agent's return value (findings dict or None) on success, - or the exception it raised on failure. + ``outcome`` is the agent's :class:`HackbotAgentResult` on success, or the + exception it raised on failure. 
""" if isinstance(outcome, BaseException): payload = _error_payload( @@ -125,14 +126,12 @@ def _finish(ctx: HackbotContext, outcome: object) -> int: traceback_str=traceback.format_exc(), ) exit_code = 1 - elif outcome is None or isinstance(outcome, dict): - payload = _ok_payload(ctx, outcome or {}) + elif isinstance(outcome, HackbotAgentResult): + payload = _ok_payload(ctx, outcome.model_dump()) exit_code = 0 else: - # Contract violation: not a findings dict, None, or an exception. - msg = ( - f"Agent returned {type(outcome).__name__}; expected a findings dict or None" - ) + # Contract violation: not a HackbotAgentResult or an exception. + msg = f"Agent returned {type(outcome).__name__}; expected a HackbotAgentResult" log.error(msg) payload = _error_payload(ctx, msg) exit_code = 1 diff --git a/libs/hackbot-runtime/tests/test_runtime.py b/libs/hackbot-runtime/tests/test_runtime.py index ce0f69ee7b..d85f0296b1 100644 --- a/libs/hackbot-runtime/tests/test_runtime.py +++ b/libs/hackbot-runtime/tests/test_runtime.py @@ -3,7 +3,7 @@ import json import pytest -from hackbot_runtime import AgentError, HackbotContext, run_async +from hackbot_runtime import AgentError, HackbotAgentResult, HackbotContext, run_async from hackbot_runtime.runtime import _discover_config_path, _finish, _resolve_config @@ -32,7 +32,7 @@ def test_summary_written_locally_without_uploader(tmp_path): reasoning="rule X", ) - code = _finish(ctx, {"bugs_processed": 1}) + code = _finish(ctx, HackbotAgentResult(num_turns=1)) assert code == 0 # Written under the per-run subdir: artifacts_dir / run_id. 
@@ -40,7 +40,7 @@ def test_summary_written_locally_without_uploader(tmp_path): (tmp_path / "artifacts" / "local-test" / "summary.json").read_text() ) assert summary["status"] == "ok" - assert summary["findings"] == {"bugs_processed": 1} + assert summary["findings"] == {"num_turns": 1, "total_cost_usd": None} assert summary["actions"][0]["type"] == "bugzilla.update_bug" @@ -56,22 +56,42 @@ def test_summary_written_for_exception(tmp_path): assert "boom" in summary["error"] -def test_non_dict_return_is_contract_error(tmp_path): +def test_non_result_return_is_contract_error(tmp_path): ctx = _ctx(tmp_path) - code = _finish(ctx, "not a dict") + # A bare dict (or None) is no longer accepted — only a HackbotAgentResult. + code = _finish(ctx, {"bugs_processed": 1}) assert code == 1 summary = json.loads( (tmp_path / "artifacts" / "local-test" / "summary.json").read_text() ) assert summary["status"] == "error" - assert "expected a findings dict" in summary["error"] + assert "expected a HackbotAgentResult" in summary["error"] + + +def test_summary_written_for_agent_result(tmp_path): + class _Result(HackbotAgentResult): + bug_id: int + + ctx = _ctx(tmp_path) + code = _finish(ctx, _Result(bug_id=42, num_turns=3, total_cost_usd=0.12)) + + assert code == 0 + summary = json.loads( + (tmp_path / "artifacts" / "local-test" / "summary.json").read_text() + ) + assert summary["status"] == "ok" + assert summary["findings"] == { + "num_turns": 3, + "total_cost_usd": 0.12, + "bug_id": 42, + } def test_runs_are_namespaced_by_run_id(tmp_path): ctx_a = _ctx(tmp_path, run_id="run-a") ctx_b = _ctx(tmp_path, run_id="run-b") - _finish(ctx_a, None) + _finish(ctx_a, HackbotAgentResult(num_turns=0)) _finish(ctx_b, RuntimeError("x")) base = tmp_path / "artifacts" @@ -141,7 +161,7 @@ def test_run_async_exits_zero_and_writes_summary(tmp_path, monkeypatch): _run_env(tmp_path, monkeypatch) async def main(ctx): - return {"did": "work"} + return HackbotAgentResult(num_turns=1) with 
pytest.raises(SystemExit) as exc: run_async(main) @@ -149,7 +169,7 @@ async def main(ctx): assert exc.value.code == 0 summary = json.loads((tmp_path / "t" / "summary.json").read_text()) assert summary["status"] == "ok" - assert summary["findings"] == {"did": "work"} + assert summary["findings"] == {"num_turns": 1, "total_cost_usd": None} def test_run_async_exits_nonzero_when_agent_raises(tmp_path, monkeypatch): From 5a08a060f243d72d25bff4cf161dd62aedf99734 Mon Sep 17 00:00:00 2001 From: Suhaib Mujahid Date: Thu, 11 Jun 2026 13:00:19 -0400 Subject: [PATCH 19/21] Rewrite agents/README.md as a friendlier onboarding guide --- agents/README.md | 175 +++++++++++++++++++++++------------------------ 1 file changed, 86 insertions(+), 89 deletions(-) diff --git a/agents/README.md b/agents/README.md index 9483b2b97f..6ac204478e 100644 --- a/agents/README.md +++ b/agents/README.md @@ -1,85 +1,89 @@ -# hackbot agents +# Hackbot Agents -Each subdirectory here is **one self-contained hackbot agent** — its logic, -entrypoint, and deployment live together. `bug-fix/` is the reference example. +Each subdirectory is a single, self-contained agent — its logic, its entrypoint, +and its deployment all live together, so you can understand one agent without +hunting around the repo. -## Anatomy of an agent (`agents//`) +New here? The best way to start is to read through **`bug-fix/`** — it's our reference +agent, and the fastest path to your own is to copy it and adapt. + +## How an agent works (the important part) + +When the platform runs your agent, it calls `python -m hackbot_agents.`, which runs +your `__main__.py`. Your job is to fill in three small pieces: + +```python +class AgentInputs(BaseSettings): # per-run inputs, read from env (bug_id -> BUG_ID) + bug_id: int + +async def main(ctx: HackbotContext) -> BugFixResult: + inputs = AgentInputs() + return await run_bug_fix(bug=inputs.bug_id, ...) 
# your real logic lives in agent.py + +run_async(main) # finds hackbot.toml, runs main, exits the process +``` + +Three rules are worth remembering: + +- **To report success,** return a `HackbotAgentResult` (subclass it with your own fields). + The runtime saves it to `summary.json` under `findings`. +- **To report failure,** just raise. Use `AgentError("…")` when it's an expected, explainable + failure; let any other exception bubble up for an unexpected crash. +- **`ctx` is your window to the platform** — everything it prepared for you hangs off it: + `ctx.source_repo`, `ctx.firefox`, `ctx.anthropic.api_key`, `ctx.actions`, + `ctx.publish_file`, `ctx.publish_json`. You never wire these up yourself. + +## What's in an agent folder (`agents//`) ``` agents// - pyproject.toml # dist "hackbot-agent-"; packages = ["hackbot_agents"]; deps: hackbot-runtime[claude-sdk] + agent-specific - hackbot.toml # declares platform capabilities: [source], [firefox] - Dockerfile # multi-stage: builder / agent [/ broker] - compose.yml # local run; sets static env (e.g. the broker URL) - hackbot_agents/ # shared PEP 420 namespace — NO __init__.py here - / # the agent package (e.g. bug_fix) - __init__.py # empty package marker - agent.py # run_() logic + helpers (the reusable agent library) - __main__.py # entrypoint: AgentInputs(BaseSettings) + async def main(ctx) -> dict + run_async(main) - prompts/ rules/ # assets read via Path(__file__).parent - broker/ # OPTIONAL: secret-holding MCP sidecar (python -m hackbot_agents..broker) + pyproject.toml # the distribution "hackbot-agent-" and its dependencies + hackbot.toml # what you need the platform to prepare: [source], [firefox] + Dockerfile # how it ships + compose.yml # how to run it locally + hackbot_agents/ # a shared namespace package — please don't add __init__.py here! + / # your agent's package (e.g. 
bug_fix) + __init__.py # empty + __main__.py # AgentInputs + main(ctx) + run_async(main) + agent.py # entrypoint: your prompts, logic, and HackbotAgentResult subclass ``` -## `hackbot.toml` — what the platform provides +One thing to watch: **never create `hackbot_agents/__init__.py`.** Leaving it out is what +lets several agents live side by side in one environment without overwriting each other (PEP 420). +It's an easy mistake to make, and a confusing one to debug. -Declare the capabilities your agent needs in a `hackbot.toml` at the agent root -(alongside `pyproject.toml` / `Dockerfile`); the runtime prepares them and hands -you a single `HackbotContext`. Every table is optional — omit `[source]` if you -don't operate on a repo, omit `[firefox]` if you don't need a Firefox build. +## Telling the platform what you need (`hackbot.toml`) + +Think of `hackbot.toml` as your request to the platform: "please have these ready for me." +Everything is optional — only list what you actually use. ```toml -[source] # the runtime clones/refreshes this for you +[source] # the platform shallow-clones and refreshes this for you repo_url = "https://github.com/mozilla-firefox/firefox.git" -checkout_path = "/workspace/firefox" # default; env SOURCE_REPO overrides [firefox] # Firefox build paths, derived from the checkout enabled = true objdir = "objdir-ff-asan" ``` -Agent identity (name/description) stays in `pyproject.toml`; model defaults and -tool allowlists stay in code; secrets and per-run inputs stay in the -environment. The toml holds only platform-capability declarations. - -Every agent ships its package under the shared **`hackbot_agents` PEP 420 namespace** -(`hackbot_agents.`), so multiple agents installed into one environment never -collide. **Never add `hackbot_agents/__init__.py`** — the missing namespace-level -`__init__.py` is what lets the agent distributions merge instead of clobbering each other. 
- -The runtime invokes the agent with `python -m hackbot_agents.`, running -`hackbot_agents//__main__.py`. That module is the thin deployment wrapper: -it defines `AgentInputs(BaseSettings)`, an `async def main(ctx)`, and calls -`run_async(main)`. `run_async` auto-discovers `hackbot.toml` (cwd first — the -Dockerfile copies it into `/app` — then walks up from the entry module to the -agent root in an editable checkout) and exits the process with the run's status. -`main` validates inputs and calls the `run_()` logic in `agent.py`, -reading everything the platform provides off `ctx` (`ctx.source_repo`, -`ctx.firefox`, `ctx.anthropic`, `ctx.actions`, `ctx.publish_file`). - -## Shared building blocks (in `hackbot-runtime`) - -Don't re-implement these — import them: - -- `from hackbot_runtime import HackbotContext, AgentError, run_async` — the entrypoint - contract. `main(ctx)` **returns a findings dict** (or `None`) on success, and **raises** - to fail — `AgentError("…")` for an expected failure, any exception for a crash. The - runtime turns that into `summary.json` (`status`/`error`/`findings`) and the process - exit code; `run_async(main)` exits the process itself, so the entrypoint is just that - one call. `HackbotContext` is the one object `main()` receives; it answers for the - platform: `ctx.source_repo` (prepared from `[source]` on first access), `ctx.firefox` - (a `FirefoxContext` from `[firefox]`), `ctx.anthropic.api_key` (validated), plus the - results/artifacts/actions plumbing (`ctx.actions`, `ctx.publish_file`, - `ctx.publish_json`). -- `from hackbot_runtime import ensure_source_repo` — the lower-level shallow-clone/refresh - primitive (you normally don't call this directly; `ctx.source_repo` does it for you). -- `from hackbot_runtime.claude import Reporter` — renders streamed claude-agent-sdk - messages to stdout/log. Call `reporter.header("...")` per work item, `reporter.message(msg)` per message. 
-- `from hackbot_runtime.actions.claude_sdk import actions_server_for` — returns - `(recorder, mcp_server)`; write actions land in `summary.json` instead of mutating anything. - -Reusable MCP **tool servers** live in the separate `agent-tools` package, each behind its -own optional extra (`agent-tools[bugzilla]`, `agent-tools[firefox]`). Import the domain -module and build the server via the adapter: +Everything else has a natural home: your agent's name and description go in `pyproject.toml`, +model and tool choices stay in code, and secrets and per-run inputs come from the environment. + +## Building blocks you can reuse + +Please reach for these instead of rolling your own — they're shared on purpose. + +From **`hackbot-runtime`**: + +- `HackbotContext, AgentError, HackbotAgentResult, run_async` — the pieces from the contract above. +- `from hackbot_runtime.claude import Reporter` — pretty-prints the agent's streamed messages + to stdout and your log (call `reporter.header(...)` per work item, `reporter.message(msg)` per message). +- `from hackbot_runtime.actions.claude_sdk import actions_server_for` — gives you + `(recorder, mcp_server)` so write-actions get recorded into `summary.json` rather than + silently mutating the world. + +Your actual **tools** (the things the model can call) come from **`agent-tools`**, each behind +its own extra (`[bugzilla]`, `[firefox]`): ```python from agent_tools import bugzilla @@ -87,27 +91,20 @@ from agent_tools.claude_sdk import build_sdk_server server = build_sdk_server("bugzilla", BugzillaContext(client=...), bugzilla.TOOLS) ``` -You still assemble your own `ClaudeAgentOptions` and drive the `ClaudeSDKClient` loop — -those stay explicit and in your hands. - -## Adding a new agent - -1. `agents//hackbot.toml` — declare `[source]`/`[firefox]` if you need them - (omit either otherwise). -2. 
`agents//hackbot_agents//__main__.py` — define `AgentInputs(BaseSettings)` - (domain inputs only), an `async def main(ctx: HackbotContext) -> dict` that returns - findings on success and raises `AgentError` to fail, and end with `run_async(main)` (it - discovers `hackbot.toml` — cwd, then up to the agent root — and exits the process itself). -3. `agents//hackbot_agents//agent.py` — your prompts/logic, exposing the - `run_()` entrypoint `main` calls (leave `/__init__.py` empty). Do **not** - create `agents//hackbot_agents/__init__.py`. -4. Copy `pyproject.toml`, `Dockerfile`, `compose.yml` from `bug-fix/` and rename (the - Dockerfile CMDs become `python -m hackbot_agents.` / `… .broker`, and it copy - `agents//hackbot.toml` into `/app`). -5. In `services/hackbot-api/app/schemas.py`, add a Pydantic input model. -6. In `services/hackbot-api/app/agents.py`, add one `AGENT_REGISTRY` entry - (`name` + `description` + `job_name` + `input_schema`). **No `build_env`** — - env vars are derived from the schema by `model_to_env` (field `bug_id` → `BUG_ID`). - Put deploy-time constants (broker URLs, etc.) in the Job's static env config, not the schema. - -That's it: one folder + one schema + one registry line. +From there, you assemble your own `ClaudeAgentOptions` and drive the `ClaudeSDKClient` loop — +that part stays in your hands, where you want it. + +## Creating your own agent + +1. **Copy `bug-fix/`** as your starting point. Rename the folder, the distribution name in + `pyproject.toml`, and the commands in `Dockerfile`/`compose.yml` (`python -m hackbot_agents.`). +2. **Trim `hackbot.toml`** to just the `[source]`/`[firefox]` tables you need. +3. **Write your two modules:** `__main__.py` (`AgentInputs` + `main`) and `agent.py` (your logic + plus a `HackbotAgentResult` subclass). Keep `/__init__.py` empty. +4. 
**Register it** in `services/hackbot-api/`: add a Pydantic input model in `app/schemas.py`, + and a single `AGENT_REGISTRY` entry in `app/agents.py` (`name`/`description`/`job_name`/ + `input_schema`). Env vars are derived from your schema automatically (`bug_id` → `BUG_ID`), + so there's no `build_env` to write — put deploy-time constants like broker URLs in the Job's + static env instead. + +And that's the whole recipe: one folder, one schema, one registry line. Welcome aboard! From 2cff913c76999fe108b6de93221fbf76097dc3e8 Mon Sep 17 00:00:00 2001 From: Suhaib Mujahid Date: Thu, 11 Jun 2026 13:58:09 -0400 Subject: [PATCH 20/21] Remove Anthropic credentials precheck --- agents/bug-fix/hackbot_agents/bug_fix/__main__.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/agents/bug-fix/hackbot_agents/bug_fix/__main__.py b/agents/bug-fix/hackbot_agents/bug_fix/__main__.py index c4045d755f..20f2136344 100644 --- a/agents/bug-fix/hackbot_agents/bug_fix/__main__.py +++ b/agents/bug-fix/hackbot_agents/bug_fix/__main__.py @@ -19,8 +19,6 @@ class AgentInputs(BaseSettings): async def main(ctx: HackbotContext) -> BugFixResult: inputs = AgentInputs() - # Fail fast if the platform did not provide Anthropic credentials. 
- ctx.anthropic.api_key log_path = Path(tempfile.mkdtemp(prefix="bug-fix-log-")) / "agent.log" From 5e994403aaac4c3437e21150f861b3f29376bda6 Mon Sep 17 00:00:00 2001 From: Suhaib Mujahid Date: Thu, 11 Jun 2026 14:21:53 -0400 Subject: [PATCH 21/21] Move agent log path and upload to hackbot-runtime --- .../hackbot_agents/bug_fix/__main__.py | 44 +++++++------------ .../hackbot_runtime/context.py | 11 +++++ .../hackbot_runtime/runtime.py | 12 +++++ libs/hackbot-runtime/tests/test_runtime.py | 18 ++++++++ 4 files changed, 57 insertions(+), 28 deletions(-) diff --git a/agents/bug-fix/hackbot_agents/bug_fix/__main__.py b/agents/bug-fix/hackbot_agents/bug_fix/__main__.py index 20f2136344..c1423e3d67 100644 --- a/agents/bug-fix/hackbot_agents/bug_fix/__main__.py +++ b/agents/bug-fix/hackbot_agents/bug_fix/__main__.py @@ -1,6 +1,3 @@ -import tempfile -from pathlib import Path - from hackbot_runtime import HackbotContext, run_async from pydantic_settings import BaseSettings, SettingsConfigDict @@ -20,31 +17,22 @@ class AgentInputs(BaseSettings): async def main(ctx: HackbotContext) -> BugFixResult: inputs = AgentInputs() - log_path = Path(tempfile.mkdtemp(prefix="bug-fix-log-")) / "agent.log" - - try: - result = await run_bug_fix( - task="Triage and fix the bug, and verify the fix", - bugzilla_mcp_server={ - "type": "http", - "url": inputs.bugzilla_mcp_url, - }, - source_repo=ctx.source_repo, - fx_ctx=ctx.firefox, - bug=inputs.bug_id, - model=inputs.model, - max_turns=inputs.max_turns, - effort=inputs.effort, - log=log_path, - verbose=True, - actions_recorder=ctx.actions, - ) - finally: - if log_path.exists(): - # Uploaded when a signed policy is set, else copied into ./artifacts. 
- ctx.publish_file("logs/agent.log", log_path, "text/plain") - - return result + return await run_bug_fix( + task="Triage and fix the bug, and verify the fix", + bugzilla_mcp_server={ + "type": "http", + "url": inputs.bugzilla_mcp_url, + }, + source_repo=ctx.source_repo, + fx_ctx=ctx.firefox, + bug=inputs.bug_id, + model=inputs.model, + max_turns=inputs.max_turns, + effort=inputs.effort, + log=ctx.log_path, + verbose=True, + actions_recorder=ctx.actions, + ) if __name__ == "__main__": diff --git a/libs/hackbot-runtime/hackbot_runtime/context.py b/libs/hackbot-runtime/hackbot_runtime/context.py index 5aa364ca8b..6f71c664e1 100644 --- a/libs/hackbot-runtime/hackbot_runtime/context.py +++ b/libs/hackbot-runtime/hackbot_runtime/context.py @@ -14,6 +14,7 @@ import datetime import os +import tempfile import uuid from functools import cached_property from pathlib import Path @@ -147,6 +148,16 @@ def run_artifacts_dir(self) -> Path: """Per-run local artifacts directory: ``artifacts_dir / run_id``.""" return self.artifacts_dir / self.run_id + @cached_property + def log_path(self) -> Path: + """A writable path for the agent's run log; published by the runtime. + + The parent dir is created on first access (so a ``Reporter`` can open the + file straight away). Agents that never write a log just leave it absent, + and the runtime's end-of-run log publication becomes a no-op.
+ """ + return Path(tempfile.mkdtemp(prefix=f"hackbot-{self.run_id}-")) / "agent.log" + @cached_property def actions(self) -> ActionsRecorder: return ActionsRecorder(self.uploader, artifacts_dir=self.run_artifacts_dir) diff --git a/libs/hackbot-runtime/hackbot_runtime/runtime.py b/libs/hackbot-runtime/hackbot_runtime/runtime.py index 7cecdbbd89..d60234aac4 100644 --- a/libs/hackbot-runtime/hackbot_runtime/runtime.py +++ b/libs/hackbot-runtime/hackbot_runtime/runtime.py @@ -29,6 +29,7 @@ _CONFIG_NAME = "hackbot.toml" _SUMMARY_NAME = "summary.json" +_AGENT_LOG_KEY = "logs/agent.log" def _configure_logging() -> None: @@ -113,6 +114,12 @@ def _load_hackbot(entrypoint: Callable, config: ConfigArg) -> HackbotContext | N return None +def _publish_log(ctx: HackbotContext) -> None: + """Publish the run log under the canonical key, if the agent wrote one.""" + if ctx.log_path.exists(): + ctx.publish_file(_AGENT_LOG_KEY, ctx.log_path, "text/plain") + + def _finish(ctx: HackbotContext, outcome: object) -> int: """Write summary.json from the agent's outcome and return the exit code. @@ -136,6 +143,11 @@ def _finish(ctx: HackbotContext, outcome: object) -> int: payload = _error_payload(ctx, msg) exit_code = 1 + try: + _publish_log(ctx) + except Exception: + log.exception("Failed to publish agent log") + # Upload when a signed policy is configured, else write into the local # artifacts dir (so local/compose/direct runs leave it on the host). 
try: diff --git a/libs/hackbot-runtime/tests/test_runtime.py b/libs/hackbot-runtime/tests/test_runtime.py index d85f0296b1..3469283b42 100644 --- a/libs/hackbot-runtime/tests/test_runtime.py +++ b/libs/hackbot-runtime/tests/test_runtime.py @@ -88,6 +88,24 @@ class _Result(HackbotAgentResult): } +def test_finish_publishes_agent_log_when_written(tmp_path): + ctx = _ctx(tmp_path) + ctx.log_path.write_text("hello from the agent\n") + + _finish(ctx, HackbotAgentResult(num_turns=1)) + + published = tmp_path / "artifacts" / "local-test" / "logs" / "agent.log" + assert published.read_text() == "hello from the agent\n" + + +def test_finish_skips_log_when_none_written(tmp_path): + ctx = _ctx(tmp_path) # never touch ctx.log_path -> no file written + + _finish(ctx, HackbotAgentResult(num_turns=1)) + + assert not (tmp_path / "artifacts" / "local-test" / "logs" / "agent.log").exists() + + def test_runs_are_namespaced_by_run_id(tmp_path): ctx_a = _ctx(tmp_path, run_id="run-a") ctx_b = _ctx(tmp_path, run_id="run-b")