feat(web): multi-phase review agent with per-file parallel LLM calls

Gavin Williams · claude · Gavin Williams · commit f643f862f91c · 2026-04-30T10:15:35.000+01:00
Replace the per-chunk (N LLM calls) architecture with a two-phase approach:

1. A single MR summary pass over all changed files to detect cross-file
   semantic changes (renames, signature changes, removed exports, etc.)
   that individual file reviewers should be aware of.
2. Per-file LLM reviews that batch all hunks for a file into a single call,
   parallelised across files via a concurrency-capped pool
   (MAX_CONCURRENT_FILE_REVIEWS = 5).

This reduces LLM calls from one-per-hunk to one-per-file (plus one summary
call), while giving each file review the full picture via the MR summary
context.

Additional changes:
- Export `validateLogPath` from `invokeDiffReviewLlm` for reuse in the
  summary node
- Add "How it works" section to the review agent docs

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
diff --git a/docs/docs/features/agents/review-agent.mdx b/docs/docs/features/agents/review-agent.mdx
@@ -3,12 +3,24 @@ title: AI Code Review Agent
 sidebarTitle: AI code review agent
 ---
 
-This agent provides codebase-aware reviews for your GitHub PRs and GitLab MRs. For each diff, the agent fetches relevant context from your indexed codebase and feeds it into a configured language model for a detailed review.
+This agent provides codebase-aware reviews for your GitHub PRs and GitLab MRs. When a review is triggered, the agent runs a two-phase LLM pipeline and posts inline comments on the changed files.
 
 The AI Code Review Agent is [fair source](https://github.com/sourcebot-dev/sourcebot/tree/main/packages/web/src/features/agents/review-agent) and packaged in [Sourcebot](https://github.com/sourcebot-dev/sourcebot). To get started, [deploy Sourcebot](/docs/deployment/docker-compose) and follow the configuration instructions below.
 
 ![AI Code Review Agent Example](/images/review_agent_example.png)
 
+# How it works
+
+When a review is triggered, the agent runs the following steps:
+
+1. **MR summary pass.** A single LLM call analyses the full set of changed files to identify cross-file semantic changes, such as renamed functions, changed signatures, removed exports, or behaviour changes with cross-file implications. This summary is passed as additional context into each per-file review.
+
+2. **Per-file reviews.** One LLM call is made per changed file. Each call receives the file's complete diff (all hunks combined), the full file content, the PR title and description, the MR summary from step 1, and any configured context files. Reviews run in parallel, up to five files at a time.
+
+3. **Inline comments.** The agent posts the results as inline review comments on the PR or MR.
+
+If the MR summary pass finds no cross-file concerns, it returns nothing and the per-file reviews proceed without it.
+
 # Language model
 
 The review agent uses whichever language model you have configured in your `config.json`. All providers supported by Sourcebot (OpenAI, Anthropic, AWS Bedrock, Azure OpenAI, and others) work out of the box.
@@ -135,7 +147,7 @@ If you have multiple models configured, set `REVIEW_AGENT_MODEL` to the `display
 
 By default, the agent does not review PRs and MRs automatically. To enable automatic reviews on every new or updated PR/MR, set `REVIEW_AGENT_AUTO_REVIEW_ENABLED` to `true`.
 
-You can also trigger a review manually by commenting `/review` on any PR or MR. To use a different command, set `REVIEW_AGENT_REVIEW_COMMAND` to your preferred value (without the leading slash).
+You can also trigger a review manually by commenting `/review` on any PR or MR. To use a different command, set `REVIEW_AGENT_REVIEW_COMMAND` to your preferred value (without the leading slash).
 
 # Environment variable reference
 
diff --git a/packages/web/src/features/agents/review-agent/nodes/generateDiffReviewPrompt.ts b/packages/web/src/features/agents/review-agent/nodes/generateDiffReviewPrompt.ts
@@ -4,29 +4,35 @@ import { createLogger } from "@sourcebot/shared";
 
 const logger = createLogger('generate-diff-review-prompt');
 
-export const generateDiffReviewPrompt = async (diff: sourcebot_diff, context: sourcebot_context[], rules: string[]) => {
+export const generateDiffReviewPrompt = async (diffs: sourcebot_diff[], context: sourcebot_context[], rules: string[]) => {
     logger.debug("Executing generate_diff_review_prompt");
-        
-    const prompt = `
-    You are an expert software engineer that excels at reviewing code changes. Given the input, additional context, and rules defined below, review the code changes and provide a detailed review. The review you provide
-    must conform to all of the rules defined below. The output format of your review must conform to the output format defined below.
-
-    # Input
 
-    The input is the old and new code snippets, which represent a single hunk from a git diff. The old code snippet is the code before the changes were made, and the new code snippet is the code after the changes were made. Each code snippet
-    is a sequence of lines each with a line number.
+    const hunksText = diffs.map((diff, i) => `
+    ## Hunk ${i + 1}
 
-    ## Old Code Snippet
+    ### Old Code
 
     \`\`\`
     ${diff.oldSnippet}
     \`\`\`
 
-    ## New Code Snippet
+    ### New Code
 
     \`\`\`
     ${diff.newSnippet}
     \`\`\`
+    `).join('\n');
+
+    const prompt = `
+    You are an expert software engineer that excels at reviewing code changes. Given the input, additional context, and rules defined below, review the code changes and provide a detailed review. The review you provide
+    must conform to all of the rules defined below. The output format of your review must conform to the output format defined below.
+
+    # Input
+
+    The input is the old and new code snippets for one or more hunks from a git diff for a single file. The old code snippet is the code before the changes were made, and the new code snippet is the code after the changes were made. Each code snippet
+    is a sequence of lines each with a line number.
+
+    ${hunksText}
 
     # Additional Context
 
diff --git a/packages/web/src/features/agents/review-agent/nodes/generateMrSummary.ts b/packages/web/src/features/agents/review-agent/nodes/generateMrSummary.ts
@@ -0,0 +1,97 @@
+import { sourcebot_context, sourcebot_pr_payload } from "@/features/agents/review-agent/types";
+import { getAISDKLanguageModelAndOptions, getConfiguredLanguageModels } from "@/features/chat/utils.server";
+import { validateLogPath } from "@/features/agents/review-agent/nodes/invokeDiffReviewLlm";
+import { env } from "@sourcebot/shared";
+import { generateText } from "ai";
+import { createLogger } from "@sourcebot/shared";
+import fs from "fs";
+
+const logger = createLogger('generate-mr-summary');
+
+/**
+ * Summarises cross-file semantic changes (renames, signature changes, removed
+ * exports, etc.) for the whole MR with a single LLM call.
+ * @param pr_payload Parsed PR/MR whose title and per-file hunks form the prompt.
+ * @param reviewAgentLogPath Optional log file the prompt and response are appended to.
+ * @returns A `pr_summary` context; null if no model is configured, the reply is empty, or generation throws.
+ */
+export const generateMrSummary = async (
+    pr_payload: sourcebot_pr_payload,
+    reviewAgentLogPath: string | undefined,
+): Promise<sourcebot_context | null> => {
+    logger.debug("Executing generate_mr_summary");
+
+    const models = await getConfiguredLanguageModels();
+    if (models.length === 0) {
+        logger.warn("No language models configured, skipping MR summary");
+        return null;
+    }
+    // Default to the first configured model; REVIEW_AGENT_MODEL overrides it by displayName.
+    let selectedModel = models[0];
+    if (env.REVIEW_AGENT_MODEL) {
+        const match = models.find((m) => m.displayName === env.REVIEW_AGENT_MODEL);
+        if (match) {
+            selectedModel = match;
+        } else {
+            logger.warn(`REVIEW_AGENT_MODEL="${env.REVIEW_AGENT_MODEL}" did not match any configured model displayName. Falling back to the first configured model.`);
+        }
+    }
+
+    const { model, providerOptions, temperature } = await getAISDKLanguageModelAndOptions(selectedModel);
+    // Render every file's hunks as plain text, one section per file, for the prompt.
+    const diffSummary = pr_payload.file_diffs.map((fileDiff) => {
+        const header = fileDiff.from !== fileDiff.to
+            ? `File: ${fileDiff.to} (renamed from ${fileDiff.from})`
+            : `File: ${fileDiff.to}`;
+        const hunks = fileDiff.diffs.map((d, i) =>
+            `Hunk ${i + 1}:\n--- Old\n${d.oldSnippet}\n+++ New\n${d.newSnippet}`
+        ).join('\n\n');
+        return `${header}\n${hunks}`;
+    }).join('\n\n---\n\n');
+
+    const prompt = `You are reviewing a pull request titled "${pr_payload.title}".
+
+Below are all the changed files and their diffs. Identify and summarise semantic changes that reviewers of individual files should be aware of — such as renamed functions or types, changed signatures or interfaces, removed exports, or behaviour changes with cross-file implications.
+
+If there are no noteworthy cross-file semantic concerns, respond with an empty string.
+
+# Changed Files
+
+${diffSummary}`;
+
+    if (reviewAgentLogPath) {
+        validateLogPath(reviewAgentLogPath);
+        fs.appendFileSync(reviewAgentLogPath, `\n\nMR Summary Prompt:\n${prompt}`);
+    }
+
+    try {
+        const result = await generateText({
+            model,
+            system: "You are a code review assistant. Provide a concise plain-text summary of cross-file semantic changes in a pull request. Respond with an empty string if there are none.",
+            prompt,
+            providerOptions,
+            temperature,
+        });
+
+        const summary = result.text.trim();
+
+        if (reviewAgentLogPath) {
+            validateLogPath(reviewAgentLogPath);
+            fs.appendFileSync(reviewAgentLogPath, `\n\nMR Summary Response:\n${summary}`);
+        }
+        if (!summary) {
+            logger.debug("No cross-file semantic changes detected, skipping summary context");
+            return null;
+        }
+
+        logger.debug("Completed generate_mr_summary");
+        return {
+            type: "pr_summary",
+            description: "A summary of cross-file semantic changes in this pull request",
+            context: summary,
+        };
+    } catch (error) {
+        logger.error("Error generating MR summary, proceeding without it:", error);
+        return null;
+    }
+};
diff --git a/packages/web/src/features/agents/review-agent/nodes/generatePrReview.ts b/packages/web/src/features/agents/review-agent/nodes/generatePrReview.ts
@@ -1,52 +1,101 @@
-import { sourcebot_pr_payload, sourcebot_diff_review, sourcebot_file_diff_review, sourcebot_context } from "@/features/agents/review-agent/types";
+import { sourcebot_pr_payload, sourcebot_file_diff_review, sourcebot_context } from "@/features/agents/review-agent/types";
 import { generateDiffReviewPrompt } from "@/features/agents/review-agent/nodes/generateDiffReviewPrompt";
 import { invokeDiffReviewLlm } from "@/features/agents/review-agent/nodes/invokeDiffReviewLlm";
 import { fetchFileContent } from "@/features/agents/review-agent/nodes/fetchFileContent";
+import { generateMrSummary } from "@/features/agents/review-agent/nodes/generateMrSummary";
 import { createLogger } from "@sourcebot/shared";
 
 const logger = createLogger('generate-pr-review');
 
-export const generatePrReviews = async (reviewAgentLogFileName: string | undefined, pr_payload: sourcebot_pr_payload, rules: string[]): Promise<sourcebot_file_diff_review[]> => {
-    logger.debug("Executing generate_pr_reviews");
+const MAX_CONCURRENT_FILE_REVIEWS = 5;
 
-    const file_diff_reviews: sourcebot_file_diff_review[] = [];
-    for (const file_diff of pr_payload.file_diffs) {
-        const reviews: sourcebot_diff_review[] = [];
+/**
+ * Runs `tasks` on min(limit, tasks.length) workers that each pull the next index
+ * from a shared counter. Never rejects: every task's outcome is stored at its
+ * input index in PromiseSettledResult shape, like Promise.allSettled.
+ */
+async function withConcurrencyLimit<T>(
+    tasks: Array<() => Promise<T>>,
+    limit: number,
+): Promise<PromiseSettledResult<T>[]> {
+    const results: PromiseSettledResult<T>[] = new Array(tasks.length);
+    let nextIndex = 0; // shared by all workers
 
-        for (const diff of file_diff.diffs) {
+    async function worker() {
+        while (nextIndex < tasks.length) {
+            const index = nextIndex++; // claimed before the await below, so no two workers take the same task
             try {
-                const fileContentContext = await fetchFileContent(pr_payload, file_diff.to);
-                const context: sourcebot_context[] = [
-                    {
-                        type: "pr_title",
-                        description: "The title of the pull request",
-                        context: pr_payload.title,
-                    },
-                    {
-                        type: "pr_description",
-                        description: "The description of the pull request",
-                        context: pr_payload.description,
-                    },
-                    fileContentContext,
-                ];
-
-                const prompt = await generateDiffReviewPrompt(diff, context, rules);
-                
-                const diffReview = await invokeDiffReviewLlm(reviewAgentLogFileName, prompt);
-                reviews.push(...diffReview.reviews);
-            } catch (error) {
-                logger.error(`Error generating review for ${file_diff.to}: ${error}`);
+                results[index] = { status: 'fulfilled', value: await tasks[index]() };
+            } catch (reason) {
+                results[index] = { status: 'rejected', reason };
             }
         }
-        
-        if (reviews.length > 0) {
-            file_diff_reviews.push({
+    }
+
+    await Promise.all(Array.from({ length: Math.min(limit, tasks.length) }, worker)); // NOTE(review): a limit <= 0 starts no workers, so results would be returned unfilled
+    return results;
+}
+
+/**
+ * Reviews a PR/MR with one MR-wide summary call, then one LLM review per changed file (bounded concurrency). Failures are logged and skipped; returns only files that produced review comments.
+ */
+export const generatePrReviews = async (reviewAgentLogFileName: string | undefined, pr_payload: sourcebot_pr_payload, rules: string[]): Promise<sourcebot_file_diff_review[]> => {
+    logger.debug("Executing generate_pr_reviews");
+
+    // Run MR summary upfront to detect cross-file semantic changes; allSettled keeps a summary failure from aborting the review.
+    const mrSummaryResult = await Promise.allSettled([generateMrSummary(pr_payload, reviewAgentLogFileName)]);
+    const mrSummaryContext: sourcebot_context[] = [];
+    if (mrSummaryResult[0].status === 'fulfilled' && mrSummaryResult[0].value !== null) {
+        mrSummaryContext.push(mrSummaryResult[0].value);
+    } else if (mrSummaryResult[0].status === 'rejected') {
+        logger.warn(`MR summary generation failed: ${mrSummaryResult[0].reason}`);
+    }
+
+    // Per-file review — one LLM call per file, parallelised with a concurrency cap.
+    logger.debug(`Reviewing ${pr_payload.file_diffs.length} file(s)`);
+    const fileResults = await withConcurrencyLimit(
+        pr_payload.file_diffs.map((file_diff) => async () => {
+            const fileContentContext = await fetchFileContent(pr_payload, file_diff.to);
+            const context: sourcebot_context[] = [
+                {
+                    type: "pr_title",
+                    description: "The title of the pull request",
+                    context: pr_payload.title,
+                },
+                {
+                    type: "pr_description",
+                    description: "The description of the pull request",
+                    context: pr_payload.description,
+                },
+                fileContentContext,
+                ...mrSummaryContext,
+            ];
+
+            const prompt = await generateDiffReviewPrompt(file_diff.diffs, context, rules);
+            const diffReview = await invokeDiffReviewLlm(reviewAgentLogFileName, prompt);
+
+            if (diffReview.reviews.length === 0) {
+                return null;
+            }
+
+            return {
                 filename: file_diff.to,
-                reviews: reviews,
-            });
+                oldFilename: file_diff.from,
+                reviews: diffReview.reviews,
+            } satisfies sourcebot_file_diff_review;
+        }),
+        MAX_CONCURRENT_FILE_REVIEWS,
+    );
+
+    const file_diff_reviews: sourcebot_file_diff_review[] = [];
+    for (const result of fileResults) { // fileResults is index-aligned with pr_payload.file_diffs
+        if (result.status === 'rejected') {
+            logger.error(`Error generating review for ${pr_payload.file_diffs[fileResults.indexOf(result)]?.to}: ${result.reason}`);
+        } else if (result.value !== null) {
+            file_diff_reviews.push(result.value);
         }
     }
 
     logger.debug("Completed generate_pr_reviews");
     return file_diff_reviews;
-}
+}
diff --git a/packages/web/src/features/agents/review-agent/nodes/gitlabMrParser.ts b/packages/web/src/features/agents/review-agent/nodes/gitlabMrParser.ts
@@ -88,7 +88,7 @@ export const gitlabMrParser = async (
         (file): file is sourcebot_file_diff => file !== null,
     );
 
-    logger.debug("Completed gitlab_mr_parser");
+    logger.debug(`Completed gitlab_mr_parser: ${filteredSourcebotFileDiffs.length} file(s) parsed`);
     return {
         title: mr.title,
         description: mr.description ?? "",
diff --git a/packages/web/src/features/agents/review-agent/nodes/invokeDiffReviewLlm.ts b/packages/web/src/features/agents/review-agent/nodes/invokeDiffReviewLlm.ts
@@ -12,7 +12,7 @@ export const getReviewAgentLogDir = (): string => {
     return path.join(env.DATA_CACHE_DIR, 'review-agent');
 };
 
-const validateLogPath = (logPath: string): void => {
+export const validateLogPath = (logPath: string): void => {
     const resolved = path.resolve(logPath);
     const logDir = getReviewAgentLogDir();
     if (!resolved.startsWith(logDir + path.sep)) {