github
diff --git a/.github/workflows/performance-monitor.yml b/.github/workflows/performance-monitor.yml
Lines changed: 66 additions & 1 deletion
diff --git a/benchmarks/.gitkeep b/benchmarks/.gitkeep
diff --git a/package.json b/package.json
Lines changed: 1 addition & 0 deletions
diff --git a/scripts/ci/benchmark-trend.test.ts b/scripts/ci/benchmark-trend.test.ts
Lines changed: 150 additions & 0 deletions
@@ -11,8 +11,12 @@ on:
         required: false
         default: "30"
 
+concurrency:
+  group: performance-benchmark
+  cancel-in-progress: false  # Let the running benchmark finish; queue the next one
+
 permissions:
-  contents: read
+  contents: write  # Required to push to benchmark-data branch
   issues: write
 
 jobs:
@@ -23,6 +27,8 @@ jobs:
     steps:
       - name: Checkout
         uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
 
       - name: Setup Node.js
         uses: actions/setup-node@v4
@@ -44,6 +50,21 @@ jobs:
           WRAPPER
           sudo chmod +x /usr/local/bin/awf
 
+      - name: Fetch benchmark history
+        # Seeds benchmarks/history.json from the benchmark-data branch, falling back
+        # to an empty array when the branch or file is missing or unusable.
+        run: |
+          mkdir -p benchmarks
+          if git fetch origin benchmark-data 2>/dev/null; then
+            git show origin/benchmark-data:benchmarks/history.json > benchmarks/history.json 2>/dev/null || echo '[]' > benchmarks/history.json
+          else
+            echo '[]' > benchmarks/history.json
+          fi
+          # Require a JSON array, not merely parseable JSON: `jq empty` also accepts
+          # an empty file or a non-array document, and the later `. + [$entry]`
+          # append then fails or produces no output. With -e, jq exits non-zero
+          # when the result is false/null or when no result is produced at all.
+          if ! jq -e 'type == "array"' benchmarks/history.json >/dev/null 2>&1; then
+            echo "WARNING: history.json is corrupted, resetting to empty"
+            echo '[]' > benchmarks/history.json
+          fi
+          echo "History entries: $(jq 'length' benchmarks/history.json)"
+
       - name: Run benchmarks
         id: benchmark
         env:
@@ -76,6 +97,50 @@ jobs:
             exit 1
           fi
 
+      - name: Append to benchmark history
+        # Adds this run's summary to benchmarks/history.json and trims the file to
+        # the most recent 50 entries.
+        run: |
+          HISTORY_FILE="benchmarks/history.json"
+          # Skip (with success) when benchmark-results.json is missing, unparseable,
+          # or its `.results` is null/false: `jq -e` exits non-zero in all of those cases.
+          if ! jq -e '.results' benchmark-results.json > /dev/null 2>&1; then
+            echo "Invalid or missing benchmark results — skipping history update"
+            exit 0
+          fi
+          # Project the results file down to the fields stored per history entry.
+          ENTRY=$(jq '{timestamp, commitSha, iterations, results, regressions}' benchmark-results.json)
+          # Build the new array in a variable first so the file is not read and
+          # truncated by the same command.
+          UPDATED=$(jq --argjson entry "$ENTRY" '. + [$entry]' "$HISTORY_FILE")
+          # Keep only last 50 entries
+          echo "$UPDATED" | jq '.[-50:]' > "$HISTORY_FILE"
+          echo "History now has $(jq 'length' "$HISTORY_FILE") entries"
+
+      - name: Commit benchmark history
+        # Publishes the updated history to the benchmark-data branch, creating it as
+        # an orphan branch the first time.
+        run: |
+          git config user.name "github-actions[bot]"
+          git config user.email "github-actions[bot]@users.noreply.github.com"
+          # Save both files outside the worktree; the branch switch below replaces it.
+          cp benchmarks/history.json /tmp/history.json
+          cp benchmark-results.json /tmp/benchmark-results.json
+          # benchmark-data tracks benchmarks/history.json. If the copy generated
+          # earlier in this job is still in the worktree, git refuses to switch
+          # branches ("untracked working tree files would be overwritten").
+          # Remove it here; it is copied back from /tmp after the switch.
+          rm -f benchmarks/history.json
+          if git fetch origin benchmark-data 2>/dev/null; then
+            git checkout benchmark-data
+          else
+            # First run: start a history-free branch and drop the inherited index.
+            git checkout --orphan benchmark-data
+            git rm -rf . 2>/dev/null || true
+          fi
+          mkdir -p benchmarks
+          cp /tmp/history.json benchmarks/history.json
+          git add benchmarks/history.json
+          # Only commit and push when the staged history actually differs.
+          if git diff --cached --quiet; then
+            echo "No changes to commit"
+          else
+            git commit -m "chore: update benchmark history [skip ci]"
+            git push origin benchmark-data
+          fi
+
+      - name: Restore main branch
+        # Checks out the commit that triggered this run again and puts back the two
+        # files that were saved to /tmp before the switch to benchmark-data.
+        run: |
+          git checkout ${{ github.sha }}
+          cp /tmp/history.json benchmarks/history.json
+          cp /tmp/benchmark-results.json benchmark-results.json
+
+      - name: Generate trend report
+        # Appends the script's stdout to the job summary file.
+        run: npx tsx scripts/ci/benchmark-trend.ts >> "$GITHUB_STEP_SUMMARY"
+
       - name: Check for regressions
         id: check
         run: |
 
@@ -25,6 +25,7 @@
     "docs:build": "cd docs-site && npm run build",
     "docs:preview": "cd docs-site && npm run preview",
     "benchmark": "npx tsx scripts/ci/benchmark-performance.ts",
+    "benchmark:trend": "npx tsx scripts/ci/benchmark-trend.ts",
     "build:bundle": "npm run build && node scripts/build-bundle.mjs"
   },
   "keywords": [
 
@@ -0,0 +1,150 @@
+/**
+ * Unit tests for benchmark-trend.ts logic.
+ *
+ * Tests the core delta computation (Markdown formatting is not covered here).
+ * The script's main() reads from disk and argv, so we test the pure functions directly.
+ */
+
+// Re-implement the pure functions here since the script isn't structured as a library.
+// This mirrors the logic in benchmark-trend.ts without the CLI/file I/O.
+
+/** Statistics recorded for one benchmark metric within a single run. */
+interface BenchmarkResult {
+  /** Metric name; computeDeltas pairs results across runs by this value. */
+  metric: string;
+  /** Unit label copied onto the computed delta (the tests use "ms" and "MB"). */
+  unit: string;
+  values: number[];
+  mean: number;
+  median: number;
+  /** The only statistic computeDeltas compares between runs. */
+  p95: number;
+  p99: number;
+}
+
+/** One benchmark run: its metadata plus the per-metric results. */
+interface HistoryEntry {
+  timestamp: string;
+  commitSha: string;
+  iterations: number;
+  /** Per-metric statistics; computeDeltas reads only this field. */
+  results: BenchmarkResult[];
+  regressions: string[];
+}
+
+/** Change in one metric's p95 between two runs, as produced by computeDeltas. */
+interface MetricDelta {
+  metric: string;
+  unit: string;
+  /** p95 of the current run. */
+  current: number;
+  /** p95 of the previous run. */
+  previous: number;
+  /** current - previous. */
+  delta: number;
+  /** delta as a percentage of previous, rounded to one decimal; 0 when previous is 0. */
+  deltaPercent: number;
+  /** True when the unrounded percentage exceeds REGRESSION_THRESHOLD_PERCENT. */
+  regression: boolean;
+}
+
+/** Percent increase in p95 that must be strictly exceeded for computeDeltas to flag a regression. */
+const REGRESSION_THRESHOLD_PERCENT = 20;
+
+/**
+ * Compares the p95 of every metric in `current` against the same-named metric
+ * in `previous`.
+ *
+ * Metrics with no counterpart in `previous` are omitted. The percentage is 0
+ * when the previous p95 is 0; the regression flag is decided on the unrounded
+ * percentage, while the reported `deltaPercent` is rounded to one decimal.
+ *
+ * @param current - The newer run.
+ * @param previous - The run to compare against.
+ * @returns One delta per matched metric, in the order of `current.results`.
+ */
+function computeDeltas(current: HistoryEntry, previous: HistoryEntry): MetricDelta[] {
+  return current.results.reduce<MetricDelta[]>((collected, latest) => {
+    const baseline = previous.results.find((candidate) => candidate.metric === latest.metric);
+    if (!baseline) {
+      return collected;
+    }
+
+    const change = latest.p95 - baseline.p95;
+    let percent = 0;
+    if (baseline.p95 !== 0) {
+      percent = (change / baseline.p95) * 100;
+    }
+
+    collected.push({
+      metric: latest.metric,
+      unit: latest.unit,
+      current: latest.p95,
+      previous: baseline.p95,
+      delta: change,
+      deltaPercent: Math.round(percent * 10) / 10,
+      regression: percent > REGRESSION_THRESHOLD_PERCENT,
+    });
+    return collected;
+  }, []);
+}
+
+/**
+ * Test fixture: builds a HistoryEntry from fixed default metadata, with any
+ * field in `overrides` taking precedence. `results` must always be supplied.
+ */
+function makeEntry(overrides: Partial<HistoryEntry> & { results: BenchmarkResult[] }): HistoryEntry {
+  const defaults: Omit<HistoryEntry, "results"> = {
+    timestamp: "2026-04-09T06:00:00Z",
+    commitSha: "abc1234567890",
+    iterations: 30,
+    regressions: [],
+  };
+  return { ...defaults, ...overrides };
+}
+
+/**
+ * Test fixture: builds a BenchmarkResult whose single sample and every
+ * statistic equal `p95`. `unit` defaults to "ms".
+ */
+function makeResult(metric: string, p95: number, unit = "ms"): BenchmarkResult {
+  const sample = p95;
+  const result: BenchmarkResult = {
+    metric,
+    unit,
+    values: [sample],
+    mean: sample,
+    median: sample,
+    p95: sample,
+    p99: sample,
+  };
+  return result;
+}
+
+// ── Tests ─────────────────────────────────────────────────────────
+
+describe("computeDeltas", () => {
+  // Wraps the given results in a history entry that uses the default metadata.
+  const runWith = (...results: BenchmarkResult[]): HistoryEntry => makeEntry({ results });
+
+  it("computes deltas between two runs", () => {
+    const baseline = runWith(makeResult("container_startup_warm", 18000));
+    const latest = runWith(makeResult("container_startup_warm", 13000));
+    const report = computeDeltas(latest, baseline);
+
+    expect(report).toHaveLength(1);
+    const [only] = report;
+    expect(only.metric).toBe("container_startup_warm");
+    expect(only.previous).toBe(18000);
+    expect(only.current).toBe(13000);
+    expect(only.delta).toBe(-5000);
+    expect(only.deltaPercent).toBe(-27.8);
+    expect(only.regression).toBe(false);
+  });
+
+  it("flags regression when delta exceeds 20%", () => {
+    const baseline = runWith(makeResult("container_startup_warm", 10000));
+    const latest = runWith(makeResult("container_startup_warm", 13000));
+    const [first] = computeDeltas(latest, baseline);
+
+    expect(first.deltaPercent).toBe(30);
+    expect(first.regression).toBe(true);
+  });
+
+  it("does not flag regression at exactly 20%", () => {
+    const baseline = runWith(makeResult("container_startup_warm", 10000));
+    const latest = runWith(makeResult("container_startup_warm", 12000));
+    const [first] = computeDeltas(latest, baseline);
+
+    expect(first.deltaPercent).toBe(20);
+    expect(first.regression).toBe(false);
+  });
+
+  it("handles multiple metrics", () => {
+    const baseline = runWith(
+      makeResult("warm", 18000),
+      makeResult("cold", 28000),
+      makeResult("memory", 20, "MB"),
+    );
+    const latest = runWith(
+      makeResult("warm", 13000),
+      makeResult("cold", 26000),
+      makeResult("memory", 22, "MB"),
+    );
+    const report = computeDeltas(latest, baseline);
+
+    expect(report).toHaveLength(3);
+    const [first, second, third] = report;
+    expect(first.metric).toBe("warm");
+    expect(second.metric).toBe("cold");
+    expect(third.metric).toBe("memory");
+  });
+
+  it("skips metrics missing from previous run", () => {
+    const baseline = runWith(makeResult("warm", 18000));
+    const latest = runWith(makeResult("warm", 13000), makeResult("new_metric", 100));
+    const report = computeDeltas(latest, baseline);
+
+    expect(report).toHaveLength(1);
+    const [only] = report;
+    expect(only.metric).toBe("warm");
+  });
+
+  it("handles zero previous value without division error", () => {
+    const baseline = runWith(makeResult("latency", 0));
+    const latest = runWith(makeResult("latency", 100));
+    const [first] = computeDeltas(latest, baseline);
+
+    expect(first.deltaPercent).toBe(0);
+    expect(first.regression).toBe(false);
+  });
+
+  it("returns empty array for no matching metrics", () => {
+    const baseline = runWith(makeResult("a", 100));
+    const latest = runWith(makeResult("b", 200));
+    const report = computeDeltas(latest, baseline);
+
+    expect(report).toHaveLength(0);
+  });
+});