ci(benchmarks): wire v3 ingest dual-write into bench/sql workflows

connortsui20 · connortsui20 · commit e849742d2ed3 · 2026-04-27T23:06:14.000-04:00
Adds --gh-json-v3 plumbing through vx-bench, post-ingest steps in
bench.yml and sql-benchmarks.yml, and a new v3-commit-metadata workflow.
Every v3 ingest step is gated on vars.V3_INGEST_URL and marked
continue-on-error, so it's a clean no-op until the deploy track sets the variable.
v2's cat-s3.sh path is unchanged.

Signed-off-by: Connor Tsui <connor.tsui20@gmail.com>
diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml
@@ -92,7 +92,7 @@ jobs:
           VORTEX_EXPERIMENTAL_PATCHED_ARRAY: "1"
           FLAT_LAYOUT_INLINE_ARRAY_NODE: "1"
         run: |
-          bash scripts/bench-taskset.sh target/release_debug/${{ matrix.benchmark.id }} --formats ${{ matrix.benchmark.formats }} -d gh-json -o results.json
+          bash scripts/bench-taskset.sh target/release_debug/${{ matrix.benchmark.id }} --formats ${{ matrix.benchmark.formats }} -d gh-json -o results.json --gh-json-v3 results.v3.jsonl
 
       - name: Setup AWS CLI
         uses: aws-actions/configure-aws-credentials@ec61189d14ec14c8efccab744f656cffd0e33f37  # v6
@@ -105,6 +105,19 @@ jobs:
         run: |
           bash scripts/cat-s3.sh vortex-ci-benchmark-results data.json.gz results.json
 
+      - name: Ingest results to v3 server
+        if: vars.V3_INGEST_URL != ''
+        continue-on-error: true
+        shell: bash
+        env:
+          INGEST_BEARER_TOKEN: ${{ secrets.INGEST_BEARER_TOKEN }}
+        run: |
+          python3 scripts/post-ingest.py results.v3.jsonl \
+            --server "${{ vars.V3_INGEST_URL }}" \
+            --commit-sha "${{ github.sha }}" \
+            --benchmark-id "${{ matrix.benchmark.id }}" \
+            --repo-url "${{ github.server_url }}/${{ github.repository }}"
+
       - name: Alert incident.io
         if: failure()
         uses: ./.github/actions/alert-incident-io
diff --git a/.github/workflows/sql-benchmarks.yml b/.github/workflows/sql-benchmarks.yml
@@ -376,6 +376,7 @@ jobs:
           bash scripts/bench-taskset.sh uv run --project bench-orchestrator vx-bench run "${{ matrix.subcommand }}" \
             --targets-json '${{ steps.targets.outputs.targets_json }}' \
             --output results.json \
+            --gh-json-v3 results.v3.jsonl \
             --no-build \
             --runner "ec2_${{ inputs.machine_type }}" \
             ${{ matrix.iterations && format('--iterations {0}', matrix.iterations) || '' }} \
@@ -395,6 +396,7 @@ jobs:
           bash scripts/bench-taskset.sh uv run --project bench-orchestrator vx-bench run "${{ matrix.subcommand }}" \
             --targets-json '${{ steps.targets.outputs.targets_json }}' \
             --output results.json \
+            --gh-json-v3 results.v3.jsonl \
             --no-build \
             --runner "ec2_${{ inputs.machine_type }}" \
             ${{ matrix.iterations && format('--iterations {0}', matrix.iterations) || '' }} \
@@ -499,6 +501,19 @@ jobs:
         run: |
           bash scripts/cat-s3.sh vortex-ci-benchmark-results data.json.gz results.json
 
+      - name: Ingest results to v3 server
+        if: inputs.mode == 'develop' && vars.V3_INGEST_URL != ''
+        continue-on-error: true
+        shell: bash
+        env:
+          INGEST_BEARER_TOKEN: ${{ secrets.INGEST_BEARER_TOKEN }}
+        run: |
+          python3 scripts/post-ingest.py results.v3.jsonl \
+            --server "${{ vars.V3_INGEST_URL }}" \
+            --commit-sha "${{ github.sha }}" \
+            --benchmark-id "${{ matrix.id }}" \
+            --repo-url "${{ github.server_url }}/${{ github.repository }}"
+
       - name: Upload File Sizes
         if: inputs.mode == 'develop' && matrix.remote_storage == null
         shell: bash
diff --git a/.github/workflows/v3-commit-metadata.yml b/.github/workflows/v3-commit-metadata.yml
@@ -0,0 +1,35 @@
+# Posts a v3 ingest envelope with no records on every push to develop, so the
+# `commits` dim stays populated even when no benchmark ran.
+
+name: v3 commit metadata
+
+on:
+  push:
+    branches: [develop]
+  workflow_dispatch:
+
+permissions:
+  contents: read
+
+jobs:
+  commit-metadata:
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    steps:
+      - uses: actions/checkout@v6
+        with:
+          fetch-depth: 2
+
+      - name: Ingest commit metadata to v3 server
+        if: vars.V3_INGEST_URL != ''
+        continue-on-error: true
+        shell: bash
+        env:
+          INGEST_BEARER_TOKEN: ${{ secrets.INGEST_BEARER_TOKEN }}
+        run: |
+          echo -n > empty.jsonl
+          python3 scripts/post-ingest.py empty.jsonl \
+            --server "${{ vars.V3_INGEST_URL }}" \
+            --commit-sha "${{ github.sha }}" \
+            --benchmark-id "commit-metadata" \
+            --repo-url "${{ github.server_url }}/${{ github.repository }}"
diff --git a/bench-orchestrator/bench_orchestrator/cli.py b/bench-orchestrator/bench_orchestrator/cli.py
@@ -210,6 +210,10 @@ def run(
         Path | None,
         typer.Option("--output", help="Optional path for compatibility JSONL output"),
     ] = None,
+    gh_json_v3: Annotated[
+        Path | None,
+        typer.Option("--gh-json-v3", help="Optional path for v3 JSONL records emitted by the benchmark binary"),
+    ] = None,
     options: Annotated[list[str] | None, typer.Option("--opt", help="Engine or benchmark specific options")] = None,
 ) -> None:
     """Run benchmarks with specified configuration."""
@@ -294,6 +298,7 @@ def run(
                         sample_rate=sample_rate,
                         tracing=tracing,
                         runner=runner,
+                        gh_json_v3=gh_json_v3,
                         on_result=lambda line, store_writer=ctx.write_raw_json, compatibility=compatibility_file: (
                             write_result_line(
                                 line,
diff --git a/bench-orchestrator/bench_orchestrator/runner/executor.py b/bench-orchestrator/bench_orchestrator/runner/executor.py
@@ -40,6 +40,7 @@ def build_command(
         sample_rate: int | None = None,
         tracing: bool = False,
         runner: str | None = None,
+        gh_json_v3: Path | None = None,
     ) -> list[str]:
         """Build the command used to execute a benchmark binary."""
         cmd = [
@@ -67,6 +68,8 @@ def build_command(
             cmd.append("--tracing")
         if runner:
             cmd.extend(["--runner", runner])
+        if gh_json_v3 is not None:
+            cmd.extend(["--gh-json-v3", str(gh_json_v3)])
         if options:
             for key, value in options.items():
                 cmd.extend(["--opt", f"{key}={value}"])
@@ -98,6 +101,7 @@ def run(
         sample_rate: int | None = None,
         tracing: bool = False,
         runner: str | None = None,
+        gh_json_v3: Path | None = None,
         on_result: Callable[[str], None] | None = None,
     ) -> list[str]:
         """
@@ -128,6 +132,7 @@ def run(
             sample_rate=sample_rate,
             tracing=tracing,
             runner=runner,
+            gh_json_v3=gh_json_v3,
         )
 
         if self.verbose:
diff --git a/bench-orchestrator/tests/test_executor.py b/bench-orchestrator/tests/test_executor.py
@@ -48,6 +48,31 @@ def test_build_command_omits_formats_for_lance_backend() -> None:
     assert "1,3" in cmd
 
 
+def test_build_command_includes_gh_json_v3_when_set() -> None:
+    executor = BenchmarkExecutor(Path("/tmp/duckdb-bench"), Engine.DUCKDB)
+
+    cmd = executor.build_command(
+        benchmark=Benchmark.TPCH,
+        formats=[Format.PARQUET],
+        gh_json_v3=Path("results.v3.jsonl"),
+    )
+
+    assert "--gh-json-v3" in cmd
+    flag_idx = cmd.index("--gh-json-v3")
+    assert cmd[flag_idx + 1] == "results.v3.jsonl"
+
+
+def test_build_command_omits_gh_json_v3_when_unset() -> None:
+    executor = BenchmarkExecutor(Path("/tmp/duckdb-bench"), Engine.DUCKDB)
+
+    cmd = executor.build_command(
+        benchmark=Benchmark.TPCH,
+        formats=[Format.PARQUET],
+    )
+
+    assert "--gh-json-v3" not in cmd
+
+
 def test_run_streams_logs_without_counting_them(tmp_path: Path) -> None:
     script = tmp_path / "fake-bench.py"
     script.write_text(