Skip to content

Commit e849742

Browse files
committed
ci(benchmarks): wire v3 ingest dual-write into bench/sql workflows
Adds --gh-json-v3 plumbing through vx-bench and post-ingest steps in bench.yml and sql-benchmarks.yml, plus a v3-commit-metadata workflow.

All v3 ingest steps are gated on vars.V3_INGEST_URL and marked continue-on-error, so the change is a clean no-op until the deploy track sets the variable. v2's cat-s3.sh path is unchanged.

Signed-off-by: Connor Tsui <connor.tsui20@gmail.com>
1 parent ff0a1d9 commit e849742

6 files changed

Lines changed: 99 additions & 1 deletion

File tree

.github/workflows/bench.yml

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ jobs:
9292
VORTEX_EXPERIMENTAL_PATCHED_ARRAY: "1"
9393
FLAT_LAYOUT_INLINE_ARRAY_NODE: "1"
9494
run: |
95-
bash scripts/bench-taskset.sh target/release_debug/${{ matrix.benchmark.id }} --formats ${{ matrix.benchmark.formats }} -d gh-json -o results.json
95+
bash scripts/bench-taskset.sh target/release_debug/${{ matrix.benchmark.id }} --formats ${{ matrix.benchmark.formats }} -d gh-json -o results.json --gh-json-v3 results.v3.jsonl
9696
9797
- name: Setup AWS CLI
9898
uses: aws-actions/configure-aws-credentials@ec61189d14ec14c8efccab744f656cffd0e33f37 # v6
@@ -105,6 +105,19 @@ jobs:
105105
run: |
106106
bash scripts/cat-s3.sh vortex-ci-benchmark-results data.json.gz results.json
107107
108+
- name: Ingest results to v3 server
109+
if: vars.V3_INGEST_URL != ''
110+
continue-on-error: true
111+
shell: bash
112+
env:
113+
INGEST_BEARER_TOKEN: ${{ secrets.INGEST_BEARER_TOKEN }}
114+
run: |
115+
python3 scripts/post-ingest.py results.v3.jsonl \
116+
--server "${{ vars.V3_INGEST_URL }}" \
117+
--commit-sha "${{ github.sha }}" \
118+
--benchmark-id "${{ matrix.benchmark.id }}" \
119+
--repo-url "${{ github.server_url }}/${{ github.repository }}"
120+
108121
- name: Alert incident.io
109122
if: failure()
110123
uses: ./.github/actions/alert-incident-io

.github/workflows/sql-benchmarks.yml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -376,6 +376,7 @@ jobs:
376376
bash scripts/bench-taskset.sh uv run --project bench-orchestrator vx-bench run "${{ matrix.subcommand }}" \
377377
--targets-json '${{ steps.targets.outputs.targets_json }}' \
378378
--output results.json \
379+
--gh-json-v3 results.v3.jsonl \
379380
--no-build \
380381
--runner "ec2_${{ inputs.machine_type }}" \
381382
${{ matrix.iterations && format('--iterations {0}', matrix.iterations) || '' }} \
@@ -395,6 +396,7 @@ jobs:
395396
bash scripts/bench-taskset.sh uv run --project bench-orchestrator vx-bench run "${{ matrix.subcommand }}" \
396397
--targets-json '${{ steps.targets.outputs.targets_json }}' \
397398
--output results.json \
399+
--gh-json-v3 results.v3.jsonl \
398400
--no-build \
399401
--runner "ec2_${{ inputs.machine_type }}" \
400402
${{ matrix.iterations && format('--iterations {0}', matrix.iterations) || '' }} \
@@ -499,6 +501,19 @@ jobs:
499501
run: |
500502
bash scripts/cat-s3.sh vortex-ci-benchmark-results data.json.gz results.json
501503
504+
- name: Ingest results to v3 server
505+
if: inputs.mode == 'develop' && vars.V3_INGEST_URL != ''
506+
continue-on-error: true
507+
shell: bash
508+
env:
509+
INGEST_BEARER_TOKEN: ${{ secrets.INGEST_BEARER_TOKEN }}
510+
run: |
511+
python3 scripts/post-ingest.py results.v3.jsonl \
512+
--server "${{ vars.V3_INGEST_URL }}" \
513+
--commit-sha "${{ github.sha }}" \
514+
--benchmark-id "${{ matrix.id }}" \
515+
--repo-url "${{ github.server_url }}/${{ github.repository }}"
516+
502517
- name: Upload File Sizes
503518
if: inputs.mode == 'develop' && matrix.remote_storage == null
504519
shell: bash
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
# Posts a v3 ingest envelope with no records on every push to develop, so the
# `commits` dim stays populated even when no benchmark ran.

name: v3 commit metadata

on:
  push:
    branches: [develop]
  workflow_dispatch:

permissions:
  contents: read

jobs:
  commit-metadata:
    runs-on: ubuntu-latest
    timeout-minutes: 10
    steps:
      - uses: actions/checkout@v6
        with:
          # NOTE(review): presumably depth 2 lets the ingest script see the
          # parent commit — confirm against scripts/post-ingest.py.
          fetch-depth: 2

      - name: Ingest commit metadata to v3 server
        # Skipped entirely until the V3_INGEST_URL repository variable is set.
        if: vars.V3_INGEST_URL != ''
        # Best-effort: a failed ingest must not fail the workflow.
        continue-on-error: true
        shell: bash
        env:
          INGEST_BEARER_TOKEN: ${{ secrets.INGEST_BEARER_TOKEN }}
          # Expression values are handed to the script through the environment
          # rather than being interpolated into the shell text, so their
          # contents can never be parsed as shell syntax.
          V3_INGEST_URL: ${{ vars.V3_INGEST_URL }}
          COMMIT_SHA: ${{ github.sha }}
          REPO_URL: ${{ github.server_url }}/${{ github.repository }}
        run: |
          # An empty records file: only the envelope's commit metadata is sent.
          echo -n > empty.jsonl
          python3 scripts/post-ingest.py empty.jsonl \
            --server "$V3_INGEST_URL" \
            --commit-sha "$COMMIT_SHA" \
            --benchmark-id "commit-metadata" \
            --repo-url "$REPO_URL"

bench-orchestrator/bench_orchestrator/cli.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,10 @@ def run(
210210
Path | None,
211211
typer.Option("--output", help="Optional path for compatibility JSONL output"),
212212
] = None,
213+
gh_json_v3: Annotated[
214+
Path | None,
215+
typer.Option("--gh-json-v3", help="Optional path for v3 JSONL records emitted by the benchmark binary"),
216+
] = None,
213217
options: Annotated[list[str] | None, typer.Option("--opt", help="Engine or benchmark specific options")] = None,
214218
) -> None:
215219
"""Run benchmarks with specified configuration."""
@@ -294,6 +298,7 @@ def run(
294298
sample_rate=sample_rate,
295299
tracing=tracing,
296300
runner=runner,
301+
gh_json_v3=gh_json_v3,
297302
on_result=lambda line, store_writer=ctx.write_raw_json, compatibility=compatibility_file: (
298303
write_result_line(
299304
line,

bench-orchestrator/bench_orchestrator/runner/executor.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ def build_command(
4040
sample_rate: int | None = None,
4141
tracing: bool = False,
4242
runner: str | None = None,
43+
gh_json_v3: Path | None = None,
4344
) -> list[str]:
4445
"""Build the command used to execute a benchmark binary."""
4546
cmd = [
@@ -67,6 +68,8 @@ def build_command(
6768
cmd.append("--tracing")
6869
if runner:
6970
cmd.extend(["--runner", runner])
71+
if gh_json_v3 is not None:
72+
cmd.extend(["--gh-json-v3", str(gh_json_v3)])
7073
if options:
7174
for key, value in options.items():
7275
cmd.extend(["--opt", f"{key}={value}"])
@@ -98,6 +101,7 @@ def run(
98101
sample_rate: int | None = None,
99102
tracing: bool = False,
100103
runner: str | None = None,
104+
gh_json_v3: Path | None = None,
101105
on_result: Callable[[str], None] | None = None,
102106
) -> list[str]:
103107
"""
@@ -128,6 +132,7 @@ def run(
128132
sample_rate=sample_rate,
129133
tracing=tracing,
130134
runner=runner,
135+
gh_json_v3=gh_json_v3,
131136
)
132137

133138
if self.verbose:

bench-orchestrator/tests/test_executor.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,31 @@ def test_build_command_omits_formats_for_lance_backend() -> None:
4848
assert "1,3" in cmd
4949

5050

51+
def test_build_command_includes_gh_json_v3_when_set() -> None:
    """The v3 output path is emitted as the argument right after ``--gh-json-v3``."""
    v3_output = Path("results.v3.jsonl")
    bench_executor = BenchmarkExecutor(Path("/tmp/duckdb-bench"), Engine.DUCKDB)

    argv = bench_executor.build_command(
        benchmark=Benchmark.TPCH,
        formats=[Format.PARQUET],
        gh_json_v3=v3_output,
    )

    # The flag must be present, and its value must be the stringified path.
    assert "--gh-json-v3" in argv
    flag_position = argv.index("--gh-json-v3")
    assert argv[flag_position + 1] == "results.v3.jsonl"
63+
64+
65+
def test_build_command_omits_gh_json_v3_when_unset() -> None:
    """Without a ``gh_json_v3`` argument the flag is left out of the command."""
    bench_executor = BenchmarkExecutor(Path("/tmp/duckdb-bench"), Engine.DUCKDB)

    argv = bench_executor.build_command(
        benchmark=Benchmark.TPCH,
        formats=[Format.PARQUET],
    )

    assert "--gh-json-v3" not in argv
74+
75+
5176
def test_run_streams_logs_without_counting_them(tmp_path: Path) -> None:
5277
script = tmp_path / "fake-bench.py"
5378
script.write_text(

0 commit comments

Comments
 (0)