Skip to content

Commit 50531ab

Browse files
feat: add a runner field to all runners (#7622)
Adds a runner field to all benchmark runner which could be later used to diff runs from different runners --------- Signed-off-by: Joe Isaacs <joe.isaacs@live.co.uk>
1 parent a83c9b3 commit 50531ab

9 files changed

Lines changed: 85 additions & 0 deletions

File tree

.github/workflows/sql-benchmarks.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -377,6 +377,7 @@ jobs:
377377
--targets-json '${{ steps.targets.outputs.targets_json }}' \
378378
--output results.json \
379379
--no-build \
380+
--runner "ec2_${{ inputs.machine_type }}" \
380381
${{ matrix.iterations && format('--iterations {0}', matrix.iterations) || '' }} \
381382
${{ matrix.scale_factor && format('--opt scale-factor={0}', matrix.scale_factor) || '' }}
382383
@@ -395,6 +396,7 @@ jobs:
395396
--targets-json '${{ steps.targets.outputs.targets_json }}' \
396397
--output results.json \
397398
--no-build \
399+
--runner "ec2_${{ inputs.machine_type }}" \
398400
${{ matrix.iterations && format('--iterations {0}', matrix.iterations) || '' }} \
399401
--opt remote-data-dir=${{ matrix.remote_storage }} \
400402
${{ matrix.scale_factor && format('--opt scale-factor={0}', matrix.scale_factor) || '' }}

bench-orchestrator/bench_orchestrator/cli.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,10 @@ def run(
202202
str | None,
203203
typer.Option("--targets-json", help="Exact benchmark targets as a JSON array"),
204204
] = None,
205+
runner: Annotated[
206+
str | None,
207+
typer.Option("--runner", help="Benchmark runner ID (e.g., ec2_c6id.8xlarge)"),
208+
] = None,
205209
output: Annotated[
206210
Path | None,
207211
typer.Option("--output", help="Optional path for compatibility JSONL output"),
@@ -289,6 +293,7 @@ def run(
289293
samply=samply,
290294
sample_rate=sample_rate,
291295
tracing=tracing,
296+
runner=runner,
292297
on_result=lambda line, store_writer=ctx.write_raw_json, compatibility=compatibility_file: (
293298
write_result_line(
294299
line,

bench-orchestrator/bench_orchestrator/runner/executor.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ def build_command(
3939
samply: bool = False,
4040
sample_rate: int | None = None,
4141
tracing: bool = False,
42+
runner: str | None = None,
4243
) -> list[str]:
4344
"""Build the command used to execute a benchmark binary."""
4445
cmd = [
@@ -64,6 +65,8 @@ def build_command(
6465
cmd.append("--track-memory")
6566
if tracing:
6667
cmd.append("--tracing")
68+
if runner:
69+
cmd.extend(["--runner", runner])
6770
if options:
6871
for key, value in options.items():
6972
cmd.extend(["--opt", f"{key}={value}"])
@@ -94,6 +97,7 @@ def run(
9497
samply: bool = False,
9598
sample_rate: int | None = None,
9699
tracing: bool = False,
100+
runner: str | None = None,
97101
on_result: Callable[[str], None] | None = None,
98102
) -> list[str]:
99103
"""
@@ -123,6 +127,7 @@ def run(
123127
samply=samply,
124128
sample_rate=sample_rate,
125129
tracing=tracing,
130+
runner=runner,
126131
)
127132

128133
if self.verbose:

benchmarks/datafusion-bench/src/main.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,9 @@ struct Args {
9494
#[arg(long, default_value_t = false)]
9595
track_memory: bool,
9696

97+
#[arg(long, default_value = "unknown")]
98+
runner: String,
99+
97100
#[arg(long, default_value_t = false)]
98101
explain: bool,
99102

@@ -149,6 +152,7 @@ async fn main() -> anyhow::Result<()> {
149152
let mut runner = SqlBenchmarkRunner::new(
150153
&*benchmark,
151154
Engine::DataFusion,
155+
args.runner.clone(),
152156
args.formats.clone(),
153157
args.track_memory,
154158
args.hide_progress_bar,

benchmarks/duckdb-bench/src/main.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,9 @@ struct Args {
6464
#[arg(long, default_value_t = false)]
6565
hide_progress_bar: bool,
6666

67+
#[arg(long, default_value = "unknown")]
68+
runner: String,
69+
6770
#[arg(long, value_delimiter = ',', value_parser = value_parser!(Format))]
6871
formats: Vec<Format>,
6972

@@ -142,6 +145,7 @@ fn main() -> anyhow::Result<()> {
142145
let mut runner = SqlBenchmarkRunner::new(
143146
&*benchmark,
144147
Engine::DuckDB,
148+
args.runner.clone(),
145149
args.formats.clone(),
146150
args.track_memory,
147151
args.hide_progress_bar,

benchmarks/lance-bench/src/main.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,9 @@ struct Args {
6565
#[arg(long, default_value_t = false)]
6666
track_memory: bool,
6767

68+
#[arg(long, default_value = "unknown")]
69+
runner: String,
70+
6871
#[arg(long = "opt", value_delimiter = ',', value_parser = value_parser!(Opt))]
6972
options: Vec<Opt>,
7073
}
@@ -93,6 +96,7 @@ async fn main() -> anyhow::Result<()> {
9396
let mut runner = SqlBenchmarkRunner::new(
9497
&*benchmark,
9598
Engine::DataFusion,
99+
args.runner.clone(),
96100
vec![Format::Lance],
97101
args.track_memory,
98102
args.hide_progress_bar,

vortex-bench/src/datasets/mod.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,17 @@ pub mod tpch_l_comment;
2222

2323
use std::path::PathBuf;
2424

25+
pub(crate) const DEFAULT_BENCHMARK_RUNNER_ID: &str = "unknown";
26+
27+
pub(crate) fn normalize_benchmark_runner_id(benchmark_runner: &str) -> String {
28+
let benchmark_runner = benchmark_runner.trim().replace('/', "_");
29+
if benchmark_runner.is_empty() {
30+
DEFAULT_BENCHMARK_RUNNER_ID.to_string()
31+
} else {
32+
benchmark_runner
33+
}
34+
}
35+
2536
#[async_trait]
2637
pub trait Dataset {
2738
fn name(&self) -> &str;

vortex-bench/src/measurements.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,7 @@ pub struct QueryMeasurement {
243243
pub query_idx: usize,
244244
pub target: Target,
245245
pub benchmark_dataset: BenchmarkDataset,
246+
pub benchmark_runner: String,
246247
/// The storage backend against which this test was run. One of: s3, gcs, nvme.
247248
pub storage: String,
248249
pub runs: Vec<Duration>,
@@ -279,6 +280,8 @@ pub struct QueryMeasurementJson {
279280
pub name: String,
280281
pub storage: String,
281282
pub dataset: BenchmarkDataset,
283+
/// The cloud runner used to run this
284+
pub runner: String,
282285
pub unit: String,
283286
pub value: u128,
284287
pub all_runtimes: Vec<u128>,
@@ -310,6 +313,7 @@ impl ToJson for QueryMeasurement {
310313
name,
311314
storage: self.storage.clone(),
312315
dataset: self.benchmark_dataset.clone(),
316+
runner: self.benchmark_runner.clone(),
313317
unit: "ns".to_string(),
314318
value: self.median_run().as_nanos(),
315319
all_runtimes: self.runs.iter().map(|r| r.as_nanos()).collect_vec(),
@@ -430,6 +434,7 @@ pub struct MemoryMeasurement {
430434
pub query_idx: usize,
431435
pub target: Target,
432436
pub benchmark_dataset: BenchmarkDataset,
437+
pub benchmark_runner: String,
433438
pub storage: String,
434439
pub physical_memory_delta: i64,
435440
pub virtual_memory_delta: i64,
@@ -442,13 +447,15 @@ impl MemoryMeasurement {
442447
query_idx: usize,
443448
target: Target,
444449
benchmark_dataset: BenchmarkDataset,
450+
benchmark_runner: String,
445451
storage: String,
446452
memory_result: MemoryMeasurementResult,
447453
) -> Self {
448454
Self {
449455
query_idx,
450456
target,
451457
benchmark_dataset,
458+
benchmark_runner,
452459
storage,
453460
physical_memory_delta: memory_result.physical_memory_delta,
454461
virtual_memory_delta: memory_result.virtual_memory_delta,
@@ -474,6 +481,7 @@ impl ToJson for MemoryMeasurement {
474481
name,
475482
storage: self.storage.clone(),
476483
dataset: self.benchmark_dataset.clone(),
484+
runner: self.benchmark_runner.clone(),
477485
physical_memory_delta: self.physical_memory_delta,
478486
virtual_memory_delta: self.virtual_memory_delta,
479487
peak_physical_memory: self.peak_physical_memory,
@@ -507,6 +515,7 @@ pub struct MemoryMeasurementJson {
507515
pub name: String,
508516
pub storage: String,
509517
pub dataset: BenchmarkDataset,
518+
pub runner: String,
510519
pub physical_memory_delta: i64,
511520
pub virtual_memory_delta: i64,
512521
pub peak_physical_memory: u64,

vortex-bench/src/runner.rs

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ use crate::BenchmarkDataset;
2020
use crate::Engine;
2121
use crate::Format;
2222
use crate::Target;
23+
use crate::datasets::DEFAULT_BENCHMARK_RUNNER_ID;
24+
use crate::datasets::normalize_benchmark_runner_id;
2325

2426
/// Controls whether queries are benchmarked or explained.
2527
pub enum BenchmarkMode {
@@ -66,6 +68,7 @@ pub struct BenchmarkResults {
6668
pub struct SqlBenchmarkRunner {
6769
engine: Engine,
6870
benchmark_dataset: BenchmarkDataset,
71+
benchmark_runner: String,
6972
storage: String,
7073
expected_row_counts: Option<Vec<usize>>,
7174
/// Deduplicated, preserving insertion order.
@@ -81,19 +84,23 @@ impl SqlBenchmarkRunner {
8184
pub fn new<B: Benchmark + ?Sized>(
8285
benchmark: &B,
8386
engine: Engine,
87+
benchmark_runner: String,
8488
formats: impl IntoIterator<Item = Format>,
8589
track_memory: bool,
8690
hide_progress_bar: bool,
8791
) -> anyhow::Result<Self> {
8892
let mut seen = HashSet::new();
8993
let formats: Vec<Format> = formats.into_iter().filter(|f| seen.insert(*f)).collect();
9094
let storage = url_scheme_to_storage(benchmark.data_url())?;
95+
let benchmark_runner = normalize_benchmark_runner_id(&benchmark_runner);
96+
validate_benchmark_runner_id(&benchmark_runner, is_ci())?;
9197

9298
let memory_tracker = track_memory.then(BenchmarkMemoryTracker::new);
9399

94100
Ok(Self {
95101
engine,
96102
benchmark_dataset: benchmark.dataset(),
103+
benchmark_runner,
97104
storage,
98105
expected_row_counts: benchmark.expected_row_counts().map(|s| s.to_vec()),
99106
formats,
@@ -168,6 +175,7 @@ impl SqlBenchmarkRunner {
168175
query_idx,
169176
target,
170177
benchmark_dataset: self.benchmark_dataset.clone(),
178+
benchmark_runner: self.benchmark_runner.clone(),
171179
storage: self.storage.clone(),
172180
runs,
173181
});
@@ -192,6 +200,7 @@ impl SqlBenchmarkRunner {
192200
query_idx,
193201
target,
194202
self.benchmark_dataset.clone(),
203+
self.benchmark_runner.clone(),
195204
self.storage.clone(),
196205
memory_result,
197206
));
@@ -411,6 +420,18 @@ impl SqlBenchmarkRunner {
411420
}
412421
}
413422

423+
fn is_ci() -> bool {
424+
matches!(std::env::var("CI").as_deref(), Ok("true"))
425+
}
426+
427+
fn validate_benchmark_runner_id(benchmark_runner: &str, is_ci: bool) -> anyhow::Result<()> {
428+
anyhow::ensure!(
429+
!is_ci || benchmark_runner != DEFAULT_BENCHMARK_RUNNER_ID,
430+
"benchmark runner must not be unknown in CI; pass --runner"
431+
);
432+
Ok(())
433+
}
434+
414435
pub fn export_results<W: Write>(
415436
queries: Vec<QueryMeasurement>,
416437
memory: Vec<MemoryMeasurement>,
@@ -460,3 +481,23 @@ pub fn filter_queries(
460481
})
461482
.collect()
462483
}
484+
485+
#[cfg(test)]
486+
mod tests {
487+
use super::*;
488+
489+
#[test]
490+
fn ci_rejects_unknown_benchmark_runner() {
491+
assert!(validate_benchmark_runner_id("unknown", true).is_err());
492+
}
493+
494+
#[test]
495+
fn ci_accepts_explicit_benchmark_runner() {
496+
assert!(validate_benchmark_runner_id("ec2_c6id.8xlarge", true).is_ok());
497+
}
498+
499+
#[test]
500+
fn local_accepts_unknown_benchmark_runner() {
501+
assert!(validate_benchmark_runner_id("unknown", false).is_ok());
502+
}
503+
}

0 commit comments

Comments
 (0)