Skip to content

Commit 5e5572b

Browse files
authored
chore: codspeed display names (#7752)
As part of this change, benchmarks now run only on inputs with 100M elements.
Signed-off-by: Alexander Droste <alexander.droste@protonmail.com>
1 parent 09a57ad commit 5e5572b

11 files changed

Lines changed: 210 additions & 163 deletions

File tree

vortex-cuda/benches/alp_cuda.rs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -92,7 +92,7 @@ fn benchmark_alp_decode_typed<T>(c: &mut Criterion, type_name: &str)
9292
where
9393
T: ALPFloat + NativePType + DeviceRepr,
9494
{
95-
let mut group = c.benchmark_group(format!("cuda/alp_{}", type_name));
95+
let mut group = c.benchmark_group("cuda");
9696

9797
for &(len, len_str) in BENCH_SIZES {
9898
group.throughput(Throughput::Bytes((len * size_of::<T>()) as u64));
@@ -101,7 +101,7 @@ where
101101
let array = make_alp_array::<T>(len, patch_freq);
102102

103103
group.bench_with_input(
104-
BenchmarkId::new(patch_label, len_str),
104+
BenchmarkId::new(format!("cuda/alp_{}/{}", type_name, patch_label), len_str),
105105
&array,
106106
|b, array| {
107107
b.iter_custom(|iters| {

vortex-cuda/benches/bench_config/mod.rs

Lines changed: 2 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -7,12 +7,8 @@ use criterion::Criterion;
77

88
/// Benchmark input sizes.
99
///
10-
/// On codspeed, only the 100M variant runs — kernels under ~200 µs
11-
/// (i.e. the 10M cases) swing 15-45% across ephemeral GPU instances,
12-
/// drowning real regressions in noise. Locally both sizes run.
13-
#[cfg(not(codspeed))]
14-
pub const BENCH_SIZES: &[(usize, &str)] = &[(10_000_000, "10M"), (100_000_000, "100M")];
15-
#[cfg(codspeed)]
10+
/// 100M elements keeps every kernel above ~500 µs, well above the
11+
/// ~15 µs CUDA driver noise floor that caused 15-45% swings at 10M.
1612
pub const BENCH_SIZES: &[(usize, &str)] = &[(100_000_000, "100M")];
1713

1814
/// Returns a [`Criterion`] configuration tuned for CUDA benchmarks.

vortex-cuda/benches/bitpacked_cuda.rs

Lines changed: 13 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -110,7 +110,7 @@ where
110110
T: BitPacked + NativePType + DeviceRepr + Add<Output = T> + From<u8>,
111111
T::Physical: DeviceRepr,
112112
{
113-
let mut group = c.benchmark_group(format!("cuda/bitpacked_{}", type_name));
113+
let mut group = c.benchmark_group("cuda");
114114

115115
for &(n_rows, size_str) in bench_config::BENCH_SIZES {
116116
let array = make_bitpacked_array::<T>(bit_width, n_rows);
@@ -119,7 +119,10 @@ where
119119
group.throughput(Throughput::Bytes(nbytes as u64));
120120

121121
group.bench_with_input(
122-
BenchmarkId::new(format!("unpack/{}bw", bit_width), size_str),
122+
BenchmarkId::new(
123+
format!("cuda/bitpacked_{}/unpack/{}bw", type_name, bit_width),
124+
size_str,
125+
),
123126
&array,
124127
|b, array| {
125128
b.iter_custom(|iters| {
@@ -157,7 +160,7 @@ where
157160
T: BitPacked + NativePType + DeviceRepr + Add<Output = T> + From<u8>,
158161
T::Physical: DeviceRepr,
159162
{
160-
let mut group = c.benchmark_group(format!("cuda/bitpacked_patched_{}", type_name));
163+
let mut group = c.benchmark_group("cuda");
161164

162165
for &(n_rows, size_str) in bench_config::BENCH_SIZES {
163166
let nbytes = n_rows * size_of::<T>();
@@ -167,7 +170,13 @@ where
167170
let array = make_bitpacked_array_with_patches::<T>(n_rows, patch_freq);
168171

169172
group.bench_with_input(
170-
BenchmarkId::new(format!("unpack/{}", patch_label), size_str),
173+
BenchmarkId::new(
174+
format!(
175+
"cuda/bitpacked_patched_{}/unpack/{}",
176+
type_name, patch_label
177+
),
178+
size_str,
179+
),
171180
&array,
172181
|b, array| {
173182
b.iter_custom(|iters| {

vortex-cuda/benches/date_time_parts_cuda.rs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -52,7 +52,7 @@ fn make_datetimeparts_array(len: usize, time_unit: TimeUnit) -> DateTimePartsArr
5252
}
5353

5454
fn benchmark_datetimeparts(c: &mut Criterion) {
55-
let mut group = c.benchmark_group("cuda/datetimeparts");
55+
let mut group = c.benchmark_group("cuda");
5656

5757
for &(len, len_str) in bench_config::BENCH_SIZES {
5858
group.throughput(Throughput::Bytes((len * size_of::<i64>()) as u64));
@@ -61,7 +61,7 @@ fn benchmark_datetimeparts(c: &mut Criterion) {
6161
let dtp_array = make_datetimeparts_array(len, time_unit);
6262

6363
group.bench_with_input(
64-
BenchmarkId::new(unit_str, len_str),
64+
BenchmarkId::new(format!("cuda/datetimeparts/{unit_str}"), len_str),
6565
&dtp_array,
6666
|b, dtp_array| {
6767
b.iter_custom(|iters| {

vortex-cuda/benches/dict_cuda.rs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -74,7 +74,7 @@ where
7474
C: NativePType + DeviceRepr + TryFrom<usize>,
7575
<C as TryFrom<usize>>::Error: Debug,
7676
{
77-
let mut group = c.benchmark_group("cuda/dict");
77+
let mut group = c.benchmark_group("cuda");
7878

7979
for (len, len_str) in BENCH_SIZES {
8080
// Throughput is based on output size (values read from dictionary)
@@ -85,7 +85,7 @@ where
8585
group.bench_with_input(
8686
BenchmarkId::new(
8787
format!(
88-
"{}_values_{}_codes",
88+
"cuda/dict/{}_values_{}_codes",
8989
config.value_type_name, config.code_type_name
9090
),
9191
len_str,

0 commit comments

Comments
 (0)