@@ -40,8 +40,6 @@ use vortex_cuda_macros::cuda_not_available;
4040
4141use crate :: timed_launch_strategy:: TimedLaunchStrategy ;
4242
43- const N_ROWS : usize = 100_000_000 ;
44-
4543/// Patch frequencies to benchmark (as fractions)
4644const PATCH_FREQUENCIES : & [ ( f64 , & str ) ] = & [ ( 0.01 , "1%" ) , ( 0.10 , "10%" ) ] ;
4745
@@ -112,61 +110,16 @@ where
112110 T : BitPacked + NativePType + DeviceRepr + Add < Output = T > + From < u8 > ,
113111 T :: Physical : DeviceRepr ,
114112{
115- let mut group = c. benchmark_group ( format ! ( "bitunpack_cuda_{}" , type_name) ) ;
116-
117- let array = make_bitpacked_array :: < T > ( bit_width, N_ROWS ) ;
118- let nbytes = N_ROWS * size_of :: < T > ( ) ;
119-
120- group. throughput ( Throughput :: Bytes ( nbytes as u64 ) ) ;
121-
122- group. bench_with_input (
123- BenchmarkId :: new ( "bitunpack" , format ! ( "{}bw" , bit_width) ) ,
124- & array,
125- |b, array| {
126- b. iter_custom ( |iters| {
127- let timed = TimedLaunchStrategy :: default ( ) ;
128- let timer = timed. timer ( ) ;
129-
130- let mut cuda_ctx = CudaSession :: create_execution_ctx ( & VortexSession :: empty ( ) )
131- . vortex_expect ( "failed to create execution context" )
132- . with_dispatch_mode ( CudaDispatchMode :: StandaloneOnly )
133- . with_launch_strategy ( Arc :: new ( timed) ) ;
134-
135- for _ in 0 ..iters {
136- block_on ( array. clone ( ) . into_array ( ) . execute_cuda ( & mut cuda_ctx) ) . unwrap ( ) ;
137- }
138-
139- Duration :: from_nanos ( timer. load ( Ordering :: Relaxed ) )
140- } ) ;
141- } ,
142- ) ;
143-
144- group. finish ( ) ;
145- }
146-
147- fn benchmark_bitunpack ( c : & mut Criterion ) {
148- benchmark_bitunpack_typed :: < u8 > ( c, 3 , "u8" ) ;
149- benchmark_bitunpack_typed :: < u16 > ( c, 5 , "u16" ) ;
150- benchmark_bitunpack_typed :: < u32 > ( c, 6 , "u32" ) ;
151- benchmark_bitunpack_typed :: < u64 > ( c, 8 , "u64" ) ;
152- }
153-
154- /// Benchmark function for unpacking with patches at various frequencies
155- fn benchmark_bitunpack_with_patches_typed < T > ( c : & mut Criterion , type_name : & str )
156- where
157- T : BitPacked + NativePType + DeviceRepr + Add < Output = T > + From < u8 > ,
158- T :: Physical : DeviceRepr ,
159- {
160- let mut group = c. benchmark_group ( format ! ( "bitunpack_cuda_patched_{}" , type_name) ) ;
113+ let mut group = c. benchmark_group ( format ! ( "cuda/bitpacked_{}" , type_name) ) ;
161114
162- let nbytes = N_ROWS * size_of :: < T > ( ) ;
163- group. throughput ( Throughput :: Bytes ( nbytes as u64 ) ) ;
115+ for & ( n_rows, size_str) in bench_config:: BENCH_SIZES {
116+ let array = make_bitpacked_array :: < T > ( bit_width, n_rows) ;
117+ let nbytes = n_rows * size_of :: < T > ( ) ;
164118
165- for & ( patch_freq, patch_label) in PATCH_FREQUENCIES {
166- let array = make_bitpacked_array_with_patches :: < T > ( N_ROWS , patch_freq) ;
119+ group. throughput ( Throughput :: Bytes ( nbytes as u64 ) ) ;
167120
168121 group. bench_with_input (
169- BenchmarkId :: new ( "bitunpack_patched ", patch_label ) ,
122+ BenchmarkId :: new ( format ! ( "unpack/{}bw ", bit_width ) , size_str ) ,
170123 & array,
171124 |b, array| {
172125 b. iter_custom ( |iters| {
@@ -191,6 +144,57 @@ where
191144 group. finish ( ) ;
192145}
193146
147+ fn benchmark_bitunpack ( c : & mut Criterion ) {
148+ benchmark_bitunpack_typed :: < u8 > ( c, 3 , "u8" ) ;
149+ benchmark_bitunpack_typed :: < u16 > ( c, 5 , "u16" ) ;
150+ benchmark_bitunpack_typed :: < u32 > ( c, 6 , "u32" ) ;
151+ benchmark_bitunpack_typed :: < u64 > ( c, 8 , "u64" ) ;
152+ }
153+
154+ /// Benchmark function for unpacking with patches at various frequencies
155+ fn benchmark_bitunpack_with_patches_typed < T > ( c : & mut Criterion , type_name : & str )
156+ where
157+ T : BitPacked + NativePType + DeviceRepr + Add < Output = T > + From < u8 > ,
158+ T :: Physical : DeviceRepr ,
159+ {
160+ let mut group = c. benchmark_group ( format ! ( "cuda/bitpacked_patched_{}" , type_name) ) ;
161+
162+ for & ( n_rows, size_str) in bench_config:: BENCH_SIZES {
163+ let nbytes = n_rows * size_of :: < T > ( ) ;
164+ group. throughput ( Throughput :: Bytes ( nbytes as u64 ) ) ;
165+
166+ for & ( patch_freq, patch_label) in PATCH_FREQUENCIES {
167+ let array = make_bitpacked_array_with_patches :: < T > ( n_rows, patch_freq) ;
168+
169+ group. bench_with_input (
170+ BenchmarkId :: new ( format ! ( "unpack/{}" , patch_label) , size_str) ,
171+ & array,
172+ |b, array| {
173+ b. iter_custom ( |iters| {
174+ let timed = TimedLaunchStrategy :: default ( ) ;
175+ let timer = timed. timer ( ) ;
176+
177+ let mut cuda_ctx =
178+ CudaSession :: create_execution_ctx ( & VortexSession :: empty ( ) )
179+ . vortex_expect ( "failed to create execution context" )
180+ . with_dispatch_mode ( CudaDispatchMode :: StandaloneOnly )
181+ . with_launch_strategy ( Arc :: new ( timed) ) ;
182+
183+ for _ in 0 ..iters {
184+ block_on ( array. clone ( ) . into_array ( ) . execute_cuda ( & mut cuda_ctx) )
185+ . unwrap ( ) ;
186+ }
187+
188+ Duration :: from_nanos ( timer. load ( Ordering :: Relaxed ) )
189+ } ) ;
190+ } ,
191+ ) ;
192+ }
193+ }
194+
195+ group. finish ( ) ;
196+ }
197+
194198fn benchmark_bitunpack_with_patches ( c : & mut Criterion ) {
195199 benchmark_bitunpack_with_patches_typed :: < u8 > ( c, "u8" ) ;
196200 benchmark_bitunpack_with_patches_typed :: < u16 > ( c, "u16" ) ;
0 commit comments