diff --git a/encodings/runend/src/arbitrary.rs b/encodings/runend/src/arbitrary.rs index f32a32dbf4a..baa15e0fd13 100644 --- a/encodings/runend/src/arbitrary.rs +++ b/encodings/runend/src/arbitrary.rs @@ -9,6 +9,8 @@ use vortex_array::LEGACY_SESSION; use vortex_array::VortexSessionExecute; use vortex_array::arrays::PrimitiveArray; use vortex_array::arrays::arbitrary::ArbitraryArray; +use vortex_array::arrays::arbitrary::ArbitraryArrayConfig; +use vortex_array::arrays::arbitrary::ArbitraryWith; use vortex_array::dtype::DType; use vortex_array::dtype::Nullability; use vortex_array::dtype::PType; @@ -46,14 +48,28 @@ impl ArbitraryRunEndArray { if num_runs == 0 { // Empty RunEndArray let ends = PrimitiveArray::from_iter(Vec::::new()).into_array(); - let values = ArbitraryArray::arbitrary_with(u, Some(0), dtype)?.0; + let values = ArbitraryArray::arbitrary_with_config( + u, + &ArbitraryArrayConfig { + dtype: Some(dtype.clone()), + len: 0..=0, + }, + )? + .0; let runend_array = RunEnd::try_new(ends, values, &mut ctx) .vortex_expect("Empty RunEndArray creation should succeed"); return Ok(ArbitraryRunEndArray(runend_array)); } // Generate arbitrary values for each run - let values = ArbitraryArray::arbitrary_with(u, Some(num_runs), dtype)?.0; + let values = ArbitraryArray::arbitrary_with_config( + u, + &ArbitraryArrayConfig { + dtype: Some(dtype.clone()), + len: num_runs..=num_runs, + }, + )? + .0; // Generate strictly increasing ends // Each end must be > previous end, and first end must be >= 1 diff --git a/fuzz/src/array/mod.rs b/fuzz/src/array/mod.rs index 8f2a5cd0fe6..3509ce4be5a 100644 --- a/fuzz/src/array/mod.rs +++ b/fuzz/src/array/mod.rs @@ -49,6 +49,8 @@ use vortex_array::aggregate_fn::fns::sum::sum; use vortex_array::arrays::ConstantArray; use vortex_array::arrays::PrimitiveArray; use vortex_array::arrays::arbitrary::ArbitraryArray; +use vortex_array::arrays::arbitrary::ArbitraryArrayConfig; +use vortex_array::arrays::arbitrary::ArbitraryWith; use vortex_array::builtins::ArrayBuiltins; use vortex_array::dtype::DType; use vortex_array::dtype::Nullability; @@ -67,6 +69,7 @@ use vortex_error::vortex_panic; use vortex_mask::Mask; use vortex_utils::aliases::hash_set::HashSet; +use crate::FUZZ_ARRAY_MAX_LEN; use crate::SESSION; use crate::error::Backtrace; use crate::error::VortexFuzzError; @@ -170,7 +173,14 @@ impl ExpectedValue { impl<'a> Arbitrary<'a> for FuzzArrayAction { fn arbitrary(u: &mut Unstructured<'a>) -> arbitrary::Result { - let array = ArbitraryArray::arbitrary(u)?.0; + let array = ArbitraryArray::arbitrary_with_config( + u, + &ArbitraryArrayConfig { + dtype: None, + len: 0..=FUZZ_ARRAY_MAX_LEN, + }, + )? + .0; let mut current_array = array.clone(); let mut ctx = SESSION.create_execution_ctx(); diff --git a/fuzz/src/file/mod.rs b/fuzz/src/file/mod.rs index bcd6ac0c704..0ec01a0d55e 100644 --- a/fuzz/src/file/mod.rs +++ b/fuzz/src/file/mod.rs @@ -5,10 +5,13 @@ use arbitrary::Arbitrary; use arbitrary::Unstructured; use vortex_array::ArrayRef; use vortex_array::arrays::arbitrary::ArbitraryArray; +use vortex_array::arrays::arbitrary::ArbitraryArrayConfig; +use vortex_array::arrays::arbitrary::ArbitraryWith; use vortex_array::expr::Expression; use vortex_array::expr::arbitrary::filter_expr; use vortex_array::expr::arbitrary::projection_expr; +use crate::FUZZ_ARRAY_MAX_LEN; use crate::array::CompressorStrategy; #[derive(Debug)] @@ -21,7 +24,14 @@ pub struct FuzzFileAction { impl<'a> Arbitrary<'a> for FuzzFileAction { fn arbitrary(u: &mut Unstructured<'a>) -> arbitrary::Result { - let array = ArbitraryArray::arbitrary(u)?.0; + let array = ArbitraryArray::arbitrary_with_config( + u, + &ArbitraryArrayConfig { + dtype: None, + len: 0..=FUZZ_ARRAY_MAX_LEN, + }, + )? + .0; let dtype = array.dtype().clone(); Ok(FuzzFileAction { array, diff --git a/fuzz/src/lib.rs b/fuzz/src/lib.rs index 5bdc1b33485..1384adfa9c5 100644 --- a/fuzz/src/lib.rs +++ b/fuzz/src/lib.rs @@ -32,6 +32,8 @@ pub use gpu::FuzzCompressGpu; #[cfg(feature = "cuda")] pub use gpu::run_compress_gpu; +pub const FUZZ_ARRAY_MAX_LEN: usize = 16_384; + // Runtime initialization - platform-specific #[cfg(not(target_arch = "wasm32"))] mod native_runtime { diff --git a/vortex-array/src/arrays/arbitrary.rs b/vortex-array/src/arrays/arbitrary.rs index fc5919d3e9b..bfe237bc20b 100644 --- a/vortex-array/src/arrays/arbitrary.rs +++ b/vortex-array/src/arrays/arbitrary.rs @@ -2,9 +2,11 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors use std::iter; +use std::ops::RangeInclusive; use std::sync::Arc; use arbitrary::Arbitrary; +use arbitrary::Error::IncorrectFormat; use arbitrary::Result; use arbitrary::Unstructured; use vortex_buffer::BitBuffer; @@ -41,16 +43,37 @@ use crate::validity::Validity; #[derive(Clone, Debug)] pub struct ArbitraryArray(pub ArrayRef); -impl<'a> Arbitrary<'a> for ArbitraryArray { - fn arbitrary(u: &mut Unstructured<'a>) -> Result { - let dtype = u.arbitrary()?; - Self::arbitrary_with(u, None, &dtype) - } +/// Trait for generating arbitrary values with a caller-provided configuration. +pub trait ArbitraryWith<'a, C>: Sized { + /// Generate an arbitrary value using the provided configuration. + fn arbitrary_with_config(u: &mut Unstructured<'a>, config: &C) -> Result; +} + +/// Configuration for arbitrary array generation. +#[derive(Clone, Debug)] +pub struct ArbitraryArrayConfig { + /// Fixed dtype, or `None` to generate one from [`Unstructured`]. + pub dtype: Option, + /// Inclusive range for the total array length. + pub len: RangeInclusive, } -impl ArbitraryArray { - pub fn arbitrary_with(u: &mut Unstructured, len: Option, dtype: &DType) -> Result { - random_array(u, dtype, len).map(ArbitraryArray) +impl<'a> ArbitraryWith<'a, ArbitraryArrayConfig> for ArbitraryArray { + fn arbitrary_with_config( + u: &mut Unstructured<'a>, + config: &ArbitraryArrayConfig, + ) -> Result { + if config.len.is_empty() { + return Err(IncorrectFormat); + } + + let dtype = match &config.dtype { + Some(dtype) => dtype.clone(), + None => u.arbitrary()?, + }; + let len = u.int_in_range(config.len.clone())?; + + random_array(u, &dtype, Some(len)).map(ArbitraryArray) } } diff --git a/vortex-array/src/arrays/dict/arbitrary.rs b/vortex-array/src/arrays/dict/arbitrary.rs index 0c4f5780874..4eb938181b9 100644 --- a/vortex-array/src/arrays/dict/arbitrary.rs +++ b/vortex-array/src/arrays/dict/arbitrary.rs @@ -13,6 +13,8 @@ use crate::ArrayRef; use crate::IntoArray; use crate::arrays::PrimitiveArray; use crate::arrays::arbitrary::ArbitraryArray; +use crate::arrays::arbitrary::ArbitraryArrayConfig; +use crate::arrays::arbitrary::ArbitraryWith; use crate::arrays::arbitrary::random_validity; use crate::dtype::DType; use crate::dtype::NativePType; @@ -36,7 +38,14 @@ impl ArbitraryDictArray { // Generate the number of unique values (dictionary size) let values_len = u.int_in_range(1..=20)?; // Generate values array with the given dtype - let values = ArbitraryArray::arbitrary_with(u, Some(values_len), dtype)?.0; + let values = ArbitraryArray::arbitrary_with_config( + u, + &ArbitraryArrayConfig { + dtype: Some(dtype.clone()), + len: values_len..=values_len, + }, + )? + .0; // Generate codes that index into the values let codes_len = len.unwrap_or(u.int_in_range(0..=100)?);