Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 68 additions & 0 deletions encodings/sparse/public-api.lock
Original file line number Diff line number Diff line change
Expand Up @@ -138,4 +138,72 @@ pub fn vortex_sparse::SparseMetadata::clear(&mut self)

pub fn vortex_sparse::SparseMetadata::encoded_len(&self) -> usize

pub struct vortex_sparse::SparseSlots

pub vortex_sparse::SparseSlots::patch_chunk_offsets: core::option::Option<vortex_array::array::erased::ArrayRef>

pub vortex_sparse::SparseSlots::patch_indices: vortex_array::array::erased::ArrayRef

pub vortex_sparse::SparseSlots::patch_values: vortex_array::array::erased::ArrayRef

impl vortex_sparse::SparseSlots

pub const vortex_sparse::SparseSlots::COUNT: usize

pub const vortex_sparse::SparseSlots::NAMES: [&'static str; 3]

pub const vortex_sparse::SparseSlots::PATCH_CHUNK_OFFSETS: usize

pub const vortex_sparse::SparseSlots::PATCH_INDICES: usize

pub const vortex_sparse::SparseSlots::PATCH_VALUES: usize

pub fn vortex_sparse::SparseSlots::from_slots(slots: alloc::vec::Vec<core::option::Option<vortex_array::array::erased::ArrayRef>>) -> Self

pub fn vortex_sparse::SparseSlots::into_slots(self) -> alloc::vec::Vec<core::option::Option<vortex_array::array::erased::ArrayRef>>

pub struct vortex_sparse::SparseSlotsView<'a>

pub vortex_sparse::SparseSlotsView::patch_chunk_offsets: core::option::Option<&'a vortex_array::array::erased::ArrayRef>

pub vortex_sparse::SparseSlotsView::patch_indices: &'a vortex_array::array::erased::ArrayRef

pub vortex_sparse::SparseSlotsView::patch_values: &'a vortex_array::array::erased::ArrayRef

impl<'a> vortex_sparse::SparseSlotsView<'a>

pub fn vortex_sparse::SparseSlotsView<'a>::from_slots(slots: &'a [core::option::Option<vortex_array::array::erased::ArrayRef>]) -> Self

pub fn vortex_sparse::SparseSlotsView<'a>::to_owned(&self) -> vortex_sparse::SparseSlots

impl<'a> core::clone::Clone for vortex_sparse::SparseSlotsView<'a>

pub fn vortex_sparse::SparseSlotsView<'a>::clone(&self) -> vortex_sparse::SparseSlotsView<'a>

impl<'a> core::fmt::Debug for vortex_sparse::SparseSlotsView<'a>

pub fn vortex_sparse::SparseSlotsView<'a>::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result

impl<'a> core::marker::Copy for vortex_sparse::SparseSlotsView<'a>

pub trait vortex_sparse::SparseArraySlotsExt: vortex_array::array::typed::TypedArrayRef<vortex_sparse::Sparse>

pub fn vortex_sparse::SparseArraySlotsExt::patch_chunk_offsets(&self) -> core::option::Option<&vortex_array::array::erased::ArrayRef>

pub fn vortex_sparse::SparseArraySlotsExt::patch_indices(&self) -> &vortex_array::array::erased::ArrayRef

pub fn vortex_sparse::SparseArraySlotsExt::patch_values(&self) -> &vortex_array::array::erased::ArrayRef

pub fn vortex_sparse::SparseArraySlotsExt::slots_view(&self) -> vortex_sparse::SparseSlotsView<'_>

impl<T: vortex_array::array::typed::TypedArrayRef<vortex_sparse::Sparse>> vortex_sparse::SparseArraySlotsExt for T

pub fn T::patch_chunk_offsets(&self) -> core::option::Option<&vortex_array::array::erased::ArrayRef>

pub fn T::patch_indices(&self) -> &vortex_array::array::erased::ArrayRef

pub fn T::patch_values(&self) -> &vortex_array::array::erased::ArrayRef

pub fn T::slots_view(&self) -> vortex_sparse::SparseSlotsView<'_>

pub type vortex_sparse::SparseArray = vortex_array::array::typed::Array<vortex_sparse::Sparse>
166 changes: 90 additions & 76 deletions encodings/sparse/src/canonical.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,66 +56,92 @@ use vortex_error::vortex_bail;
use crate::ConstantArray;
use crate::Sparse;
use crate::SparseArray;
pub(super) fn execute_sparse(
array: &SparseArray,
ctx: &mut ExecutionCtx,
) -> VortexResult<ArrayRef> {
if array.patches().num_patches() == 0 {
return Ok(ConstantArray::new(array.fill_scalar().clone(), array.len()).into_array());
use crate::SparseParts;

/// Wrap already-resolved patches in a throwaway [`SparseArray`].
///
/// The canonicalization helpers in this file only use the returned array to
/// obtain its length and to call `.validity()` on it; the patch indices and
/// values themselves are read from the [`Patches`] directly.
///
/// * `patches` - resolved patches (offset = 0); their indices and values are cloned.
/// * `fill_value` - fill scalar of the sparse array; cloned into the new array.
/// * `len` - logical length of the array being rebuilt.
///
/// NOTE(review): a failure in `Sparse::try_new` is not propagated to the caller;
/// it goes through `vortex_expect`, which presumably panics with the message
/// below — confirm against `vortex_error`.
fn sparse_array_for_validity(patches: &Patches, fill_value: &Scalar, len: usize) -> SparseArray {
    // Clone the pieces up front, in the same order the constructor takes them.
    let patch_indices = patches.indices().clone();
    let patch_values = patches.values().clone();
    let fill = fill_value.clone();

    let rebuilt = Sparse::try_new(patch_indices, patch_values, len, fill);
    rebuilt.vortex_expect("rebuilding SparseArray for validity")
}

pub(super) fn execute_sparse(parts: SparseParts, ctx: &mut ExecutionCtx) -> VortexResult<ArrayRef> {
let SparseParts {
patches,
fill_value,
dtype,
len,
} = parts;

if patches.num_patches() == 0 {
return Ok(ConstantArray::new(fill_value, len).into_array());
}

Ok(match array.dtype() {
// Patches are already resolved (offset subtracted) by SparseParts::resolve_patches().
Ok(match &dtype {
DType::Null => {
assert!(array.fill_scalar().is_null());
NullArray::new(array.len()).into_array()
}
DType::Bool(..) => {
let resolved_patches = array.resolved_patches()?;
execute_sparse_bools(&resolved_patches, array.fill_scalar(), ctx)?
assert!(fill_value.is_null());
NullArray::new(len).into_array()
}
DType::Bool(..) => execute_sparse_bools(&patches, &fill_value, ctx)?,
DType::Primitive(ptype, ..) => {
let resolved_patches = array.resolved_patches()?;
match_each_native_ptype!(ptype, |P| {
execute_sparse_primitives::<P>(&resolved_patches, array.fill_scalar(), ctx)?
execute_sparse_primitives::<P>(&patches, &fill_value, ctx)?
})
}
DType::Struct(struct_fields, ..) => execute_sparse_struct(
struct_fields,
array.fill_scalar().as_struct(),
array.dtype().nullability(),
array.patches(),
array.len(),
fill_value.as_struct(),
dtype.nullability(),
&patches,
len,
ctx,
)?,
DType::Decimal(decimal_dtype, nullability) => {
let canonical_decimal_value_type =
DecimalType::smallest_decimal_value_type(decimal_dtype);
let fill_value = array.fill_scalar().as_decimal();
let fill_decimal = fill_value.as_decimal();
match_each_decimal_value_type!(canonical_decimal_value_type, |D| {
execute_sparse_decimal::<D>(
*decimal_dtype,
*nullability,
fill_value,
array.patches(),
array.len(),
fill_decimal,
&patches,
len,
ctx,
)?
})
}
dtype @ DType::Utf8(..) => {
let fill_value = array.fill_scalar().as_utf8().value().cloned();
let fill_value = fill_value.map(BufferString::into_inner);
execute_varbin(array, dtype.clone(), fill_value, ctx)?
let fill = fill_value.as_utf8().value().cloned();
let fill = fill.map(BufferString::into_inner);
let validity_arr = sparse_array_for_validity(&patches, &fill_value, len);
execute_varbin(&patches, &validity_arr, dtype.clone(), fill, ctx)?
}
dtype @ DType::Binary(..) => {
let fill_value = array.fill_scalar().as_binary().value().cloned();
execute_varbin(array, dtype.clone(), fill_value, ctx)?
let fill = fill_value.as_binary().value().cloned();
let validity_arr = sparse_array_for_validity(&patches, &fill_value, len);
execute_varbin(&patches, &validity_arr, dtype.clone(), fill, ctx)?
}
DType::List(values_dtype, nullability) => {
execute_sparse_lists(array, Arc::clone(values_dtype), *nullability, ctx)?
let validity_arr = sparse_array_for_validity(&patches, &fill_value, len);
execute_sparse_lists(
&patches,
&validity_arr,
&fill_value,
Arc::clone(values_dtype),
*nullability,
ctx,
)?
}
DType::FixedSizeList(.., nullability) => {
execute_sparse_fixed_size_list(array, *nullability, ctx)?
let validity_arr = sparse_array_for_validity(&patches, &fill_value, len);
execute_sparse_fixed_size_list(&patches, &validity_arr, &fill_value, *nullability, ctx)?
}
DType::Extension(_ext_dtype) => todo!(),
DType::Variant(_) => vortex_bail!("Sparse canonicalization does not support Variant"),
Expand All @@ -127,39 +153,32 @@ pub(super) fn execute_sparse(
reason = "complexity is from nested match_smallest_offset_type macro"
)]
fn execute_sparse_lists(
array: &SparseArray,
resolved: &Patches,
validity_array: &SparseArray,
fill_value: &Scalar,
values_dtype: Arc<DType>,
nullability: Nullability,
ctx: &mut ExecutionCtx,
) -> VortexResult<ArrayRef> {
let resolved_patches = array.resolved_patches()?;

let indices = resolved_patches
.indices()
.clone()
.execute::<PrimitiveArray>(ctx)?;
let values = resolved_patches
.values()
.clone()
.execute::<ListViewArray>(ctx)?;
let fill_value = array.fill_scalar().as_list();
let indices = resolved.indices().clone().execute::<PrimitiveArray>(ctx)?;
let values = resolved.values().clone().execute::<ListViewArray>(ctx)?;
let fill_list = fill_value.as_list();

let n_filled = array.len() - resolved_patches.num_patches();
let total_canonical_values = values.elements().len() + fill_value.len() * n_filled;
let len = validity_array.len();
let n_filled = len - resolved.num_patches();
let total_canonical_values = values.elements().len() + fill_list.len() * n_filled;

let validity = {
let arr = array.as_array();
Validity::from_mask(arr.validity()?.execute_mask(arr.len(), ctx)?, nullability)
};
let arr = validity_array.as_ref();
let validity = Validity::from_mask(arr.validity()?.execute_mask(arr.len(), ctx)?, nullability);

Ok(match_each_integer_ptype!(indices.ptype(), |I| {
match_smallest_offset_type!(total_canonical_values, |O| {
execute_sparse_lists_inner::<I, O>(
indices.as_slice(),
values,
fill_value,
fill_list,
values_dtype,
array.len(),
len,
total_canonical_values,
validity,
ctx,
Expand Down Expand Up @@ -223,32 +242,29 @@ fn execute_sparse_lists_inner<I: IntegerPType, O: IntegerPType>(

/// Canonicalize a sparse [`FixedSizeListArray`] by expanding it into a dense representation.
fn execute_sparse_fixed_size_list(
array: &SparseArray,
resolved: &Patches,
validity_array: &SparseArray,
fill_value: &Scalar,
nullability: Nullability,
ctx: &mut ExecutionCtx,
) -> VortexResult<ArrayRef> {
let resolved_patches = array.resolved_patches()?;
let indices = resolved_patches
.indices()
.clone()
.execute::<PrimitiveArray>(ctx)?;
let values = resolved_patches
let indices = resolved.indices().clone().execute::<PrimitiveArray>(ctx)?;
let values = resolved
.values()
.clone()
.execute::<FixedSizeListArray>(ctx)?;
let fill_value = array.fill_scalar().as_list();
let fill_list = fill_value.as_list();
let len = validity_array.len();

let validity = {
let arr = array.as_array();
Validity::from_mask(arr.validity()?.execute_mask(arr.len(), ctx)?, nullability)
};
let arr = validity_array.as_ref();
let validity = Validity::from_mask(arr.validity()?.execute_mask(arr.len(), ctx)?, nullability);

Ok(match_each_integer_ptype!(indices.ptype(), |I| {
execute_sparse_fixed_size_list_inner::<I>(
indices.as_slice(),
values,
fill_value,
array.len(),
fill_list,
len,
validity,
ctx,
)
Expand Down Expand Up @@ -487,22 +503,20 @@ fn execute_sparse_decimal<D: NativeDecimalType>(
}

fn execute_varbin(
array: &SparseArray,
resolved: &Patches,
validity_array: &SparseArray,
dtype: DType,
fill_value: Option<ByteBuffer>,
ctx: &mut ExecutionCtx,
) -> VortexResult<ArrayRef> {
let patches = array.resolved_patches()?;
let indices = patches.indices().clone().execute::<PrimitiveArray>(ctx)?;
let values = patches.values().clone().execute::<VarBinViewArray>(ctx)?;
let validity = {
let arr = array.as_array();
Validity::from_mask(
arr.validity()?.execute_mask(arr.len(), ctx)?,
dtype.nullability(),
)
};
let len = array.len();
let indices = resolved.indices().clone().execute::<PrimitiveArray>(ctx)?;
let values = resolved.values().clone().execute::<VarBinViewArray>(ctx)?;
let arr = validity_array.as_ref();
let validity = Validity::from_mask(
arr.validity()?.execute_mask(arr.len(), ctx)?,
dtype.nullability(),
);
let len = arr.len();

Ok(match_each_integer_ptype!(indices.ptype(), |I| {
let indices = indices.to_buffer::<I>();
Expand Down
Loading
Loading