-
Notifications
You must be signed in to change notification settings - Fork 149
Expand file tree
/
Copy pathmod.rs
More file actions
235 lines (190 loc) · 6.65 KB
/
mod.rs
File metadata and controls
235 lines (190 loc) · 6.65 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors
use std::fmt::Debug;
use std::fmt::Display;
use std::fmt::Formatter;
use enum_iterator::Sequence;
use enum_iterator::all;
use num_enum::IntoPrimitive;
use num_enum::TryFromPrimitive;
use crate::dtype::DType;
use crate::dtype::Nullability::NonNullable;
use crate::dtype::PType;
mod bound;
mod precision;
mod provider;
mod stat_bound;
pub use bound::*;
pub use precision::*;
pub use provider::*;
pub use stat_bound::*;
use crate::aggregate_fn;
use crate::aggregate_fn::AggregateFnVTable;
use crate::aggregate_fn::EmptyOptions;
/// A statistic that can be recorded for an array.
///
/// The enum is `#[repr(u8)]` with an explicit discriminant on every variant; the `IntoPrimitive`
/// and `TryFromPrimitive` derives convert to and from that `u8`. The `Sequence` derive is what
/// [`Stat::all`] iterates over.
///
/// NOTE(review): the explicit discriminants look like a persisted encoding — confirm before
/// renumbering or reusing a value.
#[derive(
Debug,
Clone,
Copy,
PartialEq,
Eq,
PartialOrd,
Ord,
Hash,
Sequence,
IntoPrimitive,
TryFromPrimitive,
)]
#[repr(u8)]
pub enum Stat {
/// Whether all values are the same (nulls are not equal to other non-null values,
/// so this is true iff all values are null or all values are the same non-null value).
IsConstant = 0,
/// Whether the non-null values in the array are sorted in ascending order (i.e., we skip nulls).
/// This may later be extended to support descending order, but for now we only support ascending order.
IsSorted = 1,
/// Whether the non-null values in the array are strictly sorted in ascending order (i.e., sorted with no duplicates).
/// This may later be extended to support descending order, but for now we only support ascending order.
IsStrictSorted = 2,
/// The maximum value in the array (ignoring nulls, unless all values are null).
/// Has the same dtype as the array; [`Stat::dtype`] yields `None` for a `DType::Null` array.
Max = 3,
/// The minimum value in the array (ignoring nulls, unless all values are null).
/// Has the same dtype as the array; [`Stat::dtype`] yields `None` for a `DType::Null` array.
Min = 4,
/// The sum of the non-null values of the array.
/// Its dtype is whatever the `sum` aggregate function reports for the array dtype.
Sum = 5,
/// The number of null values in the array (a non-nullable `u64`, see [`Stat::dtype`]).
NullCount = 6,
/// The uncompressed size of the array in bytes (a non-nullable `u64`, see [`Stat::dtype`]).
UncompressedSizeInBytes = 7,
/// The number of NaN values in the array.
/// Its dtype is whatever the `nan_count` aggregate function reports for the array dtype.
NaNCount = 8,
}
// These structs allow the extraction of the bound from the `Precision` value.
// They tie together the Stat and the StatBound, which allows the bound to be extracted.
// Each one is a field-less marker whose `StatType` impl names the matching `Stat` variant
// (`STAT`) and the bound type (`Bound`).
/// Marker type for [`Stat::Max`].
pub struct Max;
/// Marker type for [`Stat::Min`].
pub struct Min;
/// Marker type for [`Stat::Sum`].
pub struct Sum;
/// Marker type for [`Stat::IsConstant`].
pub struct IsConstant;
/// Marker type for [`Stat::IsSorted`].
pub struct IsSorted;
/// Marker type for [`Stat::IsStrictSorted`].
pub struct IsStrictSorted;
/// Marker type for [`Stat::NullCount`].
pub struct NullCount;
/// Marker type for [`Stat::UncompressedSizeInBytes`].
pub struct UncompressedSizeInBytes;
/// Marker type for [`Stat::NaNCount`].
pub struct NaNCount;
/// Binds the [`IsConstant`] marker to [`Stat::IsConstant`]; the value is a `bool` and its bound
/// type is [`Precision<bool>`].
impl StatType<bool> for IsConstant {
type Bound = Precision<bool>;
const STAT: Stat = Stat::IsConstant;
}
/// Binds the [`IsSorted`] marker to [`Stat::IsSorted`]; the value is a `bool` and its bound
/// type is [`Precision<bool>`].
impl StatType<bool> for IsSorted {
type Bound = Precision<bool>;
const STAT: Stat = Stat::IsSorted;
}
/// Binds the [`IsStrictSorted`] marker to [`Stat::IsStrictSorted`]; the value is a `bool` and its
/// bound type is [`Precision<bool>`].
impl StatType<bool> for IsStrictSorted {
type Bound = Precision<bool>;
const STAT: Stat = Stat::IsStrictSorted;
}
/// Binds the [`NullCount`] marker to [`Stat::NullCount`] for any `T: PartialOrd + Clone`, with
/// [`UpperBound<T>`] as the bound type.
// NOTE(review): presumably an inexact null count is read as an upper limit on the true count —
// confirm against `UpperBound`.
impl<T: PartialOrd + Clone> StatType<T> for NullCount {
type Bound = UpperBound<T>;
const STAT: Stat = Stat::NullCount;
}
/// Binds the [`UncompressedSizeInBytes`] marker to [`Stat::UncompressedSizeInBytes`] for any
/// `T: PartialOrd + Clone`, with [`UpperBound<T>`] as the bound type.
// NOTE(review): presumably an inexact size is read as an upper limit on the true size —
// confirm against `UpperBound`.
impl<T: PartialOrd + Clone> StatType<T> for UncompressedSizeInBytes {
type Bound = UpperBound<T>;
const STAT: Stat = Stat::UncompressedSizeInBytes;
}
/// Binds the [`Max`] marker to [`Stat::Max`] for any `T: PartialOrd + Clone + Debug`, with
/// [`UpperBound<T>`] as the bound type.
// NOTE(review): presumably an inexact maximum still bounds every value from above — confirm
// against `UpperBound`.
impl<T: PartialOrd + Clone + Debug> StatType<T> for Max {
type Bound = UpperBound<T>;
const STAT: Stat = Stat::Max;
}
/// Binds the [`Min`] marker to [`Stat::Min`] for any `T: PartialOrd + Clone + Debug`, with
/// [`LowerBound<T>`] as the bound type (the only stat here that uses a lower bound).
// NOTE(review): presumably an inexact minimum still bounds every value from below — confirm
// against `LowerBound`.
impl<T: PartialOrd + Clone + Debug> StatType<T> for Min {
type Bound = LowerBound<T>;
const STAT: Stat = Stat::Min;
}
/// Binds the [`Sum`] marker to [`Stat::Sum`] for any `T: PartialOrd + Clone + Debug`. Unlike
/// min/max, the bound type is the plain [`Precision<T>`] rather than an upper or lower bound.
impl<T: PartialOrd + Clone + Debug> StatType<T> for Sum {
type Bound = Precision<T>;
const STAT: Stat = Stat::Sum;
}
/// Binds the [`NaNCount`] marker to [`Stat::NaNCount`] for any `T: PartialOrd + Clone`, with
/// [`UpperBound<T>`] as the bound type.
// NOTE(review): presumably an inexact NaN count is read as an upper limit on the true count —
// confirm against `UpperBound`.
impl<T: PartialOrd + Clone> StatType<T> for NaNCount {
type Bound = UpperBound<T>;
const STAT: Stat = Stat::NaNCount;
}
impl Stat {
    /// Whether the statistic is commutative (i.e., whether merging can be done independently of
    /// ordering), e.g., min/max are commutative, but is_sorted is not.
    pub fn is_commutative(&self) -> bool {
        // NOTE: we prefer an exhaustive match (no `_` arm) to force a compile error if we add a
        // new stat.
        match self {
            Self::IsConstant
            | Self::Max
            | Self::Min
            | Self::NullCount
            | Self::Sum
            | Self::NaNCount
            | Self::UncompressedSizeInBytes => true,
            Self::IsSorted | Self::IsStrictSorted => false,
        }
    }

    /// Whether the statistic has the same dtype as the array it's computed on.
    ///
    /// This is true only for [`Stat::Min`] and [`Stat::Max`].
    pub fn has_same_dtype_as_array(&self) -> bool {
        matches!(self, Self::Min | Self::Max)
    }

    /// Return the [`DType`] of the statistic scalar assuming the array is of the given [`DType`].
    ///
    /// Returns `None` when the statistic has no dtype for `data_type`: min/max of a
    /// [`DType::Null`] array, or a sum / NaN count whose aggregate function reports no return
    /// dtype for `data_type`.
    pub fn dtype(&self, data_type: &DType) -> Option<DType> {
        Some(match self {
            Self::IsConstant | Self::IsSorted | Self::IsStrictSorted => DType::Bool(NonNullable),
            // Min/max take the array's own dtype, which a null-typed array cannot provide.
            Self::Max | Self::Min if matches!(data_type, DType::Null) => return None,
            Self::Max | Self::Min => data_type.clone(),
            Self::NullCount | Self::UncompressedSizeInBytes => {
                DType::Primitive(PType::U64, NonNullable)
            }
            // Sum and NaN count defer to the matching aggregate function; `?` forwards its `None`.
            Self::NaNCount => {
                aggregate_fn::fns::nan_count::NanCount.return_dtype(&EmptyOptions, data_type)?
            }
            Self::Sum => aggregate_fn::fns::sum::Sum.return_dtype(&EmptyOptions, data_type)?,
        })
    }

    /// The snake_case name of the statistic; this is what the `Display` impl prints.
    ///
    /// Every name is a string literal, so the result is `'static` and does not borrow `self`.
    pub fn name(&self) -> &'static str {
        match self {
            Self::IsConstant => "is_constant",
            Self::IsSorted => "is_sorted",
            Self::IsStrictSorted => "is_strict_sorted",
            Self::Max => "max",
            Self::Min => "min",
            Self::NullCount => "null_count",
            Self::UncompressedSizeInBytes => "uncompressed_size_in_bytes",
            Self::Sum => "sum",
            Self::NaNCount => "nan_count",
        }
    }

    /// Iterate over every [`Stat`] variant.
    pub fn all() -> impl Iterator<Item = Stat> {
        all::<Self>()
    }
}
impl Display for Stat {
    /// Writes the statistic's name, exactly as returned by [`Stat::name`].
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        // Emit the name verbatim, with no width or alignment handling.
        f.write_str(self.name())
    }
}
#[cfg(test)]
mod test {
    use crate::LEGACY_SESSION;
    use crate::VortexSessionExecute;
    use crate::arrays::PrimitiveArray;
    use crate::expr::stats::Stat;

    /// Computing the minimum of an all-null array must yield `None` rather than panic.
    #[test]
    fn min_of_nulls_is_not_panic() {
        let all_null = PrimitiveArray::from_option_iter::<i32, _>([None, None, None, None]);
        let mut ctx = LEGACY_SESSION.create_execution_ctx();

        let min = all_null
            .statistics()
            .compute_as::<i64>(Stat::Min, &mut ctx);

        assert_eq!(min, None);
    }

    /// Only `Min` and `Max` report the same dtype as the array; every other stat must not.
    #[test]
    fn has_same_dtype_as_array() {
        for stat in Stat::all() {
            let expected = matches!(stat, Stat::Min | Stat::Max);
            assert_eq!(stat.has_same_dtype_as_array(), expected);
        }
    }
}