@@ -126,7 +126,10 @@ impl StatsCatalog for FileStatsLayoutReader {
126126
127127 let stat_value = field_stats. get ( stat) ?. as_exact ( ) ?;
128128 let field_dtype = self . struct_fields . field_by_index ( field_idx) ?;
129- let stat_scalar = Scalar :: try_new ( field_dtype, Some ( stat_value) ) . ok ( ) ?;
129+ // Use the stat's own dtype rather than the field dtype. For example,
130+ // NullCount is always u64 regardless of the column type.
131+ let stat_dtype = stat. dtype ( & field_dtype) ?;
132+ let stat_scalar = Scalar :: try_new ( stat_dtype, Some ( stat_value) ) . ok ( ) ?;
130133
131134 Some ( lit ( stat_scalar) )
132135 }
@@ -209,16 +212,20 @@ mod tests {
209212
210213 use vortex_array:: ArrayContext ;
211214 use vortex_array:: IntoArray as _;
215+ use vortex_array:: arrays:: PrimitiveArray ;
212216 use vortex_array:: arrays:: StructArray ;
217+ use vortex_array:: arrays:: datetime:: TemporalData ;
213218 use vortex_array:: dtype:: DType ;
214219 use vortex_array:: dtype:: Nullability ;
215220 use vortex_array:: dtype:: PType ;
216221 use vortex_array:: expr:: get_item;
217222 use vortex_array:: expr:: gt;
223+ use vortex_array:: expr:: is_null;
218224 use vortex_array:: expr:: lit;
219225 use vortex_array:: expr:: root;
220226 use vortex_array:: expr:: stats:: Precision ;
221227 use vortex_array:: expr:: stats:: Stat ;
228+ use vortex_array:: extension:: datetime:: TimeUnit ;
222229 use vortex_array:: scalar:: ScalarValue ;
223230 use vortex_array:: scalar_fn:: session:: ScalarFnSession ;
224231 use vortex_array:: session:: ArraySession ;
@@ -232,6 +239,7 @@ mod tests {
232239 use vortex_layout:: LayoutStrategy ;
233240 use vortex_layout:: layouts:: flat:: writer:: FlatLayoutStrategy ;
234241 use vortex_layout:: layouts:: table:: TableStrategy ;
242+ use vortex_layout:: segments:: SegmentSink ;
235243 use vortex_layout:: segments:: TestSegments ;
236244 use vortex_layout:: sequence:: SequenceId ;
237245 use vortex_layout:: sequence:: SequentialArrayStreamExt ;
@@ -337,4 +345,59 @@ mod tests {
337345 Ok ( ( ) )
338346 } )
339347 }
348+
349+ /// Regression test: `IS NULL` on a nullable timestamp column must not fail with a
350+ /// dtype mismatch. The bug was that `stats_ref` used the *field* dtype (timestamp)
351+ /// for the `NullCount` stat scalar instead of the stat's own dtype (u64).
352+ #[ test]
353+ fn is_null_pruning_on_nullable_timestamp_column ( ) -> VortexResult < ( ) > {
354+ block_on ( |handle| async {
355+ let session = SESSION . clone ( ) . with_handle ( handle) ;
356+ let ctx = ArrayContext :: empty ( ) ;
357+ let segments = Arc :: new ( TestSegments :: default ( ) ) ;
358+ let ( ptr, eof) = SequenceId :: root ( ) . split ( ) ;
359+
360+ // Build a struct with a nullable timestamp column containing some nulls.
361+ let prim_array =
362+ PrimitiveArray :: from_option_iter ( [ Some ( 1_000_000i64 ) , None , Some ( 3_000_000 ) ] )
363+ . into_array ( ) ;
364+ let ts_data = TemporalData :: new_timestamp ( prim_array, TimeUnit :: Microseconds , None ) ;
365+ let ts_dtype = ts_data. dtype ( ) . clone ( ) ;
366+ let ts_array = ts_data. into_array ( ) ;
367+
368+ let struct_array = StructArray :: from_fields ( [ ( "deleted_at" , ts_array) ] . as_slice ( ) ) ?;
369+
370+ let strategy = TableStrategy :: new (
371+ Arc :: new ( FlatLayoutStrategy :: default ( ) ) ,
372+ Arc :: new ( FlatLayoutStrategy :: default ( ) ) ,
373+ ) ;
374+ let layout = strategy
375+ . write_stream (
376+ ctx,
377+ Arc :: clone ( & segments) as Arc < dyn SegmentSink > ,
378+ struct_array. into_array ( ) . to_array_stream ( ) . sequenced ( ptr) ,
379+ eof,
380+ & session,
381+ )
382+ . await ?;
383+
384+ let child = layout. new_reader ( "" . into ( ) , segments, & SESSION ) ?;
385+
386+ // File-level stats: 1 null in deleted_at.
387+ let mut stats = StatsSet :: default ( ) ;
388+ stats. set ( Stat :: NullCount , Precision :: exact ( ScalarValue :: from ( 1u64 ) ) ) ;
389+ let file_stats = FileStatistics :: new ( Arc :: from ( [ stats] ) , Arc :: from ( [ ts_dtype] ) ) ;
390+
391+ let reader = FileStatsLayoutReader :: new ( child, file_stats, SESSION . clone ( ) ) ;
392+
393+ // `is_null(deleted_at)` — should NOT panic or error due to dtype mismatch.
394+ let expr = is_null ( get_item ( "deleted_at" , root ( ) ) ) ;
395+ let mask = Mask :: new_true ( 3 ) ;
396+ let result = reader. pruning_evaluation ( & ( 0 ..3 ) , & expr, mask) ?. await ?;
397+ // null_count is 1 (non-zero), so is_null is not falsified => not pruned.
398+ assert_eq ! ( result, Mask :: new_true( 3 ) ) ;
399+
400+ Ok ( ( ) )
401+ } )
402+ }
340403}
0 commit comments