@@ -28,12 +28,16 @@ use vortex::array::optimizer::ArrayOptimizer;
2828use vortex:: array:: stats:: StatsSet ;
2929use vortex:: dtype:: DType ;
3030use vortex:: dtype:: FieldNames ;
31+ use vortex:: dtype:: PType ;
3132use vortex:: error:: VortexExpect ;
3233use vortex:: error:: VortexResult ;
3334use vortex:: error:: vortex_err;
3435use vortex:: expr:: Expression ;
3536use vortex:: expr:: and_collect;
37+ use vortex:: expr:: cast;
3638use vortex:: expr:: col;
39+ use vortex:: expr:: merge;
40+ use vortex:: expr:: pack;
3741use vortex:: expr:: root;
3842use vortex:: expr:: select;
3943use vortex:: expr:: stats:: Precision ;
@@ -42,6 +46,7 @@ use vortex::file::v2::FileStatsLayoutReader;
4246use vortex:: io:: kanal_ext:: KanalExt ;
4347use vortex:: io:: runtime:: BlockingRuntime ;
4448use vortex:: io:: runtime:: current:: ThreadSafeIterator ;
49+ use vortex:: layout:: layouts:: row_idx:: row_idx;
4550use vortex:: layout:: scan:: multi:: MultiLayoutChild ;
4651use vortex:: layout:: scan:: multi:: MultiLayoutDataSource ;
4752use vortex:: metrics:: tracing:: get_global_labels;
@@ -75,9 +80,11 @@ use crate::duckdb::Value;
7580use crate :: exporter:: ArrayExporter ;
7681use crate :: exporter:: ConversionCache ;
7782
78- /// "file_index" virtual column, may be requested either by user or optimizer.
83+ // Virtual column identifiers. The values mirror the constants defined by DuckDB's MultiFileReader.
84+
85+ /// "file_index" virtual column
7986static FILE_INDEX_COLUMN_IDX : u64 = 9223372036854775810 ;
80- /// "file_row_number" virtual column, may be requested either by user or optimizer.
87+ /// "file_row_number" virtual column
8188static FILE_ROW_NUMBER_COLUMN_IDX : u64 = 9223372036854775809 ;
8289
8390/// See duckdb/src/common/constants.cpp
@@ -156,7 +163,7 @@ pub struct DataSourceLocal {
156163 iterator : DataSourceIterator ,
157164 exporter : Option < ArrayExporter > ,
158165 batch_id : u64 ,
159- file_idx : usize ,
166+ file_index : usize ,
160167}
161168
162169/// Returns scan progress as a percentage (0.0–100.0).
@@ -302,7 +309,7 @@ impl<T: DataSourceTableFunction> TableFunction for T {
302309 let request = ScanRequest {
303310 projection,
304311 filter : filter_expr,
305- ordered : false ,
312+ ordered : file_row_number_column_pos . is_some ( ) ,
306313 ..Default :: default ( )
307314 } ;
308315
@@ -321,11 +328,11 @@ impl<T: DataSourceTableFunction> TableFunction for T {
321328 let stream = scan
322329 . partitions ( )
323330 . enumerate ( )
324- . map ( move |( file_idx , partition) | {
331+ . map ( move |( file_index , partition) | {
325332 // We create a new conversion cache scoped to the partition, since there's no point
326333 // caching anything across partitions.
327334 let cache = Arc :: new ( ConversionCache {
328- file_idx ,
335+ file_index ,
329336 ..Default :: default ( )
330337 } ) ;
331338 let tx = tx. clone ( ) ;
@@ -391,7 +398,7 @@ impl<T: DataSourceTableFunction> TableFunction for T {
391398 iterator : global. iterator . clone ( ) ,
392399 exporter : None ,
393400 batch_id : 0 ,
394- file_idx : 0 ,
401+ file_index : 0 ,
395402 } )
396403 }
397404
@@ -409,8 +416,8 @@ impl<T: DataSourceTableFunction> TableFunction for T {
409416 return Ok ( ( ) ) ;
410417 } ;
411418 let ( array_result, conversion_cache) = result?;
412- local_state. file_idx = conversion_cache. file_idx ;
413419 let array_result = array_result. optimize_recursive ( ctx. session ( ) ) ?;
420+ local_state. file_index = conversion_cache. file_index ;
414421
415422 let array_result: StructArray = if let Some ( array) = array_result. as_opt :: < Struct > ( )
416423 {
@@ -441,7 +448,11 @@ impl<T: DataSourceTableFunction> TableFunction for T {
441448 . exporter
442449 . as_mut ( )
443450 . vortex_expect ( "error: exporter missing" ) ;
444- let has_more_data = exporter. export ( chunk, global_state. file_index_column_pos ) ?;
451+ let has_more_data = exporter. export (
452+ chunk,
453+ global_state. file_index_column_pos ,
454+ global_state. file_row_number_column_pos ,
455+ ) ?;
445456
446457 global_state
447458 . bytes_read
@@ -461,7 +472,7 @@ impl<T: DataSourceTableFunction> TableFunction for T {
461472 if let Some ( pos) = global_state. file_index_column_pos {
462473 chunk
463474 . get_vector_mut ( pos)
464- . reference_value ( & Value :: from ( local_state. file_idx as u64 ) ) ;
475+ . reference_value ( & Value :: from ( local_state. file_index as u64 ) ) ;
465476 }
466477
467478 Ok ( ( ) )
@@ -533,12 +544,13 @@ impl<T: DataSourceTableFunction> TableFunction for T {
533544
534545 fn partition_data (
535546 _bind_data : & Self :: BindData ,
536- _global_init_data : & Self :: GlobalState ,
547+ global_init_data : & Self :: GlobalState ,
537548 local_init_data : & mut Self :: LocalState ,
538549 ) -> PartitionData {
539550 PartitionData {
540551 batch_index : local_init_data. batch_id ,
541- file_index : local_init_data. file_idx ,
552+ file_index_column_pos : global_init_data. file_index_column_pos ,
553+ file_row_number_column_pos : global_init_data. file_row_number_column_pos ,
542554 }
543555 }
544556
@@ -583,8 +595,6 @@ struct ProjectionWithVirtualColumns {
583595
584596/// Creates a projection expression from raw projection/column ID slices and
585597/// column names.
586- /// If FILE_INDEX_COLUMN_IDX is present, returns its position as second
587- /// parameter
588598fn extract_projection_expr (
589599 projection_ids : Option < & [ u64 ] > ,
590600 column_ids : & [ u64 ] ,
@@ -624,8 +634,21 @@ fn extract_projection_expr(
624634 . map ( |col_id| Arc :: from ( column_fields[ col_id as usize ] . name . as_str ( ) ) )
625635 . collect :: < FieldNames > ( ) ;
626636
637+ // The file_index column is filled in later, when the chunk is exported.
638+
639+ let select = select ( names, root ( ) ) ;
640+ let projection = if file_row_number_column_pos. is_some ( ) {
641+ // Append the row index as the last field ("file_row_number"); it is moved to
642+ // its requested column position in scan().
643+ let row_idx = cast ( row_idx ( ) , DType :: Primitive ( PType :: I64 , false . into ( ) ) ) ;
644+ let row_idx_struct = pack ( [ ( "file_row_number" , row_idx) ] , false . into ( ) ) ;
645+ merge ( [ select, row_idx_struct] )
646+ } else {
647+ select
648+ } ;
649+
627650 ProjectionWithVirtualColumns {
628- projection : select ( names , root ( ) ) ,
651+ projection,
629652 file_index_column_pos,
630653 file_row_number_column_pos,
631654 }
0 commit comments