Skip to content

Commit 6d4dfca

Browse files
committed
re-add column pos
1 parent b0f2e4a commit 6d4dfca

2 files changed

Lines changed: 34 additions & 11 deletions

File tree

vortex-duckdb/cpp/table_function.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ using namespace duckdb;
2121
using vortex::CData;
2222
using vortex::IntoErrString;
2323
constexpr column_t COLUMN_IDENTIFIER_FILE_INDEX = MultiFileReader::COLUMN_IDENTIFIER_FILE_INDEX;
24+
constexpr column_t COLUMN_IDENTIFIER_FILE_ROW_NUMBER = MultiFileReader::COLUMN_IDENTIFIER_FILE_ROW_NUMBER;
2425

2526
struct CTableFunctionInfo final : TableFunctionInfo {
2627
explicit CTableFunctionInfo(const duckdb_vx_tfunc_vtab_t &vtab) : vtab(vtab) {
@@ -391,13 +392,14 @@ extern "C" duckdb_state duckdb_vx_tfunc_register(duckdb_database ffi_db, const d
391392
tf.statistics = c_statistics;
392393

393394
tf.get_row_id_columns = [](auto &, auto) -> vector<column_t> {
394-
return {COLUMN_IDENTIFIER_FILE_INDEX};
395+
return {COLUMN_IDENTIFIER_FILE_INDEX, COLUMN_IDENTIFIER_FILE_ROW_NUMBER};
395396
};
396397

397398
tf.get_virtual_columns = [](auto &, auto) -> virtual_column_map_t {
398399
return {
399400
{COLUMN_IDENTIFIER_EMPTY, {"", LogicalTypeId::BOOLEAN}},
400401
{COLUMN_IDENTIFIER_FILE_INDEX, {"file_index", LogicalType::UBIGINT}},
402+
{COLUMN_IDENTIFIER_FILE_ROW_NUMBER, {"file_row_number", LogicalType::BIGINT}},
401403
};
402404
};
403405

vortex-duckdb/src/datasource.rs

Lines changed: 31 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -75,8 +75,10 @@ use crate::duckdb::Value;
7575
use crate::exporter::ArrayExporter;
7676
use crate::exporter::ConversionCache;
7777

78-
/// File index virtual column, may be requested either by user or optimizer.
78+
/// "file_index" virtual column, may be requested either by user or optimizer.
7979
static FILE_INDEX_COLUMN_IDX: u64 = 9223372036854775810;
80+
/// "file_row_number" virtual column, may be requested either by user or optimizer.
81+
static FILE_ROW_NUMBER_COLUMN_IDX: u64 = 9223372036854775809;
8082

8183
/// See duckdb/src/common/constants.cpp
8284
fn is_virtual_column(id: u64) -> bool {
@@ -146,6 +148,7 @@ pub struct DataSourceGlobal {
146148
bytes_total: Arc<AtomicU64>,
147149
bytes_read: AtomicU64,
148150
file_index_column_pos: Option<usize>,
151+
file_row_number_column_pos: Option<usize>,
149152
}
150153

151154
/// Per-thread local scan state.
@@ -278,8 +281,11 @@ impl<T: DataSourceTableFunction> TableFunction for T {
278281
let column_ids = init_input.column_ids();
279282
let projection_ids = init_input.projection_ids();
280283

281-
let (projection_expr, file_idx_pos) =
282-
extract_projection_expr(projection_ids, column_ids, &bind_data.column_fields);
284+
let ProjectionWithVirtualColumns {
285+
projection,
286+
file_index_column_pos,
287+
file_row_number_column_pos,
288+
} = extract_projection_expr(projection_ids, column_ids, &bind_data.column_fields);
283289
let filter_expr = extract_table_filter_expr(
284290
init_input.table_filter_set(),
285291
column_ids,
@@ -291,10 +297,10 @@ impl<T: DataSourceTableFunction> TableFunction for T {
291297
let filter_expr_str = filter_expr
292298
.as_ref()
293299
.map_or_else(|| "true".to_string(), |f| f.to_string());
294-
debug!("Global init Vortex scan SELECT {projection_expr} WHERE {filter_expr_str}");
300+
debug!("Global init Vortex scan SELECT {projection} WHERE {filter_expr_str}");
295301

296302
let request = ScanRequest {
297-
projection: projection_expr,
303+
projection,
298304
filter: filter_expr,
299305
ordered: false,
300306
..Default::default()
@@ -357,7 +363,8 @@ impl<T: DataSourceTableFunction> TableFunction for T {
357363
batch_id: AtomicU64::new(0),
358364
bytes_total: Arc::new(AtomicU64::new(0)),
359365
bytes_read: AtomicU64::new(0),
360-
file_index_column_pos: file_idx_pos,
366+
file_index_column_pos,
367+
file_row_number_column_pos,
361368
})
362369
}
363370

@@ -568,6 +575,12 @@ fn extract_schema_from_dtype(dtype: &DType) -> VortexResult<Vec<DuckdbField>> {
568575
Ok(fields)
569576
}
570577

578+
struct ProjectionWithVirtualColumns {
579+
projection: Expression,
580+
file_index_column_pos: Option<usize>,
581+
file_row_number_column_pos: Option<usize>,
582+
}
583+
571584
/// Creates a projection expression from raw projection/column ID slices and
572585
/// column names.
573586
/// If FILE_INDEX_COLUMN_IDX is present, returns its position as second
@@ -576,15 +589,16 @@ fn extract_projection_expr(
576589
projection_ids: Option<&[u64]>,
577590
column_ids: &[u64],
578591
column_fields: &[DuckdbField],
579-
) -> (Expression, Option<usize>) {
592+
) -> ProjectionWithVirtualColumns {
580593
// If projection ids are empty, use column_ids.
581594
// See duckdb/src/planner/operator/logical_get.cpp#L168
582595
let (ids, has_projection_ids) = match projection_ids {
583596
Some(ids) => (ids, true),
584597
None => (column_ids, false),
585598
};
586599

587-
let mut file_idx_pos = None;
600+
let mut file_index_column_pos = None;
601+
let mut file_row_number_column_pos = None;
588602

589603
#[expect(clippy::cast_possible_truncation)]
590604
let names = ids
@@ -598,7 +612,10 @@ fn extract_projection_expr(
598612
};
599613

600614
if column_id == FILE_INDEX_COLUMN_IDX {
601-
file_idx_pos = Some(column_pos);
615+
file_index_column_pos = Some(column_pos);
616+
}
617+
if column_id == FILE_ROW_NUMBER_COLUMN_IDX {
618+
file_row_number_column_pos = Some(column_pos);
602619
}
603620

604621
column_id
@@ -607,7 +624,11 @@ fn extract_projection_expr(
607624
.map(|col_id| Arc::from(column_fields[col_id as usize].name.as_str()))
608625
.collect::<FieldNames>();
609626

610-
(select(names, root()), file_idx_pos)
627+
ProjectionWithVirtualColumns {
628+
projection: select(names, root()),
629+
file_index_column_pos,
630+
file_row_number_column_pos,
631+
}
611632
}
612633

613634
/// Creates a table filter expression from the table filter set, column metadata, additional

0 commit comments

Comments
 (0)