Skip to content

Commit 87d7c5a

Browse files
committed
initial
Signed-off-by: Mikhail Kot <to@myrrc.dev>
1 parent f58b59a commit 87d7c5a

4 files changed

Lines changed: 54 additions & 14 deletions

File tree

vortex-duckdb/cpp/table_function.cpp

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,19 @@ void c_function(ClientContext &context, TableFunctionInput &input, DataChunk &ou
277277
}
278278
}
279279

280+
/*
281+
* Table filter pushdown is used twice in duckdb:
282+
*
283+
* 1. Planning time: duckdb uses file metadata (filename, hive_partitioning
284+
* options in MultiFileReader) to prune files based on filename or hive
285+
* partition data i.e. month, year, etc. This happens before any file IO.
286+
* We don't use this because we have our own file-level pruning in
287+
* FileStatsLayoutReader.
288+
*
289+
* 2. Scan time: since filter_pushdown = true, filter expressions are
290+
* converted to TableFilterSet and pushed down to Vortex. We convert them to
291+
* Vortex expressions and use them as filter options while initializing the scan.
292+
*/
280293
void c_pushdown_complex_filter(ClientContext &,
281294
LogicalGet &,
282295
FunctionData *bind_data,
@@ -292,8 +305,6 @@ void c_pushdown_complex_filter(ClientContext &,
292305
if (error_out) {
293306
throw BinderException(IntoErrString(error_out));
294307
}
295-
296-
// If the pushdown complex filter returns true, we can remove the filter from the list.
297308
iter = pushed ? filters.erase(iter) : std::next(iter);
298309
}
299310
}
@@ -394,6 +405,10 @@ extern "C" duckdb_state duckdb_vx_tfunc_register(duckdb_database ffi_db, const d
394405
return {{COLUMN_IDENTIFIER_EMPTY, TableColumn("", LogicalTypeId::BOOLEAN)}};
395406
};
396407

408+
tf.pushdown_expression = [](auto &, auto &, auto &) {
409+
return true;
410+
};
411+
397412
tf.arguments.resize(vtab->parameter_count);
398413
for (size_t i = 0; i < vtab->parameter_count; i++) {
399414
tf.arguments[i] = *reinterpret_cast<LogicalType *>(vtab->parameters[i]);

vortex-duckdb/src/convert/expr.rs

Lines changed: 28 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -46,34 +46,52 @@ fn like_pattern_str(value: &duckdb::ExpressionRef) -> VortexResult<Option<String
4646

4747
pub fn try_from_bound_expression(
4848
value: &duckdb::ExpressionRef,
49+
) -> VortexResult<Option<Expression>> {
50+
try_from_expression_inner(value, None)
51+
}
52+
53+
pub(super) fn try_from_bound_expression_with_col_sub(
54+
value: &duckdb::ExpressionRef,
55+
col_sub: &Expression,
56+
) -> VortexResult<Option<Expression>> {
57+
try_from_expression_inner(value, Some(col_sub))
58+
}
59+
60+
fn try_from_expression_inner(
61+
value: &duckdb::ExpressionRef,
62+
col_sub: Option<&Expression>,
4963
) -> VortexResult<Option<Expression>> {
5064
let Some(value) = value.as_class() else {
5165
tracing::debug!("no expression class id {:?}", value.as_class_id());
5266
return Ok(None);
5367
};
5468
Ok(Some(match value {
69+
duckdb::ExpressionClass::BoundRef => {
70+
let Some(col) = col_sub else { return Ok(None) };
71+
col.clone()
72+
}
5573
duckdb::ExpressionClass::BoundColumnRef(col_ref) => col(col_ref.name.as_ref()),
5674
duckdb::ExpressionClass::BoundConstant(const_) => lit(Scalar::try_from(const_.value)?),
5775
duckdb::ExpressionClass::BoundComparison(compare) => {
5876
let operator: Operator = compare.op.try_into()?;
5977

60-
let Some(left) = try_from_bound_expression(compare.left)? else {
78+
let Some(left) = try_from_expression_inner(compare.left, col_sub)? else {
6179
return Ok(None);
6280
};
63-
let Some(right) = try_from_bound_expression(compare.right)? else {
81+
let Some(right) = try_from_expression_inner(compare.right, col_sub)? else {
6482
return Ok(None);
6583
};
6684

6785
Binary.new_expr(operator, [left, right])
6886
}
6987
duckdb::ExpressionClass::BoundBetween(between) => {
70-
let Some(array) = try_from_bound_expression(between.input)? else {
88+
let Some(array) = try_from_expression_inner(between.input, col_sub)? else {
7189
return Ok(None);
7290
};
73-
let Some(lower) = try_from_bound_expression(between.lower)? else {
91+
let Some(lower) = try_from_expression_inner(between.lower, col_sub)? else {
7492
return Ok(None);
7593
};
76-
let Some(upper) = try_from_bound_expression(between.upper)? else {
94+
let Some(upper) = try_from_expression_inner(between.upper, col_sub)? else {
7795
return Ok(None);
7896
};
7997
Between.new_expr(
@@ -98,7 +116,7 @@ pub fn try_from_bound_expression(
98116
| DUCKDB_VX_EXPR_TYPE::DUCKDB_VX_EXPR_TYPE_OPERATOR_IS_NOT_NULL => {
99117
let children: Vec<_> = operator.children().collect();
100118
assert_eq!(children.len(), 1);
101-
let Some(child) = try_from_bound_expression(children[0])? else {
119+
let Some(child) = try_from_expression_inner(children[0], col_sub)? else {
102120
return Ok(None);
103121
};
104122
match operator.op {
@@ -114,15 +132,15 @@ pub fn try_from_bound_expression(
114132
// First child is element, rest form the list.
115133
let children: Vec<_> = operator.children().collect();
116134
assert!(children.len() >= 2);
117-
let Some(element) = try_from_bound_expression(children[0])? else {
135+
let Some(element) = try_from_expression_inner(children[0], col_sub)? else {
118136
return Ok(None);
119137
};
120138

121139
let Some(list_elements) = children
122140
.iter()
123141
.skip(1)
124142
.map(|c| {
125-
let Some(value) = try_from_bound_expression(c)? else {
143+
let Some(value) = try_from_expression_inner(c, col_sub)? else {
126144
return Ok(None);
127145
};
128146
Ok(Some(
@@ -154,7 +172,7 @@ pub fn try_from_bound_expression(
154172
DUCKDB_FUNCTION_NAME_CONTAINS => {
155173
let children: Vec<_> = func.children().collect();
156174
assert_eq!(children.len(), 2);
157-
let Some(value) = try_from_bound_expression(children[0])? else {
175+
let Some(value) = try_from_expression_inner(children[0], col_sub)? else {
158176
return Ok(None);
159177
};
160178
let Some(pattern_lit) = like_pattern_str(children[1])? else {
@@ -171,7 +189,7 @@ pub fn try_from_bound_expression(
171189
duckdb::ExpressionClass::BoundConjunction(conj) => {
172190
let Some(children) = conj
173191
.children()
174-
.map(try_from_bound_expression)
192+
.map(|c| try_from_expression_inner(c, col_sub))
175193
.collect::<VortexResult<Option<Vec<_>>>>()?
176194
else {
177195
return Ok(None);

vortex-duckdb/src/convert/table_filter.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -117,8 +117,10 @@ pub fn try_from_table_filter(
117117
)
118118
}
119119
TableFilterClass::ExpressionRef(expr) => {
120-
// TODO(ngates): figure out which column ID DuckDB is using for the expression.
121-
vortex_bail!("expression table filter is not supported: {}", expr);
120+
match super::expr::try_from_bound_expression_with_col_sub(expr, col)? {
121+
Some(expression) => expression,
122+
None => vortex_bail!("No output expression"),
123+
}
122124
}
123125
TableFilterClass::Bloom => {
124126
vortex_bail!("bloom filter table filter is not supported")

vortex-duckdb/src/duckdb/expr.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,9 @@ impl ExpressionRef {
139139
bind_info: out.bind_info,
140140
})
141141
}
142+
cpp::DUCKDB_VX_EXPR_CLASS::DUCKDB_VX_EXPR_CLASS_BOUND_REF => {
143+
ExpressionClass::BoundRef
144+
}
142145
_ => {
143146
return None;
144147
}
@@ -155,6 +158,8 @@ pub enum ExpressionClass<'a> {
155158
BoundBetween(BoundBetween<'a>),
156159
BoundOperator(BoundOperator<'a>),
157160
BoundFunction(BoundFunction<'a>),
161+
/// Column reference inside an ExpressionFilter for an expression pushed down to Vortex.
162+
BoundRef,
158163
}
159164

160165
pub struct BoundColumnRef {

0 commit comments

Comments
 (0)