@@ -9,10 +9,12 @@ use vortex::error::VortexError;
99use vortex:: error:: VortexExpect ;
1010use vortex:: error:: VortexResult ;
1111use vortex:: error:: vortex_bail;
12+ use vortex:: error:: vortex_ensure;
1213use vortex:: error:: vortex_err;
1314use vortex:: expr:: Expression ;
1415use vortex:: expr:: and_collect;
1516use vortex:: expr:: col;
17+ use vortex:: expr:: get_item;
1618use vortex:: expr:: is_not_null;
1719use vortex:: expr:: is_null;
1820use vortex:: expr:: list_contains;
@@ -32,48 +34,120 @@ use vortex::scalar_fn::fns::operators::Operator;
3234
3335use crate :: cpp:: DUCKDB_VX_EXPR_TYPE ;
3436use crate :: duckdb;
37+ use crate :: duckdb:: BoundFunction ;
38+ use crate :: duckdb:: BoundOperator ;
3539
36- const DUCKDB_FUNCTION_NAME_CONTAINS : & str = "contains" ;
37-
38- fn like_pattern_str ( value : & duckdb:: ExpressionRef ) -> VortexResult < Option < String > > {
40+ fn from_bound_str ( value : & duckdb:: ExpressionRef ) -> VortexResult < String > {
3941 match value. as_class ( ) . vortex_expect ( "unknown class" ) {
4042 duckdb:: ExpressionClass :: BoundConstant ( constant) => {
41- Ok ( Some ( format ! ( "%{}%" , constant. value. as_string( ) . as_str( ) ) ) )
43+ Ok ( constant. value . as_string ( ) . as_str ( ) . to_owned ( ) )
4244 }
43- _ => Ok ( None ) ,
45+ _ => vortex_bail ! ( "Expected string expression, got {:?}" , value . as_class_id ( ) ) ,
4446 }
4547}
4648
49+ fn try_from_bound_function (
50+ func : & BoundFunction ,
51+ col_sub : Option < & Expression > ,
52+ ) -> VortexResult < Option < Expression > > {
53+ let expr = match func. scalar_function . name ( ) {
54+ "struct_extract" => {
55+ let children: Vec < _ > = func. children ( ) . collect ( ) ;
56+ vortex_ensure ! ( children. len( ) == 2 ) ;
57+ let Some ( child) = try_from_expression_inner ( children[ 0 ] , col_sub) ? else {
58+ return Ok ( None ) ;
59+ } ;
60+ let field = from_bound_str ( children[ 1 ] ) ?;
61+ get_item ( field, child)
62+ }
63+ "contains" => {
64+ let children: Vec < _ > = func. children ( ) . collect ( ) ;
65+ vortex_ensure ! ( children. len( ) == 2 ) ;
66+ let Some ( value) = try_from_expression_inner ( children[ 0 ] , col_sub) ? else {
67+ return Ok ( None ) ;
68+ } ;
69+ let pattern = from_bound_str ( children[ 1 ] ) ?;
70+ let pattern = lit ( format ! ( "%{pattern}%" ) ) ;
71+ Like . new_expr ( LikeOptions :: default ( ) , [ value, pattern] )
72+ }
73+ like @ ( "~~" | "!~~" ) => {
74+ let children: Vec < _ > = func. children ( ) . collect ( ) ;
75+ vortex_ensure ! ( children. len( ) == 2 ) ;
76+ let Some ( string) = try_from_expression_inner ( children[ 0 ] , col_sub) ? else {
77+ return Ok ( None ) ;
78+ } ;
79+ let Some ( target) = try_from_expression_inner ( children[ 1 ] , col_sub) ? else {
80+ return Ok ( None ) ;
81+ } ;
82+ let opts = LikeOptions {
83+ negated : like == "!~~" ,
84+ case_insensitive : false ,
85+ } ;
86+ Like . new_expr ( opts, [ string, target] )
87+ }
88+ _ => {
89+ debug ! ( "bound function {}" , func. scalar_function. name( ) ) ;
90+ return Ok ( None ) ;
91+ }
92+ } ;
93+
94+ Ok ( Some ( expr) )
95+ }
96+
4797pub fn try_from_bound_expression (
4898 value : & duckdb:: ExpressionRef ,
4999) -> VortexResult < Option < Expression > > {
100+ try_from_expression_inner ( value, None )
101+ }
102+
103+ pub ( super ) fn try_from_bound_expression_with_col_sub (
104+ value : & duckdb:: ExpressionRef ,
105+ col_sub : & Expression ,
106+ ) -> VortexResult < Option < Expression > > {
107+ try_from_expression_inner ( value, Some ( col_sub) )
108+ }
109+
110+ fn try_from_expression_inner (
111+ value : & duckdb:: ExpressionRef ,
112+ col_sub : Option < & Expression > ,
113+ ) -> VortexResult < Option < Expression > > {
114+ //println!(
115+ // "from\n\texpression={value}\n\tcol={col_sub:?}\n\tid={:?}",
116+ // value.as_class_id()
117+ //);
50118 let Some ( value) = value. as_class ( ) else {
51119 debug ! ( "no expression class id {:?}" , value. as_class_id( ) ) ;
52120 return Ok ( None ) ;
53121 } ;
54122 Ok ( Some ( match value {
123+ duckdb:: ExpressionClass :: BoundRef => {
124+ let Some ( col) = col_sub else {
125+ vortex_bail ! ( "BoundRef requested but no column supplied" ) ;
126+ } ;
127+ col. clone ( )
128+ }
55129 duckdb:: ExpressionClass :: BoundColumnRef ( col_ref) => col ( col_ref. name . as_ref ( ) ) ,
56130 duckdb:: ExpressionClass :: BoundConstant ( const_) => lit ( Scalar :: try_from ( const_. value ) ?) ,
57131 duckdb:: ExpressionClass :: BoundComparison ( compare) => {
58132 let operator: Operator = compare. op . try_into ( ) ?;
59133
60- let Some ( left) = try_from_bound_expression ( compare. left ) ? else {
134+ let Some ( left) = try_from_expression_inner ( compare. left , col_sub ) ? else {
61135 return Ok ( None ) ;
62136 } ;
63- let Some ( right) = try_from_bound_expression ( compare. right ) ? else {
137+ let Some ( right) = try_from_expression_inner ( compare. right , col_sub ) ? else {
64138 return Ok ( None ) ;
65139 } ;
66140
67141 Binary . new_expr ( operator, [ left, right] )
68142 }
69143 duckdb:: ExpressionClass :: BoundBetween ( between) => {
70- let Some ( array) = try_from_bound_expression ( between. input ) ? else {
144+ let Some ( array) = try_from_expression_inner ( between. input , col_sub ) ? else {
71145 return Ok ( None ) ;
72146 } ;
73- let Some ( lower) = try_from_bound_expression ( between. lower ) ? else {
147+ let Some ( lower) = try_from_expression_inner ( between. lower , col_sub ) ? else {
74148 return Ok ( None ) ;
75149 } ;
76- let Some ( upper) = try_from_bound_expression ( between. upper ) ? else {
150+ let Some ( upper) = try_from_expression_inner ( between. upper , col_sub ) ? else {
77151 return Ok ( None ) ;
78152 } ;
79153 Between . new_expr (
@@ -98,7 +172,7 @@ pub fn try_from_bound_expression(
98172 | DUCKDB_VX_EXPR_TYPE :: DUCKDB_VX_EXPR_TYPE_OPERATOR_IS_NOT_NULL => {
99173 let children: Vec < _ > = operator. children ( ) . collect ( ) ;
100174 assert_eq ! ( children. len( ) , 1 ) ;
101- let Some ( child) = try_from_bound_expression ( children[ 0 ] ) ? else {
175+ let Some ( child) = try_from_expression_inner ( children[ 0 ] , col_sub ) ? else {
102176 return Ok ( None ) ;
103177 } ;
104178 match operator. op {
@@ -111,67 +185,23 @@ pub fn try_from_bound_expression(
111185 }
112186 }
113187 DUCKDB_VX_EXPR_TYPE :: DUCKDB_VX_EXPR_TYPE_COMPARE_IN => {
114- // First child is element, rest form the list.
115- let children: Vec < _ > = operator. children ( ) . collect ( ) ;
116- assert ! ( children. len( ) >= 2 ) ;
117- let Some ( element) = try_from_bound_expression ( children[ 0 ] ) ? else {
118- return Ok ( None ) ;
119- } ;
120-
121- let Some ( list_elements) = children
122- . iter ( )
123- . skip ( 1 )
124- . map ( |c| {
125- let Some ( value) = try_from_bound_expression ( c) ? else {
126- return Ok ( None ) ;
127- } ;
128- Ok ( Some (
129- value
130- . as_opt :: < Literal > ( )
131- . ok_or_else ( || {
132- vortex_err ! ( "cannot have a non literal in a in_list" )
133- } ) ?
134- . clone ( ) ,
135- ) )
136- } )
137- . collect :: < VortexResult < Option < Vec < _ > > > > ( ) ?
138- else {
139- return Ok ( None ) ;
140- } ;
141- let list = Scalar :: list (
142- Arc :: new ( list_elements[ 0 ] . dtype ( ) . clone ( ) ) ,
143- list_elements,
144- Nullability :: Nullable ,
145- ) ;
146- list_contains ( lit ( list) , element)
147- }
148- _ => {
149- debug ! ( op=?operator. op, "cannot be pushed down" ) ;
150- return Ok ( None ) ;
188+ return try_from_compare_in ( operator, col_sub, false ) ;
151189 }
152- } ,
153- duckdb:: ExpressionClass :: BoundFunction ( func) => match func. scalar_function . name ( ) {
154- DUCKDB_FUNCTION_NAME_CONTAINS => {
155- let children: Vec < _ > = func. children ( ) . collect ( ) ;
156- assert_eq ! ( children. len( ) , 2 ) ;
157- let Some ( value) = try_from_bound_expression ( children[ 0 ] ) ? else {
158- return Ok ( None ) ;
159- } ;
160- let Some ( pattern_lit) = like_pattern_str ( children[ 1 ] ) ? else {
161- vortex_bail ! ( "expected pattern to be bound string" )
162- } ;
163- let pattern = lit ( pattern_lit) ;
164- Like . new_expr ( LikeOptions :: default ( ) , [ value, pattern] )
190+ DUCKDB_VX_EXPR_TYPE :: DUCKDB_VX_EXPR_TYPE_COMPARE_NOT_IN => {
191+ return try_from_compare_in ( operator, col_sub, true ) ;
165192 }
166193 _ => {
167- debug ! ( "bound function {}" , func . scalar_function . name ( ) ) ;
194+ debug ! ( op=?operator . op , "cannot be pushed down" ) ;
168195 return Ok ( None ) ;
169196 }
170197 } ,
198+ duckdb:: ExpressionClass :: BoundFunction ( func) => {
199+ return try_from_bound_function ( & func, col_sub) ;
200+ }
171201 duckdb:: ExpressionClass :: BoundConjunction ( conj) => {
172202 let Some ( children) = conj
173203 . children ( )
174- . map ( try_from_bound_expression )
204+ . map ( |c| try_from_expression_inner ( c , col_sub ) )
175205 . collect :: < VortexResult < Option < Vec < _ > > > > ( ) ?
176206 else {
177207 return Ok ( None ) ;
@@ -189,6 +219,49 @@ pub fn try_from_bound_expression(
189219 } ) )
190220}
191221
222+ fn try_from_compare_in (
223+ operator : BoundOperator ,
224+ col_sub : Option < & Expression > ,
225+ not_in : bool ,
226+ ) -> VortexResult < Option < Expression > > {
227+ // First child is element, rest form the list.
228+ let children: Vec < _ > = operator. children ( ) . collect ( ) ;
229+ assert ! ( children. len( ) >= 2 ) ;
230+ let Some ( element) = try_from_expression_inner ( children[ 0 ] , col_sub) ? else {
231+ //println!("no expression for element={}", children[0]);
232+ return Ok ( None ) ;
233+ } ;
234+
235+ let Some ( list_elements) = children
236+ . iter ( )
237+ . skip ( 1 )
238+ . map ( |c| {
239+ let Some ( value) = try_from_expression_inner ( c, col_sub) ? else {
240+ //println!("no expression for child={c}");
241+ return Ok ( None ) ;
242+ } ;
243+ Ok ( Some (
244+ value
245+ . as_opt :: < Literal > ( )
246+ . ok_or_else ( || vortex_err ! ( "cannot have a non literal in a in_list" ) ) ?
247+ . clone ( ) ,
248+ ) )
249+ } )
250+ . collect :: < VortexResult < Option < Vec < _ > > > > ( ) ?
251+ else {
252+ //println!("no list children");
253+ return Ok ( None ) ;
254+ } ;
255+ let list = Scalar :: list (
256+ Arc :: new ( list_elements[ 0 ] . dtype ( ) . clone ( ) ) ,
257+ list_elements,
258+ Nullability :: Nullable ,
259+ ) ;
260+
261+ let expr = list_contains ( lit ( list) , element) ;
262+ Ok ( Some ( if not_in { not ( expr) } else { expr } ) )
263+ }
264+
192265impl TryFrom < DUCKDB_VX_EXPR_TYPE > for Operator {
193266 type Error = VortexError ;
194267
0 commit comments