diff --git a/sqlparser_bench/benches/sqlparser_bench.rs b/sqlparser_bench/benches/sqlparser_bench.rs index 46c201540..d0c934b26 100644 --- a/sqlparser_bench/benches/sqlparser_bench.rs +++ b/sqlparser_bench/benches/sqlparser_bench.rs @@ -16,7 +16,7 @@ // under the License. use criterion::{criterion_group, criterion_main, Criterion}; -use sqlparser::dialect::GenericDialect; +use sqlparser::dialect::{GenericDialect, PostgreSqlDialect}; use sqlparser::keywords::Keyword; use sqlparser::parser::Parser; use sqlparser::tokenizer::{Span, Word}; @@ -177,11 +177,37 @@ fn parse_compound_chain(c: &mut Criterion) { group.finish(); } +/// Benchmark parsing pathological named-arg chains that previously caused +/// 2^N work in `parse_function_args` on dialects with expression-named +/// function arguments (PostgreSQL, MSSQL). Each `--` swallows the +/// trailing `,i)`, leaving a chain of unterminated function calls that the +/// previous `maybe_parse(parse_expr)` re-walked on rollback. +fn parse_named_arg_chain(c: &mut Criterion) { + let mut group = c.benchmark_group("parse_named_arg_chain"); + let dialect = PostgreSqlDialect {}; + + for &n in &[5usize, 10, 15] { + let body = std::iter::repeat_n(".foo(t--,i)", n) + .collect::>() + .join("\n"); + let sql = format!("SELECT Y\n{body}"); + + group.bench_function(format!("chain_{n}"), |b| { + b.iter(|| { + let _ = Parser::parse_sql(&dialect, std::hint::black_box(&sql)); + }); + }); + } + + group.finish(); +} + criterion_group!( benches, basic_queries, word_to_ident, parse_many_identifiers, - parse_compound_chain + parse_compound_chain, + parse_named_arg_chain ); criterion_main!(benches); diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 91ac386ae..c97ece2da 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -18357,19 +18357,8 @@ impl<'a> Parser<'a> { /// Parse a single function argument, handling named and unnamed variants. pub fn parse_function_args(&mut self) -> Result { - let arg = if self.dialect.supports_named_fn_args_with_expr_name() { - self.maybe_parse(|p| { - let name = p.parse_expr()?; - let operator = p.parse_function_named_arg_operator()?; - let arg = p.parse_wildcard_expr()?.into(); - Ok(FunctionArg::ExprNamed { - name, - arg, - operator, - }) - })? - } else { - self.maybe_parse(|p| { + if !self.dialect.supports_named_fn_args_with_expr_name() { + if let Some(arg) = self.maybe_parse(|p| { let name = p.parse_identifier()?; let operator = p.parse_function_named_arg_operator()?; let arg = p.parse_wildcard_expr()?.into(); @@ -18378,12 +18367,35 @@ impl<'a> Parser<'a> { arg, operator, }) - })? - }; - if let Some(arg) = arg { - return Ok(arg); + })? { + return Ok(arg); + } } + + // Parse the leading expression once, then speculatively parse the + // named-arg tail ` `. The previous implementation also + // speculated on the name itself via `maybe_parse(parse_expr ...)`, + // which produced ~2^N work on chains like + // `SELECT Y\n.foo(t--\n.foo(t--\n...` because rollback re-walked + // deeply nested function calls. Same family of bug as #2344. let wildcard_expr = self.parse_wildcard_expr()?; + if self.dialect.supports_named_fn_args_with_expr_name() + && !matches!(wildcard_expr, Expr::Wildcard(_)) + { + if let Some((operator, arg)) = self.maybe_parse(|p| { + Ok(( + p.parse_function_named_arg_operator()?, + p.parse_wildcard_expr()?, + )) + })? { + return Ok(FunctionArg::ExprNamed { + name: wildcard_expr, + arg: arg.into(), + operator, + }); + } + } + let arg_expr: FunctionArgExpr = match wildcard_expr { Expr::Wildcard(ref token) if self.dialect.supports_select_wildcard_exclude() => { // Support `* EXCLUDE(col1, col2, ...)` inside function calls (e.g. Snowflake's diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index f470b93ca..cfb7e1596 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -19004,3 +19004,30 @@ fn parse_compound_chain_no_exponential_blowup() { rx.recv_timeout(Duration::from_secs(5)) .expect("parser should reject this quickly, not loop exponentially"); } + +/// Regression test for the 2^N parse-time blowup in `parse_function_args` on +/// dialects that allow ` ` named args (PostgreSQL, MSSQL). +/// Each `--` swallows the trailing `,i)`, leaving a chain of +/// unterminated function calls that the previous `maybe_parse(parse_expr)` +/// re-walked on rollback. Post-fix the parser returns `Err` in well under +/// a millisecond. +#[test] +fn parse_named_arg_chain_no_exponential_blowup() { + use std::sync::mpsc; + use std::thread; + use std::time::Duration; + + let body: String = std::iter::repeat_n(".foo(t--,i)", 25) + .collect::>() + .join("\n"); + let sql = format!("SELECT Y\n{body}"); + + let (tx, rx) = mpsc::channel(); + thread::spawn(move || { + let _ = Parser::parse_sql(&PostgreSqlDialect {}, &sql); + let _ = tx.send(()); + }); + + rx.recv_timeout(Duration::from_secs(5)) + .expect("PostgreSQL parser should reject this quickly, not loop exponentially"); +}