Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 28 additions & 2 deletions sqlparser_bench/benches/sqlparser_bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
// under the License.

use criterion::{criterion_group, criterion_main, Criterion};
use sqlparser::dialect::GenericDialect;
use sqlparser::dialect::{GenericDialect, PostgreSqlDialect};
use sqlparser::keywords::Keyword;
use sqlparser::parser::Parser;
use sqlparser::tokenizer::{Span, Word};
Expand Down Expand Up @@ -177,11 +177,37 @@ fn parse_compound_chain(c: &mut Criterion) {
group.finish();
}

/// Benchmark parsing pathological named-arg chains that previously caused
/// 2^N work in `parse_function_args` on dialects with expression-named
/// function arguments (PostgreSQL, MSSQL). Each `--<newline>` swallows the
/// trailing `,i)`, leaving a chain of unterminated function calls that the
/// previous `maybe_parse(parse_expr)` re-walked on rollback.
fn parse_named_arg_chain(c: &mut Criterion) {
let mut group = c.benchmark_group("parse_named_arg_chain");
let dialect = PostgreSqlDialect {};

for &n in &[5usize, 10, 15] {
let body = std::iter::repeat_n(".foo(t--,i)", n)
.collect::<Vec<_>>()
.join("\n");
let sql = format!("SELECT Y\n{body}");

group.bench_function(format!("chain_{n}"), |b| {
b.iter(|| {
let _ = Parser::parse_sql(&dialect, std::hint::black_box(&sql));
});
});
}

group.finish();
}

criterion_group!(
benches,
basic_queries,
word_to_ident,
parse_many_identifiers,
parse_compound_chain
parse_compound_chain,
parse_named_arg_chain
);
criterion_main!(benches);
46 changes: 29 additions & 17 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18357,19 +18357,8 @@ impl<'a> Parser<'a> {

/// Parse a single function argument, handling named and unnamed variants.
pub fn parse_function_args(&mut self) -> Result<FunctionArg, ParserError> {
let arg = if self.dialect.supports_named_fn_args_with_expr_name() {
self.maybe_parse(|p| {
let name = p.parse_expr()?;
let operator = p.parse_function_named_arg_operator()?;
let arg = p.parse_wildcard_expr()?.into();
Ok(FunctionArg::ExprNamed {
name,
arg,
operator,
})
})?
} else {
self.maybe_parse(|p| {
if !self.dialect.supports_named_fn_args_with_expr_name() {
if let Some(arg) = self.maybe_parse(|p| {
let name = p.parse_identifier()?;
let operator = p.parse_function_named_arg_operator()?;
let arg = p.parse_wildcard_expr()?.into();
Expand All @@ -18378,12 +18367,35 @@ impl<'a> Parser<'a> {
arg,
operator,
})
})?
};
if let Some(arg) = arg {
return Ok(arg);
})? {
return Ok(arg);
}
}

// Parse the leading expression once, then speculatively parse the
// named-arg tail `<op> <expr>`. The previous implementation also
// speculated on the name itself via `maybe_parse(parse_expr ...)`,
// which produced ~2^N work on chains like
// `SELECT Y\n.foo(t--\n.foo(t--\n...` because rollback re-walked
// deeply nested function calls. Same family of bug as #2344.
let wildcard_expr = self.parse_wildcard_expr()?;
if self.dialect.supports_named_fn_args_with_expr_name()
&& !matches!(wildcard_expr, Expr::Wildcard(_))
{
if let Some((operator, arg)) = self.maybe_parse(|p| {
Ok((
p.parse_function_named_arg_operator()?,
p.parse_wildcard_expr()?,
))
})? {
return Ok(FunctionArg::ExprNamed {
name: wildcard_expr,
arg: arg.into(),
operator,
});
}
}

let arg_expr: FunctionArgExpr = match wildcard_expr {
Expr::Wildcard(ref token) if self.dialect.supports_select_wildcard_exclude() => {
// Support `* EXCLUDE(col1, col2, ...)` inside function calls (e.g. Snowflake's
Expand Down
27 changes: 27 additions & 0 deletions tests/sqlparser_common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19004,3 +19004,30 @@ fn parse_compound_chain_no_exponential_blowup() {
rx.recv_timeout(Duration::from_secs(5))
.expect("parser should reject this quickly, not loop exponentially");
}

/// Regression test for the 2^N parse-time blowup in `parse_function_args` on
/// dialects that allow `<expr> <op> <expr>` named args (PostgreSQL, MSSQL).
/// Each `--<newline>` swallows the trailing `,i)`, leaving a chain of
/// unterminated function calls that the previous `maybe_parse(parse_expr)`
/// re-walked on rollback. Post-fix the parser returns `Err` in well under
/// a millisecond.
#[test]
fn parse_named_arg_chain_no_exponential_blowup() {
use std::sync::mpsc;
use std::thread;
use std::time::Duration;

let body: String = std::iter::repeat_n(".foo(t--,i)", 25)
.collect::<Vec<_>>()
.join("\n");
let sql = format!("SELECT Y\n{body}");

let (tx, rx) = mpsc::channel();
thread::spawn(move || {
let _ = Parser::parse_sql(&PostgreSqlDialect {}, &sql);
let _ = tx.send(());
});

rx.recv_timeout(Duration::from_secs(5))
.expect("PostgreSQL parser should reject this quickly, not loop exponentially");
}
Loading