From d1d6e5aee985933610bf4934c080a082a5232955 Mon Sep 17 00:00:00 2001 From: LucaCappelletti94 Date: Fri, 22 May 2026 15:10:11 +0200 Subject: [PATCH 1/2] Parser: add regression test and bench for compound keyword-chain blowup --- sqlparser_bench/benches/sqlparser_bench.rs | 26 +++++++++++++++++++++- tests/sqlparser_common.rs | 25 +++++++++++++++++++++ 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/sqlparser_bench/benches/sqlparser_bench.rs b/sqlparser_bench/benches/sqlparser_bench.rs index 46c201540..7f6bc2ff9 100644 --- a/sqlparser_bench/benches/sqlparser_bench.rs +++ b/sqlparser_bench/benches/sqlparser_bench.rs @@ -177,11 +177,35 @@ fn parse_compound_chain(c: &mut Criterion) { group.finish(); } +/// Benchmark parsing pathological compound chains with a reserved keyword in +/// field position, like `SELECT x.not-b.not-b...`. The `.not-b` shape used to +/// cause 2^N work in `parse_compound_expr` because `parse_prefix` descended +/// into `parse_not` -> `parse_subexpr`, re-walking the remaining chain at +/// every segment. +fn parse_compound_keyword_chain(c: &mut Criterion) { + let mut group = c.benchmark_group("parse_compound_keyword_chain"); + let dialect = GenericDialect {}; + + for &n in &[5usize, 10, 15] { + let body = std::iter::repeat_n(".not-b", n).collect::(); + let sql = format!("SELECT x{body}"); + + group.bench_function(format!("chain_{n}"), |b| { + b.iter(|| { + let _ = Parser::parse_sql(&dialect, std::hint::black_box(&sql)); + }); + }); + } + + group.finish(); +} + criterion_group!( benches, basic_queries, word_to_ident, parse_many_identifiers, - parse_compound_chain + parse_compound_chain, + parse_compound_keyword_chain ); criterion_main!(benches); diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index f470b93ca..0fe05f6e2 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -19004,3 +19004,28 @@ fn parse_compound_chain_no_exponential_blowup() { rx.recv_timeout(Duration::from_secs(5)) .expect("parser should reject this quickly, not loop exponentially"); } + +/// Regression test for the 2^N parse-time blowup in `parse_compound_expr` on +/// chains like `x.not-b.not-b...`. The `NOT` keyword in field position drives +/// `parse_prefix` -> `parse_not` -> `parse_subexpr`, which re-walks the +/// remaining chain at every segment and doubles the work. Post-fix the parser +/// handles 25 segments in well under a millisecond, so the timeout is a hang +/// guard, not a perf threshold. +#[test] +fn parse_compound_keyword_chain_no_exponential_blowup() { + use std::sync::mpsc; + use std::thread; + use std::time::Duration; + + let body: String = std::iter::repeat_n(".not-b", 25).collect(); + let sql = format!("SELECT x{body}"); + + let (tx, rx) = mpsc::channel(); + thread::spawn(move || { + let _ = Parser::parse_sql(&GenericDialect {}, &sql); + let _ = tx.send(()); + }); + + rx.recv_timeout(Duration::from_secs(5)) + .expect("parser should handle this quickly, not loop exponentially"); +} From a317cbc41a97620cfdcfb857993c08cabfc38227 Mon Sep 17 00:00:00 2001 From: LucaCappelletti94 Date: Fri, 22 May 2026 15:14:25 +0200 Subject: [PATCH 2/2] Parser: fix exponential parse time on compound keyword chains --- src/parser/mod.rs | 43 +++++++++++++++++++++++++++++-------------- 1 file changed, 29 insertions(+), 14 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 91ac386ae..a6420b3ae 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -2036,20 +2036,35 @@ impl<'a> Parser<'a> { // recursive `parse_subexpr` would re-walk the rest of the chain at // every dot. _ => { - let expr = self.maybe_parse(|parser| { - let expr = parser.parse_prefix()?; - match &expr { - Expr::CompoundFieldAccess { .. } - | Expr::CompoundIdentifier(_) - | Expr::Identifier(_) - | Expr::Value(_) - | Expr::Function(_) => Ok(expr), - _ => parser.expected_ref( - "an identifier or value", - parser.peek_token_ref(), - ), - } - })?; + // For a plain `Word` field (not followed by `(`), skip the + // speculative `parse_prefix`. The only result the validator + // below would accept is `Identifier`, which `parse_identifier` + // in the None branch produces directly. This avoids 2^N work + // on chains like `.not-b.not-b...` where `parse_prefix` would + // descend into `parse_not` and re-walk the remaining chain at + // every segment. + let word_field_no_lparen = + matches!(self.peek_token_ref().token, Token::Word(_)) + && self.peek_nth_token_ref(1).token != Token::LParen; + + let expr = if word_field_no_lparen { + None + } else { + self.maybe_parse(|parser| { + let expr = parser.parse_prefix()?; + match &expr { + Expr::CompoundFieldAccess { .. } + | Expr::CompoundIdentifier(_) + | Expr::Identifier(_) + | Expr::Value(_) + | Expr::Function(_) => Ok(expr), + _ => parser.expected_ref( + "an identifier or value", + parser.peek_token_ref(), + ), + } + })? + }; match expr { // If we get back a compound field access or identifier,