Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 25 additions & 1 deletion sqlparser_bench/benches/sqlparser_bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -177,11 +177,35 @@ fn parse_compound_chain(c: &mut Criterion) {
group.finish();
}

/// Benchmark parsing pathological compound chains with a reserved keyword in
/// field position, like `SELECT x.not-b.not-b...`. The `.not-b` shape used to
/// cause 2^N work in `parse_compound_expr` because `parse_prefix` descended
/// into `parse_not` -> `parse_subexpr`, re-walking the remaining chain at
/// every segment.
fn parse_compound_keyword_chain(c: &mut Criterion) {
let mut group = c.benchmark_group("parse_compound_keyword_chain");
let dialect = GenericDialect {};

for &n in &[5usize, 10, 15] {
let body = std::iter::repeat_n(".not-b", n).collect::<String>();
let sql = format!("SELECT x{body}");

group.bench_function(format!("chain_{n}"), |b| {
b.iter(|| {
let _ = Parser::parse_sql(&dialect, std::hint::black_box(&sql));
});
});
}

group.finish();
}

criterion_group!(
benches,
basic_queries,
word_to_ident,
parse_many_identifiers,
parse_compound_chain
parse_compound_chain,
parse_compound_keyword_chain
);
criterion_main!(benches);
43 changes: 29 additions & 14 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2036,20 +2036,35 @@ impl<'a> Parser<'a> {
// recursive `parse_subexpr` would re-walk the rest of the chain at
// every dot.
_ => {
let expr = self.maybe_parse(|parser| {
let expr = parser.parse_prefix()?;
match &expr {
Expr::CompoundFieldAccess { .. }
| Expr::CompoundIdentifier(_)
| Expr::Identifier(_)
| Expr::Value(_)
| Expr::Function(_) => Ok(expr),
_ => parser.expected_ref(
"an identifier or value",
parser.peek_token_ref(),
),
}
})?;
// For a plain `Word` field (not followed by `(`), skip the
// speculative `parse_prefix`. The only result the validator
// below would accept is `Identifier`, which `parse_identifier`
// in the None branch produces directly. This avoids 2^N work
// on chains like `.not-b.not-b...` where `parse_prefix` would
// descend into `parse_not` and re-walk the remaining chain at
// every segment.
let word_field_no_lparen =
matches!(self.peek_token_ref().token, Token::Word(_))
&& self.peek_nth_token_ref(1).token != Token::LParen;

let expr = if word_field_no_lparen {
None
} else {
self.maybe_parse(|parser| {
let expr = parser.parse_prefix()?;
match &expr {
Expr::CompoundFieldAccess { .. }
| Expr::CompoundIdentifier(_)
| Expr::Identifier(_)
| Expr::Value(_)
| Expr::Function(_) => Ok(expr),
_ => parser.expected_ref(
"an identifier or value",
parser.peek_token_ref(),
),
}
})?
};

match expr {
// If we get back a compound field access or identifier,
Expand Down
25 changes: 25 additions & 0 deletions tests/sqlparser_common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19004,3 +19004,28 @@ fn parse_compound_chain_no_exponential_blowup() {
rx.recv_timeout(Duration::from_secs(5))
.expect("parser should reject this quickly, not loop exponentially");
}

/// Regression test for the 2^N parse-time blowup in `parse_compound_expr` on
/// chains like `x.not-b.not-b...`. The `NOT` keyword in field position drives
/// `parse_prefix` -> `parse_not` -> `parse_subexpr`, which re-walks the
/// remaining chain at every segment and doubles the work. Post-fix the parser
/// handles 25 segments in well under a millisecond, so the timeout is a hang
/// guard, not a perf threshold.
#[test]
fn parse_compound_keyword_chain_no_exponential_blowup() {
use std::sync::mpsc;
use std::thread;
use std::time::Duration;

let body: String = std::iter::repeat_n(".not-b", 25).collect();
let sql = format!("SELECT x{body}");

let (tx, rx) = mpsc::channel();
thread::spawn(move || {
let _ = Parser::parse_sql(&GenericDialect {}, &sql);
let _ = tx.send(());
});

rx.recv_timeout(Duration::from_secs(5))
.expect("parser should handle this quickly, not loop exponentially");
}
Loading