Skip to content

Commit e7d5fc2

Browse files
committed
Fix: route window dimension filters to outer query
Window function dimensions (LEAD, LAG, etc.) are computed in the CTE SELECT but haven't been evaluated at CTE WHERE time. The filter pushdown logic now detects dimensions with window != None and keeps their filters in the outer query. Also ensures columns referenced by outer-query filters are included in CTE SELECT lists.
1 parent fff2625 commit e7d5fc2

2 files changed

Lines changed: 159 additions & 0 deletions

File tree

sidemantic/sql/generator.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -411,6 +411,25 @@ def metric_needs_window(m):
411411
# Extract columns needed for metric-level filters (before building CTEs)
412412
metric_filter_cols_by_model = self._extract_metric_filter_columns(metrics)
413413

414+
# Ensure dimensions referenced in outer-query filters (e.g. window dims)
415+
# are included in the relevant CTE SELECT lists.
416+
for filter_expr in main_query_filters:
417+
try:
418+
parsed = sqlglot.parse_one(filter_expr, dialect=self.dialect)
419+
for column in parsed.find_all(exp.Column):
420+
if column.table:
421+
model_name = column.table.replace("_cte", "")
422+
if model_name in all_models:
423+
if model_name not in metric_filter_cols_by_model:
424+
metric_filter_cols_by_model[model_name] = set()
425+
metric_filter_cols_by_model[model_name].add(column.name)
426+
except Exception:
427+
logging.debug(
428+
"Failed to parse outer filter for column extraction: %s",
429+
filter_expr,
430+
exc_info=True,
431+
)
432+
414433
# Build CTEs for all models with pushed-down filters
415434
cte_sqls = []
416435
for model_name in all_models:
@@ -681,6 +700,7 @@ def _classify_filters_for_pushdown(
681700
# Find all table references in the filter
682701
referenced_models = set()
683702
references_metric = False
703+
references_window_dim = False
684704

685705
for column in parsed.find_all(exp.Column):
686706
table_name = column.table
@@ -697,10 +717,20 @@ def _classify_filters_for_pushdown(
697717
if model and model.get_metric(column_name):
698718
references_metric = True
699719

720+
# Check if this column is a window dimension
721+
if model:
722+
dim = model.get_dimension(column_name)
723+
if dim and dim.window is not None:
724+
references_window_dim = True
725+
700726
# Filters that reference metrics must stay in main query (can't push down)
701727
# because metrics don't exist in CTEs (only _raw columns)
702728
if references_metric:
703729
main_query_filters.append(filter_expr)
730+
# Filters on window dimensions must stay in outer query because the
731+
# window function hasn't been evaluated yet in the CTE WHERE context
732+
elif references_window_dim:
733+
main_query_filters.append(filter_expr)
704734
# If filter references exactly one model and no metrics, push it down
705735
elif len(referenced_models) == 1:
706736
model = list(referenced_models)[0]

tests/optimizations/test_predicate_pushdown.py

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -384,6 +384,135 @@ def test_metric_level_filters_not_pushed(layer):
384384
assert "completed" in case_sql
385385

386386

387+
def test_window_dimension_filter_not_pushed_down(layer):
388+
"""Test that filters on window dimensions stay in the outer query.
389+
390+
Window functions (LEAD, LAG, ROW_NUMBER, etc.) are computed in the CTE
391+
SELECT but haven't been evaluated yet at WHERE-clause time, so pushing
392+
a filter on a window dimension into the CTE WHERE would produce invalid
393+
SQL. The filter must be applied in the outer query instead.
394+
"""
395+
model = Model(
396+
name="events",
397+
table="events_table",
398+
primary_key="event_id",
399+
dimensions=[
400+
Dimension(name="person_id", type="categorical", sql="person_id"),
401+
Dimension(name="event_type", type="categorical", sql="event_type"),
402+
Dimension(
403+
name="next_event",
404+
type="categorical",
405+
sql="event_type",
406+
window="LEAD(event_type) OVER (PARTITION BY person_id ORDER BY created_at)",
407+
),
408+
],
409+
metrics=[
410+
Metric(name="event_count", agg="count"),
411+
],
412+
)
413+
414+
layer.add_model(model)
415+
416+
sql = layer.compile(
417+
metrics=["events.event_count"],
418+
dimensions=["events.event_type"],
419+
filters=["events.next_event = 'purchase'"],
420+
)
421+
422+
parsed = sqlglot.parse_one(sql)
423+
424+
# Find the CTE
425+
cte = None
426+
for cte_def in parsed.find_all(exp.CTE):
427+
if cte_def.alias == "events_cte":
428+
cte = cte_def
429+
break
430+
431+
assert cte is not None, "CTE not found"
432+
433+
# CTE should NOT have a WHERE clause for the window dimension filter
434+
cte_where = cte.this.find(exp.Where)
435+
if cte_where is not None:
436+
cte_where_sql = cte_where.sql()
437+
assert "next_event" not in cte_where_sql, "Window dimension filter should NOT be in CTE WHERE clause"
438+
assert "purchase" not in cte_where_sql, "Window dimension filter value should NOT be in CTE WHERE clause"
439+
440+
# The window expression should still appear in the CTE SELECT
441+
cte_sql = cte.sql()
442+
assert "LEAD" in cte_sql.upper(), "Window function should appear in CTE SELECT"
443+
444+
# The filter should appear in the outer query WHERE
445+
outer_where = parsed.find(exp.Where)
446+
assert outer_where is not None, "Filter should be in outer query WHERE"
447+
outer_where_sql = outer_where.sql()
448+
assert "next_event" in outer_where_sql or "purchase" in outer_where_sql, (
449+
"Window dimension filter should appear in outer query"
450+
)
451+
452+
453+
def test_window_dimension_filter_with_regular_filter(layer):
454+
"""Test mixed filters: regular filter pushed down, window filter stays outer."""
455+
model = Model(
456+
name="events",
457+
table="events_table",
458+
primary_key="event_id",
459+
dimensions=[
460+
Dimension(name="person_id", type="categorical", sql="person_id"),
461+
Dimension(name="event_type", type="categorical", sql="event_type"),
462+
Dimension(
463+
name="next_event",
464+
type="categorical",
465+
sql="event_type",
466+
window="LEAD(event_type) OVER (PARTITION BY person_id ORDER BY created_at)",
467+
),
468+
],
469+
metrics=[
470+
Metric(name="event_count", agg="count"),
471+
],
472+
)
473+
474+
layer.add_model(model)
475+
476+
sql = layer.compile(
477+
metrics=["events.event_count"],
478+
dimensions=["events.event_type"],
479+
filters=[
480+
"events.event_type = 'click'",
481+
"events.next_event = 'purchase'",
482+
],
483+
)
484+
485+
parsed = sqlglot.parse_one(sql)
486+
487+
# Find the CTE
488+
cte = None
489+
for cte_def in parsed.find_all(exp.CTE):
490+
if cte_def.alias == "events_cte":
491+
cte = cte_def
492+
break
493+
494+
assert cte is not None, "CTE not found"
495+
496+
# Regular filter (event_type) should be pushed into CTE WHERE
497+
cte_where = cte.this.find(exp.Where)
498+
assert cte_where is not None, "Regular filter should be pushed into CTE WHERE"
499+
cte_where_sql = cte_where.sql()
500+
assert "event_type" in cte_where_sql, "Regular filter should be in CTE WHERE"
501+
assert "click" in cte_where_sql, "Regular filter value should be in CTE WHERE"
502+
503+
# Window filter (next_event) should NOT be in CTE WHERE
504+
assert "next_event" not in cte_where_sql, "Window dimension filter should NOT be in CTE WHERE"
505+
assert "purchase" not in cte_where_sql, "Window dimension filter value should NOT be in CTE WHERE"
506+
507+
# Window filter should be in outer query WHERE
508+
outer_where = parsed.find(exp.Where)
509+
assert outer_where is not None, "Window filter should be in outer query"
510+
outer_where_sql = outer_where.sql()
511+
assert "next_event" in outer_where_sql or "purchase" in outer_where_sql, (
512+
"Window dimension filter should appear in outer query WHERE"
513+
)
514+
515+
387516
if __name__ == "__main__":
388517
import pytest
389518

0 commit comments

Comments
 (0)