Skip to content

Commit 5b67c14

Browse files
committed
Fix: apply window dim filters as outer WHERE in preagg path
Window dimension filters were only pushed into the model sub-query that owned the window dimension, leaving other models' metrics unfiltered. Now the window dim column is projected through the owning model's preagg CTE and the filter is applied on the outer preagg WHERE, constraining all models' results.
1 parent eb3f404 commit 5b67c14

2 files changed

Lines changed: 78 additions & 47 deletions

File tree

sidemantic/sql/generator.py

Lines changed: 44 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1468,6 +1468,28 @@ def _generate_with_preaggregation(
14681468
all_filters, all_model_names
14691469
)
14701470

1471+
# Collect window dimension columns that need to be projected through
1472+
# preagg CTEs so the outer WHERE can reference them.
1473+
window_dim_extra_dims: dict[str, list[str]] = {}
1474+
for model_name, wf_list in window_dim_filters.items():
1475+
for f_expr in wf_list:
1476+
try:
1477+
parsed_f = sqlglot.parse_one(f_expr, dialect=self.dialect)
1478+
for col in parsed_f.find_all(exp.Column):
1479+
clean = (col.table or "").replace("_cte", "")
1480+
if clean in all_model_names:
1481+
model_obj = self.graph.get_model(clean)
1482+
if model_obj:
1483+
dim_obj = model_obj.get_dimension(col.name)
1484+
if dim_obj and dim_obj.window is not None:
1485+
dim_ref = f"{clean}.{col.name}"
1486+
if clean not in window_dim_extra_dims:
1487+
window_dim_extra_dims[clean] = []
1488+
if dim_ref not in window_dim_extra_dims[clean]:
1489+
window_dim_extra_dims[clean].append(dim_ref)
1490+
except Exception:
1491+
pass
1492+
14711493
# Generate a pre-aggregated CTE for each metric model
14721494
preagg_ctes = []
14731495
cte_names = []
@@ -1477,17 +1499,24 @@ def _generate_with_preaggregation(
14771499
cte_names.append(cte_name)
14781500

14791501
# Only pass filters relevant to this model's sub-query.
1480-
# Window-dim filters are included here (not in shared_filters) because
1481-
# preagg CTEs only project query dimensions/metrics, not window dims.
1482-
# The recursive generate() call handles them correctly in its outer WHERE.
1483-
model_filters = pushdown_by_model.get(model_name, []) + window_dim_filters.get(model_name, [])
1502+
# Window-dim filters are NOT pushed here: they are applied as outer
1503+
# WHERE on the final preagg join so that ALL models' metrics are
1504+
# constrained, not just the model that owns the window dimension.
1505+
model_filters = pushdown_by_model.get(model_name, [])
1506+
1507+
# Add extra window dim columns to the sub-query dimensions so they
1508+
# get projected through the preagg CTE for outer WHERE access.
1509+
sub_dimensions = list(dimensions)
1510+
for extra_dim in window_dim_extra_dims.get(model_name, []):
1511+
if extra_dim not in sub_dimensions:
1512+
sub_dimensions.append(extra_dim)
14841513

14851514
# Generate sub-query for this model's metrics at the dimension grain
14861515
# We call generate() recursively but it won't trigger pre-aggregation
14871516
# again because each sub-query has metrics from only one model
14881517
sub_query = self.generate(
14891518
metrics=model_metrics,
1490-
dimensions=dimensions,
1519+
dimensions=sub_dimensions,
14911520
filters=model_filters,
14921521
segments=None, # Already resolved
14931522
order_by=None,
@@ -1568,18 +1597,25 @@ def _generate_with_preaggregation(
15681597

15691598
final_query = f"SELECT\n {select_str}\nFROM {from_str}"
15701599

1571-
# Apply shared filters (cross-model or metric-level) on the outer query.
1600+
# Apply shared filters and window-dim filters on the outer query.
1601+
# Window-dim filters are applied here (not inside model sub-queries) so
1602+
# that ALL models' metrics are constrained by the filter, not just the
1603+
# model that owns the window dimension.
15721604
# Rewrite table references from model/model_cte to model_preagg using
15731605
# sqlglot AST rewriting so we don't accidentally mangle column names
15741606
# that happen to contain a model name as a substring.
1575-
if shared_filters:
1607+
all_outer_filters = list(shared_filters)
1608+
for wf_list in window_dim_filters.values():
1609+
all_outer_filters.extend(wf_list)
1610+
1611+
if all_outer_filters:
15761612
preagg_table_map = {}
15771613
for model_name in metrics_by_model:
15781614
preagg_table_map[model_name] = f"{model_name}_preagg"
15791615
preagg_table_map[f"{model_name}_cte"] = f"{model_name}_preagg"
15801616

15811617
rewritten = []
1582-
for f in shared_filters:
1618+
for f in all_outer_filters:
15831619
try:
15841620
parsed = sqlglot.parse_one(f, dialect=self.dialect)
15851621
for col in parsed.find_all(exp.Column):

tests/optimizations/test_predicate_pushdown.py

Lines changed: 34 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -519,15 +519,13 @@ def test_window_dimension_filter_with_regular_filter(layer):
519519
pytest.main([__file__, "-v"])
520520

521521

522-
def test_window_dim_filter_excluded_from_preagg_shared_filters(layer):
523-
"""Test that window-dim filters do not become shared_filters in the preagg path.
522+
def test_window_dim_filter_applied_as_outer_where_in_preagg(layer):
523+
"""Test that window-dim filters are applied as outer WHERE in the preagg path.
524524
525525
When metrics come from multiple models (triggering pre-aggregation), a filter
526-
on a window dimension must NOT be applied as a shared filter on the outer
527-
query over preagg CTEs, because those CTEs only project query dimensions
528-
and metrics, not window dimension columns. Instead, the window-dim filter
529-
should be pushed into the relevant model's sub-query where the recursive
530-
generate() call can handle it via its own outer WHERE.
526+
on a window dimension must be applied as an outer WHERE on the final preagg
527+
join so that ALL models' metrics are constrained. The window dim column is
528+
projected through the owning model's preagg CTE to make it available.
531529
"""
532530
from sidemantic.core.model import Relationship
533531

@@ -590,28 +588,26 @@ def test_window_dim_filter_excluded_from_preagg_shared_filters(layer):
590588
assert "orders_preagg" in sql, "Should use pre-aggregation path"
591589
assert "order_items_preagg" in sql, "Should use pre-aggregation path"
592590

593-
# The window-dim filter must NOT appear in the outer WHERE referencing _preagg
594-
# because preagg CTEs don't project the window dimension column
595-
assert "orders_preagg.next_status" not in sql, (
596-
"Window-dim filter should NOT reference preagg CTE (column doesn't exist there)"
597-
)
598-
599-
# The filter should be pushed into the orders sub-query instead
600-
# (visible inside the orders_preagg CTE definition)
591+
# The window dim column should be projected in the orders preagg CTE
601592
preagg_start = sql.index("orders_preagg AS (")
602593
preagg_end = sql.index("order_items_preagg AS (")
603594
orders_subquery = sql[preagg_start:preagg_end]
604-
assert "next_status" in orders_subquery, "Window-dim filter should be pushed into orders sub-query"
595+
assert "next_status" in orders_subquery, "Window dim column should be projected in orders preagg CTE"
596+
597+
# The filter should appear in the outer WHERE referencing the preagg CTE,
598+
# so that BOTH models' metrics are constrained by the filter
599+
outer_query = sql[sql.rindex("SELECT") :]
600+
assert "orders_preagg.next_status" in outer_query, (
601+
"Window-dim filter should be in outer WHERE referencing preagg CTE"
602+
)
605603

606604

607-
def test_multi_model_window_dim_filter_excluded_from_preagg_shared_filters(layer):
608-
"""Test that multi-model window-dim filters do not become shared_filters in the preagg path.
605+
def test_multi_model_window_dim_filter_applied_as_outer_where_in_preagg(layer):
606+
"""Test that multi-model window-dim filters are applied as outer WHERE in preagg.
609607
610608
When a filter references columns from multiple models and at least one is a
611-
window dimension, it must NOT be applied as a shared filter on preagg CTEs
612-
(which only project query dimensions/metrics, not window columns). Instead it
613-
should be routed into window_dim_filters for the model that owns the window
614-
dimension, so the recursive generate() call can handle it.
609+
window dimension, the window dim column is projected through the preagg CTE
610+
and the filter is applied on the outer WHERE so both models are constrained.
615611
"""
616612
from sidemantic.core.model import Relationship
617613

@@ -676,25 +672,24 @@ def test_multi_model_window_dim_filter_excluded_from_preagg_shared_filters(layer
676672
assert "orders_preagg" in sql, "Should use pre-aggregation path"
677673
assert "order_items_preagg" in sql, "Should use pre-aggregation path"
678674

679-
# The multi-model window-dim filter must NOT appear in the outer WHERE
680-
# referencing _preagg CTEs, because preagg CTEs don't project window columns
681-
assert "orders_preagg.next_status" not in sql, "Multi-model window-dim filter should NOT reference preagg CTE"
682-
683-
# The filter should be pushed into the orders sub-query instead
675+
# The window dim column should be projected in the orders preagg CTE
684676
preagg_start = sql.index("orders_preagg AS (")
685677
preagg_end = sql.index("order_items_preagg AS (")
686678
orders_subquery = sql[preagg_start:preagg_end]
687-
assert "next_status" in orders_subquery, "Multi-model window-dim filter should be pushed into orders sub-query"
679+
assert "next_status" in orders_subquery, "Window dim column should be projected in orders preagg CTE"
680+
681+
# The filter should appear in the outer WHERE
682+
outer_query = sql[sql.rindex("SELECT") :]
683+
assert "orders_preagg.next_status" in outer_query, "Multi-model window-dim filter should be in outer WHERE"
688684

689685

690-
def test_mixed_metric_and_window_dim_filter_excluded_from_preagg_shared_filters(layer):
691-
"""Test that a filter referencing both a metric and a window dim goes to window_dim_filters.
686+
def test_mixed_metric_and_window_dim_filter_applied_as_outer_where_in_preagg(layer):
687+
"""Test that a filter referencing both a metric and a window dim is applied as outer WHERE.
692688
693689
When a filter like "orders.next_status = 'complete' OR orders.revenue > 100"
694690
references both a window dimension (next_status) and a metric (revenue), the
695-
window dim check must take priority over the metric check. Otherwise the filter
696-
lands in main_query_filters/shared_filters and gets rewritten onto preagg CTEs
697-
that don't project window dimension columns, causing a runtime failure.
691+
window dim check takes priority and the filter is applied on the outer preagg
692+
WHERE. The window dim column is projected through the preagg CTE.
698693
"""
699694
from sidemantic.core.model import Relationship
700695

@@ -757,12 +752,12 @@ def test_mixed_metric_and_window_dim_filter_excluded_from_preagg_shared_filters(
757752
assert "orders_preagg" in sql, "Should use pre-aggregation path"
758753
assert "order_items_preagg" in sql, "Should use pre-aggregation path"
759754

760-
# The mixed filter must NOT appear in the outer WHERE referencing _preagg
761-
# because preagg CTEs don't project window dimension columns
762-
assert "orders_preagg.next_status" not in sql, "Mixed metric/window-dim filter should NOT reference preagg CTE"
763-
764-
# The filter should be pushed into the orders sub-query instead
755+
# The window dim column should be projected in the orders preagg CTE
765756
preagg_start = sql.index("orders_preagg AS (")
766757
preagg_end = sql.index("order_items_preagg AS (")
767758
orders_subquery = sql[preagg_start:preagg_end]
768-
assert "next_status" in orders_subquery, "Mixed metric/window-dim filter should be pushed into orders sub-query"
759+
assert "next_status" in orders_subquery, "Window dim column should be projected in orders preagg CTE"
760+
761+
# The filter should be in the outer WHERE, constraining both models
762+
outer_query = sql[sql.rindex("SELECT") :]
763+
assert "orders_preagg.next_status" in outer_query, "Mixed metric/window-dim filter should be in outer WHERE"

0 commit comments

Comments
 (0)