Skip to content

Commit 8102a9d

Browse files
committed
Fix: exclude window-dim filters from preagg shared filters
1 parent e7d5fc2 commit 8102a9d

2 files changed

Lines changed: 117 additions & 10 deletions

File tree

sidemantic/sql/generator.py

Lines changed: 32 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -389,7 +389,13 @@ def metric_needs_window(m):
389389
pass
390390

391391
# Classify filters for pushdown optimization
392-
pushdown_filters, main_query_filters = self._classify_filters_for_pushdown(filters or [], all_models)
392+
pushdown_filters, main_query_filters, window_dim_filters = self._classify_filters_for_pushdown(
393+
filters or [], all_models
394+
)
395+
# In the standard (non-preagg) path, window-dim filters belong in the
396+
# outer query WHERE, so merge them into main_query_filters.
397+
for wf_list in window_dim_filters.values():
398+
main_query_filters.extend(wf_list)
393399

394400
# Determine which models have filters (for join type decision)
395401
models_with_filters = set()
@@ -657,7 +663,7 @@ def collect_models_from_metric(metric_ref: str):
657663

658664
def _classify_filters_for_pushdown(
659665
self, filters: list[str], all_models: set[str]
660-
) -> tuple[dict[str, list[str]], list[str]]:
666+
) -> tuple[dict[str, list[str]], list[str], dict[str, list[str]]]:
661667
"""Classify filters into those that can be pushed down vs those that must stay in main query.
662668
663669
Compound AND expressions are split into individual conjuncts first so that
@@ -670,12 +676,17 @@ def _classify_filters_for_pushdown(
670676
all_models: Set of all model names in the query
671677
672678
Returns:
673-
Tuple of (pushdown_filters_by_model, main_query_filters)
679+
Tuple of (pushdown_filters_by_model, main_query_filters, window_dim_filters_by_model)
674680
- pushdown_filters_by_model: Dict mapping model name to list of filters for that model
675-
- main_query_filters: Filters that reference multiple models (can't push down)
681+
- main_query_filters: Filters that reference multiple models or metrics (can't push down)
682+
- window_dim_filters_by_model: Dict mapping model name to filters on window dimensions.
683+
These can't be pushed into CTE WHERE (window not yet evaluated) but reference a
684+
single model. In the standard path they belong in the outer query; in the preagg
685+
path they are pushed into each model's sub-query (which has its own outer WHERE).
676686
"""
677687
pushdown_filters = {model: [] for model in all_models}
678688
main_query_filters = []
689+
window_dim_filters: dict[str, list[str]] = {model: [] for model in all_models}
679690

680691
# Flatten compound AND expressions into individual conjuncts so each
681692
# part can be classified independently.
@@ -727,9 +738,15 @@ def _classify_filters_for_pushdown(
727738
# because metrics don't exist in CTEs (only _raw columns)
728739
if references_metric:
729740
main_query_filters.append(filter_expr)
730-
# Filters on window dimensions must stay in outer query because the
731-
# window function hasn't been evaluated yet in the CTE WHERE context
741+
# Single-model window-dim filters: kept separate so callers can
742+
# handle them appropriately. In the standard path they go to the
743+
# outer WHERE; in the preagg path they are pushed into each model's
744+
# sub-query (which has its own outer WHERE after window evaluation).
745+
elif references_window_dim and len(referenced_models) == 1:
746+
model_name = list(referenced_models)[0]
747+
window_dim_filters[model_name].append(filter_expr)
732748
elif references_window_dim:
749+
# Window-dim filter spanning multiple models: outer query
733750
main_query_filters.append(filter_expr)
734751
# If filter references exactly one model and no metrics, push it down
735752
elif len(referenced_models) == 1:
@@ -739,7 +756,7 @@ def _classify_filters_for_pushdown(
739756
# Filter references multiple models or no models - keep in main query
740757
main_query_filters.append(filter_expr)
741758

742-
return pushdown_filters, main_query_filters
759+
return pushdown_filters, main_query_filters, window_dim_filters
743760

744761
def _extract_metric_filter_columns(self, metrics: list[str]) -> dict[str, set[str]]:
745762
"""Extract columns referenced in metric-level filters and SQL expressions.
@@ -1439,7 +1456,9 @@ def _generate_with_preaggregation(
14391456
# Cross-model filters (referencing models outside the sub-query) would
14401457
# produce invalid SQL referencing CTEs that don't exist.
14411458
all_model_names = set(metrics_by_model.keys())
1442-
pushdown_by_model, shared_filters = self._classify_filters_for_pushdown(all_filters, all_model_names)
1459+
pushdown_by_model, shared_filters, window_dim_filters = self._classify_filters_for_pushdown(
1460+
all_filters, all_model_names
1461+
)
14431462

14441463
# Generate a pre-aggregated CTE for each metric model
14451464
preagg_ctes = []
@@ -1449,8 +1468,11 @@ def _generate_with_preaggregation(
14491468
cte_name = f"{model_name}_preagg"
14501469
cte_names.append(cte_name)
14511470

1452-
# Only pass filters relevant to this model's sub-query
1453-
model_filters = pushdown_by_model.get(model_name, [])
1471+
# Only pass filters relevant to this model's sub-query.
1472+
# Window-dim filters are included here (not in shared_filters) because
1473+
# preagg CTEs only project query dimensions/metrics, not window dims.
1474+
# The recursive generate() call handles them correctly in its outer WHERE.
1475+
model_filters = pushdown_by_model.get(model_name, []) + window_dim_filters.get(model_name, [])
14541476

14551477
# Generate sub-query for this model's metrics at the dimension grain
14561478
# We call generate() recursively but it won't trigger pre-aggregation

tests/optimizations/test_predicate_pushdown.py

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -517,3 +517,88 @@ def test_window_dimension_filter_with_regular_filter(layer):
517517
import pytest
518518

519519
pytest.main([__file__, "-v"])
520+
521+
522+
def test_window_dim_filter_excluded_from_preagg_shared_filters(layer):
523+
"""Test that window-dim filters do not become shared_filters in the preagg path.
524+
525+
When metrics come from multiple models (triggering pre-aggregation), a filter
526+
on a window dimension must NOT be applied as a shared filter on the outer
527+
query over preagg CTEs, because those CTEs only project query dimensions
528+
and metrics, not window dimension columns. Instead, the window-dim filter
529+
should be pushed into the relevant model's sub-query where the recursive
530+
generate() call can handle it via its own outer WHERE.
531+
"""
532+
from sidemantic.core.model import Relationship
533+
534+
orders = Model(
535+
name="orders",
536+
table="orders_table",
537+
primary_key="order_id",
538+
dimensions=[
539+
Dimension(name="status", type="categorical", sql="status"),
540+
Dimension(
541+
name="next_status",
542+
type="categorical",
543+
sql="status",
544+
window="LEAD(status) OVER (PARTITION BY customer_id ORDER BY created_at)",
545+
),
546+
Dimension(name="order_date", type="time", sql="order_date", granularity="day"),
547+
],
548+
metrics=[
549+
Metric(name="revenue", agg="sum", sql="amount"),
550+
],
551+
relationships=[
552+
Relationship(
553+
name="order_items",
554+
type="one_to_many",
555+
sql="order_id",
556+
foreign_key="order_id",
557+
),
558+
],
559+
)
560+
561+
order_items = Model(
562+
name="order_items",
563+
table="order_items_table",
564+
primary_key="item_id",
565+
dimensions=[],
566+
metrics=[
567+
Metric(name="quantity", agg="sum", sql="qty"),
568+
],
569+
relationships=[
570+
Relationship(
571+
name="orders",
572+
type="many_to_one",
573+
foreign_key="order_id",
574+
primary_key="order_id",
575+
),
576+
],
577+
)
578+
579+
layer.add_model(orders)
580+
layer.add_model(order_items)
581+
582+
# This query spans two models, triggering preagg, and filters on a window dim
583+
sql = layer.compile(
584+
metrics=["orders.revenue", "order_items.quantity"],
585+
dimensions=["orders.order_date"],
586+
filters=["orders.next_status = 'complete'"],
587+
)
588+
589+
# The SQL should use pre-aggregation (two model CTEs joined together)
590+
assert "orders_preagg" in sql, "Should use pre-aggregation path"
591+
assert "order_items_preagg" in sql, "Should use pre-aggregation path"
592+
593+
# The window-dim filter must NOT appear in the outer WHERE referencing _preagg
594+
# because preagg CTEs don't project the window dimension column
595+
assert "orders_preagg.next_status" not in sql, (
596+
"Window-dim filter should NOT reference preagg CTE (column doesn't exist there)"
597+
)
598+
599+
# The filter should be pushed into the orders sub-query instead
600+
# (visible inside the orders_preagg CTE definition)
601+
preagg_start = sql.index("orders_preagg AS (")
602+
preagg_end = sql.index("order_items_preagg AS (")
603+
orders_subquery = sql[preagg_start:preagg_end]
604+
assert "next_status" in orders_subquery, "Window-dim filter should be pushed into orders sub-query"

0 commit comments

Comments
 (0)