Skip to content

Commit 3af6d80

Browse files
committed
Fix: route multi-model window filters away from preagg shared WHERE
1 parent 54caa53 commit 3af6d80

2 files changed

Lines changed: 101 additions & 16 deletions

File tree

sidemantic/sql/generator.py

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -679,10 +679,11 @@ def _classify_filters_for_pushdown(
679679
Tuple of (pushdown_filters_by_model, main_query_filters, window_dim_filters_by_model)
680680
- pushdown_filters_by_model: Dict mapping model name to list of filters for that model
681681
- main_query_filters: Filters that reference multiple models or metrics (can't push down)
682-
- window_dim_filters_by_model: Dict mapping model name to filters on window dimensions.
683-
These can't be pushed into CTE WHERE (window not yet evaluated) but reference a
684-
single model. In the standard path they belong in the outer query; in the preagg
685-
path they are pushed into each model's sub-query (which has its own outer WHERE).
682+
- window_dim_filters_by_model: Dict mapping model name to filters that reference
683+
a window dimension on that model. These can't be pushed into CTE WHERE (window
684+
not yet evaluated). In the standard path they belong in the outer query; in the
685+
preagg path they are pushed into each model's sub-query (which has its own outer
686+
WHERE). Multi-model filters are keyed by the model that owns the window dim.
686687
"""
687688
pushdown_filters = {model: [] for model in all_models}
688689
main_query_filters = []
@@ -711,7 +712,7 @@ def _classify_filters_for_pushdown(
711712
# Find all table references in the filter
712713
referenced_models = set()
713714
references_metric = False
714-
references_window_dim = False
715+
window_dim_models: set[str] = set()
715716

716717
for column in parsed.find_all(exp.Column):
717718
table_name = column.table
@@ -732,22 +733,23 @@ def _classify_filters_for_pushdown(
732733
if model:
733734
dim = model.get_dimension(column_name)
734735
if dim and dim.window is not None:
735-
references_window_dim = True
736+
window_dim_models.add(clean_name)
736737

737738
# Filters that reference metrics must stay in main query (can't push down)
738739
# because metrics don't exist in CTEs (only _raw columns)
739740
if references_metric:
740741
main_query_filters.append(filter_expr)
741-
# Single-model window-dim filters: kept separate so callers can
742-
# handle them appropriately. In the standard path they go to the
743-
# outer WHERE; in the preagg path they are pushed into each model's
744-
# sub-query (which has its own outer WHERE after window evaluation).
745-
elif references_window_dim and len(referenced_models) == 1:
746-
model_name = list(referenced_models)[0]
747-
window_dim_filters[model_name].append(filter_expr)
748-
elif references_window_dim:
749-
# Window-dim filter spanning multiple models: outer query
750-
main_query_filters.append(filter_expr)
742+
# Window-dim filters: kept separate so callers can handle them
743+
# appropriately. In the standard path they go to the outer WHERE;
744+
# in the preagg path they are pushed into each model's sub-query
745+
# (which has its own outer WHERE after window evaluation).
746+
# For multi-model filters, route to each model that owns a window
747+
# dim column so the preagg path includes them in the correct
748+
# sub-queries (the recursive generate() call will join in any
749+
# additional models referenced by the filter).
750+
elif window_dim_models:
751+
for wdm in window_dim_models:
752+
window_dim_filters[wdm].append(filter_expr)
751753
# If filter references exactly one model and no metrics, push it down
752754
elif len(referenced_models) == 1:
753755
model = list(referenced_models)[0]

tests/optimizations/test_predicate_pushdown.py

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -602,3 +602,86 @@ def test_window_dim_filter_excluded_from_preagg_shared_filters(layer):
602602
preagg_end = sql.index("order_items_preagg AS (")
603603
orders_subquery = sql[preagg_start:preagg_end]
604604
assert "next_status" in orders_subquery, "Window-dim filter should be pushed into orders sub-query"
605+
606+
607+
def test_multi_model_window_dim_filter_excluded_from_preagg_shared_filters(layer):
608+
"""Test that multi-model window-dim filters do not become shared_filters in the preagg path.
609+
610+
When a filter references columns from multiple models and at least one is a
611+
window dimension, it must NOT be applied as a shared filter on preagg CTEs
612+
(which only project query dimensions/metrics, not window columns). Instead it
613+
should be routed into window_dim_filters for the model that owns the window
614+
dimension, so the recursive generate() call can handle it.
615+
"""
616+
from sidemantic.core.model import Relationship
617+
618+
orders = Model(
619+
name="orders",
620+
table="orders_table",
621+
primary_key="order_id",
622+
dimensions=[
623+
Dimension(name="status", type="categorical", sql="status"),
624+
Dimension(
625+
name="next_status",
626+
type="categorical",
627+
sql="status",
628+
window="LEAD(status) OVER (PARTITION BY customer_id ORDER BY created_at)",
629+
),
630+
Dimension(name="order_date", type="time", sql="order_date", granularity="day"),
631+
],
632+
metrics=[
633+
Metric(name="revenue", agg="sum", sql="amount"),
634+
],
635+
relationships=[
636+
Relationship(
637+
name="order_items",
638+
type="one_to_many",
639+
sql="order_id",
640+
foreign_key="order_id",
641+
),
642+
],
643+
)
644+
645+
order_items = Model(
646+
name="order_items",
647+
table="order_items_table",
648+
primary_key="item_id",
649+
dimensions=[
650+
Dimension(name="item_status", type="categorical", sql="item_status"),
651+
],
652+
metrics=[
653+
Metric(name="quantity", agg="sum", sql="qty"),
654+
],
655+
relationships=[
656+
Relationship(
657+
name="orders",
658+
type="many_to_one",
659+
foreign_key="order_id",
660+
primary_key="order_id",
661+
),
662+
],
663+
)
664+
665+
layer.add_model(orders)
666+
layer.add_model(order_items)
667+
668+
# Multi-model filter: window dim from orders + regular dim from order_items
669+
sql = layer.compile(
670+
metrics=["orders.revenue", "order_items.quantity"],
671+
dimensions=["orders.order_date"],
672+
filters=["orders.next_status = order_items.item_status"],
673+
)
674+
675+
# The SQL should use pre-aggregation (two model CTEs joined together)
676+
assert "orders_preagg" in sql, "Should use pre-aggregation path"
677+
assert "order_items_preagg" in sql, "Should use pre-aggregation path"
678+
679+
# The multi-model window-dim filter must NOT appear in the outer WHERE
680+
# referencing _preagg CTEs, because preagg CTEs don't project window columns
681+
assert "orders_preagg.next_status" not in sql, "Multi-model window-dim filter should NOT reference preagg CTE"
682+
683+
# The filter should be pushed into the orders sub-query instead
684+
preagg_start = sql.index("orders_preagg AS (")
685+
preagg_end = sql.index("order_items_preagg AS (")
686+
orders_subquery = sql[preagg_start:preagg_end]
687+
assert "next_status" in orders_subquery, "Multi-model window-dim filter should be pushed into orders sub-query"

0 commit comments

Comments
 (0)