Skip to content

Commit 60bf5b0

Browse files
authored
Align explain preagg routing with runtime (#95)
1 parent a5e90db commit 60bf5b0

2 files changed

Lines changed: 86 additions & 24 deletions

File tree

sidemantic/core/semantic_layer.py

Lines changed: 19 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -558,6 +558,8 @@ def explain(
558558

559559
metrics = metrics or []
560560
dimensions = dimensions or []
561+
filters = list(filters) if filters else []
562+
segments = segments or []
561563

562564
# Compile the actual SQL (respects use_preaggregations setting)
563565
sql = self.compile(
@@ -576,21 +578,15 @@ def explain(
576578

577579
use_preaggs = use_preaggregations if use_preaggregations is not None else self.use_preaggregations
578580

579-
# Extract model names from metric/dimension/filter references
580-
model_names = set()
581-
for ref in list(metrics) + list(dimensions):
582-
if "." in ref:
583-
model_name = ref.split(".", 1)[0]
584-
if model_name:
585-
model_names.add(model_name)
586-
587-
# Also extract model names from filters (e.g., "customers.status = 'vip'")
588-
import re
589-
590-
for f in filters or []:
591-
# Match "model.column" patterns before operators
592-
for match in re.finditer(r"(\w+)\.(\w+)\s*[=<>!]", f):
593-
model_names.add(match.group(1))
581+
generator = SQLGenerator(
582+
self.graph,
583+
dialect=dialect or self.dialect,
584+
preagg_database=self.preagg_database,
585+
preagg_schema=self.preagg_schema,
586+
)
587+
segment_filters = generator._resolve_segments(segments)
588+
all_filters = filters + segment_filters
589+
model_names = generator._find_required_models(metrics, dimensions, all_filters)
594590

595591
# Strip model prefixes from metrics and dimensions for matcher
596592
bare_metrics = []
@@ -610,18 +606,17 @@ def explain(
610606
bare_dims.append(dim_name)
611607

612608
bare_filters = []
613-
if filters:
614-
for f in filters:
615-
# Strip any model prefix from filters
616-
for mn in model_names:
617-
f = f.replace(f"{mn}.", "")
618-
bare_filters.append(f)
609+
for f in all_filters:
610+
for mn in model_names:
611+
f = f.replace(f"{mn}.", "")
612+
f = f.replace(f"{mn}_cte.", "")
613+
bare_filters.append(f)
619614

620615
# Check preconditions for preagg routing
621616
if not use_preaggs:
622617
return QueryPlan(
623618
sql=sql,
624-
model=next(iter(model_names), None),
619+
model=model_names[0] if model_names else None,
625620
metrics=bare_metrics,
626621
dimensions=bare_dims,
627622
used_preaggregation=False,
@@ -641,14 +636,14 @@ def explain(
641636
if ungrouped:
642637
return QueryPlan(
643638
sql=sql,
644-
model=next(iter(model_names)),
639+
model=model_names[0],
645640
metrics=bare_metrics,
646641
dimensions=bare_dims,
647642
used_preaggregation=False,
648643
routing_reason="ungrouped query, preaggs require aggregation",
649644
)
650645

651-
model_name = next(iter(model_names))
646+
model_name = model_names[0]
652647
try:
653648
model = self.get_model(model_name)
654649
except KeyError:

tests/test_preagg_explain.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -258,3 +258,70 @@ def test_explain_cross_model_filter_detected(self):
258258

259259
assert plan.used_preaggregation is False
260260
assert "multi-model" in plan.routing_reason
261+
262+
def test_explain_cross_model_filter_with_in_detected(self):
263+
"""IN filters referencing another model should trigger multi-model detection."""
264+
layer = make_layer_with_preaggs()
265+
266+
from sidemantic import Relationship
267+
268+
customers = Model(
269+
name="customers",
270+
table="customers",
271+
primary_key="customer_id",
272+
dimensions=[
273+
Dimension(name="status", type="categorical", sql="status"),
274+
],
275+
metrics=[Metric(name="customer_count", agg="count")],
276+
)
277+
layer.add_model(customers)
278+
279+
events = layer.get_model("events")
280+
events.relationships = [
281+
Relationship(name="customers", type="many_to_one", foreign_key="customer_id"),
282+
]
283+
284+
plan = layer.explain(
285+
metrics=["events.event_count"],
286+
dimensions=["events.event_type"],
287+
filters=["customers.status IN ('vip')"],
288+
)
289+
290+
assert plan.used_preaggregation is False
291+
assert "multi-model" in plan.routing_reason
292+
293+
def test_explain_includes_segment_filters_for_preagg_matching(self):
294+
"""Segment predicates should be considered when evaluating pre-agg eligibility."""
295+
from sidemantic import Segment
296+
297+
layer = SemanticLayer(use_preaggregations=True)
298+
layer.add_model(
299+
Model(
300+
name="events",
301+
table="events",
302+
primary_key="event_id",
303+
dimensions=[
304+
Dimension(name="event_type", type="categorical", sql="event_type"),
305+
Dimension(name="status", type="categorical", sql="status"),
306+
],
307+
metrics=[Metric(name="event_count", agg="count")],
308+
segments=[Segment(name="active_only", sql="{model}.status = 'active'")],
309+
pre_aggregations=[
310+
PreAggregation(
311+
name="by_type",
312+
measures=["event_count"],
313+
dimensions=["event_type"],
314+
)
315+
],
316+
)
317+
)
318+
319+
plan = layer.explain(
320+
metrics=["events.event_count"],
321+
dimensions=["events.event_type"],
322+
segments=["events.active_only"],
323+
)
324+
325+
assert plan.used_preaggregation is False
326+
assert plan.selected_preagg is None
327+
assert "no pre-aggregation matched" in plan.routing_reason

0 commit comments

Comments
 (0)