Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions sidemantic/adapters/sidemantic.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,7 @@ def _parse_model(self, model_def: dict) -> Model | None:
sql=measure_def.get("sql") or measure_def.get("expr"),
type=measure_def.get("type"),
filters=measure_def.get("filters"),
having=measure_def.get("having"),
fill_nulls_with=measure_def.get("fill_nulls_with"),
description=measure_def.get("description"),
label=measure_def.get("label"),
Expand Down Expand Up @@ -419,6 +420,7 @@ def _parse_metric(self, metric_def: dict) -> Metric | None:
window_frame=metric_def.get("window_frame"),
window_order=metric_def.get("window_order"),
filters=metric_def.get("filters"),
having=metric_def.get("having"),
fill_nulls_with=metric_def.get("fill_nulls_with"),
format=metric_def.get("format"),
value_format_name=metric_def.get("value_format_name"),
Expand Down Expand Up @@ -540,6 +542,8 @@ def _export_model(self, model: Model) -> dict:
measure_def["sql"] = measure.sql
if measure.filters:
measure_def["filters"] = measure.filters
if measure.having:
measure_def["having"] = measure.having
if measure.description:
measure_def["description"] = measure.description
if measure.label:
Expand Down Expand Up @@ -666,5 +670,7 @@ def _export_metric(self, measure: Metric, graph) -> dict:
result["window"] = measure.window
if measure.filters:
result["filters"] = measure.filters
if measure.having:
result["having"] = measure.having

return result
4 changes: 4 additions & 0 deletions sidemantic/core/metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,10 @@ def validate_type_specific_fields(self):

# Common parameters
filters: list[str] | None = Field(None, description="Optional WHERE clause filters")
having: list[str] | None = Field(
None,
description="Optional HAVING clause filters applied after GROUP BY (e.g., 'count(distinct platform) > 1')",
)
fill_nulls_with: int | float | str | None = Field(None, description="Default value when result is NULL")
description: str | None = Field(None, description="Human-readable description")
label: str | None = Field(None, description="Display label")
Expand Down
36 changes: 36 additions & 0 deletions sidemantic/sql/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -773,6 +773,9 @@ def extract_from_measure_ref(metric_ref: str):
# Check the measure's own filters
if measure.filters:
add_filter_columns(model_name, measure.filters)
# Check the measure's having clauses (columns in aggregate expressions)
if measure.having:
add_filter_columns(model_name, measure.having)
# If measure is a ratio/derived, recursively check dependencies
if measure.type == "ratio":
if measure.numerator:
Expand Down Expand Up @@ -812,6 +815,15 @@ def extract_from_metric(metric):
add_filter_columns(dep_model_name, metric.filters)
break

# Extract from metric's having clauses
if metric.having:
deps = metric.get_dependencies(self.graph)
for dep in deps:
if "." in dep:
dep_model_name = dep.split(".")[0]
add_filter_columns(dep_model_name, metric.having)
break

# For ratio metrics, check numerator and denominator
if metric.type == "ratio":
if metric.numerator:
Expand Down Expand Up @@ -1793,6 +1805,30 @@ def _build_main_select(
else:
where_filters.append(filter_expr)

# Collect metric-level having clauses (Metric.having) and add to HAVING filters.
# These are post-aggregation conditions defined on the metric itself, e.g.,
# having: ["count(distinct platform) > 1"] for cross-platform user metrics.
for metric_ref in metrics:
metric_obj = None
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1: Propagate metric HAVING from metric dependencies

This loop only inspects the top-level `metrics` argument, so `Metric.having` is applied only when that metric is queried directly. If a derived/ratio metric references another metric that defines `having`, `_build_metric_sql` inlines the dependency's aggregation but this code never adds the dependency's HAVING clause, which changes results by keeping groups that should be filtered out. For example, a query on `derived = 2 * base_metric` will ignore `base_metric.having` unless `base_metric` is also selected explicitly.

Useful? React with 👍 / 👎.

metric_model_name = None
if "." in metric_ref:
metric_model_name, measure_name = metric_ref.split(".", 1)
model_obj = self.graph.get_model(metric_model_name)
if model_obj:
metric_obj = model_obj.get_metric(measure_name)
else:
try:
metric_obj = self.graph.get_metric(metric_ref)
except KeyError:
pass
if metric_obj and metric_obj.having:
for having_expr in metric_obj.having:
# Replace {model} placeholder with CTE reference
resolved = having_expr
if metric_model_name:
resolved = resolved.replace("{model}", f"{metric_model_name}_cte")
having_filters.append(resolved)
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2: Skip metric HAVING clauses in ungrouped mode

Metric-level HAVING expressions are appended unconditionally here, but ungrouped queries intentionally skip GROUP BY and project raw `*_raw` columns. In that mode, adding aggregate HAVING predicates (the primary use case for this feature) produces invalid SQL on standard engines because the select list is not grouped/aggregated. This should be gated behind `not ungrouped` (or rejected during validation) to avoid generating non-executable queries.

Useful? React with 👍 / 👎.


# Add WHERE clause (dimension filters only - metric-level filters are in CASE WHEN)
if where_filters:
# Parse filters to add table aliases and handle measure vs dimension columns
Expand Down
180 changes: 180 additions & 0 deletions tests/metrics/test_filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -445,3 +445,183 @@ def test_having_filter_with_actual_data(layer):
revenues = {row[0]: row[1] for row in rows}
assert revenues["US"] == 225.0
assert revenues["EU"] == 375.0


def test_metric_having_basic(layer):
    """A metric that declares ``having`` must surface it in a SQL HAVING clause."""
    # Build the pieces individually (dimensions first, then the metric) and
    # assemble them into the model afterwards.
    model_dimensions = [
        Dimension(name="person_id", type="categorical"),
        Dimension(name="platform", type="categorical"),
    ]
    cross_platform_metric = Metric(
        name="cross_platform_users",
        agg="count_distinct",
        sql="person_id",
        having=["count(distinct platform) > 1"],
        description="Users who used the app on multiple platforms",
    )
    layer.add_model(
        Model(
            name="events",
            table="events_table",
            primary_key="event_id",
            dimensions=model_dimensions,
            metrics=[cross_platform_metric],
        )
    )

    compiled = layer.compile(
        metrics=["events.cross_platform_users"],
        dimensions=["events.person_id"],
    )

    # Echo the generated SQL so a failing run shows what was produced.
    print("SQL with metric-level having:")
    print(compiled)

    assert "HAVING" in compiled
    # The predicate is accepted either case-insensitively in its lower-cased
    # form or verbatim with an upper-cased COUNT(DISTINCT ...).
    lowered_match = "count(distinct platform) > 1" in compiled.lower()
    assert lowered_match or "COUNT(DISTINCT platform) > 1" in compiled


def test_metric_having_with_filters(layer):
    """A metric may carry both ``filters`` (CASE WHEN) and ``having`` (HAVING clause)."""
    model_dimensions = [
        Dimension(name="person_id", type="categorical"),
        Dimension(name="platform", type="categorical"),
        Dimension(name="event_type", type="categorical"),
    ]
    active_cross_platform = Metric(
        name="cross_platform_active_users",
        agg="count_distinct",
        sql="person_id",
        filters=["{model}.event_type = 'active'"],
        having=["count(distinct platform) > 1"],
        description="Active users on multiple platforms",
    )
    layer.add_model(
        Model(
            name="events",
            table="events_table",
            primary_key="event_id",
            dimensions=model_dimensions,
            metrics=[active_cross_platform],
        )
    )

    compiled = layer.compile(
        metrics=["events.cross_platform_active_users"],
        dimensions=["events.person_id"],
    )

    # Echo the generated SQL so a failing run shows what was produced.
    print("SQL with both filters and having:")
    print(compiled)

    # The metric-level filter is expected inside a CASE WHEN expression.
    for expected_fragment in ("CASE WHEN", "event_type = 'active'"):
        assert expected_fragment in compiled

    # The having condition is expected in the HAVING clause.
    assert "HAVING" in compiled
    lowered_match = "count(distinct platform) > 1" in compiled.lower()
    assert lowered_match or "COUNT(DISTINCT platform) > 1" in compiled


def test_metric_having_regular_filters_still_work(layer):
    """Query-level filters still produce a WHERE clause when a metric has ``having``.

    The model defines one plain metric (``total_events``) and one metric with a
    ``having`` condition (``cross_platform_users``). The query selects the
    latter together with a query-level dimension filter, and the generated SQL
    must contain both the WHERE filter and the HAVING condition.
    """
    events = Model(
        name="events",
        table="events_table",
        primary_key="event_id",
        dimensions=[
            Dimension(name="person_id", type="categorical"),
            Dimension(name="platform", type="categorical"),
        ],
        metrics=[
            # Plain metric without having; defined on the model but not queried.
            Metric(
                name="total_events",
                agg="count",
                sql="event_id",
            ),
            Metric(
                name="cross_platform_users",
                agg="count_distinct",
                sql="person_id",
                having=["count(distinct platform) > 1"],
            ),
        ],
    )

    layer.add_model(events)

    # Query with a query-level dimension filter plus a metric with having.
    # Previously no filter was passed, so the WHERE path was never exercised.
    sql = layer.compile(
        metrics=["events.cross_platform_users"],
        dimensions=["events.person_id"],
        filters=["events.platform != 'web'"],
    )

    print("SQL with having and query filter:")
    print(sql)

    # The query-level filter must still be emitted as a WHERE condition. Only
    # the keyword and the literal are checked because the generator may rewrite
    # the column reference or the comparison operator.
    assert "WHERE" in sql.upper()
    assert "'web'" in sql

    # HAVING clause should be present with the metric's having condition
    assert "HAVING" in sql
    assert "count(distinct platform) > 1" in sql.lower()


def test_metric_having_with_data(layer):
    """End-to-end check of metric-level ``having`` against an in-memory DuckDB table."""
    # NOTE: the ``layer`` fixture argument is not used; this test builds its own
    # SemanticLayer and attaches a private in-memory connection to it.
    db = duckdb.connect(":memory:")

    db.execute("""
CREATE TABLE events (
event_id INTEGER,
person_id INTEGER,
platform VARCHAR
)
""")

    db.execute("""
INSERT INTO events VALUES
(1, 1, 'iOS'),
(2, 1, 'macOS'),
(3, 1, 'iOS'),
(4, 2, 'iOS'),
(5, 2, 'iOS'),
(6, 3, 'macOS'),
(7, 3, 'macOS'),
(8, 4, 'iOS'),
(9, 4, 'macOS'),
(10, 4, 'Android')
""")

    semantic_layer = SemanticLayer()

    model_dimensions = [
        Dimension(name="person_id", type="categorical"),
        Dimension(name="platform", type="categorical"),
    ]
    cross_platform_metric = Metric(
        name="cross_platform_users",
        agg="count_distinct",
        sql="person_id",
        having=["count(distinct platform) > 1"],
        description="Users active on multiple platforms",
    )
    events_model = Model(
        name="events",
        table="events",
        primary_key="event_id",
        dimensions=model_dimensions,
        metrics=[cross_platform_metric],
    )

    # Attach the populated connection before registering the model.
    semantic_layer.conn = db
    semantic_layer.add_model(events_model)

    query_result = semantic_layer.query(
        metrics=["events.cross_platform_users"],
        dimensions=["events.person_id"],
    )
    result_rows = df_rows(query_result)

    # Expected outcome per person, from the inserted rows:
    #   person_id 1: iOS + macOS           (2 platforms) -> kept
    #   person_id 2: iOS only              (1 platform)  -> dropped
    #   person_id 3: macOS only            (1 platform)  -> dropped
    #   person_id 4: iOS + macOS + Android (3 platforms) -> kept
    assert len(result_rows) == 2
    surviving_ids = {record[0] for record in result_rows}
    assert surviving_ids == {1, 4}
Loading