Skip to content

Commit d495baf

Browse files
committed
Fix: validate single retention metric per query, preserve offset
1 parent fb7726e commit d495baf

2 files changed

Lines changed: 135 additions & 2 deletions

File tree

sidemantic/sql/generator.py

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2252,6 +2252,7 @@ def _generate_retention_query(
22522252
filters: list[str] | None = None,
22532253
order_by: list[str] | None = None,
22542254
limit: int | None = None,
2255+
offset: int | None = None,
22552256
) -> str:
22562257
"""Generate SQL for cohort retention metrics.
22572258
@@ -2268,6 +2269,7 @@ def _generate_retention_query(
22682269
filters: List of filter expressions
22692270
order_by: List of fields to order by
22702271
limit: Maximum number of rows to return
2272+
offset: Number of rows to skip (for pagination)
22712273
22722274
Returns:
22732275
SQL query string
@@ -2404,6 +2406,10 @@ def _replace_model_placeholder(expr: str) -> str:
24042406
if limit is not None:
24052407
limit_clause = f"\nLIMIT {limit}"
24062408

2409+
offset_clause = ""
2410+
if offset is not None:
2411+
offset_clause = f"\nOFFSET {offset}"
2412+
24072413
# Use entity_sql for raw-table references, entity_alias for CTE column names
24082414
entity_select = f"{entity_sql} AS {entity_alias}" if entity_sql != entity_alias else entity_alias
24092415

@@ -2439,7 +2445,7 @@ def _replace_model_placeholder(expr: str) -> str:
24392445
c.cohort_size,
24402446
ROUND(r.active_users * 100.0 / c.cohort_size, 1) AS retention_pct
24412447
FROM retention r
2442-
JOIN cohort_sizes c ON r.cohort_date = c.cohort_date{order_clause}{limit_clause}"""
2448+
JOIN cohort_sizes c ON r.cohort_date = c.cohort_date{order_clause}{limit_clause}{offset_clause}"""
24432449

24442450
return sql.strip()
24452451

@@ -2848,7 +2854,19 @@ def collect_leaf_base_metrics(
28482854

28492855
# Handle retention metrics separately - they need a completely different pattern
28502856
if retention_metrics:
2851-
return self._generate_retention_query(retention_metrics[0], dimensions, filters, order_by, limit)
2857+
if len(retention_metrics) > 1:
2858+
raise ValueError("Only one retention metric can be queried at a time")
2859+
non_retention = (
2860+
base_metrics
2861+
or cumulative_metrics
2862+
or time_comparison_metrics
2863+
or offset_ratio_metrics
2864+
or conversion_metrics
2865+
or regular_expression_metric_plans
2866+
)
2867+
if non_retention:
2868+
raise ValueError("Retention metrics cannot be combined with other metrics in a single query")
2869+
return self._generate_retention_query(retention_metrics[0], dimensions, filters, order_by, limit, offset)
28522870

28532871
# Handle conversion metrics separately - they need a completely different pattern
28542872
if conversion_metrics:

tests/metrics/test_retention.py

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -703,3 +703,118 @@ def test_retention_metric_level_filters():
703703
# Day 1: only user 1 active -> 50%
704704
day1 = [r for r in rows if r[1] == 1]
705705
assert day1[0][4] == 50.0
706+
707+
708+
def test_retention_multiple_retention_metrics_raises():
709+
"""Test that querying two retention metrics raises ValueError."""
710+
events = Model(
711+
name="events",
712+
sql="SELECT 1 AS uid, 'signup' AS event, '2024-01-01'::DATE AS ts",
713+
primary_key="uid",
714+
dimensions=[
715+
Dimension(name="uid", sql="uid", type="categorical"),
716+
Dimension(name="event", sql="event", type="categorical"),
717+
Dimension(name="ts", sql="ts", type="time"),
718+
],
719+
metrics=[],
720+
)
721+
722+
ret1 = Metric(
723+
name="retention_a",
724+
type="retention",
725+
entity="uid",
726+
cohort_event="event = 'signup'",
727+
periods=1,
728+
)
729+
ret2 = Metric(
730+
name="retention_b",
731+
type="retention",
732+
entity="uid",
733+
cohort_event="event = 'signup'",
734+
periods=2,
735+
)
736+
737+
graph = SemanticGraph()
738+
graph.add_model(events)
739+
graph.add_metric(ret1)
740+
graph.add_metric(ret2)
741+
742+
generator = SQLGenerator(graph)
743+
with pytest.raises(ValueError, match="Only one retention metric can be queried at a time"):
744+
generator.generate(metrics=["retention_a", "retention_b"], dimensions=[])
745+
746+
747+
def test_retention_mixed_with_regular_metric_raises():
748+
"""Test that mixing retention + regular metric raises ValueError."""
749+
events = Model(
750+
name="events",
751+
sql="SELECT 1 AS uid, 'signup' AS event, '2024-01-01'::DATE AS ts, 10 AS revenue",
752+
primary_key="uid",
753+
dimensions=[
754+
Dimension(name="uid", sql="uid", type="categorical"),
755+
Dimension(name="event", sql="event", type="categorical"),
756+
Dimension(name="ts", sql="ts", type="time"),
757+
],
758+
metrics=[
759+
Metric(name="total_revenue", agg="sum", sql="revenue"),
760+
],
761+
)
762+
763+
retention = Metric(
764+
name="retention",
765+
type="retention",
766+
entity="uid",
767+
cohort_event="event = 'signup'",
768+
periods=1,
769+
)
770+
771+
graph = SemanticGraph()
772+
graph.add_model(events)
773+
graph.add_metric(retention)
774+
775+
generator = SQLGenerator(graph)
776+
with pytest.raises(ValueError, match="Retention metrics cannot be combined with other metrics"):
777+
generator.generate(metrics=["retention", "events.total_revenue"], dimensions=[])
778+
779+
780+
def test_retention_offset_in_sql():
781+
"""Test that offset parameter is included in retention SQL output."""
782+
events = Model(
783+
name="events",
784+
sql="""
785+
SELECT 1 AS uid, 'signup' AS event, '2024-01-01'::DATE AS ts
786+
UNION ALL SELECT 1, 'login', '2024-01-02'::DATE
787+
UNION ALL SELECT 2, 'signup', '2024-01-01'::DATE
788+
UNION ALL SELECT 2, 'login', '2024-01-01'::DATE
789+
""",
790+
primary_key="uid",
791+
dimensions=[
792+
Dimension(name="uid", sql="uid", type="categorical"),
793+
Dimension(name="event", sql="event", type="categorical"),
794+
Dimension(name="ts", sql="ts", type="time"),
795+
],
796+
metrics=[],
797+
)
798+
799+
retention = Metric(
800+
name="retention",
801+
type="retention",
802+
entity="uid",
803+
cohort_event="event = 'signup'",
804+
periods=2,
805+
retention_granularity="day",
806+
)
807+
808+
graph = SemanticGraph()
809+
graph.add_model(events)
810+
graph.add_metric(retention)
811+
812+
generator = SQLGenerator(graph)
813+
sql = generator.generate(metrics=["retention"], dimensions=[], limit=5, offset=10)
814+
815+
assert "LIMIT 5" in sql
816+
assert "OFFSET 10" in sql
817+
818+
# Without offset, OFFSET should not appear
819+
sql_no_offset = generator.generate(metrics=["retention"], dimensions=[], limit=5)
820+
assert "OFFSET" not in sql_no_offset

0 commit comments

Comments
 (0)