Skip to content

Commit 765ae8a

Browse files
Add retention metric type for cohort analysis (#121)
* Add retention metric type for cohort retention analysis
* Auto-update JSON schema
* Fix: dialect-aware dates, use dim.sql_expr, normalize filters
* Fix: expand {model} placeholders, strict periods default, parse retention_granularity
* Fix: expand {model} in ts_sql, resolve entity dim sql_expr, include metric.filters
* Fix: validate single retention metric per query, preserve offset
* Fix: raise error when retention model inference is ambiguous
* Fix: match retention model by resolved metric identity, not name
* Fix: restore filter classification unpack compatibility

  _classify_filters_for_pushdown returns a 3-tuple but callers in generate() and _generate_with_preaggregation() only unpacked 2 values, causing ValueError at runtime for any query that hits filter classification.

  Fix all callers to unpack 3 values and handle window_dim_filters:
  - generate(): merge into main_query_filters (outer WHERE)
  - _generate_with_preaggregation(): merge into per-model pushdown filters

  Also guard dim.window access with getattr since the Dimension model does not have a window field on this branch.
* Fix: add retention to validation allowlists in validate_model and validate_metric
* Fix: use identity check for metric model resolution in all three fallback loops

---------

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
1 parent 026bfe5 commit 765ae8a

6 files changed

Lines changed: 1515 additions & 15 deletions

File tree

sidemantic-schema.json

Lines changed: 177 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -215,6 +215,19 @@
215215
"Metric": {
216216
"description": "Measure definition - supports simple aggregations and complex metric types.\n\nMeasures can be:\n- Simple aggregations: SUM(amount), COUNT(*), AVG(price)\n- Ratios: revenue / order_count\n- Derived formulas: (revenue - cost) / revenue\n- Cumulative: running totals, period-to-date\n- Time comparisons: YoY, MoM growth\n- Conversion funnels: signup -> purchase rate\n\nAuto-registers as a graph-level metric with the current semantic layer context if available.",
217217
"properties": {
218+
"activity_event": {
219+
"anyOf": [
220+
{
221+
"type": "string"
222+
},
223+
{
224+
"type": "null"
225+
}
226+
],
227+
"default": null,
228+
"description": "SQL filter for activity event (default: any event, e.g., \"event IS NOT NULL\")",
229+
"title": "Activity Event"
230+
},
218231
"agg": {
219232
"anyOf": [
220233
{
@@ -285,6 +298,19 @@
285298
"description": "Comparison calculation (default: percent_change)",
286299
"title": "Calculation"
287300
},
301+
"cohort_event": {
302+
"anyOf": [
303+
{
304+
"type": "string"
305+
},
306+
{
307+
"type": "null"
308+
}
309+
],
310+
"default": null,
311+
"description": "SQL filter for cohort-defining event (e.g., \"event = 'install'\")",
312+
"title": "Cohort Event"
313+
},
288314
"comparison_type": {
289315
"anyOf": [
290316
{
@@ -553,12 +579,43 @@
553579
"description": "Time offset for denominator (e.g., '1 month')",
554580
"title": "Offset Window"
555581
},
582+
"periods": {
583+
"anyOf": [
584+
{
585+
"type": "integer"
586+
},
587+
{
588+
"type": "null"
589+
}
590+
],
591+
"default": null,
592+
"description": "Number of retention periods to compute (e.g., 28 for 28-day)",
593+
"title": "Periods"
594+
},
556595
"public": {
557596
"default": true,
558597
"description": "Whether metric is visible in API/UI",
559598
"title": "Public",
560599
"type": "boolean"
561600
},
601+
"retention_granularity": {
602+
"anyOf": [
603+
{
604+
"enum": [
605+
"day",
606+
"week",
607+
"month"
608+
],
609+
"type": "string"
610+
},
611+
{
612+
"type": "null"
613+
}
614+
],
615+
"default": null,
616+
"description": "Time granularity for retention periods (day, week, month)",
617+
"title": "Retention Granularity"
618+
},
562619
"sql": {
563620
"anyOf": [
564621
{
@@ -593,7 +650,8 @@
593650
"derived",
594651
"cumulative",
595652
"time_comparison",
596-
"conversion"
653+
"conversion",
654+
"retention"
597655
],
598656
"type": "string"
599657
},
@@ -1148,6 +1206,19 @@
11481206
"items": {
11491207
"description": "Measure definition - supports simple aggregations and complex metric types.\n\nMeasures can be:\n- Simple aggregations: SUM(amount), COUNT(*), AVG(price)\n- Ratios: revenue / order_count\n- Derived formulas: (revenue - cost) / revenue\n- Cumulative: running totals, period-to-date\n- Time comparisons: YoY, MoM growth\n- Conversion funnels: signup -> purchase rate\n\nAuto-registers as a graph-level metric with the current semantic layer context if available.",
11501208
"properties": {
1209+
"activity_event": {
1210+
"anyOf": [
1211+
{
1212+
"type": "string"
1213+
},
1214+
{
1215+
"type": "null"
1216+
}
1217+
],
1218+
"default": null,
1219+
"description": "SQL filter for activity event (default: any event, e.g., \"event IS NOT NULL\")",
1220+
"title": "Activity Event"
1221+
},
11511222
"agg": {
11521223
"anyOf": [
11531224
{
@@ -1218,6 +1289,19 @@
12181289
"description": "Comparison calculation (default: percent_change)",
12191290
"title": "Calculation"
12201291
},
1292+
"cohort_event": {
1293+
"anyOf": [
1294+
{
1295+
"type": "string"
1296+
},
1297+
{
1298+
"type": "null"
1299+
}
1300+
],
1301+
"default": null,
1302+
"description": "SQL filter for cohort-defining event (e.g., \"event = 'install'\")",
1303+
"title": "Cohort Event"
1304+
},
12211305
"comparison_type": {
12221306
"anyOf": [
12231307
{
@@ -1486,12 +1570,43 @@
14861570
"description": "Time offset for denominator (e.g., '1 month')",
14871571
"title": "Offset Window"
14881572
},
1573+
"periods": {
1574+
"anyOf": [
1575+
{
1576+
"type": "integer"
1577+
},
1578+
{
1579+
"type": "null"
1580+
}
1581+
],
1582+
"default": null,
1583+
"description": "Number of retention periods to compute (e.g., 28 for 28-day)",
1584+
"title": "Periods"
1585+
},
14891586
"public": {
14901587
"default": true,
14911588
"description": "Whether metric is visible in API/UI",
14921589
"title": "Public",
14931590
"type": "boolean"
14941591
},
1592+
"retention_granularity": {
1593+
"anyOf": [
1594+
{
1595+
"enum": [
1596+
"day",
1597+
"week",
1598+
"month"
1599+
],
1600+
"type": "string"
1601+
},
1602+
{
1603+
"type": "null"
1604+
}
1605+
],
1606+
"default": null,
1607+
"description": "Time granularity for retention periods (day, week, month)",
1608+
"title": "Retention Granularity"
1609+
},
14951610
"sql": {
14961611
"anyOf": [
14971612
{
@@ -1526,7 +1641,8 @@
15261641
"derived",
15271642
"cumulative",
15281643
"time_comparison",
1529-
"conversion"
1644+
"conversion",
1645+
"retention"
15301646
],
15311647
"type": "string"
15321648
},
@@ -1831,6 +1947,19 @@
18311947
"Metric": {
18321948
"description": "Measure definition - supports simple aggregations and complex metric types.\n\nMeasures can be:\n- Simple aggregations: SUM(amount), COUNT(*), AVG(price)\n- Ratios: revenue / order_count\n- Derived formulas: (revenue - cost) / revenue\n- Cumulative: running totals, period-to-date\n- Time comparisons: YoY, MoM growth\n- Conversion funnels: signup -> purchase rate\n\nAuto-registers as a graph-level metric with the current semantic layer context if available.",
18331949
"properties": {
1950+
"activity_event": {
1951+
"anyOf": [
1952+
{
1953+
"type": "string"
1954+
},
1955+
{
1956+
"type": "null"
1957+
}
1958+
],
1959+
"default": null,
1960+
"description": "SQL filter for activity event (default: any event, e.g., \"event IS NOT NULL\")",
1961+
"title": "Activity Event"
1962+
},
18341963
"agg": {
18351964
"anyOf": [
18361965
{
@@ -1901,6 +2030,19 @@
19012030
"description": "Comparison calculation (default: percent_change)",
19022031
"title": "Calculation"
19032032
},
2033+
"cohort_event": {
2034+
"anyOf": [
2035+
{
2036+
"type": "string"
2037+
},
2038+
{
2039+
"type": "null"
2040+
}
2041+
],
2042+
"default": null,
2043+
"description": "SQL filter for cohort-defining event (e.g., \"event = 'install'\")",
2044+
"title": "Cohort Event"
2045+
},
19042046
"comparison_type": {
19052047
"anyOf": [
19062048
{
@@ -2169,12 +2311,43 @@
21692311
"description": "Time offset for denominator (e.g., '1 month')",
21702312
"title": "Offset Window"
21712313
},
2314+
"periods": {
2315+
"anyOf": [
2316+
{
2317+
"type": "integer"
2318+
},
2319+
{
2320+
"type": "null"
2321+
}
2322+
],
2323+
"default": null,
2324+
"description": "Number of retention periods to compute (e.g., 28 for 28-day)",
2325+
"title": "Periods"
2326+
},
21722327
"public": {
21732328
"default": true,
21742329
"description": "Whether metric is visible in API/UI",
21752330
"title": "Public",
21762331
"type": "boolean"
21772332
},
2333+
"retention_granularity": {
2334+
"anyOf": [
2335+
{
2336+
"enum": [
2337+
"day",
2338+
"week",
2339+
"month"
2340+
],
2341+
"type": "string"
2342+
},
2343+
{
2344+
"type": "null"
2345+
}
2346+
],
2347+
"default": null,
2348+
"description": "Time granularity for retention periods (day, week, month)",
2349+
"title": "Retention Granularity"
2350+
},
21782351
"sql": {
21792352
"anyOf": [
21802353
{
@@ -2209,7 +2382,8 @@
22092382
"derived",
22102383
"cumulative",
22112384
"time_comparison",
2212-
"conversion"
2385+
"conversion",
2386+
"retention"
22132387
],
22142388
"type": "string"
22152389
},

sidemantic/adapters/sidemantic.py

Lines changed: 30 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -297,6 +297,13 @@ def _parse_model(self, model_def: dict) -> Model | None:
297297
conversion_event=measure_def.get("conversion_event"),
298298
conversion_window=measure_def.get("conversion_window"),
299299
offset_window=measure_def.get("offset_window"),
300+
# Retention parameters
301+
cohort_event=measure_def.get("cohort_event"),
302+
activity_event=measure_def.get("activity_event"),
303+
periods=measure_def.get("periods"),
304+
retention_granularity=(measure_def.get("retention_granularity") or measure_def.get("granularity"))
305+
if measure_def.get("type") == "retention"
306+
else None,
300307
# Cumulative/window parameters
301308
window=measure_def.get("window"),
302309
grain_to_date=measure_def.get("grain_to_date"),
@@ -413,6 +420,12 @@ def _parse_metric(self, metric_def: dict) -> Metric | None:
413420
conversion_event=metric_def.get("conversion_event"),
414421
conversion_window=metric_def.get("conversion_window"),
415422
offset_window=metric_def.get("offset_window"),
423+
cohort_event=metric_def.get("cohort_event"),
424+
activity_event=metric_def.get("activity_event"),
425+
periods=metric_def.get("periods"),
426+
retention_granularity=(metric_def.get("retention_granularity") or metric_def.get("granularity"))
427+
if metric_type == "retention"
428+
else None,
416429
window=metric_def.get("window"),
417430
grain_to_date=metric_def.get("grain_to_date"),
418431
window_expression=metric_def.get("window_expression"),
@@ -574,6 +587,15 @@ def _export_model(self, model: Model) -> dict:
574587
measure_def["conversion_window"] = measure.conversion_window
575588
if measure.offset_window:
576589
measure_def["offset_window"] = measure.offset_window
590+
# Retention parameters
591+
if measure.cohort_event:
592+
measure_def["cohort_event"] = measure.cohort_event
593+
if measure.activity_event:
594+
measure_def["activity_event"] = measure.activity_event
595+
if measure.periods is not None:
596+
measure_def["periods"] = measure.periods
597+
if measure.retention_granularity:
598+
measure_def["retention_granularity"] = measure.retention_granularity
577599
# Cumulative/window parameters
578600
if measure.window:
579601
measure_def["window"] = measure.window
@@ -655,6 +677,14 @@ def _export_metric(self, measure: Metric, graph) -> dict:
655677
result["conversion_window"] = measure.conversion_window
656678
if measure.offset_window:
657679
result["offset_window"] = measure.offset_window
680+
if measure.cohort_event:
681+
result["cohort_event"] = measure.cohort_event
682+
if measure.activity_event:
683+
result["activity_event"] = measure.activity_event
684+
if measure.periods is not None:
685+
result["periods"] = measure.periods
686+
if measure.retention_granularity:
687+
result["retention_granularity"] = measure.retention_granularity
658688
if measure.sql:
659689
result["sql"] = measure.sql
660690
# Auto-detect and export dependencies for derived measures

sidemantic/core/metric.py

Lines changed: 18 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -204,10 +204,15 @@ def validate_type_specific_fields(self):
204204
raise ValueError("conversion metric requires 'base_event' field")
205205
if not self.conversion_event:
206206
raise ValueError("conversion metric requires 'conversion_event' field")
207+
if self.type == "retention":
208+
if not self.entity:
209+
raise ValueError("retention metric requires 'entity' field")
210+
if not self.cohort_event:
211+
raise ValueError("retention metric requires 'cohort_event' field")
207212
return self
208213

209214
# Metric type (if this is a complex metric, not just a simple aggregation)
210-
type: Literal["ratio", "derived", "cumulative", "time_comparison", "conversion"] | None = Field(
215+
type: Literal["ratio", "derived", "cumulative", "time_comparison", "conversion", "retention"] | None = Field(
211216
None, description="Metric type for complex calculations"
212217
)
213218

@@ -252,6 +257,18 @@ def validate_type_specific_fields(self):
252257
conversion_event: str | None = Field(None, description="Target event filter")
253258
conversion_window: str | None = Field(None, description="Conversion time window")
254259

260+
# Retention parameters
261+
cohort_event: str | None = Field(
262+
None, description="SQL filter for cohort-defining event (e.g., \"event = 'install'\")"
263+
)
264+
activity_event: str | None = Field(
265+
None, description='SQL filter for activity event (default: any event, e.g., "event IS NOT NULL")'
266+
)
267+
periods: int | None = Field(None, description="Number of retention periods to compute (e.g., 28 for 28-day)")
268+
retention_granularity: Literal["day", "week", "month"] | None = Field(
269+
None, description="Time granularity for retention periods (day, week, month)"
270+
)
271+
255272
# Common parameters
256273
filters: list[str] | None = Field(None, description="Optional WHERE clause filters")
257274
fill_nulls_with: int | float | str | None = Field(None, description="Default value when result is NULL")

0 commit comments

Comments
 (0)