Skip to content

Commit 132443c

Browse files
committed
Fix: monotonic funnel counts, entrant-only totals, validate conversion_window
1 parent 01f4395 commit 132443c

3 files changed

Lines changed: 131 additions & 3 deletions

File tree

sidemantic/core/metric.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,11 @@ def validate_type_specific_fields(self):
203203
if self.steps:
204204
if len(self.steps) < 2:
205205
raise ValueError("conversion metric 'steps' requires at least 2 steps")
206+
if self.conversion_window:
207+
raise ValueError(
208+
"conversion metric cannot specify both 'steps' and 'conversion_window'; "
209+
"conversion_window is only supported for base_event/conversion_event funnels"
210+
)
206211
elif not self.base_event or not self.conversion_event:
207212
raise ValueError("conversion metric requires 'steps' or both 'base_event' and 'conversion_event'")
208213
return self

sidemantic/sql/generator.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2504,14 +2504,17 @@ def _generate_multistep_conversion_query(
25042504
group_by_inner_parts.append(sql_col)
25052505
group_by_inner = ",\n ".join(group_by_inner_parts)
25062506

2507-
# Build outer SELECT: total entities + sum of each step
2507+
# Build outer SELECT: total entities (only step 1 entrants) + sum of each step.
2508+
# Gate each step on completion of all prior steps for monotonic funnels.
25082509
outer_select_parts = []
25092510
if dim_aliases:
25102511
for alias in dim_aliases:
25112512
outer_select_parts.append(f"{alias}")
2512-
outer_select_parts.append("COUNT(*) AS total_entities")
2513+
outer_select_parts.append("SUM(step_1::int) AS total_entities")
25132514
for i in range(1, len(metric.steps) + 1):
2514-
outer_select_parts.append(f"SUM(step_{i}::int) AS step_{i}_count")
2515+
# Each step requires all prior steps to be true
2516+
gate_expr = " AND ".join(f"step_{j}" for j in range(1, i + 1))
2517+
outer_select_parts.append(f"SUM(({gate_expr})::int) AS step_{i}_count")
25152518
outer_select = ",\n ".join(outer_select_parts)
25162519

25172520
# Build GROUP BY, ORDER BY, LIMIT for outer query

tests/metrics/test_advanced.py

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -793,3 +793,123 @@ def test_conversion_metric_requires_entity():
793793
type="conversion",
794794
steps=["event = 'a'", "event = 'b'"],
795795
)
796+
797+
798+
def test_conversion_steps_and_conversion_window_rejected():
799+
"""Test validation: steps and conversion_window cannot be used together."""
800+
import pytest
801+
802+
with pytest.raises(ValueError, match="cannot specify both 'steps' and 'conversion_window'"):
803+
Metric(
804+
name="bad_funnel",
805+
type="conversion",
806+
entity="user_id",
807+
steps=["event = 'a'", "event = 'b'"],
808+
conversion_window="7 days",
809+
)
810+
811+
812+
def test_multistep_funnel_monotonic_counts():
813+
"""Test that funnel step counts are monotonically decreasing.
814+
815+
An entity that triggers a later event without completing earlier steps
816+
should not be counted at the later step.
817+
"""
818+
# person 5 has step 3 event but NOT step 1 or 2 -> should not inflate step 3
819+
events = Model(
820+
name="events",
821+
sql="""
822+
SELECT 1 AS person_id, 'install' AS event
823+
UNION ALL SELECT 1, 'activate'
824+
UNION ALL SELECT 1, 'purchase'
825+
UNION ALL SELECT 2, 'install'
826+
UNION ALL SELECT 2, 'activate'
827+
UNION ALL SELECT 3, 'install'
828+
UNION ALL SELECT 5, 'purchase'
829+
""",
830+
primary_key="person_id",
831+
dimensions=[
832+
Dimension(name="person_id", sql="person_id", type="categorical"),
833+
Dimension(name="event", sql="event", type="categorical"),
834+
],
835+
)
836+
837+
funnel = Metric(
838+
name="mono_funnel",
839+
type="conversion",
840+
entity="person_id",
841+
steps=[
842+
"event = 'install'",
843+
"event = 'activate'",
844+
"event = 'purchase'",
845+
],
846+
)
847+
848+
graph = SemanticGraph()
849+
graph.add_model(events)
850+
graph.add_metric(funnel)
851+
852+
generator = SQLGenerator(graph)
853+
sql = generator.generate(metrics=["mono_funnel"], dimensions=[])
854+
855+
conn = duckdb.connect(":memory:")
856+
result = conn.execute(sql)
857+
rows = df_rows(result)
858+
859+
# total_entities = step 1 entrants only: persons 1,2,3 = 3 (person 5 excluded)
860+
assert rows[0][0] == 3 # total_entities
861+
assert rows[0][1] == 3 # step_1_count: persons 1,2,3
862+
assert rows[0][2] == 2 # step_2_count: persons 1,2 (had install AND activate)
863+
assert rows[0][3] == 1 # step_3_count: person 1 only (had all 3 steps)
864+
865+
# Verify monotonicity
866+
for i in range(1, len(rows[0]) - 1):
867+
assert rows[0][i] >= rows[0][i + 1], (
868+
f"Funnel not monotonic: step {i} ({rows[0][i]}) < step {i + 1} ({rows[0][i + 1]})"
869+
)
870+
871+
872+
def test_multistep_funnel_total_excludes_non_entrants():
873+
"""Test that total_entities excludes entities who never entered step 1."""
874+
# 4 entities in data, but only 2 satisfy step 1
875+
events = Model(
876+
name="events",
877+
sql="""
878+
SELECT 1 AS uid, 'signup' AS event
879+
UNION ALL SELECT 1, 'purchase'
880+
UNION ALL SELECT 2, 'signup'
881+
UNION ALL SELECT 3, 'browse'
882+
UNION ALL SELECT 4, 'browse'
883+
""",
884+
primary_key="uid",
885+
dimensions=[
886+
Dimension(name="uid", sql="uid", type="categorical"),
887+
Dimension(name="event", sql="event", type="categorical"),
888+
],
889+
)
890+
891+
funnel = Metric(
892+
name="entry_funnel",
893+
type="conversion",
894+
entity="uid",
895+
steps=[
896+
"event = 'signup'",
897+
"event = 'purchase'",
898+
],
899+
)
900+
901+
graph = SemanticGraph()
902+
graph.add_model(events)
903+
graph.add_metric(funnel)
904+
905+
generator = SQLGenerator(graph)
906+
sql = generator.generate(metrics=["entry_funnel"], dimensions=[])
907+
908+
conn = duckdb.connect(":memory:")
909+
result = conn.execute(sql)
910+
rows = df_rows(result)
911+
912+
# total_entities should be 2 (only users 1 and 2 who signed up), NOT 4
913+
assert rows[0][0] == 2 # total_entities = step 1 entrants
914+
assert rows[0][1] == 2 # step_1_count
915+
assert rows[0][2] == 1 # step_2_count (only user 1 purchased)

0 commit comments

Comments
 (0)