@@ -1134,3 +1134,132 @@ def test_multistep_funnel_only_prior_step2_not_counted():
11341134 assert rows [0 ][0 ] == 2 # total_entities
11351135 assert rows [0 ][1 ] == 2 # step_1_count (both signed up)
11361136 assert rows [0 ][2 ] == 1 # step_2_count (only user 2)
1137+
1138+
1139+ def test_multistep_funnel_expression_timestamp ():
1140+ """Test that a time dimension using an expression works in step CTEs.
1141+
1142+ When the time dimension's sql_expr is something like CAST(ts AS DATE)
1143+ rather than a bare column name, the generated SQL must not produce
1144+ invalid fragments like s.CAST(ts AS DATE). The generator should alias
1145+ the expression and reference the alias in later steps.
1146+ """
1147+ events = Model (
1148+ name = "events" ,
1149+ sql = """
1150+ SELECT 1 AS user_id, 'signup' AS event, '2024-01-01 10:00:00'::TIMESTAMP AS created_at
1151+ UNION ALL SELECT 1, 'purchase', '2024-01-02 14:00:00'::TIMESTAMP
1152+ UNION ALL SELECT 2, 'signup', '2024-01-01 08:00:00'::TIMESTAMP
1153+ UNION ALL SELECT 3, 'signup', '2024-01-01 09:00:00'::TIMESTAMP
1154+ """ ,
1155+ primary_key = "user_id" ,
1156+ dimensions = [
1157+ Dimension (name = "user_id" , sql = "user_id" , type = "categorical" ),
1158+ Dimension (name = "event" , sql = "event" , type = "categorical" ),
1159+ # Expression-based time dimension (not a bare column)
1160+ Dimension (name = "event_date" , sql = "CAST(created_at AS DATE)" , type = "time" ),
1161+ ],
1162+ )
1163+
1164+ funnel = Metric (
1165+ name = "expr_ts_funnel" ,
1166+ type = "conversion" ,
1167+ entity = "user_id" ,
1168+ steps = [
1169+ "event = 'signup'" ,
1170+ "event = 'purchase'" ,
1171+ ],
1172+ )
1173+
1174+ graph = SemanticGraph ()
1175+ graph .add_model (events )
1176+ graph .add_metric (funnel )
1177+
1178+ generator = SQLGenerator (graph )
1179+ sql = generator .generate (metrics = ["expr_ts_funnel" ], dimensions = [])
1180+
1181+ print ("\n Expression timestamp funnel SQL:" )
1182+ print (sql )
1183+
1184+ conn = duckdb .connect (":memory:" )
1185+ result = conn .execute (sql )
1186+ rows = df_rows (result )
1187+
1188+ # User 1: signup -> purchase (valid path)
1189+ # User 2: signup only
1190+ # User 3: signup only
1191+ assert rows [0 ][0 ] == 3 # total_entities
1192+ assert rows [0 ][1 ] == 3 # step_1_count
1193+ assert rows [0 ][2 ] == 1 # step_2_count (only user 1)
1194+
1195+
1196+ def test_multistep_funnel_or_step_with_filter ():
1197+ """Test that step predicates containing OR are correctly parenthesized.
1198+
1199+ Without parenthesization, WHERE event = 'a' OR event = 'b' AND region = 'US'
1200+ would be parsed as event = 'a' OR (event = 'b' AND region = 'US'), which is
1201+ incorrect. The step predicate should be wrapped: WHERE (event = 'a' OR event = 'b')
1202+ AND region = 'US'.
1203+ """
1204+ events = Model (
1205+ name = "events" ,
1206+ sql = """
1207+ SELECT 1 AS user_id, 'email_signup' AS event, 'US' AS region, '2024-01-01'::TIMESTAMP AS ts
1208+ UNION ALL SELECT 1, 'purchase', 'US', '2024-01-02'::TIMESTAMP
1209+ UNION ALL SELECT 2, 'social_signup', 'US', '2024-01-01'::TIMESTAMP
1210+ UNION ALL SELECT 2, 'purchase', 'US', '2024-01-02'::TIMESTAMP
1211+ UNION ALL SELECT 3, 'email_signup', 'EU', '2024-01-01'::TIMESTAMP
1212+ UNION ALL SELECT 3, 'purchase', 'EU', '2024-01-02'::TIMESTAMP
1213+ UNION ALL SELECT 4, 'social_signup', 'EU', '2024-01-01'::TIMESTAMP
1214+ """ ,
1215+ primary_key = "user_id" ,
1216+ dimensions = [
1217+ Dimension (name = "user_id" , sql = "user_id" , type = "categorical" ),
1218+ Dimension (name = "event" , sql = "event" , type = "categorical" ),
1219+ Dimension (name = "region" , sql = "region" , type = "categorical" ),
1220+ Dimension (name = "ts" , sql = "ts" , type = "time" ),
1221+ ],
1222+ )
1223+
1224+ funnel = Metric (
1225+ name = "or_step_funnel" ,
1226+ type = "conversion" ,
1227+ entity = "user_id" ,
1228+ steps = [
1229+ "event = 'email_signup' OR event = 'social_signup'" ,
1230+ "event = 'purchase'" ,
1231+ ],
1232+ )
1233+
1234+ graph = SemanticGraph ()
1235+ graph .add_model (events )
1236+ graph .add_metric (funnel )
1237+
1238+ generator = SQLGenerator (graph )
1239+ # Filter to US only
1240+ sql = generator .generate (
1241+ metrics = ["or_step_funnel" ],
1242+ dimensions = [],
1243+ filters = ["region = 'US'" ],
1244+ )
1245+
1246+ print ("\n OR step with filter funnel SQL:" )
1247+ print (sql )
1248+
1249+ conn = duckdb .connect (":memory:" )
1250+ result = conn .execute (sql )
1251+ rows = df_rows (result )
1252+
1253+ # With correct parenthesization:
1254+ # Step 1: (event = 'email_signup' OR event = 'social_signup') AND region = 'US'
1255+ # -> users 1 (email_signup, US), 2 (social_signup, US) = 2
1256+ # User 3 is EU, user 4 is EU -> filtered out
1257+ # Step 2: (event = 'purchase') AND region = 'US'
1258+ # -> users 1, 2 (both purchased in US) = 2
1259+ #
1260+ # Without parenthesization (the bug):
1261+ # Step 1: event = 'email_signup' OR (event = 'social_signup' AND region = 'US')
1262+ # -> users 1 (email), 2 (social+US), 3 (email) = 3 (WRONG: user 3 EU leaks in)
1263+ assert rows [0 ][0 ] == 2 # total_entities (US signups only)
1264+ assert rows [0 ][1 ] == 2 # step_1_count
1265+ assert rows [0 ][2 ] == 2 # step_2_count (both US users purchased)
0 commit comments