Skip to content

Commit 54caa53

Browse files
committed
Fix: exclude window dimensions from pre-aggregation materialization
Window dimensions (LEAD, LAG, etc.) are incompatible with GROUP BY in rollup materialization SQL. generate_materialization_sql now raises a clear ValueError when a pre-aggregation references a dimension that has a window expression set, for both regular dimensions and time dimensions.
1 parent 8102a9d commit 54caa53

2 files changed

Lines changed: 108 additions & 1 deletion

File tree

sidemantic/core/pre_aggregation.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,16 +143,28 @@ def generate_materialization_sql(self, model: Any) -> str:
143143
if self.time_dimension and self.granularity:
144144
time_dim = model.get_dimension(self.time_dimension)
145145
if time_dim:
146+
if time_dim.window:
147+
raise ValueError(
148+
f"Cannot use window dimension '{self.time_dimension}' as time_dimension "
149+
f"in pre-aggregation '{self.name}': window functions are incompatible "
150+
f"with GROUP BY in rollup materialization"
151+
)
146152
col_name = f"{self.time_dimension}_{self.granularity}"
147153
select_exprs.append(f"DATE_TRUNC('{self.granularity}', {time_dim.sql_expr}) as {col_name}")
148154
group_by_positions.append(str(pos))
149155
pos += 1
150156

151-
# Add dimensions
157+
# Add dimensions (reject window dimensions - incompatible with GROUP BY)
152158
if self.dimensions:
153159
for dim_name in self.dimensions:
154160
dim = model.get_dimension(dim_name)
155161
if dim:
162+
if dim.window:
163+
raise ValueError(
164+
f"Cannot use window dimension '{dim_name}' in pre-aggregation "
165+
f"'{self.name}': window functions are incompatible with "
166+
f"GROUP BY in rollup materialization"
167+
)
156168
select_exprs.append(f"{dim.sql_expr} as {dim_name}")
157169
group_by_positions.append(str(pos))
158170
pos += 1

tests/optimizations/test_pre_aggregations.py

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1204,5 +1204,100 @@ def test_preagg_injection_in_preagg_name_rejected():
12041204
preagg.get_table_name("orders")
12051205

12061206

1207+
def test_generate_materialization_sql_rejects_window_dimension():
1208+
"""Test that generate_materialization_sql raises ValueError for window dimensions."""
1209+
model = Model(
1210+
name="events",
1211+
table="events",
1212+
dimensions=[
1213+
Dimension(name="event", type="categorical", sql="event"),
1214+
Dimension(
1215+
name="next_event",
1216+
type="categorical",
1217+
sql="event",
1218+
window="LEAD(event) OVER (PARTITION BY person_id ORDER BY timestamp)",
1219+
),
1220+
Dimension(name="status", type="categorical", sql="status"),
1221+
],
1222+
metrics=[
1223+
Metric(name="count", agg="count"),
1224+
],
1225+
)
1226+
1227+
# Pre-aggregation that references a window dimension should raise
1228+
preagg = PreAggregation(
1229+
name="by_next_event",
1230+
measures=["count"],
1231+
dimensions=["next_event"],
1232+
)
1233+
1234+
with pytest.raises(ValueError, match="window dimension.*next_event.*incompatible"):
1235+
preagg.generate_materialization_sql(model)
1236+
1237+
1238+
def test_generate_materialization_sql_rejects_window_time_dimension():
1239+
"""Test that generate_materialization_sql raises ValueError for window time dimensions."""
1240+
model = Model(
1241+
name="events",
1242+
table="events",
1243+
dimensions=[
1244+
Dimension(
1245+
name="next_timestamp",
1246+
type="time",
1247+
sql="timestamp",
1248+
window="LEAD(timestamp) OVER (PARTITION BY person_id ORDER BY timestamp)",
1249+
),
1250+
],
1251+
metrics=[
1252+
Metric(name="count", agg="count"),
1253+
],
1254+
)
1255+
1256+
preagg = PreAggregation(
1257+
name="daily_next",
1258+
measures=["count"],
1259+
dimensions=[],
1260+
time_dimension="next_timestamp",
1261+
granularity="day",
1262+
)
1263+
1264+
with pytest.raises(ValueError, match="window dimension.*next_timestamp.*incompatible"):
1265+
preagg.generate_materialization_sql(model)
1266+
1267+
1268+
def test_generate_materialization_sql_normal_dimensions_still_work():
1269+
"""Test that pre-aggregations with normal (non-window) dimensions still work."""
1270+
model = Model(
1271+
name="events",
1272+
table="events",
1273+
dimensions=[
1274+
Dimension(name="event", type="categorical", sql="event"),
1275+
Dimension(name="status", type="categorical", sql="status"),
1276+
Dimension(
1277+
name="next_event",
1278+
type="categorical",
1279+
sql="event",
1280+
window="LEAD(event) OVER (PARTITION BY person_id ORDER BY timestamp)",
1281+
),
1282+
],
1283+
metrics=[
1284+
Metric(name="count", agg="count"),
1285+
],
1286+
)
1287+
1288+
# Pre-aggregation with only normal dimensions should work fine
1289+
preagg = PreAggregation(
1290+
name="by_status",
1291+
measures=["count"],
1292+
dimensions=["status"],
1293+
)
1294+
1295+
sql = preagg.generate_materialization_sql(model)
1296+
assert "status as status" in sql
1297+
assert "GROUP BY" in sql
1298+
assert "LEAD" not in sql
1299+
assert "OVER" not in sql
1300+
1301+
12071302
if __name__ == "__main__":
12081303
pytest.main([__file__, "-v"])

0 commit comments

Comments
 (0)