Skip to content

Commit f1c138b

Browse files
committed
Fix: dialect-aware dates, use dim.sql_expr, normalize filters
1 parent 0096e66 commit f1c138b

1 file changed

Lines changed: 67 additions & 13 deletions

File tree

sidemantic/sql/generator.py

Lines changed: 67 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,51 @@ def _build_interval(self, num: str, unit: str) -> str:
7575
# Standard SQL: INTERVAL '7 days'
7676
return f"INTERVAL '{num} {unit}'"
7777

78+
def _date_diff(self, later: str, earlier: str, unit: str = "day") -> str:
79+
"""Generate dialect-specific date difference expression.
80+
81+
Args:
82+
later: SQL expression for the later date
83+
earlier: SQL expression for the earlier date
84+
unit: Time unit for the difference (e.g., "day")
85+
86+
Returns:
87+
SQL expression computing the difference in the given unit
88+
"""
89+
if self.dialect == "bigquery":
90+
return f"DATE_DIFF({later}, {earlier}, {unit.upper()})"
91+
elif self.dialect == "snowflake":
92+
return f"DATEDIFF('{unit}', {earlier}, {later})"
93+
else:
94+
# DuckDB and Postgres support direct date subtraction
95+
return f"({later} - {earlier})"
96+
97+
def _strip_model_prefixes(self, filters: list[str], model_name: str) -> list[str]:
98+
"""Strip model name prefixes from filter column references.
99+
100+
Retention and conversion CTEs query the raw table directly, so qualified
101+
references like ``events.region`` need to become just ``region``.
102+
103+
Args:
104+
filters: List of filter SQL expressions
105+
model_name: Model name whose prefix should be stripped
106+
107+
Returns:
108+
Filters with model prefixes removed
109+
"""
110+
result = []
111+
for f in filters:
112+
try:
113+
parsed = sqlglot.parse_one(f, dialect=self.dialect)
114+
for column in parsed.find_all(exp.Column):
115+
tbl = column.table
116+
if tbl and tbl.replace("_cte", "") == model_name:
117+
column.set("table", None)
118+
result.append(parsed.sql(dialect=self.dialect))
119+
except Exception:
120+
result.append(f)
121+
return result
122+
78123
def _quote_alias(self, name: str) -> str:
79124
"""Quote an identifier for use as a SQL alias.
80125
@@ -2276,33 +2321,39 @@ def _generate_retention_query(
22762321
if not isinstance(periods, int) or periods < 1:
22772322
raise ValueError(f"Invalid periods value: {periods}")
22782323

2279-
# Find timestamp dimension
2324+
# Find timestamp dimension: prefer model.default_time_dimension, fall back to first time dim
22802325
timestamp_dim = None
2281-
for dim in model.dimensions:
2282-
if dim.type == "time":
2283-
timestamp_dim = dim.name
2284-
break
2326+
if model.default_time_dimension:
2327+
timestamp_dim = model.get_dimension(model.default_time_dimension)
2328+
if not timestamp_dim:
2329+
for dim in model.dimensions:
2330+
if dim.type == "time":
2331+
timestamp_dim = dim
2332+
break
22852333

22862334
if not timestamp_dim:
22872335
raise ValueError("Retention metrics require a time dimension on the model")
22882336

2337+
# Use sql_expr for actual SQL, name for alias
2338+
ts_sql = timestamp_dim.sql_expr
2339+
22892340
# Build FROM clause
22902341
if model.sql:
22912342
from_clause = f"({model.sql}) AS t"
22922343
else:
22932344
from_clause = model.table
22942345

2295-
# Build granularity-specific date truncation and interval
2346+
# Build granularity-specific date truncation and date diff (dialect-aware)
22962347
if granularity == "day":
2297-
trunc_expr = f"{timestamp_dim}::date"
2298-
diff_expr = "a.active_date - c.cohort_date"
2348+
trunc_expr = f"CAST({ts_sql} AS DATE)"
2349+
diff_expr = self._date_diff("a.active_date", "c.cohort_date", "day")
22992350
periods_label = "days_since"
23002351
elif granularity == "week":
2301-
trunc_expr = f"{self._date_trunc('week', timestamp_dim)}::date"
2302-
diff_expr = "(a.active_date - c.cohort_date) / 7"
2352+
trunc_expr = f"CAST({self._date_trunc('week', ts_sql)} AS DATE)"
2353+
diff_expr = f"{self._date_diff('a.active_date', 'c.cohort_date', 'day')} / 7"
23032354
periods_label = "weeks_since"
23042355
elif granularity == "month":
2305-
trunc_expr = f"{self._date_trunc('month', timestamp_dim)}::date"
2356+
trunc_expr = f"CAST({self._date_trunc('month', ts_sql)} AS DATE)"
23062357
diff_expr = (
23072358
"(EXTRACT(YEAR FROM a.active_date) - EXTRACT(YEAR FROM c.cohort_date)) * 12"
23082359
" + (EXTRACT(MONTH FROM a.active_date) - EXTRACT(MONTH FROM c.cohort_date))"
@@ -2311,10 +2362,13 @@ def _generate_retention_query(
23112362
else:
23122363
raise ValueError(f"Unsupported retention granularity: {granularity}")
23132364

2365+
# Normalize filters: strip model name prefixes so they work inside CTEs
2366+
normalized_filters = self._strip_model_prefixes(filters or [], model.name)
2367+
23142368
# Build optional WHERE filters for the source data
23152369
filter_clause = ""
2316-
if filters:
2317-
filter_clause = " AND " + " AND ".join(filters)
2370+
if normalized_filters:
2371+
filter_clause = " AND " + " AND ".join(normalized_filters)
23182372

23192373
order_clause = "\nORDER BY r.cohort_date, r.periods_since"
23202374
if order_by:

0 commit comments

Comments
 (0)