Skip to content

Commit 08f41ed

Browse files
authored
Expand fixture corpus coverage and enforce functionality contracts (#96)
* Add fixture corpus coverage and strict functionality contracts * Improve fixture execution coverage and CTE ref quoting * Strengthen fixture execution semantics and SQL contracts * Expand fixture execution coverage and non-exec contracts * Strengthen fixture contracts with semantic execution assertions * Enforce adapter-wide exact fixture execution semantics
1 parent 273a6a0 commit 08f41ed

140 files changed

Lines changed: 37896 additions & 95 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

sidemantic/adapters/cube.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -396,6 +396,17 @@ def _parse_preaggregation(self, preagg_def: dict, cube_name: str) -> PreAggregat
396396

397397
preagg_type = preagg_def.get("type", "rollup")
398398

399+
# Normalize Cube camelCase type names to Sidemantic snake_case
400+
preagg_type_mapping = {
401+
"rollupJoin": "rollup_join",
402+
"rollupLambda": "lambda",
403+
"rollup_join": "rollup_join",
404+
"original_sql": "original_sql",
405+
"rollup": "rollup",
406+
"lambda": "lambda",
407+
}
408+
preagg_type = preagg_type_mapping.get(preagg_type, preagg_type)
409+
399410
# Extract measures - strip CUBE prefix if present
400411
measures = []
401412
for measure_ref in preagg_def.get("measures") or []:

sidemantic/adapters/omni.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ def _parse_view(self, file_path: Path) -> Model | None:
7979
with open(file_path) as f:
8080
view = yaml.safe_load(f)
8181

82-
if not view:
82+
if not view or not isinstance(view, dict):
8383
return None
8484

8585
# Get view name from filename or name field

sidemantic/adapters/superset.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ def _parse_column(self, col_def: dict[str, Any], main_dttm_col: str | None) -> D
122122

123123
# Determine dimension type
124124
is_dttm = col_def.get("is_dttm", False)
125-
sql_type = col_def.get("type", "")
125+
sql_type = col_def.get("type") or ""
126126

127127
dim_type = "categorical"
128128
granularity = None

sidemantic/sql/generator.py

Lines changed: 60 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -85,8 +85,29 @@ def _quote_alias(self, name: str) -> str:
8585
Returns:
8686
Properly quoted identifier for the dialect (e.g., '"auctions.bid_request_cnt_wow"')
8787
"""
88+
return self._quote_identifier(name)
89+
90+
@staticmethod
91+
def _is_simple_identifier(name: str) -> bool:
92+
"""Return True when identifier can be emitted without quotes."""
93+
import re
94+
95+
return re.match(r"^[A-Za-z_][A-Za-z0-9_]*$", name) is not None
96+
97+
def _quote_identifier(self, name: str) -> str:
98+
"""Quote a SQL identifier for the current dialect."""
99+
if self._is_simple_identifier(name):
100+
return name
88101
return sqlglot.to_identifier(name, quoted=True).sql(dialect=self.dialect)
89102

103+
def _cte_name(self, model_name: str) -> str:
104+
"""Get the CTE identifier name for a model."""
105+
return f"{model_name}_cte"
106+
107+
def _cte_ref(self, model_name: str, column_name: str) -> str:
108+
"""Build a quoted reference to a CTE column."""
109+
return f"{self._quote_identifier(self._cte_name(model_name))}.{self._quote_identifier(column_name)}"
110+
90111
def _apply_default_time_dimensions(self, metrics: list[str], dimensions: list[str]) -> list[str]:
91112
"""Auto-include default_time_dimension from models if not already present.
92113
@@ -923,7 +944,7 @@ def _build_model_cte(
923944
# Include this model's primary key columns (always needed for joins/grouping)
924945
for pk_col in model.primary_key_columns:
925946
if pk_col not in columns_added:
926-
select_cols.append(f"{pk_col} AS {pk_col}")
947+
select_cols.append(f"{pk_col} AS {self._quote_alias(pk_col)}")
927948
columns_added.add(pk_col)
928949

929950
# Include foreign keys if we're joining OR if they're explicitly requested as dimensions
@@ -934,7 +955,7 @@ def _build_model_cte(
934955
# Add FK if: (1) we're joining to this related model, OR (2) FK is requested as dimension
935956
should_include = (needs_joins and relationship.name in all_models) or fk in needed_dimensions
936957
if should_include and fk not in columns_added:
937-
select_cols.append(f"{fk} AS {fk}")
958+
select_cols.append(f"{fk} AS {self._quote_alias(fk)}")
938959
columns_added.add(fk)
939960
# Mark FK as "needed" so it's not duplicated as a dimension
940961
needed_dimensions.discard(fk)
@@ -953,7 +974,7 @@ def _build_model_cte(
953974
# For has_many/has_one, foreign_key is the FK column in THIS model
954975
fk = other_join.foreign_key or other_join.sql_expr
955976
if fk not in columns_added:
956-
select_cols.append(f"{fk} AS {fk}")
977+
select_cols.append(f"{fk} AS {self._quote_alias(fk)}")
957978
columns_added.add(fk)
958979

959980
for other_model_name, other_model in self.graph.models.items():
@@ -965,7 +986,7 @@ def _build_model_cte(
965986
junction_self_fk, junction_related_fk = other_join.junction_keys()
966987
for fk in (junction_self_fk, junction_related_fk):
967988
if fk and fk not in columns_added:
968-
select_cols.append(f"{fk} AS {fk}")
989+
select_cols.append(f"{fk} AS {self._quote_alias(fk)}")
969990
columns_added.add(fk)
970991

971992
# Determine table alias for {model} placeholder replacement
@@ -990,7 +1011,7 @@ def replace_model_placeholder(sql_expr: str) -> str:
9901011
dim_sql = dimension.sql_expr
9911012
# Replace {model} placeholder with actual table reference
9921013
dim_sql = replace_model_placeholder(dim_sql)
993-
select_cols.append(f"{dim_sql} AS {dimension.name}")
1014+
select_cols.append(f"{dim_sql} AS {self._quote_alias(dimension.name)}")
9941015
columns_added.add(dimension.name)
9951016

9961017
# Then, add time dimensions with specific granularities
@@ -1009,7 +1030,7 @@ def replace_model_placeholder(sql_expr: str) -> str:
10091030
dim_sql = self._date_trunc(gran, replace_model_placeholder(dimension.sql_expr))
10101031
alias = f"{dim_name}__{gran}"
10111032
if alias not in columns_added:
1012-
select_cols.append(f"{dim_sql} AS {alias}")
1033+
select_cols.append(f"{dim_sql} AS {self._quote_alias(alias)}")
10131034
columns_added.add(alias)
10141035

10151036
# Add raw columns referenced by inline aggregate SQL (if they are not dimensions/measures)
@@ -1020,13 +1041,16 @@ def replace_model_placeholder(sql_expr: str) -> str:
10201041
dim = model.get_dimension(col_name)
10211042
if dim:
10221043
dim_sql = replace_model_placeholder(dim.sql_expr)
1023-
select_cols.append(f"{dim_sql} AS {col_name}")
1044+
select_cols.append(f"{dim_sql} AS {self._quote_alias(col_name)}")
10241045
columns_added.add(col_name)
10251046
continue
10261047
if model.get_metric(col_name):
10271048
continue
1028-
raw_expr = f"{model_table_alias}.{col_name}" if model_table_alias else col_name
1029-
select_cols.append(f"{raw_expr} AS {col_name}")
1049+
if model_table_alias:
1050+
raw_expr = f"{self._quote_identifier(model_table_alias)}.{self._quote_identifier(col_name)}"
1051+
else:
1052+
raw_expr = self._quote_identifier(col_name)
1053+
select_cols.append(f"{raw_expr} AS {self._quote_alias(col_name)}")
10301054
columns_added.add(col_name)
10311055

10321056
# Add measure columns (raw, not aggregated in CTE)
@@ -1142,7 +1166,7 @@ def collect_measures_from_metric(metric_ref: str, visited: set[str] | None = Non
11421166
else:
11431167
measure_sql = base_sql
11441168

1145-
select_cols.append(f"{measure_sql} AS {measure_name}_raw")
1169+
select_cols.append(f"{measure_sql} AS {self._quote_alias(f'{measure_name}_raw')}")
11461170

11471171
# Build FROM clause
11481172
if model.sql:
@@ -1174,7 +1198,10 @@ def collect_measures_from_metric(metric_ref: str, visited: set[str] | None = Non
11741198

11751199
# Build CTE
11761200
select_str = ",\n ".join(select_cols)
1177-
cte_sql = f"{model_name}_cte AS (\n SELECT\n {select_str}\n FROM {from_clause}{where_clause}\n)"
1201+
cte_sql = (
1202+
f"{self._quote_identifier(self._cte_name(model_name))} AS "
1203+
f"(\n SELECT\n {select_str}\n FROM {from_clause}{where_clause}\n)"
1204+
)
11781205

11791206
return cte_sql
11801207

@@ -1584,7 +1611,7 @@ def _build_main_select(
15841611
else:
15851612
alias = base_alias
15861613

1587-
select_exprs.append(f"{model_name}_cte.{cte_col_name} AS {alias}")
1614+
select_exprs.append(f"{self._cte_ref(model_name, cte_col_name)} AS {self._quote_alias(alias)}")
15881615

15891616
# Add metrics
15901617
for metric_ref in metrics:
@@ -1614,7 +1641,7 @@ def _build_main_select(
16141641
# Use complex metric builder
16151642
metric_expr = self._build_metric_sql(measure, model_name)
16161643
metric_expr = self._wrap_with_fill_nulls(metric_expr, measure)
1617-
select_exprs.append(f"{metric_expr} AS {alias}")
1644+
select_exprs.append(f"{metric_expr} AS {self._quote_alias(alias)}")
16181645
elif not measure.agg:
16191646
# Unknown metric type that needs special handling
16201647
raise ValueError(
@@ -1623,7 +1650,9 @@ def _build_main_select(
16231650
)
16241651
elif ungrouped:
16251652
# For ungrouped queries, select raw column without aggregation
1626-
select_exprs.append(f"{model_name}_cte.{measure_name}_raw AS {alias}")
1653+
select_exprs.append(
1654+
f"{self._cte_ref(model_name, f'{measure_name}_raw')} AS {self._quote_alias(alias)}"
1655+
)
16271656
else:
16281657
# Simple aggregation measures
16291658
# Check if this model needs symmetric aggregates
@@ -1650,26 +1679,26 @@ def _build_main_select(
16501679
# This ensures each metric's filter only affects that metric
16511680
agg_expr = self._build_measure_aggregation_sql(model_name, measure)
16521681

1653-
select_exprs.append(f"{agg_expr} AS {alias}")
1682+
select_exprs.append(f"{agg_expr} AS {self._quote_alias(alias)}")
16541683
else:
16551684
# Try as metric
16561685
metric = self.graph.get_metric(metric_ref)
16571686
if metric:
16581687
metric_expr = self._build_metric_sql(metric)
16591688
metric_expr = self._wrap_with_fill_nulls(metric_expr, metric)
1660-
select_exprs.append(f"{metric_expr} AS {metric.name}")
1689+
select_exprs.append(f"{metric_expr} AS {self._quote_alias(metric.name)}")
16611690
else:
16621691
# It's a metric reference (just metric name)
16631692
metric = self.graph.get_metric(metric_ref)
16641693
if metric:
16651694
metric_expr = self._build_metric_sql(metric)
16661695
metric_expr = self._wrap_with_fill_nulls(metric_expr, metric)
1667-
select_exprs.append(f"{metric_expr} AS {metric.name}")
1696+
select_exprs.append(f"{metric_expr} AS {self._quote_alias(metric.name)}")
16681697
else:
16691698
raise ValueError(f"Metric {metric_ref} not found")
16701699

16711700
# Build query using builder API
1672-
query = select(*select_exprs).from_(f"{base_model_name}_cte")
1701+
query = select(*select_exprs).from_(self._quote_identifier(self._cte_name(base_model_name)))
16731702

16741703
# Add joins (supports multi-hop)
16751704
if other_models:
@@ -1685,8 +1714,7 @@ def _build_main_select(
16851714
if jp.to_model in joined_models:
16861715
continue
16871716

1688-
left_table = jp.from_model + "_cte"
1689-
right_table = jp.to_model + "_cte"
1717+
right_table = self._quote_identifier(self._cte_name(jp.to_model))
16901718
# Validate column counts match for composite keys
16911719
if len(jp.from_columns) != len(jp.to_columns):
16921720
raise ValueError(
@@ -1695,7 +1723,8 @@ def _build_main_select(
16951723
)
16961724
# Build join condition for single or multi-column keys
16971725
join_conditions = [
1698-
f"{left_table}.{fk} = {right_table}.{pk}" for fk, pk in zip(jp.from_columns, jp.to_columns)
1726+
self._cte_ref(jp.from_model, fk) + " = " + self._cte_ref(jp.to_model, pk)
1727+
for fk, pk in zip(jp.from_columns, jp.to_columns)
16991728
]
17001729
join_cond = " AND ".join(join_conditions)
17011730

@@ -1769,10 +1798,10 @@ def replace_field(match):
17691798
field_name = match.group(1)
17701799
# Check if it's a measure
17711800
if model_obj.get_metric(field_name):
1772-
return f"{model_name}_cte.{field_name}_raw"
1801+
return self._cte_ref(model_name, f"{field_name}_raw")
17731802
else:
17741803
# It's a dimension or other column
1775-
return f"{model_name}_cte.{field_name}"
1804+
return self._cte_ref(model_name, field_name)
17761805

17771806
result_parts = []
17781807
for part, is_quoted in parts:
@@ -1914,12 +1943,16 @@ def _rewrite_model_refs_to_ctes(self, sql_expr: str) -> str:
19141943
continue
19151944
model_name = col.table.replace("_cte", "")
19161945
if model_name in self.graph.models:
1917-
col.set("table", exp.to_identifier(f"{model_name}_cte"))
1946+
cte_name = self._cte_name(model_name)
1947+
col.set("table", exp.to_identifier(cte_name, quoted=not self._is_simple_identifier(cte_name)))
19181948
return parsed.sql(dialect=self.dialect)
19191949
except Exception:
19201950
rewritten = sql_expr
19211951
for model_name in self.graph.models:
1922-
rewritten = rewritten.replace(f"{model_name}.", f"{model_name}_cte.")
1952+
rewritten = rewritten.replace(
1953+
f"{model_name}.",
1954+
f"{self._quote_identifier(self._cte_name(model_name))}.",
1955+
)
19231956
return rewritten
19241957

19251958
def _build_measure_aggregation_sql(self, model_name: str, measure) -> str:
@@ -1942,7 +1975,7 @@ def _build_measure_aggregation_sql(self, model_name: str, measure) -> str:
19421975
SQL aggregation expression string
19431976
"""
19441977
agg_func = measure.agg.upper()
1945-
raw_col = f"{model_name}_cte.{measure.name}_raw"
1978+
raw_col = self._cte_ref(model_name, f"{measure.name}_raw")
19461979

19471980
# Simple aggregation - filters are already applied in CTE's raw column
19481981
if agg_func == "COUNT_DISTINCT":
@@ -2059,7 +2092,7 @@ def resolve_ratio_ref(ref: str) -> str:
20592092
break
20602093

20612094
if metric_model_name:
2062-
cte_alias = f"{metric_model_name}_cte"
2095+
cte_alias = self._quote_identifier(self._cte_name(metric_model_name))
20632096
# Replace {model} placeholder with the CTE alias
20642097
formula = formula.replace("{model}", cte_alias)
20652098

0 commit comments

Comments
 (0)