Skip to content

Commit 1919826

Browse files
committed
Add environment variable substitution support for YAML files
- Support `${ENV_VAR}`, `${ENV_VAR:-default}`, and `$ENV_VAR` syntax
- Environment variables are substituted when YAML files are loaded
- Works in connection strings, table names, and any YAML value
- Update `SemanticLayer.from_yaml()` to read the connection from the YAML file
- Add 11 comprehensive tests for env var substitution
- Document env var usage in config.qmd and connections.qmd
- Compatible with Docker Compose and common YAML env var patterns
1 parent 1277709 commit 1919826

6 files changed

Lines changed: 366 additions & 6 deletions

File tree

docs/config.qmd

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ Basic semantic layer YAML file:
1212
# semantic_layer.yml
1313

1414
# Database connection (optional, defaults to in-memory DuckDB)
15+
# Supports environment variable substitution
1516
connection: duckdb:///data/analytics.duckdb
1617

1718
# Models define your tables and metrics
@@ -77,6 +78,47 @@ connection: databricks://token@server/http-path?catalog=main
7778
connection: spark://host:10000/database
7879
```
7980
81+
### Environment Variables in YAML
82+
83+
Use environment variables for sensitive credentials:
84+
85+
```yaml
86+
# Use ${ENV_VAR} syntax
87+
connection: postgres://${DB_USER}:${DB_PASSWORD}@${DB_HOST}:5432/${DB_NAME}
88+
89+
# With default values
90+
connection: duckdb:///${DB_FILE:-/tmp/default.duckdb}
91+
92+
# Simple form (uppercase vars only)
93+
connection: $DATABASE_URL
94+
```
95+
96+
**Supported syntax:**
97+
98+
- `${ENV_VAR}` - Substituted with environment variable value
99+
- `${ENV_VAR:-default}` - Use default if variable not set
100+
- `$ENV_VAR` - Simple form (uppercase variables only)
101+
102+
**Example:**
103+
104+
```bash
105+
# Set environment variables
106+
export DB_USER=analyst
107+
export DB_PASSWORD=secret
108+
export DB_HOST=localhost
109+
export DB_NAME=analytics
110+
```
111+
112+
```yaml
113+
# semantic_layer.yml
114+
connection: postgres://${DB_USER}:${DB_PASSWORD}@${DB_HOST}:5432/${DB_NAME}
115+
116+
models:
117+
- name: orders
118+
table: ${SCHEMA_NAME:-public}.orders
119+
primary_key: order_id
120+
```
121+
80122
See **[Database Connections](connections.qmd)** for complete connection string reference.
81123

82124
### Override via CLI

docs/connections.qmd

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,57 @@ sidemantic query models/ \
290290

291291
Store credentials in environment variables for security. Never commit credentials to version control.
292292

293+
Environment variables can be used in:
294+
- **YAML configuration files** - Using `${VAR}` syntax
295+
- **CLI flags** - Using shell variables like `$VAR`
296+
297+
### In YAML Files
298+
299+
Sidemantic supports environment variable substitution in YAML files using shell-style syntax (the same forms Docker Compose accepts):
300+
301+
**Syntax:**
302+
- `${ENV_VAR}` - Substituted with environment variable value
303+
- `${ENV_VAR:-default}` - Use default if variable not set
304+
- `$ENV_VAR` - Simple form (uppercase variables only)
305+
306+
**Example YAML:**
307+
```yaml
308+
# semantic_layer.yml
309+
connection: postgres://${DB_USER}:${DB_PASSWORD}@${DB_HOST}:5432/${DB_NAME}
310+
311+
models:
312+
- name: orders
313+
table: ${SCHEMA_NAME:-public}.orders
314+
primary_key: order_id
315+
```
316+
317+
**Set environment variables:**
318+
```bash
319+
export DB_USER=analyst
320+
export DB_PASSWORD=secret
321+
export DB_HOST=localhost
322+
export DB_NAME=analytics
323+
export SCHEMA_NAME=prod
324+
```
325+
326+
**Load configuration:**
327+
```bash
328+
sidemantic query semantic_layer.yml --sql "SELECT revenue FROM orders"
329+
```
330+
331+
The environment variables are substituted when the YAML file is loaded.
332+
333+
### In CLI Flags
334+
335+
Use shell environment variables in CLI commands:
336+
337+
```bash
338+
export DATABASE_URL=postgres://user:pass@localhost:5432/analytics
339+
340+
sidemantic query models/ --connection "$DATABASE_URL" \
341+
--sql "SELECT revenue FROM orders"
342+
```
343+
293344
### Full Connection Strings
294345

295346
Store the complete connection string in an environment variable:

sidemantic/adapters/sidemantic.py

Lines changed: 61 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
"""Sidemantic native YAML adapter with SQL syntax support."""
22

3+
import os
4+
import re
35
from pathlib import Path
46

57
import yaml
@@ -14,6 +16,59 @@
1416
from sidemantic.core.sql_definitions import parse_sql_definitions, parse_sql_file_with_frontmatter, parse_sql_model
1517

1618

19+
# Matches either the braced form (${NAME} / ${NAME:-default}) or the simple
# form ($NAME, uppercase names only). Both forms live in ONE pattern so the
# text is scanned exactly once and substituted values are never rescanned.
_ENV_VAR_PATTERN = re.compile(r"\$\{(?P<braced>[^}]+)\}|\$(?P<simple>[A-Z_][A-Z0-9_]*)")

# Simple form only; used to expand $NAME references written inside a default.
_SIMPLE_ENV_VAR_PATTERN = re.compile(r"\$([A-Z_][A-Z0-9_]*)")


def substitute_env_vars(content: str) -> str:
    """Substitute environment variables in YAML content.

    Supports:
    - ${ENV_VAR} - replaced with the environment variable value
    - ${ENV_VAR:-default} - replaced with the value, or with ``default`` when
      the variable is unset or empty (shell / Docker Compose semantics)
    - $ENV_VAR - simple form without braces (uppercase names only)

    References to variables that are not set (and have no default) are left
    untouched rather than raising, so the caller can decide how to handle
    them. Values taken from the environment are inserted verbatim: a value
    that itself contains ``$SOMETHING`` is NOT expanded again. A ``$NAME``
    written inside a default (``${A:-$B}``) is expanded, because that text
    comes from the YAML file, not from the environment.

    Args:
        content: YAML content string

    Returns:
        Content with environment variables substituted

    Examples:
        >>> os.environ['DB_HOST'] = 'localhost'
        >>> substitute_env_vars('host: ${DB_HOST}')
        'host: localhost'
        >>> substitute_env_vars('host: ${MISSING:-default}')
        'host: default'
    """

    def replace_simple_var(match):
        # Keep the original "$NAME" text when the variable is not set.
        return os.environ.get(match.group(1), match.group(0))

    def replace_var(match):
        if match.group("simple") is not None:
            return os.environ.get(match.group("simple"), match.group(0))

        var_name, has_default, default = match.group("braced").partition(":-")
        value = os.environ.get(var_name)

        if has_default:
            # ":-" falls back when the variable is unset OR empty.
            if value:
                return value
            # The default is template text, so simple $NAME references
            # inside it are still honoured.
            return _SIMPLE_ENV_VAR_PATTERN.sub(replace_simple_var, default)

        if value is None:
            # Keep original if not found (don't fail, let user handle missing vars)
            return match.group(0)
        return value

    return _ENV_VAR_PATTERN.sub(replace_var, content)
70+
71+
1772
class SidemanticAdapter(BaseAdapter):
1873
"""Adapter for Sidemantic native YAML format.
1974
@@ -83,7 +138,12 @@ def parse(self, source: str | Path) -> SemanticGraph:
83138

84139
# Handle YAML files
85140
with open(source_path) as f:
86-
data = yaml.safe_load(f)
141+
content = f.read()
142+
143+
# Substitute environment variables
144+
content = substitute_env_vars(content)
145+
146+
data = yaml.safe_load(content)
87147

88148
if not data:
89149
return graph

sidemantic/core/semantic_layer.py

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -365,22 +365,38 @@ def get_catalog_metadata(self, schema: str = "public") -> dict:
365365
return get_catalog_metadata(self.graph, schema=schema)
366366

367367
@classmethod
368-
def from_yaml(cls, path: str | Path, connection: str = "duckdb:///:memory:") -> SemanticLayer:
368+
def from_yaml(cls, path: str | Path, connection: str | None = None) -> SemanticLayer:
369369
"""Load semantic layer from native YAML file.
370370
371371
Args:
372372
path: Path to YAML file
373-
connection: Database connection string
373+
connection: Database connection string (overrides connection in YAML file)
374374
375375
Returns:
376376
SemanticLayer instance
377377
"""
378-
from sidemantic.adapters.sidemantic import SidemanticAdapter
378+
import yaml
379+
380+
from sidemantic.adapters.sidemantic import SidemanticAdapter, substitute_env_vars
379381

380382
adapter = SidemanticAdapter()
381383
graph = adapter.parse(path)
382384

383-
layer = cls(connection=connection)
385+
# If connection not provided as parameter, try to read from YAML file
386+
if connection is None:
387+
with open(path) as f:
388+
content = f.read()
389+
# Substitute environment variables
390+
content = substitute_env_vars(content)
391+
data = yaml.safe_load(content)
392+
if data and "connection" in data:
393+
connection = data["connection"]
394+
395+
# Create layer with connection (or use default if still None)
396+
if connection:
397+
layer = cls(connection=connection)
398+
else:
399+
layer = cls()
384400
layer.graph = graph
385401

386402
return layer

0 commit comments

Comments
 (0)