Skip to content

Commit d2e07e1

Browse files
Add diff-queries command to list queryable tables and SQL templates for a diff run
1 parent 7af1bc6 commit d2e07e1

5 files changed

Lines changed: 259 additions & 3 deletions

File tree

README.md

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,12 +58,11 @@ That command prints a **diff_id**. Use it for follow-up analysis:
5858
sqlcompare inspect <diff_id> --stats
5959
sqlcompare inspect <diff_id> --column revenue --limit 100
6060
sqlcompare inspect <diff_id> --missing-current
61+
sqlcompare diff-queries <diff_id>
6162
sqlcompare inspect <diff_id> --save summary
6263
sqlcompare inspect <diff_id> --save complete --file-path ./reports/full_diff.xlsx
6364
```
6465

65-
---
66-
6766
## Inspect report export (XLSX)
6867

6968
You can export inspect results as a multi-tab Excel report using `--save`.
@@ -94,6 +93,23 @@ Notes:
9493

9594
---
9695

96+
## diff-queries (AI-friendly metadata)
97+
98+
Use `diff-queries` to get a JSON payload describing the queryable tables and ready-to-run SQL templates for a given `diff_id`. This is especially useful for AI agents that need structured context before running analysis queries.
99+
100+
```bash
101+
sqlcompare diff-queries <diff_id>
102+
```
103+
104+
Output (JSON):
105+
106+
* `diff_id`, `resolved_diff_id`, `connection`
107+
* `index_columns`, `common_columns`
108+
* `tables`: previous/current/join table names and their columns
109+
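* `queries`: SQL templates for row-only checks and per-column diffs (per-column templates contain a `<column>` placeholder; replace it with a real column name before running)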
110+
111+
---
112+
97113
## Example outputs
98114

99115
See [`examples/`](examples/) for datasets, commands, and captured outputs.

sqlcompare/cli.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import typer
44

55
from sqlcompare.dataset import dataset_cmd
6+
from sqlcompare.diff_queries import diff_queries_cmd
67
from sqlcompare.run_cmd import run_cmd
78
from sqlcompare.inspect import inspect_cmd
89
from sqlcompare.list_diffs import list_diffs_cmd
@@ -16,6 +17,7 @@
1617
app.command("table")(table_cmd)
1718
app.command("run")(run_cmd)
1819
app.command("inspect")(inspect_cmd)
20+
app.command("diff-queries")(diff_queries_cmd)
1921
app.command("stats")(stats_cmd)
2022
app.command("list-diffs")(list_diffs_cmd)
2123
app.command("query")(query_cmd)

sqlcompare/diff_queries.py

Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,176 @@
1+
from __future__ import annotations
2+
3+
import json
4+
import typer
5+
6+
from sqlcompare.analysis.utils import find_diff_file, find_diff_run, list_available_diffs
7+
from sqlcompare.log import log
8+
9+
10+
def list_diff_queries(diff_id: str) -> None:
    """Log a JSON description of the tables and SQL templates for a diff run.

    The run metadata is looked up with ``find_diff_run``. When nothing is
    found, an error is logged (distinguishing a pickle-based diff file from
    an unknown ID) and the function returns without emitting a payload. The
    same happens when the metadata has no ``tables`` entry.

    Args:
        diff_id: Identifier of the diff run as supplied by the caller.
    """
    run = find_diff_run(diff_id)
    if not run:
        # A diff file without run metadata is reported differently from an
        # ID that is not known at all.
        if find_diff_file(diff_id):
            log.error(
                "❌ Diff data found, but it is a pickle-based diff without queryable tables."
            )
            return
        log.error(f"❌ Diff data with ID '{diff_id}' not found.")
        log.info("💡 Available diff IDs:")
        list_available_diffs()
        return

    tables = run.get("tables")
    if not tables:
        log.error("❌ Diff metadata missing table definitions.")
        return

    # Prefer the DuckDB file location; otherwise fall back to the stored
    # connection label, then to a generic marker.
    duckdb_file = run.get("duckdb_file")
    connection_display = (
        f"duckdb:///{duckdb_file}"
        if duckdb_file
        else (run.get("conn") or "(default connection)")
    )

    payload = _build_llm_payload(
        diff_id=diff_id,
        resolved_id=run.get("id", diff_id),
        connection_display=connection_display,
        tables=tables,
        cols_prev=run.get("cols_prev", []),
        cols_new=run.get("cols_new", []),
        index_cols=run.get("index_cols", []),
        common_cols=run.get("common_cols"),
    )
    log.info(json.dumps(payload, indent=2))
54+
55+
56+
def _format_columns(columns: list[str]) -> list[str]:
    """Return the column list for inclusion in the payload.

    Currently a pass-through: the same list object is returned unchanged.
    """
    return columns
58+
59+
60+
def _build_llm_payload(
    *,
    diff_id: str,
    resolved_id: str,
    connection_display: str,
    tables: dict[str, str],
    cols_prev: list[str],
    cols_new: list[str],
    index_cols: list[str],
    common_cols: list[str] | None,
) -> dict[str, object]:
    """Assemble the dictionary describing a diff run's tables and queries.

    Args:
        diff_id: Diff ID as requested by the caller.
        resolved_id: Diff ID taken from the run metadata.
        connection_display: Human-readable connection string or label.
        tables: Mapping with the ``previous``, ``new`` and ``join`` table names.
        cols_prev: Column names of the previous table.
        cols_new: Column names of the new table.
        index_cols: Index column names, forwarded to ``_build_queries``.
        common_cols: Columns shared by both tables; ``None`` becomes ``[]``.

    Returns:
        A dict with the keys ``diff_id``, ``resolved_diff_id``, ``connection``,
        ``index_columns``, ``common_columns``, ``tables`` and ``queries``.
    """
    # The join table carries every previous column suffixed with _previous,
    # followed by every new column suffixed with _new.
    previous_suffixed = [f"{col}_previous" for col in cols_prev]
    new_suffixed = [f"{col}_new" for col in cols_new]

    def _entry(key: str, role: str, content: str, columns: list[str]) -> dict[str, object]:
        return {
            "name": tables.get(key),
            "role": role,
            "content": content,
            "columns": _format_columns(columns),
        }

    table_entries = [
        _entry("previous", "previous", "Previous dataset (original table)", cols_prev),
        _entry("new", "current", "Current dataset (new table)", cols_new),
        _entry(
            "join",
            "join",
            "Full outer join on index columns with _previous/_new suffixes",
            previous_suffixed + new_suffixed,
        ),
    ]

    return {
        "diff_id": diff_id,
        "resolved_diff_id": resolved_id,
        "connection": connection_display,
        "index_columns": index_cols,
        "common_columns": common_cols or [],
        "tables": table_entries,
        "queries": _build_queries(tables, index_cols),
    }
104+
105+
106+
def _build_queries(tables: dict[str, str], index_cols: list[str]) -> list[dict[str, str]]:
    """Build named SQL templates for exploring the join table of a diff run.

    Per-column templates contain the literal placeholder ``<column>``, which
    the caller must replace with a real column name before running the query.

    Args:
        tables: Mapping of table roles to table names; only ``"join"`` is read.
        index_cols: Index column names. Each one is referenced in the join
            table as ``<name>_previous`` and ``<name>_new``.

    Returns:
        A list of ``{"name": ..., "sql": ...}`` dicts, or an empty list when
        the join table name or the index columns are missing.
    """
    join_table = tables.get("join")
    if not join_table or not index_cols:
        return []

    def _quote(identifier: str) -> str:
        # Double embedded quotes so unusual column names stay valid identifiers.
        return '"' + identifier.replace('"', '""') + '"'

    def _query(name: str, sql: str) -> dict[str, str]:
        return {"name": name, "sql": sql}

    # A row exists on one side only when every index column of the other
    # side is NULL in the full outer join.
    prev_null_cond = " AND ".join(
        f"{_quote(c + '_previous')} IS NULL" for c in index_cols
    )
    new_null_cond = " AND ".join(f"{_quote(c + '_new')} IS NULL" for c in index_cols)

    idx_expr = ", ".join(
        f"COALESCE({_quote(c + '_new')}, {_quote(c + '_previous')}) AS {_quote(c)}"
        for c in index_cols
    )
    # Plain aliases, for selecting from a subquery that already applied idx_expr.
    idx_names = ", ".join(_quote(c) for c in index_cols)

    col_placeholder = "<column>"
    before = _quote(f"{col_placeholder}_previous")
    after = _quote(f"{col_placeholder}_new")
    # NULL-safe inequality: `NOT (a = b OR ...)` evaluates to NULL (row is
    # dropped) when exactly one side is NULL, so value<->NULL changes must be
    # matched explicitly. Rows present on only one side are excluded.
    diff_cond = (
        f"({before} <> {after}"
        f" OR ({before} IS NULL AND {after} IS NOT NULL)"
        f" OR ({before} IS NOT NULL AND {after} IS NULL))"
        f" AND NOT ({prev_null_cond}) AND NOT ({new_null_cond})"
    )
    diff_select = (
        f"SELECT {idx_expr}, '{col_placeholder}' AS \"COLUMN\", "
        f"CAST({before} AS VARCHAR) AS \"BEFORE\", "
        f"CAST({after} AS VARCHAR) AS \"CURRENT\" "
        f"FROM {join_table} WHERE {diff_cond}"
    )

    return [
        _query(
            "rows_only_in_current",
            f"SELECT * FROM {join_table} WHERE {prev_null_cond};",
        ),
        _query(
            "rows_only_in_previous",
            f"SELECT * FROM {join_table} WHERE {new_null_cond};",
        ),
        _query(
            "count_rows_only_in_current",
            f"SELECT COUNT(*) AS rows_only_in_current FROM {join_table} WHERE {prev_null_cond};",
        ),
        _query(
            "count_rows_only_in_previous",
            f"SELECT COUNT(*) AS rows_only_in_previous FROM {join_table} WHERE {new_null_cond};",
        ),
        _query("rows_with_column_differences", f"{diff_select};"),
        _query(
            "count_column_differences",
            f"SELECT COUNT(*) AS diff_count FROM {join_table} WHERE {diff_cond};",
        ),
        _query(
            "top_10_diff_samples",
            # The subquery only exposes the aliased index columns, so the
            # outer SELECT must reference those aliases, not the suffixed ones.
            f'SELECT {idx_names}, "COLUMN", "BEFORE", "CURRENT"\n'
            f"FROM (\n    {diff_select}\n) AS diffs\n"
            "LIMIT 10;",
        ),
    ]
166+
167+
168+
def diff_queries_cmd(
    diff_id: str = typer.Argument(..., help="Diff run ID"),
) -> None:
    """List queryable tables and SQL templates for a diff run.

    Examples:
        sqlcompare diff-queries <diff_id>
    """
    # Thin CLI entry point: all of the work happens in list_diff_queries.
    list_diff_queries(diff_id)

sqlcompare/table.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,8 @@ def compare_table(
101101
)
102102
log.info(
103103
"💡 Tips: --stats for per-column counts, --missing-current/--missing-previous for row-only, "
104-
"--column <name> to filter, --list-columns to inspect available fields."
104+
"--column <name> to filter, --list-columns to inspect available fields, "
105+
f"and sqlcompare diff-queries {diff_id} for queryable tables and SQL templates (useful for AI agents)."
105106
)
106107

107108

tests/test_cli_diff_queries.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
from __future__ import annotations
2+
3+
from pathlib import Path
4+
5+
import json
6+
7+
from typer.testing import CliRunner
8+
9+
from sqlcompare.cli import app
10+
from sqlcompare.config import load_test_runs
11+
from tests.cli_helpers import seed_duckdb, set_cli_env
12+
13+
14+
def _create_diff(tmp_path: Path, monkeypatch) -> str:
    """Run the ``table`` command against a seeded DuckDB file and return its diff ID.

    Asserts that the command succeeds and that exactly one run was recorded.
    """
    db_path = tmp_path / "sqlcompare.duckdb"
    seed_duckdb(db_path)
    set_cli_env(
        monkeypatch,
        tmp_path / "config",
        "duckdb_test",
        f"duckdb:///{db_path}",
    )

    cli_args = ["table", "previous", "current", "id", "--connection", "duckdb_test"]
    result = CliRunner().invoke(app, cli_args)
    assert result.exit_code == 0, result.output

    runs = load_test_runs()
    assert len(runs) == 1
    return next(iter(runs.keys()))
40+
41+
42+
def test_diff_queries_lists_queryable_tables(tmp_path, monkeypatch) -> None:
    """``diff-queries`` prints a JSON payload naming the tables, columns and queries."""
    diff_id = _create_diff(tmp_path, monkeypatch)

    result = CliRunner().invoke(app, ["diff-queries", diff_id])
    assert result.exit_code == 0, result.output

    # The command output must be parseable as a single JSON document.
    payload = json.loads(result.output)
    assert payload["diff_id"] == diff_id

    table_entries = payload["tables"]
    assert table_entries
    table_names = [entry["name"] for entry in table_entries]
    for expected_name in ("previous", "current"):
        assert expected_name in table_names
    assert any(name and name.endswith("_join") for name in table_names)

    assert payload["index_columns"] == ["id"]
    for expected_column in ("name", "value"):
        assert expected_column in payload["common_columns"]

    join_entry = next(entry for entry in table_entries if entry["role"] == "join")
    for expected_column in ("id_previous", "value_new"):
        assert expected_column in join_entry["columns"]

    assert payload["queries"]

0 commit comments

Comments
 (0)