Skip to content

Commit 68f4cd2

Browse files
Add support for including and ignoring columns in table comparisons and enhance CLI options
1 parent 8ce9c24 commit 68f4cd2

4 files changed

Lines changed: 183 additions & 9 deletions

File tree

sqlcompare/compare/comparator.py

Lines changed: 69 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,59 @@ def __init__(self, connection: Any):
2121
self.cols_new: list[str] = []
2222
self.common_cols: list[str] = []
2323

24+
@staticmethod
25+
def _resolve_compare_columns(
26+
common_cols: Sequence[str],
27+
include_columns: Sequence[str] | None,
28+
ignore_columns: Sequence[str] | None,
29+
) -> list[str]:
30+
selected_cols = list(common_cols)
31+
32+
if include_columns:
33+
include_matches: list[str] = []
34+
missing_includes: list[str] = []
35+
seen: set[str] = set()
36+
for raw_col in include_columns:
37+
col = raw_col.strip()
38+
if not col:
39+
continue
40+
match = next((c for c in common_cols if c.upper() == col.upper()), None)
41+
if not match:
42+
missing_includes.append(raw_col)
43+
continue
44+
key = match.upper()
45+
if key in seen:
46+
continue
47+
seen.add(key)
48+
include_matches.append(match)
49+
if missing_includes:
50+
raise ValueError(
51+
"Included columns not found in both tables: "
52+
+ ", ".join(missing_includes)
53+
)
54+
selected_cols = include_matches
55+
56+
if ignore_columns:
57+
ignore_keys: set[str] = set()
58+
missing_ignores: list[str] = []
59+
for raw_col in ignore_columns:
60+
col = raw_col.strip()
61+
if not col:
62+
continue
63+
match = next((c for c in common_cols if c.upper() == col.upper()), None)
64+
if not match:
65+
missing_ignores.append(raw_col)
66+
continue
67+
ignore_keys.add(match.upper())
68+
if missing_ignores:
69+
raise ValueError(
70+
"Ignored columns not found in both tables: "
71+
+ ", ".join(missing_ignores)
72+
)
73+
selected_cols = [c for c in selected_cols if c.upper() not in ignore_keys]
74+
75+
return selected_cols
76+
2477
def _ensure_schema(self, db: DBConnection, test_schema: str) -> None:
2578
"""Ensure the test schema exists without changing the database context permanently."""
2679
try:
@@ -60,15 +113,19 @@ def from_saved(
60113
index_cols: Sequence[str],
61114
cols_prev: Sequence[str],
62115
cols_new: Sequence[str],
116+
common_cols: Sequence[str] | None = None,
63117
) -> "DatabaseComparator":
64118
inst = cls(connection)
65119
inst.tables = tables
66120
inst.index_cols = list(index_cols)
67121
inst.cols_prev = list(cols_prev)
68122
inst.cols_new = list(cols_new)
69-
inst.common_cols = [
70-
c for c in inst.cols_prev if c in inst.cols_new and c not in inst.index_cols
71-
]
123+
if common_cols is not None:
124+
inst.common_cols = list(common_cols)
125+
else:
126+
inst.common_cols = [
127+
c for c in inst.cols_prev if c in inst.cols_new and c not in inst.index_cols
128+
]
72129
return inst
73130

74131
# ------------------------------------------------------------------
@@ -157,6 +214,8 @@ def compare(
157214
index_cols: Sequence[str],
158215
test_name: str,
159216
test_schema: str = "sqlcompare",
217+
include_columns: Sequence[str] | None = None,
218+
ignore_columns: Sequence[str] | None = None,
160219
) -> str:
161220
"""Compare two tables in the database."""
162221
if not index_cols:
@@ -251,9 +310,14 @@ def compare(
251310
]
252311
)
253312
self.index_cols = valid_index_cols
254-
self.common_cols = [
313+
all_common_cols = [
255314
c for c in cols_prev if c in cols_new and c not in self.index_cols
256315
]
316+
self.common_cols = self._resolve_compare_columns(
317+
all_common_cols,
318+
include_columns=include_columns,
319+
ignore_columns=ignore_columns,
320+
)
257321

258322
# Build join table with side specific columns
259323
select_cols = [f'p."{c}" AS "{c}_previous"' for c in cols_prev]
@@ -365,6 +429,7 @@ def compare(
365429
"index_cols": list(self.index_cols),
366430
"cols_prev": self.cols_prev,
367431
"cols_new": self.cols_new,
432+
"common_cols": self.common_cols,
368433
"conn": conn_name,
369434
}
370435

sqlcompare/inspect.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,11 @@ def inspect_diff(
3636
idx = run.get("index_cols", [])
3737
cols_prev = run.get("cols_prev", [])
3838
cols_new = run.get("cols_new", [])
39+
common_cols = run.get("common_cols")
3940
if "duckdb_file" in run:
4041
db_file = run["duckdb_file"]
4142
comp = DatabaseComparator.from_saved(
42-
db_file, tables, idx, cols_prev, cols_new
43+
db_file, tables, idx, cols_prev, cols_new, common_cols
4344
)
4445
# Use DBConnection with duckdb URL
4546
conn_url = f"duckdb:///{db_file}"
@@ -105,7 +106,9 @@ def inspect_diff(
105106
)
106107
return
107108
conn = run["conn"]
108-
comp = DatabaseComparator.from_saved(conn, tables, idx, cols_prev, cols_new)
109+
comp = DatabaseComparator.from_saved(
110+
conn, tables, idx, cols_prev, cols_new, common_cols
111+
)
109112
with DBConnection(conn) as db:
110113
if stats:
111114
q = comp.get_stats_query(column=column)

sqlcompare/table.py

Lines changed: 41 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ def compare_table(
1717
index: str,
1818
connection: str | None,
1919
schema: str | None,
20+
include_columns: str | None = None,
21+
ignore_columns: str | None = None,
2022
) -> None:
2123
"""Compare two tables in the database.
2224
@@ -64,8 +66,18 @@ def compare_table(
6466

6567
connection = connection_id
6668

67-
# Parse index columns
69+
# Parse index and comparison columns
6870
id_cols = [x.strip() for x in index.split(",")]
71+
include_cols = (
72+
[x.strip() for x in include_columns.split(",") if x.strip()]
73+
if include_columns
74+
else None
75+
)
76+
ignore_cols = (
77+
[x.strip() for x in ignore_columns.split(",") if x.strip()]
78+
if ignore_columns
79+
else None
80+
)
6981

7082
# Generate a test name based on tables
7183
safe_t1 = "".join(c if c.isalnum() else "_" for c in table1)
@@ -74,7 +86,15 @@ def compare_table(
7486

7587
# Run comparison
7688
comparator = DatabaseComparator(connection)
77-
diff_id = comparator.compare(table1, table2, id_cols, test_name, schema)
89+
diff_id = comparator.compare(
90+
table1,
91+
table2,
92+
id_cols,
93+
test_name,
94+
schema,
95+
include_columns=include_cols,
96+
ignore_columns=ignore_cols,
97+
)
7898
log.info(f"🔎 To review the diff, run: sqlcompare inspect {diff_id}")
7999
log.info(
80100
"💡 Tips: --stats for per-column counts, --missing-current/--missing-previous for row-only, "
@@ -94,6 +114,16 @@ def table_cmd(
94114
None, "--connection", "-c", help="Database connector name"
95115
),
96116
schema: str | None = typer.Option(None, "--schema", help="Schema for test tables"),
117+
columns: str | None = typer.Option(
118+
None,
119+
"--columns",
120+
help="Comma-separated non-index columns to compare (default: all common columns)",
121+
),
122+
ignore_columns: str | None = typer.Option(
123+
None,
124+
"--ignore-columns",
125+
help="Comma-separated non-index columns to skip from comparison",
126+
),
97127
) -> None:
98128
"""Compare two database tables or CSV/XLSX files.
99129
@@ -121,4 +151,12 @@ def table_cmd(
121151
2. SQLCOMPARE_CONN_DEFAULT environment variable
122152
3. For files: auto-creates temporary DuckDB instance
123153
"""
124-
compare_table(table1, table2, index, connection, schema)
154+
compare_table(
155+
table1,
156+
table2,
157+
index,
158+
connection,
159+
schema,
160+
include_columns=columns,
161+
ignore_columns=ignore_columns,
162+
)

tests/test_cli_table.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,3 +60,71 @@ def test_table_command_with_files(tmp_path, monkeypatch) -> None:
6060
assert "inspect" in result.output
6161
runs = load_test_runs()
6262
assert len(runs) == 1
63+
64+
65+
def test_table_command_ignore_columns_option(tmp_path, monkeypatch) -> None:
    """`table ... --ignore-columns value` records only `name` as a compared column."""
    db_path = tmp_path / "sqlcompare.duckdb"
    seed_duckdb(db_path)
    set_cli_env(
        monkeypatch,
        tmp_path / "config",
        "duckdb_test",
        f"duckdb:///{db_path}",
        schema="analysis_schema",
    )

    cli_args = [
        "table",
        "previous",
        "current",
        "id",
        "--connection",
        "duckdb_test",
        "--ignore-columns",
        "value",
    ]
    outcome = CliRunner().invoke(app, cli_args)

    assert outcome.exit_code == 0, outcome.output
    saved_runs = load_test_runs()
    assert len(saved_runs) == 1
    # Exactly one run was saved, so take it without caring about its key.
    (only_run,) = saved_runs.values()
    assert only_run["common_cols"] == ["name"]
97+
98+
99+
def test_table_command_columns_option(tmp_path, monkeypatch) -> None:
    """`table ... --columns value` records only `value` as a compared column."""
    db_path = tmp_path / "sqlcompare.duckdb"
    seed_duckdb(db_path)
    set_cli_env(
        monkeypatch,
        tmp_path / "config",
        "duckdb_test",
        f"duckdb:///{db_path}",
        schema="analysis_schema",
    )

    cli_args = [
        "table",
        "previous",
        "current",
        "id",
        "--connection",
        "duckdb_test",
        "--columns",
        "value",
    ]
    outcome = CliRunner().invoke(app, cli_args)

    assert outcome.exit_code == 0, outcome.output
    saved_runs = load_test_runs()
    assert len(saved_runs) == 1
    # Exactly one run was saved, so take it without caring about its key.
    (only_run,) = saved_runs.values()
    assert only_run["common_cols"] == ["value"]

0 commit comments

Comments
 (0)