Skip to content

Commit d6d3d93

Browse files
feat: Add support for column type comparison in schema checks and reports
1 parent 369fa57 commit d6d3d93

6 files changed

Lines changed: 84 additions & 2 deletions

File tree

sqlcompare/db/connection.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from typing import Any
88
from urllib.parse import parse_qs, urlparse
99

10-
from sqlalchemy import text
10+
from sqlalchemy import inspect, text
1111
from sqlalchemy.engine import Connection, Engine, Result
1212
from sqlalchemy.exc import SQLAlchemyError
1313

@@ -261,6 +261,26 @@ def get_table_columns(self, table_name: str) -> list[str]:
261261
_, columns = self.query(f"SELECT * FROM {table_name} WHERE 1=0", include_columns=True)
262262
return columns
263263

264+
def get_table_column_types(self, table_name: str) -> dict[str, str | None]:
    """
    Get column types keyed by the actual column name as stored in the database.

    ``table_name`` may be a bare name (``orders``), a dotted qualified name
    (``sales.orders``), or use double-quoted identifiers
    (``"sales"."orders"``, ``"my.table"``). The last identifier is used as
    the table; anything before it is joined with ``.`` and passed to the
    inspector as the schema.

    Returns a dict mapping each reflected column name to the upper-cased
    string form of its type, or ``None`` when the reflected column has no
    ``type`` entry.
    """
    # Split only on dots that sit outside double quotes, so a quoted
    # identifier containing a dot (``"my.table"``) stays in one piece and
    # every part is unquoted individually (``"a"."b"."c"`` -> a, b, c).
    parts: list[str] = []
    buffer: list[str] = []
    in_quotes = False
    index = 0
    while index < len(table_name):
        char = table_name[index]
        if char == '"':
            if in_quotes and table_name[index + 1 : index + 2] == '"':
                # A doubled quote inside a quoted identifier is an escaped
                # literal quote character.
                buffer.append('"')
                index += 2
                continue
            in_quotes = not in_quotes
        elif char == "." and not in_quotes:
            parts.append("".join(buffer))
            buffer = []
        else:
            buffer.append(char)
        index += 1
    parts.append("".join(buffer))

    if in_quotes:
        # Unbalanced quoting: the scan above is not trustworthy, so fall back
        # to the simple "split on the last dot, trim outer quotes" rule.
        parts = [part.strip('"') for part in table_name.rsplit(".", 1)]

    table = parts[-1]
    schema = ".".join(parts[:-1]) if len(parts) > 1 else None

    columns = inspect(self.conn).get_columns(table, schema=schema)
    return {
        str(column["name"]): (
            None if column.get("type") is None else str(column["type"]).upper()
        )
        for column in columns
    }
283+
264284
def query(
265285
self,
266286
sql: str,

sqlcompare/stats/checks.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,12 +139,22 @@ def run_row_count_check(context: StatsContext, definition: CheckDefinition) -> C
139139
def run_schema_check(context: StatsContext, definition: CheckDefinition) -> CheckResult:
140140
previous_only = context.previous_only_columns
141141
current_only = context.current_only_columns
142-
has_differences = bool(previous_only or current_only)
142+
type_changes = [
143+
(
144+
pair.previous_name,
145+
pair.previous_type or "unknown",
146+
pair.current_type or "unknown",
147+
)
148+
for pair in context.common_columns
149+
if (pair.previous_type or "unknown").upper() != (pair.current_type or "unknown").upper()
150+
]
151+
has_differences = bool(previous_only or current_only or type_changes)
143152
common_count = len(context.common_columns)
144153
if has_differences:
145154
summary = (
146155
f"{len(previous_only)} only in previous, "
147156
f"{len(current_only)} only in current, "
157+
f"{len(type_changes)} type changes, "
148158
f"{common_count} common columns."
149159
)
150160
else:
@@ -157,6 +167,7 @@ def run_schema_check(context: StatsContext, definition: CheckDefinition) -> Chec
157167
metadata={
158168
"previous_only_columns": previous_only,
159169
"current_only_columns": current_only,
170+
"type_changes": type_changes,
160171
"common_count": common_count,
161172
},
162173
)

sqlcompare/stats/models.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ class ColumnPair:
99
key: str
1010
previous_name: str
1111
current_name: str
12+
previous_type: str | None = None
13+
current_type: str | None = None
1214

1315

1416
@dataclass
@@ -21,6 +23,8 @@ class StatsContext:
2123
common_columns: list[ColumnPair]
2224
previous_only_columns: list[str]
2325
current_only_columns: list[str]
26+
previous_column_types: dict[str, str | None]
27+
current_column_types: dict[str, str | None]
2428

2529

2630
@dataclass

sqlcompare/stats/report.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,23 @@ def _status_cell(metric: dict) -> str:
2424
return f"{previous} -> {current} ({pct_diff})"
2525

2626

27+
def _type_status_cell(previous_type: str | None, current_type: str | None) -> str:
28+
previous = previous_type or "unknown"
29+
current = current_type or "unknown"
30+
31+
if previous.upper() == current.upper():
32+
return f"{current} (no change)"
33+
34+
return f"{previous} -> {current}"
35+
36+
2737
def _build_column_comparison_table(
2838
context: StatsContext,
2939
selected_check_names: list[str],
3040
results_by_name: dict[str, CheckResult],
3141
) -> tuple[list[str], list[tuple]]:
3242
columns = ["COL"]
43+
columns.append("TYPE")
3344
rows: list[tuple] = []
3445

3546
include_nulls = "nulls" in selected_check_names and "nulls" in results_by_name
@@ -57,6 +68,7 @@ def _build_column_comparison_table(
5768

5869
for pair in context.common_columns:
5970
row = [pair.previous_name]
71+
row.append(_type_status_cell(pair.previous_type, pair.current_type))
6072
similarities: list[float] = []
6173

6274
if row_count_similarity is not None:
@@ -107,12 +119,19 @@ def render_report(
107119
lines.append(result.summary or "")
108120
prev_only = result.metadata.get("previous_only_columns", [])
109121
curr_only = result.metadata.get("current_only_columns", [])
122+
type_changes = result.metadata.get("type_changes", [])
110123
if prev_only:
111124
lines.append("Only in previous:")
112125
lines.extend(f" - {col}" for col in prev_only)
113126
if curr_only:
114127
lines.append("Only in current:")
115128
lines.extend(f" - {col}" for col in curr_only)
129+
if type_changes:
130+
lines.append("Type changes:")
131+
lines.extend(
132+
f" - {column}: {previous_type} -> {current_type}"
133+
for column, previous_type, current_type in type_changes
134+
)
116135
continue
117136

118137
if result.skipped_reason:

sqlcompare/stats/runner.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,8 @@ def _resolve_checks(checks: str | None) -> list[str]:
6464
def _build_context(db: DBConnection, previous_name: str, current_name: str) -> StatsContext:
6565
previous_columns = db.get_table_columns(previous_name)
6666
current_columns = db.get_table_columns(current_name)
67+
previous_column_types = db.get_table_column_types(previous_name)
68+
current_column_types = db.get_table_column_types(current_name)
6769
previous_map = {column.upper(): column for column in previous_columns}
6870
current_map = {column.upper(): column for column in current_columns}
6971

@@ -72,6 +74,8 @@ def _build_context(db: DBConnection, previous_name: str, current_name: str) -> S
7274
key=key,
7375
previous_name=previous_map[key],
7476
current_name=current_map[key],
77+
previous_type=previous_column_types.get(previous_map[key]),
78+
current_type=current_column_types.get(current_map[key]),
7579
)
7680
for key in previous_map
7781
if key in current_map
@@ -92,6 +96,8 @@ def _build_context(db: DBConnection, previous_name: str, current_name: str) -> S
9296
common_columns=common_columns,
9397
previous_only_columns=previous_only_columns,
9498
current_only_columns=current_only_columns,
99+
previous_column_types=previous_column_types,
100+
current_column_types=current_column_types,
95101
)
96102

97103

tests/test_stats_comparison.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ def test_table_command_with_stats(tmp_path, monkeypatch) -> None:
4242
assert "Previous Current Diff % Diff" in result.output
4343
assert " 2 2 0 0.0%" in result.output
4444
assert "COL" in result.output
45+
assert "TYPE" in result.output
4546
assert "MATCH" in result.output
4647
assert "0 (no change)" in result.output
4748
assert "Schemas match on column names (3 common columns)." in result.output
@@ -71,8 +72,10 @@ def test_table_command_with_stats_from_files() -> None:
7172
assert "Column comparison:" in result.output
7273
assert "NULL" in result.output
7374
assert "DUP" in result.output
75+
assert "TYPE" in result.output
7476
assert "MATCH" in result.output
7577
assert "notes" in result.output
78+
assert "VARCHAR (no change)" in result.output
7679
assert "2 -> 1 (-50%)" in result.output
7780
assert "0 (no change)" in result.output
7881

@@ -223,6 +226,25 @@ def test_stats_command_reports_schema_differences(tmp_path) -> None:
223226
assert "Column comparison:" in result.output
224227

225228

229+
def test_stats_command_reports_type_changes_in_column_comparison(tmp_path) -> None:
    """Check that ``run stats`` reports a column type change between two CSVs.

    The ``code`` column holds integers in the previous file and text in the
    current one; ``id`` holds integers in both. The output is expected to
    list the change under the schema differences and in the TYPE column of
    the column comparison.
    """
    fixtures = {
        "previous.csv": "id,code\n1,100\n2,200\n",
        "current.csv": "id,code\n1,A100\n2,B200\n",
    }
    csv_paths = []
    for filename, content in fixtures.items():
        csv_path = tmp_path / filename
        csv_path.write_text(content, encoding="utf-8")
        csv_paths.append(str(csv_path))

    result = CliRunner().invoke(app, ["run", "stats", *csv_paths])

    assert result.exit_code == 0, result.output
    expected_fragments = (
        "Schema differences:",
        "Type changes:",
        "code: BIGINT -> VARCHAR",
        "Column comparison:",
        "TYPE",
        "BIGINT -> VARCHAR",
        "BIGINT (no change)",
    )
    for fragment in expected_fragments:
        assert fragment in result.output
246+
247+
226248
def test_stats_command_skips_column_checks_when_no_common_columns(tmp_path) -> None:
227249
previous = tmp_path / "previous.csv"
228250
current = tmp_path / "current.csv"

0 commit comments

Comments
 (0)