Skip to content

Commit fb2863c

Browse files
Refactor repository structure and enhance CLI commands for dataset comparison and analysis
1 parent 5cad26b commit fb2863c

9 files changed

Lines changed: 319 additions & 246 deletions

File tree

AGENTS.md

Lines changed: 28 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,32 @@
1-
# AGENTS.md
1+
# Repository Guidelines
22

3-
## Scope
4-
These instructions apply to the entire repository.
3+
## Project Structure & Module Organization
4+
- `sqlcompare/` contains the core library and CLI entry points.
5+
- `tests/` holds pytest-based tests (files named `test_*.py`).
6+
- `examples/` includes sample inputs and demo usage.
7+
- `scripts/` provides helper scripts for local workflows.
8+
- `build/` contains build artifacts and is not a source of truth.
59

6-
## Initial setup
7-
- Install uv if not installed
8-
- Install dev dependencies with: `uv sync --extra dev`
10+
## Build, Test, and Development Commands
11+
- `uv sync --extra dev`: install dev dependencies for local work.
12+
- `uv run pytest`: run the full test suite.
13+
- `uv run python -m sqlcompare`: run the CLI module directly for quick checks.
914

10-
## Testing
11-
- Run tests with: `uv run pytest`
12-
- Always run tests after making a change.
13-
- If tests are not run, state the reason in the final response.
15+
## Coding Style & Naming Conventions
16+
- Python style: follow PEP 8 with 4-space indentation.
17+
- Prefer explicit, descriptive names (for example, `dataset_path`, `compare_stats`).
18+
- Avoid non-ASCII characters unless a file already uses them.
1419

15-
## CLI conventions
16-
- CLI help text should use user-facing terms such as "connector" (not "adapter").
17-
- Any new CLI option or behavior change should be reflected in help strings.
20+
## Testing Guidelines
21+
- Framework: pytest.
22+
- Test files: `tests/test_*.py`; test functions: `test_*`.
23+
- Run `uv run pytest` after any change; if the tests were not run, say so in your summary and state the reason.
24+
25+
## Commit & Pull Request Guidelines
26+
- No explicit commit format is enforced in this repo.
27+
- Use clear, imperative commit messages (for example, "Add CSV diff output").
28+
- PRs should describe the change, include test results, and link related issues when available.
29+
30+
## Configuration & Environment Tips
31+
- Ensure `uv` is installed before syncing dependencies.
32+
- Keep local data and credentials out of the repo; use environment variables if needed.
Lines changed: 47 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,18 @@
1+
from __future__ import annotations
2+
13
import os
24

3-
from sqlcompare.log import log
4-
from sqlcompare.db import DBConnection
5-
from ..analysis.utils import (
5+
import typer
6+
7+
from sqlcompare.analysis.utils import (
68
_display,
79
find_diff_file,
810
find_diff_run,
911
list_available_diffs,
1012
)
11-
from .comparator import DatabaseComparator
13+
from sqlcompare.compare.comparator import DatabaseComparator
14+
from sqlcompare.db import DBConnection
15+
from sqlcompare.log import log
1216

1317

1418
def analyze_diff(
@@ -21,28 +25,11 @@ def analyze_diff(
2125
missing_current: bool = False,
2226
missing_previous: bool = False,
2327
) -> None:
24-
run_analysis(
25-
diff_id,
26-
column=column,
27-
limit=limit,
28-
save=save,
29-
list_columns=list_columns,
30-
stats=stats,
31-
missing_current=missing_current,
32-
missing_previous=missing_previous,
33-
)
34-
28+
"""
29+
Analyze diff results from a previous comparison run.
3530
36-
def run_analysis(
37-
diff_id,
38-
column=None,
39-
limit=25,
40-
save=False,
41-
list_columns=False,
42-
stats=False,
43-
missing_current=False,
44-
missing_previous=False,
45-
):
31+
Supports database-backed diffs and legacy pickle files.
32+
"""
4633
run = find_diff_run(diff_id)
4734
if run:
4835
tables = run["tables"]
@@ -212,3 +199,38 @@ def run_analysis(
212199
return
213200

214201
log.error("❌ Pickle-based diff files are not supported without pandas.")
202+
203+
204+
def analyze_diff_cmd(
    diff_id: str = typer.Argument(..., help="Diff run ID"),
    column: str | None = typer.Option(
        None,
        "--column",
        "-c",
        help="Filter by specific column name",
    ),
    limit: int = typer.Option(
        25,
        "--limit",
        "-l",
        help="Limit results to display",
    ),
    save: bool = typer.Option(
        False,
        "--save",
        help="Save filtered results to CSV file",
    ),
    list_columns: bool = typer.Option(
        False,
        "--list-columns",
        help="List available columns in the diff data",
    ),
    stats: bool = typer.Option(
        False,
        "--stats",
        help="Show statistics table instead of differences",
    ),
    missing_current: bool = typer.Option(
        False,
        "--missing-current",
        help="Show rows only in current dataset",
    ),
    missing_previous: bool = typer.Option(
        False,
        "--missing-previous",
        help="Show rows only in previous dataset",
    ),
) -> None:
    """Analyze diff results from a previous comparison run."""
    # NOTE: Typer renders the docstring above as the command's help text, so
    # it is user-facing output rather than internal documentation.

    # Thin CLI adapter: every parsed flag is handed to ``analyze_diff``
    # unchanged, under the keyword of the same name.
    forwarded = {
        "column": column,
        "limit": limit,
        "save": save,
        "list_columns": list_columns,
        "stats": stats,
        "missing_current": missing_current,
        "missing_previous": missing_previous,
    }
    analyze_diff(diff_id, **forwarded)

sqlcompare/cli.py

Lines changed: 11 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -2,108 +2,20 @@
22

33
import typer
44

5-
from sqlcompare.compare.analyze import analyze_diff
6-
from sqlcompare.compare.listing import list_diffs
7-
from sqlcompare.compare.table import compare_table
8-
from sqlcompare.dataset import compare_dataset
9-
from sqlcompare.query import query
5+
from sqlcompare.analyze_diff import analyze_diff_cmd
6+
from sqlcompare.dataset import dataset_cmd
7+
from sqlcompare.list_diffs import list_diffs_cmd
8+
from sqlcompare.query import query_cmd
9+
from sqlcompare.table import table_cmd
1010

1111
app = typer.Typer(help="Compare database tables and inspect diffs.")
1212

13-
14-
@app.command("table")
15-
def table(
16-
table1: str = typer.Argument(
17-
..., help="Previous table name or CSV/XLSX file path"
18-
),
19-
table2: str = typer.Argument(..., help="Current table name or CSV/XLSX file path"),
20-
ids: str | None = typer.Argument(
21-
None, help="Comma-separated list of key columns (required unless --stats)"
22-
),
23-
connection: str | None = typer.Option(
24-
None, "--connection", "-c", help="Database connector name"
25-
),
26-
schema: str | None = typer.Option(None, "--schema", help="Schema for test tables"),
27-
stats: bool = typer.Option(
28-
False, "--stats", help="Compare tables statistically without joining rows"
29-
),
30-
) -> None:
31-
compare_table(table1, table2, ids, connection, schema, stats=stats)
32-
33-
34-
@app.command("analyze-diff")
35-
def analyze(
36-
diff_id: str = typer.Argument(..., help="Diff run ID"),
37-
column: str | None = typer.Option(
38-
None, "--column", "-c", help="Filter by specific column name"
39-
),
40-
limit: int = typer.Option(25, "--limit", "-l", help="Limit results to display"),
41-
save: bool = typer.Option(
42-
False, "--save", help="Save filtered results to CSV file"
43-
),
44-
list_columns: bool = typer.Option(
45-
False, "--list-columns", help="List available columns in the diff data"
46-
),
47-
stats: bool = typer.Option(
48-
False, "--stats", help="Show statistics table instead of differences"
49-
),
50-
missing_current: bool = typer.Option(
51-
False, "--missing-current", help="Show rows only in current dataset"
52-
),
53-
missing_previous: bool = typer.Option(
54-
False, "--missing-previous", help="Show rows only in previous dataset"
55-
),
56-
) -> None:
57-
analyze_diff(
58-
diff_id,
59-
column=column,
60-
limit=limit,
61-
save=save,
62-
list_columns=list_columns,
63-
stats=stats,
64-
missing_current=missing_current,
65-
missing_previous=missing_previous,
66-
)
67-
68-
69-
@app.command("list-diffs")
70-
def list_diffs_cmd(
71-
pattern: str | None = typer.Argument(None, help="Match diff IDs"),
72-
test: str | None = typer.Option(None, "--test", help="Filter by test name"),
73-
) -> None:
74-
list_diffs(pattern, test)
75-
76-
77-
@app.command("query")
78-
def query_cmd(
79-
q: str = typer.Argument(..., help="SQL query to run"),
80-
connection: str | None = typer.Option(
81-
None, "--connection", "-c", "--conn", help="Connector name"
82-
),
83-
output: str = typer.Option(
84-
"terminal",
85-
"--output",
86-
"-o",
87-
help="Output format or file path. Use 'terminal' or provide a .csv filename",
88-
),
89-
) -> None:
90-
query(q, connection, output)
91-
92-
93-
@app.command("dataset")
94-
def dataset_cmd(
95-
path: str = typer.Argument(..., help="Path to dataset YAML file"),
96-
connection: str | None = typer.Option(
97-
None,
98-
"--connection",
99-
"-c",
100-
help="Database connector name (optional when both datasets use file_name)",
101-
),
102-
schema: str | None = typer.Option(
103-
None, "--schema", help="Schema for dataset tables"
104-
),
105-
) -> None:
106-
compare_dataset(path, connection, schema)
13+
# Register all commands
14+
app.command("table")(table_cmd)
15+
app.command("analyze-diff")(analyze_diff_cmd)
16+
app.command("list-diffs")(list_diffs_cmd)
17+
app.command("query")(query_cmd)
18+
app.command("dataset")(dataset_cmd)
10719

10820

10921
def main() -> None:

sqlcompare/compare/__init__.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,7 @@
1-
from .analyze import analyze_diff
2-
from .table import compare_table
31
from .comparator import DatabaseComparator
4-
from .listing import list_diffs
52
from .materialize import compare_queries_in_db
63

74
__all__ = [
85
"DatabaseComparator",
9-
"analyze_diff",
10-
"compare_table",
116
"compare_queries_in_db",
12-
"list_diffs",
137
]

0 commit comments

Comments
 (0)