|
| 1 | +from __future__ import annotations |
| 2 | + |
| 3 | +import json |
| 4 | +import typer |
| 5 | + |
| 6 | +from sqlcompare.analysis.utils import find_diff_file, find_diff_run, list_available_diffs |
| 7 | +from sqlcompare.log import log |
| 8 | + |
| 9 | + |
def list_diff_queries(diff_id: str) -> None:
    """Log a JSON description of the queryable tables behind a diff run.

    Looks the run up with ``find_diff_run``. When no run is found, an error
    is logged instead: either that the diff exists only as a pickle-based
    diff file (``find_diff_file`` matched), or that the ID is unknown, in
    which case the available diff IDs are listed as well. When the run has
    no ``"tables"`` entry an error is logged and nothing else happens.

    Args:
        diff_id: Identifier of the diff run to describe.
    """
    run = find_diff_run(diff_id)
    if not run:
        if find_diff_file(diff_id):
            log.error(
                "❌ Diff data found, but it is a pickle-based diff without queryable tables."
            )
            return
        log.error(f"❌ Diff data with ID '{diff_id}' not found.")
        log.info("💡 Available diff IDs:")
        list_available_diffs()
        return

    tables = run.get("tables")
    if not tables:
        log.error("❌ Diff metadata missing table definitions.")
        return

    # A DuckDB file, when recorded, takes precedence over the connection label.
    duckdb_file = run.get("duckdb_file")
    connection_display = (
        f"duckdb:///{duckdb_file}"
        if duckdb_file
        else (run.get("conn") or "(default connection)")
    )

    payload = _build_llm_payload(
        diff_id=diff_id,
        resolved_id=run.get("id", diff_id),
        connection_display=connection_display,
        tables=tables,
        cols_prev=run.get("cols_prev", []),
        cols_new=run.get("cols_new", []),
        index_cols=run.get("index_cols", []),
        common_cols=run.get("common_cols"),
    )
    log.info(json.dumps(payload, indent=2))
| 54 | + |
| 55 | + |
def _format_columns(columns: list[str]) -> list[str]:
    """Return ``columns`` as-is; the very same list object is handed back."""
    formatted = columns
    return formatted
| 58 | + |
| 59 | + |
def _build_llm_payload(
    *,
    diff_id: str,
    resolved_id: str,
    connection_display: str,
    tables: dict[str, str],
    cols_prev: list[str],
    cols_new: list[str],
    index_cols: list[str],
    common_cols: list[str] | None,
) -> dict[str, object]:
    """Assemble the dictionary describing a diff run's tables and queries.

    Args:
        diff_id: The diff ID as requested by the caller.
        resolved_id: The ID stored with the run.
        connection_display: Human-readable connection string or label.
        tables: Mapping of table roles (``"previous"``, ``"new"``,
            ``"join"``) to table names; missing roles yield ``None`` names.
        cols_prev: Column names of the previous dataset.
        cols_new: Column names of the new dataset.
        index_cols: Index column names.
        common_cols: Columns shared by both datasets, or ``None``.

    Returns:
        A dict with the keys ``diff_id``, ``resolved_diff_id``,
        ``connection``, ``index_columns``, ``common_columns``, ``tables``
        and ``queries``.
    """
    # Join-table columns: every previous column suffixed "_previous",
    # followed by every new column suffixed "_new".
    suffixed_cols = [f"{name}_previous" for name in cols_prev]
    suffixed_cols.extend(f"{name}_new" for name in cols_new)

    # (key in `tables`, role, description, columns) for each described table.
    table_specs = (
        ("previous", "previous", "Previous dataset (original table)", cols_prev),
        ("new", "current", "Current dataset (new table)", cols_new),
        (
            "join",
            "join",
            "Full outer join on index columns with _previous/_new suffixes",
            suffixed_cols,
        ),
    )
    entries = [
        {
            "name": tables.get(table_key),
            "role": role,
            "content": description,
            "columns": _format_columns(cols),
        }
        for table_key, role, description, cols in table_specs
    ]

    return {
        "diff_id": diff_id,
        "resolved_diff_id": resolved_id,
        "connection": connection_display,
        "index_columns": index_cols,
        "common_columns": common_cols or [],
        "tables": entries,
        "queries": _build_queries(tables, index_cols),
    }
| 104 | + |
| 105 | + |
def _build_queries(tables: dict[str, str], index_cols: list[str]) -> list[dict[str, str]]:
    """Build SQL query templates that run against the join table of a diff.

    Each template is a ``{"name": ..., "sql": ...}`` dict. Templates that
    look at a single data column contain the literal placeholder
    ``<column>``, which must be replaced with a real column name before the
    SQL is executed.

    Args:
        tables: Mapping of table roles to table names; only ``"join"`` is read.
        index_cols: Index column names. The generated SQL references each one
            as ``"<name>_previous"`` and ``"<name>_new"`` in the join table.

    Returns:
        The list of query templates, or an empty list when there is no join
        table or no index columns.
    """
    join_table = tables.get("join")
    if not join_table or not index_cols:
        return []

    # A row is absent from one side when all index columns of that side are NULL.
    prev_null_cond = " AND ".join(f'"{c}_previous" IS NULL' for c in index_cols)
    new_null_cond = " AND ".join(f'"{c}_new" IS NULL' for c in index_cols)
    idx_expr = ", ".join(
        f'COALESCE("{c}_new", "{c}_previous") AS "{c}"' for c in index_cols
    )
    # Plain index column references, for selecting from a subquery that has
    # already applied ``idx_expr`` (the suffixed columns no longer exist there).
    idx_names = ", ".join(f'"{c}"' for c in index_cols)

    col_placeholder = "<column>"
    prev_col = f'"{col_placeholder}_previous"'
    new_col = f'"{col_placeholder}_new"'
    # Null-safe inequality written with portable operators only. The naive
    # ``NOT (a = b OR (a IS NULL AND b IS NULL))`` evaluates to NULL when
    # exactly one side is NULL, which silently drops NULL <-> value changes.
    diff_cond = (
        f"({prev_col} <> {new_col} OR "
        f"({prev_col} IS NULL AND {new_col} IS NOT NULL) OR "
        f"({prev_col} IS NOT NULL AND {new_col} IS NULL))"
        f" AND NOT ({prev_null_cond}) AND NOT ({new_null_cond})"
    )

    def _query(name: str, sql: str) -> dict[str, str]:
        return {"name": name, "sql": sql}

    return [
        _query(
            "rows_only_in_current",
            f"SELECT * FROM {join_table} WHERE {prev_null_cond};",
        ),
        _query(
            "rows_only_in_previous",
            f"SELECT * FROM {join_table} WHERE {new_null_cond};",
        ),
        _query(
            "count_rows_only_in_current",
            f"SELECT COUNT(*) AS rows_only_in_current FROM {join_table} WHERE {prev_null_cond};",
        ),
        _query(
            "count_rows_only_in_previous",
            f"SELECT COUNT(*) AS rows_only_in_previous FROM {join_table} WHERE {new_null_cond};",
        ),
        _query(
            "rows_with_column_differences",
            f"SELECT {idx_expr}, '{col_placeholder}' AS \"COLUMN\", "
            f'CAST({prev_col} AS VARCHAR) AS "BEFORE", '
            f'CAST({new_col} AS VARCHAR) AS "CURRENT" '
            f"FROM {join_table} WHERE {diff_cond};",
        ),
        _query(
            "count_column_differences",
            f"SELECT COUNT(*) AS diff_count FROM {join_table} WHERE {diff_cond};",
        ),
        _query(
            "top_10_diff_samples",
            # The outer SELECT must use the aliased index columns: the
            # subquery only exposes "<idx>", "COLUMN", "BEFORE" and "CURRENT".
            f'SELECT {idx_names}, "COLUMN", "BEFORE", "CURRENT"\n'
            "FROM (\n"
            f"    SELECT {idx_expr}, '{col_placeholder}' AS \"COLUMN\",\n"
            f'        CAST({prev_col} AS VARCHAR) AS "BEFORE",\n'
            f'        CAST({new_col} AS VARCHAR) AS "CURRENT"\n'
            f"    FROM {join_table} WHERE {diff_cond}\n"
            ") AS diffs\n"
            "LIMIT 10;",
        ),
    ]
| 166 | + |
| 167 | + |
def diff_queries_cmd(
    diff_id: str = typer.Argument(..., help="Diff run ID"),
) -> None:
    """List queryable tables and SQL templates for a diff run.

    Examples:
        sqlcompare diff-queries <diff_id>
    """
    # NOTE(review): presumably registered as a Typer command elsewhere; if so,
    # the docstring above doubles as the command's --help text — confirm
    # before rewording it.
    # Thin CLI entry point: all of the work is delegated to list_diff_queries.
    list_diff_queries(diff_id)
0 commit comments