Skip to content

Commit c7e2aa5

Browse files
committed
feat: attribute $N placeholders to table.column via EXPLAIN
1 parent 7097a99 commit c7e2aa5

3 files changed

Lines changed: 261 additions & 7 deletions

File tree

cluster.go

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,23 @@ type (
1414
}
1515

1616
Cluster struct {
17-
Fingerprint string `json:"fingerprint"`
18-
Canonical string `json:"canonical"`
19-
Members []Query `json:"members"`
20-
TotalCalls int64 `json:"total_calls"`
21-
TotalExecTimeMs float64 `json:"total_exec_time_ms,omitempty"`
22-
MeanExecTimeMs float64 `json:"mean_exec_time_ms,omitempty"`
23-
Rows int64 `json:"rows,omitempty"`
17+
Fingerprint string `json:"fingerprint"`
18+
Canonical string `json:"canonical"`
19+
Members []Query `json:"members"`
20+
TotalCalls int64 `json:"total_calls"`
21+
TotalExecTimeMs float64 `json:"total_exec_time_ms,omitempty"`
22+
MeanExecTimeMs float64 `json:"mean_exec_time_ms,omitempty"`
23+
Rows int64 `json:"rows,omitempty"`
24+
Params []ParamAttribution `json:"params,omitempty"`
25+
}
26+
27+
// ParamAttribution describes the table column that one $N placeholder of a
// cluster's canonical SQL is compared against.
ParamAttribution struct {
28+
// Position is the placeholder number N parsed from "$N".
Position int `json:"position"`
29+
// Schema is the schema of the attributed table; omitted when unknown.
Schema string `json:"schema,omitempty"`
30+
// Table is the attributed table name; omitted when unknown.
Table string `json:"table,omitempty"`
31+
// Column is the column name found next to the placeholder.
Column string `json:"column,omitempty"`
32+
// Confidence is one of "exact", "heuristic" or "none".
Confidence string `json:"confidence"`
33+
// Note carries free-form detail, e.g. the error text when attribution failed.
Note string `json:"note,omitempty"`
2434
}
2535
)
2636

cmd/qshape/attribute.go

Lines changed: 243 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,243 @@
1+
package main
2+
3+
import (
4+
"context"
5+
"encoding/json"
6+
"fmt"
7+
"io"
8+
"os"
9+
"regexp"
10+
"sort"
11+
"strconv"
12+
13+
"github.com/boringsql/qshape"
14+
"github.com/jackc/pgx/v5"
15+
"github.com/spf13/cobra"
16+
)
17+
18+
type (
19+
clustersDoc struct {
20+
Clusters []qshape.Cluster `json:"clusters"`
21+
}
22+
23+
attrCtx struct {
24+
byPosition map[int]*qshape.ParamAttribution
25+
}
26+
27+
tableRef struct {
28+
Schema string
29+
Table string
30+
}
31+
32+
planNode struct {
33+
NodeType string `json:"Node Type"`
34+
Schema string `json:"Schema"`
35+
RelationName string `json:"Relation Name"`
36+
Alias string `json:"Alias"`
37+
Filter string `json:"Filter"`
38+
IndexCond string `json:"Index Cond"`
39+
HashCond string `json:"Hash Cond"`
40+
RecheckCond string `json:"Recheck Cond"`
41+
JoinFilter string `json:"Join Filter"`
42+
MergeCond string `json:"Merge Cond"`
43+
SubplanName string `json:"Subplan Name"`
44+
Plans json.RawMessage `json:"Plans"`
45+
}
46+
)
47+
48+
var (
	// paramCondRE matches a comparison between a column and a placeholder in
	// either order: `column op $N` or `$N op column`. The column may be
	// qualified as alias.column.
	//
	// Capture groups: [1]=alias, [2]=column, [3]=position for the first form;
	// [4]=position, [5]=alias, [6]=column for the second form.
	//
	// In the first form an optional `)::type` suffix directly after the column
	// is skipped, so a cast-wrapped column such as `(email)::text = $1` yields
	// column "email" rather than the type name "text". Multi-word type names
	// and a trailing `[]` are accepted; types with modifiers such as
	// numeric(10,2) are not, and such conditions are left unmatched.
	paramCondRE = regexp.MustCompile(`(?:\(?(\w+)\.)?(\w+)(?:\)::\w+(?:\s\w+)*(?:\[\])?)?\s*(?:=|<|>|<=|>=|<>|!=)\s*\$(\d+)|\$(\d+)\s*(?:=|<|>|<=|>=|<>|!=)\s*(?:\(?(\w+)\.)?(\w+)`)

	// paramInRE matches a set-membership test against placeholders, written
	// either as `column IN ($N, ...)` or as `column = ANY ($N)` /
	// `column = ANY (ARRAY[$N, ...])`. Only the first placeholder is captured.
	//
	// Capture groups: [1]=alias, [2]=column, [3]=position. As in paramCondRE,
	// an optional `)::type` cast suffix after the column is skipped.
	paramInRE = regexp.MustCompile(`(?:\(?(\w+)\.)?(\w+)(?:\)::\w+(?:\s\w+)*(?:\[\])?)?(?:\s+IN|\s*=\s*ANY)\s*\(\s*(?:ARRAY\[\s*)?\$(\d+)`)
)
54+
55+
func attributeCmd() *cobra.Command {
56+
var (
57+
inPath string
58+
connStr string
59+
top int
60+
)
61+
cmd := &cobra.Command{
62+
Use: "attribute",
63+
Short: "Attribute $N placeholders to table.column",
64+
Long: `Read a clusters.json, run EXPLAIN (GENERIC_PLAN) on each cluster's
65+
canonical SQL, and attribute every $N placeholder to a table.column.
66+
67+
Attribution failures are recorded as confidence:"none" rather than
68+
aborting. Writes the input to stdout with a "params" array added to
69+
each cluster.`,
70+
Args: cobra.NoArgs,
71+
RunE: func(_ *cobra.Command, _ []string) error {
72+
return runAttribute(inPath, connStr, top)
73+
},
74+
}
75+
cmd.Flags().StringVar(&inPath, "in", "", "input clusters.json (default: stdin)")
76+
cmd.Flags().StringVar(&connStr, "conn", "", "PostgreSQL connection string (required)")
77+
cmd.Flags().IntVar(&top, "top", 0, "only attribute the top N clusters (0 = all)")
78+
_ = cmd.MarkFlagRequired("conn")
79+
return cmd
80+
}
81+
82+
func runAttribute(inPath, connStr string, top int) error {
83+
var r io.Reader = os.Stdin
84+
if inPath != "" {
85+
f, err := os.Open(inPath)
86+
if err != nil {
87+
return err
88+
}
89+
defer f.Close()
90+
r = f
91+
}
92+
var doc clustersDoc
93+
if err := json.NewDecoder(r).Decode(&doc); err != nil {
94+
return fmt.Errorf("decode clusters.json: %w", err)
95+
}
96+
97+
ctx := context.Background()
98+
conn, err := pgx.Connect(ctx, connStr)
99+
if err != nil {
100+
return fmt.Errorf("connect: %w", err)
101+
}
102+
defer conn.Close(ctx)
103+
104+
attributed, skipped := 0, 0
105+
for i := range doc.Clusters {
106+
if top > 0 && i >= top {
107+
break
108+
}
109+
c := &doc.Clusters[i]
110+
if c.Fingerprint == "" || c.Canonical == "" {
111+
continue
112+
}
113+
params, err := attributeCluster(ctx, conn, c.Canonical)
114+
if err != nil {
115+
skipped++
116+
c.Params = []qshape.ParamAttribution{{Confidence: "none", Note: err.Error()}}
117+
continue
118+
}
119+
if len(params) == 0 {
120+
skipped++
121+
continue
122+
}
123+
c.Params = params
124+
attributed++
125+
}
126+
127+
fmt.Fprintf(os.Stderr, "attributed %d clusters, %d skipped\n", attributed, skipped)
128+
enc := json.NewEncoder(os.Stdout)
129+
enc.SetIndent("", " ")
130+
return enc.Encode(doc)
131+
}
132+
133+
func attributeCluster(ctx context.Context, conn *pgx.Conn, canonical string) ([]qshape.ParamAttribution, error) {
134+
var planJSON []byte
135+
row := conn.QueryRow(ctx, "EXPLAIN (GENERIC_PLAN, FORMAT JSON) "+canonical)
136+
if err := row.Scan(&planJSON); err != nil {
137+
return nil, err
138+
}
139+
140+
var plans []struct {
141+
Plan json.RawMessage `json:"Plan"`
142+
}
143+
if err := json.Unmarshal(planJSON, &plans); err != nil {
144+
return nil, err
145+
}
146+
if len(plans) == 0 {
147+
return nil, nil
148+
}
149+
150+
c := &attrCtx{byPosition: map[int]*qshape.ParamAttribution{}}
151+
walkPlan(plans[0].Plan, "", "", c)
152+
153+
out := make([]qshape.ParamAttribution, 0, len(c.byPosition))
154+
for _, p := range c.byPosition {
155+
out = append(out, *p)
156+
}
157+
sort.Slice(out, func(i, j int) bool { return out[i].Position < out[j].Position })
158+
return out, nil
159+
}
160+
161+
func walkPlan(raw json.RawMessage, parentSchema, parentTable string, ctx *attrCtx) {
162+
if len(raw) == 0 {
163+
return
164+
}
165+
var n planNode
166+
if err := json.Unmarshal(raw, &n); err != nil {
167+
return
168+
}
169+
170+
// Track alias → table mapping so we can resolve `u.id = $1` to users.id
171+
aliasToTable := map[string]tableRef{}
172+
if n.RelationName != "" {
173+
t := tableRef{Schema: n.Schema, Table: n.RelationName}
174+
aliasToTable[n.RelationName] = t
175+
if n.Alias != "" && n.Alias != n.RelationName {
176+
aliasToTable[n.Alias] = t
177+
}
178+
}
179+
180+
for _, cond := range []string{n.IndexCond, n.HashCond, n.Filter, n.RecheckCond, n.JoinFilter, n.MergeCond} {
181+
if cond == "" {
182+
continue
183+
}
184+
attributeCond(cond, aliasToTable, n.Schema, n.RelationName, ctx)
185+
}
186+
187+
if len(n.Plans) > 0 {
188+
var children []json.RawMessage
189+
if err := json.Unmarshal(n.Plans, &children); err == nil {
190+
for _, c := range children {
191+
walkPlan(c, n.Schema, n.RelationName, ctx)
192+
}
193+
}
194+
}
195+
}
196+
197+
func attributeCond(cond string, aliases map[string]tableRef, fallbackSchema, fallbackTable string, ctx *attrCtx) {
198+
for _, m := range paramCondRE.FindAllStringSubmatch(cond, -1) {
199+
// Two alternatives in the regex: [1]=alias,[2]=col,[3]=pos OR [4]=pos,[5]=alias,[6]=col
200+
var aliasOrTable, col, posStr string
201+
if m[3] != "" {
202+
aliasOrTable, col, posStr = m[1], m[2], m[3]
203+
} else {
204+
aliasOrTable, col, posStr = m[5], m[6], m[4]
205+
}
206+
recordParam(aliasOrTable, col, posStr, aliases, fallbackSchema, fallbackTable, ctx)
207+
}
208+
for _, m := range paramInRE.FindAllStringSubmatch(cond, -1) {
209+
recordParam(m[1], m[2], m[3], aliases, fallbackSchema, fallbackTable, ctx)
210+
}
211+
}
212+
213+
func recordParam(aliasOrTable, col, posStr string, aliases map[string]tableRef, fallbackSchema, fallbackTable string, ctx *attrCtx) {
214+
pos, err := strconv.Atoi(posStr)
215+
if err != nil {
216+
return
217+
}
218+
// Prefer higher-confidence attribution if we already saw this param.
219+
existing, already := ctx.byPosition[pos]
220+
if already && existing.Confidence == "exact" {
221+
return
222+
}
223+
224+
ref, ok := aliases[aliasOrTable]
225+
confidence := "exact"
226+
if !ok {
227+
// Bare column without an alias — attribute to the current relation.
228+
if fallbackTable != "" {
229+
ref = tableRef{Schema: fallbackSchema, Table: fallbackTable}
230+
confidence = "heuristic"
231+
} else {
232+
confidence = "none"
233+
}
234+
}
235+
236+
ctx.byPosition[pos] = &qshape.ParamAttribution{
237+
Position: pos,
238+
Schema: ref.Schema,
239+
Table: ref.Table,
240+
Column: col,
241+
Confidence: confidence,
242+
}
243+
}

cmd/qshape/root.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ func Run() error {
1616
normalizeCmd(),
1717
fingerprintCmd(),
1818
captureCmd(),
19+
attributeCmd(),
1920
)
2021
return root.Execute()
2122
}

0 commit comments

Comments
 (0)