Skip to content

Commit 6f9df64

Browse files
committed
feat: regresql-stub generates sql/ for regresql
1 parent c7e2aa5 commit 6f9df64

2 files changed

Lines changed: 324 additions & 0 deletions

File tree

cmd/qshape/regresql_stub.go

Lines changed: 323 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,323 @@
1+
package main
2+
3+
import (
4+
"encoding/json"
5+
"fmt"
6+
"io"
7+
"os"
8+
"path/filepath"
9+
"regexp"
10+
"sort"
11+
"strconv"
12+
"strings"
13+
14+
"github.com/boringsql/qshape"
15+
"github.com/spf13/cobra"
16+
)
17+
18+
// fixtureDoc is the in-memory form of a fixture JSON file. Its top-level
// "tables" object maps a table key to that table's column names and rows.
type fixtureDoc struct {
	// Tables is keyed by table name. sampleValues probes the keys
	// "schema.table", "table" and "public.table", in that order.
	Tables map[string]struct {
		// Columns holds the column names; sampleValues uses a name's index
		// here to pick the matching cell out of each row.
		Columns []string `json:"columns"`
		// Rows holds one slice of decoded JSON values per row.
		Rows [][]any `json:"rows"`
	} `json:"tables"`
}
24+
25+
// paramRE matches positional placeholders of the form $N (a dollar sign
// followed by one or more digits) and captures the digits. rewriteParams uses
// it to rename each placeholder to :paramN.
var paramRE = regexp.MustCompile(`\$(\d+)`)
26+
27+
func regresqlStubCmd() *cobra.Command {
28+
var (
29+
inPath string
30+
outDir string
31+
top int
32+
minCalls int64
33+
fixturePath string
34+
samplesPer int
35+
)
36+
cmd := &cobra.Command{
37+
Use: "regresql-stub",
38+
Short: "Generate regresql sql/ + plans/ skeletons from clusters.json",
39+
Long: `Walk a clusters.json and emit regresql sql/ + plans/ skeletons for
40+
the top N clusters.
41+
42+
Each cluster becomes a .sql file (canonical SQL with $N → :paramN) and
43+
a YAML plan with REPLACE_ME test cases. If --fixture is set and the
44+
clusters were first run through 'qshape attribute', plan YAMLs are
45+
auto-populated with real sampled values.`,
46+
Args: cobra.NoArgs,
47+
RunE: func(_ *cobra.Command, _ []string) error {
48+
return runRegresqlStub(inPath, outDir, top, minCalls, fixturePath, samplesPer)
49+
},
50+
}
51+
cmd.Flags().StringVar(&inPath, "in", "", "input clusters.json (default: stdin)")
52+
cmd.Flags().StringVar(&outDir, "out", "regresql-stubs", "output directory")
53+
cmd.Flags().IntVar(&top, "top", 10, "number of top clusters to emit")
54+
cmd.Flags().Int64Var(&minCalls, "min-calls", 0, "skip clusters with total_calls <= this")
55+
cmd.Flags().StringVar(&fixturePath, "fixture", "", "fixturize-format JSON used to auto-fill plan YAMLs")
56+
cmd.Flags().IntVar(&samplesPer, "samples", 2, "test cases to emit per query (sampled from fixture if available)")
57+
return cmd
58+
}
59+
60+
func loadFixture(path string) (*fixtureDoc, error) {
61+
f, err := os.Open(path)
62+
if err != nil {
63+
return nil, err
64+
}
65+
defer f.Close()
66+
var fix fixtureDoc
67+
if err := json.NewDecoder(f).Decode(&fix); err != nil {
68+
return nil, fmt.Errorf("decode fixture: %w", err)
69+
}
70+
return &fix, nil
71+
}
72+
73+
// sampleValues returns up to n non-null values from schema.table.column.
74+
// Returns nil if the table or column isn't present in the fixture.
75+
func (f *fixtureDoc) sampleValues(schema, table, column string, n int) []any {
76+
if f == nil {
77+
return nil
78+
}
79+
keys := []string{schema + "." + table, table, "public." + table}
80+
var tbl *struct {
81+
Columns []string `json:"columns"`
82+
Rows [][]any `json:"rows"`
83+
}
84+
for _, k := range keys {
85+
if t, ok := f.Tables[k]; ok {
86+
t := t
87+
tbl = &t
88+
break
89+
}
90+
}
91+
if tbl == nil {
92+
return nil
93+
}
94+
idx := -1
95+
for i, c := range tbl.Columns {
96+
if c == column {
97+
idx = i
98+
break
99+
}
100+
}
101+
if idx < 0 {
102+
return nil
103+
}
104+
out := make([]any, 0, n)
105+
for _, row := range tbl.Rows {
106+
if idx >= len(row) {
107+
continue
108+
}
109+
v := row[idx]
110+
if v == nil {
111+
continue
112+
}
113+
out = append(out, v)
114+
if len(out) >= n {
115+
break
116+
}
117+
}
118+
return out
119+
}
120+
121+
func runRegresqlStub(inPath, outDir string, top int, minCalls int64, fixturePath string, samplesPer int) error {
122+
// TODO: plans/ generation is temporarily disabled — only sql/ stubs are created
123+
_, _ = fixturePath, samplesPer
124+
// var fix *fixtureDoc
125+
// if fixturePath != "" {
126+
// var err error
127+
// fix, err = loadFixture(fixturePath)
128+
// if err != nil {
129+
// return err
130+
// }
131+
// }
132+
// if samplesPer < 1 {
133+
// samplesPer = 1
134+
// }
135+
doc, err := loadClustersDoc(inPath)
136+
if err != nil {
137+
return err
138+
}
139+
140+
sqlDir := filepath.Join(outDir, "sql")
141+
if err := os.MkdirAll(sqlDir, 0o755); err != nil {
142+
return err
143+
}
144+
// plansDir := filepath.Join(outDir, "plans")
145+
// if err := os.MkdirAll(plansDir, 0o755); err != nil {
146+
// return err
147+
// }
148+
149+
emitted := 0
150+
for _, c := range doc.Clusters {
151+
if emitted >= top {
152+
break
153+
}
154+
if c.Fingerprint == "" {
155+
continue
156+
}
157+
if c.TotalCalls <= minCalls {
158+
continue
159+
}
160+
161+
emitted++
162+
slug := stubSlug(emitted, c.Fingerprint)
163+
sql, _ := rewriteParams(c.Canonical)
164+
165+
sqlPath := filepath.Join(sqlDir, slug+".sql")
166+
if err := writeSQLStub(sqlPath, slug, c, sql); err != nil {
167+
return err
168+
}
169+
// planPath := filepath.Join(plansDir, slug+"_"+slug+".yaml")
170+
// values := sampleValuesForParams(params, c.Params, fix, samplesPer)
171+
// if err := writePlanStub(planPath, params, values); err != nil {
172+
// return err
173+
// }
174+
}
175+
176+
fmt.Fprintf(os.Stderr, "wrote %d stubs to %s\n", emitted, outDir)
177+
return nil
178+
}
179+
180+
// stubSlug builds the slug used to name a cluster's stub: "q", the rank
// zero-padded to at least two digits, "-", and the first 8 bytes (or fewer,
// if shorter) of the fingerprint once a leading "sha1:" has been stripped.
func stubSlug(rank int, fp string) string {
	const maxHashLen = 8

	hash := strings.TrimPrefix(fp, "sha1:")
	if len(hash) > maxHashLen {
		hash = hash[:maxHashLen]
	}
	return "q" + fmt.Sprintf("%02d", rank) + "-" + hash
}
187+
188+
// rewriteParams replaces $N with :paramN and returns the sorted unique
189+
// param names discovered. $1 → :param1, $2 → :param2, ...
190+
func rewriteParams(sql string) (string, []string) {
191+
seen := map[string]struct{}{}
192+
out := paramRE.ReplaceAllStringFunc(sql, func(m string) string {
193+
n := m[1:]
194+
name := "param" + n
195+
seen[name] = struct{}{}
196+
return ":" + name
197+
})
198+
names := make([]string, 0, len(seen))
199+
for n := range seen {
200+
names = append(names, n)
201+
}
202+
sort.Slice(names, func(i, j int) bool {
203+
ni, _ := strconv.Atoi(strings.TrimPrefix(names[i], "param"))
204+
nj, _ := strconv.Atoi(strings.TrimPrefix(names[j], "param"))
205+
return ni < nj
206+
})
207+
return out, names
208+
}
209+
210+
func writeSQLStub(path, slug string, c qshape.Cluster, sql string) error {
211+
var b strings.Builder
212+
fmt.Fprintf(&b, "-- name: %s\n", slug)
213+
fmt.Fprintf(&b, "-- Generated from qshape cluster %s\n", c.Fingerprint)
214+
fmt.Fprintf(&b, "-- Total calls (prod): %d across %d member variants\n", c.TotalCalls, len(c.Members))
215+
fmt.Fprintf(&b, "-- TODO: rename this slug, review canonical SQL, replace :paramN with meaningful names\n")
216+
b.WriteString(sql)
217+
if !strings.HasSuffix(sql, "\n") {
218+
b.WriteString("\n")
219+
}
220+
return os.WriteFile(path, []byte(b.String()), 0o644)
221+
}
222+
223+
// writePlanStub maps paramN → list of sample values (one per test case).
224+
// If values[p] is empty or absent, the plan uses REPLACE_ME.
225+
func writePlanStub(path string, params []string, values map[string][]any) error {
226+
var b strings.Builder
227+
if len(params) == 0 {
228+
b.WriteString("\"1\": {}\n")
229+
return os.WriteFile(path, []byte(b.String()), 0o644)
230+
}
231+
232+
numCases := 2
233+
for _, vs := range values {
234+
if len(vs) > numCases {
235+
numCases = len(vs)
236+
}
237+
}
238+
for i := 0; i < numCases; i++ {
239+
fmt.Fprintf(&b, "%q:\n", strconv.Itoa(i+1))
240+
for _, p := range params {
241+
vs := values[p]
242+
if i < len(vs) {
243+
fmt.Fprintf(&b, " %s: %s\n", p, yamlScalar(vs[i]))
244+
} else {
245+
fmt.Fprintf(&b, " %s: REPLACE_ME\n", p)
246+
}
247+
}
248+
b.WriteString("\n")
249+
}
250+
return os.WriteFile(path, []byte(b.String()), 0o644)
251+
}
252+
253+
// sampleValuesForParams maps paramN → []sample-values, using the cluster's
254+
// attribution + fixture. Params without attribution get no values (caller
255+
// emits REPLACE_ME).
256+
func sampleValuesForParams(paramNames []string, attrs []qshape.ParamAttribution, fix *fixtureDoc, n int) map[string][]any {
257+
out := map[string][]any{}
258+
if fix == nil || len(attrs) == 0 {
259+
return out
260+
}
261+
byPos := map[int]qshape.ParamAttribution{}
262+
for _, a := range attrs {
263+
byPos[a.Position] = a
264+
}
265+
for _, pname := range paramNames {
266+
posStr := strings.TrimPrefix(pname, "param")
267+
pos, err := strconv.Atoi(posStr)
268+
if err != nil {
269+
continue
270+
}
271+
a, ok := byPos[pos]
272+
if !ok || a.Table == "" || a.Column == "" {
273+
continue
274+
}
275+
vals := fix.sampleValues(a.Schema, a.Table, a.Column, n)
276+
if len(vals) > 0 {
277+
out[pname] = vals
278+
}
279+
}
280+
return out
281+
}
282+
283+
// yamlScalar renders a sample value as a single-line YAML scalar.
//
//   - nil becomes ~.
//   - bools and integers are written unquoted.
//   - float64 values that are whole numbers within the int64 range are written
//     without a fractional part; all other floats use the shortest decimal
//     form that round-trips.
//   - strings are double-quoted. Backslashes, double quotes, newlines, tabs
//     and other control characters are escaped so the value cannot spill onto
//     another line or be altered by YAML line folding.
//   - any other type is written as compact JSON; if it cannot be marshalled,
//     its fmt.Sprint form is written as a quoted string.
func yamlScalar(v any) string {
	switch x := v.(type) {
	case nil:
		return "~"
	case bool:
		return strconv.FormatBool(x)
	case float64:
		// Range-check before converting: float64 → int64 for out-of-range
		// values is implementation-defined, so the round-trip comparison is
		// only meaningful inside [-2^63, 2^63). NaN and ±Inf fail the range
		// check and fall through to FormatFloat.
		if x >= -(1<<63) && x < 1<<63 && x == float64(int64(x)) {
			return strconv.FormatInt(int64(x), 10)
		}
		return strconv.FormatFloat(x, 'f', -1, 64)
	case int, int64:
		return fmt.Sprintf("%d", x)
	case string:
		// strconv.Quote only emits escapes that YAML double-quoted scalars
		// also accept (\\, \", \n, \t, \xNN, \uNNNN, ...).
		return strconv.Quote(x)
	default:
		b, err := json.Marshal(x)
		if err != nil {
			// Emit something visible rather than an empty scalar.
			return strconv.Quote(fmt.Sprint(x))
		}
		return string(b)
	}
}
307+
308+
func loadClustersDoc(path string) (*clustersDoc, error) {
309+
var r io.Reader = os.Stdin
310+
if path != "" {
311+
f, err := os.Open(path)
312+
if err != nil {
313+
return nil, err
314+
}
315+
defer f.Close()
316+
r = f
317+
}
318+
var doc clustersDoc
319+
if err := json.NewDecoder(r).Decode(&doc); err != nil {
320+
return nil, fmt.Errorf("decode clusters.json: %w", err)
321+
}
322+
return &doc, nil
323+
}

cmd/qshape/root.go

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@ func Run() error {
1717
fingerprintCmd(),
1818
captureCmd(),
1919
attributeCmd(),
20+
regresqlStubCmd(),
2021
)
2122
return root.Execute()
2223
}

0 commit comments

Comments
 (0)