Skip to content

Commit 60644f8

Browse files
jackcclaude
andcommitted
Fix SQL sanitizer bugs with dollar-quoted strings and placeholder overflow
The simple-protocol SQL sanitizer treated $N found inside PostgreSQL dollar-quoted strings ($$...$$ and $tag$...$tag$) as placeholders and substituted them into what PostgreSQL considers literal text. A crafted arg value could then close the dollar-quote from inside and run arbitrary SQL (see the new proof-of-concept test in query_test.go). The lexer now recognizes dollar-quoted strings using PostgreSQL's tag grammar (from scan.l) and preserves their contents verbatim. Placeholder numbers are also clamped at MaxInt32 rather than silently overflowing. Previously "$92233720368547758070" wrapped to -10; if the wrap had landed on a valid positive index it would have aliased a different argument. Adds unit tests, fuzz seeds, and an integration PoC. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent a5680bc commit 60644f8

4 files changed

Lines changed: 185 additions & 8 deletions

File tree

internal/sanitize/sanitize.go

Lines changed: 89 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"bytes"
55
"encoding/hex"
66
"fmt"
7+
"math"
78
"slices"
89
"strconv"
910
"strings"
@@ -202,12 +203,13 @@ func QuoteBytes(dst, buf []byte) []byte {
202203
}
203204

204205
type sqlLexer struct {
205-
src string
206-
start int
207-
pos int
208-
nested int // multiline comment nesting level.
209-
stateFn stateFn
210-
parts []Part
206+
src string
207+
start int
208+
pos int
209+
nested int // multiline comment nesting level.
210+
dollarTag string // active tag while inside a dollar-quoted string (may be empty for $$).
211+
stateFn stateFn
212+
parts []Part
211213
}
212214

213215
type stateFn func(*sqlLexer) stateFn
@@ -237,6 +239,15 @@ func rawState(l *sqlLexer) stateFn {
237239
l.start = l.pos
238240
return placeholderState
239241
}
242+
// PostgreSQL dollar-quoted string: $[tag]$...$[tag]$. The $ was
243+
// just consumed; try to match the rest of the opening tag.
244+
// Without this, placeholders embedded inside dollar-quoted
245+
// literals would be incorrectly substituted.
246+
if tagLen, ok := scanDollarQuoteTag(l.src[l.pos:]); ok {
247+
l.dollarTag = l.src[l.pos : l.pos+tagLen]
248+
l.pos += tagLen + 1 // advance past tag and closing '$'
249+
return dollarQuoteState
250+
}
240251
case '-':
241252
nextRune, width := utf8.DecodeRuneInString(l.src[l.pos:])
242253
if nextRune == '-' {
@@ -319,8 +330,16 @@ func placeholderState(l *sqlLexer) stateFn {
319330
l.pos += width
320331

321332
if '0' <= r && r <= '9' {
322-
num *= 10
323-
num += int(r - '0')
333+
// Clamp rather than silently wrap on pathological input like
334+
// "$92233720368547758070" which would otherwise overflow int and
335+
// could land on a valid args index. Any value above MaxInt32 far
336+
// exceeds any plausible args length, so Sanitize will correctly
337+
// return "insufficient arguments".
338+
if num > (math.MaxInt32-9)/10 {
339+
num = math.MaxInt32
340+
} else {
341+
num = num*10 + int(r-'0')
342+
}
324343
} else {
325344
l.parts = append(l.parts, num)
326345
l.pos -= width
@@ -330,6 +349,68 @@ func placeholderState(l *sqlLexer) stateFn {
330349
}
331350
}
332351

352+
// dollarQuoteState consumes the body of a PostgreSQL dollar-quoted string
353+
// ($[tag]$...$[tag]$). The opening tag (including its terminating '$') has
354+
// already been consumed.
355+
func dollarQuoteState(l *sqlLexer) stateFn {
356+
closer := "$" + l.dollarTag + "$"
357+
idx := strings.Index(l.src[l.pos:], closer)
358+
if idx < 0 {
359+
// Unterminated — mirror the behavior of other quoted-string states by
360+
// consuming the remaining input into the current part and stopping.
361+
if len(l.src)-l.start > 0 {
362+
l.parts = append(l.parts, l.src[l.start:])
363+
l.start = len(l.src)
364+
}
365+
l.pos = len(l.src)
366+
return nil
367+
}
368+
l.pos += idx + len(closer)
369+
l.dollarTag = ""
370+
return rawState
371+
}
372+
373+
// scanDollarQuoteTag checks whether src begins with an optional dollar-quoted
374+
// string tag followed by a closing '$'. src must point just past the opening
375+
// '$'. Returns the byte length of the tag (zero for an anonymous $$) and
376+
// whether a valid tag was found.
377+
//
378+
// Tag grammar matches the PostgreSQL lexer (scan.l):
379+
//
380+
// dolq_start: [A-Za-z_\x80-\xff]
381+
// dolq_cont: [A-Za-z0-9_\x80-\xff]
382+
func scanDollarQuoteTag(src string) (int, bool) {
383+
first := true
384+
for i := 0; i < len(src); {
385+
r, w := utf8.DecodeRuneInString(src[i:])
386+
if r == '$' {
387+
return i, true
388+
}
389+
if !isDollarTagRune(r, first) {
390+
return 0, false
391+
}
392+
first = false
393+
i += w
394+
}
395+
return 0, false
396+
}
397+
398+
func isDollarTagRune(r rune, first bool) bool {
399+
switch {
400+
case r == '_':
401+
return true
402+
case 'a' <= r && r <= 'z':
403+
return true
404+
case 'A' <= r && r <= 'Z':
405+
return true
406+
case !first && '0' <= r && r <= '9':
407+
return true
408+
case r >= 0x80 && r != utf8.RuneError:
409+
return true
410+
}
411+
return false
412+
}
413+
333414
func escapeStringState(l *sqlLexer) stateFn {
334415
for {
335416
r, width := utf8.DecodeRuneInString(l.src[l.pos:])

internal/sanitize/sanitize_fuzz_test.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,23 @@ func FuzzNewQuery(f *testing.F) {
7373
f.Add("E'esc' 'norm' \"ident\" $1 -- comment\n$2 /* block */ $3")
7474
f.Add("$1'$2'$3\"$4\"$5--$6\n$7/*$8*/$9")
7575

76+
// Dollar-quoted strings (PostgreSQL $[tag]$...$[tag]$). Placeholders
77+
// inside must NOT be substituted.
78+
f.Add("select $$hello $1 world$$, $1")
79+
f.Add("select $tag$body $1 body$tag$, $2")
80+
f.Add("select $a$ $b$ $a$") // mismatched inner tag
81+
f.Add("select $outer$ $$ $1 $$ $outer$") // nested inner anonymous quote
82+
f.Add("$$unterminated $1") // unterminated $$ run
83+
f.Add("$t$unterminated") // unterminated tagged run
84+
f.Add("$ $1") // $ alone is not a dollar-quote open
85+
f.Add("$abc no closing quote $1")
86+
f.Add("$1abc$") // $1 is placeholder, abc$ is raw
87+
f.Add("$日本$body$日本$") // non-ASCII tag
88+
89+
// Large placeholder numbers that would overflow naive int accumulation.
90+
f.Add("$92233720368547758070")
91+
f.Add("$999999999999999999999999999999999999999")
92+
7693
f.Fuzz(func(t *testing.T, input string) {
7794
query, err := sanitize.NewQuery(input)
7895
if err != nil {

internal/sanitize/sanitize_test.go

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package sanitize_test
22

33
import (
44
"encoding/hex"
5+
"math"
56
"strings"
67
"testing"
78
"time"
@@ -100,6 +101,54 @@ func TestNewQuery(t *testing.T) {
100101
sql: "select 'hello world",
101102
expected: sanitize.Query{Parts: []sanitize.Part{"select 'hello world"}},
102103
},
104+
{
105+
// Dollar-quoted string (anonymous) must not be treated as placeholders.
106+
sql: "select $$hello $1 world$$, $1",
107+
expected: sanitize.Query{Parts: []sanitize.Part{"select $$hello $1 world$$, ", 1}},
108+
},
109+
{
110+
// Dollar-quoted string with tag.
111+
sql: "select $tag$hello $1 world$tag$, $2",
112+
expected: sanitize.Query{Parts: []sanitize.Part{"select $tag$hello $1 world$tag$, ", 2}},
113+
},
114+
{
115+
// Dollar-quoted string with tag containing digits.
116+
sql: "select $t1$body$2$t1$, $3",
117+
expected: sanitize.Query{Parts: []sanitize.Part{"select $t1$body$2$t1$, ", 3}},
118+
},
119+
{
120+
// Dollar-quoted string may contain nested $$ sequences that don't match the outer tag.
121+
sql: "select $outer$ $$ still inside $1 $$ $outer$, $1",
122+
expected: sanitize.Query{Parts: []sanitize.Part{"select $outer$ $$ still inside $1 $$ $outer$, ", 1}},
123+
},
124+
{
125+
// Unterminated dollar-quoted string: consume the rest of input.
126+
sql: "select $$hello $1 world",
127+
expected: sanitize.Query{Parts: []sanitize.Part{"select $$hello $1 world"}},
128+
},
129+
{
130+
// $digit is still a placeholder, not a dollar-quote open.
131+
sql: "select $1 $2",
132+
expected: sanitize.Query{Parts: []sanitize.Part{"select ", 1, " ", 2}},
133+
},
134+
{
135+
// Dollar sign not followed by identifier/$/digit is literal.
136+
sql: "select $ + $1",
137+
expected: sanitize.Query{Parts: []sanitize.Part{"select $ + ", 1}},
138+
},
139+
{
140+
// Dollar followed by a non-tag identifier that never meets a closing $ is not a dollar-quoted string.
141+
sql: "select $abc + $1",
142+
expected: sanitize.Query{Parts: []sanitize.Part{"select $abc + ", 1}},
143+
},
144+
{
145+
// Overflow-sized placeholder number must not wrap: it should be
146+
// preserved as some value that Sanitize will reject with
147+
// "insufficient arguments" rather than silently wrapping to a
148+
// small/negative index that aliases a real argument.
149+
sql: "select $92233720368547758070",
150+
expected: sanitize.Query{Parts: []sanitize.Part{"select ", math.MaxInt32}},
151+
},
103152
}
104153

105154
for i, tt := range successTests {
@@ -220,6 +269,13 @@ func TestQuerySanitize(t *testing.T) {
220269
args: []any{42},
221270
expected: `invalid arg type: int`,
222271
},
272+
{
273+
// An overflow-clamped placeholder must not silently map onto a
274+
// real argument; it must produce an error.
275+
query: sanitize.Query{Parts: []sanitize.Part{"select ", math.MaxInt32}},
276+
args: []any{int64(42)},
277+
expected: `insufficient arguments`,
278+
},
223279
}
224280

225281
for i, tt := range errorTests {

query_test.go

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2253,6 +2253,29 @@ func TestQueryWithProcedureParametersInAndOut(t *testing.T) {
22532253
})
22542254
}
22552255

2256+
func TestConnQuerySanitizeSQLWithDollarQuotesStrings(t *testing.T) {
2257+
t.Parallel()
2258+
2259+
conn := mustConnectString(t, os.Getenv("PGX_TEST_DATABASE"))
2260+
defer closeConn(t, conn)
2261+
2262+
ctx := t.Context()
2263+
2264+
tx, err := conn.Begin(ctx)
2265+
require.NoError(t, err)
2266+
defer tx.Rollback(ctx)
2267+
2268+
_, err = tx.Exec(ctx, `create table canary(id text primary key)`)
2269+
require.NoError(t, err)
2270+
2271+
attackValue := `$tag$; drop table canary; --`
2272+
_, err = tx.Exec(ctx, `select $tag$ $1 $tag$, $1`, pgx.QueryExecModeSimpleProtocol, attackValue)
2273+
require.NoError(t, err)
2274+
2275+
_, err = tx.Exec(ctx, `select * from canary`)
2276+
require.NoError(t, err)
2277+
}
2278+
22562279
type byteCounterConn struct {
22572280
conn net.Conn
22582281
bytesRead int

0 commit comments

Comments
 (0)