Skip to content

Commit a3d0115

Browse files
ggilderclaudemeiji163
authored
Fix Warning 1300 for varbinary columns with bytes invalid as utf8mb4 (#1661)
* Fix Warning 1300 for varbinary columns with bytes invalid as utf8mb4 When gh-ost replays a binlog DML event, the go-mysql library returns varbinary column values as a Go `string` (not `[]byte`). In convertArg, the existing code only converted string → []byte when the column had a non-empty Charset (e.g. utf8mb4 for varchar). varbinary columns have no character set, so Charset is always "", and the string fell through unconverted. The Go MySQL driver sends `string` args as MYSQL_TYPE_VAR_STRING with the connection's utf8mb4 charset metadata attached, causing MySQL to validate the bytes. If a varbinary value (e.g. a binary UUID) contains byte sequences that are invalid utf8mb4, MySQL emits Warning 1300. With gh-ost's panic-on-warnings enabled, this aborts the migration. Fix: add an else-if branch that detects binary storage types by MySQLType (binary, varbinary, *blob) and returns []byte, so the driver sends MYSQL_TYPE_BLOB (binary data) with no charset validation. MySQLType is used rather than Charset == "" alone because test Column objects built via NewColumnList leave MySQLType unset, which would have changed the return type for all no-charset columns in existing tests. In production, inspect.go always populates MySQLType from information_schema.data_type. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> * Address PR review feedback on varbinary Warning 1300 fix - Use MySQLType "varbinary(16)" in tests instead of bare "varbinary", matching the real value produced by information_schema COLUMN_TYPE (which includes length). Guards against future refactors that might switch from substring matching to exact matching. - Correct test comment: MySQLType is populated from COLUMN_TYPE, not data_type. - Broaden types.go comment to say "the connection's charset/collation metadata (often utf8mb4)" since gh-ost's connection charset is configurable via --charset. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> * appease linter --------- Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com> Co-authored-by: meiji163 <meiji163@github.com>
1 parent 0270a28 commit a3d0115

2 files changed

Lines changed: 48 additions & 0 deletions

File tree

go/sql/types.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,13 @@ func (this *Column) convertArg(arg interface{}) interface{} {
7070
if arg2Bytes != nil {
7171
if this.Charset != "" && this.charsetConversion == nil {
7272
arg = arg2Bytes
73+
} else if this.Charset == "" && (strings.Contains(this.MySQLType, "binary") || strings.HasSuffix(this.MySQLType, "blob")) {
74+
// varbinary/binary/blob column: no charset means binary storage. Return []byte so
75+
// the MySQL driver sends MYSQL_TYPE_BLOB (binary) rather than MYSQL_TYPE_VAR_STRING
76+
// (text with the connection's charset/collation metadata, often utf8mb4), which would
77+
// cause MySQL to validate the bytes and emit Warning 1300 for byte sequences that are
78+
// invalid in that charset.
79+
arg = arg2Bytes
7380
} else {
7481
if encoding, ok := charsetEncodingMap[this.Charset]; ok {
7582
decodedBytes, _ := encoding.NewDecoder().Bytes(arg2Bytes)

go/sql/types_test.go

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,47 @@ func TestConvertArgBinaryColumnPadding(t *testing.T) {
9595
require.Equal(t, []byte{0x00, 0x00}, resultBytes[18:])
9696
}
9797

98+
func TestConvertArgVarbinaryStringWithInvalidUTF8Bytes(t *testing.T) {
99+
// go-mysql returns varbinary binlog row values as Go `string` (not `[]uint8`).
100+
// When convertArg receives a string for a column with no Charset (varbinary),
101+
// it must return []byte — not the original string. The Go MySQL driver sends
102+
// string args as MYSQL_TYPE_VAR_STRING with utf8mb4 charset metadata, which
103+
// causes MySQL to validate the bytes and emit Warning 1300 for invalid sequences.
104+
// gh-ost's panic-on-warnings then turns that warning into a fatal migration error.
105+
// See: uuid varbinary(16) rows whose binary UUID bytes happen to be invalid utf8mb4.
106+
rawBytes := []byte{0x91, 0xC3, 0xCD, 0x00, 0x01, 0x02}
107+
108+
col := Column{
109+
Name: "uuid",
110+
Charset: "", // varbinary has no character set
111+
MySQLType: "varbinary(16)", // set by inspect.go from information_schema COLUMN_TYPE
112+
}
113+
114+
result := col.convertArg(string(rawBytes))
115+
116+
require.IsType(t, []byte{}, result,
117+
"varbinary value from binlog (Go string) must be returned as []byte, not string, "+
118+
"to prevent MySQL driver from sending it with the connection's charset metadata")
119+
require.Equal(t, rawBytes, result.([]byte))
120+
}
121+
122+
func TestConvertArgVarbinaryBytesWithInvalidUTF8Bytes(t *testing.T) {
123+
// When go-mysql returns varbinary values as []uint8 (rather than string),
124+
// convertArg should also return []byte consistently.
125+
rawBytes := []uint8{0x91, 0xC3, 0xCD, 0x00, 0x01, 0x02}
126+
127+
col := Column{
128+
Name: "uuid",
129+
Charset: "",
130+
MySQLType: "varbinary(16)", // set by inspect.go from information_schema COLUMN_TYPE
131+
}
132+
133+
result := col.convertArg(rawBytes)
134+
135+
require.IsType(t, []byte{}, result)
136+
require.Equal(t, rawBytes, result.([]byte))
137+
}
138+
98139
func TestConvertArgBinaryColumnNoPaddingWhenFull(t *testing.T) {
99140
// When binary value is already at full length, no padding should occur
100141
fullValue := []uint8{

0 commit comments

Comments
 (0)