Skip to content

Commit f4f1bca

Browse files
committed
Fix UTF-8 handling
1 parent 2a831b4 commit f4f1bca

3 files changed

Lines changed: 519 additions & 12 deletions

File tree

ChangeLog.md

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,35 @@
11
## Release Notes
22

3+
### v2.0.1 (Pending)
4+
5+
* Security Hardening
6+
- Reject ill-formed UTF-8 in MQTT UTF-8 string fields per [MQTT-1.5.3-1].
7+
`MqttDecode_String` now validates each decoded string against RFC 3629
8+
and rejects encodings of surrogate code points (U+D800..U+DFFF) with
9+
`MQTT_CODE_ERROR_MALFORMED_DATA`. Receivers MUST close the network
10+
connection on malformed packets, which the broker's existing decode-
11+
error path enforces. The check covers ClientId, Will Topic, Topic Name,
12+
Topic Filter, Username, and v5 STRING/STRING_PAIR property values.
13+
14+
* API / Behavior Changes
15+
- `MqttDecode_String` may now return `MQTT_CODE_ERROR_MALFORMED_DATA`
16+
on ill-formed UTF-8. This applies to client builds as well as broker
17+
builds — [MQTT-1.5.3-1] is normative for both. wolfMQTT clients that
18+
previously accepted PUBLISH messages with non-UTF-8 topics from
19+
misbehaving brokers will now error on those messages. There is no
20+
opt-out: the spec is a MUST.
21+
- `MqttDecode_Publish` now propagates the underlying error from
22+
`MqttDecode_String` (e.g. `MALFORMED_DATA`) instead of always
23+
returning `MQTT_CODE_ERROR_OUT_OF_BUFFER` on topic decode failure.
24+
- `MqttDecode_Props` similarly now propagates the underlying error from
25+
`MqttDecode_String` for v5 STRING and STRING_PAIR property types
26+
(Reason String, Content Type, User Property, etc.) instead of masking
27+
it as `MQTT_CODE_ERROR_PROPERTY`.
28+
- The CONNECT Password decode no longer goes through `MqttDecode_String`
29+
because [MQTT-3.1.3.5] defines Password as Binary Data, not a UTF-8
30+
string. A binary password containing bytes that are not valid UTF-8
31+
(e.g., `0xC0`, `0xFF`) would otherwise be incorrectly rejected.
32+
333
### v2.0.0 (03/20/2026)
434
Release 2.0.0 has been developed according to wolfSSL's development and QA
535
process (see link below) and successfully passed the quality criteria.

src/mqtt_packet.c

Lines changed: 106 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -391,8 +391,76 @@ int MqttEncode_Int(byte* buf, word32 len)
391391
return MQTT_DATA_INT_SIZE;
392392
}
393393

394-
/* Returns number of buffer bytes decoded */
395-
/* Returns pointer to string (which is not guaranteed to be null terminated) */
394+
/* [MQTT-1.5.3-1] Validate that the given byte sequence is well-formed UTF-8
395+
* per RFC 3629, including the surrogate-code-point ban (U+D800..U+DFFF MUST
396+
* NOT be encoded). Returns 1 if valid, 0 if malformed.
397+
*
398+
* RFC 3629 byte-pattern table:
399+
* 1-byte: 00..7F -> U+0000..U+007F
400+
* 2-byte: C2..DF 80..BF -> U+0080..U+07FF
401+
* 3-byte: E0 A0..BF 80..BF -> U+0800..U+0FFF
402+
* E1..EC 80..BF 80..BF -> U+1000..U+CFFF
403+
* ED 80..9F 80..BF -> U+D000..U+D7FF
404+
* EE..EF 80..BF 80..BF -> U+E000..U+FFFF
405+
* 4-byte: F0 90..BF 80..BF 80..BF -> U+10000..U+3FFFF
406+
* F1..F3 80..BF 80..BF 80..BF -> U+40000..U+FFFFF
407+
* F4 80..8F 80..BF 80..BF -> U+100000..U+10FFFF
408+
* Anything else (overlong, surrogate, > U+10FFFF, lone continuation,
409+
* truncated multi-byte) is malformed. */
410+
static int Utf8WellFormed(const byte* s, word16 len)
411+
{
412+
word16 i = 0;
413+
while (i < len) {
414+
byte b0 = s[i];
415+
byte b1, b2, b3;
416+
417+
if (b0 < 0x80) {
418+
i++;
419+
continue;
420+
}
421+
if (b0 < 0xC2 || b0 > 0xF4) {
422+
/* C0/C1 are overlong-only; F5..FF exceed U+10FFFF or are not
423+
* UTF-8 leading bytes. */
424+
return 0;
425+
}
426+
if (b0 < 0xE0) {
427+
/* 2-byte */
428+
if ((word32)i + 1 >= (word32)len) return 0;
429+
b1 = s[i+1];
430+
if ((b1 & 0xC0) != 0x80) return 0;
431+
i += 2;
432+
}
433+
else if (b0 < 0xF0) {
434+
/* 3-byte */
435+
if ((word32)i + 2 >= (word32)len) return 0;
436+
b1 = s[i+1];
437+
b2 = s[i+2];
438+
if ((b1 & 0xC0) != 0x80 || (b2 & 0xC0) != 0x80) return 0;
439+
if (b0 == 0xE0 && b1 < 0xA0) return 0; /* overlong */
440+
if (b0 == 0xED && b1 >= 0xA0) return 0; /* surrogate */
441+
i += 3;
442+
}
443+
else {
444+
/* 4-byte (b0 in F0..F4) */
445+
if ((word32)i + 3 >= (word32)len) return 0;
446+
b1 = s[i+1];
447+
b2 = s[i+2];
448+
b3 = s[i+3];
449+
if ((b1 & 0xC0) != 0x80 ||
450+
(b2 & 0xC0) != 0x80 ||
451+
(b3 & 0xC0) != 0x80) return 0;
452+
if (b0 == 0xF0 && b1 < 0x90) return 0; /* overlong */
453+
if (b0 == 0xF4 && b1 >= 0x90) return 0; /* > U+10FFFF */
454+
i += 4;
455+
}
456+
}
457+
return 1;
458+
}
459+
460+
/* Returns pointer to string (which is not guaranteed to be null terminated).
461+
* [MQTT-1.5.3-1] Rejects ill-formed UTF-8 with MQTT_CODE_ERROR_MALFORMED_DATA;
462+
* receivers MUST close the network connection on malformed packets, which the
463+
* existing decode-error propagation in the broker handles. */
396464
int MqttDecode_String(byte *buf, const char **pstr, word16 *pstr_len, word32 buf_len)
397465
{
398466
int len;
@@ -405,6 +473,9 @@ int MqttDecode_String(byte *buf, const char **pstr, word16 *pstr_len, word32 buf
405473
return MQTT_TRACE_ERROR(MQTT_CODE_ERROR_OUT_OF_BUFFER);
406474
}
407475
buf += len;
476+
if (str_len > 0 && !Utf8WellFormed(buf, str_len)) {
477+
return MQTT_TRACE_ERROR(MQTT_CODE_ERROR_MALFORMED_DATA);
478+
}
408479
if (pstr_len) {
409480
*pstr_len = str_len;
410481
}
@@ -698,7 +769,12 @@ int MqttDecode_Props(MqttPacketType packet, MqttProp** props, byte* pbuf,
698769
(const char**)&cur_prop->data_str.str,
699770
&cur_prop->data_str.len,
700771
(word32)(buf_len - (buf - pbuf)));
701-
if ((tmp >= 0) && ((word32)tmp <= (buf_len - (buf - pbuf)))) {
772+
if (tmp < 0) {
773+
/* Preserve specific error (e.g., MALFORMED_DATA from
774+
* UTF-8 check) instead of masking as PROPERTY. */
775+
rc = tmp;
776+
}
777+
else if ((word32)tmp <= (buf_len - (buf - pbuf))) {
702778
buf += tmp;
703779
total += tmp;
704780
prop_len -= (word32)tmp;
@@ -760,7 +836,10 @@ int MqttDecode_Props(MqttPacketType packet, MqttProp** props, byte* pbuf,
760836
(const char**)&cur_prop->data_str.str,
761837
&cur_prop->data_str.len,
762838
(word32)(buf_len - (buf - pbuf)));
763-
if ((tmp >= 0) && ((word32)tmp <= (buf_len - (buf - pbuf)))) {
839+
if (tmp < 0) {
840+
rc = tmp;
841+
}
842+
else if ((word32)tmp <= (buf_len - (buf - pbuf))) {
764843
buf += tmp;
765844
total += tmp;
766845
prop_len -= (word32)tmp;
@@ -769,8 +848,11 @@ int MqttDecode_Props(MqttPacketType packet, MqttProp** props, byte* pbuf,
769848
(const char**)&cur_prop->data_str2.str,
770849
&cur_prop->data_str2.len,
771850
(word32)(buf_len - (buf - pbuf)));
772-
if ((tmp >= 0) && ((word32)tmp <=
773-
(buf_len - (buf - pbuf)))) {
851+
if (tmp < 0) {
852+
rc = tmp;
853+
}
854+
else if ((word32)tmp <=
855+
(buf_len - (buf - pbuf))) {
774856
buf += tmp;
775857
total += tmp;
776858
prop_len -= (word32)tmp;
@@ -1208,15 +1290,25 @@ int MqttDecode_Connect(byte *rx_buf, int rx_buf_len, MqttConnect *mc_connect)
12081290
}
12091291

12101292
if (packet.flags & MQTT_CONNECT_FLAG_PASSWORD) {
1211-
tmp = MqttDecode_String(rx_payload, &mc_connect->password, NULL,
1293+
/* Password is binary data, not a UTF-8 string ([MQTT-3.1.3.5]). Decode
1294+
* the length prefix directly so MqttDecode_String's UTF-8 validation
1295+
* does not reject non-UTF-8 password bytes. */
1296+
word16 plen = 0;
1297+
tmp = MqttDecode_Num(rx_payload, &plen,
12121298
(word32)(rx_buf_len - (rx_payload - rx_buf)));
12131299
if (tmp < 0) {
12141300
return tmp;
12151301
}
1216-
if ((rx_payload - rx_buf) + tmp > header_len + remain_len) {
1302+
if ((word32)plen >
1303+
(word32)(rx_buf_len - (rx_payload - rx_buf) - tmp)) {
12171304
return MQTT_TRACE_ERROR(MQTT_CODE_ERROR_OUT_OF_BUFFER);
12181305
}
1219-
rx_payload += tmp;
1306+
if ((rx_payload - rx_buf) + tmp + (int)plen >
1307+
header_len + remain_len) {
1308+
return MQTT_TRACE_ERROR(MQTT_CODE_ERROR_OUT_OF_BUFFER);
1309+
}
1310+
mc_connect->password = (char*)(rx_payload + tmp);
1311+
rx_payload += tmp + plen;
12201312
}
12211313

12221314
(void)rx_payload;
@@ -1522,12 +1614,14 @@ int MqttDecode_Publish(byte *rx_buf, int rx_buf_len, MqttPublish *publish)
15221614
/* Decode variable header */
15231615
variable_len = MqttDecode_String(rx_payload, &publish->topic_name,
15241616
&publish->topic_name_len, (word32)(rx_buf_len - (rx_payload - rx_buf)));
1525-
if ((variable_len >= 0) && (variable_len + header_len <= rx_buf_len)) {
1526-
rx_payload += variable_len;
1617+
if (variable_len < 0) {
1618+
/* Preserve specific error (e.g., MALFORMED_DATA from UTF-8 check). */
1619+
return variable_len;
15271620
}
1528-
else {
1621+
if (variable_len + header_len > rx_buf_len) {
15291622
return MQTT_TRACE_ERROR(MQTT_CODE_ERROR_OUT_OF_BUFFER);
15301623
}
1624+
rx_payload += variable_len;
15311625

15321626
/* If QoS > 0 then get packet Id */
15331627
if (publish->qos > MQTT_QOS_0) {

0 commit comments

Comments
 (0)