@@ -391,8 +391,76 @@ int MqttEncode_Int(byte* buf, word32 len)
391391 return MQTT_DATA_INT_SIZE ;
392392}
393393
394- /* Returns number of buffer bytes decoded */
395- /* Returns pointer to string (which is not guaranteed to be null terminated) */
394+ /* [MQTT-1.5.3-1] Validate that the given byte sequence is well-formed UTF-8
395+ * per RFC 3629, including the surrogate-code-point ban (U+D800..U+DFFF MUST
396+ * NOT be encoded). Returns 1 if valid, 0 if malformed.
397+ *
398+ * RFC 3629 byte-pattern table:
399+ * 1-byte: 00..7F -> U+0000..U+007F
400+ * 2-byte: C2..DF 80..BF -> U+0080..U+07FF
401+ * 3-byte: E0 A0..BF 80..BF -> U+0800..U+0FFF
402+ * E1..EC 80..BF 80..BF -> U+1000..U+CFFF
403+ * ED 80..9F 80..BF -> U+D000..U+D7FF
404+ * EE..EF 80..BF 80..BF -> U+E000..U+FFFF
405+ * 4-byte: F0 90..BF 80..BF 80..BF -> U+10000..U+3FFFF
406+ * F1..F3 80..BF 80..BF 80..BF -> U+40000..U+FFFFF
407+ * F4 80..8F 80..BF 80..BF -> U+100000..U+10FFFF
408+ * Anything else (overlong, surrogate, > U+10FFFF, lone continuation,
409+ * truncated multi-byte) is malformed. */
410+ static int Utf8WellFormed (const byte * s , word16 len )
411+ {
412+ word16 i = 0 ;
413+ while (i < len ) {
414+ byte b0 = s [i ];
415+ byte b1 , b2 , b3 ;
416+
417+ if (b0 < 0x80 ) {
418+ i ++ ;
419+ continue ;
420+ }
421+ if (b0 < 0xC2 || b0 > 0xF4 ) {
422+ /* C0/C1 are overlong-only; F5..FF exceed U+10FFFF or are not
423+ * UTF-8 leading bytes. */
424+ return 0 ;
425+ }
426+ if (b0 < 0xE0 ) {
427+ /* 2-byte */
428+ if ((word32 )i + 1 >= (word32 )len ) return 0 ;
429+ b1 = s [i + 1 ];
430+ if ((b1 & 0xC0 ) != 0x80 ) return 0 ;
431+ i += 2 ;
432+ }
433+ else if (b0 < 0xF0 ) {
434+ /* 3-byte */
435+ if ((word32 )i + 2 >= (word32 )len ) return 0 ;
436+ b1 = s [i + 1 ];
437+ b2 = s [i + 2 ];
438+ if ((b1 & 0xC0 ) != 0x80 || (b2 & 0xC0 ) != 0x80 ) return 0 ;
439+ if (b0 == 0xE0 && b1 < 0xA0 ) return 0 ; /* overlong */
440+ if (b0 == 0xED && b1 >= 0xA0 ) return 0 ; /* surrogate */
441+ i += 3 ;
442+ }
443+ else {
444+ /* 4-byte (b0 in F0..F4) */
445+ if ((word32 )i + 3 >= (word32 )len ) return 0 ;
446+ b1 = s [i + 1 ];
447+ b2 = s [i + 2 ];
448+ b3 = s [i + 3 ];
449+ if ((b1 & 0xC0 ) != 0x80 ||
450+ (b2 & 0xC0 ) != 0x80 ||
451+ (b3 & 0xC0 ) != 0x80 ) return 0 ;
452+ if (b0 == 0xF0 && b1 < 0x90 ) return 0 ; /* overlong */
453+ if (b0 == 0xF4 && b1 >= 0x90 ) return 0 ; /* > U+10FFFF */
454+ i += 4 ;
455+ }
456+ }
457+ return 1 ;
458+ }
459+
460+ /* Returns pointer to string (which is not guaranteed to be null terminated).
461+ * [MQTT-1.5.3-1] Rejects ill-formed UTF-8 with MQTT_CODE_ERROR_MALFORMED_DATA;
462+ * receivers MUST close the network connection on malformed packets, which the
463+ * existing decode-error propagation in the broker handles. */
396464int MqttDecode_String (byte * buf , const char * * pstr , word16 * pstr_len , word32 buf_len )
397465{
398466 int len ;
@@ -405,6 +473,9 @@ int MqttDecode_String(byte *buf, const char **pstr, word16 *pstr_len, word32 buf
405473 return MQTT_TRACE_ERROR (MQTT_CODE_ERROR_OUT_OF_BUFFER );
406474 }
407475 buf += len ;
476+ if (str_len > 0 && !Utf8WellFormed (buf , str_len )) {
477+ return MQTT_TRACE_ERROR (MQTT_CODE_ERROR_MALFORMED_DATA );
478+ }
408479 if (pstr_len ) {
409480 * pstr_len = str_len ;
410481 }
@@ -698,7 +769,12 @@ int MqttDecode_Props(MqttPacketType packet, MqttProp** props, byte* pbuf,
698769 (const char * * )& cur_prop -> data_str .str ,
699770 & cur_prop -> data_str .len ,
700771 (word32 )(buf_len - (buf - pbuf )));
701- if ((tmp >= 0 ) && ((word32 )tmp <= (buf_len - (buf - pbuf )))) {
772+ if (tmp < 0 ) {
773+ /* Preserve specific error (e.g., MALFORMED_DATA from
774+ * UTF-8 check) instead of masking as PROPERTY. */
775+ rc = tmp ;
776+ }
777+ else if ((word32 )tmp <= (buf_len - (buf - pbuf ))) {
702778 buf += tmp ;
703779 total += tmp ;
704780 prop_len -= (word32 )tmp ;
@@ -760,7 +836,10 @@ int MqttDecode_Props(MqttPacketType packet, MqttProp** props, byte* pbuf,
760836 (const char * * )& cur_prop -> data_str .str ,
761837 & cur_prop -> data_str .len ,
762838 (word32 )(buf_len - (buf - pbuf )));
763- if ((tmp >= 0 ) && ((word32 )tmp <= (buf_len - (buf - pbuf )))) {
839+ if (tmp < 0 ) {
840+ rc = tmp ;
841+ }
842+ else if ((word32 )tmp <= (buf_len - (buf - pbuf ))) {
764843 buf += tmp ;
765844 total += tmp ;
766845 prop_len -= (word32 )tmp ;
@@ -769,8 +848,11 @@ int MqttDecode_Props(MqttPacketType packet, MqttProp** props, byte* pbuf,
769848 (const char * * )& cur_prop -> data_str2 .str ,
770849 & cur_prop -> data_str2 .len ,
771850 (word32 )(buf_len - (buf - pbuf )));
772- if ((tmp >= 0 ) && ((word32 )tmp <=
773- (buf_len - (buf - pbuf )))) {
851+ if (tmp < 0 ) {
852+ rc = tmp ;
853+ }
854+ else if ((word32 )tmp <=
855+ (buf_len - (buf - pbuf ))) {
774856 buf += tmp ;
775857 total += tmp ;
776858 prop_len -= (word32 )tmp ;
@@ -1208,15 +1290,25 @@ int MqttDecode_Connect(byte *rx_buf, int rx_buf_len, MqttConnect *mc_connect)
12081290 }
12091291
12101292 if (packet .flags & MQTT_CONNECT_FLAG_PASSWORD ) {
1211- tmp = MqttDecode_String (rx_payload , & mc_connect -> password , NULL ,
1293+ /* Password is binary data, not a UTF-8 string ([MQTT-3.1.3.5]). Decode
1294+ * the length prefix directly so MqttDecode_String's UTF-8 validation
1295+ * does not reject non-UTF-8 password bytes. */
1296+ word16 plen = 0 ;
1297+ tmp = MqttDecode_Num (rx_payload , & plen ,
12121298 (word32 )(rx_buf_len - (rx_payload - rx_buf )));
12131299 if (tmp < 0 ) {
12141300 return tmp ;
12151301 }
1216- if ((rx_payload - rx_buf ) + tmp > header_len + remain_len ) {
1302+ if ((word32 )plen >
1303+ (word32 )(rx_buf_len - (rx_payload - rx_buf ) - tmp )) {
12171304 return MQTT_TRACE_ERROR (MQTT_CODE_ERROR_OUT_OF_BUFFER );
12181305 }
1219- rx_payload += tmp ;
1306+ if ((rx_payload - rx_buf ) + tmp + (int )plen >
1307+ header_len + remain_len ) {
1308+ return MQTT_TRACE_ERROR (MQTT_CODE_ERROR_OUT_OF_BUFFER );
1309+ }
1310+ mc_connect -> password = (char * )(rx_payload + tmp );
1311+ rx_payload += tmp + plen ;
12201312 }
12211313
12221314 (void )rx_payload ;
@@ -1522,12 +1614,14 @@ int MqttDecode_Publish(byte *rx_buf, int rx_buf_len, MqttPublish *publish)
15221614 /* Decode variable header */
15231615 variable_len = MqttDecode_String (rx_payload , & publish -> topic_name ,
15241616 & publish -> topic_name_len , (word32 )(rx_buf_len - (rx_payload - rx_buf )));
1525- if ((variable_len >= 0 ) && (variable_len + header_len <= rx_buf_len )) {
1526- rx_payload += variable_len ;
1617+ if (variable_len < 0 ) {
1618+ /* Preserve specific error (e.g., MALFORMED_DATA from UTF-8 check). */
1619+ return variable_len ;
15271620 }
1528- else {
1621+ if ( variable_len + header_len > rx_buf_len ) {
15291622 return MQTT_TRACE_ERROR (MQTT_CODE_ERROR_OUT_OF_BUFFER );
15301623 }
1624+ rx_payload += variable_len ;
15311625
15321626 /* If QoS > 0 then get packet Id */
15331627 if (publish -> qos > MQTT_QOS_0 ) {
0 commit comments