mirror of
https://github.com/php/php-src.git
synced 2025-08-16 05:58:45 +02:00
Simplify decoding filter for UTF-8
When decoding a 3-byte UTF-8 code unit, redundant checks for overlong code unit and for illegal codepoints from U+D800-DFFF were included. Both of these conditions are caught by the line which reads: if ((c2 & 0xC0) != 0x80 || (c == 0xE0 && c2 < 0xA0) || (c == 0xED && c2 >= 0xA0)) { (a lead byte of 0xE0 followed by c2 < 0xA0 would decode to a value below 0x800, i.e. an overlong encoding, and a lead byte of 0xED followed by c2 >= 0xA0 would decode to a value in U+D800-DFFF). As such, there is no reason to check for the same error conditions again. Likewise, when decoding a 4-byte UTF-8 code unit, there was a redundant check for overlong code unit. That was already caught by the line which reads: if ((c2 & 0xC0) != 0x80 || (c == 0xF0 && c2 < 0x90) || (c == 0xF4 && c2 >= 0x90)) {
This commit is contained in:
parent
50e32015ae
commit
0109aa62ec
1 changed files with 5 additions and 6 deletions
|
@ -249,11 +249,9 @@ static size_t mb_utf8_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf
|
||||||
p--;
|
p--;
|
||||||
} else {
|
} else {
|
||||||
uint32_t decoded = ((c & 0xF) << 12) | ((c2 & 0x3F) << 6) | (c3 & 0x3F);
|
uint32_t decoded = ((c & 0xF) << 12) | ((c2 & 0x3F) << 6) | (c3 & 0x3F);
|
||||||
if (decoded < 0x800 || (decoded >= 0xD800 && decoded <= 0xDFFF)) {
|
ZEND_ASSERT(decoded >= 0x800); /* Not an overlong code unit */
|
||||||
*out++ = MBFL_BAD_INPUT;
|
ZEND_ASSERT(decoded < 0xD800 || decoded > 0xDFFF); /* U+D800-DFFF are reserved, illegal code points */
|
||||||
} else {
|
*out++ = decoded;
|
||||||
*out++ = decoded;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
*out++ = MBFL_BAD_INPUT;
|
*out++ = MBFL_BAD_INPUT;
|
||||||
|
@ -283,7 +281,8 @@ static size_t mb_utf8_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf
|
||||||
p--;
|
p--;
|
||||||
} else {
|
} else {
|
||||||
uint32_t decoded = ((c & 0x7) << 18) | ((c2 & 0x3F) << 12) | ((c3 & 0x3F) << 6) | (c4 & 0x3F);
|
uint32_t decoded = ((c & 0x7) << 18) | ((c2 & 0x3F) << 12) | ((c3 & 0x3F) << 6) | (c4 & 0x3F);
|
||||||
*out++ = (decoded < 0x10000) ? MBFL_BAD_INPUT : decoded;
|
ZEND_ASSERT(decoded >= 0x10000); /* Not an overlong code unit */
|
||||||
|
*out++ = decoded;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
*out++ = MBFL_BAD_INPUT;
|
*out++ = MBFL_BAD_INPUT;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue