Merge branch 'PHP-7.4' into PHP-8.0

* PHP-7.4:
  Fix #70962: XML_OPTION_SKIP_WHITE strips embedded whitespace
This commit is contained in:
Christoph M. Becker 2021-09-16 12:05:38 +02:00
commit f55d78e817
No known key found for this signature in database
GPG key ID: D66C9593118BCCB6
3 changed files with 103 additions and 57 deletions

4
NEWS
View file

@ -17,6 +17,10 @@ PHP NEWS
- PCRE:
. Fixed bug #81424 (PCRE2 10.35 JIT performance regression). (cmb)
- XML:
. Fixed bug #70962 (XML_OPTION_SKIP_WHITE strips embedded whitespace).
(Aliaksandr Bystry, cmb)
23 Sep 2021, PHP 8.0.11
- Core:

View file

@ -0,0 +1,37 @@
--TEST--
Bug #70962 (XML_OPTION_SKIP_WHITE strips embedded whitespace)
--SKIPIF--
<?php
if (!extension_loaded('xml')) die('skip xml extension not available');
?>
--FILE--
<?php
function parseAndOutput($xml)
{
$parser = xml_parser_create();
xml_parser_set_option($parser, XML_OPTION_SKIP_WHITE, 1);
xml_parse_into_struct($parser, $xml, $values);
return $values;
}
$xml = "<a><b>&lt;d&gt;\n &lt;e&gt;</b><![CDATA[ ]]><c>\n \t</c></a>";
$parsed = parseAndOutput($xml);
// Check embedded whitespace is not getting skipped.
echo $parsed[1]['value'] . "\n";
// Check XML_OPTION_SKIP_WHITE ignores values of tags containing whitespace characters only.
var_dump(isset($parsed[2]['value']));
// Check XML_OPTION_SKIP_WHITE ignores empty <![CDATA[ ]]> values.
var_dump(count($parsed));
?>
--EXPECT--
<d>
<e>
bool(false)
int(4)

View file

@ -781,72 +781,77 @@ void _xml_characterDataHandler(void *userData, const XML_Char *s, int len)
zend_string *decoded_value;
decoded_value = xml_utf8_decode(s, len, parser->target_encoding);
for (i = 0; i < ZSTR_LEN(decoded_value); i++) {
switch (ZSTR_VAL(decoded_value)[i]) {
case ' ':
case '\t':
case '\n':
continue;
default:
doprint = 1;
if (parser->skipwhite) {
for (i = 0; i < ZSTR_LEN(decoded_value); i++) {
switch (ZSTR_VAL(decoded_value)[i]) {
case ' ':
case '\t':
case '\n':
continue;
default:
doprint = 1;
break;
}
if (doprint) {
break;
}
if (doprint) {
break;
}
}
}
if (doprint || (! parser->skipwhite)) {
if (parser->lastwasopen) {
zval *myval;
/* check if the current tag already has a value - if yes append to that! */
if ((myval = zend_hash_str_find(Z_ARRVAL_P(parser->ctag), "value", sizeof("value") - 1))) {
int newlen = Z_STRLEN_P(myval) + ZSTR_LEN(decoded_value);
Z_STR_P(myval) = zend_string_extend(Z_STR_P(myval), newlen, 0);
strncpy(Z_STRVAL_P(myval) + Z_STRLEN_P(myval) - ZSTR_LEN(decoded_value),
ZSTR_VAL(decoded_value), ZSTR_LEN(decoded_value) + 1);
zend_string_release_ex(decoded_value, 0);
} else {
add_assoc_str(parser->ctag, "value", decoded_value);
}
if (parser->lastwasopen) {
zval *myval;
/* check if the current tag already has a value - if yes append to that! */
if ((myval = zend_hash_str_find(Z_ARRVAL_P(parser->ctag), "value", sizeof("value") - 1))) {
size_t newlen = Z_STRLEN_P(myval) + ZSTR_LEN(decoded_value);
Z_STR_P(myval) = zend_string_extend(Z_STR_P(myval), newlen, 0);
strncpy(Z_STRVAL_P(myval) + Z_STRLEN_P(myval) - ZSTR_LEN(decoded_value),
ZSTR_VAL(decoded_value), ZSTR_LEN(decoded_value) + 1);
zend_string_release_ex(decoded_value, 0);
} else {
zval tag;
zval *curtag, *mytype, *myval;
ZEND_HASH_REVERSE_FOREACH_VAL(Z_ARRVAL(parser->data), curtag) {
if ((mytype = zend_hash_str_find(Z_ARRVAL_P(curtag),"type", sizeof("type") - 1))) {
if (!strcmp(Z_STRVAL_P(mytype), "cdata")) {
if ((myval = zend_hash_str_find(Z_ARRVAL_P(curtag), "value", sizeof("value") - 1))) {
int newlen = Z_STRLEN_P(myval) + ZSTR_LEN(decoded_value);
Z_STR_P(myval) = zend_string_extend(Z_STR_P(myval), newlen, 0);
strncpy(Z_STRVAL_P(myval) + Z_STRLEN_P(myval) - ZSTR_LEN(decoded_value),
ZSTR_VAL(decoded_value), ZSTR_LEN(decoded_value) + 1);
zend_string_release_ex(decoded_value, 0);
return;
}
}
}
break;
} ZEND_HASH_FOREACH_END();
if (parser->level <= XML_MAXLEVEL && parser->level > 0) {
array_init(&tag);
_xml_add_to_info(parser,SKIP_TAGSTART(parser->ltags[parser->level-1]));
add_assoc_string(&tag, "tag", SKIP_TAGSTART(parser->ltags[parser->level-1]));
add_assoc_str(&tag, "value", decoded_value);
add_assoc_string(&tag, "type", "cdata");
add_assoc_long(&tag, "level", parser->level);
zend_hash_next_index_insert(Z_ARRVAL(parser->data), &tag);
} else if (parser->level == (XML_MAXLEVEL + 1)) {
php_error_docref(NULL, E_WARNING, "Maximum depth exceeded - Results truncated");
if (doprint || (! parser->skipwhite)) {
add_assoc_str(parser->ctag, "value", decoded_value);
} else {
zend_string_release_ex(decoded_value, 0);
}
}
} else {
zend_string_release_ex(decoded_value, 0);
zval tag;
zval *curtag, *mytype, *myval;
ZEND_HASH_REVERSE_FOREACH_VAL(Z_ARRVAL(parser->data), curtag) {
if ((mytype = zend_hash_str_find(Z_ARRVAL_P(curtag),"type", sizeof("type") - 1))) {
if (!strcmp(Z_STRVAL_P(mytype), "cdata")) {
if ((myval = zend_hash_str_find(Z_ARRVAL_P(curtag), "value", sizeof("value") - 1))) {
size_t newlen = Z_STRLEN_P(myval) + ZSTR_LEN(decoded_value);
Z_STR_P(myval) = zend_string_extend(Z_STR_P(myval), newlen, 0);
strncpy(Z_STRVAL_P(myval) + Z_STRLEN_P(myval) - ZSTR_LEN(decoded_value),
ZSTR_VAL(decoded_value), ZSTR_LEN(decoded_value) + 1);
zend_string_release_ex(decoded_value, 0);
return;
}
}
}
break;
} ZEND_HASH_FOREACH_END();
if (parser->level <= XML_MAXLEVEL && parser->level > 0 && (doprint || (! parser->skipwhite))) {
array_init(&tag);
_xml_add_to_info(parser,SKIP_TAGSTART(parser->ltags[parser->level-1]));
add_assoc_string(&tag, "tag", SKIP_TAGSTART(parser->ltags[parser->level-1]));
add_assoc_str(&tag, "value", decoded_value);
add_assoc_string(&tag, "type", "cdata");
add_assoc_long(&tag, "level", parser->level);
zend_hash_next_index_insert(Z_ARRVAL(parser->data), &tag);
} else if (parser->level == (XML_MAXLEVEL + 1)) {
php_error_docref(NULL, E_WARNING, "Maximum depth exceeded - Results truncated");
} else {
zend_string_release_ex(decoded_value, 0);
}
}
}
}