From a9661a5293e98e8c7255663c987297c14a6285ec Mon Sep 17 00:00:00 2001 From: Aliaksandr Bystry Date: Wed, 15 Sep 2021 19:50:16 +0200 Subject: [PATCH] Fix #70962: XML_OPTION_SKIP_WHITE strips embedded whitespace We must never strip embedded whitespace; we only need to skip values when that option is set, and make sure that we keep BC regarding the different behavior for "cdata" and "complete" elements (for the former, the whole element is skipped; for the latter only the "value" key). We also fix erroneous `int` types which should actually be `size_t`. Co-authored-by: Christoph M. Becker Closes GH-7493. --- NEWS | 4 ++ ext/xml/tests/bug70962.phpt | 37 +++++++++++ ext/xml/xml.c | 119 +++++++++++++++++++----------------- 3 files changed, 103 insertions(+), 57 deletions(-) create mode 100644 ext/xml/tests/bug70962.phpt diff --git a/NEWS b/NEWS index e354dde6bb5..204fa178b13 100644 --- a/NEWS +++ b/NEWS @@ -13,6 +13,10 @@ PHP NEWS - PCRE: . Fixed bug #81424 (PCRE2 10.35 JIT performance regression). (cmb) +- XML: + . Fixed bug #70962 (XML_OPTION_SKIP_WHITE strips embedded whitespace). + (Aliaksandr Bystry, cmb) + 23 Sep 2021, PHP 7.4.24 - Core: diff --git a/ext/xml/tests/bug70962.phpt b/ext/xml/tests/bug70962.phpt new file mode 100644 index 00000000000..e73de391058 --- /dev/null +++ b/ext/xml/tests/bug70962.phpt @@ -0,0 +1,37 @@ +--TEST-- +Bug #70962 (XML_OPTION_SKIP_WHITE strips embedded whitespace) +--SKIPIF-- +--FILE-- +<d>\n <e>\n \t"; + +$parsed = parseAndOutput($xml); + +// Check embedded whitespace is not getting skipped. +echo $parsed[1]['value'] . "\n"; + +// Check XML_OPTION_SKIP_WHITE ignores values of tags containing whitespace characters only. +var_dump(isset($parsed[2]['value'])); + +// Check XML_OPTION_SKIP_WHITE ignores empty values. 
+var_dump(count($parsed)); + +?> +--EXPECT-- + + +bool(false) +int(4) diff --git a/ext/xml/xml.c b/ext/xml/xml.c index fd8aebe03a5..6fe6151c7a1 100644 --- a/ext/xml/xml.c +++ b/ext/xml/xml.c @@ -886,72 +886,77 @@ void _xml_characterDataHandler(void *userData, const XML_Char *s, int len) zend_string *decoded_value; decoded_value = xml_utf8_decode(s, len, parser->target_encoding); - for (i = 0; i < ZSTR_LEN(decoded_value); i++) { - switch (ZSTR_VAL(decoded_value)[i]) { - case ' ': - case '\t': - case '\n': - continue; - default: - doprint = 1; + if (parser->skipwhite) { + for (i = 0; i < ZSTR_LEN(decoded_value); i++) { + switch (ZSTR_VAL(decoded_value)[i]) { + case ' ': + case '\t': + case '\n': + continue; + default: + doprint = 1; + break; + } + if (doprint) { break; - } - if (doprint) { - break; + } } } - if (doprint || (! parser->skipwhite)) { - if (parser->lastwasopen) { - zval *myval; - /* check if the current tag already has a value - if yes append to that! */ - if ((myval = zend_hash_str_find(Z_ARRVAL_P(parser->ctag), "value", sizeof("value") - 1))) { - int newlen = Z_STRLEN_P(myval) + ZSTR_LEN(decoded_value); - Z_STR_P(myval) = zend_string_extend(Z_STR_P(myval), newlen, 0); - strncpy(Z_STRVAL_P(myval) + Z_STRLEN_P(myval) - ZSTR_LEN(decoded_value), - ZSTR_VAL(decoded_value), ZSTR_LEN(decoded_value) + 1); - zend_string_release_ex(decoded_value, 0); - } else { - add_assoc_str(parser->ctag, "value", decoded_value); - } + if (parser->lastwasopen) { + zval *myval; + /* check if the current tag already has a value - if yes append to that! 
*/ + if ((myval = zend_hash_str_find(Z_ARRVAL_P(parser->ctag), "value", sizeof("value") - 1))) { + size_t newlen = Z_STRLEN_P(myval) + ZSTR_LEN(decoded_value); + Z_STR_P(myval) = zend_string_extend(Z_STR_P(myval), newlen, 0); + strncpy(Z_STRVAL_P(myval) + Z_STRLEN_P(myval) - ZSTR_LEN(decoded_value), + ZSTR_VAL(decoded_value), ZSTR_LEN(decoded_value) + 1); + zend_string_release_ex(decoded_value, 0); } else { - zval tag; - zval *curtag, *mytype, *myval; - - ZEND_HASH_REVERSE_FOREACH_VAL(Z_ARRVAL(parser->data), curtag) { - if ((mytype = zend_hash_str_find(Z_ARRVAL_P(curtag),"type", sizeof("type") - 1))) { - if (!strcmp(Z_STRVAL_P(mytype), "cdata")) { - if ((myval = zend_hash_str_find(Z_ARRVAL_P(curtag), "value", sizeof("value") - 1))) { - int newlen = Z_STRLEN_P(myval) + ZSTR_LEN(decoded_value); - Z_STR_P(myval) = zend_string_extend(Z_STR_P(myval), newlen, 0); - strncpy(Z_STRVAL_P(myval) + Z_STRLEN_P(myval) - ZSTR_LEN(decoded_value), - ZSTR_VAL(decoded_value), ZSTR_LEN(decoded_value) + 1); - zend_string_release_ex(decoded_value, 0); - return; - } - } - } - break; - } ZEND_HASH_FOREACH_END(); - - if (parser->level <= XML_MAXLEVEL && parser->level > 0) { - array_init(&tag); - - _xml_add_to_info(parser,SKIP_TAGSTART(parser->ltags[parser->level-1])); - - add_assoc_string(&tag, "tag", SKIP_TAGSTART(parser->ltags[parser->level-1])); - add_assoc_str(&tag, "value", decoded_value); - add_assoc_string(&tag, "type", "cdata"); - add_assoc_long(&tag, "level", parser->level); - - zend_hash_next_index_insert(Z_ARRVAL(parser->data), &tag); - } else if (parser->level == (XML_MAXLEVEL + 1)) { - php_error_docref(NULL, E_WARNING, "Maximum depth exceeded - Results truncated"); + if (doprint || (! 
parser->skipwhite)) { + add_assoc_str(parser->ctag, "value", decoded_value); + } else { + zend_string_release_ex(decoded_value, 0); } } + } else { - zend_string_release_ex(decoded_value, 0); + zval tag; + zval *curtag, *mytype, *myval; + + ZEND_HASH_REVERSE_FOREACH_VAL(Z_ARRVAL(parser->data), curtag) { + if ((mytype = zend_hash_str_find(Z_ARRVAL_P(curtag),"type", sizeof("type") - 1))) { + if (!strcmp(Z_STRVAL_P(mytype), "cdata")) { + if ((myval = zend_hash_str_find(Z_ARRVAL_P(curtag), "value", sizeof("value") - 1))) { + size_t newlen = Z_STRLEN_P(myval) + ZSTR_LEN(decoded_value); + Z_STR_P(myval) = zend_string_extend(Z_STR_P(myval), newlen, 0); + strncpy(Z_STRVAL_P(myval) + Z_STRLEN_P(myval) - ZSTR_LEN(decoded_value), + ZSTR_VAL(decoded_value), ZSTR_LEN(decoded_value) + 1); + zend_string_release_ex(decoded_value, 0); + return; + } + } + } + break; + } ZEND_HASH_FOREACH_END(); + + if (parser->level <= XML_MAXLEVEL && parser->level > 0 && (doprint || (! parser->skipwhite))) { + array_init(&tag); + + _xml_add_to_info(parser,SKIP_TAGSTART(parser->ltags[parser->level-1])); + + add_assoc_string(&tag, "tag", SKIP_TAGSTART(parser->ltags[parser->level-1])); + add_assoc_str(&tag, "value", decoded_value); + add_assoc_string(&tag, "type", "cdata"); + add_assoc_long(&tag, "level", parser->level); + + zend_hash_next_index_insert(Z_ARRVAL(parser->data), &tag); + } else if (parser->level == (XML_MAXLEVEL + 1)) { + php_error_docref(NULL, E_WARNING, "Maximum depth exceeded - Results truncated"); + } else { + zend_string_release_ex(decoded_value, 0); + } } } }