Fix GH-17572: getElementsByTagName returns collections with tagName-based indexing, causing loss of elements when converted to arrays

Only (dtd) named node maps should have string-based indexing.
The ce check is fragile, just check for the presence of an xml hash
table.

Closes GH-17580.
This commit is contained in:
Niels Dossche 2025-01-26 11:05:37 +01:00
parent 5c066e04b2
commit fc7c353519
No known key found for this signature in database
GPG key ID: B8A8AD166DF0E2E5
3 changed files with 91 additions and 9 deletions

2
NEWS
View file

@ -30,6 +30,8 @@ PHP NEWS
(nielsdos)
. Fixed bug GH-17485 (upstream fix, Self-closing tag on void elements
shouldn't be a parse error/warning in \Dom\HTMLDocument). (lexborisov)
. Fixed bug GH-17572 (getElementsByTagName returns collections with
tagName-based indexing). (nielsdos)
- Enchant:
. Fix crashes in enchant when passing null bytes. (nielsdos)

View file

@ -49,6 +49,13 @@ static void itemHashScanner (void *payload, void *data, xmlChar *name)
}
/* }}} */
static dom_nnodemap_object *php_dom_iterator_get_nnmap(const php_dom_iterator *iterator)
{
const zval *object = &iterator->intern.data;
dom_object *nnmap = Z_DOMOBJ_P(object);
return nnmap->ptr;
}
xmlNodePtr create_notation(const xmlChar *name, const xmlChar *ExternalID, const xmlChar *SystemID) /* {{{ */
{
xmlEntityPtr ret = xmlMalloc(sizeof(xmlEntity));
@ -120,18 +127,22 @@ zval *php_dom_iterator_current_data(zend_object_iterator *iter) /* {{{ */
static void php_dom_iterator_current_key(zend_object_iterator *iter, zval *key) /* {{{ */
{
php_dom_iterator *iterator = (php_dom_iterator *)iter;
zval *object = &iterator->intern.data;
zend_class_entry *ce = Z_OBJCE_P(object);
dom_nnodemap_object *objmap = php_dom_iterator_get_nnmap(iterator);
/* Nodelists have the index as a key while named node maps have the name as a key. */
if (instanceof_function(ce, dom_nodelist_class_entry) || instanceof_function(ce, dom_modern_nodelist_class_entry)) {
/* Only dtd named node maps, i.e. the ones based on a libxml hash table or attribute collections,
* are keyed by the name because in that case the name is unique. */
if (!objmap->ht && objmap->nodetype != XML_ATTRIBUTE_NODE) {
ZVAL_LONG(key, iter->index);
} else {
dom_object *intern = Z_DOMOBJ_P(&iterator->curobj);
if (intern != NULL && intern->ptr != NULL) {
xmlNodePtr curnode = (xmlNodePtr)((php_libxml_node_ptr *)intern->ptr)->node;
ZVAL_STRINGL(key, (char *) curnode->name, xmlStrlen(curnode->name));
xmlNodePtr curnode = ((php_libxml_node_ptr *)intern->ptr)->node;
if (objmap->nodetype == XML_ATTRIBUTE_NODE && php_dom_follow_spec_intern(intern)) {
ZVAL_NEW_STR(key, dom_node_get_node_name_attribute_or_element(curnode, false));
} else {
ZVAL_STRINGL(key, (const char *) curnode->name, xmlStrlen(curnode->name));
}
} else {
ZVAL_NULL(key);
}
@ -169,9 +180,7 @@ static void php_dom_iterator_move_forward(zend_object_iterator *iter) /* {{{ */
}
dom_object *intern = Z_DOMOBJ_P(&iterator->curobj);
zval *object = &iterator->intern.data;
dom_object *nnmap = Z_DOMOBJ_P(object);
dom_nnodemap_object *objmap = nnmap->ptr;
dom_nnodemap_object *objmap = php_dom_iterator_get_nnmap(iterator);
if (intern != NULL && intern->ptr != NULL) {
if (objmap->nodetype != XML_ENTITY_NODE &&

View file

@ -0,0 +1,71 @@
--TEST--
GH-17572 (getElementsByTagName returns collections with tagName-based indexing, causing loss of elements when converted to arrays)
--EXTENSIONS--
dom
--FILE--
<?php
$dom = Dom\XMLDocument::createFromString(<<<XML
<!DOCTYPE root [
<!ENTITY a "a">
]>
<root>
<p/>
<p xmlns:x="urn:x" x:a="1" b="2" a="3"/>
</root>
XML);
echo "--- querySelectorAll('p') ---\n";
foreach ($dom->querySelectorAll('p') as $k => $v) {
var_dump($k, $v->nodeName);
}
echo "--- getElementsByTagName('p') ---\n";
foreach ($dom->getElementsByTagName('p') as $k => $v) {
var_dump($k, $v->nodeName);
}
echo "--- entities ---\n";
foreach ($dom->doctype->entities as $k => $v) {
var_dump($k, $v->nodeName);
}
echo "--- attributes ---\n";
$attribs = $dom->getElementsByTagName('p')[1]->attributes;
foreach ($attribs as $k => $v) {
var_dump($k, $v->value);
var_dump($attribs[$k]->value);
}
?>
--EXPECT--
--- querySelectorAll('p') ---
int(0)
string(1) "p"
int(1)
string(1) "p"
--- getElementsByTagName('p') ---
int(0)
string(1) "p"
int(1)
string(1) "p"
--- entities ---
string(1) "a"
string(1) "a"
--- attributes ---
string(7) "xmlns:x"
string(5) "urn:x"
string(5) "urn:x"
string(3) "x:a"
string(1) "1"
string(1) "1"
string(1) "b"
string(1) "2"
string(1) "2"
string(1) "a"
string(1) "3"
string(1) "3"