Implement iteration cache, item cache and length cache for node list iteration (#11330)

* Implement iteration cache, item cache and length cache for node list iteration

The current implementation follows the spec requirement that the list
must be "live". This means that changes in the document must be
reflected in the existing node lists without requiring the user to
refetch the node list.
The consequence is that getting any item, or the length of the list,
always starts searching from the root element of the node list. This
results in O(n) time to get any item or the length. If there's a for
loop over the node list, this means the iterations will take O(n²) time
in total. This causes real-world performance issues with potential for
downtime (see GH-11308 and its references for details).

We fix this by introducing a caching strategy. We cache the last
iterated object in the iterator, the last requested item in the node
list, and the last length computation. To invalidate the cache, we
simply count the number of modifications made to the containing
document. If the modification number does not match what the number was
during caching, we know the document has been modified and the cache is
invalid. If this ever overflows, we saturate the modification number and
don't do any caching anymore. Note that we don't check for overflow on
64-bit systems because it would take hundreds of years to overflow.

Fixes GH-11308.
This commit is contained in:
Niels Dossche 2023-06-03 00:13:14 +02:00 committed by GitHub
parent ce724d186d
commit c3f0797385
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
24 changed files with 764 additions and 66 deletions

3
NEWS
View file

@ -44,6 +44,9 @@ PHP NEWS
- Date:
. Implement More Appropriate Date/Time Exceptions RFC. (Derick)
- DOM:
. Fix bug GH-11308 (getElementsByTagName() is O(N^2)). (nielsdos)
- Exif:
. Removed unneeded codepaths in exif_process_TIFF_in_JPEG(). (nielsdos)

View file

@ -120,6 +120,24 @@ PHP 8.3 INTERNALS UPGRADE NOTES
- A new function dom_get_doc_props_read_only() is added to gather the document
properties in a read-only way. This function avoids allocation when there are
no document properties changed yet.
- The node list returned by DOMNode::getElementsByTagName() and
DOMNode::getElementsByTagNameNS() now caches the length and the last requested item.
This means that the length and the last requested item are not recalculated
when the node list is iterated over multiple times.
If you do not use the internal PHP dom APIs to modify the document, you need to
manually invalidate the cache using php_libxml_invalidate_node_list_cache_from_doc().
Furthermore, the following internal APIs were added to handle the cache:
. php_dom_is_cache_tag_stale_from_doc_ptr()
. php_dom_is_cache_tag_stale_from_node()
. php_dom_mark_cache_tag_up_to_date_from_node()
- The function dom_get_elements_by_tag_name_ns_raw() has an additional parameter to indicate
the base node of the node list. This function also no longer accepts -1 as the index argument.
- The function dom_namednode_iter() has additional arguments to avoid recomputing the length of
the strings.
g. ext/libxml
- Two new functions: php_libxml_invalidate_node_list_cache_from_doc() and
php_libxml_invalidate_node_list_cache() were added to invalidate the cache of a node list.
========================
4. OpCode changes

View file

@ -777,7 +777,6 @@ PHP_METHOD(DOMDocument, getElementsByTagName)
size_t name_len;
dom_object *intern, *namednode;
char *name;
xmlChar *local;
id = ZEND_THIS;
if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &name, &name_len) == FAILURE) {
@ -788,8 +787,7 @@ PHP_METHOD(DOMDocument, getElementsByTagName)
php_dom_create_iterator(return_value, DOM_NODELIST);
namednode = Z_DOMOBJ_P(return_value);
local = xmlCharStrndup(name, name_len);
dom_namednode_iter(intern, 0, namednode, NULL, local, NULL);
dom_namednode_iter(intern, 0, namednode, NULL, name, name_len, NULL, 0);
}
/* }}} end dom_document_get_elements_by_tag_name */
@ -847,6 +845,8 @@ PHP_METHOD(DOMDocument, importNode)
}
}
php_libxml_invalidate_node_list_cache_from_doc(docp);
DOM_RET_OBJ((xmlNodePtr) retnodep, &ret, intern);
}
/* }}} end dom_document_import_node */
@ -991,7 +991,6 @@ PHP_METHOD(DOMDocument, getElementsByTagNameNS)
size_t uri_len, name_len;
dom_object *intern, *namednode;
char *uri, *name;
xmlChar *local, *nsuri;
id = ZEND_THIS;
if (zend_parse_parameters(ZEND_NUM_ARGS(), "s!s", &uri, &uri_len, &name, &name_len) == FAILURE) {
@ -1002,9 +1001,7 @@ PHP_METHOD(DOMDocument, getElementsByTagNameNS)
php_dom_create_iterator(return_value, DOM_NODELIST);
namednode = Z_DOMOBJ_P(return_value);
local = xmlCharStrndup(name, name_len);
nsuri = xmlCharStrndup(uri ? uri : "", uri_len);
dom_namednode_iter(intern, 0, namednode, NULL, local, nsuri);
dom_namednode_iter(intern, 0, namednode, NULL, name, name_len, uri ? uri : "", uri_len);
}
/* }}} end dom_document_get_elements_by_tag_name_ns */
@ -1070,6 +1067,8 @@ PHP_METHOD(DOMDocument, normalizeDocument)
DOM_GET_OBJ(docp, id, xmlDocPtr, intern);
php_libxml_invalidate_node_list_cache_from_doc(docp);
dom_normalize((xmlNodePtr) docp);
}
/* }}} end dom_document_normalize_document */
@ -1328,10 +1327,14 @@ static void dom_parse_document(INTERNAL_FUNCTION_PARAMETERS, int mode) {
if (id != NULL) {
intern = Z_DOMOBJ_P(id);
size_t old_modification_nr = 0;
if (intern != NULL) {
docp = (xmlDocPtr) dom_object_get_node(intern);
doc_prop = NULL;
if (docp != NULL) {
const php_libxml_doc_ptr *doc_ptr = docp->_private;
ZEND_ASSERT(doc_ptr != NULL); /* Must exist, we have a document */
old_modification_nr = doc_ptr->cache_tag.modification_nr;
php_libxml_decrement_node_ptr((php_libxml_node_object *) intern);
doc_prop = intern->document->doc_props;
intern->document->doc_props = NULL;
@ -1348,6 +1351,12 @@ static void dom_parse_document(INTERNAL_FUNCTION_PARAMETERS, int mode) {
}
php_libxml_increment_node_ptr((php_libxml_node_object *)intern, (xmlNodePtr)newdoc, (void *)intern);
/* Since iterators should invalidate, we need to start the modification number from the old counter */
if (old_modification_nr != 0) {
php_libxml_doc_ptr* doc_ptr = (php_libxml_doc_ptr*) ((php_libxml_node_object*) intern)->node; /* downcast */
doc_ptr->cache_tag.modification_nr = old_modification_nr;
php_libxml_invalidate_node_list_cache(doc_ptr);
}
RETURN_TRUE;
} else {
@ -1563,6 +1572,8 @@ PHP_METHOD(DOMDocument, xinclude)
php_dom_remove_xinclude_nodes(root);
}
php_libxml_invalidate_node_list_cache_from_doc(docp);
if (err) {
RETVAL_LONG(err);
} else {
@ -1871,10 +1882,14 @@ static void dom_load_html(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{ */
if (id != NULL && instanceof_function(Z_OBJCE_P(id), dom_document_class_entry)) {
intern = Z_DOMOBJ_P(id);
size_t old_modification_nr = 0;
if (intern != NULL) {
docp = (xmlDocPtr) dom_object_get_node(intern);
doc_prop = NULL;
if (docp != NULL) {
const php_libxml_doc_ptr *doc_ptr = docp->_private;
ZEND_ASSERT(doc_ptr != NULL); /* Must exist, we have a document */
old_modification_nr = doc_ptr->cache_tag.modification_nr;
php_libxml_decrement_node_ptr((php_libxml_node_object *) intern);
doc_prop = intern->document->doc_props;
intern->document->doc_props = NULL;
@ -1891,6 +1906,12 @@ static void dom_load_html(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{ */
}
php_libxml_increment_node_ptr((php_libxml_node_object *)intern, (xmlNodePtr)newdoc, (void *)intern);
/* Since iterators should invalidate, we need to start the modification number from the old counter */
if (old_modification_nr != 0) {
php_libxml_doc_ptr* doc_ptr = (php_libxml_doc_ptr*) ((php_libxml_node_object*) intern)->node; /* downcast */
doc_ptr->cache_tag.modification_nr = old_modification_nr;
php_libxml_invalidate_node_list_cache(doc_ptr);
}
RETURN_TRUE;
} else {

View file

@ -65,7 +65,7 @@ int dom_documenttype_entities_read(dom_object *obj, zval *retval)
entityht = (xmlHashTable *) doctypep->entities;
intern = Z_DOMOBJ_P(retval);
dom_namednode_iter(obj, XML_ENTITY_NODE, intern, entityht, NULL, NULL);
dom_namednode_iter(obj, XML_ENTITY_NODE, intern, entityht, NULL, 0, NULL, 0);
return SUCCESS;
}
@ -93,7 +93,7 @@ int dom_documenttype_notations_read(dom_object *obj, zval *retval)
notationht = (xmlHashTable *) doctypep->notations;
intern = Z_DOMOBJ_P(retval);
dom_namednode_iter(obj, XML_NOTATION_NODE, intern, notationht, NULL, NULL);
dom_namednode_iter(obj, XML_NOTATION_NODE, intern, notationht, NULL, 0, NULL, 0);
return SUCCESS;
}

View file

@ -179,7 +179,7 @@ static void php_dom_iterator_move_forward(zend_object_iterator *iter) /* {{{ */
dom_object *intern;
dom_object *nnmap;
dom_nnodemap_object *objmap;
int previndex=0;
int previndex;
HashTable *nodeht;
zval *entry;
bool do_curobj_undef = 1;
@ -205,23 +205,32 @@ static void php_dom_iterator_move_forward(zend_object_iterator *iter) /* {{{ */
do_curobj_undef = 0;
}
} else {
curnode = (xmlNodePtr)((php_libxml_node_ptr *)intern->ptr)->node;
if (objmap->nodetype == XML_ATTRIBUTE_NODE ||
objmap->nodetype == XML_ELEMENT_NODE) {
curnode = (xmlNodePtr)((php_libxml_node_ptr *)intern->ptr)->node;
curnode = curnode->next;
} else {
/* Nav the tree evey time as this is LIVE */
/* The collection is live, we nav the tree from the base object if we cannot
* use the cache to restart from the last point. */
basenode = dom_object_get_node(objmap->baseobj);
if (basenode && (basenode->type == XML_DOCUMENT_NODE ||
basenode->type == XML_HTML_DOCUMENT_NODE)) {
basenode = xmlDocGetRootElement((xmlDoc *) basenode);
} else if (basenode) {
basenode = basenode->children;
} else {
if (UNEXPECTED(!basenode)) {
goto err;
}
if (php_dom_is_cache_tag_stale_from_node(&iterator->cache_tag, basenode)) {
php_dom_mark_cache_tag_up_to_date_from_node(&iterator->cache_tag, basenode);
previndex = 0;
if (basenode && (basenode->type == XML_DOCUMENT_NODE ||
basenode->type == XML_HTML_DOCUMENT_NODE)) {
curnode = xmlDocGetRootElement((xmlDoc *) basenode);
} else {
curnode = basenode->children;
}
} else {
previndex = iter->index - 1;
curnode = (xmlNodePtr)((php_libxml_node_ptr *)intern->ptr)->node;
}
curnode = dom_get_elements_by_tag_name_ns_raw(
basenode, (char *) objmap->ns, (char *) objmap->local, &previndex, iter->index);
basenode, curnode, (char *) objmap->ns, (char *) objmap->local, &previndex, iter->index);
}
}
} else {
@ -258,7 +267,7 @@ zend_object_iterator *php_dom_get_iterator(zend_class_entry *ce, zval *object, i
{
dom_object *intern;
dom_nnodemap_object *objmap;
xmlNodePtr nodep, curnode=NULL;
xmlNodePtr curnode=NULL;
int curindex = 0;
HashTable *nodeht;
zval *entry;
@ -270,6 +279,7 @@ zend_object_iterator *php_dom_get_iterator(zend_class_entry *ce, zval *object, i
}
iterator = emalloc(sizeof(php_dom_iterator));
zend_iterator_init(&iterator->intern);
iterator->cache_tag.modification_nr = 0;
ZVAL_OBJ_COPY(&iterator->intern.data, Z_OBJ_P(object));
iterator->intern.funcs = &php_dom_iterator_funcs;
@ -288,24 +298,25 @@ zend_object_iterator *php_dom_get_iterator(zend_class_entry *ce, zval *object, i
ZVAL_COPY(&iterator->curobj, entry);
}
} else {
nodep = (xmlNode *)dom_object_get_node(objmap->baseobj);
if (!nodep) {
xmlNodePtr basep = (xmlNode *)dom_object_get_node(objmap->baseobj);
if (!basep) {
goto err;
}
if (objmap->nodetype == XML_ATTRIBUTE_NODE || objmap->nodetype == XML_ELEMENT_NODE) {
if (objmap->nodetype == XML_ATTRIBUTE_NODE) {
curnode = (xmlNodePtr) nodep->properties;
curnode = (xmlNodePtr) basep->properties;
} else {
curnode = (xmlNodePtr) nodep->children;
curnode = (xmlNodePtr) basep->children;
}
} else {
xmlNodePtr nodep = basep;
if (nodep->type == XML_DOCUMENT_NODE || nodep->type == XML_HTML_DOCUMENT_NODE) {
nodep = xmlDocGetRootElement((xmlDoc *) nodep);
} else {
nodep = nodep->children;
}
curnode = dom_get_elements_by_tag_name_ns_raw(
nodep, (char *) objmap->ns, (char *) objmap->local, &curindex, 0);
basep, nodep, (char *) objmap->ns, (char *) objmap->local, &curindex, 0);
}
}
} else {

View file

@ -511,7 +511,6 @@ PHP_METHOD(DOMElement, getElementsByTagName)
size_t name_len;
dom_object *intern, *namednode;
char *name;
xmlChar *local;
id = ZEND_THIS;
if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &name, &name_len) == FAILURE) {
@ -522,8 +521,7 @@ PHP_METHOD(DOMElement, getElementsByTagName)
php_dom_create_iterator(return_value, DOM_NODELIST);
namednode = Z_DOMOBJ_P(return_value);
local = xmlCharStrndup(name, name_len);
dom_namednode_iter(intern, 0, namednode, NULL, local, NULL);
dom_namednode_iter(intern, 0, namednode, NULL, name, name_len, NULL, 0);
}
/* }}} end dom_element_get_elements_by_tag_name */
@ -930,7 +928,6 @@ PHP_METHOD(DOMElement, getElementsByTagNameNS)
size_t uri_len, name_len;
dom_object *intern, *namednode;
char *uri, *name;
xmlChar *local, *nsuri;
id = ZEND_THIS;
if (zend_parse_parameters(ZEND_NUM_ARGS(), "s!s", &uri, &uri_len, &name, &name_len) == FAILURE) {
@ -941,9 +938,7 @@ PHP_METHOD(DOMElement, getElementsByTagNameNS)
php_dom_create_iterator(return_value, DOM_NODELIST);
namednode = Z_DOMOBJ_P(return_value);
local = xmlCharStrndup(name, name_len);
nsuri = xmlCharStrndup(uri ? uri : "", uri_len);
dom_namednode_iter(intern, 0, namednode, NULL, local, nsuri);
dom_namednode_iter(intern, 0, namednode, NULL, name, name_len, uri ? uri : "", uri_len);
}
/* }}} end dom_element_get_elements_by_tag_name_ns */

View file

@ -195,6 +195,8 @@ int dom_node_node_value_write(dom_object *obj, zval *newval)
break;
}
php_libxml_invalidate_node_list_cache_from_doc(nodep->doc);
zend_string_release_ex(str, 0);
return SUCCESS;
}
@ -274,7 +276,7 @@ int dom_node_child_nodes_read(dom_object *obj, zval *retval)
php_dom_create_iterator(retval, DOM_NODELIST);
intern = Z_DOMOBJ_P(retval);
dom_namednode_iter(obj, XML_ELEMENT_NODE, intern, NULL, NULL, NULL);
dom_namednode_iter(obj, XML_ELEMENT_NODE, intern, NULL, NULL, 0, NULL, 0);
return SUCCESS;
}
@ -482,7 +484,7 @@ int dom_node_attributes_read(dom_object *obj, zval *retval)
if (nodep->type == XML_ELEMENT_NODE) {
php_dom_create_iterator(retval, DOM_NAMEDNODEMAP);
intern = Z_DOMOBJ_P(retval);
dom_namednode_iter(obj, XML_ATTRIBUTE_NODE, intern, NULL, NULL, NULL);
dom_namednode_iter(obj, XML_ATTRIBUTE_NODE, intern, NULL, NULL, 0, NULL, 0);
} else {
ZVAL_NULL(retval);
}
@ -769,6 +771,8 @@ int dom_node_text_content_write(dom_object *obj, zval *newval)
return FAILURE;
}
php_libxml_invalidate_node_list_cache_from_doc(nodep->doc);
const xmlChar *xmlChars = (const xmlChar *) ZSTR_VAL(str);
int type = nodep->type;
@ -897,6 +901,8 @@ PHP_METHOD(DOMNode, insertBefore)
php_libxml_increment_doc_ref((php_libxml_node_object *)childobj, NULL);
}
php_libxml_invalidate_node_list_cache_from_doc(parentp->doc);
if (ref != NULL) {
DOM_GET_OBJ(refp, ref, xmlNodePtr, refpobj);
if (refp->parent != parentp) {
@ -1086,6 +1092,7 @@ PHP_METHOD(DOMNode, replaceChild)
nodep->doc->intSubset = (xmlDtd *) newchild;
}
}
php_libxml_invalidate_node_list_cache_from_doc(nodep->doc);
DOM_RET_OBJ(oldchild, &ret, intern);
}
/* }}} end dom_node_replace_child */
@ -1127,6 +1134,7 @@ PHP_METHOD(DOMNode, removeChild)
}
xmlUnlinkNode(child);
php_libxml_invalidate_node_list_cache_from_doc(nodep->doc);
DOM_RET_OBJ(child, &ret, intern);
}
/* }}} end dom_node_remove_child */
@ -1230,6 +1238,8 @@ PHP_METHOD(DOMNode, appendChild)
dom_reconcile_ns(nodep->doc, new_child);
php_libxml_invalidate_node_list_cache_from_doc(nodep->doc);
DOM_RET_OBJ(new_child, &ret, intern);
}
/* }}} end dom_node_append_child */
@ -1339,6 +1349,8 @@ PHP_METHOD(DOMNode, normalize)
DOM_GET_OBJ(nodep, id, xmlNodePtr, intern);
php_libxml_invalidate_node_list_cache_from_doc(nodep->doc);
dom_normalize(nodep);
}
@ -1571,6 +1583,8 @@ static void dom_canonicalization(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{
RETURN_THROWS();
}
php_libxml_invalidate_node_list_cache_from_doc(docp);
if (xpath_array == NULL) {
if (nodep->type != XML_DOCUMENT_NODE) {
ctxp = xmlXPathNewContext(docp);

View file

@ -31,6 +31,24 @@
* Since:
*/
static zend_always_inline void objmap_cache_release_cached_obj(dom_nnodemap_object *objmap)
{
if (objmap->cached_obj) {
/* Since the DOM is a tree there can be no cycles. */
if (GC_DELREF(&objmap->cached_obj->std) == 0) {
zend_objects_store_del(&objmap->cached_obj->std);
}
objmap->cached_obj = NULL;
objmap->cached_obj_index = 0;
}
}
static zend_always_inline void reset_objmap_cache(dom_nnodemap_object *objmap)
{
objmap_cache_release_cached_obj(objmap);
objmap->cached_length = -1;
}
static int get_nodelist_length(dom_object *obj)
{
dom_nnodemap_object *objmap = (dom_nnodemap_object *) obj->ptr;
@ -52,6 +70,17 @@ static int get_nodelist_length(dom_object *obj)
return 0;
}
if (!php_dom_is_cache_tag_stale_from_node(&objmap->cache_tag, nodep)) {
if (objmap->cached_length >= 0) {
return objmap->cached_length;
}
/* Only the length is out-of-date, the cache tag is still valid.
* Therefore, only overwrite the length and keep the currently cached object. */
} else {
php_dom_mark_cache_tag_up_to_date_from_node(&objmap->cache_tag, nodep);
reset_objmap_cache(objmap);
}
int count = 0;
if (objmap->nodetype == XML_ATTRIBUTE_NODE || objmap->nodetype == XML_ELEMENT_NODE) {
xmlNodePtr curnode = nodep->children;
@ -63,15 +92,18 @@ static int get_nodelist_length(dom_object *obj)
}
}
} else {
xmlNodePtr basep = nodep;
if (nodep->type == XML_DOCUMENT_NODE || nodep->type == XML_HTML_DOCUMENT_NODE) {
nodep = xmlDocGetRootElement((xmlDoc *) nodep);
} else {
nodep = nodep->children;
}
dom_get_elements_by_tag_name_ns_raw(
nodep, (char *) objmap->ns, (char *) objmap->local, &count, -1);
basep, nodep, (char *) objmap->ns, (char *) objmap->local, &count, INT_MAX - 1 /* because of <= */);
}
objmap->cached_length = count;
return count;
}
@ -113,11 +145,12 @@ PHP_METHOD(DOMNodeList, item)
zval *id;
zend_long index;
int ret;
bool cache_itemnode = false;
dom_object *intern;
xmlNodePtr itemnode = NULL;
dom_nnodemap_object *objmap;
xmlNodePtr nodep, curnode;
xmlNodePtr basep;
int count = 0;
id = ZEND_THIS;
@ -145,23 +178,51 @@ PHP_METHOD(DOMNodeList, item)
return;
}
} else if (objmap->baseobj) {
nodep = dom_object_get_node(objmap->baseobj);
if (nodep) {
if (objmap->nodetype == XML_ATTRIBUTE_NODE || objmap->nodetype == XML_ELEMENT_NODE) {
curnode = nodep->children;
while (count < index && curnode != NULL) {
count++;
curnode = curnode->next;
}
itemnode = curnode;
} else {
if (nodep->type == XML_DOCUMENT_NODE || nodep->type == XML_HTML_DOCUMENT_NODE) {
nodep = xmlDocGetRootElement((xmlDoc *) nodep);
basep = dom_object_get_node(objmap->baseobj);
if (basep) {
xmlNodePtr nodep = basep;
/* For now we're only able to use cache for forward search.
* TODO: in the future we could extend the logic of the node list such that backwards searches
* are also possible. */
bool restart = true;
int relative_index = index;
if (index >= objmap->cached_obj_index && objmap->cached_obj && !php_dom_is_cache_tag_stale_from_node(&objmap->cache_tag, nodep)) {
xmlNodePtr cached_obj_xml_node = dom_object_get_node(objmap->cached_obj);
/* The node cannot be NULL if the cache is valid. If it is NULL, then it means we
* forgot an invalidation somewhere. Take the defensive programming approach and invalidate
* it here if it's NULL (except in debug mode where we would want to catch this). */
if (UNEXPECTED(cached_obj_xml_node == NULL)) {
#if ZEND_DEBUG
ZEND_UNREACHABLE();
#endif
reset_objmap_cache(objmap);
} else {
restart = false;
relative_index -= objmap->cached_obj_index;
nodep = cached_obj_xml_node;
}
}
if (objmap->nodetype == XML_ATTRIBUTE_NODE || objmap->nodetype == XML_ELEMENT_NODE) {
if (restart) {
nodep = nodep->children;
}
itemnode = dom_get_elements_by_tag_name_ns_raw(nodep, (char *) objmap->ns, (char *) objmap->local, &count, index);
while (count < relative_index && nodep != NULL) {
count++;
nodep = nodep->next;
}
itemnode = nodep;
} else {
if (restart) {
if (basep->type == XML_DOCUMENT_NODE || basep->type == XML_HTML_DOCUMENT_NODE) {
nodep = xmlDocGetRootElement((xmlDoc*) basep);
} else {
nodep = basep->children;
}
}
itemnode = dom_get_elements_by_tag_name_ns_raw(basep, nodep, (char *) objmap->ns, (char *) objmap->local, &count, relative_index);
}
cache_itemnode = true;
}
}
}
@ -169,6 +230,25 @@ PHP_METHOD(DOMNodeList, item)
if (itemnode) {
DOM_RET_OBJ(itemnode, &ret, objmap->baseobj);
if (cache_itemnode) {
/* Hold additional reference for the cache, must happen before releasing the cache
* because we might be the last reference holder.
* Instead of storing and copying zvals, we store the object pointer directly.
* This saves us some bytes because a pointer is smaller than a zval.
* This also means we have to manually refcount the objects here, and remove the reference count
* in reset_objmap_cache() and the destructor. */
dom_object *cached_obj = Z_DOMOBJ_P(return_value);
GC_ADDREF(&cached_obj->std);
/* If the tag is stale, all cached data is useless. Otherwise only the cached object is useless. */
if (php_dom_is_cache_tag_stale_from_node(&objmap->cache_tag, itemnode)) {
php_dom_mark_cache_tag_up_to_date_from_node(&objmap->cache_tag, itemnode);
reset_objmap_cache(objmap);
} else {
objmap_cache_release_cached_obj(objmap);
}
objmap->cached_obj_index = index;
objmap->cached_obj = cached_obj;
}
return;
}
}

View file

@ -280,6 +280,8 @@ void dom_parent_node_append(dom_object *context, zval *nodes, int nodesc)
return;
}
php_libxml_invalidate_node_list_cache_from_doc(parentNode->doc);
xmlNode *fragment = dom_zvals_to_fragment(context->document, parentNode, nodes, nodesc);
if (fragment == NULL) {
@ -322,6 +324,8 @@ void dom_parent_node_prepend(dom_object *context, zval *nodes, int nodesc)
return;
}
php_libxml_invalidate_node_list_cache_from_doc(parentNode->doc);
xmlNodePtr newchild, nextsib;
xmlNode *fragment = dom_zvals_to_fragment(context->document, parentNode, nodes, nodesc);
@ -402,6 +406,8 @@ void dom_parent_node_after(dom_object *context, zval *nodes, int nodesc)
doc = prevsib->doc;
php_libxml_invalidate_node_list_cache_from_doc(doc);
/* Spec step 4: convert nodes into fragment */
fragment = dom_zvals_to_fragment(context->document, parentNode, nodes, nodesc);
@ -451,6 +457,8 @@ void dom_parent_node_before(dom_object *context, zval *nodes, int nodesc)
doc = nextsib->doc;
php_libxml_invalidate_node_list_cache_from_doc(doc);
/* Spec step 4: convert nodes into fragment */
fragment = dom_zvals_to_fragment(context->document, parentNode, nodes, nodesc);
@ -506,6 +514,8 @@ void dom_child_node_remove(dom_object *context)
return;
}
php_libxml_invalidate_node_list_cache_from_doc(context->document->ptr);
while (children) {
if (children == child) {
xmlUnlinkNode(child);

View file

@ -942,7 +942,7 @@ void dom_objects_free_storage(zend_object *object)
}
/* }}} */
void dom_namednode_iter(dom_object *basenode, int ntype, dom_object *intern, xmlHashTablePtr ht, xmlChar *local, xmlChar *ns) /* {{{ */
void dom_namednode_iter(dom_object *basenode, int ntype, dom_object *intern, xmlHashTablePtr ht, const char *local, size_t local_len, const char *ns, size_t ns_len) /* {{{ */
{
dom_nnodemap_object *mapptr = (dom_nnodemap_object *) intern->ptr;
@ -950,11 +950,33 @@ void dom_namednode_iter(dom_object *basenode, int ntype, dom_object *intern, xml
ZVAL_OBJ_COPY(&mapptr->baseobj_zv, &basenode->std);
xmlDocPtr doc = basenode->document ? basenode->document->ptr : NULL;
mapptr->baseobj = basenode;
mapptr->nodetype = ntype;
mapptr->ht = ht;
mapptr->local = local;
mapptr->ns = ns;
const xmlChar* tmp;
if (local) {
int len = local_len > INT_MAX ? -1 : (int) local_len;
if (doc != NULL && (tmp = xmlDictExists(doc->dict, (const xmlChar *)local, len)) != NULL) {
mapptr->local = (xmlChar*) tmp;
} else {
mapptr->local = xmlCharStrndup(local, len);
mapptr->free_local = true;
}
}
if (ns) {
int len = ns_len > INT_MAX ? -1 : (int) ns_len;
if (doc != NULL && (tmp = xmlDictExists(doc->dict, (const xmlChar *)ns, len)) != NULL) {
mapptr->ns = (xmlChar*) tmp;
} else {
mapptr->ns = xmlCharStrndup(ns, len);
mapptr->free_ns = true;
}
}
}
/* }}} */
@ -1010,10 +1032,13 @@ void dom_nnodemap_objects_free_storage(zend_object *object) /* {{{ */
dom_nnodemap_object *objmap = (dom_nnodemap_object *)intern->ptr;
if (objmap) {
if (objmap->local) {
if (objmap->cached_obj && GC_DELREF(&objmap->cached_obj->std) == 0) {
zend_objects_store_del(&objmap->cached_obj->std);
}
if (objmap->free_local) {
xmlFree(objmap->local);
}
if (objmap->ns) {
if (objmap->free_ns) {
xmlFree(objmap->ns);
}
if (!Z_ISUNDEF(objmap->baseobj_zv)) {
@ -1042,7 +1067,13 @@ zend_object *dom_nnodemap_objects_new(zend_class_entry *class_type) /* {{{ */
objmap->nodetype = 0;
objmap->ht = NULL;
objmap->local = NULL;
objmap->free_local = false;
objmap->ns = NULL;
objmap->free_ns = false;
objmap->cache_tag.modification_nr = 0;
objmap->cached_length = -1;
objmap->cached_obj = NULL;
objmap->cached_obj_index = 0;
return &intern->std;
}
@ -1220,19 +1251,25 @@ bool dom_has_feature(zend_string *feature, zend_string *version)
}
/* }}} end dom_has_feature */
xmlNode *dom_get_elements_by_tag_name_ns_raw(xmlNodePtr nodep, char *ns, char *local, int *cur, int index) /* {{{ */
xmlNode *dom_get_elements_by_tag_name_ns_raw(xmlNodePtr basep, xmlNodePtr nodep, char *ns, char *local, int *cur, int index) /* {{{ */
{
/* Can happen with detached document */
if (UNEXPECTED(nodep == NULL)) {
return NULL;
}
xmlNodePtr ret = NULL;
bool local_match_any = local[0] == '*' && local[1] == '\0';
/* Note: The spec says that ns == '' must be transformed to ns == NULL. In other words, they are equivalent.
* PHP however does not do this and internally uses the empty string everywhere when the user provides ns == NULL.
* This is because for PHP ns == NULL has another meaning: "match every namespace" instead of "match the empty namespace". */
bool ns_match_any = ns == NULL || (ns[0] == '*' && ns[1] == '\0');
while (nodep != NULL && (*cur <= index || index == -1)) {
while (*cur <= index) {
if (nodep->type == XML_ELEMENT_NODE) {
if (xmlStrEqual(nodep->name, (xmlChar *)local) || xmlStrEqual((xmlChar *)"*", (xmlChar *)local)) {
if (ns_match_any || (!strcmp(ns, "") && nodep->ns == NULL) || (nodep->ns != NULL && xmlStrEqual(nodep->ns->href, (xmlChar *)ns))) {
if (local_match_any || xmlStrEqual(nodep->name, (xmlChar *)local)) {
if (ns_match_any || (ns[0] == '\0' && nodep->ns == NULL) || (nodep->ns != NULL && xmlStrEqual(nodep->ns->href, (xmlChar *)ns))) {
if (*cur == index) {
ret = nodep;
break;
@ -1240,16 +1277,33 @@ xmlNode *dom_get_elements_by_tag_name_ns_raw(xmlNodePtr nodep, char *ns, char *l
(*cur)++;
}
}
ret = dom_get_elements_by_tag_name_ns_raw(nodep->children, ns, local, cur, index);
if (ret != NULL) {
break;
if (nodep->children) {
nodep = nodep->children;
continue;
}
}
nodep = nodep->next;
if (nodep->next) {
nodep = nodep->next;
} else {
/* Go upwards, until we find a parent node with a next sibling, or until we hit the base. */
do {
nodep = nodep->parent;
if (nodep == basep) {
return NULL;
}
/* This shouldn't happen, unless there's an invalidation bug somewhere. */
if (UNEXPECTED(nodep == NULL)) {
zend_throw_error(NULL, "Current node in traversal is not in the document. Please report this as a bug in php-src.");
return NULL;
}
} while (nodep->next == NULL);
nodep = nodep->next;
}
}
return ret;
}
/* }}} */
/* }}} end dom_element_get_elements_by_tag_name_ns_raw */
static inline bool is_empty_node(xmlNodePtr nodep)

View file

@ -82,15 +82,22 @@ typedef struct _dom_nnodemap_object {
dom_object *baseobj;
zval baseobj_zv;
int nodetype;
int cached_length;
xmlHashTable *ht;
xmlChar *local;
xmlChar *ns;
php_libxml_cache_tag cache_tag;
dom_object *cached_obj;
int cached_obj_index;
bool free_local : 1;
bool free_ns : 1;
} dom_nnodemap_object;
typedef struct {
zend_object_iterator intern;
zval curobj;
HashPosition pos;
php_libxml_cache_tag cache_tag;
} php_dom_iterator;
#include "domexception.h"
@ -113,14 +120,14 @@ void dom_set_old_ns(xmlDoc *doc, xmlNs *ns);
void dom_reconcile_ns(xmlDocPtr doc, xmlNodePtr nodep);
xmlNsPtr dom_get_nsdecl(xmlNode *node, xmlChar *localName);
void dom_normalize (xmlNodePtr nodep);
xmlNode *dom_get_elements_by_tag_name_ns_raw(xmlNodePtr nodep, char *ns, char *local, int *cur, int index);
xmlNode *dom_get_elements_by_tag_name_ns_raw(xmlNodePtr basep, xmlNodePtr nodep, char *ns, char *local, int *cur, int index);
void php_dom_create_implementation(zval *retval);
int dom_hierarchy(xmlNodePtr parent, xmlNodePtr child);
bool dom_has_feature(zend_string *feature, zend_string *version);
int dom_node_is_read_only(xmlNodePtr node);
int dom_node_children_valid(xmlNodePtr node);
void php_dom_create_iterator(zval *return_value, int ce_type);
void dom_namednode_iter(dom_object *basenode, int ntype, dom_object *intern, xmlHashTablePtr ht, xmlChar *local, xmlChar *ns);
void dom_namednode_iter(dom_object *basenode, int ntype, dom_object *intern, xmlHashTablePtr ht, const char *local, size_t local_len, const char *ns, size_t ns_len);
xmlNodePtr create_notation(const xmlChar *name, const xmlChar *ExternalID, const xmlChar *SystemID);
xmlNode *php_dom_libxml_hash_iter(xmlHashTable *ht, int index);
xmlNode *php_dom_libxml_notation_iter(xmlHashTable *ht, int index);
@ -153,6 +160,33 @@ void dom_child_node_remove(dom_object *context);
#define DOM_NODELIST 0
#define DOM_NAMEDNODEMAP 1
static zend_always_inline bool php_dom_is_cache_tag_stale_from_doc_ptr(const php_libxml_cache_tag *cache_tag, const php_libxml_doc_ptr *doc_ptr)
{
ZEND_ASSERT(cache_tag != NULL);
ZEND_ASSERT(doc_ptr != NULL);
/* See overflow comment in php_libxml_invalidate_node_list_cache(). */
#if SIZEOF_SIZE_T == 8
return cache_tag->modification_nr != doc_ptr->cache_tag.modification_nr;
#else
return cache_tag->modification_nr != doc_ptr->cache_tag.modification_nr || UNEXPECTED(doc_ptr->cache_tag.modification_nr == SIZE_MAX);
#endif
}
static zend_always_inline bool php_dom_is_cache_tag_stale_from_node(const php_libxml_cache_tag *cache_tag, const xmlNodePtr node)
{
ZEND_ASSERT(node != NULL);
return !node->doc || !node->doc->_private || php_dom_is_cache_tag_stale_from_doc_ptr(cache_tag, node->doc->_private);
}
static zend_always_inline void php_dom_mark_cache_tag_up_to_date_from_node(php_libxml_cache_tag *cache_tag, const xmlNodePtr node)
{
ZEND_ASSERT(cache_tag != NULL);
if (node->doc && node->doc->_private) {
const php_libxml_doc_ptr* doc_ptr = node->doc->_private;
cache_tag->modification_nr = doc_ptr->cache_tag.modification_nr;
}
}
PHP_MINIT_FUNCTION(dom);
PHP_MSHUTDOWN_FUNCTION(dom);
PHP_MINFO_FUNCTION(dom);

View file

@ -128,6 +128,8 @@ int dom_processinginstruction_data_write(dom_object *obj, zval *newval)
return FAILURE;
}
php_libxml_invalidate_node_list_cache_from_doc(nodep->doc);
xmlNodeSetContentLen(nodep, (xmlChar *) ZSTR_VAL(str), ZSTR_LEN(str) + 1);
zend_string_release_ex(str, 0);

View file

@ -0,0 +1,47 @@
--TEST--
DOMDocument::getElementsByTagName() is live
--EXTENSIONS--
dom
--FILE--
<?php
$doc = new DOMDocument;
$doc->loadXML( '<root><e i="1"/><e i="2"/><e i="3"/><e i="4"/><e i="5"/><e i="6"/><e i="7"/><e i="8"/><e i="9"/><e i="10"/><e i="11"/><e i="12"/><e i="13"/><e i="14"/><e i="15"/></root>' );
$root = $doc->documentElement;
$i = 0;
/* Note that the list is live. The explanation for the output is as follows:
Before the loop we have the following (writing only the attributes):
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
Now the loop starts, the current element is marked with a V. $i == 0:
V
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
1 gets printed. $i == 0, which is even, so 1 gets removed, which results in:
V
2 3 4 5 6 7 8 9 10 11 12 13 14 15
Note that everything shifted to the left.
Because the list is live, the current element pointer still refers to the first index, which now corresponds to element with attribute 2.
Now the foreach body ends, which means we go to the next element, which is now 3 instead of 2.
V
2 3 4 5 6 7 8 9 10 11 12 13 14 15
3 gets printed. $i == 1, which is odd, so nothing happens and we move on to the next element:
V
2 3 4 5 6 7 8 9 10 11 12 13 14 15
4 gets printed. $i == 2, which is even, so 4 gets removed, which results in:
V
2 3 5 6 7 8 9 10 11 12 13 14 15
Note again everything shifted to the left.
Now the foreach body ends, which means we go to the next element, which is now 6 instead of 5.
V
2 3 5 6 7 8 9 10 11 12 13 14 15
6 gets printed, etc... */
foreach ($doc->getElementsByTagName('e') as $node) {
print $node->getAttribute('i') . ' ';
if ($i++ % 2 == 0)
$root->removeChild($node);
}
print "\n";
?>
--EXPECT--
1 3 4 6 7 9 10 12 13 15

View file

@ -0,0 +1,29 @@
--TEST--
DOMDocument::getElementsByTagName() liveness with simplexml_import_dom
--EXTENSIONS--
dom
simplexml
--FILE--
<?php
$doc = new DOMDocument;
$doc->loadXML( '<root><e i="1"/><e i="2"/><e i="3"/><e i="4"/><e i="5"/><e i="6"/><e i="7"/><e i="8"/><e i="9"/><e i="10"/></root>' );
$list = $doc->getElementsByTagName('e');
print $list->item(5)->getAttribute('i')."\n";
echo "before import\n";
$s = simplexml_import_dom($doc->documentElement);
echo "after import\n";
unset($s->e[5]);
print $list->item(5)->getAttribute('i')."\n";
unset($s->e[5]);
print $list->item(5)->getAttribute('i')."\n";
?>
--EXPECT--
6
before import
after import
7
8

View file

@ -0,0 +1,89 @@
--TEST--
DOMDocument::getElementsByTagName() liveness tree walk
--EXTENSIONS--
dom
--FILE--
<?php
$doc = new DOMDocument;
$doc->loadXML('<root><container><a><b i="1"/><b i="2"/></a><b i="3"/></container><b i="4"/></root>');
echo "-- On first child, for --\n";
$list = $doc->documentElement->firstChild->getElementsByTagName('b');
var_dump($list->length);
for ($i = 0; $i < $list->length; $i++) {
echo $i, " ", $list->item($i)->getAttribute('i'), "\n";
}
// Try to access one beyond to check if we don't get excess elements
var_dump($list->item($i));
echo "-- On first child, foreach --\n";
foreach ($list as $item) {
echo $item->getAttribute('i'), "\n";
}
echo "-- On document, for --\n";
$list = $doc->getElementsByTagName('b');
var_dump($list->length);
for ($i = 0; $i < $list->length; $i++) {
echo $i, " ", $list->item($i)->getAttribute('i'), "\n";
}
// Try to access one beyond to check if we don't get excess elements
var_dump($list->item($i));
echo "-- On document, foreach --\n";
foreach ($list as $item) {
echo $item->getAttribute('i'), "\n";
}
echo "-- On document, after caching followed by removing --\n";
$list = $doc->documentElement->firstChild->getElementsByTagName('b');
$list->item(0); // Activate item cache
$list->item(0)->remove();
$list->item(0)->remove();
$list->item(0)->remove();
var_dump($list->length);
var_dump($list->item(0));
foreach ($list as $item) {
echo "Should not execute\n";
}
echo "-- On document, clean list after removal --\n";
$list = $doc->documentElement->firstChild->getElementsByTagName('b');
var_dump($list->length);
var_dump($list->item(0));
foreach ($list as $item) {
echo "Should not execute\n";
}
?>
--EXPECT--
-- On first child, for --
int(3)
0 1
1 2
2 3
NULL
-- On first child, foreach --
1
2
3
-- On document, for --
int(4)
0 1
1 2
2 3
3 4
NULL
-- On document, foreach --
1
2
3
4
-- On document, after caching followed by removing --
int(0)
NULL
-- On document, clean list after removal --
int(0)
NULL

View file

@ -0,0 +1,43 @@
--TEST--
DOMDocument::getElementsByTagName() liveness affected by writing properties
--EXTENSIONS--
dom
--FILE--
<?php
$xml = '<root xmlns:ns1="foo" xmlns:ns2="bar"><ns1:a/></root>';
$fields = ['nodeValue', 'textContent'];
foreach ($fields as $field) {
$doc = new DOMDocument;
$doc->loadXML($xml);
$list = $doc->getElementsByTagName('a');
var_dump($list->item(0) === NULL);
$doc->documentElement->{$field} = 'new_content';
var_dump($list->item(0) === NULL);
print $doc->saveXML();
}
// Shouldn't be affected
$doc = new DOMDocument;
$doc->loadXML($xml);
$list = $doc->getElementsByTagNameNS('foo', 'a');
var_dump($list->item(0) === NULL);
$doc->documentElement->firstChild->prefix = 'ns2';
var_dump($list->item(0) === NULL);
print $doc->saveXML();
?>
--EXPECT--
bool(false)
bool(true)
<?xml version="1.0"?>
<root xmlns:ns1="foo" xmlns:ns2="bar">new_content</root>
bool(false)
bool(true)
<?xml version="1.0"?>
<root xmlns:ns1="foo" xmlns:ns2="bar">new_content</root>
bool(false)
bool(false)
<?xml version="1.0"?>
<root xmlns:ns1="foo" xmlns:ns2="bar"><ns2:a xmlns:ns2="foo"/></root>

View file

@ -0,0 +1,43 @@
--TEST--
DOMDocument::getElementsByTagName() liveness with DOMDocument::xinclude()
--EXTENSIONS--
dom
--FILE--
<?php
// Adapted from https://www.php.net/manual/en/domdocument.xinclude.php
$xml = <<<EOD
<?xml version="1.0" ?>
<chapter xmlns:xi="http://www.w3.org/2001/XInclude">
<p>Hello</p>
<para>
<xi:include href="book.xml">
<xi:fallback>
<p>xinclude: book.xml not found</p>
</xi:fallback>
</xi:include>
</para>
</chapter>
EOD;
$dom = new DOMDocument;
$dom->loadXML($xml);
$elements = $dom->getElementsByTagName('p');
var_dump($elements->item(0)->textContent);
@$dom->xinclude();
var_dump($elements->item(1)->textContent);
echo $dom->saveXML();
?>
--EXPECT--
string(5) "Hello"
string(28) "xinclude: book.xml not found"
<?xml version="1.0"?>
<chapter xmlns:xi="http://www.w3.org/2001/XInclude">
<p>Hello</p>
<para>
<p>xinclude: book.xml not found</p>
</para>
</chapter>

View file

@ -0,0 +1,69 @@
--TEST--
DOMDocument node list item cache invalidation
--EXTENSIONS--
dom
--FILE--
<?php
echo "-- Switch document test --\n";
$doc = new DOMDocument();
$doc->loadHTML('<p>hello</p><p>world</p>');
$elements = $doc->getElementsByTagName('p');
$elements->item(0); // Activate item cache
$doc->loadHTML('<p>A</p><p>B</p><p>C</p>');
var_dump($elements);
var_dump($elements->item(0)->textContent); // First lookup
var_dump($elements->item(2)->textContent); // Uses cache
var_dump($elements->item(1)->textContent); // Does not use cache
echo "-- Remove cached item test --\n";
$doc = new DOMDocument();
$doc->loadHTML('<p>hello</p><p>world</p><p>!</p>');
$elements = $doc->getElementsByTagName('p');
$item = $elements->item(0); // Activate item cache
var_dump($item->textContent);
$item->remove();
// Now element 0 means "world", and 1 means "!"
unset($item);
$item = $elements->item(1);
var_dump($item->textContent);
echo "-- Removal of cached item in loop test --\n";
$doc = new DOMDocument;
$doc->loadXML( '<root><e i="1"/><e i="2"/><e i="3"/><e i="4"/><e i="5"/><e i="6"/><e i="7"/><e i="8"/><e i="9"/><e i="10"/><e i="11"/><e i="12"/><e i="13"/><e i="14"/><e i="15"/></root>' );
$root = $doc->documentElement;
$i = 0;
$elements = $root->getElementsByTagName('e');
for ($i = 0; $i < 11; $i++) {
$node = $elements->item($i);
print $node->getAttribute('i') . ' ';
if ($i++ % 2 == 0)
$root->removeChild( $node );
}
print "\n";
?>
--EXPECTF--
-- Switch document test --
object(DOMNodeList)#2 (1) {
["length"]=>
int(3)
}
string(1) "A"
string(1) "C"
string(1) "B"
-- Remove cached item test --
string(5) "hello"
string(1) "!"
-- Removal of cached item in loop test --
1 4 7 10 13
Fatal error: Uncaught Error: Call to a member function getAttribute() on null in %s:%d
Stack trace:
#0 {main}
thrown in %s on line %d

View file

@ -0,0 +1,34 @@
--TEST--
DOMDocument node list length cache invalidation
--EXTENSIONS--
dom
--FILE--
<?php
$doc = new DOMDocument();
$doc->loadHTML('<p>hello</p><p>world</p><p>!</p>');
$elements = $doc->getElementsByTagName('p');
$item = $elements->item(0); // Activate item cache
var_dump($elements->length); // Length not cached yet, should still compute
$item->remove();
// Now element 0 means "world", and 1 means "!"
unset($item);
var_dump($elements->length);
$item = $elements->item(1);
var_dump($item->textContent);
$item = $elements->item(1);
var_dump($item->textContent);
$item = $elements->item(0);
var_dump($item->textContent);
$item = $elements->item(1);
var_dump($item->textContent);
?>
--EXPECT--
int(3)
int(2)
string(1) "!"
string(1) "!"
string(5) "world"
string(1) "!"

View file

@ -0,0 +1,43 @@
--TEST--
DOMDocument liveness caching invalidation by textContent
--EXTENSIONS--
dom
--FILE--
<?php
$doc = new DOMDocument;
$doc->loadXML('<root><e id="1"/><e id="2"/><e id="3"/><e id="4"/><e id="5"/></root>');
$root = $doc->documentElement;
$i = 0;
echo "-- Overwrite during iteration --\n";
foreach ($doc->getElementsByTagName('e') as $node) {
if ($i++ == 2) {
$root->textContent = 'overwrite';
}
var_dump($node->tagName, $node->getAttribute('id'));
}
echo "-- Empty iteration --\n";
foreach ($doc->getElementsByTagName('e') as $node) {
echo "Should not execute\n";
}
echo "-- After adding an element again --\n";
$root->appendChild(new DOMElement('e'));
foreach ($doc->getElementsByTagName('e') as $node) {
echo "Should execute once\n";
}
?>
--EXPECT--
-- Overwrite during iteration --
string(1) "e"
string(1) "1"
string(1) "e"
string(1) "2"
string(1) "e"
string(1) "3"
-- Empty iteration --
-- After adding an element again --
Should execute once

View file

@ -0,0 +1,16 @@
--TEST--
Node list cache should not break on DOMElement::getElementsByTagName() without document
--EXTENSIONS--
dom
--FILE--
<?php
$element = new DOMElement("b");
foreach ($element->getElementsByTagName("b") as $x) {
var_dump($x);
}
?>
Done
--EXPECT--
Done

View file

@ -1163,8 +1163,14 @@ PHP_LIBXML_API int php_libxml_increment_node_ptr(php_libxml_node_object *object,
object->node->_private = private_data;
}
} else {
if (UNEXPECTED(node->type == XML_DOCUMENT_NODE || node->type == XML_HTML_DOCUMENT_NODE)) {
php_libxml_doc_ptr *doc_ptr = emalloc(sizeof(php_libxml_doc_ptr));
doc_ptr->cache_tag.modification_nr = 1; /* iterators start at 0, such that they will start in an uninitialised state */
object->node = (php_libxml_node_ptr *) doc_ptr; /* downcast */
} else {
object->node = emalloc(sizeof(php_libxml_node_ptr));
}
ret_refcount = 1;
object->node = emalloc(sizeof(php_libxml_node_ptr));
object->node->node = node;
object->node->refcount = 1;
object->node->_private = private_data;

View file

@ -69,6 +69,16 @@ typedef struct _php_libxml_node_ptr {
void *_private;
} php_libxml_node_ptr;
typedef struct {
size_t modification_nr;
} php_libxml_cache_tag;
/* extends php_libxml_node_ptr */
typedef struct {
php_libxml_node_ptr node_ptr;
php_libxml_cache_tag cache_tag;
} php_libxml_doc_ptr;
typedef struct _php_libxml_node_object {
php_libxml_node_ptr *node;
php_libxml_ref_obj *document;
@ -81,6 +91,27 @@ static inline php_libxml_node_object *php_libxml_node_fetch_object(zend_object *
return (php_libxml_node_object *)((char*)(obj) - obj->handlers->offset);
}
static zend_always_inline void php_libxml_invalidate_node_list_cache(php_libxml_doc_ptr *doc_ptr)
{
#if SIZEOF_SIZE_T == 8
/* If one operation happens every nanosecond, then it would still require 584 years to overflow
* the counter. So we'll just assume this never happens. */
doc_ptr->cache_tag.modification_nr++;
#else
size_t new_modification_nr = doc_ptr->cache_tag.modification_nr + 1;
if (EXPECTED(new_modification_nr > 0)) { /* unsigned overflow; checking after addition results in one less instruction */
doc_ptr->cache_tag.modification_nr = new_modification_nr;
}
#endif
}
static zend_always_inline void php_libxml_invalidate_node_list_cache_from_doc(xmlDocPtr docp)
{
if (docp && docp->_private) { /* docp is NULL for detached nodes */
php_libxml_invalidate_node_list_cache(docp->_private);
}
}
#define Z_LIBXML_NODE_P(zv) php_libxml_node_fetch_object(Z_OBJ_P((zv)))
typedef void * (*php_libxml_export_node) (zval *object);

View file

@ -442,6 +442,8 @@ long_dim:
GET_NODE(sxe, node);
php_libxml_invalidate_node_list_cache_from_doc(node->doc);
if (sxe->iter.type == SXE_ITER_ATTRLIST) {
attribs = 1;
elements = 0;
@ -813,6 +815,8 @@ static void sxe_prop_dim_delete(zend_object *object, zval *member, bool elements
GET_NODE(sxe, node);
php_libxml_invalidate_node_list_cache_from_doc(node->doc);
if (Z_TYPE_P(member) == IS_LONG) {
if (sxe->iter.type != SXE_ITER_ATTRLIST) {
attribs = 0;
@ -1686,6 +1690,8 @@ PHP_METHOD(SimpleXMLElement, addChild)
sxe = Z_SXEOBJ_P(ZEND_THIS);
GET_NODE(sxe, node);
php_libxml_invalidate_node_list_cache_from_doc(node->doc);
if (sxe->iter.type == SXE_ITER_ATTRLIST) {
php_error_docref(NULL, E_WARNING, "Cannot add element to attributes");
return;