From e878b9f39094acaa8da5a782618ab41f5293f30b Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Tue, 30 Apr 2024 20:29:37 +0200 Subject: [PATCH 1/3] Fix crashes when entity declaration is removed while still having entity references libxml doesn't do reference counting inside its node types. It's possible to remove an entity declaration out of the document, but then entity references will keep pointing to that stale declaration. This will cause crashes. One idea would be to check when a declaration is removed, to trigger a hook that updates all references. However this means we have to keep track of all references somehow, which would be a high-overhead solution. The solution in this patch makes sure that the fields are always updated before they are read. Closes GH-14089. --- NEWS | 3 + ext/dom/dom_properties.h | 5 ++ ext/dom/entityreference.c | 60 ++++++++++++++++++++ ext/dom/nodelist.c | 14 ++++- ext/dom/php_dom.c | 19 ++++++- ext/dom/php_dom.h | 1 + ext/dom/tests/entity_reference_stale_01.phpt | 41 +++++++++++++ ext/dom/tests/entity_reference_stale_02.phpt | 35 ++++++++++++ 8 files changed, 175 insertions(+), 3 deletions(-) create mode 100644 ext/dom/tests/entity_reference_stale_01.phpt create mode 100644 ext/dom/tests/entity_reference_stale_02.phpt diff --git a/NEWS b/NEWS index 4c17d6e6624..58036146250 100644 --- a/NEWS +++ b/NEWS @@ -2,6 +2,9 @@ PHP NEWS ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ?? ??? ????, PHP 8.2.20 +- DOM: + . Fix crashes when entity declaration is removed while still having entity + references. (nielsdos) 09 May 2024, PHP 8.2.19 diff --git a/ext/dom/dom_properties.h b/ext/dom/dom_properties.h index 32f24193238..9c4e2f5ee57 100644 --- a/ext/dom/dom_properties.h +++ b/ext/dom/dom_properties.h @@ -81,6 +81,11 @@ int dom_entity_actual_encoding_read(dom_object *obj, zval *retval); int dom_entity_encoding_read(dom_object *obj, zval *retval); int dom_entity_version_read(dom_object *obj, zval *retval); +/* entity reference properties */ +int dom_entity_reference_child_read(dom_object *obj, zval *retval); +int dom_entity_reference_text_content_read(dom_object *obj, zval *retval); +int dom_entity_reference_child_nodes_read(dom_object *obj, zval *retval); + /* namednodemap properties */ int dom_namednodemap_length_read(dom_object *obj, zval *retval); diff --git a/ext/dom/entityreference.c b/ext/dom/entityreference.c index 61f0b92eedc..30928b1c6d1 100644 --- a/ext/dom/entityreference.c +++ b/ext/dom/entityreference.c @@ -22,6 +22,7 @@ #include "php.h" #if defined(HAVE_LIBXML) && defined(HAVE_DOM) #include "php_dom.h" +#include "dom_properties.h" /* * class DOMEntityReference extends DOMNode @@ -65,4 +66,63 @@ PHP_METHOD(DOMEntityReference, __construct) } /* }}} end DOMEntityReference::__construct */ +/* The following property handlers are necessary because of special lifetime management with entities and entity + * references. The issue is that entity references hold a reference to an entity declaration, but don't + * register that reference anywhere. When the entity declaration disappears we have no way of notifying the + * entity references. Override the property handlers for the declaration-accessing properties to fix this problem. */ + +xmlEntityPtr dom_entity_reference_fetch_and_sync_declaration(xmlNodePtr reference) +{ + xmlEntityPtr entity = xmlGetDocEntity(reference->doc, reference->name); + reference->children = (xmlNodePtr) entity; + reference->last = (xmlNodePtr) entity; + reference->content = entity ? entity->content : NULL; + return entity; +} + +int dom_entity_reference_child_read(dom_object *obj, zval *retval) +{ + xmlNodePtr nodep = dom_object_get_node(obj); + + if (nodep == NULL) { + php_dom_throw_error(INVALID_STATE_ERR, true); + return FAILURE; + } + + xmlEntityPtr entity = dom_entity_reference_fetch_and_sync_declaration(nodep); + if (entity == NULL) { + ZVAL_NULL(retval); + return SUCCESS; + } + + php_dom_create_object((xmlNodePtr) entity, retval, obj); + return SUCCESS; +} + +int dom_entity_reference_text_content_read(dom_object *obj, zval *retval) +{ + xmlNodePtr nodep = dom_object_get_node(obj); + + if (nodep == NULL) { + php_dom_throw_error(INVALID_STATE_ERR, true); + return FAILURE; + } + + dom_entity_reference_fetch_and_sync_declaration(nodep); + return dom_node_text_content_read(obj, retval); +} + +int dom_entity_reference_child_nodes_read(dom_object *obj, zval *retval) +{ + xmlNodePtr nodep = dom_object_get_node(obj); + + if (nodep == NULL) { + php_dom_throw_error(INVALID_STATE_ERR, true); + return FAILURE; + } + + dom_entity_reference_fetch_and_sync_declaration(nodep); + return dom_node_child_nodes_read(obj, retval); +} + #endif diff --git a/ext/dom/nodelist.c b/ext/dom/nodelist.c index 20f6320a546..a8e713f67d0 100644 --- a/ext/dom/nodelist.c +++ b/ext/dom/nodelist.c @@ -31,6 +31,16 @@ * Since: */ +static xmlNodePtr dom_nodelist_iter_start_first_child(xmlNodePtr nodep) +{ + if (nodep->type == XML_ENTITY_REF_NODE) { + /* See entityreference.c */ + dom_entity_reference_fetch_and_sync_declaration(nodep); + } + + return nodep->children; +} + int php_dom_get_nodelist_length(dom_object *obj) { dom_nnodemap_object *objmap = (dom_nnodemap_object *) obj->ptr; @@ -54,7 +64,7 @@ int php_dom_get_nodelist_length(dom_object *obj) int count = 0; if (objmap->nodetype == XML_ATTRIBUTE_NODE || objmap->nodetype == XML_ELEMENT_NODE) { - xmlNodePtr curnode = nodep->children; + xmlNodePtr curnode = dom_nodelist_iter_start_first_child(nodep); if (curnode) { count++; while (curnode->next != NULL) { @@ -128,7 +138,7 @@ void php_dom_nodelist_get_item_into_zval(dom_nnodemap_object *objmap, zend_long if (nodep) { int count = 0; if (objmap->nodetype == XML_ATTRIBUTE_NODE || objmap->nodetype == XML_ELEMENT_NODE) { - xmlNodePtr curnode = nodep->children; + xmlNodePtr curnode = dom_nodelist_iter_start_first_child(nodep); while (count < index && curnode != NULL) { count++; curnode = curnode->next; diff --git a/ext/dom/php_dom.c b/ext/dom/php_dom.c index c86cab99c6c..c9123ab14df 100644 --- a/ext/dom/php_dom.c +++ b/ext/dom/php_dom.c @@ -72,6 +72,7 @@ static HashTable classes; static HashTable dom_document_prop_handlers; static HashTable dom_documentfragment_prop_handlers; static HashTable dom_node_prop_handlers; +static HashTable dom_entity_reference_prop_handlers; static HashTable dom_nodelist_prop_handlers; static HashTable dom_namednodemap_prop_handlers; static HashTable dom_characterdata_prop_handlers; @@ -284,6 +285,14 @@ static void dom_register_prop_handler(HashTable *prop_handler, char *name, size_ zend_string_release_ex(str, 1); } +static void dom_override_prop_handler(HashTable *prop_handler, char *name, size_t name_len, dom_read_t read_func, dom_write_t write_func) +{ + dom_prop_handler hnd; + hnd.read_func = read_func; + hnd.write_func = write_func; + zend_hash_str_update_mem(prop_handler, name, name_len, &hnd, sizeof(dom_prop_handler)); +} + static zval *dom_get_property_ptr_ptr(zend_object *object, zend_string *name, int type, void **cache_slot) { dom_object *obj = php_dom_obj_from_obj(object); @@ -807,7 +816,14 @@ PHP_MINIT_FUNCTION(dom) dom_entityreference_class_entry = register_class_DOMEntityReference(dom_node_class_entry); dom_entityreference_class_entry->create_object = dom_objects_new; - zend_hash_add_ptr(&classes, dom_entityreference_class_entry->name, &dom_node_prop_handlers); + + zend_hash_init(&dom_entity_reference_prop_handlers, 0, NULL, dom_dtor_prop_handler, true); + zend_hash_merge(&dom_entity_reference_prop_handlers, &dom_node_prop_handlers, dom_copy_prop_handler, false); + dom_override_prop_handler(&dom_entity_reference_prop_handlers, "firstChild", sizeof("firstChild")-1, dom_entity_reference_child_read, NULL); + dom_override_prop_handler(&dom_entity_reference_prop_handlers, "lastChild", sizeof("lastChild")-1, dom_entity_reference_child_read, NULL); + dom_override_prop_handler(&dom_entity_reference_prop_handlers, "textContent", sizeof("textContent")-1, dom_entity_reference_text_content_read, NULL); + dom_override_prop_handler(&dom_entity_reference_prop_handlers, "childNodes", sizeof("childNodes")-1, dom_entity_reference_child_nodes_read, NULL); + zend_hash_add_ptr(&classes, dom_entityreference_class_entry->name, &dom_entity_reference_prop_handlers); dom_processinginstruction_class_entry = register_class_DOMProcessingInstruction(dom_node_class_entry); dom_processinginstruction_class_entry->create_object = dom_objects_new; @@ -869,6 +885,7 @@ PHP_MSHUTDOWN_FUNCTION(dom) /* {{{ */ zend_hash_destroy(&dom_document_prop_handlers); zend_hash_destroy(&dom_documentfragment_prop_handlers); zend_hash_destroy(&dom_node_prop_handlers); + zend_hash_destroy(&dom_entity_reference_prop_handlers); zend_hash_destroy(&dom_namespace_node_prop_handlers); zend_hash_destroy(&dom_nodelist_prop_handlers); zend_hash_destroy(&dom_namednodemap_prop_handlers); diff --git a/ext/dom/php_dom.h b/ext/dom/php_dom.h index 9a57996729d..ccf665c417c 100644 --- a/ext/dom/php_dom.h +++ b/ext/dom/php_dom.h @@ -139,6 +139,7 @@ xmlNode *php_dom_libxml_notation_iter(xmlHashTable *ht, int index); zend_object_iterator *php_dom_get_iterator(zend_class_entry *ce, zval *object, int by_ref); void dom_set_doc_classmap(php_libxml_ref_obj *document, zend_class_entry *basece, zend_class_entry *ce); xmlNodePtr php_dom_create_fake_namespace_decl(xmlNodePtr nodep, xmlNsPtr original, zval *return_value, dom_object *parent_intern); +xmlEntityPtr dom_entity_reference_fetch_and_sync_declaration(xmlNodePtr reference); void dom_parent_node_prepend(dom_object *context, zval *nodes, int nodesc); void dom_parent_node_append(dom_object *context, zval *nodes, int nodesc); diff --git a/ext/dom/tests/entity_reference_stale_01.phpt b/ext/dom/tests/entity_reference_stale_01.phpt new file mode 100644 index 00000000000..dc1828c3cd9 --- /dev/null +++ b/ext/dom/tests/entity_reference_stale_01.phpt @@ -0,0 +1,41 @@ +--TEST-- +Entity references with stale entity declaration 01 +--EXTENSIONS-- +dom +--FILE-- +loadXML(<< +]> +&foo; +XML); + +$ref = $dom->documentElement->firstChild; +$decl = $ref->firstChild; + +$nodes = $ref->childNodes; +$dom->removeChild($dom->doctype); +unset($decl); + +var_dump($nodes); +var_dump($ref->firstChild); +var_dump($ref->lastChild); +var_dump($ref->textContent); +var_dump($ref->childNodes); + +?> +--EXPECT-- +object(DOMNodeList)#4 (1) { + ["length"]=> + int(0) +} +NULL +NULL +string(0) "" +object(DOMNodeList)#2 (1) { + ["length"]=> + int(0) +} diff --git a/ext/dom/tests/entity_reference_stale_02.phpt b/ext/dom/tests/entity_reference_stale_02.phpt new file mode 100644 index 00000000000..ea93b3ca3ef --- /dev/null +++ b/ext/dom/tests/entity_reference_stale_02.phpt @@ -0,0 +1,35 @@ +--TEST-- +Entity references with stale entity declaration 02 +--EXTENSIONS-- +dom +--FILE-- +loadXML(<< + + +]> +&foo1; +XML); + +$ref = $dom->documentElement->firstChild; +$decl = $ref->firstChild; + +$nodes = $ref->childNodes; +$iter = $nodes->getIterator(); +$iter->next(); +$dom->removeChild($dom->doctype); +unset($decl); + +try { + $iter->current()->publicId; +} catch (Error $e) { + echo $e->getMessage(), "\n"; +} + +?> +--EXPECT-- +Couldn't fetch DOMEntity. Node no longer exists From 30a0b0359ed8338c0e3acd1682de3cf96429e898 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Tue, 30 Apr 2024 20:50:30 +0200 Subject: [PATCH 2/3] Fix references not handled correctly in C14N Closes GH-14090. --- NEWS | 1 + ext/dom/node.c | 5 +-- ext/dom/tests/DOMNode_C14N_references.phpt | 41 ++++++++++++++++++++++ 3 files changed, 45 insertions(+), 2 deletions(-) create mode 100644 ext/dom/tests/DOMNode_C14N_references.phpt diff --git a/NEWS b/NEWS index 58036146250..3a95dec8351 100644 --- a/NEWS +++ b/NEWS @@ -5,6 +5,7 @@ PHP NEWS - DOM: . Fix crashes when entity declaration is removed while still having entity references. (nielsdos) + . Fix references not handled correctly in C14N. (nielsdos) 09 May 2024, PHP 8.2.19 diff --git a/ext/dom/node.c b/ext/dom/node.c index 973505c5b01..82f40860cfd 100644 --- a/ext/dom/node.c +++ b/ext/dom/node.c @@ -1614,7 +1614,7 @@ static void dom_canonicalization(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{ zval *tmp; char *xquery; - tmp = zend_hash_str_find(ht, "query", sizeof("query")-1); + tmp = zend_hash_str_find_deref(ht, "query", sizeof("query")-1); if (!tmp) { /* if mode == 0 then $xpath arg is 3, if mode == 1 then $xpath is 4 */ zend_argument_value_error(3 + mode, "must have a \"query\" key"); @@ -1630,12 +1630,13 @@ static void dom_canonicalization(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{ ctxp = xmlXPathNewContext(docp); ctxp->node = nodep; - tmp = zend_hash_str_find(ht, "namespaces", sizeof("namespaces")-1); + tmp = zend_hash_str_find_deref(ht, "namespaces", sizeof("namespaces")-1); if (tmp && Z_TYPE_P(tmp) == IS_ARRAY && !HT_IS_PACKED(Z_ARRVAL_P(tmp))) { zval *tmpns; zend_string *prefix; ZEND_HASH_MAP_FOREACH_STR_KEY_VAL(Z_ARRVAL_P(tmp), prefix, tmpns) { + ZVAL_DEREF(tmpns); if (Z_TYPE_P(tmpns) == IS_STRING) { if (prefix) { xmlXPathRegisterNs(ctxp, (xmlChar *) ZSTR_VAL(prefix), (xmlChar *) Z_STRVAL_P(tmpns)); diff --git a/ext/dom/tests/DOMNode_C14N_references.phpt b/ext/dom/tests/DOMNode_C14N_references.phpt new file mode 100644 index 00000000000..514e22be636 --- /dev/null +++ b/ext/dom/tests/DOMNode_C14N_references.phpt @@ -0,0 +1,41 @@ +--TEST-- +Test: Canonicalization - C14N() with references +--EXTENSIONS-- +dom +--FILE-- + + + + + + + + +EOXML; + +$dom = new DOMDocument(); +$dom->loadXML($xml); +$doc = $dom->documentElement->firstChild; + +$xpath = [ + 'query' => '(//a:contain | //a:bar | .//namespace::*)', + 'namespaces' => ['a' => 'http://www.example.com/ns/foo'], +]; +$prefixes = ['test']; + +foreach ($xpath['namespaces'] as $k => &$v); +unset($v); +foreach ($xpath as $k => &$v); +unset($v); +foreach ($prefixes as $k => &$v); +unset($v); + +echo $doc->C14N(true, false, $xpath, $prefixes); +?> +--EXPECT-- + From 2dbe2d62b3785b26516a6450a74137477b4019e6 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Tue, 30 Apr 2024 21:09:29 +0200 Subject: [PATCH 3/3] Fix crash when calling childNodes next() when iterator is exhausted Closes GH-14091. --- NEWS | 2 ++ ext/dom/dom_iterators.c | 3 +-- ext/dom/tests/childNodes_current_crash.phpt | 25 +++++++++++++++++++++ 3 files changed, 28 insertions(+), 2 deletions(-) create mode 100644 ext/dom/tests/childNodes_current_crash.phpt diff --git a/NEWS b/NEWS index 3a95dec8351..f124c8e4ed5 100644 --- a/NEWS +++ b/NEWS @@ -6,6 +6,8 @@ PHP NEWS . Fix crashes when entity declaration is removed while still having entity references. (nielsdos) . Fix references not handled correctly in C14N. (nielsdos) + . Fix crash when calling childNodes next() when iterator is exhausted. + (nielsdos) 09 May 2024, PHP 8.2.19 diff --git a/ext/dom/dom_iterators.c b/ext/dom/dom_iterators.c index 72c97104db0..670f08a679f 100644 --- a/ext/dom/dom_iterators.c +++ b/ext/dom/dom_iterators.c @@ -147,8 +147,7 @@ static int php_dom_iterator_valid(zend_object_iterator *iter) /* {{{ */ zval *php_dom_iterator_current_data(zend_object_iterator *iter) /* {{{ */ { php_dom_iterator *iterator = (php_dom_iterator *)iter; - - return &iterator->curobj; + return Z_ISUNDEF(iterator->curobj) ? NULL : &iterator->curobj; } /* }}} */ diff --git a/ext/dom/tests/childNodes_current_crash.phpt b/ext/dom/tests/childNodes_current_crash.phpt new file mode 100644 index 00000000000..aa93cf33a64 --- /dev/null +++ b/ext/dom/tests/childNodes_current_crash.phpt @@ -0,0 +1,25 @@ +--TEST-- +Crash in childNodes iterator current() +--EXTENSIONS-- +dom +--FILE-- +loadXML('foo1'); + +$nodes = $dom->documentElement->childNodes; +$iter = $nodes->getIterator(); + +var_dump($iter->valid()); +var_dump($iter->current()?->wholeText); +$iter->next(); +var_dump($iter->valid()); +var_dump($iter->current()?->wholeText); + +?> +--EXPECT-- +bool(true) +string(4) "foo1" +bool(false) +NULL