php-src/ext/dom/php_dom.h
Niels Dossche c3f0797385
Implement iteration cache, item cache and length cache for node list iteration (#11330)
* Implement iteration cache, item cache and length cache for node list iteration

The current implementation follows the spec requirement that the list
must be "live". This means that changes in the document must be
reflected in the existing node lists without requiring the user to
refetch the node list.
The consequence is that getting any item, or the length of the list,
always starts searching from the root element of the node list. This
results in O(n) time to get any item or the length. If there's a for
loop over the node list, this means the iterations will take O(n²) time
in total. This causes real-world performance issues with potential for
downtime (see GH-11308 and its references for details).

We fix this by introducing a caching strategy. We cache the last
iterated object in the iterator, the last requested item in the node
list, and the last length computation. To invalidate the cache, we
simply count the number of modifications made to the containing
document. If the modification number does not match what the number was
during caching, we know the document has been modified and the cache is
invalid. If this ever overflows, we saturate the modification number and
don't do any caching anymore. Note that we don't check for overflow on
64-bit systems because it would take hundreds of years to overflow.

Fixes GH-11308.
2023-06-03 00:13:14 +02:00

194 lines
7.3 KiB
C

/*
+----------------------------------------------------------------------+
| Copyright (c) The PHP Group |
+----------------------------------------------------------------------+
| This source file is subject to version 3.01 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
| available through the world-wide-web at the following url: |
| https://www.php.net/license/3_01.txt |
| If you did not receive a copy of the PHP license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@php.net so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
| Authors: Christian Stocker <chregu@php.net> |
| Rob Richards <rrichards@php.net> |
| Marcus Borger <helly@php.net> |
+----------------------------------------------------------------------+
*/
#ifndef PHP_DOM_H
#define PHP_DOM_H
/* Module entry of the DOM extension; only declared here, the definition lives elsewhere. */
extern zend_module_entry dom_module_entry;
/* Address of the module entry declared above. */
#define phpext_dom_ptr &dom_module_entry
#ifdef ZTS
#include "TSRM.h"
#endif
#include <libxml/parser.h>
#include <libxml/parserInternals.h>
#include <libxml/tree.h>
#include <libxml/uri.h>
#include <libxml/xmlerror.h>
#include <libxml/xinclude.h>
#include <libxml/hash.h>
#include <libxml/c14n.h>
#ifdef LIBXML_HTML_ENABLED
#include <libxml/HTMLparser.h>
#include <libxml/HTMLtree.h>
#endif
#ifdef LIBXML_XPATH_ENABLED
#include <libxml/xpath.h>
#include <libxml/xpathInternals.h>
#endif
#ifdef LIBXML_XPTR_ENABLED
#include <libxml/xpointer.h>
#endif
/* On Windows builds (PHP_WIN32), make sure DOM_EXPORTS is defined before the
 * headers below are included.
 * NOTE(review): presumably consumed by an export decorator in xml_common.h - confirm. */
#ifdef PHP_WIN32
#ifndef DOM_EXPORTS
#define DOM_EXPORTS
#endif
#endif
#include "xml_common.h"
#include "ext/libxml/php_libxml.h"
#include "zend_exceptions.h"
#include "dom_ce.h"
/* DOM API version. Please bump it whenever anything in the API changes, so
 * that script authors can easily check which behaviour they are running against.
 * It can be checked with phpversion("dom");
 */
#define DOM_API_VERSION "20031129"
/* Custom node type used for iteration. It reuses the value of
 * XML_XINCLUDE_START, a node type that is otherwise unused here. */
#define DOM_NODESET XML_XINCLUDE_START
/* Object storage for an XPath object: XPath-specific state followed by the
 * embedded generic dom_object. php_xpath_obj_from_obj() depends on the offset
 * of `dom` inside this struct, so its position matters. */
typedef struct _dom_xpath_object {
/* NOTE(review): presumably selects whether/which PHP functions are callable from XPath - confirm against the XPath implementation. */
int registerPhpFunctions;
/* NOTE(review): presumably toggles registering the context node's namespaces - confirm. */
int register_node_ns;
/* NOTE(review): presumably the set of registered PHP functions - confirm. */
HashTable *registered_phpfunctions;
/* NOTE(review): purpose not visible from this header - confirm. */
HashTable *node_list;
/* Embedded generic DOM object; the enclosing struct is recovered from it by offset arithmetic. */
dom_object dom;
} dom_xpath_object;
/* Map a zend_object pointer back to the dom_xpath_object that contains it.
 *
 * The zend_object is the `std` member of a dom_object, which in turn is the
 * `dom` member of dom_xpath_object, so the container starts at the sum of
 * both member offsets before `obj`.
 */
static inline dom_xpath_object *php_xpath_obj_from_obj(zend_object *obj) {
	const size_t std_offset = XtOffsetOf(dom_xpath_object, dom) + XtOffsetOf(dom_object, std);
	char *container = (char *) obj - std_offset;

	return (dom_xpath_object *) container;
}
/* Fetch the dom_xpath_object behind an object zval: takes Z_OBJ_P(zv) and maps it through php_xpath_obj_from_obj(). */
#define Z_XPATHOBJ_P(zv) php_xpath_obj_from_obj(Z_OBJ_P((zv)))
/* State behind a node list / named node map object, including the caches
 * that avoid restarting the search for every item or length request. */
typedef struct _dom_nnodemap_object {
/* NOTE(review): presumably the DOM object this list/map was derived from - confirm. */
dom_object *baseobj;
/* NOTE(review): presumably a zval referring to the same base object - confirm. */
zval baseobj_zv;
/* NOTE(review): presumably a libxml node type or DOM_NODESET - confirm. */
int nodetype;
/* Cached result of the last length computation. */
int cached_length;
xmlHashTable *ht;
/* NOTE(review): presumably the local-name and namespace filters of the list - confirm. */
xmlChar *local;
xmlChar *ns;
/* Modification number recorded when the caches were filled; compared with the
 * document's current number to detect that the caches are stale. */
php_libxml_cache_tag cache_tag;
/* Last requested item and the index it was requested at. */
dom_object *cached_obj;
int cached_obj_index;
/* NOTE(review): presumably record whether `local` / `ns` are owned and must be freed - confirm. */
bool free_local : 1;
bool free_ns : 1;
} dom_nnodemap_object;
/* Iterator state used when iterating over DOM lists. */
typedef struct {
/* Base Zend iterator structure. */
zend_object_iterator intern;
/* NOTE(review): presumably the object at the current iteration position - confirm. */
zval curobj;
/* NOTE(review): presumably the cursor used for HashTable-backed lists - confirm. */
HashPosition pos;
/* NOTE(review): presumably the modification number snapshot that tells whether
 * the cached iteration state is still valid - confirm. */
php_libxml_cache_tag cache_tag;
} php_dom_iterator;
#include "domexception.h"
/* Internal functions shared between the DOM extension's source files.
 * Only the declarations live in this header. */

/* Object and document-property access, and object constructors. */
dom_object *dom_object_get_data(xmlNodePtr obj);
dom_doc_propsptr dom_get_doc_props(php_libxml_ref_obj *document);
libxml_doc_props const* dom_get_doc_props_read_only(const php_libxml_ref_obj *document);
zend_object *dom_objects_new(zend_class_entry *class_type);
zend_object *dom_nnodemap_objects_new(zend_class_entry *class_type);
/* The XPath object constructor is only declared when libxml has XPath support. */
#ifdef LIBXML_XPATH_ENABLED
zend_object *dom_xpath_objects_new(zend_class_entry *class_type);
#endif
/* Error reporting. */
int dom_get_strict_error(php_libxml_ref_obj *document);
void php_dom_throw_error(int error_code, int strict_error);
void php_dom_throw_error_with_message(int error_code, char *error_message, int strict_error);
/* Node, name and namespace helpers. */
void node_list_unlink(xmlNodePtr node);
int dom_check_qname(char *qname, char **localname, char **prefix, int uri_len, int name_len);
xmlNsPtr dom_get_ns(xmlNodePtr node, char *uri, int *errorcode, char *prefix);
void dom_set_old_ns(xmlDoc *doc, xmlNs *ns);
void dom_reconcile_ns(xmlDocPtr doc, xmlNodePtr nodep);
xmlNsPtr dom_get_nsdecl(xmlNode *node, xmlChar *localName);
void dom_normalize (xmlNodePtr nodep);
xmlNode *dom_get_elements_by_tag_name_ns_raw(xmlNodePtr basep, xmlNodePtr nodep, char *ns, char *local, int *cur, int index);
void php_dom_create_implementation(zval *retval);
int dom_hierarchy(xmlNodePtr parent, xmlNodePtr child);
bool dom_has_feature(zend_string *feature, zend_string *version);
int dom_node_is_read_only(xmlNodePtr node);
int dom_node_children_valid(xmlNodePtr node);
/* Node list / named node map creation and iteration. */
void php_dom_create_iterator(zval *return_value, int ce_type);
void dom_namednode_iter(dom_object *basenode, int ntype, dom_object *intern, xmlHashTablePtr ht, const char *local, size_t local_len, const char *ns, size_t ns_len);
xmlNodePtr create_notation(const xmlChar *name, const xmlChar *ExternalID, const xmlChar *SystemID);
xmlNode *php_dom_libxml_hash_iter(xmlHashTable *ht, int index);
xmlNode *php_dom_libxml_notation_iter(xmlHashTable *ht, int index);
zend_object_iterator *php_dom_get_iterator(zend_class_entry *ce, zval *object, int by_ref);
void dom_set_doc_classmap(php_libxml_ref_obj *document, zend_class_entry *basece, zend_class_entry *ce);
/* Parent-node / child-node manipulation helpers.
 * NOTE(review): presumably `nodes` is an argument array and `nodesc` its element count - confirm. */
void dom_parent_node_prepend(dom_object *context, zval *nodes, int nodesc);
void dom_parent_node_append(dom_object *context, zval *nodes, int nodesc);
void dom_parent_node_after(dom_object *context, zval *nodes, int nodesc);
void dom_parent_node_before(dom_object *context, zval *nodes, int nodesc);
void dom_child_node_remove(dom_object *context);
/* Fetch the DOM object and its libxml node from an object zval.
 *
 *   __ptr     receives the libxml node, cast to __prtype
 *   __id      object zval to read from
 *   __prtype  pointer type to cast the node to
 *   __intern  receives Z_DOMOBJ_P(__id)
 *
 * If the object has no node pointer, or the node it holds is NULL, an Error
 * "Couldn't fetch <class name>" is thrown and RETURN_THROWS() is executed, so
 * the enclosing function returns.
 * NOTE(review): expands to a bare brace block rather than do { } while (0);
 * avoid using it as the body of an unbraced if/else.
 */
#define DOM_GET_OBJ(__ptr, __id, __prtype, __intern) { \
__intern = Z_DOMOBJ_P(__id); \
if (__intern->ptr == NULL || !(__ptr = (__prtype)((php_libxml_node_ptr *)__intern->ptr)->node)) { \
zend_throw_error(NULL, "Couldn't fetch %s", ZSTR_VAL(__intern->std.ce->name));\
RETURN_THROWS();\
} \
}
/* Check that the enclosing function was called without arguments; executes
 * RETURN_THROWS() when zend_parse_parameters_none() reports FAILURE.
 * NOTE(review): expands to a bare `if` statement, so an `else` written right
 * after the macro would bind to it. */
#define DOM_NO_ARGS() \
if (zend_parse_parameters_none() == FAILURE) { \
RETURN_THROWS(); \
}
/* Throw an Error with the message "Not yet implemented" and execute
 * RETURN_THROWS().
 * NOTE(review): expands to two separate statements that are not wrapped in
 * do { } while (0); used as the body of an unbraced `if`, only the throw would
 * be conditional. Keep call sites braced. */
#define DOM_NOT_IMPLEMENTED() \
zend_throw_error(NULL, "Not yet implemented"); \
RETURN_THROWS();
/* Selectors for the two list kinds.
 * NOTE(review): presumably the `ce_type` values accepted by php_dom_create_iterator() - confirm. */
#define DOM_NODELIST 0
#define DOM_NAMEDNODEMAP 1
/* Tell whether a cache tag is out of date with respect to a document.
 *
 * cache_tag: tag stored next to the cached data; must not be NULL.
 * doc_ptr:   document pointer holding the current modification number; must not be NULL.
 *
 * Returns true when the recorded modification number differs from the
 * document's current one. When SIZEOF_SIZE_T is not 8, a document whose
 * modification number equals SIZE_MAX is always reported as stale.
 */
static zend_always_inline bool php_dom_is_cache_tag_stale_from_doc_ptr(const php_libxml_cache_tag *cache_tag, const php_libxml_doc_ptr *doc_ptr)
{
	ZEND_ASSERT(cache_tag != NULL);
	ZEND_ASSERT(doc_ptr != NULL);

	bool stale = cache_tag->modification_nr != doc_ptr->cache_tag.modification_nr;

	/* See overflow comment in php_libxml_invalidate_node_list_cache(). */
#if SIZEOF_SIZE_T != 8
	if (!stale) {
		stale = UNEXPECTED(doc_ptr->cache_tag.modification_nr == SIZE_MAX);
	}
#endif

	return stale;
}
static zend_always_inline bool php_dom_is_cache_tag_stale_from_node(const php_libxml_cache_tag *cache_tag, const xmlNodePtr node)
{
ZEND_ASSERT(node != NULL);
return !node->doc || !node->doc->_private || php_dom_is_cache_tag_stale_from_doc_ptr(cache_tag, node->doc->_private);
}
/* Record the owning document's current modification number in a cache tag,
 * marking the data cached alongside it as up to date.
 *
 * cache_tag: tag to update; must not be NULL.
 * node:      node whose owning document supplies the modification number;
 *            must not be NULL.
 *
 * When the node has no document, or the document has no `_private` pointer,
 * the tag is left untouched.
 */
static zend_always_inline void php_dom_mark_cache_tag_up_to_date_from_node(php_libxml_cache_tag *cache_tag, const xmlNodePtr node)
{
	ZEND_ASSERT(cache_tag != NULL);
	/* `node` is dereferenced below; assert it like php_dom_is_cache_tag_stale_from_node() does. */
	ZEND_ASSERT(node != NULL);

	if (node->doc && node->doc->_private) {
		const php_libxml_doc_ptr *doc_ptr = node->doc->_private;
		cache_tag->modification_nr = doc_ptr->cache_tag.modification_nr;
	}
}
/* Module lifecycle hooks of the DOM extension (MINIT, MSHUTDOWN, MINFO), declared through the PHP_*_FUNCTION macros. */
PHP_MINIT_FUNCTION(dom);
PHP_MSHUTDOWN_FUNCTION(dom);
PHP_MINFO_FUNCTION(dom);
#endif /* PHP_DOM_H */