diff --git a/ext/dom/config.m4 b/ext/dom/config.m4 index 4dcde6105a5..0db013b8689 100644 --- a/ext/dom/config.m4 +++ b/ext/dom/config.m4 @@ -25,7 +25,7 @@ if test "$PHP_DOM" != "no"; then $LEXBOR_DIR/selectors/selectors.c \ $LEXBOR_DIR/ns/ns.c \ $LEXBOR_DIR/tag/tag.c" - PHP_NEW_EXTENSION(dom, [php_dom.c attr.c document.c \ + PHP_NEW_EXTENSION(dom, [php_dom.c attr.c document.c infra.c \ xml_document.c html_document.c xml_serializer.c html5_serializer.c html5_parser.c namespace_compat.c \ domexception.c parentnode.c \ processinginstruction.c cdatasection.c \ diff --git a/ext/dom/config.w32 b/ext/dom/config.w32 index 1a5d33bf7ca..02a7e0a9409 100644 --- a/ext/dom/config.w32 +++ b/ext/dom/config.w32 @@ -7,7 +7,7 @@ if (PHP_DOM == "yes") { ADD_EXTENSION_DEP('dom', 'libxml') && CHECK_HEADER_ADD_INCLUDE("libxml/parser.h", "CFLAGS_DOM", PHP_PHP_BUILD + "\\include\\libxml2") ) { - EXTENSION("dom", "php_dom.c attr.c document.c \ + EXTENSION("dom", "php_dom.c attr.c document.c infra.c \ xml_document.c html_document.c xml_serializer.c html5_serializer.c html5_parser.c namespace_compat.c \ domexception.c parentnode.c processinginstruction.c \ cdatasection.c documentfragment.c domimplementation.c element.c \ diff --git a/ext/dom/dom_properties.h b/ext/dom/dom_properties.h index 1f8d1e44f34..039f7fd260e 100644 --- a/ext/dom/dom_properties.h +++ b/ext/dom/dom_properties.h @@ -65,6 +65,7 @@ zend_result dom_html_document_encoding_write(dom_object *obj, zval *retval); zend_result dom_html_document_body_read(dom_object *obj, zval *retval); zend_result dom_html_document_body_write(dom_object *obj, zval *newval); zend_result dom_html_document_head_read(dom_object *obj, zval *retval); +zend_result dom_html_document_title_read(dom_object *obj, zval *retval); /* documenttype properties */ zend_result dom_documenttype_name_read(dom_object *obj, zval *retval); diff --git a/ext/dom/html_document.c b/ext/dom/html_document.c index e4fdf248a89..d487d04b7fb 100644 --- a/ext/dom/html_document.c +++ b/ext/dom/html_document.c @@ -21,6 +21,7 @@ #include "php.h" #if defined(HAVE_LIBXML) && defined(HAVE_DOM) #include "php_dom.h" +#include "infra.h" #include "html5_parser.h" #include "html5_serializer.h" #include "namespace_compat.h" @@ -1458,4 +1459,82 @@ zend_result dom_html_document_body_write(dom_object *obj, zval *newval) return FAILURE; } +/* https://dom.spec.whatwg.org/#concept-child-text-content */ +static zend_string *dom_get_child_text_content(const xmlNode *node) +{ + smart_str content = {0}; + + const xmlNode *text = node->children; + while (text != NULL) { + if (text->type == XML_TEXT_NODE || text->type == XML_CDATA_SECTION_NODE) { + smart_str_appends(&content, (const char *) text->content); + } + text = text->next; + } + + return smart_str_extract(&content); +} + +/* https://html.spec.whatwg.org/#the-title-element-2 */ +static const xmlNode *dom_get_title_element(const xmlDoc *doc) +{ + const xmlNode *node = doc->children; + + while (node != NULL) { + if (node->type == XML_ELEMENT_NODE) { + if (php_dom_ns_is_fast(node, php_dom_ns_is_html_magic_token) && xmlStrEqual(node->name, BAD_CAST "title")) { + break; + } + } + + node = php_dom_next_in_tree_order(node, NULL); + } + + return node; +} + +/* https://html.spec.whatwg.org/#document.title */ +zend_result dom_html_document_title_read(dom_object *obj, zval *retval) +{ + DOM_PROP_NODE(const xmlDoc *, docp, obj); + const xmlNode *root = xmlDocGetRootElement(docp); + + if (root == NULL) { + ZVAL_EMPTY_STRING(retval); + return SUCCESS; + } + + zend_string *value = zend_empty_string; + + /* 1. If the document element is an SVG svg element, + * then let value be the child text content of the first SVG title element that is a child of the document element. */ + if (php_dom_ns_is_fast(root, php_dom_ns_is_svg_magic_token) && xmlStrEqual(root->name, BAD_CAST "svg")) { + const xmlNode *cur = root->children; + + while (cur != NULL) { + if (cur->type == XML_ELEMENT_NODE + && php_dom_ns_is_fast(cur, php_dom_ns_is_svg_magic_token) && xmlStrEqual(cur->name, BAD_CAST "title")) { + value = dom_get_child_text_content(cur); + break; + } + cur = cur->next; + } + } else { + /* 2. Otherwise, let value be the child text content of the title element, + * or the empty string if the title element is null. */ + const xmlNode *title = dom_get_title_element(docp); + if (title != NULL) { + value = dom_get_child_text_content(title); + } + } + + /* 3. Strip and collapse ASCII whitespace in value. */ + value = dom_strip_and_collapse_ascii_whitespace(value); + + /* 4. Return value. */ + ZVAL_STR(retval, value); + + return SUCCESS; +} + #endif /* HAVE_LIBXML && HAVE_DOM */ diff --git a/ext/dom/infra.c b/ext/dom/infra.c new file mode 100644 index 00000000000..8fa42453c75 --- /dev/null +++ b/ext/dom/infra.c @@ -0,0 +1,77 @@ +/* + +----------------------------------------------------------------------+ + | Copyright (c) The PHP Group | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | https://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ + | Authors: Niels Dossche | + +----------------------------------------------------------------------+ +*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "php.h" +#if defined(HAVE_LIBXML) && defined(HAVE_DOM) +#include "infra.h" + +/* https://infra.spec.whatwg.org/#ascii-whitespace */ +const char *ascii_whitespace = "\x09\x0A\x0C\x0D\x20"; + +/* https://infra.spec.whatwg.org/#strip-and-collapse-ascii-whitespace */ +zend_string *dom_strip_and_collapse_ascii_whitespace(zend_string *input) +{ + if (input == zend_empty_string) { + return input; + } + + ZEND_ASSERT(!ZSTR_IS_INTERNED(input)); + ZEND_ASSERT(GC_REFCOUNT(input) == 1); + + char *write_ptr = ZSTR_VAL(input); + + const char *start = ZSTR_VAL(input); + const char *current = start; + const char *end = current + ZSTR_LEN(input); + + current += strspn(current, ascii_whitespace); + + while (current < end) { + /* Copy non-whitespace */ + size_t non_whitespace_len = strcspn(current, ascii_whitespace); + /* If the pointers are equal, we still haven't encountered collapsable or strippable whitespace. */ + if (write_ptr != current) { + memmove(write_ptr, current, non_whitespace_len); + } + current += non_whitespace_len; + write_ptr += non_whitespace_len; + + /* Skip whitespace */ + current += strspn(current, ascii_whitespace); + if (current < end) { + /* Only make a space when we're not yet at the end of the input, because that means more non-whitespace + * input is to come. */ + *write_ptr++ = ' '; + } + } + + *write_ptr = '\0'; + + size_t len = write_ptr - start; + if (len != ZSTR_LEN(input)) { + return zend_string_truncate(input, len, false); + } else { + /* Forget the hash value since we may have transformed non-space-whitespace into spaces. */ + zend_string_forget_hash_val(input); + return input; + } +} + +#endif /* HAVE_LIBXML && HAVE_DOM */ diff --git a/ext/dom/infra.h b/ext/dom/infra.h new file mode 100644 index 00000000000..d84ad5a2a0e --- /dev/null +++ b/ext/dom/infra.h @@ -0,0 +1,26 @@ +/* + +----------------------------------------------------------------------+ + | Copyright (c) The PHP Group | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | https://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ + | Authors: Niels Dossche | + +----------------------------------------------------------------------+ +*/ + +#ifndef INFRA_H +#define INFRA_H + +#include "zend_string.h" + +extern const char *ascii_whitespace; + +zend_string *dom_strip_and_collapse_ascii_whitespace(zend_string *input); + +#endif diff --git a/ext/dom/php_dom.c b/ext/dom/php_dom.c index 3c26afd7780..44fa0a5489d 100644 --- a/ext/dom/php_dom.c +++ b/ext/dom/php_dom.c @@ -853,6 +853,7 @@ PHP_MINIT_FUNCTION(dom) DOM_REGISTER_PROP_HANDLER(&dom_abstract_base_document_prop_handlers, "childElementCount", dom_parent_node_child_element_count, NULL); DOM_REGISTER_PROP_HANDLER(&dom_abstract_base_document_prop_handlers, "body", dom_html_document_body_read, dom_html_document_body_write); DOM_REGISTER_PROP_HANDLER(&dom_abstract_base_document_prop_handlers, "head", dom_html_document_head_read, NULL); + DOM_REGISTER_PROP_HANDLER(&dom_abstract_base_document_prop_handlers, "title", dom_html_document_title_read, NULL); zend_hash_merge(&dom_abstract_base_document_prop_handlers, &dom_modern_node_prop_handlers, NULL, false); /* No need to register in &classes because this is an abstract class handler. */ diff --git a/ext/dom/php_dom.stub.php b/ext/dom/php_dom.stub.php index a21b818b899..e2d374b7efa 100644 --- a/ext/dom/php_dom.stub.php +++ b/ext/dom/php_dom.stub.php @@ -1584,6 +1584,7 @@ namespace Dom public ?Element $body; /** @readonly */ public ?Element $head; + public string $title; } final class HTMLDocument extends Document diff --git a/ext/dom/php_dom_arginfo.h b/ext/dom/php_dom_arginfo.h index b364952687e..c3cc6187575 100644 --- a/ext/dom/php_dom_arginfo.h +++ b/ext/dom/php_dom_arginfo.h @@ -1,5 +1,5 @@ /* This is a generated file, edit the .stub.php file instead. - * Stub hash: 0795d4e52f62ab33df92cfbdd5178223fbfc3eeb */ + * Stub hash: 7a2c28838f431eff28dea8cc5356dbcd38921592 */ ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_dom_import_simplexml, 0, 1, DOMElement, 0) ZEND_ARG_TYPE_INFO(0, node, IS_OBJECT, 0) @@ -3456,6 +3456,12 @@ static zend_class_entry *register_class_Dom_Document(zend_class_entry *class_ent zend_declare_typed_property(class_entry, property_head_name, &property_head_default_value, ZEND_ACC_PUBLIC, NULL, (zend_type) ZEND_TYPE_INIT_CLASS(property_head_class_Dom_Element, 0, MAY_BE_NULL)); zend_string_release(property_head_name); + zval property_title_default_value; + ZVAL_UNDEF(&property_title_default_value); + zend_string *property_title_name = zend_string_init("title", sizeof("title") - 1, 1); + zend_declare_typed_property(class_entry, property_title_name, &property_title_default_value, ZEND_ACC_PUBLIC, NULL, (zend_type) ZEND_TYPE_INIT_MASK(MAY_BE_STRING)); + zend_string_release(property_title_name); + return class_entry; } diff --git a/ext/dom/tests/modern/common/Document_title_getter.phpt b/ext/dom/tests/modern/common/Document_title_getter.phpt new file mode 100644 index 00000000000..eb4f4b7cdb5 --- /dev/null +++ b/ext/dom/tests/modern/common/Document_title_getter.phpt @@ -0,0 +1,79 @@ +--TEST-- +Dom\Document::$title getter +--EXTENSIONS-- +dom +--FILE-- +A normal title without collapsable or strippable whitespace"); +var_dump($dom->title); + +$dom = Dom\XMLDocument::createFromString(" only ws at front"); +var_dump($dom->title); + +$dom = Dom\XMLDocument::createFromString("only ws at back "); +var_dump($dom->title); + +$dom = Dom\XMLDocument::createFromString("
first
second
"); +var_dump($dom->title); + +$dom = Dom\XMLDocument::createFromString("title"); +var_dump($dom->title); + +$dom = Dom\XMLDocument::createFromString(" abc def ghi "); +var_dump($dom->title); + +$dom = Dom\XMLDocument::createFromString(""); +var_dump($dom->title); + +$dom = Dom\XMLDocument::createFromString(""); +var_dump($dom->title); + +$dom = Dom\XMLDocument::createFromString(" \t\r\n "); +var_dump($dom->title); + +$dom = Dom\XMLDocument::createFromString(" \tx<?y y?><![CDATA[z]]>\n "); +var_dump($dom->title); + +$dom = Dom\XMLDocument::createFromString("<div><!-- comment -->x</div>y<p>z</p>w"); +var_dump($dom->title); + +$dom = Dom\XMLDocument::createFromString("title\nhere"); +var_dump($dom->title); + +echo "=== SVG namespaced root ===\n"; + +$dom = Dom\XMLDocument::createFromString("title"); +var_dump($dom->title); + +$dom = Dom\XMLDocument::createFromString("title"); +var_dump($dom->title); + +$dom = Dom\XMLDocument::createFromString("titlehi"); +var_dump($dom->title); + +$dom = Dom\XMLDocument::createFromString(""); +var_dump($dom->title); + +?> +--EXPECT-- +=== HTML namespaced root === +string(59) "A normal title without collapsable or strippable whitespace" +string(16) "only ws at front" +string(15) "only ws at back" +string(5) "first" +string(5) "title" +string(11) "abc def ghi" +string(0) "" +string(0) "" +string(0) "" +string(2) "xz" +string(2) "yw" +string(10) "title here" +=== SVG namespaced root === +string(5) "title" +string(5) "title" +string(5) "title" +string(0) "" diff --git a/ext/dom/tests/modern/html/interactions/HTMLDocument_should_retain_properties_and_owner_01.phpt b/ext/dom/tests/modern/html/interactions/HTMLDocument_should_retain_properties_and_owner_01.phpt index 593dae8e1f2..cdd98b58526 100644 --- a/ext/dom/tests/modern/html/interactions/HTMLDocument_should_retain_properties_and_owner_01.phpt +++ b/ext/dom/tests/modern/html/interactions/HTMLDocument_should_retain_properties_and_owner_01.phpt @@ -23,7 +23,7 @@ var_dump(get_class($dom->getElementsByTagName("p")->item(0))); ?> --EXPECT-- -object(Dom\HTMLDocument)#1 (27) { +object(Dom\HTMLDocument)#1 (28) { ["implementation"]=> string(22) "(object value omitted)" ["URL"]=> @@ -50,6 +50,8 @@ object(Dom\HTMLDocument)#1 (27) { string(22) "(object value omitted)" ["head"]=> string(22) "(object value omitted)" + ["title"]=> + string(0) "" ["nodeType"]=> int(13) ["nodeName"]=> diff --git a/ext/dom/tests/modern/html/interactions/HTMLDocument_should_retain_properties_and_owner_02.phpt b/ext/dom/tests/modern/html/interactions/HTMLDocument_should_retain_properties_and_owner_02.phpt index b2620564778..b216b399a16 100644 --- a/ext/dom/tests/modern/html/interactions/HTMLDocument_should_retain_properties_and_owner_02.phpt +++ b/ext/dom/tests/modern/html/interactions/HTMLDocument_should_retain_properties_and_owner_02.phpt @@ -23,7 +23,7 @@ var_dump(get_class($dom->getElementsByTagName("p")->item(0))); ?> --EXPECT-- -object(Dom\HTMLDocument)#1 (27) { +object(Dom\HTMLDocument)#1 (28) { ["implementation"]=> string(22) "(object value omitted)" ["URL"]=> @@ -50,6 +50,8 @@ object(Dom\HTMLDocument)#1 (27) { string(22) "(object value omitted)" ["head"]=> string(22) "(object value omitted)" + ["title"]=> + string(0) "" ["nodeType"]=> int(13) ["nodeName"]=> diff --git a/ext/dom/tests/modern/spec/Document_implementation_createDocument.phpt b/ext/dom/tests/modern/spec/Document_implementation_createDocument.phpt index 37d73c5d40a..ae49a6a494c 100644 --- a/ext/dom/tests/modern/spec/Document_implementation_createDocument.phpt +++ b/ext/dom/tests/modern/spec/Document_implementation_createDocument.phpt @@ -37,7 +37,7 @@ echo $dom->implementation->createDocument(null, "", $dtd)->saveXml(), "\n"; ?> --EXPECT-- --- (null, "") --- -object(Dom\XMLDocument)#3 (31) { +object(Dom\XMLDocument)#3 (32) { ["xmlEncoding"]=> string(5) "UTF-8" ["xmlStandalone"]=> @@ -72,6 +72,8 @@ object(Dom\XMLDocument)#3 (31) { NULL ["head"]=> NULL + ["title"]=> + string(0) "" ["nodeType"]=> int(9) ["nodeName"]=> diff --git a/ext/dom/tests/modern/xml/XMLDocument_debug.phpt b/ext/dom/tests/modern/xml/XMLDocument_debug.phpt index a0f6a528389..e2d6ebffe89 100644 --- a/ext/dom/tests/modern/xml/XMLDocument_debug.phpt +++ b/ext/dom/tests/modern/xml/XMLDocument_debug.phpt @@ -10,7 +10,7 @@ var_dump($dom); ?> --EXPECT-- -object(Dom\XMLDocument)#1 (31) { +object(Dom\XMLDocument)#1 (32) { ["xmlEncoding"]=> string(5) "UTF-8" ["xmlStandalone"]=> @@ -45,6 +45,8 @@ object(Dom\XMLDocument)#1 (31) { NULL ["head"]=> NULL + ["title"]=> + string(0) "" ["nodeType"]=> int(9) ["nodeName"]=> diff --git a/ext/dom/tests/modern/xml/XMLDocument_fromEmptyDocument_02.phpt b/ext/dom/tests/modern/xml/XMLDocument_fromEmptyDocument_02.phpt index b8a87e5ba4e..62d64a05f9b 100644 --- a/ext/dom/tests/modern/xml/XMLDocument_fromEmptyDocument_02.phpt +++ b/ext/dom/tests/modern/xml/XMLDocument_fromEmptyDocument_02.phpt @@ -10,7 +10,7 @@ var_dump($dom); ?> --EXPECT-- -object(Dom\XMLDocument)#1 (31) { +object(Dom\XMLDocument)#1 (32) { ["xmlEncoding"]=> string(5) "UTF-8" ["xmlStandalone"]=> @@ -45,6 +45,8 @@ object(Dom\XMLDocument)#1 (31) { NULL ["head"]=> NULL + ["title"]=> + string(0) "" ["nodeType"]=> int(9) ["nodeName"]=> diff --git a/ext/dom/tests/modern/xml/XMLDocument_node_ownerDocument_for_XML.phpt b/ext/dom/tests/modern/xml/XMLDocument_node_ownerDocument_for_XML.phpt index 4f3308343e9..e18c43f05ae 100644 --- a/ext/dom/tests/modern/xml/XMLDocument_node_ownerDocument_for_XML.phpt +++ b/ext/dom/tests/modern/xml/XMLDocument_node_ownerDocument_for_XML.phpt @@ -13,7 +13,7 @@ var_dump($element->ownerDocument); ?> --EXPECTF-- -object(Dom\XMLDocument)#1 (31) { +object(Dom\XMLDocument)#1 (32) { ["xmlEncoding"]=> string(5) "UTF-8" ["xmlStandalone"]=> @@ -48,6 +48,8 @@ object(Dom\XMLDocument)#1 (31) { NULL ["head"]=> NULL + ["title"]=> + string(0) "" ["nodeType"]=> int(9) ["nodeName"]=>