Implement Dom\Document::$title getter

This commit is contained in:
Niels Dossche 2024-04-07 14:24:01 +02:00
parent a12db3b656
commit 04af960397
16 changed files with 291 additions and 9 deletions

View file

@ -25,7 +25,7 @@ if test "$PHP_DOM" != "no"; then
$LEXBOR_DIR/selectors/selectors.c \
$LEXBOR_DIR/ns/ns.c \
$LEXBOR_DIR/tag/tag.c"
PHP_NEW_EXTENSION(dom, [php_dom.c attr.c document.c \
PHP_NEW_EXTENSION(dom, [php_dom.c attr.c document.c infra.c \
xml_document.c html_document.c xml_serializer.c html5_serializer.c html5_parser.c namespace_compat.c \
domexception.c parentnode.c \
processinginstruction.c cdatasection.c \

View file

@ -7,7 +7,7 @@ if (PHP_DOM == "yes") {
ADD_EXTENSION_DEP('dom', 'libxml') &&
CHECK_HEADER_ADD_INCLUDE("libxml/parser.h", "CFLAGS_DOM", PHP_PHP_BUILD + "\\include\\libxml2")
) {
EXTENSION("dom", "php_dom.c attr.c document.c \
EXTENSION("dom", "php_dom.c attr.c document.c infra.c \
xml_document.c html_document.c xml_serializer.c html5_serializer.c html5_parser.c namespace_compat.c \
domexception.c parentnode.c processinginstruction.c \
cdatasection.c documentfragment.c domimplementation.c element.c \

View file

@ -65,6 +65,7 @@ zend_result dom_html_document_encoding_write(dom_object *obj, zval *retval);
zend_result dom_html_document_body_read(dom_object *obj, zval *retval);
zend_result dom_html_document_body_write(dom_object *obj, zval *newval);
zend_result dom_html_document_head_read(dom_object *obj, zval *retval);
zend_result dom_html_document_title_read(dom_object *obj, zval *retval);
/* documenttype properties */
zend_result dom_documenttype_name_read(dom_object *obj, zval *retval);

View file

@ -21,6 +21,7 @@
#include "php.h"
#if defined(HAVE_LIBXML) && defined(HAVE_DOM)
#include "php_dom.h"
#include "infra.h"
#include "html5_parser.h"
#include "html5_serializer.h"
#include "namespace_compat.h"
@ -1458,4 +1459,82 @@ zend_result dom_html_document_body_write(dom_object *obj, zval *newval)
return FAILURE;
}
/* https://dom.spec.whatwg.org/#concept-child-text-content
 *
 * Concatenates the data of every Text and CDATA section node that is a direct
 * child of `node`, in order. Other child kinds (elements, comments, processing
 * instructions) and all deeper descendants are ignored.
 *
 * Returns the string produced by smart_str_extract(); the caller takes
 * ownership of it. */
static zend_string *dom_get_child_text_content(const xmlNode *node)
{
	smart_str content = {0};

	for (const xmlNode *child = node->children; child != NULL; child = child->next) {
		if (child->type != XML_TEXT_NODE && child->type != XML_CDATA_SECTION_NODE) {
			continue;
		}
		/* libxml2 allows text nodes without a content buffer; smart_str_appends()
		 * would call strlen() on NULL, so treat such a node as empty. */
		if (child->content != NULL) {
			smart_str_appends(&content, (const char *) child->content);
		}
	}

	return smart_str_extract(&content);
}
/* https://html.spec.whatwg.org/#the-title-element-2
 *
 * Walks the document in tree order, starting at its first child, and returns
 * the first element named "title" in the HTML namespace.
 * Returns NULL when the walk finishes without finding one. */
static const xmlNode *dom_get_title_element(const xmlDoc *doc)
{
	for (const xmlNode *candidate = doc->children;
		 candidate != NULL;
		 candidate = php_dom_next_in_tree_order(candidate, NULL)) {
		if (candidate->type != XML_ELEMENT_NODE) {
			continue;
		}
		if (php_dom_ns_is_fast(candidate, php_dom_ns_is_html_magic_token)
			&& xmlStrEqual(candidate->name, BAD_CAST "title")) {
			return candidate;
		}
	}

	return NULL;
}
/* https://html.spec.whatwg.org/#document.title
 *
 * Property read handler for Dom\Document::$title. Stores the document title,
 * with ASCII whitespace stripped and collapsed, into `retval`. A document
 * without a root element yields the empty string. */
zend_result dom_html_document_title_read(dom_object *obj, zval *retval)
{
	DOM_PROP_NODE(const xmlDoc *, docp, obj);

	const xmlNode *document_element = xmlDocGetRootElement(docp);
	if (document_element == NULL) {
		ZVAL_EMPTY_STRING(retval);
		return SUCCESS;
	}

	zend_string *title_text = zend_empty_string;

	bool root_is_svg = php_dom_ns_is_fast(document_element, php_dom_ns_is_svg_magic_token)
		&& xmlStrEqual(document_element->name, BAD_CAST "svg");

	if (!root_is_svg) {
		/* 2. Otherwise, let value be the child text content of the title element,
		 *    or the empty string if the title element is null. */
		const xmlNode *title_element = dom_get_title_element(docp);
		if (title_element != NULL) {
			title_text = dom_get_child_text_content(title_element);
		}
	} else {
		/* 1. If the document element is an SVG svg element, then let value be the child text content
		 *    of the first SVG title element that is a child of the document element. */
		for (const xmlNode *child = document_element->children; child != NULL; child = child->next) {
			if (child->type != XML_ELEMENT_NODE) {
				continue;
			}
			if (php_dom_ns_is_fast(child, php_dom_ns_is_svg_magic_token)
				&& xmlStrEqual(child->name, BAD_CAST "title")) {
				title_text = dom_get_child_text_content(child);
				break;
			}
		}
	}

	/* 3. Strip and collapse ASCII whitespace in value. */
	zend_string *collapsed = dom_strip_and_collapse_ascii_whitespace(title_text);

	/* 4. Return value. */
	ZVAL_STR(retval, collapsed);
	return SUCCESS;
}
#endif /* HAVE_LIBXML && HAVE_DOM */

77
ext/dom/infra.c Normal file
View file

@ -0,0 +1,77 @@
/*
+----------------------------------------------------------------------+
| Copyright (c) The PHP Group |
+----------------------------------------------------------------------+
| This source file is subject to version 3.01 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
| available through the world-wide-web at the following url: |
| https://www.php.net/license/3_01.txt |
| If you did not receive a copy of the PHP license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@php.net so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
| Authors: Niels Dossche <nielsdos@php.net> |
+----------------------------------------------------------------------+
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "php.h"
#if defined(HAVE_LIBXML) && defined(HAVE_DOM)
#include "infra.h"
/* https://infra.spec.whatwg.org/#ascii-whitespace
 *
 * The ASCII whitespace bytes TAB (0x09), LF (0x0A), FF (0x0C), CR (0x0D) and
 * SPACE (0x20), stored as a NUL-terminated set so it can be handed directly to
 * strspn()/strcspn(). */
const char *ascii_whitespace = "\x09\x0A\x0C\x0D\x20";
/* https://infra.spec.whatwg.org/#strip-and-collapse-ascii-whitespace */
zend_string *dom_strip_and_collapse_ascii_whitespace(zend_string *input)
{
if (input == zend_empty_string) {
return input;
}
ZEND_ASSERT(!ZSTR_IS_INTERNED(input));
ZEND_ASSERT(GC_REFCOUNT(input) == 1);
char *write_ptr = ZSTR_VAL(input);
const char *start = ZSTR_VAL(input);
const char *current = start;
const char *end = current + ZSTR_LEN(input);
current += strspn(current, ascii_whitespace);
while (current < end) {
/* Copy non-whitespace */
size_t non_whitespace_len = strcspn(current, ascii_whitespace);
/* If the pointers are equal, we still haven't encountered collapsable or strippable whitespace. */
if (write_ptr != current) {
memmove(write_ptr, current, non_whitespace_len);
}
current += non_whitespace_len;
write_ptr += non_whitespace_len;
/* Skip whitespace */
current += strspn(current, ascii_whitespace);
if (current < end) {
/* Only make a space when we're not yet at the end of the input, because that means more non-whitespace
* input is to come. */
*write_ptr++ = ' ';
}
}
*write_ptr = '\0';
size_t len = write_ptr - start;
if (len != ZSTR_LEN(input)) {
return zend_string_truncate(input, len, false);
} else {
/* Forget the hash value since we may have transformed non-space-whitespace into spaces. */
zend_string_forget_hash_val(input);
return input;
}
}
#endif /* HAVE_LIBXML && HAVE_DOM */

26
ext/dom/infra.h Normal file
View file

@ -0,0 +1,26 @@
/*
+----------------------------------------------------------------------+
| Copyright (c) The PHP Group |
+----------------------------------------------------------------------+
| This source file is subject to version 3.01 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
| available through the world-wide-web at the following url: |
| https://www.php.net/license/3_01.txt |
| If you did not receive a copy of the PHP license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@php.net so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
| Authors: Niels Dossche <nielsdos@php.net> |
+----------------------------------------------------------------------+
*/
/* Helpers implementing algorithms from the WHATWG Infra standard
 * (https://infra.spec.whatwg.org/) for use by the DOM extension. */
#ifndef INFRA_H
#define INFRA_H
#include "zend_string.h"
/* NUL-terminated set of the ASCII whitespace bytes (TAB, LF, FF, CR, SPACE);
 * see https://infra.spec.whatwg.org/#ascii-whitespace */
extern const char *ascii_whitespace;
/* Strips leading/trailing ASCII whitespace from `input` and collapses interior
 * runs into a single space, modifying the string in place.
 * `input` must be `zend_empty_string` or a non-interned string with refcount 1;
 * the caller must use the returned pointer afterwards.
 * See https://infra.spec.whatwg.org/#strip-and-collapse-ascii-whitespace */
zend_string *dom_strip_and_collapse_ascii_whitespace(zend_string *input);
#endif

View file

@ -853,6 +853,7 @@ PHP_MINIT_FUNCTION(dom)
DOM_REGISTER_PROP_HANDLER(&dom_abstract_base_document_prop_handlers, "childElementCount", dom_parent_node_child_element_count, NULL);
DOM_REGISTER_PROP_HANDLER(&dom_abstract_base_document_prop_handlers, "body", dom_html_document_body_read, dom_html_document_body_write);
DOM_REGISTER_PROP_HANDLER(&dom_abstract_base_document_prop_handlers, "head", dom_html_document_head_read, NULL);
DOM_REGISTER_PROP_HANDLER(&dom_abstract_base_document_prop_handlers, "title", dom_html_document_title_read, NULL);
zend_hash_merge(&dom_abstract_base_document_prop_handlers, &dom_modern_node_prop_handlers, NULL, false);
/* No need to register in &classes because this is an abstract class handler. */

View file

@ -1584,6 +1584,7 @@ namespace Dom
public ?Element $body;
/** @readonly */
public ?Element $head;
public string $title;
}
final class HTMLDocument extends Document

View file

@ -1,5 +1,5 @@
/* This is a generated file, edit the .stub.php file instead.
* Stub hash: 0795d4e52f62ab33df92cfbdd5178223fbfc3eeb */
* Stub hash: 7a2c28838f431eff28dea8cc5356dbcd38921592 */
ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_dom_import_simplexml, 0, 1, DOMElement, 0)
ZEND_ARG_TYPE_INFO(0, node, IS_OBJECT, 0)
@ -3456,6 +3456,12 @@ static zend_class_entry *register_class_Dom_Document(zend_class_entry *class_ent
zend_declare_typed_property(class_entry, property_head_name, &property_head_default_value, ZEND_ACC_PUBLIC, NULL, (zend_type) ZEND_TYPE_INIT_CLASS(property_head_class_Dom_Element, 0, MAY_BE_NULL));
zend_string_release(property_head_name);
zval property_title_default_value;
ZVAL_UNDEF(&property_title_default_value);
zend_string *property_title_name = zend_string_init("title", sizeof("title") - 1, 1);
zend_declare_typed_property(class_entry, property_title_name, &property_title_default_value, ZEND_ACC_PUBLIC, NULL, (zend_type) ZEND_TYPE_INIT_MASK(MAY_BE_STRING));
zend_string_release(property_title_name);
return class_entry;
}

View file

@ -0,0 +1,79 @@
--TEST--
Dom\Document::$title getter
--EXTENSIONS--
dom
--FILE--
<?php
echo "=== HTML namespaced root ===\n";
$dom = Dom\XMLDocument::createFromString("<root xmlns=\"http://www.w3.org/1999/xhtml\"><title>A normal title without collapsable or strippable whitespace</title></root>");
var_dump($dom->title);
$dom = Dom\XMLDocument::createFromString("<root xmlns=\"http://www.w3.org/1999/xhtml\"><title> only ws at front</title></root>");
var_dump($dom->title);
$dom = Dom\XMLDocument::createFromString("<root xmlns=\"http://www.w3.org/1999/xhtml\"><title>only ws at back </title></root>");
var_dump($dom->title);
$dom = Dom\XMLDocument::createFromString("<root xmlns=\"http://www.w3.org/1999/xhtml\"><ignoreme/><div><title>first</title></div><title>second</title></root>");
var_dump($dom->title);
$dom = Dom\XMLDocument::createFromString("<title xmlns=\"http://www.w3.org/1999/xhtml\">title</title>");
var_dump($dom->title);
$dom = Dom\XMLDocument::createFromString("<root xmlns=\"http://www.w3.org/1999/xhtml\"><title> abc def ghi </title></root>");
var_dump($dom->title);
$dom = Dom\XMLDocument::createFromString("<root xmlns=\"http://www.w3.org/1999/xhtml\"><title></title></root>");
var_dump($dom->title);
$dom = Dom\XMLDocument::createFromString("<root xmlns=\"http://www.w3.org/1999/xhtml\"></root>");
var_dump($dom->title);
$dom = Dom\XMLDocument::createFromString("<root xmlns=\"http://www.w3.org/1999/xhtml\"><title> \t\r\n </title></root>");
var_dump($dom->title);
$dom = Dom\XMLDocument::createFromString("<root xmlns=\"http://www.w3.org/1999/xhtml\"><title> \tx<?y y?><![CDATA[z]]>\n </title></root>");
var_dump($dom->title);
$dom = Dom\XMLDocument::createFromString("<root xmlns=\"http://www.w3.org/1999/xhtml\"><title><div><!-- comment -->x</div>y<p>z</p>w</title></root>");
var_dump($dom->title);
$dom = Dom\XMLDocument::createFromString("<root xmlns=\"http://www.w3.org/1999/xhtml\"><title>title\nhere</title></root>");
var_dump($dom->title);
echo "=== SVG namespaced root ===\n";
// The root is in the SVG namespace but is not an <svg> element, and the document has no HTML title element.
$dom = Dom\XMLDocument::createFromString("<root xmlns=\"http://www.w3.org/2000/svg\"><title>title</title></root>");
var_dump($dom->title);
// SVG <svg> root with an SVG <title> child.
$dom = Dom\XMLDocument::createFromString("<svg xmlns=\"http://www.w3.org/2000/svg\"><title>title</title></svg>");
var_dump($dom->title);
// For an SVG <svg> root only SVG <title> children count; the HTML-namespaced one is skipped.
$dom = Dom\XMLDocument::createFromString("<svg xmlns=\"http://www.w3.org/2000/svg\"><title xmlns=\"http://www.w3.org/1999/xhtml\">title</title><foo/><title>hi</title></svg>");
var_dump($dom->title);
$dom = Dom\XMLDocument::createFromString("<svg xmlns=\"http://www.w3.org/2000/svg\"/>");
var_dump($dom->title);
// A root named "svg" in the HTML namespace is not an SVG svg element, so the HTML title element is used.
$dom = Dom\XMLDocument::createFromString("<svg xmlns=\"http://www.w3.org/1999/xhtml\"><title>title</title></svg>");
var_dump($dom->title);
?>
--EXPECT--
=== HTML namespaced root ===
string(59) "A normal title without collapsable or strippable whitespace"
string(16) "only ws at front"
string(15) "only ws at back"
string(5) "first"
string(5) "title"
string(11) "abc def ghi"
string(0) ""
string(0) ""
string(0) ""
string(2) "xz"
string(2) "yw"
string(10) "title here"
=== SVG namespaced root ===
string(0) ""
string(5) "title"
string(2) "hi"
string(0) ""
string(5) "title"

View file

@ -23,7 +23,7 @@ var_dump(get_class($dom->getElementsByTagName("p")->item(0)));
?>
--EXPECT--
object(Dom\HTMLDocument)#1 (27) {
object(Dom\HTMLDocument)#1 (28) {
["implementation"]=>
string(22) "(object value omitted)"
["URL"]=>
@ -50,6 +50,8 @@ object(Dom\HTMLDocument)#1 (27) {
string(22) "(object value omitted)"
["head"]=>
string(22) "(object value omitted)"
["title"]=>
string(0) ""
["nodeType"]=>
int(13)
["nodeName"]=>

View file

@ -23,7 +23,7 @@ var_dump(get_class($dom->getElementsByTagName("p")->item(0)));
?>
--EXPECT--
object(Dom\HTMLDocument)#1 (27) {
object(Dom\HTMLDocument)#1 (28) {
["implementation"]=>
string(22) "(object value omitted)"
["URL"]=>
@ -50,6 +50,8 @@ object(Dom\HTMLDocument)#1 (27) {
string(22) "(object value omitted)"
["head"]=>
string(22) "(object value omitted)"
["title"]=>
string(0) ""
["nodeType"]=>
int(13)
["nodeName"]=>

View file

@ -37,7 +37,7 @@ echo $dom->implementation->createDocument(null, "", $dtd)->saveXml(), "\n";
?>
--EXPECT--
--- (null, "") ---
object(Dom\XMLDocument)#3 (31) {
object(Dom\XMLDocument)#3 (32) {
["xmlEncoding"]=>
string(5) "UTF-8"
["xmlStandalone"]=>
@ -72,6 +72,8 @@ object(Dom\XMLDocument)#3 (31) {
NULL
["head"]=>
NULL
["title"]=>
string(0) ""
["nodeType"]=>
int(9)
["nodeName"]=>

View file

@ -10,7 +10,7 @@ var_dump($dom);
?>
--EXPECT--
object(Dom\XMLDocument)#1 (31) {
object(Dom\XMLDocument)#1 (32) {
["xmlEncoding"]=>
string(5) "UTF-8"
["xmlStandalone"]=>
@ -45,6 +45,8 @@ object(Dom\XMLDocument)#1 (31) {
NULL
["head"]=>
NULL
["title"]=>
string(0) ""
["nodeType"]=>
int(9)
["nodeName"]=>

View file

@ -10,7 +10,7 @@ var_dump($dom);
?>
--EXPECT--
object(Dom\XMLDocument)#1 (31) {
object(Dom\XMLDocument)#1 (32) {
["xmlEncoding"]=>
string(5) "UTF-8"
["xmlStandalone"]=>
@ -45,6 +45,8 @@ object(Dom\XMLDocument)#1 (31) {
NULL
["head"]=>
NULL
["title"]=>
string(0) ""
["nodeType"]=>
int(9)
["nodeName"]=>

View file

@ -13,7 +13,7 @@ var_dump($element->ownerDocument);
?>
--EXPECTF--
object(Dom\XMLDocument)#1 (31) {
object(Dom\XMLDocument)#1 (32) {
["xmlEncoding"]=>
string(5) "UTF-8"
["xmlStandalone"]=>
@ -48,6 +48,8 @@ object(Dom\XMLDocument)#1 (31) {
NULL
["head"]=>
NULL
["title"]=>
string(0) ""
["nodeType"]=>
int(9)
["nodeName"]=>