Fix lowercase HTML attribute exceptions

Closes GH-17815.
This commit is contained in:
Niels Dossche 2025-02-15 16:21:54 +01:00
parent 3a4a320386
commit 74df3e0404
No known key found for this signature in database
GPG key ID: B8A8AD166DF0E2E5
4 changed files with 87 additions and 3 deletions

2
NEWS
View file

@ -21,6 +21,8 @@ PHP NEWS
- DOM:
. Fixed bug GH-17609 (Typo in error message: Dom\NO_DEFAULT_NS instead of
Dom\HTML_NO_DEFAULT_NS). (nielsdos)
. Fixed bug GH-17802 (\Dom\HTMLDocument querySelector attribute name is case
sensitive in HTML). (nielsdos)
- GD:
. Fixed bug GH-17703 (imagescale with both width and height negative values

View file

@ -99,8 +99,67 @@ static zend_always_inline dom_lxb_str_wrapper lxb_selectors_adapted_attr_value(c
return ret;
}
/* Tests whether the attribute name carried by `selector` matches `name`.
 *
 * selector: selector whose `name` string (data + length) is inspected.
 * name:     candidate attribute name, `len` bytes long.
 * len:      byte length of `name`.
 *
 * Returns true only when both names have the same length and
 * lexbor_str_data_nlocmp_right() reports a match.
 * NOTE(review): presumably that helper lowercases only its right-hand
 * argument, so `name` is expected to be lowercase already - confirm against
 * the lexbor sources. */
static bool lxb_selectors_attrib_name_cmp(const lxb_css_selector_t *selector, const char *name, size_t len)
{
	/* Differing lengths can never match; this also keeps the comparison
	 * below within the bounds of both buffers. */
	if (selector->name.length != len) {
		return false;
	}

	const lxb_char_t *expected = (const lxb_char_t *) name;
	return lexbor_str_data_nlocmp_right(expected, selector->name.data, len);
}
/* From https://html.spec.whatwg.org/#case-sensitivity-of-selectors
* "Attribute selectors on an HTML element in an HTML document must treat the values of attributes with the following names as ASCII case-insensitive:" */
static bool lxb_selectors_is_lowercased_html_attrib_name(const lxb_css_selector_t *selector)
{
return lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("accept"))
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("accept-charset"))
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("align"))
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("alink"))
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("axis"))
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("bgcolor"))
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("charset"))
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("checked"))
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("clear"))
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("codetype"))
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("color"))
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("compact"))
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("declare"))
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("defer"))
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("dir"))
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("direction"))
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("disabled"))
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("enctype"))
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("face"))
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("frame"))
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("hreflang"))
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("http-equiv"))
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("lang"))
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("language"))
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("link"))
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("media"))
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("method"))
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("multiple"))
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("nohref"))
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("noresize"))
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("noshade"))
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("nowrap"))
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("readonly"))
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("rel"))
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("rev"))
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("rules"))
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("scope"))
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("scrolling"))
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("selected"))
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("shape"))
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("target"))
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("text"))
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("type"))
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("valign"))
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("valuetype"))
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("vlink"));
}
static void lxb_selectors_adapted_set_entry_id_ex(lxb_selectors_entry_t *entry, const lxb_css_selector_t *selector, const xmlNode *node)
{
entry->id.attr_case_insensitive = lxb_selectors_is_lowercased_html_attrib_name(selector);
if (node->doc != NULL && node->doc->dict != NULL) {
const xmlChar *interned = xmlDictExists(node->doc->dict, selector->name.data, selector->name.length);
if (interned != NULL) {
@ -1304,10 +1363,10 @@ lxb_selectors_match_class(const lexbor_str_t *target, const lexbor_str_t *src,
}
static bool
lxb_selectors_match_attribute_value(const lxb_css_selector_attribute_t *attr, const lexbor_str_t *trg, const lexbor_str_t *src)
lxb_selectors_match_attribute_value(const lxb_css_selector_attribute_t *attr, bool force_modifier_i, const lexbor_str_t *trg, const lexbor_str_t *src)
{
bool res;
bool ins = attr->modifier == LXB_CSS_SELECTOR_MODIFIER_I;
bool ins = attr->modifier == LXB_CSS_SELECTOR_MODIFIER_I || force_modifier_i;
switch (attr->match) {
case LXB_CSS_SELECTOR_MATCH_EQUAL: /* = */
@ -1419,7 +1478,13 @@ lxb_selectors_match_attribute(const lxb_css_selector_t *selector,
}
dom_lxb_str_wrapper trg = lxb_selectors_adapted_attr_value(dom_attr);
bool res = lxb_selectors_match_attribute_value(attr, &trg.str, src);
ZEND_ASSERT(node->doc != NULL);
bool res = lxb_selectors_match_attribute_value(
attr,
entry->id.attr_case_insensitive && php_dom_ns_is_html_and_document_is_html(node),
&trg.str,
src
);
dom_lxb_str_wrapper_release(&trg);
return res;
}

View file

@ -78,6 +78,7 @@ typedef lxb_selectors_entry_t *
/* Identifier data attached to a selector entry by the PHP DOM adaptation of
 * the lexbor selector engine. */
typedef struct {
/* Name to match against.
 * NOTE(review): presumably the selector's name, possibly pointing at a
 * dictionary-interned copy - confirm in the code that fills this struct. */
const xmlChar *name;
/* NOTE(review): presumably tells whether `name` is an interned dictionary
 * string (allowing pointer comparison) - confirm against the users. */
bool interned;
/* Set from lxb_selectors_is_lowercased_html_attrib_name(): true when the
 * selector's attribute name is one of those whose values HTML matches
 * ASCII case-insensitively. The attribute matcher combines it with an
 * "HTML element in an HTML document" check before forcing the `i` modifier. */
bool attr_case_insensitive;
} lxb_selectors_adapted_id;
struct lxb_selectors_entry {

View file

@ -32,6 +32,18 @@ foreach ($dom->querySelectorAll('meta[charset]') as $entry) {
var_dump($dom->saveHtml($entry));
}
echo "--- charseT and lowercase value ---\n";
foreach ($dom->querySelectorAll('meta[charseT="windows-1252"]') as $entry) {
var_dump($dom->saveHtml($entry));
}
echo "--- charset and lowercase value ---\n";
foreach ($dom->querySelectorAll('meta[charset="windows-1252"]') as $entry) {
var_dump($dom->saveHtml($entry));
}
?>
--EXPECT--
<html><head>
@ -45,3 +57,7 @@ string(29) "<meta charset="Windows-1252">"
--- charset ---
string(29) "<meta charset="Windows-1252">"
string(25) "<meta charset="x"></meta>"
--- charseT and lowercase value ---
string(29) "<meta charset="Windows-1252">"
--- charset and lowercase value ---
string(29) "<meta charset="Windows-1252">"