diff --git a/NEWS b/NEWS index 7fa72d486b5..c6bfbc62479 100644 --- a/NEWS +++ b/NEWS @@ -21,6 +21,8 @@ PHP NEWS - DOM: . Fixed bug GH-17609 (Typo in error message: Dom\NO_DEFAULT_NS instead of Dom\HTML_NO_DEFAULT_NS). (nielsdos) + . Fixed bug GH-17802 (\Dom\HTMLDocument querySelector attribute name is case + sensitive in HTML). (nielsdos) - GD: . Fixed bug GH-17703 (imagescale with both width and height negative values diff --git a/ext/dom/lexbor/lexbor/selectors-adapted/selectors.c b/ext/dom/lexbor/lexbor/selectors-adapted/selectors.c index 7d99ee8e0bc..3a40318628f 100644 --- a/ext/dom/lexbor/lexbor/selectors-adapted/selectors.c +++ b/ext/dom/lexbor/lexbor/selectors-adapted/selectors.c @@ -99,8 +99,67 @@ static zend_always_inline dom_lxb_str_wrapper lxb_selectors_adapted_attr_value(c return ret; } +static bool lxb_selectors_attrib_name_cmp(const lxb_css_selector_t *selector, const char *name, size_t len) +{ + return selector->name.length == len && lexbor_str_data_nlocmp_right((const lxb_char_t *) name, selector->name.data, len); /* lengths must be equal before the bytes are compared; `name` is always a lowercase literal here. NOTE(review): lexbor_str_data_nlocmp_right presumably lowercases only its second argument - confirm against lexbor */ +} + +/* From https://html.spec.whatwg.org/#case-sensitivity-of-selectors + * "Attribute selectors on an HTML element in an HTML document must treat the values of attributes with the following names as ASCII case-insensitive:" This function only tests whether the selector's attribute name is one of the names listed below; it does not look at the element. The match site additionally requires php_dom_ns_is_html_and_document_is_html(node) before forcing a case-insensitive value comparison. */ +static bool lxb_selectors_is_lowercased_html_attrib_name(const lxb_css_selector_t *selector) +{ + return lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("accept")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("accept-charset")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("align")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("alink")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("axis")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("bgcolor")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("charset")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("checked")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("clear")) + || 
lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("codetype")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("color")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("compact")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("declare")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("defer")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("dir")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("direction")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("disabled")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("enctype")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("face")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("frame")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("hreflang")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("http-equiv")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("lang")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("language")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("link")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("media")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("method")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("multiple")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("nohref")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("noresize")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("noshade")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("nowrap")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("readonly")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("rel")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("rev")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("rules")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("scope")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("scrolling")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("selected")) + || 
lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("shape")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("target")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("text")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("type")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("valign")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("valuetype")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("vlink")); +} + static void lxb_selectors_adapted_set_entry_id_ex(lxb_selectors_entry_t *entry, const lxb_css_selector_t *selector, const xmlNode *node) { + entry->id.attr_case_insensitive = lxb_selectors_is_lowercased_html_attrib_name(selector); /* record on the entry whether this selector name is in the case-insensitive-value list */ + if (node->doc != NULL && node->doc->dict != NULL) { const xmlChar *interned = xmlDictExists(node->doc->dict, selector->name.data, selector->name.length); if (interned != NULL) { @@ -1304,10 +1363,10 @@ lxb_selectors_match_class(const lexbor_str_t *target, const lexbor_str_t *src, } static bool -lxb_selectors_match_attribute_value(const lxb_css_selector_attribute_t *attr, const lexbor_str_t *trg, const lexbor_str_t *src) +lxb_selectors_match_attribute_value(const lxb_css_selector_attribute_t *attr, bool force_modifier_i, const lexbor_str_t *trg, const lexbor_str_t *src) { bool res; - bool ins = attr->modifier == LXB_CSS_SELECTOR_MODIFIER_I; + bool ins = attr->modifier == LXB_CSS_SELECTOR_MODIFIER_I || force_modifier_i; /* force_modifier_i has the same effect as an explicit LXB_CSS_SELECTOR_MODIFIER_I on the selector */ switch (attr->match) { case LXB_CSS_SELECTOR_MATCH_EQUAL: /* = */ @@ -1419,7 +1478,13 @@ lxb_selectors_match_attribute(const lxb_css_selector_t *selector, } dom_lxb_str_wrapper trg = lxb_selectors_adapted_attr_value(dom_attr); - bool res = lxb_selectors_match_attribute_value(attr, &trg.str, src); + ZEND_ASSERT(node->doc != NULL); + bool res = lxb_selectors_match_attribute_value( + attr, + entry->id.attr_case_insensitive && php_dom_ns_is_html_and_document_is_html(node), /* force case-insensitive value matching only when both the name flag and the node check hold */ + &trg.str, + src + ); dom_lxb_str_wrapper_release(&trg); return res; } diff --git 
a/ext/dom/lexbor/lexbor/selectors-adapted/selectors.h b/ext/dom/lexbor/lexbor/selectors-adapted/selectors.h index 441976b1e3d..9057fae6841 100644 --- a/ext/dom/lexbor/lexbor/selectors-adapted/selectors.h +++ b/ext/dom/lexbor/lexbor/selectors-adapted/selectors.h @@ -78,6 +78,7 @@ typedef lxb_selectors_entry_t * typedef struct { const xmlChar *name; bool interned; + bool attr_case_insensitive; /* set from lxb_selectors_is_lowercased_html_attrib_name(): the selector name is one of the HTML attribute names whose values match ASCII case-insensitively */ } lxb_selectors_adapted_id; struct lxb_selectors_entry { diff --git a/ext/dom/tests/modern/css_selectors/gh17802.phpt b/ext/dom/tests/modern/css_selectors/gh17802.phpt index 4797b7fb1c9..2af1dab6228 100644 --- a/ext/dom/tests/modern/css_selectors/gh17802.phpt +++ b/ext/dom/tests/modern/css_selectors/gh17802.phpt @@ -32,6 +32,18 @@ foreach ($dom->querySelectorAll('meta[charset]') as $entry) { var_dump($dom->saveHtml($entry)); } +echo "--- charseT and lowercase value ---\n"; + +foreach ($dom->querySelectorAll('meta[charseT="windows-1252"]') as $entry) { + var_dump($dom->saveHtml($entry)); +} + +echo "--- charset and lowercase value ---\n"; + +foreach ($dom->querySelectorAll('meta[charset="windows-1252"]') as $entry) { + var_dump($dom->saveHtml($entry)); +} + ?> --EXPECT--
@@ -45,3 +57,7 @@ string(29) "" --- charset --- string(29) "" string(25) "" +--- charseT and lowercase value --- +string(29) "" +--- charset and lowercase value --- +string(29) ""