From b6004a043c16b211d462218fbb3f72db68ec2b18 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20D=C3=BCsterhus?= Date: Wed, 20 Nov 2024 10:47:27 +0100 Subject: [PATCH] Fix GHSA-p3x9-6h7p-cgfc: libxml streams wrong `content-type` on redirect libxml streams use wrong content-type header when requesting a redirected resource. --- ext/dom/tests/ghsa-p3x9-6h7p-cgfc_001.phpt | 60 ++++++++++++++++ ext/dom/tests/ghsa-p3x9-6h7p-cgfc_002.phpt | 60 ++++++++++++++++ ext/dom/tests/ghsa-p3x9-6h7p-cgfc_003.phpt | 60 ++++++++++++++++ ext/libxml/libxml.c | 79 ++++++++++++---------- 4 files changed, 225 insertions(+), 34 deletions(-) create mode 100644 ext/dom/tests/ghsa-p3x9-6h7p-cgfc_001.phpt create mode 100644 ext/dom/tests/ghsa-p3x9-6h7p-cgfc_002.phpt create mode 100644 ext/dom/tests/ghsa-p3x9-6h7p-cgfc_003.phpt diff --git a/ext/dom/tests/ghsa-p3x9-6h7p-cgfc_001.phpt b/ext/dom/tests/ghsa-p3x9-6h7p-cgfc_001.phpt new file mode 100644 index 00000000000..47212cb3410 --- /dev/null +++ b/ext/dom/tests/ghsa-p3x9-6h7p-cgfc_001.phpt @@ -0,0 +1,60 @@ +--TEST-- +GHSA-p3x9-6h7p-cgfc: libxml streams use wrong `content-type` header when requesting a redirected resource (Basic) +--EXTENSIONS-- +dom +--SKIPIF-- + +--FILE-- + + + + GHSA-p3x9-6h7p-cgfc + + + + + + +

GHSA-p3x9-6h7p-cgfc

+ + + EOT; + // Intentionally using non-standard casing for content-type to verify it is matched not case sensitively. + yield "data://text/plain,HTTP/1.1 200 OK\r\nconteNt-tyPe: text/html; charset=utf-8\r\n\r\n{$xml}"; +} + +['pid' => $pid, 'uri' => $uri] = http_server('genResponses', $output); +$document = new \DOMDocument(); +$document->loadHTMLFile($uri); + +$h1 = $document->getElementsByTagName('h1'); +var_dump($h1->length); +var_dump($document->saveHTML()); +http_server_kill($pid); +?> +--EXPECT-- +int(1) +string(266) " + + + GHSA-p3x9-6h7p-cgfc + + + + + + +

GHSA-p3x9-6h7p-cgfc

+ + +" diff --git a/ext/dom/tests/ghsa-p3x9-6h7p-cgfc_002.phpt b/ext/dom/tests/ghsa-p3x9-6h7p-cgfc_002.phpt new file mode 100644 index 00000000000..a7eff3b9a8b --- /dev/null +++ b/ext/dom/tests/ghsa-p3x9-6h7p-cgfc_002.phpt @@ -0,0 +1,60 @@ +--TEST-- +GHSA-p3x9-6h7p-cgfc: libxml streams use wrong `content-type` header when requesting a redirected resource (Missing content-type) +--EXTENSIONS-- +dom +--SKIPIF-- + +--FILE-- + + + + GHSA-p3x9-6h7p-cgfc + + + + + + +

GHSA-p3x9-6h7p-cgfc

+ + + EOT; + // Missing content-type in actual response. + yield "data://text/plain,HTTP/1.1 200 OK\r\n\r\n{$xml}"; +} + +['pid' => $pid, 'uri' => $uri] = http_server('genResponses', $output); +$document = new \DOMDocument(); +$document->loadHTMLFile($uri); + +$h1 = $document->getElementsByTagName('h1'); +var_dump($h1->length); +var_dump($document->saveHTML()); +http_server_kill($pid); +?> +--EXPECT-- +int(1) +string(266) " + + + GHSA-p3x9-6h7p-cgfc + + + + + + +

GHSA-p3x9-6h7p-cgfc

+ + +" diff --git a/ext/dom/tests/ghsa-p3x9-6h7p-cgfc_003.phpt b/ext/dom/tests/ghsa-p3x9-6h7p-cgfc_003.phpt new file mode 100644 index 00000000000..178b35f3525 --- /dev/null +++ b/ext/dom/tests/ghsa-p3x9-6h7p-cgfc_003.phpt @@ -0,0 +1,60 @@ +--TEST-- +GHSA-p3x9-6h7p-cgfc: libxml streams use wrong `content-type` header when requesting a redirected resource (Reason with colon) +--EXTENSIONS-- +dom +--SKIPIF-- + +--FILE-- + + + + GHSA-p3x9-6h7p-cgfc + + + + + + +

GHSA-p3x9-6h7p-cgfc

+ + + EOT; + // Missing content-type in actual response. + yield "data://text/plain,HTTP/1.1 200 OK: This is fine\r\n\r\n{$xml}"; +} + +['pid' => $pid, 'uri' => $uri] = http_server('genResponses', $output); +$document = new \DOMDocument(); +$document->loadHTMLFile($uri); + +$h1 = $document->getElementsByTagName('h1'); +var_dump($h1->length); +var_dump($document->saveHTML()); +http_server_kill($pid); +?> +--EXPECT-- +int(1) +string(266) " + + + GHSA-p3x9-6h7p-cgfc + + + + + + +

GHSA-p3x9-6h7p-cgfc

+ + +" diff --git a/ext/libxml/libxml.c b/ext/libxml/libxml.c index dc5e7790952..3311346d4bc 100644 --- a/ext/libxml/libxml.c +++ b/ext/libxml/libxml.c @@ -374,42 +374,53 @@ php_libxml_input_buffer_create_filename(const char *URI, xmlCharEncoding enc) if (Z_TYPE(s->wrapperdata) == IS_ARRAY) { zval *header; - ZEND_HASH_FOREACH_VAL_IND(Z_ARRVAL(s->wrapperdata), header) { + /* Scan backwards: The header array might contain the headers for multiple responses, if + * a redirect was followed. + */ + ZEND_HASH_REVERSE_FOREACH_VAL_IND(Z_ARRVAL(s->wrapperdata), header) { const char buf[] = "Content-Type:"; - if (Z_TYPE_P(header) == IS_STRING && - !zend_binary_strncasecmp(Z_STRVAL_P(header), Z_STRLEN_P(header), buf, sizeof(buf)-1, sizeof(buf)-1)) { - char *needle = estrdup("charset="); - char *haystack = estrndup(Z_STRVAL_P(header), Z_STRLEN_P(header)); - char *encoding = php_stristr(haystack, needle, Z_STRLEN_P(header), sizeof("charset=")-1); - - if (encoding) { - char *end; - - encoding += sizeof("charset=")-1; - if (*encoding == '"') { - encoding++; - } - end = strchr(encoding, ';'); - if (end == NULL) { - end = encoding + strlen(encoding); - } - end--; /* end == encoding-1 isn't a buffer underrun */ - while (*end == ' ' || *end == '\t') { - end--; - } - if (*end == '"') { - end--; - } - if (encoding >= end) continue; - *(end+1) = '\0'; - enc = xmlParseCharEncoding(encoding); - if (enc <= XML_CHAR_ENCODING_NONE) { - enc = XML_CHAR_ENCODING_NONE; - } + if (Z_TYPE_P(header) == IS_STRING) { + /* If no colon is found in the header, we assume it's the HTTP status line and bail out. */ + char *colon = memchr(Z_STRVAL_P(header), ':', Z_STRLEN_P(header)); + char *space = memchr(Z_STRVAL_P(header), ' ', Z_STRLEN_P(header)); + if (colon == NULL || space < colon) { + break; + } + + if (!zend_binary_strncasecmp(Z_STRVAL_P(header), Z_STRLEN_P(header), buf, sizeof(buf)-1, sizeof(buf)-1)) { + char *needle = estrdup("charset="); + char *haystack = estrndup(Z_STRVAL_P(header), Z_STRLEN_P(header)); + char *encoding = php_stristr(haystack, needle, Z_STRLEN_P(header), sizeof("charset=")-1); + + if (encoding) { + char *end; + + encoding += sizeof("charset=")-1; + if (*encoding == '"') { + encoding++; + } + end = strchr(encoding, ';'); + if (end == NULL) { + end = encoding + strlen(encoding); + } + end--; /* end == encoding-1 isn't a buffer underrun */ + while (*end == ' ' || *end == '\t') { + end--; + } + if (*end == '"') { + end--; + } + if (encoding >= end) continue; + *(end+1) = '\0'; + enc = xmlParseCharEncoding(encoding); + if (enc <= XML_CHAR_ENCODING_NONE) { + enc = XML_CHAR_ENCODING_NONE; + } + } + efree(haystack); + efree(needle); + break; /* found content-type */ } - efree(haystack); - efree(needle); - break; /* found content-type */ } } ZEND_HASH_FOREACH_END(); }