Fix #80268: loadHTML() truncates at NUL bytes

libxml2 has no particular issues parsing HTML strings with NUL bytes;
a NUL byte merely truncates the current text content, and parsing
generally continues afterwards.  Since `::loadHTMLFile()` already
supports NUL bytes, `::loadHTML()` should as well.

Note that this is different from XML, which does not allow any NUL
bytes.

Closes GH-6368.
This commit is contained in:
Christoph M. Becker 2020-10-23 11:06:30 +02:00
parent 824cbc2781
commit 6d2bc72530
3 changed files with 27 additions and 1 deletions

3
NEWS
View file

@@ -11,6 +11,9 @@ PHP NEWS
- COM:
. Fixed bug #62474 (com_event_sink crashes on certain arguments). (cmb)
- DOM:
. Fixed bug #80268 (loadHTML() truncates at NUL bytes). (cmb)
- IMAP:
. Fixed bug #64076 (imap_sort() does not return FALSE on failure). (cmb)
. Fixed bug #76618 (segfault on imap_reopen). (girgias)

View file

@@ -2024,7 +2024,6 @@ static void dom_load_html(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{ */
}
ctxt = htmlCreateFileParserCtxt(source, NULL);
} else {
source_len = xmlStrlen((xmlChar *) source);
if (ZEND_SIZE_T_INT_OVFL(source_len)) {
php_error_docref(NULL, E_WARNING, "Input string is too long");
RETURN_FALSE;

View file

@@ -0,0 +1,24 @@
--TEST--
Bug #80268 (loadHTML() truncates at NUL bytes)
--SKIPIF--
<?php require_once('skipif.inc'); ?>
--FILE--
<?php
// Markup with an embedded NUL byte inside the paragraph text. Both loaders
// are fed the same bytes and are expected to yield a document whose
// serialization contains "<p>foo</p>".
$markup = "<p>foo\0bar</p>";
$fixture = __DIR__ . '/80268.html';

// Case 1: parse directly from the in-memory string.
$fromString = new DOMDocument;
$fromString->loadHTML($markup);
var_dump(strpos($fromString->saveHTML(), '<p>foo</p>') !== false);

// Case 2: write the same bytes to disk and parse them from the file.
file_put_contents($fixture, $markup);
$fromFile = new DOMDocument;
$fromFile->loadHTMLFile($fixture);
var_dump(strpos($fromFile->saveHTML(), '<p>foo</p>') !== false);
?>
--CLEAN--
<?php
// Remove the HTML fixture that the --FILE-- section writes next to this test.
$fixture = __DIR__ . '/80268.html';
unlink($fixture);
?>
--EXPECT--
bool(true)
bool(true)