[RFC] DOM HTML5 parsing and serialization support (#12111)

This commit is contained in:
Niels Dossche 2023-11-13 19:18:19 +00:00 committed by GitHub
parent 17d2917fb7
commit 1492be5286
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
162 changed files with 8425 additions and 1354 deletions

1
NEWS
View file

@ -10,6 +10,7 @@ DOM:
. Implement #53655 (Improve speed of DOMNode::C14N() on large XML documents). . Implement #53655 (Improve speed of DOMNode::C14N() on large XML documents).
(nielsdos) (nielsdos)
. Fix namespace disappearing when cloning an attribute with a namespace. (nielsdos) . Fix namespace disappearing when cloning an attribute with a namespace. (nielsdos)
. Implement DOM HTML5 parsing and serialization RFC. (nielsdos)
FTP: FTP:
. Removed the deprecated inet_ntoa call support. (David Carlier) . Removed the deprecated inet_ntoa call support. (David Carlier)

View file

@ -80,6 +80,14 @@ PHP 8.4 UPGRADE NOTES
. Added constant DOMNode::DOCUMENT_POSITION_CONTAINS. . Added constant DOMNode::DOCUMENT_POSITION_CONTAINS.
. Added constant DOMNode::DOCUMENT_POSITION_CONTAINED_BY. . Added constant DOMNode::DOCUMENT_POSITION_CONTAINED_BY.
. Added constant DOMNode::DOCUMENT_POSITION_IMPLEMENTATION_SPECIFIC. . Added constant DOMNode::DOCUMENT_POSITION_IMPLEMENTATION_SPECIFIC.
. Implemented DOM HTML5 parsing and serialization.
RFC: https://wiki.php.net/rfc/domdocument_html5_parser
This RFC adds the new DOM namespace along with class and constant aliases.
There are two new classes to handle HTML and XML documents:
DOM\HTMLDocument and DOM\XMLDocument.
These classes provide a cleaner API to handle HTML and XML documents.
Furthermore, the DOM\HTMLDocument class implements spec-compliant HTML5
parsing and serialization.
- Phar: - Phar:
. Added support for the unix timestamp extension for zip archives. . Added support for the unix timestamp extension for zip archives.

View file

@ -52,6 +52,10 @@ PHP 8.4 INTERNALS UPGRADE NOTES
- The function php_xsl_create_object() was removed as it was not used - The function php_xsl_create_object() was removed as it was not used
nor exported. nor exported.
d. ext/libxml
- Added php_libxml_pretend_ctx_error_ex() to emit errors as if they had come
from libxml.
======================== ========================
4. OpCode changes 4. OpCode changes
======================== ========================

View file

@ -12,7 +12,21 @@ if test "$PHP_DOM" != "no"; then
PHP_SETUP_LIBXML(DOM_SHARED_LIBADD, [ PHP_SETUP_LIBXML(DOM_SHARED_LIBADD, [
AC_DEFINE(HAVE_DOM,1,[ ]) AC_DEFINE(HAVE_DOM,1,[ ])
PHP_LEXBOR_CFLAGS="-I@ext_srcdir@/lexbor -DLEXBOR_STATIC"
LEXBOR_DIR="lexbor/lexbor"
LEXBOR_SOURCES="$LEXBOR_DIR/ports/posix/lexbor/core/memory.c \
$LEXBOR_DIR/core/array_obj.c $LEXBOR_DIR/core/array.c $LEXBOR_DIR/core/avl.c $LEXBOR_DIR/core/bst.c $LEXBOR_DIR/core/diyfp.c $LEXBOR_DIR/core/conv.c $LEXBOR_DIR/core/dobject.c $LEXBOR_DIR/core/dtoa.c $LEXBOR_DIR/core/hash.c $LEXBOR_DIR/core/mem.c $LEXBOR_DIR/core/mraw.c $LEXBOR_DIR/core/print.c $LEXBOR_DIR/core/serialize.c $LEXBOR_DIR/core/shs.c $LEXBOR_DIR/core/str.c $LEXBOR_DIR/core/strtod.c \
$LEXBOR_DIR/dom/interface.c $LEXBOR_DIR/dom/interfaces/attr.c $LEXBOR_DIR/dom/interfaces/cdata_section.c $LEXBOR_DIR/dom/interfaces/character_data.c $LEXBOR_DIR/dom/interfaces/comment.c $LEXBOR_DIR/dom/interfaces/document.c $LEXBOR_DIR/dom/interfaces/document_fragment.c $LEXBOR_DIR/dom/interfaces/document_type.c $LEXBOR_DIR/dom/interfaces/element.c $LEXBOR_DIR/dom/interfaces/node.c $LEXBOR_DIR/dom/interfaces/processing_instruction.c $LEXBOR_DIR/dom/interfaces/shadow_root.c $LEXBOR_DIR/dom/interfaces/text.c \
$LEXBOR_DIR/html/tokenizer/error.c $LEXBOR_DIR/html/tokenizer/state_comment.c $LEXBOR_DIR/html/tokenizer/state_doctype.c $LEXBOR_DIR/html/tokenizer/state_rawtext.c $LEXBOR_DIR/html/tokenizer/state_rcdata.c $LEXBOR_DIR/html/tokenizer/state_script.c $LEXBOR_DIR/html/tokenizer/state.c \
$LEXBOR_DIR/html/tree/active_formatting.c $LEXBOR_DIR/html/tree/error.c $LEXBOR_DIR/html/tree/insertion_mode/after_after_body.c $LEXBOR_DIR/html/tree/insertion_mode/after_after_frameset.c $LEXBOR_DIR/html/tree/insertion_mode/after_body.c $LEXBOR_DIR/html/tree/insertion_mode/after_frameset.c $LEXBOR_DIR/html/tree/insertion_mode/after_head.c $LEXBOR_DIR/html/tree/insertion_mode/before_head.c $LEXBOR_DIR/html/tree/insertion_mode/before_html.c $LEXBOR_DIR/html/tree/insertion_mode/foreign_content.c $LEXBOR_DIR/html/tree/insertion_mode/in_body.c $LEXBOR_DIR/html/tree/insertion_mode/in_caption.c $LEXBOR_DIR/html/tree/insertion_mode/in_cell.c $LEXBOR_DIR/html/tree/insertion_mode/in_column_group.c $LEXBOR_DIR/html/tree/insertion_mode/in_frameset.c $LEXBOR_DIR/html/tree/insertion_mode/in_head.c $LEXBOR_DIR/html/tree/insertion_mode/in_head_noscript.c $LEXBOR_DIR/html/tree/insertion_mode/initial.c $LEXBOR_DIR/html/tree/insertion_mode/in_row.c $LEXBOR_DIR/html/tree/insertion_mode/in_select.c $LEXBOR_DIR/html/tree/insertion_mode/in_select_in_table.c $LEXBOR_DIR/html/tree/insertion_mode/in_table_body.c $LEXBOR_DIR/html/tree/insertion_mode/in_table.c $LEXBOR_DIR/html/tree/insertion_mode/in_table_text.c $LEXBOR_DIR/html/tree/insertion_mode/in_template.c $LEXBOR_DIR/html/tree/insertion_mode/text.c $LEXBOR_DIR/html/tree/open_elements.c \
$LEXBOR_DIR/encoding/big5.c $LEXBOR_DIR/encoding/decode.c $LEXBOR_DIR/encoding/encode.c $LEXBOR_DIR/encoding/encoding.c $LEXBOR_DIR/encoding/euc_kr.c $LEXBOR_DIR/encoding/gb18030.c $LEXBOR_DIR/encoding/iso_2022_jp_katakana.c $LEXBOR_DIR/encoding/jis0208.c $LEXBOR_DIR/encoding/jis0212.c $LEXBOR_DIR/encoding/range.c $LEXBOR_DIR/encoding/res.c $LEXBOR_DIR/encoding/single.c \
$LEXBOR_DIR/html/encoding.c $LEXBOR_DIR/html/interface.c $LEXBOR_DIR/html/parser.c $LEXBOR_DIR/html/token.c $LEXBOR_DIR/html/token_attr.c $LEXBOR_DIR/html/tokenizer.c $LEXBOR_DIR/html/tree.c \
$LEXBOR_DIR/html/interfaces/anchor_element.c $LEXBOR_DIR/html/interfaces/area_element.c $LEXBOR_DIR/html/interfaces/audio_element.c $LEXBOR_DIR/html/interfaces/base_element.c $LEXBOR_DIR/html/interfaces/body_element.c $LEXBOR_DIR/html/interfaces/br_element.c $LEXBOR_DIR/html/interfaces/button_element.c $LEXBOR_DIR/html/interfaces/canvas_element.c $LEXBOR_DIR/html/interfaces/data_element.c $LEXBOR_DIR/html/interfaces/data_list_element.c $LEXBOR_DIR/html/interfaces/details_element.c $LEXBOR_DIR/html/interfaces/dialog_element.c $LEXBOR_DIR/html/interfaces/directory_element.c $LEXBOR_DIR/html/interfaces/div_element.c $LEXBOR_DIR/html/interfaces/d_list_element.c $LEXBOR_DIR/html/interfaces/document.c $LEXBOR_DIR/html/interfaces/element.c $LEXBOR_DIR/html/interfaces/embed_element.c $LEXBOR_DIR/html/interfaces/field_set_element.c $LEXBOR_DIR/html/interfaces/font_element.c $LEXBOR_DIR/html/interfaces/form_element.c $LEXBOR_DIR/html/interfaces/frame_element.c $LEXBOR_DIR/html/interfaces/frame_set_element.c $LEXBOR_DIR/html/interfaces/head_element.c $LEXBOR_DIR/html/interfaces/heading_element.c $LEXBOR_DIR/html/interfaces/hr_element.c $LEXBOR_DIR/html/interfaces/html_element.c $LEXBOR_DIR/html/interfaces/iframe_element.c $LEXBOR_DIR/html/interfaces/image_element.c $LEXBOR_DIR/html/interfaces/input_element.c $LEXBOR_DIR/html/interfaces/label_element.c $LEXBOR_DIR/html/interfaces/legend_element.c $LEXBOR_DIR/html/interfaces/li_element.c $LEXBOR_DIR/html/interfaces/link_element.c $LEXBOR_DIR/html/interfaces/map_element.c $LEXBOR_DIR/html/interfaces/marquee_element.c $LEXBOR_DIR/html/interfaces/media_element.c $LEXBOR_DIR/html/interfaces/menu_element.c $LEXBOR_DIR/html/interfaces/meta_element.c $LEXBOR_DIR/html/interfaces/meter_element.c $LEXBOR_DIR/html/interfaces/mod_element.c $LEXBOR_DIR/html/interfaces/object_element.c $LEXBOR_DIR/html/interfaces/o_list_element.c $LEXBOR_DIR/html/interfaces/opt_group_element.c $LEXBOR_DIR/html/interfaces/option_element.c 
$LEXBOR_DIR/html/interfaces/output_element.c $LEXBOR_DIR/html/interfaces/paragraph_element.c $LEXBOR_DIR/html/interfaces/param_element.c $LEXBOR_DIR/html/interfaces/picture_element.c $LEXBOR_DIR/html/interfaces/pre_element.c $LEXBOR_DIR/html/interfaces/progress_element.c $LEXBOR_DIR/html/interfaces/quote_element.c $LEXBOR_DIR/html/interfaces/script_element.c $LEXBOR_DIR/html/interfaces/select_element.c $LEXBOR_DIR/html/interfaces/slot_element.c $LEXBOR_DIR/html/interfaces/source_element.c $LEXBOR_DIR/html/interfaces/span_element.c $LEXBOR_DIR/html/interfaces/style_element.c $LEXBOR_DIR/html/interfaces/table_caption_element.c $LEXBOR_DIR/html/interfaces/table_cell_element.c $LEXBOR_DIR/html/interfaces/table_col_element.c $LEXBOR_DIR/html/interfaces/table_element.c $LEXBOR_DIR/html/interfaces/table_row_element.c $LEXBOR_DIR/html/interfaces/table_section_element.c $LEXBOR_DIR/html/interfaces/template_element.c $LEXBOR_DIR/html/interfaces/text_area_element.c $LEXBOR_DIR/html/interfaces/time_element.c $LEXBOR_DIR/html/interfaces/title_element.c $LEXBOR_DIR/html/interfaces/track_element.c $LEXBOR_DIR/html/interfaces/u_list_element.c $LEXBOR_DIR/html/interfaces/unknown_element.c $LEXBOR_DIR/html/interfaces/video_element.c $LEXBOR_DIR/html/interfaces/window.c \
$LEXBOR_DIR/selectors/selectors.c \
$LEXBOR_DIR/ns/ns.c \
$LEXBOR_DIR/tag/tag.c"
PHP_NEW_EXTENSION(dom, [php_dom.c attr.c document.c \ PHP_NEW_EXTENSION(dom, [php_dom.c attr.c document.c \
xml_document.c html_document.c html5_serializer.c html5_parser.c namespace_compat.c \
domexception.c parentnode.c \ domexception.c parentnode.c \
processinginstruction.c cdatasection.c \ processinginstruction.c cdatasection.c \
documentfragment.c domimplementation.c \ documentfragment.c domimplementation.c \
@ -21,8 +35,9 @@ if test "$PHP_DOM" != "no"; then
nodelist.c text.c comment.c \ nodelist.c text.c comment.c \
entityreference.c \ entityreference.c \
notation.c xpath.c dom_iterators.c \ notation.c xpath.c dom_iterators.c \
namednodemap.c], namednodemap.c \
$ext_shared) $LEXBOR_SOURCES],
$ext_shared,,$PHP_LEXBOR_CFLAGS)
PHP_SUBST(DOM_SHARED_LIBADD) PHP_SUBST(DOM_SHARED_LIBADD)
PHP_INSTALL_HEADERS([ext/dom/xml_common.h]) PHP_INSTALL_HEADERS([ext/dom/xml_common.h])
PHP_ADD_EXTENSION_DEP(dom, libxml) PHP_ADD_EXTENSION_DEP(dom, libxml)

View file

@ -8,13 +8,29 @@ if (PHP_DOM == "yes") {
CHECK_HEADER_ADD_INCLUDE("libxml/parser.h", "CFLAGS_DOM", PHP_PHP_BUILD + "\\include\\libxml2") CHECK_HEADER_ADD_INCLUDE("libxml/parser.h", "CFLAGS_DOM", PHP_PHP_BUILD + "\\include\\libxml2")
) { ) {
EXTENSION("dom", "php_dom.c attr.c document.c \ EXTENSION("dom", "php_dom.c attr.c document.c \
xml_document.c html_document.c html5_serializer.c html5_parser.c namespace_compat.c \
domexception.c parentnode.c processinginstruction.c \ domexception.c parentnode.c processinginstruction.c \
cdatasection.c documentfragment.c domimplementation.c element.c \ cdatasection.c documentfragment.c domimplementation.c element.c \
node.c characterdata.c documenttype.c \ node.c characterdata.c documenttype.c \
entity.c nodelist.c text.c comment.c \ entity.c nodelist.c text.c comment.c \
entityreference.c \ entityreference.c \
notation.c xpath.c dom_iterators.c \ notation.c xpath.c dom_iterators.c \
namednodemap.c"); namednodemap.c", null, "-Iext/dom/lexbor");
ADD_SOURCES("ext/dom/lexbor/lexbor/ports/windows_nt/lexbor/core", "memory.c", "dom");
ADD_SOURCES("ext/dom/lexbor/lexbor/core", "array_obj.c array.c avl.c bst.c diyfp.c conv.c dobject.c dtoa.c hash.c mem.c mraw.c print.c serialize.c shs.c str.c strtod.c", "dom");
ADD_SOURCES("ext/dom/lexbor/lexbor/dom", "interface.c", "dom");
ADD_SOURCES("ext/dom/lexbor/lexbor/dom/interfaces", "attr.c cdata_section.c character_data.c comment.c document.c document_fragment.c document_type.c element.c node.c processing_instruction.c shadow_root.c text.c", "dom");
ADD_SOURCES("ext/dom/lexbor/lexbor/html/tokenizer", "error.c state_comment.c state_doctype.c state_rawtext.c state_rcdata.c state_script.c state.c", "dom");
ADD_SOURCES("ext/dom/lexbor/lexbor/html/tree", "active_formatting.c open_elements.c error.c", "dom");
ADD_SOURCES("ext/dom/lexbor/lexbor/html/tree/insertion_mode", "after_after_body.c after_after_frameset.c after_body.c after_frameset.c after_head.c before_head.c before_html.c foreign_content.c in_body.c in_caption.c in_cell.c in_column_group.c in_frameset.c in_head.c in_head_noscript.c initial.c in_row.c in_select.c in_select_in_table.c in_table_body.c in_table.c in_table_text.c in_template.c text.c", "dom");
ADD_SOURCES("ext/dom/lexbor/lexbor/html", "encoding.c interface.c parser.c token.c token_attr.c tokenizer.c tree.c", "dom");
ADD_SOURCES("ext/dom/lexbor/lexbor/encoding", "big5.c decode.c encode.c encoding.c euc_kr.c gb18030.c iso_2022_jp_katakana.c jis0208.c jis0212.c range.c res.c single.c", "dom");
ADD_SOURCES("ext/dom/lexbor/lexbor/html/interfaces", "anchor_element.c area_element.c audio_element.c base_element.c body_element.c br_element.c button_element.c canvas_element.c data_element.c data_list_element.c details_element.c dialog_element.c directory_element.c div_element.c d_list_element.c document.c element.c embed_element.c field_set_element.c font_element.c form_element.c frame_element.c frame_set_element.c head_element.c heading_element.c hr_element.c html_element.c iframe_element.c image_element.c input_element.c label_element.c legend_element.c li_element.c link_element.c map_element.c marquee_element.c media_element.c menu_element.c meta_element.c meter_element.c mod_element.c object_element.c o_list_element.c opt_group_element.c option_element.c output_element.c paragraph_element.c param_element.c picture_element.c pre_element.c progress_element.c quote_element.c script_element.c select_element.c slot_element.c source_element.c span_element.c style_element.c table_caption_element.c table_cell_element.c table_col_element.c table_element.c table_row_element.c table_section_element.c template_element.c text_area_element.c time_element.c title_element.c track_element.c u_list_element.c unknown_element.c video_element.c window.c", "dom");
ADD_SOURCES("ext/dom/lexbor/lexbor/selectors", "selectors.c", "dom");
ADD_SOURCES("ext/dom/lexbor/lexbor/ns", "ns.c", "dom");
ADD_SOURCES("ext/dom/lexbor/lexbor/tag", "tag.c", "dom");
ADD_FLAG("CFLAGS_DOM", "/D LEXBOR_STATIC ");
AC_DEFINE("HAVE_DOM", 1, "DOM support"); AC_DEFINE("HAVE_DOM", 1, "DOM support");

View file

@ -35,9 +35,6 @@ struct _idsIterator {
xmlNode *element; xmlNode *element;
}; };
#define DOM_LOAD_STRING 0
#define DOM_LOAD_FILE 1
/* /*
* class DOMDocument extends DOMNode * class DOMDocument extends DOMNode
* *
@ -486,7 +483,7 @@ zend_result dom_document_config_read(dom_object *obj, zval *retval)
/* {{{ URL: http://www.w3.org/TR/2003/WD-DOM-Level-3-Core-20030226/DOM3-Core.html#core-ID-2141741547 /* {{{ URL: http://www.w3.org/TR/2003/WD-DOM-Level-3-Core-20030226/DOM3-Core.html#core-ID-2141741547
Since: Since:
*/ */
PHP_METHOD(DOMDocument, createElement) PHP_METHOD(DOM_Document, createElement)
{ {
zval *id; zval *id;
xmlNode *node; xmlNode *node;
@ -521,7 +518,7 @@ PHP_METHOD(DOMDocument, createElement)
/* {{{ URL: http://www.w3.org/TR/2003/WD-DOM-Level-3-Core-20030226/DOM3-Core.html#core-ID-35CB04B5 /* {{{ URL: http://www.w3.org/TR/2003/WD-DOM-Level-3-Core-20030226/DOM3-Core.html#core-ID-35CB04B5
Since: Since:
*/ */
PHP_METHOD(DOMDocument, createDocumentFragment) PHP_METHOD(DOM_Document, createDocumentFragment)
{ {
zval *id; zval *id;
xmlNode *node; xmlNode *node;
@ -549,7 +546,7 @@ PHP_METHOD(DOMDocument, createDocumentFragment)
/* {{{ URL: http://www.w3.org/TR/2003/WD-DOM-Level-3-Core-20030226/DOM3-Core.html#core-ID-1975348127 /* {{{ URL: http://www.w3.org/TR/2003/WD-DOM-Level-3-Core-20030226/DOM3-Core.html#core-ID-1975348127
Since: Since:
*/ */
PHP_METHOD(DOMDocument, createTextNode) PHP_METHOD(DOM_Document, createTextNode)
{ {
zval *id; zval *id;
xmlNode *node; xmlNode *node;
@ -579,7 +576,7 @@ PHP_METHOD(DOMDocument, createTextNode)
/* {{{ URL: http://www.w3.org/TR/2003/WD-DOM-Level-3-Core-20030226/DOM3-Core.html#core-ID-1334481328 /* {{{ URL: http://www.w3.org/TR/2003/WD-DOM-Level-3-Core-20030226/DOM3-Core.html#core-ID-1334481328
Since: Since:
*/ */
PHP_METHOD(DOMDocument, createComment) PHP_METHOD(DOM_Document, createComment)
{ {
zval *id; zval *id;
xmlNode *node; xmlNode *node;
@ -609,7 +606,7 @@ PHP_METHOD(DOMDocument, createComment)
/* {{{ URL: http://www.w3.org/TR/2003/WD-DOM-Level-3-Core-20030226/DOM3-Core.html#core-ID-D26C0AF8 /* {{{ URL: http://www.w3.org/TR/2003/WD-DOM-Level-3-Core-20030226/DOM3-Core.html#core-ID-D26C0AF8
Since: Since:
*/ */
PHP_METHOD(DOMDocument, createCDATASection) PHP_METHOD(DOM_Document, createCDATASection)
{ {
zval *id; zval *id;
xmlNode *node; xmlNode *node;
@ -639,7 +636,7 @@ PHP_METHOD(DOMDocument, createCDATASection)
/* {{{ URL: http://www.w3.org/TR/2003/WD-DOM-Level-3-Core-20030226/DOM3-Core.html#core-ID-135944439 /* {{{ URL: http://www.w3.org/TR/2003/WD-DOM-Level-3-Core-20030226/DOM3-Core.html#core-ID-135944439
Since: Since:
*/ */
PHP_METHOD(DOMDocument, createProcessingInstruction) PHP_METHOD(DOM_Document, createProcessingInstruction)
{ {
zval *id; zval *id;
xmlNode *node; xmlNode *node;
@ -676,7 +673,7 @@ PHP_METHOD(DOMDocument, createProcessingInstruction)
/* {{{ URL: http://www.w3.org/TR/2003/WD-DOM-Level-3-Core-20030226/DOM3-Core.html#core-ID-1084891198 /* {{{ URL: http://www.w3.org/TR/2003/WD-DOM-Level-3-Core-20030226/DOM3-Core.html#core-ID-1084891198
Since: Since:
*/ */
PHP_METHOD(DOMDocument, createAttribute) PHP_METHOD(DOM_Document, createAttribute)
{ {
zval *id; zval *id;
xmlAttrPtr node; xmlAttrPtr node;
@ -747,7 +744,7 @@ PHP_METHOD(DOMDocument, createEntityReference)
/* {{{ URL: http://www.w3.org/TR/2003/WD-DOM-Level-3-Core-20030226/DOM3-Core.html#core-ID-A6C9094 /* {{{ URL: http://www.w3.org/TR/2003/WD-DOM-Level-3-Core-20030226/DOM3-Core.html#core-ID-A6C9094
Since: Since:
*/ */
PHP_METHOD(DOMDocument, getElementsByTagName) PHP_METHOD(DOM_Document, getElementsByTagName)
{ {
size_t name_len; size_t name_len;
dom_object *intern, *namednode; dom_object *intern, *namednode;
@ -768,7 +765,7 @@ PHP_METHOD(DOMDocument, getElementsByTagName)
/* {{{ URL: http://www.w3.org/TR/2003/WD-DOM-Level-3-Core-20030226/DOM3-Core.html#Core-Document-importNode /* {{{ URL: http://www.w3.org/TR/2003/WD-DOM-Level-3-Core-20030226/DOM3-Core.html#Core-Document-importNode
Since: DOM Level 2 Since: DOM Level 2
*/ */
PHP_METHOD(DOMDocument, importNode) PHP_METHOD(DOM_Document, importNode)
{ {
zval *node; zval *node;
xmlDocPtr docp; xmlDocPtr docp;
@ -776,8 +773,6 @@ PHP_METHOD(DOMDocument, importNode)
dom_object *intern, *nodeobj; dom_object *intern, *nodeobj;
int ret; int ret;
bool recursive = 0; bool recursive = 0;
/* See http://www.xmlsoft.org/html/libxml-tree.html#xmlDocCopyNode for meaning of values */
int extended_recursive;
if (zend_parse_parameters(ZEND_NUM_ARGS(), "O|b", &node, dom_node_class_entry, &recursive) == FAILURE) { if (zend_parse_parameters(ZEND_NUM_ARGS(), "O|b", &node, dom_node_class_entry, &recursive) == FAILURE) {
RETURN_THROWS(); RETURN_THROWS();
@ -796,11 +791,7 @@ PHP_METHOD(DOMDocument, importNode)
if (nodep->doc == docp) { if (nodep->doc == docp) {
retnodep = nodep; retnodep = nodep;
} else { } else {
extended_recursive = recursive; retnodep = dom_clone_node(nodep, docp, intern, recursive);
if ((recursive == 0) && (nodep->type == XML_ELEMENT_NODE)) {
extended_recursive = 2;
}
retnodep = xmlDocCopyNode(nodep, docp, extended_recursive);
if (!retnodep) { if (!retnodep) {
RETURN_FALSE; RETURN_FALSE;
} }
@ -827,7 +818,7 @@ PHP_METHOD(DOMDocument, importNode)
/* {{{ URL: http://www.w3.org/TR/2003/WD-DOM-Level-3-Core-20030226/DOM3-Core.html#core-ID-DocCrElNS /* {{{ URL: http://www.w3.org/TR/2003/WD-DOM-Level-3-Core-20030226/DOM3-Core.html#core-ID-DocCrElNS
Since: DOM Level 2 Since: DOM Level 2
*/ */
PHP_METHOD(DOMDocument, createElementNS) PHP_METHOD(DOM_Document, createElementNS)
{ {
zval *id; zval *id;
xmlDocPtr docp; xmlDocPtr docp;
@ -887,7 +878,7 @@ PHP_METHOD(DOMDocument, createElementNS)
/* {{{ URL: http://www.w3.org/TR/2003/WD-DOM-Level-3-Core-20030226/DOM3-Core.html#core-ID-DocCrAttrNS /* {{{ URL: http://www.w3.org/TR/2003/WD-DOM-Level-3-Core-20030226/DOM3-Core.html#core-ID-DocCrAttrNS
Since: DOM Level 2 Since: DOM Level 2
*/ */
PHP_METHOD(DOMDocument, createAttributeNS) PHP_METHOD(DOM_Document, createAttributeNS)
{ {
zval *id; zval *id;
xmlDocPtr docp; xmlDocPtr docp;
@ -953,7 +944,7 @@ PHP_METHOD(DOMDocument, createAttributeNS)
/* {{{ URL: http://www.w3.org/TR/2003/WD-DOM-Level-3-Core-20030226/DOM3-Core.html#core-ID-getElBTNNS /* {{{ URL: http://www.w3.org/TR/2003/WD-DOM-Level-3-Core-20030226/DOM3-Core.html#core-ID-getElBTNNS
Since: DOM Level 2 Since: DOM Level 2
*/ */
PHP_METHOD(DOMDocument, getElementsByTagNameNS) PHP_METHOD(DOM_Document, getElementsByTagNameNS)
{ {
size_t uri_len, name_len; size_t uri_len, name_len;
dom_object *intern, *namednode; dom_object *intern, *namednode;
@ -974,7 +965,7 @@ PHP_METHOD(DOMDocument, getElementsByTagNameNS)
/* {{{ URL: http://www.w3.org/TR/2003/WD-DOM-Level-3-Core-20030226/DOM3-Core.html#core-ID-getElBId /* {{{ URL: http://www.w3.org/TR/2003/WD-DOM-Level-3-Core-20030226/DOM3-Core.html#core-ID-getElBId
Since: DOM Level 2 Since: DOM Level 2
*/ */
PHP_METHOD(DOMDocument, getElementById) PHP_METHOD(DOM_Document, getElementById)
{ {
zval *id; zval *id;
xmlDocPtr docp; xmlDocPtr docp;
@ -1051,7 +1042,7 @@ bool php_dom_adopt_node(xmlNodePtr nodep, dom_object *dom_object_new_document, x
Since: DOM Level 3 Since: DOM Level 3
Modern spec URL: https://dom.spec.whatwg.org/#dom-document-adoptnode Modern spec URL: https://dom.spec.whatwg.org/#dom-document-adoptnode
*/ */
PHP_METHOD(DOMDocument, adoptNode) PHP_METHOD(DOM_Document, adoptNode)
{ {
zval *node_zval; zval *node_zval;
if (zend_parse_parameters(ZEND_NUM_ARGS(), "O", &node_zval, dom_node_class_entry) == FAILURE) { if (zend_parse_parameters(ZEND_NUM_ARGS(), "O", &node_zval, dom_node_class_entry) == FAILURE) {
@ -1088,7 +1079,7 @@ PHP_METHOD(DOMDocument, adoptNode)
/* {{{ URL: http://www.w3.org/TR/2003/WD-DOM-Level-3-Core-20030226/DOM3-Core.html#core-Document3-normalizeDocument /* {{{ URL: http://www.w3.org/TR/2003/WD-DOM-Level-3-Core-20030226/DOM3-Core.html#core-Document3-normalizeDocument
Since: DOM Level 3 Since: DOM Level 3
*/ */
PHP_METHOD(DOMDocument, normalizeDocument) PHP_METHOD(DOM_Document, normalizeDocument)
{ {
zval *id; zval *id;
xmlDocPtr docp; xmlDocPtr docp;
@ -1149,11 +1140,11 @@ PHP_METHOD(DOMDocument, __construct)
} }
/* }}} end DOMDocument::__construct */ /* }}} end DOMDocument::__construct */
char *_dom_get_valid_file_path(char *source, char *resolved_path, int resolved_path_len ) /* {{{ */ const char *_dom_get_valid_file_path(const char *source, char *resolved_path, int resolved_path_len ) /* {{{ */
{ {
xmlURI *uri; xmlURI *uri;
xmlChar *escsource; xmlChar *escsource;
char *file_dest; const char *file_dest;
int isFileUri = 0; int isFileUri = 0;
uri = xmlCreateURI(); uri = xmlCreateURI();
@ -1206,7 +1197,7 @@ char *_dom_get_valid_file_path(char *source, char *resolved_path, int resolved_p
} }
/* }}} */ /* }}} */
static xmlDocPtr dom_document_parser(zval *id, int mode, char *source, size_t source_len, size_t options) /* {{{ */ xmlDocPtr dom_document_parser(zval *id, dom_load_mode mode, const char *source, size_t source_len, size_t options, xmlCharEncodingHandlerPtr encoding) /* {{{ */
{ {
xmlDocPtr ret; xmlDocPtr ret;
xmlParserCtxtPtr ctxt = NULL; xmlParserCtxtPtr ctxt = NULL;
@ -1215,10 +1206,14 @@ static xmlDocPtr dom_document_parser(zval *id, int mode, char *source, size_t so
int old_error_reporting = 0; int old_error_reporting = 0;
char *directory=NULL, resolved_path[MAXPATHLEN + 1]; char *directory=NULL, resolved_path[MAXPATHLEN + 1];
libxml_doc_props const* doc_props;
if (id == NULL) {
doc_props = dom_get_doc_props_read_only(NULL);
} else {
dom_object *intern = Z_DOMOBJ_P(id); dom_object *intern = Z_DOMOBJ_P(id);
php_libxml_ref_obj *document = intern->document; php_libxml_ref_obj *document = intern->document;
doc_props = dom_get_doc_props_read_only(document);
libxml_doc_props const* doc_props = dom_get_doc_props_read_only(document); }
validate = doc_props->validateonparse; validate = doc_props->validateonparse;
resolve_externals = doc_props->resolveexternals; resolve_externals = doc_props->resolveexternals;
keep_blanks = doc_props->preservewhitespace; keep_blanks = doc_props->preservewhitespace;
@ -1228,12 +1223,11 @@ static xmlDocPtr dom_document_parser(zval *id, int mode, char *source, size_t so
xmlInitParser(); xmlInitParser();
if (mode == DOM_LOAD_FILE) { if (mode == DOM_LOAD_FILE) {
char *file_dest;
if (CHECK_NULL_PATH(source, source_len)) { if (CHECK_NULL_PATH(source, source_len)) {
zend_value_error("Path to document must not contain any null bytes"); zend_argument_value_error(1, "must not contain any null bytes");
return NULL; return NULL;
} }
file_dest = _dom_get_valid_file_path(source, resolved_path, MAXPATHLEN); const char *file_dest = _dom_get_valid_file_path(source, resolved_path, MAXPATHLEN);
if (file_dest) { if (file_dest) {
ctxt = xmlCreateFileParserCtxt(file_dest); ctxt = xmlCreateFileParserCtxt(file_dest);
} }
@ -1246,6 +1240,8 @@ static xmlDocPtr dom_document_parser(zval *id, int mode, char *source, size_t so
return(NULL); return(NULL);
} }
(void) xmlSwitchToEncoding(ctxt, encoding);
/* If loading from memory, we need to set the base directory for the document */ /* If loading from memory, we need to set the base directory for the document */
if (mode != DOM_LOAD_FILE) { if (mode != DOM_LOAD_FILE) {
#ifdef HAVE_GETCWD #ifdef HAVE_GETCWD
@ -1319,7 +1315,7 @@ static xmlDocPtr dom_document_parser(zval *id, int mode, char *source, size_t so
} }
/* }}} */ /* }}} */
static void dom_finish_loading_document(zval *this, zval *return_value, xmlDocPtr newdoc) static void php_dom_finish_loading_document(zval *this, zval *return_value, xmlDocPtr newdoc)
{ {
if (!newdoc) if (!newdoc)
RETURN_FALSE; RETURN_FALSE;
@ -1327,11 +1323,13 @@ static void dom_finish_loading_document(zval *this, zval *return_value, xmlDocPt
dom_object *intern = Z_DOMOBJ_P(this); dom_object *intern = Z_DOMOBJ_P(this);
size_t old_modification_nr = 0; size_t old_modification_nr = 0;
if (intern != NULL) { if (intern != NULL) {
bool is_modern_api_class = false;
xmlDocPtr docp = (xmlDocPtr) dom_object_get_node(intern); xmlDocPtr docp = (xmlDocPtr) dom_object_get_node(intern);
dom_doc_propsptr doc_prop = NULL; dom_doc_propsptr doc_prop = NULL;
if (docp != NULL) { if (docp != NULL) {
const php_libxml_ref_obj *doc_ptr = intern->document; const php_libxml_ref_obj *doc_ptr = intern->document;
ZEND_ASSERT(doc_ptr != NULL); /* Must exist, we have a document */ ZEND_ASSERT(doc_ptr != NULL); /* Must exist, we have a document */
is_modern_api_class = doc_ptr->is_modern_api_class;
old_modification_nr = doc_ptr->cache_tag.modification_nr; old_modification_nr = doc_ptr->cache_tag.modification_nr;
php_libxml_decrement_node_ptr((php_libxml_node_object *) intern); php_libxml_decrement_node_ptr((php_libxml_node_object *) intern);
doc_prop = intern->document->doc_props; doc_prop = intern->document->doc_props;
@ -1346,6 +1344,7 @@ static void dom_finish_loading_document(zval *this, zval *return_value, xmlDocPt
RETURN_FALSE; RETURN_FALSE;
} }
intern->document->doc_props = doc_prop; intern->document->doc_props = doc_prop;
intern->document->is_modern_api_class = is_modern_api_class;
} }
php_libxml_increment_node_ptr((php_libxml_node_object *)intern, (xmlNodePtr)newdoc, (void *)intern); php_libxml_increment_node_ptr((php_libxml_node_object *)intern, (xmlNodePtr)newdoc, (void *)intern);
@ -1358,8 +1357,8 @@ static void dom_finish_loading_document(zval *this, zval *return_value, xmlDocPt
RETURN_TRUE; RETURN_TRUE;
} }
/* {{{ static void dom_parse_document(INTERNAL_FUNCTION_PARAMETERS, int mode) */ static void dom_parse_document(INTERNAL_FUNCTION_PARAMETERS, int mode)
static void dom_parse_document(INTERNAL_FUNCTION_PARAMETERS, int mode) { {
char *source; char *source;
size_t source_len; size_t source_len;
zend_long options = 0; zend_long options = 0;
@ -1381,11 +1380,9 @@ static void dom_parse_document(INTERNAL_FUNCTION_PARAMETERS, int mode) {
RETURN_FALSE; RETURN_FALSE;
} }
xmlDocPtr newdoc = dom_document_parser(ZEND_THIS, mode, source, source_len, options); xmlDocPtr newdoc = dom_document_parser(ZEND_THIS, mode, source, source_len, options, NULL);
php_dom_finish_loading_document(ZEND_THIS, return_value, newdoc);
dom_finish_loading_document(ZEND_THIS, return_value, newdoc);
} }
/* }}} end dom_parser_document */
/* {{{ URL: http://www.w3.org/TR/DOM-Level-3-LS/load-save.html#LS-DocumentLS-load /* {{{ URL: http://www.w3.org/TR/DOM-Level-3-LS/load-save.html#LS-DocumentLS-load
Since: DOM Level 3 Since: DOM Level 3
@ -1664,7 +1661,8 @@ static void _dom_document_schema_validate(INTERNAL_FUNCTION_PARAMETERS, int type
zval *id; zval *id;
xmlDoc *docp; xmlDoc *docp;
dom_object *intern; dom_object *intern;
char *source = NULL, *valid_file = NULL; char *source = NULL;
const char *valid_file = NULL;
size_t source_len = 0; size_t source_len = 0;
int valid_opts = 0; int valid_opts = 0;
zend_long flags = 0; zend_long flags = 0;
@ -1756,14 +1754,14 @@ static void _dom_document_schema_validate(INTERNAL_FUNCTION_PARAMETERS, int type
/* }}} */ /* }}} */
/* {{{ */ /* {{{ */
PHP_METHOD(DOMDocument, schemaValidate) PHP_METHOD(DOM_Document, schemaValidate)
{ {
_dom_document_schema_validate(INTERNAL_FUNCTION_PARAM_PASSTHRU, DOM_LOAD_FILE); _dom_document_schema_validate(INTERNAL_FUNCTION_PARAM_PASSTHRU, DOM_LOAD_FILE);
} }
/* }}} end dom_document_schema_validate_file */ /* }}} end dom_document_schema_validate_file */
/* {{{ */ /* {{{ */
PHP_METHOD(DOMDocument, schemaValidateSource) PHP_METHOD(DOM_Document, schemaValidateSource)
{ {
_dom_document_schema_validate(INTERNAL_FUNCTION_PARAM_PASSTHRU, DOM_LOAD_STRING); _dom_document_schema_validate(INTERNAL_FUNCTION_PARAM_PASSTHRU, DOM_LOAD_STRING);
} }
@ -1774,7 +1772,8 @@ static void _dom_document_relaxNG_validate(INTERNAL_FUNCTION_PARAMETERS, int typ
zval *id; zval *id;
xmlDoc *docp; xmlDoc *docp;
dom_object *intern; dom_object *intern;
char *source = NULL, *valid_file = NULL; char *source = NULL;
const char *valid_file = NULL;
size_t source_len = 0; size_t source_len = 0;
xmlRelaxNGParserCtxtPtr parser; xmlRelaxNGParserCtxtPtr parser;
xmlRelaxNGPtr sptr; xmlRelaxNGPtr sptr;
@ -1852,14 +1851,14 @@ static void _dom_document_relaxNG_validate(INTERNAL_FUNCTION_PARAMETERS, int typ
/* }}} */ /* }}} */
/* {{{ */ /* {{{ */
PHP_METHOD(DOMDocument, relaxNGValidate) PHP_METHOD(DOM_Document, relaxNGValidate)
{ {
_dom_document_relaxNG_validate(INTERNAL_FUNCTION_PARAM_PASSTHRU, DOM_LOAD_FILE); _dom_document_relaxNG_validate(INTERNAL_FUNCTION_PARAM_PASSTHRU, DOM_LOAD_FILE);
} }
/* }}} end dom_document_relaxNG_validate_file */ /* }}} end dom_document_relaxNG_validate_file */
/* {{{ */ /* {{{ */
PHP_METHOD(DOMDocument, relaxNGValidateSource) PHP_METHOD(DOM_Document, relaxNGValidateSource)
{ {
_dom_document_relaxNG_validate(INTERNAL_FUNCTION_PARAM_PASSTHRU, DOM_LOAD_STRING); _dom_document_relaxNG_validate(INTERNAL_FUNCTION_PARAM_PASSTHRU, DOM_LOAD_STRING);
} }
@ -1923,7 +1922,7 @@ static void dom_load_html(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{ */
xmlDocPtr newdoc = ctxt->myDoc; xmlDocPtr newdoc = ctxt->myDoc;
htmlFreeParserCtxt(ctxt); htmlFreeParserCtxt(ctxt);
dom_finish_loading_document(ZEND_THIS, return_value, newdoc); php_dom_finish_loading_document(ZEND_THIS, return_value, newdoc);
} }
/* }}} */ /* }}} */
@ -2065,7 +2064,7 @@ PHP_METHOD(DOMDocument, saveHTML)
#endif /* defined(LIBXML_HTML_ENABLED) */ #endif /* defined(LIBXML_HTML_ENABLED) */
/* {{{ Register extended class used to create base node type */ /* {{{ Register extended class used to create base node type */
PHP_METHOD(DOMDocument, registerNodeClass) PHP_METHOD(DOM_Document, registerNodeClass)
{ {
zend_class_entry *basece = dom_node_class_entry, *ce = NULL; zend_class_entry *basece = dom_node_class_entry, *ce = NULL;
dom_object *intern; dom_object *intern;
@ -2074,6 +2073,11 @@ PHP_METHOD(DOMDocument, registerNodeClass)
RETURN_THROWS(); RETURN_THROWS();
} }
if (basece->ce_flags & ZEND_ACC_ABSTRACT) {
zend_argument_value_error(1, "must not be an abstract class");
RETURN_THROWS();
}
if (ce == NULL || instanceof_function(ce, basece)) { if (ce == NULL || instanceof_function(ce, basece)) {
if (UNEXPECTED(ce != NULL && (ce->ce_flags & ZEND_ACC_ABSTRACT))) { if (UNEXPECTED(ce != NULL && (ce->ce_flags & ZEND_ACC_ABSTRACT))) {
zend_argument_value_error(2, "must not be an abstract class"); zend_argument_value_error(2, "must not be an abstract class");
@ -2091,7 +2095,7 @@ PHP_METHOD(DOMDocument, registerNodeClass)
/* {{{ URL: https://dom.spec.whatwg.org/#dom-parentnode-replacechildren /* {{{ URL: https://dom.spec.whatwg.org/#dom-parentnode-replacechildren
Since: Since:
*/ */
PHP_METHOD(DOMDocument, replaceChildren) PHP_METHOD(DOM_Document, replaceChildren)
{ {
uint32_t argc = 0; uint32_t argc = 0;
zval *args; zval *args;

View file

@ -23,6 +23,8 @@ extern PHP_DOM_EXPORT zend_class_entry *dom_domexception_class_entry;
extern PHP_DOM_EXPORT zend_class_entry *dom_domimplementation_class_entry; extern PHP_DOM_EXPORT zend_class_entry *dom_domimplementation_class_entry;
extern PHP_DOM_EXPORT zend_class_entry *dom_documentfragment_class_entry; extern PHP_DOM_EXPORT zend_class_entry *dom_documentfragment_class_entry;
extern PHP_DOM_EXPORT zend_class_entry *dom_document_class_entry; extern PHP_DOM_EXPORT zend_class_entry *dom_document_class_entry;
extern PHP_DOM_EXPORT zend_class_entry *dom_html_document_class_entry;
extern PHP_DOM_EXPORT zend_class_entry *dom_xml_document_class_entry;
extern PHP_DOM_EXPORT zend_class_entry *dom_nodelist_class_entry; extern PHP_DOM_EXPORT zend_class_entry *dom_nodelist_class_entry;
extern PHP_DOM_EXPORT zend_class_entry *dom_namednodemap_class_entry; extern PHP_DOM_EXPORT zend_class_entry *dom_namednodemap_class_entry;
extern PHP_DOM_EXPORT zend_class_entry *dom_characterdata_class_entry; extern PHP_DOM_EXPORT zend_class_entry *dom_characterdata_class_entry;
@ -36,6 +38,7 @@ extern PHP_DOM_EXPORT zend_class_entry *dom_notation_class_entry;
extern PHP_DOM_EXPORT zend_class_entry *dom_entity_class_entry; extern PHP_DOM_EXPORT zend_class_entry *dom_entity_class_entry;
extern PHP_DOM_EXPORT zend_class_entry *dom_entityreference_class_entry; extern PHP_DOM_EXPORT zend_class_entry *dom_entityreference_class_entry;
extern PHP_DOM_EXPORT zend_class_entry *dom_processinginstruction_class_entry; extern PHP_DOM_EXPORT zend_class_entry *dom_processinginstruction_class_entry;
extern PHP_DOM_EXPORT zend_class_entry *dom_abstract_base_document_class_entry;
#ifdef LIBXML_XPATH_ENABLED #ifdef LIBXML_XPATH_ENABLED
extern PHP_DOM_EXPORT zend_class_entry *dom_xpath_class_entry; extern PHP_DOM_EXPORT zend_class_entry *dom_xpath_class_entry;
#endif #endif

View file

@ -61,6 +61,9 @@ zend_result dom_document_recover_write(dom_object *obj, zval *newval);
zend_result dom_document_substitue_entities_read(dom_object *obj, zval *retval); zend_result dom_document_substitue_entities_read(dom_object *obj, zval *retval);
zend_result dom_document_substitue_entities_write(dom_object *obj, zval *newval); zend_result dom_document_substitue_entities_write(dom_object *obj, zval *newval);
/* html5 document properties */
zend_result dom_html_document_encoding_write(dom_object *obj, zval *retval);
/* documenttype properties */ /* documenttype properties */
zend_result dom_documenttype_name_read(dom_object *obj, zval *retval); zend_result dom_documenttype_name_read(dom_object *obj, zval *retval);
zend_result dom_documenttype_entities_read(dom_object *obj, zval *retval); zend_result dom_documenttype_entities_read(dom_object *obj, zval *retval);

328
ext/dom/html5_parser.c Normal file
View file

@ -0,0 +1,328 @@
/*
+----------------------------------------------------------------------+
| Copyright (c) The PHP Group |
+----------------------------------------------------------------------+
| This source file is subject to version 3.01 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
| available through the world-wide-web at the following url: |
| https://www.php.net/license/3_01.txt |
| If you did not receive a copy of the PHP license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@php.net so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
| Authors: Niels Dossche <nielsdos@php.net> |
+----------------------------------------------------------------------+
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "php.h"
#if defined(HAVE_LIBXML) && defined(HAVE_DOM)
#include "html5_parser.h"
#include "namespace_compat.h"
#include <lexbor/html/parser.h>
#include <lexbor/html/interfaces/element.h>
#include <libxml/tree.h>
#include <libxml/parserInternals.h>
#include <libxml/HTMLtree.h>
#include <Zend/zend.h>
/* Initial number of items reserved for the conversion work list (passed to lexbor_array_obj_init()). */
#define WORK_LIST_INIT_SIZE 128
/* libxml2 reserves 2 pointer-sized words for interned strings */
/* Text shorter than this many bytes is stored inline in the text node by the compact text node path. */
#define LXML_INTERNED_STRINGS_SIZE (sizeof(void *) * 2)
/* One pending unit of work for the iterative lexbor -> libxml2 tree conversion. */
typedef struct {
/* Lexbor node that still has to be converted. */
lxb_dom_node_t *node;
/* Lexbor namespace id of the element that queued this item (LXB_NS__UNDEF for the top-level nodes). */
uintptr_t current_active_namespace;
/* libxml2 node the converted node gets attached to. */
xmlNodePtr lxml_parent;
/* libxml2 namespace that was assigned to the queuing element (NULL for the top-level nodes). */
xmlNsPtr lxml_ns;
} work_list_item;
/* Appends one entry to the work list and initialises every field of it.
 * The slot is obtained without clearing (push_wo_cls), hence all fields are written here. */
static void lexbor_libxml2_bridge_work_list_item_push(
	lexbor_array_obj_t *array,
	lxb_dom_node_t *node,
	uintptr_t current_active_namespace,
	xmlNodePtr lxml_parent,
	xmlNsPtr lxml_ns
)
{
	work_list_item *slot = lexbor_array_obj_push_wo_cls(array);

	slot->lxml_ns = lxml_ns;
	slot->lxml_parent = lxml_parent;
	slot->current_active_namespace = current_active_namespace;
	slot->node = node;
}
/* Clamps a line number to the range of an unsigned short:
 * values above USHRT_MAX saturate at USHRT_MAX, everything else is passed through. */
static unsigned short sanitize_line_nr(size_t line)
{
	return (line > USHRT_MAX) ? (unsigned short) USHRT_MAX : (unsigned short) line;
}
/* Maps a lexbor namespace id to the namespace URI used on the libxml2 side.
 * SVG and MathML get their own URI; every other id maps to the XHTML namespace URI. */
static const xmlChar *get_libxml_namespace_href(uintptr_t lexbor_namespace)
{
	const char *href = DOM_XHTML_NS_URI;

	if (lexbor_namespace == LXB_NS_SVG) {
		href = DOM_SVG_NS_URI;
	} else if (lexbor_namespace == LXB_NS_MATH) {
		href = DOM_MATHML_NS_URI;
	}

	return (const xmlChar *) href;
}
/*
 * Converts a chain of lexbor sibling nodes, including all of their descendants,
 * into libxml2 nodes that are attached below lxml_doc.
 *
 * start_node:         LAST sibling of the chain to convert. The chain is walked through
 *                     ->prev and pushed on a LIFO work list, so the nodes are popped and
 *                     appended to the libxml2 tree in document order.
 * lxml_doc:           destination document; also the parent of the top-level nodes.
 * compact_text_nodes: when true, short texts are stored inline in the text node
 *                     (see xmlSAX2TextNode() in libxml2) instead of in a separate allocation.
 * create_default_ns:  when true, an xmlNs (without prefix) is created on every element
 *                     whose lexbor namespace differs from the one of its parent.
 *
 * Returns LEXBOR_LIBXML2_BRIDGE_STATUS_OK on success, ..._OOM when a libxml2 allocation
 * fails and ..._OVERFLOW when a text node is too large (length >= INT_MAX).
 * On failure the nodes converted so far stay attached to lxml_doc; nothing is freed here.
 */
static lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert(
	lxb_dom_node_t *start_node,
	xmlDocPtr lxml_doc,
	bool compact_text_nodes,
	bool create_default_ns
)
{
	lexbor_libxml2_bridge_status retval = LEXBOR_LIBXML2_BRIDGE_STATUS_OK;

	lexbor_array_obj_t work_list;
	lexbor_array_obj_init(&work_list, WORK_LIST_INIT_SIZE, sizeof(work_list_item));

	for (lxb_dom_node_t *node = start_node; node != NULL; node = node->prev) {
		lexbor_libxml2_bridge_work_list_item_push(&work_list, node, LXB_NS__UNDEF, (xmlNodePtr) lxml_doc, NULL);
	}

	work_list_item *current_stack_item;
	while ((current_stack_item = lexbor_array_obj_pop(&work_list)) != NULL) {
		/* NOTE(review): the popped item presumably points into the work list storage,
		 * so everything needed from it is read before new items get pushed. */
		lxb_dom_node_t *node = current_stack_item->node;
		xmlNodePtr lxml_parent = current_stack_item->lxml_parent;

		/* CDATA section and processing instructions don't occur in parsed HTML documents.
		 * The historical types are not emitted by the parser either. */
		if (node->type == LXB_DOM_NODE_TYPE_ELEMENT) {
			/* Note: HTML isn't exactly XML-namespace-aware; as this is an HTML parser we only care about the local name.
			 * If a prefix:name format is used, then the local name will be "prefix:name" and the prefix will be empty.
			 * There is however still somewhat of a concept of namespaces. There are three: HTML (the default), SVG, and MATHML. */
			lxb_dom_element_t *element = lxb_dom_interface_element(node);
			const lxb_char_t *name = lxb_dom_element_local_name(element, NULL);
			xmlNodePtr lxml_element = xmlNewDocNode(lxml_doc, NULL, name, NULL);
			if (UNEXPECTED(lxml_element == NULL)) {
				retval = LEXBOR_LIBXML2_BRIDGE_STATUS_OOM;
				goto out;
			}
			xmlAddChild(lxml_parent, lxml_element);
			lxml_element->line = sanitize_line_nr(node->line);

			/* Namespaces, note: namespace switches are uncommon */
			uintptr_t entering_namespace = element->node.ns;
			xmlNsPtr current_lxml_ns = current_stack_item->lxml_ns;
			if (create_default_ns && UNEXPECTED(entering_namespace != current_stack_item->current_active_namespace)) {
				current_lxml_ns = xmlNewNs(lxml_element, get_libxml_namespace_href(entering_namespace), NULL);
			}
			/* Instead of xmlSetNs() because we know the arguments are valid. Prevents overhead. */
			lxml_element->ns = current_lxml_ns;

			/* Attributes are converted right here, each exactly once and in document order.
			 * Queuing every attribute on the work list and then walking the remainder of the
			 * attribute chain for each queued entry would set the same attributes repeatedly. */
			for (lxb_dom_attr_t *attr = element->first_attr; attr != NULL; attr = attr->next) {
				/* Same namespace remark as for elements */
				const lxb_char_t *local_name = lxb_dom_attr_local_name(attr, NULL);
				const lxb_char_t *value = lxb_dom_attr_value(attr, NULL);
				xmlAttrPtr lxml_attr = xmlSetNsProp(lxml_element, NULL, local_name, value);
				if (UNEXPECTED(lxml_attr == NULL)) {
					retval = LEXBOR_LIBXML2_BRIDGE_STATUS_OOM;
					goto out;
				}
				/* libxml2 doesn't support line numbers on this anyway, it derives them instead, so don't bother */
			}

			/* Children are pushed last-to-first such that they are popped first-to-last. */
			for (lxb_dom_node_t *child_node = element->node.last_child; child_node != NULL; child_node = child_node->prev) {
				lexbor_libxml2_bridge_work_list_item_push(
					&work_list,
					child_node,
					entering_namespace,
					lxml_element,
					current_lxml_ns
				);
			}
		} else if (node->type == LXB_DOM_NODE_TYPE_TEXT) {
			lxb_dom_text_t *text = lxb_dom_interface_text(node);
			const lxb_char_t *data = text->char_data.data.data;
			size_t data_length = text->char_data.data.length;
			if (UNEXPECTED(data_length >= INT_MAX)) {
				retval = LEXBOR_LIBXML2_BRIDGE_STATUS_OVERFLOW;
				goto out;
			}
			xmlNodePtr lxml_text;
			if (compact_text_nodes && data_length < LXML_INTERNED_STRINGS_SIZE) {
				/* See xmlSAX2TextNode() in libxml2 */
				lxml_text = xmlMalloc(sizeof(*lxml_text));
				if (UNEXPECTED(lxml_text == NULL)) {
					retval = LEXBOR_LIBXML2_BRIDGE_STATUS_OOM;
					goto out;
				}
				memset(lxml_text, 0, sizeof(*lxml_text));
				lxml_text->name = xmlStringText;
				lxml_text->type = XML_TEXT_NODE;
				lxml_text->doc = lxml_doc;
				/* The text lives inside the node itself, starting at the "properties" field. */
				lxml_text->content = (xmlChar *) &lxml_text->properties;
				memcpy(lxml_text->content, data, data_length + 1 /* include '\0' */);
			} else {
				lxml_text = xmlNewDocTextLen(lxml_doc, (const xmlChar *) data, data_length);
				if (UNEXPECTED(lxml_text == NULL)) {
					retval = LEXBOR_LIBXML2_BRIDGE_STATUS_OOM;
					goto out;
				}
			}
			xmlAddChild(lxml_parent, lxml_text);
			if (node->line >= USHRT_MAX) {
				/* The line field saturates; the full line number is additionally stored in psvi
				 * (presumably so libxml2's line lookup can recover it -- TODO confirm against xmlGetLineNo()). */
				lxml_text->line = USHRT_MAX;
				lxml_text->psvi = (void *) (ptrdiff_t) node->line;
			} else {
				lxml_text->line = (unsigned short) node->line;
			}
		} else if (node->type == LXB_DOM_NODE_TYPE_DOCUMENT_TYPE) {
			lxb_dom_document_type_t *doctype = lxb_dom_interface_document_type(node);
			const lxb_char_t *name = lxb_dom_document_type_name(doctype, NULL);
			size_t public_id_len, system_id_len;
			const lxb_char_t *public_id = lxb_dom_document_type_public_id(doctype, &public_id_len);
			const lxb_char_t *system_id = lxb_dom_document_type_system_id(doctype, &system_id_len);
			/* Empty identifiers are passed as NULL. */
			xmlDtdPtr lxml_dtd = xmlCreateIntSubset(
				lxml_doc,
				name,
				public_id_len ? public_id : NULL,
				system_id_len ? system_id : NULL
			);
			if (UNEXPECTED(lxml_dtd == NULL)) {
				retval = LEXBOR_LIBXML2_BRIDGE_STATUS_OOM;
				goto out;
			}
			/* libxml2 doesn't support line numbers on this anyway, it returns -1 instead, so don't bother */
		} else if (node->type == LXB_DOM_NODE_TYPE_COMMENT) {
			lxb_dom_comment_t *comment = lxb_dom_interface_comment(node);
			xmlNodePtr lxml_comment = xmlNewDocComment(lxml_doc, comment->char_data.data.data);
			if (UNEXPECTED(lxml_comment == NULL)) {
				retval = LEXBOR_LIBXML2_BRIDGE_STATUS_OOM;
				goto out;
			}
			xmlAddChild(lxml_parent, lxml_comment);
			lxml_comment->line = sanitize_line_nr(node->line);
		}
	}

out:
	lexbor_array_obj_destroy(&work_list, false);
	return retval;
}
/* Resets a parse context to its initial state by zero-filling the whole structure. */
void lexbor_libxml2_bridge_parse_context_init(lexbor_libxml2_bridge_parse_context *ctx)
{
	memset(ctx, 0, sizeof(lexbor_libxml2_bridge_parse_context));
}
/* Installs the two error reporting callbacks on the parse context.
 * Either callback may be NULL; lexbor_libxml2_bridge_report_errors() skips NULL callbacks. */
void lexbor_libxml2_bridge_parse_set_error_callbacks(
	lexbor_libxml2_bridge_parse_context *ctx,
	lexbor_libxml2_bridge_tokenizer_error_reporter tokenizer_error_reporter,
	lexbor_libxml2_bridge_tree_error_reporter tree_error_reporter
)
{
	ctx->tree_error_reporter = tree_error_reporter;
	ctx->tokenizer_error_reporter = tokenizer_error_reporter;
}
/*
 * Creates a new libxml2 HTML document and fills it with the converted contents
 * of the given lexbor document.
 *
 * document:           lexbor document whose children are converted.
 * doc_out:            receives the new libxml2 document on success; left untouched on failure.
 * compact_text_nodes: passed through to the conversion, see lexbor_libxml2_bridge_convert().
 * create_default_ns:  passed through to the conversion, see lexbor_libxml2_bridge_convert().
 *
 * Returns LEXBOR_LIBXML2_BRIDGE_STATUS_OK on success. On failure the partially built
 * libxml2 document is freed and the failure status is returned.
 */
lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert_document(
	lxb_html_document_t *document,
	xmlDocPtr *doc_out,
	bool compact_text_nodes,
	bool create_default_ns
)
{
#ifdef LIBXML_HTML_ENABLED
	xmlDocPtr lxml_doc = htmlNewDocNoDtD(NULL, NULL);
#else
	/* If HTML support is not enabled, then htmlNewDocNoDtD() is not available.
	 * This code mimics the behaviour. */
	xmlDocPtr lxml_doc = xmlNewDoc((const xmlChar *) "1.0");
#endif
	/* The allocation must be checked before the document is touched in any way. */
	if (!lxml_doc) {
		return LEXBOR_LIBXML2_BRIDGE_STATUS_OOM;
	}
#ifndef LIBXML_HTML_ENABLED
	lxml_doc->type = XML_HTML_DOCUMENT_NODE;
#endif

	/* The conversion expects the last child: it walks the siblings backwards. */
	lexbor_libxml2_bridge_status status = lexbor_libxml2_bridge_convert(
		lxb_dom_interface_node(document)->last_child,
		lxml_doc,
		compact_text_nodes,
		create_default_ns
	);
	if (status != LEXBOR_LIBXML2_BRIDGE_STATUS_OK) {
		xmlFreeDoc(lxml_doc);
		return status;
	}

	*doc_out = lxml_doc;
	return LEXBOR_LIBXML2_BRIDGE_STATUS_OK;
}
/*
 * Forwards the parse errors collected by lexbor to the callbacks stored in ctx.
 *
 * Only errors at or after the given start indices are reported. On return, both
 * index variables are advanced past the last error that was present, so that a
 * subsequent call continues where this one stopped.
 *
 * Tokenizer errors are reported with an offset computed as
 * (error position - input_html + chunk_offset); tree errors are reported with
 * line and column each incremented by one, plus the error length.
 */
void lexbor_libxml2_bridge_report_errors(
	const lexbor_libxml2_bridge_parse_context *ctx,
	lxb_html_parser_t *parser,
	const lxb_char_t *input_html,
	size_t chunk_offset,
	size_t *error_index_offset_tokenizer,
	size_t *error_index_offset_tree
)
{
	/* Tokenizer errors */
	/* See https://github.com/lexbor/lexbor/blob/master/source/lexbor/html/tokenizer/error.h */
	lexbor_array_obj_t *tokenizer_errors = lxb_html_parser_tokenizer(parser)->parse_errors;
	size_t tokenizer_index = *error_index_offset_tokenizer;
	for (lxb_html_tokenizer_error_t *token_error;
			(token_error = lexbor_array_obj_get(tokenizer_errors, tokenizer_index)) != NULL;
			tokenizer_index++) {
		if (ctx->tokenizer_error_reporter) {
			ctx->tokenizer_error_reporter(
				ctx->application_data,
				token_error,
				token_error->pos - input_html + chunk_offset
			);
		}
	}
	*error_index_offset_tokenizer = tokenizer_index;

	/* Tree parser errors */
	/* See https://github.com/lexbor/lexbor/blob/master/source/lexbor/html/tree/error.h */
	lexbor_array_obj_t *tree_errors = lxb_html_parser_tree(parser)->parse_errors;
	size_t tree_index = *error_index_offset_tree;
	for (lxb_html_tree_error_t *tree_error;
			(tree_error = lexbor_array_obj_get(tree_errors, tree_index)) != NULL;
			tree_index++) {
		if (ctx->tree_error_reporter) {
			ctx->tree_error_reporter(
				ctx->application_data,
				tree_error,
				tree_error->line + 1,
				tree_error->column + 1,
				tree_error->length
			);
		}
	}
	*error_index_offset_tree = tree_index;
}
/* Copies the "explicit <html>/<head>/<body> tag seen" flags from the lexbor tree builder
 * into the caller-provided observations structure. */
void lexbor_libxml2_bridge_copy_observations(lxb_html_tree_t *tree, lexbor_libxml2_bridge_extracted_observations *observations)
{
	observations->has_explicit_body_tag = tree->has_explicit_body_tag;
	observations->has_explicit_head_tag = tree->has_explicit_head_tag;
	observations->has_explicit_html_tag = tree->has_explicit_html_tag;
}
#endif /* HAVE_LIBXML && HAVE_DOM */

86
ext/dom/html5_parser.h Normal file
View file

@ -0,0 +1,86 @@
/*
+----------------------------------------------------------------------+
| Copyright (c) The PHP Group |
+----------------------------------------------------------------------+
| This source file is subject to version 3.01 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
| available through the world-wide-web at the following url: |
| https://www.php.net/license/3_01.txt |
| If you did not receive a copy of the PHP license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@php.net so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
| Authors: Niels Dossche <nielsdos@php.net> |
+----------------------------------------------------------------------+
*/
#ifndef HTML5_PARSER_H
#define HTML5_PARSER_H
#include <lexbor/html/parser.h>
#include <libxml/tree.h>
#include <Zend/zend_portability.h>
/* Status codes returned by the lexbor -> libxml2 bridge. */
typedef enum {
/* The operation succeeded. */
LEXBOR_LIBXML2_BRIDGE_STATUS_OK = 0,
/* NOTE(review): not produced by the conversion routines themselves; presumably reported when the parser cannot be set up -- confirm against the caller. */
LEXBOR_LIBXML2_BRIDGE_STATUS_CANNOT_INIT,
/* NOTE(review): not produced by the conversion routines themselves; presumably reported on an unrecoverable parse failure -- confirm against the caller. */
LEXBOR_LIBXML2_BRIDGE_STATUS_FATAL_PARSE,
/* A value did not fit, e.g. the data of a text node was too long to convert. */
LEXBOR_LIBXML2_BRIDGE_STATUS_OVERFLOW,
/* A memory allocation failed. */
LEXBOR_LIBXML2_BRIDGE_STATUS_OOM,
} lexbor_libxml2_bridge_status;
/* Callback invoked for every tokenizer error.
 * application_data is the value stored in the parse context, error is the lexbor error record,
 * and offset is the position of the error as computed by lexbor_libxml2_bridge_report_errors(). */
typedef void (*lexbor_libxml2_bridge_tokenizer_error_reporter)(
void *application_data,
lxb_html_tokenizer_error_t *error,
size_t offset
);
/* Callback invoked for every tree construction error.
 * application_data is the value stored in the parse context, error is the lexbor error record,
 * line and column locate the error, and len is the length reported by lexbor for it. */
typedef void (*lexbor_libxml2_bridge_tree_error_reporter)(
void *application_data,
lxb_html_tree_error_t *error,
size_t line,
size_t column,
size_t len
);
/* Facts about the parsed input, copied out of the lexbor tree builder by
 * lexbor_libxml2_bridge_copy_observations(). */
typedef struct {
/* Copy of the tree builder's has_explicit_html_tag flag. */
bool has_explicit_html_tag;
/* Copy of the tree builder's has_explicit_head_tag flag. */
bool has_explicit_head_tag;
/* Copy of the tree builder's has_explicit_body_tag flag. */
bool has_explicit_body_tag;
} lexbor_libxml2_bridge_extracted_observations;
/* State shared between the application and the bridge while parsing.
 * Initialise with lexbor_libxml2_bridge_parse_context_init() before use. */
typedef struct {
/* Private fields */
/* Set through lexbor_libxml2_bridge_parse_set_error_callbacks(); may be NULL. */
lexbor_libxml2_bridge_tokenizer_error_reporter tokenizer_error_reporter;
lexbor_libxml2_bridge_tree_error_reporter tree_error_reporter;
/* Public fields */
lexbor_libxml2_bridge_extracted_observations observations;
/* Application data, do what you want with this */
/* Passed as the first argument to both error reporter callbacks. */
void *application_data;
} lexbor_libxml2_bridge_parse_context;
/* Zero-initialises the parse context. */
void lexbor_libxml2_bridge_parse_context_init(lexbor_libxml2_bridge_parse_context *ctx);
/* Stores the tokenizer and tree error callbacks in the parse context. */
void lexbor_libxml2_bridge_parse_set_error_callbacks(
lexbor_libxml2_bridge_parse_context *ctx,
lexbor_libxml2_bridge_tokenizer_error_reporter tokenizer_error_reporter,
lexbor_libxml2_bridge_tree_error_reporter tree_error_reporter
);
/* Converts a lexbor document into a newly created libxml2 document.
 * On success the new document is stored in *doc_out and LEXBOR_LIBXML2_BRIDGE_STATUS_OK is returned;
 * on failure the new document is freed and *doc_out is not written. */
lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert_document(
lxb_html_document_t *document,
xmlDocPtr *doc_out,
bool compact_text_nodes,
bool create_default_ns
);
/* Reports the parser's tokenizer and tree errors, starting at the given indices, through the
 * callbacks in ctx, and advances both indices past the reported errors. */
void lexbor_libxml2_bridge_report_errors(
const lexbor_libxml2_bridge_parse_context *ctx,
lxb_html_parser_t *parser,
const lxb_char_t *input_html,
size_t chunk_offset,
size_t *error_index_offset_tokenizer,
size_t *error_index_offset_tree
);
/* Copies the explicit html/head/body tag flags from the tree builder into observations. */
void lexbor_libxml2_bridge_copy_observations(
lxb_html_tree_t *tree,
lexbor_libxml2_bridge_extracted_observations *observations
);
#endif

356
ext/dom/html5_serializer.c Normal file
View file

@ -0,0 +1,356 @@
/*
+----------------------------------------------------------------------+
| Copyright (c) The PHP Group |
+----------------------------------------------------------------------+
| This source file is subject to version 3.01 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
| available through the world-wide-web at the following url: |
| https://www.php.net/license/3_01.txt |
| If you did not receive a copy of the PHP license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@php.net so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
| Authors: Niels Dossche <nielsdos@php.net> |
+----------------------------------------------------------------------+
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "php.h"
#if defined(HAVE_LIBXML) && defined(HAVE_DOM)
#include "php_dom.h"
#include "html5_serializer.h"
#include "namespace_compat.h"
#include <lexbor/encoding/encoding.h>
/* Evaluates x once and makes the enclosing function return FAILURE when the result is not SUCCESS. */
#define TRY(x) do { if (UNEXPECTED((x) != SUCCESS)) { return FAILURE; } } while (0)
static bool dom_is_ns(const xmlNode *node, const char *uri)
{
return node->ns != NULL && strcmp((const char *) node->ns->href, uri) == 0;
}
/* Returns true when the node counts as being in the HTML namespace:
 * either it has no namespace at all, or its namespace URI is the XHTML one. */
static bool dom_is_html_ns(const xmlNode *node)
{
	if (node->ns == NULL) {
		return true;
	}
	return dom_is_ns(node, DOM_XHTML_NS_URI);
}
/* Compares the node's name against `tag`.
 * name_length is the precomputed length of node->name, so callers testing many tags
 * only have to measure the name once. */
static bool dom_local_name_compare_ex(const xmlNode *node, const char *tag, size_t tag_length, size_t name_length)
{
	if (name_length != tag_length) {
		return false;
	}
	return zend_binary_strcmp((const char *) node->name, name_length, tag, tag_length) == 0;
}
/* Serializes a doctype as "<!DOCTYPE " followed by its name and a closing ">". */
static zend_result dom_html5_serialize_doctype(dom_html5_serialize_context *ctx, const xmlDtd *dtd)
{
	const char *doctype_name = (const char *) dtd->name;

	if (UNEXPECTED(ctx->write_string_len(ctx->application_data, "<!DOCTYPE ", strlen("<!DOCTYPE ")) != SUCCESS)) {
		return FAILURE;
	}
	if (UNEXPECTED(ctx->write_string(ctx->application_data, doctype_name) != SUCCESS)) {
		return FAILURE;
	}
	return ctx->write_string_len(ctx->application_data, ">", strlen(">"));
}
/* Serializes a comment node as "<!--" + data + "-->".
 * A comment node without content (node->content == NULL) is serialized with empty data
 * instead of handing a NULL pointer to the write callback. */
static zend_result dom_html5_serialize_comment(dom_html5_serialize_context *ctx, const xmlNode *node)
{
	TRY(ctx->write_string_len(ctx->application_data, "<!--", strlen("<!--")));
	if (node->content != NULL) {
		TRY(ctx->write_string(ctx->application_data, (const char *) node->content));
	}
	return ctx->write_string_len(ctx->application_data, "-->", strlen("-->"));
}
/* Serializes a processing instruction as "<?" + target + " " + data + ">".
 * A processing instruction without content (node->content == NULL) is serialized with
 * empty data instead of handing a NULL pointer to the write callback. */
static zend_result dom_html5_serialize_processing_instruction(dom_html5_serialize_context *ctx, const xmlNode *node)
{
	TRY(ctx->write_string_len(ctx->application_data, "<?", strlen("<?")));
	TRY(ctx->write_string(ctx->application_data, (const char *) node->name));
	TRY(ctx->write_string_len(ctx->application_data, " ", strlen(" ")));
	if (node->content != NULL) {
		TRY(ctx->write_string(ctx->application_data, (const char *) node->content));
	}
	return ctx->write_string_len(ctx->application_data, ">", strlen(">"));
}
/* https://html.spec.whatwg.org/multipage/parsing.html#escapingString
 *
 * Writes `content` with the characters that need escaping replaced by entities:
 *   "&"                      -> "&amp;"   (always)
 *   U+00A0 (UTF-8: C2 A0)    -> "&nbsp;"  (always)
 *   '"'                      -> "&quot;"  (attribute mode only)
 *   "<" and ">"              -> "&lt;" / "&gt;" (outside attribute mode only)
 * Unescaped runs are written in one piece, right before each entity and at the end. */
static zend_result dom_html5_escape_string(dom_html5_serialize_context *ctx, const char *content, bool attribute_mode)
{
	/* Start of the run of bytes that was scanned but not written yet. */
	const char *pending = content;
	const char *cursor = content;

	for (; *cursor != '\0'; cursor++) {
		const char *entity = NULL;
		/* Number of extra input bytes (beyond the current one) covered by the entity. */
		size_t extra_bytes = 0;

		if (*cursor == '&') {
			/* Step 1 */
			entity = "&amp;";
		} else if (*cursor == '\xC2') {
			/* Step 2 (non-breaking space) (note: uses UTF-8 internally) */
			if (cursor[1] == '\xA0') {
				entity = "&nbsp;";
				extra_bytes = 1; /* Consume A0 too */
			}
		} else if (*cursor == '"') {
			/* Step 3 */
			if (attribute_mode) {
				entity = "&quot;";
			}
		} else if (*cursor == '<') {
			/* Step 4 */
			if (!attribute_mode) {
				entity = "&lt;";
			}
		} else if (*cursor == '>') {
			if (!attribute_mode) {
				entity = "&gt;";
			}
		}

		if (entity == NULL) {
			continue;
		}

		TRY(ctx->write_string_len(ctx->application_data, pending, cursor - pending));
		TRY(ctx->write_string_len(ctx->application_data, entity, strlen(entity)));
		cursor += extra_bytes;
		pending = cursor + 1;
	}

	return ctx->write_string_len(ctx->application_data, pending, cursor - pending);
}
/* Serializes a text (or CDATA) node.
 * Text whose parent is one of the raw-text style HTML elements listed below is written
 * verbatim; all other text goes through dom_html5_escape_string() in non-attribute mode.
 * A node without content (node->content == NULL) produces no output. */
static zend_result dom_html5_serialize_text_node(dom_html5_serialize_context *ctx, const xmlNode *node)
{
	/* Nothing to write; also keeps NULL away from the write callback and the escaping routine. */
	if (node->content == NULL) {
		return SUCCESS;
	}

	if (node->parent->type == XML_ELEMENT_NODE && dom_is_html_ns(node->parent)) {
		const xmlNode *parent = node->parent;
		size_t name_length = strlen((const char *) parent->name);
		/* Spec tells us to only emit noscript content as-is if scripting is enabled.
		 * However, the user agent (PHP) does not support (JS) scripting.
		 * Furthermore, if actually consumed by a browser then we should err on the safe side and not emit the content as-is. */
		if (dom_local_name_compare_ex(parent, "style", strlen("style"), name_length)
			|| dom_local_name_compare_ex(parent, "script", strlen("script"), name_length)
			|| dom_local_name_compare_ex(parent, "xmp", strlen("xmp"), name_length)
			|| dom_local_name_compare_ex(parent, "iframe", strlen("iframe"), name_length)
			|| dom_local_name_compare_ex(parent, "noembed", strlen("noembed"), name_length)
			|| dom_local_name_compare_ex(parent, "noframes", strlen("noframes"), name_length)
			|| dom_local_name_compare_ex(parent, "plaintext", strlen("plaintext"), name_length)) {
			return ctx->write_string(ctx->application_data, (const char *) node->content);
		}
	}

	return dom_html5_escape_string(ctx, (const char *) node->content, false);
}
/* Writes the tag name of an element.
 * The "prefix:" part is only written when the element has a prefixed namespace that is
 * none of HTML, MathML or SVG; in every case the element's name follows. */
static zend_result dom_html5_serialize_element_tag_name(dom_html5_serialize_context *ctx, const xmlNode *node)
{
	/* Note: it is not the serializer's responsibility to care about uppercase/lowercase (see createElement() note) */
	const xmlNs *ns = node->ns;

	if (ns != NULL && ns->prefix != NULL) {
		bool is_well_known_ns = dom_is_html_ns(node)
			|| dom_is_ns(node, DOM_MATHML_NS_URI)
			|| dom_is_ns(node, DOM_SVG_NS_URI);
		if (!is_well_known_ns) {
			TRY(ctx->write_string(ctx->application_data, (const char *) node->ns->prefix));
			TRY(ctx->write_string_len(ctx->application_data, ":", strlen(":")));
		}
	}

	return ctx->write_string(ctx->application_data, (const char *) node->name);
}
static zend_result dom_html5_serialize_element_start(dom_html5_serialize_context *ctx, const xmlNode *node)
{
TRY(ctx->write_string_len(ctx->application_data, "<", strlen("<")));
TRY(dom_html5_serialize_element_tag_name(ctx, node));
/* We don't support the "is" value during element creation, so no handling here. */
/* Some namespace declarations are also attributes (see https://html.spec.whatwg.org/multipage/parsing.html#create-an-element-for-the-token) */
for (const xmlNs *ns = node->nsDef; ns != NULL; ns = ns->next) {
if (!dom_ns_is_also_an_attribute(ns)) {
continue;
}
if (ns->prefix != NULL) {
TRY(ctx->write_string_len(ctx->application_data, " xmlns:", strlen(" xmlns:")));
TRY(ctx->write_string(ctx->application_data, (const char *) ns->prefix));
TRY(ctx->write_string_len(ctx->application_data, "=\"", strlen("=\"")));
} else {
TRY(ctx->write_string_len(ctx->application_data, " xmlns=\"", strlen(" xmlns=\"")));
}
TRY(ctx->write_string(ctx->application_data, (const char *) ns->href));
TRY(ctx->write_string_len(ctx->application_data, "\"", strlen("\"")));
}
for (const xmlAttr *attr = node->properties; attr; attr = attr->next) {
TRY(ctx->write_string_len(ctx->application_data, " ", strlen(" ")));
if (attr->ns == NULL) {
TRY(ctx->write_string(ctx->application_data, (const char *) attr->name));
} else {
if (dom_is_ns((const xmlNode *) attr, DOM_XML_NS_URI)) {
TRY(ctx->write_string_len(ctx->application_data, "xml:", strlen("xml:")));
TRY(ctx->write_string(ctx->application_data, (const char *) attr->name));
} else if (dom_is_ns((const xmlNode *) attr, DOM_XMLNS_NS_URI)) {
/* Compatibility for real attributes */
if (strcmp((const char *) attr->name, "xmlns") == 0) {
TRY(ctx->write_string_len(ctx->application_data, "xmlns", strlen("xmlns")));
} else {
TRY(ctx->write_string_len(ctx->application_data, "xmlns:", strlen("xmlns:")));
TRY(ctx->write_string(ctx->application_data, (const char *) attr->name));
}
} else if (dom_is_ns((const xmlNode *) attr, DOM_XLINK_NS_URI)) {
TRY(ctx->write_string_len(ctx->application_data, "xlink:", strlen("xlink:")));
TRY(ctx->write_string(ctx->application_data, (const char *) attr->name));
} else if (attr->ns->prefix == NULL) {
TRY(ctx->write_string(ctx->application_data, (const char *) attr->name));
} else {
TRY(ctx->write_string(ctx->application_data, (const char *) attr->ns->prefix));
TRY(ctx->write_string_len(ctx->application_data, ":", strlen(":")));
TRY(ctx->write_string(ctx->application_data, (const char *) attr->name));
}
}
TRY(ctx->write_string_len(ctx->application_data, "=\"", strlen("=\"")));
xmlChar *content = xmlNodeGetContent((const xmlNode *) attr);
if (content != NULL) {
zend_result result = dom_html5_escape_string(ctx, (const char *) content, true);
xmlFree(content);
TRY(result);
}
TRY(ctx->write_string_len(ctx->application_data, "\"", strlen("\"")));
}
return ctx->write_string_len(ctx->application_data, ">", strlen(">"));
/* Note: "continue on to the next child if the element is void" is handled in the iteration and dom_html5_serialize_element_end() */
}
/* https://html.spec.whatwg.org/multipage/syntax.html#void-elements
 * https://html.spec.whatwg.org/multipage/parsing.html#serializes-as-void
 *
 * Returns true when the node is in the HTML namespace and its name is one of the
 * names below, i.e. when it is serialized without children and without an end tag. */
static bool dom_html5_serializes_as_void(const xmlNode *node)
{
	static const char *const void_names[] = {
		/* These are the void elements from https://html.spec.whatwg.org/multipage/syntax.html#void-elements */
		"area", "base", "br", "col", "embed", "hr", "img", "input",
		"link", "meta", "source", "track", "wbr",
		/* These are the additional names from https://html.spec.whatwg.org/multipage/parsing.html#serializes-as-void */
		"basefont", "bgsound", "frame", "keygen", "param",
	};

	if (!dom_is_html_ns(node)) {
		return false;
	}

	size_t name_length = strlen((const char *) node->name);
	for (size_t i = 0; i < sizeof(void_names) / sizeof(void_names[0]); i++) {
		const char *candidate = void_names[i];
		if (dom_local_name_compare_ex(node, candidate, strlen(candidate), name_length)) {
			return true;
		}
	}

	return false;
}
/* Writes the end tag "</name>" of an element.
 * Elements that serialize as void have no end tag, so nothing is written for them. */
static zend_result dom_html5_serialize_element_end(dom_html5_serialize_context *ctx, const xmlNode *node)
{
	if (dom_html5_serializes_as_void(node)) {
		return SUCCESS;
	}

	TRY(ctx->write_string_len(ctx->application_data, "</", strlen("</")));
	TRY(dom_html5_serialize_element_tag_name(ctx, node));
	return ctx->write_string_len(ctx->application_data, ">", strlen(">"));
}
/* https://html.spec.whatwg.org/multipage/parsing.html#html-fragment-serialisation-algorithm */
/* Serializes `node`, its following siblings and all of their descendants, without recursion.
 * `bound` is the node at which the upwards walk stops; it is not serialized itself.
 * Returns FAILURE as soon as one of the write callbacks fails, SUCCESS otherwise. */
static zend_result dom_html5_serialize_node(dom_html5_serialize_context *ctx, const xmlNode *node, const xmlNode *bound)
{
while (node != NULL) {
switch (node->type) {
case XML_DTD_NODE: {
TRY(dom_html5_serialize_doctype(ctx, (const xmlDtd *) node));
break;
}
/* CDATA sections are serialized the same way as text nodes. */
case XML_CDATA_SECTION_NODE:
case XML_TEXT_NODE: {
TRY(dom_html5_serialize_text_node(ctx, node));
break;
}
case XML_PI_NODE: {
TRY(dom_html5_serialize_processing_instruction(ctx, node));
break;
}
case XML_COMMENT_NODE: {
TRY(dom_html5_serialize_comment(ctx, node));
break;
}
case XML_ELEMENT_NODE: {
TRY(dom_html5_serialize_element_start(ctx, node));
if (node->children) {
/* Descend into the children; the end tag is written when walking back up.
 * Children of an element that serializes as void are skipped. */
if (!dom_html5_serializes_as_void(node)) {
node = node->children;
continue;
}
} else {
/* Not descended, so wouldn't put the closing tag as it's normally only done when going back upwards. */
TRY(dom_html5_serialize_element_end(ctx, node));
}
break;
}
/* All other node types produce no output. */
default:
break;
}
if (node->next) {
node = node->next;
} else {
/* Go upwards, until we find a parent node with a next sibling, or until we hit the bound. */
do {
node = node->parent;
/* The bound is checked before anything is written, so no end tag is emitted for it. */
if (node == bound) {
return SUCCESS;
}
if (node->type == XML_ELEMENT_NODE) {
TRY(dom_html5_serialize_element_end(ctx, node));
}
} while (node->next == NULL);
node = node->next;
}
}
return SUCCESS;
}
/* https://html.spec.whatwg.org/multipage/parsing.html#serialising-html-fragments (Date 2023-10-18)
 * Note: this serializes the _children_, excluding the node itself!
 *
 * Output is produced through the callbacks in ctx. Nodes that cannot have children,
 * and elements that serialize as void, produce no output and yield SUCCESS. */
zend_result dom_html5_serialize(dom_html5_serialize_context *ctx, const xmlNode *node)
{
	switch (node->type) {
		case XML_ELEMENT_NODE:
			if (dom_html5_serializes_as_void(node)) {
				return SUCCESS;
			}
			break;
		case XML_DOCUMENT_FRAG_NODE:
		case XML_DOCUMENT_NODE:
		case XML_HTML_DOCUMENT_NODE:
			break;
		default:
			/* Step 1. Note that this algorithm serializes children. Only elements, documents, and fragments can have children. */
			return SUCCESS;
	}

	/* Step 2 not needed because we're not using a string to store the serialized data */
	/* Step 3 not needed because we don't support template contents yet */

	/* Step 4 */
	return dom_html5_serialize_node(ctx, node->children, node);
}
#endif /* HAVE_LIBXML && HAVE_DOM */

View file

@ -0,0 +1,31 @@
/*
+----------------------------------------------------------------------+
| Copyright (c) The PHP Group |
+----------------------------------------------------------------------+
| This source file is subject to version 3.01 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
| available through the world-wide-web at the following url: |
| https://www.php.net/license/3_01.txt |
| If you did not receive a copy of the PHP license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@php.net so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
| Authors: Niels Dossche <nielsdos@php.net> |
+----------------------------------------------------------------------+
*/
#ifndef HTML5_SERIALIZER_H
#define HTML5_SERIALIZER_H
#include <Zend/zend_types.h>
#include <libxml/tree.h>
/* Output sink description passed to dom_html5_serialize().
 * The serializer does not build a string itself; it hands text to these callbacks. */
typedef struct {
/* Writes `buf`; no length is passed, so the buffer is presumably NUL-terminated (TODO confirm). */
zend_result (*write_string)(void *application_data, const char *buf);
/* Writes `len` bytes starting at `buf`. */
zend_result (*write_string_len)(void *application_data, const char *buf, size_t len);
/* Opaque pointer owned by the caller; presumably forwarded as the first argument of both callbacks (TODO confirm). */
void *application_data;
} dom_html5_serialize_context;
zend_result dom_html5_serialize(dom_html5_serialize_context *ctx, const xmlNode *node);
#endif

1330
ext/dom/html_document.c Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,54 @@
/*
+----------------------------------------------------------------------+
| Copyright (c) The PHP Group |
+----------------------------------------------------------------------+
| This source file is subject to version 3.01 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
| available through the world-wide-web at the following url: |
| https://www.php.net/license/3_01.txt |
| If you did not receive a copy of the PHP license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@php.net so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
| Authors: Niels Dossche <nielsdos@php.net> |
+----------------------------------------------------------------------+
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "php.h"
#if defined(HAVE_LIBXML) && defined(HAVE_DOM)
#include "php_dom.h"
#include "namespace_compat.h"
/* Tells whether a namespace declaration carries the "also visible as an attribute" mark,
 * i.e. whether its _private field is non-NULL (set by dom_ns_compat_mark_attribute()). */
bool dom_ns_is_also_an_attribute(const xmlNs *ns)
{
	const void *mark = ns->_private;
	return mark != NULL;
}
/* Marks a single namespace declaration as also being an attribute by storing a
 * non-NULL sentinel in its _private field; dom_ns_is_also_an_attribute() tests for it. */
void dom_ns_compat_mark_attribute(xmlNsPtr ns)
{
	void *const sentinel = (void *) 1;
	ns->_private = sentinel;
}
/* Marks every namespace declaration in the list starting at `ns` (following ->next).
 * A NULL list is accepted and results in no work. */
void dom_ns_compat_mark_attribute_list(xmlNsPtr ns)
{
	for (xmlNsPtr current = ns; current != NULL; current = current->next) {
		dom_ns_compat_mark_attribute(current);
	}
}
/* Transfers the "also an attribute" mark from each entry of `original` to the entry
 * at the same position in `copy`. Entries of `copy` beyond the end of `original` are left untouched. */
void dom_ns_compat_copy_attribute_list_mark(xmlNsPtr copy, const xmlNs *original)
{
	/* The iteration is driven by the original list: the copy list may be longer
	 * because of additional namespace copies from within a fragment. */
	for (; original != NULL; copy = copy->next, original = original->next) {
		/* The copy is expected to have at least as many entries as the original. */
		ZEND_ASSERT(copy != NULL);
		if (!dom_ns_is_also_an_attribute(original)) {
			continue;
		}
		dom_ns_compat_mark_attribute(copy);
	}
}
#endif /* HAVE_LIBXML && HAVE_DOM */

View file

@ -0,0 +1,39 @@
/*
+----------------------------------------------------------------------+
| Copyright (c) The PHP Group |
+----------------------------------------------------------------------+
| This source file is subject to version 3.01 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
| available through the world-wide-web at the following url: |
| https://www.php.net/license/3_01.txt |
| If you did not receive a copy of the PHP license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@php.net so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
| Authors: Niels Dossche <nielsdos@php.net> |
+----------------------------------------------------------------------+
*/
#ifndef NAMESPACE_COMPAT_H
#define NAMESPACE_COMPAT_H
#include <libxml/tree.h>
/* https://infra.spec.whatwg.org/#namespaces */
#define DOM_XHTML_NS_URI "http://www.w3.org/1999/xhtml"
#define DOM_MATHML_NS_URI "http://www.w3.org/1998/Math/MathML"
#define DOM_SVG_NS_URI "http://www.w3.org/2000/svg"
#define DOM_XLINK_NS_URI "http://www.w3.org/1999/xlink"
#define DOM_XML_NS_URI "http://www.w3.org/XML/1998/namespace"
#define DOM_XMLNS_NS_URI "http://www.w3.org/2000/xmlns/"
/* These functions make it possible to make a namespace declaration also visible as an attribute by
* setting a flag that can be checked with dom_ns_is_also_an_attribute().
* This is used in the serializer for example. */
bool dom_ns_is_also_an_attribute(const xmlNs *ns);
void dom_ns_compat_mark_attribute(xmlNsPtr ns);
void dom_ns_compat_mark_attribute_list(xmlNsPtr ns);
void dom_ns_compat_copy_attribute_list_mark(xmlNsPtr copy, const xmlNs *original);
#endif

View file

@ -1329,40 +1329,12 @@ PHP_METHOD(DOMNode, cloneNode)
DOM_GET_OBJ(n, id, xmlNodePtr, intern); DOM_GET_OBJ(n, id, xmlNodePtr, intern);
node = xmlDocCopyNode(n, n->doc, recursive); node = dom_clone_node(n, n->doc, intern, recursive);
if (!node) { if (!node) {
RETURN_FALSE; RETURN_FALSE;
} }
/* When deep is false Element nodes still require the attributes
Following taken from libxml as xmlDocCopyNode doesn't do this */
if (n->type == XML_ELEMENT_NODE && recursive == 0) {
if (n->nsDef != NULL) {
node->nsDef = xmlCopyNamespaceList(n->nsDef);
}
if (n->ns != NULL) {
xmlNsPtr ns;
ns = xmlSearchNs(n->doc, node, n->ns->prefix);
if (ns == NULL) {
ns = xmlSearchNs(n->doc, n, n->ns->prefix);
if (ns != NULL) {
xmlNodePtr root = node;
while (root->parent != NULL) {
root = root->parent;
}
node->ns = xmlNewNs(root, ns->href, ns->prefix);
}
} else {
node->ns = ns;
}
}
if (n->properties != NULL) {
node->properties = xmlCopyPropList(node, n->properties);
}
}
if (node->type == XML_ATTRIBUTE_NODE && n->ns != NULL && node->ns == NULL) { if (node->type == XML_ATTRIBUTE_NODE && n->ns != NULL && node->ns == NULL) {
/* Let reconciliation deal with this. The lifetime of the namespace poses no problem /* Let reconciliation deal with this. The lifetime of the namespace poses no problem
* because we're increasing the refcount of the document proxy at the return. * because we're increasing the refcount of the document proxy at the return.

View file

@ -27,6 +27,8 @@
#include "php_dom_arginfo.h" #include "php_dom_arginfo.h"
#include "dom_properties.h" #include "dom_properties.h"
#include "zend_interfaces.h" #include "zend_interfaces.h"
#include "lexbor/lexbor/core/types.h"
#include "lexbor/lexbor/core/lexbor.h"
#include "ext/standard/info.h" #include "ext/standard/info.h"
#define PHP_XPATH 1 #define PHP_XPATH 1
@ -40,6 +42,8 @@ PHP_DOM_EXPORT zend_class_entry *dom_childnode_class_entry;
PHP_DOM_EXPORT zend_class_entry *dom_domimplementation_class_entry; PHP_DOM_EXPORT zend_class_entry *dom_domimplementation_class_entry;
PHP_DOM_EXPORT zend_class_entry *dom_documentfragment_class_entry; PHP_DOM_EXPORT zend_class_entry *dom_documentfragment_class_entry;
PHP_DOM_EXPORT zend_class_entry *dom_document_class_entry; PHP_DOM_EXPORT zend_class_entry *dom_document_class_entry;
PHP_DOM_EXPORT zend_class_entry *dom_html_document_class_entry;
PHP_DOM_EXPORT zend_class_entry *dom_xml_document_class_entry;
PHP_DOM_EXPORT zend_class_entry *dom_nodelist_class_entry; PHP_DOM_EXPORT zend_class_entry *dom_nodelist_class_entry;
PHP_DOM_EXPORT zend_class_entry *dom_namednodemap_class_entry; PHP_DOM_EXPORT zend_class_entry *dom_namednodemap_class_entry;
PHP_DOM_EXPORT zend_class_entry *dom_characterdata_class_entry; PHP_DOM_EXPORT zend_class_entry *dom_characterdata_class_entry;
@ -53,6 +57,7 @@ PHP_DOM_EXPORT zend_class_entry *dom_notation_class_entry;
PHP_DOM_EXPORT zend_class_entry *dom_entity_class_entry; PHP_DOM_EXPORT zend_class_entry *dom_entity_class_entry;
PHP_DOM_EXPORT zend_class_entry *dom_entityreference_class_entry; PHP_DOM_EXPORT zend_class_entry *dom_entityreference_class_entry;
PHP_DOM_EXPORT zend_class_entry *dom_processinginstruction_class_entry; PHP_DOM_EXPORT zend_class_entry *dom_processinginstruction_class_entry;
PHP_DOM_EXPORT zend_class_entry *dom_abstract_base_document_class_entry;
#ifdef LIBXML_XPATH_ENABLED #ifdef LIBXML_XPATH_ENABLED
PHP_DOM_EXPORT zend_class_entry *dom_xpath_class_entry; PHP_DOM_EXPORT zend_class_entry *dom_xpath_class_entry;
#endif #endif
@ -70,6 +75,8 @@ zend_object_handlers dom_xpath_object_handlers;
static HashTable classes; static HashTable classes;
/* {{{ prop handler tables */ /* {{{ prop handler tables */
static HashTable dom_document_prop_handlers; static HashTable dom_document_prop_handlers;
static HashTable dom_xml_document_prop_handlers;
static HashTable dom_html_document_prop_handlers;
static HashTable dom_documentfragment_prop_handlers; static HashTable dom_documentfragment_prop_handlers;
static HashTable dom_node_prop_handlers; static HashTable dom_node_prop_handlers;
static HashTable dom_nodelist_prop_handlers; static HashTable dom_nodelist_prop_handlers;
@ -206,6 +213,7 @@ static void dom_copy_doc_props(php_libxml_ref_obj *source_doc, php_libxml_ref_ob
zend_hash_copy(dest->classmap, source->classmap, NULL); zend_hash_copy(dest->classmap, source->classmap, NULL);
} }
dest_doc->is_modern_api_class = source_doc->is_modern_api_class;
} }
} }
@ -490,6 +498,12 @@ static void dom_update_refcount_after_clone(dom_object *original, xmlNodePtr ori
php_libxml_increment_node_ptr((php_libxml_node_object *)clone, cloned_node, (void *)clone); php_libxml_increment_node_ptr((php_libxml_node_object *)clone, cloned_node, (void *)clone);
if (original->document != clone->document) { if (original->document != clone->document) {
dom_copy_doc_props(original->document, clone->document); dom_copy_doc_props(original->document, clone->document);
/* Workaround libxml2 bug, see https://gitlab.gnome.org/GNOME/libxml2/-/commit/07920b4381873187c02df53fa9b5d44aff3a7041 */
#if LIBXML_VERSION < 20911
if (original_node->type == XML_HTML_DOCUMENT_NODE) {
cloned_node->type = XML_HTML_DOCUMENT_NODE;
}
#endif
} }
} }
@ -586,6 +600,38 @@ static zend_object *dom_objects_store_clone_obj(zend_object *zobject);
void dom_xpath_objects_free_storage(zend_object *object); void dom_xpath_objects_free_storage(zend_object *object);
#endif #endif
/* Allocation hooks with plain function signatures so they can be handed to
 * lexbor_memory_setup() in MINIT. Each one forwards directly to the matching
 * Zend allocator call (emalloc / erealloc / ecalloc / efree). */

/* Allocates `size` bytes via emalloc(). */
static void *dom_malloc(size_t size)
{
	return emalloc(size);
}

/* Resizes the allocation `dst` to `size` bytes via erealloc(). */
static void *dom_realloc(void *dst, size_t size)
{
	return erealloc(dst, size);
}

/* Allocates an array of `num` elements of `size` bytes each via ecalloc(). */
static void *dom_calloc(size_t num, size_t size)
{
	return ecalloc(num, size);
}

/* Releases `ptr` via efree(). */
static void dom_free(void *ptr)
{
	efree(ptr);
}
/* Registers one property handler on the local `table`, deriving the name length from the
 * very same string literal so that the name and its length can never disagree. */
#define DOM_REGISTER_XML_PROP(name, read, write) \
	dom_register_prop_handler(table, name, sizeof(name) - 1, read, write)

/* Registers the XML-specific document property handlers on `table`.
 * Called once for the DOMDocument handler table and once for the DOM\XMLDocument one.
 * A NULL write handler means no write callback is registered for that property. */
static void register_nondeprecated_xml_props(HashTable *table)
{
	DOM_REGISTER_XML_PROP("encoding", dom_document_encoding_read, dom_document_encoding_write);
	DOM_REGISTER_XML_PROP("xmlEncoding", dom_document_encoding_read, NULL);
	DOM_REGISTER_XML_PROP("standalone", dom_document_standalone_read, dom_document_standalone_write);
	DOM_REGISTER_XML_PROP("xmlStandalone", dom_document_standalone_read, dom_document_standalone_write);
	DOM_REGISTER_XML_PROP("version", dom_document_version_read, dom_document_version_write);
	DOM_REGISTER_XML_PROP("xmlVersion", dom_document_version_read, dom_document_version_write);
	DOM_REGISTER_XML_PROP("formatOutput", dom_document_format_output_read, dom_document_format_output_write);
	DOM_REGISTER_XML_PROP("validateOnParse", dom_document_validate_on_parse_read, dom_document_validate_on_parse_write);
	DOM_REGISTER_XML_PROP("resolveExternals", dom_document_resolve_externals_read, dom_document_resolve_externals_write);
	/* Previously the length was taken from a differently-cased literal ("preserveWhitespace"),
	 * which only worked because both spellings have the same length. */
	DOM_REGISTER_XML_PROP("preserveWhiteSpace", dom_document_preserve_whitespace_read, dom_document_preserve_whitespace_write);
	DOM_REGISTER_XML_PROP("recover", dom_document_recover_read, dom_document_recover_write);
	DOM_REGISTER_XML_PROP("substituteEntities", dom_document_substitue_entities_read, dom_document_substitue_entities_write);
}

#undef DOM_REGISTER_XML_PROP
/* {{{ PHP_MINIT_FUNCTION(dom) */ /* {{{ PHP_MINIT_FUNCTION(dom) */
PHP_MINIT_FUNCTION(dom) PHP_MINIT_FUNCTION(dom)
{ {
@ -675,36 +721,49 @@ PHP_MINIT_FUNCTION(dom)
zend_hash_merge(&dom_documentfragment_prop_handlers, &dom_node_prop_handlers, dom_copy_prop_handler, 0); zend_hash_merge(&dom_documentfragment_prop_handlers, &dom_node_prop_handlers, dom_copy_prop_handler, 0);
zend_hash_add_ptr(&classes, dom_documentfragment_class_entry->name, &dom_documentfragment_prop_handlers); zend_hash_add_ptr(&classes, dom_documentfragment_class_entry->name, &dom_documentfragment_prop_handlers);
dom_document_class_entry = register_class_DOMDocument(dom_node_class_entry, dom_parentnode_class_entry); dom_abstract_base_document_class_entry = register_class_DOM_Document(dom_node_class_entry, dom_parentnode_class_entry);
/* No need to set create_object as it's abstract. */
HashTable dom_abstract_base_document_prop_handlers;
zend_hash_init(&dom_abstract_base_document_prop_handlers, 0, NULL, dom_dtor_prop_handler, 1);
dom_register_prop_handler(&dom_abstract_base_document_prop_handlers, "doctype", sizeof("doctype")-1, dom_document_doctype_read, NULL);
dom_register_prop_handler(&dom_abstract_base_document_prop_handlers, "documentElement", sizeof("documentElement")-1, dom_document_document_element_read, NULL);
dom_register_prop_handler(&dom_abstract_base_document_prop_handlers, "strictErrorChecking", sizeof("strictErrorChecking")-1, dom_document_strict_error_checking_read, dom_document_strict_error_checking_write);
dom_register_prop_handler(&dom_abstract_base_document_prop_handlers, "documentURI", sizeof("documentURI")-1, dom_document_document_uri_read, dom_document_document_uri_write);
dom_register_prop_handler(&dom_abstract_base_document_prop_handlers, "firstElementChild", sizeof("firstElementChild")-1, dom_parent_node_first_element_child_read, NULL);
dom_register_prop_handler(&dom_abstract_base_document_prop_handlers, "lastElementChild", sizeof("lastElementChild")-1, dom_parent_node_last_element_child_read, NULL);
dom_register_prop_handler(&dom_abstract_base_document_prop_handlers, "childElementCount", sizeof("childElementCount")-1, dom_parent_node_child_element_count, NULL);
zend_hash_merge(&dom_abstract_base_document_prop_handlers, &dom_node_prop_handlers, dom_copy_prop_handler, 0);
/* No need to register in &classes, because this is only used for merging. This is destroyed down below. */
dom_document_class_entry = register_class_DOMDocument(dom_abstract_base_document_class_entry);
dom_document_class_entry->create_object = dom_objects_new; dom_document_class_entry->create_object = dom_objects_new;
zend_hash_init(&dom_document_prop_handlers, 0, NULL, dom_dtor_prop_handler, 1); zend_hash_init(&dom_document_prop_handlers, 0, NULL, dom_dtor_prop_handler, 1);
dom_register_prop_handler(&dom_document_prop_handlers, "doctype", sizeof("doctype")-1, dom_document_doctype_read, NULL);
dom_register_prop_handler(&dom_document_prop_handlers, "implementation", sizeof("implementation")-1, dom_document_implementation_read, NULL); dom_register_prop_handler(&dom_document_prop_handlers, "implementation", sizeof("implementation")-1, dom_document_implementation_read, NULL);
dom_register_prop_handler(&dom_document_prop_handlers, "documentElement", sizeof("documentElement")-1, dom_document_document_element_read, NULL);
dom_register_prop_handler(&dom_document_prop_handlers, "actualEncoding", sizeof("actualEncoding")-1, dom_document_encoding_read, NULL); dom_register_prop_handler(&dom_document_prop_handlers, "actualEncoding", sizeof("actualEncoding")-1, dom_document_encoding_read, NULL);
dom_register_prop_handler(&dom_document_prop_handlers, "encoding", sizeof("encoding")-1, dom_document_encoding_read, dom_document_encoding_write);
dom_register_prop_handler(&dom_document_prop_handlers, "xmlEncoding", sizeof("xmlEncoding")-1, dom_document_encoding_read, NULL);
dom_register_prop_handler(&dom_document_prop_handlers, "standalone", sizeof("standalone")-1, dom_document_standalone_read, dom_document_standalone_write);
dom_register_prop_handler(&dom_document_prop_handlers, "xmlStandalone", sizeof("xmlStandalone")-1, dom_document_standalone_read, dom_document_standalone_write);
dom_register_prop_handler(&dom_document_prop_handlers, "version", sizeof("version")-1, dom_document_version_read, dom_document_version_write);
dom_register_prop_handler(&dom_document_prop_handlers, "xmlVersion", sizeof("xmlVersion")-1, dom_document_version_read, dom_document_version_write);
dom_register_prop_handler(&dom_document_prop_handlers, "strictErrorChecking", sizeof("strictErrorChecking")-1, dom_document_strict_error_checking_read, dom_document_strict_error_checking_write);
dom_register_prop_handler(&dom_document_prop_handlers, "documentURI", sizeof("documentURI")-1, dom_document_document_uri_read, dom_document_document_uri_write);
dom_register_prop_handler(&dom_document_prop_handlers, "config", sizeof("config")-1, dom_document_config_read, NULL); dom_register_prop_handler(&dom_document_prop_handlers, "config", sizeof("config")-1, dom_document_config_read, NULL);
dom_register_prop_handler(&dom_document_prop_handlers, "formatOutput", sizeof("formatOutput")-1, dom_document_format_output_read, dom_document_format_output_write); register_nondeprecated_xml_props(&dom_document_prop_handlers);
dom_register_prop_handler(&dom_document_prop_handlers, "validateOnParse", sizeof("validateOnParse")-1, dom_document_validate_on_parse_read, dom_document_validate_on_parse_write);
dom_register_prop_handler(&dom_document_prop_handlers, "resolveExternals", sizeof("resolveExternals")-1, dom_document_resolve_externals_read, dom_document_resolve_externals_write);
dom_register_prop_handler(&dom_document_prop_handlers, "preserveWhiteSpace", sizeof("preserveWhitespace")-1, dom_document_preserve_whitespace_read, dom_document_preserve_whitespace_write);
dom_register_prop_handler(&dom_document_prop_handlers, "recover", sizeof("recover")-1, dom_document_recover_read, dom_document_recover_write);
dom_register_prop_handler(&dom_document_prop_handlers, "substituteEntities", sizeof("substituteEntities")-1, dom_document_substitue_entities_read, dom_document_substitue_entities_write);
dom_register_prop_handler(&dom_document_prop_handlers, "firstElementChild", sizeof("firstElementChild")-1, dom_parent_node_first_element_child_read, NULL); zend_hash_merge(&dom_document_prop_handlers, &dom_abstract_base_document_prop_handlers, dom_copy_prop_handler, 0);
dom_register_prop_handler(&dom_document_prop_handlers, "lastElementChild", sizeof("lastElementChild")-1, dom_parent_node_last_element_child_read, NULL);
dom_register_prop_handler(&dom_document_prop_handlers, "childElementCount", sizeof("childElementCount")-1, dom_parent_node_child_element_count, NULL);
zend_hash_merge(&dom_document_prop_handlers, &dom_node_prop_handlers, dom_copy_prop_handler, 0);
zend_hash_add_ptr(&classes, dom_document_class_entry->name, &dom_document_prop_handlers); zend_hash_add_ptr(&classes, dom_document_class_entry->name, &dom_document_prop_handlers);
/* DOM\HTMLDocument: register the class, set up its object constructor and its property handler table. */
dom_html_document_class_entry = register_class_DOM_HTMLDocument(dom_abstract_base_document_class_entry);
/* Configure the entry that was just registered (not dom_document_class_entry, which is already
 * set up earlier in this function); this mirrors the DOM\XMLDocument registration below. */
dom_html_document_class_entry->create_object = dom_objects_new;
zend_hash_init(&dom_html_document_prop_handlers, 0, NULL, dom_dtor_prop_handler, 1);
/* HTML documents use a dedicated encoding write handler. */
dom_register_prop_handler(&dom_html_document_prop_handlers, "encoding", sizeof("encoding")-1, dom_document_encoding_read, dom_html_document_encoding_write);
/* Pull in the handlers shared by all document classes. */
zend_hash_merge(&dom_html_document_prop_handlers, &dom_abstract_base_document_prop_handlers, dom_copy_prop_handler, 0);
zend_hash_add_ptr(&classes, dom_html_document_class_entry->name, &dom_html_document_prop_handlers);
dom_xml_document_class_entry = register_class_DOM_XMLDocument(dom_abstract_base_document_class_entry);
dom_xml_document_class_entry->create_object = dom_objects_new;
zend_hash_init(&dom_xml_document_prop_handlers, 0, NULL, dom_dtor_prop_handler, 1);
register_nondeprecated_xml_props(&dom_xml_document_prop_handlers);
zend_hash_merge(&dom_xml_document_prop_handlers, &dom_abstract_base_document_prop_handlers, dom_copy_prop_handler, 0);
zend_hash_add_ptr(&classes, dom_xml_document_class_entry->name, &dom_xml_document_prop_handlers);
zend_hash_destroy(&dom_abstract_base_document_prop_handlers);
dom_nodelist_class_entry = register_class_DOMNodeList(zend_ce_aggregate, zend_ce_countable); dom_nodelist_class_entry = register_class_DOMNodeList(zend_ce_aggregate, zend_ce_countable);
dom_nodelist_class_entry->create_object = dom_nnodemap_objects_new; dom_nodelist_class_entry->create_object = dom_nnodemap_objects_new;
dom_nodelist_class_entry->default_object_handlers = &dom_nodelist_object_handlers; dom_nodelist_class_entry->default_object_handlers = &dom_nodelist_object_handlers;
@ -845,6 +904,8 @@ PHP_MINIT_FUNCTION(dom)
php_libxml_register_export(dom_node_class_entry, php_dom_export_node); php_libxml_register_export(dom_node_class_entry, php_dom_export_node);
lexbor_memory_setup(dom_malloc, dom_realloc, dom_calloc, dom_free);
return SUCCESS; return SUCCESS;
} }
/* }}} */ /* }}} */
@ -876,6 +937,8 @@ PHP_MINFO_FUNCTION(dom)
PHP_MSHUTDOWN_FUNCTION(dom) /* {{{ */ PHP_MSHUTDOWN_FUNCTION(dom) /* {{{ */
{ {
zend_hash_destroy(&dom_document_prop_handlers); zend_hash_destroy(&dom_document_prop_handlers);
zend_hash_destroy(&dom_html_document_prop_handlers);
zend_hash_destroy(&dom_xml_document_prop_handlers);
zend_hash_destroy(&dom_documentfragment_prop_handlers); zend_hash_destroy(&dom_documentfragment_prop_handlers);
zend_hash_destroy(&dom_node_prop_handlers); zend_hash_destroy(&dom_node_prop_handlers);
zend_hash_destroy(&dom_namespace_node_prop_handlers); zend_hash_destroy(&dom_namespace_node_prop_handlers);
@ -1172,9 +1235,21 @@ PHP_DOM_EXPORT bool php_dom_create_object(xmlNodePtr obj, zval *return_value, do
switch (obj->type) { switch (obj->type) {
case XML_DOCUMENT_NODE: case XML_DOCUMENT_NODE:
{
if (domobj && domobj->document->is_modern_api_class) {
ce = dom_xml_document_class_entry;
} else {
ce = dom_document_class_entry;
}
break;
}
case XML_HTML_DOCUMENT_NODE: case XML_HTML_DOCUMENT_NODE:
{ {
if (domobj && domobj->document->is_modern_api_class) {
ce = dom_html_document_class_entry;
} else {
ce = dom_document_class_entry; ce = dom_document_class_entry;
}
break; break;
} }
case XML_DTD_NODE: case XML_DTD_NODE:
@ -1249,20 +1324,27 @@ PHP_DOM_EXPORT bool php_dom_create_object(xmlNodePtr obj, zval *return_value, do
if (domobj && domobj->document) { if (domobj && domobj->document) {
ce = dom_get_doc_classmap(domobj->document, ce); ce = dom_get_doc_classmap(domobj->document, ce);
} }
php_dom_instantiate_object_helper(return_value, ce, obj, domobj);
return 0;
}
/* }}} end php_domobject_new */
dom_object *php_dom_instantiate_object_helper(zval *return_value, zend_class_entry *ce, xmlNodePtr obj, dom_object *parent)
{
object_init_ex(return_value, ce); object_init_ex(return_value, ce);
intern = Z_DOMOBJ_P(return_value); dom_object *intern = Z_DOMOBJ_P(return_value);
if (obj->doc != NULL) { if (obj->doc != NULL) {
if (domobj != NULL) { if (parent != NULL) {
intern->document = domobj->document; intern->document = parent->document;
} }
php_libxml_increment_doc_ref((php_libxml_node_object *)intern, obj->doc); php_libxml_increment_doc_ref((php_libxml_node_object *)intern, obj->doc);
} }
php_libxml_increment_node_ptr((php_libxml_node_object *)intern, obj, (void *)intern); php_libxml_increment_node_ptr((php_libxml_node_object *)intern, obj, (void *)intern);
return 0;
return intern;
} }
/* }}} end php_domobject_new */
void php_dom_create_implementation(zval *retval) { void php_dom_create_implementation(zval *retval) {
object_init_ex(retval, dom_domimplementation_class_entry); object_init_ex(retval, dom_domimplementation_class_entry);
@ -1819,4 +1901,23 @@ static int dom_nodemap_has_dimension(zend_object *object, zval *member, int chec
return offset >= 0 && offset < php_dom_get_namednodemap_length(php_dom_obj_from_obj(object)); return offset >= 0 && offset < php_dom_get_namednodemap_length(php_dom_obj_from_obj(object));
} /* }}} end dom_nodemap_has_dimension */ } /* }}} end dom_nodemap_has_dimension */
/* Copies `node` into document `doc` using xmlDocCopyNode().
 *
 * node:      the node to copy
 * doc:       the document passed to xmlDocCopyNode() for the copy
 * intern:    DOM object whose document properties decide whether namespace marks are carried over
 * recursive: whether a deep copy is requested
 *
 * Returns the copy, or NULL when xmlDocCopyNode() fails. */
xmlNodePtr dom_clone_node(xmlNodePtr node, xmlDocPtr doc, const dom_object *intern, bool recursive)
{
	/* See http://www.xmlsoft.org/html/libxml-tree.html#xmlDocCopyNode for the meaning of the mode values.
	 * A non-recursive copy of an element uses mode 2 instead of 0; every other case passes `recursive` as-is. */
	const bool shallow_element = !recursive && node->type == XML_ELEMENT_NODE;
	const int copy_mode = shallow_element ? 2 : (int) recursive;

	xmlNodePtr clone = xmlDocCopyNode(node, doc, copy_mode);
	if (UNEXPECTED(clone == NULL)) {
		return NULL;
	}

	/* Only documents flagged as modern API classes get their namespace marks propagated to the copy. */
	const bool is_modern = intern->document != NULL && intern->document->is_modern_api_class;
	if (is_modern) {
		dom_mark_namespaces_for_copy_based_on_copy(clone, node);
	}

	return clone;
}
#endif /* HAVE_DOM */ #endif /* HAVE_DOM */

View file

@ -114,6 +114,8 @@ static inline dom_object_namespace_node *php_dom_namespace_node_obj_from_obj(zen
#include "domexception.h" #include "domexception.h"
#define DOM_HTML_NO_DEFAULT_NS (1U << 31)
dom_object *dom_object_get_data(xmlNodePtr obj); dom_object *dom_object_get_data(xmlNodePtr obj);
dom_doc_propsptr dom_get_doc_props(php_libxml_ref_obj *document); dom_doc_propsptr dom_get_doc_props(php_libxml_ref_obj *document);
libxml_doc_props const* dom_get_doc_props_read_only(const php_libxml_ref_obj *document); libxml_doc_props const* dom_get_doc_props_read_only(const php_libxml_ref_obj *document);
@ -154,6 +156,17 @@ bool php_dom_adopt_node(xmlNodePtr nodep, dom_object *dom_object_new_document, x
xmlNsPtr dom_get_ns_resolve_prefix_conflict(xmlNodePtr tree, const char *uri); xmlNsPtr dom_get_ns_resolve_prefix_conflict(xmlNodePtr tree, const char *uri);
void php_dom_reconcile_attribute_namespace_after_insertion(xmlAttrPtr attrp); void php_dom_reconcile_attribute_namespace_after_insertion(xmlAttrPtr attrp);
void php_dom_document_constructor(INTERNAL_FUNCTION_PARAMETERS);
dom_object *php_dom_instantiate_object_helper(zval *return_value, zend_class_entry *ce, xmlNodePtr obj, dom_object *parent);
/* Mode argument of dom_document_parser().
 * NOTE(review): presumably DOM_LOAD_STRING means `source` holds the document contents in memory
 * and DOM_LOAD_FILE means `source` names a file/URI to load — confirm against dom_document_parser(). */
typedef enum {
DOM_LOAD_STRING = 0,
DOM_LOAD_FILE = 1,
} dom_load_mode;
xmlDocPtr dom_document_parser(zval *id, dom_load_mode mode, const char *source, size_t source_len, size_t options, xmlCharEncodingHandlerPtr encoding);
/* parentnode */ /* parentnode */
void dom_parent_node_prepend(dom_object *context, zval *nodes, uint32_t nodesc); void dom_parent_node_prepend(dom_object *context, zval *nodes, uint32_t nodesc);
void dom_parent_node_append(dom_object *context, zval *nodes, uint32_t nodesc); void dom_parent_node_append(dom_object *context, zval *nodes, uint32_t nodesc);
@ -174,6 +187,9 @@ void php_dom_nodelist_get_item_into_zval(dom_nnodemap_object *objmap, zend_long
int php_dom_get_namednodemap_length(dom_object *obj); int php_dom_get_namednodemap_length(dom_object *obj);
int php_dom_get_nodelist_length(dom_object *obj); int php_dom_get_nodelist_length(dom_object *obj);
xmlNodePtr dom_clone_node(xmlNodePtr node, xmlDocPtr doc, const dom_object *intern, bool recursive);
void dom_mark_namespaces_for_copy_based_on_copy(xmlNodePtr copy, const xmlNode *original);
#define DOM_GET_INTERN(__id, __intern) { \ #define DOM_GET_INTERN(__id, __intern) { \
__intern = Z_DOMOBJ_P(__id); \ __intern = Z_DOMOBJ_P(__id); \
if (UNEXPECTED(__intern->ptr == NULL)) { \ if (UNEXPECTED(__intern->ptr == NULL)) { \

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -141,7 +141,7 @@ string(27) "<?xml version="1.0"?>
Not Supported Error Not Supported Error
-- Adopt a document (strict error off) -- -- Adopt a document (strict error off) --
Warning: DOMDocument::adoptNode(): Not Supported Error in %s on line %d Warning: DOM\Document::adoptNode(): Not Supported Error in %s on line %d
-- Adopt an attribute -- -- Adopt an attribute --
bool(true) bool(true)
bool(true) bool(true)

View file

@ -23,5 +23,5 @@ var_dump($dom->load(str_repeat(" ", PHP_MAXPATHLEN + 1)));
?> ?>
--EXPECT-- --EXPECT--
DOMDocument::load(): Argument #1 ($filename) must not be empty DOMDocument::load(): Argument #1 ($filename) must not be empty
Path to document must not contain any null bytes DOMDocument::load(): Argument #1 ($filename) must not contain any null bytes
bool(false) bool(false)

View file

@ -35,5 +35,5 @@ var_dump($result);
?> ?>
--EXPECTF-- --EXPECTF--
Warning: DOMDocument::relaxNGValidateSource(): Did not expect element pear there in %s on line %d Warning: DOM\Document::relaxNGValidateSource(): Did not expect element pear there in %s on line %d
bool(false) bool(false)

View file

@ -31,7 +31,7 @@ var_dump($result);
?> ?>
--EXPECTF-- --EXPECTF--
Warning: DOMDocument::relaxNGValidateSource(): xmlRelaxNGParseElement: element has no content in %s on line %d Warning: DOM\Document::relaxNGValidateSource(): xmlRelaxNGParseElement: element has no content in %s on line %d
Warning: DOMDocument::relaxNGValidateSource(): Invalid RelaxNG in %s on line %d Warning: DOM\Document::relaxNGValidateSource(): Invalid RelaxNG in %s on line %d
bool(false) bool(false)

View file

@ -20,5 +20,5 @@ $result = $doc->relaxNGValidate($rng);
var_dump($result); var_dump($result);
?> ?>
--EXPECTF-- --EXPECTF--
Warning: DOMDocument::relaxNGValidate(): Did not expect element pear there in %s on line %d Warning: DOM\Document::relaxNGValidate(): Did not expect element pear there in %s on line %d
bool(false) bool(false)

View file

@ -20,9 +20,9 @@ $result = $doc->relaxNGValidate($rng);
var_dump($result); var_dump($result);
?> ?>
--EXPECTF-- --EXPECTF--
Warning: DOMDocument::relaxNGValidate(): I/O warning : failed to load external entity "%s/foo.rng" in %s on line %d Warning: DOM\Document::relaxNGValidate(): I/O warning : failed to load external entity "%s/foo.rng" in %s on line %d
Warning: DOMDocument::relaxNGValidate(): xmlRelaxNGParse: could not load %s/foo.rng in %s on line %d Warning: DOM\Document::relaxNGValidate(): xmlRelaxNGParse: could not load %s/foo.rng in %s on line %d
Warning: DOMDocument::relaxNGValidate(): Invalid RelaxNG in %s on line %d Warning: DOM\Document::relaxNGValidate(): Invalid RelaxNG in %s on line %d
bool(false) bool(false)

View file

@ -17,13 +17,13 @@ var_dump($result);
?> ?>
--EXPECTF-- --EXPECTF--
Warning: DOMDocument::schemaValidateSource(): Entity: line 1: parser error : Start tag expected, '<' not found in %s.php on line %d Warning: DOM\Document::schemaValidateSource(): Entity: line 1: parser error : Start tag expected, '<' not found in %s on line %d
Warning: DOMDocument::schemaValidateSource(): string that is not a schema in %s.php on line %d Warning: DOM\Document::schemaValidateSource(): string that is not a schema in %s on line %d
Warning: DOMDocument::schemaValidateSource(): ^ in %s.php on line %d Warning: DOM\Document::schemaValidateSource(): ^ in %s on line %d
Warning: DOMDocument::schemaValidateSource(): Failed to parse the XML resource 'in_memory_buffer'. in %s.php on line %d Warning: DOM\Document::schemaValidateSource(): Failed to parse the XML resource 'in_memory_buffer'. in %s on line %d
Warning: DOMDocument::schemaValidateSource(): Invalid Schema in %s.php on line %d Warning: DOM\Document::schemaValidateSource(): Invalid Schema in %s on line %d
bool(false) bool(false)

View file

@ -19,5 +19,5 @@ var_dump($result);
?> ?>
--EXPECTF-- --EXPECTF--
Warning: DOMDocument::schemaValidateSource(): Element 'books': No matching global declaration available for the validation root. in %s.php on line %d Warning: DOM\Document::schemaValidateSource(): Element 'books': No matching global declaration available for the validation root. in %s on line %d
bool(false) bool(false)

View file

@ -20,4 +20,4 @@ try {
?> ?>
--EXPECT-- --EXPECT--
DOMDocument::schemaValidateSource(): Argument #1 ($source) must not be empty DOM\Document::schemaValidateSource(): Argument #1 ($source) must not be empty

View file

@ -17,13 +17,13 @@ var_dump($result);
?> ?>
--EXPECTF-- --EXPECTF--
Warning: DOMDocument::schemaValidate(): %sbook-not-a-schema.xsd:1: parser error : Start tag expected, '<' not found in %s.php on line %d Warning: DOM\Document::schemaValidate(): %s/book-not-a-schema.xsd:1: parser error : Start tag expected, '<' not found in %s on line %d
Warning: DOMDocument::schemaValidate(): Let's see what happens upon parsing a file that doesn't contain a schema. in %s.php on line %d Warning: DOM\Document::schemaValidate(): Let's see what happens upon parsing a file that doesn't contain a schema. in %s on line %d
Warning: DOMDocument::schemaValidate(): ^ in %s.php on line %d Warning: DOM\Document::schemaValidate(): ^ in %s on line %d
Warning: DOMDocument::schemaValidate(): Failed to parse the XML resource '%sbook-not-a-schema.xsd'. in %s.php on line %d Warning: DOM\Document::schemaValidate(): Failed to parse the XML resource '%s/book-not-a-schema.xsd'. in %s on line %d
Warning: DOMDocument::schemaValidate(): Invalid Schema in %s.php on line %d Warning: DOM\Document::schemaValidate(): Invalid Schema in %s on line %d
bool(false) bool(false)

View file

@ -17,5 +17,5 @@ var_dump($result);
?> ?>
--EXPECTF-- --EXPECTF--
Warning: DOMDocument::schemaValidate(): Element 'books': No matching global declaration available for the validation root. in %s.php on line %d Warning: DOM\Document::schemaValidate(): Element 'books': No matching global declaration available for the validation root. in %s on line %d
bool(false) bool(false)

View file

@ -20,4 +20,4 @@ try {
?> ?>
--EXPECT-- --EXPECT--
DOMDocument::schemaValidate(): Argument #1 ($filename) must not be empty DOM\Document::schemaValidate(): Argument #1 ($filename) must not be empty

View file

@ -17,9 +17,9 @@ var_dump($result);
?> ?>
--EXPECTF-- --EXPECTF--
Warning: DOMDocument::schemaValidate(): I/O warning : failed to load external entity "%snon-existent-file" in %s.php on line %d Warning: DOM\Document::schemaValidate(): I/O warning : failed to load external entity "%s/non-existent-file" in %s on line %d
Warning: DOMDocument::schemaValidate(): Failed to locate the main schema resource at '%s/non-existent-file'. in %s.php on line %d Warning: DOM\Document::schemaValidate(): Failed to locate the main schema resource at '%s/non-existent-file'. in %s on line %d
Warning: DOMDocument::schemaValidate(): Invalid Schema in %s.php on line %d Warning: DOM\Document::schemaValidate(): Invalid Schema in %s on line %d
bool(false) bool(false)

View file

@ -19,7 +19,7 @@ var_dump($doc->schemaValidate(str_repeat(" ", PHP_MAXPATHLEN + 1)));
?> ?>
--EXPECTF-- --EXPECTF--
DOMDocument::schemaValidate(): Argument #1 ($filename) must not contain any null bytes DOM\Document::schemaValidate(): Argument #1 ($filename) must not contain any null bytes
Warning: DOMDocument::schemaValidate(): Invalid Schema file source in %s on line %d Warning: DOM\Document::schemaValidate(): Invalid Schema file source in %s on line %d
bool(false) bool(false)

View file

@ -56,4 +56,4 @@ See if strictErrorChecking is off
bool(false) bool(false)
Should raise PHP error because strictErrorChecking is off Should raise PHP error because strictErrorChecking is off
Warning: DOMDocument::createAttribute(): Invalid Character Error in %sDOMDocument_strictErrorChecking_variation.php on line %d Warning: DOM\Document::createAttribute(): Invalid Character Error in %s on line %d

View file

@ -20,14 +20,12 @@ object(DOMDocument)#1 (41) {
["dynamicProperty"]=> ["dynamicProperty"]=>
object(stdClass)#2 (0) { object(stdClass)#2 (0) {
} }
["doctype"]=>
NULL
["implementation"]=> ["implementation"]=>
string(22) "(object value omitted)" string(22) "(object value omitted)"
["documentElement"]=>
string(22) "(object value omitted)"
["actualEncoding"]=> ["actualEncoding"]=>
NULL NULL
["config"]=>
NULL
["encoding"]=> ["encoding"]=>
NULL NULL
["xmlEncoding"]=> ["xmlEncoding"]=>
@ -40,12 +38,6 @@ object(DOMDocument)#1 (41) {
string(3) "1.0" string(3) "1.0"
["xmlVersion"]=> ["xmlVersion"]=>
string(3) "1.0" string(3) "1.0"
["strictErrorChecking"]=>
bool(true)
["documentURI"]=>
string(%d) %s
["config"]=>
NULL
["formatOutput"]=> ["formatOutput"]=>
bool(false) bool(false)
["validateOnParse"]=> ["validateOnParse"]=>
@ -58,6 +50,14 @@ object(DOMDocument)#1 (41) {
bool(false) bool(false)
["substituteEntities"]=> ["substituteEntities"]=>
bool(false) bool(false)
["doctype"]=>
NULL
["documentElement"]=>
string(22) "(object value omitted)"
["strictErrorChecking"]=>
bool(true)
["documentURI"]=>
string(%d) "%s"
["firstElementChild"]=> ["firstElementChild"]=>
string(22) "(object value omitted)" string(22) "(object value omitted)"
["lastElementChild"]=> ["lastElementChild"]=>

View file

@ -0,0 +1,36 @@
--TEST--
DOM\HTMLDocument GB18030 encoding test
--EXTENSIONS--
dom
--FILE--
<?php
// Load the GB18030 fixture and print the encoding the document reports.
$dom = DOM\HTMLDocument::createFromFile(__DIR__ . "/gb18030.html");
var_dump($dom->encoding);
// Overwrite the text of the element that follows the first child of the document element.
$dom->documentElement->firstChild->nextElementSibling->textContent = "é";
// Serialize to a string and to a file, then check that both are byte-identical.
$output = $dom->saveHTML();
echo $output, "\n";
$dom->saveHTMLFile(__DIR__ . "/gb18030_output.tmp");
var_dump(file_get_contents(__DIR__ . "/gb18030_output.tmp") === $output);
echo "--- After changing encoding to UTF-8 ---\n";
// Set the encoding to UTF-8 and serialize again.
$dom->encoding = "UTF-8";
echo $dom->saveHTML(), "\n";
?>
--CLEAN--
<?php
@unlink(__DIR__ . "/gb18030_output.tmp");
?>
--EXPECT--
string(7) "gb18030"
<!DOCTYPE html><html><head>
<meta charset="gb18030">
</head>
<body>¨¦</body></html>
bool(true)
--- After changing encoding to UTF-8 ---
<!DOCTYPE html><html><head>
<meta charset="gb18030">
</head>
<body>é</body></html>

View file

@ -0,0 +1,40 @@
--TEST--
DOM\HTMLDocument Shift JIS encoding test
--EXTENSIONS--
dom
--FILE--
<?php
$inputFile = __DIR__ . "/shift_jis.html";
$outputFile = __DIR__ . "/shift_jis.tmp";

$dom = DOM\HTMLDocument::createFromFile($inputFile);
var_dump($dom->encoding);

$body = $dom->documentElement->firstChild->nextElementSibling;
$body->textContent .= "é";

$output = $dom->saveHTML();
echo $output, "\n";

$dom->saveHTMLFile($outputFile);
var_dump(file_get_contents($outputFile) === $output);

echo "--- After changing encoding to UTF-8 ---\n";
$dom->encoding = "UTF-8";
echo $dom->saveHTML(), "\n";
?>
--EXPECT--
string(9) "Shift_JIS"
<!DOCTYPE html><html><head>
<meta charset="shift_jis">
</head>
<body>
‚â‚ 
?</body></html>
bool(true)
--- After changing encoding to UTF-8 ---
<!DOCTYPE html><html><head>
<meta charset="shift_jis">
</head>
<body>
ã„ã<E2809E>
é</body></html>

View file

@ -0,0 +1,38 @@
--TEST--
DOM\HTMLDocument UTF-16BE BOM encoding test
--EXTENSIONS--
dom
--FILE--
<?php
// Load the UTF-16BE BOM fixture and print the encoding the document reports.
$dom = DOM\HTMLDocument::createFromFile(__DIR__ . "/utf16be_bom.html");
var_dump($dom->encoding);
// Overwrite the text of the element that follows the first child of the document element.
$dom->documentElement->firstChild->nextElementSibling->textContent = "é";
// Serialize to a string and to a file, then check that both are byte-identical.
$output = $dom->saveHTML();
echo $output, "\n";
$dom->saveHTMLFile(__DIR__ . "/utf16be_bom_output.tmp");
var_dump(file_get_contents(__DIR__ . "/utf16be_bom_output.tmp") === $output);
echo "--- After changing encoding to UTF-8 ---\n";
// Set the encoding to UTF-8 and serialize again.
$dom->encoding = "UTF-8";
echo $dom->saveHTML(), "\n";
?>
--EXPECTF--
string(8) "UTF-16BE"
%0<%0!%0D%0O%0C%0T%0Y%0P%0E%0 %0h%0t%0m%0l%0>%0<%0h%0t%0m%0l%0>%0<%0h%0e%0a%0d%0>%0
%0<%0!%0-%0-%0 %0i%0n%0t%0e%0n%0t%0i%0o%0n%0a%0l%0 %0l%0i%0e%0s%0 %0a%0n%0d%0 %0d%0e%0c%0e%0i%0t%0 %0-%0-%0>%0
%0<%0m%0e%0t%0a%0 %0c%0h%0a%0r%0s%0e%0t%0=%0"%0u%0t%0f%0-%08%0"%0>%0
%0<%0/%0h%0e%0a%0d%0>%0
%0<%0b%0o%0d%0y%0>%0é%0<%0/%0b%0o%0d%0y%0>%0<%0/%0h%0t%0m%0l%0>
bool(true)
--- After changing encoding to UTF-8 ---
<!DOCTYPE html><html><head>
<!-- intentional lies and deceit -->
<meta charset="utf-8">
</head>
<body>é</body></html>

View file

@ -0,0 +1,38 @@
--TEST--
DOM\HTMLDocument UTF-16LE BOM encoding test
--EXTENSIONS--
dom
--FILE--
<?php
$inputFile = __DIR__ . "/utf16le_bom.html";
$outputFile = __DIR__ . "/utf16le_bom_output.tmp";

$dom = DOM\HTMLDocument::createFromFile($inputFile);
var_dump($dom->encoding);

$body = $dom->documentElement->firstChild->nextElementSibling;
$body->textContent = "é";

$output = $dom->saveHTML();
echo $output, "\n";

$dom->saveHTMLFile($outputFile);
var_dump(file_get_contents($outputFile) === $output);

echo "--- After changing encoding to UTF-8 ---\n";
$dom->encoding = "UTF-8";
echo $dom->saveHTML(), "\n";
?>
--EXPECTF--
string(8) "UTF-16LE"
<%0!%0D%0O%0C%0T%0Y%0P%0E%0 %0h%0t%0m%0l%0>%0<%0h%0t%0m%0l%0>%0<%0h%0e%0a%0d%0>%0
%0<%0!%0-%0-%0 %0i%0n%0t%0e%0n%0t%0i%0o%0n%0a%0l%0 %0l%0i%0e%0s%0 %0a%0n%0d%0 %0d%0e%0c%0e%0i%0t%0 %0-%0-%0>%0
%0<%0m%0e%0t%0a%0 %0c%0h%0a%0r%0s%0e%0t%0=%0"%0u%0t%0f%0-%08%0"%0>%0
%0<%0/%0h%0e%0a%0d%0>%0
%0<%0b%0o%0d%0y%0>%0é%0<%0/%0b%0o%0d%0y%0>%0<%0/%0h%0t%0m%0l%0>%0
bool(true)
--- After changing encoding to UTF-8 ---
<!DOCTYPE html><html><head>
<!-- intentional lies and deceit -->
<meta charset="utf-8">
</head>
<body>é</body></html>

View file

@ -0,0 +1,38 @@
--TEST--
DOM\HTMLDocument UTF-8 BOM encoding test
--EXTENSIONS--
dom
--FILE--
<?php
// Load the UTF-8 BOM fixture and print the encoding the document reports.
$dom = DOM\HTMLDocument::createFromFile(__DIR__ . "/utf8_bom.html");
var_dump($dom->encoding);
// Overwrite the text of the element that follows the first child of the document element.
$dom->documentElement->firstChild->nextElementSibling->textContent = "é";
// Serialize to a string and to a file, then check that both are byte-identical.
$output = $dom->saveHTML();
echo $output, "\n";
$dom->saveHTMLFile(__DIR__ . "/utf8_bom_output.tmp");
var_dump(file_get_contents(__DIR__ . "/utf8_bom_output.tmp") === $output);
echo "--- After changing encoding to UTF-8 ---\n";
// Set the encoding to UTF-8 and serialize again.
$dom->encoding = "UTF-8";
echo $dom->saveHTML(), "\n";
?>
--EXPECT--
string(5) "UTF-8"
<!DOCTYPE html><html><head>
<!-- intentional lies and deceit -->
<meta charset="utf-16">
</head>
<body>é</body></html>
bool(true)
--- After changing encoding to UTF-8 ---
<!DOCTYPE html><html><head>
<!-- intentional lies and deceit -->
<meta charset="utf-16">
</head>
<body>é</body></html>

View file

@ -0,0 +1,40 @@
--TEST--
DOM\HTMLDocument Windows-1251 encoding test
--EXTENSIONS--
dom
--FILE--
<?php
// Load the windows-1251 fixture and print the encoding the document reports.
$dom = DOM\HTMLDocument::createFromFile(__DIR__ . "/windows1251.html");
var_dump($dom->encoding);
// Append "é" to the text of the element that follows the first child of the document element.
// Note: won't show up in Windows 1251 because it doesn't exist there
$dom->documentElement->firstChild->nextElementSibling->textContent .= "é";
// Serialize to a string and to a file, then check that both are byte-identical.
$output = $dom->saveHTML();
echo $output, "\n";
$dom->saveHTMLFile(__DIR__ . "/windows1251_output.tmp");
var_dump(file_get_contents(__DIR__ . "/windows1251_output.tmp") === $output);
echo "--- After changing encoding to UTF-8 ---\n";
// Set the encoding to UTF-8 and serialize again.
$dom->encoding = "UTF-8";
echo $dom->saveHTML(), "\n";
?>
--EXPECT--
string(12) "windows-1251"
<!DOCTYPE html><html><head>
<meta charset="windows-1251">
</head>
<body>
A ô B á C
?</body></html>
bool(true)
--- After changing encoding to UTF-8 ---
<!DOCTYPE html><html><head>
<meta charset="windows-1251">
</head>
<body>
A ф B б C
é</body></html>

View file

@ -0,0 +1,96 @@
--TEST--
DOM\HTMLDocument::createFromFile() HTTP header Content-Type
--EXTENSIONS--
dom
--SKIPIF--
<?php
if (@!include "./ext/standard/tests/http/server.inc") die('skip server.inc not available');
http_server_skipif();
?>
--FILE--
<?php
require "./ext/standard/tests/http/server.inc";
$tests = [
"Invalid type/subtype" => [
"/html; Charset=\"ISO-8859-1\"",
"text/; Charset=\"ISO-8859-1\"",
"tex°t/html; Charset=\"ISO-8859-1\"",
"/; Charset=\"ISO-8859-1\"",
"$/€; Charset=\"ISO-8859-1\"",
"; Charset=\"ISO-8859-1\"",
";",
"",
" \t",
],
"Valid type/subtype without charset" => [
"text/html; x=ISO-8859-1",
"text/html; x=\"ISO-8859-1\"",
"text/html; charet=\"ISO-8859-1\"",
"text/html; chars et=\"ISO-8859-1\"",
],
"All valid inputs" => [
"text/html; charset=ISO-8859-1",
"\t\r text/html; charset=ISO-8859-1 \t",
"text/html; foo=bar;charset=ISO-8859-1",
"text/html; foo=bar;charset=ISO-8859-1;bar=\"foooooo\"",
"text/html;;;; charset=ISO-8859-1",
"text/html; Charset=\"ISO-8859-1\"",
"text/html; Charset=\"ISO\\-8859-1\"",
"text/html; ;; ; ;; Charset=\"ISO-8859-1\"",
"text/html;Charset=\"ISO-8859-1",
"tex.t/h#\$%!&'*%2B-.^_`|~tml;Charset=\"ISO-8859-1\"", // Note: have to encode + as 2B because of implementation details of http_server()
],
"Valid input, but invalid encoding name" => [
"text/html;Charset=\"ISO-8859-1\\",
"text/html;Charset=\"ISO-8859-1\\\"",
"text/html;Charset=\"foobar\\\"",
"text/html;Charset=\"\\\"",
"text/html;Charset=",
],
];
foreach ($tests as $name => $headers) {
    echo "--- $name ---\n";
    $responses = array_map(fn ($header) => "data://text/plain,HTTP/1.1 200 OK\r\nContent-Type: " . $header . "\r\n\r\n" . "<p>\xE4\xF6\xFC</p>\n", $headers);
    ['pid' => $pid, 'uri' => $uri] = http_server($responses);
    try {
        // One createFromFile() request per entry in $responses.
        foreach ($responses as $_) {
            $result = DOM\HTMLDocument::createFromFile($uri, LIBXML_NOERROR);
            echo $result->textContent;
        }
    } finally {
        // Kill the server even when createFromFile() throws, so the process does not leak.
        http_server_kill($pid);
    }
}
?>
--EXPECT--
--- Invalid type/subtype ---
<EFBFBD><EFBFBD><EFBFBD>
<EFBFBD><EFBFBD><EFBFBD>
<EFBFBD><EFBFBD><EFBFBD>
<EFBFBD><EFBFBD><EFBFBD>
<EFBFBD><EFBFBD><EFBFBD>
<EFBFBD><EFBFBD><EFBFBD>
<EFBFBD><EFBFBD><EFBFBD>
<EFBFBD><EFBFBD><EFBFBD>
<EFBFBD><EFBFBD><EFBFBD>
--- Valid type/subtype without charset ---
<EFBFBD><EFBFBD><EFBFBD>
<EFBFBD><EFBFBD><EFBFBD>
<EFBFBD><EFBFBD><EFBFBD>
<EFBFBD><EFBFBD><EFBFBD>
--- All valid inputs ---
äöü
äöü
äöü
äöü
äöü
äöü
äöü
äöü
äöü
äöü
--- Valid input, but invalid encoding name ---
<EFBFBD><EFBFBD><EFBFBD>
<EFBFBD><EFBFBD><EFBFBD>
<EFBFBD><EFBFBD><EFBFBD>
<EFBFBD><EFBFBD><EFBFBD>
<EFBFBD><EFBFBD><EFBFBD>

View file

@ -0,0 +1,33 @@
--TEST--
DOM\HTMLDocument::createFromFile() with overrideEncoding
--EXTENSIONS--
dom
--FILE--
<?php
try {
DOM\HTMLDocument::createFromFile(__DIR__ . '/gb18030_without_charset.html', overrideEncoding: 'nonexistent');
} catch (ValueError $e) {
echo $e->getMessage(), "\n";
}
// The override encoding matches with the document encoding attribute
$dom = DOM\HTMLDocument::createFromFile(__DIR__ . '/gb18030_without_charset.html', overrideEncoding: 'GB18030');
var_dump($dom->documentElement->lastChild->textContent);
var_dump($dom->encoding);
// The override encoding mismatches with the document encoding attribute
$dom = DOM\HTMLDocument::createFromFile(__DIR__ . '/fallback_encoding.html', overrideEncoding: 'Windows-1252');
var_dump($dom->documentElement->lastChild->textContent);
var_dump($dom->encoding);
?>
--EXPECT--
DOM\HTMLDocument::createFromFile(): Argument #3 ($overrideEncoding) must be a valid document encoding
string(20) "
Héllo, world!
"
string(7) "gb18030"
string(1) "
"
string(12) "windows-1252"

View file

@ -0,0 +1,33 @@
--TEST--
DOM\HTMLDocument::createFromString() with overrideEncoding
--EXTENSIONS--
dom
--FILE--
<?php
try {
DOM\HTMLDocument::createFromString(file_get_contents(__DIR__ . '/gb18030_without_charset.html'), overrideEncoding: 'nonexistent');
} catch (ValueError $e) {
echo $e->getMessage(), "\n";
}
// The override encoding matches with the document encoding attribute
$dom = DOM\HTMLDocument::createFromString(file_get_contents(__DIR__ . '/gb18030_without_charset.html'), overrideEncoding: 'GB18030');
var_dump($dom->documentElement->lastChild->textContent);
var_dump($dom->encoding);
// The override encoding mismatches with the document encoding attribute
$dom = DOM\HTMLDocument::createFromString(file_get_contents(__DIR__ . '/fallback_encoding.html'), overrideEncoding: 'Windows-1252');
var_dump($dom->documentElement->lastChild->textContent);
var_dump($dom->encoding);
?>
--EXPECT--
DOM\HTMLDocument::createFromString(): Argument #3 ($overrideEncoding) must be a valid document encoding
string(20) "
Héllo, world!
"
string(7) "gb18030"
string(1) "
"
string(12) "windows-1252"

View file

@ -0,0 +1,16 @@
--TEST--
DOM\HTMLDocument edge case encoding 01
--EXTENSIONS--
dom
--FILE--
<?php
// UTF-8 -> UTF-8
// Create a UTF-8 string where a UTF-8 byte sequence falls over the boundary of the 4096 byte buffer
$dom = DOM\HTMLDocument::createEmpty();
$dom->append(str_repeat("A", 4096 - 2) . "\xf0\x90\x8d\x88AA");
var_dump($dom->saveHTML());
?>
--EXPECT--
string(4100) "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA𐍈AA"

View file

@ -0,0 +1,27 @@
--TEST--
DOM\HTMLDocument edge case encoding 02
--EXTENSIONS--
dom
--FILE--
<?php
// UTF-8 -> GB18030
$dom = DOM\HTMLDocument::createEmpty("GB18030");
// Create a UTF-8 string where a UTF-8 byte sequence falls over the boundary of the 4096 byte buffer
// *and* the sequence also falls over the boundary for the result
$dom->append(str_repeat("A", 4096 - 2) . "\xf0\x90\x8d\x88AA");
var_dump($output = $dom->saveHTML());
// GB18030 encoding of the above UTF-8 symbol
var_dump($output[4094] == "\x90");
var_dump($output[4095] == "\x30");
var_dump($output[4096] == "\xd5");
var_dump($output[4097] == "\x30");
?>
--EXPECT--
string(4100) "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA𐍈AA"
bool(true)
bool(true)
bool(true)
bool(true)

View file

@ -0,0 +1,17 @@
--TEST--
DOM\HTMLDocument edge case encoding 03
--EXTENSIONS--
dom
--FILE--
<?php
// UTF-8 -> GB18030
$dom = DOM\HTMLDocument::createEmpty("GB18030");
// Create a UTF-8 string where an invalid UTF-8 byte sequence falls over the boundary of the 4096 byte buffer
// Note: the strange ?1?7 sequence is the GB18030 encoding for the unicode replacement character
$dom->append(str_repeat("A", 4096 - 2) . "\xff\xff\xff");
var_dump($dom->saveHTML());
?>
--EXPECT--
string(4106) "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA<41><41><EFBFBD>"

View file

@ -0,0 +1,16 @@
--TEST--
DOM\HTMLDocument edge case encoding 04
--EXTENSIONS--
dom
--FILE--
<?php
// UTF-8 -> UTF-8
$dom = DOM\HTMLDocument::createEmpty();
// Create a UTF-8 string where an invalid UTF-8 byte sequence falls over the boundary of the 4096 byte buffer
$dom->append(str_repeat("A", 4096 - 2) . "\xff\xff\xff");
var_dump($dom->saveHTML());
?>
--EXPECT--
string(4103) "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA<41><41><EFBFBD>"

View file

@ -0,0 +1,21 @@
--TEST--
DOM\HTMLDocument edge case encoding 05
--EXTENSIONS--
dom
--FILE--
<?php
// The document is declared as GB18030 and is padded so that the four-byte
// sequence at the end straddles the 4096 byte offset.
$prefix = '<!doctype html><html><head><meta charset="gb18030"></head><body>';
$fourByteSequence = "\x90\x30\xd5\x30";
$html = str_pad($prefix, 4094, 'A') . $fourByteSequence;

$document = DOM\HTMLDocument::createFromString($html);
var_dump($document->encoding);

// Switch the encoding to UTF-8 before serializing.
$document->encoding = 'UTF-8';
var_dump($document->saveHTML());
?>
--EXPECT--
string(7) "gb18030"
string(4112) "<!DOCTYPE html><html><head><meta charset="gb18030"></head><body>AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA𐍈</body></html>"

View file

@ -0,0 +1,16 @@
--TEST--
DOM\HTMLDocument edge case encoding 06
--EXTENSIONS--
dom
--FILE--
<?php
// An empty document defaults to UTF-8, so input and output encodings match.
$document = DOM\HTMLDocument::createEmpty();

// Append 4095 bytes of padding followed by a broken two-byte fragment, so that
// the broken bytes start at offset 4095 and cross the 4096 byte offset.
$padding = str_pad('', 4095, 'A');
$truncatedSequence = "\xf0\x90";
$document->append($padding . $truncatedSequence);

var_dump($document->saveHTML());
?>
--EXPECT--
string(4101) "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA<41><41>"

View file

@ -0,0 +1,21 @@
--TEST--
DOM\HTMLDocument edge case encoding 07
--EXTENSIONS--
dom
--FILE--
<?php
// The document is declared as GB18030 and ends in a broken two-byte fragment
// that crosses the 4096 byte offset.
$prefix = '<!doctype html><html><head><meta charset="gb18030"></head><body>';
$truncatedSequence = "\x90\x30";
$html = str_pad($prefix, 4095, 'A') . $truncatedSequence;

$document = DOM\HTMLDocument::createFromString($html);
var_dump($document->encoding);

// Switch the encoding to UTF-8 before serializing.
$document->encoding = 'UTF-8';
var_dump($document->saveHTML());
?>
--EXPECT--
string(7) "gb18030"
string(4115) "<!DOCTYPE html><html><head><meta charset="gb18030"></head><body>AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA<41><41></body></html>"

View file

@ -0,0 +1,43 @@
--TEST--
DOM\HTMLDocument test values for encoding field
--EXTENSIONS--
dom
--FILE--
<?php
$document = DOM\HTMLDocument::createEmpty();

// Assigns an encoding label, reports a rejected value, and dumps the resulting encoding.
$assignEncoding = static function (?string $label) use ($document): void {
    try {
        $document->encoding = $label;
    } catch (ValueError $error) {
        echo $error->getMessage(), "\n";
    }
    var_dump($document->encoding);
};

var_dump($document->encoding);
$assignEncoding('CSeuckr');
$assignEncoding('nope');
$assignEncoding('Windows-1251');
$assignEncoding(null);
echo $document->saveHTML();

try {
    DOM\HTMLDocument::createEmpty('bogus');
} catch (ValueError $error) {
    echo $error->getMessage(), "\n";
}
?>
--EXPECT--
string(5) "UTF-8"
string(6) "EUC-KR"
Invalid document encoding
string(6) "EUC-KR"
string(12) "windows-1251"
Invalid document encoding
string(12) "windows-1251"
DOM\HTMLDocument::createEmpty(): Argument #1 ($encoding) must be a valid document encoding

View file

@ -0,0 +1,26 @@
--TEST--
DOM\HTMLDocument loading with unicode codepoints resulting in an error
--EXTENSIONS--
dom
--FILE--
<?php
// The fixture is loaded once from its path and once from its contents; both
// loads are expected to emit parse warnings.
$fixturePath = __DIR__ . '/utf16le_error.html';

echo "--- createFromFile ---\n";
DOM\HTMLDocument::createFromFile($fixturePath);

echo "--- createFromString ---\n";
DOM\HTMLDocument::createFromString(file_get_contents($fixturePath));
?>
--EXPECTF--
--- createFromFile ---
Warning: DOM\HTMLDocument::createFromFile(): tokenizer error missing-end-tag-name in %s on line %d
Warning: DOM\HTMLDocument::createFromFile(): tree error unexpected-token in %s on line %d
Warning: DOM\HTMLDocument::createFromFile(): tree error unexpected-token in %s on line %d
--- createFromString ---
Warning: DOM\HTMLDocument::createFromString(): tokenizer error missing-end-tag-name in Entity, line: 7, column: 29 in %s on line %d
Warning: DOM\HTMLDocument::createFromString(): tree error unexpected-token in Entity, line: 7, column: 14-17 in %s on line %d
Warning: DOM\HTMLDocument::createFromString(): tree error unexpected-token in Entity, line: 8, column: 7-10 in %s on line %d

View file

@ -0,0 +1,23 @@
--TEST--
DOM\HTMLDocument fallback encoding test
--EXTENSIONS--
dom
--FILE--
<?php
// The fixture declares an unrecognised charset, so the encoding is expected to
// fall back to UTF-8.
$dom = DOM\HTMLDocument::createFromFile(__DIR__ . "/fallback_encoding.html");
var_dump($dom->encoding);
echo $dom->saveHTML();
?>
--EXPECT--
string(5) "UTF-8"
<!DOCTYPE html><html><head>
<meta charset="I don't exist">
</head>
<body>
</body></html>

View file

@ -0,0 +1,36 @@
--TEST--
DOM\HTMLDocument: overrideEncoding with incompatible charset
--EXTENSIONS--
iconv
dom
--FILE--
<?php
// The markup is converted from UTF-8 to ISO-8859-1 and declares iso-8859-1,
// but is parsed with overrideEncoding set to utf-8.
$markup = <<<'DOC'
<!DOCTYPE html>
<html>
<head>
<meta charset="iso-8859-1">
<title>äöü</title>
</head>
<body>
äöü
</body>
</html>
DOC;

$document = DOM\HTMLDocument::createFromString(
    iconv('ISO-8859-1', 'UTF-8', $markup),
    overrideEncoding: 'utf-8',
);

foreach (['title', 'body'] as $tagName) {
    $text = $document->getElementsByTagName($tagName)->item(0)->textContent;
    var_dump(iconv('UTF-8', 'ISO-8859-1', $text));
}
?>
--EXPECT--
string(3) "äöü"
string(9) "
äöü
"

View file

@ -0,0 +1,6 @@
<!doctype html>
<head>
<meta charset="I don't exist">
</head>
<body>
</body>

View file

@ -0,0 +1,7 @@
<!doctype html>
<head>
<meta charset="gb18030">
</head>
<body>
H¨¦llo, world!
</body>

View file

@ -0,0 +1,7 @@
<!doctype html>
<head>
<title>No charset!</title>
</head>
<body>
H¨¦llo, world!
</body>

View file

@ -0,0 +1,7 @@
<!doctype html>
<head>
<meta charset="shift_jis">
</head>
<body>
<p>やあ</p>
</body>

Binary file not shown.

Binary file not shown.

Binary file not shown.

View file

@ -0,0 +1,7 @@
<!doctype html>
<head>
<!-- intentional lies and deceit -->
<meta charset="utf-16">
</head>
<body>
</body>

View file

@ -0,0 +1,7 @@
<!doctype html>
<head>
<meta charset="windows-1251">
</head>
<body>
A ô B á C
</body>

View file

@ -0,0 +1,30 @@
--TEST--
DOM\HTMLDocument adopts a DOMDocument
--EXTENSIONS--
dom
--FILE--
<?php
// Parse an HTML 4.01 document with the legacy DOMDocument class.
$legacyDocument = new DOMDocument();
$legacyDocument->loadHTML(<<<HTML
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
</head>
<body>
</body>
</html>
HTML);

// Move its root element into an empty DOM\HTMLDocument and serialize the result.
$htmlDocument = DOM\HTMLDocument::createEmpty();
$adoptedRoot = $htmlDocument->adoptNode($legacyDocument->documentElement);
$htmlDocument->appendChild($adoptedRoot);
echo $htmlDocument->saveHTML();
?>
--EXPECT--
<html>
<head>
</head>
<body>
</body>
</html>

View file

@ -0,0 +1,23 @@
--TEST--
Cloning a DOM\HTMLDocument
--EXTENSIONS--
dom
--FILE--
<?php
$original = DOM\HTMLDocument::createFromString("<p>foo</p>");
$copy = clone $original;
$copyRoot = $copy->firstChild;

var_dump($copyRoot->tagName);
var_dump($copyRoot->textContent);

// Drop the only direct reference to the clone, then reach it through its root element.
unset($copy);
var_dump(get_class($copyRoot->ownerDocument));
?>
--EXPECTF--
Warning: DOM\HTMLDocument::createFromString(): tree error unexpected-token-in-initial-mode in Entity, line: 1, column: 2 in %s on line %d
string(4) "html"
string(3) "foo"
string(16) "DOM\HTMLDocument"

View file

@ -0,0 +1,47 @@
--TEST--
DOM\HTMLDocument::documentURI
--EXTENSIONS--
dom
--FILE--
<?php
// Load the document from a real file first.
$dom = DOM\HTMLDocument::createFromFile(__DIR__ . "/test foo.html", LIBXML_NOERROR);
var_dump($dom->documentURI);
// Then load it from a php://memory stream that holds "foobar".
$memory = fopen("php://memory", "w+");
fwrite($memory, "foobar");
rewind($memory);
$dom = DOM\HTMLDocument::createFromFile("php://memory");
var_dump($dom->documentURI);
fclose($memory);
/**
 * Minimal stream wrapper stub: every open succeeds, reads yield no data, and
 * EOF is reported immediately.
 */
class DummyWrapper {
    /** @var resource|null Left untyped because PHP has no resource type declaration. */
    public $context;

    public function stream_open(string $path, string $mode, int $options, ?string &$opened_path): bool {
        return true;
    }

    public function stream_read(int $count): string|false {
        return "";
    }

    public function stream_eof(): bool {
        return true;
    }

    public function stream_close(): bool {
        return true;
    }
}
stream_wrapper_register("dummy", DummyWrapper::class);
$dom = DOM\HTMLDocument::createFromFile("dummy://foo/ bar");
var_dump($dom->documentURI);
?>
--EXPECTF--
string(%d) "file:/%stest%20foo.html"
string(12) "php://memory"
string(16) "dummy://foo/ bar"

View file

@ -0,0 +1,98 @@
--TEST--
Test DOM\HTMLDocument::getElementsByTagName(NS)
--EXTENSIONS--
dom
--FILE--
<?php
$dom = DOM\HTMLDocument::createFromString(<<<HTML
<!DOCTYPE html>
<html>
<head>
<title>Test</title>
</head>
<body>
<h1>Test</h1>
<p>Test</p>
<svg width="100" height="100">
<circle cx="0" cy="0" r="10"/>
</svg>
<math>
<mtable id="table"></mtable>
</math>
</body>
</html>
HTML);

$localNames = ['p', 'math', 'mtable', 'svg', 'circle'];

// Dumps the node name of the first match for each local name, or NULL when nothing matches.
$dumpFirstMatches = static function (callable $lookup) use ($localNames): void {
    foreach ($localNames as $localName) {
        var_dump($lookup($localName)[0]?->nodeName);
    }
};

echo "--- getElementsByTagName ---\n";
$dumpFirstMatches($dom->getElementsByTagName(...));

$namespaces = [
    '*' => '*',
    'xhtml' => 'http://www.w3.org/1999/xhtml',
    'svg' => 'http://www.w3.org/2000/svg',
    'math' => 'http://www.w3.org/1998/Math/MathML',
];

foreach ($namespaces as $label => $namespace) {
    echo "--- getElementsByTagNameNS ({$label}) ---\n";
    $dumpFirstMatches(
        static fn (string $localName) => $dom->getElementsByTagNameNS($namespace, $localName),
    );
}
?>
--EXPECT--
--- getElementsByTagName ---
string(1) "p"
string(4) "math"
string(6) "mtable"
string(3) "svg"
string(6) "circle"
--- getElementsByTagNameNS (*) ---
string(1) "p"
string(4) "math"
string(6) "mtable"
string(3) "svg"
string(6) "circle"
--- getElementsByTagNameNS (xhtml) ---
string(1) "p"
NULL
NULL
NULL
NULL
--- getElementsByTagNameNS (svg) ---
NULL
NULL
NULL
string(3) "svg"
string(6) "circle"
--- getElementsByTagNameNS (math) ---
NULL
string(4) "math"
string(6) "mtable"
NULL
NULL

View file

@ -0,0 +1,17 @@
--TEST--
DOM\HTMLDocument::registerNodeClass 01
--EXTENSIONS--
dom
--FILE--
<?php
// DOMDocument does not derive from DOM\HTMLDocument, so this registration is
// expected to be rejected with an Error.
$dom = new DOMDocument();
$dom->registerNodeClass("DOM\\HTMLDocument", "DOMDocument");
?>
--EXPECTF--
Fatal error: Uncaught Error: DOM\Document::registerNodeClass(): Argument #2 ($extendedClass) must be a class name derived from DOM\HTMLDocument or null, DOMDocument given in %s:%d
Stack trace:
#0 %s(%d): DOM\Document->registerNodeClass('DOM\\HTMLDocumen...', 'DOMDocument')
#1 {main}
thrown in %s on line %d

View file

@ -0,0 +1,36 @@
--TEST--
DOM\HTMLDocument::registerNodeClass 02
--EXTENSIONS--
dom
--FILE--
<?php
class Custom extends DOM\Document
{
    public function foo()
    {
    }
}

$legacyDocument = new DOMDocument();
try {
    $legacyDocument->registerNodeClass('DOM\Document', 'Custom');
} catch (ValueError $error) {
    echo $error->getMessage(), "\n";
}

$element = $legacyDocument->appendChild($legacyDocument->createElement('foo'));
unset($legacyDocument);
var_dump(get_class($element->ownerDocument));

// Should fail
$element->ownerDocument->foo();
?>
--EXPECTF--
DOM\Document::registerNodeClass(): Argument #1 ($baseClass) must not be an abstract class
string(11) "DOMDocument"
Fatal error: Uncaught Error: Call to undefined method DOMDocument::foo() in %s:%d
Stack trace:
#0 {main}
thrown in %s on line %d

View file

@ -0,0 +1,18 @@
--TEST--
DOM\HTMLDocument::saveHTMLFile() empty path
--EXTENSIONS--
dom
--FILE--
<?php
// An empty filename is rejected with a ValueError.
$dom = DOM\HTMLDocument::createEmpty();
$dom->appendChild($dom->createElement("root"));
$dom->saveHTMLFile("");
?>
--EXPECTF--
Fatal error: Uncaught ValueError: DOM\HTMLDocument::saveHTMLFile(): Argument #1 ($filename) must not be empty in %s:%d
Stack trace:
#0 %s(%d): DOM\HTMLDocument->saveHTMLFile('')
#1 {main}
thrown in %s on line %d

View file

@ -0,0 +1,17 @@
--TEST--
DOM\HTMLDocument::saveHTML() wrong document
--EXTENSIONS--
dom
--FILE--
<?php
// Passing a node that belongs to a different document is expected to throw a
// "Wrong Document Error" DOMException.
$dom = DOM\HTMLDocument::createEmpty();
$dom->saveHTML(DOM\HTMLDocument::createEmpty());
?>
--EXPECTF--
Fatal error: Uncaught DOMException: Wrong Document Error in %s:%d
Stack trace:
#0 %s(%d): DOM\HTMLDocument->saveHTML(Object(DOM\HTMLDocument))
#1 {main}
thrown in %s on line %d

View file

@ -0,0 +1,78 @@
--TEST--
DOM\HTMLDocument should retain properties and ownerDocument relation 01
--EXTENSIONS--
dom
--FILE--
<?php
$dom = DOM\HTMLDocument::createFromString("<p>foo</p>", LIBXML_NOERROR);
$dom->strictErrorChecking = false;
// Destroy reference to the DOM
$child = $dom->documentElement;
unset($dom);
// Regain reference using the ownerDocument property
// Should be a DOM\HTMLDocument
$dom = $child->ownerDocument;
var_dump($dom);
// Test if property is preserved (any random doc_props property will do)
var_dump($dom->strictErrorChecking);
?>
--EXPECT--
object(DOM\HTMLDocument)#1 (26) {
["encoding"]=>
string(5) "UTF-8"
["doctype"]=>
NULL
["documentElement"]=>
string(22) "(object value omitted)"
["strictErrorChecking"]=>
bool(false)
["documentURI"]=>
NULL
["firstElementChild"]=>
string(22) "(object value omitted)"
["lastElementChild"]=>
string(22) "(object value omitted)"
["childElementCount"]=>
int(1)
["nodeName"]=>
string(9) "#document"
["nodeValue"]=>
NULL
["nodeType"]=>
int(13)
["parentNode"]=>
NULL
["parentElement"]=>
NULL
["childNodes"]=>
string(22) "(object value omitted)"
["firstChild"]=>
string(22) "(object value omitted)"
["lastChild"]=>
string(22) "(object value omitted)"
["previousSibling"]=>
NULL
["nextSibling"]=>
NULL
["attributes"]=>
NULL
["isConnected"]=>
bool(true)
["ownerDocument"]=>
NULL
["namespaceURI"]=>
NULL
["prefix"]=>
string(0) ""
["localName"]=>
NULL
["baseURI"]=>
NULL
["textContent"]=>
string(3) "foo"
}
bool(false)

View file

@ -0,0 +1,78 @@
--TEST--
DOM\HTMLDocument should retain properties and ownerDocument relation 02
--EXTENSIONS--
dom
--FILE--
<?php
$dom = DOM\HTMLDocument::createFromString("<p>foo</p>", LIBXML_NOERROR);
$dom->strictErrorChecking = false;
$child = $dom->appendChild($dom->createElement('html'));
// Destroy reference to the DOM
unset($dom);
// Regain reference using the ownerDocument property
// Expected to be a DOM\HTMLDocument
$dom = $child->ownerDocument;
var_dump($dom);
// Test if property is preserved (any random doc_props property will do)
var_dump($dom->strictErrorChecking);
?>
--EXPECT--
object(DOM\HTMLDocument)#1 (26) {
["encoding"]=>
string(5) "UTF-8"
["doctype"]=>
NULL
["documentElement"]=>
string(22) "(object value omitted)"
["strictErrorChecking"]=>
bool(false)
["documentURI"]=>
NULL
["firstElementChild"]=>
string(22) "(object value omitted)"
["lastElementChild"]=>
string(22) "(object value omitted)"
["childElementCount"]=>
int(2)
["nodeName"]=>
string(9) "#document"
["nodeValue"]=>
NULL
["nodeType"]=>
int(13)
["parentNode"]=>
NULL
["parentElement"]=>
NULL
["childNodes"]=>
string(22) "(object value omitted)"
["firstChild"]=>
string(22) "(object value omitted)"
["lastChild"]=>
string(22) "(object value omitted)"
["previousSibling"]=>
NULL
["nextSibling"]=>
NULL
["attributes"]=>
NULL
["isConnected"]=>
bool(true)
["ownerDocument"]=>
NULL
["namespaceURI"]=>
NULL
["prefix"]=>
string(0) ""
["localName"]=>
NULL
["baseURI"]=>
NULL
["textContent"]=>
string(3) "foo"
}
bool(false)

View file

@ -0,0 +1,20 @@
--TEST--
getLineNo() returns the line number of the node >= 65536
--EXTENSIONS--
dom
--FILE--
<?php
// 65536 leading newlines push the <p> element past line 65535.
$markup = <<<EOF
<!doctype html>
<body>
<p>hello</p>
</body>
EOF;
$document = DOM\HTMLDocument::createFromString(str_repeat("\n", 65536) . $markup);

$body = $document->documentElement->firstChild->nextSibling;
$paragraph = $body->firstChild->nextSibling;
var_dump($paragraph->getLineNo());
?>
--EXPECT--
int(65538)

View file

@ -0,0 +1,46 @@
--TEST--
noscript behaviour
--EXTENSIONS--
dom
--FILE--
<?php
// Prints the HTML serialization followed by the XML serialization.
$printSerializations = static function (DOM\HTMLDocument $document): void {
    echo $document->saveHTML(), "\n";
    echo $document->saveXML();
};

echo "--- Parsing ---\n";
$document = DOM\HTMLDocument::createFromString(
    '<!doctype html><html><body><noscript><p>hi</p></noscript></body></html>',
    DOM\HTML_NO_DEFAULT_NS,
);
var_dump($document->documentElement->textContent);
$printSerializations($document);

echo "--- Modifying the text content: tag ---\n";
$xpath = new DOMXPath($document);
$noscriptElement = $xpath->query('//noscript')[0];
$noscriptElement->textContent = '<p>bye</p>';
$printSerializations($document);

echo "--- Modifying the text content: trick ---\n";
$noscriptElement->textContent = '<!-- </noscript> -->';
$printSerializations($document);
?>
--EXPECT--
--- Parsing ---
string(2) "hi"
<!DOCTYPE html><html><head></head><body><noscript><p>hi</p></noscript></body></html>
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<!DOCTYPE html>
<html><head/><body><noscript><p>hi</p></noscript></body></html>
--- Modifying the text content: tag ---
<!DOCTYPE html><html><head></head><body><noscript>&lt;p&gt;bye&lt;/p&gt;</noscript></body></html>
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<!DOCTYPE html>
<html><head/><body><noscript>&lt;p&gt;bye&lt;/p&gt;</noscript></body></html>
--- Modifying the text content: trick ---
<!DOCTYPE html><html><head></head><body><noscript>&lt;!-- &lt;/noscript&gt; --&gt;</noscript></body></html>
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<!DOCTYPE html>
<html><head/><body><noscript>&lt;!-- &lt;/noscript&gt; --&gt;</noscript></body></html>

View file

@ -0,0 +1 @@
foo

View file

@ -0,0 +1,20 @@
--TEST--
Tests without running the constructor
--EXTENSIONS--
dom
--FILE--
<?php
// Both document classes are final internal classes, so Reflection is expected
// to refuse to instantiate them without running the constructor.
$documentClasses = [DOM\HTMLDocument::class, DOM\XMLDocument::class];

foreach ($documentClasses as $documentClass) {
    try {
        (new ReflectionClass($documentClass))->newInstanceWithoutConstructor();
    } catch (ReflectionException $exception) {
        echo $exception->getMessage(), "\n";
    }
}
?>
--EXPECT--
Class DOM\HTMLDocument is an internal class marked as final that cannot be instantiated without invoking its constructor
Class DOM\XMLDocument is an internal class marked as final that cannot be instantiated without invoking its constructor

View file

@ -0,0 +1,26 @@
--TEST--
DOM\HTMLDocument::createFromFile()/createFromString() BOM with a buffer on the edge
--EXTENSIONS--
dom
--FILE--
<?php
// The document starts with a UTF-8 BOM and is padded to exactly 4096 bytes, so
// that it fills the 4096 byte buffer to its edge.
$header = "\xEF\xBB\xBF<!doctype html><html><body>";
$trailer = "</body></html>";
$data = $header . str_repeat("a", 4096 - strlen($header) - strlen($trailer)) . $trailer;

$dom = DOM\HTMLDocument::createFromString($data);
var_dump($dom->textContent);

file_put_contents(__DIR__ . "/BOM_edge.tmp", $data);
$dom = DOM\HTMLDocument::createFromFile(__DIR__ . "/BOM_edge.tmp");
var_dump($dom->textContent);
?>
--CLEAN--
<?php
// Remove the temporary input file written by the test; "@" silences the warning if it is already gone.
@unlink(__DIR__ . "/BOM_edge.tmp");
?>
--EXPECT--
string(4052) "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
string(4052) "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"

View file

@ -0,0 +1,37 @@
--TEST--
DOM\HTMLDocument::createFromFile() with DOM\HTML_NO_DEFAULT_NS
--EXTENSIONS--
dom
--FILE--
<?php
// Default parsing: per the expected output, the unprefixed "//p" query matches nothing,
// while "//x:p" (prefix "x" bound to the XHTML namespace) matches one node.
$dom = DOM\HTMLDocument::createFromFile(__DIR__ . "/paragraph.html", LIBXML_HTML_NOIMPLIED | LIBXML_NOERROR);
$xpath = new DOMXPath($dom);
$xpath->registerNamespace("x", "http://www.w3.org/1999/xhtml");
var_dump($xpath->query("//p"));
var_dump($xpath->query("//x:p"));
// With DOM\HTML_NO_DEFAULT_NS the results flip: "//p" matches one node and "//x:p" matches nothing.
$dom = DOM\HTMLDocument::createFromFile(__DIR__ . "/paragraph.html", LIBXML_HTML_NOIMPLIED | LIBXML_NOERROR | DOM\HTML_NO_DEFAULT_NS);
$xpath = new DOMXPath($dom);
$xpath->registerNamespace("x", "http://www.w3.org/1999/xhtml");
var_dump($xpath->query("//p"));
var_dump($xpath->query("//x:p"));
?>
--EXPECT--
object(DOMNodeList)#3 (1) {
["length"]=>
int(0)
}
object(DOMNodeList)#4 (1) {
["length"]=>
int(1)
}
object(DOMNodeList)#3 (1) {
["length"]=>
int(1)
}
object(DOMNodeList)#3 (1) {
["length"]=>
int(0)
}

View file

@ -0,0 +1,16 @@
--TEST--
DOM\HTMLDocument::createFromFile() - empty path
--EXTENSIONS--
dom
--FILE--
<?php
// An empty path is rejected: the expected output is an uncaught ValueError "Path cannot be empty".
$dom = DOM\HTMLDocument::createFromFile("");
?>
--EXPECTF--
Fatal error: Uncaught ValueError: Path cannot be empty in %s:%d
Stack trace:
#0 %s(%d): DOM\HTMLDocument::createFromFile('')
#1 {main}
thrown in %s on line %d

View file

@ -0,0 +1,22 @@
--TEST--
DOM\HTMLDocument::createFromFile() - local existing file
--EXTENSIONS--
dom
--FILE--
<?php
// Load a fixture from disk and serialize it again. Per the expected output the parse
// emits one "unexpected-token-in-initial-mode" tree error warning before the HTML is printed.
$dom = DOM\HTMLDocument::createFromFile(__DIR__ . "/../../../test.html");
echo $dom->saveHTML(), "\n";
?>
--EXPECTF--
Warning: DOM\HTMLDocument::createFromFile(): tree error unexpected-token-in-initial-mode in %s on line %d
<html><head>
<title>Hello world</title>
</head>
<body>
This is a not well-formed<br>
html files with undeclared entities&nbsp;
</body></html>

View file

@ -0,0 +1,19 @@
--TEST--
DOM\HTMLDocument::createFromFile() - local file that does not exist
--EXTENSIONS--
dom
--FILE--
<?php
// The path does not exist: per the expected output, createFromFile() emits a
// "Failed to open stream" warning and then throws an Exception ("Cannot open file ...").
$dom = DOM\HTMLDocument::createFromFile(__DIR__ . "/../foobar");
// Not reached in the expected run, because the exception above is uncaught.
echo $dom->saveHTML(), "\n";
?>
--EXPECTF--
Warning: DOM\HTMLDocument::createFromFile(%s): Failed to open stream: No such file or directory in %s on line %d
Fatal error: Uncaught Exception: Cannot open file '%s' in %s:%d
Stack trace:
#0 %s(%d): DOM\HTMLDocument::createFromFile('%s')
#1 {main}
thrown in %s on line %d

View file

@ -0,0 +1,22 @@
--TEST--
DOM\HTMLDocument::createFromFile() - paths containing NUL bytes (raw and percent-encoded)
--EXTENSIONS--
dom
--FILE--
<?php
// A raw NUL byte and its percent-encoded form are both passed as the path;
// each ValueError message raised by createFromFile() is printed.
$invalidPaths = ["\0", '%00'];
foreach ($invalidPaths as $invalidPath) {
    try {
        DOM\HTMLDocument::createFromFile($invalidPath);
    } catch (ValueError $error) {
        echo $error->getMessage(), "\n";
    }
}
?>
--EXPECT--
DOM\HTMLDocument::createFromFile(): Argument #1 ($path) must not contain any null bytes
DOM\HTMLDocument::createFromFile(): Argument #1 ($path) must not contain percent-encoded NUL bytes

View file

@ -0,0 +1,19 @@
--TEST--
DOM\HTMLDocument::createFromFile() - parser warning 01
--EXTENSIONS--
dom
--FILE--
<?php
// LIBXML_NOERROR is passed, and the expected output contains only the serialized document (no warnings).
$dom = DOM\HTMLDocument::createFromFile(__DIR__."/parser_warning_01.html", LIBXML_NOERROR);
echo $dom->saveHTML(), "\n";
?>
--EXPECT--
<html><head><title>foo</title>
</head><body><datalist id="fruits">
<option value="Apple">
</option><option value="Banana">
</option><option value="Pear">
</option></datalist>
<p>error</p></body></html>

View file

@ -0,0 +1,20 @@
--TEST--
DOM\HTMLDocument::createFromFile() - parser warning 02
--EXTENSIONS--
dom
--FILE--
<?php
// LIBXML_NOERROR is passed, and the expected output contains only the serialized document (no warnings).
$dom = DOM\HTMLDocument::createFromFile(__DIR__."/parser_warning_02.html", LIBXML_NOERROR);
echo $dom->saveHTML(), "\n";
?>
--EXPECT--
<!DOCTYPE html><html><head></head><body>
<p<>>foo<p></p>
<p id="foo" class="bar">
<!--ELEMENT br EMPTY-->
<!-- <!-- nested --> --&gt;
</p></p<>></body></html>

View file

@ -0,0 +1,16 @@
--TEST--
DOM\HTMLDocument::createFromFile() - parser warning 03
--EXTENSIONS--
dom
--FILE--
<?php
// LIBXML_NOERROR is passed, and the expected output contains only the serialized document (no warnings).
$dom = DOM\HTMLDocument::createFromFile(__DIR__."/parser_warning_03.html", LIBXML_NOERROR);
echo $dom->saveHTML(), "\n";
?>
--EXPECT--
<!DOCTYPE html><html><head></head><body>
<p>
</p></body></html>

View file

@ -0,0 +1,30 @@
--TEST--
DOM\HTMLDocument::createFromFile() - parser warning libxml_get_last_error()
--EXTENSIONS--
dom
--FILE--
<?php
// Record parser diagnostics in libxml's internal error list rather than
// surfacing them as PHP warnings (the expected output contains no warning).
libxml_use_internal_errors(true);
$dom = DOM\HTMLDocument::createFromFile(__DIR__."/parser_warning_01.html");
// Per the expected output, the last recorded error is the
// "unexpected-token-in-initial-mode" tree error, reported with the file name,
// line 1 and column 2.
var_dump(libxml_get_last_error());
?>
--EXPECTF--
object(LibXMLError)#2 (6) {
["level"]=>
int(2)
["code"]=>
int(1)
["column"]=>
int(2)
["message"]=>
string(%d) "tree error unexpected-token-in-initial-mode in %sparser_warning_01.html, line: 1, column: 2-6"
["file"]=>
string(%d) "%sparser_warning_01.html"
["line"]=>
int(1)
}

View file

@ -0,0 +1,44 @@
--TEST--
DOM\HTMLDocument::createFromFile() with failing stream wrapper
--EXTENSIONS--
dom
--FILE--
<?php
/**
 * Userland stream wrapper whose first read succeeds and whose every later
 * read throws an Error("fail"). The stream never reports EOF.
 */
class FailingWrapper {
    public $context;
    public $fail = false;

    /** Accepts any path and mode. */
    public function stream_open($path, $mode, $options, &$opened_path) {
        return true;
    }

    /** Returns $count "X" bytes on the first call; throws on subsequent calls. */
    public function stream_read($count) {
        $alreadyReadOnce = $this->fail;
        $this->fail = true;
        if ($alreadyReadOnce) {
            throw new Error("fail");
        }
        return str_repeat("X", $count);
    }

    /** Always false, so the consumer keeps asking for more data. */
    public function stream_eof() {
        return false;
    }

    public function stream_close() {
        return true;
    }
}
// Expose FailingWrapper under the "fail://" scheme, then parse from it. Per the expected
// output, the Error thrown inside stream_read() propagates out of createFromFile() uncaught.
stream_wrapper_register("fail", FailingWrapper::class, 0);
DOM\HTMLDocument::createFromFile("fail://x");
?>
--EXPECTF--
Fatal error: Uncaught Error: fail in %s:%d
Stack trace:
#0 [internal function]: FailingWrapper->stream_read(8192)
#1 %s(%d): DOM\HTMLDocument::createFromFile('fail://x')
#2 {main}
thrown in %s on line %d

View file

@ -0,0 +1,60 @@
--TEST--
DOM\HTMLDocument::createFromFile() with working stream wrapper
--EXTENSIONS--
dom
--FILE--
<?php
/**
 * Userland stream wrapper that serves the text of its own URI (minus the
 * leading scheme prefix) as the stream contents.
 */
class EchoUriWrapper {
    private int $position = 0;
    private string $data;
    public $context;

    /** Stores everything after the first 6 bytes of $path (the length of "euw://") as the payload. */
    public function stream_open($path, $mode, $options, &$opened_path) {
        $this->data = substr($path, 6);
        return true;
    }

    /** Returns up to $count bytes from the current offset and advances the offset by $count. */
    public function stream_read($count) {
        $start = $this->position;
        $this->position = $start + $count;
        return substr($this->data, $start, $count);
    }

    /** EOF once the offset has reached or passed the end of the payload. */
    public function stream_eof() {
        return strlen($this->data) <= $this->position;
    }

    public function stream_close() {
        return true;
    }
}
// Expose EchoUriWrapper under the "euw://" scheme so the URI text itself becomes the document.
stream_wrapper_register("euw", EchoUriWrapper::class, 0);
echo "--- Stream wrapper case ---\n";
// No doctype in the input: the expected output shows one tree error warning that names the euw:// URI.
$dom = DOM\HTMLDocument::createFromFile("euw://<p>hello</p>");
echo $dom->saveHTML(), "\n";
echo "--- Stream wrapper in two chunks case ---\n";
// From here on errors are collected internally so their positions can be inspected below.
libxml_use_internal_errors(true);
// To properly test this, keep the 4096 in sync with document.c's input stream buffer size.
// The newline padding pushes the stray "<>" onto line 4075 (4096-22 newlines precede it),
// which is the line/column pair the expected output reports.
$dom = DOM\HTMLDocument::createFromFile("euw://<!doctype html><html>" . str_repeat("\n", 4096-22) . "<></html>");
echo $dom->saveHTML(), "\n";
// Dump line and column of every recorded error.
foreach (libxml_get_errors() as $error) {
var_dump($error->line, $error->column);
}
?>
--EXPECTF--
--- Stream wrapper case ---
Warning: DOM\HTMLDocument::createFromFile(): tree error unexpected-token-in-initial-mode in euw://<p>hello</p>, line: 1, column: 2 in %s on line %d
<html><head></head><body><p>hello</p></body></html>
--- Stream wrapper in two chunks case ---
<!DOCTYPE html><html><head></head><body>&lt;&gt;</body></html>
int(4075)
int(2)

View file

@ -0,0 +1,37 @@
--TEST--
DOM\HTMLDocument::createFromString() with DOM\HTML_NO_DEFAULT_NS
--EXTENSIONS--
dom
--FILE--
<?php
// Default parsing: per the expected output, the unprefixed "//p" query matches nothing,
// while "//x:p" (prefix "x" bound to the XHTML namespace) matches one node.
$dom = DOM\HTMLDocument::createFromString(file_get_contents(__DIR__ . "/paragraph.html"), LIBXML_HTML_NOIMPLIED | LIBXML_NOERROR);
$xpath = new DOMXPath($dom);
$xpath->registerNamespace("x", "http://www.w3.org/1999/xhtml");
var_dump($xpath->query("//p"));
var_dump($xpath->query("//x:p"));
// With DOM\HTML_NO_DEFAULT_NS the results flip: "//p" matches one node and "//x:p" matches nothing.
$dom = DOM\HTMLDocument::createFromString(file_get_contents(__DIR__ . "/paragraph.html"), LIBXML_HTML_NOIMPLIED | LIBXML_NOERROR | DOM\HTML_NO_DEFAULT_NS);
$xpath = new DOMXPath($dom);
$xpath->registerNamespace("x", "http://www.w3.org/1999/xhtml");
var_dump($xpath->query("//p"));
var_dump($xpath->query("//x:p"));
?>
--EXPECT--
object(DOMNodeList)#3 (1) {
["length"]=>
int(0)
}
object(DOMNodeList)#4 (1) {
["length"]=>
int(1)
}
object(DOMNodeList)#3 (1) {
["length"]=>
int(1)
}
object(DOMNodeList)#3 (1) {
["length"]=>
int(0)
}

View file

@ -0,0 +1,40 @@
--TEST--
DOM\HTMLDocument::createFromString() with LIBXML_COMPACT
--EXTENSIONS--
dom
--FILE--
<?php
// Parse paragraphs whose text grows one character at a time, with LIBXML_COMPACT enabled.
// NOTE(review): presumably this probes the size threshold of libxml's compact text-node storage
// (see the "this does not fit" paragraph) — confirm against the libxml2 documentation.
$dom = DOM\HTMLDocument::createFromString(<<<HTML
<!DOCTYPE HTML>
<html>
<head>
</head>
<body>
<p> x </p>
<p>foo</p>
<p>foox</p>
<p>fooxx</p>
<p>fooxxx</p>
<p>fooxxxx</p>
<p>fooxxxxx</p>
<p>this does not fit</p>
</body>
</html>
HTML, LIBXML_COMPACT);
$xpath = new DOMXPath($dom);
// Select <p> elements by name() and print each one's text content.
foreach ($xpath->query("//*[name()='p']") as $p) {
echo $p->textContent, "\n";
}
?>
--EXPECT--
x
foo
foox
fooxx
fooxxx
fooxxxx
fooxxxxx
this does not fit

View file

@ -0,0 +1,13 @@
--TEST--
DOM\HTMLDocument::createFromString() with LIBXML_HTML_NOIMPLIED - tree error should not happen
--EXTENSIONS--
dom
--FILE--
<?php
// LIBXML_NOERROR is deliberately NOT passed: the expected output contains only the markup,
// i.e. a fragment parsed with LIBXML_HTML_NOIMPLIED must not produce a tree error warning.
$dom = DOM\HTMLDocument::createFromString("<div>foo</div>", LIBXML_HTML_NOIMPLIED);
echo $dom->saveHTML();
?>
--EXPECT--
<div>foo</div>

View file

@ -0,0 +1,45 @@
--TEST--
DOM\HTMLDocument::createFromString() with LIBXML_HTML_NOIMPLIED namespace check
--EXTENSIONS--
dom
--FILE--
<?php
// Empty input: per the expected output, only the XML declaration is serialized.
echo "--- No elements ---\n";
$dom = DOM\HTMLDocument::createFromString("", LIBXML_HTML_NOIMPLIED | LIBXML_NOERROR);
echo $dom->saveXML();
// One top-level element: expected to be in the XHTML namespace with an empty prefix.
echo "--- Single element ---\n";
$dom = DOM\HTMLDocument::createFromString("<p>foo</p>", LIBXML_HTML_NOIMPLIED | LIBXML_NOERROR);
echo $dom->saveXML();
var_dump($dom->documentElement->namespaceURI);
var_dump($dom->documentElement->prefix);
// Two top-level siblings: both the document element and its next sibling are checked
// for the same namespace URI and empty prefix.
echo "--- Multiple elements ---\n";
$dom = DOM\HTMLDocument::createFromString("<p>foo</p><strong>bar</strong>", LIBXML_HTML_NOIMPLIED | LIBXML_NOERROR);
echo $dom->saveXML();
var_dump($dom->documentElement->namespaceURI);
var_dump($dom->documentElement->prefix);
var_dump($dom->documentElement->nextSibling->namespaceURI);
var_dump($dom->documentElement->nextSibling->prefix);
?>
--EXPECT--
--- No elements ---
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
--- Single element ---
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<p xmlns="http://www.w3.org/1999/xhtml">foo</p>
string(28) "http://www.w3.org/1999/xhtml"
string(0) ""
--- Multiple elements ---
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<p xmlns="http://www.w3.org/1999/xhtml">foo</p>
<strong xmlns="http://www.w3.org/1999/xhtml">bar</strong>
string(28) "http://www.w3.org/1999/xhtml"
string(0) ""
string(28) "http://www.w3.org/1999/xhtml"
string(0) ""

View file

@ -0,0 +1,13 @@
--TEST--
DOM\HTMLDocument::createFromString() - empty document
--EXTENSIONS--
dom
--FILE--
<?php
// An empty string still yields a full skeleton: the expected output is <html><head></head><body></body></html>.
$dom = DOM\HTMLDocument::createFromString('');
echo $dom->saveHTML(), "\n";
?>
--EXPECT--
<html><head></head><body></body></html>

View file

@ -0,0 +1,90 @@
--TEST--
DOM\HTMLDocument::createFromString()/createFromFile() with LIBXML_HTML_NOIMPLIED
--EXTENSIONS--
dom
--FILE--
<?php
/**
 * Parses $html with LIBXML_HTML_NOIMPLIED | LIBXML_NOERROR, prints the input
 * and its HTML serialization, then dumps whether loading the same markup from
 * a temporary file serializes to the identical string.
 *
 * @param string $html Markup to parse.
 */
function test(string $html): void {
    $options = LIBXML_HTML_NOIMPLIED | LIBXML_NOERROR;
    $tempPath = __DIR__."/DOM_HTMLDocument_loadHTML_LIBXML_HTML_NOIMPLIED_input.tmp";

    echo "Testing: $html\n";
    $dom = DOM\HTMLDocument::createFromString($html, $options);
    $output = $dom->saveHTML();
    echo $output, "\n";

    // Also test the createFromFile() variation. We won't print out the result,
    // just checking the result is the same.
    file_put_contents($tempPath, $html);
    $dom = DOM\HTMLDocument::createFromFile($tempPath, $options);
    var_dump($output === $dom->saveHTML());
}
// Section title => inputs handed to test(), in the order they must be printed.
$sections = [
    "Missing html, head, body" => [
        "",
        "foobarbaz",
        "<p>foo</p>",
    ],
    "Missing html, head" => [
        "<body><p>foo</p></body>",
        "<title>x</title><p>foo</p>",
    ],
    "Missing html, body" => [
        "<!-- bar --><head><title>x</title><!-- foo --></head><p>foo</p><!-- bar -->",
    ],
    "Missing html" => [
        "<head><title>x</title></head><body><p>foo</p></body>",
    ],
    "Missing head, body" => [
        "<html>foobar</html>",
        "<html><!-- foo --><title>a</title><p>foo</p></html><!-- bar -->",
    ],
    "Missing head" => [
        "<html><!-- foo --><body>hi</body></html>",
    ],
    "Missing nothing" => [
        "<html><head><title>x</title></head><!-- foo --><body><p>foo</p></body></html>",
    ],
    "Malformed document" => [
        "<!-- start --><body><head><html>foo</html></head></body><!-- end -->",
    ],
];
foreach ($sections as $title => $inputs) {
    echo "--- $title ---\n";
    foreach ($inputs as $input) {
        test($input);
    }
}
?>
--CLEAN--
<?php
// Remove the temporary input file written by the test; "@" silences the warning if it is already gone.
@unlink(__DIR__."/DOM_HTMLDocument_loadHTML_LIBXML_HTML_NOIMPLIED_input.tmp");
?>
--EXPECT--
--- Missing html, head, body ---
Testing:
bool(true)
Testing: foobarbaz
foobarbaz
bool(true)
Testing: <p>foo</p>
<p>foo</p>
bool(true)
--- Missing html, head ---
Testing: <body><p>foo</p></body>
<body><p>foo</p></body>
bool(true)
Testing: <title>x</title><p>foo</p>
<title>x</title><p>foo</p>
bool(true)
--- Missing html, body ---
Testing: <!-- bar --><head><title>x</title><!-- foo --></head><p>foo</p><!-- bar -->
<!-- bar --><head><title>x</title><!-- foo --></head><p>foo</p><!-- bar -->
bool(true)
--- Missing html ---
Testing: <head><title>x</title></head><body><p>foo</p></body>
<head><title>x</title></head><body><p>foo</p></body>
bool(true)
--- Missing head, body ---
Testing: <html>foobar</html>
<html>foobar</html>
bool(true)
Testing: <html><!-- foo --><title>a</title><p>foo</p></html><!-- bar -->
<html><!-- foo --><title>a</title><p>foo</p></html><!-- bar -->
bool(true)
--- Missing head ---
Testing: <html><!-- foo --><body>hi</body></html>
<html><!-- foo --><body>hi</body></html>
bool(true)
--- Missing nothing ---
Testing: <html><head><title>x</title></head><!-- foo --><body><p>foo</p></body></html>
<html><head><title>x</title></head><!-- foo --><body><p>foo</p></body></html>
bool(true)
--- Malformed document ---
Testing: <!-- start --><body><head><html>foo</html></head></body><!-- end -->
<!-- start --><body>foo</body><!-- end -->
bool(true)

View file

@ -0,0 +1,57 @@
--TEST--
DOM\HTMLDocument::createFromString() - line and column test
--EXTENSIONS--
dom
--FILE--
<?php
// The heredoc layout matters: the reported line numbers depend on where each node sits in it.
$dom = DOM\HTMLDocument::createFromString(<<<HTML
<!doctype html>
<html>
<head>
<title>foo</title>
</head>
<body>
<div id="mydiv" x="foo">
<p>
<strong>This is my paragraph</strong>
<!-- my comment -->
</p>
</div>
</body>
</html>
HTML);
$xpath = new DOMXPath($dom);
// Line number of every element in the document.
foreach ($xpath->query("//*") as $element) {
echo "Element: '", $element->tagName, "', ", $element->getLineNo(), "\n";
}
// Line number of the text node inside <strong>.
foreach ($xpath->query("//*[name()='strong']") as $element) {
echo "Text: '", $element->textContent, "', ", $element->firstChild->getLineNo(), "\n";
}
// Line number of each attribute on the <div>.
foreach ($xpath->query("//*[name()='div']") as $element) {
foreach ($element->attributes as $attribute) {
echo "Attribute: '", $attribute->nodeName, "', ", $attribute->getLineNo(), "\n";
}
}
// Line number of each comment node.
foreach ($xpath->query("//comment()") as $comment) {
echo "Comment: '", $comment->data, "', ", $comment->getLineNo(), "\n";
}
?>
--EXPECT--
Element: 'html', 1
Element: 'head', 2
Element: 'title', 3
Element: 'body', 5
Element: 'div', 6
Element: 'p', 7
Element: 'strong', 8
Text: 'This is my paragraph', 8
Attribute: 'id', 6
Attribute: 'x', 6
Comment: ' my comment ', 9

View file

@ -0,0 +1,39 @@
--TEST--
DOM\HTMLDocument::createFromString() - normal document, no error
--EXTENSIONS--
dom
--FILE--
<?php
// The closing p tag breaks libxml2's HTML parser, but doesn't break the HTML5 parser due to the script context parsing rules.
$html = <<<HTML
<!DOCTYPE HTML>
<html>
<head>
<meta charset="utf-8">
<title>foo</title>
</head>
<body>
<script>
var foo = "</p>";
</script>
<p test="<script>">bar <!-- hi --></p>
</body>
</html>
HTML;
// No error-suppression flag is passed, and the expected output contains only the
// serialized document, so this input must parse without any warning.
$dom = DOM\HTMLDocument::createFromString($html);
echo $dom->saveHTML(), "\n";
?>
--EXPECT--
<!DOCTYPE html><html><head>
<meta charset="utf-8">
<title>foo</title>
</head>
<body>
<script>
var foo = "</p>";
</script>
<p test="<script>">bar <!-- hi --></p>
</body></html>

View file

@ -0,0 +1,39 @@
--TEST--
DOM\HTMLDocument::createFromString(): Old DTD
--EXTENSIONS--
dom
--FILE--
<?php
// HTML 4.01 Strict doctype: per the expected output, parsing emits a
// "bad-doctype-token-in-initial-mode" tree error warning.
$dom = DOM\HTMLDocument::createFromString(<<<HTML
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
</head>
<body>
</body>
</html>
HTML);
// Expected: the HTML serialization prints the short "<!DOCTYPE html>" form ...
echo "--- HTML serialization ---\n";
echo $dom->saveHTML(), "\n";
// ... while the XML serialization keeps the PUBLIC and SYSTEM identifiers.
echo "--- XML serialization ---\n";
echo $dom->saveXML();
?>
--EXPECTF--
Warning: DOM\HTMLDocument::createFromString(): tree error bad-doctype-token-in-initial-mode in Entity, line: 1, column: 3-9 in %s on line %d
--- HTML serialization ---
<!DOCTYPE html><html><head>
</head>
<body>
</body></html>
--- XML serialization ---
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml"><head>
</head>
<body>
</body></html>

Some files were not shown because too many files have changed in this diff Show more