Optimize and reduce memory usage of XML serialization (#14204)

The serialization process uses the system allocator and requires a copy
into request-allocated memory once finished. This patch improves this by
using smart_str to build the resulting string, reducing the number of
copies and reducing total peak memory usage.
This commit is contained in:
Niels Dossche 2024-05-12 01:57:29 +02:00 committed by GitHub
parent 8776561581
commit aa3e6eec50
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 35 additions and 42 deletions

View file

@ -693,6 +693,7 @@ PHP 8.4 UPGRADE NOTES
. The performance of DOMNode::C14N() is greatly improved for the case without . The performance of DOMNode::C14N() is greatly improved for the case without
an xpath query. This can give a time improvement of easily two orders of an xpath query. This can give a time improvement of easily two orders of
magnitude for documents with tens of thousands of nodes. magnitude for documents with tens of thousands of nodes.
. Improved performance and reduced memory consumption of XML serialization.
- FTP: - FTP:
. Improved the performance of FTP uploads up to a factor of 10x for large . Improved the performance of FTP uploads up to a factor of 10x for large
@ -708,6 +709,9 @@ PHP 8.4 UPGRADE NOTES
- MySQLnd: - MySQLnd:
. Improved the performance of MySQLnd quoting. . Improved the performance of MySQLnd quoting.
- SimpleXML:
. Improved performance and reduced memory consumption of XML serialization.
- Standard: - Standard:
. Improved the performance of strpbrk(). . Improved the performance of strpbrk().
. get_browser() is much faster now, up to 1.5x - 2.5x for some test cases. . get_browser() is much faster now, up to 1.5x - 2.5x for some test cases.

View file

@ -250,42 +250,38 @@ PHP_METHOD(Dom_XMLDocument, createFromFile)
load_from_helper(INTERNAL_FUNCTION_PARAM_PASSTHRU, DOM_LOAD_FILE); load_from_helper(INTERNAL_FUNCTION_PARAM_PASSTHRU, DOM_LOAD_FILE);
} }
static zend_string *php_new_dom_dump_node_to_str(xmlDocPtr doc, xmlNodePtr node, bool format, const char *encoding) static int php_new_dom_write_smart_str(void *context, const char *buffer, int len)
{ {
xmlBufferPtr buf = xmlBufferCreate(); smart_str *str = context;
if (!buf) { smart_str_appendl(str, buffer, len);
return NULL; return len;
} }
static zend_string *php_new_dom_dump_node_to_str(xmlDocPtr doc, xmlNodePtr node, bool format, const char *encoding)
{
smart_str str = {0};
int status = -1; int status = -1;
xmlSaveCtxtPtr ctxt = xmlSaveToBuffer(buf, encoding, XML_SAVE_AS_XML); xmlSaveCtxtPtr ctxt = xmlSaveToIO(php_new_dom_write_smart_str, NULL, &str, encoding, XML_SAVE_AS_XML);
if (EXPECTED(ctxt != NULL)) { if (EXPECTED(ctxt != NULL)) {
xmlCharEncodingHandlerPtr handler = xmlFindCharEncodingHandler(encoding); xmlCharEncodingHandlerPtr handler = xmlFindCharEncodingHandler(encoding);
xmlOutputBufferPtr out = xmlOutputBufferCreateBuffer(buf, handler); xmlOutputBufferPtr out = xmlOutputBufferCreateIO(php_new_dom_write_smart_str, NULL, &str, handler);
if (EXPECTED(out != NULL)) { if (EXPECTED(out != NULL)) {
status = dom_xml_serialize(ctxt, out, node, format); status = dom_xml_serialize(ctxt, out, node, format);
status |= xmlOutputBufferFlush(out); status |= xmlOutputBufferFlush(out);
status |= xmlOutputBufferClose(out); status |= xmlOutputBufferClose(out);
} else {
xmlCharEncCloseFunc(handler);
} }
(void) xmlSaveClose(ctxt); (void) xmlSaveClose(ctxt);
xmlCharEncCloseFunc(handler);
} }
if (UNEXPECTED(status < 0)) { if (UNEXPECTED(status < 0)) {
xmlBufferFree(buf); smart_str_free_ex(&str, false);
return NULL; return NULL;
} }
const xmlChar *content = xmlBufferContent(buf); return smart_str_extract(&str);
if (!content) {
xmlBufferFree(buf);
return NULL;
}
int size = xmlBufferLength(buf);
zend_string *res = zend_string_init((const char *) content, size, false);
xmlBufferFree(buf);
return res;
} }
static zend_string *php_new_dom_dump_doc_to_str(xmlDocPtr doc, int options, const char *encoding) static zend_string *php_new_dom_dump_doc_to_str(xmlDocPtr doc, int options, const char *encoding)

View file

@ -1481,43 +1481,38 @@ PHP_LIBXML_API xmlChar *php_libxml_attr_value(const xmlAttr *attr, bool *free)
return value; return value;
} }
static zend_string *php_libxml_default_dump_doc_to_str(xmlDocPtr doc, int options, const char *encoding) static int php_libxml_write_smart_str(void *context, const char *buffer, int len)
{ {
xmlBufferPtr buf = xmlBufferCreate(); smart_str *str = context;
if (!buf) { smart_str_appendl(str, buffer, len);
return NULL; return len;
} }
static zend_string *php_libxml_default_dump_doc_to_str(xmlDocPtr doc, int options, const char *encoding)
{
smart_str str = {0};
/* Encoding is handled from the encoding property set on the document */ /* Encoding is handled from the encoding property set on the document */
xmlSaveCtxtPtr ctxt = xmlSaveToBuffer(buf, encoding, options); xmlSaveCtxtPtr ctxt = xmlSaveToIO(php_libxml_write_smart_str, NULL, &str, encoding, options);
if (!ctxt) { if (!ctxt) {
xmlBufferFree(buf);
return NULL; return NULL;
} }
long status = xmlSaveDoc(ctxt, doc); long status = xmlSaveDoc(ctxt, doc);
(void) xmlSaveClose(ctxt); (void) xmlSaveClose(ctxt);
if (status < 0) { if (status < 0) {
xmlBufferFree(buf); smart_str_free_ex(&str, false);
return NULL; return NULL;
} }
const xmlChar *content = xmlBufferContent(buf); return smart_str_extract(&str);
if (!content) {
xmlBufferFree(buf);
return NULL;
}
int size = xmlBufferLength(buf);
zend_string *str = zend_string_init((const char *) content, size, false);
xmlBufferFree(buf);
return str;
} }
static zend_string *php_libxml_default_dump_node_to_str(xmlDocPtr doc, xmlNodePtr node, bool format, const char *encoding) static zend_string *php_libxml_default_dump_node_to_str(xmlDocPtr doc, xmlNodePtr node, bool format, const char *encoding)
{ {
// TODO: should this alloc take an encoding? For now keep it NULL for BC. smart_str str = {0};
xmlOutputBufferPtr buf = xmlAllocOutputBuffer(NULL); // TODO: should this buffer take an encoding? For now keep it NULL for BC.
xmlOutputBufferPtr buf = xmlOutputBufferCreateIO(php_libxml_write_smart_str, NULL, &str, NULL);
if (!buf) { if (!buf) {
return NULL; return NULL;
} }
@ -1525,16 +1520,14 @@ static zend_string *php_libxml_default_dump_node_to_str(xmlDocPtr doc, xmlNodePt
xmlNodeDumpOutput(buf, doc, node, 0, format, encoding); xmlNodeDumpOutput(buf, doc, node, 0, format, encoding);
if (xmlOutputBufferFlush(buf) < 0) { if (xmlOutputBufferFlush(buf) < 0) {
smart_str_free_ex(&str, false);
xmlOutputBufferClose(buf); xmlOutputBufferClose(buf);
return NULL; return NULL;
} }
const xmlChar *content = xmlOutputBufferGetContent(buf);
size_t size = xmlOutputBufferGetSize(buf);
zend_string *str = zend_string_init((const char *) content, size, false);
xmlOutputBufferClose(buf); xmlOutputBufferClose(buf);
return str;
return smart_str_extract(&str);
} }
static zend_long php_libxml_default_dump_doc_to_file(const char *filename, xmlDocPtr doc, bool format, const char *encoding) static zend_long php_libxml_default_dump_doc_to_file(const char *filename, xmlDocPtr doc, bool format, const char *encoding)