From aa3e6eec504a6acfef68c730246828e4d83bca6c Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sun, 12 May 2024 01:57:29 +0200 Subject: [PATCH] Optimize and reduce memory usage of XML serialization (#14204) The serialization process uses the system allocator and requires a copy to request-allocated memory once finished. This patch improves this by using smart_str to build the resulting string, reducing the number of copies and reducing total peak memory usage. --- UPGRADING | 4 ++++ ext/dom/xml_document.c | 32 ++++++++++++++------------------ ext/libxml/libxml.c | 41 +++++++++++++++++------------------------ 3 files changed, 35 insertions(+), 42 deletions(-) diff --git a/UPGRADING b/UPGRADING index 819f141b944..0afe7cd2269 100644 --- a/UPGRADING +++ b/UPGRADING @@ -693,6 +693,7 @@ PHP 8.4 UPGRADE NOTES . The performance of DOMNode::C14N() is greatly improved for the case without an xpath query. This can give a time improvement of easily two order of magnitude for documents with tens of thousands of nodes. + . Improved performance and reduced memory consumption of XML serialization. - FTP: . Improved the performance of FTP uploads up to a factor of 10x for large @@ -708,6 +709,9 @@ PHP 8.4 UPGRADE NOTES - MySQLnd: . Improved the performance of MySQLnd quoting. +- SimpleXML: + . Improved performance and reduced memory consumption of XML serialization. + - Standard: . Improved the performance of strpbrk(). . get_browser() is much faster now, up to 1.5x - 2.5x for some test cases. 
diff --git a/ext/dom/xml_document.c b/ext/dom/xml_document.c index 32eaa4b4cf9..2ad0ebc895d 100644 --- a/ext/dom/xml_document.c +++ b/ext/dom/xml_document.c @@ -250,42 +250,38 @@ PHP_METHOD(Dom_XMLDocument, createFromFile) load_from_helper(INTERNAL_FUNCTION_PARAM_PASSTHRU, DOM_LOAD_FILE); } +static int php_new_dom_write_smart_str(void *context, const char *buffer, int len) +{ + smart_str *str = context; + smart_str_appendl(str, buffer, len); + return len; +} + static zend_string *php_new_dom_dump_node_to_str(xmlDocPtr doc, xmlNodePtr node, bool format, const char *encoding) { - xmlBufferPtr buf = xmlBufferCreate(); - if (!buf) { - return NULL; - } + smart_str str = {0}; int status = -1; - xmlSaveCtxtPtr ctxt = xmlSaveToBuffer(buf, encoding, XML_SAVE_AS_XML); + xmlSaveCtxtPtr ctxt = xmlSaveToIO(php_new_dom_write_smart_str, NULL, &str, encoding, XML_SAVE_AS_XML); if (EXPECTED(ctxt != NULL)) { xmlCharEncodingHandlerPtr handler = xmlFindCharEncodingHandler(encoding); - xmlOutputBufferPtr out = xmlOutputBufferCreateBuffer(buf, handler); + xmlOutputBufferPtr out = xmlOutputBufferCreateIO(php_new_dom_write_smart_str, NULL, &str, handler); if (EXPECTED(out != NULL)) { status = dom_xml_serialize(ctxt, out, node, format); status |= xmlOutputBufferFlush(out); status |= xmlOutputBufferClose(out); + } else { + xmlCharEncCloseFunc(handler); } (void) xmlSaveClose(ctxt); - xmlCharEncCloseFunc(handler); } if (UNEXPECTED(status < 0)) { - xmlBufferFree(buf); + smart_str_free_ex(&str, false); return NULL; } - const xmlChar *content = xmlBufferContent(buf); - if (!content) { - xmlBufferFree(buf); - return NULL; - } - - int size = xmlBufferLength(buf); - zend_string *res = zend_string_init((const char *) content, size, false); - xmlBufferFree(buf); - return res; + return smart_str_extract(&str); } static zend_string *php_new_dom_dump_doc_to_str(xmlDocPtr doc, int options, const char *encoding) diff --git a/ext/libxml/libxml.c b/ext/libxml/libxml.c index 858da6922c0..3822deed914 100644 
--- a/ext/libxml/libxml.c +++ b/ext/libxml/libxml.c @@ -1481,43 +1481,38 @@ PHP_LIBXML_API xmlChar *php_libxml_attr_value(const xmlAttr *attr, bool *free) return value; } +static int php_libxml_write_smart_str(void *context, const char *buffer, int len) +{ + smart_str *str = context; + smart_str_appendl(str, buffer, len); + return len; +} + static zend_string *php_libxml_default_dump_doc_to_str(xmlDocPtr doc, int options, const char *encoding) { - xmlBufferPtr buf = xmlBufferCreate(); - if (!buf) { - return NULL; - } + smart_str str = {0}; /* Encoding is handled from the encoding property set on the document */ - xmlSaveCtxtPtr ctxt = xmlSaveToBuffer(buf, encoding, options); + xmlSaveCtxtPtr ctxt = xmlSaveToIO(php_libxml_write_smart_str, NULL, &str, encoding, options); if (!ctxt) { - xmlBufferFree(buf); return NULL; } long status = xmlSaveDoc(ctxt, doc); (void) xmlSaveClose(ctxt); if (status < 0) { - xmlBufferFree(buf); + smart_str_free_ex(&str, false); return NULL; } - const xmlChar *content = xmlBufferContent(buf); - if (!content) { - xmlBufferFree(buf); - return NULL; - } - - int size = xmlBufferLength(buf); - zend_string *str = zend_string_init((const char *) content, size, false); - xmlBufferFree(buf); - return str; + return smart_str_extract(&str); } static zend_string *php_libxml_default_dump_node_to_str(xmlDocPtr doc, xmlNodePtr node, bool format, const char *encoding) { - // TODO: should this alloc take an encoding? For now keep it NULL for BC. - xmlOutputBufferPtr buf = xmlAllocOutputBuffer(NULL); + smart_str str = {0}; + // TODO: should this buffer take an encoding? For now keep it NULL for BC. 
+ xmlOutputBufferPtr buf = xmlOutputBufferCreateIO(php_libxml_write_smart_str, NULL, &str, NULL); if (!buf) { return NULL; } @@ -1525,16 +1520,14 @@ static zend_string *php_libxml_default_dump_node_to_str(xmlDocPtr doc, xmlNodePt xmlNodeDumpOutput(buf, doc, node, 0, format, encoding); if (xmlOutputBufferFlush(buf) < 0) { + smart_str_free_ex(&str, false); xmlOutputBufferClose(buf); return NULL; } - const xmlChar *content = xmlOutputBufferGetContent(buf); - size_t size = xmlOutputBufferGetSize(buf); - - zend_string *str = zend_string_init((const char *) content, size, false); xmlOutputBufferClose(buf); - return str; + + return smart_str_extract(&str); } static zend_long php_libxml_default_dump_doc_to_file(const char *filename, xmlDocPtr doc, bool format, const char *encoding)