Optimize in-memory XMLWriter

We're currently using a libxml buffer, which requires copying the buffer
to zend_strings every time we want to output the string. Furthermore,
its use of the system allocator instead of ZendMM makes it not count
towards the memory_limit and hinders performance.

This patch adds a custom writer so that the strings are written to a
smart_str instance, which uses ZendMM for improved performance and makes
it possible to avoid copying the string in the common case where flush()
is called with its `empty` argument set to true.

Closes GH-16120.
This commit is contained in:
Niels Dossche 2024-09-29 12:35:36 +02:00
parent 63e0b9ccbf
commit f5e81fe182
No known key found for this signature in database
GPG key ID: B8A8AD166DF0E2E5
5 changed files with 85 additions and 30 deletions

3
NEWS
View file

@ -19,4 +19,7 @@ PHP NEWS
. Fixed bug #49169 (SoapServer calls wrong function, although "SOAP action" . Fixed bug #49169 (SoapServer calls wrong function, although "SOAP action"
header is correct). (nielsdos) header is correct). (nielsdos)
- XMLWriter:
. Improved performance and reduced memory consumption. (nielsdos)
<<< NOTE: Insert NEWS from last stable release here prior to actual release! >>> <<< NOTE: Insert NEWS from last stable release here prior to actual release! >>>

View file

@ -95,3 +95,6 @@ PHP 8.5 UPGRADE NOTES
======================================== ========================================
14. Performance Improvements 14. Performance Improvements
======================================== ========================================
- XMLWriter:
. Improved performance and reduced memory consumption.

View file

@ -24,6 +24,7 @@
#include "ext/standard/info.h" #include "ext/standard/info.h"
#include "php_xmlwriter.h" #include "php_xmlwriter.h"
#include "php_xmlwriter_arginfo.h" #include "php_xmlwriter_arginfo.h"
#include "zend_smart_str.h"
static zend_class_entry *xmlwriter_class_entry_ce; static zend_class_entry *xmlwriter_class_entry_ce;
@ -47,11 +48,9 @@ static zend_object_handlers xmlwriter_object_handlers;
static zend_always_inline void xmlwriter_destroy_libxml_objects(ze_xmlwriter_object *intern) static zend_always_inline void xmlwriter_destroy_libxml_objects(ze_xmlwriter_object *intern)
{ {
if (intern->ptr) { if (intern->ptr) {
/* Note: this call will also free the output pointer. */
xmlFreeTextWriter(intern->ptr); xmlFreeTextWriter(intern->ptr);
intern->ptr = NULL; intern->ptr = NULL;
}
if (intern->output) {
xmlBufferFree(intern->output);
intern->output = NULL; intern->output = NULL;
} }
} }
@ -178,14 +177,14 @@ static char *_xmlwriter_get_valid_file_path(char *source, char *resolved_path, i
} }
/* }}} */ /* }}} */
static void xml_writer_create_static(INTERNAL_FUNCTION_PARAMETERS, xmlTextWriterPtr writer, xmlBufferPtr output) static void xml_writer_create_static(INTERNAL_FUNCTION_PARAMETERS, xmlTextWriterPtr writer, smart_str *output)
{ {
if (object_init_with_constructor(return_value, Z_CE_P(ZEND_THIS), 0, NULL, NULL) == SUCCESS) { if (object_init_with_constructor(return_value, Z_CE_P(ZEND_THIS), 0, NULL, NULL) == SUCCESS) {
ze_xmlwriter_object *intern = Z_XMLWRITER_P(return_value); ze_xmlwriter_object *intern = Z_XMLWRITER_P(return_value);
intern->ptr = writer; intern->ptr = writer;
intern->output = output; intern->output = output;
} else { } else {
xmlBufferFree(output); // output is freed by writer, so we don't need to free it here.
xmlFreeTextWriter(writer); xmlFreeTextWriter(writer);
} }
} }
@ -877,11 +876,45 @@ PHP_METHOD(XMLWriter, toUri)
xml_writer_create_static(INTERNAL_FUNCTION_PARAM_PASSTHRU, writer, NULL); xml_writer_create_static(INTERNAL_FUNCTION_PARAM_PASSTHRU, writer, NULL);
} }
static int xml_writer_stream_write_memory(void *context, const char *buffer, int len)
{
smart_str *output = context;
smart_str_appendl(output, buffer, len);
return len;
}
static int xml_writer_stream_close_memory(void *context)
{
smart_str *output = context;
smart_str_free_ex(output, false);
efree(output);
return 0;
}
/* Creates a libxml text writer whose output is collected in a smart_str.
 *
 * On success the writer is returned and the freshly allocated smart_str is
 * stored in *output_ptr. On failure NULL is returned, *output_ptr is left
 * untouched and the smart_str has already been released. */
static xmlTextWriterPtr xml_writer_create_in_memory(smart_str **output_ptr)
{
	/* Start from an all-zero smart_str. */
	smart_str *sink = emalloc(sizeof(*sink));
	memset(sink, 0, sizeof(*sink));

	xmlOutputBufferPtr io_buffer = xmlOutputBufferCreateIO(xml_writer_stream_write_memory, xml_writer_stream_close_memory, sink, NULL);
	if (!io_buffer) {
		/* No output buffer exists to run the close callback, so free the sink directly. */
		efree(sink);
		return NULL;
	}

	xmlTextWriterPtr writer = xmlNewTextWriter(io_buffer);
	if (writer != NULL) {
		*output_ptr = sink;
		return writer;
	}

	/* Closing the output buffer runs the close callback, which frees the sink too. */
	xmlOutputBufferClose(io_buffer);
	return NULL;
}
/* {{{ Create new xmlwriter using memory for string output */ /* {{{ Create new xmlwriter using memory for string output */
PHP_FUNCTION(xmlwriter_open_memory) PHP_FUNCTION(xmlwriter_open_memory)
{ {
xmlTextWriterPtr ptr;
xmlBufferPtr buffer;
zval *self = getThis(); zval *self = getThis();
ze_xmlwriter_object *ze_obj = NULL; ze_xmlwriter_object *ze_obj = NULL;
@ -894,28 +927,21 @@ PHP_FUNCTION(xmlwriter_open_memory)
ze_obj = Z_XMLWRITER_P(self); ze_obj = Z_XMLWRITER_P(self);
} }
buffer = xmlBufferCreate(); smart_str *output;
xmlTextWriterPtr ptr = xml_writer_create_in_memory(&output);
if (buffer == NULL) {
php_error_docref(NULL, E_WARNING, "Unable to create output buffer");
RETURN_FALSE;
}
ptr = xmlNewTextWriterMemory(buffer, 0);
if (! ptr) { if (! ptr) {
xmlBufferFree(buffer);
RETURN_FALSE; RETURN_FALSE;
} }
if (self) { if (self) {
xmlwriter_destroy_libxml_objects(ze_obj); xmlwriter_destroy_libxml_objects(ze_obj);
ze_obj->ptr = ptr; ze_obj->ptr = ptr;
ze_obj->output = buffer; ze_obj->output = output;
RETURN_TRUE; RETURN_TRUE;
} else { } else {
ze_obj = php_xmlwriter_fetch_object(xmlwriter_object_new(xmlwriter_class_entry_ce)); ze_obj = php_xmlwriter_fetch_object(xmlwriter_object_new(xmlwriter_class_entry_ce));
ze_obj->ptr = ptr; ze_obj->ptr = ptr;
ze_obj->output = buffer; ze_obj->output = output;
RETURN_OBJ(&ze_obj->std); RETURN_OBJ(&ze_obj->std);
} }
@ -926,17 +952,16 @@ PHP_METHOD(XMLWriter, toMemory)
{ {
ZEND_PARSE_PARAMETERS_NONE(); ZEND_PARSE_PARAMETERS_NONE();
xmlBufferPtr buffer = xmlBufferCreate(); smart_str *output;
xmlTextWriterPtr writer = xmlNewTextWriterMemory(buffer, 0); xmlTextWriterPtr writer = xml_writer_create_in_memory(&output);
/* No need for an explicit buffer check as this will fail on a NULL buffer. */ /* No need for an explicit buffer check as this will fail on a NULL buffer. */
if (!writer) { if (!writer) {
xmlBufferFree(buffer);
zend_throw_error(NULL, "Could not construct libxml writer"); zend_throw_error(NULL, "Could not construct libxml writer");
RETURN_THROWS(); RETURN_THROWS();
} }
xml_writer_create_static(INTERNAL_FUNCTION_PARAM_PASSTHRU, writer, buffer); xml_writer_create_static(INTERNAL_FUNCTION_PARAM_PASSTHRU, writer, output);
} }
static int xml_writer_stream_write(void *context, const char *buffer, int len) static int xml_writer_stream_write(void *context, const char *buffer, int len)
@ -992,7 +1017,6 @@ PHP_METHOD(XMLWriter, toStream)
/* {{{ php_xmlwriter_flush */ /* {{{ php_xmlwriter_flush */
static void php_xmlwriter_flush(INTERNAL_FUNCTION_PARAMETERS, int force_string) { static void php_xmlwriter_flush(INTERNAL_FUNCTION_PARAMETERS, int force_string) {
xmlTextWriterPtr ptr; xmlTextWriterPtr ptr;
xmlBufferPtr buffer;
bool empty = 1; bool empty = 1;
int output_bytes; int output_bytes;
zval *self; zval *self;
@ -1002,16 +1026,18 @@ static void php_xmlwriter_flush(INTERNAL_FUNCTION_PARAMETERS, int force_string)
} }
XMLWRITER_FROM_OBJECT(ptr, self); XMLWRITER_FROM_OBJECT(ptr, self);
buffer = Z_XMLWRITER_P(self)->output; smart_str *output = Z_XMLWRITER_P(self)->output;
if (force_string == 1 && buffer == NULL) { if (force_string == 1 && output == NULL) {
RETURN_EMPTY_STRING(); RETURN_EMPTY_STRING();
} }
output_bytes = xmlTextWriterFlush(ptr); output_bytes = xmlTextWriterFlush(ptr);
if (buffer) { if (output) {
const xmlChar *content = xmlBufferContent(buffer);
RETVAL_STRING((const char *) content);
if (empty) { if (empty) {
xmlBufferEmpty(buffer); RETURN_STR(smart_str_extract(output));
} else if (smart_str_get_len(output) > 0) {
RETURN_NEW_STR(zend_string_dup(output->s, false));
} else {
RETURN_EMPTY_STRING();
} }
} else { } else {
RETVAL_LONG(output_bytes); RETVAL_LONG(output_bytes);

View file

@ -35,7 +35,7 @@ extern zend_module_entry xmlwriter_module_entry;
/* Extends zend object */ /* Extends zend object */
typedef struct _ze_xmlwriter_object { typedef struct _ze_xmlwriter_object {
xmlTextWriterPtr ptr; xmlTextWriterPtr ptr;
xmlBufferPtr output; smart_str *output;
zend_object std; zend_object std;
} ze_xmlwriter_object; } ze_xmlwriter_object;

View file

@ -0,0 +1,23 @@
--TEST--
XMLWriter::toMemory() with combinations of empty flush and non-empty flush
--EXTENSIONS--
xmlwriter
--FILE--
<?php
$writer = XMLWriter::toMemory();
var_dump($writer->flush(empty: false));
$writer->startElement('foo');
var_dump($writer->flush(empty: false));
$writer->endElement();
var_dump($writer->flush(empty: false));
var_dump($writer->flush());
var_dump($writer->flush());
?>
--EXPECT--
string(0) ""
string(4) "<foo"
string(6) "<foo/>"
string(6) "<foo/>"
string(0) ""