From 0870da33648188edcec1d92820257446dd8bc69c Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sat, 16 Dec 2023 12:47:57 +0000 Subject: [PATCH] Use strcspn() to optimize dom_html5_escape_string() (#12948) * Use strcspn() to optimize dom_html5_escape_string() This routine implemented by libc uses a faster algorithm than the old naive byte-per-byte approach here. It also is often optimized using SIMD. * Calculate mask outside of loop --- ext/dom/html5_serializer.c | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/ext/dom/html5_serializer.c b/ext/dom/html5_serializer.c index f0d43f09afb..f61254257ab 100644 --- a/ext/dom/html5_serializer.c +++ b/ext/dom/html5_serializer.c @@ -70,7 +70,17 @@ static zend_result dom_html5_escape_string(dom_html5_serialize_context *ctx, con { const char *last_output = content; - while (*content != '\0') { + /* Note: uses UTF-8 internally, so <C2 A0> indicates a non-breaking space */ + const char *mask = attribute_mode ? 
"&\xC2\"" : "&\xC2<>"; + + while (true) { + size_t chunk_length = strcspn(content, mask); + + content += chunk_length; + if (*content == '\0') { + break; + } + switch (*content) { /* Step 1 */ case '&': { @@ -93,29 +103,23 @@ static zend_result dom_html5_escape_string(dom_html5_serialize_context *ctx, con /* Step 3 */ case '"': { - if (attribute_mode) { - TRY(ctx->write_string_len(ctx->application_data, last_output, content - last_output)); - TRY(ctx->write_string_len(ctx->application_data, "&quot;", strlen("&quot;"))); - last_output = content + 1; - } + TRY(ctx->write_string_len(ctx->application_data, last_output, content - last_output)); + TRY(ctx->write_string_len(ctx->application_data, "&quot;", strlen("&quot;"))); + last_output = content + 1; break; } /* Step 4 */ case '<': { - if (!attribute_mode) { - TRY(ctx->write_string_len(ctx->application_data, last_output, content - last_output)); - TRY(ctx->write_string_len(ctx->application_data, "&lt;", strlen("&lt;"))); - last_output = content + 1; - } + TRY(ctx->write_string_len(ctx->application_data, last_output, content - last_output)); + TRY(ctx->write_string_len(ctx->application_data, "&lt;", strlen("&lt;"))); + last_output = content + 1; break; } case '>': { - if (!attribute_mode) { - TRY(ctx->write_string_len(ctx->application_data, last_output, content - last_output)); - TRY(ctx->write_string_len(ctx->application_data, "&gt;", strlen("&gt;"))); - last_output = content + 1; - } + TRY(ctx->write_string_len(ctx->application_data, last_output, content - last_output)); + TRY(ctx->write_string_len(ctx->application_data, "&gt;", strlen("&gt;"))); + last_output = content + 1; break; } }