Use strcspn() to optimize dom_html5_escape_string() (#12948)

* Use strcspn() to optimize dom_html5_escape_string()

This routine, as implemented by libc, uses a faster algorithm than the old
naive byte-by-byte approach used here. It is also often optimized using
SIMD.

* Calculate mask outside of loop
This commit is contained in:
Niels Dossche 2023-12-16 12:47:57 +00:00 committed by GitHub
parent 82baeeb196
commit 0870da3364
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -70,7 +70,17 @@ static zend_result dom_html5_escape_string(dom_html5_serialize_context *ctx, con
{ {
const char *last_output = content; const char *last_output = content;
while (*content != '\0') { /* Note: uses UTF-8 internally, so <C2 A0> indicates a non-breaking space */
const char *mask = attribute_mode ? "&\xC2\"" : "&\xC2<>";
while (true) {
size_t chunk_length = strcspn(content, mask);
content += chunk_length;
if (*content == '\0') {
break;
}
switch (*content) { switch (*content) {
/* Step 1 */ /* Step 1 */
case '&': { case '&': {
@ -93,29 +103,23 @@ static zend_result dom_html5_escape_string(dom_html5_serialize_context *ctx, con
/* Step 3 */ /* Step 3 */
case '"': { case '"': {
if (attribute_mode) { TRY(ctx->write_string_len(ctx->application_data, last_output, content - last_output));
TRY(ctx->write_string_len(ctx->application_data, last_output, content - last_output)); TRY(ctx->write_string_len(ctx->application_data, "&quot;", strlen("&quot;")));
TRY(ctx->write_string_len(ctx->application_data, "&quot;", strlen("&quot;"))); last_output = content + 1;
last_output = content + 1;
}
break; break;
} }
/* Step 4 */ /* Step 4 */
case '<': { case '<': {
if (!attribute_mode) { TRY(ctx->write_string_len(ctx->application_data, last_output, content - last_output));
TRY(ctx->write_string_len(ctx->application_data, last_output, content - last_output)); TRY(ctx->write_string_len(ctx->application_data, "&lt;", strlen("&lt;")));
TRY(ctx->write_string_len(ctx->application_data, "&lt;", strlen("&lt;"))); last_output = content + 1;
last_output = content + 1;
}
break; break;
} }
case '>': { case '>': {
if (!attribute_mode) { TRY(ctx->write_string_len(ctx->application_data, last_output, content - last_output));
TRY(ctx->write_string_len(ctx->application_data, last_output, content - last_output)); TRY(ctx->write_string_len(ctx->application_data, "&gt;", strlen("&gt;")));
TRY(ctx->write_string_len(ctx->application_data, "&gt;", strlen("&gt;"))); last_output = content + 1;
last_output = content + 1;
}
break; break;
} }
} }