mirror of
https://github.com/php/php-src.git
synced 2025-08-15 21:48:51 +02:00
Optimized php_addslashes with SSE4.2 instructions
According to the benchmark (https://gist.github.com/laruence/fd0d443d2c5bacca9d8ab99250499956), this reduces execution time by more than 30%. In the future, this opens the door to possible SSE4.2 optimizations in other places.
This commit is contained in:
parent
0e10bed83e
commit
98aa3a65c4
4 changed files with 485 additions and 7 deletions
21
acinclude.m4
21
acinclude.m4
|
@ -3222,9 +3222,30 @@ AC_DEFUN([PHP_CHECK_BUILTIN_SSUBLL_OVERFLOW], [
|
|||
|
||||
])
|
||||
|
||||
dnl PHP_CHECK_BUILTIN_CPU_INIT
|
||||
AC_DEFUN([PHP_CHECK_BUILTIN_CPU_INIT], [
  dnl Defines PHP_HAVE_BUILTIN_CPU_INIT to 1 when a program that calls
  dnl __builtin_cpu_init links successfully, and to 0 otherwise.
  AC_MSG_CHECKING([for __builtin_cpu_init])

  dnl Call the builtin as a bare statement, which is how the C sources use
  dnl it, so the probe does not depend on the return type of the builtin.
  AC_TRY_LINK(, [
    __builtin_cpu_init();
  ], [
    have_builtin_cpu_init=1
    AC_MSG_RESULT([yes])
  ], [
    have_builtin_cpu_init=0
    AC_MSG_RESULT([no])
  ])

  AC_DEFINE_UNQUOTED([PHP_HAVE_BUILTIN_CPU_INIT],
   [$have_builtin_cpu_init], [Whether the compiler supports __builtin_cpu_init])

])
|
||||
|
||||
dnl Load the AX_CHECK_COMPILE_FLAG macro from the autoconf archive.
|
||||
m4_include([build/ax_check_compile_flag.m4])
|
||||
|
||||
m4_include([build/ax_gcc_func_attribute.m4])
|
||||
|
||||
dnl PHP_CHECK_VALGRIND
|
||||
AC_DEFUN([PHP_CHECK_VALGRIND], [
|
||||
AC_MSG_CHECKING([for valgrind])
|
||||
|
|
241
build/ax_gcc_func_attribute.m4
Normal file
241
build/ax_gcc_func_attribute.m4
Normal file
|
@ -0,0 +1,241 @@
|
|||
# ===========================================================================
|
||||
# https://www.gnu.org/software/autoconf-archive/ax_gcc_func_attribute.html
|
||||
# ===========================================================================
|
||||
#
|
||||
# SYNOPSIS
|
||||
#
|
||||
# AX_GCC_FUNC_ATTRIBUTE(ATTRIBUTE)
|
||||
#
|
||||
# DESCRIPTION
|
||||
#
|
||||
# This macro checks if the compiler supports one of GCC's function
|
||||
# attributes; many other compilers also provide function attributes with
|
||||
# the same syntax. Compiler warnings are used to detect supported
|
||||
# attributes as unsupported ones are ignored by default so quieting
|
||||
# warnings when using this macro will yield false positives.
|
||||
#
|
||||
# The ATTRIBUTE parameter holds the name of the attribute to be checked.
|
||||
#
|
||||
# If ATTRIBUTE is supported define HAVE_FUNC_ATTRIBUTE_<ATTRIBUTE>.
|
||||
#
|
||||
# The macro caches its result in the ax_cv_have_func_attribute_<attribute>
|
||||
# variable.
|
||||
#
|
||||
# The macro currently supports the following function attributes:
|
||||
#
|
||||
# alias
|
||||
# aligned
|
||||
# alloc_size
|
||||
# always_inline
|
||||
# artificial
|
||||
# cold
|
||||
# const
|
||||
# constructor
|
||||
# constructor_priority for constructor attribute with priority
|
||||
# deprecated
|
||||
# destructor
|
||||
# dllexport
|
||||
# dllimport
|
||||
# error
|
||||
# externally_visible
|
||||
# fallthrough
|
||||
# flatten
|
||||
# format
|
||||
# format_arg
|
||||
# gnu_inline
|
||||
# hot
|
||||
# ifunc
|
||||
# leaf
|
||||
# malloc
|
||||
# noclone
|
||||
# noinline
|
||||
# nonnull
|
||||
# noreturn
|
||||
# nothrow
|
||||
# optimize
|
||||
# pure
# returns_nonnull
|
||||
# sentinel
|
||||
# sentinel_position
# target
|
||||
# unused
|
||||
# used
|
||||
# visibility
|
||||
# warning
|
||||
# warn_unused_result
|
||||
# weak
|
||||
# weakref
|
||||
#
|
||||
# Unsupported function attributes will be tested with a prototype
|
||||
# returning an int and not accepting any arguments and the result of the
|
||||
# check might be wrong or meaningless so use with care.
|
||||
#
|
||||
# LICENSE
|
||||
#
|
||||
# Copyright (c) 2013 Gabriele Svelto <gabriele.svelto@gmail.com>
|
||||
#
|
||||
# Copying and distribution of this file, with or without modification, are
|
||||
# permitted in any medium without royalty provided the copyright notice
|
||||
# and this notice are preserved. This file is offered as-is, without any
|
||||
# warranty.
|
||||
|
||||
#serial 9
|
||||
|
||||
AC_DEFUN([AX_GCC_FUNC_ATTRIBUTE], [
    dnl Each probe program below is linked, and the attribute is accepted only
    dnl when the compiler wrote nothing to conftest.err. A probe therefore has
    dnl to be free of unrelated warnings, otherwise a supported attribute is
    dnl reported as missing.
    AS_VAR_PUSHDEF([ac_var], [ax_cv_have_func_attribute_$1])

    AC_CACHE_CHECK([for __attribute__(($1))], [ac_var], [
        AC_LINK_IFELSE([AC_LANG_PROGRAM([
            m4_case([$1],
                [alias], [
                    int foo( void ) { return 0; }
                    int bar( void ) __attribute__(($1("foo")));
                ],
                [aligned], [
                    int foo( void ) __attribute__(($1(32)));
                ],
                [alloc_size], [
                    void *foo(int a) __attribute__(($1(1)));
                ],
                [always_inline], [
                    inline __attribute__(($1)) int foo( void ) { return 0; }
                ],
                [artificial], [
                    inline __attribute__(($1)) int foo( void ) { return 0; }
                ],
                [cold], [
                    int foo( void ) __attribute__(($1));
                ],
                [const], [
                    int foo( void ) __attribute__(($1));
                ],
                [constructor_priority], [
                    int foo( void ) __attribute__((__constructor__(65535/2)));
                ],
                [constructor], [
                    int foo( void ) __attribute__(($1));
                ],
                [deprecated], [
                    int foo( void ) __attribute__(($1("")));
                ],
                [destructor], [
                    int foo( void ) __attribute__(($1));
                ],
                [dllexport], [
                    __attribute__(($1)) int foo( void ) { return 0; }
                ],
                [dllimport], [
                    int foo( void ) __attribute__(($1));
                ],
                [error], [
                    int foo( void ) __attribute__(($1("")));
                ],
                [externally_visible], [
                    int foo( void ) __attribute__(($1));
                ],
                [fallthrough], [
                    int foo( void ) {switch (0) { case 1: __attribute__(($1)); case 2: break ; }};
                ],
                [flatten], [
                    int foo( void ) __attribute__(($1));
                ],
                [format], [
                    int foo(const char *p, ...) __attribute__(($1(printf, 1, 2)));
                ],
                [format_arg], [
                    char *foo(const char *p) __attribute__(($1(1)));
                ],
                [gnu_inline], [
                    inline __attribute__(($1)) int foo( void ) { return 0; }
                ],
                [hot], [
                    int foo( void ) __attribute__(($1));
                ],
                [ifunc], [
                    int my_foo( void ) { return 0; }
                    static int (*resolve_foo(void))(void) { return my_foo; }
                    int foo( void ) __attribute__(($1("resolve_foo")));
                ],
                [leaf], [
                    __attribute__(($1)) int foo( void ) { return 0; }
                ],
                [malloc], [
                    void *foo( void ) __attribute__(($1));
                ],
                [noclone], [
                    int foo( void ) __attribute__(($1));
                ],
                [noinline], [
                    __attribute__(($1)) int foo( void ) { return 0; }
                ],
                [nonnull], [
                    int foo(char *p) __attribute__(($1(1)));
                ],
                [noreturn], [
                    void foo( void ) __attribute__(($1));
                ],
                [nothrow], [
                    int foo( void ) __attribute__(($1));
                ],
                [optimize], [
                    __attribute__(($1(3))) int foo( void ) { return 0; }
                ],
                [pure], [
                    int foo( void ) __attribute__(($1));
                ],
                [sentinel], [
                    int foo(void *p, ...) __attribute__(($1));
                ],
                [sentinel_position], [
                    int foo(void *p, ...) __attribute__(($1(1)));
                ],
                [returns_nonnull], [
                    void *foo( void ) __attribute__(($1));
                ],
                [unused], [
                    int foo( void ) __attribute__(($1));
                ],
                [used], [
                    int foo( void ) __attribute__(($1));
                ],
                [visibility], [
                    int foo_def( void ) __attribute__(($1("default")));
                    int foo_hid( void ) __attribute__(($1("hidden")));
                    int foo_int( void ) __attribute__(($1("internal")));
                    int foo_pro( void ) __attribute__(($1("protected")));
                ],
                [warning], [
                    int foo( void ) __attribute__(($1("")));
                ],
                [warn_unused_result], [
                    int foo( void ) __attribute__(($1));
                ],
                [weak], [
                    int foo( void ) __attribute__(($1));
                ],
                [weakref], [
                    static int foo( void ) { return 0; }
                    static int bar( void ) __attribute__(($1("foo")));
                ],
                [target], [
                    int bar( void ) __attribute__(($1("sse2")));
                ],
                [
                 m4_warn([syntax], [Unsupported attribute $1, the test may fail])
                 int foo( void ) __attribute__(($1));
                ]
            )], [])
            ],
            dnl GCC doesn't exit with an error if an unknown attribute is
            dnl provided but only outputs a warning, so accept the attribute
            dnl only if no warning were issued.
            [AS_IF([test -s conftest.err],
                [AS_VAR_SET([ac_var], [no])],
                [AS_VAR_SET([ac_var], [yes])])],
            [AS_VAR_SET([ac_var], [no])])
    ])

    AS_IF([test yes = AS_VAR_GET([ac_var])],
        [AC_DEFINE_UNQUOTED(AS_TR_CPP(HAVE_FUNC_ATTRIBUTE_$1), 1,
            [Define to 1 if the system has the `$1' function attribute])], [])

    AS_VAR_POPDEF([ac_var])
])
|
12
configure.ac
12
configure.ac
|
@ -275,8 +275,7 @@ esac
|
|||
|
||||
dnl Mark symbols hidden by default if the compiler (for example, gcc >= 4)
|
||||
dnl supports it. This can help reduce the binary size and startup time.
|
||||
AX_CHECK_COMPILE_FLAG([-fvisibility=hidden],
|
||||
[CFLAGS="$CFLAGS -fvisibility=hidden"])
|
||||
dnl AX_CHECK_COMPILE_FLAG([-fvisibility=hidden], [CFLAGS="$CFLAGS -fvisibility=hidden"])
|
||||
|
||||
case $host_alias in
|
||||
*solaris*)
|
||||
|
@ -495,7 +494,8 @@ utime.h \
|
|||
sys/utsname.h \
|
||||
sys/ipc.h \
|
||||
dlfcn.h \
|
||||
assert.h
|
||||
assert.h \
|
||||
nmmintrin.h
|
||||
],[],[],[
|
||||
#ifdef HAVE_SYS_PARAM_H
|
||||
#include <sys/param.h>
|
||||
|
@ -565,6 +565,8 @@ dnl Check __builtin_ssubl_overflow
|
|||
PHP_CHECK_BUILTIN_SSUBL_OVERFLOW
|
||||
dnl Check __builtin_ssubll_overflow
|
||||
PHP_CHECK_BUILTIN_SSUBLL_OVERFLOW
|
||||
dnl Check __builtin_cpu_init
|
||||
PHP_CHECK_BUILTIN_CPU_INIT
|
||||
|
||||
dnl Check for members of the stat structure
|
||||
AC_STRUCT_ST_BLKSIZE
|
||||
|
@ -585,6 +587,10 @@ AC_TYPE_UID_T
|
|||
dnl Checks for sockaddr_storage and sockaddr.sa_len
|
||||
PHP_SOCKADDR_CHECKS
|
||||
|
||||
dnl Check for the GCC function attributes used for run-time CPU dispatch.
dnl Each call prints its own "checking for __attribute__((...))" line, so no
dnl enclosing AC_MSG_CHECKING is emitted here.
AX_GCC_FUNC_ATTRIBUTE([ifunc])
AX_GCC_FUNC_ATTRIBUTE([target])
|
||||
|
||||
dnl Check for IPv6 support
|
||||
AC_CACHE_CHECK([for IPv6 support], ac_cv_ipv6_support,
|
||||
[AC_TRY_LINK([ #include <sys/types.h>
|
||||
|
|
|
@ -34,6 +34,7 @@
|
|||
#ifdef HAVE_MONETARY_H
|
||||
# include <monetary.h>
|
||||
#endif
|
||||
|
||||
/*
|
||||
* This define is here because some versions of libintl redefine setlocale
|
||||
* to point to libintl_setlocale. That's a ridiculous thing to do as far
|
||||
|
@ -3863,9 +3864,218 @@ PHPAPI zend_string *php_addcslashes(zend_string *str, int should_free, char *wha
|
|||
}
|
||||
/* }}} */
|
||||
|
||||
/* {{{ php_addslashes
|
||||
/* {{{ php_addslashes */
|
||||
#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) && HAVE_FUNC_ATTRIBUTE_IFUNC && HAVE_FUNC_ATTRIBUTE_TARGET && HAVE_NMMINTRIN_H
|
||||
|
||||
#include <nmmintrin.h>
|
||||
#include "Zend/zend_bitset.h"
|
||||
|
||||
/* Public entry point. It is declared with the ifunc attribute, naming
 * resolve_addslashes as the function that selects the implementation this
 * symbol is bound to. */
PHPAPI zend_string *php_addslashes(zend_string *str, int should_free) __attribute__((ifunc("resolve_addslashes")));
|
||||
|
||||
/* Implementation compiled with the "sse4.2" target attribute, so it may use
 * SSE4.2 intrinsics regardless of the flags used for the rest of the file. */
zend_string *php_addslashes_sse4(zend_string *str, int should_free) __attribute__((target("sse4.2")));
|
||||
/* Fallback implementation returned by the resolver when SSE4.2 is not selected. */
zend_string *php_addslashes_default(zend_string *str, int should_free);
|
||||
|
||||
/* {{{ resolve_addslashes */
|
||||
static void *resolve_addslashes() {
|
||||
#if PHP_HAVE_BUILTIN_CPU_INIT
|
||||
__builtin_cpu_init();
|
||||
if (__builtin_cpu_supports("sse4.2")) {
|
||||
return php_addslashes_sse4;
|
||||
}
|
||||
#endif
|
||||
return php_addslashes_default;
|
||||
}
|
||||
/* }}} */
|
||||
|
||||
/* {{{ php_addslashes_sse4
|
||||
*/
|
||||
PHPAPI zend_string *php_addslashes(zend_string *str, int should_free)
|
||||
zend_string *php_addslashes_sse4(zend_string *str, int should_free)
|
||||
{
|
||||
SET_ALIGNED(16, static const char slashchars[16]) = "\'\"\\\0";
|
||||
__m128i w128, s128;
|
||||
uint32_t res = 0;
|
||||
/* maximum string length, worst case situation */
|
||||
char *source, *target;
|
||||
char *end;
|
||||
size_t offset;
|
||||
zend_string *new_str;
|
||||
|
||||
if (!str) {
|
||||
return ZSTR_EMPTY_ALLOC();
|
||||
}
|
||||
|
||||
source = ZSTR_VAL(str);
|
||||
end = source + ZSTR_LEN(str);
|
||||
|
||||
if (ZSTR_LEN(str) > 15) {
|
||||
char *aligned = (char*)(((zend_uintptr_t)source + 15) & ~15);
|
||||
|
||||
if (UNEXPECTED(source != aligned)) {
|
||||
do {
|
||||
switch (*source) {
|
||||
case '\0':
|
||||
case '\'':
|
||||
case '\"':
|
||||
case '\\':
|
||||
goto do_escape;
|
||||
default:
|
||||
source++;
|
||||
break;
|
||||
}
|
||||
} while (source < aligned);
|
||||
}
|
||||
|
||||
w128 = _mm_load_si128((__m128i *)slashchars);
|
||||
for (;end - source > 15; source += 16) {
|
||||
s128 = _mm_load_si128((__m128i *)source);
|
||||
res = _mm_cvtsi128_si32(_mm_cmpestrm(w128, 4, s128, 16, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK));
|
||||
if (res) {
|
||||
goto do_escape;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
while (source < end) {
|
||||
switch (*source) {
|
||||
case '\0':
|
||||
case '\'':
|
||||
case '\"':
|
||||
case '\\':
|
||||
goto do_escape;
|
||||
default:
|
||||
source++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!should_free) {
|
||||
return zend_string_copy(str);
|
||||
}
|
||||
|
||||
return str;
|
||||
|
||||
do_escape:
|
||||
offset = source - (char *)ZSTR_VAL(str);
|
||||
new_str = zend_string_safe_alloc(2, ZSTR_LEN(str) - offset, offset, 0);
|
||||
memcpy(ZSTR_VAL(new_str), ZSTR_VAL(str), offset);
|
||||
target = ZSTR_VAL(new_str) + offset;
|
||||
|
||||
if (res) {
|
||||
int pos = 0;
|
||||
do {
|
||||
int i, n = zend_ulong_ntz(res);
|
||||
for (i = 0; i < n; i++) {
|
||||
*target++ = source[pos + i];
|
||||
}
|
||||
pos += n;
|
||||
*target++ = '\\';
|
||||
if (source[pos] == '\0') {
|
||||
*target++ = '0';
|
||||
} else {
|
||||
*target++ = source[pos];
|
||||
}
|
||||
pos++;
|
||||
res = res >> (n + 1);
|
||||
} while (res);
|
||||
|
||||
for (; pos < 16; pos++) {
|
||||
*target++ = source[pos];
|
||||
}
|
||||
source += 16;
|
||||
} else if (end - source > 15) {
|
||||
char *aligned = (char*)(((zend_uintptr_t)source + 15) & ~15);
|
||||
|
||||
if (source != aligned) {
|
||||
do {
|
||||
switch (*source) {
|
||||
case '\0':
|
||||
*target++ = '\\';
|
||||
*target++ = '0';
|
||||
break;
|
||||
case '\'':
|
||||
case '\"':
|
||||
case '\\':
|
||||
*target++ = '\\';
|
||||
/* break is missing *intentionally* */
|
||||
default:
|
||||
*target++ = *source;
|
||||
break;
|
||||
}
|
||||
source++;
|
||||
} while (source < aligned);
|
||||
}
|
||||
|
||||
w128 = _mm_load_si128((__m128i *)slashchars);
|
||||
}
|
||||
|
||||
for (; end - source > 15; source += 16) {
|
||||
int pos = 0;
|
||||
s128 = _mm_load_si128((__m128i *)source);
|
||||
res = _mm_cvtsi128_si32(_mm_cmpestrm(w128, 4, s128, 16, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK));
|
||||
if (res) {
|
||||
do {
|
||||
int i, n = zend_ulong_ntz(res);
|
||||
for (i = 0; i < n; i++) {
|
||||
*target++ = source[pos + i];
|
||||
}
|
||||
pos += n;
|
||||
*target++ = '\\';
|
||||
if (source[pos] == '\0') {
|
||||
*target++ = '0';
|
||||
} else {
|
||||
*target++ = source[pos];
|
||||
}
|
||||
pos++;
|
||||
res = res >> (n + 1);
|
||||
} while (res);
|
||||
for (; pos < 16; pos++) {
|
||||
*target++ = source[pos];
|
||||
}
|
||||
} else {
|
||||
_mm_storeu_si128((__m128i*)target, s128);
|
||||
target += 16;
|
||||
}
|
||||
}
|
||||
|
||||
while (source < end) {
|
||||
switch (*source) {
|
||||
case '\0':
|
||||
*target++ = '\\';
|
||||
*target++ = '0';
|
||||
break;
|
||||
case '\'':
|
||||
case '\"':
|
||||
case '\\':
|
||||
*target++ = '\\';
|
||||
/* break is missing *intentionally* */
|
||||
default:
|
||||
*target++ = *source;
|
||||
break;
|
||||
}
|
||||
source++;
|
||||
}
|
||||
|
||||
*target = '\0';
|
||||
if (should_free) {
|
||||
zend_string_release(str);
|
||||
}
|
||||
|
||||
if (ZSTR_LEN(new_str) - (target - ZSTR_VAL(new_str)) > 16) {
|
||||
new_str = zend_string_truncate(new_str, target - ZSTR_VAL(new_str), 0);
|
||||
} else {
|
||||
ZSTR_LEN(new_str) = target - ZSTR_VAL(new_str);
|
||||
}
|
||||
|
||||
return new_str;
|
||||
}
|
||||
/* }}} */
|
||||
|
||||
/* {{{ php_addslashes_default
|
||||
*/
|
||||
zend_string *php_addslashes_default(zend_string *str, int should_free)
|
||||
#else
|
||||
zend_string *php_addslashes(zend_string *str, int should_free)
|
||||
#endif
|
||||
{
|
||||
/* maximum string length, worst case situation */
|
||||
char *source, *target;
|
||||
|
@ -3920,11 +4130,10 @@ do_escape:
|
|||
*target++ = *source;
|
||||
break;
|
||||
}
|
||||
|
||||
source++;
|
||||
}
|
||||
|
||||
*target = 0;
|
||||
*target = '\0';
|
||||
if (should_free) {
|
||||
zend_string_release(str);
|
||||
}
|
||||
|
@ -3938,6 +4147,7 @@ do_escape:
|
|||
return new_str;
|
||||
}
|
||||
/* }}} */
|
||||
/* }}} */
|
||||
|
||||
#define _HEB_BLOCK_TYPE_ENG 1
|
||||
#define _HEB_BLOCK_TYPE_HEB 2
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue