Optimized php_addslashes with SSE4.2 instructions

According to the benchmark
(https://gist.github.com/laruence/fd0d443d2c5bacca9d8ab99250499956), this reduces execution time by more than 30%.

In the future, this opens the door for possible SSE4.2 optimizations in
other places.
This commit is contained in:
Xinchen Hui 2018-01-15 21:01:27 +08:00
parent 0e10bed83e
commit 98aa3a65c4
4 changed files with 485 additions and 7 deletions

View file

@ -3222,9 +3222,30 @@ AC_DEFUN([PHP_CHECK_BUILTIN_SSUBLL_OVERFLOW], [
])
dnl PHP_CHECK_BUILTIN_CPU_INIT
dnl
dnl Check whether the compiler provides __builtin_cpu_init and define
dnl PHP_HAVE_BUILTIN_CPU_INIT to 1 or 0 accordingly.
AC_DEFUN([PHP_CHECK_BUILTIN_CPU_INIT], [
  AC_MSG_CHECKING([for __builtin_cpu_init])

  dnl Call the builtin as a plain statement. Its return type is not the same
  dnl in every compiler (Clang declares it as returning void), so using the
  dnl result in an expression makes the probe fail even when the builtin exists.
  AC_TRY_LINK(, [
    __builtin_cpu_init();
  ], [
    have_builtin_cpu_init=1
    AC_MSG_RESULT([yes])
  ], [
    have_builtin_cpu_init=0
    AC_MSG_RESULT([no])
  ])

  AC_DEFINE_UNQUOTED([PHP_HAVE_BUILTIN_CPU_INIT],
   [$have_builtin_cpu_init], [Whether the compiler supports __builtin_cpu_init])
])
dnl Load the AX_CHECK_COMPILE_FLAG macro from the autoconf archive.
m4_include([build/ax_check_compile_flag.m4])
m4_include([build/ax_gcc_func_attribute.m4])
dnl PHP_CHECK_VALGRIND
AC_DEFUN([PHP_CHECK_VALGRIND], [
AC_MSG_CHECKING([for valgrind])

View file

@ -0,0 +1,241 @@
# ===========================================================================
# https://www.gnu.org/software/autoconf-archive/ax_gcc_func_attribute.html
# ===========================================================================
#
# SYNOPSIS
#
# AX_GCC_FUNC_ATTRIBUTE(ATTRIBUTE)
#
# DESCRIPTION
#
# This macro checks if the compiler supports one of GCC's function
# attributes; many other compilers also provide function attributes with
# the same syntax. Compiler warnings are used to detect supported
# attributes as unsupported ones are ignored by default so quieting
# warnings when using this macro will yield false positives.
#
# The ATTRIBUTE parameter holds the name of the attribute to be checked.
#
# If ATTRIBUTE is supported define HAVE_FUNC_ATTRIBUTE_<ATTRIBUTE>.
#
# The macro caches its result in the ax_cv_have_func_attribute_<attribute>
# variable.
#
# The macro currently supports the following function attributes:
#
# alias
# aligned
# alloc_size
# always_inline
# artificial
# cold
# const
# constructor
# constructor_priority for constructor attribute with priority
# deprecated
# destructor
# dllexport
# dllimport
# error
# externally_visible
# fallthrough
# flatten
# format
# format_arg
# gnu_inline
# hot
# ifunc
# leaf
# malloc
# noclone
# noinline
# nonnull
# noreturn
# nothrow
# optimize
# pure
# returns_nonnull
# sentinel
# sentinel_position
# target
# unused
# used
# visibility
# warning
# warn_unused_result
# weak
# weakref
#
# Unsupported function attributes will be tested with a prototype
# returning an int and not accepting any arguments and the result of the
# check might be wrong or meaningless so use with care.
#
# LICENSE
#
# Copyright (c) 2013 Gabriele Svelto <gabriele.svelto@gmail.com>
#
# Copying and distribution of this file, with or without modification, are
# permitted in any medium without royalty provided the copyright notice
# and this notice are preserved. This file is offered as-is, without any
# warranty.
#serial 9
dnl AX_GCC_FUNC_ATTRIBUTE(ATTRIBUTE)
dnl
dnl Link a small test program that uses __attribute__((ATTRIBUTE)) and treat
dnl the attribute as supported only when the link succeeds and nothing was
dnl written to conftest.err. The test program is selected per attribute by the
dnl m4_case table below; attributes without an entry fall through to a generic
dnl prototype and emit an m4 warning. The result is cached in
dnl ax_cv_have_func_attribute_ATTRIBUTE, and on success
dnl HAVE_FUNC_ATTRIBUTE_ATTRIBUTE (upper-cased by AS_TR_CPP) is defined to 1.
AC_DEFUN([AX_GCC_FUNC_ATTRIBUTE], [
AS_VAR_PUSHDEF([ac_var], [ax_cv_have_func_attribute_$1])
AC_CACHE_CHECK([for __attribute__(($1))], [ac_var], [
AC_LINK_IFELSE([AC_LANG_PROGRAM([
m4_case([$1],
[alias], [
int foo( void ) { return 0; }
int bar( void ) __attribute__(($1("foo")));
],
[aligned], [
int foo( void ) __attribute__(($1(32)));
],
[alloc_size], [
void *foo(int a) __attribute__(($1(1)));
],
[always_inline], [
inline __attribute__(($1)) int foo( void ) { return 0; }
],
[artificial], [
inline __attribute__(($1)) int foo( void ) { return 0; }
],
[cold], [
int foo( void ) __attribute__(($1));
],
[const], [
int foo( void ) __attribute__(($1));
],
[constructor_priority], [
int foo( void ) __attribute__((__constructor__(65535/2)));
],
[constructor], [
int foo( void ) __attribute__(($1));
],
[deprecated], [
int foo( void ) __attribute__(($1("")));
],
[destructor], [
int foo( void ) __attribute__(($1));
],
[dllexport], [
__attribute__(($1)) int foo( void ) { return 0; }
],
[dllimport], [
int foo( void ) __attribute__(($1));
],
[error], [
int foo( void ) __attribute__(($1("")));
],
[externally_visible], [
int foo( void ) __attribute__(($1));
],
[fallthrough], [
int foo( void ) {switch (0) { case 1: __attribute__(($1)); case 2: break ; }};
],
[flatten], [
int foo( void ) __attribute__(($1));
],
[format], [
int foo(const char *p, ...) __attribute__(($1(printf, 1, 2)));
],
[format_arg], [
char *foo(const char *p) __attribute__(($1(1)));
],
[gnu_inline], [
inline __attribute__(($1)) int foo( void ) { return 0; }
],
[hot], [
int foo( void ) __attribute__(($1));
],
[ifunc], [
int my_foo( void ) { return 0; }
static int (*resolve_foo(void))(void) { return my_foo; }
int foo( void ) __attribute__(($1("resolve_foo")));
],
[leaf], [
__attribute__(($1)) int foo( void ) { return 0; }
],
[malloc], [
void *foo( void ) __attribute__(($1));
],
[noclone], [
int foo( void ) __attribute__(($1));
],
[noinline], [
__attribute__(($1)) int foo( void ) { return 0; }
],
[nonnull], [
int foo(char *p) __attribute__(($1(1)));
],
[noreturn], [
void foo( void ) __attribute__(($1));
],
[nothrow], [
int foo( void ) __attribute__(($1));
],
[optimize], [
__attribute__(($1(3))) int foo( void ) { return 0; }
],
[pure], [
int foo( void ) __attribute__(($1));
],
[sentinel], [
int foo(void *p, ...) __attribute__(($1));
],
[sentinel_position], [
int foo(void *p, ...) __attribute__(($1(1)));
],
[returns_nonnull], [
void *foo( void ) __attribute__(($1));
],
[unused], [
int foo( void ) __attribute__(($1));
],
[used], [
int foo( void ) __attribute__(($1));
],
[visibility], [
int foo_def( void ) __attribute__(($1("default")));
int foo_hid( void ) __attribute__(($1("hidden")));
int foo_int( void ) __attribute__(($1("internal")));
int foo_pro( void ) __attribute__(($1("protected")));
],
[warning], [
int foo( void ) __attribute__(($1("")));
],
[warn_unused_result], [
int foo( void ) __attribute__(($1));
],
[weak], [
int foo( void ) __attribute__(($1));
],
[weakref], [
static int foo( void ) { return 0; }
static int bar( void ) __attribute__(($1("foo")));
],
[target], [
static int bar( void ) __attribute__(($1("sse2")));
],
[
m4_warn([syntax], [Unsupported attribute $1, the test may fail])
int foo( void ) __attribute__(($1));
]
)], [])
],
dnl GCC doesn't exit with an error if an unknown attribute is
dnl provided but only outputs a warning, so accept the attribute
dnl only if no warning were issued.
[AS_IF([test -s conftest.err],
[AS_VAR_SET([ac_var], [no])],
[AS_VAR_SET([ac_var], [yes])])],
[AS_VAR_SET([ac_var], [no])])
])
dnl Publish the cached result as a preprocessor define when it is yes.
AS_IF([test yes = AS_VAR_GET([ac_var])],
[AC_DEFINE_UNQUOTED(AS_TR_CPP(HAVE_FUNC_ATTRIBUTE_$1), 1,
[Define to 1 if the system has the `$1' function attribute])], [])
AS_VAR_POPDEF([ac_var])
])

View file

@ -275,8 +275,7 @@ esac
dnl Mark symbols hidden by default if the compiler (for example, gcc >= 4)
dnl supports it. This can help reduce the binary size and startup time.
AX_CHECK_COMPILE_FLAG([-fvisibility=hidden],
[CFLAGS="$CFLAGS -fvisibility=hidden"])
dnl AX_CHECK_COMPILE_FLAG([-fvisibility=hidden], [CFLAGS="$CFLAGS -fvisibility=hidden"])
case $host_alias in
*solaris*)
@ -495,7 +494,8 @@ utime.h \
sys/utsname.h \
sys/ipc.h \
dlfcn.h \
assert.h
assert.h \
nmmintrin.h
],[],[],[
#ifdef HAVE_SYS_PARAM_H
#include <sys/param.h>
@ -565,6 +565,8 @@ dnl Check __builtin_ssubl_overflow
PHP_CHECK_BUILTIN_SSUBL_OVERFLOW
dnl Check __builtin_ssubll_overflow
PHP_CHECK_BUILTIN_SSUBLL_OVERFLOW
dnl Check __builtin_cpu_init
PHP_CHECK_BUILTIN_CPU_INIT
dnl Check for members of the stat structure
AC_STRUCT_ST_BLKSIZE
@ -585,6 +587,10 @@ AC_TYPE_UID_T
dnl Checks for sockaddr_storage and sockaddr.sa_len
PHP_SOCKADDR_CHECKS
dnl Check for the ifunc and target function attributes
AX_GCC_FUNC_ATTRIBUTE([ifunc])
AX_GCC_FUNC_ATTRIBUTE([target])
dnl Check for IPv6 support
AC_CACHE_CHECK([for IPv6 support], ac_cv_ipv6_support,
[AC_TRY_LINK([ #include <sys/types.h>

View file

@ -34,6 +34,7 @@
#ifdef HAVE_MONETARY_H
# include <monetary.h>
#endif
/*
* This define is here because some versions of libintl redefine setlocale
* to point to libintl_setlocale. That's a ridiculous thing to do as far
@ -3863,9 +3864,218 @@ PHPAPI zend_string *php_addcslashes(zend_string *str, int should_free, char *wha
}
/* }}} */
/* {{{ php_addslashes
/* {{{ php_addslashes */
#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) && HAVE_FUNC_ATTRIBUTE_IFUNC && HAVE_FUNC_ATTRIBUTE_TARGET && HAVE_NMMINTRIN_H
#include <nmmintrin.h>
#include "Zend/zend_bitset.h"
/* Public entry point. The ifunc attribute names resolve_addslashes() as the
 * function that selects which implementation this symbol is bound to. */
PHPAPI zend_string *php_addslashes(zend_string *str, int should_free) __attribute__((ifunc("resolve_addslashes")));
/* SSE4.2 implementation; the target attribute enables SSE4.2 code generation
 * for this one function. */
zend_string *php_addslashes_sse4(zend_string *str, int should_free) __attribute__((target("sse4.2")));
/* Portable scalar implementation, returned by the resolver when SSE4.2 is not
 * selected. */
zend_string *php_addslashes_default(zend_string *str, int should_free);
/* {{{ resolve_addslashes */
/* ifunc resolver for php_addslashes().
 *
 * Returns the address of the implementation that php_addslashes() should be
 * bound to: php_addslashes_sse4 when the compiler provides the CPU detection
 * builtins and the running CPU reports SSE4.2, php_addslashes_default
 * otherwise.
 */
static void *resolve_addslashes(void) {
#if PHP_HAVE_BUILTIN_CPU_INIT
	/* Initialise the CPU feature data before querying it. */
	__builtin_cpu_init();
	if (__builtin_cpu_supports("sse4.2")) {
		return php_addslashes_sse4;
	}
#endif
	return php_addslashes_default;
}
/* }}} */
/* {{{ php_addslashes_sse4
*/
PHPAPI zend_string *php_addslashes(zend_string *str, int should_free)
/* SSE4.2 implementation of addslashes.
 *
 * Prefixes every single quote, double quote and backslash in str with a
 * backslash, and replaces every NUL byte with the two characters '\' '0'.
 *
 * str         - input string; NULL yields ZSTR_EMPTY_ALLOC().
 * should_free - when non-zero and a new string is built, str is released.
 *
 * Returns str itself (or zend_string_copy(str) when should_free is zero) if
 * no byte needs escaping, otherwise a newly allocated string.
 */
zend_string *php_addslashes_sse4(zend_string *str, int should_free)
{
/* Comparison set for _mm_cmpestrm; only the first 4 bytes (' " \ and NUL)
 * are used, because the set length passed below is 4. */
SET_ALIGNED(16, static const char slashchars[16]) = "\'\"\\\0";
__m128i w128, s128;
/* Bit mask of bytes to escape in the last compared 16-byte chunk; stays 0
 * whenever do_escape is reached from one of the byte-by-byte scans. */
uint32_t res = 0;
/* maximum string length, worst case situation */
char *source, *target;
char *end;
size_t offset;
zend_string *new_str;
if (!str) {
return ZSTR_EMPTY_ALLOC();
}
source = ZSTR_VAL(str);
end = source + ZSTR_LEN(str);
/* Phase 1: look for the first byte that needs escaping without allocating. */
if (ZSTR_LEN(str) > 15) {
/* Round source up to the next 16-byte boundary so the aligned load
 * _mm_load_si128 can be used on it. */
char *aligned = (char*)(((zend_uintptr_t)source + 15) & ~15);
if (UNEXPECTED(source != aligned)) {
/* Scan the unaligned head one byte at a time. */
do {
switch (*source) {
case '\0':
case '\'':
case '\"':
case '\\':
goto do_escape;
default:
source++;
break;
}
} while (source < aligned);
}
w128 = _mm_load_si128((__m128i *)slashchars);
/* Compare 16 bytes per iteration; bit i of res is set when source[i]
 * equals one of the four bytes in the comparison set. */
for (;end - source > 15; source += 16) {
s128 = _mm_load_si128((__m128i *)source);
res = _mm_cvtsi128_si32(_mm_cmpestrm(w128, 4, s128, 16, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK));
if (res) {
goto do_escape;
}
}
}
/* Scan the remaining tail (or the whole string when it is shorter than 16). */
while (source < end) {
switch (*source) {
case '\0':
case '\'':
case '\"':
case '\\':
goto do_escape;
default:
source++;
break;
}
}
/* Nothing to escape: hand back the input string. */
if (!should_free) {
return zend_string_copy(str);
}
return str;
do_escape:
/* Phase 2: build the escaped copy. Everything before source is known to be
 * clean and is copied verbatim. */
offset = source - (char *)ZSTR_VAL(str);
/* Presumably sizes the buffer as 2 * (len - offset) + offset, i.e. room for
 * every remaining byte being escaped -- confirm against
 * zend_string_safe_alloc. */
new_str = zend_string_safe_alloc(2, ZSTR_LEN(str) - offset, offset, 0);
memcpy(ZSTR_VAL(new_str), ZSTR_VAL(str), offset);
target = ZSTR_VAL(new_str) + offset;
if (res) {
/* Reached from the vector loop: source is the start of the 16-byte chunk
 * that was just compared and res is its match mask. */
int pos = 0;
do {
/* n is presumably the index of the lowest set bit of res, i.e. the
 * number of clean bytes before the next match -- confirm against
 * zend_ulong_ntz. */
int i, n = zend_ulong_ntz(res);
for (i = 0; i < n; i++) {
*target++ = source[pos + i];
}
pos += n;
*target++ = '\\';
if (source[pos] == '\0') {
*target++ = '0';
} else {
*target++ = source[pos];
}
pos++;
/* Drop the bits that have been handled, including the matched one. */
res = res >> (n + 1);
} while (res);
/* Copy the rest of the chunk after the last match. */
for (; pos < 16; pos++) {
*target++ = source[pos];
}
source += 16;
} else if (end - source > 15) {
/* Reached from a byte-by-byte scan with at least 16 bytes left: escape
 * byte by byte until source is 16-byte aligned again. */
char *aligned = (char*)(((zend_uintptr_t)source + 15) & ~15);
if (source != aligned) {
do {
switch (*source) {
case '\0':
*target++ = '\\';
*target++ = '0';
break;
case '\'':
case '\"':
case '\\':
*target++ = '\\';
/* break is missing *intentionally* */
default:
*target++ = *source;
break;
}
source++;
} while (source < aligned);
}
/* The jump may have come from the alignment prologue above, which runs
 * before w128 is first loaded, so load it here. */
w128 = _mm_load_si128((__m128i *)slashchars);
}
/* Main vector loop: escape 16 aligned source bytes per iteration. */
for (; end - source > 15; source += 16) {
int pos = 0;
s128 = _mm_load_si128((__m128i *)source);
res = _mm_cvtsi128_si32(_mm_cmpestrm(w128, 4, s128, 16, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK));
if (res) {
/* Same mask-driven expansion as in the do_escape entry path above. */
do {
int i, n = zend_ulong_ntz(res);
for (i = 0; i < n; i++) {
*target++ = source[pos + i];
}
pos += n;
*target++ = '\\';
if (source[pos] == '\0') {
*target++ = '0';
} else {
*target++ = source[pos];
}
pos++;
res = res >> (n + 1);
} while (res);
for (; pos < 16; pos++) {
*target++ = source[pos];
}
} else {
/* No match in this chunk: copy it unchanged. The store is the
 * unaligned variant because target is not kept 16-byte aligned. */
_mm_storeu_si128((__m128i*)target, s128);
target += 16;
}
}
/* Escape the remaining tail of fewer than 16 bytes one byte at a time. */
while (source < end) {
switch (*source) {
case '\0':
*target++ = '\\';
*target++ = '0';
break;
case '\'':
case '\"':
case '\\':
*target++ = '\\';
/* break is missing *intentionally* */
default:
*target++ = *source;
break;
}
source++;
}
*target = '\0';
if (should_free) {
zend_string_release(str);
}
/* Shrink the allocation only when more than 16 bytes are unused; otherwise
 * just record the real length. */
if (ZSTR_LEN(new_str) - (target - ZSTR_VAL(new_str)) > 16) {
new_str = zend_string_truncate(new_str, target - ZSTR_VAL(new_str), 0);
} else {
ZSTR_LEN(new_str) = target - ZSTR_VAL(new_str);
}
return new_str;
}
/* }}} */
/* {{{ php_addslashes_default
*/
zend_string *php_addslashes_default(zend_string *str, int should_free)
#else
zend_string *php_addslashes(zend_string *str, int should_free)
#endif
{
/* maximum string length, worst case situation */
char *source, *target;
@ -3920,11 +4130,10 @@ do_escape:
*target++ = *source;
break;
}
source++;
}
*target = 0;
*target = '\0';
if (should_free) {
zend_string_release(str);
}
@ -3938,6 +4147,7 @@ do_escape:
return new_str;
}
/* }}} */
/* }}} */
#define _HEB_BLOCK_TYPE_ENG 1
#define _HEB_BLOCK_TYPE_HEB 2