is_numeric_string() optimization

# Original Patch by Matt Wilmas
This commit is contained in:
Ilia Alshanetsky 2006-12-26 16:44:20 +00:00
parent 24124518ab
commit ff9d0fcc78
8 changed files with 213 additions and 104 deletions

View file

@ -1,4 +1,5 @@
<?php <?php
date_default_timezone_set("UTC");
function simple() { function simple() {
$a = 0; $a = 0;

View file

@ -214,7 +214,7 @@ ZEND_API void zend_make_printable_zval(zval *expr, zval *expr_copy, int *use_cop
} }
break; break;
case IS_RESOURCE: case IS_RESOURCE:
expr_copy->value.str.val = (char *) emalloc(sizeof("Resource id #")-1 + MAX_LENGTH_OF_LONG); expr_copy->value.str.val = (char *) emalloc(sizeof("Resource id #") + MAX_LENGTH_OF_LONG);
expr_copy->value.str.len = sprintf(expr_copy->value.str.val, "Resource id #%ld", expr->value.lval); expr_copy->value.str.len = sprintf(expr_copy->value.str.val, "Resource id #%ld", expr->value.lval);
break; break;
case IS_ARRAY: case IS_ARRAY:

View file

@ -404,7 +404,7 @@ static int _build_trace_string(zval **frame, int num_args, va_list args, zend_ha
} else { } else {
line = 0; line = 0;
} }
s_tmp = emalloc(Z_STRLEN_PP(file) + MAX_LENGTH_OF_LONG + 2 + 1); s_tmp = emalloc(Z_STRLEN_PP(file) + MAX_LENGTH_OF_LONG + 4 + 1);
sprintf(s_tmp, "%s(%ld): ", Z_STRVAL_PP(file), line); sprintf(s_tmp, "%s(%ld): ", Z_STRVAL_PP(file), line);
TRACE_APPEND_STRL(s_tmp, strlen(s_tmp)); TRACE_APPEND_STRL(s_tmp, strlen(s_tmp));
efree(s_tmp); efree(s_tmp);

View file

@ -1236,38 +1236,44 @@ NEWLINE ("\r"|"\n"|"\r\n")
<ST_IN_SCRIPTING>{LNUM} { <ST_IN_SCRIPTING>{LNUM} {
errno = 0; if (yyleng < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */
zendlval->value.lval = strtol(yytext, NULL, 0); zendlval->value.lval = strtol(yytext, NULL, 0);
if (errno == ERANGE) { /* overflow */
zendlval->value.dval = zend_strtod(yytext, NULL);
zendlval->type = IS_DOUBLE;
return T_DNUMBER;
} else { } else {
zendlval->type = IS_LONG; errno = 0;
return T_LNUMBER; zendlval->value.lval = strtol(yytext, NULL, 0);
} if (errno == ERANGE) { /* Overflow */
} zendlval->value.dval = zend_strtod(yytext, NULL);
<ST_IN_SCRIPTING>{HNUM} {
errno = 0;
zendlval->value.lval = strtoul(yytext, NULL, 16);
if (errno == ERANGE) { /* overflow */
/* not trying strtod - it returns trash on 0x-es */
zendlval->value.lval = LONG_MAX; /* maximal long */
zend_error(E_NOTICE,"Hex number is too big: %s", yytext);
} else {
if (zendlval->value.lval < 0) {
/* maintain consistency with the old way */
zendlval->value.dval = (unsigned long) zendlval->value.lval;
zendlval->type = IS_DOUBLE; zendlval->type = IS_DOUBLE;
return T_DNUMBER; return T_DNUMBER;
} }
zendlval->type = IS_LONG;
} }
zendlval->type = IS_LONG; zendlval->type = IS_LONG;
return T_LNUMBER; return T_LNUMBER;
} }
<ST_IN_SCRIPTING>{HNUM} {
	/*
	 * Hexadecimal integer literal.
	 *
	 * The digits are examined through a local cursor/length pair so that
	 * yytext and yyleng are left untouched: they still describe the whole
	 * matched literal for anything that reads the token text after this
	 * rule returns.
	 */
	char *hex = yytext + 2; /* Skip "0x" */
	int len = yyleng - 2;

	/* Skip any leading 0s; they do not count towards the magnitude */
	while (*hex == '0') {
		hex++;
		len--;
	}

	/*
	 * A long holds SIZEOF_LONG * 2 hex digits. With exactly that many
	 * significant digits the value only fits if the first digit is
	 * <= '7', i.e. the sign bit stays clear.
	 */
	if (len < SIZEOF_LONG * 2 || (len == SIZEOF_LONG * 2 && *hex <= '7')) {
		zendlval->value.lval = strtol(hex, NULL, 16);
		zendlval->type = IS_LONG;
		return T_LNUMBER;
	} else {
		/* Too large for a long: produce a double instead */
		zendlval->value.dval = zend_hex_strtod(hex, NULL);
		zendlval->type = IS_DOUBLE;
		return T_DNUMBER;
	}
}
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>{LNUM}|{HNUM} { /* treat numbers (almost) as strings inside encapsulated strings */ <ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>{LNUM}|{HNUM} { /* treat numbers (almost) as strings inside encapsulated strings */
zendlval->value.str.val = (char *)estrndup(yytext, yyleng); zendlval->value.str.val = (char *)estrndup(yytext, yyleng);
zendlval->value.str.len = yyleng; zendlval->value.str.len = yyleng;

View file

@ -123,14 +123,9 @@ ZEND_API void convert_scalar_to_number(zval *op TSRMLS_DC)
char *strval; char *strval;
strval = op->value.str.val; strval = op->value.str.val;
switch ((op->type=is_numeric_string(strval, op->value.str.len, &op->value.lval, &op->value.dval, 1))) { if ((op->type=is_numeric_string(strval, op->value.str.len, &op->value.lval, &op->value.dval, 1)) == 0) {
case IS_DOUBLE: op->value.lval = 0;
case IS_LONG: op->type = IS_LONG;
break;
default:
op->value.lval = strtol(op->value.str.val, NULL, 10);
op->type = IS_LONG;
break;
} }
STR_FREE(strval); STR_FREE(strval);
break; break;
@ -161,14 +156,9 @@ ZEND_API void convert_scalar_to_number(zval *op TSRMLS_DC)
switch ((op)->type) { \ switch ((op)->type) { \
case IS_STRING: \ case IS_STRING: \
{ \ { \
switch (((holder).type=is_numeric_string((op)->value.str.val, (op)->value.str.len, &(holder).value.lval, &(holder).value.dval, 1))) { \ if (((holder).type=is_numeric_string((op)->value.str.val, (op)->value.str.len, &(holder).value.lval, &(holder).value.dval, 1)) == 0) { \
case IS_DOUBLE: \ (holder).value.lval = 0; \
case IS_LONG: \ (holder).type = IS_LONG; \
break; \
default: \
(holder).value.lval = strtol((op)->value.str.val, NULL, 10); \
(holder).type = IS_LONG; \
break; \
} \ } \
(op) = &(holder); \ (op) = &(holder); \
break; \ break; \
@ -560,7 +550,7 @@ ZEND_API void _convert_to_string(zval *op ZEND_FILE_LINE_DC)
TSRMLS_FETCH(); TSRMLS_FETCH();
zend_list_delete(op->value.lval); zend_list_delete(op->value.lval);
op->value.str.val = (char *) emalloc(sizeof("Resource id #")-1 + MAX_LENGTH_OF_LONG); op->value.str.val = (char *) emalloc(sizeof("Resource id #") + MAX_LENGTH_OF_LONG);
op->value.str.len = sprintf(op->value.str.val, "Resource id #%ld", tmp); op->value.str.len = sprintf(op->value.str.val, "Resource id #%ld", tmp);
break; break;
} }
@ -1227,10 +1217,14 @@ ZEND_API int concat_function(zval *result, zval *op1, zval *op2 TSRMLS_DC)
ZEND_API int string_compare_function(zval *result, zval *op1, zval *op2 TSRMLS_DC) ZEND_API int string_compare_function(zval *result, zval *op1, zval *op2 TSRMLS_DC)
{ {
zval op1_copy, op2_copy; zval op1_copy, op2_copy;
int use_copy1, use_copy2; int use_copy1 = 0, use_copy2 = 0;
zend_make_printable_zval(op1, &op1_copy, &use_copy1); if (op1->type != IS_STRING) {
zend_make_printable_zval(op2, &op2_copy, &use_copy2); zend_make_printable_zval(op1, &op1_copy, &use_copy1);
}
if (op2->type != IS_STRING) {
zend_make_printable_zval(op2, &op2_copy, &use_copy2);
}
if (use_copy1) { if (use_copy1) {
op1 = &op1_copy; op1 = &op1_copy;
@ -1255,10 +1249,14 @@ ZEND_API int string_compare_function(zval *result, zval *op1, zval *op2 TSRMLS_D
ZEND_API int string_locale_compare_function(zval *result, zval *op1, zval *op2 TSRMLS_DC) ZEND_API int string_locale_compare_function(zval *result, zval *op1, zval *op2 TSRMLS_DC)
{ {
zval op1_copy, op2_copy; zval op1_copy, op2_copy;
int use_copy1, use_copy2; int use_copy1 = 0, use_copy2 = 0;
zend_make_printable_zval(op1, &op1_copy, &use_copy1); if (op1->type != IS_STRING) {
zend_make_printable_zval(op2, &op2_copy, &use_copy2); zend_make_printable_zval(op1, &op1_copy, &use_copy1);
}
if (op2->type != IS_STRING) {
zend_make_printable_zval(op2, &op2_copy, &use_copy2);
}
if (use_copy1) { if (use_copy1) {
op1 = &op1_copy; op1 = &op1_copy;

View file

@ -36,7 +36,16 @@
#include "ext/bcmath/libbcmath/src/bcmath.h" #include "ext/bcmath/libbcmath/src/bcmath.h"
#endif #endif
#if SIZEOF_LONG == 4
#define MAX_LENGTH_OF_LONG 11
static const char long_min_digits[] = "2147483648";
#elif SIZEOF_LONG == 8
#define MAX_LENGTH_OF_LONG 20 #define MAX_LENGTH_OF_LONG 20
static const char long_min_digits[] = "9223372036854775808";
#else
#error "Unknown SIZEOF_LONG"
#endif
#define MAX_LENGTH_OF_DOUBLE 32 #define MAX_LENGTH_OF_DOUBLE 32
BEGIN_EXTERN_C() BEGIN_EXTERN_C()
@ -66,82 +75,143 @@ ZEND_API zend_bool instanceof_function_ex(zend_class_entry *instance_ce, zend_cl
ZEND_API zend_bool instanceof_function(zend_class_entry *instance_ce, zend_class_entry *ce TSRMLS_DC); ZEND_API zend_bool instanceof_function(zend_class_entry *instance_ce, zend_class_entry *ce TSRMLS_DC);
END_EXTERN_C() END_EXTERN_C()
/* Non-zero when c is one of the characters '0'..'9'.
 * The argument is evaluated twice, so it must not have side effects. */
#define ZEND_IS_DIGIT(c) ((c) >= '0' && (c) <= '9')
/* Non-zero when c is a hexadecimal *letter* ('A'..'F' or 'a'..'f').
 * Decimal digits are NOT matched: combine with ZEND_IS_DIGIT() to test for
 * any hex digit. The argument may be evaluated up to four times, so it must
 * not have side effects. */
#define ZEND_IS_XDIGIT(c) (((c) >= 'A' && (c) <= 'F') || ((c) >= 'a' && (c) <= 'f'))
/** /**
* Checks whether the string "str" with the length "length" is a numeric string. * Checks whether the string "str" with length "length" is numeric. The value
* of allow_errors determines whether it's required to be entirely numeric, or
* just its prefix. Leading whitespace is allowed.
* *
* The function returns 0 if the string did not contain a string; IS_LONG if * The function returns 0 if the string did not contain a valid number; IS_LONG
* the string contained a number that fits in the integer range and IS_DOUBLE * if it contained a number that fits within the range of a long; or IS_DOUBLE
* in case it did not. The long value is returned into the pointer *lval if * if the number was out of long range or contained a decimal point/exponent.
* that pointer was not NULL or into the pointer *dval if that pointer was not * The number's value is returned into the respective pointer, *lval or *dval,
* NULL. * if that pointer is not NULL.
*/ */
static inline zend_bool is_numeric_string(char *str, int length, long *lval, double *dval, int allow_errors)
static inline zend_uchar is_numeric_string(const char *str, int length, long *lval, double *dval, int allow_errors)
{ {
long local_lval; const char *ptr;
int base = 10, digits = 0, dp_or_e = 0;
double local_dval; double local_dval;
char *end_ptr_long, *end_ptr_double; zend_uchar type;
int conv_base=10;
if (!length) { if (!length) {
return 0; return 0;
} }
/* handle hex numbers */ /* Skip any whitespace
if (length>=2 && str[0]=='0' && (str[1]=='x' || str[1]=='X')) { * This is much faster than the isspace() function */
conv_base=16; while (*str == ' ' || *str == '\t' || *str == '\n' || *str == '\r' || *str == '\v' || *str == '\f') {
str++;
length--;
} }
errno=0; ptr = str;
local_lval = strtol(str, &end_ptr_long, conv_base);
if (errno!=ERANGE) { if (*ptr == '-' || *ptr == '+') {
if (end_ptr_long == str+length) { /* integer string */ ptr++;
if (lval) { }
*lval = local_lval;
if (ZEND_IS_DIGIT(*ptr)) {
/* Handle hex numbers
* str is used instead of ptr to disallow signs and keep old behavior */
if (length > 2 && *str == '0' && (str[1] == 'x' || str[1] == 'X')) {
base = 16;
ptr += 2;
}
/* Skip any leading 0s */
while (*ptr == '0') {
ptr++;
}
/* Count the number of digits. If a decimal point/exponent is found,
* it's a double. Otherwise, if there's a dval or no need to check for
* a full match, stop when there are too many digits for a long */
for (type = IS_LONG; !(digits >= MAX_LENGTH_OF_LONG && (dval || allow_errors == 1)); digits++, ptr++) {
check_digits:
if (ZEND_IS_DIGIT(*ptr) || (base == 16 && ZEND_IS_XDIGIT(*ptr))) {
continue;
} else if (base == 10) {
if (*ptr == '.' && dp_or_e < 1) {
goto process_double;
} else if ((*ptr == 'e' || *ptr == 'E') && dp_or_e < 2) {
const char *e = ptr + 1;
if (*e == '-' || *e == '+') {
ptr = e++;
}
if (ZEND_IS_DIGIT(*e)) {
goto process_double;
}
}
} }
return IS_LONG;
} else if (end_ptr_long == str && *end_ptr_long != '\0' && *str != '.' && *str != '-') { /* ignore partial string matches */ break;
}
if (base == 10) {
if (digits >= MAX_LENGTH_OF_LONG) {
dp_or_e = -1;
goto process_double;
}
} else if (!(digits < SIZEOF_LONG * 2 || (digits == SIZEOF_LONG * 2 && ptr[-digits] <= '7'))) {
if (dval) {
local_dval = zend_hex_strtod(str, (char **)&ptr);
}
type = IS_DOUBLE;
}
} else if (*ptr == '.' && ZEND_IS_DIGIT(ptr[1])) {
process_double:
type = IS_DOUBLE;
/* If there's a dval, do the conversion; else continue checking
* the digits if we need to check for a full match */
if (dval) {
local_dval = zend_strtod(str, (char **)&ptr);
} else if (allow_errors != 1 && dp_or_e != -1) {
dp_or_e = (*ptr++ == '.') ? 1 : 2;
goto check_digits;
}
} else {
return 0;
}
if (ptr != str + length) {
if (!allow_errors) {
return 0; return 0;
} }
} else { if (allow_errors == -1) {
end_ptr_long=NULL; zend_error(E_NOTICE, "A non well formed numeric value encountered");
}
if (conv_base==16) { /* hex string, under UNIX strtod() messes it up */
return 0;
}
errno=0;
local_dval = zend_strtod(str, &end_ptr_double);
if (errno != ERANGE) {
if (end_ptr_double == str+length) { /* floating point string */
if (!zend_finite(local_dval)) {
/* "inf","nan" and maybe other weird ones */
return 0;
}
if (dval) {
*dval = local_dval;
}
return IS_DOUBLE;
} }
} else {
end_ptr_double=NULL;
} }
if (!allow_errors) { if (type == IS_LONG) {
return 0; if (digits == MAX_LENGTH_OF_LONG - 1) {
} int cmp = strcmp(&ptr[-digits], long_min_digits);
if (allow_errors == -1) {
zend_error(E_NOTICE, "A non well formed numeric value encountered"); if (!(cmp < 0 || (cmp == 0 && *str == '-'))) {
} if (dval) {
*dval = zend_strtod(str, NULL);
}
return IS_DOUBLE;
}
}
if (lval) {
*lval = strtol(str, NULL, base);
}
if (end_ptr_double>end_ptr_long && dval) {
*dval = local_dval;
return IS_DOUBLE;
} else if (end_ptr_long && lval) {
*lval = local_lval;
return IS_LONG; return IS_LONG;
} else {
if (dval) {
*dval = local_dval;
}
return IS_DOUBLE;
} }
return 0;
} }
static inline char * static inline char *

View file

@ -2557,6 +2557,39 @@ ret:
return result; return result;
} }
/*
 * Convert the hexadecimal digits at the start of "str" into a double.
 *
 * An optional "0x"/"0X" prefix is skipped first. Scanning then consumes
 * characters while they are hex digits (0-9, A-F, a-f) and stops at the
 * first character that is not, which includes the terminating NUL.
 *
 * str    - NUL-terminated input string.
 * endptr - if not NULL, receives a pointer to the first unconsumed
 *          character, or "str" itself when no hex digit was consumed.
 *
 * Returns the accumulated value, or 0 when no digit was consumed.
 */
ZEND_API double zend_hex_strtod(const char *str, char **endptr)
{
	const char *cursor = str;
	const char *first_digit;
	double result = 0;
	int nibble;

	if (cursor[0] == '0' && (cursor[1] == 'x' || cursor[1] == 'X')) {
		cursor += 2;
	}
	/* Remember where the digits begin to tell later whether any were read */
	first_digit = cursor;

	for (;; cursor++) {
		char ch = *cursor;

		if (ch >= '0' && ch <= '9') {
			nibble = ch - '0';
		} else if (ch >= 'A' && ch <= 'F') {
			nibble = ch - 'A' + 10;
		} else if (ch >= 'a' && ch <= 'f') {
			nibble = ch - 'a' + 10;
		} else {
			/* Not a hex digit (or end of string): stop here */
			break;
		}
		result = result * 16 + nibble;
	}

	if (endptr != NULL) {
		*endptr = (char *)(cursor != first_digit ? cursor : str);
	}

	return result;
}
/* /*
* Local variables: * Local variables:
* tab-width: 4 * tab-width: 4

View file

@ -28,6 +28,7 @@ BEGIN_EXTERN_C()
ZEND_API void zend_freedtoa(char *s); ZEND_API void zend_freedtoa(char *s);
ZEND_API char * zend_dtoa(double _d, int mode, int ndigits, int *decpt, int *sign, char **rve); ZEND_API char * zend_dtoa(double _d, int mode, int ndigits, int *decpt, int *sign, char **rve);
ZEND_API double zend_strtod(const char *s00, char **se); ZEND_API double zend_strtod(const char *s00, char **se);
ZEND_API double zend_hex_strtod(const char *str, char **endptr);
ZEND_API int zend_startup_strtod(void); ZEND_API int zend_startup_strtod(void);
ZEND_API int zend_shutdown_strtod(void); ZEND_API int zend_shutdown_strtod(void);
END_EXTERN_C() END_EXTERN_C()