Avoid unnecessary memory allocation/deallocations and pcre calls

This commit is contained in:
Dmitry Stogov 2014-06-09 15:42:32 +04:00
parent 7bf4fa69be
commit dbaef7c038
2 changed files with 201 additions and 113 deletions

View file

@ -188,19 +188,11 @@ static int pcre_clean_cache(zval *data, void *arg TSRMLS_DC)
static char **make_subpats_table(int num_subpats, pcre_cache_entry *pce TSRMLS_DC)
{
pcre_extra *extra = pce->extra;
int name_cnt = 0, name_size, ni = 0;
int name_cnt = pce->name_count, name_size, ni = 0;
int rc;
char *name_table;
unsigned short name_idx;
char **subpat_names = (char **)ecalloc(num_subpats, sizeof(char *));
rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMECOUNT, &name_cnt);
if (rc < 0) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
efree(subpat_names);
return NULL;
}
if (name_cnt > 0) {
char **subpat_names;
int rc1, rc2;
rc1 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMETABLE, &name_table);
@ -208,10 +200,10 @@ static char **make_subpats_table(int num_subpats, pcre_cache_entry *pce TSRMLS_D
rc = rc2 ? rc2 : rc1;
if (rc < 0) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
efree(subpat_names);
return NULL;
}
subpat_names = (char **)ecalloc(num_subpats, sizeof(char *));
while (ni++ < name_cnt) {
name_idx = 0xff * (unsigned char)name_table[0] + (unsigned char)name_table[1];
subpat_names[name_idx] = name_table + 2;
@ -222,8 +214,6 @@ static char **make_subpats_table(int num_subpats, pcre_cache_entry *pce TSRMLS_D
}
name_table += name_size;
}
}
return subpat_names;
}
/* }}} */
@ -252,6 +242,7 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex TSRMLS
#endif
pcre_cache_entry *pce;
pcre_cache_entry new_entry;
int rc;
#if HAVE_SETLOCALE
# if defined(PHP_WIN32) && defined(ZTS)
@ -268,9 +259,9 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex TSRMLS
* We use a quick pcre_fullinfo() check to see whether cache is corrupted, and if it
* is, we flush it and compile the pattern from scratch.
*/
if (pcre_fullinfo(pce->re, NULL, PCRE_INFO_CAPTURECOUNT, &count) == PCRE_ERROR_BADMAGIC) {
zend_hash_clean(&PCRE_G(pcre_cache));
} else {
//??? if (pcre_fullinfo(pce->re, NULL, PCRE_INFO_CAPTURECOUNT, &count) == PCRE_ERROR_BADMAGIC) {
//??? zend_hash_clean(&PCRE_G(pcre_cache));
//??? } else {
#if HAVE_SETLOCALE
if (!strcmp(pce->locale, locale)) {
#endif
@ -278,7 +269,7 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex TSRMLS
#if HAVE_SETLOCALE
}
#endif
}
//??? }
}
p = regex->val;
@ -461,6 +452,18 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex TSRMLS
new_entry.tables = tables;
#endif
rc = pcre_fullinfo(re, extra, PCRE_INFO_CAPTURECOUNT, &new_entry.capture_count);
if (rc < 0) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
return NULL;
}
rc = pcre_fullinfo(re, extra, PCRE_INFO_NAMECOUNT, &new_entry.name_count);
if (rc < 0) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
return NULL;
}
/*
* Interned strings are not duplicated when stored in HashTable,
* but all the interned strings created during HTTP request are removed
@ -517,7 +520,7 @@ static inline void add_offset_pair(zval *result, char *str, int len, int offset,
{
zval match_pair;
array_init(&match_pair);
array_init_size(&match_pair, 2);
/* Add (match, offset) to the return value */
add_next_index_stringl(&match_pair, str, len);
@ -581,6 +584,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
int offset_capture; /* Capture match offsets: yes/no */
unsigned char *mark = NULL; /* Target for MARK name */
zval marks; /* Array of marks for PREG_PATTERN_ORDER */
ALLOCA_FLAG(use_heap);
ZVAL_UNDEF(&marks);
@ -631,25 +635,26 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
#endif
/* Calculate the size of the offsets array, and allocate memory for it. */
rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &num_subpats);
if (rc < 0) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
RETURN_FALSE;
}
num_subpats++;
num_subpats = pce->capture_count + 1;
size_offsets = num_subpats * 3;
/*
* Build a mapping from subpattern numbers to their names. We will always
* allocate the table, even though there may be no named subpatterns. This
* avoids somewhat more complicated logic in the inner loops.
* Build a mapping from subpattern numbers to their names. We will
* allocate the table only if there are any named subpatterns.
*/
subpat_names = NULL;
if (pce->name_count > 0) {
subpat_names = make_subpats_table(num_subpats, pce TSRMLS_CC);
if (!subpat_names) {
RETURN_FALSE;
}
}
if (size_offsets <= 32) {
offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
} else {
offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
}
/* Allocate match sets array and initialize the values. */
if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
@ -684,8 +689,14 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
if (subpats != NULL) {
/* Try to get the list of substrings and display a warning if failed. */
if (pcre_get_substring_list(subject, offsets, count, &stringlist) < 0) {
if (subpat_names) {
efree(subpat_names);
}
if (size_offsets <= 32) {
free_alloca(offsets, use_heap);
} else {
efree(offsets);
}
if (match_sets) efree(match_sets);
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Get subpatterns list failed");
RETURN_FALSE;
@ -694,11 +705,13 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
if (global) { /* global pattern matching */
if (subpats && subpats_order == PREG_PATTERN_ORDER) {
/* For each subpattern, insert it into the appropriate array. */
for (i = 0; i < count; i++) {
if (offset_capture) {
for (i = 0; i < count; i++) {
add_offset_pair(&match_sets[i], (char *)stringlist[i],
offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL);
}
} else {
for (i = 0; i < count; i++) {
add_next_index_stringl(&match_sets[i], (char *)stringlist[i],
offsets[(i<<1)+1] - offsets[i<<1]);
}
@ -722,14 +735,17 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
}
} else {
/* Allocate the result set array */
array_init(&result_set);
array_init_size(&result_set, count + (mark ? 1 : 0));
/* Add all the subpatterns to it */
for (i = 0; i < count; i++) {
if (subpat_names) {
if (offset_capture) {
for (i = 0; i < count; i++) {
add_offset_pair(&result_set, (char *)stringlist[i],
offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i]);
}
} else {
for (i = 0; i < count; i++) {
if (subpat_names[i]) {
add_assoc_stringl(&result_set, subpat_names[i], (char *)stringlist[i],
offsets[(i<<1)+1] - offsets[i<<1]);
@ -738,6 +754,19 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
offsets[(i<<1)+1] - offsets[i<<1]);
}
}
} else {
if (offset_capture) {
for (i = 0; i < count; i++) {
add_offset_pair(&result_set, (char *)stringlist[i],
offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL);
}
} else {
for (i = 0; i < count; i++) {
add_next_index_stringl(&result_set, (char *)stringlist[i],
offsets[(i<<1)+1] - offsets[i<<1]);
}
}
}
/* Add MARK, if available */
if (mark) {
add_assoc_string(&result_set, "MARK", (char *) mark);
@ -747,12 +776,15 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
}
} else { /* single pattern matching */
/* For each subpattern, insert it into the subpatterns array. */
for (i = 0; i < count; i++) {
if (subpat_names) {
if (offset_capture) {
for (i = 0; i < count; i++) {
add_offset_pair(subpats, (char *)stringlist[i],
offsets[(i<<1)+1] - offsets[i<<1],
offsets[i<<1], subpat_names[i]);
}
} else {
for (i = 0; i < count; i++) {
if (subpat_names[i]) {
add_assoc_stringl(subpats, subpat_names[i], (char *)stringlist[i],
offsets[(i<<1)+1] - offsets[i<<1]);
@ -761,6 +793,20 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
offsets[(i<<1)+1] - offsets[i<<1]);
}
}
} else {
if (offset_capture) {
for (i = 0; i < count; i++) {
add_offset_pair(subpats, (char *)stringlist[i],
offsets[(i<<1)+1] - offsets[i<<1],
offsets[i<<1], NULL);
}
} else {
for (i = 0; i < count; i++) {
add_next_index_stringl(subpats, (char *)stringlist[i],
offsets[(i<<1)+1] - offsets[i<<1]);
}
}
}
/* Add MARK, if available */
if (mark) {
add_assoc_string(subpats, "MARK", (char *) mark);
@ -796,6 +842,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
/* Add the match sets to the output array and clean up */
if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
if (subpat_names) {
for (i = 0; i < num_subpats; i++) {
if (subpat_names[i]) {
zend_hash_str_update(Z_ARRVAL_P(subpats), subpat_names[i],
@ -804,6 +851,11 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
}
zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
}
} else {
for (i = 0; i < num_subpats; i++) {
zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
}
}
efree(match_sets);
if (Z_TYPE(marks) != IS_UNDEF) {
@ -811,8 +863,14 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
}
}
if (size_offsets <= 32) {
free_alloca(offsets, use_heap);
} else {
efree(offsets);
}
if (subpat_names) {
efree(subpat_names);
}
/* Did we encounter an error? */
if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) {
@ -887,13 +945,19 @@ static zend_string *preg_do_repl_func(zval *function, char *subject, int *offset
zval args[1]; /* Argument to pass to function */
int i;
array_init(&args[0]);
array_init_size(&args[0], count + (mark ? 1 : 0));
if (subpat_names) {
for (i = 0; i < count; i++) {
if (subpat_names[i]) {
add_assoc_stringl(&args[0], subpat_names[i], &subject[offsets[i<<1]] , offsets[(i<<1)+1] - offsets[i<<1]);
}
add_next_index_stringl(&args[0], &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1]);
}
} else {
for (i = 0; i < count; i++) {
add_next_index_stringl(&args[0], &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1]);
}
}
if (mark) {
add_assoc_string(&args[0], "MARK", (char *) mark);
}
@ -1048,6 +1112,7 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject,
unsigned char *mark = NULL; /* Target for MARK name */
zend_string *result; /* Result of replacement */
zend_string *eval_result=NULL; /* Result of eval or custom function */
ALLOCA_FLAG(use_heap);
if (extra == NULL) {
extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
@ -1073,25 +1138,25 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject,
}
/* Calculate the size of the offsets array, and allocate memory for it. */
rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &num_subpats);
if (rc < 0) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
return NULL;
}
num_subpats++;
num_subpats = pce->capture_count + 1;
size_offsets = num_subpats * 3;
if (size_offsets <= 32) {
offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
} else {
offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
}
/*
* Build a mapping from subpattern numbers to their names. We will always
* allocate the table, even though there may be no named subpatterns. This
* avoids somewhat more complicated logic in the inner loops.
* Build a mapping from subpattern numbers to their names. We will
* allocate the table only if there are any named subpatterns.
*/
subpat_names = NULL;
if (pce->name_count > 0) {
subpat_names = make_subpats_table(num_subpats, pce TSRMLS_CC);
if (!subpat_names) {
return NULL;
}
offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
}
alloc_len = 2 * subject_len;
result = STR_ALLOC(alloc_len * sizeof(char), 0);
@ -1251,8 +1316,14 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject,
if (result) {
result->len = result_len;
}
if (size_offsets <= 32) {
free_alloca(offsets, use_heap);
} else {
efree(offsets);
}
if (subpat_names) {
efree(subpat_names);
}
return result;
}
@ -1268,7 +1339,7 @@ static zend_string *php_replace_in_subject(zval *regex, zval *replace, zval *sub
empty_replace;
zend_string *result;
zend_string *subject_str = zval_get_string(subject);
HashPosition pos;
zend_uint replace_idx;
/* FIXME: This might need to be changed to STR_EMPTY_ALLOC(). Check if this zval could be dtor()'ed somehow */
ZVAL_EMPTY_STRING(&empty_replace);
@ -1276,8 +1347,7 @@ static zend_string *php_replace_in_subject(zval *regex, zval *replace, zval *sub
/* If regex is an array */
if (Z_TYPE_P(regex) == IS_ARRAY) {
replace_value = replace;
if (Z_TYPE_P(replace) == IS_ARRAY && !is_callable_replace)
zend_hash_internal_pointer_reset_ex(Z_ARRVAL_P(replace), &pos);
replace_idx = 0;
/* For each entry in the regex array, get the entry */
ZEND_HASH_FOREACH_VAL(Z_ARRVAL_P(regex), regex_entry) {
@ -1287,12 +1357,20 @@ static zend_string *php_replace_in_subject(zval *regex, zval *replace, zval *sub
/* If replace is an array and not a callable construct */
if (Z_TYPE_P(replace) == IS_ARRAY && !is_callable_replace) {
/* Get current entry */
if ((replace_entry = zend_hash_get_current_data_ex(Z_ARRVAL_P(replace), &pos)) != NULL) {
replace_entry = NULL;
while (replace_idx < Z_ARRVAL_P(replace)->nNumUsed) {
if (Z_TYPE(Z_ARRVAL_P(replace)->arData[replace_idx].val) != IS_UNUSED) {
replace_entry = &Z_ARRVAL_P(replace)->arData[replace_idx].val;
break;
}
replace_idx++;
}
if (replace_entry != NULL) {
if (!is_callable_replace) {
convert_to_string_ex(replace_entry);
}
replace_value = replace_entry;
zend_hash_move_forward_ex(Z_ARRVAL_P(replace), &pos);
replace_idx++;
} else {
/* We've run out of replacement strings, so use an empty one */
replace_value = &empty_replace;
@ -1386,7 +1464,7 @@ static void preg_replace_impl(INTERNAL_FUNCTION_PARAMETERS, int is_callable_repl
/* if subject is an array */
if (Z_TYPE_P(subject) == IS_ARRAY) {
array_init(return_value);
array_init_size(return_value, zend_hash_num_elements(Z_ARRVAL_P(subject)));
/* For each subject entry, convert it to string, then perform replacement
and add the result to the return_value array. */
@ -1494,6 +1572,7 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec
int no_empty; /* If NO_EMPTY flag is set */
int delim_capture; /* If delimiters should be captured */
int offset_capture; /* If offsets should be captured */
ALLOCA_FLAG(use_heap);
no_empty = flags & PREG_SPLIT_NO_EMPTY;
delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
@ -1517,13 +1596,12 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec
array_init(return_value);
/* Calculate the size of the offsets array, and allocate memory for it. */
rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets);
if (rc < 0) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
RETURN_FALSE;
}
size_offsets = (size_offsets + 1) * 3;
size_offsets = (pce->capture_count + 1) * 3;
if (size_offsets <= 32) {
offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
} else {
offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
}
/* Start at the beginning of the string */
start_offset = 0;
@ -1643,8 +1721,12 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec
/* Clean up */
if (size_offsets <= 32) {
free_alloca(offsets, use_heap);
} else {
efree(offsets);
}
}
/* }}} */
/* {{{ proto string preg_quote(string str [, string delim_char])
@ -1772,6 +1854,7 @@ PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return
zend_bool invert; /* Whether to return non-matching
entries */
int rc;
ALLOCA_FLAG(use_heap);
invert = flags & PREG_GREP_INVERT ? 1 : 0;
@ -1786,13 +1869,12 @@ PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return
#endif
/* Calculate the size of the offsets array, and allocate memory for it. */
rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets);
if (rc < 0) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
RETURN_FALSE;
}
size_offsets = (size_offsets + 1) * 3;
size_offsets = (pce->capture_count + 1) * 3;
if (size_offsets <= 32) {
offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
} else {
offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
}
/* Initialize return array */
array_init(return_value);
@ -1836,8 +1918,12 @@ PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return
} ZEND_HASH_FOREACH_END();
/* Clean up */
if (size_offsets <= 32) {
free_alloca(offsets, use_heap);
} else {
efree(offsets);
}
}
/* }}} */
/* {{{ proto int preg_last_error()

View file

@ -44,6 +44,8 @@ typedef struct {
pcre *re;
pcre_extra *extra;
int preg_options;
int capture_count;
int name_count;
#if HAVE_SETLOCALE
char *locale;
unsigned const char *tables;