From ccf15cf2dc92d11f92ee30c97e2d86b07f81e030 Mon Sep 17 00:00:00 2001 From: Gustavo Lopes Date: Mon, 7 Jan 2013 03:13:11 +0100 Subject: [PATCH 1/7] Optimize strtr w/ 2nd arg array Fixes bug #63893: poor efficiency of strtr() using array with keys of very different length. The implementation is basically all new, which carries some risk with it. The algorithm is described in "A Fast Algorithm For Multi-Pattern Searching" (1994) by Sun Wu and Udi Manber. --- ext/standard/string.c | 359 +++++++++++++++++++++++++++++++----------- 1 file changed, 269 insertions(+), 90 deletions(-) diff --git a/ext/standard/string.c b/ext/standard/string.c index 29115fea7a4..dc92e8e085a 100644 --- a/ext/standard/string.c +++ b/ext/standard/string.c @@ -22,7 +22,9 @@ /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */ +#define _GNU_SOURCE 1 #include +#include #include "php.h" #include "php_rand.h" #include "php_string.h" @@ -57,6 +59,7 @@ #include "php_globals.h" #include "basic_functions.h" #include "php_smart_str.h" +#include #ifdef ZTS #include "TSRM.h" #endif @@ -2772,112 +2775,288 @@ PHPAPI char *php_strtr(char *str, int len, char *str_from, char *str_to, int trl } /* }}} */ -/* {{{ php_strtr_array - */ -static void php_strtr_array(zval *return_value, char *str, int slen, HashTable *hash) +/* {{{ Definitions for php_strtr_array */ +typedef size_t STRLEN; /* STRLEN should be unsigned */ +typedef uint16_t HASH; +typedef struct { + HASH table_mask; + STRLEN entries[1]; +} SHIFT_TAB; +typedef struct { + HASH table_mask; + int entries[1]; +} HASH_TAB; +typedef struct { + const char *s; + STRLEN l; +} STR; +typedef struct _match_node MATCH_NODE; +struct _match_node { + STRLEN pos; + MATCH_NODE *next; +}; +typedef struct _pat_and_repl { + STR pat; + STR repl; +} PATNREPL; + +#define S(a) ((a)->s) +#define L(a) ((a)->l) + +#define SHIFT_TAB_BITS 13 +#define HASH_TAB_BITS 10 /* should be less than sizeof(HASH) * 8 */ +#define SHIFT_TAB_SIZE (1U << SHIFT_TAB_BITS) +#define HASH_TAB_SIZE (1U << HASH_TAB_BITS) + +typedef struct { + int B; /* size of suffixes */ + int Bp; /* size of prefixes */ + STRLEN m; /* minimum pattern length */ + int patnum; /* number of patterns */ + SHIFT_TAB *shift; /* table mapping hash to allowed shift */ + HASH_TAB *hash; /* table mapping hash to int (pair of pointers) */ + HASH *prefix; /* array of hashes of prefixes by pattern suffix hash order */ + PATNREPL *patterns; /* array of prefixes by pattern suffix hash order */ +} PPRES; +/* }}} */ + +/* {{{ php_strtr_hash */ +static inline HASH php_strtr_hash(const char *str, int len) { - zval **entry; - char *string_key; - uint string_key_len; - zval **trans; - zval ctmp; - ulong num_key; - int minlen = 128*1024; - int maxlen = 0, pos, len, found; - char *key; - HashPosition hpos; - smart_str result = {0}; - HashTable tmp_hash; - - zend_hash_init(&tmp_hash, zend_hash_num_elements(hash), NULL, NULL, 0); - zend_hash_internal_pointer_reset_ex(hash, &hpos); - while (zend_hash_get_current_data_ex(hash, (void **)&entry, &hpos) == SUCCESS) { - switch (zend_hash_get_current_key_ex(hash, &string_key, &string_key_len, &num_key, 0, &hpos)) { - case HASH_KEY_IS_STRING: - len = string_key_len-1; - if (len < 1) { - zend_hash_destroy(&tmp_hash); - RETURN_FALSE; - } - zend_hash_add(&tmp_hash, string_key, string_key_len, entry, sizeof(zval*), NULL); - if (len > maxlen) { - maxlen = len; - } - if (len < minlen) { - minlen = len; - } - break; - - case HASH_KEY_IS_LONG: - Z_TYPE(ctmp) = IS_LONG; - Z_LVAL(ctmp) = num_key; - - convert_to_string(&ctmp); - len = Z_STRLEN(ctmp); - zend_hash_add(&tmp_hash, Z_STRVAL(ctmp), len+1, entry, sizeof(zval*), NULL); - zval_dtor(&ctmp); - - if (len > maxlen) { - maxlen = len; - } - if (len < minlen) { - minlen = len; - } - break; - } - zend_hash_move_forward_ex(hash, &hpos); + HASH res = 0; + int i; + for (i = 0; i < len; i++) { + res = (res << 5) + res + (unsigned char)str[i]; } - key = emalloc(maxlen+1); - pos = 0; + return res; +} +/* }}} */ +/* {{{ php_strtr_populate_shift */ +static inline void php_strtr_populate_shift(PATNREPL *patterns, int patnum, int B, STRLEN m, SHIFT_TAB *shift) +{ + int i; + STRLEN j, + max_shift; - while (pos < slen) { - if ((pos + maxlen) > slen) { - maxlen = slen - pos; + max_shift = m - B + 1; + for (i = 0; i < SHIFT_TAB_SIZE; i++) { + shift->entries[i] = max_shift; + } + for (i = 0; i < patnum; i++) { + for (j = 0; j < m - B + 1; j++) { + HASH h = php_strtr_hash(&S(&patterns[i].pat)[j], B) & shift->table_mask; + assert((long long) m - (long long) j - B >= 0); + shift->entries[h] = MIN(shift->entries[h], m - j - B); } + } +} +/* }}} */ +/* {{{ php_strtr_compare_hash_suffix */ +static int php_strtr_compare_hash_suffix(const void *a, const void *b, void *ctx_g) +{ + const PPRES *res = ctx_g; + const PATNREPL *pnr_a = a, + *pnr_b = b; + HASH hash_a = php_strtr_hash(&S(&pnr_a->pat)[res->m - res->B], res->B) + & res->hash->table_mask, + hash_b = php_strtr_hash(&S(&pnr_b->pat)[res->m - res->B], res->B) + & res->hash->table_mask; + /* TODO: don't recalculate the hashes all the time */ + return hash_a - hash_b; +} +/* }}} */ - found = 0; - memcpy(key, str+pos, maxlen); +/* {{{ PPRES *php_strtr_array_prepare(STR *text, PATNREPL *patterns, int patnum, int B, int Bp) */ +static PPRES *php_strtr_array_prepare(STR *text, PATNREPL *patterns, int patnum, int B, int Bp) +{ + int i; + PPRES *res = emalloc(sizeof *res); - for (len = maxlen; len >= minlen; len--) { - key[len] = 0; + res->m = (STRLEN)-1; + for (i = 0; i < patnum; i++) { + if (L(&patterns[i].pat) < res->m) { + res->m = L(&patterns[i].pat); + } + } + assert(res->m > 0); + res->B = B = MIN(B, res->m); + res->Bp = Bp = MIN(Bp, res->m); - if (zend_hash_find(&tmp_hash, key, len+1, (void**)&trans) == SUCCESS) { - char *tval; - int tlen; - zval tmp; + res->shift = safe_emalloc(SHIFT_TAB_SIZE, sizeof(*res->shift->entries), sizeof(*res->shift)); + res->shift->table_mask = SHIFT_TAB_SIZE - 1; + php_strtr_populate_shift(patterns, patnum, B, res->m, res->shift); - if (Z_TYPE_PP(trans) != IS_STRING) { - tmp = **trans; - zval_copy_ctor(&tmp); - convert_to_string(&tmp); - tval = Z_STRVAL(tmp); - tlen = Z_STRLEN(tmp); - } else { - tval = Z_STRVAL_PP(trans); - tlen = Z_STRLEN_PP(trans); - } + res->hash = safe_emalloc(HASH_TAB_SIZE, sizeof(*res->hash->entries), sizeof(*res->shift)); + res->hash->table_mask = HASH_TAB_SIZE - 1; - smart_str_appendl(&result, tval, tlen); - pos += len; - found = 1; + res->patterns = safe_emalloc(patnum, sizeof(*res->patterns), 0); + memcpy(res->patterns, patterns, sizeof(*patterns) * patnum); + qsort_r(res->patterns, patnum, sizeof(*res->patterns), php_strtr_compare_hash_suffix, res); - if (Z_TYPE_PP(trans) != IS_STRING) { - zval_dtor(&tmp); - } - break; + res->prefix = safe_emalloc(patnum, sizeof(*res->prefix), 0); + for (i = 0; i < patnum; i++) { + res->prefix[i] = php_strtr_hash(S(&res->patterns[i].pat), Bp); + } + + /* Initialize the rest of ->hash */ + for (i = 0; i < HASH_TAB_SIZE; i++) { + res->hash->entries[i] = -1; + } + { + HASH last_h = -1; /* assumes not all bits are used in res->hash */ + /* res->patterns is already ordered by hash. + * Make res->hash->entries[h] de index of the first pattern in + * res->patterns that has hash h */ + for (i = 0; i < patnum; i++) { + HASH h = php_strtr_hash(&S(&res->patterns[i].pat)[res->m - res->B], res->B) + & res->hash->table_mask; + if (h != last_h) { + res->hash->entries[h] = i; + last_h = h; } } - - if (! found) { - smart_str_appendc(&result, str[pos++]); + } + res->hash->entries[HASH_TAB_SIZE] = patnum; + for (i = HASH_TAB_SIZE - 1; i >= 0; i--) { + if (res->hash->entries[i] == -1) { + res->hash->entries[i] = res->hash->entries[i + 1]; } } - efree(key); - zend_hash_destroy(&tmp_hash); - smart_str_0(&result); - RETVAL_STRINGL(result.c, result.len, 0); + res->patnum = patnum; + + return res; +} +/* }}} */ +/* {{{ php_strtr_array_destroy_ppres(PPRES *d) */ +static void php_strtr_array_destroy_ppres(PPRES *d) +{ + efree(d->shift); + efree(d->hash); + efree(d->prefix); + efree(d->patterns); + efree(d); +} +/* }}} */ + +/* {{{ php_strtr_array_do_repl(STR *text, PPRES *d, zval *return_value) */ +static void php_strtr_array_do_repl(STR *text, PPRES *d, zval *return_value) +{ + STRLEN pos = 0, + lastpos = L(text) - d->m; + smart_str result = {0}; + + while (pos <= lastpos) { + HASH h = php_strtr_hash(&S(text)[pos + d->m - d->B], d->B) & d->shift->table_mask; + STRLEN shift = d->shift->entries[h]; + + if (shift > 0) { + smart_str_appendl(&result, &S(text)[pos], shift); + pos += shift; + } else { + HASH h2 = h & d->hash->table_mask, + prefix_h = php_strtr_hash(&S(text)[pos], d->Bp); + + int offset_start = d->hash->entries[h2], + offset_end = d->hash->entries[h2 + 1], /* exclusive */ + i = 0; + + for (i = offset_start; i < offset_end; i++) { + PATNREPL *pnr; + if (d->prefix[i] != prefix_h) + continue; + + pnr = &d->patterns[i]; + if (L(&pnr->pat) > L(text) - pos || + memcmp(S(&pnr->pat), &S(text)[pos], L(&pnr->pat)) != 0) + continue; + + smart_str_appendl(&result, S(&pnr->repl), (int)L(&pnr->repl)); + pos += L(&pnr->pat); + goto end_outer_loop; + } + + smart_str_appendc(&result, S(text)[pos]); + pos++; +end_outer_loop: ; + } + } + + if (pos < L(text)) { + smart_str_appendl(&result, &S(text)[pos], (int)(L(text) - pos)); + } + + if (result.c != NULL) { + smart_str_0(&result); + RETVAL_STRINGL(result.c, result.len, 0); + } else { + RETURN_EMPTY_STRING(); + } +} +/* }}} */ + +/* {{{ php_strtr_array */ +static void php_strtr_array(zval *return_value, char *str, int slen, HashTable *pats) +{ + PPRES *data; + STR text; + PATNREPL *patterns; + HashPosition hpos; + zval **entry; + int num_pats = zend_hash_num_elements(pats), + i; + + S(&text) = str; + L(&text) = slen; + patterns = safe_emalloc(num_pats, sizeof(*patterns), 0); + + for (i = 0, zend_hash_internal_pointer_reset_ex(pats, &hpos); + zend_hash_get_current_data_ex(pats, (void **)&entry, &hpos) == SUCCESS; + i++, zend_hash_move_forward_ex(pats, &hpos)) { + char *string_key; + uint string_key_len; + ulong num_key; + int free_str = 0, + free_repl = 0; + zval *tzv; + + switch (zend_hash_get_current_key_ex(pats, &string_key, &string_key_len, &num_key, 0, &hpos)) { + case HASH_KEY_IS_LONG: + string_key_len = 1 + zend_spprintf(&string_key, 0, "%ld", (long)num_key); + free_str = 1; + /* break missing intentionally */ + + case HASH_KEY_IS_STRING: + string_key_len--; /* exclude final '\0' */ + if (string_key_len == 0) { /* empty string given as pattern */ + efree(patterns); + RETURN_FALSE; + } + if (string_key_len > slen) { /* this pattern can never match */ + continue; + } + + if (Z_TYPE_PP(entry) != IS_STRING) { + tzv = *entry; + zval_addref_p(tzv); + SEPARATE_ZVAL(&tzv); + convert_to_string(tzv); + entry = &tzv; + free_repl = 1; + } + + S(&patterns[i].pat) = string_key; + L(&patterns[i].pat) = string_key_len; + S(&patterns[i].repl) = Z_STRVAL_PP(entry); + L(&patterns[i].repl) = Z_STRLEN_PP(entry); + } + } + + data = php_strtr_array_prepare(&text, patterns, i, 2, 2); + efree(patterns); + php_strtr_array_do_repl(&text, data, return_value); + php_strtr_array_destroy_ppres(data); } /* }}} */ From 2111ee3df54e890c9e2f14b09c01d68445389540 Mon Sep 17 00:00:00 2001 From: Gustavo Lopes Date: Wed, 9 Jan 2013 00:33:14 +0100 Subject: [PATCH 2/7] Refactoring, bugs & leaks --- ext/standard/string.c | 149 +++++++++++++++++++++++++++--------------- 1 file changed, 97 insertions(+), 52 deletions(-) diff --git a/ext/standard/string.c b/ext/standard/string.c index dc92e8e085a..827f9dec22a 100644 --- a/ext/standard/string.c +++ b/ext/standard/string.c @@ -2863,7 +2863,91 @@ static int php_strtr_compare_hash_suffix(const void *a, const void *b, void *ctx hash_b = php_strtr_hash(&S(&pnr_b->pat)[res->m - res->B], res->B) & res->hash->table_mask; /* TODO: don't recalculate the hashes all the time */ - return hash_a - hash_b; + if (hash_a > hash_b) { + return 1; + } else if (hash_a < hash_b) { + return -1; + } else { + /* longer patterns must be sorted first */ + if (L(&pnr_a->pat) > L(&pnr_b->pat)) { + return -1; + } else if (L(&pnr_a->pat) < L(&pnr_b->pat)) { + return 1; + } else { + return 0; + } + } +} +/* }}} */ +/* {{{ php_strtr_free_strp */ +static void php_strtr_free_strp(void *strp) +{ + STR_FREE(*(char**)strp); +} +/* }}} */ +/* {{{ php_strtr_array_prepare_repls */ +static PATNREPL *php_strtr_array_prepare_repls(int slen, HashTable *pats, zend_llist **allocs, int *outsize) +{ + PATNREPL *patterns; + HashPosition hpos; + zval **entry; + int num_pats = zend_hash_num_elements(pats), + i; + + patterns = safe_emalloc(num_pats, sizeof(*patterns), 0); + *allocs = emalloc(sizeof **allocs); + zend_llist_init(*allocs, sizeof(void*), &php_strtr_free_strp, 0); + + for (i = 0, zend_hash_internal_pointer_reset_ex(pats, &hpos); + zend_hash_get_current_data_ex(pats, (void **)&entry, &hpos) == SUCCESS; + zend_hash_move_forward_ex(pats, &hpos)) { + char *string_key; + uint string_key_len; + ulong num_key; + zval *tzv = NULL; + + switch (zend_hash_get_current_key_ex(pats, &string_key, &string_key_len, &num_key, 0, &hpos)) { + case HASH_KEY_IS_LONG: + string_key_len = 1 + zend_spprintf(&string_key, 0, "%ld", (long)num_key); + zend_llist_add_element(*allocs, &string_key); + /* break missing intentionally */ + + case HASH_KEY_IS_STRING: + string_key_len--; /* exclude final '\0' */ + if (string_key_len == 0) { /* empty string given as pattern */ + efree(patterns); + zend_llist_destroy(*allocs); + efree(*allocs); + *allocs = NULL; + return NULL; + } + if (string_key_len > slen) { /* this pattern can never match */ + continue; + } + + if (Z_TYPE_PP(entry) != IS_STRING) { + tzv = *entry; + zval_addref_p(tzv); + SEPARATE_ZVAL(&tzv); + convert_to_string(tzv); + entry = &tzv; + zend_llist_add_element(*allocs, &Z_STRVAL_PP(entry)); + } + + S(&patterns[i].pat) = string_key; + L(&patterns[i].pat) = string_key_len; + S(&patterns[i].repl) = Z_STRVAL_PP(entry); + L(&patterns[i].repl) = Z_STRLEN_PP(entry); + i++; + + if (tzv) { + efree(tzv); + } + } + } + + *outsize = i; + return patterns; } /* }}} */ @@ -2952,7 +3036,7 @@ static void php_strtr_array_do_repl(STR *text, PPRES *d, zval *return_value) STRLEN shift = d->shift->entries[h]; if (shift > 0) { - smart_str_appendl(&result, &S(text)[pos], shift); + smart_str_appendl(&result, &S(text)[pos], MIN(shift, L(text) - pos)); pos += shift; } else { HASH h2 = h & d->hash->table_mask, @@ -2999,64 +3083,25 @@ end_outer_loop: ; /* {{{ php_strtr_array */ static void php_strtr_array(zval *return_value, char *str, int slen, HashTable *pats) { - PPRES *data; - STR text; - PATNREPL *patterns; - HashPosition hpos; - zval **entry; - int num_pats = zend_hash_num_elements(pats), - i; + PPRES *data; + STR text; + PATNREPL *patterns; + int patterns_len; + zend_llist *allocs; S(&text) = str; L(&text) = slen; - patterns = safe_emalloc(num_pats, sizeof(*patterns), 0); - for (i = 0, zend_hash_internal_pointer_reset_ex(pats, &hpos); - zend_hash_get_current_data_ex(pats, (void **)&entry, &hpos) == SUCCESS; - i++, zend_hash_move_forward_ex(pats, &hpos)) { - char *string_key; - uint string_key_len; - ulong num_key; - int free_str = 0, - free_repl = 0; - zval *tzv; - - switch (zend_hash_get_current_key_ex(pats, &string_key, &string_key_len, &num_key, 0, &hpos)) { - case HASH_KEY_IS_LONG: - string_key_len = 1 + zend_spprintf(&string_key, 0, "%ld", (long)num_key); - free_str = 1; - /* break missing intentionally */ - - case HASH_KEY_IS_STRING: - string_key_len--; /* exclude final '\0' */ - if (string_key_len == 0) { /* empty string given as pattern */ - efree(patterns); - RETURN_FALSE; - } - if (string_key_len > slen) { /* this pattern can never match */ - continue; - } - - if (Z_TYPE_PP(entry) != IS_STRING) { - tzv = *entry; - zval_addref_p(tzv); - SEPARATE_ZVAL(&tzv); - convert_to_string(tzv); - entry = &tzv; - free_repl = 1; - } - - S(&patterns[i].pat) = string_key; - L(&patterns[i].pat) = string_key_len; - S(&patterns[i].repl) = Z_STRVAL_PP(entry); - L(&patterns[i].repl) = Z_STRLEN_PP(entry); - } + patterns = php_strtr_array_prepare_repls(slen, pats, &allocs, &patterns_len); + if (patterns == NULL) { + RETURN_FALSE; } - - data = php_strtr_array_prepare(&text, patterns, i, 2, 2); + data = php_strtr_array_prepare(&text, patterns, patterns_len, 2, 2); efree(patterns); php_strtr_array_do_repl(&text, data, return_value); php_strtr_array_destroy_ppres(data); + zend_llist_destroy(allocs); + efree(allocs); } /* }}} */ From cddbb98ada6bdba1596ea82386401edf9b680d47 Mon Sep 17 00:00:00 2001 From: Gustavo Lopes Date: Wed, 9 Jan 2013 22:29:28 +0100 Subject: [PATCH 3/7] strtr() with 2nd param array - optimization About a 1.25x speedup in my test script by writing the result string only when a match is found and at the end instead of on each iteration. --- ext/standard/string.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/ext/standard/string.c b/ext/standard/string.c index 827f9dec22a..4947a67f860 100644 --- a/ext/standard/string.c +++ b/ext/standard/string.c @@ -3028,6 +3028,7 @@ static void php_strtr_array_destroy_ppres(PPRES *d) static void php_strtr_array_do_repl(STR *text, PPRES *d, zval *return_value) { STRLEN pos = 0, + nextwpos = 0, lastpos = L(text) - d->m; smart_str result = {0}; @@ -3036,7 +3037,6 @@ static void php_strtr_array_do_repl(STR *text, PPRES *d, zval *return_value) STRLEN shift = d->shift->entries[h]; if (shift > 0) { - smart_str_appendl(&result, &S(text)[pos], MIN(shift, L(text) - pos)); pos += shift; } else { HASH h2 = h & d->hash->table_mask, @@ -3056,20 +3056,19 @@ static void php_strtr_array_do_repl(STR *text, PPRES *d, zval *return_value) memcmp(S(&pnr->pat), &S(text)[pos], L(&pnr->pat)) != 0) continue; - smart_str_appendl(&result, S(&pnr->repl), (int)L(&pnr->repl)); + smart_str_appendl(&result, &S(text)[nextwpos], pos - nextwpos); + smart_str_appendl(&result, S(&pnr->repl), L(&pnr->repl)); pos += L(&pnr->pat); + nextwpos = pos; goto end_outer_loop; } - smart_str_appendc(&result, S(text)[pos]); pos++; end_outer_loop: ; } } - if (pos < L(text)) { - smart_str_appendl(&result, &S(text)[pos], (int)(L(text) - pos)); - } + smart_str_appendl(&result, &S(text)[nextwpos], L(text) - nextwpos); if (result.c != NULL) { smart_str_0(&result); From 93d1171c47d10158e6e9855236277deb09cf7e6b Mon Sep 17 00:00:00 2001 From: Gustavo Lopes Date: Wed, 9 Jan 2013 23:20:11 +0100 Subject: [PATCH 4/7] Remove unused block --- ext/standard/string.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/ext/standard/string.c b/ext/standard/string.c index 4947a67f860..14259cabc03 100644 --- a/ext/standard/string.c +++ b/ext/standard/string.c @@ -2790,11 +2790,6 @@ typedef struct { const char *s; STRLEN l; } STR; -typedef struct _match_node MATCH_NODE; -struct _match_node { - STRLEN pos; - MATCH_NODE *next; -}; typedef struct _pat_and_repl { STR pat; STR repl; From e5029ac40bd84fd3545538afa013051e161d86a4 Mon Sep 17 00:00:00 2001 From: Gustavo Lopes Date: Wed, 9 Jan 2013 23:20:32 +0100 Subject: [PATCH 5/7] The compiler can figure this out --- ext/standard/string.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/standard/string.c b/ext/standard/string.c index 14259cabc03..c7ed8844f10 100644 --- a/ext/standard/string.c +++ b/ext/standard/string.c @@ -2821,7 +2821,7 @@ static inline HASH php_strtr_hash(const char *str, int len) HASH res = 0; int i; for (i = 0; i < len; i++) { - res = (res << 5) + res + (unsigned char)str[i]; + res = res * 33 + (unsigned char)str[i]; } return res; From d7bac4f5ba17c6fba00943a8bada49a4735a15a6 Mon Sep 17 00:00:00 2001 From: Gustavo Lopes Date: Thu, 10 Jan 2013 13:09:14 +0100 Subject: [PATCH 6/7] Remove _GNU_SOURCE, add local heap sort --- ext/standard/string.c | 69 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 62 insertions(+), 7 deletions(-) diff --git a/ext/standard/string.c b/ext/standard/string.c index c7ed8844f10..953dfd17fb0 100644 --- a/ext/standard/string.c +++ b/ext/standard/string.c @@ -22,7 +22,6 @@ /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */ -#define _GNU_SOURCE 1 #include #include #include "php.h" @@ -135,7 +134,7 @@ static char *php_bin2hex(const unsigned char *old, const size_t oldlen, size_t * size_t i, j; result = (unsigned char *) safe_emalloc(oldlen, 2 * sizeof(char), 1); - + for (i = j = 0; i < oldlen; i++) { result[j++] = hexconvtab[old[i] >> 4]; result[j++] = hexconvtab[old[i] & 15]; @@ -2841,7 +2840,7 @@ static inline void php_strtr_populate_shift(PATNREPL *patterns, int patnum, int for (i = 0; i < patnum; i++) { for (j = 0; j < m - B + 1; j++) { HASH h = php_strtr_hash(&S(&patterns[i].pat)[j], B) & shift->table_mask; - assert((long long) m - (long long) j - B >= 0); + assert((long long) m - (long long) j - B >= 0); shift->entries[h] = MIN(shift->entries[h], m - j - B); } } @@ -2874,6 +2873,62 @@ static int php_strtr_compare_hash_suffix(const void *a, const void *b, void *ctx } } /* }}} */ +/* {{{ Sorting (no zend_qsort_r in this PHP version) */ +#define HS_LEFT(i) ((i) * 2 + 1) +#define HS_RIGHT(i) ((i) * 2 + 2) +#define HS_PARENT(i) (((i) - 1) / 2); +#define HS_OFF(data, i) ((void *)(&((data)->arr)[i])) +#define HS_CMP_CALL(data, i1, i2) \ + (php_strtr_compare_hash_suffix(HS_OFF((data), (i1)), HS_OFF((data), (i2)), (data)->res)) +struct hs_data { + PATNREPL *arr; + size_t nel; + size_t heapel; + PPRES *res; +}; +static inline void php_strtr_swap(PATNREPL *a, PATNREPL *b) +{ + PATNREPL tmp = *a; + *a = *b; + *b = tmp; +} +static inline void php_strtr_fix_heap(struct hs_data *data, size_t i) +{ + size_t li = HS_LEFT(i), + ri = HS_RIGHT(i), + largei; + if (li < data->heapel && HS_CMP_CALL(data, li, i) > 0) { + largei = li; + } else { + largei = i; + } + if (ri < data->heapel && HS_CMP_CALL(data, ri, largei) > 0) { + largei = ri; + } + if (largei != i) { + php_strtr_swap(HS_OFF(data, i), HS_OFF(data, largei)); + php_strtr_fix_heap(data, largei); + } +} +static inline void php_strtr_build_heap(struct hs_data *data) +{ + size_t i; + for (i = data->nel / 2; i > 0; i--) { + php_strtr_fix_heap(data, i - 1); + } +} +static inline void php_strtr_heapsort(PATNREPL *arr, size_t nel, PPRES *res) +{ + struct hs_data data = { arr, nel, nel, res }; + size_t i; + php_strtr_build_heap(&data); + for (i = nel; i > 1; i--) { + php_strtr_swap(arr, HS_OFF(&data, i - 1)); + data.heapel--; + php_strtr_fix_heap(&data, 0); + } +} +/* }}} */ /* {{{ php_strtr_free_strp */ static void php_strtr_free_strp(void *strp) { @@ -2967,11 +3022,11 @@ static PPRES *php_strtr_array_prepare(STR *text, PATNREPL *patterns, int patnum, php_strtr_populate_shift(patterns, patnum, B, res->m, res->shift); res->hash = safe_emalloc(HASH_TAB_SIZE, sizeof(*res->hash->entries), sizeof(*res->shift)); - res->hash->table_mask = HASH_TAB_SIZE - 1; + res->hash->table_mask = HASH_TAB_SIZE - 1; res->patterns = safe_emalloc(patnum, sizeof(*res->patterns), 0); memcpy(res->patterns, patterns, sizeof(*patterns) * patnum); - qsort_r(res->patterns, patnum, sizeof(*res->patterns), php_strtr_compare_hash_suffix, res); + php_strtr_heapsort(res->patterns, patnum, res); res->prefix = safe_emalloc(patnum, sizeof(*res->prefix), 0); for (i = 0; i < patnum; i++) { @@ -3050,7 +3105,7 @@ static void php_strtr_array_do_repl(STR *text, PPRES *d, zval *return_value) if (L(&pnr->pat) > L(text) - pos || memcmp(S(&pnr->pat), &S(text)[pos], L(&pnr->pat)) != 0) continue; - + smart_str_appendl(&result, &S(text)[nextwpos], pos - nextwpos); smart_str_appendl(&result, S(&pnr->repl), L(&pnr->repl)); pos += L(&pnr->pat); @@ -3076,7 +3131,7 @@ end_outer_loop: ; /* {{{ php_strtr_array */ static void php_strtr_array(zval *return_value, char *str, int slen, HashTable *pats) -{ +{ PPRES *data; STR text; PATNREPL *patterns; From 200242595dabfdaf6093da3e9ae6fef246ea8906 Mon Sep 17 00:00:00 2001 From: Gustavo Lopes Date: Mon, 14 Jan 2013 17:17:58 +0100 Subject: [PATCH 7/7] NEWS for bug #63893 --- NEWS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS b/NEWS index 28040f7262f..418bcc91e89 100644 --- a/NEWS +++ b/NEWS @@ -7,6 +7,8 @@ PHP NEWS protected property). (Stas) . Fixed bug #63943 (Bad warning text from strpos() on empty needle). (Laruence) + . Fixed bug #63893 (poor efficiency of strtr() using array with keys of very + different length). (Gustavo) . Fixed bug #63882 (zend_std_compare_objects crash on recursion). (Dmitry) - Litespeed: