mirror of
https://github.com/php/php-src.git
synced 2025-08-16 05:58:45 +02:00
Use fast conversion filters to implement php_mb_ord
Even for single-character strings, this is about 50% faster for ASCII, UTF-8, and UTF-16. For long strings, the performance gain is enormous, since the old code would convert the ENTIRE string, just to pick out the first codepoint.
This commit is contained in:
parent
9468fa7ff2
commit
880803a21e
2 changed files with 13 additions and 21 deletions
|
@ -143,6 +143,10 @@ typedef struct {
|
||||||
typedef size_t (*mb_to_wchar_fn)(unsigned char **in, size_t *in_len, uint32_t *out, size_t out_len, unsigned int *state);
|
typedef size_t (*mb_to_wchar_fn)(unsigned char **in, size_t *in_len, uint32_t *out, size_t out_len, unsigned int *state);
|
||||||
typedef void (*mb_from_wchar_fn)(uint32_t *in, size_t in_len, mb_convert_buf *out, bool end);
|
typedef void (*mb_from_wchar_fn)(uint32_t *in, size_t in_len, mb_convert_buf *out, bool end);
|
||||||
|
|
||||||
|
/* When converting encoded text to a buffer of wchars (Unicode codepoints) using `mb_to_wchar_fn`,
|
||||||
|
* the buffer must be at least this size (to work with all supported text encodings) */
|
||||||
|
#define MBSTRING_MIN_WCHAR_BUFSIZE 5
|
||||||
|
|
||||||
static inline void mb_convert_buf_init(mb_convert_buf *buf, size_t initsize, uint32_t repl_char, unsigned int err_mode)
|
static inline void mb_convert_buf_init(mb_convert_buf *buf, size_t initsize, uint32_t repl_char, unsigned int err_mode)
|
||||||
{
|
{
|
||||||
buf->state = buf->errors = 0;
|
buf->state = buf->errors = 0;
|
||||||
|
|
|
@ -3993,29 +3993,17 @@ static inline zend_long php_mb_ord(const char *str, size_t str_len, zend_string
|
||||||
return -2;
|
return -2;
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
/* Some legacy text encodings have a minimum required wchar buffer size;
|
||||||
mbfl_wchar_device dev;
|
* the ones which need the most are SJIS-Mac, UTF-7, and UTF7-IMAP */
|
||||||
mbfl_convert_filter *filter;
|
uint32_t wchar_buf[MBSTRING_MIN_WCHAR_BUFSIZE];
|
||||||
zend_long cp;
|
unsigned int state = 0;
|
||||||
|
size_t out_len = enc->to_wchar((unsigned char**)&str, &str_len, wchar_buf, MBSTRING_MIN_WCHAR_BUFSIZE, &state);
|
||||||
|
ZEND_ASSERT(out_len <= MBSTRING_MIN_WCHAR_BUFSIZE);
|
||||||
|
|
||||||
mbfl_wchar_device_init(&dev);
|
if (!out_len || wchar_buf[0] == MBFL_BAD_INPUT) {
|
||||||
filter = mbfl_convert_filter_new(enc, &mbfl_encoding_wchar, mbfl_wchar_device_output, 0, &dev);
|
return -1;
|
||||||
/* If this assertion fails, a memory allocation has failed, which is a bug */
|
|
||||||
ZEND_ASSERT(filter != NULL);
|
|
||||||
|
|
||||||
mbfl_convert_filter_feed_string(filter, (unsigned char*)str, str_len);
|
|
||||||
mbfl_convert_filter_flush(filter);
|
|
||||||
|
|
||||||
if (dev.pos < 1 || filter->num_illegalchar || dev.buffer[0] == MBFL_BAD_INPUT) {
|
|
||||||
cp = -1;
|
|
||||||
} else {
|
|
||||||
cp = dev.buffer[0];
|
|
||||||
}
|
|
||||||
|
|
||||||
mbfl_convert_filter_delete(filter);
|
|
||||||
mbfl_wchar_device_clear(&dev);
|
|
||||||
return cp;
|
|
||||||
}
|
}
|
||||||
|
return wchar_buf[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue