mirror of
https://github.com/php/php-src.git
synced 2025-08-16 05:58:45 +02:00
Implement php_mb_zend_encoding_converter using fast text conversion filters
This commit is contained in:
parent
e2654a532a
commit
953864661a
3 changed files with 7 additions and 178 deletions
|
@@ -95,130 +95,6 @@
|
|||
|
||||
#include "rare_cp_bitvec.h"
|
||||
|
||||
/*
|
||||
* buffering converter
|
||||
*/
|
||||
mbfl_buffer_converter *
|
||||
mbfl_buffer_converter_new(
|
||||
const mbfl_encoding *from,
|
||||
const mbfl_encoding *to,
|
||||
size_t buf_initsz)
|
||||
{
|
||||
mbfl_buffer_converter *convd = emalloc(sizeof(mbfl_buffer_converter));
|
||||
convd->to = to;
|
||||
|
||||
/* create convert filter */
|
||||
convd->filter1 = NULL;
|
||||
convd->filter2 = NULL;
|
||||
if (mbfl_convert_filter_get_vtbl(from, to) != NULL) {
|
||||
convd->filter1 = mbfl_convert_filter_new(from, to, mbfl_memory_device_output, NULL, &convd->device);
|
||||
} else {
|
||||
convd->filter2 = mbfl_convert_filter_new(&mbfl_encoding_wchar, to, mbfl_memory_device_output, NULL, &convd->device);
|
||||
if (convd->filter2 != NULL) {
|
||||
convd->filter1 = mbfl_convert_filter_new(from,
|
||||
&mbfl_encoding_wchar,
|
||||
(output_function_t)convd->filter2->filter_function,
|
||||
(flush_function_t)convd->filter2->filter_flush,
|
||||
convd->filter2);
|
||||
if (convd->filter1 == NULL) {
|
||||
mbfl_convert_filter_delete(convd->filter2);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (convd->filter1 == NULL) {
|
||||
efree(convd);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
mbfl_memory_device_init(&convd->device, buf_initsz, buf_initsz/4);
|
||||
|
||||
return convd;
|
||||
}
|
||||
|
||||
/*
 * Destroy a buffering converter: delete filter1, delete filter2 if one was
 * created, clear the memory device and free the converter itself.
 */
void mbfl_buffer_converter_delete(mbfl_buffer_converter *convd)
{
	mbfl_convert_filter_delete(convd->filter1);

	/* filter2 only exists when the conversion is chained through wchar */
	if (convd->filter2 != NULL) {
		mbfl_convert_filter_delete(convd->filter2);
	}

	mbfl_memory_device_clear(&convd->device);
	efree(convd);
}
|
||||
|
||||
/*
 * Set the illegal-character handling mode.  The setting is stored on filter2
 * when the converter has one, and on filter1 otherwise.
 */
void mbfl_buffer_converter_illegal_mode(mbfl_buffer_converter *convd, int mode)
{
	mbfl_convert_filter *target = convd->filter2 ? convd->filter2 : convd->filter1;

	target->illegal_mode = mode;
}
|
||||
|
||||
/*
 * Set the substitute character used for illegal input.  The setting is stored
 * on filter2 when the converter has one, and on filter1 otherwise.
 */
void mbfl_buffer_converter_illegal_substchar(mbfl_buffer_converter *convd, uint32_t substchar)
{
	mbfl_convert_filter *target = convd->filter2 ? convd->filter2 : convd->filter1;

	target->illegal_substchar = substchar;
}
|
||||
|
||||
/*
 * Push every byte of `string` through filter1.
 *
 * The memory device is first asked to grow to its current position plus the
 * input length.  Feeding stops early if the filter function returns a
 * negative value.
 *
 * Returns the number of input bytes handed to the filter, which includes the
 * byte on which the filter reported failure, if any.
 */
size_t mbfl_buffer_converter_feed(mbfl_buffer_converter *convd, mbfl_string *string)
{
	size_t remaining;
	unsigned char *cursor;
	mbfl_convert_filter *filter;

	ZEND_ASSERT(convd);
	ZEND_ASSERT(string);

	mbfl_memory_device_realloc(&convd->device, convd->device.pos + string->len, string->len/4);

	cursor = string->val;
	filter = convd->filter1;

	if (filter != NULL) {
		for (remaining = string->len; remaining > 0; remaining--) {
			if ((*filter->filter_function)(*cursor++, filter) < 0) {
				break;
			}
		}
	}

	return cursor - string->val;
}
|
||||
|
||||
/* Flush the converter by flushing its first filter (filter1). */
void mbfl_buffer_converter_flush(mbfl_buffer_converter *convd)
{
	mbfl_convert_filter *first = convd->filter1;

	mbfl_convert_filter_flush(first);
}
|
||||
|
||||
/*
 * Fill `result` from the converter's memory device.
 *
 * result->encoding is set to the converter's target encoding; the rest is
 * delegated to mbfl_memory_device_result(), whose return value is passed on.
 */
mbfl_string* mbfl_buffer_converter_result(mbfl_buffer_converter *convd, mbfl_string *result)
{
	mbfl_memory_device *device = &convd->device;

	result->encoding = convd->to;

	return mbfl_memory_device_result(device, result);
}
|
||||
|
||||
/*
 * Feed `string` through the converter, flush filter1, and fill `result` from
 * the memory device.
 *
 * The byte count returned by the feed step is not inspected here.
 * result->encoding is set to the converter's target encoding.
 */
mbfl_string* mbfl_buffer_converter_feed_result(mbfl_buffer_converter *convd, mbfl_string *string, mbfl_string *result)
{
	mbfl_memory_device *device = &convd->device;

	mbfl_buffer_converter_feed(convd, string);
	mbfl_convert_filter_flush(convd->filter1);

	result->encoding = convd->to;

	return mbfl_memory_device_result(device, result);
}
|
||||
|
||||
/*
 * Return the illegal-character count of filter1, plus that of filter2 when
 * the converter has a second filter.
 */
size_t mbfl_buffer_illegalchars(mbfl_buffer_converter *convd)
{
	size_t total = convd->filter1->num_illegalchar;

	if (convd->filter2 != NULL) {
		total += convd->filter2->num_illegalchar;
	}

	return total;
}
|
||||
|
||||
/*
|
||||
* encoding detector
|
||||
*/
|
||||
|
|
|
@@ -125,28 +125,6 @@
|
|||
#define MIN(a,b) ((a)<(b)?(a):(b))
|
||||
#endif
|
||||
|
||||
/*
|
||||
* buffering converter
|
||||
*/
|
||||
typedef struct _mbfl_buffer_converter mbfl_buffer_converter;
|
||||
|
||||
struct _mbfl_buffer_converter {
|
||||
mbfl_convert_filter *filter1;
|
||||
mbfl_convert_filter *filter2;
|
||||
mbfl_memory_device device;
|
||||
const mbfl_encoding *to;
|
||||
};
|
||||
|
||||
MBFLAPI extern mbfl_buffer_converter * mbfl_buffer_converter_new(const mbfl_encoding *from, const mbfl_encoding *to, size_t buf_initsz);
|
||||
MBFLAPI extern void mbfl_buffer_converter_delete(mbfl_buffer_converter *convd);
|
||||
MBFLAPI extern void mbfl_buffer_converter_illegal_mode(mbfl_buffer_converter *convd, int mode);
|
||||
MBFLAPI extern void mbfl_buffer_converter_illegal_substchar(mbfl_buffer_converter *convd, uint32_t substchar);
|
||||
MBFLAPI extern size_t mbfl_buffer_converter_feed(mbfl_buffer_converter *convd, mbfl_string *string);
|
||||
MBFLAPI extern void mbfl_buffer_converter_flush(mbfl_buffer_converter *convd);
|
||||
MBFLAPI extern mbfl_string * mbfl_buffer_converter_result(mbfl_buffer_converter *convd, mbfl_string *result);
|
||||
MBFLAPI extern mbfl_string * mbfl_buffer_converter_feed_result(mbfl_buffer_converter *convd, mbfl_string *string, mbfl_string *result);
|
||||
MBFLAPI extern size_t mbfl_buffer_illegalchars(mbfl_buffer_converter *convd);
|
||||
|
||||
/*
|
||||
* encoding detector
|
||||
*/
|
||||
|
|
|
@@ -452,40 +452,15 @@ static const zend_encoding *php_mb_zend_encoding_detector(const unsigned char *a
|
|||
|
||||
static size_t php_mb_zend_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const zend_encoding *encoding_to, const zend_encoding *encoding_from)
|
||||
{
|
||||
mbfl_string string, result;
|
||||
mbfl_buffer_converter *convd;
|
||||
unsigned int num_errors = 0;
|
||||
zend_string *result = mb_fast_convert((unsigned char*)from, from_length, (const mbfl_encoding*)encoding_from, (const mbfl_encoding*)encoding_to, MBSTRG(current_filter_illegal_substchar), MBSTRG(current_filter_illegal_mode), &num_errors);
|
||||
|
||||
/* new encoding */
|
||||
/* initialize string */
|
||||
string.encoding = (const mbfl_encoding*)encoding_from;
|
||||
string.val = (unsigned char*)from;
|
||||
string.len = from_length;
|
||||
*to_length = ZSTR_LEN(result);
|
||||
*to = emalloc(ZSTR_LEN(result) + 1); /* Include terminating null byte */
|
||||
memcpy(*to, ZSTR_VAL(result), ZSTR_LEN(result) + 1);
|
||||
zend_string_free(result);
|
||||
|
||||
/* initialize converter */
|
||||
convd = mbfl_buffer_converter_new((const mbfl_encoding *)encoding_from, (const mbfl_encoding *)encoding_to, string.len);
|
||||
if (convd == NULL) {
|
||||
return (size_t) -1;
|
||||
}
|
||||
|
||||
mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
|
||||
mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
|
||||
|
||||
/* do it */
|
||||
size_t loc = mbfl_buffer_converter_feed(convd, &string);
|
||||
|
||||
mbfl_buffer_converter_flush(convd);
|
||||
mbfl_string_init(&result);
|
||||
if (!mbfl_buffer_converter_result(convd, &result)) {
|
||||
mbfl_buffer_converter_delete(convd);
|
||||
return (size_t)-1;
|
||||
}
|
||||
|
||||
*to = result.val;
|
||||
*to_length = result.len;
|
||||
|
||||
mbfl_buffer_converter_delete(convd);
|
||||
|
||||
return loc;
|
||||
return from_length;
|
||||
}
|
||||
|
||||
static zend_result php_mb_zend_encoding_list_parser(const char *encoding_list, size_t encoding_list_len, const zend_encoding ***return_list, size_t *return_size, bool persistent)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue