Implement php_mb_zend_encoding_converter using fast text conversion filters

This commit is contained in:
Alex Dowad 2022-12-15 22:19:06 +02:00
parent e2654a532a
commit 953864661a
3 changed files with 7 additions and 178 deletions

View file

@ -95,130 +95,6 @@
#include "rare_cp_bitvec.h" #include "rare_cp_bitvec.h"
/*
* buffering converter
*/
mbfl_buffer_converter *
mbfl_buffer_converter_new(
const mbfl_encoding *from,
const mbfl_encoding *to,
size_t buf_initsz)
{
mbfl_buffer_converter *convd = emalloc(sizeof(mbfl_buffer_converter));
convd->to = to;
/* create convert filter */
convd->filter1 = NULL;
convd->filter2 = NULL;
if (mbfl_convert_filter_get_vtbl(from, to) != NULL) {
convd->filter1 = mbfl_convert_filter_new(from, to, mbfl_memory_device_output, NULL, &convd->device);
} else {
convd->filter2 = mbfl_convert_filter_new(&mbfl_encoding_wchar, to, mbfl_memory_device_output, NULL, &convd->device);
if (convd->filter2 != NULL) {
convd->filter1 = mbfl_convert_filter_new(from,
&mbfl_encoding_wchar,
(output_function_t)convd->filter2->filter_function,
(flush_function_t)convd->filter2->filter_flush,
convd->filter2);
if (convd->filter1 == NULL) {
mbfl_convert_filter_delete(convd->filter2);
}
}
}
if (convd->filter1 == NULL) {
efree(convd);
return NULL;
}
mbfl_memory_device_init(&convd->device, buf_initsz, buf_initsz/4);
return convd;
}
/*
 * Destroy a converter created by mbfl_buffer_converter_new(): delete the
 * filter(s), clear the output memory device, then free the converter itself.
 */
void mbfl_buffer_converter_delete(mbfl_buffer_converter *convd)
{
	mbfl_convert_filter *second = convd->filter2;

	mbfl_convert_filter_delete(convd->filter1);
	/* filter2 only exists when the conversion is chained through wchar */
	if (second != NULL) {
		mbfl_convert_filter_delete(second);
	}

	mbfl_memory_device_clear(&convd->device);
	efree(convd);
}
/*
 * Set `illegal_mode` on the converter. The setting is stored on filter2
 * when the conversion is chained (filter2 present), otherwise on filter1.
 */
void mbfl_buffer_converter_illegal_mode(mbfl_buffer_converter *convd, int mode)
{
	mbfl_convert_filter *target = convd->filter2 ? convd->filter2 : convd->filter1;

	target->illegal_mode = mode;
}
/*
 * Set `illegal_substchar` on the converter. The setting is stored on
 * filter2 when the conversion is chained (filter2 present), otherwise on
 * filter1.
 */
void mbfl_buffer_converter_illegal_substchar(mbfl_buffer_converter *convd, uint32_t substchar)
{
	mbfl_convert_filter *target = convd->filter2 ? convd->filter2 : convd->filter1;

	target->illegal_substchar = substchar;
}
/*
 * Push every byte of `string` through the converter's first filter.
 *
 * The output device is resized up front using the input length.
 *
 * Returns the number of input bytes consumed. If a filter call reports an
 * error (negative return), feeding stops and the count includes the byte
 * that triggered the error. Returns 0 if the converter has no filter1.
 */
size_t mbfl_buffer_converter_feed(mbfl_buffer_converter *convd, mbfl_string *string)
{
	ZEND_ASSERT(convd);
	ZEND_ASSERT(string);

	mbfl_memory_device_realloc(&convd->device, convd->device.pos + string->len, string->len/4);

	size_t len = string->len;
	const unsigned char *src = string->val;
	mbfl_convert_filter *head = convd->filter1;

	if (head == NULL) {
		return 0;
	}

	for (size_t i = 0; i < len; i++) {
		if ((*head->filter_function)(src[i], head) < 0) {
			/* Byte i has already been handed to the filter, so count it */
			return i + 1;
		}
	}
	return len;
}
/*
 * Flush the converter by flushing its first filter.
 */
void mbfl_buffer_converter_flush(mbfl_buffer_converter *convd)
{
	mbfl_convert_filter *head = convd->filter1;

	mbfl_convert_filter_flush(head);
}
/*
 * Tag `result` with the converter's target encoding and fill it from the
 * output memory device. Returns whatever mbfl_memory_device_result() returns.
 */
mbfl_string* mbfl_buffer_converter_result(mbfl_buffer_converter *convd, mbfl_string *result)
{
	mbfl_memory_device *out = &convd->device;

	result->encoding = convd->to;
	return mbfl_memory_device_result(out, result);
}
/*
 * One-shot helper: feed `string`, flush the first filter, then tag `result`
 * with the target encoding and fill it from the output memory device.
 * The byte count returned by the feed step is not inspected.
 */
mbfl_string* mbfl_buffer_converter_feed_result(mbfl_buffer_converter *convd, mbfl_string *string, mbfl_string *result)
{
	(void)mbfl_buffer_converter_feed(convd, string);
	mbfl_convert_filter_flush(convd->filter1);

	mbfl_memory_device *out = &convd->device;
	result->encoding = convd->to;
	return mbfl_memory_device_result(out, result);
}
/*
 * Return the total of the `num_illegalchar` counters: filter1's count plus
 * filter2's count when a second filter is present.
 */
size_t mbfl_buffer_illegalchars(mbfl_buffer_converter *convd)
{
	size_t from_first = convd->filter1->num_illegalchar;
	size_t from_second = 0;

	if (convd->filter2 != NULL) {
		from_second = convd->filter2->num_illegalchar;
	}
	return from_first + from_second;
}
/* /*
* encoding detector * encoding detector
*/ */

View file

@ -125,28 +125,6 @@
#define MIN(a,b) ((a)<(b)?(a):(b)) #define MIN(a,b) ((a)<(b)?(a):(b))
#endif #endif
/*
* buffering converter
*/
/* State of a buffering converter (see mbfl_buffer_converter_new) */
typedef struct _mbfl_buffer_converter {
	mbfl_convert_filter *filter1; /* filter that input bytes are fed into */
	mbfl_convert_filter *filter2; /* second stage (wchar -> to); NULL for a direct conversion */
	mbfl_memory_device device;    /* memory device that collects the converted output */
	const mbfl_encoding *to;      /* target encoding, stamped onto results */
} mbfl_buffer_converter;
/* Create a converter from `from` to `to`; returns NULL if the input-side filter cannot be created. */
MBFLAPI extern mbfl_buffer_converter * mbfl_buffer_converter_new(const mbfl_encoding *from, const mbfl_encoding *to, size_t buf_initsz);
/* Delete the converter's filter(s), clear its output device and free it. */
MBFLAPI extern void mbfl_buffer_converter_delete(mbfl_buffer_converter *convd);
/* Set illegal_mode on filter2 if present, otherwise on filter1. */
MBFLAPI extern void mbfl_buffer_converter_illegal_mode(mbfl_buffer_converter *convd, int mode);
/* Set illegal_substchar on filter2 if present, otherwise on filter1. */
MBFLAPI extern void mbfl_buffer_converter_illegal_substchar(mbfl_buffer_converter *convd, uint32_t substchar);
/* Feed the bytes of `string` through filter1; returns the number of bytes consumed
 * (including the offending byte if a filter call reports an error). */
MBFLAPI extern size_t mbfl_buffer_converter_feed(mbfl_buffer_converter *convd, mbfl_string *string);
/* Flush filter1. */
MBFLAPI extern void mbfl_buffer_converter_flush(mbfl_buffer_converter *convd);
/* Set result->encoding to the target encoding and fill `result` from the output device. */
MBFLAPI extern mbfl_string * mbfl_buffer_converter_result(mbfl_buffer_converter *convd, mbfl_string *result);
/* Feed `string`, flush filter1, then produce the result as mbfl_buffer_converter_result() does. */
MBFLAPI extern mbfl_string * mbfl_buffer_converter_feed_result(mbfl_buffer_converter *convd, mbfl_string *string, mbfl_string *result);
/* Sum of the num_illegalchar counters of filter1 and, if present, filter2. */
MBFLAPI extern size_t mbfl_buffer_illegalchars(mbfl_buffer_converter *convd);
/* /*
* encoding detector * encoding detector
*/ */

View file

@ -452,40 +452,15 @@ static const zend_encoding *php_mb_zend_encoding_detector(const unsigned char *a
static size_t php_mb_zend_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const zend_encoding *encoding_to, const zend_encoding *encoding_from) static size_t php_mb_zend_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const zend_encoding *encoding_to, const zend_encoding *encoding_from)
{ {
mbfl_string string, result; unsigned int num_errors = 0;
mbfl_buffer_converter *convd; zend_string *result = mb_fast_convert((unsigned char*)from, from_length, (const mbfl_encoding*)encoding_from, (const mbfl_encoding*)encoding_to, MBSTRG(current_filter_illegal_substchar), MBSTRG(current_filter_illegal_mode), &num_errors);
/* new encoding */ *to_length = ZSTR_LEN(result);
/* initialize string */ *to = emalloc(ZSTR_LEN(result) + 1); /* Include terminating null byte */
string.encoding = (const mbfl_encoding*)encoding_from; memcpy(*to, ZSTR_VAL(result), ZSTR_LEN(result) + 1);
string.val = (unsigned char*)from; zend_string_free(result);
string.len = from_length;
/* initialize converter */ return from_length;
convd = mbfl_buffer_converter_new((const mbfl_encoding *)encoding_from, (const mbfl_encoding *)encoding_to, string.len);
if (convd == NULL) {
return (size_t) -1;
}
mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
/* do it */
size_t loc = mbfl_buffer_converter_feed(convd, &string);
mbfl_buffer_converter_flush(convd);
mbfl_string_init(&result);
if (!mbfl_buffer_converter_result(convd, &result)) {
mbfl_buffer_converter_delete(convd);
return (size_t)-1;
}
*to = result.val;
*to_length = result.len;
mbfl_buffer_converter_delete(convd);
return loc;
} }
static zend_result php_mb_zend_encoding_list_parser(const char *encoding_list, size_t encoding_list_len, const zend_encoding ***return_list, size_t *return_size, bool persistent) static zend_result php_mb_zend_encoding_list_parser(const char *encoding_list, size_t encoding_list_len, const zend_encoding ***return_list, size_t *return_size, bool persistent)