mirror of
https://github.com/php/php-src.git
synced 2025-08-16 05:58:45 +02:00
Implement php_mb_zend_encoding_converter using fast text conversion filters
This commit is contained in:
parent
e2654a532a
commit
953864661a
3 changed files with 7 additions and 178 deletions
|
@ -95,130 +95,6 @@
|
||||||
|
|
||||||
#include "rare_cp_bitvec.h"
|
#include "rare_cp_bitvec.h"
|
||||||
|
|
||||||
/*
|
|
||||||
* buffering converter
|
|
||||||
*/
|
|
||||||
mbfl_buffer_converter *
|
|
||||||
mbfl_buffer_converter_new(
|
|
||||||
const mbfl_encoding *from,
|
|
||||||
const mbfl_encoding *to,
|
|
||||||
size_t buf_initsz)
|
|
||||||
{
|
|
||||||
mbfl_buffer_converter *convd = emalloc(sizeof(mbfl_buffer_converter));
|
|
||||||
convd->to = to;
|
|
||||||
|
|
||||||
/* create convert filter */
|
|
||||||
convd->filter1 = NULL;
|
|
||||||
convd->filter2 = NULL;
|
|
||||||
if (mbfl_convert_filter_get_vtbl(from, to) != NULL) {
|
|
||||||
convd->filter1 = mbfl_convert_filter_new(from, to, mbfl_memory_device_output, NULL, &convd->device);
|
|
||||||
} else {
|
|
||||||
convd->filter2 = mbfl_convert_filter_new(&mbfl_encoding_wchar, to, mbfl_memory_device_output, NULL, &convd->device);
|
|
||||||
if (convd->filter2 != NULL) {
|
|
||||||
convd->filter1 = mbfl_convert_filter_new(from,
|
|
||||||
&mbfl_encoding_wchar,
|
|
||||||
(output_function_t)convd->filter2->filter_function,
|
|
||||||
(flush_function_t)convd->filter2->filter_flush,
|
|
||||||
convd->filter2);
|
|
||||||
if (convd->filter1 == NULL) {
|
|
||||||
mbfl_convert_filter_delete(convd->filter2);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (convd->filter1 == NULL) {
|
|
||||||
efree(convd);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
mbfl_memory_device_init(&convd->device, buf_initsz, buf_initsz/4);
|
|
||||||
|
|
||||||
return convd;
|
|
||||||
}
|
|
||||||
|
|
||||||
void mbfl_buffer_converter_delete(mbfl_buffer_converter *convd)
|
|
||||||
{
|
|
||||||
mbfl_convert_filter_delete(convd->filter1);
|
|
||||||
if (convd->filter2) {
|
|
||||||
mbfl_convert_filter_delete(convd->filter2);
|
|
||||||
}
|
|
||||||
mbfl_memory_device_clear(&convd->device);
|
|
||||||
efree((void*)convd);
|
|
||||||
}
|
|
||||||
|
|
||||||
void mbfl_buffer_converter_illegal_mode(mbfl_buffer_converter *convd, int mode)
|
|
||||||
{
|
|
||||||
if (convd->filter2) {
|
|
||||||
convd->filter2->illegal_mode = mode;
|
|
||||||
} else {
|
|
||||||
convd->filter1->illegal_mode = mode;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void mbfl_buffer_converter_illegal_substchar(mbfl_buffer_converter *convd, uint32_t substchar)
|
|
||||||
{
|
|
||||||
if (convd->filter2) {
|
|
||||||
convd->filter2->illegal_substchar = substchar;
|
|
||||||
} else {
|
|
||||||
convd->filter1->illegal_substchar = substchar;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t mbfl_buffer_converter_feed(mbfl_buffer_converter *convd, mbfl_string *string)
|
|
||||||
{
|
|
||||||
size_t n;
|
|
||||||
unsigned char *p;
|
|
||||||
mbfl_convert_filter *filter;
|
|
||||||
|
|
||||||
ZEND_ASSERT(convd);
|
|
||||||
ZEND_ASSERT(string);
|
|
||||||
|
|
||||||
mbfl_memory_device_realloc(&convd->device, convd->device.pos + string->len, string->len/4);
|
|
||||||
/* feed data */
|
|
||||||
n = string->len;
|
|
||||||
p = string->val;
|
|
||||||
|
|
||||||
filter = convd->filter1;
|
|
||||||
if (filter != NULL) {
|
|
||||||
while (n > 0) {
|
|
||||||
if ((*filter->filter_function)(*p++, filter) < 0) {
|
|
||||||
return p - string->val;
|
|
||||||
}
|
|
||||||
n--;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return p - string->val;
|
|
||||||
}
|
|
||||||
|
|
||||||
void mbfl_buffer_converter_flush(mbfl_buffer_converter *convd)
|
|
||||||
{
|
|
||||||
mbfl_convert_filter_flush(convd->filter1);
|
|
||||||
}
|
|
||||||
|
|
||||||
mbfl_string* mbfl_buffer_converter_result(mbfl_buffer_converter *convd, mbfl_string *result)
|
|
||||||
{
|
|
||||||
result->encoding = convd->to;
|
|
||||||
return mbfl_memory_device_result(&convd->device, result);
|
|
||||||
}
|
|
||||||
|
|
||||||
mbfl_string* mbfl_buffer_converter_feed_result(mbfl_buffer_converter *convd, mbfl_string *string, mbfl_string *result)
|
|
||||||
{
|
|
||||||
mbfl_buffer_converter_feed(convd, string);
|
|
||||||
mbfl_convert_filter_flush(convd->filter1);
|
|
||||||
result->encoding = convd->to;
|
|
||||||
return mbfl_memory_device_result(&convd->device, result);
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t mbfl_buffer_illegalchars(mbfl_buffer_converter *convd)
|
|
||||||
{
|
|
||||||
size_t num_illegalchars = convd->filter1->num_illegalchar;
|
|
||||||
|
|
||||||
if (convd->filter2) {
|
|
||||||
num_illegalchars += convd->filter2->num_illegalchar;
|
|
||||||
}
|
|
||||||
|
|
||||||
return num_illegalchars;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* encoding detector
|
* encoding detector
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -125,28 +125,6 @@
|
||||||
#define MIN(a,b) ((a)<(b)?(a):(b))
|
#define MIN(a,b) ((a)<(b)?(a):(b))
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*
|
|
||||||
* buffering converter
|
|
||||||
*/
|
|
||||||
typedef struct _mbfl_buffer_converter mbfl_buffer_converter;
|
|
||||||
|
|
||||||
struct _mbfl_buffer_converter {
|
|
||||||
mbfl_convert_filter *filter1;
|
|
||||||
mbfl_convert_filter *filter2;
|
|
||||||
mbfl_memory_device device;
|
|
||||||
const mbfl_encoding *to;
|
|
||||||
};
|
|
||||||
|
|
||||||
MBFLAPI extern mbfl_buffer_converter * mbfl_buffer_converter_new(const mbfl_encoding *from, const mbfl_encoding *to, size_t buf_initsz);
|
|
||||||
MBFLAPI extern void mbfl_buffer_converter_delete(mbfl_buffer_converter *convd);
|
|
||||||
MBFLAPI extern void mbfl_buffer_converter_illegal_mode(mbfl_buffer_converter *convd, int mode);
|
|
||||||
MBFLAPI extern void mbfl_buffer_converter_illegal_substchar(mbfl_buffer_converter *convd, uint32_t substchar);
|
|
||||||
MBFLAPI extern size_t mbfl_buffer_converter_feed(mbfl_buffer_converter *convd, mbfl_string *string);
|
|
||||||
MBFLAPI extern void mbfl_buffer_converter_flush(mbfl_buffer_converter *convd);
|
|
||||||
MBFLAPI extern mbfl_string * mbfl_buffer_converter_result(mbfl_buffer_converter *convd, mbfl_string *result);
|
|
||||||
MBFLAPI extern mbfl_string * mbfl_buffer_converter_feed_result(mbfl_buffer_converter *convd, mbfl_string *string, mbfl_string *result);
|
|
||||||
MBFLAPI extern size_t mbfl_buffer_illegalchars(mbfl_buffer_converter *convd);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* encoding detector
|
* encoding detector
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -452,40 +452,15 @@ static const zend_encoding *php_mb_zend_encoding_detector(const unsigned char *a
|
||||||
|
|
||||||
static size_t php_mb_zend_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const zend_encoding *encoding_to, const zend_encoding *encoding_from)
|
static size_t php_mb_zend_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const zend_encoding *encoding_to, const zend_encoding *encoding_from)
|
||||||
{
|
{
|
||||||
mbfl_string string, result;
|
unsigned int num_errors = 0;
|
||||||
mbfl_buffer_converter *convd;
|
zend_string *result = mb_fast_convert((unsigned char*)from, from_length, (const mbfl_encoding*)encoding_from, (const mbfl_encoding*)encoding_to, MBSTRG(current_filter_illegal_substchar), MBSTRG(current_filter_illegal_mode), &num_errors);
|
||||||
|
|
||||||
/* new encoding */
|
*to_length = ZSTR_LEN(result);
|
||||||
/* initialize string */
|
*to = emalloc(ZSTR_LEN(result) + 1); /* Include terminating null byte */
|
||||||
string.encoding = (const mbfl_encoding*)encoding_from;
|
memcpy(*to, ZSTR_VAL(result), ZSTR_LEN(result) + 1);
|
||||||
string.val = (unsigned char*)from;
|
zend_string_free(result);
|
||||||
string.len = from_length;
|
|
||||||
|
|
||||||
/* initialize converter */
|
return from_length;
|
||||||
convd = mbfl_buffer_converter_new((const mbfl_encoding *)encoding_from, (const mbfl_encoding *)encoding_to, string.len);
|
|
||||||
if (convd == NULL) {
|
|
||||||
return (size_t) -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
|
|
||||||
mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
|
|
||||||
|
|
||||||
/* do it */
|
|
||||||
size_t loc = mbfl_buffer_converter_feed(convd, &string);
|
|
||||||
|
|
||||||
mbfl_buffer_converter_flush(convd);
|
|
||||||
mbfl_string_init(&result);
|
|
||||||
if (!mbfl_buffer_converter_result(convd, &result)) {
|
|
||||||
mbfl_buffer_converter_delete(convd);
|
|
||||||
return (size_t)-1;
|
|
||||||
}
|
|
||||||
|
|
||||||
*to = result.val;
|
|
||||||
*to_length = result.len;
|
|
||||||
|
|
||||||
mbfl_buffer_converter_delete(convd);
|
|
||||||
|
|
||||||
return loc;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static zend_result php_mb_zend_encoding_list_parser(const char *encoding_list, size_t encoding_list_len, const zend_encoding ***return_list, size_t *return_size, bool persistent)
|
static zend_result php_mb_zend_encoding_list_parser(const char *encoding_list, size_t encoding_list_len, const zend_encoding ***return_list, size_t *return_size, bool persistent)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue