mirror of
https://github.com/php/php-src.git
synced 2025-08-18 06:58:55 +02:00
Rewrite unicode_encode() and unicode_decode() functions. Apply the new
conversion error semantics.
This commit is contained in:
parent
19e6c92ad0
commit
b36d2dfef6
1 changed file with 83 additions and 63 deletions
|
@ -23,48 +23,104 @@
|
|||
|
||||
void php_register_unicode_iterators(TSRMLS_D);
|
||||
|
||||
/* {{{ proto unicode unicode_decode(string input, string encoding) U
|
||||
Takes a string in the source encoding and converts it to a UTF-16 unicode string, returning the result */
|
||||
/* {{{ proto unicode unicode_decode(binary input, string encoding [, int flags]) U
|
||||
Takes a binary string and converts it to a Unicode string using the specified encoding */
|
||||
static PHP_FUNCTION(unicode_decode)
|
||||
{
|
||||
union {
|
||||
void *vptr;
|
||||
char *bin;
|
||||
} input;
|
||||
zend_uchar type;
|
||||
int len;
|
||||
char *encoding;
|
||||
int enclen;
|
||||
char *str, *enc;
|
||||
int str_len, enc_len;
|
||||
long flags;
|
||||
UChar *dest;
|
||||
int dest_len;
|
||||
UErrorCode status;
|
||||
UConverter *conv = NULL;
|
||||
UChar *target;
|
||||
int targetlen;
|
||||
int num_conv;
|
||||
|
||||
if (FAILURE == zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ts", &input.vptr, &len, &type, &encoding, &enclen)) {
|
||||
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Ss|l", &str, &str_len, &enc, &enc_len, &flags)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (type == IS_UNICODE) {
|
||||
php_error_docref(NULL TSRMLS_CC, E_WARNING, "input string is already unicode");
|
||||
RETURN_FALSE;
|
||||
if (ZEND_NUM_ARGS() > 2) {
|
||||
if ((flags & 0xff) > ZEND_CONV_ERROR_LAST_ENUM) {
|
||||
php_error_docref(NULL TSRMLS_CC, E_WARNING, "illegal value for conversion error mode");
|
||||
RETURN_FALSE;
|
||||
}
|
||||
} else {
|
||||
flags = UG(to_error_mode);
|
||||
}
|
||||
|
||||
status = U_ZERO_ERROR;
|
||||
conv = ucnv_open(encoding, &status);
|
||||
if (!conv) {
|
||||
php_error_docref(NULL TSRMLS_CC, E_WARNING, "could not locate converter for %s", encoding);
|
||||
RETURN_FALSE;
|
||||
}
|
||||
|
||||
status = U_ZERO_ERROR;
|
||||
zend_convert_to_unicode(conv, &target, &targetlen, input.bin, len, &status);
|
||||
conv = ucnv_open(enc, &status);
|
||||
if (U_FAILURE(status)) {
|
||||
/* TODO: error handling semantics ? */
|
||||
php_error_docref(NULL TSRMLS_CC, E_WARNING, "conversion was not entirely successful: %d", status);
|
||||
php_error_docref(NULL TSRMLS_CC, E_WARNING, "could not create converter for '%s' encoding", enc);
|
||||
RETURN_FALSE;
|
||||
}
|
||||
RETVAL_UNICODEL(target, targetlen, 0);
|
||||
|
||||
zend_set_converter_error_mode(conv, ZEND_TO_UNICODE, flags);
|
||||
|
||||
status = U_ZERO_ERROR;
|
||||
num_conv = zend_convert_to_unicode(conv, &dest, &dest_len, str, str_len, &status);
|
||||
if (U_FAILURE(status)) {
|
||||
zend_raise_conversion_error_ex("could not decode binary string", conv, ZEND_TO_UNICODE, num_conv, (flags & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC);
|
||||
efree(dest);
|
||||
ucnv_close(conv);
|
||||
RETURN_FALSE;
|
||||
}
|
||||
ucnv_close(conv);
|
||||
|
||||
RETVAL_UNICODEL(dest, dest_len, 0);
|
||||
}
|
||||
/* }}} */
|
||||
|
||||
/* {{{ proto binary unicode_encode(unicode input, string encoding [, int flags]) U
|
||||
Takes a Unicode string and converts it to a binary string using the specified encoding */
|
||||
static PHP_FUNCTION(unicode_encode)
|
||||
{
|
||||
UChar *uni;
|
||||
char *enc;
|
||||
int uni_len, enc_len;
|
||||
long flags;
|
||||
char *dest;
|
||||
int dest_len;
|
||||
UErrorCode status;
|
||||
UConverter *conv = NULL;
|
||||
int num_conv;
|
||||
|
||||
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Us|l", &uni, &uni_len, &enc, &enc_len, &flags) == FAILURE) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (ZEND_NUM_ARGS() > 2) {
|
||||
if ((flags & 0xff) > ZEND_CONV_ERROR_LAST_ENUM) {
|
||||
php_error_docref(NULL TSRMLS_CC, E_WARNING, "illegal value for conversion error mode");
|
||||
RETURN_FALSE;
|
||||
}
|
||||
} else {
|
||||
flags = UG(from_error_mode);
|
||||
}
|
||||
|
||||
status = U_ZERO_ERROR;
|
||||
conv = ucnv_open(enc, &status);
|
||||
if (U_FAILURE(status)) {
|
||||
php_error_docref(NULL TSRMLS_CC, E_WARNING, "could not create converter for '%s' encoding", enc);
|
||||
RETURN_FALSE;
|
||||
}
|
||||
|
||||
zend_set_converter_error_mode(conv, ZEND_FROM_UNICODE, flags);
|
||||
zend_set_converter_subst_char(conv, UG(from_subst_char));
|
||||
|
||||
status = U_ZERO_ERROR;
|
||||
num_conv = zend_convert_from_unicode(conv, &dest, &dest_len, uni, uni_len, &status);
|
||||
if (U_FAILURE(status)) {
|
||||
int32_t offset = u_countChar32(uni, num_conv);
|
||||
zend_raise_conversion_error_ex("could not encode Unicode string", conv, ZEND_FROM_UNICODE, offset, (flags & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC);
|
||||
efree(dest);
|
||||
ucnv_close(conv);
|
||||
RETURN_FALSE;
|
||||
}
|
||||
ucnv_close(conv);
|
||||
|
||||
RETVAL_STRINGL(dest, dest_len, 0);
|
||||
}
|
||||
/* }}} */
|
||||
|
||||
|
@ -76,42 +132,6 @@ static PHP_FUNCTION(unicode_semantics)
|
|||
}
|
||||
/* }}} */
|
||||
|
||||
/* {{{ proto string unicode_encode(unicode input, string encoding) U
|
||||
Takes a unicode string and converts it to a string in the specified encoding */
|
||||
static PHP_FUNCTION(unicode_encode)
|
||||
{
|
||||
UChar *uni;
|
||||
int len;
|
||||
char *encoding;
|
||||
int enclen;
|
||||
UErrorCode status;
|
||||
UConverter *conv = NULL;
|
||||
char *target;
|
||||
int targetlen;
|
||||
|
||||
if (FAILURE == zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "us", &uni, &len, &encoding, &enclen)) {
|
||||
return;
|
||||
}
|
||||
|
||||
status = U_ZERO_ERROR;
|
||||
conv = ucnv_open(encoding, &status);
|
||||
if (!conv) {
|
||||
php_error_docref(NULL TSRMLS_CC, E_WARNING, "could not locate converter for %s", encoding);
|
||||
RETURN_FALSE;
|
||||
}
|
||||
|
||||
status = U_ZERO_ERROR;
|
||||
zend_convert_from_unicode(conv, &target, &targetlen, uni, len, &status);
|
||||
if (U_FAILURE(status)) {
|
||||
/* TODO: error handling semantics ? */
|
||||
php_error_docref(NULL TSRMLS_CC, E_WARNING, "conversion was not entirely successful: %d", status);
|
||||
}
|
||||
RETVAL_STRINGL(target, targetlen, 0);
|
||||
|
||||
ucnv_close(conv);
|
||||
}
|
||||
/* }}} */
|
||||
|
||||
/* {{{ proto bool unicode_set_error_mode(int direction, int mode) U
|
||||
Sets global conversion error mode for the specified conversion direction */
|
||||
PHP_FUNCTION(unicode_set_error_mode)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue