Rewrite unicode_encode() and unicode_decode() functions. Apply the new
conversion error semantics.
This commit is contained in:
Andrei Zmievski 2006-03-27 03:19:30 +00:00
parent 19e6c92ad0
commit b36d2dfef6

View file

@ -23,48 +23,104 @@
void php_register_unicode_iterators(TSRMLS_D);
/* {{{ proto unicode unicode_decode(string input, string encoding) U
Takes a string in the source encoding and converts it to a UTF-16 unicode string, returning the result */
/* {{{ proto unicode unicode_decode(binary input, string encoding [, int flags]) U
Takes a binary string and converts it to a Unicode string using the specified encoding.
Returns FALSE after a warning when the error mode is out of range or the converter
cannot be opened, and FALSE after raising a conversion error when decoding fails.
NOTE(review): the body below appears to interleave the removed and the added lines
of this diff hunk without +/- markers (two parameter-parsing calls, two ucnv_open()
calls, two sets of result variables, unbalanced braces), so it is not compilable as
shown -- confirm against the real file before editing. */
static PHP_FUNCTION(unicode_decode)
{
/* NOTE(review): input/type/len/encoding/enclen are referenced only by the "ts"
   parse and the input.bin conversion further down -- presumably the old locals. */
union {
void *vptr;
char *bin;
} input;
zend_uchar type;
int len;
char *encoding;
int enclen;
/* NOTE(review): str/enc/flags/dest are referenced only by the "Ss|l" parse and the
   flags-aware conversion path -- presumably the new locals. */
char *str, *enc;
int str_len, enc_len;
long flags;
UChar *dest;
int dest_len;
UErrorCode status;
UConverter *conv = NULL;
UChar *target;
int targetlen;
int num_conv;
/* NOTE(review): two consecutive parse calls; the first opening brace is never
   closed in this view. */
if (FAILURE == zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ts", &input.vptr, &len, &type, &encoding, &enclen)) {
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Ss|l", &str, &str_len, &enc, &enc_len, &flags)) {
return;
}
if (type == IS_UNICODE) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "input string is already unicode");
RETURN_FALSE;
/* flags is only written by the parser when a third argument was passed, so it is
   validated in that case and otherwise taken from the global to_error_mode. */
if (ZEND_NUM_ARGS() > 2) {
/* Only the low byte of flags is range-checked against the last error-mode enum. */
if ((flags & 0xff) > ZEND_CONV_ERROR_LAST_ENUM) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "illegal value for conversion error mode");
RETURN_FALSE;
}
} else {
flags = UG(to_error_mode);
}
status = U_ZERO_ERROR;
/* NOTE(review): converter opened from `encoding` here and again from `enc` below. */
conv = ucnv_open(encoding, &status);
if (!conv) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "could not locate converter for %s", encoding);
RETURN_FALSE;
}
status = U_ZERO_ERROR;
zend_convert_to_unicode(conv, &target, &targetlen, input.bin, len, &status);
conv = ucnv_open(enc, &status);
if (U_FAILURE(status)) {
/* TODO: error handling semantics ? */
php_error_docref(NULL TSRMLS_CC, E_WARNING, "conversion was not entirely successful: %d", status);
php_error_docref(NULL TSRMLS_CC, E_WARNING, "could not create converter for '%s' encoding", enc);
RETURN_FALSE;
}
RETVAL_UNICODEL(target, targetlen, 0);
/* Apply the chosen error mode to the converter for the to-Unicode direction. */
zend_set_converter_error_mode(conv, ZEND_TO_UNICODE, flags);
/* Reset status so a stale value from ucnv_open() is not mistaken for a conversion failure. */
status = U_ZERO_ERROR;
num_conv = zend_convert_to_unicode(conv, &dest, &dest_len, str, str_len, &status);
if (U_FAILURE(status)) {
/* num_conv is forwarded as the error position; the last argument is non-zero when
   the ZEND_CONV_ERROR_EXCEPTION bit is set in flags. */
zend_raise_conversion_error_ex("could not decode binary string", conv, ZEND_TO_UNICODE, num_conv, (flags & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC);
/* On failure the output buffer and the converter are both released before returning FALSE. */
efree(dest);
ucnv_close(conv);
RETURN_FALSE;
}
ucnv_close(conv);
/* Hand the converted buffer and its length back as the function's return value. */
RETVAL_UNICODEL(dest, dest_len, 0);
}
/* }}} */
/* {{{ proto binary unicode_encode(unicode input, string encoding [, int flags]) U
Takes a Unicode string and converts it to a binary string using the specified encoding */
static PHP_FUNCTION(unicode_encode)
{
	UChar *input;
	int input_len;
	char *encoding;
	int encoding_len;
	long flags;
	char *result;
	int result_len;
	int converted;
	UConverter *conv = NULL;
	UErrorCode status = U_ZERO_ERROR;

	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Us|l", &input, &input_len, &encoding, &encoding_len, &flags) == FAILURE) {
		return;
	}

	/* Without an explicit third argument the global from-Unicode error mode applies;
	   an explicit one must carry a valid mode in its low byte. */
	if (ZEND_NUM_ARGS() <= 2) {
		flags = UG(from_error_mode);
	} else if ((flags & 0xff) > ZEND_CONV_ERROR_LAST_ENUM) {
		php_error_docref(NULL TSRMLS_CC, E_WARNING, "illegal value for conversion error mode");
		RETURN_FALSE;
	}

	conv = ucnv_open(encoding, &status);
	if (U_FAILURE(status)) {
		php_error_docref(NULL TSRMLS_CC, E_WARNING, "could not create converter for '%s' encoding", encoding);
		RETURN_FALSE;
	}

	/* Configure error mode and substitution character for the from-Unicode direction. */
	zend_set_converter_error_mode(conv, ZEND_FROM_UNICODE, flags);
	zend_set_converter_subst_char(conv, UG(from_subst_char));

	/* Start the conversion from a clean status. */
	status = U_ZERO_ERROR;
	converted = zend_convert_from_unicode(conv, &result, &result_len, input, input_len, &status);

	if (!U_FAILURE(status)) {
		ucnv_close(conv);
		RETVAL_STRINGL(result, result_len, 0);
		return;
	}

	{
		/* The error offset is reported in code points, counted over the first
		   `converted` UChars of the input. */
		int32_t cp_offset = u_countChar32(input, converted);

		zend_raise_conversion_error_ex("could not encode Unicode string", conv, ZEND_FROM_UNICODE, cp_offset, (flags & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC);
	}

	/* Failure path: release the output buffer and the converter, then return FALSE. */
	efree(result);
	ucnv_close(conv);
	RETURN_FALSE;
}
/* }}} */
@ -76,42 +132,6 @@ static PHP_FUNCTION(unicode_semantics)
}
/* }}} */
/* {{{ proto string unicode_encode(unicode input, string encoding) U
Takes a unicode string and converts it to a string in the specified encoding */
static PHP_FUNCTION(unicode_encode)
{
	UChar *src;
	int src_len;
	char *enc_name;
	int enc_name_len;
	char *out;
	int out_len;
	UConverter *conv = NULL;
	UErrorCode status = U_ZERO_ERROR;

	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "us", &src, &src_len, &enc_name, &enc_name_len) == FAILURE) {
		return;
	}

	/* Failure to open is detected through the returned handle, not through status. */
	conv = ucnv_open(enc_name, &status);
	if (conv == NULL) {
		php_error_docref(NULL TSRMLS_CC, E_WARNING, "could not locate converter for %s", enc_name);
		RETURN_FALSE;
	}

	status = U_ZERO_ERROR;
	zend_convert_from_unicode(conv, &out, &out_len, src, src_len, &status);

	/* A failed conversion only emits a warning; the output buffer is still
	   returned to the caller afterwards. */
	if (U_FAILURE(status)) {
		/* TODO: error handling semantics ? */
		php_error_docref(NULL TSRMLS_CC, E_WARNING, "conversion was not entirely successful: %d", status);
	}

	RETVAL_STRINGL(out, out_len, 0);
	ucnv_close(conv);
}
/* }}} */
/* {{{ proto bool unicode_set_error_mode(int direction, int mode) U
Sets global conversion error mode for the specified conversion direction */
PHP_FUNCTION(unicode_set_error_mode)