diff --git a/Zend/zend_constants.c b/Zend/zend_constants.c index be8f0f083c0..089ef6d93b9 100644 --- a/Zend/zend_constants.c +++ b/Zend/zend_constants.c @@ -124,7 +124,6 @@ void zend_register_standard_constants(TSRMLS_D) REGISTER_MAIN_LONG_CONSTANT("U_CONV_ERROR_ESCAPE_JAVA", ZEND_CONV_ERROR_ESCAPE_JAVA, CONST_PERSISTENT | CONST_CS); REGISTER_MAIN_LONG_CONSTANT("U_CONV_ERROR_ESCAPE_XML_DEC", ZEND_CONV_ERROR_ESCAPE_XML_DEC, CONST_PERSISTENT | CONST_CS); REGISTER_MAIN_LONG_CONSTANT("U_CONV_ERROR_ESCAPE_XML_HEX", ZEND_CONV_ERROR_ESCAPE_XML_HEX, CONST_PERSISTENT | CONST_CS); - REGISTER_MAIN_LONG_CONSTANT("U_CONV_ERROR_EXCEPTION", ZEND_CONV_ERROR_EXCEPTION, CONST_PERSISTENT | CONST_CS); REGISTER_MAIN_LONG_CONSTANT("FROM_UNICODE", ZEND_FROM_UNICODE, CONST_PERSISTENT | CONST_CS); REGISTER_MAIN_LONG_CONSTANT("TO_UNICODE", ZEND_TO_UNICODE, CONST_PERSISTENT | CONST_CS); diff --git a/Zend/zend_unicode.c b/Zend/zend_unicode.c index f5ef2d14684..22922252e71 100644 --- a/Zend/zend_unicode.c +++ b/Zend/zend_unicode.c @@ -390,8 +390,8 @@ ZEND_API char* zend_unicode_to_ascii(const UChar *us, int us_len TSRMLS_DC) } /* }}} */ -/* {{{ zend_raise_conversion_error_ex */ -ZEND_API void zend_raise_conversion_error_ex(char *message, UConverter *conv, zend_conv_direction dir, int error_char_offset, int use_exception TSRMLS_DC) +/* {{{ zend_default_conversion_error_handler */ +static void zend_default_conversion_error_handler(char *message, UConverter *conv, zend_conv_direction dir, int error_char_offset TSRMLS_DC) { const char *conv_name; UErrorCode status = U_ZERO_ERROR; @@ -399,15 +399,6 @@ ZEND_API void zend_raise_conversion_error_ex(char *message, UConverter *conv, ze if (!message) return; - if (!conv) { - if (use_exception) { - zend_throw_exception_ex(unicodeConversionException, 0 TSRMLS_CC, "%s", message); - } else { - zend_error(E_WARNING, "%s", message); - } - return; - } - conv_name = ucnv_getName(conv, &status); /* * UTODO @@ -426,11 +417,7 @@ ZEND_API void 
zend_raise_conversion_error_ex(char *message, UConverter *conv, ze ucnv_getInvalidUChars(conv, err_char, &err_char_len, &status); codepoint = (err_char_len < 2) ? err_char[0] : U16_GET_SUPPLEMENTARY(err_char[0], err_char[1]); - if (use_exception) { - zend_throw_exception_ex(unicodeConversionException, 0 TSRMLS_CC, message_fmt, message, conv_name?conv_name:"", codepoint, error_char_offset-1); - } else { - zend_error(E_WARNING, message_fmt, message, conv_name?conv_name:"", codepoint, error_char_offset-1); - } + zend_error(E_WARNING, message_fmt, message, conv_name?conv_name:"", codepoint, error_char_offset-1); } else { char err_char[8]; /* UTF-8 uses up to 8 bytes */ char buf[32]; /* 4x number of error bytes */ @@ -446,11 +433,106 @@ ZEND_API void zend_raise_conversion_error_ex(char *message, UConverter *conv, ze p += 5; } - if (use_exception) { - zend_throw_exception_ex(unicodeConversionException, 0 TSRMLS_CC, message_fmt, message, conv_name?conv_name:"", buf, error_char_offset-err_char_len); - } else { - zend_error(E_WARNING, message_fmt, message, conv_name?conv_name:"", buf, error_char_offset-err_char_len); + zend_error(E_WARNING, message_fmt, message, conv_name?conv_name:"", buf, error_char_offset-err_char_len); + } +} +/* }}} */ + +/* {{{ zend_call_conversion_error_handler - invoke the user handler stored in UG(conv_error_handler) with (direction, encoding, invalid char, offset, message); fall back to the built-in handler when it returns boolean false, or when the call fails with no exception pending */ +static void zend_call_conversion_error_handler(char *message, UConverter *conv, zend_conv_direction dir, int error_char_offset TSRMLS_DC) +{ + zval *z_message, *z_dir, *z_encoding, *z_char, *z_offset; + zval ***params; + zval *retval; + zval *orig_user_error_handler; + const char *conv_name; + UErrorCode status = U_ZERO_ERROR; + + ALLOC_INIT_ZVAL(z_message); + ALLOC_INIT_ZVAL(z_dir); + ALLOC_INIT_ZVAL(z_encoding); + ALLOC_INIT_ZVAL(z_char); + ALLOC_INIT_ZVAL(z_offset); + + if (message) { + ZVAL_STRING(z_message, message, 1); + } else { + ZVAL_NULL(z_message); + } + + ZVAL_LONG(z_dir, dir); + + conv_name = ucnv_getName(conv, &status); + /* + * UTODO + * use some other standard than MIME?
or fallback onto IANA? or use + * internal converter name? ponder + * maybe pass Converter object, when it's implemented? + */ + conv_name = ucnv_getStandardName(conv_name, "MIME", &status); + ZVAL_STRING(z_encoding, (char *) (conv_name ? conv_name : ""), 1); /* the standard-name lookup may yield NULL; pass "" like the built-in handler does */ + + if (dir == ZEND_FROM_UNICODE) { + UChar err_char[U16_MAX_LENGTH]; + int8_t err_char_len = U16_MAX_LENGTH; /* capacity is counted in UChars, not bytes */ + + ucnv_getInvalidUChars(conv, err_char, &err_char_len, &status); + ZVAL_UNICODEL(z_char, err_char, err_char_len, 1); + ZVAL_LONG(z_offset, error_char_offset-1); + } else { + char err_char[8]; /* UTF-8 uses up to 8 bytes */ + int8_t err_char_len = sizeof(err_char); + + ucnv_getInvalidChars(conv, err_char, &err_char_len, &status); + ZVAL_STRINGL(z_char, err_char, err_char_len, 1); + ZVAL_LONG(z_offset, error_char_offset-err_char_len); + } + + params = (zval ***) emalloc(sizeof(zval **) * 5); /* one slot per handler argument */ + params[0] = &z_dir; + params[1] = &z_encoding; + params[2] = &z_char; + params[3] = &z_offset; + params[4] = &z_message; + + orig_user_error_handler = UG(conv_error_handler); + UG(conv_error_handler) = NULL; /* cleared for the duration of the call; put back below if it is still NULL afterwards */ + + if (call_user_function_ex(EG(function_table), NULL, orig_user_error_handler, &retval, 5, params, 1, NULL TSRMLS_CC)==SUCCESS) { + if (retval) { + /* user error handler returned 'false', use built-in error handler */ + if (Z_TYPE_P(retval) == IS_BOOL && Z_LVAL_P(retval) == 0) { + zend_default_conversion_error_handler(message, conv, dir, error_char_offset TSRMLS_CC); + } + zval_ptr_dtor(&retval); } + } else if (!EG(exception)) { + /* The user error handler failed, use built-in error handler */ + zend_default_conversion_error_handler(message, conv, dir, error_char_offset TSRMLS_CC); + } + + if (!UG(conv_error_handler)) { + UG(conv_error_handler) = orig_user_error_handler; + } else { + zval_ptr_dtor(&orig_user_error_handler); + } + + efree(params); + zval_ptr_dtor(&z_dir); + zval_ptr_dtor(&z_encoding); + zval_ptr_dtor(&z_char); + zval_ptr_dtor(&z_offset); + zval_ptr_dtor(&z_message); +} +/* }}} */ + +/* {{{
zend_raise_conversion_error_ex */ +ZEND_API void zend_raise_conversion_error_ex(char *message, UConverter *conv, zend_conv_direction dir, int error_char_offset TSRMLS_DC) +{ + if (UG(conv_error_handler)) { + zend_call_conversion_error_handler(message, conv, dir, error_char_offset TSRMLS_CC); + } else { + zend_default_conversion_error_handler(message, conv, dir, error_char_offset TSRMLS_CC); } } /* }}} */ @@ -471,7 +553,7 @@ ZEND_API int zval_unicode_to_string(zval *string, UConverter *conv TSRMLS_DC) if (U_FAILURE(status)) { int32_t offset = u_countChar32(u, num_conv); - zend_raise_conversion_error_ex("Could not convert Unicode string to binary string", conv, ZEND_FROM_UNICODE, offset, (UG(from_error_mode) & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC); + zend_raise_conversion_error_ex("Could not convert Unicode string to binary string", conv, ZEND_FROM_UNICODE, offset TSRMLS_CC); if (s) { efree(s); } @@ -500,7 +582,7 @@ ZEND_API int zval_string_to_unicode_ex(zval *string, UConverter *conv TSRMLS_DC) num_conv = zend_convert_to_unicode(conv, &u, &u_len, s, s_len, &status); if (U_FAILURE(status)) { - zend_raise_conversion_error_ex("Could not convert binary string to Unicode string", conv, ZEND_TO_UNICODE, num_conv, (UG(to_error_mode) & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC); + zend_raise_conversion_error_ex("Could not convert binary string to Unicode string", conv, ZEND_TO_UNICODE, num_conv TSRMLS_CC); if (u) { efree(u); } diff --git a/Zend/zend_unicode.h b/Zend/zend_unicode.h index 46a2023a82f..f597575b71c 100644 --- a/Zend/zend_unicode.h +++ b/Zend/zend_unicode.h @@ -87,9 +87,7 @@ ZEND_API void zend_case_fold_string(UChar **dest, int *dest_len, UChar *src, int ZEND_API int zend_is_valid_identifier(UChar *ident, int ident_len); ZEND_API int zend_normalize_identifier(UChar **dest, int *dest_len, UChar *ident, int ident_len, zend_bool fold_case); -#define zend_raise_conversion_error(message, exception) \ - zend_raise_conversion_error_ex(message, NULL, 0, 0, exception 
TSRMLS_CC) -ZEND_API void zend_raise_conversion_error_ex(char *message, UConverter *conv, zend_conv_direction dir, int error_char_offset, int use_exception TSRMLS_DC); +ZEND_API void zend_raise_conversion_error_ex(char *message, UConverter *conv, zend_conv_direction dir, int error_char_offset TSRMLS_DC); /* * Function to get a codepoint at position n. Iterates over codepoints starting from the diff --git a/ext/unicode/unicode.c b/ext/unicode/unicode.c index 40bb9914c51..dea86311b92 100644 --- a/ext/unicode/unicode.c +++ b/ext/unicode/unicode.c @@ -62,7 +62,7 @@ static PHP_FUNCTION(unicode_decode) status = U_ZERO_ERROR; num_conv = zend_convert_to_unicode(conv, &dest, &dest_len, str, str_len, &status); if (U_FAILURE(status)) { - zend_raise_conversion_error_ex("could not decode binary string", conv, ZEND_TO_UNICODE, num_conv, (flags & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC); + zend_raise_conversion_error_ex("could not decode binary string", conv, ZEND_TO_UNICODE, num_conv TSRMLS_CC); efree(dest); ucnv_close(conv); RETURN_FALSE; @@ -114,7 +114,7 @@ static PHP_FUNCTION(unicode_encode) num_conv = zend_convert_from_unicode(conv, &dest, &dest_len, uni, uni_len, &status); if (U_FAILURE(status)) { int32_t offset = u_countChar32(uni, num_conv); - zend_raise_conversion_error_ex("could not encode Unicode string", conv, ZEND_FROM_UNICODE, offset, (flags & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC); + zend_raise_conversion_error_ex("could not encode Unicode string", conv, ZEND_FROM_UNICODE, offset TSRMLS_CC); efree(dest); ucnv_close(conv); RETURN_FALSE; diff --git a/main/streams/filter.c b/main/streams/filter.c index 1d86e65af19..ffac6352978 100644 --- a/main/streams/filter.c +++ b/main/streams/filter.c @@ -750,7 +750,7 @@ PHPAPI int _php_stream_bucket_convert(php_stream_bucket *bucket, unsigned char t if (U_FAILURE(status)) { int32_t offset = u_countChar32(bucket->buf.u, num_conv); - zend_raise_conversion_error_ex("Could not convert Unicode string to binary string", conv, 
ZEND_FROM_UNICODE, offset, (UG(from_error_mode) & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC); + zend_raise_conversion_error_ex("Could not convert Unicode string to binary string", conv, ZEND_FROM_UNICODE, offset TSRMLS_CC); } if (bucket->own_buf) { diff --git a/main/streams/streams.c b/main/streams/streams.c index 38575d41e2a..2d9e7707888 100755 --- a/main/streams/streams.c +++ b/main/streams/streams.c @@ -1222,7 +1222,7 @@ static size_t _php_stream_write_buffer(php_stream *stream, int buf_type, zstr bu num_conv = zend_convert_from_unicode(ZEND_U_CONVERTER(UG(runtime_encoding_conv)), &str, &len, buf.u, buflen, &status); if (U_FAILURE(status)) { zend_raise_conversion_error_ex("Unable to convert data to be written", ZEND_U_CONVERTER(UG(runtime_encoding_conv)), - ZEND_FROM_UNICODE, num_conv, (UG(from_error_mode) & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC); + ZEND_FROM_UNICODE, num_conv TSRMLS_CC); } else { php_error_docref(NULL TSRMLS_CC, E_NOTICE, "%d character unicode buffer downcoded for binary stream runtime_encoding", ulen); } @@ -1270,7 +1270,7 @@ static size_t _php_stream_write_buffer(php_stream *stream, int buf_type, zstr bu } else { /* Figure out how didwrite corresponds to the input buffer */ char *tmp = emalloc(didwrite + 1), *t = tmp; - UChar *s = buf_orig; + const UChar *s = buf_orig; UErrorCode status = U_ZERO_ERROR; ucnv_resetFromUnicode(ZEND_U_CONVERTER(UG(runtime_encoding_conv))); @@ -1570,7 +1570,7 @@ PHPAPI size_t _php_stream_passthru(php_stream * stream STREAMS_DC TSRMLS_DC) while ((b = php_stream_read_unicode(stream, inbuf_start, sizeof(inbuf_start))) > 0) { char *outbuf = outbuf_start; - UChar *inbuf = inbuf_start; + const UChar *inbuf = inbuf_start; UErrorCode status = U_ZERO_ERROR; int len; @@ -1580,7 +1580,7 @@ PHPAPI size_t _php_stream_passthru(php_stream * stream STREAMS_DC TSRMLS_DC) /* Memory overflow isn't a problem becuase MAX_BYTES_FOR_STRING was allocated, anything else is a more serious problem */ zend_raise_conversion_error_ex("Unable to 
convert Unicode character using output_encoding, at least one character was lost", - conv, ZEND_FROM_UNICODE, len, (UG(from_error_mode) & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC); + conv, ZEND_FROM_UNICODE, len TSRMLS_CC); } if (outbuf > outbuf_start) { PHPWRITE(outbuf_start, outbuf - outbuf_start); @@ -2594,8 +2594,7 @@ PHPAPI int _php_stream_path_encode(php_stream_wrapper *wrapper, num_conv = zend_convert_from_unicode(UG(utf8_conv), &scheme, &scheme_len, path, (p - path) + delim_len, &status); if (U_FAILURE(status)) { if (options & REPORT_ERRORS) { - zend_raise_conversion_error_ex("Unable to convert filepath", UG(utf8_conv), ZEND_FROM_UNICODE, - num_conv, (UG(from_error_mode) & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC); + zend_raise_conversion_error_ex("Unable to convert filepath", UG(utf8_conv), ZEND_FROM_UNICODE, num_conv TSRMLS_CC); } *pathenc = NULL; *pathenc_len = 0; @@ -2634,7 +2633,7 @@ PHPAPI int _php_stream_path_encode(php_stream_wrapper *wrapper, if (U_FAILURE(status)) { if (options & REPORT_ERRORS) { zend_raise_conversion_error_ex("Unable to convert filepath", ZEND_U_CONVERTER(UG(filesystem_encoding_conv)), - ZEND_FROM_UNICODE, num_conv, (UG(from_error_mode) & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC); + ZEND_FROM_UNICODE, num_conv TSRMLS_CC); } *pathenc = NULL; @@ -2676,7 +2675,7 @@ PHPAPI int _php_stream_path_decode(php_stream_wrapper *wrapper, if (U_FAILURE(status)) { if (options & REPORT_ERRORS) { zend_raise_conversion_error_ex("Unable to convert filepath", ZEND_U_CONVERTER(UG(filesystem_encoding_conv)), - ZEND_TO_UNICODE, num_conv, (UG(to_error_mode) & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC); + ZEND_TO_UNICODE, num_conv TSRMLS_CC); } *pathdec = NULL;