mirror of
https://github.com/php/php-src.git
synced 2025-08-19 17:04:47 +02:00
Another (and hopefully last) major streams commit.
This moves unicode conversion to the filter layer (rather than the lower streams layer). unicode_filter.c has been moved from ext/unicode to main/streams, as it is an integral part of the streams unicode conversion process.
There are now three ways to set the encoding on a stream:
(1) By context: $ctx = stream_context_create(NULL, array('encoding'=>'latin1')); $fp = fopen('somefile', 'r+t', false, $ctx);
(2) By stream_encoding(): $fp = fopen('somefile', 'r+'); stream_encoding($fp, 'latin1');
(3) By filter: $fp = fopen('somefile', 'r+'); stream_filter_append($fp, 'unicode.from.latin1', STREAM_FILTER_READ); stream_filter_append($fp, 'unicode.to.latin1', STREAM_FILTER_WRITE);
Note: Methods 1 and 2 are convenience wrappers around method 3.
This commit is contained in:
parent
f028fcecb5
commit
30a2bd1d11
15 changed files with 275 additions and 238 deletions
|
@ -4,4 +4,4 @@ dnl
|
|||
|
||||
PHP_SUBST(UNICODE_SHARED_LIBADD)
|
||||
AC_DEFINE(HAVE_UNICODE, 1, [ ])
|
||||
PHP_NEW_EXTENSION(unicode, unicode.c locale.c unicode_filter.c unicode_iterators.c collator.c, $ext_shared)
|
||||
PHP_NEW_EXTENSION(unicode, unicode.c locale.c unicode_iterators.c collator.c, $ext_shared)
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
// $Id$
|
||||
// vim:ft=javascript
|
||||
|
||||
EXTENSION("unicode", "unicode.c unicode_filter.c unicode_iterators.c collator.c locale.c");
|
||||
EXTENSION("unicode", "unicode.c unicode_iterators.c collator.c locale.c");
|
||||
AC_DEFINE('HAVE_UNICODE', 1, 'ICU API extension');
|
||||
|
|
|
@ -67,7 +67,6 @@ PHP_FUNCTION(collator_get_attribute);
|
|||
PHP_METHOD(collator, __construct);
|
||||
|
||||
void php_init_collation(TSRMLS_D);
|
||||
extern php_stream_filter_factory php_unicode_filter_factory;
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
|
|
|
@ -273,10 +273,6 @@ ZEND_GET_MODULE(unicode)
|
|||
/* {{{ PHP_MINIT_FUNCTION */
|
||||
PHP_MINIT_FUNCTION(unicode)
|
||||
{
|
||||
if (php_stream_filter_register_factory("unicode.*", &php_unicode_filter_factory TSRMLS_CC) == FAILURE) {
|
||||
return FAILURE;
|
||||
}
|
||||
|
||||
php_register_unicode_iterators(TSRMLS_C);
|
||||
php_init_collation(TSRMLS_C);
|
||||
|
||||
|
@ -287,9 +283,6 @@ PHP_MINIT_FUNCTION(unicode)
|
|||
/* {{{ PHP_MSHUTDOWN_FUNCTION */
|
||||
PHP_MSHUTDOWN_FUNCTION(unicode)
|
||||
{
|
||||
if (php_stream_filter_unregister_factory("unicode.*" TSRMLS_CC) == FAILURE) {
|
||||
return FAILURE;
|
||||
}
|
||||
/* add your stuff here */
|
||||
|
||||
|
||||
|
|
|
@ -1,322 +0,0 @@
|
|||
/*
|
||||
+----------------------------------------------------------------------+
|
||||
| PHP Version 6 |
|
||||
+----------------------------------------------------------------------+
|
||||
| This source file is subject to version 3.01 of the PHP license, |
|
||||
| that is bundled with this package in the file LICENSE, and is |
|
||||
| available through the world-wide-web at the following url: |
|
||||
| http://www.php.net/license/3_01.txt |
|
||||
| If you did not receive a copy of the PHP license and are unable to |
|
||||
| obtain it through the world-wide-web, please send a note to |
|
||||
| license@php.net so we can mail you a copy immediately. |
|
||||
+----------------------------------------------------------------------+
|
||||
| Authors: Sara Golemon (pollita@php.net) |
|
||||
+----------------------------------------------------------------------+
|
||||
*/
|
||||
|
||||
/* $Id$ */
|
||||
|
||||
|
||||
#include "php.h"
|
||||
#include <unicode/ucnv.h>
|
||||
|
||||
/* {{{ data structure */
|
||||
typedef struct _php_unicode_filter_data {
|
||||
char is_persistent;
|
||||
UConverter *conv;
|
||||
|
||||
char to_unicode;
|
||||
} php_unicode_filter_data;
|
||||
/* }}} */
|
||||
|
||||
/* {{{ unicode.* filter implementation */
|
||||
|
||||
/* unicode.to.* -- Expects String -- Returns Unicode */
|
||||
static php_stream_filter_status_t php_unicode_to_string_filter(
|
||||
php_stream *stream,
|
||||
php_stream_filter *thisfilter,
|
||||
php_stream_bucket_brigade *buckets_in,
|
||||
php_stream_bucket_brigade *buckets_out,
|
||||
size_t *bytes_consumed,
|
||||
int flags
|
||||
TSRMLS_DC)
|
||||
{
|
||||
php_unicode_filter_data *data;
|
||||
php_stream_filter_status_t exit_status = PSFS_FEED_ME;
|
||||
size_t consumed = 0;
|
||||
|
||||
if (!thisfilter || !thisfilter->abstract) {
|
||||
/* Should never happen */
|
||||
return PSFS_ERR_FATAL;
|
||||
}
|
||||
|
||||
data = (php_unicode_filter_data *)(thisfilter->abstract);
|
||||
while (buckets_in->head) {
|
||||
php_stream_bucket *bucket = buckets_in->head;
|
||||
UChar *src = bucket->buf.u;
|
||||
|
||||
php_stream_bucket_unlink(bucket TSRMLS_CC);
|
||||
if (!bucket->buf_type == IS_UNICODE) {
|
||||
/* Already ASCII, can't really do anything with it */
|
||||
consumed += bucket->buflen;
|
||||
php_stream_bucket_append(buckets_out, bucket TSRMLS_CC);
|
||||
exit_status = PSFS_PASS_ON;
|
||||
continue;
|
||||
}
|
||||
|
||||
while (src < (bucket->buf.u + bucket->buflen)) {
|
||||
int remaining = bucket->buflen - (src - bucket->buf.u);
|
||||
char *destp, *destbuf;
|
||||
int32_t destlen = UCNV_GET_MAX_BYTES_FOR_STRING(remaining, ucnv_getMaxCharSize(data->conv));
|
||||
UErrorCode errCode = U_ZERO_ERROR;
|
||||
php_stream_bucket *new_bucket;
|
||||
|
||||
destp = destbuf = (char *)pemalloc(destlen, data->is_persistent);
|
||||
|
||||
ucnv_fromUnicode(data->conv, &destp, destbuf + destlen, (const UChar**)&src, src + remaining, NULL, FALSE, &errCode);
|
||||
new_bucket = php_stream_bucket_new(stream, destbuf, destp - destbuf, 1, data->is_persistent TSRMLS_CC);
|
||||
php_stream_bucket_append(buckets_out, new_bucket TSRMLS_CC);
|
||||
exit_status = PSFS_PASS_ON;
|
||||
}
|
||||
consumed += UBYTES(bucket->buflen);
|
||||
php_stream_bucket_delref(bucket TSRMLS_CC);
|
||||
}
|
||||
|
||||
if (flags & PSFS_FLAG_FLUSH_CLOSE) {
|
||||
UErrorCode errCode = U_ZERO_ERROR;
|
||||
char d[64], *dest = d, *destp = d + 64;
|
||||
/* Spit it out! */
|
||||
|
||||
ucnv_fromUnicode(data->conv, &dest, destp, NULL, NULL, NULL, TRUE, &errCode);
|
||||
if (dest > d) {
|
||||
php_stream_bucket *bucket = php_stream_bucket_new(stream, d, dest - d, 0, 0 TSRMLS_CC);
|
||||
php_stream_bucket_append(buckets_out, bucket TSRMLS_CC);
|
||||
exit_status = PSFS_PASS_ON;
|
||||
}
|
||||
}
|
||||
|
||||
if (bytes_consumed) {
|
||||
*bytes_consumed = consumed;
|
||||
}
|
||||
|
||||
return exit_status;
|
||||
}
|
||||
|
||||
/* unicode.from.* -- Expects Unicode -- Returns String */
|
||||
static php_stream_filter_status_t php_unicode_from_string_filter(
|
||||
php_stream *stream,
|
||||
php_stream_filter *thisfilter,
|
||||
php_stream_bucket_brigade *buckets_in,
|
||||
php_stream_bucket_brigade *buckets_out,
|
||||
size_t *bytes_consumed,
|
||||
int flags
|
||||
TSRMLS_DC)
|
||||
{
|
||||
php_unicode_filter_data *data;
|
||||
php_stream_filter_status_t exit_status = PSFS_FEED_ME;
|
||||
size_t consumed = 0;
|
||||
|
||||
if (!thisfilter || !thisfilter->abstract) {
|
||||
/* Should never happen */
|
||||
return PSFS_ERR_FATAL;
|
||||
}
|
||||
|
||||
data = (php_unicode_filter_data *)(thisfilter->abstract);
|
||||
while (buckets_in->head) {
|
||||
php_stream_bucket *bucket = buckets_in->head;
|
||||
char *src = bucket->buf.s;
|
||||
|
||||
php_stream_bucket_unlink(bucket TSRMLS_CC);
|
||||
if (bucket->buf_type == IS_UNICODE) {
|
||||
/* already in unicode, nothing to do */
|
||||
consumed += UBYTES(bucket->buflen);
|
||||
php_stream_bucket_append(buckets_out, bucket TSRMLS_CC);
|
||||
exit_status = PSFS_PASS_ON;
|
||||
continue;
|
||||
}
|
||||
|
||||
while (src < (bucket->buf.s + bucket->buflen)) {
|
||||
int remaining = bucket->buflen - (src - bucket->buf.s);
|
||||
UChar *destp, *destbuf;
|
||||
int32_t destlen = UCNV_GET_MAX_BYTES_FOR_STRING(remaining, ucnv_getMaxCharSize(data->conv));
|
||||
UErrorCode errCode = U_ZERO_ERROR;
|
||||
php_stream_bucket *new_bucket;
|
||||
|
||||
destp = destbuf = (UChar *)pemalloc(destlen, data->is_persistent);
|
||||
|
||||
ucnv_toUnicode(data->conv, &destp, (UChar*)((char*)destbuf + destlen), (const char**)&src, src + remaining, NULL, FALSE, &errCode);
|
||||
|
||||
new_bucket = php_stream_bucket_new_unicode(stream, destbuf, destp - destbuf, 1, data->is_persistent TSRMLS_CC);
|
||||
php_stream_bucket_append(buckets_out, new_bucket TSRMLS_CC);
|
||||
exit_status = PSFS_PASS_ON;
|
||||
}
|
||||
consumed += bucket->buflen;
|
||||
php_stream_bucket_delref(bucket TSRMLS_CC);
|
||||
}
|
||||
|
||||
if (flags & PSFS_FLAG_FLUSH_CLOSE) {
|
||||
UErrorCode errCode = U_ZERO_ERROR;
|
||||
UChar d[64], *dest = d, *destp = d + 64;
|
||||
/* Spit it out! */
|
||||
|
||||
ucnv_toUnicode(data->conv, &dest, destp, NULL, NULL, NULL, TRUE, &errCode);
|
||||
if (dest > d) {
|
||||
php_stream_bucket *bucket = php_stream_bucket_new_unicode(stream, d, dest - d, 0, 0 TSRMLS_CC);
|
||||
php_stream_bucket_append(buckets_out, bucket TSRMLS_CC);
|
||||
exit_status = PSFS_PASS_ON;
|
||||
}
|
||||
}
|
||||
|
||||
if (bytes_consumed) {
|
||||
*bytes_consumed = consumed;
|
||||
}
|
||||
|
||||
return exit_status;
|
||||
}
|
||||
|
||||
/* unicode.tidy.* -- Expects anything -- Returns whatever is preferred by subsequent filters
|
||||
Can be used to "magically" fix-up bucket messes */
|
||||
static php_stream_filter_status_t php_unicode_tidy_filter(
|
||||
php_stream *stream,
|
||||
php_stream_filter *thisfilter,
|
||||
php_stream_bucket_brigade *buckets_in,
|
||||
php_stream_bucket_brigade *buckets_out,
|
||||
size_t *bytes_consumed,
|
||||
int flags
|
||||
TSRMLS_DC)
|
||||
{
|
||||
php_unicode_filter_data *data;
|
||||
int prefer_unicode;
|
||||
|
||||
if (!thisfilter || !thisfilter->abstract) {
|
||||
/* Should never happen */
|
||||
return PSFS_ERR_FATAL;
|
||||
}
|
||||
|
||||
prefer_unicode = php_stream_filter_output_prefer_unicode(thisfilter);
|
||||
data = (php_unicode_filter_data *)(thisfilter->abstract);
|
||||
|
||||
if (prefer_unicode) {
|
||||
if (!data->to_unicode) {
|
||||
ucnv_resetToUnicode(data->conv);
|
||||
data->to_unicode = prefer_unicode;
|
||||
}
|
||||
return php_unicode_from_string_filter(stream, thisfilter, buckets_in, buckets_out, bytes_consumed, flags TSRMLS_CC);
|
||||
} else {
|
||||
if (data->to_unicode) {
|
||||
ucnv_resetFromUnicode(data->conv);
|
||||
data->to_unicode = prefer_unicode;
|
||||
}
|
||||
return php_unicode_to_string_filter(stream, thisfilter, buckets_in, buckets_out, bytes_consumed, flags TSRMLS_CC);
|
||||
}
|
||||
}
|
||||
|
||||
/* Destructor shared by all unicode.* filters: closes the ICU converter and
   releases the filter state. Does nothing when there is no state attached. */
static void php_unicode_filter_dtor(php_stream_filter *thisfilter TSRMLS_DC)
{
	php_unicode_filter_data *state;

	if (!thisfilter || !thisfilter->abstract) {
		return;
	}

	state = (php_unicode_filter_data *)thisfilter->abstract;
	ucnv_close(state->conv);
	pefree(state, state->is_persistent);
}
|
||||
|
||||
/* Ops table selected by the factory for "unicode.to.*" names: the Unicode-to-binary conversion callback, the shared destructor, the name pattern, and flags declaring Unicode input and string output. */
static php_stream_filter_ops php_unicode_to_string_filter_ops = {
|
||||
php_unicode_to_string_filter,
|
||||
php_unicode_filter_dtor,
|
||||
"unicode.to.*",
|
||||
PSFO_FLAG_ACCEPTS_UNICODE | PSFO_FLAG_OUTPUTS_STRING
|
||||
};
|
||||
|
||||
/* Ops table selected by the factory for "unicode.from.*" names: the binary-to-Unicode conversion callback, the shared destructor, the name pattern, and flags declaring string input and Unicode output. */
static php_stream_filter_ops php_unicode_from_string_filter_ops = {
|
||||
php_unicode_from_string_filter,
|
||||
php_unicode_filter_dtor,
|
||||
"unicode.from.*",
|
||||
PSFO_FLAG_ACCEPTS_STRING | PSFO_FLAG_OUTPUTS_UNICODE
|
||||
};
|
||||
|
||||
/* Ops table selected by the factory for "unicode.tidy" and "unicode.tidy.*" names: the direction-choosing callback, the shared destructor, the name pattern, and flags declaring that any input and output type is acceptable. */
static php_stream_filter_ops php_unicode_tidy_filter_ops = {
|
||||
php_unicode_tidy_filter,
|
||||
php_unicode_filter_dtor,
|
||||
"unicode.tidy.*",
|
||||
PSFO_FLAG_ACCEPTS_ANY | PSFO_FLAG_OUTPUTS_ANY
|
||||
};
|
||||
/* }}} */
|
||||
|
||||
|
||||
/* {{{ unicode.* factory */
|
||||
|
||||
/* Factory callback for "unicode.*" filter names.
 *
 * Recognised names (the "unicode." prefix is matched case-insensitively,
 * the direction part case-sensitively):
 *   unicode.to.<charset>    Unicode -> <charset>
 *   unicode.from.<charset>  <charset> -> Unicode
 *   unicode.tidy.<charset>  direction chosen at filter time
 *   unicode.tidy            same as unicode.tidy.utf8
 *
 * filtername   full name the filter was requested under
 * filterparams unused
 * persistent   non-zero to allocate the filter state persistently
 *
 * Returns the new filter, or NULL if the name is not recognised or the ICU
 * converter for the charset cannot be opened (a warning is raised for the
 * latter).
 */
static php_stream_filter *php_unicode_filter_create(const char *filtername, zval *filterparams, int persistent TSRMLS_DC)
{
	php_unicode_filter_data *data;
	const char *charset, *direction;
	php_stream_filter_ops *fops;
	UErrorCode ucnvError = U_ZERO_ERROR;
	char to_unicode = 0;

	if (strncasecmp(filtername, "unicode.", sizeof("unicode.") - 1)) {
		/* Never happens */
		return NULL;
	}

	direction = filtername + sizeof("unicode.") - 1;
	if (strncmp(direction, "to.", sizeof("to.") - 1) == 0) {
		fops = &php_unicode_to_string_filter_ops;
		charset = direction + sizeof("to.") - 1;
	} else if (strncmp(direction, "from.", sizeof("from.") - 1) == 0) {
		fops = &php_unicode_from_string_filter_ops;
		to_unicode = 1;
		charset = direction + sizeof("from.") - 1;
	} else if (strncmp(direction, "tidy.", sizeof("tidy.") - 1) == 0) {
		fops = &php_unicode_tidy_filter_ops;
		charset = direction + sizeof("tidy.") - 1;
	} else if (strcmp(direction, "tidy") == 0) {
		fops = &php_unicode_tidy_filter_ops;
		charset = "utf8";
	} else {
		/* Shouldn't happen */
		return NULL;
	}

	/* Create this filter */
	data = (php_unicode_filter_data *)pecalloc(1, sizeof(php_unicode_filter_data), persistent);
	if (!data) {
		/* sizeof yields size_t; cast so the argument matches %d */
		php_error_docref(NULL TSRMLS_CC, E_ERROR, "Failed allocating %d bytes.", (int)sizeof(php_unicode_filter_data));
		return NULL;
	}

	/* Record how the state was allocated: the destructor and the filter
	   callbacks pass this flag to pefree()/pemalloc(), so leaving it at the
	   zero from pecalloc() would free a persistent block with the wrong
	   allocator. */
	data->is_persistent = persistent ? 1 : 0;
	data->conv = ucnv_open(charset, &ucnvError);
	data->to_unicode = to_unicode;
	if (!data->conv) {
		const char *reason = "Unknown Error";

		pefree(data, persistent);
		switch (ucnvError) {
			case U_MEMORY_ALLOCATION_ERROR:
				reason = "unable to allocate memory";
				break;
			case U_FILE_ACCESS_ERROR:
				reason = "file access error";
				break;
			default:
				;
		}
		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to open charset converter, %s", reason);
		return NULL;
	}

	return php_stream_filter_alloc(fops, data, persistent);
}
|
||||
|
||||
/* Non-static factory object whose only initializer is php_unicode_filter_create. */
php_stream_filter_factory php_unicode_filter_factory = {
|
||||
php_unicode_filter_create
|
||||
};
|
||||
/* }}} */
|
||||
|
||||
/*
|
||||
* Local variables:
|
||||
* tab-width: 4
|
||||
* c-basic-offset: 4
|
||||
* End:
|
||||
* vim600: sw=4 ts=4 fdm=marker
|
||||
* vim<600: sw=4 ts=4
|
||||
*/
|
||||
|
Loading…
Add table
Add a link
Reference in a new issue