mirror of
https://github.com/php/php-src.git
synced 2025-08-18 15:08:55 +02:00
Implement character/word/line/sentence iterators and the reverse
counterparts.
This commit is contained in:
parent
50bbedcec3
commit
5418ae7976
1 changed files with 121 additions and 3 deletions
|
@ -28,11 +28,16 @@
|
|||
#include "php.h"
|
||||
#include "zend_interfaces.h"
|
||||
#include "zend_exceptions.h"
|
||||
#include <unicode/ubrk.h>
|
||||
|
||||
/*
 * Iteration granularities supported by TextIterator.
 *
 * The numeric values are published to userland as class constants and are
 * used directly as indexes into iter_ops[], so existing entries must not be
 * reordered. ITER_CHARACTER .. ITER_SENTENCE are the UBreakIterator-backed
 * types; they must stay contiguous and in the same order as brk_type_map[],
 * which is indexed by (type - ITER_CHARACTER).
 *
 * ITER_TYPE_LAST is a sentinel used for range checks, not a real type.
 * No trailing comma after the last enumerator: that is a C99 extension.
 */
typedef enum {
	ITER_CODE_UNIT,
	ITER_CODE_POINT,
	ITER_COMB_SEQUENCE,
	ITER_CHARACTER,
	ITER_WORD,
	ITER_LINE,
	ITER_SENTENCE,
	ITER_TYPE_LAST
} text_iter_type;
|
||||
|
||||
|
@ -60,6 +65,12 @@ typedef struct {
|
|||
int32_t start;
|
||||
int32_t end;
|
||||
} cs;
|
||||
struct {
|
||||
UBreakIterator *iter;
|
||||
int32_t index;
|
||||
int32_t start;
|
||||
int32_t end;
|
||||
} brk;
|
||||
} u;
|
||||
} text_iter_obj;
|
||||
|
||||
|
@ -76,6 +87,13 @@ typedef struct {
|
|||
void (*rewind) (text_iter_obj* object TSRMLS_DC);
|
||||
} text_iter_ops;
|
||||
|
||||
enum UBreakIteratorType brk_type_map[] = {
|
||||
UBRK_CHARACTER,
|
||||
UBRK_WORD,
|
||||
UBRK_LINE,
|
||||
UBRK_SENTENCE,
|
||||
};
|
||||
|
||||
/* Class entries for the text iterator classes registered by this file. */
PHPAPI zend_class_entry *text_iterator_aggregate_ce;
PHPAPI zend_class_entry *text_iterator_ce;     /* carries the iterator type class constants */
PHPAPI zend_class_entry *rev_text_iterator_ce;
|
||||
|
@ -276,12 +294,95 @@ static text_iter_ops text_iter_cs_ops = {
|
|||
};
|
||||
|
||||
|
||||
/* UBreakIterator Ops (shared by the character, word, line and sentence iterators) */
|
||||
|
||||
/*
 * Report whether the iterator still points at a segment.
 *
 * The boundary that advances in the direction of travel (start when
 * iterating in reverse, end otherwise) becomes UBRK_DONE once the break
 * iterator has run out of boundaries.
 *
 * Returns non-zero while a current segment exists, 0 otherwise.
 */
static int text_iter_brk_char_valid(text_iter_obj* object TSRMLS_DC)
{
	int32_t leading_edge;

	leading_edge = (object->flags & ITER_REVERSE)
		? object->u.brk.start
		: object->u.brk.end;

	return (leading_edge != UBRK_DONE);
}
|
||||
|
||||
/*
 * Copy the segment between the current pair of boundaries into
 * object->current, growing its buffer when needed, and NUL-terminate it.
 *
 * When the iterator is exhausted, next() has left the leading boundary at
 * UBRK_DONE (-1): end when iterating forward, start when iterating in
 * reverse. Subtracting with that value used to yield either a huge unsigned
 * length (forward) or a copy starting one code unit before the text
 * (reverse). Both boundaries are therefore normalized to the matching edge
 * of the text and the length is clamped, so an exhausted iterator simply
 * produces an empty string. Behaviour for a valid position is unchanged.
 */
static void text_iter_brk_char_current(text_iter_obj* object TSRMLS_DC)
{
	uint32_t length = 0;
	int32_t start = object->u.brk.start;
	int32_t end = object->u.brk.end;

	/* A missing boundary means "edge of the text" on that side. */
	if (start == UBRK_DONE) {
		start = 0;
	}
	if (end == UBRK_DONE) {
		end = object->text_len;
	}

	/* Never let a degenerate pair turn into a wrapped-around length. */
	if (end > start) {
		length = end - start;
	}

	/* Keep one extra slot for the terminating NUL. */
	if (length > object->current_alloc-1) {
		object->current_alloc = length+1;
		Z_USTRVAL_P(object->current) = eurealloc(Z_USTRVAL_P(object->current), object->current_alloc);
	}
	if (length > 0) {
		u_memcpy(Z_USTRVAL_P(object->current), object->text + start, length);
	}
	Z_USTRVAL_P(object->current)[length] = 0;
	Z_USTRLEN_P(object->current) = length;
}
|
||||
|
||||
/*
 * Return the iterator key: the number of segments stepped over since the
 * last rewind (reset to 0 by rewind, incremented by next).
 */
static int text_iter_brk_char_key(text_iter_obj* object TSRMLS_DC)
{
	int position = object->u.brk.index;

	return position;
}
|
||||
|
||||
/*
 * Advance to the adjacent segment in the direction of iteration.
 *
 * The old leading boundary becomes the trailing one and a new leading
 * boundary is fetched from the break iterator (ubrk_previous() in reverse,
 * ubrk_next() otherwise). Once the leading boundary is already UBRK_DONE
 * the call is a no-op, so the key stops increasing at the end.
 */
static void text_iter_brk_char_next(text_iter_obj* object TSRMLS_DC)
{
	if (object->flags & ITER_REVERSE) {
		if (object->u.brk.start == UBRK_DONE) {
			return;
		}
		object->u.brk.end = object->u.brk.start;
		object->u.brk.start = ubrk_previous(object->u.brk.iter);
	} else {
		if (object->u.brk.end == UBRK_DONE) {
			return;
		}
		object->u.brk.start = object->u.brk.end;
		object->u.brk.end = ubrk_next(object->u.brk.iter);
	}

	object->u.brk.index++;
}
|
||||
|
||||
/*
 * Reset the iterator to its first segment and zero the key.
 *
 * Forward iteration starts at the first boundary and looks one boundary
 * ahead; reverse iteration starts at the last boundary and looks one
 * boundary back.
 */
static void text_iter_brk_char_rewind(text_iter_obj *object TSRMLS_DC)
{
	UBreakIterator *brk = object->u.brk.iter;

	object->u.brk.index = 0;

	if (!(object->flags & ITER_REVERSE)) {
		object->u.brk.start = ubrk_first(brk);
		object->u.brk.end = ubrk_next(brk);
		return;
	}

	object->u.brk.end = ubrk_last(brk);
	object->u.brk.start = ubrk_previous(brk);
}
|
||||
|
||||
static text_iter_ops text_iter_brk_ops = {
|
||||
text_iter_brk_char_valid,
|
||||
text_iter_brk_char_current,
|
||||
text_iter_brk_char_key,
|
||||
text_iter_brk_char_next,
|
||||
text_iter_brk_char_rewind,
|
||||
};
|
||||
|
||||
|
||||
/* Ops array */
|
||||
|
||||
static text_iter_ops* iter_ops[] = {
|
||||
&text_iter_cu_ops,
|
||||
&text_iter_cp_ops,
|
||||
&text_iter_cs_ops,
|
||||
&text_iter_brk_ops,
|
||||
&text_iter_brk_ops,
|
||||
&text_iter_brk_ops,
|
||||
&text_iter_brk_ops,
|
||||
};
|
||||
|
||||
/* Iterator Funcs */
|
||||
|
@ -376,6 +477,9 @@ static void text_iterator_free_storage(void *object TSRMLS_DC)
|
|||
if (intern->text) {
|
||||
efree(intern->text);
|
||||
}
|
||||
/*
 * Every UBreakIterator-backed type, ITER_CHARACTER included, gets an ICU
 * break iterator opened in the constructor, so close it for all of them
 * (a strict '>' comparison leaked the ITER_CHARACTER iterator).
 */
if (intern->type >= ITER_CHARACTER && intern->u.brk.iter) {
	ubrk_close(intern->u.brk.iter);
}
|
||||
zval_ptr_dtor(&intern->current);
|
||||
efree(object);
|
||||
}
|
||||
|
@ -399,6 +503,7 @@ static zend_object_value text_iterator_new(zend_class_entry *class_type TSRMLS_D
|
|||
intern->current_alloc = 3;
|
||||
Z_USTRVAL_P(intern->current) = eumalloc(3);
|
||||
Z_USTRVAL_P(intern->current)[0] = 0;
|
||||
Z_USTRLEN_P(intern->current) = 0;
|
||||
Z_TYPE_P(intern->current) = IS_UNICODE;
|
||||
|
||||
retval.handle = zend_objects_store_put(intern, (zend_objects_store_dtor_t)zend_objects_destroy_object, (zend_objects_free_object_storage_t) text_iterator_free_storage, NULL TSRMLS_CC);
|
||||
|
@ -426,11 +531,11 @@ PHP_METHOD(TextIterator, __construct)
|
|||
intern->text_len = text_len;
|
||||
if (ZEND_NUM_ARGS() > 1) {
|
||||
ti_type = flags & ITER_TYPE_MASK;
|
||||
if (ti_type < ITER_TYPE_LAST) {
|
||||
intern->type = ti_type;
|
||||
} else {
|
||||
if (ti_type < 0 || ti_type >= ITER_TYPE_LAST) {
|
||||
php_error(E_WARNING, "Invalid iterator type in TextIterator constructor");
|
||||
ti_type = ITER_CODE_POINT;
|
||||
}
|
||||
intern->type = ti_type;
|
||||
intern->flags = flags;
|
||||
}
|
||||
|
||||
|
@ -438,6 +543,15 @@ PHP_METHOD(TextIterator, __construct)
|
|||
intern->flags |= ITER_REVERSE;
|
||||
}
|
||||
|
||||
if (ti_type >= ITER_CHARACTER && ti_type < ITER_TYPE_LAST) {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
intern->u.brk.iter = ubrk_open(brk_type_map[ti_type - ITER_CHARACTER], UG(default_locale), text, text_len, &status);
|
||||
if (!U_SUCCESS(status)) {
|
||||
php_error(E_RECOVERABLE_ERROR, "Could not create UBreakIterator: %s", u_errorName(status));
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
iter_ops[intern->type]->rewind(intern TSRMLS_CC);
|
||||
}
|
||||
|
||||
|
@ -513,6 +627,10 @@ void php_register_unicode_iterators(TSRMLS_D)
|
|||
zend_declare_class_constant_long(text_iterator_ce, "CODE_UNIT", sizeof("CODE_UNIT")-1, ITER_CODE_UNIT TSRMLS_CC);
|
||||
zend_declare_class_constant_long(text_iterator_ce, "CODE_POINT", sizeof("CODE_POINT")-1, ITER_CODE_POINT TSRMLS_CC);
|
||||
zend_declare_class_constant_long(text_iterator_ce, "COMB_SEQUENCE", sizeof("COMB_SEQUENCE")-1, ITER_COMB_SEQUENCE TSRMLS_CC);
|
||||
zend_declare_class_constant_long(text_iterator_ce, "CHARACTER", sizeof("CHARACTER")-1, ITER_CHARACTER TSRMLS_CC);
|
||||
zend_declare_class_constant_long(text_iterator_ce, "WORD", sizeof("WORD")-1, ITER_WORD TSRMLS_CC);
|
||||
zend_declare_class_constant_long(text_iterator_ce, "LINE", sizeof("LINE")-1, ITER_LINE TSRMLS_CC);
|
||||
zend_declare_class_constant_long(text_iterator_ce, "SENTENCE", sizeof("SENTENCE")-1, ITER_SENTENCE TSRMLS_CC);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue