Implement TextIterator::isBoundary() for break iterators.

This commit is contained in:
Andrei Zmievski 2006-07-07 22:34:46 +00:00
parent abbc1053fd
commit 4757c4ce46

View file

@ -89,6 +89,7 @@ typedef struct {
void (*next) (text_iter_obj* object, long flags TSRMLS_DC);
void (*rewind) (text_iter_obj* object, long flags TSRMLS_DC);
void (*following) (text_iter_obj* object, int32_t offset, long flags TSRMLS_DC);
zend_bool (*isBoundary)(text_iter_obj* object, int32_t offset, long flags TSRMLS_DC);
} text_iter_ops;
enum UBreakIteratorType brk_type_map[] = {
@ -478,6 +479,71 @@ static void text_iter_brk_following(text_iter_obj *object, int32_t offset, long
}
}
static zend_bool text_iter_brk_isBoundary(text_iter_obj *object, int32_t offset, long flags TSRMLS_DC)
{
int32_t k, tmp;
UBool result;
if (offset < 0) {
offset = 0;
}
/*
* On invalid iterator we always want to start looking for the code unit
* offset from the beginning of the string.
*/
if (object->u.brk.cp_offset == UBRK_DONE) {
object->u.brk.cp_offset = 0;
object->u.brk.bound = 0;
}
/*
* Try to locate the code unit position relative to the last known codepoint
* offset.
*/
k = tmp = object->u.brk.bound;
if (offset > object->u.brk.cp_offset) {
U16_FWD_N(object->text, k, object->text_len, offset - object->u.brk.cp_offset);
} else {
U16_BACK_N(object->text, 0, k, object->u.brk.cp_offset - offset);
}
result = ubrk_isBoundary(object->u.brk.iter, k);
object->u.brk.bound = ubrk_current(object->u.brk.iter);
object->u.brk.next = object->u.brk.bound;
/*
* If boundary is the same one as where we were at before, simply return.
*/
if (object->u.brk.bound == tmp) {
return result;
}
/*
* Adjust the internal codepoint offset based on how far we've moved.
*/
if (object->u.brk.bound != UBRK_DONE) {
if (object->u.brk.bound > tmp) {
if (object->u.brk.bound - tmp > 1) {
object->u.brk.cp_offset += u_countChar32(object->text + tmp, object->u.brk.bound - tmp);
} else {
object->u.brk.cp_offset++;
}
} else {
if (tmp - object->u.brk.bound > 1) {
object->u.brk.cp_offset -= u_countChar32(object->text + object->u.brk.bound, tmp - object->u.brk.bound);
} else {
object->u.brk.cp_offset--;
}
}
} else {
object->u.brk.cp_offset = UBRK_DONE;
}
return result;
}
static text_iter_ops text_iter_brk_ops = {
text_iter_brk_valid,
text_iter_brk_current,
@ -486,6 +552,7 @@ static text_iter_ops text_iter_brk_ops = {
text_iter_brk_next,
text_iter_brk_rewind,
text_iter_brk_following,
text_iter_brk_isBoundary,
};
@ -778,6 +845,23 @@ PHP_METHOD(TextIterator, preceding)
iter_ops[intern->type]->following(intern, offset, flags TSRMLS_CC);
RETURN_LONG(iter_ops[intern->type]->offset(intern, flags TSRMLS_CC));
}
PHP_METHOD(TextIterator, isBoundary)
{
long flags, offset;
zval *object = getThis();
text_iter_obj *intern = (text_iter_obj*) zend_object_store_get_object(object TSRMLS_CC);
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "l", &offset) == FAILURE) {
return;
}
/*
* ReverseTextIterator will behave the same as the normal one.
*/
RETURN_BOOL(iter_ops[intern->type]->isBoundary(intern, offset, flags TSRMLS_CC));
}
static zend_function_entry text_iterator_funcs[] = {
PHP_ME(TextIterator, __construct, NULL, ZEND_ACC_PUBLIC)
@ -794,6 +878,7 @@ static zend_function_entry text_iterator_funcs[] = {
PHP_ME(TextIterator, last, NULL, ZEND_ACC_PUBLIC)
PHP_ME(TextIterator, following, NULL, ZEND_ACC_PUBLIC)
PHP_ME(TextIterator, preceding, NULL, ZEND_ACC_PUBLIC)
PHP_ME(TextIterator, isBoundary, NULL, ZEND_ACC_PUBLIC)
PHP_MALIAS(TextIterator, first, rewind, NULL, ZEND_ACC_PUBLIC)
{NULL, NULL, NULL}