deps: ICU 61.1 bump

- Update to released ICU 61.1, including:
  - CLDR 33 (many new languages and data improvements)
  - Many small API additions, improvements, and bug fixes
  - note: 'icu::' namespace is no longer used by default
   (Necessitated https://github.com/nodejs/node/pull/18667 )

PR-URL: https://github.com/nodejs/node/pull/19621
Reviewed-By: Colin Ihrig <cjihrig@gmail.com>
Reviewed-By: James M Snell <jasnell@gmail.com>
This commit is contained in:
Steven R. Loomis 2018-03-26 15:29:02 -07:00
parent 88773af540
commit 64211405da
180 changed files with 8963 additions and 2015 deletions

33
LICENSE
View file

@ -122,8 +122,8 @@ The externally maintained libraries used by Node.js are:
"""
COPYRIGHT AND PERMISSION NOTICE (ICU 58 and later)
Copyright © 1991-2017 Unicode, Inc. All rights reserved.
Distributed under the Terms of Use in http://www.unicode.org/copyright.html
Copyright © 1991-2018 Unicode, Inc. All rights reserved.
Distributed under the Terms of Use in http://www.unicode.org/copyright.html.
Permission is hereby granted, free of charge, to any person obtaining
a copy of the Unicode data files and any associated documentation
@ -505,6 +505,35 @@ The externally maintained libraries used by Node.js are:
# by ICANN or the IETF Trust on the database or the code. Any person
# making a contribution to the database or code waives all rights to
# future claims in that contribution or in the TZ Database.
6. Google double-conversion
Copyright 2006-2011, the V8 project authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials provided
with the distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
- libuv, located at deps/uv, is licensed as follows:

4
configure vendored
View file

@ -1125,8 +1125,8 @@ def glob_to_var(dir_base, dir_sub, patch_dir):
def configure_intl(o):
icus = [
{
'url': 'https://ssl.icu-project.org/files/icu4c/60.2/icu4c-60_2-src.zip',
'md5': '115908818fd0324530b2acb1b029738d',
'url': 'https://ssl.icu-project.org/files/icu4c/61.1/icu4c-61_1-src.zip',
'md5': '780d8524c8a860ed8d8f6fe75cb7ce3f',
},
]
def icu_download(path):

View file

@ -1,7 +1,7 @@
COPYRIGHT AND PERMISSION NOTICE (ICU 58 and later)
Copyright © 1991-2017 Unicode, Inc. All rights reserved.
Distributed under the Terms of Use in http://www.unicode.org/copyright.html
Copyright © 1991-2018 Unicode, Inc. All rights reserved.
Distributed under the Terms of Use in http://www.unicode.org/copyright.html.
Permission is hereby granted, free of charge, to any person obtaining
a copy of the Unicode data files and any associated documentation
@ -383,3 +383,32 @@ Database section 7.
# by ICANN or the IETF Trust on the database or the code. Any person
# making a contribution to the database or code waives all rights to
# future claims in that contribution or in the TZ Database.
6. Google double-conversion
Copyright 2006-2011, the V8 project authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials provided
with the distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View file

@ -1,8 +1,8 @@
Small ICU sources - auto generated by shrink-icu-src.py
This directory contains the ICU subset used by --with-intl=small-icu (the default)
It is a strict subset of ICU 60 source files with the following exception(s):
* deps/icu-small/source/data/in/icudt60l.dat : Reduced-size data file
It is a strict subset of ICU 61 source files with the following exception(s):
* deps/icu-small/source/data/in/icudt61l.dat : Reduced-size data file
To rebuild this directory, see ../../tools/icu/README.md

View file

@ -100,9 +100,9 @@ static void set32x64Bits(uint32_t table[64], int32_t start, int32_t limit) {
++lead;
}
if(lead<limitLead) {
bits=~((1<<lead)-1);
bits=~(((unsigned)1<<lead)-1);
if(limitLead<0x20) {
bits&=(1<<limitLead)-1;
bits&=((unsigned)1<<limitLead)-1;
}
for(trail=0; trail<64; ++trail) {
table[trail]|=bits;

View file

@ -59,58 +59,47 @@ LanguageBreakFactory::~LanguageBreakFactory() {
******************************************************************
*/
UnhandledEngine::UnhandledEngine(UErrorCode &/*status*/) {
for (int32_t i = 0; i < UPRV_LENGTHOF(fHandled); ++i) {
fHandled[i] = 0;
}
UnhandledEngine::UnhandledEngine(UErrorCode &status) : fHandled(nullptr) {
(void)status;
}
UnhandledEngine::~UnhandledEngine() {
for (int32_t i = 0; i < UPRV_LENGTHOF(fHandled); ++i) {
if (fHandled[i] != 0) {
delete fHandled[i];
}
}
delete fHandled;
fHandled = nullptr;
}
UBool
UnhandledEngine::handles(UChar32 c, int32_t breakType) const {
return (breakType >= 0 && breakType < UPRV_LENGTHOF(fHandled)
&& fHandled[breakType] != 0 && fHandled[breakType]->contains(c));
UnhandledEngine::handles(UChar32 c) const {
return fHandled && fHandled->contains(c);
}
int32_t
UnhandledEngine::findBreaks( UText *text,
int32_t /* startPos */,
int32_t endPos,
int32_t breakType,
UVector32 &/*foundBreaks*/ ) const {
if (breakType >= 0 && breakType < UPRV_LENGTHOF(fHandled)) {
UChar32 c = utext_current32(text);
while((int32_t)utext_getNativeIndex(text) < endPos && fHandled[breakType]->contains(c)) {
utext_next32(text); // TODO: recast loop to work with post-increment operations.
c = utext_current32(text);
}
UChar32 c = utext_current32(text);
while((int32_t)utext_getNativeIndex(text) < endPos && fHandled->contains(c)) {
utext_next32(text); // TODO: recast loop to work with post-increment operations.
c = utext_current32(text);
}
return 0;
}
void
UnhandledEngine::handleCharacter(UChar32 c, int32_t breakType) {
if (breakType >= 0 && breakType < UPRV_LENGTHOF(fHandled)) {
if (fHandled[breakType] == 0) {
fHandled[breakType] = new UnicodeSet();
if (fHandled[breakType] == 0) {
return;
}
}
if (!fHandled[breakType]->contains(c)) {
UErrorCode status = U_ZERO_ERROR;
// Apply the entire script of the character.
int32_t script = u_getIntPropertyValue(c, UCHAR_SCRIPT);
fHandled[breakType]->applyIntPropertyValue(UCHAR_SCRIPT, script, status);
UnhandledEngine::handleCharacter(UChar32 c) {
if (fHandled == nullptr) {
fHandled = new UnicodeSet();
if (fHandled == nullptr) {
return;
}
}
if (!fHandled->contains(c)) {
UErrorCode status = U_ZERO_ERROR;
// Apply the entire script of the character.
int32_t script = u_getIntPropertyValue(c, UCHAR_SCRIPT);
fHandled->applyIntPropertyValue(UCHAR_SCRIPT, script, status);
}
}
/*
@ -138,7 +127,7 @@ U_NAMESPACE_BEGIN
static UMutex gBreakEngineMutex = U_MUTEX_INITIALIZER;
const LanguageBreakEngine *
ICULanguageBreakFactory::getEngineFor(UChar32 c, int32_t breakType) {
ICULanguageBreakFactory::getEngineFor(UChar32 c) {
const LanguageBreakEngine *lbe = NULL;
UErrorCode status = U_ZERO_ERROR;
@ -156,14 +145,14 @@ ICULanguageBreakFactory::getEngineFor(UChar32 c, int32_t breakType) {
int32_t i = fEngines->size();
while (--i >= 0) {
lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i));
if (lbe != NULL && lbe->handles(c, breakType)) {
if (lbe != NULL && lbe->handles(c)) {
return lbe;
}
}
}
// We didn't find an engine. Create one.
lbe = loadEngineFor(c, breakType);
lbe = loadEngineFor(c);
if (lbe != NULL) {
fEngines->push((void *)lbe, status);
}
@ -171,11 +160,11 @@ ICULanguageBreakFactory::getEngineFor(UChar32 c, int32_t breakType) {
}
const LanguageBreakEngine *
ICULanguageBreakFactory::loadEngineFor(UChar32 c, int32_t breakType) {
ICULanguageBreakFactory::loadEngineFor(UChar32 c) {
UErrorCode status = U_ZERO_ERROR;
UScriptCode code = uscript_getScript(c, &status);
if (U_SUCCESS(status)) {
DictionaryMatcher *m = loadDictionaryMatcherFor(code, breakType);
DictionaryMatcher *m = loadDictionaryMatcherFor(code);
if (m != NULL) {
const LanguageBreakEngine *engine = NULL;
switch(code) {
@ -236,7 +225,7 @@ ICULanguageBreakFactory::loadEngineFor(UChar32 c, int32_t breakType) {
}
DictionaryMatcher *
ICULanguageBreakFactory::loadDictionaryMatcherFor(UScriptCode script, int32_t /* brkType */) {
ICULanguageBreakFactory::loadDictionaryMatcherFor(UScriptCode script) {
UErrorCode status = U_ZERO_ERROR;
// open root from brkitr tree.
UResourceBundle *b = ures_open(U_ICUDATA_BRKITR, "", &status);

View file

@ -54,11 +54,10 @@ class LanguageBreakEngine : public UMemory {
* a particular kind of break.</p>
*
* @param c A character which begins a run that the engine might handle
* @param breakType The type of text break which the caller wants to determine
* @return TRUE if this engine handles the particular character and break
* type.
*/
virtual UBool handles(UChar32 c, int32_t breakType) const = 0;
virtual UBool handles(UChar32 c) const = 0;
/**
* <p>Find any breaks within a run in the supplied text.</p>
@ -68,14 +67,12 @@ class LanguageBreakEngine : public UMemory {
* is capable of handling.
* @param startPos The start of the run within the supplied text.
* @param endPos The end of the run within the supplied text.
* @param breakType The type of break desired, or -1.
* @param foundBreaks A Vector of int32_t to receive the breaks.
* @return The number of breaks found.
*/
virtual int32_t findBreaks( UText *text,
int32_t startPos,
int32_t endPos,
int32_t breakType,
UVector32 &foundBreaks ) const = 0;
};
@ -125,11 +122,9 @@ class LanguageBreakFactory : public UMemory {
*
* @param c A character that begins a run for which a LanguageBreakEngine is
* sought.
* @param breakType The kind of text break for which a LanguageBreakEngine is
* sought.
* @return A LanguageBreakEngine with the desired characteristics, or 0.
*/
virtual const LanguageBreakEngine *getEngineFor(UChar32 c, int32_t breakType) = 0;
virtual const LanguageBreakEngine *getEngineFor(UChar32 c) = 0;
};
@ -152,11 +147,11 @@ class UnhandledEngine : public LanguageBreakEngine {
private:
/**
* The sets of characters handled, for each break type
* The sets of characters handled.
* @internal
*/
UnicodeSet *fHandled[4];
UnicodeSet *fHandled;
public:
@ -176,11 +171,10 @@ class UnhandledEngine : public LanguageBreakEngine {
* a particular kind of break.</p>
*
* @param c A character which begins a run that the engine might handle
* @param breakType The type of text break which the caller wants to determine
* @return TRUE if this engine handles the particular character and break
* type.
*/
virtual UBool handles(UChar32 c, int32_t breakType) const;
virtual UBool handles(UChar32 c) const;
/**
* <p>Find any breaks within a run in the supplied text.</p>
@ -190,23 +184,20 @@ class UnhandledEngine : public LanguageBreakEngine {
* is capable of handling.
* @param startPos The start of the run within the supplied text.
* @param endPos The end of the run within the supplied text.
* @param breakType The type of break desired, or -1.
* @param foundBreaks An allocated C array of the breaks found, if any
* @return The number of breaks found.
*/
virtual int32_t findBreaks( UText *text,
int32_t startPos,
int32_t endPos,
int32_t breakType,
UVector32 &foundBreaks ) const;
/**
* <p>Tell the engine to handle a particular character and break type.</p>
*
* @param c A character which the engine should handle
* @param breakType The type of text break for which the engine should handle c
*/
virtual void handleCharacter(UChar32 c, int32_t breakType);
virtual void handleCharacter(UChar32 c);
};
@ -250,11 +241,9 @@ class ICULanguageBreakFactory : public LanguageBreakFactory {
*
* @param c A character that begins a run for which a LanguageBreakEngine is
* sought.
* @param breakType The kind of text break for which a LanguageBreakEngine is
* sought.
* @return A LanguageBreakEngine with the desired characteristics, or 0.
*/
virtual const LanguageBreakEngine *getEngineFor(UChar32 c, int32_t breakType);
virtual const LanguageBreakEngine *getEngineFor(UChar32 c);
protected:
/**
@ -263,21 +252,17 @@ protected:
*
* @param c A character that begins a run for which a LanguageBreakEngine is
* sought.
* @param breakType The kind of text break for which a LanguageBreakEngine is
* sought.
* @return A LanguageBreakEngine with the desired characteristics, or 0.
*/
virtual const LanguageBreakEngine *loadEngineFor(UChar32 c, int32_t breakType);
virtual const LanguageBreakEngine *loadEngineFor(UChar32 c);
/**
* <p>Create a DictionaryMatcher for the specified script and break type.</p>
* @param script An ISO 15924 script code that identifies the dictionary to be
* created.
* @param breakType The kind of text break for which a dictionary is
* sought.
* @return A DictionaryMatcher with the desired characteristics, or NULL.
*/
virtual DictionaryMatcher *loadDictionaryMatcherFor(UScriptCode script, int32_t breakType);
virtual DictionaryMatcher *loadDictionaryMatcherFor(UScriptCode script);
};
U_NAMESPACE_END

View file

@ -52,7 +52,7 @@ U_NAMESPACE_BEGIN
// -------------------------------------
BreakIterator*
BreakIterator::buildInstance(const Locale& loc, const char *type, int32_t kind, UErrorCode &status)
BreakIterator::buildInstance(const Locale& loc, const char *type, UErrorCode &status)
{
char fnbuff[256];
char ext[4]={'\0'};
@ -121,7 +121,6 @@ BreakIterator::buildInstance(const Locale& loc, const char *type, int32_t kind,
U_LOCALE_BASED(locBased, *(BreakIterator*)result);
locBased.setLocaleIDs(ures_getLocaleByType(b, ULOC_VALID_LOCALE, &status),
actualLocale.data());
result->setBreakType(kind);
}
ures_close(b);
@ -413,10 +412,10 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
BreakIterator *result = NULL;
switch (kind) {
case UBRK_CHARACTER:
result = BreakIterator::buildInstance(loc, "grapheme", kind, status);
result = BreakIterator::buildInstance(loc, "grapheme", status);
break;
case UBRK_WORD:
result = BreakIterator::buildInstance(loc, "word", kind, status);
result = BreakIterator::buildInstance(loc, "word", status);
break;
case UBRK_LINE:
uprv_strcpy(lbType, "line");
@ -429,10 +428,10 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
uprv_strcat(lbType, lbKeyValue);
}
}
result = BreakIterator::buildInstance(loc, lbType, kind, status);
result = BreakIterator::buildInstance(loc, lbType, status);
break;
case UBRK_SENTENCE:
result = BreakIterator::buildInstance(loc, "sentence", kind, status);
result = BreakIterator::buildInstance(loc, "sentence", status);
#if !UCONFIG_NO_FILTERED_BREAK_ITERATION
{
char ssKeyValue[kKeyValueLenMax] = {0};
@ -449,7 +448,7 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
#endif
break;
case UBRK_TITLE:
result = BreakIterator::buildInstance(loc, "title", kind, status);
result = BreakIterator::buildInstance(loc, "title", status);
break;
default:
status = U_ILLEGAL_ARGUMENT_ERROR;

View file

@ -92,20 +92,16 @@ ByteSinkUtil::appendTwoBytes(UChar32 c, ByteSink &sink) {
sink.Append(s8, 2);
}
UBool
ByteSinkUtil::appendUnchanged(const uint8_t *s, int32_t length,
ByteSink &sink, uint32_t options, Edits *edits,
UErrorCode &errorCode) {
if (U_FAILURE(errorCode)) { return FALSE; }
if (length > 0) {
if (edits != nullptr) {
edits->addUnchanged(length);
}
if ((options & U_OMIT_UNCHANGED_TEXT) == 0) {
sink.Append(reinterpret_cast<const char *>(s), length);
}
void
ByteSinkUtil::appendNonEmptyUnchanged(const uint8_t *s, int32_t length,
ByteSink &sink, uint32_t options, Edits *edits) {
U_ASSERT(length > 0);
if (edits != nullptr) {
edits->addUnchanged(length);
}
if ((options & U_OMIT_UNCHANGED_TEXT) == 0) {
sink.Append(reinterpret_cast<const char *>(s), length);
}
return TRUE;
}
UBool
@ -117,7 +113,11 @@ ByteSinkUtil::appendUnchanged(const uint8_t *s, const uint8_t *limit,
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
return FALSE;
}
return appendUnchanged(s, (int32_t)(limit - s), sink, options, edits, errorCode);
int32_t length = (int32_t)(limit - s);
if (length > 0) {
appendNonEmptyUnchanged(s, length, sink, options, edits);
}
return TRUE;
}
U_NAMESPACE_END

View file

@ -43,11 +43,19 @@ public:
static UBool appendUnchanged(const uint8_t *s, int32_t length,
ByteSink &sink, uint32_t options, Edits *edits,
UErrorCode &errorCode);
UErrorCode &errorCode) {
if (U_FAILURE(errorCode)) { return FALSE; }
if (length > 0) { appendNonEmptyUnchanged(s, length, sink, options, edits); }
return TRUE;
}
static UBool appendUnchanged(const uint8_t *s, const uint8_t *limit,
ByteSink &sink, uint32_t options, Edits *edits,
UErrorCode &errorCode);
private:
static void appendNonEmptyUnchanged(const uint8_t *s, int32_t length,
ByteSink &sink, uint32_t options, Edits *edits);
};
U_NAMESPACE_END

View file

@ -41,30 +41,6 @@ static int n=0;
static long b=0;
#endif
#if U_DEBUG
static char gValidMemorySink = 0;
U_CAPI void uprv_checkValidMemory(const void *p, size_t n) {
/*
* Access the memory to ensure that it's all valid.
* Load and save a computed value to try to ensure that the compiler
* does not throw away the whole loop.
* A thread analyzer might complain about un-mutexed access to gValidMemorySink
* which is true but harmless because no one ever uses the value in gValidMemorySink.
*/
const char *s = (const char *)p;
char c = gValidMemorySink;
size_t i;
U_ASSERT(p != NULL);
for(i = 0; i < n; ++i) {
c ^= s[i];
}
gValidMemorySink = c;
}
#endif /* U_DEBUG */
U_CAPI void * U_EXPORT2
uprv_malloc(size_t s) {
#if U_DEBUG && defined(UPRV_MALLOC_COUNT)

View file

@ -36,31 +36,10 @@
#include <stdio.h>
#endif
#if U_DEBUG
/*
* The C++ standard requires that the source pointer for memcpy() & memmove()
* is valid, not NULL, and not at the end of an allocated memory block.
* In debug mode, we read one byte from the source point to verify that it's
* a valid, readable pointer.
*/
U_CAPI void uprv_checkValidMemory(const void *p, size_t n);
#define uprv_memcpy(dst, src, size) ( \
uprv_checkValidMemory(src, 1), \
U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size))
#define uprv_memmove(dst, src, size) ( \
uprv_checkValidMemory(src, 1), \
U_STANDARD_CPP_NAMESPACE memmove(dst, src, size))
#else
#define uprv_memcpy(dst, src, size) U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size)
#define uprv_memmove(dst, src, size) U_STANDARD_CPP_NAMESPACE memmove(dst, src, size)
#endif /* U_DEBUG */
/**
* \def UPRV_LENGTHOF
* Convenience macro to determine the length of a fixed array at compile-time.

View file

@ -40,28 +40,10 @@
#define uprv_strchr(s, c) U_STANDARD_CPP_NAMESPACE strchr(s, c)
#define uprv_strstr(s, c) U_STANDARD_CPP_NAMESPACE strstr(s, c)
#define uprv_strrchr(s, c) U_STANDARD_CPP_NAMESPACE strrchr(s, c)
#if U_DEBUG
#define uprv_strncpy(dst, src, size) ( \
uprv_checkValidMemory(src, 1), \
U_STANDARD_CPP_NAMESPACE strncpy(dst, src, size))
#define uprv_strncmp(s1, s2, n) ( \
uprv_checkValidMemory(s1, 1), \
uprv_checkValidMemory(s2, 1), \
U_STANDARD_CPP_NAMESPACE strncmp(s1, s2, n))
#define uprv_strncat(dst, src, n) ( \
uprv_checkValidMemory(src, 1), \
U_STANDARD_CPP_NAMESPACE strncat(dst, src, n))
#else
#define uprv_strncpy(dst, src, size) U_STANDARD_CPP_NAMESPACE strncpy(dst, src, size)
#define uprv_strncmp(s1, s2, n) U_STANDARD_CPP_NAMESPACE strncmp(s1, s2, n)
#define uprv_strncat(dst, src, n) U_STANDARD_CPP_NAMESPACE strncat(dst, src, n)
#endif /* U_DEBUG */
/**
* Is c an ASCII-repertoire letter a-z or A-Z?
* Note: The implementation is specific to whether ICU is compiled for

View file

@ -29,24 +29,21 @@ U_NAMESPACE_BEGIN
******************************************************************
*/
DictionaryBreakEngine::DictionaryBreakEngine(uint32_t breakTypes) {
fTypes = breakTypes;
DictionaryBreakEngine::DictionaryBreakEngine() {
}
DictionaryBreakEngine::~DictionaryBreakEngine() {
}
UBool
DictionaryBreakEngine::handles(UChar32 c, int32_t breakType) const {
return (breakType >= 0 && breakType < 32 && (((uint32_t)1 << breakType) & fTypes)
&& fSet.contains(c));
DictionaryBreakEngine::handles(UChar32 c) const {
return fSet.contains(c);
}
int32_t
DictionaryBreakEngine::findBreaks( UText *text,
int32_t startPos,
int32_t endPos,
int32_t breakType,
UVector32 &foundBreaks ) const {
(void)startPos; // TODO: remove this param?
int32_t result = 0;
@ -66,10 +63,8 @@ DictionaryBreakEngine::findBreaks( UText *text,
}
rangeStart = start;
rangeEnd = current;
if (breakType >= 0 && breakType < 32 && (((uint32_t)1 << breakType) & fTypes)) {
result = divideUpDictionaryRange(text, rangeStart, rangeEnd, foundBreaks);
utext_setNativeIndex(text, current);
}
result = divideUpDictionaryRange(text, rangeStart, rangeEnd, foundBreaks);
utext_setNativeIndex(text, current);
return result;
}
@ -194,7 +189,7 @@ static const int32_t THAI_MIN_WORD = 2;
static const int32_t THAI_MIN_WORD_SPAN = THAI_MIN_WORD * 2;
ThaiBreakEngine::ThaiBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status)
: DictionaryBreakEngine((1<<UBRK_WORD) | (1<<UBRK_LINE)),
: DictionaryBreakEngine(),
fDictionary(adoptDictionary)
{
fThaiWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Thai:]&[:LineBreak=SA:]]"), status);
@ -436,7 +431,7 @@ static const int32_t LAO_MIN_WORD = 2;
static const int32_t LAO_MIN_WORD_SPAN = LAO_MIN_WORD * 2;
LaoBreakEngine::LaoBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status)
: DictionaryBreakEngine((1<<UBRK_WORD) | (1<<UBRK_LINE)),
: DictionaryBreakEngine(),
fDictionary(adoptDictionary)
{
fLaoWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Laoo:]&[:LineBreak=SA:]]"), status);
@ -632,7 +627,7 @@ static const int32_t BURMESE_MIN_WORD = 2;
static const int32_t BURMESE_MIN_WORD_SPAN = BURMESE_MIN_WORD * 2;
BurmeseBreakEngine::BurmeseBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status)
: DictionaryBreakEngine((1<<UBRK_WORD) | (1<<UBRK_LINE)),
: DictionaryBreakEngine(),
fDictionary(adoptDictionary)
{
fBurmeseWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Mymr:]&[:LineBreak=SA:]]"), status);
@ -825,7 +820,7 @@ static const int32_t KHMER_MIN_WORD = 2;
static const int32_t KHMER_MIN_WORD_SPAN = KHMER_MIN_WORD * 2;
KhmerBreakEngine::KhmerBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status)
: DictionaryBreakEngine((1 << UBRK_WORD) | (1 << UBRK_LINE)),
: DictionaryBreakEngine(),
fDictionary(adoptDictionary)
{
fKhmerWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Khmr:]&[:LineBreak=SA:]]"), status);
@ -1047,7 +1042,7 @@ foundBest:
*/
static const uint32_t kuint32max = 0xFFFFFFFF;
CjkBreakEngine::CjkBreakEngine(DictionaryMatcher *adoptDictionary, LanguageType type, UErrorCode &status)
: DictionaryBreakEngine(1 << UBRK_WORD), fDictionary(adoptDictionary) {
: DictionaryBreakEngine(), fDictionary(adoptDictionary) {
// Korean dictionary only includes Hangul syllables
fHangulWordSet.applyPattern(UNICODE_STRING_SIMPLE("[\\uac00-\\ud7a3]"), status);
fHanWordSet.applyPattern(UNICODE_STRING_SIMPLE("[:Han:]"), status);
@ -1324,8 +1319,8 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
}
if (katakanaRunLength < kMaxKatakanaGroupLength) {
uint32_t newSnlp = bestSnlp.elementAti(i) + getKatakanaCost(katakanaRunLength);
if (newSnlp < (uint32_t)bestSnlp.elementAti(j)) {
bestSnlp.setElementAt(newSnlp, j);
if (newSnlp < (uint32_t)bestSnlp.elementAti(i+katakanaRunLength)) {
bestSnlp.setElementAt(newSnlp, i+katakanaRunLength);
prev.setElementAt(i, i+katakanaRunLength); // prev[j] = i;
}
}

View file

@ -42,27 +42,12 @@ class DictionaryBreakEngine : public LanguageBreakEngine {
UnicodeSet fSet;
/**
* The set of break types handled by this engine
* @internal
*/
uint32_t fTypes;
/**
* <p>Default constructor.</p>
*
*/
DictionaryBreakEngine();
public:
/**
* <p>Constructor setting the break types handled.</p>
*
* @param breakTypes A bitmap of types handled by the engine.
* <p>Constructor </p>
*/
DictionaryBreakEngine( uint32_t breakTypes );
DictionaryBreakEngine();
/**
* <p>Virtual destructor.</p>
@ -74,11 +59,10 @@ class DictionaryBreakEngine : public LanguageBreakEngine {
* a particular kind of break.</p>
*
* @param c A character which begins a run that the engine might handle
* @param breakType The type of text break which the caller wants to determine
* @return TRUE if this engine handles the particular character and break
* type.
*/
virtual UBool handles( UChar32 c, int32_t breakType ) const;
virtual UBool handles(UChar32 c) const;
/**
* <p>Find any breaks within a run in the supplied text.</p>
@ -88,14 +72,12 @@ class DictionaryBreakEngine : public LanguageBreakEngine {
* that starts from the first character in the range.
* @param startPos The start of the run within the supplied text.
* @param endPos The end of the run within the supplied text.
* @param breakType The type of break desired, or -1.
* @param foundBreaks vector of int32_t to receive the break positions
* @return The number of breaks found.
*/
virtual int32_t findBreaks( UText *text,
int32_t startPos,
int32_t endPos,
int32_t breakType,
UVector32 &foundBreaks ) const;
protected:
@ -107,13 +89,6 @@ class DictionaryBreakEngine : public LanguageBreakEngine {
*/
virtual void setCharacters( const UnicodeSet &set );
/**
* <p>Set the break types handled by this engine.</p>
*
* @param breakTypes A bitmap of types handled by the engine.
*/
// virtual void setBreakTypes( uint32_t breakTypes );
/**
* <p>Divide up a range of known dictionary characters handled by this break engine.</p>
*

View file

@ -693,6 +693,11 @@ FilteredBreakIteratorBuilder::createInstance(const Locale& where, UErrorCode& st
return (U_SUCCESS(status))? ret.orphan(): NULL;
}
FilteredBreakIteratorBuilder *
FilteredBreakIteratorBuilder::createInstance(UErrorCode &status) {
return createEmptyInstance(status);
}
FilteredBreakIteratorBuilder *
FilteredBreakIteratorBuilder::createEmptyInstance(UErrorCode& status) {
if(U_FAILURE(status)) return NULL;

View file

@ -64,7 +64,9 @@ UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedBreakIterator)
* Constructs a RuleBasedBreakIterator that uses the already-created
* tables object that is passed in as a parameter.
*/
RuleBasedBreakIterator::RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status) {
RuleBasedBreakIterator::RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status)
: fSCharIter(UnicodeString())
{
init(status);
fData = new RBBIDataWrapper(data, status); // status checked in constructor
if (U_FAILURE(status)) {return;}
@ -80,7 +82,9 @@ RuleBasedBreakIterator::RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode
//
RuleBasedBreakIterator::RuleBasedBreakIterator(const uint8_t *compiledRules,
uint32_t ruleLength,
UErrorCode &status) {
UErrorCode &status)
: fSCharIter(UnicodeString())
{
init(status);
if (U_FAILURE(status)) {
return;
@ -110,6 +114,7 @@ RuleBasedBreakIterator::RuleBasedBreakIterator(const uint8_t *compiledRules,
//
//-------------------------------------------------------------------------------
RuleBasedBreakIterator::RuleBasedBreakIterator(UDataMemory* udm, UErrorCode &status)
: fSCharIter(UnicodeString())
{
init(status);
fData = new RBBIDataWrapper(udm, status); // status checked in constructor
@ -130,6 +135,7 @@ RuleBasedBreakIterator::RuleBasedBreakIterator(UDataMemory* udm, UErrorCode &sta
RuleBasedBreakIterator::RuleBasedBreakIterator( const UnicodeString &rules,
UParseError &parseError,
UErrorCode &status)
: fSCharIter(UnicodeString())
{
init(status);
if (U_FAILURE(status)) {return;}
@ -152,7 +158,9 @@ RuleBasedBreakIterator::RuleBasedBreakIterator( const UnicodeString &rules,
// Used when creating a RuleBasedBreakIterator from a set
// of rules.
//-------------------------------------------------------------------------------
RuleBasedBreakIterator::RuleBasedBreakIterator() {
RuleBasedBreakIterator::RuleBasedBreakIterator()
: fSCharIter(UnicodeString())
{
UErrorCode status = U_ZERO_ERROR;
init(status);
}
@ -165,7 +173,8 @@ RuleBasedBreakIterator::RuleBasedBreakIterator() {
//
//-------------------------------------------------------------------------------
RuleBasedBreakIterator::RuleBasedBreakIterator(const RuleBasedBreakIterator& other)
: BreakIterator(other)
: BreakIterator(other),
fSCharIter(UnicodeString())
{
UErrorCode status = U_ZERO_ERROR;
this->init(status);
@ -177,17 +186,13 @@ RuleBasedBreakIterator::RuleBasedBreakIterator(const RuleBasedBreakIterator& oth
* Destructor
*/
RuleBasedBreakIterator::~RuleBasedBreakIterator() {
if (fCharIter!=fSCharIter && fCharIter!=fDCharIter) {
if (fCharIter != &fSCharIter) {
// fCharIter was adopted from the outside.
delete fCharIter;
}
fCharIter = NULL;
delete fSCharIter;
fSCharIter = NULL;
delete fDCharIter;
fDCharIter = NULL;
utext_close(fText);
utext_close(&fText);
if (fData != NULL) {
fData->removeReference();
@ -217,26 +222,29 @@ RuleBasedBreakIterator::operator=(const RuleBasedBreakIterator& that) {
}
BreakIterator::operator=(that);
fBreakType = that.fBreakType;
if (fLanguageBreakEngines != NULL) {
delete fLanguageBreakEngines;
fLanguageBreakEngines = NULL; // Just rebuild for now
}
// TODO: clone fLanguageBreakEngines from "that"
UErrorCode status = U_ZERO_ERROR;
fText = utext_clone(fText, that.fText, FALSE, TRUE, &status);
utext_clone(&fText, &that.fText, FALSE, TRUE, &status);
if (fCharIter!=fSCharIter && fCharIter!=fDCharIter) {
if (fCharIter != &fSCharIter) {
delete fCharIter;
}
fCharIter = NULL;
fCharIter = &fSCharIter;
if (that.fCharIter != NULL ) {
if (that.fCharIter != NULL && that.fCharIter != &that.fSCharIter) {
// This is a little bit tricky - it will initially appear that
// this->fCharIter is adopted, even if that->fCharIter was
// not adopted. That's ok.
fCharIter = that.fCharIter->clone();
}
fSCharIter = that.fSCharIter;
if (fCharIter == NULL) {
fCharIter = &fSCharIter;
}
if (fData != NULL) {
fData->removeReference();
@ -269,33 +277,30 @@ RuleBasedBreakIterator::operator=(const RuleBasedBreakIterator& that) {
//
//-----------------------------------------------------------------------------
void RuleBasedBreakIterator::init(UErrorCode &status) {
fText = NULL;
fCharIter = NULL;
fSCharIter = NULL;
fDCharIter = NULL;
fData = NULL;
fPosition = 0;
fRuleStatusIndex = 0;
fDone = false;
fDictionaryCharCount = 0;
fBreakType = UBRK_WORD; // Defaulting BreakType to word gives reasonable
// dictionary behavior for Break Iterators that are
// built from rules. Even better would be the ability to
// declare the type in the rules.
fLanguageBreakEngines = NULL;
fUnhandledBreakEngine = NULL;
fBreakCache = NULL;
fDictionaryCache = NULL;
if (U_FAILURE(status)) {
// Note: IBM xlC is unable to assign or initialize member fText from UTEXT_INITIALIZER.
// fText = UTEXT_INITIALIZER;
static const UText initializedUText = UTEXT_INITIALIZER;
uprv_memcpy(&fText, &initializedUText, sizeof(UText));
if (U_FAILURE(status)) {
return;
}
fText = utext_openUChars(NULL, NULL, 0, &status);
utext_openUChars(&fText, NULL, 0, &status);
fDictionaryCache = new DictionaryCache(this, status);
fBreakCache = new BreakCache(this, status);
if (U_SUCCESS(status) && (fText == NULL || fDictionaryCache == NULL || fBreakCache == NULL)) {
if (U_SUCCESS(status) && (fDictionaryCache == NULL || fBreakCache == NULL)) {
status = U_MEMORY_ALLOCATION_ERROR;
}
@ -344,7 +349,7 @@ RuleBasedBreakIterator::operator==(const BreakIterator& that) const {
const RuleBasedBreakIterator& that2 = (const RuleBasedBreakIterator&) that;
if (!utext_equals(fText, that2.fText)) {
if (!utext_equals(&fText, &that2.fText)) {
// The two break iterators are operating on different text,
// or have a different iteration position.
// Note that fText's position is always the same as the break iterator's position.
@ -385,7 +390,7 @@ void RuleBasedBreakIterator::setText(UText *ut, UErrorCode &status) {
}
fBreakCache->reset();
fDictionaryCache->reset();
fText = utext_clone(fText, ut, FALSE, TRUE, &status);
utext_clone(&fText, ut, FALSE, TRUE, &status);
// Set up a dummy CharacterIterator to be returned if anyone
// calls getText(). With input from UText, there is no reasonable
@ -393,27 +398,20 @@ void RuleBasedBreakIterator::setText(UText *ut, UErrorCode &status) {
// Return one over an empty string instead - this is the closest
// we can come to signaling a failure.
// (GetText() is obsolete, this failure is sort of OK)
if (fDCharIter == NULL) {
static const UChar c = 0;
fDCharIter = new UCharCharacterIterator(&c, 0);
if (fDCharIter == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
return;
}
}
fSCharIter.setText(UnicodeString());
if (fCharIter!=fSCharIter && fCharIter!=fDCharIter) {
if (fCharIter != &fSCharIter) {
// existing fCharIter was adopted from the outside. Delete it now.
delete fCharIter;
}
fCharIter = fDCharIter;
fCharIter = &fSCharIter;
this->first();
}
UText *RuleBasedBreakIterator::getUText(UText *fillIn, UErrorCode &status) const {
UText *result = utext_clone(fillIn, fText, FALSE, TRUE, &status);
UText *result = utext_clone(fillIn, &fText, FALSE, TRUE, &status);
return result;
}
@ -439,7 +437,7 @@ void
RuleBasedBreakIterator::adoptText(CharacterIterator* newText) {
// If we are holding a CharacterIterator adopted from a
// previous call to this function, delete it now.
if (fCharIter!=fSCharIter && fCharIter!=fDCharIter) {
if (fCharIter != &fSCharIter) {
delete fCharIter;
}
@ -450,9 +448,9 @@ RuleBasedBreakIterator::adoptText(CharacterIterator* newText) {
if (newText==NULL || newText->startIndex() != 0) {
// startIndex !=0 wants to be an error, but there's no way to report it.
// Make the iterator text be an empty string.
fText = utext_openUChars(fText, NULL, 0, &status);
utext_openUChars(&fText, NULL, 0, &status);
} else {
fText = utext_openCharacterIterator(fText, newText, &status);
utext_openCharacterIterator(&fText, newText, &status);
}
this->first();
}
@ -467,23 +465,19 @@ RuleBasedBreakIterator::setText(const UnicodeString& newText) {
UErrorCode status = U_ZERO_ERROR;
fBreakCache->reset();
fDictionaryCache->reset();
fText = utext_openConstUnicodeString(fText, &newText, &status);
utext_openConstUnicodeString(&fText, &newText, &status);
// Set up a character iterator on the string.
// Needed in case someone calls getText().
// Can not, unfortunately, do this lazily on the (probably never)
// call to getText(), because getText is const.
if (fSCharIter == NULL) {
fSCharIter = new StringCharacterIterator(newText);
} else {
fSCharIter->setText(newText);
}
fSCharIter.setText(newText);
if (fCharIter!=fSCharIter && fCharIter!=fDCharIter) {
if (fCharIter != &fSCharIter) {
// old fCharIter was adopted from the outside. Delete it.
delete fCharIter;
}
fCharIter = fSCharIter;
fCharIter = &fSCharIter;
this->first();
}
@ -503,14 +497,14 @@ RuleBasedBreakIterator &RuleBasedBreakIterator::refreshInputText(UText *input, U
status = U_ILLEGAL_ARGUMENT_ERROR;
return *this;
}
int64_t pos = utext_getNativeIndex(fText);
int64_t pos = utext_getNativeIndex(&fText);
// Shallow read-only clone of the new UText into the existing input UText
fText = utext_clone(fText, input, FALSE, TRUE, &status);
utext_clone(&fText, input, FALSE, TRUE, &status);
if (U_FAILURE(status)) {
return *this;
}
utext_setNativeIndex(fText, pos);
if (utext_getNativeIndex(fText) != pos) {
utext_setNativeIndex(&fText, pos);
if (utext_getNativeIndex(&fText) != pos) {
// Sanity check. The new input utext is supposed to have the exact same
// contents as the old. If we can't set to the same position, it doesn't.
// The contents underlying the old utext might be invalid at this point,
@ -540,7 +534,7 @@ int32_t RuleBasedBreakIterator::first(void) {
* @return The text's past-the-end offset.
*/
int32_t RuleBasedBreakIterator::last(void) {
int32_t endPos = (int32_t)utext_nativeLength(fText);
int32_t endPos = (int32_t)utext_nativeLength(&fText);
UBool endShouldBeBoundary = isBoundary(endPos); // Has side effect of setting iterator position.
(void)endShouldBeBoundary;
U_ASSERT(endShouldBeBoundary);
@ -611,8 +605,8 @@ int32_t RuleBasedBreakIterator::following(int32_t startPos) {
// Move requested offset to a code point start. It might be on a trail surrogate,
// or on a trail byte if the input is UTF-8. Or it may be beyond the end of the text.
utext_setNativeIndex(fText, startPos);
startPos = (int32_t)utext_getNativeIndex(fText);
utext_setNativeIndex(&fText, startPos);
startPos = (int32_t)utext_getNativeIndex(&fText);
UErrorCode status = U_ZERO_ERROR;
fBreakCache->following(startPos, status);
@ -626,15 +620,15 @@ int32_t RuleBasedBreakIterator::following(int32_t startPos) {
* @return The position of the last boundary before the starting position.
*/
int32_t RuleBasedBreakIterator::preceding(int32_t offset) {
if (fText == NULL || offset > utext_nativeLength(fText)) {
if (offset > utext_nativeLength(&fText)) {
return last();
}
// Move requested offset to a code point start. It might be on a trail surrogate,
// or on a trail byte if the input is UTF-8.
utext_setNativeIndex(fText, offset);
int32_t adjustedOffset = utext_getNativeIndex(fText);
utext_setNativeIndex(&fText, offset);
int32_t adjustedOffset = utext_getNativeIndex(&fText);
UErrorCode status = U_ZERO_ERROR;
fBreakCache->preceding(adjustedOffset, status);
@ -660,8 +654,8 @@ UBool RuleBasedBreakIterator::isBoundary(int32_t offset) {
// Note that isBoundary() is always false for offsets that are not on code point boundaries.
// But we still need the side effect of leaving iteration at the following boundary.
utext_setNativeIndex(fText, offset);
int32_t adjustedOffset = utext_getNativeIndex(fText);
utext_setNativeIndex(&fText, offset);
int32_t adjustedOffset = utext_getNativeIndex(&fText);
bool result = false;
UErrorCode status = U_ZERO_ERROR;
@ -669,7 +663,7 @@ UBool RuleBasedBreakIterator::isBoundary(int32_t offset) {
result = (fBreakCache->current() == offset);
}
if (result && adjustedOffset < offset && utext_char32At(fText, offset) == U_SENTINEL) {
if (result && adjustedOffset < offset && utext_char32At(&fText, offset) == U_SENTINEL) {
// Original offset is beyond the end of the text. Return FALSE, it's not a boundary,
// but the iteration position remains set to the end of the text, which is a boundary.
return FALSE;
@ -789,9 +783,9 @@ int32_t RuleBasedBreakIterator::handleNext() {
// if we're already at the end of the text, return DONE.
initialPosition = fPosition;
UTEXT_SETNATIVEINDEX(fText, initialPosition);
UTEXT_SETNATIVEINDEX(&fText, initialPosition);
result = initialPosition;
c = UTEXT_NEXT32(fText);
c = UTEXT_NEXT32(&fText);
if (c==U_SENTINEL) {
fDone = TRUE;
return UBRK_DONE;
@ -854,7 +848,7 @@ int32_t RuleBasedBreakIterator::handleNext() {
#ifdef RBBI_DEBUG
if (gTrace) {
RBBIDebugPrintf(" %4ld ", utext_getNativeIndex(fText));
RBBIDebugPrintf(" %4ld ", utext_getNativeIndex(&fText));
if (0x20<=c && c<0x7f) {
RBBIDebugPrintf("\"%c\" ", c);
} else {
@ -867,9 +861,7 @@ int32_t RuleBasedBreakIterator::handleNext() {
// State Transition - move machine to its next state
//
// Note: fNextState is defined as uint16_t[2], but we are casting
// a generated RBBI table to RBBIStateTableRow and some tables
// actually have more than 2 categories.
// fNextState is a variable-length array.
U_ASSERT(category<fData->fHeader->fCatCount);
state = row->fNextState[category]; /*Not accessing beyond memory*/
row = (RBBIStateTableRow *)
@ -880,7 +872,7 @@ int32_t RuleBasedBreakIterator::handleNext() {
if (row->fAccepting == -1) {
// Match found, common case.
if (mode != RBBI_START) {
result = (int32_t)UTEXT_GETNATIVEINDEX(fText);
result = (int32_t)UTEXT_GETNATIVEINDEX(&fText);
}
fRuleStatusIndex = row->fTagIdx; // Remember the break status (tag) values.
}
@ -898,7 +890,7 @@ int32_t RuleBasedBreakIterator::handleNext() {
int16_t rule = row->fLookAhead;
if (rule != 0) {
// At the position of a '/' in a look-ahead match. Record it.
int32_t pos = (int32_t)UTEXT_GETNATIVEINDEX(fText);
int32_t pos = (int32_t)UTEXT_GETNATIVEINDEX(&fText);
lookAheadMatches.setPosition(rule, pos);
}
@ -914,7 +906,7 @@ int32_t RuleBasedBreakIterator::handleNext() {
// the input position. The next iteration will be processing the
// first real input character.
if (mode == RBBI_RUN) {
c = UTEXT_NEXT32(fText);
c = UTEXT_NEXT32(&fText);
} else {
if (mode == RBBI_START) {
mode = RBBI_RUN;
@ -928,9 +920,9 @@ int32_t RuleBasedBreakIterator::handleNext() {
// (This really indicates a defect in the break rules. They should always match
// at least one character.)
if (result == initialPosition) {
utext_setNativeIndex(fText, initialPosition);
utext_next32(fText);
result = (int32_t)utext_getNativeIndex(fText);
utext_setNativeIndex(&fText, initialPosition);
utext_next32(&fText);
result = (int32_t)utext_getNativeIndex(&fText);
fRuleStatusIndex = 0;
}
@ -965,7 +957,7 @@ int32_t RuleBasedBreakIterator::handlePrevious(int32_t fromPosition) {
int32_t initialPosition = 0;
const RBBIStateTable *stateTable = fData->fSafeRevTable;
UTEXT_SETNATIVEINDEX(fText, fromPosition);
UTEXT_SETNATIVEINDEX(&fText, fromPosition);
#ifdef RBBI_DEBUG
if (gTrace) {
RBBIDebugPuts("Handle Previous pos char state category");
@ -973,14 +965,14 @@ int32_t RuleBasedBreakIterator::handlePrevious(int32_t fromPosition) {
#endif
// if we're already at the start of the text, return DONE.
if (fText == NULL || fData == NULL || UTEXT_GETNATIVEINDEX(fText)==0) {
if (fData == NULL || UTEXT_GETNATIVEINDEX(&fText)==0) {
return BreakIterator::DONE;
}
// Set up the starting char.
initialPosition = (int32_t)UTEXT_GETNATIVEINDEX(fText);
initialPosition = (int32_t)UTEXT_GETNATIVEINDEX(&fText);
result = initialPosition;
c = UTEXT_PREVIOUS32(fText);
c = UTEXT_PREVIOUS32(&fText);
// Set the initial state for the state machine
state = START_STATE;
@ -1028,7 +1020,7 @@ int32_t RuleBasedBreakIterator::handlePrevious(int32_t fromPosition) {
#ifdef RBBI_DEBUG
if (gTrace) {
RBBIDebugPrintf(" %4d ", (int32_t)utext_getNativeIndex(fText));
RBBIDebugPrintf(" %4d ", (int32_t)utext_getNativeIndex(&fText));
if (0x20<=c && c<0x7f) {
RBBIDebugPrintf("\"%c\" ", c);
} else {
@ -1041,9 +1033,7 @@ int32_t RuleBasedBreakIterator::handlePrevious(int32_t fromPosition) {
// State Transition - move machine to its next state
//
// Note: fNextState is defined as uint16_t[2], but we are casting
// a generated RBBI table to RBBIStateTableRow and some tables
// actually have more than 2 categories.
// fNextState is a variable-length array.
U_ASSERT(category<fData->fHeader->fCatCount);
state = row->fNextState[category]; /*Not accessing beyond memory*/
row = (RBBIStateTableRow *)
@ -1051,7 +1041,7 @@ int32_t RuleBasedBreakIterator::handlePrevious(int32_t fromPosition) {
if (row->fAccepting == -1) {
// Match found, common case.
result = (int32_t)UTEXT_GETNATIVEINDEX(fText);
result = (int32_t)UTEXT_GETNATIVEINDEX(&fText);
}
int16_t completedRule = row->fAccepting;
@ -1059,14 +1049,14 @@ int32_t RuleBasedBreakIterator::handlePrevious(int32_t fromPosition) {
// Lookahead match is completed.
int32_t lookaheadResult = lookAheadMatches.getPosition(completedRule);
if (lookaheadResult >= 0) {
UTEXT_SETNATIVEINDEX(fText, lookaheadResult);
UTEXT_SETNATIVEINDEX(&fText, lookaheadResult);
return lookaheadResult;
}
}
int16_t rule = row->fLookAhead;
if (rule != 0) {
// At the position of a '/' in a look-ahead match. Record it.
int32_t pos = (int32_t)UTEXT_GETNATIVEINDEX(fText);
int32_t pos = (int32_t)UTEXT_GETNATIVEINDEX(&fText);
lookAheadMatches.setPosition(rule, pos);
}
@ -1082,7 +1072,7 @@ int32_t RuleBasedBreakIterator::handlePrevious(int32_t fromPosition) {
// the input position. The next iteration will be processing the
// first real input character.
if (mode == RBBI_RUN) {
c = UTEXT_PREVIOUS32(fText);
c = UTEXT_PREVIOUS32(&fText);
} else {
if (mode == RBBI_START) {
mode = RBBI_RUN;
@ -1096,9 +1086,9 @@ int32_t RuleBasedBreakIterator::handlePrevious(int32_t fromPosition) {
// (This really indicates a defect in the break rules. They should always match
// at least one character.)
if (result == initialPosition) {
UTEXT_SETNATIVEINDEX(fText, initialPosition);
UTEXT_PREVIOUS32(fText);
result = (int32_t)UTEXT_GETNATIVEINDEX(fText);
UTEXT_SETNATIVEINDEX(&fText, initialPosition);
UTEXT_PREVIOUS32(&fText);
result = (int32_t)UTEXT_GETNATIVEINDEX(&fText);
}
#ifdef RBBI_DEBUG
@ -1247,7 +1237,7 @@ static void U_CALLCONV initLanguageFactories() {
static const LanguageBreakEngine*
getLanguageBreakEngineFromFactory(UChar32 c, int32_t breakType)
getLanguageBreakEngineFromFactory(UChar32 c)
{
umtx_initOnce(gLanguageBreakFactoriesInitOnce, &initLanguageFactories);
if (gLanguageBreakFactories == NULL) {
@ -1258,7 +1248,7 @@ getLanguageBreakEngineFromFactory(UChar32 c, int32_t breakType)
const LanguageBreakEngine *lbe = NULL;
while (--i >= 0) {
LanguageBreakFactory *factory = (LanguageBreakFactory *)(gLanguageBreakFactories->elementAt(i));
lbe = factory->getEngineFor(c, breakType);
lbe = factory->getEngineFor(c);
if (lbe != NULL) {
break;
}
@ -1290,14 +1280,14 @@ RuleBasedBreakIterator::getLanguageBreakEngine(UChar32 c) {
int32_t i = fLanguageBreakEngines->size();
while (--i >= 0) {
lbe = (const LanguageBreakEngine *)(fLanguageBreakEngines->elementAt(i));
if (lbe->handles(c, fBreakType)) {
if (lbe->handles(c)) {
return lbe;
}
}
// No existing dictionary took the character. See if a factory wants to
// give us a new LanguageBreakEngine for this character.
lbe = getLanguageBreakEngineFromFactory(c, fBreakType);
lbe = getLanguageBreakEngineFromFactory(c);
// If we got one, use it and push it on our stack.
if (lbe != NULL) {
@ -1313,6 +1303,7 @@ RuleBasedBreakIterator::getLanguageBreakEngine(UChar32 c) {
fUnhandledBreakEngine = new UnhandledEngine(status);
if (U_SUCCESS(status) && fUnhandledBreakEngine == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
return nullptr;
}
// Put it last so that scripts for which we have an engine get tried
// first.
@ -1327,25 +1318,19 @@ RuleBasedBreakIterator::getLanguageBreakEngine(UChar32 c) {
// Tell the reject engine about the character; at its discretion, it may
// add more than just the one character.
fUnhandledBreakEngine->handleCharacter(c, fBreakType);
fUnhandledBreakEngine->handleCharacter(c);
return fUnhandledBreakEngine;
}
/*int32_t RuleBasedBreakIterator::getBreakType() const {
return fBreakType;
}*/
void RuleBasedBreakIterator::setBreakType(int32_t type) {
fBreakType = type;
}
/**
 * Forwards to the dumpCache() method of this iterator's break cache
 * (fBreakCache). Presumably a debugging aid; what is emitted is decided
 * entirely by the cache object.
 *
 * NOTE(review): fBreakCache is dereferenced without a null check. init()
 * sets it to NULL before allocating it, so confirm callers only reach this
 * on a successfully initialized iterator.
 */
void RuleBasedBreakIterator::dumpCache() {
fBreakCache->dumpCache();
}
/**
 * Forwards to the printData() method of this iterator's rule data wrapper
 * (fData). Presumably a debugging aid; the output is decided entirely by
 * the data wrapper.
 *
 * NOTE(review): fData is dereferenced without a null check, although
 * init() sets it to NULL and other members of this class test it against
 * NULL before use. Confirm callers only reach this once data is attached.
 */
void RuleBasedBreakIterator::dumpTables() {
fData->printData();
}
/**
* Returns the description used to create this iterator
*/

View file

@ -26,14 +26,11 @@ U_NAMESPACE_BEGIN
*/
RuleBasedBreakIterator::DictionaryCache::DictionaryCache(RuleBasedBreakIterator *bi, UErrorCode &status) :
fBI(bi), fBreaks(NULL), fPositionInCache(-1),
fBI(bi), fBreaks(status), fPositionInCache(-1),
fStart(0), fLimit(0), fFirstRuleStatusIndex(0), fOtherRuleStatusIndex(0) {
fBreaks = new UVector32(status);
}
RuleBasedBreakIterator::DictionaryCache::~DictionaryCache() {
delete fBreaks;
fBreaks = NULL;
}
void RuleBasedBreakIterator::DictionaryCache::reset() {
@ -42,7 +39,7 @@ void RuleBasedBreakIterator::DictionaryCache::reset() {
fLimit = 0;
fFirstRuleStatusIndex = 0;
fOtherRuleStatusIndex = 0;
fBreaks->removeAllElements();
fBreaks.removeAllElements();
}
UBool RuleBasedBreakIterator::DictionaryCache::following(int32_t fromPos, int32_t *result, int32_t *statusIndex) {
@ -54,13 +51,13 @@ UBool RuleBasedBreakIterator::DictionaryCache::following(int32_t fromPos, int32_
// Sequential iteration, move from previous boundary to the following
int32_t r = 0;
if (fPositionInCache >= 0 && fPositionInCache < fBreaks->size() && fBreaks->elementAti(fPositionInCache) == fromPos) {
if (fPositionInCache >= 0 && fPositionInCache < fBreaks.size() && fBreaks.elementAti(fPositionInCache) == fromPos) {
++fPositionInCache;
if (fPositionInCache >= fBreaks->size()) {
if (fPositionInCache >= fBreaks.size()) {
fPositionInCache = -1;
return FALSE;
}
r = fBreaks->elementAti(fPositionInCache);
r = fBreaks.elementAti(fPositionInCache);
U_ASSERT(r > fromPos);
*result = r;
*statusIndex = fOtherRuleStatusIndex;
@ -69,8 +66,8 @@ UBool RuleBasedBreakIterator::DictionaryCache::following(int32_t fromPos, int32_
// Random indexing. Linear search for the boundary following the given position.
for (fPositionInCache = 0; fPositionInCache < fBreaks->size(); ++fPositionInCache) {
r= fBreaks->elementAti(fPositionInCache);
for (fPositionInCache = 0; fPositionInCache < fBreaks.size(); ++fPositionInCache) {
r= fBreaks.elementAti(fPositionInCache);
if (r > fromPos) {
*result = r;
*statusIndex = fOtherRuleStatusIndex;
@ -90,16 +87,16 @@ UBool RuleBasedBreakIterator::DictionaryCache::preceding(int32_t fromPos, int32_
}
if (fromPos == fLimit) {
fPositionInCache = fBreaks->size() - 1;
fPositionInCache = fBreaks.size() - 1;
if (fPositionInCache >= 0) {
U_ASSERT(fBreaks->elementAti(fPositionInCache) == fromPos);
U_ASSERT(fBreaks.elementAti(fPositionInCache) == fromPos);
}
}
int32_t r;
if (fPositionInCache > 0 && fPositionInCache < fBreaks->size() && fBreaks->elementAti(fPositionInCache) == fromPos) {
if (fPositionInCache > 0 && fPositionInCache < fBreaks.size() && fBreaks.elementAti(fPositionInCache) == fromPos) {
--fPositionInCache;
r = fBreaks->elementAti(fPositionInCache);
r = fBreaks.elementAti(fPositionInCache);
U_ASSERT(r < fromPos);
*result = r;
*statusIndex = ( r== fStart) ? fFirstRuleStatusIndex : fOtherRuleStatusIndex;
@ -111,8 +108,8 @@ UBool RuleBasedBreakIterator::DictionaryCache::preceding(int32_t fromPos, int32_
return FALSE;
}
for (fPositionInCache = fBreaks->size()-1; fPositionInCache >= 0; --fPositionInCache) {
r = fBreaks->elementAti(fPositionInCache);
for (fPositionInCache = fBreaks.size()-1; fPositionInCache >= 0; --fPositionInCache) {
r = fBreaks.elementAti(fPositionInCache);
if (r < fromPos) {
*result = r;
*statusIndex = ( r == fStart) ? fFirstRuleStatusIndex : fOtherRuleStatusIndex;
@ -141,7 +138,7 @@ void RuleBasedBreakIterator::DictionaryCache::populateDictionary(int32_t startPo
int32_t current;
UErrorCode status = U_ZERO_ERROR;
int32_t foundBreakCount = 0;
UText *text = fBI->fText;
UText *text = &fBI->fText;
// Loop through the text, looking for ranges of dictionary characters.
// For each span, find the appropriate break engine, and ask it to find
@ -168,7 +165,7 @@ void RuleBasedBreakIterator::DictionaryCache::populateDictionary(int32_t startPo
// Ask the language object if there are any breaks. It will add them to the cache and
// leave the text pointer on the other side of its range, ready to search for the next one.
if (lbe != NULL) {
foundBreakCount += lbe->findBreaks(text, rangeStart, rangeEnd, fBI->fBreakType, *fBreaks);
foundBreakCount += lbe->findBreaks(text, rangeStart, rangeEnd, fBreaks);
}
// Reload the loop variables for the next go-round
@ -182,21 +179,21 @@ void RuleBasedBreakIterator::DictionaryCache::populateDictionary(int32_t startPo
// printf("foundBreakCount = %d\n", foundBreakCount);
if (foundBreakCount > 0) {
U_ASSERT(foundBreakCount == fBreaks->size());
if (startPos < fBreaks->elementAti(0)) {
U_ASSERT(foundBreakCount == fBreaks.size());
if (startPos < fBreaks.elementAti(0)) {
// The dictionary did not place a boundary at the start of the segment of text.
// Add one now. This should not commonly happen, but it would be easy for interactions
// of the rules for dictionary segments and the break engine implementations to
// inadvertently cause it. Cover it here, just in case.
fBreaks->insertElementAt(startPos, 0, status);
fBreaks.insertElementAt(startPos, 0, status);
}
if (endPos > fBreaks->peeki()) {
fBreaks->push(endPos, status);
if (endPos > fBreaks.peeki()) {
fBreaks.push(endPos, status);
}
fPositionInCache = 0;
// Note: Dictionary matching may extend beyond the original limit.
fStart = fBreaks->elementAti(0);
fLimit = fBreaks->peeki();
fStart = fBreaks.elementAti(0);
fLimit = fBreaks.peeki();
} else {
// there were no language-based breaks, even though the segment contained
// dictionary characters. Subsequent attempts to fetch boundaries from the dictionary cache

View file

@ -56,7 +56,7 @@ class RuleBasedBreakIterator::DictionaryCache: public UMemory {
RuleBasedBreakIterator *fBI;
UVector32 *fBreaks; // A vector containing the boundaries.
UVector32 fBreaks; // A vector containing the boundaries.
int32_t fPositionInCache; // Index in fBreaks of last boundary returned by following()
// or preceding(). Optimizes sequential access.
int32_t fStart; // Text position of first boundary in cache.

View file

@ -267,8 +267,8 @@ void RBBIDataWrapper::printTable(const char *heading, const RBBIStateTable *tab
#endif
#ifdef RBBI_DEBUG
void RBBIDataWrapper::printData() {
#ifdef RBBI_DEBUG
RBBIDebugPrintf("RBBI Data at %p\n", (void *)fHeader);
RBBIDebugPrintf(" Version = {%d %d %d %d}\n", fHeader->fFormatVersion[0], fHeader->fFormatVersion[1],
fHeader->fFormatVersion[2], fHeader->fFormatVersion[3]);
@ -285,8 +285,8 @@ void RBBIDataWrapper::printData() {
RBBIDebugPrintf("%c", fRuleSource[c]);
}
RBBIDebugPrintf("\n\n");
}
#endif
}
U_NAMESPACE_END

View file

@ -116,9 +116,10 @@ struct RBBIStateTableRow {
/* StatusTable of the set of matching */
/* tags (rule status values) */
int16_t fReserved;
uint16_t fNextState[2]; /* Next State, indexed by char category. */
/* This array does not have two elements */
/* Array Size is actually fData->fHeader->fCatCount */
uint16_t fNextState[1]; /* Next State, indexed by char category. */
/* Variable-length array declared with length 1 */
/* to disable bounds checkers. */
/* Array Size is actually fData->fHeader->fCatCount*/
/* CAUTION: see RBBITableBuilder::getTableSize() */
/* before changing anything here. */
};
@ -129,7 +130,9 @@ struct RBBIStateTable {
uint32_t fRowLen; /* Length of a state table row, in bytes. */
uint32_t fFlags; /* Option Flags for this state table */
uint32_t fReserved; /* reserved */
char fTableData[4]; /* First RBBIStateTableRow begins here. */
char fTableData[1]; /* First RBBIStateTableRow begins here. */
/* Variable-length array declared with length 1 */
/* to disable bounds checkers. */
/* (making it char[] simplifies ugly address */
/* arithmetic for indexing variable length rows.) */
};
@ -162,13 +165,8 @@ public:
UBool operator ==(const RBBIDataWrapper &other) const;
int32_t hashCode();
const UnicodeString &getRuleSourceString() const;
#ifdef RBBI_DEBUG
void printData();
void printTable(const char *heading, const RBBIStateTable *table);
#else
#define printData()
#define printTable(heading, table)
#endif
/* */
/* Pointers to items within the data */

View file

@ -47,7 +47,7 @@ U_NAMESPACE_BEGIN
RBBIRuleBuilder::RBBIRuleBuilder(const UnicodeString &rules,
UParseError *parseErr,
UErrorCode &status)
: fRules(rules)
: fRules(rules), fStrippedRules(rules)
{
fStatus = &status; // status is checked below
fParseError = parseErr;
@ -147,8 +147,9 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() {
return NULL;
}
// Remove comments and whitespace from the rules to make it smaller.
UnicodeString strippedRules((const UnicodeString&)RBBIRuleScanner::stripRules(fRules));
// Remove whitespace from the rules to make it smaller.
// The rule parser has already removed comments.
fStrippedRules = fScanner->stripRules(fStrippedRules);
// Calculate the size of each section in the data.
// Sizes here are padded up to a multiple of 8 for better memory alignment.
@ -162,7 +163,7 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() {
int32_t safeRevTableSize = align8(fSafeRevTables->getTableSize());
int32_t trieSize = align8(fSetBuilder->getTrieSize());
int32_t statusTableSize = align8(fRuleStatusVals->size() * sizeof(int32_t));
int32_t rulesSize = align8((strippedRules.length()+1) * sizeof(UChar));
int32_t rulesSize = align8((fStrippedRules.length()+1) * sizeof(UChar));
(void)safeFwdTableSize;
@ -225,7 +226,7 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() {
data->fStatusTable = data->fTrie + trieSize;
data->fStatusTableLen= statusTableSize;
data->fRuleSource = data->fStatusTable + statusTableSize;
data->fRuleSourceLen = strippedRules.length() * sizeof(UChar);
data->fRuleSourceLen = fStrippedRules.length() * sizeof(UChar);
uprv_memset(data->fReserved, 0, sizeof(data->fReserved));
@ -245,7 +246,7 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() {
ruleStatusTable[i] = fRuleStatusVals->elementAti(i);
}
strippedRules.extract((UChar *)((uint8_t *)data+data->fRuleSource), rulesSize/2+1, *fStatus);
fStrippedRules.extract((UChar *)((uint8_t *)data+data->fRuleSource), rulesSize/2+1, *fStatus);
return data;
}
@ -281,10 +282,10 @@ RBBIRuleBuilder::createRuleBasedBreakIterator( const UnicodeString &rules,
//
// UnicodeSet processing.
// Munge the Unicode Sets to create a set of character categories.
// Generate the mapping tables (TRIE) from input 32-bit characters to
// Generate the mapping tables (TRIE) from input code points to
// the character categories.
//
builder.fSetBuilder->build();
builder.fSetBuilder->buildRanges();
//
@ -316,6 +317,11 @@ RBBIRuleBuilder::createRuleBasedBreakIterator( const UnicodeString &rules,
}
#endif
builder.optimizeTables();
builder.fSetBuilder->buildTrie();
//
// Package up the compiled data into a memory image
// in the run-time format.
@ -347,6 +353,29 @@ RBBIRuleBuilder::createRuleBasedBreakIterator( const UnicodeString &rules,
return This;
}
/**
 * Shrink the generated state tables before they are serialized.
 *
 * Pass 1 folds together redundant character classes (table columns):
 * while the forward table reports a pair of duplicate classes, the pair
 * is merged in the set builder and the now-redundant column is dropped
 * from all four state tables.
 *
 * Pass 2 removes redundant states (table rows) from each of the four
 * tables.
 */
void RBBIRuleBuilder::optimizeTables() {
    // Search cursor for the duplicate-class scan. Both values are handed to
    // findDuplCharClassFrom() and then reused as the pair to merge, so the
    // call presumably updates them in place - TODO confirm against its
    // declaration. The starting value 3 presumably skips low-numbered
    // special categories - TODO confirm.
    int32_t leftClass = 3;
    int32_t rightClass = 0;

    // Pass 1: merge duplicate character classes, one pair per iteration.
    while (fForwardTables->findDuplCharClassFrom(leftClass, rightClass)) {
        fSetBuilder->mergeCategories(leftClass, rightClass);

        // The same column must disappear from every table so that
        // category numbers stay consistent across them.
        fForwardTables->removeColumn(rightClass);
        fReverseTables->removeColumn(rightClass);
        fSafeFwdTables->removeColumn(rightClass);
        fSafeRevTables->removeColumn(rightClass);
    }

    // Pass 2: drop duplicate states from each table.
    fForwardTables->removeDuplicateStates();
    fReverseTables->removeDuplicateStates();
    fSafeFwdTables->removeDuplicateStates();
    fSafeRevTables->removeDuplicateStates();
}
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */

View file

@ -126,10 +126,19 @@ public:
);
virtual ~RBBIRuleBuilder();
/**
* Fold together redundant character classes (table columns) and
* redundant states (table rows). Done after initial table generation,
* before serializing the result.
*/
void optimizeTables();
char *fDebugEnv; // controls debug trace output
UErrorCode *fStatus; // Error reporting. Keeping status
UParseError *fParseError; // here avoids passing it everywhere.
const UnicodeString &fRules; // The rule string that we are compiling
UnicodeString fStrippedRules; // The rule string, with comments stripped.
RBBIRuleScanner *fScanner; // The scanner.
RBBINode *fForwardTree; // The parse trees, generated by the scanner,

View file

@ -822,27 +822,24 @@ static const UChar chRParen = 0x29;
//------------------------------------------------------------------------------
//
// stripRules Return a rules string without unnecessary
// characters.
// stripRules Return a rules string without extra spaces.
// (Comments are removed separately, during rule parsing.)
//
//------------------------------------------------------------------------------
UnicodeString RBBIRuleScanner::stripRules(const UnicodeString &rules) {
UnicodeString strippedRules;
int rulesLength = rules.length();
for (int idx = 0; idx < rulesLength; ) {
UChar ch = rules[idx++];
if (ch == chPound) {
while (idx < rulesLength
&& ch != chCR && ch != chLF && ch != chNEL)
{
ch = rules[idx++];
}
}
if (!u_isISOControl(ch)) {
strippedRules.append(ch);
int32_t rulesLength = rules.length();
bool skippingSpaces = false;
for (int32_t idx=0; idx<rulesLength; idx = rules.moveIndex32(idx, 1)) {
UChar32 cp = rules.char32At(idx);
bool whiteSpace = u_hasBinaryProperty(cp, UCHAR_PATTERN_WHITE_SPACE);
if (skippingSpaces && whiteSpace) {
continue;
}
strippedRules.append(cp);
skippingSpaces = whiteSpace;
}
// strippedRules = strippedRules.unescape();
return strippedRules;
}
@ -942,6 +939,7 @@ void RBBIRuleScanner::nextChar(RBBIRuleChar &c) {
// It will be treated as white-space, and serves to break up anything
// that might otherwise incorrectly clump together with a comment in
// the middle (a variable name, for example.)
int32_t commentStart = fScanIndex;
for (;;) {
c.fChar = nextCharLL();
if (c.fChar == (UChar32)-1 || // EOF
@ -950,6 +948,9 @@ void RBBIRuleScanner::nextChar(RBBIRuleChar &c) {
c.fChar == chNEL ||
c.fChar == chLS) {break;}
}
for (int32_t i=commentStart; i<fNextIndex-1; ++i) {
fRB->fStrippedRules.setCharAt(i, u' ');
}
}
if (c.fChar == (UChar32)-1) {
return;

View file

@ -91,7 +91,7 @@ RBBISetBuilder::~RBBISetBuilder()
// from the Unicode Sets.
//
//------------------------------------------------------------------------
void RBBISetBuilder::build() {
void RBBISetBuilder::buildRanges() {
RBBINode *usetNode;
RangeDescriptor *rlRange;
@ -245,11 +245,16 @@ void RBBISetBuilder::build() {
if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "rgroup")) {printRangeGroups();}
if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "esets")) {printSets();}
}
//
// Build the Trie table for mapping UChar32 values to the corresponding
// range group number.
//
void RBBISetBuilder::buildTrie() {
RangeDescriptor *rlRange;
//
// Build the Trie table for mapping UChar32 values to the corresponding
// range group number
//
fTrie = utrie2_open(0, // Initial value for all code points.
0, // Error value for out-of-range input.
fStatus);
@ -265,6 +270,22 @@ void RBBISetBuilder::build() {
}
void RBBISetBuilder::mergeCategories(int32_t left, int32_t right) {
U_ASSERT(left >= 1);
U_ASSERT(right > left);
for (RangeDescriptor *rd = fRangeList; rd != nullptr; rd = rd->fNext) {
int32_t rangeNum = rd->fNum & ~DICT_BIT;
int32_t rangeDict = rd->fNum & DICT_BIT;
if (rangeNum == right) {
rd->fNum = left | rangeDict;
} else if (rangeNum > right) {
rd->fNum--;
}
}
--fGroupCount;
}
//-----------------------------------------------------------------------------------
//
// getTrieSize() Return the size that will be required to serialize the Trie.
@ -446,7 +467,7 @@ void RBBISetBuilder::printRangeGroups() {
lastPrintedGroupNum = groupNum;
RBBIDebugPrintf("%2i ", groupNum);
if (rlRange->fNum & 0x4000) { RBBIDebugPrintf(" <DICT> ");}
if (rlRange->fNum & DICT_BIT) { RBBIDebugPrintf(" <DICT> ");}
for (i=0; i<rlRange->fIncludesSets->size(); i++) {
RBBINode *usetNode = (RBBINode *)rlRange->fIncludesSets->elementAt(i);
@ -639,20 +660,20 @@ void RangeDescriptor::split(UChar32 where, UErrorCode &status) {
void RangeDescriptor::setDictionaryFlag() {
int i;
for (i=0; i<this->fIncludesSets->size(); i++) {
RBBINode *usetNode = (RBBINode *)fIncludesSets->elementAt(i);
UnicodeString setName;
RBBINode *setRef = usetNode->fParent;
if (setRef != NULL) {
static const char16_t *dictionary = u"dictionary";
for (i=0; i<fIncludesSets->size(); i++) {
RBBINode *usetNode = (RBBINode *)fIncludesSets->elementAt(i);
RBBINode *setRef = usetNode->fParent;
if (setRef != nullptr) {
RBBINode *varRef = setRef->fParent;
if (varRef != NULL && varRef->fType == RBBINode::varRef) {
setName = varRef->fText;
if (varRef && varRef->fType == RBBINode::varRef) {
const UnicodeString *setName = &varRef->fText;
if (setName->compare(dictionary, -1) == 0) {
fNum |= RBBISetBuilder::DICT_BIT;
break;
}
}
}
if (setName.compare(UNICODE_STRING("dictionary", 10)) == 0) { // TODO: no string literals.
this->fNum |= 0x4000;
break;
}
}
}

View file

@ -82,7 +82,8 @@ public:
RBBISetBuilder(RBBIRuleBuilder *rb);
~RBBISetBuilder();
void build();
void buildRanges();
void buildTrie();
void addValToSets(UVector *sets, uint32_t val);
void addValToSet (RBBINode *usetNode, uint32_t val);
int32_t getNumCharCategories() const; // CharCategories are the same as input symbol set to the
@ -93,6 +94,13 @@ public:
UChar32 getFirstChar(int32_t val) const;
UBool sawBOF() const; // Indicate whether any references to the {bof} pseudo
// character were encountered.
/** merge two character categories that have been identified as having equivalent behavior.
* The ranges belonging to the right category (table column) will be added to the left.
*/
void mergeCategories(int32_t left, int32_t right);
static constexpr int32_t DICT_BIT = 0x4000;
#ifdef RBBI_DEBUG
void printSets();
void printRanges();

View file

@ -22,6 +22,7 @@
#include "rbbidata.h"
#include "cstring.h"
#include "uassert.h"
#include "uvectr32.h"
#include "cmemory.h"
U_NAMESPACE_BEGIN
@ -761,7 +762,7 @@ void RBBITableBuilder::flagAcceptingStates() {
// if sd->fAccepting already had a value other than 0 or -1, leave it be.
// If the end marker node is from a look-ahead rule, set
// the fLookAhead field or this state also.
// the fLookAhead field for this state also.
if (endMarker->fLookAheadEnd) {
// TODO: don't change value if already set?
// TODO: allow for more than one active look-ahead rule in engine.
@ -1077,7 +1078,128 @@ void RBBITableBuilder::printPosSets(RBBINode *n) {
}
#endif
//
// findDuplCharClassFrom()
//
/**
 * Search for a pair of character categories (state table columns) whose
 * transitions are identical in every state.
 *
 * The search resumes from the values passed in, so the function can be called
 * repeatedly as an iterator.
 *
 * @param baseCategory (in/out) first column to examine; on a true return, the
 *                     lower-numbered column of the matching pair.
 * @param duplCategory (out) on a true return, the column that duplicates
 *                     baseCategory.
 * @return true if a duplicate pair was found, false when the columns are
 *         exhausted. A table with no states never reports a duplicate.
 */
bool RBBITableBuilder::findDuplCharClassFrom(int32_t &baseCategory, int32_t &duplCategory) {
    int32_t numStates = fDStates->size();
    int32_t numCols = fRB->fSetBuilder->getNumCharCategories();

    for (; baseCategory < numCols-1; ++baseCategory) {
        for (duplCategory=baseCategory+1; duplCategory < numCols; ++duplCategory) {
            // With zero states there is nothing to compare, so there is no
            // duplicate. Previously this case compared two uninitialized locals.
            bool columnsMatch = (numStates > 0);
            for (int32_t state=0; state<numStates; state++) {
                RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(state);
                // Compare as uint16_t, matching the original comparison width.
                uint16_t table_base = (uint16_t)sd->fDtran->elementAti(baseCategory);
                uint16_t table_dupl = (uint16_t)sd->fDtran->elementAti(duplCategory);
                if (table_base != table_dupl) {
                    columnsMatch = false;
                    break;
                }
            }
            if (columnsMatch) {
                return true;
            }
        }
    }
    return false;
}
//
// removeColumn()
//
void RBBITableBuilder::removeColumn(int32_t column) {
int32_t numStates = fDStates->size();
for (int32_t state=0; state<numStates; state++) {
RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(state);
U_ASSERT(column < sd->fDtran->size());
sd->fDtran->removeElementAt(column);
}
}
/*
* findDuplicateState
*/
bool RBBITableBuilder::findDuplicateState(int32_t &firstState, int32_t &duplState) {
int32_t numStates = fDStates->size();
int32_t numCols = fRB->fSetBuilder->getNumCharCategories();
for (; firstState<numStates-1; ++firstState) {
RBBIStateDescriptor *firstSD = (RBBIStateDescriptor *)fDStates->elementAt(firstState);
for (duplState=firstState+1; duplState<numStates; ++duplState) {
RBBIStateDescriptor *duplSD = (RBBIStateDescriptor *)fDStates->elementAt(duplState);
if (firstSD->fAccepting != duplSD->fAccepting ||
firstSD->fLookAhead != duplSD->fLookAhead ||
firstSD->fTagsIdx != duplSD->fTagsIdx) {
continue;
}
bool rowsMatch = true;
for (int32_t col=0; col < numCols; ++col) {
int32_t firstVal = firstSD->fDtran->elementAti(col);
int32_t duplVal = duplSD->fDtran->elementAti(col);
if (!((firstVal == duplVal) ||
((firstVal == firstState || firstVal == duplState) &&
(duplVal == firstState || duplVal == duplState)))) {
rowsMatch = false;
break;
}
}
if (rowsMatch) {
return true;
}
}
}
return false;
}
void RBBITableBuilder::removeState(int32_t keepState, int32_t duplState) {
U_ASSERT(keepState < duplState);
U_ASSERT(duplState < fDStates->size());
RBBIStateDescriptor *duplSD = (RBBIStateDescriptor *)fDStates->elementAt(duplState);
fDStates->removeElementAt(duplState);
delete duplSD;
int32_t numStates = fDStates->size();
int32_t numCols = fRB->fSetBuilder->getNumCharCategories();
for (int32_t state=0; state<numStates; ++state) {
RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(state);
for (int32_t col=0; col<numCols; col++) {
int32_t existingVal = sd->fDtran->elementAti(col);
int32_t newVal = existingVal;
if (existingVal == duplState) {
newVal = keepState;
} else if (existingVal > duplState) {
newVal = existingVal - 1;
}
sd->fDtran->setElementAt(newVal, col);
}
if (sd->fAccepting == duplState) {
sd->fAccepting = keepState;
} else if (sd->fAccepting > duplState) {
sd->fAccepting--;
}
if (sd->fLookAhead == duplState) {
sd->fLookAhead = keepState;
} else if (sd->fLookAhead > duplState) {
sd->fLookAhead--;
}
}
}
/*
* RemoveDuplicateStates
*/
void RBBITableBuilder::removeDuplicateStates() {
int32_t firstState = 3;
int32_t duplicateState = 0;
while (findDuplicateState(firstState, duplicateState)) {
// printf("Removing duplicate states (%d, %d)\n", firstState, duplicateState);
removeState(firstState, duplicateState);
}
}
//-----------------------------------------------------------------------------
//
@ -1095,21 +1217,17 @@ int32_t RBBITableBuilder::getTableSize() const {
return 0;
}
size = sizeof(RBBIStateTable) - 4; // The header, with no rows to the table.
size = offsetof(RBBIStateTable, fTableData); // The header, with no rows to the table.
numRows = fDStates->size();
numCols = fRB->fSetBuilder->getNumCharCategories();
// Note The declaration of RBBIStateTableRow is for a table of two columns.
// Therefore we subtract two from numCols when determining
// how much storage to add to a row for the total columns.
rowSize = sizeof(RBBIStateTableRow) + sizeof(uint16_t)*(numCols-2);
rowSize = offsetof(RBBIStateTableRow, fNextState) + sizeof(uint16_t)*numCols;
size += numRows * rowSize;
return size;
}
//-----------------------------------------------------------------------------
//
// exportTable() export the state transition table in the format required
@ -1126,14 +1244,14 @@ void RBBITableBuilder::exportTable(void *where) {
return;
}
if (fRB->fSetBuilder->getNumCharCategories() > 0x7fff ||
int32_t catCount = fRB->fSetBuilder->getNumCharCategories();
if (catCount > 0x7fff ||
fDStates->size() > 0x7fff) {
*fStatus = U_BRK_INTERNAL_ERROR;
return;
}
table->fRowLen = sizeof(RBBIStateTableRow) +
sizeof(uint16_t) * (fRB->fSetBuilder->getNumCharCategories() - 2);
table->fRowLen = offsetof(RBBIStateTableRow, fNextState) + sizeof(uint16_t) * catCount;
table->fNumStates = fDStates->size();
table->fFlags = 0;
if (fRB->fLookAheadHardBreak) {
@ -1152,7 +1270,7 @@ void RBBITableBuilder::exportTable(void *where) {
row->fAccepting = (int16_t)sd->fAccepting;
row->fLookAhead = (int16_t)sd->fLookAhead;
row->fTagIdx = (int16_t)sd->fTagsIdx;
for (col=0; col<fRB->fSetBuilder->getNumCharCategories(); col++) {
for (col=0; col<catCount; col++) {
row->fNextState[col] = (uint16_t)sd->fDtran->elementAti(col);
}
}
@ -1259,7 +1377,7 @@ RBBIStateDescriptor::RBBIStateDescriptor(int lastInputSymbol, UErrorCode *fStatu
fPositions = NULL;
fDtran = NULL;
fDtran = new UVector(lastInputSymbol+1, *fStatus);
fDtran = new UVector32(lastInputSymbol+1, *fStatus);
if (U_FAILURE(*fStatus)) {
return;
}
@ -1267,7 +1385,7 @@ RBBIStateDescriptor::RBBIStateDescriptor(int lastInputSymbol, UErrorCode *fStatu
*fStatus = U_MEMORY_ALLOCATION_ERROR;
return;
}
fDtran->setSize(lastInputSymbol+1, *fStatus); // fDtran needs to be pre-sized.
fDtran->setSize(lastInputSymbol+1); // fDtran needs to be pre-sized.
// It is indexed by input symbols, and will
// hold the next state number for each
// symbol.

View file

@ -24,6 +24,7 @@ U_NAMESPACE_BEGIN
class RBBIRuleScanner;
class RBBIRuleBuilder;
class UVector32;
//
// class RBBITableBuilder is part of the RBBI rule compiler.
@ -42,9 +43,24 @@ public:
void build();
int32_t getTableSize() const; // Return the runtime size in bytes of
// the built state table
void exportTable(void *where); // fill in the runtime state table.
// Sufficient memory must exist at
// the specified location.
/** Fill in the runtime state table. Sufficient memory must exist at the specified location.
*/
void exportTable(void *where);
/** Find duplicate (redundant) character classes, beginning after the specified
* pair, within this state table. This is an iterator-like function, used to
* identify char classes (state table columns) that can be eliminated.
*/
bool findDuplCharClassFrom(int &baseClass, int &duplClass);
/** Remove a column from the state table. Used when two character categories
* have been found equivalent, and merged together, to eliminate the unneeded table column.
*/
void removeColumn(int32_t column);
/** Check for, and remove, duplicate states (table rows). */
void removeDuplicateStates();
private:
@ -60,8 +76,29 @@ private:
void flagTaggedStates();
void mergeRuleStatusVals();
/**
* Merge redundant state table columns, eliminating character classes with identical behavior.
* Done after the state tables are generated, just before converting to their run-time format.
*/
int32_t mergeColumns();
void addRuleRootNodes(UVector *dest, RBBINode *node);
/** Find the next duplicate state. An iterator function.
* @param firstState (in/out) begin looking at this state, return the first of the
* pair of duplicates.
* @param duplicateState returns the duplicate state of firstState
* @return true if a duplicate pair of states was found.
*/
bool findDuplicateState(int32_t &firstState, int32_t &duplicateState);
/** Remove a duplicate state.
* @param keepState First of the duplicate pair. Keep it.
* @param duplState Duplicate state. Remove it. Redirect all references to the duplicate state
* to refer to keepState instead.
*/
void removeState(int32_t keepState, int32_t duplState);
// Set functions for UVector.
// TODO: make a USet subclass of UVector
@ -112,7 +149,7 @@ public:
// with this state. Unordered (it's a set).
// UVector contents are RBBINode *
UVector *fDtran; // Transitions out of this state.
UVector32 *fDtran; // Transitions out of this state.
// indexed by input character
// contents is int index of dest state
// in RBBITableBuilder.fDStates

View file

@ -8,7 +8,10 @@
* sharedobject.cpp
*/
#include "sharedobject.h"
#include "mutex.h"
#include "uassert.h"
#include "umutex.h"
#include "unifiedcache.h"
U_NAMESPACE_BEGIN
@ -17,69 +20,41 @@ SharedObject::~SharedObject() {}
UnifiedCacheBase::~UnifiedCacheBase() {}
void
SharedObject::addRef(UBool fromWithinCache) const {
umtx_atomic_inc(&totalRefCount);
// Although items in use may not be correct immediately, it
// will be correct eventually.
if (umtx_atomic_inc(&hardRefCount) == 1 && cachePtr != NULL) {
// If this object is cached, and the hardRefCount goes from 0 to 1,
// then the increment must happen from within the cache while the
// cache global mutex is locked. In this way, we can be rest assured
// that data races can't happen if the cache performs some task if
// the hardRefCount is zero while the global cache mutex is locked.
(void)fromWithinCache; // Suppress unused variable warning in non-debug builds.
U_ASSERT(fromWithinCache);
cachePtr->incrementItemsInUse();
}
SharedObject::addRef() const {
umtx_atomic_inc(&hardRefCount);
}
// removeRef Decrement the reference count and delete if it is zero.
// Note that SharedObjects with a non-null cachePtr are owned by the
// unified cache, and the cache will be responsible for the actual deletion.
// The deletion could be as soon as immediately following the
// update to the reference count, if another thread is running
// a cache eviction cycle concurrently.
// NO ACCESS TO *this PERMITTED AFTER REFERENCE COUNT == 0 for cached objects.
// THE OBJECT MAY ALREADY BE GONE.
void
SharedObject::removeRef(UBool fromWithinCache) const {
UBool decrementItemsInUse = (umtx_atomic_dec(&hardRefCount) == 0);
UBool allReferencesGone = (umtx_atomic_dec(&totalRefCount) == 0);
// Although items in use may not be correct immediately, it
// will be correct eventually.
if (decrementItemsInUse && cachePtr != NULL) {
if (fromWithinCache) {
cachePtr->decrementItemsInUse();
SharedObject::removeRef() const {
const UnifiedCacheBase *cache = this->cachePtr;
int32_t updatedRefCount = umtx_atomic_dec(&hardRefCount);
U_ASSERT(updatedRefCount >= 0);
if (updatedRefCount == 0) {
if (cache) {
cache->handleUnreferencedObject();
} else {
cachePtr->decrementItemsInUseWithLockingAndEviction();
delete this;
}
}
if (allReferencesGone) {
delete this;
}
}
void
SharedObject::addSoftRef() const {
umtx_atomic_inc(&totalRefCount);
++softRefCount;
}
void
SharedObject::removeSoftRef() const {
--softRefCount;
if (umtx_atomic_dec(&totalRefCount) == 0) {
delete this;
}
}
int32_t
SharedObject::getRefCount() const {
return umtx_loadAcquire(totalRefCount);
}
int32_t
SharedObject::getHardRefCount() const {
return umtx_loadAcquire(hardRefCount);
}
void
SharedObject::deleteIfZeroRefCount() const {
if(getRefCount() == 0) {
if (this->cachePtr == nullptr && getRefCount() == 0) {
delete this;
}
}

View file

@ -17,6 +17,8 @@
U_NAMESPACE_BEGIN
class SharedObject;
/**
* Base class for unified cache exposing enough methods to SharedObject
* instances to allow their addRef() and removeRef() methods to
@ -28,22 +30,12 @@ public:
UnifiedCacheBase() { }
/**
* Called by addRefWhileHoldingCacheLock() when the hard reference count
* of its instance goes from 0 to 1.
* Notify the cache implementation that an object was seen transitioning to
* zero hard references. The cache may use this to keep track of the number of
* unreferenced SharedObjects, and to trigger evictions.
*/
virtual void incrementItemsInUse() const = 0;
virtual void handleUnreferencedObject() const = 0;
/**
* Called by removeRef() when the hard reference count of its instance
* drops from 1 to 0.
*/
virtual void decrementItemsInUseWithLockingAndEviction() const = 0;
/**
* Called by removeRefWhileHoldingCacheLock() when the hard reference
* count of its instance drops from 1 to 0.
*/
virtual void decrementItemsInUse() const = 0;
virtual ~UnifiedCacheBase();
private:
UnifiedCacheBase(const UnifiedCacheBase &);
@ -63,7 +55,6 @@ class U_COMMON_API SharedObject : public UObject {
public:
/** Initializes softRefCount and hardRefCount to 0. */
SharedObject() :
totalRefCount(0),
softRefCount(0),
hardRefCount(0),
cachePtr(NULL) {}
@ -71,7 +62,6 @@ public:
/** Initializes softRefCount and hardRefCount to 0; the counts of `other` are not copied. */
SharedObject(const SharedObject &other) :
UObject(other),
totalRefCount(0),
softRefCount(0),
hardRefCount(0),
cachePtr(NULL) {}
@ -79,93 +69,45 @@ public:
virtual ~SharedObject();
/**
* Increments the number of references to this object. Thread-safe.
* Increments the number of hard references to this object. Thread-safe.
* Not for use from within the Unified Cache implementation.
*/
void addRef() const { addRef(FALSE); }
void addRef() const;
/**
* Increments the number of references to this object.
* Must be called only from within the internals of UnifiedCache and
* only while the cache global mutex is held.
* Decrements the number of hard references to this object, and
* arrange for possible cache-eviction and/or deletion if ref
* count goes to zero. Thread-safe.
*
* Not for use from within the UnifiedCache implementation.
*/
void addRefWhileHoldingCacheLock() const { addRef(TRUE); }
void removeRef() const;
/**
* Increments the number of soft references to this object.
* Must be called only from within the internals of UnifiedCache and
* only while the cache global mutex is held.
*/
void addSoftRef() const;
/**
* Decrements the number of references to this object. Thread-safe.
*/
void removeRef() const { removeRef(FALSE); }
/**
* Decrements the number of references to this object.
* Must be called only from within the internals of UnifiedCache and
* only while the cache global mutex is held.
*/
void removeRefWhileHoldingCacheLock() const { removeRef(TRUE); }
/**
* Decrements the number of soft references to this object.
* Must be called only from within the internals of UnifiedCache and
* only while the cache global mutex is held.
*/
void removeSoftRef() const;
/**
* Returns the reference counter including soft references.
* Returns the number of hard references for this object.
* Uses a memory barrier.
*/
int32_t getRefCount() const;
/**
* Returns the count of soft references only.
* Must be called only from within the internals of UnifiedCache and
* only while the cache global mutex is held.
*/
int32_t getSoftRefCount() const { return softRefCount; }
/**
* Returns the count of hard references only. Uses a memory barrier.
* Used for testing the cache. Regular clients won't need this.
*/
int32_t getHardRefCount() const;
/**
* If noHardReferences() == TRUE then this object has no hard references.
* Must be called only from within the internals of UnifiedCache.
*/
inline UBool noHardReferences() const { return getHardRefCount() == 0; }
inline UBool noHardReferences() const { return getRefCount() == 0; }
/**
* If hasHardReferences() == TRUE then this object has hard references.
* Must be called only from within the internals of UnifiedCache.
*/
inline UBool hasHardReferences() const { return getHardRefCount() != 0; }
inline UBool hasHardReferences() const { return getRefCount() != 0; }
/**
* If noSoftReferences() == TRUE then this object has no soft references.
* Must be called only from within the internals of UnifiedCache and
* only while the cache global mutex is held.
*/
UBool noSoftReferences() const { return (softRefCount == 0); }
/**
* Deletes this object if it has no references or soft references.
* Deletes this object if it has no references.
* Available for non-cached SharedObjects only. Ownership of cached objects
* is with the UnifiedCache, which is solely responsible for eviction and deletion.
*/
void deleteIfZeroRefCount() const;
/**
* @internal For UnifedCache use only to register this object with itself.
* Must be called before this object is exposed to multiple threads.
*/
void registerWithCache(const UnifiedCacheBase *ptr) const {
cachePtr = ptr;
}
/**
* Returns a writable version of ptr.
@ -219,15 +161,21 @@ public:
}
private:
mutable u_atomic_int32_t totalRefCount;
// Any thread modifying softRefCount must hold the global cache mutex
/**
* The number of references from the UnifiedCache, which is
* the number of times that the sharedObject is stored as a hash table value.
* For use by UnifiedCache implementation code only.
* All access is synchronized by UnifiedCache's gCacheMutex
*/
mutable int32_t softRefCount;
friend class UnifiedCache;
/**
* Reference count, excluding references from within the UnifiedCache implementation.
*/
mutable u_atomic_int32_t hardRefCount;
mutable const UnifiedCacheBase *cachePtr;
void addRef(UBool withCacheLock) const;
void removeRef(UBool withCacheLock) const;
};

View file

@ -90,7 +90,6 @@ struct UStringPrepProfile{
UTrie sprepTrie;
const uint16_t* mappingData;
UDataMemory* sprepData;
const UBiDiProps *bdp; /* used only if checkBiDi is set */
int32_t refCount;
UBool isDataLoaded;
UBool doNFKC;

View file

@ -152,9 +152,6 @@ ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode)
/* reset the object, all pointers NULL, all flags FALSE, all sizes 0 */
uprv_memset(pBiDi, 0, sizeof(UBiDi));
/* get BiDi properties */
pBiDi->bdp=ubidi_getSingleton();
/* allocate memory for arrays as requested */
if(maxLength>0) {
if( !getInitialDirPropsMemory(pBiDi, maxLength) ||
@ -925,7 +922,7 @@ bracketProcessChar(BracketData *bd, int32_t position) {
else
match=0;
if(match!=c && /* has a matching char */
ubidi_getPairedBracketType(bd->pBiDi->bdp, c)==U_BPT_OPEN) { /* opening bracket */
ubidi_getPairedBracketType(c)==U_BPT_OPEN) { /* opening bracket */
/* special case: process synonyms
create an opening entry for each synonym */
if(match==0x232A) { /* RIGHT-POINTING ANGLE BRACKET */
@ -3033,7 +3030,7 @@ ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c)
if( pBiDi->fnClassCallback == NULL ||
(dir = (*pBiDi->fnClassCallback)(pBiDi->coClassCallback, c)) == U_BIDI_CLASS_DEFAULT )
{
dir = ubidi_getClass(pBiDi->bdp, c);
dir = ubidi_getClass(c);
}
if(dir >= U_CHAR_DIRECTION_COUNT) {
dir = (UCharDirection)ON;

View file

@ -44,13 +44,6 @@ struct UBiDiProps {
#define INCLUDED_FROM_UBIDI_PROPS_C
#include "ubidi_props_data.h"
/* UBiDiProps singleton ----------------------------------------------------- */
U_CFUNC const UBiDiProps *
ubidi_getSingleton() {
return &ubidi_props_singleton;
}
/* set of property starts for UnicodeSet ------------------------------------ */
static UBool U_CALLCONV
@ -64,7 +57,7 @@ _enumPropertyStartsRange(const void *context, UChar32 start, UChar32 end, uint32
}
U_CFUNC void
ubidi_addPropertyStarts(const UBiDiProps *bdp, const USetAdder *sa, UErrorCode *pErrorCode) {
ubidi_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) {
int32_t i, length;
UChar32 c, start, limit;
@ -76,19 +69,19 @@ ubidi_addPropertyStarts(const UBiDiProps *bdp, const USetAdder *sa, UErrorCode *
}
/* add the start code point of each same-value range of the trie */
utrie2_enum(&bdp->trie, NULL, _enumPropertyStartsRange, sa);
utrie2_enum(&ubidi_props_singleton.trie, NULL, _enumPropertyStartsRange, sa);
/* add the code points from the bidi mirroring table */
length=bdp->indexes[UBIDI_IX_MIRROR_LENGTH];
length=ubidi_props_singleton.indexes[UBIDI_IX_MIRROR_LENGTH];
for(i=0; i<length; ++i) {
c=UBIDI_GET_MIRROR_CODE_POINT(bdp->mirrors[i]);
c=UBIDI_GET_MIRROR_CODE_POINT(ubidi_props_singleton.mirrors[i]);
sa->addRange(sa->set, c, c+1);
}
/* add the code points from the Joining_Group array where the value changes */
start=bdp->indexes[UBIDI_IX_JG_START];
limit=bdp->indexes[UBIDI_IX_JG_LIMIT];
jgArray=bdp->jgArray;
start=ubidi_props_singleton.indexes[UBIDI_IX_JG_START];
limit=ubidi_props_singleton.indexes[UBIDI_IX_JG_LIMIT];
jgArray=ubidi_props_singleton.jgArray;
for(;;) {
prev=0;
while(start<limit) {
@ -103,11 +96,11 @@ ubidi_addPropertyStarts(const UBiDiProps *bdp, const USetAdder *sa, UErrorCode *
/* add the limit code point if the last value was not 0 (it is now start==limit) */
sa->add(sa->set, limit);
}
if(limit==bdp->indexes[UBIDI_IX_JG_LIMIT]) {
if(limit==ubidi_props_singleton.indexes[UBIDI_IX_JG_LIMIT]) {
/* switch to the second Joining_Group range */
start=bdp->indexes[UBIDI_IX_JG_START2];
limit=bdp->indexes[UBIDI_IX_JG_LIMIT2];
jgArray=bdp->jgArray2;
start=ubidi_props_singleton.indexes[UBIDI_IX_JG_START2];
limit=ubidi_props_singleton.indexes[UBIDI_IX_JG_LIMIT2];
jgArray=ubidi_props_singleton.jgArray2;
} else {
break;
}
@ -121,14 +114,8 @@ ubidi_addPropertyStarts(const UBiDiProps *bdp, const USetAdder *sa, UErrorCode *
/* property access functions ------------------------------------------------ */
U_CFUNC int32_t
ubidi_getMaxValue(const UBiDiProps *bdp, UProperty which) {
int32_t max;
if(bdp==NULL) {
return -1;
}
max=bdp->indexes[UBIDI_MAX_VALUES_INDEX];
ubidi_getMaxValue(UProperty which) {
int32_t max=ubidi_props_singleton.indexes[UBIDI_MAX_VALUES_INDEX];
switch(which) {
case UCHAR_BIDI_CLASS:
return (max&UBIDI_CLASS_MASK);
@ -144,19 +131,19 @@ ubidi_getMaxValue(const UBiDiProps *bdp, UProperty which) {
}
U_CAPI UCharDirection
ubidi_getClass(const UBiDiProps *bdp, UChar32 c) {
uint16_t props=UTRIE2_GET16(&bdp->trie, c);
ubidi_getClass(UChar32 c) {
uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c);
return (UCharDirection)UBIDI_GET_CLASS(props);
}
U_CFUNC UBool
ubidi_isMirrored(const UBiDiProps *bdp, UChar32 c) {
uint16_t props=UTRIE2_GET16(&bdp->trie, c);
ubidi_isMirrored(UChar32 c) {
uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c);
return (UBool)UBIDI_GET_FLAG(props, UBIDI_IS_MIRRORED_SHIFT);
}
static UChar32
getMirror(const UBiDiProps *bdp, UChar32 c, uint16_t props) {
getMirror(UChar32 c, uint16_t props) {
int32_t delta=UBIDI_GET_MIRROR_DELTA(props);
if(delta!=UBIDI_ESC_MIRROR_DELTA) {
return c+delta;
@ -167,8 +154,8 @@ getMirror(const UBiDiProps *bdp, UChar32 c, uint16_t props) {
int32_t i, length;
UChar32 c2;
mirrors=bdp->mirrors;
length=bdp->indexes[UBIDI_IX_MIRROR_LENGTH];
mirrors=ubidi_props_singleton.mirrors;
length=ubidi_props_singleton.indexes[UBIDI_IX_MIRROR_LENGTH];
/* linear search */
for(i=0; i<length; ++i) {
@ -188,59 +175,59 @@ getMirror(const UBiDiProps *bdp, UChar32 c, uint16_t props) {
}
U_CFUNC UChar32
ubidi_getMirror(const UBiDiProps *bdp, UChar32 c) {
uint16_t props=UTRIE2_GET16(&bdp->trie, c);
return getMirror(bdp, c, props);
ubidi_getMirror(UChar32 c) {
uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c);
return getMirror(c, props);
}
U_CFUNC UBool
ubidi_isBidiControl(const UBiDiProps *bdp, UChar32 c) {
uint16_t props=UTRIE2_GET16(&bdp->trie, c);
ubidi_isBidiControl(UChar32 c) {
uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c);
return (UBool)UBIDI_GET_FLAG(props, UBIDI_BIDI_CONTROL_SHIFT);
}
U_CFUNC UBool
ubidi_isJoinControl(const UBiDiProps *bdp, UChar32 c) {
uint16_t props=UTRIE2_GET16(&bdp->trie, c);
ubidi_isJoinControl(UChar32 c) {
uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c);
return (UBool)UBIDI_GET_FLAG(props, UBIDI_JOIN_CONTROL_SHIFT);
}
U_CFUNC UJoiningType
ubidi_getJoiningType(const UBiDiProps *bdp, UChar32 c) {
uint16_t props=UTRIE2_GET16(&bdp->trie, c);
ubidi_getJoiningType(UChar32 c) {
uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c);
return (UJoiningType)((props&UBIDI_JT_MASK)>>UBIDI_JT_SHIFT);
}
U_CFUNC UJoiningGroup
ubidi_getJoiningGroup(const UBiDiProps *bdp, UChar32 c) {
ubidi_getJoiningGroup(UChar32 c) {
UChar32 start, limit;
start=bdp->indexes[UBIDI_IX_JG_START];
limit=bdp->indexes[UBIDI_IX_JG_LIMIT];
start=ubidi_props_singleton.indexes[UBIDI_IX_JG_START];
limit=ubidi_props_singleton.indexes[UBIDI_IX_JG_LIMIT];
if(start<=c && c<limit) {
return (UJoiningGroup)bdp->jgArray[c-start];
return (UJoiningGroup)ubidi_props_singleton.jgArray[c-start];
}
start=bdp->indexes[UBIDI_IX_JG_START2];
limit=bdp->indexes[UBIDI_IX_JG_LIMIT2];
start=ubidi_props_singleton.indexes[UBIDI_IX_JG_START2];
limit=ubidi_props_singleton.indexes[UBIDI_IX_JG_LIMIT2];
if(start<=c && c<limit) {
return (UJoiningGroup)bdp->jgArray2[c-start];
return (UJoiningGroup)ubidi_props_singleton.jgArray2[c-start];
}
return U_JG_NO_JOINING_GROUP;
}
U_CFUNC UBidiPairedBracketType
ubidi_getPairedBracketType(const UBiDiProps *bdp, UChar32 c) {
uint16_t props=UTRIE2_GET16(&bdp->trie, c);
ubidi_getPairedBracketType(UChar32 c) {
uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c);
return (UBidiPairedBracketType)((props&UBIDI_BPT_MASK)>>UBIDI_BPT_SHIFT);
}
U_CFUNC UChar32
ubidi_getPairedBracket(const UBiDiProps *bdp, UChar32 c) {
uint16_t props=UTRIE2_GET16(&bdp->trie, c);
ubidi_getPairedBracket(UChar32 c) {
uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c);
if((props&UBIDI_BPT_MASK)==0) {
return c;
} else {
return getMirror(bdp, c, props);
return getMirror(c, props);
}
}
@ -248,20 +235,20 @@ ubidi_getPairedBracket(const UBiDiProps *bdp, UChar32 c) {
U_CFUNC UCharDirection
u_charDirection(UChar32 c) {
return ubidi_getClass(&ubidi_props_singleton, c);
return ubidi_getClass(c);
}
U_CFUNC UBool
u_isMirrored(UChar32 c) {
return ubidi_isMirrored(&ubidi_props_singleton, c);
return ubidi_isMirrored(c);
}
U_CFUNC UChar32
u_charMirror(UChar32 c) {
return ubidi_getMirror(&ubidi_props_singleton, c);
return ubidi_getMirror(c);
}
U_STABLE UChar32 U_EXPORT2
u_getBidiPairedBracket(UChar32 c) {
return ubidi_getPairedBracket(&ubidi_props_singleton, c);
return ubidi_getPairedBracket(c);
}

View file

@ -31,46 +31,40 @@ U_CDECL_BEGIN
/* library API -------------------------------------------------------------- */
struct UBiDiProps;
typedef struct UBiDiProps UBiDiProps;
U_CFUNC const UBiDiProps *
ubidi_getSingleton(void);
U_CFUNC void
ubidi_addPropertyStarts(const UBiDiProps *bdp, const USetAdder *sa, UErrorCode *pErrorCode);
ubidi_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode);
/* property access functions */
U_CFUNC int32_t
ubidi_getMaxValue(const UBiDiProps *bdp, UProperty which);
ubidi_getMaxValue(UProperty which);
U_CAPI UCharDirection
ubidi_getClass(const UBiDiProps *bdp, UChar32 c);
ubidi_getClass(UChar32 c);
U_CFUNC UBool
ubidi_isMirrored(const UBiDiProps *bdp, UChar32 c);
ubidi_isMirrored(UChar32 c);
U_CFUNC UChar32
ubidi_getMirror(const UBiDiProps *bdp, UChar32 c);
ubidi_getMirror(UChar32 c);
U_CFUNC UBool
ubidi_isBidiControl(const UBiDiProps *bdp, UChar32 c);
ubidi_isBidiControl(UChar32 c);
U_CFUNC UBool
ubidi_isJoinControl(const UBiDiProps *bdp, UChar32 c);
ubidi_isJoinControl(UChar32 c);
U_CFUNC UJoiningType
ubidi_getJoiningType(const UBiDiProps *bdp, UChar32 c);
ubidi_getJoiningType(UChar32 c);
U_CFUNC UJoiningGroup
ubidi_getJoiningGroup(const UBiDiProps *bdp, UChar32 c);
ubidi_getJoiningGroup(UChar32 c);
U_CFUNC UBidiPairedBracketType
ubidi_getPairedBracketType(const UBiDiProps *bdp, UChar32 c);
ubidi_getPairedBracketType(UChar32 c);
U_CFUNC UChar32
ubidi_getPairedBracket(const UBiDiProps *bdp, UChar32 c);
ubidi_getPairedBracket(UChar32 c);
/* file definitions --------------------------------------------------------- */

View file

@ -254,8 +254,6 @@ struct UBiDi {
*/
const UBiDi * pParaBiDi;
const UBiDiProps *bdp;
/* alias pointer to the current text */
const UChar *text;

View file

@ -77,9 +77,12 @@ ucase_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) {
/* data access primitives --------------------------------------------------- */
#define GET_EXCEPTIONS(csp, props) ((csp)->exceptions+((props)>>UCASE_EXC_SHIFT))
U_CFUNC const UTrie2 * U_EXPORT2
ucase_getTrie() {
return &ucase_props_singleton.trie;
}
#define PROPS_HAS_EXCEPTION(props) ((props)&UCASE_EXCEPTION)
#define GET_EXCEPTIONS(csp, props) ((csp)->exceptions+((props)>>UCASE_EXC_SHIFT))
/* number of bits in an 8-bit integer value */
static const uint8_t flagsOffset[256]={
@ -128,8 +131,8 @@ static const uint8_t flagsOffset[256]={
U_CAPI UChar32 U_EXPORT2
ucase_tolower(UChar32 c) {
uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
if(!PROPS_HAS_EXCEPTION(props)) {
if(UCASE_GET_TYPE(props)>=UCASE_UPPER) {
if(!UCASE_HAS_EXCEPTION(props)) {
if(UCASE_IS_UPPER_OR_TITLE(props)) {
c+=UCASE_GET_DELTA(props);
}
} else {
@ -145,7 +148,7 @@ ucase_tolower(UChar32 c) {
U_CAPI UChar32 U_EXPORT2
ucase_toupper(UChar32 c) {
uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
if(!PROPS_HAS_EXCEPTION(props)) {
if(!UCASE_HAS_EXCEPTION(props)) {
if(UCASE_GET_TYPE(props)==UCASE_LOWER) {
c+=UCASE_GET_DELTA(props);
}
@ -162,7 +165,7 @@ ucase_toupper(UChar32 c) {
U_CAPI UChar32 U_EXPORT2
ucase_totitle(UChar32 c) {
uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
if(!PROPS_HAS_EXCEPTION(props)) {
if(!UCASE_HAS_EXCEPTION(props)) {
if(UCASE_GET_TYPE(props)==UCASE_LOWER) {
c+=UCASE_GET_DELTA(props);
}
@ -223,7 +226,7 @@ ucase_addCaseClosure(UChar32 c, const USetAdder *sa) {
}
props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
if(!PROPS_HAS_EXCEPTION(props)) {
if(!UCASE_HAS_EXCEPTION(props)) {
if(UCASE_GET_TYPE(props)!=UCASE_NONE) {
/* add the one simple case mapping, no matter what type it is */
int32_t delta=UCASE_GET_DELTA(props);
@ -419,6 +422,138 @@ FullCaseFoldingIterator::next(UnicodeString &full) {
return c;
}
namespace LatinCase {
// Delta tables for the fast ASCII/Latin case-mapping paths.
// Each table has LIMIT entries, laid out 16 per row, and is indexed directly
// by code point. An entry is the signed delta to add to the code point;
// 0 means "no mapping" and EXC means the code point must be handled by the
// full (slow-path) case-mapping code instead.

// Lowercasing deltas for most locales; also used for default case folding.
// The -121 entry at index 0x178 maps U+0178 to U+00FF.
const int8_t TO_LOWER_NORMAL[LIMIT] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
32, 32, 32, 32, 32, 32, 32, 0, 32, 32, 32, 32, 32, 32, 32, EXC,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
EXC, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1,
0, 1, 0, 1, 0, 1, 0, 1, 0, EXC, 1, 0, 1, 0, 1, 0,
1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
1, 0, 1, 0, 1, 0, 1, 0, -121, 1, 0, 1, 0, 1, 0, EXC
};
// Lowercasing deltas for tr/az/lt; also used for Turkic case folding.
// Differs from TO_LOWER_NORMAL only in that U+0049, U+004A, U+00CC, U+00CD,
// U+0128 and U+012E are marked EXC.
const int8_t TO_LOWER_TR_LT[LIMIT] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 32, 32, 32, 32, 32, 32, 32, 32, EXC, EXC, 32, 32, 32, 32, 32,
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, EXC, EXC, 32, 32,
32, 32, 32, 32, 32, 32, 32, 0, 32, 32, 32, 32, 32, 32, 32, EXC,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
1, 0, 1, 0, 1, 0, 1, 0, EXC, 0, 1, 0, 1, 0, EXC, 0,
EXC, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1,
0, 1, 0, 1, 0, 1, 0, 1, 0, EXC, 1, 0, 1, 0, 1, 0,
1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
1, 0, 1, 0, 1, 0, 1, 0, -121, 1, 0, 1, 0, 1, 0, EXC
};
// Uppercasing deltas for most locales.
// The 121 entry at index 0xFF maps U+00FF to U+0178.
const int8_t TO_UPPER_NORMAL[LIMIT] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
-32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, EXC,
-32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
-32, -32, -32, -32, -32, -32, -32, 0, -32, -32, -32, -32, -32, -32, -32, 121,
0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
0, EXC, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, 0,
-1, 0, -1, 0, -1, 0, -1, 0, -1, EXC, 0, -1, 0, -1, 0, -1,
0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
0, -1, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, EXC
};
// Uppercasing deltas for tr/az.
// Differs from TO_UPPER_NORMAL only in that U+0069 is marked EXC.
const int8_t TO_UPPER_TR[LIMIT] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, -32, -32, -32, -32, -32, -32, -32, -32, EXC, -32, -32, -32, -32, -32, -32,
-32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, EXC,
-32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
-32, -32, -32, -32, -32, -32, -32, 0, -32, -32, -32, -32, -32, -32, -32, 121,
0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
0, EXC, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, 0,
-1, 0, -1, 0, -1, 0, -1, 0, -1, EXC, 0, -1, 0, -1, 0, -1,
0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
0, -1, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, EXC
};
} // namespace LatinCase
U_NAMESPACE_END
/** @return UCASE_NONE, UCASE_LOWER, UCASE_UPPER, UCASE_TITLE */
@ -439,7 +574,7 @@ ucase_getTypeOrIgnorable(UChar32 c) {
static inline int32_t
getDotType(UChar32 c) {
uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
if(!PROPS_HAS_EXCEPTION(props)) {
if(!UCASE_HAS_EXCEPTION(props)) {
return props&UCASE_DOT_MASK;
} else {
const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
@ -878,8 +1013,8 @@ ucase_toFullLower(UChar32 c,
U_ASSERT(c >= 0);
UChar32 result=c;
uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
if(!PROPS_HAS_EXCEPTION(props)) {
if(UCASE_GET_TYPE(props)>=UCASE_UPPER) {
if(!UCASE_HAS_EXCEPTION(props)) {
if(UCASE_IS_UPPER_OR_TITLE(props)) {
result=c+UCASE_GET_DELTA(props);
}
} else {
@ -1024,7 +1159,7 @@ toUpperOrTitle(UChar32 c,
U_ASSERT(c >= 0);
UChar32 result=c;
uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
if(!PROPS_HAS_EXCEPTION(props)) {
if(!UCASE_HAS_EXCEPTION(props)) {
if(UCASE_GET_TYPE(props)==UCASE_LOWER) {
result=c+UCASE_GET_DELTA(props);
}
@ -1169,8 +1304,8 @@ ucase_toFullTitle(UChar32 c,
U_CAPI UChar32 U_EXPORT2
ucase_fold(UChar32 c, uint32_t options) {
uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
if(!PROPS_HAS_EXCEPTION(props)) {
if(UCASE_GET_TYPE(props)>=UCASE_UPPER) {
if(!UCASE_HAS_EXCEPTION(props)) {
if(UCASE_IS_UPPER_OR_TITLE(props)) {
c+=UCASE_GET_DELTA(props);
}
} else {
@ -1234,8 +1369,8 @@ ucase_toFullFolding(UChar32 c,
U_ASSERT(c >= 0);
UChar32 result=c;
uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
if(!PROPS_HAS_EXCEPTION(props)) {
if(UCASE_GET_TYPE(props)>=UCASE_UPPER) {
if(!UCASE_HAS_EXCEPTION(props)) {
if(UCASE_IS_UPPER_OR_TITLE(props)) {
result=c+UCASE_GET_DELTA(props);
}
} else {

View file

@ -26,6 +26,7 @@
#include "putilimp.h"
#include "uset_imp.h"
#include "udataswp.h"
#include "utrie2.h"
#ifdef __cplusplus
U_NAMESPACE_BEGIN
@ -148,6 +149,33 @@ private:
int32_t rowCpIndex;
};
/**
* Fast case mapping data for ASCII/Latin.
* Linear arrays of delta bytes: 0=no mapping; EXC=exception.
* Deltas must not cross the ASCII boundary, or else they cannot be easily used
* in simple UTF-8 code.
*/
namespace LatinCase {
/**
 * Number of entries in each table below (exclusive code point limit):
 * the tables hold case mapping/folding data for code points up to U+017F.
 */
constexpr UChar LIMIT = 0x180;
/** U+017F: its case folding and its uppercasing cross the ASCII boundary. */
constexpr UChar LONG_S = 0x17f;
/** Table marker for an exception: complex mapping, or too-large delta. */
constexpr int8_t EXC = -0x80;
/** Deltas for lowercasing for most locales, and for default case folding. */
extern const int8_t TO_LOWER_NORMAL[LIMIT];
/** Deltas for lowercasing for tr/az/lt, and for Turkic case folding. */
extern const int8_t TO_LOWER_TR_LT[LIMIT];
/** Deltas for uppercasing for most locales. */
extern const int8_t TO_UPPER_NORMAL[LIMIT];
/** Deltas for uppercasing for tr/az. */
extern const int8_t TO_UPPER_TR[LIMIT];
} // namespace LatinCase
U_NAMESPACE_END
#endif
@ -308,6 +336,9 @@ enum {
/* definitions for 16-bit case properties word ------------------------------ */
U_CFUNC const UTrie2 * U_EXPORT2
ucase_getTrie();
/* 2-bit constants for types of cased characters */
#define UCASE_TYPE_MASK 3
enum {
@ -320,10 +351,14 @@ enum {
#define UCASE_GET_TYPE(props) ((props)&UCASE_TYPE_MASK)
#define UCASE_GET_TYPE_AND_IGNORABLE(props) ((props)&7)
#define UCASE_IS_UPPER_OR_TITLE(props) ((props)&2)
#define UCASE_IGNORABLE 4
#define UCASE_SENSITIVE 8
#define UCASE_EXCEPTION 0x10
#define UCASE_HAS_EXCEPTION(props) ((props)&UCASE_EXCEPTION)
#define UCASE_DOT_MASK 0x60
enum {
UCASE_NO_DOT=0, /* normal characters with cc=0 */

View file

@ -165,9 +165,7 @@ appendResult(int32_t cpLength, int32_t result, const UChar *s,
/** Lead byte of a two-byte UTF-8 sequence for c: the 0xc0 marker combined with the bits above the low six. */
inline uint8_t getTwoByteLead(UChar32 c) {
    return static_cast<uint8_t>(0xc0 | (c >> 6));
}
/** Trail byte of a two-byte UTF-8 sequence for c: the 0x80 marker combined with the low six bits. */
inline uint8_t getTwoByteTrail(UChar32 c) {
    return static_cast<uint8_t>(0x80 | (c & 0x3f));
}
} // namespace
static UChar32 U_CALLCONV
UChar32 U_CALLCONV
utf8_caseContextIterator(void *context, int8_t dir) {
UCaseContext *csc=(UCaseContext *)context;
UChar32 c;
@ -199,36 +197,227 @@ utf8_caseContextIterator(void *context, int8_t dir) {
return U_SENTINEL;
}
/*
* Case-maps [srcStart..srcLimit[ but takes
* context [0..srcLength[ into account.
/**
* caseLocale >= 0: Lowercases [srcStart..srcLimit[ but takes context [0..srcLength[ into account.
* caseLocale < 0: Case-folds [srcStart..srcLimit[.
*/
static void
_caseMap(int32_t caseLocale, uint32_t options, UCaseMapFull *map,
const uint8_t *src, UCaseContext *csc,
int32_t srcStart, int32_t srcLimit,
icu::ByteSink &sink, icu::Edits *edits,
UErrorCode &errorCode) {
/* case mapping loop */
int32_t srcIndex=srcStart;
while (U_SUCCESS(errorCode) && srcIndex<srcLimit) {
void toLower(int32_t caseLocale, uint32_t options,
const uint8_t *src, UCaseContext *csc, int32_t srcStart, int32_t srcLimit,
icu::ByteSink &sink, icu::Edits *edits, UErrorCode &errorCode) {
const int8_t *latinToLower;
if (caseLocale == UCASE_LOC_ROOT ||
(caseLocale >= 0 ?
!(caseLocale == UCASE_LOC_TURKISH || caseLocale == UCASE_LOC_LITHUANIAN) :
(options & _FOLD_CASE_OPTIONS_MASK) == U_FOLD_CASE_DEFAULT)) {
latinToLower = LatinCase::TO_LOWER_NORMAL;
} else {
latinToLower = LatinCase::TO_LOWER_TR_LT;
}
const UTrie2 *trie = ucase_getTrie();
int32_t prev = srcStart;
int32_t srcIndex = srcStart;
for (;;) {
// fast path for simple cases
int32_t cpStart;
csc->cpStart=cpStart=srcIndex;
UChar32 c;
U8_NEXT(src, srcIndex, srcLimit, c);
csc->cpLimit=srcIndex;
if(c<0) {
// Malformed UTF-8.
ByteSinkUtil::appendUnchanged(src+cpStart, srcIndex-cpStart,
for (;;) {
if (U_FAILURE(errorCode) || srcIndex >= srcLimit) {
c = U_SENTINEL;
break;
}
uint8_t lead = src[srcIndex++];
if (lead <= 0x7f) {
int8_t d = latinToLower[lead];
if (d == LatinCase::EXC) {
cpStart = srcIndex - 1;
c = lead;
break;
}
if (d == 0) { continue; }
ByteSinkUtil::appendUnchanged(src + prev, srcIndex - 1 - prev,
sink, options, edits, errorCode);
char ascii = (char)(lead + d);
sink.Append(&ascii, 1);
if (edits != nullptr) {
edits->addReplace(1, 1);
}
prev = srcIndex;
continue;
} else if (lead < 0xe3) {
uint8_t t;
if (0xc2 <= lead && lead <= 0xc5 && srcIndex < srcLimit &&
(t = src[srcIndex] - 0x80) <= 0x3f) {
// U+0080..U+017F
++srcIndex;
c = ((lead - 0xc0) << 6) | t;
int8_t d = latinToLower[c];
if (d == LatinCase::EXC) {
cpStart = srcIndex - 2;
break;
}
if (d == 0) { continue; }
ByteSinkUtil::appendUnchanged(src + prev, srcIndex - 2 - prev,
sink, options, edits, errorCode);
ByteSinkUtil::appendTwoBytes(c + d, sink);
if (edits != nullptr) {
edits->addReplace(2, 2);
}
prev = srcIndex;
continue;
}
} else if ((lead <= 0xe9 || lead == 0xeb || lead == 0xec) &&
(srcIndex + 2) <= srcLimit &&
U8_IS_TRAIL(src[srcIndex]) && U8_IS_TRAIL(src[srcIndex + 1])) {
// most of CJK: no case mappings
srcIndex += 2;
continue;
}
cpStart = --srcIndex;
U8_NEXT(src, srcIndex, srcLimit, c);
if (c < 0) {
// ill-formed UTF-8
continue;
}
uint16_t props = UTRIE2_GET16(trie, c);
if (UCASE_HAS_EXCEPTION(props)) { break; }
int32_t delta;
if (!UCASE_IS_UPPER_OR_TITLE(props) || (delta = UCASE_GET_DELTA(props)) == 0) {
continue;
}
ByteSinkUtil::appendUnchanged(src + prev, cpStart - prev,
sink, options, edits, errorCode);
ByteSinkUtil::appendCodePoint(srcIndex - cpStart, c + delta, sink, edits);
prev = srcIndex;
}
if (c < 0) {
break;
}
// slow path
const UChar *s;
if (caseLocale >= 0) {
csc->cpStart = cpStart;
csc->cpLimit = srcIndex;
c = ucase_toFullLower(c, utf8_caseContextIterator, csc, &s, caseLocale);
} else {
const UChar *s;
c=map(c, utf8_caseContextIterator, csc, &s, caseLocale);
c = ucase_toFullFolding(c, &s, options);
}
if (c >= 0) {
ByteSinkUtil::appendUnchanged(src + prev, cpStart - prev,
sink, options, edits, errorCode);
appendResult(srcIndex - cpStart, c, s, sink, options, edits, errorCode);
prev = srcIndex;
}
}
ByteSinkUtil::appendUnchanged(src + prev, srcIndex - prev,
sink, options, edits, errorCode);
}
/**
 * Uppercases the UTF-8 text src[0..srcLength[ and writes the result to sink.
 *
 * Bytes that need no change are not copied one by one: `prev` marks the start
 * of the pending unchanged run, which is flushed with appendUnchanged() just
 * before each changed piece is written and once more at the end.
 *
 * @param caseLocale UCASE_LOC_TURKISH selects the tr/az Latin table;
 *                   the value is also passed on to ucase_toFullUpper()
 * @param options    passed through to the ByteSinkUtil helpers and appendResult()
 * @param src        UTF-8 input
 * @param csc        case context; cpStart/cpLimit are set here before each
 *                   slow-path call (dereferenced, so it must not be null)
 * @param srcLength  number of bytes in src
 * @param sink       receives the output bytes
 * @param edits      optional edit log; may be nullptr
 * @param errorCode  processing stops once this holds a failure
 */
void toUpper(int32_t caseLocale, uint32_t options,
const uint8_t *src, UCaseContext *csc, int32_t srcLength,
icu::ByteSink &sink, icu::Edits *edits, UErrorCode &errorCode) {
// Pick the delta table for code points below LatinCase::LIMIT.
const int8_t *latinToUpper;
if (caseLocale == UCASE_LOC_TURKISH) {
latinToUpper = LatinCase::TO_UPPER_TR;
} else {
latinToUpper = LatinCase::TO_UPPER_NORMAL;
}
const UTrie2 *trie = ucase_getTrie();
// prev: start of the unchanged run not yet written to the sink.
int32_t prev = 0;
int32_t srcIndex = 0;
for (;;) {
// fast path for simple cases
// The inner loop exits with c >= 0 (and cpStart set) when a code point
// needs the slow path, or with c == U_SENTINEL at end of input / on error.
int32_t cpStart;
UChar32 c;
for (;;) {
if (U_FAILURE(errorCode) || srcIndex >= srcLength) {
c = U_SENTINEL;
break;
}
uint8_t lead = src[srcIndex++];
if (lead <= 0x7f) {
// ASCII: direct table lookup by byte value.
int8_t d = latinToUpper[lead];
if (d == LatinCase::EXC) {
cpStart = srcIndex - 1;
c = lead;
break;
}
// No mapping: the byte stays part of the pending unchanged run.
if (d == 0) { continue; }
// Flush the unchanged run up to this byte, then emit the mapped byte.
ByteSinkUtil::appendUnchanged(src + prev, srcIndex - 1 - prev,
sink, options, edits, errorCode);
char ascii = (char)(lead + d);
sink.Append(&ascii, 1);
if (edits != nullptr) {
edits->addReplace(1, 1);
}
prev = srcIndex;
continue;
} else if (lead < 0xe3) {
uint8_t t;
// Lead bytes 0xc2..0xc5 followed by a trail byte; since t is unsigned,
// the single comparison t <= 0x3f accepts exactly 0x80..0xbf.
if (0xc2 <= lead && lead <= 0xc5 && srcIndex < srcLength &&
(t = src[srcIndex] - 0x80) <= 0x3f) {
// U+0080..U+017F
++srcIndex;
c = ((lead - 0xc0) << 6) | t;
int8_t d = latinToUpper[c];
if (d == LatinCase::EXC) {
cpStart = srcIndex - 2;
break;
}
if (d == 0) { continue; }
// Two bytes in, two bytes out (appendTwoBytes), recorded as a 2:2 replacement.
ByteSinkUtil::appendUnchanged(src + prev, srcIndex - 2 - prev,
sink, options, edits, errorCode);
ByteSinkUtil::appendTwoBytes(c + d, sink);
if (edits != nullptr) {
edits->addReplace(2, 2);
}
prev = srcIndex;
continue;
}
// Any other lead below 0xe3 falls through to the generic decoding below.
} else if ((lead <= 0xe9 || lead == 0xeb || lead == 0xec) &&
(srcIndex + 2) <= srcLength &&
U8_IS_TRAIL(src[srcIndex]) && U8_IS_TRAIL(src[srcIndex + 1])) {
// most of CJK: no case mappings
// (lead is 0xe3..0xe9, 0xeb or 0xec here; the sequence is skipped without a lookup)
srcIndex += 2;
continue;
}
// Generic path: step back to the lead byte and decode the code point.
cpStart = --srcIndex;
U8_NEXT(src, srcIndex, srcLength, c);
if (c < 0) {
// ill-formed UTF-8
// (left in the pending unchanged run)
continue;
}
uint16_t props = UTRIE2_GET16(trie, c);
// Code points with exception data go to the slow path.
if (UCASE_HAS_EXCEPTION(props)) { break; }
int32_t delta;
// Only lowercase code points with a nonzero delta change here.
if (UCASE_GET_TYPE(props) != UCASE_LOWER || (delta = UCASE_GET_DELTA(props)) == 0) {
continue;
}
ByteSinkUtil::appendUnchanged(src + prev, cpStart - prev,
sink, options, edits, errorCode);
ByteSinkUtil::appendCodePoint(srcIndex - cpStart, c + delta, sink, edits);
prev = srcIndex;
}
// End of input or error reported by the inner loop.
if (c < 0) {
break;
}
// slow path
// Tell the context iterator which code point is being mapped.
csc->cpStart = cpStart;
csc->cpLimit = srcIndex;
const UChar *s;
c = ucase_toFullUpper(c, utf8_caseContextIterator, csc, &s, caseLocale);
// NOTE(review): a negative result presumably means "no change" — confirm against
// ucase_toFullUpper(); in that case the code point stays in the unchanged run.
if (c >= 0) {
ByteSinkUtil::appendUnchanged(src + prev, cpStart - prev,
sink, options, edits, errorCode);
appendResult(srcIndex - cpStart, c, s, sink, options, edits, errorCode);
prev = srcIndex;
}
}
// Flush whatever unchanged tail remains.
ByteSinkUtil::appendUnchanged(src + prev, srcIndex - prev,
sink, options, edits, errorCode);
}
} // namespace
#if !UCONFIG_NO_BREAK_ITERATION
U_CFUNC void U_CALLCONV
@ -335,10 +524,9 @@ ucasemap_internalUTF8ToTitle(
if(titleLimit<index) {
if((options&U_TITLECASE_NO_LOWERCASE)==0) {
/* Normal operation: Lowercase the rest of the word. */
_caseMap(caseLocale, options, ucase_toFullLower,
src, &csc,
titleLimit, index,
sink, edits, errorCode);
toLower(caseLocale, options,
src, &csc, titleLimit, index,
sink, edits, errorCode);
if(U_FAILURE(errorCode)) {
return;
}
@ -538,8 +726,8 @@ ucasemap_internalUTF8ToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREA
UCaseContext csc=UCASECONTEXT_INITIALIZER;
csc.p=(void *)src;
csc.limit=srcLength;
_caseMap(
caseLocale, options, ucase_toFullLower,
toLower(
caseLocale, options,
src, &csc, 0, srcLength,
sink, edits, errorCode);
}
@ -555,9 +743,9 @@ ucasemap_internalUTF8ToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREA
UCaseContext csc=UCASECONTEXT_INITIALIZER;
csc.p=(void *)src;
csc.limit=srcLength;
_caseMap(
caseLocale, options, ucase_toFullUpper,
src, &csc, 0, srcLength,
toUpper(
caseLocale, options,
src, &csc, srcLength,
sink, edits, errorCode);
}
}
@ -567,22 +755,10 @@ ucasemap_internalUTF8Fold(int32_t /* caseLocale */, uint32_t options, UCASEMAP_B
const uint8_t *src, int32_t srcLength,
icu::ByteSink &sink, icu::Edits *edits,
UErrorCode &errorCode) {
/* case mapping loop */
int32_t srcIndex = 0;
while (U_SUCCESS(errorCode) && srcIndex < srcLength) {
int32_t cpStart = srcIndex;
UChar32 c;
U8_NEXT(src, srcIndex, srcLength, c);
if(c<0) {
// Malformed UTF-8.
ByteSinkUtil::appendUnchanged(src+cpStart, srcIndex-cpStart,
sink, options, edits, errorCode);
} else {
const UChar *s;
c = ucase_toFullFolding(c, &s, options);
appendResult(srcIndex - cpStart, c, s, sink, options, edits, errorCode);
}
}
toLower(
-1, options,
src, nullptr, 0, srcLength,
sink, edits, errorCode);
}
void

View file

@ -60,15 +60,6 @@ u_caseInsensitivePrefixMatch(const UChar *s1, int32_t length1,
int32_t *matchLen1, int32_t *matchLen2,
UErrorCode *pErrorCode);
/**
* Are the Unicode properties loaded?
* This must be used before internal functions are called that do
* not perform this check.
* Generate a debug assertion failure if data is not loaded.
*/
U_CFUNC UBool
uprv_haveProperties(UErrorCode *pErrorCode);
#ifdef __cplusplus
U_NAMESPACE_BEGIN

View file

@ -42,14 +42,6 @@
/* getting a uint32_t properties word from the data */
#define GET_PROPS(c, result) ((result)=UTRIE2_GET16(&propsTrie, c));
/**
 * Reports whether property data may be used: FALSE when *pErrorCode already
 * holds a failure, TRUE otherwise. No other check is performed.
 */
U_CFUNC UBool
uprv_haveProperties(UErrorCode *pErrorCode) {
    return U_FAILURE(*pErrorCode) ? FALSE : TRUE;
}
/* API functions ------------------------------------------------------------ */
/* Gets the Unicode character's general category.*/

View file

@ -77,7 +77,11 @@ typedef struct {
typedef struct {
uint32_t count;
uint32_t reserved;
PointerTOCEntry entry[2]; /* Actual size is from count. */
/**
* Variable-length array declared with length 1 to disable bounds checkers.
* The actual array length is in the count field.
*/
PointerTOCEntry entry[1];
} PointerTOC;

View file

@ -52,7 +52,11 @@ typedef struct {
typedef struct {
uint32_t count;
UDataOffsetTOCEntry entry[2]; /* Actual size of array is from count. */
/**
* Variable-length array declared with length 1 to disable bounds checkers.
* The actual array length is in the count field.
*/
UDataOffsetTOCEntry entry[1];
} UDataOffsetTOC;
/**

View file

@ -3512,14 +3512,14 @@ _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorC
case 'k':
if(myConverterData->version == 0) {
if(length == 1) {
if((UBool)args->converter->fromUnicodeStatus) {
if(args->converter->fromUnicodeStatus) {
/* in DBCS mode: switch to SBCS */
args->converter->fromUnicodeStatus = 0;
*p++ = UCNV_SI;
}
*p++ = subchar[0];
} else /* length == 2*/ {
if(!(UBool)args->converter->fromUnicodeStatus) {
if(!args->converter->fromUnicodeStatus) {
/* in SBCS mode: switch to DBCS */
args->converter->fromUnicodeStatus = 1;
*p++ = UCNV_SO;

View file

@ -60,11 +60,12 @@
* To avoid dependency on other code, this list is hard coded here.
* When an ignorable code point is found and is unmappable, the default callbacks
* will ignore them.
* For a list of the default ignorable code points, use this link: http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[%3ADI%3A]&g=
* For a list of the default ignorable code points, use this link:
* https://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5B%3ADI%3A%5D&abb=on&g=&i=
*
* This list should be kept in sync with the one in CharsetCallback.java
*/
#define IS_DEFAULT_IGNORABLE_CODE_POINT(c) (\
#define IS_DEFAULT_IGNORABLE_CODE_POINT(c) ( \
(c == 0x00AD) || \
(c == 0x034F) || \
(c == 0x061C) || \
@ -74,26 +75,15 @@
(0x180B <= c && c <= 0x180E) || \
(0x200B <= c && c <= 0x200F) || \
(0x202A <= c && c <= 0x202E) || \
(c == 0x2060) || \
(0x2066 <= c && c <= 0x2069) || \
(0x2061 <= c && c <= 0x2064) || \
(0x206A <= c && c <= 0x206F) || \
(0x2060 <= c && c <= 0x206F) || \
(c == 0x3164) || \
(0x0FE00 <= c && c <= 0x0FE0F) || \
(c == 0x0FEFF) || \
(c == 0x0FFA0) || \
(0x01BCA0 <= c && c <= 0x01BCA3) || \
(0x01D173 <= c && c <= 0x01D17A) || \
(c == 0x0E0001) || \
(0x0E0020 <= c && c <= 0x0E007F) || \
(0x0E0100 <= c && c <= 0x0E01EF) || \
(c == 0x2065) || \
(0x0FFF0 <= c && c <= 0x0FFF8) || \
(c == 0x0E0000) || \
(0x0E0002 <= c && c <= 0x0E001F) || \
(0x0E0080 <= c && c <= 0x0E00FF) || \
(0x0E01F0 <= c && c <= 0x0E0FFF) \
)
(0xFE00 <= c && c <= 0xFE0F) || \
(c == 0xFEFF) || \
(c == 0xFFA0) || \
(0xFFF0 <= c && c <= 0xFFF8) || \
(0x1BCA0 <= c && c <= 0x1BCA3) || \
(0x1D173 <= c && c <= 0x1D17A) || \
(0xE0000 <= c && c <= 0xE0FFF))
/*Function Pointer STOPS at the ILLEGAL_SEQUENCE */

View file

@ -55,7 +55,7 @@ T_UConverter_toUnicode_UTF32_BE(UConverterToUnicodeArgs * args,
uint32_t ch, i;
/* Restore state of current sequence */
if (args->converter->toUnicodeStatus && myTarget < targetLimit) {
if (args->converter->toULength > 0 && myTarget < targetLimit) {
i = args->converter->toULength; /* restore # of bytes consumed */
args->converter->toULength = 0;
@ -136,7 +136,7 @@ T_UConverter_toUnicode_UTF32_BE_OFFSET_LOGIC(UConverterToUnicodeArgs * args,
int32_t offsetNum = 0;
/* Restore state of current sequence */
if (args->converter->toUnicodeStatus && myTarget < targetLimit) {
if (args->converter->toULength > 0 && myTarget < targetLimit) {
i = args->converter->toULength; /* restore # of bytes consumed */
args->converter->toULength = 0;
@ -517,7 +517,7 @@ T_UConverter_toUnicode_UTF32_LE(UConverterToUnicodeArgs * args,
uint32_t ch, i;
/* Restore state of current sequence */
if (args->converter->toUnicodeStatus && myTarget < targetLimit)
if (args->converter->toULength > 0 && myTarget < targetLimit)
{
i = args->converter->toULength; /* restore # of bytes consumed */
args->converter->toULength = 0;
@ -604,7 +604,7 @@ T_UConverter_toUnicode_UTF32_LE_OFFSET_LOGIC(UConverterToUnicodeArgs * args,
int32_t offsetNum = 0;
/* Restore state of current sequence */
if (args->converter->toUnicodeStatus && myTarget < targetLimit)
if (args->converter->toULength > 0 && myTarget < targetLimit)
{
i = args->converter->toULength; /* restore # of bytes consumed */
args->converter->toULength = 0;

View file

@ -76,7 +76,7 @@ static void U_CALLCONV ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args,
int32_t i, inBytes;
/* Restore size of current sequence */
if (cnv->toUnicodeStatus && myTarget < targetLimit)
if (cnv->toULength > 0 && myTarget < targetLimit)
{
inBytes = cnv->mode; /* restore # of bytes to consume */
i = cnv->toULength; /* restore # of bytes consumed */
@ -194,7 +194,7 @@ static void U_CALLCONV ucnv_toUnicode_UTF8_OFFSETS_LOGIC (UConverterToUnicodeAr
int32_t i, inBytes;
/* Restore size of current sequence */
if (cnv->toUnicodeStatus && myTarget < targetLimit)
if (cnv->toULength > 0 && myTarget < targetLimit)
{
inBytes = cnv->mode; /* restore # of bytes to consume */
i = cnv->toULength; /* restore # of bytes consumed */
@ -670,12 +670,13 @@ ucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
/* get the converter state from the UTF-8 UConverter */
c=(UChar32)utf8->toUnicodeStatus;
if(c!=0) {
if(utf8->toULength > 0) {
toULength=oldToULength=utf8->toULength;
toULimit=(int8_t)utf8->mode;
c=(UChar32)utf8->toUnicodeStatus;
} else {
toULength=oldToULength=toULimit=0;
c = 0;
}
count=(int32_t)(sourceLimit-source)+oldToULength;
@ -695,36 +696,20 @@ ucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
// Use a single counter for source and target, counting the minimum of
// the source length and the target capacity.
// Let the standard converter handle edge cases.
const uint8_t *limit=sourceLimit;
if(count>targetCapacity) {
limit-=(count-targetCapacity);
count=targetCapacity;
}
// The conversion loop checks count>0 only once per 1/2/3-byte character.
// If the buffer ends with a truncated 2- or 3-byte sequence,
// The conversion loop checks count>0 only once per character.
// If the buffer ends with a truncated sequence,
// then we reduce the count to stop before that,
// and collect the remaining bytes after the conversion loop.
{
// Do not go back into the bytes that will be read for finishing a partial
// sequence from the previous buffer.
int32_t length=count-toULimit;
if(length>0) {
uint8_t b1=*(limit-1);
if(U8_IS_SINGLE(b1)) {
// common ASCII character
} else if(U8_IS_TRAIL(b1) && length>=2) {
uint8_t b2=*(limit-2);
if(0xe0<=b2 && b2<0xf0 && U8_IS_VALID_LEAD3_AND_T1(b2, b1)) {
// truncated 3-byte sequence
count-=2;
}
} else if(0xc2<=b1 && b1<0xf0) {
// truncated 2- or 3-byte sequence
--count;
}
}
}
// Do not go back into the bytes that will be read for finishing a partial
// sequence from the previous buffer.
int32_t length=count-toULimit;
U8_TRUNCATE_IF_INCOMPLETE(source, 0, length);
count=toULimit+length;
}
if(c!=0) {
@ -814,7 +799,7 @@ moreBytes:
}
/* copy the legal byte sequence to the target */
if(count>=toULength) {
{
int8_t i;
for(i=0; i<oldToULength; ++i) {
@ -825,14 +810,6 @@ moreBytes:
*target++=*source++;
}
count-=toULength;
} else {
// A supplementary character that does not fit into the target.
// Let the standard converter handle this.
source-=(toULength-oldToULength);
pToUArgs->source=(char *)source;
pFromUArgs->target=(char *)target;
*pErrorCode=U_USING_DEFAULT_WARNING;
return;
}
}
}
@ -856,8 +833,7 @@ moreBytes:
utf8->toULength=toULength;
utf8->mode=toULimit;
break;
} else if(!U8_IS_TRAIL(b=*source)) {
/* lead byte in trail byte position */
} else if(!icu::UTF8::isValidTrail(c, b=*source, toULength, toULimit)) {
utf8->toULength=toULength;
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
break;

View file

@ -340,7 +340,11 @@ ucnv_Latin1FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
/* get the converter state from the UTF-8 UConverter */
c=(UChar32)utf8->toUnicodeStatus;
if (utf8->toULength > 0) {
c=(UChar32)utf8->toUnicodeStatus;
} else {
c = 0;
}
if(c!=0 && source<sourceLimit) {
if(targetCapacity==0) {
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
@ -620,7 +624,7 @@ ucnv_ASCIIFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
uint8_t c;
if(pToUArgs->converter->toUnicodeStatus!=0) {
if(pToUArgs->converter->toULength > 0) {
/* no handling of partial UTF-8 characters here, fall back to pivoting */
*pErrorCode=U_USING_DEFAULT_WARNING;
return;

View file

@ -5064,12 +5064,13 @@ ucnv_SBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
hasSupplementary=(UBool)(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY);
/* get the converter state from the UTF-8 UConverter */
c=(UChar32)utf8->toUnicodeStatus;
if(c!=0) {
if(utf8->toULength > 0) {
toULength=oldToULength=utf8->toULength;
toULimit=(int8_t)utf8->mode;
c=(UChar32)utf8->toUnicodeStatus;
} else {
toULength=oldToULength=toULimit=0;
c = 0;
}
// The conversion loop checks source<sourceLimit only once per 1/2/3-byte character.
@ -5359,12 +5360,13 @@ ucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
hasSupplementary=(UBool)(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY);
/* get the converter state from the UTF-8 UConverter */
c=(UChar32)utf8->toUnicodeStatus;
if(c!=0) {
if(utf8->toULength > 0) {
toULength=oldToULength=utf8->toULength;
toULimit=(int8_t)utf8->mode;
c=(UChar32)utf8->toUnicodeStatus;
} else {
toULength=oldToULength=toULimit=0;
c = 0;
}
// The conversion loop checks source<sourceLimit only once per 1/2/3-byte character.

View file

@ -17,6 +17,7 @@
#include "unicode/ustring.h"
#include "unicode/parsepos.h"
#include "ustr_imp.h"
#include "charstr.h"
#include "cmemory.h"
#include "cstring.h"
#include "uassert.h"
@ -28,9 +29,12 @@
#include "uinvchar.h"
#include "uresimp.h"
#include "ulist.h"
#include "uresimp.h"
#include "ureslocs.h"
#include "ulocimp.h"
using namespace icu;
//#define UCURR_DEBUG_EQUIV 1
#ifdef UCURR_DEBUG_EQUIV
#include "stdio.h"
@ -104,6 +108,7 @@ static const char VAR_DELIM_STR[] = "_";
// Tag for localized display names (symbols) of currencies
static const char CURRENCIES[] = "Currencies";
static const char CURRENCIES_NARROW[] = "Currencies%narrow";
static const char CURRENCYPLURALS[] = "CurrencyPlurals";
static const UChar EUR_STR[] = {0x0045,0x0055,0x0052,0};
@ -698,7 +703,7 @@ ucurr_getName(const UChar* currency,
}
int32_t choice = (int32_t) nameStyle;
if (choice < 0 || choice > 1) {
if (choice < 0 || choice > 2) {
*ec = U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}
@ -731,15 +736,19 @@ ucurr_getName(const UChar* currency,
const UChar* s = NULL;
ec2 = U_ZERO_ERROR;
UResourceBundle* rb = ures_open(U_ICUDATA_CURR, loc, &ec2);
LocalUResourceBundlePointer rb(ures_open(U_ICUDATA_CURR, loc, &ec2));
rb = ures_getByKey(rb, CURRENCIES, rb, &ec2);
// Fetch resource with multi-level resource inheritance fallback
rb = ures_getByKeyWithFallback(rb, buf, rb, &ec2);
s = ures_getStringByIndex(rb, choice, len, &ec2);
ures_close(rb);
if (nameStyle == UCURR_NARROW_SYMBOL_NAME) {
CharString key;
key.append(CURRENCIES_NARROW, ec2);
key.append("/", ec2);
key.append(buf, ec2);
s = ures_getStringByKeyWithFallback(rb.getAlias(), key.data(), len, &ec2);
} else {
ures_getByKey(rb.getAlias(), CURRENCIES, rb.getAlias(), &ec2);
ures_getByKeyWithFallback(rb.getAlias(), buf, rb.getAlias(), &ec2);
s = ures_getStringByIndex(rb.getAlias(), choice, len, &ec2);
}
// If we've succeeded we're done. Otherwise, try to fallback.
// If that fails (because we are already at root) then exit.

View file

@ -298,15 +298,14 @@ public:
virtual int32_t next(int32_t n) = 0;
/**
* For RuleBasedBreakIterators, return the status tag from the
* break rule that determined the most recently
* returned break position.
* For RuleBasedBreakIterators, return the status tag from the break rule
* that determined the boundary at the current iteration position.
* <p>
* For break iterator types that do not support a rule status,
* a default value of 0 is returned.
* <p>
* @return the status from the break rule that determined the most recently
* returned break position.
* @return the status from the break rule that determined the boundary at
* the current iteration position.
* @see RuleBasedBreakIterator::getRuleStatus()
* @see UWordBreak
* @stable ICU 52
@ -315,7 +314,7 @@ public:
/**
* For RuleBasedBreakIterators, get the status (tag) values from the break rule(s)
* that determined the most recently returned break position.
* that determined the boundary at the current iteration position.
* <p>
* For break iterator types that do not support rule status,
* no values are returned.
@ -334,7 +333,7 @@ public:
* normal way, without attempting to store any values.
* @param status receives error codes.
* @return The number of rule status values from rules that determined
* the most recent boundary returned by the break iterator.
* the boundary at the current iteration position.
* In the event of a U_BUFFER_OVERFLOW_ERROR, the return value
* is the total number of status values that were available,
* not the reduced number that were actually returned.
@ -616,7 +615,7 @@ public:
virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) = 0;
private:
static BreakIterator* buildInstance(const Locale& loc, const char *type, int32_t kind, UErrorCode& status);
static BreakIterator* buildInstance(const Locale& loc, const char *type, UErrorCode& status);
static BreakIterator* createInstance(const Locale& loc, int32_t kind, UErrorCode& status);
static BreakIterator* makeInstance(const Locale& loc, int32_t kind, UErrorCode& status);

View file

@ -154,7 +154,6 @@ private:
const char *s;
};
// don't use #ifndef U_HIDE_INTERNAL_API with private class members or virtual methods.
virtual Node *createLinearMatchNode(int32_t i, int32_t byteIndex, int32_t length,
Node *nextNode) const;

View file

@ -18,8 +18,6 @@
U_NAMESPACE_BEGIN
#ifndef U_HIDE_DRAFT_API
class BreakIterator;
class ByteSink;
class Edits;
@ -27,7 +25,7 @@ class Edits;
/**
* Low-level C++ case mapping functions.
*
* @draft ICU 59
* @stable ICU 59
*/
class U_COMMON_API CaseMap U_FINAL : public UMemory {
public:
@ -59,7 +57,7 @@ public:
* the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
*
* @see u_strToLower
* @draft ICU 59
* @stable ICU 59
*/
static int32_t toLower(
const char *locale, uint32_t options,
@ -95,7 +93,7 @@ public:
* the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
*
* @see u_strToUpper
* @draft ICU 59
* @stable ICU 59
*/
static int32_t toUpper(
const char *locale, uint32_t options,
@ -146,7 +144,7 @@ public:
*
* @see u_strToTitle
* @see ucasemap_toTitle
* @draft ICU 59
* @stable ICU 59
*/
static int32_t toTitle(
const char *locale, uint32_t options, BreakIterator *iter,
@ -188,7 +186,7 @@ public:
* the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
*
* @see u_strFoldCase
* @draft ICU 59
* @stable ICU 59
*/
static int32_t fold(
uint32_t options,
@ -196,6 +194,7 @@ public:
char16_t *dest, int32_t destCapacity, Edits *edits,
UErrorCode &errorCode);
#ifndef U_HIDE_DRAFT_API
/**
* Lowercases a UTF-8 string and optionally records edits.
* Casing is locale-dependent and context-sensitive.
@ -318,6 +317,7 @@ public:
uint32_t options,
StringPiece src, ByteSink &sink, Edits *edits,
UErrorCode &errorCode);
#endif // U_HIDE_DRAFT_API
/**
* Lowercases a UTF-8 string and optionally records edits.
@ -347,7 +347,7 @@ public:
* the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
*
* @see ucasemap_utf8ToLower
* @draft ICU 59
* @stable ICU 59
*/
static int32_t utf8ToLower(
const char *locale, uint32_t options,
@ -383,7 +383,7 @@ public:
* the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
*
* @see ucasemap_utf8ToUpper
* @draft ICU 59
* @stable ICU 59
*/
static int32_t utf8ToUpper(
const char *locale, uint32_t options,
@ -433,7 +433,7 @@ public:
* the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
*
* @see ucasemap_utf8ToTitle
* @draft ICU 59
* @stable ICU 59
*/
static int32_t utf8ToTitle(
const char *locale, uint32_t options, BreakIterator *iter,
@ -475,7 +475,7 @@ public:
* the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
*
* @see ucasemap_utf8FoldCase
* @draft ICU 59
* @stable ICU 59
*/
static int32_t utf8Fold(
uint32_t options,
@ -489,8 +489,6 @@ private:
CaseMap &operator=(const CaseMap &other) = delete;
};
#endif // U_HIDE_DRAFT_API
U_NAMESPACE_END
#endif // __CASEMAP_H__

View file

@ -30,25 +30,23 @@ U_NAMESPACE_BEGIN
# define U_ALIASING_BARRIER(ptr) asm volatile("" : : "rm"(ptr) : "memory")
#endif
// Do not use #ifndef U_HIDE_DRAFT_API for the following class, it
// is now used in place of UChar* in several stable C++ methods
/**
* char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types.
* @draft ICU 59
* @stable ICU 59
*/
class U_COMMON_API Char16Ptr U_FINAL {
public:
/**
* Copies the pointer.
* @param p pointer
* @draft ICU 59
* @stable ICU 59
*/
inline Char16Ptr(char16_t *p);
#if !U_CHAR16_IS_TYPEDEF
/**
* Converts the pointer to char16_t *.
* @param p pointer to be converted
* @draft ICU 59
* @stable ICU 59
*/
inline Char16Ptr(uint16_t *p);
#endif
@ -57,32 +55,32 @@ public:
* Converts the pointer to char16_t *.
* (Only defined if U_SIZEOF_WCHAR_T==2.)
* @param p pointer to be converted
* @draft ICU 59
* @stable ICU 59
*/
inline Char16Ptr(wchar_t *p);
#endif
/**
* nullptr constructor.
* @param p nullptr
* @draft ICU 59
* @stable ICU 59
*/
inline Char16Ptr(std::nullptr_t p);
/**
* Destructor.
* @draft ICU 59
* @stable ICU 59
*/
inline ~Char16Ptr();
/**
* Pointer access.
* @return the wrapped pointer
* @draft ICU 59
* @stable ICU 59
*/
inline char16_t *get() const;
/**
* char16_t pointer access via type conversion (e.g., static_cast).
* @return the wrapped pointer
* @draft ICU 59
* @stable ICU 59
*/
inline operator char16_t *() const { return get(); }
@ -137,25 +135,23 @@ char16_t *Char16Ptr::get() const { return u_.cp; }
#endif
// Do not use #ifndef U_HIDE_DRAFT_API for the following class, it is
// now used in place of const UChar* in several stable C++ methods
/**
* const char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types.
* @draft ICU 59
* @stable ICU 59
*/
class U_COMMON_API ConstChar16Ptr U_FINAL {
public:
/**
* Copies the pointer.
* @param p pointer
* @draft ICU 59
* @stable ICU 59
*/
inline ConstChar16Ptr(const char16_t *p);
#if !U_CHAR16_IS_TYPEDEF
/**
* Converts the pointer to char16_t *.
* @param p pointer to be converted
* @draft ICU 59
* @stable ICU 59
*/
inline ConstChar16Ptr(const uint16_t *p);
#endif
@ -164,33 +160,33 @@ public:
* Converts the pointer to char16_t *.
* (Only defined if U_SIZEOF_WCHAR_T==2.)
* @param p pointer to be converted
* @draft ICU 59
* @stable ICU 59
*/
inline ConstChar16Ptr(const wchar_t *p);
#endif
/**
* nullptr constructor.
* @param p nullptr
* @draft ICU 59
* @stable ICU 59
*/
inline ConstChar16Ptr(const std::nullptr_t p);
/**
* Destructor.
* @draft ICU 59
* @stable ICU 59
*/
inline ~ConstChar16Ptr();
/**
* Pointer access.
* @return the wrapped pointer
* @draft ICU 59
* @stable ICU 59
*/
inline const char16_t *get() const;
/**
* char16_t pointer access via type conversion (e.g., static_cast).
* @return the wrapped pointer
* @draft ICU 59
* @stable ICU 59
*/
inline operator const char16_t *() const { return get(); }
@ -250,7 +246,7 @@ const char16_t *ConstChar16Ptr::get() const { return u_.cp; }
* Includes an aliasing barrier if available.
* @param p pointer
* @return p as const UChar *
* @draft ICU 59
* @stable ICU 59
*/
inline const UChar *toUCharPtr(const char16_t *p) {
#ifdef U_ALIASING_BARRIER
@ -264,7 +260,7 @@ inline const UChar *toUCharPtr(const char16_t *p) {
* Includes an aliasing barrier if available.
* @param p pointer
* @return p as UChar *
* @draft ICU 59
* @stable ICU 59
*/
inline UChar *toUCharPtr(char16_t *p) {
#ifdef U_ALIASING_BARRIER
@ -278,7 +274,7 @@ inline UChar *toUCharPtr(char16_t *p) {
* Includes an aliasing barrier if available.
* @param p pointer
* @return p as const OldUChar *
* @draft ICU 59
* @stable ICU 59
*/
inline const OldUChar *toOldUCharPtr(const char16_t *p) {
#ifdef U_ALIASING_BARRIER
@ -292,7 +288,7 @@ inline const OldUChar *toOldUCharPtr(const char16_t *p) {
* Includes an aliasing barrier if available.
* @param p pointer
* @return p as OldUChar *
* @draft ICU 59
* @stable ICU 59
*/
inline OldUChar *toOldUCharPtr(char16_t *p) {
#ifdef U_ALIASING_BARRIER

View file

@ -569,7 +569,7 @@ public:
* Returns the numeric index in the underlying text-storage
* object of the character the iterator currently refers to
* (i.e., the character returned by current()).
* @return the numberic index in the text-storage object of
* @return the numeric index in the text-storage object of
* the character the iterator currently refers to
* @stable ICU 2.0
*/

View file

@ -69,7 +69,7 @@ public:
* <pre>
* . Base* polymorphic_pointer = createPolymorphicObject();
* . if (polymorphic_pointer->getDynamicClassID() ==
* . erived::getStaticClassID()) ...
* . derived::getStaticClassID()) ...
* </pre>
* @return The class ID for all objects of this class.
* @stable ICU 4.0

View file

@ -17,8 +17,6 @@
U_NAMESPACE_BEGIN
#ifndef U_HIDE_DRAFT_API
/**
* Records lengths of string edits but not replacement text.
* Supports replacements, insertions, deletions in linear progression.
@ -27,13 +25,13 @@ U_NAMESPACE_BEGIN
* An Edits object tracks a separate UErrorCode, but ICU string transformation functions
* (e.g., case mapping functions) merge any such errors into their API's UErrorCode.
*
* @draft ICU 59
* @stable ICU 59
*/
class U_COMMON_API Edits U_FINAL : public UMemory {
public:
/**
* Constructs an empty object.
* @draft ICU 59
* @stable ICU 59
*/
Edits() :
array(stackArray), capacity(STACK_CAPACITY), length(0), delta(0), numChanges(0),
@ -64,7 +62,7 @@ public:
/**
* Destructor.
* @draft ICU 59
* @stable ICU 59
*/
~Edits();
@ -88,20 +86,20 @@ public:
/**
* Resets the data but may not release memory.
* @draft ICU 59
* @stable ICU 59
*/
void reset() U_NOEXCEPT;
/**
* Adds a record for an unchanged segment of text.
* Normally called from inside ICU string transformation functions, not user code.
* @draft ICU 59
* @stable ICU 59
*/
void addUnchanged(int32_t unchangedLength);
/**
* Adds a record for a text replacement/insertion/deletion.
* Normally called from inside ICU string transformation functions, not user code.
* @draft ICU 59
* @stable ICU 59
*/
void addReplace(int32_t oldLength, int32_t newLength);
/**
@ -112,33 +110,35 @@ public:
* and an error occurred while recording edits.
* Otherwise unchanged.
* @return TRUE if U_FAILURE(outErrorCode)
* @draft ICU 59
* @stable ICU 59
*/
UBool copyErrorTo(UErrorCode &outErrorCode);
/**
* How much longer is the new text compared with the old text?
* @return new length minus old length
* @draft ICU 59
* @stable ICU 59
*/
int32_t lengthDelta() const { return delta; }
/**
* @return TRUE if there are any change edits
* @draft ICU 59
* @stable ICU 59
*/
UBool hasChanges() const { return numChanges != 0; }
#ifndef U_HIDE_DRAFT_API
/**
* @return the number of change edits
* @draft ICU 60
*/
int32_t numberOfChanges() const { return numChanges; }
#endif // U_HIDE_DRAFT_API
/**
* Access to the list of edits.
* @see getCoarseIterator
* @see getFineIterator
* @draft ICU 59
* @stable ICU 59
*/
struct U_COMMON_API Iterator U_FINAL : public UMemory {
/**
@ -152,12 +152,12 @@ public:
srcIndex(0), replIndex(0), destIndex(0) {}
/**
* Copy constructor.
* @draft ICU 59
* @stable ICU 59
*/
Iterator(const Iterator &other) = default;
/**
* Assignment operator.
* @draft ICU 59
* @stable ICU 59
*/
Iterator &operator=(const Iterator &other) = default;
@ -167,7 +167,7 @@ public:
* or else the function returns immediately. Check for U_FAILURE()
* on output or use with function chaining. (See User Guide for details.)
* @return TRUE if there is another edit
* @draft ICU 59
* @stable ICU 59
*/
UBool next(UErrorCode &errorCode) { return next(onlyChanges_, errorCode); }
@ -188,12 +188,13 @@ public:
* or else the function returns immediately. Check for U_FAILURE()
* on output or use with function chaining. (See User Guide for details.)
* @return TRUE if the edit for the source index was found
* @draft ICU 59
* @stable ICU 59
*/
UBool findSourceIndex(int32_t i, UErrorCode &errorCode) {
return findIndex(i, TRUE, errorCode) == 0;
}
#ifndef U_HIDE_DRAFT_API
/**
* Finds the edit that contains the destination index.
* The destination index may be found in a non-change
@ -264,39 +265,40 @@ public:
* @draft ICU 60
*/
int32_t sourceIndexFromDestinationIndex(int32_t i, UErrorCode &errorCode);
#endif // U_HIDE_DRAFT_API
/**
* @return TRUE if this edit replaces oldLength() units with newLength() different ones.
* FALSE if oldLength units remain unchanged.
* @draft ICU 59
* @stable ICU 59
*/
UBool hasChange() const { return changed; }
/**
* @return the number of units in the original string which are replaced or remain unchanged.
* @draft ICU 59
* @stable ICU 59
*/
int32_t oldLength() const { return oldLength_; }
/**
* @return the number of units in the modified string, if hasChange() is TRUE.
* Same as oldLength if hasChange() is FALSE.
* @draft ICU 59
* @stable ICU 59
*/
int32_t newLength() const { return newLength_; }
/**
* @return the current index into the source string
* @draft ICU 59
* @stable ICU 59
*/
int32_t sourceIndex() const { return srcIndex; }
/**
* @return the current index into the replacement-characters-only string,
* not counting unchanged spans
* @draft ICU 59
* @stable ICU 59
*/
int32_t replacementIndex() const { return replIndex; }
/**
* @return the current index into the full destination string
* @draft ICU 59
* @stable ICU 59
*/
int32_t destinationIndex() const { return destIndex; }
@ -331,7 +333,7 @@ public:
* Returns an Iterator for coarse-grained changes for simple string updates.
* Skips non-changes.
* @return an Iterator that merges adjacent changes.
* @draft ICU 59
* @stable ICU 59
*/
Iterator getCoarseChangesIterator() const {
return Iterator(array, length, TRUE, TRUE);
@ -340,7 +342,7 @@ public:
/**
* Returns an Iterator for coarse-grained changes and non-changes for simple string updates.
* @return an Iterator that merges adjacent changes.
* @draft ICU 59
* @stable ICU 59
*/
Iterator getCoarseIterator() const {
return Iterator(array, length, FALSE, TRUE);
@ -350,7 +352,7 @@ public:
* Returns an Iterator for fine-grained changes for modifying styled text.
* Skips non-changes.
* @return an Iterator that separates adjacent changes.
* @draft ICU 59
* @stable ICU 59
*/
Iterator getFineChangesIterator() const {
return Iterator(array, length, TRUE, FALSE);
@ -359,12 +361,13 @@ public:
/**
* Returns an Iterator for fine-grained changes and non-changes for modifying styled text.
* @return an Iterator that separates adjacent changes.
* @draft ICU 59
* @stable ICU 59
*/
Iterator getFineIterator() const {
return Iterator(array, length, FALSE, FALSE);
}
#ifndef U_HIDE_DRAFT_API
/**
* Merges the two input Edits and appends the result to this object.
*
@ -393,6 +396,7 @@ public:
* @draft ICU 60
*/
Edits &mergeAndAppend(const Edits &ab, const Edits &bc, UErrorCode &errorCode);
#endif // U_HIDE_DRAFT_API
private:
void releaseArray() U_NOEXCEPT;
@ -415,8 +419,6 @@ private:
uint16_t stackArray[STACK_CAPACITY];
};
#endif // U_HIDE_DRAFT_API
U_NAMESPACE_END
#endif // __EDITS_H__

View file

@ -64,9 +64,7 @@ class U_COMMON_API FilteredBreakIteratorBuilder : public UObject {
* @deprecated ICU 60 use createEmptyInstance instead
* @see createEmptyInstance()
*/
static inline FilteredBreakIteratorBuilder *createInstance(UErrorCode &status) {
return createEmptyInstance(status);
}
static FilteredBreakIteratorBuilder *createInstance(UErrorCode &status);
#endif /* U_HIDE_DEPRECATED_API */
#ifndef U_HIDE_DRAFT_API
@ -105,7 +103,6 @@ class U_COMMON_API FilteredBreakIteratorBuilder : public UObject {
*/
virtual UBool unsuppressBreakAfter(const UnicodeString& string, UErrorCode& status) = 0;
#ifndef U_HIDE_DEPRECATED_API
/**
* This function has been deprecated in favor of wrapIteratorWithFilter()
* The behavior is identical.
@ -116,7 +113,6 @@ class U_COMMON_API FilteredBreakIteratorBuilder : public UObject {
* @see wrapIteratorWithFilter()
*/
virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& status) = 0;
#endif /* U_HIDE_DEPRECATED_API */
#ifndef U_HIDE_DRAFT_API
/**

View file

@ -353,7 +353,7 @@ public:
* the default locale ID of the runtime environment.
*
* @param newLocale Locale to set to. If NULL, set to the value obtained
* from the runtime environement.
* from the runtime environment.
* @param success The error code.
* @system
* @stable ICU 2.0
@ -629,7 +629,7 @@ public:
/**
* Fills in "name" with the name of this locale in a format suitable for user display
* in the locale specfied by "displayLocale". This function uses getDisplayLanguage(),
* in the locale specified by "displayLocale". This function uses getDisplayLanguage(),
* getDisplayCountry(), and getDisplayVariant() to do its work, and outputs the display
* name in the format "language (country[,variant])". For example, if displayLocale is
* fr_FR, then en_US's display name would be "Anglais (&Eacute;tats-Unis)", and no_NO_NY's

View file

@ -58,9 +58,9 @@ enum { U_PARSE_CONTEXT_LEN = 16 };
typedef struct UParseError {
/**
* The line on which the error occured. If the parser uses this
* The line on which the error occurred. If the parser uses this
* field, it sets it to the line number of the source text line on
* which the error appears, which will be be a value >= 1. If the
* which the error appears, which will be a value >= 1. If the
* parse does not support line numbers, the value will be <= 0.
* @stable ICU 2.0
*/

View file

@ -482,9 +482,9 @@
/* Otherwise use the predefined value. */
#elif !defined(__cplusplus)
# define U_CPLUSPLUS_VERSION 0
#elif __cplusplus >= 201402L
#elif __cplusplus >= 201402L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201402L)
# define U_CPLUSPLUS_VERSION 14
#elif __cplusplus >= 201103L
#elif __cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L)
# define U_CPLUSPLUS_VERSION 11
#else
// C++98 or C++03
@ -631,7 +631,7 @@ namespace std {
*/
#ifdef U_CHARSET_IS_UTF8
/* Use the predefined value. */
#elif U_PLATFORM == U_PF_ANDROID || U_PLATFORM_IS_DARWIN_BASED
#elif U_PLATFORM_IS_LINUX_BASED || U_PLATFORM_IS_DARWIN_BASED
# define U_CHARSET_IS_UTF8 1
#else
# define U_CHARSET_IS_UTF8 0
@ -749,8 +749,10 @@ namespace std {
#else
/*
* Notes:
* Visual Studio 10 (_MSC_VER>=1600) defines char16_t but
* does not support u"abc" string literals.
* Visual Studio 2010 (_MSC_VER==1600) defines char16_t as a typedef
* and does not support u"abc" string literals.
* Visual Studio 2015 (_MSC_VER>=1900) and above adds support for
* both char16_t and u"abc" string literals.
* gcc 4.4 defines the __CHAR16_TYPE__ macro to a usable type but
* does not support u"abc" string literals.
* C++11 and C11 require support for UTF-16 literals

View file

@ -38,7 +38,7 @@
/**
* Platform utilities isolates the platform dependencies of the
* libarary. For each platform which this code is ported to, these
* library. For each platform which this code is ported to, these
* functions may have to be re-implemented.
*/
@ -53,7 +53,7 @@
* The data directory is determined as follows:
* If u_setDataDirectory() has been called, that is it, otherwise
* if the ICU_DATA environment variable is set, use that, otherwise
* If a data directory was specifed at ICU build time
* If a data directory was specified at ICU build time
* <code>
* \code
* #define ICU_DATA_DIR "path"
@ -93,7 +93,7 @@ U_STABLE void U_EXPORT2 u_setDataDirectory(const char *directory);
#ifndef U_HIDE_INTERNAL_API
/**
* Return the time zone files override directory, or an empty string if
* no directory was specified. Certain time zone resources will be preferrentially
* no directory was specified. Certain time zone resources will be preferentially
* loaded from individual files in this directory.
*
* @return the time zone data override directory.

View file

@ -29,7 +29,6 @@
#include "unicode/udata.h"
#include "unicode/parseerr.h"
#include "unicode/schriter.h"
#include "unicode/uchriter.h"
U_NAMESPACE_BEGIN
@ -58,34 +57,18 @@ private:
* The UText through which this BreakIterator accesses the text
* @internal
*/
UText *fText;
UText fText;
#ifndef U_HIDE_INTERNAL_API
public:
#endif /* U_HIDE_INTERNAL_API */
/**
* A character iterator that refers to the same text as the UText, above.
* Only included for compatibility with old API, which was based on CharacterIterators.
* Value may be adopted from outside, or one of fSCharIter or fDCharIter, below.
*/
CharacterIterator *fCharIter;
/**
* When the input text is provided by a UnicodeString, this will point to
* a characterIterator that wraps that data. Needed only for the
* implementation of getText(), a backwards compatibility issue.
*/
StringCharacterIterator *fSCharIter;
/**
* When the input text is provided by a UText, this
* dummy CharacterIterator over an empty string will
* be returned from getText()
*/
UCharCharacterIterator *fDCharIter;
/**
* The rule data for this BreakIterator instance
* The rule data for this BreakIterator instance.
* Not for general use; Public only for testing purposes.
* @internal
*/
RBBIDataWrapper *fData;
private:
/**
* The iteration state - current position, rule status for the current position,
@ -105,24 +88,11 @@ private:
*/
int32_t fRuleStatusIndex;
/**
* True when iteration has run off the end, and iterator functions should return UBRK_DONE.
*/
UBool fDone;
/**
* Cache of previously determined boundary positions.
*/
public: // TODO: debug, return to private.
class BreakCache;
BreakCache *fBreakCache;
private:
/**
* Counter for the number of characters encountered with the "dictionary"
* flag set.
* @internal
*/
uint32_t fDictionaryCharCount;
/**
* Cache of boundary positions within a region of text that has been
@ -150,11 +120,30 @@ private:
UnhandledEngine *fUnhandledBreakEngine;
/**
*
* The type of the break iterator, or -1 if it has not been set.
* Counter for the number of characters encountered with the "dictionary"
* flag set.
* @internal
*/
int32_t fBreakType;
uint32_t fDictionaryCharCount;
/**
* A character iterator that refers to the same text as the UText, above.
* Only included for compatibility with old API, which was based on CharacterIterators.
* Value may be adopted from outside, or one of fSCharIter or fDCharIter, below.
*/
CharacterIterator *fCharIter;
/**
* When the input text is provided by a UnicodeString, this will point to
* a characterIterator that wraps that data. Needed only for the
* implementation of getText(), a backwards compatibility issue.
*/
StringCharacterIterator fSCharIter;
/**
* True when iteration has run off the end, and iterator functions should return UBRK_DONE.
*/
UBool fDone;
//=======================================================================
// constructors
@ -206,17 +195,17 @@ public:
UErrorCode &status);
/**
* Contruct a RuleBasedBreakIterator from a set of precompiled binary rules.
* Construct a RuleBasedBreakIterator from a set of precompiled binary rules.
* Binary rules are obtained from RuleBasedBreakIterator::getBinaryRules().
* Construction of a break iterator in this way is substantially faster than
* constuction from source rules.
* construction from source rules.
*
* Ownership of the storage containing the compiled rules remains with the
* caller of this function. The compiled rules must not be modified or
* deleted during the life of the break iterator.
*
* The compiled rules are not compatible across different major versions of ICU.
* The compiled rules are comaptible only between machines with the same
* The compiled rules are compatible only between machines with the same
* byte ordering (little or big endian) and the same base character set family
* (ASCII or EBCDIC).
*
@ -285,7 +274,7 @@ public:
* behavior, and iterating over the same text, as this one.
* Differs from the copy constructor in that it is polymorphic, and
* will correctly clone (copy) a derived class.
* clone() is thread safe. Multiple threads may simultaeneously
* clone() is thread safe. Multiple threads may simultaneously
* clone the same source break iterator.
* @return a newly-constructed RuleBasedBreakIterator
* @stable ICU 2.0
@ -450,7 +439,7 @@ public:
virtual int32_t preceding(int32_t offset);
/**
* Returns true if the specfied position is a boundary position. As a side
* Returns true if the specified position is a boundary position. As a side
* effect, leaves the iterator pointing to the first boundary position at
* or after "offset".
* @param offset the offset to check.
@ -471,8 +460,8 @@ public:
/**
* Return the status tag from the break rule that determined the most recently
* returned break position. For break rules that do not specify a
* Return the status tag from the break rule that determined the boundary at
* the current iteration position. For break rules that do not specify a
* status, a default value of 0 is returned. If more than one break rule
* would cause a boundary to be located at some position in the text,
* the numerically largest of the applicable status values is returned.
@ -489,16 +478,14 @@ public:
* position from <code>next()</code>, <code>previous()</code>, or
* any other break iterator functions that returns a boundary position.
* <p>
* Note that <code>getRuleStatus()</code> returns the value corresponding to
* <code>current()</code> index even after <code>next()</code> has returned DONE.
* <p>
* When creating custom break rules, one is free to define whatever
* status values may be convenient for the application.
* <p>
* Note: this function is not thread safe. It should not have been
* declared const, and the const remains only for compatibility
* reasons. (The function is logically const, but not bit-wise const).
* TODO: check this. Probably thread safe now.
* <p>
* @return the status from the break rule that determined the most recently
* returned break position.
* @return the status from the break rule that determined the boundary
* at the current iteration position.
*
* @see UWordBreak
* @stable ICU 2.2
@ -506,8 +493,8 @@ public:
virtual int32_t getRuleStatus() const;
/**
* Get the status (tag) values from the break rule(s) that determined the most
* recently returned break position.
* Get the status (tag) values from the break rule(s) that determined the boundary
* at the current iteration position.
* <p>
* The returned status value(s) are stored into an array provided by the caller.
* The values are stored in sorted (ascending) order.
@ -518,10 +505,10 @@ public:
* @param fillInVec an array to be filled in with the status values.
* @param capacity the length of the supplied vector. A length of zero causes
* the function to return the number of status values, in the
* normal way, without attemtping to store any values.
* normal way, without attempting to store any values.
* @param status receives error codes.
* @return The number of rule status values from rules that determined
* the most recent boundary returned by the break iterator.
* @return The number of rule status values from the rules that determined
* the boundary at the current iteration position.
* In the event of a U_BUFFER_OVERFLOW_ERROR, the return value
* is the total number of status values that were available,
* not the reduced number that were actually returned.
@ -561,7 +548,7 @@ public:
*
* Create a clone (copy) of this break iterator in memory provided
* by the caller. The idea is to increase performance by avoiding
* a storage allocation. Use of this functoin is NOT RECOMMENDED.
* a storage allocation. Use of this function is NOT RECOMMENDED.
* Performance gains are minimal, and correct buffer management is
* tricky. Use clone() instead.
*
@ -574,7 +561,7 @@ public:
* storage for the cloned object.
*
* @param status Error status. U_SAFECLONE_ALLOCATED_WARNING will be
* returned if the the provided buffer was too small, and
* returned if the provided buffer was too small, and
* the clone was therefore put on the heap.
*
* @return Pointer to the clone object. This may differ from the stackBuffer
@ -597,7 +584,7 @@ public:
* The binary data can only be used with the same version of ICU
* and on the same platform type (processor endian-ness)
*
* @param length Returns the length of the binary data. (Out paramter.)
* @param length Returns the length of the binary data. (Out parameter.)
*
* @return A pointer to the binary (compiled) rule data. The storage
* belongs to the RuleBasedBreakIterator object, not the
@ -645,12 +632,6 @@ private:
*/
void reset(void);
/**
* Set the type of the break iterator.
* @internal
*/
void setBreakType(int32_t type);
/**
* Common initialization function, used by constructors and bufferClone.
* @internal
@ -697,6 +678,13 @@ private:
* @internal
*/
void dumpCache();
/**
* Debugging function only.
* @internal
*/
void dumpTables();
#endif /* U_HIDE_INTERNAL_API */
};

View file

@ -132,7 +132,7 @@ public:
ResourceBundle(UErrorCode &err);
/**
* Standard constructor, onstructs a resource bundle for the locale-specific
* Standard constructor, constructs a resource bundle for the locale-specific
* bundle in the specified package.
*
* @param packageName The packageName and locale together point to an ICU udata object,

View file

@ -69,7 +69,7 @@ public:
* Create an iterator over the UnicodeString referred to by "textStr".
* The UnicodeString object is copied.
* The iteration range begins with the code unit specified by
* "textBegin" and ends with the code unit BEFORE the code unit specfied
* "textBegin" and ends with the code unit BEFORE the code unit specified
* by "textEnd". The starting position is specified by "textPos". If
* "textBegin" and "textEnd" don't form a valid range on "text" (i.e.,
* textBegin >= textEnd or either is negative or greater than text.size()),

View file

@ -692,7 +692,7 @@ typedef enum UBiDiReorderingMode {
* @stable ICU 3.6 */
UBIDI_REORDER_DEFAULT = 0,
/** Logical to Visual algorithm which handles numbers in a way which
* mimicks the behavior of Windows XP.
* mimics the behavior of Windows XP.
* @stable ICU 3.6 */
UBIDI_REORDER_NUMBERS_SPECIAL,
/** Logical to Visual algorithm grouping numbers with adjacent R characters
@ -1142,7 +1142,7 @@ ubidi_setContext(UBiDi *pBiDi,
/**
* Perform the Unicode Bidi algorithm. It is defined in the
* <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Anned #9</a>,
* <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>,
* version 13,
* also described in The Unicode Standard, Version 4.0.<p>
*

View file

@ -268,7 +268,6 @@ ubrk_openRules(const UChar *rules,
UParseError *parseErr,
UErrorCode *status);
#ifndef U_HIDE_DRAFT_API
/**
* Open a new UBreakIterator for locating text boundaries using precompiled binary rules.
* Opening a UBreakIterator this way is substantially faster than using ubrk_openRules.
@ -287,15 +286,13 @@ ubrk_openRules(const UChar *rules,
* @param status Pointer to UErrorCode to receive any errors.
* @return UBreakIterator for the specified rules.
* @see ubrk_getBinaryRules
* @draft ICU 59
* @stable ICU 59
*/
U_DRAFT UBreakIterator* U_EXPORT2
U_STABLE UBreakIterator* U_EXPORT2
ubrk_openBinaryRules(const uint8_t *binaryRules, int32_t rulesLength,
const UChar * text, int32_t textLength,
UErrorCode * status);
#endif /* U_HIDE_DRAFT_API */
/**
* Thread safe cloning operation
* @param bi iterator to be cloned
@ -510,7 +507,7 @@ ubrk_countAvailable(void);
/**
* Returns true if the specfied position is a boundary position. As a side
* Returns true if the specified position is a boundary position. As a side
* effect, leaves the iterator pointing to the first boundary position at
* or after "offset".
* @param bi The break iterator to use.
@ -544,7 +541,7 @@ ubrk_getRuleStatus(UBreakIterator *bi);
* @param fillInVec an array to be filled in with the status values.
* @param capacity the length of the supplied vector. A length of zero causes
* the function to return the number of status values, in the
* normal way, without attemtping to store any values.
* normal way, without attempting to store any values.
* @param status receives error codes.
* @return The number of rule status values from rules that determined
* the most recent boundary returned by the break iterator.
@ -596,7 +593,6 @@ ubrk_refreshUText(UBreakIterator *bi,
UErrorCode *status);
#ifndef U_HIDE_DRAFT_API
/**
* Get a compiled binary version of the rules specifying the behavior of a UBreakIterator.
* The binary rules may be used with ubrk_openBinaryRules to open a new UBreakIterator
@ -620,15 +616,13 @@ ubrk_refreshUText(UBreakIterator *bi,
* otherwise 0. If not preflighting and this is larger than
* rulesCapacity, *status will be set to an error.
* @see ubrk_openBinaryRules
* @draft ICU 59
* @stable ICU 59
*/
U_DRAFT int32_t U_EXPORT2
U_STABLE int32_t U_EXPORT2
ubrk_getBinaryRules(UBreakIterator *bi,
uint8_t * binaryRules, int32_t rulesCapacity,
UErrorCode * status);
#endif /* U_HIDE_DRAFT_API */
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
#endif

View file

@ -112,11 +112,11 @@ U_CDECL_BEGIN
* Comparison:
* - u_isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property;
* most of general categories "Z" (separators) + most whitespace ISO controls
* (including no-break spaces, but excluding IS1..IS4 and ZWSP)
* (including no-break spaces, but excluding IS1..IS4)
* - u_isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces
* - u_isJavaSpaceChar: Java isSpaceChar; just Z (including no-break spaces)
* - u_isspace: Z + whitespace ISO controls (including no-break spaces)
* - u_isblank: "horizontal spaces" = TAB + Zs - ZWSP
* - u_isblank: "horizontal spaces" = TAB + Zs
*/
/**
@ -2702,8 +2702,7 @@ u_isgraph(UChar32 c);
*
* same as
*
* TRUE for U+0009 (TAB) and characters with general category "Zs" (space separators)
* except Zero Width Space (ZWSP, U+200B).
* TRUE for U+0009 (TAB) and characters with general category "Zs" (space separators).
*
* Note: There are several ICU whitespace functions; please see the uchar.h
* file documentation for a detailed comparison.

View file

@ -70,7 +70,7 @@ u_init(UErrorCode *status);
* This has the effect of restoring ICU to its initial condition, before
* any of these override functions were installed. Refer to
* u_setMemoryFunctions(), u_setMutexFunctions and
* utrace_setFunctions(). If ICU is to be reinitialized after after
* utrace_setFunctions(). If ICU is to be reinitialized after
* calling u_cleanup(), these runtime override functions will need to
* be set up again if they are still required.
* <p>
@ -104,7 +104,7 @@ u_cleanup(void);
U_CDECL_BEGIN
/**
* Pointer type for a user supplied memory allocation function.
* @param context user supplied value, obtained from from u_setMemoryFunctions().
* @param context user supplied value, obtained from u_setMemoryFunctions().
* @param size The number of bytes to be allocated
* @return Pointer to the newly allocated memory, or NULL if the allocation failed.
* @stable ICU 2.8
@ -113,7 +113,7 @@ U_CDECL_BEGIN
typedef void *U_CALLCONV UMemAllocFn(const void *context, size_t size);
/**
* Pointer type for a user supplied memory re-allocation function.
* @param context user supplied value, obtained from from u_setMemoryFunctions().
* @param context user supplied value, obtained from u_setMemoryFunctions().
* @param size The number of bytes to be allocated
* @return Pointer to the newly allocated memory, or NULL if the allocation failed.
* @stable ICU 2.8
@ -123,7 +123,7 @@ typedef void *U_CALLCONV UMemReallocFn(const void *context, void *mem, size_t si
/**
* Pointer type for a user supplied memory free function. Behavior should be
* similar to the standard C library free().
* @param context user supplied value, obtained from from u_setMemoryFunctions().
* @param context user supplied value, obtained from u_setMemoryFunctions().
* @param mem Pointer to the memory block to be resized
* @param size The new size for the block
* @return Pointer to the resized memory block, or NULL if the resizing failed.
@ -179,8 +179,8 @@ U_CDECL_BEGIN
* The user-supplied function will be called by ICU whenever ICU needs to create a
* new mutex. The function implementation should create a mutex, and store a pointer
* to something that uniquely identifies the mutex into the UMTX that is supplied
* as a paramter.
* @param context user supplied value, obtained from from u_setMutexFunctions().
* as a parameter.
* @param context user supplied value, obtained from u_setMutexFunctions().
* @param mutex Receives a pointer that identifies the new mutex.
* The mutex init function must set the UMTX to a non-null value.
* Subsequent calls by ICU to lock, unlock, or destroy a mutex will
@ -197,7 +197,7 @@ typedef void U_CALLCONV UMtxInitFn (const void *context, UMTX *mutex, UErrorCod
* Function Pointer type for a user supplied mutex functions.
* One of the user-supplied functions with this signature will be called by ICU
* whenever ICU needs to lock, unlock, or destroy a mutex.
* @param context user supplied value, obtained from from u_setMutexFunctions().
* @param context user supplied value, obtained from u_setMutexFunctions().
* @param mutex specify the mutex on which to operate.
* @deprecated ICU 52. This function is no longer supported.
* @system
@ -229,7 +229,7 @@ u_setMutexFunctions(const void *context, UMtxInitFn *init, UMtxFn *destroy, UMtx
/**
* Pointer type for a user supplied atomic increment or decrement function.
* @param context user supplied value, obtained from from u_setAtomicIncDecFunctions().
* @param context user supplied value, obtained from u_setAtomicIncDecFunctions().
* @param p Pointer to a 32 bit int to be incremented or decremented
* @return The value of the variable after the inc or dec operation.
* @deprecated ICU 52. This function is no longer supported.

View file

@ -207,7 +207,7 @@ typedef void (U_EXPORT2 *UConverterToUCallback) (
/**
* Function pointer for error callback in the unicode to codepage direction.
* Called when an error has occured in conversion from unicode, or on open/close of the callback (see reason).
* Called when an error has occurred in conversion from unicode, or on open/close of the callback (see reason).
* @param context Pointer to the callback's private data
* @param args Information about the conversion in progress
* @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
@ -353,7 +353,7 @@ ucnv_compareNames(const char *name1, const char *name2);
* ucnv_getAlias for a complete list that is available.
* If this parameter is NULL, the default converter will be used.
* @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR, U_FILE_ACCESS_ERROR</TT>
* @return the created Unicode converter object, or <TT>NULL</TT> if an error occured
* @return the created Unicode converter object, or <TT>NULL</TT> if an error occurred
* @see ucnv_openU
* @see ucnv_openCCSID
* @see ucnv_getAvailableName
@ -386,7 +386,7 @@ ucnv_open(const char *converterName, UErrorCode *err);
* @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR,
* U_FILE_ACCESS_ERROR</TT>
* @return the created Unicode converter object, or <TT>NULL</TT> if an
* error occured
* error occurred
* @see ucnv_open
* @see ucnv_openCCSID
* @see ucnv_close
@ -489,7 +489,7 @@ ucnv_openCCSID(int32_t codepage,
* @param packageName name of the package (equivalent to 'path' in udata_open() call)
* @param converterName name of the data item to be used, without suffix.
* @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR, U_FILE_ACCESS_ERROR</TT>
* @return the created Unicode converter object, or <TT>NULL</TT> if an error occured
* @return the created Unicode converter object, or <TT>NULL</TT> if an error occurred
* @see udata_open
* @see ucnv_open
* @see ucnv_safeClone

View file

@ -119,19 +119,19 @@ typedef struct UConverter UConverter;
#define UCNV_ESCAPE_JAVA "J"
/**
* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to C (\\uXXXX \\UXXXXXXXX)
* TO_U_CALLBACK_ESCAPE option to escape the character value accoding to C (\\xXXXX)
* TO_U_CALLBACK_ESCAPE option to escape the character value according to C (\\xXXXX)
* @stable ICU 2.0
*/
#define UCNV_ESCAPE_C "C"
/**
* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Decimal escape \htmlonly(&amp;#DDDD;)\endhtmlonly
* TO_U_CALLBACK_ESCAPE context option to escape the character value accoding to XML Decimal escape \htmlonly(&amp;#DDDD;)\endhtmlonly
* TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Decimal escape \htmlonly(&amp;#DDDD;)\endhtmlonly
* @stable ICU 2.0
*/
#define UCNV_ESCAPE_XML_DEC "D"
/**
* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Hex escape \htmlonly(&amp;#xXXXX;)\endhtmlonly
* TO_U_CALLBACK_ESCAPE context option to escape the character value accoding to XML Hex escape \htmlonly(&amp;#xXXXX;)\endhtmlonly
* TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Hex escape \htmlonly(&amp;#xXXXX;)\endhtmlonly
* @stable ICU 2.0
*/
#define UCNV_ESCAPE_XML_HEX "X"
@ -171,7 +171,7 @@ typedef enum {
code points.
The error code U_INVALID_CHAR_FOUND will be set. */
UCNV_RESET = 3, /**< The callback is called with this reason when a
'reset' has occured. Callback should reset all
'reset' has occurred. Callback should reset all
state. */
UCNV_CLOSE = 4, /**< Called when the converter is closed. The
callback should release any allocated memory.*/
@ -199,7 +199,7 @@ typedef struct {
const UChar *sourceLimit; /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */
char *target; /**< Pointer to the target buffer. @stable ICU 2.0 */
const char *targetLimit; /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */
int32_t *offsets; /**< Pointer to the buffer that recieves the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */
int32_t *offsets; /**< Pointer to the buffer that receives the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */
} UConverterFromUnicodeArgs;
@ -215,7 +215,7 @@ typedef struct {
const char *sourceLimit; /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */
UChar *target; /**< Pointer to the target buffer. @stable ICU 2.0 */
const UChar *targetLimit; /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */
int32_t *offsets; /**< Pointer to the buffer that recieves the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */
int32_t *offsets; /**< Pointer to the buffer that receives the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */
} UConverterToUnicodeArgs;

View file

@ -103,6 +103,19 @@ typedef enum UCurrNameStyle {
* @stable ICU 2.6
*/
UCURR_LONG_NAME
#ifndef U_HIDE_DRAFT_API
,
/**
* Selector for getName() indicating the narrow currency symbol.
* The narrow currency symbol is similar to the regular currency
* symbol, but it always takes the shortest form: for example,
* "$" instead of "US$" for USD in en-CA.
*
* @draft ICU 61
*/
UCURR_NARROW_SYMBOL_NAME
#endif // U_HIDE_DRAFT_API
} UCurrNameStyle;
#if !UCONFIG_NO_SERVICE

View file

@ -299,6 +299,10 @@ typedef int8_t UBool;
// for AIX, uchar.h needs to be included
# include <uchar.h>
# define U_CHAR16_IS_TYPEDEF 1
#elif defined(_MSC_VER) && (_MSC_VER < 1900)
// Versions of Visual Studio/MSVC below 2015 do not support char16_t as a real type,
// and instead use a typedef. https://msdn.microsoft.com/library/bb531344.aspx
# define U_CHAR16_IS_TYPEDEF 1
#else
# define U_CHAR16_IS_TYPEDEF 0
#endif
@ -366,7 +370,7 @@ typedef int8_t UBool;
* Exception: ICU 58 UChar was defined to UCHAR_TYPE if that macro was defined.
* The current UChar responds to UCHAR_TYPE but OldUChar does not.
*
* @draft ICU 59
* @stable ICU 59
*/
#if U_SIZEOF_WCHAR_T==2
typedef wchar_t OldUChar;

View file

@ -1521,6 +1521,7 @@ private:
UnicodeString& rebuiltPat,
uint32_t options,
UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute),
int32_t depth,
UErrorCode& ec);
//----------------------------------------------------------------

View file

@ -2995,10 +2995,6 @@ public:
*/
UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char16_t *text);
/*
* Do not use #ifndef U_HIDE_DRAFT_API for the following constructor,
* it should always be available regardless of U_HIDE_DRAFT_API status
*/
#if !U_CHAR16_IS_TYPEDEF
/**
* uint16_t * constructor.
@ -3008,16 +3004,12 @@ public:
* <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
* on the compiler command line or similar.
* @param text NUL-terminated UTF-16 string
* @draft ICU 59
* @stable ICU 59
*/
UNISTR_FROM_STRING_EXPLICIT UnicodeString(const uint16_t *text) :
UnicodeString(ConstChar16Ptr(text)) {}
#endif
/*
* Do not use #ifndef U_HIDE_DRAFT_API for the following constructor,
* it should always be available regardless of U_HIDE_DRAFT_API status
*/
#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
/**
* wchar_t * constructor.
@ -3028,16 +3020,12 @@ public:
* <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
* on the compiler command line or similar.
* @param text NUL-terminated UTF-16 string
* @draft ICU 59
* @stable ICU 59
*/
UNISTR_FROM_STRING_EXPLICIT UnicodeString(const wchar_t *text) :
UnicodeString(ConstChar16Ptr(text)) {}
#endif
/*
* Do not use #ifndef U_HIDE_DRAFT_API for the following constructor,
* it should always be available regardless of U_HIDE_DRAFT_API status
*/
/**
* nullptr_t constructor.
* Effectively the same as the default constructor, makes an empty string object.
@ -3046,7 +3034,7 @@ public:
* <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
* on the compiler command line or similar.
* @param text nullptr
* @draft ICU 59
* @stable ICU 59
*/
UNISTR_FROM_STRING_EXPLICIT inline UnicodeString(const std::nullptr_t text);
@ -3060,26 +3048,18 @@ public:
UnicodeString(const char16_t *text,
int32_t textLength);
/*
* Do not use #ifndef U_HIDE_DRAFT_API for the following constructor,
* it should always be available regardless of U_HIDE_DRAFT_API status
*/
#if !U_CHAR16_IS_TYPEDEF
/**
* uint16_t * constructor.
* Delegates to UnicodeString(const char16_t *, int32_t).
* @param text UTF-16 string
* @param length string length
* @draft ICU 59
* @stable ICU 59
*/
UnicodeString(const uint16_t *text, int32_t length) :
UnicodeString(ConstChar16Ptr(text), length) {}
#endif
/*
* Do not use #ifndef U_HIDE_DRAFT_API for the following constructor,
* it should always be available regardless of U_HIDE_DRAFT_API status
*/
#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
/**
* wchar_t * constructor.
@ -3087,22 +3067,18 @@ public:
* Delegates to UnicodeString(const char16_t *, int32_t).
* @param text NUL-terminated UTF-16 string
* @param length string length
* @draft ICU 59
* @stable ICU 59
*/
UnicodeString(const wchar_t *text, int32_t length) :
UnicodeString(ConstChar16Ptr(text), length) {}
#endif
/*
* Do not use #ifndef U_HIDE_DRAFT_API for the following constructor,
* it should always be available regardless of U_HIDE_DRAFT_API status
*/
/**
* nullptr_t constructor.
* Effectively the same as the default constructor, makes an empty string object.
* @param text nullptr
* @param length ignored
* @draft ICU 59
* @stable ICU 59
*/
inline UnicodeString(const std::nullptr_t text, int32_t length);
@ -3152,10 +3128,6 @@ public:
*/
UnicodeString(char16_t *buffer, int32_t buffLength, int32_t buffCapacity);
/*
* Do not use #ifndef U_HIDE_DRAFT_API for the following constructor,
* it should always be available regardless of U_HIDE_DRAFT_API status
*/
#if !U_CHAR16_IS_TYPEDEF
/**
* Writable-aliasing uint16_t * constructor.
@ -3163,16 +3135,12 @@ public:
* @param buffer writable buffer of/for UTF-16 text
* @param buffLength length of the current buffer contents
* @param buffCapacity buffer capacity
* @draft ICU 59
* @stable ICU 59
*/
UnicodeString(uint16_t *buffer, int32_t buffLength, int32_t buffCapacity) :
UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {}
#endif
/*
* Do not use #ifndef U_HIDE_DRAFT_API for the following constructor,
* it should always be available regardless of U_HIDE_DRAFT_API status
*/
#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
/**
* Writable-aliasing wchar_t * constructor.
@ -3181,23 +3149,19 @@ public:
* @param buffer writable buffer of/for UTF-16 text
* @param buffLength length of the current buffer contents
* @param buffCapacity buffer capacity
* @draft ICU 59
* @stable ICU 59
*/
UnicodeString(wchar_t *buffer, int32_t buffLength, int32_t buffCapacity) :
UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {}
#endif
/*
* Do not use #ifndef U_HIDE_DRAFT_API for the following constructor,
* it should always be available regardless of U_HIDE_DRAFT_API status
*/
/**
* Writable-aliasing nullptr_t constructor.
* Effectively the same as the default constructor, makes an empty string object.
* @param buffer nullptr
* @param buffLength ignored
* @param buffCapacity ignored
* @draft ICU 59
* @stable ICU 59
*/
inline UnicodeString(std::nullptr_t buffer, int32_t buffLength, int32_t buffCapacity);

View file

@ -107,7 +107,6 @@
#define _UTF7Data U_ICU_ENTRY_POINT_RENAME(_UTF7Data)
#define _UTF8Data U_ICU_ENTRY_POINT_RENAME(_UTF8Data)
#define allowedHourFormatsCleanup U_ICU_ENTRY_POINT_RENAME(allowedHourFormatsCleanup)
#define checkImpl U_ICU_ENTRY_POINT_RENAME(checkImpl)
#define cmemory_cleanup U_ICU_ENTRY_POINT_RENAME(cmemory_cleanup)
#define dayPeriodRulesCleanup U_ICU_ENTRY_POINT_RENAME(dayPeriodRulesCleanup)
#define deleteAllowedHourFormats U_ICU_ENTRY_POINT_RENAME(deleteAllowedHourFormats)
@ -446,7 +445,6 @@
#define ubidi_getReorderingOptions U_ICU_ENTRY_POINT_RENAME(ubidi_getReorderingOptions)
#define ubidi_getResultLength U_ICU_ENTRY_POINT_RENAME(ubidi_getResultLength)
#define ubidi_getRuns U_ICU_ENTRY_POINT_RENAME(ubidi_getRuns)
#define ubidi_getSingleton U_ICU_ENTRY_POINT_RENAME(ubidi_getSingleton)
#define ubidi_getText U_ICU_ENTRY_POINT_RENAME(ubidi_getText)
#define ubidi_getVisualIndex U_ICU_ENTRY_POINT_RENAME(ubidi_getVisualIndex)
#define ubidi_getVisualMap U_ICU_ENTRY_POINT_RENAME(ubidi_getVisualMap)
@ -551,6 +549,7 @@
#define ucase_addStringCaseClosure U_ICU_ENTRY_POINT_RENAME(ucase_addStringCaseClosure)
#define ucase_fold U_ICU_ENTRY_POINT_RENAME(ucase_fold)
#define ucase_getCaseLocale U_ICU_ENTRY_POINT_RENAME(ucase_getCaseLocale)
#define ucase_getTrie U_ICU_ENTRY_POINT_RENAME(ucase_getTrie)
#define ucase_getType U_ICU_ENTRY_POINT_RENAME(ucase_getType)
#define ucase_getTypeOrIgnorable U_ICU_ENTRY_POINT_RENAME(ucase_getTypeOrIgnorable)
#define ucase_hasBinaryProperty U_ICU_ENTRY_POINT_RENAME(ucase_hasBinaryProperty)
@ -862,6 +861,7 @@
#define udatpg_getBestPatternWithOptions U_ICU_ENTRY_POINT_RENAME(udatpg_getBestPatternWithOptions)
#define udatpg_getDateTimeFormat U_ICU_ENTRY_POINT_RENAME(udatpg_getDateTimeFormat)
#define udatpg_getDecimal U_ICU_ENTRY_POINT_RENAME(udatpg_getDecimal)
#define udatpg_getFieldDisplayName U_ICU_ENTRY_POINT_RENAME(udatpg_getFieldDisplayName)
#define udatpg_getPatternForSkeleton U_ICU_ENTRY_POINT_RENAME(udatpg_getPatternForSkeleton)
#define udatpg_getSkeleton U_ICU_ENTRY_POINT_RENAME(udatpg_getSkeleton)
#define udatpg_open U_ICU_ENTRY_POINT_RENAME(udatpg_open)
@ -1326,7 +1326,6 @@
#define uprv_getRawUTCtime U_ICU_ENTRY_POINT_RENAME(uprv_getRawUTCtime)
#define uprv_getStaticCurrencyName U_ICU_ENTRY_POINT_RENAME(uprv_getStaticCurrencyName)
#define uprv_getUTCtime U_ICU_ENTRY_POINT_RENAME(uprv_getUTCtime)
#define uprv_haveProperties U_ICU_ENTRY_POINT_RENAME(uprv_haveProperties)
#define uprv_int32Comparator U_ICU_ENTRY_POINT_RENAME(uprv_int32Comparator)
#define uprv_isASCIILetter U_ICU_ENTRY_POINT_RENAME(uprv_isASCIILetter)
#define uprv_isInfinite U_ICU_ENTRY_POINT_RENAME(uprv_isInfinite)

View file

@ -16,7 +16,7 @@
* 04/04/99 helena Fixed internal header inclusion.
* 04/15/99 Madhu Updated Javadoc
* 06/14/99 stephen Removed functions taking a filename suffix.
* 07/20/99 stephen Language-independent ypedef to void*
* 07/20/99 stephen Language-independent typedef to void*
* 11/09/99 weiv Added ures_getLocale()
* 06/24/02 weiv Added support for resource sharing
******************************************************************************
@ -138,7 +138,7 @@ typedef enum {
/**
* Opens a UResourceBundle, from which users can extract strings by using
* their corresponding keys.
* Note that the caller is responsible of calling <TT>ures_close</TT> on each succesfully
* Note that the caller is responsible of calling <TT>ures_close</TT> on each successfully
* opened resource bundle.
* @param packageName The packageName and locale together point to an ICU udata object,
* as defined by <code> udata_open( packageName, "res", locale, err) </code>
@ -301,7 +301,7 @@ ures_getVersion(const UResourceBundle* resB,
* you to query for the real locale of the resource. For example, if you requested
* "en_US_CALIFORNIA" and only "en_US" bundle exists, "en_US" will be returned.
* For subresources, the locale where this resource comes from will be returned.
* If fallback has occured, getLocale will reflect this.
* If fallback has occurred, getLocale will reflect this.
*
* @param resourceBundle resource bundle in question
* @param status just for catching illegal arguments
@ -580,7 +580,7 @@ ures_hasNext(const UResourceBundle *resourceBundle);
* @param fillIn if NULL a new UResourceBundle struct is allocated and must be closed by the caller.
* Alternatively, you can supply a struct to be filled by this function.
* @param status fills in the outgoing error code. You may still get a non NULL result even if an
* error occured. Check status instead.
* error occurred. Check status instead.
* @return a pointer to a UResourceBundle struct. If fill in param was NULL, caller must close it
* @stable ICU 2.0
*/
@ -596,7 +596,7 @@ ures_getNextResource(UResourceBundle *resourceBundle,
* @param resourceBundle a resource
* @param len fill in length of the string
* @param key fill in for key associated with this string. NULL if no key
* @param status fills in the outgoing error code. If an error occured, we may return NULL, but don't
* @param status fills in the outgoing error code. If an error occurred, we may return NULL, but don't
* count on it. Check status instead!
* @return a pointer to a zero-terminated UChar array which lives in a memory mapped/DLL file.
* @stable ICU 2.0
@ -615,7 +615,7 @@ ures_getNextString(UResourceBundle *resourceBundle,
* @param fillIn if NULL a new UResourceBundle struct is allocated and must be closed by the caller.
* Alternatively, you can supply a struct to be filled by this function.
* @param status fills in the outgoing error code. Don't count on NULL being returned if an error has
* occured. Check status instead.
* occurred. Check status instead.
* @return a pointer to a UResourceBundle struct. If fill in param was NULL, caller must close it
* @stable ICU 2.0
*/
@ -631,7 +631,7 @@ ures_getByIndex(const UResourceBundle *resourceBundle,
* @param resourceBundle a resource
* @param indexS an index to the wanted string.
* @param len fill in length of the string
* @param status fills in the outgoing error code. If an error occured, we may return NULL, but don't
* @param status fills in the outgoing error code. If an error occurred, we may return NULL, but don't
* count on it. Check status instead!
* @return a pointer to a zero-terminated UChar array which lives in a memory mapped/DLL file.
* @stable ICU 2.0
@ -722,7 +722,7 @@ ures_getByKey(const UResourceBundle *resourceBundle,
* @param resB a resource
* @param key a key associated with the wanted string
* @param len fill in length of the string
* @param status fills in the outgoing error code. If an error occured, we may return NULL, but don't
* @param status fills in the outgoing error code. If an error occurred, we may return NULL, but don't
* count on it. Check status instead!
* @return a pointer to a zero-terminated UChar array which lives in a memory mapped/DLL file.
* @stable ICU 2.0

View file

@ -476,7 +476,7 @@ typedef enum UScriptCode {
* @param nameOrAbbrOrLocale name of the script, as given in
* PropertyValueAliases.txt, or ISO 15924 code or locale
* @param fillIn the UScriptCode buffer to fill in the script code
* @param capacity the capacity (size) fo UScriptCode buffer passed in.
* @param capacity the capacity (size) of UScriptCode buffer passed in.
* @param err the error status code.
* @return The number of script codes filled in the buffer passed in
* @stable ICU 2.4

View file

@ -93,7 +93,7 @@
* which must not indicate a failure before the function call.
*
* @return The number of UChars written to the destination buffer.
* If an error occured, then no output was written, or it may be
* If an error occurred, then no output was written, or it may be
* incomplete. If <code>U_BUFFER_OVERFLOW_ERROR</code> is set, then
* the return value indicates the necessary destination buffer size.
* @stable ICU 2.0

View file

@ -33,14 +33,14 @@
* StringPrep prepares Unicode strings for use in network protocols.
* Profiles of StringPrep are sets of rules and data according to which the
* Unicode Strings are prepared. Each profiles contains tables which describe
* how a code point should be treated. The tables are broadly classied into
* how a code point should be treated. The tables are broadly classified into
* <ul>
* <li> Unassinged Table: Contains code points that are unassigned
* <li> Unassigned Table: Contains code points that are unassigned
* in the Unicode Version supported by StringPrep. Currently
* RFC 3454 supports Unicode 3.2. </li>
* <li> Prohibited Table: Contains code points that are prohibted from
* <li> Prohibited Table: Contains code points that are prohibited from
* the output of the StringPrep processing function. </li>
* <li> Mapping Table: Contains code ponts that are deleted from the output or case mapped. </li>
* <li> Mapping Table: Contains code points that are deleted from the output or case mapped. </li>
* </ul>
*
* The procedure for preparing Unicode strings:
@ -230,7 +230,7 @@ U_NAMESPACE_END
/**
* Prepare the input buffer for use in applications with the given profile. This operation maps, normalizes(NFKC),
* checks for prohited and BiDi characters in the order defined by RFC 3454
* checks for prohibited and BiDi characters in the order defined by RFC 3454
* depending on the options specified in the profile.
*
* @param prep The profile to use

View file

@ -403,7 +403,7 @@ u_strspn(const UChar *string, const UChar *matchSet);
* @param saveState The current pointer within the original string,
* which is set by this function. The saveState
* parameter should be the address of a local variable of type
* UChar *. (i.e. defined "Uhar *myLocalSaveState" and use
* UChar *. (i.e. defined "UChar *myLocalSaveState" and use
* &myLocalSaveState for this parameter).
* @return A pointer to the next token found in src, or NULL
* when there are no more tokens.
@ -884,7 +884,7 @@ u_memrchr32(const UChar *s, UChar32 c, int32_t count);
* Unicode String literals in C.
* We need one macro to declare a variable for the string
* and to statically preinitialize it if possible,
* and a second macro to dynamically intialize such a string variable if necessary.
* and a second macro to dynamically initialize such a string variable if necessary.
*
* The macros are defined for maximum performance.
* They work only for strings that contain "invariant characters", i.e.,

View file

@ -655,10 +655,10 @@ utext_getPreviousNativeIndex(UText *ut);
* @param ut the UText from which to extract data.
* @param nativeStart the native index of the first character to extract.\
* If the specified index is out of range,
* it will be pinned to to be within 0 <= index <= textLength
* it will be pinned to be within 0 <= index <= textLength
* @param nativeLimit the native string index of the position following the last
* character to extract. If the specified index is out of range,
* it will be pinned to to be within 0 <= index <= textLength.
* it will be pinned to be within 0 <= index <= textLength.
* nativeLimit must be >= nativeStart.
* @param dest the UChar (UTF-16) buffer into which the extracted text is placed
* @param destCapacity The size, in UChars, of the destination buffer. May be zero
@ -906,7 +906,7 @@ utext_copy(UText *ut,
* Caution: freezing a UText will disable changes made via the specific
* frozen UText wrapper only; it will not have any effect on the ability to
* directly modify the text by bypassing the UText. Any such backdoor modifications
* are always an error while UText access is occuring because the underlying
* are always an error while UText access is occurring because the underlying
* text can get out of sync with UText's buffering.
* </p>
*
@ -1452,7 +1452,7 @@ struct UText {
void *pExtra;
/**
* (protected) Pointer to string or text-containin object or similar.
* (protected) Pointer to string or text-containing object or similar.
* This is the source of the text that this UText is wrapping, in a format
* that is known to the text provider functions.
* @stable ICU 3.4

View file

@ -348,29 +348,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
* @see U8_NEXT_UNSAFE
* @stable ICU 2.4
*/
#define U8_NEXT(s, i, length, c) { \
(c)=(uint8_t)(s)[(i)++]; \
if(!U8_IS_SINGLE(c)) { \
uint8_t __t1, __t2; \
if( /* handle U+0800..U+FFFF inline */ \
(0xe0<=(c) && (c)<0xf0) && \
(((i)+1)<(length) || (length)<0) && \
U8_IS_VALID_LEAD3_AND_T1((c), __t1=(s)[i]) && \
(__t2=(s)[(i)+1]-0x80)<=0x3f) { \
(c)=(((c)&0xf)<<12)|((__t1&0x3f)<<6)|__t2; \
(i)+=2; \
} else if( /* handle U+0080..U+07FF inline */ \
((c)<0xe0 && (c)>=0xc2) && \
((i)!=(length)) && \
(__t1=(s)[i]-0x80)<=0x3f) { \
(c)=(((c)&0x1f)<<6)|__t1; \
++(i); \
} else { \
/* function call for "complicated" and error cases */ \
(c)=utf8_nextCharSafeBody((const uint8_t *)s, &(i), (length), c, -1); \
} \
} \
}
#define U8_NEXT(s, i, length, c) U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, U_SENTINEL)
/**
* Get a code point from a string at a code point boundary offset,
@ -396,26 +374,33 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
* @see U8_NEXT
* @stable ICU 51
*/
#define U8_NEXT_OR_FFFD(s, i, length, c) { \
#define U8_NEXT_OR_FFFD(s, i, length, c) U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, 0xfffd)
/** @internal */
#define U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, sub) { \
(c)=(uint8_t)(s)[(i)++]; \
if(!U8_IS_SINGLE(c)) { \
uint8_t __t1, __t2; \
if( /* handle U+0800..U+FFFF inline */ \
(0xe0<=(c) && (c)<0xf0) && \
(((i)+1)<(length) || (length)<0) && \
U8_IS_VALID_LEAD3_AND_T1((c), __t1=(s)[i]) && \
(__t2=(s)[(i)+1]-0x80)<=0x3f) { \
(c)=(((c)&0xf)<<12)|((__t1&0x3f)<<6)|__t2; \
(i)+=2; \
} else if( /* handle U+0080..U+07FF inline */ \
((c)<0xe0 && (c)>=0xc2) && \
((i)!=(length)) && \
(__t1=(s)[i]-0x80)<=0x3f) { \
(c)=(((c)&0x1f)<<6)|__t1; \
++(i); \
uint8_t __t = 0; \
if((i)!=(length) && \
/* fetch/validate/assemble all but last trail byte */ \
((c)>=0xe0 ? \
((c)<0xf0 ? /* U+0800..U+FFFF except surrogates */ \
U8_LEAD3_T1_BITS[(c)&=0xf]&(1<<((__t=(s)[i])>>5)) && \
(__t&=0x3f, 1) \
: /* U+10000..U+10FFFF */ \
((c)-=0xf0)<=4 && \
U8_LEAD4_T1_BITS[(__t=(s)[i])>>4]&(1<<(c)) && \
((c)=((c)<<6)|(__t&0x3f), ++(i)!=(length)) && \
(__t=(s)[i]-0x80)<=0x3f) && \
/* valid second-to-last trail byte */ \
((c)=((c)<<6)|__t, ++(i)!=(length)) \
: /* U+0080..U+07FF */ \
(c)>=0xc2 && ((c)&=0x1f, 1)) && \
/* last trail byte */ \
(__t=(s)[i]-0x80)<=0x3f && \
((c)=((c)<<6)|__t, ++(i), 1)) { \
} else { \
/* function call for "complicated" and error cases */ \
(c)=utf8_nextCharSafeBody((const uint8_t *)s, &(i), (length), c, -3); \
(c)=(sub); /* ill-formed*/ \
} \
} \
}
@ -434,21 +419,22 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
* @stable ICU 2.4
*/
#define U8_APPEND_UNSAFE(s, i, c) { \
if((uint32_t)(c)<=0x7f) { \
(s)[(i)++]=(uint8_t)(c); \
uint32_t __uc=(c); \
if(__uc<=0x7f) { \
(s)[(i)++]=(uint8_t)__uc; \
} else { \
if((uint32_t)(c)<=0x7ff) { \
(s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \
if(__uc<=0x7ff) { \
(s)[(i)++]=(uint8_t)((__uc>>6)|0xc0); \
} else { \
if((uint32_t)(c)<=0xffff) { \
(s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \
if(__uc<=0xffff) { \
(s)[(i)++]=(uint8_t)((__uc>>12)|0xe0); \
} else { \
(s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); \
(s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); \
(s)[(i)++]=(uint8_t)((__uc>>18)|0xf0); \
(s)[(i)++]=(uint8_t)(((__uc>>12)&0x3f)|0x80); \
} \
(s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \
(s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \
} \
(s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
(s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
} \
}
@ -470,17 +456,23 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
* @stable ICU 2.4
*/
#define U8_APPEND(s, i, capacity, c, isError) { \
if((uint32_t)(c)<=0x7f) { \
(s)[(i)++]=(uint8_t)(c); \
} else if((uint32_t)(c)<=0x7ff && (i)+1<(capacity)) { \
(s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \
(s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
} else if((uint32_t)(c)<=0xd7ff && (i)+2<(capacity)) { \
(s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \
(s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \
(s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
uint32_t __uc=(c); \
if(__uc<=0x7f) { \
(s)[(i)++]=(uint8_t)__uc; \
} else if(__uc<=0x7ff && (i)+1<(capacity)) { \
(s)[(i)++]=(uint8_t)((__uc>>6)|0xc0); \
(s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
} else if((__uc<=0xd7ff || (0xe000<=__uc && __uc<=0xffff)) && (i)+2<(capacity)) { \
(s)[(i)++]=(uint8_t)((__uc>>12)|0xe0); \
(s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \
(s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
} else if(0xffff<__uc && __uc<=0x10ffff && (i)+3<(capacity)) { \
(s)[(i)++]=(uint8_t)((__uc>>18)|0xf0); \
(s)[(i)++]=(uint8_t)(((__uc>>12)&0x3f)|0x80); \
(s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \
(s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
} else { \
(i)=utf8_appendCharSafeBody(s, (i), (capacity), c, &(isError)); \
(isError)=TRUE; \
} \
}
@ -600,12 +592,15 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
* If the offset points to a UTF-8 trail byte,
* then the offset is moved backward to the corresponding lead byte.
* Otherwise, it is not modified.
*
* "Safe" macro, checks for illegal sequences and for string boundaries.
* Unlike U8_TRUNCATE_IF_INCOMPLETE(), this macro always reads s[i].
*
* @param s const uint8_t * string
* @param start int32_t starting string offset (usually 0)
* @param i int32_t string offset, must be start<=i
* @see U8_SET_CP_START_UNSAFE
* @see U8_TRUNCATE_IF_INCOMPLETE
* @stable ICU 2.4
*/
#define U8_SET_CP_START(s, start, i) { \
@ -614,6 +609,57 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
} \
}
#ifndef U_HIDE_DRAFT_API
/**
* If the string ends with a UTF-8 byte sequence that is valid so far
* but incomplete, then reduce the length of the string to end before
* the lead byte of that incomplete sequence.
* For example, if the string ends with E1 80, the length is reduced by 2.
*
* In all other cases (the string ends with a complete sequence, or it is not
* possible for any further trail byte to extend the trailing sequence)
* the length remains unchanged.
*
* Useful for processing text split across multiple buffers
* (save the incomplete sequence for later)
* and for optimizing iteration
* (check for string length only once per character).
*
* "Safe" macro, checks for illegal sequences and for string boundaries.
* Unlike U8_SET_CP_START(), this macro never reads s[length].
*
* (In UTF-16, simply check for U16_IS_LEAD(last code unit).)
*
* @param s const uint8_t * string
* @param start int32_t starting string offset (usually 0)
* @param length int32_t string length (usually start<=length)
* @see U8_SET_CP_START
* @draft ICU 61
*/
#define U8_TRUNCATE_IF_INCOMPLETE(s, start, length) { \
    /* Wrapped in braces like the other statement macros in this file, so the */ \
    /* expansion is one compound statement; (s) is parenthesized so that a    */ \
    /* pointer expression such as buf+offset can be passed.                   */ \
    /* Only bytes in [start, length) are read; s[length] is never touched.    */ \
    if((length)>(start)) { \
        uint8_t __b1=(s)[(length)-1]; \
        if(U8_IS_SINGLE(__b1)) { \
            /* common ASCII character */ \
        } else if(U8_IS_LEAD(__b1)) { \
            /* the string ends with a lead byte: cut before it */ \
            --(length); \
        } else if(U8_IS_TRAIL(__b1) && ((length)-2)>=(start)) { \
            uint8_t __b2=(s)[(length)-2]; \
            if(0xe0<=__b2 && __b2<=0xf4) { \
                /* 3- or 4-byte lead plus one trail byte: cut only if valid so far */ \
                if(__b2<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(__b2, __b1) : \
                        U8_IS_VALID_LEAD4_AND_T1(__b2, __b1)) { \
                    (length)-=2; \
                } \
            } else if(U8_IS_TRAIL(__b2) && ((length)-3)>=(start)) { \
                uint8_t __b3=(s)[(length)-3]; \
                /* 4-byte lead plus two trail bytes: cut only if valid so far */ \
                if(0xf0<=__b3 && __b3<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(__b3, __b2)) { \
                    (length)-=3; \
                } \
            } \
        } \
    } \
}
#endif // U_HIDE_DRAFT_API
/* definitions with backward iteration -------------------------------------- */
/**

View file

@ -183,7 +183,7 @@ UTraceData(const void *context, int32_t fnNumber, int32_t level,
* tracing functions must themselves filter by checking that the
* current thread is the desired thread.
*
* @param context an uninterpretted pointer. Whatever is passed in
* @param context an uninterpreted pointer. Whatever is passed in
* here will in turn be passed to each of the tracing
* functions UTraceEntry, UTraceExit and UTraceData.
* ICU does not use or alter this pointer.
@ -320,7 +320,7 @@ utrace_getFunctions(const void **context,
* human readable form. Note that a UTraceData function may choose
* to not format the data; it could, for example, save it in
* in the raw form it was received (more compact), leaving
* formatting for a later trace analyis tool.
* formatting for a later trace analysis tool.
* @param outBuf pointer to a buffer to receive the formatted output. Output
* will be nul terminated if there is space in the buffer -
* if the length of the requested output < the output buffer size.

View file

@ -145,7 +145,7 @@
/**
* U_ICU_ENTRY_POINT is the name of the DLL entry point to the ICU data library.
* Defined as a literal, not a string.
* Tricky Preprocessor use - ## operator replaces macro paramters with the literal string
* Tricky Preprocessor use - ## operator replaces macro parameters with the literal string
* from the corresponding macro invocation, _before_ other macro substitutions.
* Need a nested \#defines to get the actual version numbers rather than
* the literal text U_ICU_VERSION_MAJOR_NUM into the name.
@ -446,14 +446,14 @@ typedef enum UErrorCode {
U_BUFFER_OVERFLOW_ERROR = 15, /**< A result would not fit in the supplied buffer */
U_UNSUPPORTED_ERROR = 16, /**< Requested operation not supported in current context */
U_RESOURCE_TYPE_MISMATCH = 17, /**< an operation is requested over a resource that does not support it */
U_ILLEGAL_ESCAPE_SEQUENCE = 18, /**< ISO-2022 illlegal escape sequence */
U_ILLEGAL_ESCAPE_SEQUENCE = 18, /**< ISO-2022 illegal escape sequence */
U_UNSUPPORTED_ESCAPE_SEQUENCE = 19, /**< ISO-2022 unsupported escape sequence */
U_NO_SPACE_AVAILABLE = 20, /**< No space available for in-buffer expansion for Arabic shaping */
U_CE_NOT_FOUND_ERROR = 21, /**< Currently used only while setting variable top, but can be used generally */
U_PRIMARY_TOO_LONG_ERROR = 22, /**< User tried to set variable top to a primary that is longer than two bytes */
U_STATE_TOO_OLD_ERROR = 23, /**< ICU cannot construct a service from this state, as it is no longer supported */
U_TOO_MANY_ALIASES_ERROR = 24, /**< There are too many aliases in the path to the requested resource.
It is very possible that a circular alias definition has occured */
It is very possible that a circular alias definition has occurred */
U_ENUM_OUT_OF_SYNC_ERROR = 25, /**< UEnumeration out of sync with underlying collection */
U_INVARIANT_CONVERSION_ERROR = 26, /**< Unable to convert a UChar* string to char* with the invariant converter. */
U_INVALID_STATE_ERROR = 27, /**< Requested operation can not be completed with ICU in its current state */
@ -499,7 +499,7 @@ typedef enum UErrorCode {
U_MULTIPLE_COMPOUND_FILTERS, /**< More than one compound filter */
U_INVALID_RBT_SYNTAX, /**< A "::id" rule was passed to the RuleBasedTransliterator parser */
U_INVALID_PROPERTY_PATTERN, /**< UNUSED as of ICU 2.4 */
U_MALFORMED_PRAGMA, /**< A 'use' pragma is invlalid */
U_MALFORMED_PRAGMA, /**< A 'use' pragma is invalid */
U_UNCLOSED_SEGMENT, /**< A closing ')' is missing */
U_ILLEGAL_CHAR_IN_SEGMENT, /**< UNUSED as of ICU 2.4 */
U_VARIABLE_RANGE_EXHAUSTED, /**< Too many stand-ins generated for the given variable range */
@ -539,12 +539,15 @@ typedef enum UErrorCode {
U_DEFAULT_KEYWORD_MISSING, /**< Missing DEFAULT rule in plural rules */
U_DECIMAL_NUMBER_SYNTAX_ERROR, /**< Decimal number syntax error */
U_FORMAT_INEXACT_ERROR, /**< Cannot format a number exactly and rounding mode is ROUND_UNNECESSARY @stable ICU 4.8 */
#ifndef U_HIDE_DRAFT_API
U_NUMBER_ARG_OUTOFBOUNDS_ERROR, /**< The argument to a NumberFormatter helper method was out of bounds; the bounds are usually 0 to 999. @draft ICU 61 */
#endif // U_HIDE_DRAFT_API
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the highest normal formatting API error code.
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/
U_FMT_PARSE_ERROR_LIMIT,
U_FMT_PARSE_ERROR_LIMIT = 0x10113,
#endif // U_HIDE_DEPRECATED_API
/*
@ -555,7 +558,7 @@ typedef enum UErrorCode {
U_BRK_HEX_DIGITS_EXPECTED, /**< Hex digits expected as part of a escaped char in a rule. */
U_BRK_SEMICOLON_EXPECTED, /**< Missing ';' at the end of a RBBI rule. */
U_BRK_RULE_SYNTAX, /**< Syntax error in RBBI rule. */
U_BRK_UNCLOSED_SET, /**< UnicodeSet witing an RBBI rule missing a closing ']'. */
U_BRK_UNCLOSED_SET, /**< UnicodeSet within an RBBI rule missing a closing ']'. */
U_BRK_ASSIGN_ERROR, /**< Syntax error in RBBI rule assignment statement. */
U_BRK_VARIABLE_REDFINITION, /**< RBBI rule $Variable redefined. */
U_BRK_MISMATCHED_PAREN, /**< Mis-matched parentheses in an RBBI rule. */
@ -564,7 +567,7 @@ typedef enum UErrorCode {
U_BRK_INIT_ERROR, /**< Initialization failure. Probable missing ICU Data. */
U_BRK_RULE_EMPTY_SET, /**< Rule contains an empty Unicode Set. */
U_BRK_UNRECOGNIZED_OPTION, /**< !!option in RBBI rules not recognized. */
U_BRK_MALFORMED_RULE_TAG, /**< The {nnn} tag on a rule is mal formed */
U_BRK_MALFORMED_RULE_TAG, /**< The {nnn} tag on a rule is malformed */
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the highest normal BreakIterator error code.

View file

@ -58,13 +58,13 @@
* This value will change in the subsequent releases of ICU
* @stable ICU 2.4
*/
#define U_ICU_VERSION_MAJOR_NUM 60
#define U_ICU_VERSION_MAJOR_NUM 61
/** The current ICU minor version as an integer.
* This value will change in the subsequent releases of ICU
* @stable ICU 2.6
*/
#define U_ICU_VERSION_MINOR_NUM 2
#define U_ICU_VERSION_MINOR_NUM 1
/** The current ICU patchlevel version as an integer.
* This value will change in the subsequent releases of ICU
@ -84,7 +84,7 @@
* This value will change in the subsequent releases of ICU
* @stable ICU 2.6
*/
#define U_ICU_VERSION_SUFFIX _60
#define U_ICU_VERSION_SUFFIX _61
/**
* \def U_DEF2_ICU_ENTRY_POINT_RENAME
@ -119,19 +119,26 @@
* This value will change in the subsequent releases of ICU
* @stable ICU 2.4
*/
#define U_ICU_VERSION "60.2"
#define U_ICU_VERSION "61.1"
/** The current ICU library major/minor version as a string without dots, for library name suffixes.
* This value will change in the subsequent releases of ICU
* @stable ICU 2.6
/**
* The current ICU library major version number as a string, for library name suffixes.
* This value will change in subsequent releases of ICU.
*
* Until ICU 4.8, this was the combination of the single-digit major and minor ICU version numbers
* into one string without dots ("48").
* Since ICU 49, it is the double-digit major ICU version number.
* See http://userguide.icu-project.org/design#TOC-Version-Numbers-in-ICU
*
* @stable ICU 2.6
*/
#define U_ICU_VERSION_SHORT "60"
#define U_ICU_VERSION_SHORT "61"
#ifndef U_HIDE_INTERNAL_API
/** Data version in ICU4C.
* @internal ICU 4.4 Internal Use Only
**/
#define U_ICU_DATA_VERSION "60.2"
#define U_ICU_DATA_VERSION "61.1"
#endif /* U_HIDE_INTERNAL_API */
/*===========================================================================

View file

@ -105,7 +105,7 @@ typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH];
* @stable ICU 2.4
*/
/* Define namespace symbols if the compiler supports it. */
/* Define C++ namespace symbols. */
#ifdef __cplusplus
# if U_DISABLE_RENAMING
# define U_ICU_NAMESPACE icu
@ -122,7 +122,13 @@ typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH];
# define U_NAMESPACE_QUALIFIER U_ICU_NAMESPACE::
# ifndef U_USING_ICU_NAMESPACE
# define U_USING_ICU_NAMESPACE 1
# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || \
defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) || \
defined(U_LAYOUTEX_IMPLEMENTATION) || defined(U_TOOLUTIL_IMPLEMENTATION)
# define U_USING_ICU_NAMESPACE 0
# else
# define U_USING_ICU_NAMESPACE 0
# endif
# endif
# if U_USING_ICU_NAMESPACE
U_NAMESPACE_USE

View file

@ -6,24 +6,26 @@
* others. All Rights Reserved.
******************************************************************************
*
* File UNIFIEDCACHE.CPP
* File unifiedcache.cpp
******************************************************************************
*/
#include "uhash.h"
#include "unifiedcache.h"
#include "umutex.h"
#include <algorithm> // For std::max()
#include "mutex.h"
#include "uassert.h"
#include "uhash.h"
#include "ucln_cmn.h"
#include "umutex.h"
static icu::UnifiedCache *gCache = NULL;
static icu::SharedObject *gNoValue = NULL;
static UMutex gCacheMutex = U_MUTEX_INITIALIZER;
static UConditionVar gInProgressValueAddedCond = U_CONDITION_INITIALIZER;
static icu::UInitOnce gCacheInitOnce = U_INITONCE_INITIALIZER;
static const int32_t MAX_EVICT_ITERATIONS = 10;
static const int32_t MAX_EVICT_ITERATIONS = 10;
static const int32_t DEFAULT_MAX_UNUSED = 1000;
static const int32_t DEFAULT_PERCENTAGE_OF_IN_USE = 100;
@ -35,10 +37,6 @@ static UBool U_CALLCONV unifiedcache_cleanup() {
delete gCache;
gCache = NULL;
}
if (gNoValue) {
delete gNoValue;
gNoValue = NULL;
}
return TRUE;
}
U_CDECL_END
@ -73,23 +71,15 @@ static void U_CALLCONV cacheInit(UErrorCode &status) {
ucln_common_registerCleanup(
UCLN_COMMON_UNIFIED_CACHE, unifiedcache_cleanup);
// gNoValue must be created first to avoid assertion error in
// cache constructor.
gNoValue = new SharedObject();
gCache = new UnifiedCache(status);
if (gCache == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
}
if (U_FAILURE(status)) {
delete gCache;
delete gNoValue;
gCache = NULL;
gNoValue = NULL;
return;
}
// We add a softref because we want hash elements with gNoValue to be
// elligible for purging but we don't ever want gNoValue to be deleted.
gNoValue->addSoftRef();
}
UnifiedCache *UnifiedCache::getInstance(UErrorCode &status) {
@ -104,14 +94,24 @@ UnifiedCache *UnifiedCache::getInstance(UErrorCode &status) {
UnifiedCache::UnifiedCache(UErrorCode &status) :
fHashtable(NULL),
fEvictPos(UHASH_FIRST),
fItemsInUseCount(0),
fNumValuesTotal(0),
fNumValuesInUse(0),
fMaxUnused(DEFAULT_MAX_UNUSED),
fMaxPercentageOfInUse(DEFAULT_PERCENTAGE_OF_IN_USE),
fAutoEvictedCount(0) {
fAutoEvictedCount(0),
fNoValue(nullptr) {
if (U_FAILURE(status)) {
return;
}
U_ASSERT(gNoValue != NULL);
fNoValue = new SharedObject();
if (fNoValue == nullptr) {
status = U_MEMORY_ALLOCATION_ERROR;
return;
}
fNoValue->softRefCount = 1; // Add fake references to prevent fNoValue from being deleted
fNoValue->hardRefCount = 1; // when other references to it are removed.
fNoValue->cachePtr = this;
fHashtable = uhash_open(
&ucache_hashKeys,
&ucache_compareKeys,
@ -139,7 +139,7 @@ void UnifiedCache::setEvictionPolicy(
int32_t UnifiedCache::unusedCount() const {
Mutex lock(&gCacheMutex);
return uhash_count(fHashtable) - fItemsInUseCount;
return uhash_count(fHashtable) - fNumValuesInUse;
}
int64_t UnifiedCache::autoEvictedCount() const {
@ -161,6 +161,12 @@ void UnifiedCache::flush() const {
while (_flush(FALSE));
}
// Bookkeeping hook for a value that is no longer in use: decrements the
// in-use counter and then runs one eviction slice.
void UnifiedCache::handleUnreferencedObject() const {
    // Hold the global cache mutex for the whole call; both the counter update
    // and the eviction slice below run under it.
    Mutex cacheGuard(&gCacheMutex);
    fNumValuesInUse -= 1;
    _runEvictionSlice();
}
#ifdef UNIFIED_CACHE_DEBUG
#include <stdio.h>
@ -199,7 +205,7 @@ void UnifiedCache::_dumpContents() const {
"Unified Cache: Key '%s', error %d, value %p, total refcount %d, soft refcount %d\n",
key->writeDescription(buffer, 256),
key->creationStatus,
sharedObject == gNoValue ? NULL :sharedObject,
sharedObject == fNoValue ? NULL :sharedObject,
sharedObject->getRefCount(),
sharedObject->getSoftRefCount());
}
@ -219,10 +225,11 @@ UnifiedCache::~UnifiedCache() {
_flush(TRUE);
}
uhash_close(fHashtable);
fHashtable = nullptr;
delete fNoValue;
fNoValue = nullptr;
}
// Returns the next element in the cache round robin style.
// On entry, gCacheMutex must be held.
const UHashElement *
UnifiedCache::_nextElement() const {
const UHashElement *element = uhash_nextElement(fHashtable, &fEvictPos);
@ -233,46 +240,36 @@ UnifiedCache::_nextElement() const {
return element;
}
// Flushes the contents of the cache. If cache values hold references to other
// cache values then _flush should be called in a loop until it returns FALSE.
// On entry, gCacheMutex must be held.
// On exit, those values with are evictable are flushed. If all is true
// then every value is flushed even if it is not evictable.
// Returns TRUE if any value in cache was flushed or FALSE otherwise.
// Flushes the contents of the cache. If cache values hold references to other
// cache values then _flush should be called in a loop until it returns FALSE.
// On entry, gCacheMutex must be held.
// On exit, the values that are evictable have been flushed. If all is true
// then every value is flushed even if it is not evictable.
// Returns TRUE if any value in the cache was flushed, FALSE otherwise.
UBool UnifiedCache::_flush(UBool all) const {
    UBool result = FALSE;
    // Visit at most as many elements as were present on entry; removals
    // during the walk shrink the table.
    int32_t origSize = uhash_count(fHashtable);
    for (int32_t i = 0; i < origSize; ++i) {
        const UHashElement *element = _nextElement();
        if (element == nullptr) {
            break;
        }
        if (all || _isEvictable(element)) {
            const SharedObject *sharedObject =
                    (const SharedObject *) element->value.pointer;
            // Compare, do not assign: the assertion checks that the value
            // belongs to this cache and must not modify cachePtr.
            U_ASSERT(sharedObject->cachePtr == this);
            uhash_removeElement(fHashtable, element);
            removeSoftRef(sharedObject);    // Deletes the sharedObject when softRefCount goes to zero.
            result = TRUE;
        }
    }
    return result;
}
// Computes how many items should be evicted.
// On entry, gCacheMutex must be held.
// Returns number of items that should be evicted or a value <= 0 if no
// items need to be evicted.
int32_t UnifiedCache::_computeCountOfItemsToEvict() const {
int32_t maxPercentageOfInUseCount =
fItemsInUseCount * fMaxPercentageOfInUse / 100;
int32_t maxUnusedCount = fMaxUnused;
if (maxUnusedCount < maxPercentageOfInUseCount) {
maxUnusedCount = maxPercentageOfInUseCount;
}
return uhash_count(fHashtable) - fItemsInUseCount - maxUnusedCount;
int32_t totalItems = uhash_count(fHashtable);
int32_t evictableItems = totalItems - fNumValuesInUse;
int32_t unusedLimitByPercentage = fNumValuesInUse * fMaxPercentageOfInUse / 100;
int32_t unusedLimit = std::max(unusedLimitByPercentage, fMaxUnused);
int32_t countOfItemsToEvict = std::max(0, evictableItems - unusedLimit);
return countOfItemsToEvict;
}
// Run an eviction slice.
// On entry, gCacheMutex must be held.
// _runEvictionSlice runs a slice of the evict pipeline by examining the next
// 10 entries in the cache round robin style evicting them if they are eligible.
void UnifiedCache::_runEvictionSlice() const {
int32_t maxItemsToEvict = _computeCountOfItemsToEvict();
if (maxItemsToEvict <= 0) {
@ -280,11 +277,14 @@ void UnifiedCache::_runEvictionSlice() const {
}
for (int32_t i = 0; i < MAX_EVICT_ITERATIONS; ++i) {
const UHashElement *element = _nextElement();
if (element == nullptr) {
break;
}
if (_isEvictable(element)) {
const SharedObject *sharedObject =
(const SharedObject *) element->value.pointer;
uhash_removeElement(fHashtable, element);
sharedObject->removeSoftRef();
removeSoftRef(sharedObject); // Deletes sharedObject when SoftRefCount goes to zero.
++fAutoEvictedCount;
if (--maxItemsToEvict == 0) {
break;
@ -293,11 +293,6 @@ void UnifiedCache::_runEvictionSlice() const {
}
}
// Places a new value and creationStatus in the cache for the given key.
// On entry, gCacheMutex must be held. key must not exist in the cache.
// On exit, value and creation status placed under key. Soft reference added
// to value on successful add. On error sets status.
void UnifiedCache::_putNew(
const CacheKeyBase &key,
const SharedObject *value,
@ -312,24 +307,17 @@ void UnifiedCache::_putNew(
return;
}
keyToAdopt->fCreationStatus = creationStatus;
if (value->noSoftReferences()) {
if (value->softRefCount == 0) {
_registerMaster(keyToAdopt, value);
}
uhash_put(fHashtable, keyToAdopt, (void *) value, &status);
void *oldValue = uhash_put(fHashtable, keyToAdopt, (void *) value, &status);
U_ASSERT(oldValue == nullptr);
(void)oldValue;
if (U_SUCCESS(status)) {
value->addSoftRef();
value->softRefCount++;
}
}
// Places value and status at key if there is no value at key or if cache
// entry for key is in progress. Otherwise, it leaves the current value and
// status there.
// On entry. gCacheMutex must not be held. value must be
// included in the reference count of the object to which it points.
// On exit, value and status are changed to what was already in the cache if
// something was there and not in progress. Otherwise, value and status are left
// unchanged in which case they are placed in the cache on a best-effort basis.
// Caller must call removeRef() on value.
void UnifiedCache::_putIfAbsentAndGet(
const CacheKeyBase &key,
const SharedObject *&value,
@ -352,15 +340,7 @@ void UnifiedCache::_putIfAbsentAndGet(
_runEvictionSlice();
}
// Attempts to fetch value and status for key from cache.
// On entry, gCacheMutex must not be held value must be NULL and status must
// be U_ZERO_ERROR.
// On exit, either returns FALSE (In this
// case caller should try to create the object) or returns TRUE with value
// pointing to the fetched value and status set to fetched status. When
// FALSE is returned status may be set to failure if an in progress hash
// entry could not be made but value will remain unchanged. When TRUE is
// returned, caler must call removeRef() on value.
UBool UnifiedCache::_poll(
const CacheKeyBase &key,
const SharedObject *&value,
@ -369,27 +349,29 @@ UBool UnifiedCache::_poll(
U_ASSERT(status == U_ZERO_ERROR);
Mutex lock(&gCacheMutex);
const UHashElement *element = uhash_find(fHashtable, &key);
while (element != NULL && _inProgress(element)) {
// If the hash table contains an inProgress placeholder entry for this key,
// this means that another thread is currently constructing the value object.
// Loop, waiting for that construction to complete.
while (element != NULL && _inProgress(element)) {
umtx_condWait(&gInProgressValueAddedCond, &gCacheMutex);
element = uhash_find(fHashtable, &key);
}
// If the hash table contains an entry for the key,
// fetch out the contents and return them.
if (element != NULL) {
_fetch(element, value, status);
_fetch(element, value, status);
return TRUE;
}
_putNew(key, gNoValue, U_ZERO_ERROR, status);
// The hash table contained nothing for this key.
// Insert an inProgress place holder value.
// Our caller will create the final value and update the hash table.
_putNew(key, fNoValue, U_ZERO_ERROR, status);
return FALSE;
}
// Gets value out of cache.
// On entry. gCacheMutex must not be held. value must be NULL. status
// must be U_ZERO_ERROR.
// On exit. value and status set to what is in cache at key or on cache
// miss the key's createObject() is called and value and status are set to
// the result of that. In this latter case, best effort is made to add the
// value and status to the cache. If createObject() fails to create a value,
// gNoValue is stored in cache, and value is set to NULL. Caller must call
// removeRef on value if non NULL.
void UnifiedCache::_get(
const CacheKeyBase &key,
const SharedObject *&value,
@ -398,7 +380,7 @@ void UnifiedCache::_get(
U_ASSERT(value == NULL);
U_ASSERT(status == U_ZERO_ERROR);
if (_poll(key, value, status)) {
if (value == gNoValue) {
if (value == fNoValue) {
SharedObject::clearPtr(value);
}
return;
@ -410,46 +392,22 @@ void UnifiedCache::_get(
U_ASSERT(value == NULL || value->hasHardReferences());
U_ASSERT(value != NULL || status != U_ZERO_ERROR);
if (value == NULL) {
SharedObject::copyPtr(gNoValue, value);
SharedObject::copyPtr(fNoValue, value);
}
_putIfAbsentAndGet(key, value, status);
if (value == gNoValue) {
if (value == fNoValue) {
SharedObject::clearPtr(value);
}
}
void UnifiedCache::decrementItemsInUseWithLockingAndEviction() const {
Mutex mutex(&gCacheMutex);
decrementItemsInUse();
_runEvictionSlice();
}
void UnifiedCache::incrementItemsInUse() const {
++fItemsInUseCount;
}
void UnifiedCache::decrementItemsInUse() const {
--fItemsInUseCount;
}
// Register a master cache entry.
// On entry, gCacheMutex must be held.
// On exit, items in use count incremented, entry is marked as a master
// entry, and value registered with cache so that subsequent calls to
// addRef() and removeRef() on it correctly updates items in use count
void UnifiedCache::_registerMaster(
const CacheKeyBase *theKey, const SharedObject *value) const {
theKey->fIsMaster = TRUE;
++fItemsInUseCount;
value->registerWithCache(this);
const CacheKeyBase *theKey, const SharedObject *value) const {
theKey->fIsMaster = true;
value->cachePtr = this;
++fNumValuesTotal;
++fNumValuesInUse;
}
// Store a value and error in given hash entry.
// On entry, gCacheMutex must be held. Hash entry element must be in progress.
// value must be non NULL.
// On Exit, soft reference added to value. value and status stored in hash
// entry. Soft reference removed from previous stored value. Waiting
// threads notified.
void UnifiedCache::_put(
const UHashElement *element,
const SharedObject *value,
@ -458,86 +416,52 @@ void UnifiedCache::_put(
const CacheKeyBase *theKey = (const CacheKeyBase *) element->key.pointer;
const SharedObject *oldValue = (const SharedObject *) element->value.pointer;
theKey->fCreationStatus = status;
if (value->noSoftReferences()) {
if (value->softRefCount == 0) {
_registerMaster(theKey, value);
}
value->addSoftRef();
value->softRefCount++;
UHashElement *ptr = const_cast<UHashElement *>(element);
ptr->value.pointer = (void *) value;
oldValue->removeSoftRef();
U_ASSERT(oldValue == fNoValue);
removeSoftRef(oldValue);
// Tell waiting threads that we replace in-progress status with
// an error.
umtx_condBroadcast(&gInProgressValueAddedCond);
}
void
UnifiedCache::copyPtr(const SharedObject *src, const SharedObject *&dest) {
if(src != dest) {
if(dest != NULL) {
dest->removeRefWhileHoldingCacheLock();
}
dest = src;
if(src != NULL) {
src->addRefWhileHoldingCacheLock();
}
}
}
void
UnifiedCache::clearPtr(const SharedObject *&ptr) {
if (ptr != NULL) {
ptr->removeRefWhileHoldingCacheLock();
ptr = NULL;
}
}
// Fetch value and error code from a particular hash entry.
// On entry, gCacheMutex must be held. value must be either NULL or must be
// included in the ref count of the object to which it points.
// On exit, value and status set to what is in the hash entry. Caller must
// eventually call removeRef on value.
// If hash entry is in progress, value will be set to gNoValue and status will
// be set to U_ZERO_ERROR.
void UnifiedCache::_fetch(
const UHashElement *element,
const SharedObject *&value,
UErrorCode &status) {
UErrorCode &status) const {
const CacheKeyBase *theKey = (const CacheKeyBase *) element->key.pointer;
status = theKey->fCreationStatus;
// Since we have the cache lock, calling regular SharedObject methods
// Since we have the cache lock, calling regular SharedObject add/removeRef
// could cause us to deadlock on ourselves since they may need to lock
// the cache mutex.
UnifiedCache::copyPtr((const SharedObject *) element->value.pointer, value);
removeHardRef(value);
value = static_cast<const SharedObject *>(element->value.pointer);
addHardRef(value);
}
// Determine if given hash entry is in progress.
// On entry, gCacheMutex must be held.
UBool UnifiedCache::_inProgress(const UHashElement *element) {
const SharedObject *value = NULL;
UBool UnifiedCache::_inProgress(const UHashElement* element) const {
UErrorCode status = U_ZERO_ERROR;
const SharedObject * value = NULL;
_fetch(element, value, status);
UBool result = _inProgress(value, status);
// Since we have the cache lock, calling regular SharedObject methods
// could cause us to deadlock on ourselves since they may need to lock
// the cache mutex.
UnifiedCache::clearPtr(value);
removeHardRef(value);
return result;
}
// Determine if given hash entry is in progress.
// On entry, gCacheMutex must be held.
UBool UnifiedCache::_inProgress(
const SharedObject *theValue, UErrorCode creationStatus) {
return (theValue == gNoValue && creationStatus == U_ZERO_ERROR);
const SharedObject* theValue, UErrorCode creationStatus) const {
return (theValue == fNoValue && creationStatus == U_ZERO_ERROR);
}
// Determine if given hash entry is eligible for eviction.
// On entry, gCacheMutex must be held.
UBool UnifiedCache::_isEvictable(const UHashElement *element) {
UBool UnifiedCache::_isEvictable(const UHashElement *element) const
{
const CacheKeyBase *theKey = (const CacheKeyBase *) element->key.pointer;
const SharedObject *theValue =
(const SharedObject *) element->value.pointer;
@ -549,7 +473,47 @@ UBool UnifiedCache::_isEvictable(const UHashElement *element) {
// We can evict entries that are either not a master or have just
// one reference (The one reference being from the cache itself).
return (!theKey->fIsMaster || (theValue->getSoftRefCount() == 1 && theValue->noHardReferences()));
return (!theKey->fIsMaster || (theValue->softRefCount == 1 && theValue->noHardReferences()));
}
// Drops one soft reference from value. When the soft count reaches zero the
// value is no longer counted in fNumValuesTotal and is either deleted (no
// hard references left) or detached from this cache.
void UnifiedCache::removeSoftRef(const SharedObject *value) const {
    U_ASSERT(value->cachePtr == this);
    U_ASSERT(value->softRefCount > 0);
    if (--value->softRefCount != 0) {
        return;     // other soft references remain
    }
    --fNumValuesTotal;
    if (!value->noHardReferences()) {
        // This path only happens from flush(all), which only happens from the
        // UnifiedCache destructor. Nulling out value.cachePtr changes the
        // behavior of value.removeRef(), causing the deletion to be done there.
        value->cachePtr = nullptr;
        return;
    }
    delete value;
}
// Atomically decrements the hard reference count of value and returns the
// new count. A null value is ignored and yields 0. When the count drops to
// zero the value no longer counts as in use.
int32_t UnifiedCache::removeHardRef(const SharedObject *value) const {
    if (!value) {
        return 0;
    }
    const int remaining = umtx_atomic_dec(&value->hardRefCount);
    U_ASSERT(remaining >= 0);
    if (remaining == 0) {
        fNumValuesInUse -= 1;
    }
    return remaining;
}
// Atomically increments the hard reference count of value and returns the
// new count. A null value is ignored and yields 0. The first hard reference
// marks the value as in use.
int32_t UnifiedCache::addHardRef(const SharedObject *value) const {
    if (!value) {
        return 0;
    }
    const int updated = umtx_atomic_inc(&value->hardRefCount);
    U_ASSERT(updated >= 1);
    if (updated == 1) {
        fNumValuesInUse += 1;
    }
    return updated;
}
U_NAMESPACE_END

View file

@ -190,7 +190,7 @@ class U_COMMON_API UnifiedCache : public UnifiedCacheBase {
UnifiedCache(UErrorCode &status);
/**
* Returns the cache instance.
* Return a pointer to the global cache instance.
*/
static UnifiedCache *getInstance(UErrorCode &status);
@ -294,7 +294,7 @@ class U_COMMON_API UnifiedCache : public UnifiedCacheBase {
/**
* Configures at what point eviction of unused entries will begin.
* Eviction is triggered whenever the number of unused entries exeeds
* Eviction is triggered whenever the number of evictable keys exceeds
* BOTH count AND (number of in-use items) * (percentageOfInUseItems / 100).
* Once the number of unused entries drops below one of these,
* eviction ceases. Because eviction happens incrementally,
@ -341,60 +341,214 @@ class U_COMMON_API UnifiedCache : public UnifiedCacheBase {
*/
int32_t unusedCount() const;
virtual void incrementItemsInUse() const;
virtual void decrementItemsInUseWithLockingAndEviction() const;
virtual void decrementItemsInUse() const;
virtual void handleUnreferencedObject() const;
virtual ~UnifiedCache();
private:
UHashtable *fHashtable;
mutable int32_t fEvictPos;
mutable int32_t fItemsInUseCount;
mutable int32_t fNumValuesTotal;
mutable int32_t fNumValuesInUse;
int32_t fMaxUnused;
int32_t fMaxPercentageOfInUse;
mutable int64_t fAutoEvictedCount;
SharedObject *fNoValue;
UnifiedCache(const UnifiedCache &other);
UnifiedCache &operator=(const UnifiedCache &other);
/**
* Flushes the contents of the cache. If cache values hold references to other
* cache values then _flush should be called in a loop until it returns FALSE.
*
* On entry, gCacheMutex must be held.
* On exit, those values which are evictable are flushed.
*
* @param all if false flush evictable items only, which are those with no external
* references, plus those that can be safely recreated.<br>
* if true, flush all elements. Any values (sharedObjects) with remaining
* hard (external) references are not deleted, but are detached from
* the cache, so that a subsequent removeRefs can delete them.
* _flush is not thread safe when all is true.
* @return TRUE if any value in cache was flushed or FALSE otherwise.
*/
UBool _flush(UBool all) const;
/**
* Gets value out of cache.
* On entry. gCacheMutex must not be held. value must be NULL. status
* must be U_ZERO_ERROR.
* On exit. value and status set to what is in cache at key or on cache
* miss the key's createObject() is called and value and status are set to
* the result of that. In this latter case, best effort is made to add the
* value and status to the cache. If createObject() fails to create a value,
* fNoValue is stored in cache, and value is set to NULL. Caller must call
* removeRef on value if non NULL.
*/
void _get(
const CacheKeyBase &key,
const SharedObject *&value,
const void *creationContext,
UErrorCode &status) const;
UBool _poll(
const CacheKeyBase &key,
const SharedObject *&value,
UErrorCode &status) const;
void _putNew(
const CacheKeyBase &key,
const SharedObject *value,
const UErrorCode creationStatus,
UErrorCode &status) const;
/**
* Attempts to fetch value and status for key from cache.
* On entry, gCacheMutex must not be held, value must be NULL, and status must
* be U_ZERO_ERROR.
* On exit, either returns FALSE (In this
* case caller should try to create the object) or returns TRUE with value
* pointing to the fetched value and status set to fetched status. When
* FALSE is returned status may be set to failure if an in progress hash
* entry could not be made but value will remain unchanged. When TRUE is
* returned, caller must call removeRef() on value.
*/
UBool _poll(
const CacheKeyBase &key,
const SharedObject *&value,
UErrorCode &status) const;
/**
* Places a new value and creationStatus in the cache for the given key.
* On entry, gCacheMutex must be held. key must not exist in the cache.
* On exit, value and creation status placed under key. Soft reference added
* to value on successful add. On error sets status.
*/
void _putNew(
const CacheKeyBase &key,
const SharedObject *value,
const UErrorCode creationStatus,
UErrorCode &status) const;
/**
* Places value and status at key if there is no value at key or if cache
* entry for key is in progress. Otherwise, it leaves the current value and
* status there.
*
* On entry, gCacheMutex must not be held. Value must be
* included in the reference count of the object to which it points.
*
* On exit, value and status are changed to what was already in the cache if
* something was there and not in progress. Otherwise, value and status are left
* unchanged in which case they are placed in the cache on a best-effort basis.
* Caller must call removeRef() on value.
*/
void _putIfAbsentAndGet(
const CacheKeyBase &key,
const SharedObject *&value,
UErrorCode &status) const;
const UHashElement *_nextElement() const;
/**
* Returns the next element in the cache round robin style.
* Returns nullptr if the cache is empty.
* On entry, gCacheMutex must be held.
*/
const UHashElement *_nextElement() const;
/**
* Return the number of cache items that would need to be evicted
* to bring usage into conformance with eviction policy.
*
* An item corresponds to an entry in the hash table, a hash table element.
*
* On entry, gCacheMutex must be held.
*/
int32_t _computeCountOfItemsToEvict() const;
/**
* Run an eviction slice.
* On entry, gCacheMutex must be held.
* _runEvictionSlice runs a slice of the evict pipeline by examining the next
* 10 entries in the cache round robin style evicting them if they are eligible.
*/
void _runEvictionSlice() const;
void _registerMaster(
const CacheKeyBase *theKey, const SharedObject *value) const;
/**
* Register a master cache entry. A master key is the first key to create
* a given SharedObject value. Subsequent keys whose create function
* produce references to an already existing SharedObject are not masters -
* they can be evicted and subsequently recreated.
*
* On entry, gCacheMutex must be held.
* On exit, items in use count incremented, entry is marked as a master
* entry, and value registered with cache so that subsequent calls to
* addRef() and removeRef() on it correctly interact with the cache.
*/
void _registerMaster(const CacheKeyBase *theKey, const SharedObject *value) const;
/**
* Store a value and creation error status in given hash entry.
* On entry, gCacheMutex must be held. Hash entry element must be in progress.
* value must be non NULL.
* On Exit, soft reference added to value. value and status stored in hash
* entry. Soft reference removed from previous stored value. Waiting
* threads notified.
*/
void _put(
const UHashElement *element,
const SharedObject *value,
const UErrorCode status) const;
/**
* Remove a soft reference, and delete the SharedObject if no references remain.
* To be used from within the UnifiedCache implementation only.
* gCacheMutex must be held by caller.
* @param value the SharedObject to be acted on.
*/
void removeSoftRef(const SharedObject *value) const;
/**
* Increment the hard reference count of the given SharedObject.
* gCacheMutex must be held by the caller.
* Update numValuesEvictable on transitions between zero and one reference.
*
* @param value The SharedObject to be referenced.
* @return the hard reference count after the addition.
*/
int32_t addHardRef(const SharedObject *value) const;
/**
* Decrement the hard reference count of the given SharedObject.
* gCacheMutex must be held by the caller.
* Update numValuesEvictable on transitions between one and zero reference.
*
* @param value The SharedObject to be dereferenced.
* @return the hard reference count after the removal.
*/
int32_t removeHardRef(const SharedObject *value) const;
#ifdef UNIFIED_CACHE_DEBUG
void _dumpContents() const;
#endif
static void copyPtr(const SharedObject *src, const SharedObject *&dest);
static void clearPtr(const SharedObject *&ptr);
static void _fetch(
const UHashElement *element,
const SharedObject *&value,
UErrorCode &status);
static UBool _inProgress(const UHashElement *element);
static UBool _inProgress(
const SharedObject *theValue, UErrorCode creationStatus);
static UBool _isEvictable(const UHashElement *element);
/**
* Fetch value and error code from a particular hash entry.
* On entry, gCacheMutex must be held. value must be either NULL or must be
* included in the ref count of the object to which it points.
* On exit, value and status set to what is in the hash entry. Caller must
* eventually call removeRef on value.
* If hash entry is in progress, value will be set to gNoValue and status will
* be set to U_ZERO_ERROR.
*/
void _fetch(const UHashElement *element, const SharedObject *&value,
UErrorCode &status) const;
/**
* Determine if given hash entry is in progress.
* On entry, gCacheMutex must be held.
*/
UBool _inProgress(const UHashElement *element) const;
/**
* Determine if given hash entry is in progress.
* On entry, gCacheMutex must be held.
*/
UBool _inProgress(const SharedObject *theValue, UErrorCode creationStatus) const;
/**
* Determine if given hash entry is eligible for eviction.
* On entry, gCacheMutex must be held.
*/
UBool _isEvictable(const UHashElement *element) const;
};
U_NAMESPACE_END

View file

@ -129,7 +129,7 @@ UnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern,
// _applyPattern calls add() etc., which set pat to empty.
UnicodeString rebuiltPat;
RuleCharacterIterator chars(pattern, symbols, pos);
applyPattern(chars, symbols, rebuiltPat, options, &UnicodeSet::closeOver, status);
applyPattern(chars, symbols, rebuiltPat, options, &UnicodeSet::closeOver, 0, status);
if (U_FAILURE(status)) return *this;
if (chars.inVariable()) {
// syntaxError(chars, "Extra chars in variable value");

View file

@ -231,7 +231,7 @@ void U_CALLCONV UnicodeSet_initInclusion(int32_t src, UErrorCode &status) {
ucase_addPropertyStarts(&sa, &status);
break;
case UPROPS_SRC_BIDI:
ubidi_addPropertyStarts(ubidi_getSingleton(), &sa, &status);
ubidi_addPropertyStarts(&sa, &status);
break;
default:
status = U_INTERNAL_PROGRAM_ERROR;
@ -257,6 +257,7 @@ const UnicodeSet* UnicodeSet::getInclusions(int32_t src, UErrorCode &status) {
return i.fSet;
}
namespace {
// Cache some sets for other services -------------------------------------- ***
void U_CALLCONV createUni32Set(UErrorCode &errorCode) {
@ -315,6 +316,8 @@ isPOSIXClose(const UnicodeString &pattern, int32_t pos) {
// memory leak checker tools
#define _dbgct(me)
} // namespace
//----------------------------------------------------------------
// Constructors &c
//----------------------------------------------------------------
@ -382,7 +385,7 @@ UnicodeSet::applyPatternIgnoreSpace(const UnicodeString& pattern,
// _applyPattern calls add() etc., which set pat to empty.
UnicodeString rebuiltPat;
RuleCharacterIterator chars(pattern, symbols, pos);
applyPattern(chars, symbols, rebuiltPat, USET_IGNORE_SPACE, NULL, status);
applyPattern(chars, symbols, rebuiltPat, USET_IGNORE_SPACE, NULL, 0, status);
if (U_FAILURE(status)) return;
if (chars.inVariable()) {
// syntaxError(chars, "Extra chars in variable value");
@ -406,6 +409,8 @@ UBool UnicodeSet::resemblesPattern(const UnicodeString& pattern, int32_t pos) {
// Implementation: Pattern parsing
//----------------------------------------------------------------
namespace {
/**
* A small all-inline class to manage a UnicodeSet pointer. Add
* operator->() etc. as needed.
@ -424,6 +429,10 @@ public:
}
};
constexpr int32_t MAX_DEPTH = 100;
} // namespace
/**
* Parse the pattern from the given RuleCharacterIterator. The
* iterator is advanced over the parsed pattern.
@ -443,8 +452,13 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
UnicodeString& rebuiltPat,
uint32_t options,
UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute),
int32_t depth,
UErrorCode& ec) {
if (U_FAILURE(ec)) return;
if (depth > MAX_DEPTH) {
ec = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
// Syntax characters: [ ] ^ - & { }
@ -579,7 +593,7 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
}
switch (setMode) {
case 1:
nested->applyPattern(chars, symbols, patLocal, options, caseClosure, ec);
nested->applyPattern(chars, symbols, patLocal, options, caseClosure, depth + 1, ec);
break;
case 2:
chars.skipIgnored(opts);
@ -837,6 +851,8 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
// Property set implementation
//----------------------------------------------------------------
namespace {
static UBool numericValueFilter(UChar32 ch, void* context) {
return u_getNumericValue(ch) == *(double*)context;
}
@ -868,6 +884,8 @@ static UBool scriptExtensionsFilter(UChar32 ch, void* context) {
return uscript_hasScript(ch, *(UScriptCode*)context);
}
} // namespace
/**
* Generic filter-based scanning code for UCD property UnicodeSets.
*/
@ -924,6 +942,8 @@ void UnicodeSet::applyFilter(UnicodeSet::Filter filter,
}
}
namespace {
static UBool mungeCharName(char* dst, const char* src, int32_t dstCapacity) {
/* Note: we use ' ' in compiler code page */
int32_t j = 0;
@ -941,6 +961,8 @@ static UBool mungeCharName(char* dst, const char* src, int32_t dstCapacity) {
return TRUE;
}
} // namespace
//----------------------------------------------------------------
// Property set API
//----------------------------------------------------------------

View file

@ -38,8 +38,6 @@
U_NAMESPACE_USE
#define GET_BIDI_PROPS() ubidi_getSingleton()
/* general properties API functions ----------------------------------------- */
struct BinaryProperty;
@ -62,15 +60,15 @@ static UBool caseBinaryPropertyContains(const BinaryProperty &/*prop*/, UChar32
}
static UBool isBidiControl(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
return ubidi_isBidiControl(GET_BIDI_PROPS(), c);
return ubidi_isBidiControl(c);
}
static UBool isMirrored(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
return ubidi_isMirrored(GET_BIDI_PROPS(), c);
return ubidi_isMirrored(c);
}
static UBool isJoinControl(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
return ubidi_isJoinControl(GET_BIDI_PROPS(), c);
return ubidi_isJoinControl(c);
}
#if UCONFIG_NO_NORMALIZATION
@ -329,11 +327,11 @@ static int32_t getBiDiClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*
}
static int32_t getBiDiPairedBracketType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
return (int32_t)ubidi_getPairedBracketType(GET_BIDI_PROPS(), c);
return (int32_t)ubidi_getPairedBracketType(c);
}
static int32_t biDiGetMaxValue(const IntProperty &/*prop*/, UProperty which) {
return ubidi_getMaxValue(GET_BIDI_PROPS(), which);
return ubidi_getMaxValue(which);
}
#if UCONFIG_NO_NORMALIZATION
@ -351,11 +349,11 @@ static int32_t getGeneralCategory(const IntProperty &/*prop*/, UChar32 c, UPrope
}
static int32_t getJoiningGroup(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
return ubidi_getJoiningGroup(GET_BIDI_PROPS(), c);
return ubidi_getJoiningGroup(c);
}
static int32_t getJoiningType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
return ubidi_getJoiningType(GET_BIDI_PROPS(), c);
return ubidi_getJoiningType(c);
}
static int32_t getNumericType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {

View file

@ -342,18 +342,16 @@ static void
_shapeToArabicDigitsWithContext(UChar *s, int32_t length,
UChar digitBase,
UBool isLogical, UBool lastStrongWasAL) {
const UBiDiProps *bdp;
int32_t i;
UChar c;
bdp=ubidi_getSingleton();
digitBase-=0x30;
/* the iteration direction depends on the type of input */
if(isLogical) {
for(i=0; i<length; ++i) {
c=s[i];
switch(ubidi_getClass(bdp, c)) {
switch(ubidi_getClass(c)) {
case U_LEFT_TO_RIGHT: /* L */
case U_RIGHT_TO_LEFT: /* R */
lastStrongWasAL=FALSE;
@ -373,7 +371,7 @@ _shapeToArabicDigitsWithContext(UChar *s, int32_t length,
} else {
for(i=length; i>0; /* pre-decrement in the body */) {
c=s[--i];
switch(ubidi_getClass(bdp, c)) {
switch(ubidi_getClass(c)) {
case U_LEFT_TO_RIGHT: /* L */
case U_RIGHT_TO_LEFT: /* R */
lastStrongWasAL=FALSE;

View file

@ -347,10 +347,6 @@ usprep_getProfile(const char* path,
newProfile->doNFKC = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0);
newProfile->checkBiDi = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0);
if(newProfile->checkBiDi) {
newProfile->bdp = ubidi_getSingleton();
}
LocalMemory<UStringPrepKey> key;
LocalMemory<char> keyName;
LocalMemory<char> keyPath;
@ -735,7 +731,7 @@ usprep_prepare( const UStringPrepProfile* profile,
}
if(profile->checkBiDi) {
direction = ubidi_getClass(profile->bdp, ch);
direction = ubidi_getClass(ch);
if(firstCharDir == U_CHAR_DIRECTION_COUNT){
firstCharDir = direction;
}

View file

@ -342,7 +342,7 @@ _strFromWCS( UChar *dest,
pSrcLimit = src + srcLength;
for(;;){
register int32_t nulLen = 0;
int32_t nulLen = 0;
/* find nulls in the string */
while(nulLen<srcLength && pSrc[nulLen++]!=0){

View file

@ -52,16 +52,8 @@ int32_t checkOverflowAndEditsError(int32_t destIndex, int32_t destCapacity,
return destIndex;
}
} // namespace
U_NAMESPACE_END
U_NAMESPACE_USE
/* string casing ------------------------------------------------------------ */
/* Appends a full case mapping result, see UCASE_MAX_STRING_LENGTH. */
static inline int32_t
inline int32_t
appendResult(UChar *dest, int32_t destIndex, int32_t destCapacity,
int32_t result, const UChar *s,
int32_t cpLength, uint32_t options, icu::Edits *edits) {
@ -134,7 +126,7 @@ appendResult(UChar *dest, int32_t destIndex, int32_t destCapacity,
return destIndex;
}
static inline int32_t
inline int32_t
appendUChar(UChar *dest, int32_t destIndex, int32_t destCapacity, UChar c) {
if(destIndex<destCapacity) {
dest[destIndex]=c;
@ -144,28 +136,34 @@ appendUChar(UChar *dest, int32_t destIndex, int32_t destCapacity, UChar c) {
return destIndex+1;
}
static inline int32_t
appendUnchanged(UChar *dest, int32_t destIndex, int32_t destCapacity,
const UChar *s, int32_t length, uint32_t options, icu::Edits *edits) {
if(length>0) {
if(edits!=NULL) {
edits->addUnchanged(length);
}
if(options & U_OMIT_UNCHANGED_TEXT) {
return destIndex;
}
if(length>(INT32_MAX-destIndex)) {
return -1; // integer overflow
}
if((destIndex+length)<=destCapacity) {
u_memcpy(dest+destIndex, s, length);
}
destIndex+=length;
int32_t
appendNonEmptyUnchanged(UChar *dest, int32_t destIndex, int32_t destCapacity,
const UChar *s, int32_t length, uint32_t options, icu::Edits *edits) {
if(edits!=NULL) {
edits->addUnchanged(length);
}
return destIndex;
if(options & U_OMIT_UNCHANGED_TEXT) {
return destIndex;
}
if(length>(INT32_MAX-destIndex)) {
return -1; // integer overflow
}
if((destIndex+length)<=destCapacity) {
u_memcpy(dest+destIndex, s, length);
}
return destIndex + length;
}
static UChar32 U_CALLCONV
inline int32_t
appendUnchanged(UChar *dest, int32_t destIndex, int32_t destCapacity,
                const UChar *s, int32_t length, uint32_t options, icu::Edits *edits) {
    // Thin guard in front of appendNonEmptyUnchanged(): a run of zero (or
    // negative) length contributes nothing, so the write position is handed
    // back untouched and the edits recorder is never consulted.
    return length > 0
        ? appendNonEmptyUnchanged(dest, destIndex, destCapacity, s, length, options, edits)
        : destIndex;
}
UChar32 U_CALLCONV
utf16_caseContextIterator(void *context, int8_t dir) {
UCaseContext *csc=(UCaseContext *)context;
UChar32 c;
@ -197,39 +195,205 @@ utf16_caseContextIterator(void *context, int8_t dir) {
return U_SENTINEL;
}
/*
* Case-maps [srcStart..srcLimit[ but takes
* context [0..srcLength[ into account.
/**
* caseLocale >= 0: Lowercases [srcStart..srcLimit[ but takes context [0..srcLength[ into account.
* caseLocale < 0: Case-folds [srcStart..srcLimit[.
*/
static int32_t
_caseMap(int32_t caseLocale, uint32_t options, UCaseMapFull *map,
UChar *dest, int32_t destCapacity,
const UChar *src, UCaseContext *csc,
int32_t srcStart, int32_t srcLimit,
icu::Edits *edits,
UErrorCode &errorCode) {
/* case mapping loop */
int32_t srcIndex=srcStart;
int32_t destIndex=0;
while(srcIndex<srcLimit) {
int32_t cpStart;
csc->cpStart=cpStart=srcIndex;
int32_t toLower(int32_t caseLocale, uint32_t options,
UChar *dest, int32_t destCapacity,
const UChar *src, UCaseContext *csc, int32_t srcStart, int32_t srcLimit,
icu::Edits *edits, UErrorCode &errorCode) {
const int8_t *latinToLower;
if (caseLocale == UCASE_LOC_ROOT ||
(caseLocale >= 0 ?
!(caseLocale == UCASE_LOC_TURKISH || caseLocale == UCASE_LOC_LITHUANIAN) :
(options & _FOLD_CASE_OPTIONS_MASK) == U_FOLD_CASE_DEFAULT)) {
latinToLower = LatinCase::TO_LOWER_NORMAL;
} else {
latinToLower = LatinCase::TO_LOWER_TR_LT;
}
const UTrie2 *trie = ucase_getTrie();
int32_t destIndex = 0;
int32_t prev = srcStart;
int32_t srcIndex = srcStart;
for (;;) {
// fast path for simple cases
UChar lead;
while (srcIndex < srcLimit) {
lead = src[srcIndex];
int32_t delta;
if (lead < LatinCase::LONG_S) {
int8_t d = latinToLower[lead];
if (d == LatinCase::EXC) { break; }
++srcIndex;
if (d == 0) { continue; }
delta = d;
} else if (lead >= 0xd800) {
break; // surrogate or higher
} else {
uint16_t props = UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, lead);
if (UCASE_HAS_EXCEPTION(props)) { break; }
++srcIndex;
if (!UCASE_IS_UPPER_OR_TITLE(props) || (delta = UCASE_GET_DELTA(props)) == 0) {
continue;
}
}
lead += delta;
destIndex = appendUnchanged(dest, destIndex, destCapacity,
src + prev, srcIndex - 1 - prev, options, edits);
if (destIndex >= 0) {
destIndex = appendUChar(dest, destIndex, destCapacity, lead);
if (edits != nullptr) {
edits->addReplace(1, 1);
}
}
if (destIndex < 0) {
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
prev = srcIndex;
}
if (srcIndex >= srcLimit) {
break;
}
// slow path
int32_t cpStart = srcIndex++;
UChar trail;
UChar32 c;
U16_NEXT(src, srcIndex, srcLimit, c);
csc->cpLimit=srcIndex;
if (U16_IS_LEAD(lead) && srcIndex < srcLimit && U16_IS_TRAIL(trail = src[srcIndex])) {
c = U16_GET_SUPPLEMENTARY(lead, trail);
++srcIndex;
} else {
c = lead;
}
const UChar *s;
c=map(c, utf16_caseContextIterator, csc, &s, caseLocale);
destIndex = appendResult(dest, destIndex, destCapacity, c, s,
srcIndex - cpStart, options, edits);
if (destIndex < 0) {
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
if (caseLocale >= 0) {
csc->cpStart = cpStart;
csc->cpLimit = srcIndex;
c = ucase_toFullLower(c, utf16_caseContextIterator, csc, &s, caseLocale);
} else {
c = ucase_toFullFolding(c, &s, options);
}
if (c >= 0) {
destIndex = appendUnchanged(dest, destIndex, destCapacity,
src + prev, cpStart - prev, options, edits);
if (destIndex >= 0) {
destIndex = appendResult(dest, destIndex, destCapacity, c, s,
srcIndex - cpStart, options, edits);
}
if (destIndex < 0) {
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
prev = srcIndex;
}
}
destIndex = appendUnchanged(dest, destIndex, destCapacity,
src + prev, srcIndex - prev, options, edits);
if (destIndex < 0) {
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
return destIndex;
}
/**
* Uppercases src[0..srcLength[ into dest.
*
* Unchanged text is not copied unit by unit: the function remembers where the
* current run of unchanged code units started (prev) and flushes that run with
* appendUnchanged() right before it emits a changed character, and once more
* at the end of the input.
*
* @param caseLocale   Case locale id; UCASE_LOC_TURKISH selects the Turkic
*                     Latin table, and the value is passed to ucase_toFullUpper().
* @param options      Option bits forwarded to the append helpers.
* @param dest         Output buffer.
* @param destCapacity Capacity of dest in UChars.
* @param src          Input UTF-16 text.
* @param csc          Case context; cpStart/cpLimit are updated before each
*                     slow-path mapping call.
* @param srcLength    Number of UChars in src.
* @param edits        Optional edits recorder; checked against nullptr before use here.
* @param errorCode    Set to U_INDEX_OUTOFBOUNDS_ERROR when an append helper
*                     returns a negative index.
* @return The destination index after all appends, or 0 on the error above.
*         NOTE(review): the append helpers appear to keep counting past
*         destCapacity so the caller can detect overflow; confirm against
*         checkOverflowAndEditsError().
*/
int32_t toUpper(int32_t caseLocale, uint32_t options,
UChar *dest, int32_t destCapacity,
const UChar *src, UCaseContext *csc, int32_t srcLength,
icu::Edits *edits, UErrorCode &errorCode) {
// Pick the per-code-unit delta table for the low Latin range.
const int8_t *latinToUpper;
if (caseLocale == UCASE_LOC_TURKISH) {
latinToUpper = LatinCase::TO_UPPER_TR;
} else {
latinToUpper = LatinCase::TO_UPPER_NORMAL;
}
const UTrie2 *trie = ucase_getTrie();
int32_t destIndex = 0;
// prev: start of the pending run of unchanged code units not yet appended.
int32_t prev = 0;
int32_t srcIndex = 0;
for (;;) {
// fast path for simple cases
UChar lead;
while (srcIndex < srcLength) {
lead = src[srcIndex];
int32_t delta;
if (lead < LatinCase::LONG_S) {
// Table lookup: EXC defers to the slow path, 0 means unchanged.
int8_t d = latinToUpper[lead];
if (d == LatinCase::EXC) { break; }
++srcIndex;
if (d == 0) { continue; }
delta = d;
} else if (lead >= 0xd800) {
break; // surrogate or higher
} else {
// Single BMP code unit: consult the case trie directly.
uint16_t props = UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, lead);
if (UCASE_HAS_EXCEPTION(props)) { break; }
++srcIndex;
// Only lowercase code units with a nonzero delta are changed here.
if (UCASE_GET_TYPE(props) != UCASE_LOWER || (delta = UCASE_GET_DELTA(props)) == 0) {
continue;
}
}
lead += delta;
// Flush the unchanged run that precedes the code unit just consumed
// (srcIndex was already advanced past it, hence the -1).
destIndex = appendUnchanged(dest, destIndex, destCapacity,
src + prev, srcIndex - 1 - prev, options, edits);
if (destIndex >= 0) {
destIndex = appendUChar(dest, destIndex, destCapacity, lead);
if (edits != nullptr) {
edits->addReplace(1, 1);
}
}
if (destIndex < 0) {
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
prev = srcIndex;
}
// The fast loop ends either at end of input or on a code unit that
// needs the full mapping below.
if (srcIndex >= srcLength) {
break;
}
// slow path
int32_t cpStart;
csc->cpStart = cpStart = srcIndex++;
UChar trail;
UChar32 c;
// Combine a lead surrogate with a following trail surrogate, if present;
// otherwise map the single code unit as is.
if (U16_IS_LEAD(lead) && srcIndex < srcLength && U16_IS_TRAIL(trail = src[srcIndex])) {
c = U16_GET_SUPPLEMENTARY(lead, trail);
++srcIndex;
} else {
c = lead;
}
csc->cpLimit = srcIndex;
const UChar *s;
c = ucase_toFullUpper(c, utf16_caseContextIterator, csc, &s, caseLocale);
// A negative result appends nothing and leaves prev alone, so the code
// point stays part of the pending unchanged run.
if (c >= 0) {
destIndex = appendUnchanged(dest, destIndex, destCapacity,
src + prev, cpStart - prev, options, edits);
if (destIndex >= 0) {
destIndex = appendResult(dest, destIndex, destCapacity, c, s,
srcIndex - cpStart, options, edits);
}
if (destIndex < 0) {
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
prev = srcIndex;
}
}
// Flush whatever unchanged text remains after the last changed character.
destIndex = appendUnchanged(dest, destIndex, destCapacity,
src + prev, srcIndex - prev, options, edits);
if (destIndex < 0) {
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
return destIndex;
}
} // namespace
U_NAMESPACE_END
U_NAMESPACE_USE
#if !UCONFIG_NO_BREAK_ITERATION
U_CFUNC int32_t U_CALLCONV
@ -344,11 +508,10 @@ ustrcase_internalToTitle(int32_t caseLocale, uint32_t options, BreakIterator *it
if((options&U_TITLECASE_NO_LOWERCASE)==0) {
/* Normal operation: Lowercase the rest of the word. */
destIndex+=
_caseMap(
caseLocale, options, ucase_toFullLower,
toLower(
caseLocale, options,
dest+destIndex, destCapacity-destIndex,
src, &csc,
titleLimit, index,
src, &csc, titleLimit, index,
edits, errorCode);
if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
errorCode=U_ZERO_ERROR;
@ -1013,8 +1176,8 @@ ustrcase_internalToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_IT
UCaseContext csc=UCASECONTEXT_INITIALIZER;
csc.p=(void *)src;
csc.limit=srcLength;
int32_t destIndex = _caseMap(
caseLocale, options, ucase_toFullLower,
int32_t destIndex = toLower(
caseLocale, options,
dest, destCapacity,
src, &csc, 0, srcLength,
edits, errorCode);
@ -1035,10 +1198,10 @@ ustrcase_internalToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_IT
UCaseContext csc=UCASECONTEXT_INITIALIZER;
csc.p=(void *)src;
csc.limit=srcLength;
destIndex = _caseMap(
caseLocale, options, ucase_toFullUpper,
destIndex = toUpper(
caseLocale, options,
dest, destCapacity,
src, &csc, 0, srcLength,
src, &csc, srcLength,
edits, errorCode);
}
return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
@ -1050,23 +1213,11 @@ ustrcase_internalFold(int32_t /* caseLocale */, uint32_t options, UCASEMAP_BREAK
const UChar *src, int32_t srcLength,
icu::Edits *edits,
UErrorCode &errorCode) {
/* case mapping loop */
int32_t srcIndex = 0;
int32_t destIndex = 0;
while (srcIndex < srcLength) {
int32_t cpStart = srcIndex;
UChar32 c;
U16_NEXT(src, srcIndex, srcLength, c);
const UChar *s;
c = ucase_toFullFolding(c, &s, options);
destIndex = appendResult(dest, destIndex, destCapacity, c, s,
srcIndex - cpStart, options, edits);
if (destIndex < 0) {
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
}
int32_t destIndex = toLower(
-1, options,
dest, destCapacity,
src, nullptr, 0, srcLength,
edits, errorCode);
return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
}

View file

@ -238,33 +238,45 @@ utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, U
int32_t i=*pi;
if(U8_IS_TRAIL(c) && i>start) {
uint8_t b1=s[--i];
if(0xc2<=b1 && b1<0xe0) {
*pi=i;
return ((b1-0xc0)<<6)|(c&0x3f);
if(U8_IS_LEAD(b1)) {
if(b1<0xe0) {
*pi=i;
return ((b1-0xc0)<<6)|(c&0x3f);
} else if(b1<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(b1, c) : U8_IS_VALID_LEAD4_AND_T1(b1, c)) {
// Truncated 3- or 4-byte sequence.
*pi=i;
return errorValue(1, strict);
}
} else if(U8_IS_TRAIL(b1) && i>start) {
// Extract the value bits from the last trail byte.
c&=0x3f;
uint8_t b2=s[--i];
if(0xe0<=b2 && b2<0xf0) {
b2&=0xf;
if(strict!=-2) {
if(U8_IS_VALID_LEAD3_AND_T1(b2, b1)) {
*pi=i;
c=(b2<<12)|((b1&0x3f)<<6)|c;
if(strict<=0 || !U_IS_UNICODE_NONCHAR(c)) {
return c;
} else {
// strict: forbid non-characters like U+fffe
return errorValue(2, strict);
if(0xe0<=b2 && b2<=0xf4) {
if(b2<0xf0) {
b2&=0xf;
if(strict!=-2) {
if(U8_IS_VALID_LEAD3_AND_T1(b2, b1)) {
*pi=i;
c=(b2<<12)|((b1&0x3f)<<6)|c;
if(strict<=0 || !U_IS_UNICODE_NONCHAR(c)) {
return c;
} else {
// strict: forbid non-characters like U+fffe
return errorValue(2, strict);
}
}
} else {
// strict=-2 -> lenient: allow surrogates
b1-=0x80;
if((b2>0 || b1>=0x20)) {
*pi=i;
return (b2<<12)|(b1<<6)|c;
}
}
} else {
// strict=-2 -> lenient: allow surrogates
b1-=0x80;
if((b2>0 || b1>=0x20)) {
*pi=i;
return (b2<<12)|(b1<<6)|c;
}
} else if(U8_IS_VALID_LEAD4_AND_T1(b2, b1)) {
// Truncated 4-byte sequence.
*pi=i;
return errorValue(2, strict);
}
} else if(U8_IS_TRAIL(b2) && i>start) {
uint8_t b3=s[--i];
@ -281,16 +293,7 @@ utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, U
}
}
}
} else if(0xf0<=b2 && b2<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(b2, b1)) {
// Truncated 4-byte sequence.
*pi=i;
return errorValue(2, strict);
}
} else if((0xe0<=b1 && b1<0xf0 && U8_IS_VALID_LEAD3_AND_T1(b1, c)) ||
(0xf0<=b1 && b1<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(b1, c))) {
// Truncated 3- or 4-byte sequence.
*pi=i;
return errorValue(1, strict);
}
}
return errorValue(0, strict);
@ -303,29 +306,23 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i) {
uint8_t c=s[i];
if(U8_IS_TRAIL(c) && i>start) {
uint8_t b1=s[--i];
if(0xc2<=b1 && b1<0xe0) {
return i;
if(U8_IS_LEAD(b1)) {
if(b1<0xe0 ||
(b1<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(b1, c) : U8_IS_VALID_LEAD4_AND_T1(b1, c))) {
return i;
}
} else if(U8_IS_TRAIL(b1) && i>start) {
uint8_t b2=s[--i];
if(0xe0<=b2 && b2<0xf0) {
if(U8_IS_VALID_LEAD3_AND_T1(b2, b1)) {
if(0xe0<=b2 && b2<=0xf4) {
if(b2<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(b2, b1) : U8_IS_VALID_LEAD4_AND_T1(b2, b1)) {
return i;
}
} else if(U8_IS_TRAIL(b2) && i>start) {
uint8_t b3=s[--i];
if(0xf0<=b3 && b3<=0xf4) {
if(U8_IS_VALID_LEAD4_AND_T1(b3, b2)) {
return i;
}
if(0xf0<=b3 && b3<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(b3, b2)) {
return i;
}
} else if(0xf0<=b2 && b2<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(b2, b1)) {
// Truncated 4-byte sequence.
return i;
}
} else if((0xe0<=b1 && b1<0xf0 && U8_IS_VALID_LEAD3_AND_T1(b1, c)) ||
(0xf0<=b1 && b1<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(b1, c))) {
// Truncated 3- or 4-byte sequence.
return i;
}
}
return orig_i;

View file

@ -556,7 +556,7 @@ struct UNewTrie {
* Index values at build-time are 32 bits wide for easier processing.
* Bit 31 is set if the data block is used by multiple index values (from utrie_setRange()).
*/
int32_t index[UTRIE_MAX_INDEX_LENGTH];
int32_t index[UTRIE_MAX_INDEX_LENGTH+UTRIE_SURROGATE_BLOCK_COUNT];
uint32_t *data;
uint32_t leadUnitValue;

View file

@ -1126,7 +1126,6 @@ isASCIIOkBiDi(const char *s, int32_t length) {
UBool
UTS46::isLabelOkContextJ(const UChar *label, int32_t labelLength) const {
const UBiDiProps *bdp=ubidi_getSingleton();
// [IDNA2008-Tables]
// 200C..200D ; CONTEXTJ # ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER
for(int32_t i=0; i<labelLength; ++i) {
@ -1148,7 +1147,7 @@ UTS46::isLabelOkContextJ(const UChar *label, int32_t labelLength) const {
}
// check precontext (Joining_Type:{L,D})(Joining_Type:T)*
for(;;) {
UJoiningType type=ubidi_getJoiningType(bdp, c);
UJoiningType type=ubidi_getJoiningType(c);
if(type==U_JT_TRANSPARENT) {
if(j==0) {
return FALSE;
@ -1166,7 +1165,7 @@ UTS46::isLabelOkContextJ(const UChar *label, int32_t labelLength) const {
return FALSE;
}
U16_NEXT_UNSAFE(label, j, c);
UJoiningType type=ubidi_getJoiningType(bdp, c);
UJoiningType type=ubidi_getJoiningType(c);
if(type==U_JT_TRANSPARENT) {
// just skip this character
} else if(type==U_JT_RIGHT_JOINING || type==U_JT_DUAL_JOINING) {

View file

@ -125,7 +125,8 @@ _uFmtErrorName[U_FMT_PARSE_ERROR_LIMIT - U_FMT_PARSE_ERROR_START] = {
"U_UNDEFINED_KEYWORD",
"U_DEFAULT_KEYWORD_MISSING",
"U_DECIMAL_NUMBER_SYNTAX_ERROR",
"U_FORMAT_INEXACT_ERROR"
"U_FORMAT_INEXACT_ERROR",
"U_NUMBER_ARG_OUTOFBOUNDS_ERROR"
};
static const char * const

Some files were not shown because too many files have changed in this diff Show more