mirror of
https://github.com/nodejs/node.git
synced 2025-08-15 13:48:44 +02:00
deps: ICU 61.1 bump
- Update to released ICU 61.1, including: - CLDR 33 (many new languages and data improvements) - Many small API additions, improvements, and bug fixes - note: 'icu::' namespace is no longer used by default (Necessitated https://github.com/nodejs/node/pull/18667 ) PR-URL: https://github.com/nodejs/node/pull/19621 Reviewed-By: Colin Ihrig <cjihrig@gmail.com> Reviewed-By: James M Snell <jasnell@gmail.com>
This commit is contained in:
parent
88773af540
commit
64211405da
180 changed files with 8963 additions and 2015 deletions
33
LICENSE
33
LICENSE
|
@ -122,8 +122,8 @@ The externally maintained libraries used by Node.js are:
|
|||
"""
|
||||
COPYRIGHT AND PERMISSION NOTICE (ICU 58 and later)
|
||||
|
||||
Copyright © 1991-2017 Unicode, Inc. All rights reserved.
|
||||
Distributed under the Terms of Use in http://www.unicode.org/copyright.html
|
||||
Copyright © 1991-2018 Unicode, Inc. All rights reserved.
|
||||
Distributed under the Terms of Use in http://www.unicode.org/copyright.html.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of the Unicode data files and any associated documentation
|
||||
|
@ -505,6 +505,35 @@ The externally maintained libraries used by Node.js are:
|
|||
# by ICANN or the IETF Trust on the database or the code. Any person
|
||||
# making a contribution to the database or code waives all rights to
|
||||
# future claims in that contribution or in the TZ Database.
|
||||
|
||||
6. Google double-conversion
|
||||
|
||||
Copyright 2006-2011, the V8 project authors. All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following
|
||||
disclaimer in the documentation and/or other materials provided
|
||||
with the distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
"""
|
||||
|
||||
- libuv, located at deps/uv, is licensed as follows:
|
||||
|
|
4
configure
vendored
4
configure
vendored
|
@ -1125,8 +1125,8 @@ def glob_to_var(dir_base, dir_sub, patch_dir):
|
|||
def configure_intl(o):
|
||||
icus = [
|
||||
{
|
||||
'url': 'https://ssl.icu-project.org/files/icu4c/60.2/icu4c-60_2-src.zip',
|
||||
'md5': '115908818fd0324530b2acb1b029738d',
|
||||
'url': 'https://ssl.icu-project.org/files/icu4c/61.1/icu4c-61_1-src.zip',
|
||||
'md5': '780d8524c8a860ed8d8f6fe75cb7ce3f',
|
||||
},
|
||||
]
|
||||
def icu_download(path):
|
||||
|
|
33
deps/icu-small/LICENSE
vendored
33
deps/icu-small/LICENSE
vendored
|
@ -1,7 +1,7 @@
|
|||
COPYRIGHT AND PERMISSION NOTICE (ICU 58 and later)
|
||||
|
||||
Copyright © 1991-2017 Unicode, Inc. All rights reserved.
|
||||
Distributed under the Terms of Use in http://www.unicode.org/copyright.html
|
||||
Copyright © 1991-2018 Unicode, Inc. All rights reserved.
|
||||
Distributed under the Terms of Use in http://www.unicode.org/copyright.html.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of the Unicode data files and any associated documentation
|
||||
|
@ -383,3 +383,32 @@ Database section 7.
|
|||
# by ICANN or the IETF Trust on the database or the code. Any person
|
||||
# making a contribution to the database or code waives all rights to
|
||||
# future claims in that contribution or in the TZ Database.
|
||||
|
||||
6. Google double-conversion
|
||||
|
||||
Copyright 2006-2011, the V8 project authors. All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following
|
||||
disclaimer in the documentation and/or other materials provided
|
||||
with the distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
|
4
deps/icu-small/README-SMALL-ICU.txt
vendored
4
deps/icu-small/README-SMALL-ICU.txt
vendored
|
@ -1,8 +1,8 @@
|
|||
Small ICU sources - auto generated by shrink-icu-src.py
|
||||
|
||||
This directory contains the ICU subset used by --with-intl=small-icu (the default)
|
||||
It is a strict subset of ICU 60 source files with the following exception(s):
|
||||
* deps/icu-small/source/data/in/icudt60l.dat : Reduced-size data file
|
||||
It is a strict subset of ICU 61 source files with the following exception(s):
|
||||
* deps/icu-small/source/data/in/icudt61l.dat : Reduced-size data file
|
||||
|
||||
|
||||
To rebuild this directory, see ../../tools/icu/README.md
|
||||
|
|
4
deps/icu-small/source/common/bmpset.cpp
vendored
4
deps/icu-small/source/common/bmpset.cpp
vendored
|
@ -100,9 +100,9 @@ static void set32x64Bits(uint32_t table[64], int32_t start, int32_t limit) {
|
|||
++lead;
|
||||
}
|
||||
if(lead<limitLead) {
|
||||
bits=~((1<<lead)-1);
|
||||
bits=~(((unsigned)1<<lead)-1);
|
||||
if(limitLead<0x20) {
|
||||
bits&=(1<<limitLead)-1;
|
||||
bits&=((unsigned)1<<limitLead)-1;
|
||||
}
|
||||
for(trail=0; trail<64; ++trail) {
|
||||
table[trail]|=bits;
|
||||
|
|
65
deps/icu-small/source/common/brkeng.cpp
vendored
65
deps/icu-small/source/common/brkeng.cpp
vendored
|
@ -59,58 +59,47 @@ LanguageBreakFactory::~LanguageBreakFactory() {
|
|||
******************************************************************
|
||||
*/
|
||||
|
||||
UnhandledEngine::UnhandledEngine(UErrorCode &/*status*/) {
|
||||
for (int32_t i = 0; i < UPRV_LENGTHOF(fHandled); ++i) {
|
||||
fHandled[i] = 0;
|
||||
}
|
||||
UnhandledEngine::UnhandledEngine(UErrorCode &status) : fHandled(nullptr) {
|
||||
(void)status;
|
||||
}
|
||||
|
||||
UnhandledEngine::~UnhandledEngine() {
|
||||
for (int32_t i = 0; i < UPRV_LENGTHOF(fHandled); ++i) {
|
||||
if (fHandled[i] != 0) {
|
||||
delete fHandled[i];
|
||||
}
|
||||
}
|
||||
delete fHandled;
|
||||
fHandled = nullptr;
|
||||
}
|
||||
|
||||
UBool
|
||||
UnhandledEngine::handles(UChar32 c, int32_t breakType) const {
|
||||
return (breakType >= 0 && breakType < UPRV_LENGTHOF(fHandled)
|
||||
&& fHandled[breakType] != 0 && fHandled[breakType]->contains(c));
|
||||
UnhandledEngine::handles(UChar32 c) const {
|
||||
return fHandled && fHandled->contains(c);
|
||||
}
|
||||
|
||||
int32_t
|
||||
UnhandledEngine::findBreaks( UText *text,
|
||||
int32_t /* startPos */,
|
||||
int32_t endPos,
|
||||
int32_t breakType,
|
||||
UVector32 &/*foundBreaks*/ ) const {
|
||||
if (breakType >= 0 && breakType < UPRV_LENGTHOF(fHandled)) {
|
||||
UChar32 c = utext_current32(text);
|
||||
while((int32_t)utext_getNativeIndex(text) < endPos && fHandled[breakType]->contains(c)) {
|
||||
utext_next32(text); // TODO: recast loop to work with post-increment operations.
|
||||
c = utext_current32(text);
|
||||
}
|
||||
UChar32 c = utext_current32(text);
|
||||
while((int32_t)utext_getNativeIndex(text) < endPos && fHandled->contains(c)) {
|
||||
utext_next32(text); // TODO: recast loop to work with post-increment operations.
|
||||
c = utext_current32(text);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
UnhandledEngine::handleCharacter(UChar32 c, int32_t breakType) {
|
||||
if (breakType >= 0 && breakType < UPRV_LENGTHOF(fHandled)) {
|
||||
if (fHandled[breakType] == 0) {
|
||||
fHandled[breakType] = new UnicodeSet();
|
||||
if (fHandled[breakType] == 0) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (!fHandled[breakType]->contains(c)) {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
// Apply the entire script of the character.
|
||||
int32_t script = u_getIntPropertyValue(c, UCHAR_SCRIPT);
|
||||
fHandled[breakType]->applyIntPropertyValue(UCHAR_SCRIPT, script, status);
|
||||
UnhandledEngine::handleCharacter(UChar32 c) {
|
||||
if (fHandled == nullptr) {
|
||||
fHandled = new UnicodeSet();
|
||||
if (fHandled == nullptr) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (!fHandled->contains(c)) {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
// Apply the entire script of the character.
|
||||
int32_t script = u_getIntPropertyValue(c, UCHAR_SCRIPT);
|
||||
fHandled->applyIntPropertyValue(UCHAR_SCRIPT, script, status);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -138,7 +127,7 @@ U_NAMESPACE_BEGIN
|
|||
static UMutex gBreakEngineMutex = U_MUTEX_INITIALIZER;
|
||||
|
||||
const LanguageBreakEngine *
|
||||
ICULanguageBreakFactory::getEngineFor(UChar32 c, int32_t breakType) {
|
||||
ICULanguageBreakFactory::getEngineFor(UChar32 c) {
|
||||
const LanguageBreakEngine *lbe = NULL;
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
|
||||
|
@ -156,14 +145,14 @@ ICULanguageBreakFactory::getEngineFor(UChar32 c, int32_t breakType) {
|
|||
int32_t i = fEngines->size();
|
||||
while (--i >= 0) {
|
||||
lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i));
|
||||
if (lbe != NULL && lbe->handles(c, breakType)) {
|
||||
if (lbe != NULL && lbe->handles(c)) {
|
||||
return lbe;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// We didn't find an engine. Create one.
|
||||
lbe = loadEngineFor(c, breakType);
|
||||
lbe = loadEngineFor(c);
|
||||
if (lbe != NULL) {
|
||||
fEngines->push((void *)lbe, status);
|
||||
}
|
||||
|
@ -171,11 +160,11 @@ ICULanguageBreakFactory::getEngineFor(UChar32 c, int32_t breakType) {
|
|||
}
|
||||
|
||||
const LanguageBreakEngine *
|
||||
ICULanguageBreakFactory::loadEngineFor(UChar32 c, int32_t breakType) {
|
||||
ICULanguageBreakFactory::loadEngineFor(UChar32 c) {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UScriptCode code = uscript_getScript(c, &status);
|
||||
if (U_SUCCESS(status)) {
|
||||
DictionaryMatcher *m = loadDictionaryMatcherFor(code, breakType);
|
||||
DictionaryMatcher *m = loadDictionaryMatcherFor(code);
|
||||
if (m != NULL) {
|
||||
const LanguageBreakEngine *engine = NULL;
|
||||
switch(code) {
|
||||
|
@ -236,7 +225,7 @@ ICULanguageBreakFactory::loadEngineFor(UChar32 c, int32_t breakType) {
|
|||
}
|
||||
|
||||
DictionaryMatcher *
|
||||
ICULanguageBreakFactory::loadDictionaryMatcherFor(UScriptCode script, int32_t /* brkType */) {
|
||||
ICULanguageBreakFactory::loadDictionaryMatcherFor(UScriptCode script) {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
// open root from brkitr tree.
|
||||
UResourceBundle *b = ures_open(U_ICUDATA_BRKITR, "", &status);
|
||||
|
|
33
deps/icu-small/source/common/brkeng.h
vendored
33
deps/icu-small/source/common/brkeng.h
vendored
|
@ -54,11 +54,10 @@ class LanguageBreakEngine : public UMemory {
|
|||
* a particular kind of break.</p>
|
||||
*
|
||||
* @param c A character which begins a run that the engine might handle
|
||||
* @param breakType The type of text break which the caller wants to determine
|
||||
* @return TRUE if this engine handles the particular character and break
|
||||
* type.
|
||||
*/
|
||||
virtual UBool handles(UChar32 c, int32_t breakType) const = 0;
|
||||
virtual UBool handles(UChar32 c) const = 0;
|
||||
|
||||
/**
|
||||
* <p>Find any breaks within a run in the supplied text.</p>
|
||||
|
@ -68,14 +67,12 @@ class LanguageBreakEngine : public UMemory {
|
|||
* is capable of handling.
|
||||
* @param startPos The start of the run within the supplied text.
|
||||
* @param endPos The end of the run within the supplied text.
|
||||
* @param breakType The type of break desired, or -1.
|
||||
* @param foundBreaks A Vector of int32_t to receive the breaks.
|
||||
* @return The number of breaks found.
|
||||
*/
|
||||
virtual int32_t findBreaks( UText *text,
|
||||
int32_t startPos,
|
||||
int32_t endPos,
|
||||
int32_t breakType,
|
||||
UVector32 &foundBreaks ) const = 0;
|
||||
|
||||
};
|
||||
|
@ -125,11 +122,9 @@ class LanguageBreakFactory : public UMemory {
|
|||
*
|
||||
* @param c A character that begins a run for which a LanguageBreakEngine is
|
||||
* sought.
|
||||
* @param breakType The kind of text break for which a LanguageBreakEngine is
|
||||
* sought.
|
||||
* @return A LanguageBreakEngine with the desired characteristics, or 0.
|
||||
*/
|
||||
virtual const LanguageBreakEngine *getEngineFor(UChar32 c, int32_t breakType) = 0;
|
||||
virtual const LanguageBreakEngine *getEngineFor(UChar32 c) = 0;
|
||||
|
||||
};
|
||||
|
||||
|
@ -152,11 +147,11 @@ class UnhandledEngine : public LanguageBreakEngine {
|
|||
private:
|
||||
|
||||
/**
|
||||
* The sets of characters handled, for each break type
|
||||
* The sets of characters handled.
|
||||
* @internal
|
||||
*/
|
||||
|
||||
UnicodeSet *fHandled[4];
|
||||
UnicodeSet *fHandled;
|
||||
|
||||
public:
|
||||
|
||||
|
@ -176,11 +171,10 @@ class UnhandledEngine : public LanguageBreakEngine {
|
|||
* a particular kind of break.</p>
|
||||
*
|
||||
* @param c A character which begins a run that the engine might handle
|
||||
* @param breakType The type of text break which the caller wants to determine
|
||||
* @return TRUE if this engine handles the particular character and break
|
||||
* type.
|
||||
*/
|
||||
virtual UBool handles(UChar32 c, int32_t breakType) const;
|
||||
virtual UBool handles(UChar32 c) const;
|
||||
|
||||
/**
|
||||
* <p>Find any breaks within a run in the supplied text.</p>
|
||||
|
@ -190,23 +184,20 @@ class UnhandledEngine : public LanguageBreakEngine {
|
|||
* is capable of handling.
|
||||
* @param startPos The start of the run within the supplied text.
|
||||
* @param endPos The end of the run within the supplied text.
|
||||
* @param breakType The type of break desired, or -1.
|
||||
* @param foundBreaks An allocated C array of the breaks found, if any
|
||||
* @return The number of breaks found.
|
||||
*/
|
||||
virtual int32_t findBreaks( UText *text,
|
||||
int32_t startPos,
|
||||
int32_t endPos,
|
||||
int32_t breakType,
|
||||
UVector32 &foundBreaks ) const;
|
||||
|
||||
/**
|
||||
* <p>Tell the engine to handle a particular character and break type.</p>
|
||||
*
|
||||
* @param c A character which the engine should handle
|
||||
* @param breakType The type of text break for which the engine should handle c
|
||||
*/
|
||||
virtual void handleCharacter(UChar32 c, int32_t breakType);
|
||||
virtual void handleCharacter(UChar32 c);
|
||||
|
||||
};
|
||||
|
||||
|
@ -250,11 +241,9 @@ class ICULanguageBreakFactory : public LanguageBreakFactory {
|
|||
*
|
||||
* @param c A character that begins a run for which a LanguageBreakEngine is
|
||||
* sought.
|
||||
* @param breakType The kind of text break for which a LanguageBreakEngine is
|
||||
* sought.
|
||||
* @return A LanguageBreakEngine with the desired characteristics, or 0.
|
||||
*/
|
||||
virtual const LanguageBreakEngine *getEngineFor(UChar32 c, int32_t breakType);
|
||||
virtual const LanguageBreakEngine *getEngineFor(UChar32 c);
|
||||
|
||||
protected:
|
||||
/**
|
||||
|
@ -263,21 +252,17 @@ protected:
|
|||
*
|
||||
* @param c A character that begins a run for which a LanguageBreakEngine is
|
||||
* sought.
|
||||
* @param breakType The kind of text break for which a LanguageBreakEngine is
|
||||
* sought.
|
||||
* @return A LanguageBreakEngine with the desired characteristics, or 0.
|
||||
*/
|
||||
virtual const LanguageBreakEngine *loadEngineFor(UChar32 c, int32_t breakType);
|
||||
virtual const LanguageBreakEngine *loadEngineFor(UChar32 c);
|
||||
|
||||
/**
|
||||
* <p>Create a DictionaryMatcher for the specified script and break type.</p>
|
||||
* @param script An ISO 15924 script code that identifies the dictionary to be
|
||||
* created.
|
||||
* @param breakType The kind of text break for which a dictionary is
|
||||
* sought.
|
||||
* @return A DictionaryMatcher with the desired characteristics, or NULL.
|
||||
*/
|
||||
virtual DictionaryMatcher *loadDictionaryMatcherFor(UScriptCode script, int32_t breakType);
|
||||
virtual DictionaryMatcher *loadDictionaryMatcherFor(UScriptCode script);
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
|
13
deps/icu-small/source/common/brkiter.cpp
vendored
13
deps/icu-small/source/common/brkiter.cpp
vendored
|
@ -52,7 +52,7 @@ U_NAMESPACE_BEGIN
|
|||
// -------------------------------------
|
||||
|
||||
BreakIterator*
|
||||
BreakIterator::buildInstance(const Locale& loc, const char *type, int32_t kind, UErrorCode &status)
|
||||
BreakIterator::buildInstance(const Locale& loc, const char *type, UErrorCode &status)
|
||||
{
|
||||
char fnbuff[256];
|
||||
char ext[4]={'\0'};
|
||||
|
@ -121,7 +121,6 @@ BreakIterator::buildInstance(const Locale& loc, const char *type, int32_t kind,
|
|||
U_LOCALE_BASED(locBased, *(BreakIterator*)result);
|
||||
locBased.setLocaleIDs(ures_getLocaleByType(b, ULOC_VALID_LOCALE, &status),
|
||||
actualLocale.data());
|
||||
result->setBreakType(kind);
|
||||
}
|
||||
|
||||
ures_close(b);
|
||||
|
@ -413,10 +412,10 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
|
|||
BreakIterator *result = NULL;
|
||||
switch (kind) {
|
||||
case UBRK_CHARACTER:
|
||||
result = BreakIterator::buildInstance(loc, "grapheme", kind, status);
|
||||
result = BreakIterator::buildInstance(loc, "grapheme", status);
|
||||
break;
|
||||
case UBRK_WORD:
|
||||
result = BreakIterator::buildInstance(loc, "word", kind, status);
|
||||
result = BreakIterator::buildInstance(loc, "word", status);
|
||||
break;
|
||||
case UBRK_LINE:
|
||||
uprv_strcpy(lbType, "line");
|
||||
|
@ -429,10 +428,10 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
|
|||
uprv_strcat(lbType, lbKeyValue);
|
||||
}
|
||||
}
|
||||
result = BreakIterator::buildInstance(loc, lbType, kind, status);
|
||||
result = BreakIterator::buildInstance(loc, lbType, status);
|
||||
break;
|
||||
case UBRK_SENTENCE:
|
||||
result = BreakIterator::buildInstance(loc, "sentence", kind, status);
|
||||
result = BreakIterator::buildInstance(loc, "sentence", status);
|
||||
#if !UCONFIG_NO_FILTERED_BREAK_ITERATION
|
||||
{
|
||||
char ssKeyValue[kKeyValueLenMax] = {0};
|
||||
|
@ -449,7 +448,7 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
|
|||
#endif
|
||||
break;
|
||||
case UBRK_TITLE:
|
||||
result = BreakIterator::buildInstance(loc, "title", kind, status);
|
||||
result = BreakIterator::buildInstance(loc, "title", status);
|
||||
break;
|
||||
default:
|
||||
status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
|
|
28
deps/icu-small/source/common/bytesinkutil.cpp
vendored
28
deps/icu-small/source/common/bytesinkutil.cpp
vendored
|
@ -92,20 +92,16 @@ ByteSinkUtil::appendTwoBytes(UChar32 c, ByteSink &sink) {
|
|||
sink.Append(s8, 2);
|
||||
}
|
||||
|
||||
UBool
|
||||
ByteSinkUtil::appendUnchanged(const uint8_t *s, int32_t length,
|
||||
ByteSink &sink, uint32_t options, Edits *edits,
|
||||
UErrorCode &errorCode) {
|
||||
if (U_FAILURE(errorCode)) { return FALSE; }
|
||||
if (length > 0) {
|
||||
if (edits != nullptr) {
|
||||
edits->addUnchanged(length);
|
||||
}
|
||||
if ((options & U_OMIT_UNCHANGED_TEXT) == 0) {
|
||||
sink.Append(reinterpret_cast<const char *>(s), length);
|
||||
}
|
||||
void
|
||||
ByteSinkUtil::appendNonEmptyUnchanged(const uint8_t *s, int32_t length,
|
||||
ByteSink &sink, uint32_t options, Edits *edits) {
|
||||
U_ASSERT(length > 0);
|
||||
if (edits != nullptr) {
|
||||
edits->addUnchanged(length);
|
||||
}
|
||||
if ((options & U_OMIT_UNCHANGED_TEXT) == 0) {
|
||||
sink.Append(reinterpret_cast<const char *>(s), length);
|
||||
}
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
UBool
|
||||
|
@ -117,7 +113,11 @@ ByteSinkUtil::appendUnchanged(const uint8_t *s, const uint8_t *limit,
|
|||
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return FALSE;
|
||||
}
|
||||
return appendUnchanged(s, (int32_t)(limit - s), sink, options, edits, errorCode);
|
||||
int32_t length = (int32_t)(limit - s);
|
||||
if (length > 0) {
|
||||
appendNonEmptyUnchanged(s, length, sink, options, edits);
|
||||
}
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
|
10
deps/icu-small/source/common/bytesinkutil.h
vendored
10
deps/icu-small/source/common/bytesinkutil.h
vendored
|
@ -43,11 +43,19 @@ public:
|
|||
|
||||
static UBool appendUnchanged(const uint8_t *s, int32_t length,
|
||||
ByteSink &sink, uint32_t options, Edits *edits,
|
||||
UErrorCode &errorCode);
|
||||
UErrorCode &errorCode) {
|
||||
if (U_FAILURE(errorCode)) { return FALSE; }
|
||||
if (length > 0) { appendNonEmptyUnchanged(s, length, sink, options, edits); }
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static UBool appendUnchanged(const uint8_t *s, const uint8_t *limit,
|
||||
ByteSink &sink, uint32_t options, Edits *edits,
|
||||
UErrorCode &errorCode);
|
||||
|
||||
private:
|
||||
static void appendNonEmptyUnchanged(const uint8_t *s, int32_t length,
|
||||
ByteSink &sink, uint32_t options, Edits *edits);
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
|
24
deps/icu-small/source/common/cmemory.cpp
vendored
24
deps/icu-small/source/common/cmemory.cpp
vendored
|
@ -41,30 +41,6 @@ static int n=0;
|
|||
static long b=0;
|
||||
#endif
|
||||
|
||||
#if U_DEBUG
|
||||
|
||||
static char gValidMemorySink = 0;
|
||||
|
||||
U_CAPI void uprv_checkValidMemory(const void *p, size_t n) {
|
||||
/*
|
||||
* Access the memory to ensure that it's all valid.
|
||||
* Load and save a computed value to try to ensure that the compiler
|
||||
* does not throw away the whole loop.
|
||||
* A thread analyzer might complain about un-mutexed access to gValidMemorySink
|
||||
* which is true but harmless because no one ever uses the value in gValidMemorySink.
|
||||
*/
|
||||
const char *s = (const char *)p;
|
||||
char c = gValidMemorySink;
|
||||
size_t i;
|
||||
U_ASSERT(p != NULL);
|
||||
for(i = 0; i < n; ++i) {
|
||||
c ^= s[i];
|
||||
}
|
||||
gValidMemorySink = c;
|
||||
}
|
||||
|
||||
#endif /* U_DEBUG */
|
||||
|
||||
U_CAPI void * U_EXPORT2
|
||||
uprv_malloc(size_t s) {
|
||||
#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
|
||||
|
|
21
deps/icu-small/source/common/cmemory.h
vendored
21
deps/icu-small/source/common/cmemory.h
vendored
|
@ -36,31 +36,10 @@
|
|||
#include <stdio.h>
|
||||
#endif
|
||||
|
||||
#if U_DEBUG
|
||||
|
||||
/*
|
||||
* The C++ standard requires that the source pointer for memcpy() & memmove()
|
||||
* is valid, not NULL, and not at the end of an allocated memory block.
|
||||
* In debug mode, we read one byte from the source point to verify that it's
|
||||
* a valid, readable pointer.
|
||||
*/
|
||||
|
||||
U_CAPI void uprv_checkValidMemory(const void *p, size_t n);
|
||||
|
||||
#define uprv_memcpy(dst, src, size) ( \
|
||||
uprv_checkValidMemory(src, 1), \
|
||||
U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size))
|
||||
#define uprv_memmove(dst, src, size) ( \
|
||||
uprv_checkValidMemory(src, 1), \
|
||||
U_STANDARD_CPP_NAMESPACE memmove(dst, src, size))
|
||||
|
||||
#else
|
||||
|
||||
#define uprv_memcpy(dst, src, size) U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size)
|
||||
#define uprv_memmove(dst, src, size) U_STANDARD_CPP_NAMESPACE memmove(dst, src, size)
|
||||
|
||||
#endif /* U_DEBUG */
|
||||
|
||||
/**
|
||||
* \def UPRV_LENGTHOF
|
||||
* Convenience macro to determine the length of a fixed array at compile-time.
|
||||
|
|
18
deps/icu-small/source/common/cstring.h
vendored
18
deps/icu-small/source/common/cstring.h
vendored
|
@ -40,28 +40,10 @@
|
|||
#define uprv_strchr(s, c) U_STANDARD_CPP_NAMESPACE strchr(s, c)
|
||||
#define uprv_strstr(s, c) U_STANDARD_CPP_NAMESPACE strstr(s, c)
|
||||
#define uprv_strrchr(s, c) U_STANDARD_CPP_NAMESPACE strrchr(s, c)
|
||||
|
||||
#if U_DEBUG
|
||||
|
||||
#define uprv_strncpy(dst, src, size) ( \
|
||||
uprv_checkValidMemory(src, 1), \
|
||||
U_STANDARD_CPP_NAMESPACE strncpy(dst, src, size))
|
||||
#define uprv_strncmp(s1, s2, n) ( \
|
||||
uprv_checkValidMemory(s1, 1), \
|
||||
uprv_checkValidMemory(s2, 1), \
|
||||
U_STANDARD_CPP_NAMESPACE strncmp(s1, s2, n))
|
||||
#define uprv_strncat(dst, src, n) ( \
|
||||
uprv_checkValidMemory(src, 1), \
|
||||
U_STANDARD_CPP_NAMESPACE strncat(dst, src, n))
|
||||
|
||||
#else
|
||||
|
||||
#define uprv_strncpy(dst, src, size) U_STANDARD_CPP_NAMESPACE strncpy(dst, src, size)
|
||||
#define uprv_strncmp(s1, s2, n) U_STANDARD_CPP_NAMESPACE strncmp(s1, s2, n)
|
||||
#define uprv_strncat(dst, src, n) U_STANDARD_CPP_NAMESPACE strncat(dst, src, n)
|
||||
|
||||
#endif /* U_DEBUG */
|
||||
|
||||
/**
|
||||
* Is c an ASCII-repertoire letter a-z or A-Z?
|
||||
* Note: The implementation is specific to whether ICU is compiled for
|
||||
|
|
29
deps/icu-small/source/common/dictbe.cpp
vendored
29
deps/icu-small/source/common/dictbe.cpp
vendored
|
@ -29,24 +29,21 @@ U_NAMESPACE_BEGIN
|
|||
******************************************************************
|
||||
*/
|
||||
|
||||
DictionaryBreakEngine::DictionaryBreakEngine(uint32_t breakTypes) {
|
||||
fTypes = breakTypes;
|
||||
DictionaryBreakEngine::DictionaryBreakEngine() {
|
||||
}
|
||||
|
||||
DictionaryBreakEngine::~DictionaryBreakEngine() {
|
||||
}
|
||||
|
||||
UBool
|
||||
DictionaryBreakEngine::handles(UChar32 c, int32_t breakType) const {
|
||||
return (breakType >= 0 && breakType < 32 && (((uint32_t)1 << breakType) & fTypes)
|
||||
&& fSet.contains(c));
|
||||
DictionaryBreakEngine::handles(UChar32 c) const {
|
||||
return fSet.contains(c);
|
||||
}
|
||||
|
||||
int32_t
|
||||
DictionaryBreakEngine::findBreaks( UText *text,
|
||||
int32_t startPos,
|
||||
int32_t endPos,
|
||||
int32_t breakType,
|
||||
UVector32 &foundBreaks ) const {
|
||||
(void)startPos; // TODO: remove this param?
|
||||
int32_t result = 0;
|
||||
|
@ -66,10 +63,8 @@ DictionaryBreakEngine::findBreaks( UText *text,
|
|||
}
|
||||
rangeStart = start;
|
||||
rangeEnd = current;
|
||||
if (breakType >= 0 && breakType < 32 && (((uint32_t)1 << breakType) & fTypes)) {
|
||||
result = divideUpDictionaryRange(text, rangeStart, rangeEnd, foundBreaks);
|
||||
utext_setNativeIndex(text, current);
|
||||
}
|
||||
result = divideUpDictionaryRange(text, rangeStart, rangeEnd, foundBreaks);
|
||||
utext_setNativeIndex(text, current);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
@ -194,7 +189,7 @@ static const int32_t THAI_MIN_WORD = 2;
|
|||
static const int32_t THAI_MIN_WORD_SPAN = THAI_MIN_WORD * 2;
|
||||
|
||||
ThaiBreakEngine::ThaiBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status)
|
||||
: DictionaryBreakEngine((1<<UBRK_WORD) | (1<<UBRK_LINE)),
|
||||
: DictionaryBreakEngine(),
|
||||
fDictionary(adoptDictionary)
|
||||
{
|
||||
fThaiWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Thai:]&[:LineBreak=SA:]]"), status);
|
||||
|
@ -436,7 +431,7 @@ static const int32_t LAO_MIN_WORD = 2;
|
|||
static const int32_t LAO_MIN_WORD_SPAN = LAO_MIN_WORD * 2;
|
||||
|
||||
LaoBreakEngine::LaoBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status)
|
||||
: DictionaryBreakEngine((1<<UBRK_WORD) | (1<<UBRK_LINE)),
|
||||
: DictionaryBreakEngine(),
|
||||
fDictionary(adoptDictionary)
|
||||
{
|
||||
fLaoWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Laoo:]&[:LineBreak=SA:]]"), status);
|
||||
|
@ -632,7 +627,7 @@ static const int32_t BURMESE_MIN_WORD = 2;
|
|||
static const int32_t BURMESE_MIN_WORD_SPAN = BURMESE_MIN_WORD * 2;
|
||||
|
||||
BurmeseBreakEngine::BurmeseBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status)
|
||||
: DictionaryBreakEngine((1<<UBRK_WORD) | (1<<UBRK_LINE)),
|
||||
: DictionaryBreakEngine(),
|
||||
fDictionary(adoptDictionary)
|
||||
{
|
||||
fBurmeseWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Mymr:]&[:LineBreak=SA:]]"), status);
|
||||
|
@ -825,7 +820,7 @@ static const int32_t KHMER_MIN_WORD = 2;
|
|||
static const int32_t KHMER_MIN_WORD_SPAN = KHMER_MIN_WORD * 2;
|
||||
|
||||
KhmerBreakEngine::KhmerBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status)
|
||||
: DictionaryBreakEngine((1 << UBRK_WORD) | (1 << UBRK_LINE)),
|
||||
: DictionaryBreakEngine(),
|
||||
fDictionary(adoptDictionary)
|
||||
{
|
||||
fKhmerWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Khmr:]&[:LineBreak=SA:]]"), status);
|
||||
|
@ -1047,7 +1042,7 @@ foundBest:
|
|||
*/
|
||||
static const uint32_t kuint32max = 0xFFFFFFFF;
|
||||
CjkBreakEngine::CjkBreakEngine(DictionaryMatcher *adoptDictionary, LanguageType type, UErrorCode &status)
|
||||
: DictionaryBreakEngine(1 << UBRK_WORD), fDictionary(adoptDictionary) {
|
||||
: DictionaryBreakEngine(), fDictionary(adoptDictionary) {
|
||||
// Korean dictionary only includes Hangul syllables
|
||||
fHangulWordSet.applyPattern(UNICODE_STRING_SIMPLE("[\\uac00-\\ud7a3]"), status);
|
||||
fHanWordSet.applyPattern(UNICODE_STRING_SIMPLE("[:Han:]"), status);
|
||||
|
@ -1324,8 +1319,8 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
|
|||
}
|
||||
if (katakanaRunLength < kMaxKatakanaGroupLength) {
|
||||
uint32_t newSnlp = bestSnlp.elementAti(i) + getKatakanaCost(katakanaRunLength);
|
||||
if (newSnlp < (uint32_t)bestSnlp.elementAti(j)) {
|
||||
bestSnlp.setElementAt(newSnlp, j);
|
||||
if (newSnlp < (uint32_t)bestSnlp.elementAti(i+katakanaRunLength)) {
|
||||
bestSnlp.setElementAt(newSnlp, i+katakanaRunLength);
|
||||
prev.setElementAt(i, i+katakanaRunLength); // prev[j] = i;
|
||||
}
|
||||
}
|
||||
|
|
31
deps/icu-small/source/common/dictbe.h
vendored
31
deps/icu-small/source/common/dictbe.h
vendored
|
@ -42,27 +42,12 @@ class DictionaryBreakEngine : public LanguageBreakEngine {
|
|||
|
||||
UnicodeSet fSet;
|
||||
|
||||
/**
|
||||
* The set of break types handled by this engine
|
||||
* @internal
|
||||
*/
|
||||
|
||||
uint32_t fTypes;
|
||||
|
||||
/**
|
||||
* <p>Default constructor.</p>
|
||||
*
|
||||
*/
|
||||
DictionaryBreakEngine();
|
||||
|
||||
public:
|
||||
|
||||
/**
|
||||
* <p>Constructor setting the break types handled.</p>
|
||||
*
|
||||
* @param breakTypes A bitmap of types handled by the engine.
|
||||
* <p>Constructor </p>
|
||||
*/
|
||||
DictionaryBreakEngine( uint32_t breakTypes );
|
||||
DictionaryBreakEngine();
|
||||
|
||||
/**
|
||||
* <p>Virtual destructor.</p>
|
||||
|
@ -74,11 +59,10 @@ class DictionaryBreakEngine : public LanguageBreakEngine {
|
|||
* a particular kind of break.</p>
|
||||
*
|
||||
* @param c A character which begins a run that the engine might handle
|
||||
* @param breakType The type of text break which the caller wants to determine
|
||||
* @return TRUE if this engine handles the particular character and break
|
||||
* type.
|
||||
*/
|
||||
virtual UBool handles( UChar32 c, int32_t breakType ) const;
|
||||
virtual UBool handles(UChar32 c) const;
|
||||
|
||||
/**
|
||||
* <p>Find any breaks within a run in the supplied text.</p>
|
||||
|
@ -88,14 +72,12 @@ class DictionaryBreakEngine : public LanguageBreakEngine {
|
|||
* that starts from the first character in the range.
|
||||
* @param startPos The start of the run within the supplied text.
|
||||
* @param endPos The end of the run within the supplied text.
|
||||
* @param breakType The type of break desired, or -1.
|
||||
* @param foundBreaks vector of int32_t to receive the break positions
|
||||
* @return The number of breaks found.
|
||||
*/
|
||||
virtual int32_t findBreaks( UText *text,
|
||||
int32_t startPos,
|
||||
int32_t endPos,
|
||||
int32_t breakType,
|
||||
UVector32 &foundBreaks ) const;
|
||||
|
||||
protected:
|
||||
|
@ -107,13 +89,6 @@ class DictionaryBreakEngine : public LanguageBreakEngine {
|
|||
*/
|
||||
virtual void setCharacters( const UnicodeSet &set );
|
||||
|
||||
/**
|
||||
* <p>Set the break types handled by this engine.</p>
|
||||
*
|
||||
* @param breakTypes A bitmap of types handled by the engine.
|
||||
*/
|
||||
// virtual void setBreakTypes( uint32_t breakTypes );
|
||||
|
||||
/**
|
||||
* <p>Divide up a range of known dictionary characters handled by this break engine.</p>
|
||||
*
|
||||
|
|
5
deps/icu-small/source/common/filteredbrk.cpp
vendored
5
deps/icu-small/source/common/filteredbrk.cpp
vendored
|
@ -693,6 +693,11 @@ FilteredBreakIteratorBuilder::createInstance(const Locale& where, UErrorCode& st
|
|||
return (U_SUCCESS(status))? ret.orphan(): NULL;
|
||||
}
|
||||
|
||||
FilteredBreakIteratorBuilder *
|
||||
FilteredBreakIteratorBuilder::createInstance(UErrorCode &status) {
|
||||
return createEmptyInstance(status);
|
||||
}
|
||||
|
||||
FilteredBreakIteratorBuilder *
|
||||
FilteredBreakIteratorBuilder::createEmptyInstance(UErrorCode& status) {
|
||||
if(U_FAILURE(status)) return NULL;
|
||||
|
|
195
deps/icu-small/source/common/rbbi.cpp
vendored
195
deps/icu-small/source/common/rbbi.cpp
vendored
|
@ -64,7 +64,9 @@ UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedBreakIterator)
|
|||
* Constructs a RuleBasedBreakIterator that uses the already-created
|
||||
* tables object that is passed in as a parameter.
|
||||
*/
|
||||
RuleBasedBreakIterator::RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status) {
|
||||
RuleBasedBreakIterator::RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status)
|
||||
: fSCharIter(UnicodeString())
|
||||
{
|
||||
init(status);
|
||||
fData = new RBBIDataWrapper(data, status); // status checked in constructor
|
||||
if (U_FAILURE(status)) {return;}
|
||||
|
@ -80,7 +82,9 @@ RuleBasedBreakIterator::RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode
|
|||
//
|
||||
RuleBasedBreakIterator::RuleBasedBreakIterator(const uint8_t *compiledRules,
|
||||
uint32_t ruleLength,
|
||||
UErrorCode &status) {
|
||||
UErrorCode &status)
|
||||
: fSCharIter(UnicodeString())
|
||||
{
|
||||
init(status);
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
|
@ -110,6 +114,7 @@ RuleBasedBreakIterator::RuleBasedBreakIterator(const uint8_t *compiledRules,
|
|||
//
|
||||
//-------------------------------------------------------------------------------
|
||||
RuleBasedBreakIterator::RuleBasedBreakIterator(UDataMemory* udm, UErrorCode &status)
|
||||
: fSCharIter(UnicodeString())
|
||||
{
|
||||
init(status);
|
||||
fData = new RBBIDataWrapper(udm, status); // status checked in constructor
|
||||
|
@ -130,6 +135,7 @@ RuleBasedBreakIterator::RuleBasedBreakIterator(UDataMemory* udm, UErrorCode &sta
|
|||
RuleBasedBreakIterator::RuleBasedBreakIterator( const UnicodeString &rules,
|
||||
UParseError &parseError,
|
||||
UErrorCode &status)
|
||||
: fSCharIter(UnicodeString())
|
||||
{
|
||||
init(status);
|
||||
if (U_FAILURE(status)) {return;}
|
||||
|
@ -152,7 +158,9 @@ RuleBasedBreakIterator::RuleBasedBreakIterator( const UnicodeString &rules,
|
|||
// Used when creating a RuleBasedBreakIterator from a set
|
||||
// of rules.
|
||||
//-------------------------------------------------------------------------------
|
||||
RuleBasedBreakIterator::RuleBasedBreakIterator() {
|
||||
RuleBasedBreakIterator::RuleBasedBreakIterator()
|
||||
: fSCharIter(UnicodeString())
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
init(status);
|
||||
}
|
||||
|
@ -165,7 +173,8 @@ RuleBasedBreakIterator::RuleBasedBreakIterator() {
|
|||
//
|
||||
//-------------------------------------------------------------------------------
|
||||
RuleBasedBreakIterator::RuleBasedBreakIterator(const RuleBasedBreakIterator& other)
|
||||
: BreakIterator(other)
|
||||
: BreakIterator(other),
|
||||
fSCharIter(UnicodeString())
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
this->init(status);
|
||||
|
@ -177,17 +186,13 @@ RuleBasedBreakIterator::RuleBasedBreakIterator(const RuleBasedBreakIterator& oth
|
|||
* Destructor
|
||||
*/
|
||||
RuleBasedBreakIterator::~RuleBasedBreakIterator() {
|
||||
if (fCharIter!=fSCharIter && fCharIter!=fDCharIter) {
|
||||
if (fCharIter != &fSCharIter) {
|
||||
// fCharIter was adopted from the outside.
|
||||
delete fCharIter;
|
||||
}
|
||||
fCharIter = NULL;
|
||||
delete fSCharIter;
|
||||
fSCharIter = NULL;
|
||||
delete fDCharIter;
|
||||
fDCharIter = NULL;
|
||||
|
||||
utext_close(fText);
|
||||
utext_close(&fText);
|
||||
|
||||
if (fData != NULL) {
|
||||
fData->removeReference();
|
||||
|
@ -217,26 +222,29 @@ RuleBasedBreakIterator::operator=(const RuleBasedBreakIterator& that) {
|
|||
}
|
||||
BreakIterator::operator=(that);
|
||||
|
||||
fBreakType = that.fBreakType;
|
||||
if (fLanguageBreakEngines != NULL) {
|
||||
delete fLanguageBreakEngines;
|
||||
fLanguageBreakEngines = NULL; // Just rebuild for now
|
||||
}
|
||||
// TODO: clone fLanguageBreakEngines from "that"
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
fText = utext_clone(fText, that.fText, FALSE, TRUE, &status);
|
||||
utext_clone(&fText, &that.fText, FALSE, TRUE, &status);
|
||||
|
||||
if (fCharIter!=fSCharIter && fCharIter!=fDCharIter) {
|
||||
if (fCharIter != &fSCharIter) {
|
||||
delete fCharIter;
|
||||
}
|
||||
fCharIter = NULL;
|
||||
fCharIter = &fSCharIter;
|
||||
|
||||
if (that.fCharIter != NULL ) {
|
||||
if (that.fCharIter != NULL && that.fCharIter != &that.fSCharIter) {
|
||||
// This is a little bit tricky - it will initially appear that
|
||||
// this->fCharIter is adopted, even if that->fCharIter was
|
||||
// not adopted. That's ok.
|
||||
fCharIter = that.fCharIter->clone();
|
||||
}
|
||||
fSCharIter = that.fSCharIter;
|
||||
if (fCharIter == NULL) {
|
||||
fCharIter = &fSCharIter;
|
||||
}
|
||||
|
||||
if (fData != NULL) {
|
||||
fData->removeReference();
|
||||
|
@ -269,33 +277,30 @@ RuleBasedBreakIterator::operator=(const RuleBasedBreakIterator& that) {
|
|||
//
|
||||
//-----------------------------------------------------------------------------
|
||||
void RuleBasedBreakIterator::init(UErrorCode &status) {
|
||||
fText = NULL;
|
||||
fCharIter = NULL;
|
||||
fSCharIter = NULL;
|
||||
fDCharIter = NULL;
|
||||
fData = NULL;
|
||||
fPosition = 0;
|
||||
fRuleStatusIndex = 0;
|
||||
fDone = false;
|
||||
fDictionaryCharCount = 0;
|
||||
fBreakType = UBRK_WORD; // Defaulting BreakType to word gives reasonable
|
||||
// dictionary behavior for Break Iterators that are
|
||||
// built from rules. Even better would be the ability to
|
||||
// declare the type in the rules.
|
||||
|
||||
fLanguageBreakEngines = NULL;
|
||||
fUnhandledBreakEngine = NULL;
|
||||
fBreakCache = NULL;
|
||||
fDictionaryCache = NULL;
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
// Note: IBM xlC is unable to assign or initialize member fText from UTEXT_INITIALIZER.
|
||||
// fText = UTEXT_INITIALIZER;
|
||||
static const UText initializedUText = UTEXT_INITIALIZER;
|
||||
uprv_memcpy(&fText, &initializedUText, sizeof(UText));
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
|
||||
fText = utext_openUChars(NULL, NULL, 0, &status);
|
||||
utext_openUChars(&fText, NULL, 0, &status);
|
||||
fDictionaryCache = new DictionaryCache(this, status);
|
||||
fBreakCache = new BreakCache(this, status);
|
||||
if (U_SUCCESS(status) && (fText == NULL || fDictionaryCache == NULL || fBreakCache == NULL)) {
|
||||
if (U_SUCCESS(status) && (fDictionaryCache == NULL || fBreakCache == NULL)) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
|
||||
|
@ -344,7 +349,7 @@ RuleBasedBreakIterator::operator==(const BreakIterator& that) const {
|
|||
|
||||
const RuleBasedBreakIterator& that2 = (const RuleBasedBreakIterator&) that;
|
||||
|
||||
if (!utext_equals(fText, that2.fText)) {
|
||||
if (!utext_equals(&fText, &that2.fText)) {
|
||||
// The two break iterators are operating on different text,
|
||||
// or have a different iteration position.
|
||||
// Note that fText's position is always the same as the break iterator's position.
|
||||
|
@ -385,7 +390,7 @@ void RuleBasedBreakIterator::setText(UText *ut, UErrorCode &status) {
|
|||
}
|
||||
fBreakCache->reset();
|
||||
fDictionaryCache->reset();
|
||||
fText = utext_clone(fText, ut, FALSE, TRUE, &status);
|
||||
utext_clone(&fText, ut, FALSE, TRUE, &status);
|
||||
|
||||
// Set up a dummy CharacterIterator to be returned if anyone
|
||||
// calls getText(). With input from UText, there is no reasonable
|
||||
|
@ -393,27 +398,20 @@ void RuleBasedBreakIterator::setText(UText *ut, UErrorCode &status) {
|
|||
// Return one over an empty string instead - this is the closest
|
||||
// we can come to signaling a failure.
|
||||
// (GetText() is obsolete, this failure is sort of OK)
|
||||
if (fDCharIter == NULL) {
|
||||
static const UChar c = 0;
|
||||
fDCharIter = new UCharCharacterIterator(&c, 0);
|
||||
if (fDCharIter == NULL) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
}
|
||||
fSCharIter.setText(UnicodeString());
|
||||
|
||||
if (fCharIter!=fSCharIter && fCharIter!=fDCharIter) {
|
||||
if (fCharIter != &fSCharIter) {
|
||||
// existing fCharIter was adopted from the outside. Delete it now.
|
||||
delete fCharIter;
|
||||
}
|
||||
fCharIter = fDCharIter;
|
||||
fCharIter = &fSCharIter;
|
||||
|
||||
this->first();
|
||||
}
|
||||
|
||||
|
||||
UText *RuleBasedBreakIterator::getUText(UText *fillIn, UErrorCode &status) const {
|
||||
UText *result = utext_clone(fillIn, fText, FALSE, TRUE, &status);
|
||||
UText *result = utext_clone(fillIn, &fText, FALSE, TRUE, &status);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -439,7 +437,7 @@ void
|
|||
RuleBasedBreakIterator::adoptText(CharacterIterator* newText) {
|
||||
// If we are holding a CharacterIterator adopted from a
|
||||
// previous call to this function, delete it now.
|
||||
if (fCharIter!=fSCharIter && fCharIter!=fDCharIter) {
|
||||
if (fCharIter != &fSCharIter) {
|
||||
delete fCharIter;
|
||||
}
|
||||
|
||||
|
@ -450,9 +448,9 @@ RuleBasedBreakIterator::adoptText(CharacterIterator* newText) {
|
|||
if (newText==NULL || newText->startIndex() != 0) {
|
||||
// startIndex !=0 wants to be an error, but there's no way to report it.
|
||||
// Make the iterator text be an empty string.
|
||||
fText = utext_openUChars(fText, NULL, 0, &status);
|
||||
utext_openUChars(&fText, NULL, 0, &status);
|
||||
} else {
|
||||
fText = utext_openCharacterIterator(fText, newText, &status);
|
||||
utext_openCharacterIterator(&fText, newText, &status);
|
||||
}
|
||||
this->first();
|
||||
}
|
||||
|
@ -467,23 +465,19 @@ RuleBasedBreakIterator::setText(const UnicodeString& newText) {
|
|||
UErrorCode status = U_ZERO_ERROR;
|
||||
fBreakCache->reset();
|
||||
fDictionaryCache->reset();
|
||||
fText = utext_openConstUnicodeString(fText, &newText, &status);
|
||||
utext_openConstUnicodeString(&fText, &newText, &status);
|
||||
|
||||
// Set up a character iterator on the string.
|
||||
// Needed in case someone calls getText().
|
||||
// Can not, unfortunately, do this lazily on the (probably never)
|
||||
// call to getText(), because getText is const.
|
||||
if (fSCharIter == NULL) {
|
||||
fSCharIter = new StringCharacterIterator(newText);
|
||||
} else {
|
||||
fSCharIter->setText(newText);
|
||||
}
|
||||
fSCharIter.setText(newText);
|
||||
|
||||
if (fCharIter!=fSCharIter && fCharIter!=fDCharIter) {
|
||||
if (fCharIter != &fSCharIter) {
|
||||
// old fCharIter was adopted from the outside. Delete it.
|
||||
delete fCharIter;
|
||||
}
|
||||
fCharIter = fSCharIter;
|
||||
fCharIter = &fSCharIter;
|
||||
|
||||
this->first();
|
||||
}
|
||||
|
@ -503,14 +497,14 @@ RuleBasedBreakIterator &RuleBasedBreakIterator::refreshInputText(UText *input, U
|
|||
status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return *this;
|
||||
}
|
||||
int64_t pos = utext_getNativeIndex(fText);
|
||||
int64_t pos = utext_getNativeIndex(&fText);
|
||||
// Shallow read-only clone of the new UText into the existing input UText
|
||||
fText = utext_clone(fText, input, FALSE, TRUE, &status);
|
||||
utext_clone(&fText, input, FALSE, TRUE, &status);
|
||||
if (U_FAILURE(status)) {
|
||||
return *this;
|
||||
}
|
||||
utext_setNativeIndex(fText, pos);
|
||||
if (utext_getNativeIndex(fText) != pos) {
|
||||
utext_setNativeIndex(&fText, pos);
|
||||
if (utext_getNativeIndex(&fText) != pos) {
|
||||
// Sanity check. The new input utext is supposed to have the exact same
|
||||
// contents as the old. If we can't set to the same position, it doesn't.
|
||||
// The contents underlying the old utext might be invalid at this point,
|
||||
|
@ -540,7 +534,7 @@ int32_t RuleBasedBreakIterator::first(void) {
|
|||
* @return The text's past-the-end offset.
|
||||
*/
|
||||
int32_t RuleBasedBreakIterator::last(void) {
|
||||
int32_t endPos = (int32_t)utext_nativeLength(fText);
|
||||
int32_t endPos = (int32_t)utext_nativeLength(&fText);
|
||||
UBool endShouldBeBoundary = isBoundary(endPos); // Has side effect of setting iterator position.
|
||||
(void)endShouldBeBoundary;
|
||||
U_ASSERT(endShouldBeBoundary);
|
||||
|
@ -611,8 +605,8 @@ int32_t RuleBasedBreakIterator::following(int32_t startPos) {
|
|||
|
||||
// Move requested offset to a code point start. It might be on a trail surrogate,
|
||||
// or on a trail byte if the input is UTF-8. Or it may be beyond the end of the text.
|
||||
utext_setNativeIndex(fText, startPos);
|
||||
startPos = (int32_t)utext_getNativeIndex(fText);
|
||||
utext_setNativeIndex(&fText, startPos);
|
||||
startPos = (int32_t)utext_getNativeIndex(&fText);
|
||||
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
fBreakCache->following(startPos, status);
|
||||
|
@ -626,15 +620,15 @@ int32_t RuleBasedBreakIterator::following(int32_t startPos) {
|
|||
* @return The position of the last boundary before the starting position.
|
||||
*/
|
||||
int32_t RuleBasedBreakIterator::preceding(int32_t offset) {
|
||||
if (fText == NULL || offset > utext_nativeLength(fText)) {
|
||||
if (offset > utext_nativeLength(&fText)) {
|
||||
return last();
|
||||
}
|
||||
|
||||
// Move requested offset to a code point start. It might be on a trail surrogate,
|
||||
// or on a trail byte if the input is UTF-8.
|
||||
|
||||
utext_setNativeIndex(fText, offset);
|
||||
int32_t adjustedOffset = utext_getNativeIndex(fText);
|
||||
utext_setNativeIndex(&fText, offset);
|
||||
int32_t adjustedOffset = utext_getNativeIndex(&fText);
|
||||
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
fBreakCache->preceding(adjustedOffset, status);
|
||||
|
@ -660,8 +654,8 @@ UBool RuleBasedBreakIterator::isBoundary(int32_t offset) {
|
|||
// Note that isBoundary() is always false for offsets that are not on code point boundaries.
|
||||
// But we still need the side effect of leaving iteration at the following boundary.
|
||||
|
||||
utext_setNativeIndex(fText, offset);
|
||||
int32_t adjustedOffset = utext_getNativeIndex(fText);
|
||||
utext_setNativeIndex(&fText, offset);
|
||||
int32_t adjustedOffset = utext_getNativeIndex(&fText);
|
||||
|
||||
bool result = false;
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
|
@ -669,7 +663,7 @@ UBool RuleBasedBreakIterator::isBoundary(int32_t offset) {
|
|||
result = (fBreakCache->current() == offset);
|
||||
}
|
||||
|
||||
if (result && adjustedOffset < offset && utext_char32At(fText, offset) == U_SENTINEL) {
|
||||
if (result && adjustedOffset < offset && utext_char32At(&fText, offset) == U_SENTINEL) {
|
||||
// Original offset is beyond the end of the text. Return FALSE, it's not a boundary,
|
||||
// but the iteration position remains set to the end of the text, which is a boundary.
|
||||
return FALSE;
|
||||
|
@ -789,9 +783,9 @@ int32_t RuleBasedBreakIterator::handleNext() {
|
|||
|
||||
// if we're already at the end of the text, return DONE.
|
||||
initialPosition = fPosition;
|
||||
UTEXT_SETNATIVEINDEX(fText, initialPosition);
|
||||
UTEXT_SETNATIVEINDEX(&fText, initialPosition);
|
||||
result = initialPosition;
|
||||
c = UTEXT_NEXT32(fText);
|
||||
c = UTEXT_NEXT32(&fText);
|
||||
if (c==U_SENTINEL) {
|
||||
fDone = TRUE;
|
||||
return UBRK_DONE;
|
||||
|
@ -854,7 +848,7 @@ int32_t RuleBasedBreakIterator::handleNext() {
|
|||
|
||||
#ifdef RBBI_DEBUG
|
||||
if (gTrace) {
|
||||
RBBIDebugPrintf(" %4ld ", utext_getNativeIndex(fText));
|
||||
RBBIDebugPrintf(" %4ld ", utext_getNativeIndex(&fText));
|
||||
if (0x20<=c && c<0x7f) {
|
||||
RBBIDebugPrintf("\"%c\" ", c);
|
||||
} else {
|
||||
|
@ -867,9 +861,7 @@ int32_t RuleBasedBreakIterator::handleNext() {
|
|||
// State Transition - move machine to its next state
|
||||
//
|
||||
|
||||
// Note: fNextState is defined as uint16_t[2], but we are casting
|
||||
// a generated RBBI table to RBBIStateTableRow and some tables
|
||||
// actually have more than 2 categories.
|
||||
// fNextState is a variable-length array.
|
||||
U_ASSERT(category<fData->fHeader->fCatCount);
|
||||
state = row->fNextState[category]; /*Not accessing beyond memory*/
|
||||
row = (RBBIStateTableRow *)
|
||||
|
@ -880,7 +872,7 @@ int32_t RuleBasedBreakIterator::handleNext() {
|
|||
if (row->fAccepting == -1) {
|
||||
// Match found, common case.
|
||||
if (mode != RBBI_START) {
|
||||
result = (int32_t)UTEXT_GETNATIVEINDEX(fText);
|
||||
result = (int32_t)UTEXT_GETNATIVEINDEX(&fText);
|
||||
}
|
||||
fRuleStatusIndex = row->fTagIdx; // Remember the break status (tag) values.
|
||||
}
|
||||
|
@ -898,7 +890,7 @@ int32_t RuleBasedBreakIterator::handleNext() {
|
|||
int16_t rule = row->fLookAhead;
|
||||
if (rule != 0) {
|
||||
// At the position of a '/' in a look-ahead match. Record it.
|
||||
int32_t pos = (int32_t)UTEXT_GETNATIVEINDEX(fText);
|
||||
int32_t pos = (int32_t)UTEXT_GETNATIVEINDEX(&fText);
|
||||
lookAheadMatches.setPosition(rule, pos);
|
||||
}
|
||||
|
||||
|
@ -914,7 +906,7 @@ int32_t RuleBasedBreakIterator::handleNext() {
|
|||
// the input position. The next iteration will be processing the
|
||||
// first real input character.
|
||||
if (mode == RBBI_RUN) {
|
||||
c = UTEXT_NEXT32(fText);
|
||||
c = UTEXT_NEXT32(&fText);
|
||||
} else {
|
||||
if (mode == RBBI_START) {
|
||||
mode = RBBI_RUN;
|
||||
|
@ -928,9 +920,9 @@ int32_t RuleBasedBreakIterator::handleNext() {
|
|||
// (This really indicates a defect in the break rules. They should always match
|
||||
// at least one character.)
|
||||
if (result == initialPosition) {
|
||||
utext_setNativeIndex(fText, initialPosition);
|
||||
utext_next32(fText);
|
||||
result = (int32_t)utext_getNativeIndex(fText);
|
||||
utext_setNativeIndex(&fText, initialPosition);
|
||||
utext_next32(&fText);
|
||||
result = (int32_t)utext_getNativeIndex(&fText);
|
||||
fRuleStatusIndex = 0;
|
||||
}
|
||||
|
||||
|
@ -965,7 +957,7 @@ int32_t RuleBasedBreakIterator::handlePrevious(int32_t fromPosition) {
|
|||
int32_t initialPosition = 0;
|
||||
|
||||
const RBBIStateTable *stateTable = fData->fSafeRevTable;
|
||||
UTEXT_SETNATIVEINDEX(fText, fromPosition);
|
||||
UTEXT_SETNATIVEINDEX(&fText, fromPosition);
|
||||
#ifdef RBBI_DEBUG
|
||||
if (gTrace) {
|
||||
RBBIDebugPuts("Handle Previous pos char state category");
|
||||
|
@ -973,14 +965,14 @@ int32_t RuleBasedBreakIterator::handlePrevious(int32_t fromPosition) {
|
|||
#endif
|
||||
|
||||
// if we're already at the start of the text, return DONE.
|
||||
if (fText == NULL || fData == NULL || UTEXT_GETNATIVEINDEX(fText)==0) {
|
||||
if (fData == NULL || UTEXT_GETNATIVEINDEX(&fText)==0) {
|
||||
return BreakIterator::DONE;
|
||||
}
|
||||
|
||||
// Set up the starting char.
|
||||
initialPosition = (int32_t)UTEXT_GETNATIVEINDEX(fText);
|
||||
initialPosition = (int32_t)UTEXT_GETNATIVEINDEX(&fText);
|
||||
result = initialPosition;
|
||||
c = UTEXT_PREVIOUS32(fText);
|
||||
c = UTEXT_PREVIOUS32(&fText);
|
||||
|
||||
// Set the initial state for the state machine
|
||||
state = START_STATE;
|
||||
|
@ -1028,7 +1020,7 @@ int32_t RuleBasedBreakIterator::handlePrevious(int32_t fromPosition) {
|
|||
|
||||
#ifdef RBBI_DEBUG
|
||||
if (gTrace) {
|
||||
RBBIDebugPrintf(" %4d ", (int32_t)utext_getNativeIndex(fText));
|
||||
RBBIDebugPrintf(" %4d ", (int32_t)utext_getNativeIndex(&fText));
|
||||
if (0x20<=c && c<0x7f) {
|
||||
RBBIDebugPrintf("\"%c\" ", c);
|
||||
} else {
|
||||
|
@ -1041,9 +1033,7 @@ int32_t RuleBasedBreakIterator::handlePrevious(int32_t fromPosition) {
|
|||
// State Transition - move machine to its next state
|
||||
//
|
||||
|
||||
// Note: fNextState is defined as uint16_t[2], but we are casting
|
||||
// a generated RBBI table to RBBIStateTableRow and some tables
|
||||
// actually have more than 2 categories.
|
||||
// fNextState is a variable-length array.
|
||||
U_ASSERT(category<fData->fHeader->fCatCount);
|
||||
state = row->fNextState[category]; /*Not accessing beyond memory*/
|
||||
row = (RBBIStateTableRow *)
|
||||
|
@ -1051,7 +1041,7 @@ int32_t RuleBasedBreakIterator::handlePrevious(int32_t fromPosition) {
|
|||
|
||||
if (row->fAccepting == -1) {
|
||||
// Match found, common case.
|
||||
result = (int32_t)UTEXT_GETNATIVEINDEX(fText);
|
||||
result = (int32_t)UTEXT_GETNATIVEINDEX(&fText);
|
||||
}
|
||||
|
||||
int16_t completedRule = row->fAccepting;
|
||||
|
@ -1059,14 +1049,14 @@ int32_t RuleBasedBreakIterator::handlePrevious(int32_t fromPosition) {
|
|||
// Lookahead match is completed.
|
||||
int32_t lookaheadResult = lookAheadMatches.getPosition(completedRule);
|
||||
if (lookaheadResult >= 0) {
|
||||
UTEXT_SETNATIVEINDEX(fText, lookaheadResult);
|
||||
UTEXT_SETNATIVEINDEX(&fText, lookaheadResult);
|
||||
return lookaheadResult;
|
||||
}
|
||||
}
|
||||
int16_t rule = row->fLookAhead;
|
||||
if (rule != 0) {
|
||||
// At the position of a '/' in a look-ahead match. Record it.
|
||||
int32_t pos = (int32_t)UTEXT_GETNATIVEINDEX(fText);
|
||||
int32_t pos = (int32_t)UTEXT_GETNATIVEINDEX(&fText);
|
||||
lookAheadMatches.setPosition(rule, pos);
|
||||
}
|
||||
|
||||
|
@ -1082,7 +1072,7 @@ int32_t RuleBasedBreakIterator::handlePrevious(int32_t fromPosition) {
|
|||
// the input position. The next iteration will be processing the
|
||||
// first real input character.
|
||||
if (mode == RBBI_RUN) {
|
||||
c = UTEXT_PREVIOUS32(fText);
|
||||
c = UTEXT_PREVIOUS32(&fText);
|
||||
} else {
|
||||
if (mode == RBBI_START) {
|
||||
mode = RBBI_RUN;
|
||||
|
@ -1096,9 +1086,9 @@ int32_t RuleBasedBreakIterator::handlePrevious(int32_t fromPosition) {
|
|||
// (This really indicates a defect in the break rules. They should always match
|
||||
// at least one character.)
|
||||
if (result == initialPosition) {
|
||||
UTEXT_SETNATIVEINDEX(fText, initialPosition);
|
||||
UTEXT_PREVIOUS32(fText);
|
||||
result = (int32_t)UTEXT_GETNATIVEINDEX(fText);
|
||||
UTEXT_SETNATIVEINDEX(&fText, initialPosition);
|
||||
UTEXT_PREVIOUS32(&fText);
|
||||
result = (int32_t)UTEXT_GETNATIVEINDEX(&fText);
|
||||
}
|
||||
|
||||
#ifdef RBBI_DEBUG
|
||||
|
@ -1247,7 +1237,7 @@ static void U_CALLCONV initLanguageFactories() {
|
|||
|
||||
|
||||
static const LanguageBreakEngine*
|
||||
getLanguageBreakEngineFromFactory(UChar32 c, int32_t breakType)
|
||||
getLanguageBreakEngineFromFactory(UChar32 c)
|
||||
{
|
||||
umtx_initOnce(gLanguageBreakFactoriesInitOnce, &initLanguageFactories);
|
||||
if (gLanguageBreakFactories == NULL) {
|
||||
|
@ -1258,7 +1248,7 @@ getLanguageBreakEngineFromFactory(UChar32 c, int32_t breakType)
|
|||
const LanguageBreakEngine *lbe = NULL;
|
||||
while (--i >= 0) {
|
||||
LanguageBreakFactory *factory = (LanguageBreakFactory *)(gLanguageBreakFactories->elementAt(i));
|
||||
lbe = factory->getEngineFor(c, breakType);
|
||||
lbe = factory->getEngineFor(c);
|
||||
if (lbe != NULL) {
|
||||
break;
|
||||
}
|
||||
|
@ -1290,14 +1280,14 @@ RuleBasedBreakIterator::getLanguageBreakEngine(UChar32 c) {
|
|||
int32_t i = fLanguageBreakEngines->size();
|
||||
while (--i >= 0) {
|
||||
lbe = (const LanguageBreakEngine *)(fLanguageBreakEngines->elementAt(i));
|
||||
if (lbe->handles(c, fBreakType)) {
|
||||
if (lbe->handles(c)) {
|
||||
return lbe;
|
||||
}
|
||||
}
|
||||
|
||||
// No existing dictionary took the character. See if a factory wants to
|
||||
// give us a new LanguageBreakEngine for this character.
|
||||
lbe = getLanguageBreakEngineFromFactory(c, fBreakType);
|
||||
lbe = getLanguageBreakEngineFromFactory(c);
|
||||
|
||||
// If we got one, use it and push it on our stack.
|
||||
if (lbe != NULL) {
|
||||
|
@ -1313,6 +1303,7 @@ RuleBasedBreakIterator::getLanguageBreakEngine(UChar32 c) {
|
|||
fUnhandledBreakEngine = new UnhandledEngine(status);
|
||||
if (U_SUCCESS(status) && fUnhandledBreakEngine == NULL) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return nullptr;
|
||||
}
|
||||
// Put it last so that scripts for which we have an engine get tried
|
||||
// first.
|
||||
|
@ -1327,25 +1318,19 @@ RuleBasedBreakIterator::getLanguageBreakEngine(UChar32 c) {
|
|||
|
||||
// Tell the reject engine about the character; at its discretion, it may
|
||||
// add more than just the one character.
|
||||
fUnhandledBreakEngine->handleCharacter(c, fBreakType);
|
||||
fUnhandledBreakEngine->handleCharacter(c);
|
||||
|
||||
return fUnhandledBreakEngine;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*int32_t RuleBasedBreakIterator::getBreakType() const {
|
||||
return fBreakType;
|
||||
}*/
|
||||
|
||||
void RuleBasedBreakIterator::setBreakType(int32_t type) {
|
||||
fBreakType = type;
|
||||
}
|
||||
|
||||
void RuleBasedBreakIterator::dumpCache() {
|
||||
fBreakCache->dumpCache();
|
||||
}
|
||||
|
||||
void RuleBasedBreakIterator::dumpTables() {
|
||||
fData->printData();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the description used to create this iterator
|
||||
*/
|
||||
|
|
47
deps/icu-small/source/common/rbbi_cache.cpp
vendored
47
deps/icu-small/source/common/rbbi_cache.cpp
vendored
|
@ -26,14 +26,11 @@ U_NAMESPACE_BEGIN
|
|||
*/
|
||||
|
||||
RuleBasedBreakIterator::DictionaryCache::DictionaryCache(RuleBasedBreakIterator *bi, UErrorCode &status) :
|
||||
fBI(bi), fBreaks(NULL), fPositionInCache(-1),
|
||||
fBI(bi), fBreaks(status), fPositionInCache(-1),
|
||||
fStart(0), fLimit(0), fFirstRuleStatusIndex(0), fOtherRuleStatusIndex(0) {
|
||||
fBreaks = new UVector32(status);
|
||||
}
|
||||
|
||||
RuleBasedBreakIterator::DictionaryCache::~DictionaryCache() {
|
||||
delete fBreaks;
|
||||
fBreaks = NULL;
|
||||
}
|
||||
|
||||
void RuleBasedBreakIterator::DictionaryCache::reset() {
|
||||
|
@ -42,7 +39,7 @@ void RuleBasedBreakIterator::DictionaryCache::reset() {
|
|||
fLimit = 0;
|
||||
fFirstRuleStatusIndex = 0;
|
||||
fOtherRuleStatusIndex = 0;
|
||||
fBreaks->removeAllElements();
|
||||
fBreaks.removeAllElements();
|
||||
}
|
||||
|
||||
UBool RuleBasedBreakIterator::DictionaryCache::following(int32_t fromPos, int32_t *result, int32_t *statusIndex) {
|
||||
|
@ -54,13 +51,13 @@ UBool RuleBasedBreakIterator::DictionaryCache::following(int32_t fromPos, int32_
|
|||
// Sequential iteration, move from previous boundary to the following
|
||||
|
||||
int32_t r = 0;
|
||||
if (fPositionInCache >= 0 && fPositionInCache < fBreaks->size() && fBreaks->elementAti(fPositionInCache) == fromPos) {
|
||||
if (fPositionInCache >= 0 && fPositionInCache < fBreaks.size() && fBreaks.elementAti(fPositionInCache) == fromPos) {
|
||||
++fPositionInCache;
|
||||
if (fPositionInCache >= fBreaks->size()) {
|
||||
if (fPositionInCache >= fBreaks.size()) {
|
||||
fPositionInCache = -1;
|
||||
return FALSE;
|
||||
}
|
||||
r = fBreaks->elementAti(fPositionInCache);
|
||||
r = fBreaks.elementAti(fPositionInCache);
|
||||
U_ASSERT(r > fromPos);
|
||||
*result = r;
|
||||
*statusIndex = fOtherRuleStatusIndex;
|
||||
|
@ -69,8 +66,8 @@ UBool RuleBasedBreakIterator::DictionaryCache::following(int32_t fromPos, int32_
|
|||
|
||||
// Random indexing. Linear search for the boundary following the given position.
|
||||
|
||||
for (fPositionInCache = 0; fPositionInCache < fBreaks->size(); ++fPositionInCache) {
|
||||
r= fBreaks->elementAti(fPositionInCache);
|
||||
for (fPositionInCache = 0; fPositionInCache < fBreaks.size(); ++fPositionInCache) {
|
||||
r= fBreaks.elementAti(fPositionInCache);
|
||||
if (r > fromPos) {
|
||||
*result = r;
|
||||
*statusIndex = fOtherRuleStatusIndex;
|
||||
|
@ -90,16 +87,16 @@ UBool RuleBasedBreakIterator::DictionaryCache::preceding(int32_t fromPos, int32_
|
|||
}
|
||||
|
||||
if (fromPos == fLimit) {
|
||||
fPositionInCache = fBreaks->size() - 1;
|
||||
fPositionInCache = fBreaks.size() - 1;
|
||||
if (fPositionInCache >= 0) {
|
||||
U_ASSERT(fBreaks->elementAti(fPositionInCache) == fromPos);
|
||||
U_ASSERT(fBreaks.elementAti(fPositionInCache) == fromPos);
|
||||
}
|
||||
}
|
||||
|
||||
int32_t r;
|
||||
if (fPositionInCache > 0 && fPositionInCache < fBreaks->size() && fBreaks->elementAti(fPositionInCache) == fromPos) {
|
||||
if (fPositionInCache > 0 && fPositionInCache < fBreaks.size() && fBreaks.elementAti(fPositionInCache) == fromPos) {
|
||||
--fPositionInCache;
|
||||
r = fBreaks->elementAti(fPositionInCache);
|
||||
r = fBreaks.elementAti(fPositionInCache);
|
||||
U_ASSERT(r < fromPos);
|
||||
*result = r;
|
||||
*statusIndex = ( r== fStart) ? fFirstRuleStatusIndex : fOtherRuleStatusIndex;
|
||||
|
@ -111,8 +108,8 @@ UBool RuleBasedBreakIterator::DictionaryCache::preceding(int32_t fromPos, int32_
|
|||
return FALSE;
|
||||
}
|
||||
|
||||
for (fPositionInCache = fBreaks->size()-1; fPositionInCache >= 0; --fPositionInCache) {
|
||||
r = fBreaks->elementAti(fPositionInCache);
|
||||
for (fPositionInCache = fBreaks.size()-1; fPositionInCache >= 0; --fPositionInCache) {
|
||||
r = fBreaks.elementAti(fPositionInCache);
|
||||
if (r < fromPos) {
|
||||
*result = r;
|
||||
*statusIndex = ( r == fStart) ? fFirstRuleStatusIndex : fOtherRuleStatusIndex;
|
||||
|
@ -141,7 +138,7 @@ void RuleBasedBreakIterator::DictionaryCache::populateDictionary(int32_t startPo
|
|||
int32_t current;
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
int32_t foundBreakCount = 0;
|
||||
UText *text = fBI->fText;
|
||||
UText *text = &fBI->fText;
|
||||
|
||||
// Loop through the text, looking for ranges of dictionary characters.
|
||||
// For each span, find the appropriate break engine, and ask it to find
|
||||
|
@ -168,7 +165,7 @@ void RuleBasedBreakIterator::DictionaryCache::populateDictionary(int32_t startPo
|
|||
// Ask the language object if there are any breaks. It will add them to the cache and
|
||||
// leave the text pointer on the other side of its range, ready to search for the next one.
|
||||
if (lbe != NULL) {
|
||||
foundBreakCount += lbe->findBreaks(text, rangeStart, rangeEnd, fBI->fBreakType, *fBreaks);
|
||||
foundBreakCount += lbe->findBreaks(text, rangeStart, rangeEnd, fBreaks);
|
||||
}
|
||||
|
||||
// Reload the loop variables for the next go-round
|
||||
|
@ -182,21 +179,21 @@ void RuleBasedBreakIterator::DictionaryCache::populateDictionary(int32_t startPo
|
|||
|
||||
// printf("foundBreakCount = %d\n", foundBreakCount);
|
||||
if (foundBreakCount > 0) {
|
||||
U_ASSERT(foundBreakCount == fBreaks->size());
|
||||
if (startPos < fBreaks->elementAti(0)) {
|
||||
U_ASSERT(foundBreakCount == fBreaks.size());
|
||||
if (startPos < fBreaks.elementAti(0)) {
|
||||
// The dictionary did not place a boundary at the start of the segment of text.
|
||||
// Add one now. This should not commonly happen, but it would be easy for interactions
|
||||
// of the rules for dictionary segments and the break engine implementations to
|
||||
// inadvertently cause it. Cover it here, just in case.
|
||||
fBreaks->insertElementAt(startPos, 0, status);
|
||||
fBreaks.insertElementAt(startPos, 0, status);
|
||||
}
|
||||
if (endPos > fBreaks->peeki()) {
|
||||
fBreaks->push(endPos, status);
|
||||
if (endPos > fBreaks.peeki()) {
|
||||
fBreaks.push(endPos, status);
|
||||
}
|
||||
fPositionInCache = 0;
|
||||
// Note: Dictionary matching may extend beyond the original limit.
|
||||
fStart = fBreaks->elementAti(0);
|
||||
fLimit = fBreaks->peeki();
|
||||
fStart = fBreaks.elementAti(0);
|
||||
fLimit = fBreaks.peeki();
|
||||
} else {
|
||||
// there were no language-based breaks, even though the segment contained
|
||||
// dictionary characters. Subsequent attempts to fetch boundaries from the dictionary cache
|
||||
|
|
2
deps/icu-small/source/common/rbbi_cache.h
vendored
2
deps/icu-small/source/common/rbbi_cache.h
vendored
|
@ -56,7 +56,7 @@ class RuleBasedBreakIterator::DictionaryCache: public UMemory {
|
|||
|
||||
RuleBasedBreakIterator *fBI;
|
||||
|
||||
UVector32 *fBreaks; // A vector containing the boundaries.
|
||||
UVector32 fBreaks; // A vector containing the boundaries.
|
||||
int32_t fPositionInCache; // Index in fBreaks of last boundary returned by following()
|
||||
// or preceding(). Optimizes sequential access.
|
||||
int32_t fStart; // Text position of first boundary in cache.
|
||||
|
|
4
deps/icu-small/source/common/rbbidata.cpp
vendored
4
deps/icu-small/source/common/rbbidata.cpp
vendored
|
@ -267,8 +267,8 @@ void RBBIDataWrapper::printTable(const char *heading, const RBBIStateTable *tab
|
|||
#endif
|
||||
|
||||
|
||||
#ifdef RBBI_DEBUG
|
||||
void RBBIDataWrapper::printData() {
|
||||
#ifdef RBBI_DEBUG
|
||||
RBBIDebugPrintf("RBBI Data at %p\n", (void *)fHeader);
|
||||
RBBIDebugPrintf(" Version = {%d %d %d %d}\n", fHeader->fFormatVersion[0], fHeader->fFormatVersion[1],
|
||||
fHeader->fFormatVersion[2], fHeader->fFormatVersion[3]);
|
||||
|
@ -285,8 +285,8 @@ void RBBIDataWrapper::printData() {
|
|||
RBBIDebugPrintf("%c", fRuleSource[c]);
|
||||
}
|
||||
RBBIDebugPrintf("\n\n");
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
|
16
deps/icu-small/source/common/rbbidata.h
vendored
16
deps/icu-small/source/common/rbbidata.h
vendored
|
@ -116,9 +116,10 @@ struct RBBIStateTableRow {
|
|||
/* StatusTable of the set of matching */
|
||||
/* tags (rule status values) */
|
||||
int16_t fReserved;
|
||||
uint16_t fNextState[2]; /* Next State, indexed by char category. */
|
||||
/* This array does not have two elements */
|
||||
/* Array Size is actually fData->fHeader->fCatCount */
|
||||
uint16_t fNextState[1]; /* Next State, indexed by char category. */
|
||||
/* Variable-length array declared with length 1 */
|
||||
/* to disable bounds checkers. */
|
||||
/* Array Size is actually fData->fHeader->fCatCount*/
|
||||
/* CAUTION: see RBBITableBuilder::getTableSize() */
|
||||
/* before changing anything here. */
|
||||
};
|
||||
|
@ -129,7 +130,9 @@ struct RBBIStateTable {
|
|||
uint32_t fRowLen; /* Length of a state table row, in bytes. */
|
||||
uint32_t fFlags; /* Option Flags for this state table */
|
||||
uint32_t fReserved; /* reserved */
|
||||
char fTableData[4]; /* First RBBIStateTableRow begins here. */
|
||||
char fTableData[1]; /* First RBBIStateTableRow begins here. */
|
||||
/* Variable-length array declared with length 1 */
|
||||
/* to disable bounds checkers. */
|
||||
/* (making it char[] simplifies ugly address */
|
||||
/* arithmetic for indexing variable length rows.) */
|
||||
};
|
||||
|
@ -162,13 +165,8 @@ public:
|
|||
UBool operator ==(const RBBIDataWrapper &other) const;
|
||||
int32_t hashCode();
|
||||
const UnicodeString &getRuleSourceString() const;
|
||||
#ifdef RBBI_DEBUG
|
||||
void printData();
|
||||
void printTable(const char *heading, const RBBIStateTable *table);
|
||||
#else
|
||||
#define printData()
|
||||
#define printTable(heading, table)
|
||||
#endif
|
||||
|
||||
/* */
|
||||
/* Pointers to items within the data */
|
||||
|
|
45
deps/icu-small/source/common/rbbirb.cpp
vendored
45
deps/icu-small/source/common/rbbirb.cpp
vendored
|
@ -47,7 +47,7 @@ U_NAMESPACE_BEGIN
|
|||
RBBIRuleBuilder::RBBIRuleBuilder(const UnicodeString &rules,
|
||||
UParseError *parseErr,
|
||||
UErrorCode &status)
|
||||
: fRules(rules)
|
||||
: fRules(rules), fStrippedRules(rules)
|
||||
{
|
||||
fStatus = &status; // status is checked below
|
||||
fParseError = parseErr;
|
||||
|
@ -147,8 +147,9 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() {
|
|||
return NULL;
|
||||
}
|
||||
|
||||
// Remove comments and whitespace from the rules to make it smaller.
|
||||
UnicodeString strippedRules((const UnicodeString&)RBBIRuleScanner::stripRules(fRules));
|
||||
// Remove whitespace from the rules to make it smaller.
|
||||
// The rule parser has already removed comments.
|
||||
fStrippedRules = fScanner->stripRules(fStrippedRules);
|
||||
|
||||
// Calculate the size of each section in the data.
|
||||
// Sizes here are padded up to a multiple of 8 for better memory alignment.
|
||||
|
@ -162,7 +163,7 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() {
|
|||
int32_t safeRevTableSize = align8(fSafeRevTables->getTableSize());
|
||||
int32_t trieSize = align8(fSetBuilder->getTrieSize());
|
||||
int32_t statusTableSize = align8(fRuleStatusVals->size() * sizeof(int32_t));
|
||||
int32_t rulesSize = align8((strippedRules.length()+1) * sizeof(UChar));
|
||||
int32_t rulesSize = align8((fStrippedRules.length()+1) * sizeof(UChar));
|
||||
|
||||
(void)safeFwdTableSize;
|
||||
|
||||
|
@ -225,7 +226,7 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() {
|
|||
data->fStatusTable = data->fTrie + trieSize;
|
||||
data->fStatusTableLen= statusTableSize;
|
||||
data->fRuleSource = data->fStatusTable + statusTableSize;
|
||||
data->fRuleSourceLen = strippedRules.length() * sizeof(UChar);
|
||||
data->fRuleSourceLen = fStrippedRules.length() * sizeof(UChar);
|
||||
|
||||
uprv_memset(data->fReserved, 0, sizeof(data->fReserved));
|
||||
|
||||
|
@ -245,7 +246,7 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() {
|
|||
ruleStatusTable[i] = fRuleStatusVals->elementAti(i);
|
||||
}
|
||||
|
||||
strippedRules.extract((UChar *)((uint8_t *)data+data->fRuleSource), rulesSize/2+1, *fStatus);
|
||||
fStrippedRules.extract((UChar *)((uint8_t *)data+data->fRuleSource), rulesSize/2+1, *fStatus);
|
||||
|
||||
return data;
|
||||
}
|
||||
|
@ -281,10 +282,10 @@ RBBIRuleBuilder::createRuleBasedBreakIterator( const UnicodeString &rules,
|
|||
//
|
||||
// UnicodeSet processing.
|
||||
// Munge the Unicode Sets to create a set of character categories.
|
||||
// Generate the mapping tables (TRIE) from input 32-bit characters to
|
||||
// Generate the mapping tables (TRIE) from input code points to
|
||||
// the character categories.
|
||||
//
|
||||
builder.fSetBuilder->build();
|
||||
builder.fSetBuilder->buildRanges();
|
||||
|
||||
|
||||
//
|
||||
|
@ -316,6 +317,11 @@ RBBIRuleBuilder::createRuleBasedBreakIterator( const UnicodeString &rules,
|
|||
}
|
||||
#endif
|
||||
|
||||
builder.optimizeTables();
|
||||
builder.fSetBuilder->buildTrie();
|
||||
|
||||
|
||||
|
||||
//
|
||||
// Package up the compiled data into a memory image
|
||||
// in the run-time format.
|
||||
|
@ -347,6 +353,29 @@ RBBIRuleBuilder::createRuleBasedBreakIterator( const UnicodeString &rules,
|
|||
return This;
|
||||
}
|
||||
|
||||
void RBBIRuleBuilder::optimizeTables() {
|
||||
int32_t leftClass;
|
||||
int32_t rightClass;
|
||||
|
||||
leftClass = 3;
|
||||
rightClass = 0;
|
||||
while (fForwardTables->findDuplCharClassFrom(leftClass, rightClass)) {
|
||||
fSetBuilder->mergeCategories(leftClass, rightClass);
|
||||
fForwardTables->removeColumn(rightClass);
|
||||
fReverseTables->removeColumn(rightClass);
|
||||
fSafeFwdTables->removeColumn(rightClass);
|
||||
fSafeRevTables->removeColumn(rightClass);
|
||||
}
|
||||
|
||||
fForwardTables->removeDuplicateStates();
|
||||
fReverseTables->removeDuplicateStates();
|
||||
fSafeFwdTables->removeDuplicateStates();
|
||||
fSafeRevTables->removeDuplicateStates();
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
|
||||
|
|
9
deps/icu-small/source/common/rbbirb.h
vendored
9
deps/icu-small/source/common/rbbirb.h
vendored
|
@ -126,10 +126,19 @@ public:
|
|||
);
|
||||
|
||||
virtual ~RBBIRuleBuilder();
|
||||
|
||||
/**
|
||||
* Fold together redundant character classes (table columns) and
|
||||
* redundant states (table rows). Done after initial table generation,
|
||||
* before serializing the result.
|
||||
*/
|
||||
void optimizeTables();
|
||||
|
||||
char *fDebugEnv; // controls debug trace output
|
||||
UErrorCode *fStatus; // Error reporting. Keeping status
|
||||
UParseError *fParseError; // here avoids passing it everywhere.
|
||||
const UnicodeString &fRules; // The rule string that we are compiling
|
||||
UnicodeString fStrippedRules; // The rule string, with comments stripped.
|
||||
|
||||
RBBIRuleScanner *fScanner; // The scanner.
|
||||
RBBINode *fForwardTree; // The parse trees, generated by the scanner,
|
||||
|
|
31
deps/icu-small/source/common/rbbiscan.cpp
vendored
31
deps/icu-small/source/common/rbbiscan.cpp
vendored
|
@ -822,27 +822,24 @@ static const UChar chRParen = 0x29;
|
|||
|
||||
//------------------------------------------------------------------------------
|
||||
//
|
||||
// stripRules Return a rules string without unnecessary
|
||||
// characters.
|
||||
// stripRules Return a rules string without extra spaces.
|
||||
// (Comments are removed separately, during rule parsing.)
|
||||
//
|
||||
//------------------------------------------------------------------------------
|
||||
UnicodeString RBBIRuleScanner::stripRules(const UnicodeString &rules) {
|
||||
UnicodeString strippedRules;
|
||||
int rulesLength = rules.length();
|
||||
for (int idx = 0; idx < rulesLength; ) {
|
||||
UChar ch = rules[idx++];
|
||||
if (ch == chPound) {
|
||||
while (idx < rulesLength
|
||||
&& ch != chCR && ch != chLF && ch != chNEL)
|
||||
{
|
||||
ch = rules[idx++];
|
||||
}
|
||||
}
|
||||
if (!u_isISOControl(ch)) {
|
||||
strippedRules.append(ch);
|
||||
int32_t rulesLength = rules.length();
|
||||
bool skippingSpaces = false;
|
||||
|
||||
for (int32_t idx=0; idx<rulesLength; idx = rules.moveIndex32(idx, 1)) {
|
||||
UChar32 cp = rules.char32At(idx);
|
||||
bool whiteSpace = u_hasBinaryProperty(cp, UCHAR_PATTERN_WHITE_SPACE);
|
||||
if (skippingSpaces && whiteSpace) {
|
||||
continue;
|
||||
}
|
||||
strippedRules.append(cp);
|
||||
skippingSpaces = whiteSpace;
|
||||
}
|
||||
// strippedRules = strippedRules.unescape();
|
||||
return strippedRules;
|
||||
}
|
||||
|
||||
|
@ -942,6 +939,7 @@ void RBBIRuleScanner::nextChar(RBBIRuleChar &c) {
|
|||
// It will be treated as white-space, and serves to break up anything
|
||||
// that might otherwise incorrectly clump together with a comment in
|
||||
// the middle (a variable name, for example.)
|
||||
int32_t commentStart = fScanIndex;
|
||||
for (;;) {
|
||||
c.fChar = nextCharLL();
|
||||
if (c.fChar == (UChar32)-1 || // EOF
|
||||
|
@ -950,6 +948,9 @@ void RBBIRuleScanner::nextChar(RBBIRuleChar &c) {
|
|||
c.fChar == chNEL ||
|
||||
c.fChar == chLS) {break;}
|
||||
}
|
||||
for (int32_t i=commentStart; i<fNextIndex-1; ++i) {
|
||||
fRB->fStrippedRules.setCharAt(i, u' ');
|
||||
}
|
||||
}
|
||||
if (c.fChar == (UChar32)-1) {
|
||||
return;
|
||||
|
|
55
deps/icu-small/source/common/rbbisetb.cpp
vendored
55
deps/icu-small/source/common/rbbisetb.cpp
vendored
|
@ -91,7 +91,7 @@ RBBISetBuilder::~RBBISetBuilder()
|
|||
// from the Unicode Sets.
|
||||
//
|
||||
//------------------------------------------------------------------------
|
||||
void RBBISetBuilder::build() {
|
||||
void RBBISetBuilder::buildRanges() {
|
||||
RBBINode *usetNode;
|
||||
RangeDescriptor *rlRange;
|
||||
|
||||
|
@ -245,11 +245,16 @@ void RBBISetBuilder::build() {
|
|||
|
||||
if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "rgroup")) {printRangeGroups();}
|
||||
if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "esets")) {printSets();}
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// Build the Trie table for mapping UChar32 values to the corresponding
|
||||
// range group number.
|
||||
//
|
||||
void RBBISetBuilder::buildTrie() {
|
||||
RangeDescriptor *rlRange;
|
||||
|
||||
//
|
||||
// Build the Trie table for mapping UChar32 values to the corresponding
|
||||
// range group number
|
||||
//
|
||||
fTrie = utrie2_open(0, // Initial value for all code points.
|
||||
0, // Error value for out-of-range input.
|
||||
fStatus);
|
||||
|
@ -265,6 +270,22 @@ void RBBISetBuilder::build() {
|
|||
}
|
||||
|
||||
|
||||
void RBBISetBuilder::mergeCategories(int32_t left, int32_t right) {
|
||||
U_ASSERT(left >= 1);
|
||||
U_ASSERT(right > left);
|
||||
for (RangeDescriptor *rd = fRangeList; rd != nullptr; rd = rd->fNext) {
|
||||
int32_t rangeNum = rd->fNum & ~DICT_BIT;
|
||||
int32_t rangeDict = rd->fNum & DICT_BIT;
|
||||
if (rangeNum == right) {
|
||||
rd->fNum = left | rangeDict;
|
||||
} else if (rangeNum > right) {
|
||||
rd->fNum--;
|
||||
}
|
||||
}
|
||||
--fGroupCount;
|
||||
}
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------------
|
||||
//
|
||||
// getTrieSize() Return the size that will be required to serialize the Trie.
|
||||
|
@ -446,7 +467,7 @@ void RBBISetBuilder::printRangeGroups() {
|
|||
lastPrintedGroupNum = groupNum;
|
||||
RBBIDebugPrintf("%2i ", groupNum);
|
||||
|
||||
if (rlRange->fNum & 0x4000) { RBBIDebugPrintf(" <DICT> ");}
|
||||
if (rlRange->fNum & DICT_BIT) { RBBIDebugPrintf(" <DICT> ");}
|
||||
|
||||
for (i=0; i<rlRange->fIncludesSets->size(); i++) {
|
||||
RBBINode *usetNode = (RBBINode *)rlRange->fIncludesSets->elementAt(i);
|
||||
|
@ -639,20 +660,20 @@ void RangeDescriptor::split(UChar32 where, UErrorCode &status) {
|
|||
void RangeDescriptor::setDictionaryFlag() {
|
||||
int i;
|
||||
|
||||
for (i=0; i<this->fIncludesSets->size(); i++) {
|
||||
RBBINode *usetNode = (RBBINode *)fIncludesSets->elementAt(i);
|
||||
UnicodeString setName;
|
||||
RBBINode *setRef = usetNode->fParent;
|
||||
if (setRef != NULL) {
|
||||
static const char16_t *dictionary = u"dictionary";
|
||||
for (i=0; i<fIncludesSets->size(); i++) {
|
||||
RBBINode *usetNode = (RBBINode *)fIncludesSets->elementAt(i);
|
||||
RBBINode *setRef = usetNode->fParent;
|
||||
if (setRef != nullptr) {
|
||||
RBBINode *varRef = setRef->fParent;
|
||||
if (varRef != NULL && varRef->fType == RBBINode::varRef) {
|
||||
setName = varRef->fText;
|
||||
if (varRef && varRef->fType == RBBINode::varRef) {
|
||||
const UnicodeString *setName = &varRef->fText;
|
||||
if (setName->compare(dictionary, -1) == 0) {
|
||||
fNum |= RBBISetBuilder::DICT_BIT;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (setName.compare(UNICODE_STRING("dictionary", 10)) == 0) { // TODO: no string literals.
|
||||
this->fNum |= 0x4000;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
10
deps/icu-small/source/common/rbbisetb.h
vendored
10
deps/icu-small/source/common/rbbisetb.h
vendored
|
@ -82,7 +82,8 @@ public:
|
|||
RBBISetBuilder(RBBIRuleBuilder *rb);
|
||||
~RBBISetBuilder();
|
||||
|
||||
void build();
|
||||
void buildRanges();
|
||||
void buildTrie();
|
||||
void addValToSets(UVector *sets, uint32_t val);
|
||||
void addValToSet (RBBINode *usetNode, uint32_t val);
|
||||
int32_t getNumCharCategories() const; // CharCategories are the same as input symbol set to the
|
||||
|
@ -93,6 +94,13 @@ public:
|
|||
UChar32 getFirstChar(int32_t val) const;
|
||||
UBool sawBOF() const; // Indicate whether any references to the {bof} pseudo
|
||||
// character were encountered.
|
||||
/** merge two character categories that have been identified as having equivalent behavior.
|
||||
* The ranges belonging to the right category (table column) will be added to the left.
|
||||
*/
|
||||
void mergeCategories(int32_t left, int32_t right);
|
||||
|
||||
static constexpr int32_t DICT_BIT = 0x4000;
|
||||
|
||||
#ifdef RBBI_DEBUG
|
||||
void printSets();
|
||||
void printRanges();
|
||||
|
|
144
deps/icu-small/source/common/rbbitblb.cpp
vendored
144
deps/icu-small/source/common/rbbitblb.cpp
vendored
|
@ -22,6 +22,7 @@
|
|||
#include "rbbidata.h"
|
||||
#include "cstring.h"
|
||||
#include "uassert.h"
|
||||
#include "uvectr32.h"
|
||||
#include "cmemory.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
@ -761,7 +762,7 @@ void RBBITableBuilder::flagAcceptingStates() {
|
|||
// if sd->fAccepting already had a value other than 0 or -1, leave it be.
|
||||
|
||||
// If the end marker node is from a look-ahead rule, set
|
||||
// the fLookAhead field or this state also.
|
||||
// the fLookAhead field for this state also.
|
||||
if (endMarker->fLookAheadEnd) {
|
||||
// TODO: don't change value if already set?
|
||||
// TODO: allow for more than one active look-ahead rule in engine.
|
||||
|
@ -1077,7 +1078,128 @@ void RBBITableBuilder::printPosSets(RBBINode *n) {
|
|||
}
|
||||
#endif
|
||||
|
||||
//
|
||||
// findDuplCharClassFrom()
|
||||
//
|
||||
bool RBBITableBuilder::findDuplCharClassFrom(int32_t &baseCategory, int32_t &duplCategory) {
|
||||
int32_t numStates = fDStates->size();
|
||||
int32_t numCols = fRB->fSetBuilder->getNumCharCategories();
|
||||
|
||||
uint16_t table_base;
|
||||
uint16_t table_dupl;
|
||||
for (; baseCategory < numCols-1; ++baseCategory) {
|
||||
for (duplCategory=baseCategory+1; duplCategory < numCols; ++duplCategory) {
|
||||
for (int32_t state=0; state<numStates; state++) {
|
||||
RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(state);
|
||||
table_base = (uint16_t)sd->fDtran->elementAti(baseCategory);
|
||||
table_dupl = (uint16_t)sd->fDtran->elementAti(duplCategory);
|
||||
if (table_base != table_dupl) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (table_base == table_dupl) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// removeColumn()
|
||||
//
|
||||
void RBBITableBuilder::removeColumn(int32_t column) {
|
||||
int32_t numStates = fDStates->size();
|
||||
for (int32_t state=0; state<numStates; state++) {
|
||||
RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(state);
|
||||
U_ASSERT(column < sd->fDtran->size());
|
||||
sd->fDtran->removeElementAt(column);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* findDuplicateState
|
||||
*/
|
||||
bool RBBITableBuilder::findDuplicateState(int32_t &firstState, int32_t &duplState) {
|
||||
int32_t numStates = fDStates->size();
|
||||
int32_t numCols = fRB->fSetBuilder->getNumCharCategories();
|
||||
|
||||
for (; firstState<numStates-1; ++firstState) {
|
||||
RBBIStateDescriptor *firstSD = (RBBIStateDescriptor *)fDStates->elementAt(firstState);
|
||||
for (duplState=firstState+1; duplState<numStates; ++duplState) {
|
||||
RBBIStateDescriptor *duplSD = (RBBIStateDescriptor *)fDStates->elementAt(duplState);
|
||||
if (firstSD->fAccepting != duplSD->fAccepting ||
|
||||
firstSD->fLookAhead != duplSD->fLookAhead ||
|
||||
firstSD->fTagsIdx != duplSD->fTagsIdx) {
|
||||
continue;
|
||||
}
|
||||
bool rowsMatch = true;
|
||||
for (int32_t col=0; col < numCols; ++col) {
|
||||
int32_t firstVal = firstSD->fDtran->elementAti(col);
|
||||
int32_t duplVal = duplSD->fDtran->elementAti(col);
|
||||
if (!((firstVal == duplVal) ||
|
||||
((firstVal == firstState || firstVal == duplState) &&
|
||||
(duplVal == firstState || duplVal == duplState)))) {
|
||||
rowsMatch = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (rowsMatch) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void RBBITableBuilder::removeState(int32_t keepState, int32_t duplState) {
|
||||
U_ASSERT(keepState < duplState);
|
||||
U_ASSERT(duplState < fDStates->size());
|
||||
|
||||
RBBIStateDescriptor *duplSD = (RBBIStateDescriptor *)fDStates->elementAt(duplState);
|
||||
fDStates->removeElementAt(duplState);
|
||||
delete duplSD;
|
||||
|
||||
int32_t numStates = fDStates->size();
|
||||
int32_t numCols = fRB->fSetBuilder->getNumCharCategories();
|
||||
for (int32_t state=0; state<numStates; ++state) {
|
||||
RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(state);
|
||||
for (int32_t col=0; col<numCols; col++) {
|
||||
int32_t existingVal = sd->fDtran->elementAti(col);
|
||||
int32_t newVal = existingVal;
|
||||
if (existingVal == duplState) {
|
||||
newVal = keepState;
|
||||
} else if (existingVal > duplState) {
|
||||
newVal = existingVal - 1;
|
||||
}
|
||||
sd->fDtran->setElementAt(newVal, col);
|
||||
}
|
||||
if (sd->fAccepting == duplState) {
|
||||
sd->fAccepting = keepState;
|
||||
} else if (sd->fAccepting > duplState) {
|
||||
sd->fAccepting--;
|
||||
}
|
||||
if (sd->fLookAhead == duplState) {
|
||||
sd->fLookAhead = keepState;
|
||||
} else if (sd->fLookAhead > duplState) {
|
||||
sd->fLookAhead--;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* RemoveDuplicateStates
|
||||
*/
|
||||
void RBBITableBuilder::removeDuplicateStates() {
|
||||
int32_t firstState = 3;
|
||||
int32_t duplicateState = 0;
|
||||
while (findDuplicateState(firstState, duplicateState)) {
|
||||
// printf("Removing duplicate states (%d, %d)\n", firstState, duplicateState);
|
||||
removeState(firstState, duplicateState);
|
||||
}
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
//
|
||||
|
@ -1095,21 +1217,17 @@ int32_t RBBITableBuilder::getTableSize() const {
|
|||
return 0;
|
||||
}
|
||||
|
||||
size = sizeof(RBBIStateTable) - 4; // The header, with no rows to the table.
|
||||
size = offsetof(RBBIStateTable, fTableData); // The header, with no rows to the table.
|
||||
|
||||
numRows = fDStates->size();
|
||||
numCols = fRB->fSetBuilder->getNumCharCategories();
|
||||
|
||||
// Note The declaration of RBBIStateTableRow is for a table of two columns.
|
||||
// Therefore we subtract two from numCols when determining
|
||||
// how much storage to add to a row for the total columns.
|
||||
rowSize = sizeof(RBBIStateTableRow) + sizeof(uint16_t)*(numCols-2);
|
||||
rowSize = offsetof(RBBIStateTableRow, fNextState) + sizeof(uint16_t)*numCols;
|
||||
size += numRows * rowSize;
|
||||
return size;
|
||||
}
|
||||
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
//
|
||||
// exportTable() export the state transition table in the format required
|
||||
|
@ -1126,14 +1244,14 @@ void RBBITableBuilder::exportTable(void *where) {
|
|||
return;
|
||||
}
|
||||
|
||||
if (fRB->fSetBuilder->getNumCharCategories() > 0x7fff ||
|
||||
int32_t catCount = fRB->fSetBuilder->getNumCharCategories();
|
||||
if (catCount > 0x7fff ||
|
||||
fDStates->size() > 0x7fff) {
|
||||
*fStatus = U_BRK_INTERNAL_ERROR;
|
||||
return;
|
||||
}
|
||||
|
||||
table->fRowLen = sizeof(RBBIStateTableRow) +
|
||||
sizeof(uint16_t) * (fRB->fSetBuilder->getNumCharCategories() - 2);
|
||||
table->fRowLen = offsetof(RBBIStateTableRow, fNextState) + sizeof(uint16_t) * catCount;
|
||||
table->fNumStates = fDStates->size();
|
||||
table->fFlags = 0;
|
||||
if (fRB->fLookAheadHardBreak) {
|
||||
|
@ -1152,7 +1270,7 @@ void RBBITableBuilder::exportTable(void *where) {
|
|||
row->fAccepting = (int16_t)sd->fAccepting;
|
||||
row->fLookAhead = (int16_t)sd->fLookAhead;
|
||||
row->fTagIdx = (int16_t)sd->fTagsIdx;
|
||||
for (col=0; col<fRB->fSetBuilder->getNumCharCategories(); col++) {
|
||||
for (col=0; col<catCount; col++) {
|
||||
row->fNextState[col] = (uint16_t)sd->fDtran->elementAti(col);
|
||||
}
|
||||
}
|
||||
|
@ -1259,7 +1377,7 @@ RBBIStateDescriptor::RBBIStateDescriptor(int lastInputSymbol, UErrorCode *fStatu
|
|||
fPositions = NULL;
|
||||
fDtran = NULL;
|
||||
|
||||
fDtran = new UVector(lastInputSymbol+1, *fStatus);
|
||||
fDtran = new UVector32(lastInputSymbol+1, *fStatus);
|
||||
if (U_FAILURE(*fStatus)) {
|
||||
return;
|
||||
}
|
||||
|
@ -1267,7 +1385,7 @@ RBBIStateDescriptor::RBBIStateDescriptor(int lastInputSymbol, UErrorCode *fStatu
|
|||
*fStatus = U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
fDtran->setSize(lastInputSymbol+1, *fStatus); // fDtran needs to be pre-sized.
|
||||
fDtran->setSize(lastInputSymbol+1); // fDtran needs to be pre-sized.
|
||||
// It is indexed by input symbols, and will
|
||||
// hold the next state number for each
|
||||
// symbol.
|
||||
|
|
45
deps/icu-small/source/common/rbbitblb.h
vendored
45
deps/icu-small/source/common/rbbitblb.h
vendored
|
@ -24,6 +24,7 @@ U_NAMESPACE_BEGIN
|
|||
|
||||
class RBBIRuleScanner;
|
||||
class RBBIRuleBuilder;
|
||||
class UVector32;
|
||||
|
||||
//
|
||||
// class RBBITableBuilder is part of the RBBI rule compiler.
|
||||
|
@ -42,9 +43,24 @@ public:
|
|||
void build();
|
||||
int32_t getTableSize() const; // Return the runtime size in bytes of
|
||||
// the built state table
|
||||
void exportTable(void *where); // fill in the runtime state table.
|
||||
// Sufficient memory must exist at
|
||||
// the specified location.
|
||||
|
||||
/** Fill in the runtime state table. Sufficient memory must exist at the specified location.
|
||||
*/
|
||||
void exportTable(void *where);
|
||||
|
||||
/** Find duplicate (redundant) character classes, beginning after the specified
|
||||
* pair, within this state table. This is an iterator-like function, used to
|
||||
* identify char classes (state table columns) that can be eliminated.
|
||||
*/
|
||||
bool findDuplCharClassFrom(int &baseClass, int &duplClass);
|
||||
|
||||
/** Remove a column from the state table. Used when two character categories
|
||||
* have been found equivalent, and merged together, to eliminate the unneeded table column.
|
||||
*/
|
||||
void removeColumn(int32_t column);
|
||||
|
||||
/** Check for, and remove duplicate states (table rows). */
|
||||
void removeDuplicateStates();
|
||||
|
||||
|
||||
private:
|
||||
|
@ -60,8 +76,29 @@ private:
|
|||
void flagTaggedStates();
|
||||
void mergeRuleStatusVals();
|
||||
|
||||
/**
|
||||
* Merge redundant state table columns, eliminating character classes with identical behavior.
|
||||
* Done after the state tables are generated, just before converting to their run-time format.
|
||||
*/
|
||||
int32_t mergeColumns();
|
||||
|
||||
void addRuleRootNodes(UVector *dest, RBBINode *node);
|
||||
|
||||
/** Find the next duplicate state. An iterator function.
|
||||
* @param firstState (in/out) begin looking at this state, return the first of the
|
||||
* pair of duplicates.
|
||||
* @param duplicateState returns the duplicate state of firstState
|
||||
* @return true if a duplicate pair of states was found.
|
||||
*/
|
||||
bool findDuplicateState(int32_t &firstState, int32_t &duplicateState);
|
||||
|
||||
/** Remove a duplicate state.
|
||||
* @param keepState First of the duplicate pair. Keep it.
|
||||
* @param duplState Duplicate state. Remove it. Redirect all references to the duplicate state
|
||||
* to refer to keepState instead.
|
||||
*/
|
||||
void removeState(int32_t keepState, int32_t duplState);
|
||||
|
||||
// Set functions for UVector.
|
||||
// TODO: make a USet subclass of UVector
|
||||
|
||||
|
@ -112,7 +149,7 @@ public:
|
|||
// with this state. Unordered (it's a set).
|
||||
// UVector contents are RBBINode *
|
||||
|
||||
UVector *fDtran; // Transitions out of this state.
|
||||
UVector32 *fDtran; // Transitions out of this state.
|
||||
// indexed by input character
|
||||
// contents is int index of dest state
|
||||
// in RBBITableBuilder.fDStates
|
||||
|
|
69
deps/icu-small/source/common/sharedobject.cpp
vendored
69
deps/icu-small/source/common/sharedobject.cpp
vendored
|
@ -8,7 +8,10 @@
|
|||
* sharedobject.cpp
|
||||
*/
|
||||
#include "sharedobject.h"
|
||||
#include "mutex.h"
|
||||
#include "uassert.h"
|
||||
#include "umutex.h"
|
||||
#include "unifiedcache.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
|
@ -17,69 +20,41 @@ SharedObject::~SharedObject() {}
|
|||
UnifiedCacheBase::~UnifiedCacheBase() {}
|
||||
|
||||
void
|
||||
SharedObject::addRef(UBool fromWithinCache) const {
|
||||
umtx_atomic_inc(&totalRefCount);
|
||||
|
||||
// Although items in use may not be correct immediately, it
|
||||
// will be correct eventually.
|
||||
if (umtx_atomic_inc(&hardRefCount) == 1 && cachePtr != NULL) {
|
||||
// If this object is cached, and the hardRefCount goes from 0 to 1,
|
||||
// then the increment must happen from within the cache while the
|
||||
// cache global mutex is locked. In this way, we can be rest assured
|
||||
// that data races can't happen if the cache performs some task if
|
||||
// the hardRefCount is zero while the global cache mutex is locked.
|
||||
(void)fromWithinCache; // Suppress unused variable warning in non-debug builds.
|
||||
U_ASSERT(fromWithinCache);
|
||||
cachePtr->incrementItemsInUse();
|
||||
}
|
||||
SharedObject::addRef() const {
|
||||
umtx_atomic_inc(&hardRefCount);
|
||||
}
|
||||
|
||||
// removeRef Decrement the reference count and delete if it is zero.
|
||||
// Note that SharedObjects with a non-null cachePtr are owned by the
|
||||
// unified cache, and the cache will be responsible for the actual deletion.
|
||||
// The deletion could be as soon as immediately following the
|
||||
// update to the reference count, if another thread is running
|
||||
// a cache eviction cycle concurrently.
|
||||
// NO ACCESS TO *this PERMITTED AFTER REFERENCE COUNT == 0 for cached objects.
|
||||
// THE OBJECT MAY ALREADY BE GONE.
|
||||
void
|
||||
SharedObject::removeRef(UBool fromWithinCache) const {
|
||||
UBool decrementItemsInUse = (umtx_atomic_dec(&hardRefCount) == 0);
|
||||
UBool allReferencesGone = (umtx_atomic_dec(&totalRefCount) == 0);
|
||||
|
||||
// Although items in use may not be correct immediately, it
|
||||
// will be correct eventually.
|
||||
if (decrementItemsInUse && cachePtr != NULL) {
|
||||
if (fromWithinCache) {
|
||||
cachePtr->decrementItemsInUse();
|
||||
SharedObject::removeRef() const {
|
||||
const UnifiedCacheBase *cache = this->cachePtr;
|
||||
int32_t updatedRefCount = umtx_atomic_dec(&hardRefCount);
|
||||
U_ASSERT(updatedRefCount >= 0);
|
||||
if (updatedRefCount == 0) {
|
||||
if (cache) {
|
||||
cache->handleUnreferencedObject();
|
||||
} else {
|
||||
cachePtr->decrementItemsInUseWithLockingAndEviction();
|
||||
delete this;
|
||||
}
|
||||
}
|
||||
if (allReferencesGone) {
|
||||
delete this;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
SharedObject::addSoftRef() const {
|
||||
umtx_atomic_inc(&totalRefCount);
|
||||
++softRefCount;
|
||||
}
|
||||
|
||||
void
|
||||
SharedObject::removeSoftRef() const {
|
||||
--softRefCount;
|
||||
if (umtx_atomic_dec(&totalRefCount) == 0) {
|
||||
delete this;
|
||||
}
|
||||
}
|
||||
|
||||
int32_t
|
||||
SharedObject::getRefCount() const {
|
||||
return umtx_loadAcquire(totalRefCount);
|
||||
}
|
||||
|
||||
int32_t
|
||||
SharedObject::getHardRefCount() const {
|
||||
return umtx_loadAcquire(hardRefCount);
|
||||
}
|
||||
|
||||
void
|
||||
SharedObject::deleteIfZeroRefCount() const {
|
||||
if(getRefCount() == 0) {
|
||||
if (this->cachePtr == nullptr && getRefCount() == 0) {
|
||||
delete this;
|
||||
}
|
||||
}
|
||||
|
|
116
deps/icu-small/source/common/sharedobject.h
vendored
116
deps/icu-small/source/common/sharedobject.h
vendored
|
@ -17,6 +17,8 @@
|
|||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class SharedObject;
|
||||
|
||||
/**
|
||||
* Base class for unified cache exposing enough methods to SharedObject
|
||||
* instances to allow their addRef() and removeRef() methods to
|
||||
|
@ -28,22 +30,12 @@ public:
|
|||
UnifiedCacheBase() { }
|
||||
|
||||
/**
|
||||
* Called by addRefWhileHoldingCacheLock() when the hard reference count
|
||||
* of its instance goes from 0 to 1.
|
||||
* Notify the cache implementation that an object was seen transitioning to
|
||||
* zero hard references. The cache may use this to keep track of the number of
|
||||
* unreferenced SharedObjects, and to trigger evictions.
|
||||
*/
|
||||
virtual void incrementItemsInUse() const = 0;
|
||||
virtual void handleUnreferencedObject() const = 0;
|
||||
|
||||
/**
|
||||
* Called by removeRef() when the hard reference count of its instance
|
||||
* drops from 1 to 0.
|
||||
*/
|
||||
virtual void decrementItemsInUseWithLockingAndEviction() const = 0;
|
||||
|
||||
/**
|
||||
* Called by removeRefWhileHoldingCacheLock() when the hard reference
|
||||
* count of its instance drops from 1 to 0.
|
||||
*/
|
||||
virtual void decrementItemsInUse() const = 0;
|
||||
virtual ~UnifiedCacheBase();
|
||||
private:
|
||||
UnifiedCacheBase(const UnifiedCacheBase &);
|
||||
|
@ -63,7 +55,6 @@ class U_COMMON_API SharedObject : public UObject {
|
|||
public:
|
||||
/** Initializes all reference counts to 0. */
|
||||
SharedObject() :
|
||||
totalRefCount(0),
|
||||
softRefCount(0),
|
||||
hardRefCount(0),
|
||||
cachePtr(NULL) {}
|
||||
|
@ -71,7 +62,6 @@ public:
|
|||
/** Initializes all reference counts to 0; they are not copied from other. */
|
||||
SharedObject(const SharedObject &other) :
|
||||
UObject(other),
|
||||
totalRefCount(0),
|
||||
softRefCount(0),
|
||||
hardRefCount(0),
|
||||
cachePtr(NULL) {}
|
||||
|
@ -79,93 +69,45 @@ public:
|
|||
virtual ~SharedObject();
|
||||
|
||||
/**
|
||||
* Increments the number of references to this object. Thread-safe.
|
||||
* Increments the number of hard references to this object. Thread-safe.
|
||||
* Not for use from within the Unified Cache implementation.
|
||||
*/
|
||||
void addRef() const { addRef(FALSE); }
|
||||
void addRef() const;
|
||||
|
||||
/**
|
||||
* Increments the number of references to this object.
|
||||
* Must be called only from within the internals of UnifiedCache and
|
||||
* only while the cache global mutex is held.
|
||||
* Decrements the number of hard references to this object, and
|
||||
* arranges for possible cache-eviction and/or deletion if ref
|
||||
* count goes to zero. Thread-safe.
|
||||
*
|
||||
* Not for use from within the UnifiedCache implementation.
|
||||
*/
|
||||
void addRefWhileHoldingCacheLock() const { addRef(TRUE); }
|
||||
void removeRef() const;
|
||||
|
||||
/**
|
||||
* Increments the number of soft references to this object.
|
||||
* Must be called only from within the internals of UnifiedCache and
|
||||
* only while the cache global mutex is held.
|
||||
*/
|
||||
void addSoftRef() const;
|
||||
|
||||
/**
|
||||
* Decrements the number of references to this object. Thread-safe.
|
||||
*/
|
||||
void removeRef() const { removeRef(FALSE); }
|
||||
|
||||
/**
|
||||
* Decrements the number of references to this object.
|
||||
* Must be called only from within the internals of UnifiedCache and
|
||||
* only while the cache global mutex is held.
|
||||
*/
|
||||
void removeRefWhileHoldingCacheLock() const { removeRef(TRUE); }
|
||||
|
||||
/**
|
||||
* Decrements the number of soft references to this object.
|
||||
* Must be called only from within the internals of UnifiedCache and
|
||||
* only while the cache global mutex is held.
|
||||
*/
|
||||
void removeSoftRef() const;
|
||||
|
||||
/**
|
||||
* Returns the reference counter including soft references.
|
||||
* Returns the number of hard references for this object.
|
||||
* Uses a memory barrier.
|
||||
*/
|
||||
int32_t getRefCount() const;
|
||||
|
||||
/**
|
||||
* Returns the count of soft references only.
|
||||
* Must be called only from within the internals of UnifiedCache and
|
||||
* only while the cache global mutex is held.
|
||||
*/
|
||||
int32_t getSoftRefCount() const { return softRefCount; }
|
||||
|
||||
/**
|
||||
* Returns the count of hard references only. Uses a memory barrier.
|
||||
* Used for testing the cache. Regular clients won't need this.
|
||||
*/
|
||||
int32_t getHardRefCount() const;
|
||||
|
||||
/**
|
||||
* If noHardReferences() == TRUE then this object has no hard references.
|
||||
* Must be called only from within the internals of UnifiedCache.
|
||||
*/
|
||||
inline UBool noHardReferences() const { return getHardRefCount() == 0; }
|
||||
inline UBool noHardReferences() const { return getRefCount() == 0; }
|
||||
|
||||
/**
|
||||
* If hasHardReferences() == TRUE then this object has hard references.
|
||||
* Must be called only from within the internals of UnifiedCache.
|
||||
*/
|
||||
inline UBool hasHardReferences() const { return getHardRefCount() != 0; }
|
||||
inline UBool hasHardReferences() const { return getRefCount() != 0; }
|
||||
|
||||
/**
|
||||
* If noSoftReferences() == TRUE then this object has no soft references.
|
||||
* Must be called only from within the internals of UnifiedCache and
|
||||
* only while the cache global mutex is held.
|
||||
*/
|
||||
UBool noSoftReferences() const { return (softRefCount == 0); }
|
||||
|
||||
/**
|
||||
* Deletes this object if it has no references or soft references.
|
||||
* Deletes this object if it has no references.
|
||||
* Available for non-cached SharedObjects only. Ownership of cached objects
|
||||
* is with the UnifiedCache, which is solely responsible for eviction and deletion.
|
||||
*/
|
||||
void deleteIfZeroRefCount() const;
|
||||
|
||||
/**
|
||||
* @internal For UnifiedCache use only to register this object with itself.
|
||||
* Must be called before this object is exposed to multiple threads.
|
||||
*/
|
||||
void registerWithCache(const UnifiedCacheBase *ptr) const {
|
||||
cachePtr = ptr;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a writable version of ptr.
|
||||
|
@ -219,15 +161,21 @@ public:
|
|||
}
|
||||
|
||||
private:
|
||||
mutable u_atomic_int32_t totalRefCount;
|
||||
|
||||
// Any thread modifying softRefCount must hold the global cache mutex
|
||||
/**
|
||||
* The number of references from the UnifiedCache, which is
|
||||
* the number of times that the sharedObject is stored as a hash table value.
|
||||
* For use by UnifiedCache implementation code only.
|
||||
* All access is synchronized by UnifiedCache's gCacheMutex
|
||||
*/
|
||||
mutable int32_t softRefCount;
|
||||
friend class UnifiedCache;
|
||||
|
||||
/**
|
||||
* Reference count, excluding references from within the UnifiedCache implementation.
|
||||
*/
|
||||
mutable u_atomic_int32_t hardRefCount;
|
||||
|
||||
mutable const UnifiedCacheBase *cachePtr;
|
||||
void addRef(UBool withCacheLock) const;
|
||||
void removeRef(UBool withCacheLock) const;
|
||||
|
||||
};
|
||||
|
||||
|
|
1
deps/icu-small/source/common/sprpimpl.h
vendored
1
deps/icu-small/source/common/sprpimpl.h
vendored
|
@ -90,7 +90,6 @@ struct UStringPrepProfile{
|
|||
UTrie sprepTrie;
|
||||
const uint16_t* mappingData;
|
||||
UDataMemory* sprepData;
|
||||
const UBiDiProps *bdp; /* used only if checkBiDi is set */
|
||||
int32_t refCount;
|
||||
UBool isDataLoaded;
|
||||
UBool doNFKC;
|
||||
|
|
7
deps/icu-small/source/common/ubidi.cpp
vendored
7
deps/icu-small/source/common/ubidi.cpp
vendored
|
@ -152,9 +152,6 @@ ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode)
|
|||
/* reset the object, all pointers NULL, all flags FALSE, all sizes 0 */
|
||||
uprv_memset(pBiDi, 0, sizeof(UBiDi));
|
||||
|
||||
/* get BiDi properties */
|
||||
pBiDi->bdp=ubidi_getSingleton();
|
||||
|
||||
/* allocate memory for arrays as requested */
|
||||
if(maxLength>0) {
|
||||
if( !getInitialDirPropsMemory(pBiDi, maxLength) ||
|
||||
|
@ -925,7 +922,7 @@ bracketProcessChar(BracketData *bd, int32_t position) {
|
|||
else
|
||||
match=0;
|
||||
if(match!=c && /* has a matching char */
|
||||
ubidi_getPairedBracketType(bd->pBiDi->bdp, c)==U_BPT_OPEN) { /* opening bracket */
|
||||
ubidi_getPairedBracketType(c)==U_BPT_OPEN) { /* opening bracket */
|
||||
/* special case: process synonyms
|
||||
create an opening entry for each synonym */
|
||||
if(match==0x232A) { /* RIGHT-POINTING ANGLE BRACKET */
|
||||
|
@ -3033,7 +3030,7 @@ ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c)
|
|||
if( pBiDi->fnClassCallback == NULL ||
|
||||
(dir = (*pBiDi->fnClassCallback)(pBiDi->coClassCallback, c)) == U_BIDI_CLASS_DEFAULT )
|
||||
{
|
||||
dir = ubidi_getClass(pBiDi->bdp, c);
|
||||
dir = ubidi_getClass(c);
|
||||
}
|
||||
if(dir >= U_CHAR_DIRECTION_COUNT) {
|
||||
dir = (UCharDirection)ON;
|
||||
|
|
103
deps/icu-small/source/common/ubidi_props.cpp
vendored
103
deps/icu-small/source/common/ubidi_props.cpp
vendored
|
@ -44,13 +44,6 @@ struct UBiDiProps {
|
|||
#define INCLUDED_FROM_UBIDI_PROPS_C
|
||||
#include "ubidi_props_data.h"
|
||||
|
||||
/* UBiDiProps singleton ----------------------------------------------------- */
|
||||
|
||||
U_CFUNC const UBiDiProps *
|
||||
ubidi_getSingleton() {
|
||||
return &ubidi_props_singleton;
|
||||
}
|
||||
|
||||
/* set of property starts for UnicodeSet ------------------------------------ */
|
||||
|
||||
static UBool U_CALLCONV
|
||||
|
@ -64,7 +57,7 @@ _enumPropertyStartsRange(const void *context, UChar32 start, UChar32 end, uint32
|
|||
}
|
||||
|
||||
U_CFUNC void
|
||||
ubidi_addPropertyStarts(const UBiDiProps *bdp, const USetAdder *sa, UErrorCode *pErrorCode) {
|
||||
ubidi_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) {
|
||||
int32_t i, length;
|
||||
UChar32 c, start, limit;
|
||||
|
||||
|
@ -76,19 +69,19 @@ ubidi_addPropertyStarts(const UBiDiProps *bdp, const USetAdder *sa, UErrorCode *
|
|||
}
|
||||
|
||||
/* add the start code point of each same-value range of the trie */
|
||||
utrie2_enum(&bdp->trie, NULL, _enumPropertyStartsRange, sa);
|
||||
utrie2_enum(&ubidi_props_singleton.trie, NULL, _enumPropertyStartsRange, sa);
|
||||
|
||||
/* add the code points from the bidi mirroring table */
|
||||
length=bdp->indexes[UBIDI_IX_MIRROR_LENGTH];
|
||||
length=ubidi_props_singleton.indexes[UBIDI_IX_MIRROR_LENGTH];
|
||||
for(i=0; i<length; ++i) {
|
||||
c=UBIDI_GET_MIRROR_CODE_POINT(bdp->mirrors[i]);
|
||||
c=UBIDI_GET_MIRROR_CODE_POINT(ubidi_props_singleton.mirrors[i]);
|
||||
sa->addRange(sa->set, c, c+1);
|
||||
}
|
||||
|
||||
/* add the code points from the Joining_Group array where the value changes */
|
||||
start=bdp->indexes[UBIDI_IX_JG_START];
|
||||
limit=bdp->indexes[UBIDI_IX_JG_LIMIT];
|
||||
jgArray=bdp->jgArray;
|
||||
start=ubidi_props_singleton.indexes[UBIDI_IX_JG_START];
|
||||
limit=ubidi_props_singleton.indexes[UBIDI_IX_JG_LIMIT];
|
||||
jgArray=ubidi_props_singleton.jgArray;
|
||||
for(;;) {
|
||||
prev=0;
|
||||
while(start<limit) {
|
||||
|
@ -103,11 +96,11 @@ ubidi_addPropertyStarts(const UBiDiProps *bdp, const USetAdder *sa, UErrorCode *
|
|||
/* add the limit code point if the last value was not 0 (it is now start==limit) */
|
||||
sa->add(sa->set, limit);
|
||||
}
|
||||
if(limit==bdp->indexes[UBIDI_IX_JG_LIMIT]) {
|
||||
if(limit==ubidi_props_singleton.indexes[UBIDI_IX_JG_LIMIT]) {
|
||||
/* switch to the second Joining_Group range */
|
||||
start=bdp->indexes[UBIDI_IX_JG_START2];
|
||||
limit=bdp->indexes[UBIDI_IX_JG_LIMIT2];
|
||||
jgArray=bdp->jgArray2;
|
||||
start=ubidi_props_singleton.indexes[UBIDI_IX_JG_START2];
|
||||
limit=ubidi_props_singleton.indexes[UBIDI_IX_JG_LIMIT2];
|
||||
jgArray=ubidi_props_singleton.jgArray2;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
|
@ -121,14 +114,8 @@ ubidi_addPropertyStarts(const UBiDiProps *bdp, const USetAdder *sa, UErrorCode *
|
|||
/* property access functions ------------------------------------------------ */
|
||||
|
||||
U_CFUNC int32_t
|
||||
ubidi_getMaxValue(const UBiDiProps *bdp, UProperty which) {
|
||||
int32_t max;
|
||||
|
||||
if(bdp==NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
max=bdp->indexes[UBIDI_MAX_VALUES_INDEX];
|
||||
ubidi_getMaxValue(UProperty which) {
|
||||
int32_t max=ubidi_props_singleton.indexes[UBIDI_MAX_VALUES_INDEX];
|
||||
switch(which) {
|
||||
case UCHAR_BIDI_CLASS:
|
||||
return (max&UBIDI_CLASS_MASK);
|
||||
|
@ -144,19 +131,19 @@ ubidi_getMaxValue(const UBiDiProps *bdp, UProperty which) {
|
|||
}
|
||||
|
||||
U_CAPI UCharDirection
|
||||
ubidi_getClass(const UBiDiProps *bdp, UChar32 c) {
|
||||
uint16_t props=UTRIE2_GET16(&bdp->trie, c);
|
||||
ubidi_getClass(UChar32 c) {
|
||||
uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c);
|
||||
return (UCharDirection)UBIDI_GET_CLASS(props);
|
||||
}
|
||||
|
||||
U_CFUNC UBool
|
||||
ubidi_isMirrored(const UBiDiProps *bdp, UChar32 c) {
|
||||
uint16_t props=UTRIE2_GET16(&bdp->trie, c);
|
||||
ubidi_isMirrored(UChar32 c) {
|
||||
uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c);
|
||||
return (UBool)UBIDI_GET_FLAG(props, UBIDI_IS_MIRRORED_SHIFT);
|
||||
}
|
||||
|
||||
static UChar32
|
||||
getMirror(const UBiDiProps *bdp, UChar32 c, uint16_t props) {
|
||||
getMirror(UChar32 c, uint16_t props) {
|
||||
int32_t delta=UBIDI_GET_MIRROR_DELTA(props);
|
||||
if(delta!=UBIDI_ESC_MIRROR_DELTA) {
|
||||
return c+delta;
|
||||
|
@ -167,8 +154,8 @@ getMirror(const UBiDiProps *bdp, UChar32 c, uint16_t props) {
|
|||
int32_t i, length;
|
||||
UChar32 c2;
|
||||
|
||||
mirrors=bdp->mirrors;
|
||||
length=bdp->indexes[UBIDI_IX_MIRROR_LENGTH];
|
||||
mirrors=ubidi_props_singleton.mirrors;
|
||||
length=ubidi_props_singleton.indexes[UBIDI_IX_MIRROR_LENGTH];
|
||||
|
||||
/* linear search */
|
||||
for(i=0; i<length; ++i) {
|
||||
|
@ -188,59 +175,59 @@ getMirror(const UBiDiProps *bdp, UChar32 c, uint16_t props) {
|
|||
}
|
||||
|
||||
U_CFUNC UChar32
|
||||
ubidi_getMirror(const UBiDiProps *bdp, UChar32 c) {
|
||||
uint16_t props=UTRIE2_GET16(&bdp->trie, c);
|
||||
return getMirror(bdp, c, props);
|
||||
ubidi_getMirror(UChar32 c) {
|
||||
uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c);
|
||||
return getMirror(c, props);
|
||||
}
|
||||
|
||||
U_CFUNC UBool
|
||||
ubidi_isBidiControl(const UBiDiProps *bdp, UChar32 c) {
|
||||
uint16_t props=UTRIE2_GET16(&bdp->trie, c);
|
||||
ubidi_isBidiControl(UChar32 c) {
|
||||
uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c);
|
||||
return (UBool)UBIDI_GET_FLAG(props, UBIDI_BIDI_CONTROL_SHIFT);
|
||||
}
|
||||
|
||||
U_CFUNC UBool
|
||||
ubidi_isJoinControl(const UBiDiProps *bdp, UChar32 c) {
|
||||
uint16_t props=UTRIE2_GET16(&bdp->trie, c);
|
||||
ubidi_isJoinControl(UChar32 c) {
|
||||
uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c);
|
||||
return (UBool)UBIDI_GET_FLAG(props, UBIDI_JOIN_CONTROL_SHIFT);
|
||||
}
|
||||
|
||||
U_CFUNC UJoiningType
|
||||
ubidi_getJoiningType(const UBiDiProps *bdp, UChar32 c) {
|
||||
uint16_t props=UTRIE2_GET16(&bdp->trie, c);
|
||||
ubidi_getJoiningType(UChar32 c) {
|
||||
uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c);
|
||||
return (UJoiningType)((props&UBIDI_JT_MASK)>>UBIDI_JT_SHIFT);
|
||||
}
|
||||
|
||||
U_CFUNC UJoiningGroup
|
||||
ubidi_getJoiningGroup(const UBiDiProps *bdp, UChar32 c) {
|
||||
ubidi_getJoiningGroup(UChar32 c) {
|
||||
UChar32 start, limit;
|
||||
|
||||
start=bdp->indexes[UBIDI_IX_JG_START];
|
||||
limit=bdp->indexes[UBIDI_IX_JG_LIMIT];
|
||||
start=ubidi_props_singleton.indexes[UBIDI_IX_JG_START];
|
||||
limit=ubidi_props_singleton.indexes[UBIDI_IX_JG_LIMIT];
|
||||
if(start<=c && c<limit) {
|
||||
return (UJoiningGroup)bdp->jgArray[c-start];
|
||||
return (UJoiningGroup)ubidi_props_singleton.jgArray[c-start];
|
||||
}
|
||||
start=bdp->indexes[UBIDI_IX_JG_START2];
|
||||
limit=bdp->indexes[UBIDI_IX_JG_LIMIT2];
|
||||
start=ubidi_props_singleton.indexes[UBIDI_IX_JG_START2];
|
||||
limit=ubidi_props_singleton.indexes[UBIDI_IX_JG_LIMIT2];
|
||||
if(start<=c && c<limit) {
|
||||
return (UJoiningGroup)bdp->jgArray2[c-start];
|
||||
return (UJoiningGroup)ubidi_props_singleton.jgArray2[c-start];
|
||||
}
|
||||
return U_JG_NO_JOINING_GROUP;
|
||||
}
|
||||
|
||||
U_CFUNC UBidiPairedBracketType
|
||||
ubidi_getPairedBracketType(const UBiDiProps *bdp, UChar32 c) {
|
||||
uint16_t props=UTRIE2_GET16(&bdp->trie, c);
|
||||
ubidi_getPairedBracketType(UChar32 c) {
|
||||
uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c);
|
||||
return (UBidiPairedBracketType)((props&UBIDI_BPT_MASK)>>UBIDI_BPT_SHIFT);
|
||||
}
|
||||
|
||||
U_CFUNC UChar32
|
||||
ubidi_getPairedBracket(const UBiDiProps *bdp, UChar32 c) {
|
||||
uint16_t props=UTRIE2_GET16(&bdp->trie, c);
|
||||
ubidi_getPairedBracket(UChar32 c) {
|
||||
uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c);
|
||||
if((props&UBIDI_BPT_MASK)==0) {
|
||||
return c;
|
||||
} else {
|
||||
return getMirror(bdp, c, props);
|
||||
return getMirror(c, props);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -248,20 +235,20 @@ ubidi_getPairedBracket(const UBiDiProps *bdp, UChar32 c) {
|
|||
|
||||
U_CFUNC UCharDirection
|
||||
u_charDirection(UChar32 c) {
|
||||
return ubidi_getClass(&ubidi_props_singleton, c);
|
||||
return ubidi_getClass(c);
|
||||
}
|
||||
|
||||
U_CFUNC UBool
|
||||
u_isMirrored(UChar32 c) {
|
||||
return ubidi_isMirrored(&ubidi_props_singleton, c);
|
||||
return ubidi_isMirrored(c);
|
||||
}
|
||||
|
||||
U_CFUNC UChar32
|
||||
u_charMirror(UChar32 c) {
|
||||
return ubidi_getMirror(&ubidi_props_singleton, c);
|
||||
return ubidi_getMirror(c);
|
||||
}
|
||||
|
||||
U_STABLE UChar32 U_EXPORT2
|
||||
u_getBidiPairedBracket(UChar32 c) {
|
||||
return ubidi_getPairedBracket(&ubidi_props_singleton, c);
|
||||
return ubidi_getPairedBracket(c);
|
||||
}
|
||||
|
|
28
deps/icu-small/source/common/ubidi_props.h
vendored
28
deps/icu-small/source/common/ubidi_props.h
vendored
|
@ -31,46 +31,40 @@ U_CDECL_BEGIN
|
|||
|
||||
/* library API -------------------------------------------------------------- */
|
||||
|
||||
struct UBiDiProps;
|
||||
typedef struct UBiDiProps UBiDiProps;
|
||||
|
||||
U_CFUNC const UBiDiProps *
|
||||
ubidi_getSingleton(void);
|
||||
|
||||
U_CFUNC void
|
||||
ubidi_addPropertyStarts(const UBiDiProps *bdp, const USetAdder *sa, UErrorCode *pErrorCode);
|
||||
ubidi_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode);
|
||||
|
||||
/* property access functions */
|
||||
|
||||
U_CFUNC int32_t
|
||||
ubidi_getMaxValue(const UBiDiProps *bdp, UProperty which);
|
||||
ubidi_getMaxValue(UProperty which);
|
||||
|
||||
U_CAPI UCharDirection
|
||||
ubidi_getClass(const UBiDiProps *bdp, UChar32 c);
|
||||
ubidi_getClass(UChar32 c);
|
||||
|
||||
U_CFUNC UBool
|
||||
ubidi_isMirrored(const UBiDiProps *bdp, UChar32 c);
|
||||
ubidi_isMirrored(UChar32 c);
|
||||
|
||||
U_CFUNC UChar32
|
||||
ubidi_getMirror(const UBiDiProps *bdp, UChar32 c);
|
||||
ubidi_getMirror(UChar32 c);
|
||||
|
||||
U_CFUNC UBool
|
||||
ubidi_isBidiControl(const UBiDiProps *bdp, UChar32 c);
|
||||
ubidi_isBidiControl(UChar32 c);
|
||||
|
||||
U_CFUNC UBool
|
||||
ubidi_isJoinControl(const UBiDiProps *bdp, UChar32 c);
|
||||
ubidi_isJoinControl(UChar32 c);
|
||||
|
||||
U_CFUNC UJoiningType
|
||||
ubidi_getJoiningType(const UBiDiProps *bdp, UChar32 c);
|
||||
ubidi_getJoiningType(UChar32 c);
|
||||
|
||||
U_CFUNC UJoiningGroup
|
||||
ubidi_getJoiningGroup(const UBiDiProps *bdp, UChar32 c);
|
||||
ubidi_getJoiningGroup(UChar32 c);
|
||||
|
||||
U_CFUNC UBidiPairedBracketType
|
||||
ubidi_getPairedBracketType(const UBiDiProps *bdp, UChar32 c);
|
||||
ubidi_getPairedBracketType(UChar32 c);
|
||||
|
||||
U_CFUNC UChar32
|
||||
ubidi_getPairedBracket(const UBiDiProps *bdp, UChar32 c);
|
||||
ubidi_getPairedBracket(UChar32 c);
|
||||
|
||||
/* file definitions --------------------------------------------------------- */
|
||||
|
||||
|
|
2
deps/icu-small/source/common/ubidiimp.h
vendored
2
deps/icu-small/source/common/ubidiimp.h
vendored
|
@ -254,8 +254,6 @@ struct UBiDi {
|
|||
*/
|
||||
const UBiDi * pParaBiDi;
|
||||
|
||||
const UBiDiProps *bdp;
|
||||
|
||||
/* alias pointer to the current text */
|
||||
const UChar *text;
|
||||
|
||||
|
|
165
deps/icu-small/source/common/ucase.cpp
vendored
165
deps/icu-small/source/common/ucase.cpp
vendored
|
@ -77,9 +77,12 @@ ucase_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) {
|
|||
|
||||
/* data access primitives --------------------------------------------------- */
|
||||
|
||||
#define GET_EXCEPTIONS(csp, props) ((csp)->exceptions+((props)>>UCASE_EXC_SHIFT))
|
||||
U_CFUNC const UTrie2 * U_EXPORT2
|
||||
ucase_getTrie() {
|
||||
return &ucase_props_singleton.trie;
|
||||
}
|
||||
|
||||
#define PROPS_HAS_EXCEPTION(props) ((props)&UCASE_EXCEPTION)
|
||||
#define GET_EXCEPTIONS(csp, props) ((csp)->exceptions+((props)>>UCASE_EXC_SHIFT))
|
||||
|
||||
/* number of bits in an 8-bit integer value */
|
||||
static const uint8_t flagsOffset[256]={
|
||||
|
@ -128,8 +131,8 @@ static const uint8_t flagsOffset[256]={
|
|||
U_CAPI UChar32 U_EXPORT2
|
||||
ucase_tolower(UChar32 c) {
|
||||
uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
|
||||
if(!PROPS_HAS_EXCEPTION(props)) {
|
||||
if(UCASE_GET_TYPE(props)>=UCASE_UPPER) {
|
||||
if(!UCASE_HAS_EXCEPTION(props)) {
|
||||
if(UCASE_IS_UPPER_OR_TITLE(props)) {
|
||||
c+=UCASE_GET_DELTA(props);
|
||||
}
|
||||
} else {
|
||||
|
@ -145,7 +148,7 @@ ucase_tolower(UChar32 c) {
|
|||
U_CAPI UChar32 U_EXPORT2
|
||||
ucase_toupper(UChar32 c) {
|
||||
uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
|
||||
if(!PROPS_HAS_EXCEPTION(props)) {
|
||||
if(!UCASE_HAS_EXCEPTION(props)) {
|
||||
if(UCASE_GET_TYPE(props)==UCASE_LOWER) {
|
||||
c+=UCASE_GET_DELTA(props);
|
||||
}
|
||||
|
@ -162,7 +165,7 @@ ucase_toupper(UChar32 c) {
|
|||
U_CAPI UChar32 U_EXPORT2
|
||||
ucase_totitle(UChar32 c) {
|
||||
uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
|
||||
if(!PROPS_HAS_EXCEPTION(props)) {
|
||||
if(!UCASE_HAS_EXCEPTION(props)) {
|
||||
if(UCASE_GET_TYPE(props)==UCASE_LOWER) {
|
||||
c+=UCASE_GET_DELTA(props);
|
||||
}
|
||||
|
@ -223,7 +226,7 @@ ucase_addCaseClosure(UChar32 c, const USetAdder *sa) {
|
|||
}
|
||||
|
||||
props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
|
||||
if(!PROPS_HAS_EXCEPTION(props)) {
|
||||
if(!UCASE_HAS_EXCEPTION(props)) {
|
||||
if(UCASE_GET_TYPE(props)!=UCASE_NONE) {
|
||||
/* add the one simple case mapping, no matter what type it is */
|
||||
int32_t delta=UCASE_GET_DELTA(props);
|
||||
|
@ -419,6 +422,138 @@ FullCaseFoldingIterator::next(UnicodeString &full) {
|
|||
return c;
|
||||
}
|
||||
|
||||
namespace LatinCase {
|
||||
|
||||
const int8_t TO_LOWER_NORMAL[LIMIT] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
||||
0, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
|
||||
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
||||
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
|
||||
32, 32, 32, 32, 32, 32, 32, 0, 32, 32, 32, 32, 32, 32, 32, EXC,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
||||
1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
|
||||
1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
|
||||
1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
|
||||
EXC, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1,
|
||||
|
||||
0, 1, 0, 1, 0, 1, 0, 1, 0, EXC, 1, 0, 1, 0, 1, 0,
|
||||
1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
|
||||
1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
|
||||
1, 0, 1, 0, 1, 0, 1, 0, -121, 1, 0, 1, 0, 1, 0, EXC
|
||||
};
|
||||
|
||||
const int8_t TO_LOWER_TR_LT[LIMIT] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
||||
0, 32, 32, 32, 32, 32, 32, 32, 32, EXC, EXC, 32, 32, 32, 32, 32,
|
||||
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
||||
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, EXC, EXC, 32, 32,
|
||||
32, 32, 32, 32, 32, 32, 32, 0, 32, 32, 32, 32, 32, 32, 32, EXC,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
||||
1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
|
||||
1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
|
||||
1, 0, 1, 0, 1, 0, 1, 0, EXC, 0, 1, 0, 1, 0, EXC, 0,
|
||||
EXC, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1,
|
||||
|
||||
0, 1, 0, 1, 0, 1, 0, 1, 0, EXC, 1, 0, 1, 0, 1, 0,
|
||||
1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
|
||||
1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
|
||||
1, 0, 1, 0, 1, 0, 1, 0, -121, 1, 0, 1, 0, 1, 0, EXC
|
||||
};
|
||||
|
||||
const int8_t TO_UPPER_NORMAL[LIMIT] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
|
||||
-32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, 0, 0, 0, 0, 0,
|
||||
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, EXC,
|
||||
-32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
|
||||
-32, -32, -32, -32, -32, -32, -32, 0, -32, -32, -32, -32, -32, -32, -32, 121,
|
||||
|
||||
0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
|
||||
0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
|
||||
0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
|
||||
0, EXC, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, 0,
|
||||
|
||||
-1, 0, -1, 0, -1, 0, -1, 0, -1, EXC, 0, -1, 0, -1, 0, -1,
|
||||
0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
|
||||
0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
|
||||
0, -1, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, EXC
|
||||
};
|
||||
|
||||
const int8_t TO_UPPER_TR[LIMIT] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, -32, -32, -32, -32, -32, -32, -32, -32, EXC, -32, -32, -32, -32, -32, -32,
|
||||
-32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, 0, 0, 0, 0, 0,
|
||||
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, EXC,
|
||||
-32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
|
||||
-32, -32, -32, -32, -32, -32, -32, 0, -32, -32, -32, -32, -32, -32, -32, 121,
|
||||
|
||||
0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
|
||||
0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
|
||||
0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
|
||||
0, EXC, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, 0,
|
||||
|
||||
-1, 0, -1, 0, -1, 0, -1, 0, -1, EXC, 0, -1, 0, -1, 0, -1,
|
||||
0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
|
||||
0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
|
||||
0, -1, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, EXC
|
||||
};
|
||||
|
||||
} // namespace LatinCase
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
/** @return UCASE_NONE, UCASE_LOWER, UCASE_UPPER, UCASE_TITLE */
|
||||
|
@ -439,7 +574,7 @@ ucase_getTypeOrIgnorable(UChar32 c) {
|
|||
static inline int32_t
|
||||
getDotType(UChar32 c) {
|
||||
uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
|
||||
if(!PROPS_HAS_EXCEPTION(props)) {
|
||||
if(!UCASE_HAS_EXCEPTION(props)) {
|
||||
return props&UCASE_DOT_MASK;
|
||||
} else {
|
||||
const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
|
||||
|
@ -878,8 +1013,8 @@ ucase_toFullLower(UChar32 c,
|
|||
U_ASSERT(c >= 0);
|
||||
UChar32 result=c;
|
||||
uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
|
||||
if(!PROPS_HAS_EXCEPTION(props)) {
|
||||
if(UCASE_GET_TYPE(props)>=UCASE_UPPER) {
|
||||
if(!UCASE_HAS_EXCEPTION(props)) {
|
||||
if(UCASE_IS_UPPER_OR_TITLE(props)) {
|
||||
result=c+UCASE_GET_DELTA(props);
|
||||
}
|
||||
} else {
|
||||
|
@ -1024,7 +1159,7 @@ toUpperOrTitle(UChar32 c,
|
|||
U_ASSERT(c >= 0);
|
||||
UChar32 result=c;
|
||||
uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
|
||||
if(!PROPS_HAS_EXCEPTION(props)) {
|
||||
if(!UCASE_HAS_EXCEPTION(props)) {
|
||||
if(UCASE_GET_TYPE(props)==UCASE_LOWER) {
|
||||
result=c+UCASE_GET_DELTA(props);
|
||||
}
|
||||
|
@ -1169,8 +1304,8 @@ ucase_toFullTitle(UChar32 c,
|
|||
U_CAPI UChar32 U_EXPORT2
|
||||
ucase_fold(UChar32 c, uint32_t options) {
|
||||
uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
|
||||
if(!PROPS_HAS_EXCEPTION(props)) {
|
||||
if(UCASE_GET_TYPE(props)>=UCASE_UPPER) {
|
||||
if(!UCASE_HAS_EXCEPTION(props)) {
|
||||
if(UCASE_IS_UPPER_OR_TITLE(props)) {
|
||||
c+=UCASE_GET_DELTA(props);
|
||||
}
|
||||
} else {
|
||||
|
@ -1234,8 +1369,8 @@ ucase_toFullFolding(UChar32 c,
|
|||
U_ASSERT(c >= 0);
|
||||
UChar32 result=c;
|
||||
uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
|
||||
if(!PROPS_HAS_EXCEPTION(props)) {
|
||||
if(UCASE_GET_TYPE(props)>=UCASE_UPPER) {
|
||||
if(!UCASE_HAS_EXCEPTION(props)) {
|
||||
if(UCASE_IS_UPPER_OR_TITLE(props)) {
|
||||
result=c+UCASE_GET_DELTA(props);
|
||||
}
|
||||
} else {
|
||||
|
|
35
deps/icu-small/source/common/ucase.h
vendored
35
deps/icu-small/source/common/ucase.h
vendored
|
@ -26,6 +26,7 @@
|
|||
#include "putilimp.h"
|
||||
#include "uset_imp.h"
|
||||
#include "udataswp.h"
|
||||
#include "utrie2.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
U_NAMESPACE_BEGIN
|
||||
|
@ -148,6 +149,33 @@ private:
|
|||
int32_t rowCpIndex;
|
||||
};
|
||||
|
||||
/**
|
||||
* Fast case mapping data for ASCII/Latin.
|
||||
* Linear arrays of delta bytes: 0=no mapping; EXC=exception.
|
||||
* Deltas must not cross the ASCII boundary, or else they cannot be easily used
|
||||
* in simple UTF-8 code.
|
||||
*/
|
||||
namespace LatinCase {
|
||||
|
||||
/**
 * Case mapping/folding data for code points up to U+017F.
 * LIMIT is also the declared length of each delta table below.
 */
|
||||
constexpr UChar LIMIT = 0x180;
|
||||
/** U+017F case-folds and uppercases crossing the ASCII boundary. */
|
||||
constexpr UChar LONG_S = 0x17f;
|
||||
/**
 * Exception: Complex mapping, or too-large delta.
 * Marker value stored in a table slot in place of a delta.
 */
|
||||
constexpr int8_t EXC = -0x80;
|
||||
|
||||
/**
 * Deltas for lowercasing for most locales, and default case folding.
 * Only declared here (extern); the table contents are defined elsewhere.
 */
|
||||
extern const int8_t TO_LOWER_NORMAL[LIMIT];
|
||||
/** Deltas for lowercasing for tr/az/lt, and Turkic case folding. */
|
||||
extern const int8_t TO_LOWER_TR_LT[LIMIT];
|
||||
|
||||
/** Deltas for uppercasing for most locales. */
|
||||
extern const int8_t TO_UPPER_NORMAL[LIMIT];
|
||||
/** Deltas for uppercasing for tr/az. */
|
||||
extern const int8_t TO_UPPER_TR[LIMIT];
|
||||
|
||||
} // namespace LatinCase
|
||||
|
||||
U_NAMESPACE_END
|
||||
#endif
|
||||
|
||||
|
@ -308,6 +336,9 @@ enum {
|
|||
|
||||
/* definitions for 16-bit case properties word ------------------------------ */
|
||||
|
||||
U_CFUNC const UTrie2 * U_EXPORT2
|
||||
ucase_getTrie();
|
||||
|
||||
/* 2-bit constants for types of cased characters */
|
||||
#define UCASE_TYPE_MASK 3
|
||||
enum {
|
||||
|
@ -320,10 +351,14 @@ enum {
|
|||
#define UCASE_GET_TYPE(props) ((props)&UCASE_TYPE_MASK)
|
||||
#define UCASE_GET_TYPE_AND_IGNORABLE(props) ((props)&7)
|
||||
|
||||
#define UCASE_IS_UPPER_OR_TITLE(props) ((props)&2)
|
||||
|
||||
#define UCASE_IGNORABLE 4
|
||||
#define UCASE_SENSITIVE 8
|
||||
#define UCASE_EXCEPTION 0x10
|
||||
|
||||
#define UCASE_HAS_EXCEPTION(props) ((props)&UCASE_EXCEPTION)
|
||||
|
||||
#define UCASE_DOT_MASK 0x60
|
||||
enum {
|
||||
UCASE_NO_DOT=0, /* normal characters with cc=0 */
|
||||
|
|
272
deps/icu-small/source/common/ucasemap.cpp
vendored
272
deps/icu-small/source/common/ucasemap.cpp
vendored
|
@ -165,9 +165,7 @@ appendResult(int32_t cpLength, int32_t result, const UChar *s,
|
|||
inline uint8_t getTwoByteLead(UChar32 c) { return (uint8_t)((c >> 6) | 0xc0); }
|
||||
inline uint8_t getTwoByteTrail(UChar32 c) { return (uint8_t)((c & 0x3f) | 0x80); }
|
||||
|
||||
} // namespace
|
||||
|
||||
static UChar32 U_CALLCONV
|
||||
UChar32 U_CALLCONV
|
||||
utf8_caseContextIterator(void *context, int8_t dir) {
|
||||
UCaseContext *csc=(UCaseContext *)context;
|
||||
UChar32 c;
|
||||
|
@ -199,36 +197,227 @@ utf8_caseContextIterator(void *context, int8_t dir) {
|
|||
return U_SENTINEL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Case-maps [srcStart..srcLimit[ but takes
|
||||
* context [0..srcLength[ into account.
|
||||
/**
|
||||
* caseLocale >= 0: Lowercases [srcStart..srcLimit[ but takes context [0..srcLength[ into account.
|
||||
* caseLocale < 0: Case-folds [srcStart..srcLimit[.
|
||||
*/
|
||||
static void
|
||||
_caseMap(int32_t caseLocale, uint32_t options, UCaseMapFull *map,
|
||||
const uint8_t *src, UCaseContext *csc,
|
||||
int32_t srcStart, int32_t srcLimit,
|
||||
icu::ByteSink &sink, icu::Edits *edits,
|
||||
UErrorCode &errorCode) {
|
||||
/* case mapping loop */
|
||||
int32_t srcIndex=srcStart;
|
||||
while (U_SUCCESS(errorCode) && srcIndex<srcLimit) {
|
||||
void toLower(int32_t caseLocale, uint32_t options,
|
||||
const uint8_t *src, UCaseContext *csc, int32_t srcStart, int32_t srcLimit,
|
||||
icu::ByteSink &sink, icu::Edits *edits, UErrorCode &errorCode) {
|
||||
const int8_t *latinToLower;
|
||||
if (caseLocale == UCASE_LOC_ROOT ||
|
||||
(caseLocale >= 0 ?
|
||||
!(caseLocale == UCASE_LOC_TURKISH || caseLocale == UCASE_LOC_LITHUANIAN) :
|
||||
(options & _FOLD_CASE_OPTIONS_MASK) == U_FOLD_CASE_DEFAULT)) {
|
||||
latinToLower = LatinCase::TO_LOWER_NORMAL;
|
||||
} else {
|
||||
latinToLower = LatinCase::TO_LOWER_TR_LT;
|
||||
}
|
||||
const UTrie2 *trie = ucase_getTrie();
|
||||
int32_t prev = srcStart;
|
||||
int32_t srcIndex = srcStart;
|
||||
for (;;) {
|
||||
// fast path for simple cases
|
||||
int32_t cpStart;
|
||||
csc->cpStart=cpStart=srcIndex;
|
||||
UChar32 c;
|
||||
U8_NEXT(src, srcIndex, srcLimit, c);
|
||||
csc->cpLimit=srcIndex;
|
||||
if(c<0) {
|
||||
// Malformed UTF-8.
|
||||
ByteSinkUtil::appendUnchanged(src+cpStart, srcIndex-cpStart,
|
||||
for (;;) {
|
||||
if (U_FAILURE(errorCode) || srcIndex >= srcLimit) {
|
||||
c = U_SENTINEL;
|
||||
break;
|
||||
}
|
||||
uint8_t lead = src[srcIndex++];
|
||||
if (lead <= 0x7f) {
|
||||
int8_t d = latinToLower[lead];
|
||||
if (d == LatinCase::EXC) {
|
||||
cpStart = srcIndex - 1;
|
||||
c = lead;
|
||||
break;
|
||||
}
|
||||
if (d == 0) { continue; }
|
||||
ByteSinkUtil::appendUnchanged(src + prev, srcIndex - 1 - prev,
|
||||
sink, options, edits, errorCode);
|
||||
char ascii = (char)(lead + d);
|
||||
sink.Append(&ascii, 1);
|
||||
if (edits != nullptr) {
|
||||
edits->addReplace(1, 1);
|
||||
}
|
||||
prev = srcIndex;
|
||||
continue;
|
||||
} else if (lead < 0xe3) {
|
||||
uint8_t t;
|
||||
if (0xc2 <= lead && lead <= 0xc5 && srcIndex < srcLimit &&
|
||||
(t = src[srcIndex] - 0x80) <= 0x3f) {
|
||||
// U+0080..U+017F
|
||||
++srcIndex;
|
||||
c = ((lead - 0xc0) << 6) | t;
|
||||
int8_t d = latinToLower[c];
|
||||
if (d == LatinCase::EXC) {
|
||||
cpStart = srcIndex - 2;
|
||||
break;
|
||||
}
|
||||
if (d == 0) { continue; }
|
||||
ByteSinkUtil::appendUnchanged(src + prev, srcIndex - 2 - prev,
|
||||
sink, options, edits, errorCode);
|
||||
ByteSinkUtil::appendTwoBytes(c + d, sink);
|
||||
if (edits != nullptr) {
|
||||
edits->addReplace(2, 2);
|
||||
}
|
||||
prev = srcIndex;
|
||||
continue;
|
||||
}
|
||||
} else if ((lead <= 0xe9 || lead == 0xeb || lead == 0xec) &&
|
||||
(srcIndex + 2) <= srcLimit &&
|
||||
U8_IS_TRAIL(src[srcIndex]) && U8_IS_TRAIL(src[srcIndex + 1])) {
|
||||
// most of CJK: no case mappings
|
||||
srcIndex += 2;
|
||||
continue;
|
||||
}
|
||||
cpStart = --srcIndex;
|
||||
U8_NEXT(src, srcIndex, srcLimit, c);
|
||||
if (c < 0) {
|
||||
// ill-formed UTF-8
|
||||
continue;
|
||||
}
|
||||
uint16_t props = UTRIE2_GET16(trie, c);
|
||||
if (UCASE_HAS_EXCEPTION(props)) { break; }
|
||||
int32_t delta;
|
||||
if (!UCASE_IS_UPPER_OR_TITLE(props) || (delta = UCASE_GET_DELTA(props)) == 0) {
|
||||
continue;
|
||||
}
|
||||
ByteSinkUtil::appendUnchanged(src + prev, cpStart - prev,
|
||||
sink, options, edits, errorCode);
|
||||
ByteSinkUtil::appendCodePoint(srcIndex - cpStart, c + delta, sink, edits);
|
||||
prev = srcIndex;
|
||||
}
|
||||
if (c < 0) {
|
||||
break;
|
||||
}
|
||||
// slow path
|
||||
const UChar *s;
|
||||
if (caseLocale >= 0) {
|
||||
csc->cpStart = cpStart;
|
||||
csc->cpLimit = srcIndex;
|
||||
c = ucase_toFullLower(c, utf8_caseContextIterator, csc, &s, caseLocale);
|
||||
} else {
|
||||
const UChar *s;
|
||||
c=map(c, utf8_caseContextIterator, csc, &s, caseLocale);
|
||||
c = ucase_toFullFolding(c, &s, options);
|
||||
}
|
||||
if (c >= 0) {
|
||||
ByteSinkUtil::appendUnchanged(src + prev, cpStart - prev,
|
||||
sink, options, edits, errorCode);
|
||||
appendResult(srcIndex - cpStart, c, s, sink, options, edits, errorCode);
|
||||
prev = srcIndex;
|
||||
}
|
||||
}
|
||||
ByteSinkUtil::appendUnchanged(src + prev, srcIndex - prev,
|
||||
sink, options, edits, errorCode);
|
||||
}
|
||||
|
||||
/**
 * Uppercases the UTF-8 bytes src[0..srcLength[ and appends the result to sink.
 *
 * An inner fast-path loop handles code points whose mapping is a plain delta
 * (from the Latin delta table or from the case-properties trie). Code points
 * flagged as exceptions leave the inner loop and are mapped with
 * ucase_toFullUpper(), using csc as the context for utf8_caseContextIterator.
 * Runs of unchanged bytes are not copied one by one: they are tracked via
 * `prev` and flushed with ByteSinkUtil::appendUnchanged() right before the
 * next changed code point and once more at the end.
 *
 * @param caseLocale selects the Turkish Latin table when it is UCASE_LOC_TURKISH;
 *                   also passed through to ucase_toFullUpper()
 * @param options    passed through to appendUnchanged() and appendResult()
 * @param src        input bytes
 * @param csc        case context; cpStart/cpLimit are set before each slow-path call
 * @param srcLength  number of bytes in src
 * @param sink       receives the output bytes
 * @param edits      optional; when non-null, replacements are recorded in it
 * @param errorCode  processing stops once this indicates a failure
 */
void toUpper(int32_t caseLocale, uint32_t options,
|
||||
const uint8_t *src, UCaseContext *csc, int32_t srcLength,
|
||||
icu::ByteSink &sink, icu::Edits *edits, UErrorCode &errorCode) {
|
||||
// Pick the delta table used for code points below LatinCase::LIMIT.
const int8_t *latinToUpper;
|
||||
if (caseLocale == UCASE_LOC_TURKISH) {
|
||||
latinToUpper = LatinCase::TO_UPPER_TR;
|
||||
} else {
|
||||
latinToUpper = LatinCase::TO_UPPER_NORMAL;
|
||||
}
|
||||
const UTrie2 *trie = ucase_getTrie();
|
||||
// prev: start of the pending run of source bytes not yet written to the sink.
int32_t prev = 0;
|
||||
int32_t srcIndex = 0;
|
||||
for (;;) {
|
||||
// fast path for simple cases
|
||||
int32_t cpStart;
|
||||
UChar32 c;
|
||||
for (;;) {
|
||||
// Error or end of input: leave the inner loop with the sentinel,
// which the c < 0 check after the loop turns into a full stop.
if (U_FAILURE(errorCode) || srcIndex >= srcLength) {
|
||||
c = U_SENTINEL;
|
||||
break;
|
||||
}
|
||||
uint8_t lead = src[srcIndex++];
|
||||
// ASCII byte: the table entry is the delta for this byte.
if (lead <= 0x7f) {
|
||||
int8_t d = latinToUpper[lead];
|
||||
// EXC entry: hand this code point to the slow path.
if (d == LatinCase::EXC) {
|
||||
cpStart = srcIndex - 1;
|
||||
c = lead;
|
||||
break;
|
||||
}
|
||||
// No mapping: the byte stays in the pending unchanged run.
if (d == 0) { continue; }
|
||||
// Flush the unchanged bytes before this one, then emit the mapped byte.
ByteSinkUtil::appendUnchanged(src + prev, srcIndex - 1 - prev,
|
||||
sink, options, edits, errorCode);
|
||||
char ascii = (char)(lead + d);
|
||||
sink.Append(&ascii, 1);
|
||||
if (edits != nullptr) {
|
||||
edits->addReplace(1, 1);
|
||||
}
|
||||
prev = srcIndex;
|
||||
continue;
|
||||
} else if (lead < 0xe3) {
|
||||
uint8_t t;
|
||||
// Lead byte C2..C5 followed by a byte in 80..BF: decode the two-byte
// sequence here and use the Latin table instead of the trie.
if (0xc2 <= lead && lead <= 0xc5 && srcIndex < srcLength &&
|
||||
(t = src[srcIndex] - 0x80) <= 0x3f) {
|
||||
// U+0080..U+017F
|
||||
++srcIndex;
|
||||
c = ((lead - 0xc0) << 6) | t;
|
||||
int8_t d = latinToUpper[c];
|
||||
if (d == LatinCase::EXC) {
|
||||
cpStart = srcIndex - 2;
|
||||
break;
|
||||
}
|
||||
if (d == 0) { continue; }
|
||||
// The result is written as two bytes again, hence addReplace(2, 2).
ByteSinkUtil::appendUnchanged(src + prev, srcIndex - 2 - prev,
|
||||
sink, options, edits, errorCode);
|
||||
ByteSinkUtil::appendTwoBytes(c + d, sink);
|
||||
if (edits != nullptr) {
|
||||
edits->addReplace(2, 2);
|
||||
}
|
||||
prev = srcIndex;
|
||||
continue;
|
||||
}
|
||||
} else if ((lead <= 0xe9 || lead == 0xeb || lead == 0xec) &&
|
||||
(srcIndex + 2) <= srcLength &&
|
||||
U8_IS_TRAIL(src[srcIndex]) && U8_IS_TRAIL(src[srcIndex + 1])) {
|
||||
// most of CJK: no case mappings
|
||||
// Skip the two trail bytes without a trie lookup.
srcIndex += 2;
|
||||
continue;
|
||||
}
|
||||
// General case: back up to the lead byte and decode the whole code point.
cpStart = --srcIndex;
|
||||
U8_NEXT(src, srcIndex, srcLength, c);
|
||||
if (c < 0) {
|
||||
// ill-formed UTF-8
|
||||
// prev is not advanced, so these bytes stay in the pending unchanged run.
continue;
|
||||
}
|
||||
uint16_t props = UTRIE2_GET16(trie, c);
|
||||
// Exception flag in the properties word: use the slow path.
if (UCASE_HAS_EXCEPTION(props)) { break; }
|
||||
int32_t delta;
|
||||
// Only lowercase code points with a nonzero delta change here.
if (UCASE_GET_TYPE(props) != UCASE_LOWER || (delta = UCASE_GET_DELTA(props)) == 0) {
|
||||
continue;
|
||||
}
|
||||
ByteSinkUtil::appendUnchanged(src + prev, cpStart - prev,
|
||||
sink, options, edits, errorCode);
|
||||
ByteSinkUtil::appendCodePoint(srcIndex - cpStart, c + delta, sink, edits);
|
||||
prev = srcIndex;
|
||||
}
|
||||
// The inner loop ended on error or end of input rather than on an exception.
if (c < 0) {
|
||||
break;
|
||||
}
|
||||
// slow path
|
||||
// Tell the context iterator which bytes the current code point occupies.
csc->cpStart = cpStart;
|
||||
csc->cpLimit = srcIndex;
|
||||
const UChar *s;
|
||||
c = ucase_toFullUpper(c, utf8_caseContextIterator, csc, &s, caseLocale);
|
||||
// A negative result writes nothing here and leaves prev alone, so the
// code point stays in the pending unchanged run
// (presumably it means "no change" - confirm against ucase_toFullUpper).
if (c >= 0) {
|
||||
ByteSinkUtil::appendUnchanged(src + prev, cpStart - prev,
|
||||
sink, options, edits, errorCode);
|
||||
appendResult(srcIndex - cpStart, c, s, sink, options, edits, errorCode);
|
||||
prev = srcIndex;
|
||||
}
|
||||
}
|
||||
// Flush the trailing run of unchanged bytes.
ByteSinkUtil::appendUnchanged(src + prev, srcIndex - prev,
|
||||
sink, options, edits, errorCode);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
U_CFUNC void U_CALLCONV
|
||||
|
@ -335,10 +524,9 @@ ucasemap_internalUTF8ToTitle(
|
|||
if(titleLimit<index) {
|
||||
if((options&U_TITLECASE_NO_LOWERCASE)==0) {
|
||||
/* Normal operation: Lowercase the rest of the word. */
|
||||
_caseMap(caseLocale, options, ucase_toFullLower,
|
||||
src, &csc,
|
||||
titleLimit, index,
|
||||
sink, edits, errorCode);
|
||||
toLower(caseLocale, options,
|
||||
src, &csc, titleLimit, index,
|
||||
sink, edits, errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return;
|
||||
}
|
||||
|
@ -538,8 +726,8 @@ ucasemap_internalUTF8ToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREA
|
|||
UCaseContext csc=UCASECONTEXT_INITIALIZER;
|
||||
csc.p=(void *)src;
|
||||
csc.limit=srcLength;
|
||||
_caseMap(
|
||||
caseLocale, options, ucase_toFullLower,
|
||||
toLower(
|
||||
caseLocale, options,
|
||||
src, &csc, 0, srcLength,
|
||||
sink, edits, errorCode);
|
||||
}
|
||||
|
@ -555,9 +743,9 @@ ucasemap_internalUTF8ToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREA
|
|||
UCaseContext csc=UCASECONTEXT_INITIALIZER;
|
||||
csc.p=(void *)src;
|
||||
csc.limit=srcLength;
|
||||
_caseMap(
|
||||
caseLocale, options, ucase_toFullUpper,
|
||||
src, &csc, 0, srcLength,
|
||||
toUpper(
|
||||
caseLocale, options,
|
||||
src, &csc, srcLength,
|
||||
sink, edits, errorCode);
|
||||
}
|
||||
}
|
||||
|
@ -567,22 +755,10 @@ ucasemap_internalUTF8Fold(int32_t /* caseLocale */, uint32_t options, UCASEMAP_B
|
|||
const uint8_t *src, int32_t srcLength,
|
||||
icu::ByteSink &sink, icu::Edits *edits,
|
||||
UErrorCode &errorCode) {
|
||||
/* case mapping loop */
|
||||
int32_t srcIndex = 0;
|
||||
while (U_SUCCESS(errorCode) && srcIndex < srcLength) {
|
||||
int32_t cpStart = srcIndex;
|
||||
UChar32 c;
|
||||
U8_NEXT(src, srcIndex, srcLength, c);
|
||||
if(c<0) {
|
||||
// Malformed UTF-8.
|
||||
ByteSinkUtil::appendUnchanged(src+cpStart, srcIndex-cpStart,
|
||||
sink, options, edits, errorCode);
|
||||
} else {
|
||||
const UChar *s;
|
||||
c = ucase_toFullFolding(c, &s, options);
|
||||
appendResult(srcIndex - cpStart, c, s, sink, options, edits, errorCode);
|
||||
}
|
||||
}
|
||||
toLower(
|
||||
-1, options,
|
||||
src, nullptr, 0, srcLength,
|
||||
sink, edits, errorCode);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
9
deps/icu-small/source/common/ucasemap_imp.h
vendored
9
deps/icu-small/source/common/ucasemap_imp.h
vendored
|
@ -60,15 +60,6 @@ u_caseInsensitivePrefixMatch(const UChar *s1, int32_t length1,
|
|||
int32_t *matchLen1, int32_t *matchLen2,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Are the Unicode properties loaded?
|
||||
* This must be used before internal functions are called that do
|
||||
* not perform this check.
|
||||
* Generate a debug assertion failure if data is not loaded.
|
||||
*/
|
||||
U_CFUNC UBool
|
||||
uprv_haveProperties(UErrorCode *pErrorCode);
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
|
8
deps/icu-small/source/common/uchar.cpp
vendored
8
deps/icu-small/source/common/uchar.cpp
vendored
|
@ -42,14 +42,6 @@
|
|||
/* getting a uint32_t properties word from the data */
|
||||
#define GET_PROPS(c, result) ((result)=UTRIE2_GET16(&propsTrie, c));
|
||||
|
||||
U_CFUNC UBool
|
||||
uprv_haveProperties(UErrorCode *pErrorCode) {
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return FALSE;
|
||||
}
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/* API functions ------------------------------------------------------------ */
|
||||
|
||||
/* Gets the Unicode character's general category.*/
|
||||
|
|
6
deps/icu-small/source/common/ucmndata.cpp
vendored
6
deps/icu-small/source/common/ucmndata.cpp
vendored
|
@ -77,7 +77,11 @@ typedef struct {
|
|||
typedef struct {
|
||||
uint32_t count;
|
||||
uint32_t reserved;
|
||||
PointerTOCEntry entry[2]; /* Actual size is from count. */
|
||||
/**
|
||||
* Variable-length array declared with length 1 to disable bounds checkers.
|
||||
* The actual array length is in the count field.
|
||||
*/
|
||||
PointerTOCEntry entry[1];
|
||||
} PointerTOC;
|
||||
|
||||
|
||||
|
|
6
deps/icu-small/source/common/ucmndata.h
vendored
6
deps/icu-small/source/common/ucmndata.h
vendored
|
@ -52,7 +52,11 @@ typedef struct {
|
|||
|
||||
typedef struct {
|
||||
uint32_t count;
|
||||
UDataOffsetTOCEntry entry[2]; /* Actual size of array is from count. */
|
||||
/**
|
||||
* Variable-length array declared with length 1 to disable bounds checkers.
|
||||
* The actual array length is in the count field.
|
||||
*/
|
||||
UDataOffsetTOCEntry entry[1];
|
||||
} UDataOffsetTOC;
|
||||
|
||||
/**
|
||||
|
|
4
deps/icu-small/source/common/ucnv2022.cpp
vendored
4
deps/icu-small/source/common/ucnv2022.cpp
vendored
|
@ -3512,14 +3512,14 @@ _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorC
|
|||
case 'k':
|
||||
if(myConverterData->version == 0) {
|
||||
if(length == 1) {
|
||||
if((UBool)args->converter->fromUnicodeStatus) {
|
||||
if(args->converter->fromUnicodeStatus) {
|
||||
/* in DBCS mode: switch to SBCS */
|
||||
args->converter->fromUnicodeStatus = 0;
|
||||
*p++ = UCNV_SI;
|
||||
}
|
||||
*p++ = subchar[0];
|
||||
} else /* length == 2*/ {
|
||||
if(!(UBool)args->converter->fromUnicodeStatus) {
|
||||
if(!args->converter->fromUnicodeStatus) {
|
||||
/* in SBCS mode: switch to DBCS */
|
||||
args->converter->fromUnicodeStatus = 1;
|
||||
*p++ = UCNV_SO;
|
||||
|
|
32
deps/icu-small/source/common/ucnv_err.cpp
vendored
32
deps/icu-small/source/common/ucnv_err.cpp
vendored
|
@ -60,11 +60,12 @@
|
|||
* To avoid dependency on other code, this list is hard coded here.
|
||||
* When an ignorable code point is found and is unmappable, the default callbacks
|
||||
* will ignore them.
|
||||
* For a list of the default ignorable code points, use this link: http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[%3ADI%3A]&g=
|
||||
* For a list of the default ignorable code points, use this link:
|
||||
* https://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5B%3ADI%3A%5D&abb=on&g=&i=
|
||||
*
|
||||
* This list should be kept in sync with the one in CharsetCallback.java
|
||||
*/
|
||||
#define IS_DEFAULT_IGNORABLE_CODE_POINT(c) (\
|
||||
#define IS_DEFAULT_IGNORABLE_CODE_POINT(c) ( \
|
||||
(c == 0x00AD) || \
|
||||
(c == 0x034F) || \
|
||||
(c == 0x061C) || \
|
||||
|
@ -74,26 +75,15 @@
|
|||
(0x180B <= c && c <= 0x180E) || \
|
||||
(0x200B <= c && c <= 0x200F) || \
|
||||
(0x202A <= c && c <= 0x202E) || \
|
||||
(c == 0x2060) || \
|
||||
(0x2066 <= c && c <= 0x2069) || \
|
||||
(0x2061 <= c && c <= 0x2064) || \
|
||||
(0x206A <= c && c <= 0x206F) || \
|
||||
(0x2060 <= c && c <= 0x206F) || \
|
||||
(c == 0x3164) || \
|
||||
(0x0FE00 <= c && c <= 0x0FE0F) || \
|
||||
(c == 0x0FEFF) || \
|
||||
(c == 0x0FFA0) || \
|
||||
(0x01BCA0 <= c && c <= 0x01BCA3) || \
|
||||
(0x01D173 <= c && c <= 0x01D17A) || \
|
||||
(c == 0x0E0001) || \
|
||||
(0x0E0020 <= c && c <= 0x0E007F) || \
|
||||
(0x0E0100 <= c && c <= 0x0E01EF) || \
|
||||
(c == 0x2065) || \
|
||||
(0x0FFF0 <= c && c <= 0x0FFF8) || \
|
||||
(c == 0x0E0000) || \
|
||||
(0x0E0002 <= c && c <= 0x0E001F) || \
|
||||
(0x0E0080 <= c && c <= 0x0E00FF) || \
|
||||
(0x0E01F0 <= c && c <= 0x0E0FFF) \
|
||||
)
|
||||
(0xFE00 <= c && c <= 0xFE0F) || \
|
||||
(c == 0xFEFF) || \
|
||||
(c == 0xFFA0) || \
|
||||
(0xFFF0 <= c && c <= 0xFFF8) || \
|
||||
(0x1BCA0 <= c && c <= 0x1BCA3) || \
|
||||
(0x1D173 <= c && c <= 0x1D17A) || \
|
||||
(0xE0000 <= c && c <= 0xE0FFF))
|
||||
|
||||
|
||||
/*Function Pointer STOPS at the ILLEGAL_SEQUENCE */
|
||||
|
|
8
deps/icu-small/source/common/ucnv_u32.cpp
vendored
8
deps/icu-small/source/common/ucnv_u32.cpp
vendored
|
@ -55,7 +55,7 @@ T_UConverter_toUnicode_UTF32_BE(UConverterToUnicodeArgs * args,
|
|||
uint32_t ch, i;
|
||||
|
||||
/* Restore state of current sequence */
|
||||
if (args->converter->toUnicodeStatus && myTarget < targetLimit) {
|
||||
if (args->converter->toULength > 0 && myTarget < targetLimit) {
|
||||
i = args->converter->toULength; /* restore # of bytes consumed */
|
||||
args->converter->toULength = 0;
|
||||
|
||||
|
@ -136,7 +136,7 @@ T_UConverter_toUnicode_UTF32_BE_OFFSET_LOGIC(UConverterToUnicodeArgs * args,
|
|||
int32_t offsetNum = 0;
|
||||
|
||||
/* Restore state of current sequence */
|
||||
if (args->converter->toUnicodeStatus && myTarget < targetLimit) {
|
||||
if (args->converter->toULength > 0 && myTarget < targetLimit) {
|
||||
i = args->converter->toULength; /* restore # of bytes consumed */
|
||||
args->converter->toULength = 0;
|
||||
|
||||
|
@ -517,7 +517,7 @@ T_UConverter_toUnicode_UTF32_LE(UConverterToUnicodeArgs * args,
|
|||
uint32_t ch, i;
|
||||
|
||||
/* Restore state of current sequence */
|
||||
if (args->converter->toUnicodeStatus && myTarget < targetLimit)
|
||||
if (args->converter->toULength > 0 && myTarget < targetLimit)
|
||||
{
|
||||
i = args->converter->toULength; /* restore # of bytes consumed */
|
||||
args->converter->toULength = 0;
|
||||
|
@ -604,7 +604,7 @@ T_UConverter_toUnicode_UTF32_LE_OFFSET_LOGIC(UConverterToUnicodeArgs * args,
|
|||
int32_t offsetNum = 0;
|
||||
|
||||
/* Restore state of current sequence */
|
||||
if (args->converter->toUnicodeStatus && myTarget < targetLimit)
|
||||
if (args->converter->toULength > 0 && myTarget < targetLimit)
|
||||
{
|
||||
i = args->converter->toULength; /* restore # of bytes consumed */
|
||||
args->converter->toULength = 0;
|
||||
|
|
54
deps/icu-small/source/common/ucnv_u8.cpp
vendored
54
deps/icu-small/source/common/ucnv_u8.cpp
vendored
|
@ -76,7 +76,7 @@ static void U_CALLCONV ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args,
|
|||
int32_t i, inBytes;
|
||||
|
||||
/* Restore size of current sequence */
|
||||
if (cnv->toUnicodeStatus && myTarget < targetLimit)
|
||||
if (cnv->toULength > 0 && myTarget < targetLimit)
|
||||
{
|
||||
inBytes = cnv->mode; /* restore # of bytes to consume */
|
||||
i = cnv->toULength; /* restore # of bytes consumed */
|
||||
|
@ -194,7 +194,7 @@ static void U_CALLCONV ucnv_toUnicode_UTF8_OFFSETS_LOGIC (UConverterToUnicodeAr
|
|||
int32_t i, inBytes;
|
||||
|
||||
/* Restore size of current sequence */
|
||||
if (cnv->toUnicodeStatus && myTarget < targetLimit)
|
||||
if (cnv->toULength > 0 && myTarget < targetLimit)
|
||||
{
|
||||
inBytes = cnv->mode; /* restore # of bytes to consume */
|
||||
i = cnv->toULength; /* restore # of bytes consumed */
|
||||
|
@ -670,12 +670,13 @@ ucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
|
|||
targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
|
||||
|
||||
/* get the converter state from the UTF-8 UConverter */
|
||||
c=(UChar32)utf8->toUnicodeStatus;
|
||||
if(c!=0) {
|
||||
if(utf8->toULength > 0) {
|
||||
toULength=oldToULength=utf8->toULength;
|
||||
toULimit=(int8_t)utf8->mode;
|
||||
c=(UChar32)utf8->toUnicodeStatus;
|
||||
} else {
|
||||
toULength=oldToULength=toULimit=0;
|
||||
c = 0;
|
||||
}
|
||||
|
||||
count=(int32_t)(sourceLimit-source)+oldToULength;
|
||||
|
@ -695,36 +696,20 @@ ucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
|
|||
// Use a single counter for source and target, counting the minimum of
|
||||
// the source length and the target capacity.
|
||||
// Let the standard converter handle edge cases.
|
||||
const uint8_t *limit=sourceLimit;
|
||||
if(count>targetCapacity) {
|
||||
limit-=(count-targetCapacity);
|
||||
count=targetCapacity;
|
||||
}
|
||||
|
||||
// The conversion loop checks count>0 only once per 1/2/3-byte character.
|
||||
// If the buffer ends with a truncated 2- or 3-byte sequence,
|
||||
// The conversion loop checks count>0 only once per character.
|
||||
// If the buffer ends with a truncated sequence,
|
||||
// then we reduce the count to stop before that,
|
||||
// and collect the remaining bytes after the conversion loop.
|
||||
{
|
||||
// Do not go back into the bytes that will be read for finishing a partial
|
||||
// sequence from the previous buffer.
|
||||
int32_t length=count-toULimit;
|
||||
if(length>0) {
|
||||
uint8_t b1=*(limit-1);
|
||||
if(U8_IS_SINGLE(b1)) {
|
||||
// common ASCII character
|
||||
} else if(U8_IS_TRAIL(b1) && length>=2) {
|
||||
uint8_t b2=*(limit-2);
|
||||
if(0xe0<=b2 && b2<0xf0 && U8_IS_VALID_LEAD3_AND_T1(b2, b1)) {
|
||||
// truncated 3-byte sequence
|
||||
count-=2;
|
||||
}
|
||||
} else if(0xc2<=b1 && b1<0xf0) {
|
||||
// truncated 2- or 3-byte sequence
|
||||
--count;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Do not go back into the bytes that will be read for finishing a partial
|
||||
// sequence from the previous buffer.
|
||||
int32_t length=count-toULimit;
|
||||
U8_TRUNCATE_IF_INCOMPLETE(source, 0, length);
|
||||
count=toULimit+length;
|
||||
}
|
||||
|
||||
if(c!=0) {
|
||||
|
@ -814,7 +799,7 @@ moreBytes:
|
|||
}
|
||||
|
||||
/* copy the legal byte sequence to the target */
|
||||
if(count>=toULength) {
|
||||
{
|
||||
int8_t i;
|
||||
|
||||
for(i=0; i<oldToULength; ++i) {
|
||||
|
@ -825,14 +810,6 @@ moreBytes:
|
|||
*target++=*source++;
|
||||
}
|
||||
count-=toULength;
|
||||
} else {
|
||||
// A supplementary character that does not fit into the target.
|
||||
// Let the standard converter handle this.
|
||||
source-=(toULength-oldToULength);
|
||||
pToUArgs->source=(char *)source;
|
||||
pFromUArgs->target=(char *)target;
|
||||
*pErrorCode=U_USING_DEFAULT_WARNING;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -856,8 +833,7 @@ moreBytes:
|
|||
utf8->toULength=toULength;
|
||||
utf8->mode=toULimit;
|
||||
break;
|
||||
} else if(!U8_IS_TRAIL(b=*source)) {
|
||||
/* lead byte in trail byte position */
|
||||
} else if(!icu::UTF8::isValidTrail(c, b=*source, toULength, toULimit)) {
|
||||
utf8->toULength=toULength;
|
||||
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
|
||||
break;
|
||||
|
|
8
deps/icu-small/source/common/ucnvlat1.cpp
vendored
8
deps/icu-small/source/common/ucnvlat1.cpp
vendored
|
@ -340,7 +340,11 @@ ucnv_Latin1FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
|
|||
targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
|
||||
|
||||
/* get the converter state from the UTF-8 UConverter */
|
||||
c=(UChar32)utf8->toUnicodeStatus;
|
||||
if (utf8->toULength > 0) {
|
||||
c=(UChar32)utf8->toUnicodeStatus;
|
||||
} else {
|
||||
c = 0;
|
||||
}
|
||||
if(c!=0 && source<sourceLimit) {
|
||||
if(targetCapacity==0) {
|
||||
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
|
||||
|
@ -620,7 +624,7 @@ ucnv_ASCIIFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
|
|||
|
||||
uint8_t c;
|
||||
|
||||
if(pToUArgs->converter->toUnicodeStatus!=0) {
|
||||
if(pToUArgs->converter->toULength > 0) {
|
||||
/* no handling of partial UTF-8 characters here, fall back to pivoting */
|
||||
*pErrorCode=U_USING_DEFAULT_WARNING;
|
||||
return;
|
||||
|
|
10
deps/icu-small/source/common/ucnvmbcs.cpp
vendored
10
deps/icu-small/source/common/ucnvmbcs.cpp
vendored
|
@ -5064,12 +5064,13 @@ ucnv_SBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
|
|||
hasSupplementary=(UBool)(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY);
|
||||
|
||||
/* get the converter state from the UTF-8 UConverter */
|
||||
c=(UChar32)utf8->toUnicodeStatus;
|
||||
if(c!=0) {
|
||||
if(utf8->toULength > 0) {
|
||||
toULength=oldToULength=utf8->toULength;
|
||||
toULimit=(int8_t)utf8->mode;
|
||||
c=(UChar32)utf8->toUnicodeStatus;
|
||||
} else {
|
||||
toULength=oldToULength=toULimit=0;
|
||||
c = 0;
|
||||
}
|
||||
|
||||
// The conversion loop checks source<sourceLimit only once per 1/2/3-byte character.
|
||||
|
@ -5359,12 +5360,13 @@ ucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
|
|||
hasSupplementary=(UBool)(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY);
|
||||
|
||||
/* get the converter state from the UTF-8 UConverter */
|
||||
c=(UChar32)utf8->toUnicodeStatus;
|
||||
if(c!=0) {
|
||||
if(utf8->toULength > 0) {
|
||||
toULength=oldToULength=utf8->toULength;
|
||||
toULimit=(int8_t)utf8->mode;
|
||||
c=(UChar32)utf8->toUnicodeStatus;
|
||||
} else {
|
||||
toULength=oldToULength=toULimit=0;
|
||||
c = 0;
|
||||
}
|
||||
|
||||
// The conversion loop checks source<sourceLimit only once per 1/2/3-byte character.
|
||||
|
|
27
deps/icu-small/source/common/ucurr.cpp
vendored
27
deps/icu-small/source/common/ucurr.cpp
vendored
|
@ -17,6 +17,7 @@
|
|||
#include "unicode/ustring.h"
|
||||
#include "unicode/parsepos.h"
|
||||
#include "ustr_imp.h"
|
||||
#include "charstr.h"
|
||||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
#include "uassert.h"
|
||||
|
@ -28,9 +29,12 @@
|
|||
#include "uinvchar.h"
|
||||
#include "uresimp.h"
|
||||
#include "ulist.h"
|
||||
#include "uresimp.h"
|
||||
#include "ureslocs.h"
|
||||
#include "ulocimp.h"
|
||||
|
||||
using namespace icu;
|
||||
|
||||
//#define UCURR_DEBUG_EQUIV 1
|
||||
#ifdef UCURR_DEBUG_EQUIV
|
||||
#include "stdio.h"
|
||||
|
@ -104,6 +108,7 @@ static const char VAR_DELIM_STR[] = "_";
|
|||
|
||||
// Tag for localized display names (symbols) of currencies
|
||||
static const char CURRENCIES[] = "Currencies";
|
||||
static const char CURRENCIES_NARROW[] = "Currencies%narrow";
|
||||
static const char CURRENCYPLURALS[] = "CurrencyPlurals";
|
||||
|
||||
static const UChar EUR_STR[] = {0x0045,0x0055,0x0052,0};
|
||||
|
@ -698,7 +703,7 @@ ucurr_getName(const UChar* currency,
|
|||
}
|
||||
|
||||
int32_t choice = (int32_t) nameStyle;
|
||||
if (choice < 0 || choice > 1) {
|
||||
if (choice < 0 || choice > 2) {
|
||||
*ec = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return 0;
|
||||
}
|
||||
|
@ -731,15 +736,19 @@ ucurr_getName(const UChar* currency,
|
|||
|
||||
const UChar* s = NULL;
|
||||
ec2 = U_ZERO_ERROR;
|
||||
UResourceBundle* rb = ures_open(U_ICUDATA_CURR, loc, &ec2);
|
||||
LocalUResourceBundlePointer rb(ures_open(U_ICUDATA_CURR, loc, &ec2));
|
||||
|
||||
rb = ures_getByKey(rb, CURRENCIES, rb, &ec2);
|
||||
|
||||
// Fetch resource with multi-level resource inheritance fallback
|
||||
rb = ures_getByKeyWithFallback(rb, buf, rb, &ec2);
|
||||
|
||||
s = ures_getStringByIndex(rb, choice, len, &ec2);
|
||||
ures_close(rb);
|
||||
if (nameStyle == UCURR_NARROW_SYMBOL_NAME) {
|
||||
CharString key;
|
||||
key.append(CURRENCIES_NARROW, ec2);
|
||||
key.append("/", ec2);
|
||||
key.append(buf, ec2);
|
||||
s = ures_getStringByKeyWithFallback(rb.getAlias(), key.data(), len, &ec2);
|
||||
} else {
|
||||
ures_getByKey(rb.getAlias(), CURRENCIES, rb.getAlias(), &ec2);
|
||||
ures_getByKeyWithFallback(rb.getAlias(), buf, rb.getAlias(), &ec2);
|
||||
s = ures_getStringByIndex(rb.getAlias(), choice, len, &ec2);
|
||||
}
|
||||
|
||||
// If we've succeeded we're done. Otherwise, try to fallback.
|
||||
// If that fails (because we are already at root) then exit.
|
||||
|
|
15
deps/icu-small/source/common/unicode/brkiter.h
vendored
15
deps/icu-small/source/common/unicode/brkiter.h
vendored
|
@ -298,15 +298,14 @@ public:
|
|||
virtual int32_t next(int32_t n) = 0;
|
||||
|
||||
/**
|
||||
* For RuleBasedBreakIterators, return the status tag from the
|
||||
* break rule that determined the most recently
|
||||
* returned break position.
|
||||
* For RuleBasedBreakIterators, return the status tag from the break rule
|
||||
* that determined the boundary at the current iteration position.
|
||||
* <p>
|
||||
* For break iterator types that do not support a rule status,
|
||||
* a default value of 0 is returned.
|
||||
* <p>
|
||||
* @return the status from the break rule that determined the most recently
|
||||
* returned break position.
|
||||
* @return the status from the break rule that determined the boundary at
|
||||
* the current iteration position.
|
||||
* @see RuleBasedBreakIterator::getRuleStatus()
|
||||
* @see UWordBreak
|
||||
* @stable ICU 52
|
||||
|
@ -315,7 +314,7 @@ public:
|
|||
|
||||
/**
|
||||
* For RuleBasedBreakIterators, get the status (tag) values from the break rule(s)
|
||||
* that determined the most recently returned break position.
|
||||
* that determined the boundary at the current iteration position.
|
||||
* <p>
|
||||
* For break iterator types that do not support rule status,
|
||||
* no values are returned.
|
||||
|
@ -334,7 +333,7 @@ public:
|
|||
* normal way, without attempting to store any values.
|
||||
* @param status receives error codes.
|
||||
* @return The number of rule status values from rules that determined
|
||||
* the most recent boundary returned by the break iterator.
|
||||
* the boundary at the current iteration position.
|
||||
* In the event of a U_BUFFER_OVERFLOW_ERROR, the return value
|
||||
* is the total number of status values that were available,
|
||||
* not the reduced number that were actually returned.
|
||||
|
@ -616,7 +615,7 @@ public:
|
|||
virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) = 0;
|
||||
|
||||
private:
|
||||
static BreakIterator* buildInstance(const Locale& loc, const char *type, int32_t kind, UErrorCode& status);
|
||||
static BreakIterator* buildInstance(const Locale& loc, const char *type, UErrorCode& status);
|
||||
static BreakIterator* createInstance(const Locale& loc, int32_t kind, UErrorCode& status);
|
||||
static BreakIterator* makeInstance(const Locale& loc, int32_t kind, UErrorCode& status);
|
||||
|
||||
|
|
|
@ -154,7 +154,6 @@ private:
|
|||
const char *s;
|
||||
};
|
||||
|
||||
// don't use #ifndef U_HIDE_INTERNAL_API with private class members or virtual methods.
|
||||
virtual Node *createLinearMatchNode(int32_t i, int32_t byteIndex, int32_t length,
|
||||
Node *nextNode) const;
|
||||
|
||||
|
|
24
deps/icu-small/source/common/unicode/casemap.h
vendored
24
deps/icu-small/source/common/unicode/casemap.h
vendored
|
@ -18,8 +18,6 @@
|
|||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
|
||||
class BreakIterator;
|
||||
class ByteSink;
|
||||
class Edits;
|
||||
|
@ -27,7 +25,7 @@ class Edits;
|
|||
/**
|
||||
* Low-level C++ case mapping functions.
|
||||
*
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
class U_COMMON_API CaseMap U_FINAL : public UMemory {
|
||||
public:
|
||||
|
@ -59,7 +57,7 @@ public:
|
|||
* the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
|
||||
*
|
||||
* @see u_strToLower
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
static int32_t toLower(
|
||||
const char *locale, uint32_t options,
|
||||
|
@ -95,7 +93,7 @@ public:
|
|||
* the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
|
||||
*
|
||||
* @see u_strToUpper
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
static int32_t toUpper(
|
||||
const char *locale, uint32_t options,
|
||||
|
@ -146,7 +144,7 @@ public:
|
|||
*
|
||||
* @see u_strToTitle
|
||||
* @see ucasemap_toTitle
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
static int32_t toTitle(
|
||||
const char *locale, uint32_t options, BreakIterator *iter,
|
||||
|
@ -188,7 +186,7 @@ public:
|
|||
* the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
|
||||
*
|
||||
* @see u_strFoldCase
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
static int32_t fold(
|
||||
uint32_t options,
|
||||
|
@ -196,6 +194,7 @@ public:
|
|||
char16_t *dest, int32_t destCapacity, Edits *edits,
|
||||
UErrorCode &errorCode);
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* Lowercases a UTF-8 string and optionally records edits.
|
||||
* Casing is locale-dependent and context-sensitive.
|
||||
|
@ -318,6 +317,7 @@ public:
|
|||
uint32_t options,
|
||||
StringPiece src, ByteSink &sink, Edits *edits,
|
||||
UErrorCode &errorCode);
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Lowercases a UTF-8 string and optionally records edits.
|
||||
|
@ -347,7 +347,7 @@ public:
|
|||
* the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
|
||||
*
|
||||
* @see ucasemap_utf8ToLower
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
static int32_t utf8ToLower(
|
||||
const char *locale, uint32_t options,
|
||||
|
@ -383,7 +383,7 @@ public:
|
|||
* the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
|
||||
*
|
||||
* @see ucasemap_utf8ToUpper
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
static int32_t utf8ToUpper(
|
||||
const char *locale, uint32_t options,
|
||||
|
@ -433,7 +433,7 @@ public:
|
|||
* the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
|
||||
*
|
||||
* @see ucasemap_utf8ToTitle
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
static int32_t utf8ToTitle(
|
||||
const char *locale, uint32_t options, BreakIterator *iter,
|
||||
|
@ -475,7 +475,7 @@ public:
|
|||
* the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
|
||||
*
|
||||
* @see ucasemap_utf8FoldCase
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
static int32_t utf8Fold(
|
||||
uint32_t options,
|
||||
|
@ -489,8 +489,6 @@ private:
|
|||
CaseMap &operator=(const CaseMap &other) = delete;
|
||||
};
|
||||
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif // __CASEMAP_H__
|
||||
|
|
44
deps/icu-small/source/common/unicode/char16ptr.h
vendored
44
deps/icu-small/source/common/unicode/char16ptr.h
vendored
|
@ -30,25 +30,23 @@ U_NAMESPACE_BEGIN
|
|||
# define U_ALIASING_BARRIER(ptr) asm volatile("" : : "rm"(ptr) : "memory")
|
||||
#endif
|
||||
|
||||
// Do not use #ifndef U_HIDE_DRAFT_API for the following class, it
|
||||
// is now used in place of UChar* in several stable C++ methods
|
||||
/**
|
||||
* char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types.
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
class U_COMMON_API Char16Ptr U_FINAL {
|
||||
public:
|
||||
/**
|
||||
* Copies the pointer.
|
||||
* @param p pointer
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline Char16Ptr(char16_t *p);
|
||||
#if !U_CHAR16_IS_TYPEDEF
|
||||
/**
|
||||
* Converts the pointer to char16_t *.
|
||||
* @param p pointer to be converted
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline Char16Ptr(uint16_t *p);
|
||||
#endif
|
||||
|
@ -57,32 +55,32 @@ public:
|
|||
* Converts the pointer to char16_t *.
|
||||
* (Only defined if U_SIZEOF_WCHAR_T==2.)
|
||||
* @param p pointer to be converted
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline Char16Ptr(wchar_t *p);
|
||||
#endif
|
||||
/**
|
||||
* nullptr constructor.
|
||||
* @param p nullptr
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline Char16Ptr(std::nullptr_t p);
|
||||
/**
|
||||
* Destructor.
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline ~Char16Ptr();
|
||||
|
||||
/**
|
||||
* Pointer access.
|
||||
* @return the wrapped pointer
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline char16_t *get() const;
|
||||
/**
|
||||
* char16_t pointer access via type conversion (e.g., static_cast).
|
||||
* @return the wrapped pointer
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline operator char16_t *() const { return get(); }
|
||||
|
||||
|
@ -137,25 +135,23 @@ char16_t *Char16Ptr::get() const { return u_.cp; }
|
|||
|
||||
#endif
|
||||
|
||||
// Do not use #ifndef U_HIDE_DRAFT_API for the following class, it is
|
||||
// now used in place of const UChar* in several stable C++ methods
|
||||
/**
|
||||
* const char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types.
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
class U_COMMON_API ConstChar16Ptr U_FINAL {
|
||||
public:
|
||||
/**
|
||||
* Copies the pointer.
|
||||
* @param p pointer
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline ConstChar16Ptr(const char16_t *p);
|
||||
#if !U_CHAR16_IS_TYPEDEF
|
||||
/**
|
||||
* Converts the pointer to const char16_t *.
|
||||
* @param p pointer to be converted
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline ConstChar16Ptr(const uint16_t *p);
|
||||
#endif
|
||||
|
@ -164,33 +160,33 @@ public:
|
|||
* Converts the pointer to const char16_t *.
|
||||
* (Only defined if U_SIZEOF_WCHAR_T==2.)
|
||||
* @param p pointer to be converted
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline ConstChar16Ptr(const wchar_t *p);
|
||||
#endif
|
||||
/**
|
||||
* nullptr constructor.
|
||||
* @param p nullptr
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline ConstChar16Ptr(const std::nullptr_t p);
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline ~ConstChar16Ptr();
|
||||
|
||||
/**
|
||||
* Pointer access.
|
||||
* @return the wrapped pointer
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline const char16_t *get() const;
|
||||
/**
|
||||
* char16_t pointer access via type conversion (e.g., static_cast).
|
||||
* @return the wrapped pointer
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline operator const char16_t *() const { return get(); }
|
||||
|
||||
|
@ -250,7 +246,7 @@ const char16_t *ConstChar16Ptr::get() const { return u_.cp; }
|
|||
* Includes an aliasing barrier if available.
|
||||
* @param p pointer
|
||||
* @return p as const UChar *
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline const UChar *toUCharPtr(const char16_t *p) {
|
||||
#ifdef U_ALIASING_BARRIER
|
||||
|
@ -264,7 +260,7 @@ inline const UChar *toUCharPtr(const char16_t *p) {
|
|||
* Includes an aliasing barrier if available.
|
||||
* @param p pointer
|
||||
* @return p as UChar *
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline UChar *toUCharPtr(char16_t *p) {
|
||||
#ifdef U_ALIASING_BARRIER
|
||||
|
@ -278,7 +274,7 @@ inline UChar *toUCharPtr(char16_t *p) {
|
|||
* Includes an aliasing barrier if available.
|
||||
* @param p pointer
|
||||
* @return p as const OldUChar *
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline const OldUChar *toOldUCharPtr(const char16_t *p) {
|
||||
#ifdef U_ALIASING_BARRIER
|
||||
|
@ -292,7 +288,7 @@ inline const OldUChar *toOldUCharPtr(const char16_t *p) {
|
|||
* Includes an aliasing barrier if available.
|
||||
* @param p pointer
|
||||
* @return p as OldUChar *
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline OldUChar *toOldUCharPtr(char16_t *p) {
|
||||
#ifdef U_ALIASING_BARRIER
|
||||
|
|
|
@ -569,7 +569,7 @@ public:
|
|||
* Returns the numeric index in the underlying text-storage
|
||||
* object of the character the iterator currently refers to
|
||||
* (i.e., the character returned by current()).
|
||||
* @return the numberic index in the text-storage object of
|
||||
* @return the numeric index in the text-storage object of
|
||||
* the character the iterator currently refers to
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
|
|
|
@ -69,7 +69,7 @@ public:
|
|||
* <pre>
|
||||
* . Base* polymorphic_pointer = createPolymorphicObject();
|
||||
* . if (polymorphic_pointer->getDynamicClassID() ==
|
||||
* . erived::getStaticClassID()) ...
|
||||
* . derived::getStaticClassID()) ...
|
||||
* </pre>
|
||||
* @return The class ID for all objects of this class.
|
||||
* @stable ICU 4.0
|
||||
|
|
58
deps/icu-small/source/common/unicode/edits.h
vendored
58
deps/icu-small/source/common/unicode/edits.h
vendored
|
@ -17,8 +17,6 @@
|
|||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Records lengths of string edits but not replacement text.
|
||||
* Supports replacements, insertions, deletions in linear progression.
|
||||
|
@ -27,13 +25,13 @@ U_NAMESPACE_BEGIN
|
|||
* An Edits object tracks a separate UErrorCode, but ICU string transformation functions
|
||||
* (e.g., case mapping functions) merge any such errors into their API's UErrorCode.
|
||||
*
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
class U_COMMON_API Edits U_FINAL : public UMemory {
|
||||
public:
|
||||
/**
|
||||
* Constructs an empty object.
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
Edits() :
|
||||
array(stackArray), capacity(STACK_CAPACITY), length(0), delta(0), numChanges(0),
|
||||
|
@ -64,7 +62,7 @@ public:
|
|||
|
||||
/**
|
||||
* Destructor.
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
~Edits();
|
||||
|
||||
|
@ -88,20 +86,20 @@ public:
|
|||
|
||||
/**
|
||||
* Resets the data but may not release memory.
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
void reset() U_NOEXCEPT;
|
||||
|
||||
/**
|
||||
* Adds a record for an unchanged segment of text.
|
||||
* Normally called from inside ICU string transformation functions, not user code.
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
void addUnchanged(int32_t unchangedLength);
|
||||
/**
|
||||
* Adds a record for a text replacement/insertion/deletion.
|
||||
* Normally called from inside ICU string transformation functions, not user code.
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
void addReplace(int32_t oldLength, int32_t newLength);
|
||||
/**
|
||||
|
@ -112,33 +110,35 @@ public:
|
|||
* and an error occurred while recording edits.
|
||||
* Otherwise unchanged.
|
||||
* @return TRUE if U_FAILURE(outErrorCode)
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
UBool copyErrorTo(UErrorCode &outErrorCode);
|
||||
|
||||
/**
|
||||
* How much longer is the new text compared with the old text?
|
||||
* @return new length minus old length
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
int32_t lengthDelta() const { return delta; }
|
||||
/**
|
||||
* @return TRUE if there are any change edits
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
UBool hasChanges() const { return numChanges != 0; }
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* @return the number of change edits
|
||||
* @draft ICU 60
|
||||
*/
|
||||
int32_t numberOfChanges() const { return numChanges; }
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Access to the list of edits.
|
||||
* @see getCoarseIterator
|
||||
* @see getFineIterator
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
struct U_COMMON_API Iterator U_FINAL : public UMemory {
|
||||
/**
|
||||
|
@ -152,12 +152,12 @@ public:
|
|||
srcIndex(0), replIndex(0), destIndex(0) {}
|
||||
/**
|
||||
* Copy constructor.
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
Iterator(const Iterator &other) = default;
|
||||
/**
|
||||
* Assignment operator.
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
Iterator &operator=(const Iterator &other) = default;
|
||||
|
||||
|
@ -167,7 +167,7 @@ public:
|
|||
* or else the function returns immediately. Check for U_FAILURE()
|
||||
* on output or use with function chaining. (See User Guide for details.)
|
||||
* @return TRUE if there is another edit
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
UBool next(UErrorCode &errorCode) { return next(onlyChanges_, errorCode); }
|
||||
|
||||
|
@ -188,12 +188,13 @@ public:
|
|||
* or else the function returns immediately. Check for U_FAILURE()
|
||||
* on output or use with function chaining. (See User Guide for details.)
|
||||
* @return TRUE if the edit for the source index was found
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
UBool findSourceIndex(int32_t i, UErrorCode &errorCode) {
|
||||
return findIndex(i, TRUE, errorCode) == 0;
|
||||
}
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* Finds the edit that contains the destination index.
|
||||
* The destination index may be found in a non-change
|
||||
|
@ -264,39 +265,40 @@ public:
|
|||
* @draft ICU 60
|
||||
*/
|
||||
int32_t sourceIndexFromDestinationIndex(int32_t i, UErrorCode &errorCode);
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* @return TRUE if this edit replaces oldLength() units with newLength() different ones.
|
||||
* FALSE if oldLength units remain unchanged.
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
UBool hasChange() const { return changed; }
|
||||
/**
|
||||
* @return the number of units in the original string which are replaced or remain unchanged.
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
int32_t oldLength() const { return oldLength_; }
|
||||
/**
|
||||
* @return the number of units in the modified string, if hasChange() is TRUE.
|
||||
* Same as oldLength if hasChange() is FALSE.
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
int32_t newLength() const { return newLength_; }
|
||||
|
||||
/**
|
||||
* @return the current index into the source string
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
int32_t sourceIndex() const { return srcIndex; }
|
||||
/**
|
||||
* @return the current index into the replacement-characters-only string,
|
||||
* not counting unchanged spans
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
int32_t replacementIndex() const { return replIndex; }
|
||||
/**
|
||||
* @return the current index into the full destination string
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
int32_t destinationIndex() const { return destIndex; }
|
||||
|
||||
|
@ -331,7 +333,7 @@ public:
|
|||
* Returns an Iterator for coarse-grained changes for simple string updates.
|
||||
* Skips non-changes.
|
||||
* @return an Iterator that merges adjacent changes.
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
Iterator getCoarseChangesIterator() const {
|
||||
return Iterator(array, length, TRUE, TRUE);
|
||||
|
@ -340,7 +342,7 @@ public:
|
|||
/**
|
||||
* Returns an Iterator for coarse-grained changes and non-changes for simple string updates.
|
||||
* @return an Iterator that merges adjacent changes.
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
Iterator getCoarseIterator() const {
|
||||
return Iterator(array, length, FALSE, TRUE);
|
||||
|
@ -350,7 +352,7 @@ public:
|
|||
* Returns an Iterator for fine-grained changes for modifying styled text.
|
||||
* Skips non-changes.
|
||||
* @return an Iterator that separates adjacent changes.
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
Iterator getFineChangesIterator() const {
|
||||
return Iterator(array, length, TRUE, FALSE);
|
||||
|
@ -359,12 +361,13 @@ public:
|
|||
/**
|
||||
* Returns an Iterator for fine-grained changes and non-changes for modifying styled text.
|
||||
* @return an Iterator that separates adjacent changes.
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
Iterator getFineIterator() const {
|
||||
return Iterator(array, length, FALSE, FALSE);
|
||||
}
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* Merges the two input Edits and appends the result to this object.
|
||||
*
|
||||
|
@ -393,6 +396,7 @@ public:
|
|||
* @draft ICU 60
|
||||
*/
|
||||
Edits &mergeAndAppend(const Edits &ab, const Edits &bc, UErrorCode &errorCode);
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
private:
|
||||
void releaseArray() U_NOEXCEPT;
|
||||
|
@ -415,8 +419,6 @@ private:
|
|||
uint16_t stackArray[STACK_CAPACITY];
|
||||
};
|
||||
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif // __EDITS_H__
|
||||
|
|
|
@ -64,9 +64,7 @@ class U_COMMON_API FilteredBreakIteratorBuilder : public UObject {
|
|||
* @deprecated ICU 60 use createEmptyInstance instead
|
||||
* @see createEmptyInstance()
|
||||
*/
|
||||
static inline FilteredBreakIteratorBuilder *createInstance(UErrorCode &status) {
|
||||
return createEmptyInstance(status);
|
||||
}
|
||||
static FilteredBreakIteratorBuilder *createInstance(UErrorCode &status);
|
||||
#endif /* U_HIDE_DEPRECATED_API */
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
|
@ -105,7 +103,6 @@ class U_COMMON_API FilteredBreakIteratorBuilder : public UObject {
|
|||
*/
|
||||
virtual UBool unsuppressBreakAfter(const UnicodeString& string, UErrorCode& status) = 0;
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* This function has been deprecated in favor of wrapIteratorWithFilter()
|
||||
* The behavior is identical.
|
||||
|
@ -116,7 +113,6 @@ class U_COMMON_API FilteredBreakIteratorBuilder : public UObject {
|
|||
* @see wrapIteratorWithFilter()
|
||||
*/
|
||||
virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& status) = 0;
|
||||
#endif /* U_HIDE_DEPRECATED_API */
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
|
|
4
deps/icu-small/source/common/unicode/locid.h
vendored
4
deps/icu-small/source/common/unicode/locid.h
vendored
|
@ -353,7 +353,7 @@ public:
|
|||
* the default locale ID of the runtime environment.
|
||||
*
|
||||
* @param newLocale Locale to set to. If NULL, set to the value obtained
|
||||
* from the runtime environement.
|
||||
* from the runtime environment.
|
||||
* @param success The error code.
|
||||
* @system
|
||||
* @stable ICU 2.0
|
||||
|
@ -629,7 +629,7 @@ public:
|
|||
|
||||
/**
|
||||
* Fills in "name" with the name of this locale in a format suitable for user display
|
||||
* in the locale specfied by "displayLocale". This function uses getDisplayLanguage(),
|
||||
* in the locale specified by "displayLocale". This function uses getDisplayLanguage(),
|
||||
* getDisplayCountry(), and getDisplayVariant() to do its work, and outputs the display
|
||||
* name in the format "language (country[,variant])". For example, if displayLocale is
|
||||
* fr_FR, then en_US's display name would be "Anglais (États-Unis)", and no_NO_NY's
|
||||
|
|
|
@ -58,9 +58,9 @@ enum { U_PARSE_CONTEXT_LEN = 16 };
|
|||
typedef struct UParseError {
|
||||
|
||||
/**
|
||||
* The line on which the error occured. If the parser uses this
|
||||
* The line on which the error occurred. If the parser uses this
|
||||
* field, it sets it to the line number of the source text line on
|
||||
* which the error appears, which will be be a value >= 1. If the
|
||||
* which the error appears, which will be a value >= 1. If the
|
||||
* parse does not support line numbers, the value will be <= 0.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
|
|
12
deps/icu-small/source/common/unicode/platform.h
vendored
12
deps/icu-small/source/common/unicode/platform.h
vendored
|
@ -482,9 +482,9 @@
|
|||
/* Otherwise use the predefined value. */
|
||||
#elif !defined(__cplusplus)
|
||||
# define U_CPLUSPLUS_VERSION 0
|
||||
#elif __cplusplus >= 201402L
|
||||
#elif __cplusplus >= 201402L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201402L)
|
||||
# define U_CPLUSPLUS_VERSION 14
|
||||
#elif __cplusplus >= 201103L
|
||||
#elif __cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L)
|
||||
# define U_CPLUSPLUS_VERSION 11
|
||||
#else
|
||||
// C++98 or C++03
|
||||
|
@ -631,7 +631,7 @@ namespace std {
|
|||
*/
|
||||
#ifdef U_CHARSET_IS_UTF8
|
||||
/* Use the predefined value. */
|
||||
#elif U_PLATFORM == U_PF_ANDROID || U_PLATFORM_IS_DARWIN_BASED
|
||||
#elif U_PLATFORM_IS_LINUX_BASED || U_PLATFORM_IS_DARWIN_BASED
|
||||
# define U_CHARSET_IS_UTF8 1
|
||||
#else
|
||||
# define U_CHARSET_IS_UTF8 0
|
||||
|
@ -749,8 +749,10 @@ namespace std {
|
|||
#else
|
||||
/*
|
||||
* Notes:
|
||||
* Visual Studio 10 (_MSC_VER>=1600) defines char16_t but
|
||||
* does not support u"abc" string literals.
|
||||
* Visual Studio 2010 (_MSC_VER==1600) defines char16_t as a typedef
|
||||
* and does not support u"abc" string literals.
|
||||
* Visual Studio 2015 (_MSC_VER>=1900) and above adds support for
|
||||
* both char16_t and u"abc" string literals.
|
||||
* gcc 4.4 defines the __CHAR16_TYPE__ macro to a usable type but
|
||||
* does not support u"abc" string literals.
|
||||
* C++11 and C11 require support for UTF-16 literals
|
||||
|
|
6
deps/icu-small/source/common/unicode/putil.h
vendored
6
deps/icu-small/source/common/unicode/putil.h
vendored
|
@ -38,7 +38,7 @@
|
|||
|
||||
/**
|
||||
* Platform utilities isolates the platform dependencies of the
|
||||
* libarary. For each platform which this code is ported to, these
|
||||
* library. For each platform which this code is ported to, these
|
||||
* functions may have to be re-implemented.
|
||||
*/
|
||||
|
||||
|
@ -53,7 +53,7 @@
|
|||
* The data directory is determined as follows:
|
||||
* If u_setDataDirectory() has been called, that is it, otherwise
|
||||
* if the ICU_DATA environment variable is set, use that, otherwise
|
||||
* If a data directory was specifed at ICU build time
|
||||
* If a data directory was specified at ICU build time
|
||||
* <code>
|
||||
* \code
|
||||
* #define ICU_DATA_DIR "path"
|
||||
|
@ -93,7 +93,7 @@ U_STABLE void U_EXPORT2 u_setDataDirectory(const char *directory);
|
|||
#ifndef U_HIDE_INTERNAL_API
|
||||
/**
|
||||
* Return the time zone files override directory, or an empty string if
|
||||
* no directory was specified. Certain time zone resources will be preferrentially
|
||||
* no directory was specified. Certain time zone resources will be preferentially
|
||||
* loaded from individual files in this directory.
|
||||
*
|
||||
* @return the time zone data override directory.
|
||||
|
|
124
deps/icu-small/source/common/unicode/rbbi.h
vendored
124
deps/icu-small/source/common/unicode/rbbi.h
vendored
|
@ -29,7 +29,6 @@
|
|||
#include "unicode/udata.h"
|
||||
#include "unicode/parseerr.h"
|
||||
#include "unicode/schriter.h"
|
||||
#include "unicode/uchriter.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
|
@ -58,34 +57,18 @@ private:
|
|||
* The UText through which this BreakIterator accesses the text
|
||||
* @internal
|
||||
*/
|
||||
UText *fText;
|
||||
UText fText;
|
||||
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
public:
|
||||
#endif /* U_HIDE_INTERNAL_API */
|
||||
/**
|
||||
* A character iterator that refers to the same text as the UText, above.
|
||||
* Only included for compatibility with old API, which was based on CharacterIterators.
|
||||
* Value may be adopted from outside, or one of fSCharIter or fDCharIter, below.
|
||||
*/
|
||||
CharacterIterator *fCharIter;
|
||||
|
||||
/**
|
||||
* When the input text is provided by a UnicodeString, this will point to
|
||||
* a characterIterator that wraps that data. Needed only for the
|
||||
* implementation of getText(), a backwards compatibility issue.
|
||||
*/
|
||||
StringCharacterIterator *fSCharIter;
|
||||
|
||||
/**
|
||||
* When the input text is provided by a UText, this
|
||||
* dummy CharacterIterator over an empty string will
|
||||
* be returned from getText()
|
||||
*/
|
||||
UCharCharacterIterator *fDCharIter;
|
||||
|
||||
/**
|
||||
* The rule data for this BreakIterator instance
|
||||
* The rule data for this BreakIterator instance.
|
||||
* Not for general use; Public only for testing purposes.
|
||||
* @internal
|
||||
*/
|
||||
RBBIDataWrapper *fData;
|
||||
private:
|
||||
|
||||
/**
|
||||
* The iteration state - current position, rule status for the current position,
|
||||
|
@ -105,24 +88,11 @@ private:
|
|||
*/
|
||||
int32_t fRuleStatusIndex;
|
||||
|
||||
/**
|
||||
* True when iteration has run off the end, and iterator functions should return UBRK_DONE.
|
||||
*/
|
||||
UBool fDone;
|
||||
|
||||
/**
|
||||
* Cache of previously determined boundary positions.
|
||||
*/
|
||||
public: // TODO: debug, return to private.
|
||||
class BreakCache;
|
||||
BreakCache *fBreakCache;
|
||||
private:
|
||||
/**
|
||||
* Counter for the number of characters encountered with the "dictionary"
|
||||
* flag set.
|
||||
* @internal
|
||||
*/
|
||||
uint32_t fDictionaryCharCount;
|
||||
|
||||
/**
|
||||
* Cache of boundary positions within a region of text that has been
|
||||
|
@ -150,11 +120,30 @@ private:
|
|||
UnhandledEngine *fUnhandledBreakEngine;
|
||||
|
||||
/**
|
||||
*
|
||||
* The type of the break iterator, or -1 if it has not been set.
|
||||
* Counter for the number of characters encountered with the "dictionary"
|
||||
* flag set.
|
||||
* @internal
|
||||
*/
|
||||
int32_t fBreakType;
|
||||
uint32_t fDictionaryCharCount;
|
||||
|
||||
/**
|
||||
* A character iterator that refers to the same text as the UText, above.
|
||||
* Only included for compatibility with old API, which was based on CharacterIterators.
|
||||
* Value may be adopted from outside, or one of fSCharIter or fDCharIter, below.
|
||||
*/
|
||||
CharacterIterator *fCharIter;
|
||||
|
||||
/**
|
||||
* When the input text is provided by a UnicodeString, this will point to
|
||||
* a characterIterator that wraps that data. Needed only for the
|
||||
* implementation of getText(), a backwards compatibility issue.
|
||||
*/
|
||||
StringCharacterIterator fSCharIter;
|
||||
|
||||
/**
|
||||
* True when iteration has run off the end, and iterator functions should return UBRK_DONE.
|
||||
*/
|
||||
UBool fDone;
|
||||
|
||||
//=======================================================================
|
||||
// constructors
|
||||
|
@ -206,17 +195,17 @@ public:
|
|||
UErrorCode &status);
|
||||
|
||||
/**
|
||||
* Contruct a RuleBasedBreakIterator from a set of precompiled binary rules.
|
||||
* Construct a RuleBasedBreakIterator from a set of precompiled binary rules.
|
||||
* Binary rules are obtained from RuleBasedBreakIterator::getBinaryRules().
|
||||
* Construction of a break iterator in this way is substantially faster than
|
||||
* constuction from source rules.
|
||||
* construction from source rules.
|
||||
*
|
||||
* Ownership of the storage containing the compiled rules remains with the
|
||||
* caller of this function. The compiled rules must not be modified or
|
||||
* deleted during the life of the break iterator.
|
||||
*
|
||||
* The compiled rules are not compatible across different major versions of ICU.
|
||||
* The compiled rules are comaptible only between machines with the same
|
||||
* The compiled rules are compatible only between machines with the same
|
||||
* byte ordering (little or big endian) and the same base character set family
|
||||
* (ASCII or EBCDIC).
|
||||
*
|
||||
|
@ -285,7 +274,7 @@ public:
|
|||
* behavior, and iterating over the same text, as this one.
|
||||
* Differs from the copy constructor in that it is polymorphic, and
|
||||
* will correctly clone (copy) a derived class.
|
||||
* clone() is thread safe. Multiple threads may simultaeneously
|
||||
* clone() is thread safe. Multiple threads may simultaneously
|
||||
* clone the same source break iterator.
|
||||
* @return a newly-constructed RuleBasedBreakIterator
|
||||
* @stable ICU 2.0
|
||||
|
@ -450,7 +439,7 @@ public:
|
|||
virtual int32_t preceding(int32_t offset);
|
||||
|
||||
/**
|
||||
* Returns true if the specfied position is a boundary position. As a side
|
||||
* Returns true if the specified position is a boundary position. As a side
|
||||
* effect, leaves the iterator pointing to the first boundary position at
|
||||
* or after "offset".
|
||||
* @param offset the offset to check.
|
||||
|
@ -471,8 +460,8 @@ public:
|
|||
|
||||
|
||||
/**
|
||||
* Return the status tag from the break rule that determined the most recently
|
||||
* returned break position. For break rules that do not specify a
|
||||
* Return the status tag from the break rule that determined the boundary at
|
||||
* the current iteration position. For break rules that do not specify a
|
||||
* status, a default value of 0 is returned. If more than one break rule
|
||||
* would cause a boundary to be located at some position in the text,
|
||||
* the numerically largest of the applicable status values is returned.
|
||||
|
@ -489,16 +478,14 @@ public:
|
|||
* position from <code>next()</code>, <code>previous()</code>, or
|
||||
* any other break iterator function that returns a boundary position.
|
||||
* <p>
|
||||
* Note that <code>getRuleStatus()</code> returns the value corresponding to
|
||||
* <code>current()</code> index even after <code>next()</code> has returned DONE.
|
||||
* <p>
|
||||
* When creating custom break rules, one is free to define whatever
|
||||
* status values may be convenient for the application.
|
||||
* <p>
|
||||
* Note: this function is not thread safe. It should not have been
|
||||
* declared const, and the const remains only for compatibility
|
||||
* reasons. (The function is logically const, but not bit-wise const).
|
||||
* TODO: check this. Probably thread safe now.
|
||||
* <p>
|
||||
* @return the status from the break rule that determined the most recently
|
||||
* returned break position.
|
||||
* @return the status from the break rule that determined the boundary
|
||||
* at the current iteration position.
|
||||
*
|
||||
* @see UWordBreak
|
||||
* @stable ICU 2.2
|
||||
|
@ -506,8 +493,8 @@ public:
|
|||
virtual int32_t getRuleStatus() const;
|
||||
|
||||
/**
|
||||
* Get the status (tag) values from the break rule(s) that determined the most
|
||||
* recently returned break position.
|
||||
* Get the status (tag) values from the break rule(s) that determined the boundary
|
||||
* at the current iteration position.
|
||||
* <p>
|
||||
* The returned status value(s) are stored into an array provided by the caller.
|
||||
* The values are stored in sorted (ascending) order.
|
||||
|
@ -518,10 +505,10 @@ public:
|
|||
* @param fillInVec an array to be filled in with the status values.
|
||||
* @param capacity the length of the supplied vector. A length of zero causes
|
||||
* the function to return the number of status values, in the
|
||||
* normal way, without attemtping to store any values.
|
||||
* normal way, without attempting to store any values.
|
||||
* @param status receives error codes.
|
||||
* @return The number of rule status values from rules that determined
|
||||
* the most recent boundary returned by the break iterator.
|
||||
* @return The number of rule status values from the rules that determined
|
||||
* the boundary at the current iteration position.
|
||||
* In the event of a U_BUFFER_OVERFLOW_ERROR, the return value
|
||||
* is the total number of status values that were available,
|
||||
* not the reduced number that were actually returned.
|
||||
|
@ -561,7 +548,7 @@ public:
|
|||
*
|
||||
* Create a clone (copy) of this break iterator in memory provided
|
||||
* by the caller. The idea is to increase performance by avoiding
|
||||
* a storage allocation. Use of this functoin is NOT RECOMMENDED.
|
||||
* a storage allocation. Use of this function is NOT RECOMMENDED.
|
||||
* Performance gains are minimal, and correct buffer management is
|
||||
* tricky. Use clone() instead.
|
||||
*
|
||||
|
@ -574,7 +561,7 @@ public:
|
|||
* storage for the cloned object.
|
||||
*
|
||||
* @param status Error status. U_SAFECLONE_ALLOCATED_WARNING will be
|
||||
* returned if the the provided buffer was too small, and
|
||||
* returned if the provided buffer was too small, and
|
||||
* the clone was therefore put on the heap.
|
||||
*
|
||||
* @return Pointer to the clone object. This may differ from the stackBuffer
|
||||
|
@ -597,7 +584,7 @@ public:
|
|||
* The binary data can only be used with the same version of ICU
|
||||
* and on the same platform type (processor endian-ness)
|
||||
*
|
||||
* @param length Returns the length of the binary data. (Out paramter.)
|
||||
* @param length Returns the length of the binary data. (Out parameter.)
|
||||
*
|
||||
* @return A pointer to the binary (compiled) rule data. The storage
|
||||
* belongs to the RuleBasedBreakIterator object, not the
|
||||
|
@ -645,12 +632,6 @@ private:
|
|||
*/
|
||||
void reset(void);
|
||||
|
||||
/**
|
||||
* Set the type of the break iterator.
|
||||
* @internal
|
||||
*/
|
||||
void setBreakType(int32_t type);
|
||||
|
||||
/**
|
||||
* Common initialization function, used by constructors and bufferClone.
|
||||
* @internal
|
||||
|
@ -697,6 +678,13 @@ private:
|
|||
* @internal
|
||||
*/
|
||||
void dumpCache();
|
||||
|
||||
/**
|
||||
* Debugging function only.
|
||||
* @internal
|
||||
*/
|
||||
void dumpTables();
|
||||
|
||||
#endif /* U_HIDE_INTERNAL_API */
|
||||
};
|
||||
|
||||
|
|
|
@ -132,7 +132,7 @@ public:
|
|||
ResourceBundle(UErrorCode &err);
|
||||
|
||||
/**
|
||||
* Standard constructor, onstructs a resource bundle for the locale-specific
|
||||
* Standard constructor, constructs a resource bundle for the locale-specific
|
||||
* bundle in the specified package.
|
||||
*
|
||||
* @param packageName The packageName and locale together point to an ICU udata object,
|
||||
|
|
|
@ -69,7 +69,7 @@ public:
|
|||
* Create an iterator over the UnicodeString referred to by "textStr".
|
||||
* The UnicodeString object is copied.
|
||||
* The iteration range begins with the code unit specified by
|
||||
* "textBegin" and ends with the code unit BEFORE the code unit specfied
|
||||
* "textBegin" and ends with the code unit BEFORE the code unit specified
|
||||
* by "textEnd". The starting position is specified by "textPos". If
|
||||
* "textBegin" and "textEnd" don't form a valid range on "text" (i.e.,
|
||||
* textBegin >= textEnd or either is negative or greater than text.size()),
|
||||
|
|
4
deps/icu-small/source/common/unicode/ubidi.h
vendored
4
deps/icu-small/source/common/unicode/ubidi.h
vendored
|
@ -692,7 +692,7 @@ typedef enum UBiDiReorderingMode {
|
|||
* @stable ICU 3.6 */
|
||||
UBIDI_REORDER_DEFAULT = 0,
|
||||
/** Logical to Visual algorithm which handles numbers in a way which
|
||||
* mimicks the behavior of Windows XP.
|
||||
* mimics the behavior of Windows XP.
|
||||
* @stable ICU 3.6 */
|
||||
UBIDI_REORDER_NUMBERS_SPECIAL,
|
||||
/** Logical to Visual algorithm grouping numbers with adjacent R characters
|
||||
|
@ -1142,7 +1142,7 @@ ubidi_setContext(UBiDi *pBiDi,
|
|||
|
||||
/**
|
||||
* Perform the Unicode Bidi algorithm. It is defined in the
|
||||
* <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Anned #9</a>,
|
||||
* <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>,
|
||||
* version 13,
|
||||
* also described in The Unicode Standard, Version 4.0 .<p>
|
||||
*
|
||||
|
|
18
deps/icu-small/source/common/unicode/ubrk.h
vendored
18
deps/icu-small/source/common/unicode/ubrk.h
vendored
|
@ -268,7 +268,6 @@ ubrk_openRules(const UChar *rules,
|
|||
UParseError *parseErr,
|
||||
UErrorCode *status);
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* Open a new UBreakIterator for locating text boundaries using precompiled binary rules.
|
||||
* Opening a UBreakIterator this way is substantially faster than using ubrk_openRules.
|
||||
|
@ -287,15 +286,13 @@ ubrk_openRules(const UChar *rules,
|
|||
* @param status Pointer to UErrorCode to receive any errors.
|
||||
* @return UBreakIterator for the specified rules.
|
||||
* @see ubrk_getBinaryRules
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
U_DRAFT UBreakIterator* U_EXPORT2
|
||||
U_STABLE UBreakIterator* U_EXPORT2
|
||||
ubrk_openBinaryRules(const uint8_t *binaryRules, int32_t rulesLength,
|
||||
const UChar * text, int32_t textLength,
|
||||
UErrorCode * status);
|
||||
|
||||
#endif /* U_HIDE_DRAFT_API */
|
||||
|
||||
/**
|
||||
* Thread safe cloning operation
|
||||
* @param bi iterator to be cloned
|
||||
|
@ -510,7 +507,7 @@ ubrk_countAvailable(void);
|
|||
|
||||
|
||||
/**
|
||||
* Returns true if the specfied position is a boundary position. As a side
|
||||
* Returns true if the specified position is a boundary position. As a side
|
||||
* effect, leaves the iterator pointing to the first boundary position at
|
||||
* or after "offset".
|
||||
* @param bi The break iterator to use.
|
||||
|
@ -544,7 +541,7 @@ ubrk_getRuleStatus(UBreakIterator *bi);
|
|||
* @param fillInVec an array to be filled in with the status values.
|
||||
* @param capacity the length of the supplied vector. A length of zero causes
|
||||
* the function to return the number of status values, in the
|
||||
* normal way, without attemtping to store any values.
|
||||
* normal way, without attempting to store any values.
|
||||
* @param status receives error codes.
|
||||
* @return The number of rule status values from rules that determined
|
||||
* the most recent boundary returned by the break iterator.
|
||||
|
@ -596,7 +593,6 @@ ubrk_refreshUText(UBreakIterator *bi,
|
|||
UErrorCode *status);
|
||||
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* Get a compiled binary version of the rules specifying the behavior of a UBreakIterator.
|
||||
* The binary rules may be used with ubrk_openBinaryRules to open a new UBreakIterator
|
||||
|
@ -620,15 +616,13 @@ ubrk_refreshUText(UBreakIterator *bi,
|
|||
* otherwise 0. If not preflighting and this is larger than
|
||||
* rulesCapacity, *status will be set to an error.
|
||||
* @see ubrk_openBinaryRules
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
U_DRAFT int32_t U_EXPORT2
|
||||
U_STABLE int32_t U_EXPORT2
|
||||
ubrk_getBinaryRules(UBreakIterator *bi,
|
||||
uint8_t * binaryRules, int32_t rulesCapacity,
|
||||
UErrorCode * status);
|
||||
|
||||
#endif /* U_HIDE_DRAFT_API */
|
||||
|
||||
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
|
||||
|
||||
#endif
|
||||
|
|
7
deps/icu-small/source/common/unicode/uchar.h
vendored
7
deps/icu-small/source/common/unicode/uchar.h
vendored
|
@ -112,11 +112,11 @@ U_CDECL_BEGIN
|
|||
* Comparison:
|
||||
* - u_isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property;
|
||||
* most of general categories "Z" (separators) + most whitespace ISO controls
|
||||
* (including no-break spaces, but excluding IS1..IS4 and ZWSP)
|
||||
* (including no-break spaces, but excluding IS1..IS4)
|
||||
* - u_isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces
|
||||
* - u_isJavaSpaceChar: Java isSpaceChar; just Z (including no-break spaces)
|
||||
* - u_isspace: Z + whitespace ISO controls (including no-break spaces)
|
||||
* - u_isblank: "horizontal spaces" = TAB + Zs - ZWSP
|
||||
* - u_isblank: "horizontal spaces" = TAB + Zs
|
||||
*/
|
||||
|
||||
/**
|
||||
|
@ -2702,8 +2702,7 @@ u_isgraph(UChar32 c);
|
|||
*
|
||||
* same as
|
||||
*
|
||||
* TRUE for U+0009 (TAB) and characters with general category "Zs" (space separators)
|
||||
* except Zero Width Space (ZWSP, U+200B).
|
||||
* TRUE for U+0009 (TAB) and characters with general category "Zs" (space separators).
|
||||
*
|
||||
* Note: There are several ICU whitespace functions; please see the uchar.h
|
||||
* file documentation for a detailed comparison.
|
||||
|
|
16
deps/icu-small/source/common/unicode/uclean.h
vendored
16
deps/icu-small/source/common/unicode/uclean.h
vendored
|
@ -70,7 +70,7 @@ u_init(UErrorCode *status);
|
|||
* This has the effect of restoring ICU to its initial condition, before
|
||||
* any of these override functions were installed. Refer to
|
||||
* u_setMemoryFunctions(), u_setMutexFunctions and
|
||||
* utrace_setFunctions(). If ICU is to be reinitialized after after
|
||||
* utrace_setFunctions(). If ICU is to be reinitialized after
|
||||
* calling u_cleanup(), these runtime override functions will need to
|
||||
* be set up again if they are still required.
|
||||
* <p>
|
||||
|
@ -104,7 +104,7 @@ u_cleanup(void);
|
|||
U_CDECL_BEGIN
|
||||
/**
|
||||
* Pointer type for a user supplied memory allocation function.
|
||||
* @param context user supplied value, obtained from from u_setMemoryFunctions().
|
||||
* @param context user supplied value, obtained from u_setMemoryFunctions().
|
||||
* @param size The number of bytes to be allocated
|
||||
* @return Pointer to the newly allocated memory, or NULL if the allocation failed.
|
||||
* @stable ICU 2.8
|
||||
|
@ -113,7 +113,7 @@ U_CDECL_BEGIN
|
|||
typedef void *U_CALLCONV UMemAllocFn(const void *context, size_t size);
|
||||
/**
|
||||
* Pointer type for a user supplied memory re-allocation function.
|
||||
* @param context user supplied value, obtained from from u_setMemoryFunctions().
|
||||
* @param context user supplied value, obtained from u_setMemoryFunctions().
|
||||
* @param size The number of bytes to be allocated
|
||||
* @return Pointer to the newly allocated memory, or NULL if the allocation failed.
|
||||
* @stable ICU 2.8
|
||||
|
@ -123,7 +123,7 @@ typedef void *U_CALLCONV UMemReallocFn(const void *context, void *mem, size_t si
|
|||
/**
|
||||
* Pointer type for a user supplied memory free function. Behavior should be
|
||||
* similar to the standard C library free().
|
||||
* @param context user supplied value, obtained from from u_setMemoryFunctions().
|
||||
* @param context user supplied value, obtained from u_setMemoryFunctions().
|
||||
* @param mem Pointer to the memory block to be resized
|
||||
* @param size The new size for the block
|
||||
* @return Pointer to the resized memory block, or NULL if the resizing failed.
|
||||
|
@ -179,8 +179,8 @@ U_CDECL_BEGIN
|
|||
* The user-supplied function will be called by ICU whenever ICU needs to create a
|
||||
* new mutex. The function implementation should create a mutex, and store a pointer
|
||||
* to something that uniquely identifies the mutex into the UMTX that is supplied
|
||||
* as a paramter.
|
||||
* @param context user supplied value, obtained from from u_setMutexFunctions().
|
||||
* as a parameter.
|
||||
* @param context user supplied value, obtained from u_setMutexFunctions().
|
||||
* @param mutex Receives a pointer that identifies the new mutex.
|
||||
* The mutex init function must set the UMTX to a non-null value.
|
||||
* Subsequent calls by ICU to lock, unlock, or destroy a mutex will
|
||||
|
@ -197,7 +197,7 @@ typedef void U_CALLCONV UMtxInitFn (const void *context, UMTX *mutex, UErrorCod
|
|||
* Function pointer type for user supplied mutex functions.
|
||||
* One of the user-supplied functions with this signature will be called by ICU
|
||||
* whenever ICU needs to lock, unlock, or destroy a mutex.
|
||||
* @param context user supplied value, obtained from from u_setMutexFunctions().
|
||||
* @param context user supplied value, obtained from u_setMutexFunctions().
|
||||
* @param mutex specify the mutex on which to operate.
|
||||
* @deprecated ICU 52. This function is no longer supported.
|
||||
* @system
|
||||
|
@ -229,7 +229,7 @@ u_setMutexFunctions(const void *context, UMtxInitFn *init, UMtxFn *destroy, UMtx
|
|||
|
||||
/**
|
||||
* Pointer type for a user supplied atomic increment or decrement function.
|
||||
* @param context user supplied value, obtained from from u_setAtomicIncDecFunctions().
|
||||
* @param context user supplied value, obtained from u_setAtomicIncDecFunctions().
|
||||
* @param p Pointer to a 32 bit int to be incremented or decremented
|
||||
* @return The value of the variable after the inc or dec operation.
|
||||
* @deprecated ICU 52. This function is no longer supported.
|
||||
|
|
8
deps/icu-small/source/common/unicode/ucnv.h
vendored
8
deps/icu-small/source/common/unicode/ucnv.h
vendored
|
@ -207,7 +207,7 @@ typedef void (U_EXPORT2 *UConverterToUCallback) (
|
|||
|
||||
/**
|
||||
* Function pointer for error callback in the unicode to codepage direction.
|
||||
* Called when an error has occured in conversion from unicode, or on open/close of the callback (see reason).
|
||||
* Called when an error has occurred in conversion from unicode, or on open/close of the callback (see reason).
|
||||
* @param context Pointer to the callback's private data
|
||||
* @param args Information about the conversion in progress
|
||||
* @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
|
||||
|
@ -353,7 +353,7 @@ ucnv_compareNames(const char *name1, const char *name2);
|
|||
* ucnv_getAlias for a complete list that is available.
|
||||
* If this parameter is NULL, the default converter will be used.
|
||||
* @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR, U_FILE_ACCESS_ERROR</TT>
|
||||
* @return the created Unicode converter object, or <TT>NULL</TT> if an error occured
|
||||
* @return the created Unicode converter object, or <TT>NULL</TT> if an error occurred
|
||||
* @see ucnv_openU
|
||||
* @see ucnv_openCCSID
|
||||
* @see ucnv_getAvailableName
|
||||
|
@ -386,7 +386,7 @@ ucnv_open(const char *converterName, UErrorCode *err);
|
|||
* @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR,
|
||||
* U_FILE_ACCESS_ERROR</TT>
|
||||
* @return the created Unicode converter object, or <TT>NULL</TT> if an
|
||||
* error occured
|
||||
* error occurred
|
||||
* @see ucnv_open
|
||||
* @see ucnv_openCCSID
|
||||
* @see ucnv_close
|
||||
|
@ -489,7 +489,7 @@ ucnv_openCCSID(int32_t codepage,
|
|||
* @param packageName name of the package (equivalent to 'path' in udata_open() call)
|
||||
* @param converterName name of the data item to be used, without suffix.
|
||||
* @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR, U_FILE_ACCESS_ERROR</TT>
|
||||
* @return the created Unicode converter object, or <TT>NULL</TT> if an error occured
|
||||
* @return the created Unicode converter object, or <TT>NULL</TT> if an error occurred
|
||||
* @see udata_open
|
||||
* @see ucnv_open
|
||||
* @see ucnv_safeClone
|
||||
|
|
12
deps/icu-small/source/common/unicode/ucnv_err.h
vendored
12
deps/icu-small/source/common/unicode/ucnv_err.h
vendored
|
@ -119,19 +119,19 @@ typedef struct UConverter UConverter;
|
|||
#define UCNV_ESCAPE_JAVA "J"
|
||||
/**
|
||||
* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to C (\\uXXXX \\UXXXXXXXX)
|
||||
* TO_U_CALLBACK_ESCAPE option to escape the character value accoding to C (\\xXXXX)
|
||||
* TO_U_CALLBACK_ESCAPE option to escape the character value according to C (\\xXXXX)
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#define UCNV_ESCAPE_C "C"
|
||||
/**
|
||||
* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Decimal escape \htmlonly(&#DDDD;)\endhtmlonly
|
||||
* TO_U_CALLBACK_ESCAPE context option to escape the character value accoding to XML Decimal escape \htmlonly(&#DDDD;)\endhtmlonly
|
||||
* TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Decimal escape \htmlonly(&#DDDD;)\endhtmlonly
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#define UCNV_ESCAPE_XML_DEC "D"
|
||||
/**
|
||||
* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Hex escape \htmlonly(&#xXXXX;)\endhtmlonly
|
||||
* TO_U_CALLBACK_ESCAPE context option to escape the character value accoding to XML Hex escape \htmlonly(&#xXXXX;)\endhtmlonly
|
||||
* TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Hex escape \htmlonly(&#xXXXX;)\endhtmlonly
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#define UCNV_ESCAPE_XML_HEX "X"
|
||||
|
@ -171,7 +171,7 @@ typedef enum {
|
|||
code points.
|
||||
The error code U_INVALID_CHAR_FOUND will be set. */
|
||||
UCNV_RESET = 3, /**< The callback is called with this reason when a
|
||||
'reset' has occured. Callback should reset all
|
||||
'reset' has occurred. Callback should reset all
|
||||
state. */
|
||||
UCNV_CLOSE = 4, /**< Called when the converter is closed. The
|
||||
callback should release any allocated memory.*/
|
||||
|
@ -199,7 +199,7 @@ typedef struct {
|
|||
const UChar *sourceLimit; /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */
|
||||
char *target; /**< Pointer to the target buffer. @stable ICU 2.0 */
|
||||
const char *targetLimit; /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */
|
||||
int32_t *offsets; /**< Pointer to the buffer that recieves the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */
|
||||
int32_t *offsets; /**< Pointer to the buffer that receives the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */
|
||||
} UConverterFromUnicodeArgs;
|
||||
|
||||
|
||||
|
@ -215,7 +215,7 @@ typedef struct {
|
|||
const char *sourceLimit; /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */
|
||||
UChar *target; /**< Pointer to the target buffer. @stable ICU 2.0 */
|
||||
const UChar *targetLimit; /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */
|
||||
int32_t *offsets; /**< Pointer to the buffer that recieves the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */
|
||||
int32_t *offsets; /**< Pointer to the buffer that receives the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */
|
||||
} UConverterToUnicodeArgs;
|
||||
|
||||
|
||||
|
|
13
deps/icu-small/source/common/unicode/ucurr.h
vendored
13
deps/icu-small/source/common/unicode/ucurr.h
vendored
|
@ -103,6 +103,19 @@ typedef enum UCurrNameStyle {
|
|||
* @stable ICU 2.6
|
||||
*/
|
||||
UCURR_LONG_NAME
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
,
|
||||
/**
|
||||
* Selector for getName() indicating the narrow currency symbol.
|
||||
* The narrow currency symbol is similar to the regular currency
|
||||
* symbol, but it always takes the shortest form: for example,
|
||||
* "$" instead of "US$" for USD in en-CA.
|
||||
*
|
||||
* @draft ICU 61
|
||||
*/
|
||||
UCURR_NARROW_SYMBOL_NAME
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
} UCurrNameStyle;
|
||||
|
||||
#if !UCONFIG_NO_SERVICE
|
||||
|
|
|
@ -299,6 +299,10 @@ typedef int8_t UBool;
|
|||
// for AIX, uchar.h needs to be included
|
||||
# include <uchar.h>
|
||||
# define U_CHAR16_IS_TYPEDEF 1
|
||||
#elif defined(_MSC_VER) && (_MSC_VER < 1900)
|
||||
// Versions of Visual Studio/MSVC below 2015 do not support char16_t as a real type,
|
||||
// and instead use a typedef. https://msdn.microsoft.com/library/bb531344.aspx
|
||||
# define U_CHAR16_IS_TYPEDEF 1
|
||||
#else
|
||||
# define U_CHAR16_IS_TYPEDEF 0
|
||||
#endif
|
||||
|
@ -366,7 +370,7 @@ typedef int8_t UBool;
|
|||
* Exception: ICU 58 UChar was defined to UCHAR_TYPE if that macro was defined.
|
||||
* The current UChar responds to UCHAR_TYPE but OldUChar does not.
|
||||
*
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
#if U_SIZEOF_WCHAR_T==2
|
||||
typedef wchar_t OldUChar;
|
||||
|
|
|
@ -1521,6 +1521,7 @@ private:
|
|||
UnicodeString& rebuiltPat,
|
||||
uint32_t options,
|
||||
UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute),
|
||||
int32_t depth,
|
||||
UErrorCode& ec);
|
||||
|
||||
//----------------------------------------------------------------
|
||||
|
|
54
deps/icu-small/source/common/unicode/unistr.h
vendored
54
deps/icu-small/source/common/unicode/unistr.h
vendored
|
@ -2995,10 +2995,6 @@ public:
|
|||
*/
|
||||
UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char16_t *text);
|
||||
|
||||
/*
|
||||
* Do not use #ifndef U_HIDE_DRAFT_API for the following constructor,
|
||||
* it should always be available regardless of U_HIDE_DRAFT_API status
|
||||
*/
|
||||
#if !U_CHAR16_IS_TYPEDEF
|
||||
/**
|
||||
* uint16_t * constructor.
|
||||
|
@ -3008,16 +3004,12 @@ public:
|
|||
* <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
|
||||
* on the compiler command line or similar.
|
||||
* @param text NUL-terminated UTF-16 string
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
UNISTR_FROM_STRING_EXPLICIT UnicodeString(const uint16_t *text) :
|
||||
UnicodeString(ConstChar16Ptr(text)) {}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Do not use #ifndef U_HIDE_DRAFT_API for the following constructor,
|
||||
* it should always be available regardless of U_HIDE_DRAFT_API status
|
||||
*/
|
||||
#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
|
||||
/**
|
||||
* wchar_t * constructor.
|
||||
|
@ -3028,16 +3020,12 @@ public:
|
|||
* <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
|
||||
* on the compiler command line or similar.
|
||||
* @param text NUL-terminated UTF-16 string
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
UNISTR_FROM_STRING_EXPLICIT UnicodeString(const wchar_t *text) :
|
||||
UnicodeString(ConstChar16Ptr(text)) {}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Do not use #ifndef U_HIDE_DRAFT_API for the following constructor,
|
||||
* it should always be available regardless of U_HIDE_DRAFT_API status
|
||||
*/
|
||||
/**
|
||||
* nullptr_t constructor.
|
||||
* Effectively the same as the default constructor, makes an empty string object.
|
||||
|
@ -3046,7 +3034,7 @@ public:
|
|||
* <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
|
||||
* on the compiler command line or similar.
|
||||
* @param text nullptr
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
UNISTR_FROM_STRING_EXPLICIT inline UnicodeString(const std::nullptr_t text);
|
||||
|
||||
|
@ -3060,26 +3048,18 @@ public:
|
|||
UnicodeString(const char16_t *text,
|
||||
int32_t textLength);
|
||||
|
||||
/*
|
||||
* Do not use #ifndef U_HIDE_DRAFT_API for the following constructor,
|
||||
* it should always be available regardless of U_HIDE_DRAFT_API status
|
||||
*/
|
||||
#if !U_CHAR16_IS_TYPEDEF
|
||||
/**
|
||||
* uint16_t * constructor.
|
||||
* Delegates to UnicodeString(const char16_t *, int32_t).
|
||||
* @param text UTF-16 string
|
||||
* @param length string length
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
UnicodeString(const uint16_t *text, int32_t length) :
|
||||
UnicodeString(ConstChar16Ptr(text), length) {}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Do not use #ifndef U_HIDE_DRAFT_API for the following constructor,
|
||||
* it should always be available regardless of U_HIDE_DRAFT_API status
|
||||
*/
|
||||
#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
|
||||
/**
|
||||
* wchar_t * constructor.
|
||||
|
@ -3087,22 +3067,18 @@ public:
|
|||
* Delegates to UnicodeString(const char16_t *, int32_t).
|
||||
* @param text NUL-terminated UTF-16 string
|
||||
* @param length string length
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
UnicodeString(const wchar_t *text, int32_t length) :
|
||||
UnicodeString(ConstChar16Ptr(text), length) {}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Do not use #ifndef U_HIDE_DRAFT_API for the following constructor,
|
||||
* it should always be available regardless of U_HIDE_DRAFT_API status
|
||||
*/
|
||||
/**
|
||||
* nullptr_t constructor.
|
||||
* Effectively the same as the default constructor, makes an empty string object.
|
||||
* @param text nullptr
|
||||
* @param length ignored
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline UnicodeString(const std::nullptr_t text, int32_t length);
|
||||
|
||||
|
@ -3152,10 +3128,6 @@ public:
|
|||
*/
|
||||
UnicodeString(char16_t *buffer, int32_t buffLength, int32_t buffCapacity);
|
||||
|
||||
/*
|
||||
* Do not use #ifndef U_HIDE_DRAFT_API for the following constructor,
|
||||
* it should always be available regardless of U_HIDE_DRAFT_API status
|
||||
*/
|
||||
#if !U_CHAR16_IS_TYPEDEF
|
||||
/**
|
||||
* Writable-aliasing uint16_t * constructor.
|
||||
|
@ -3163,16 +3135,12 @@ public:
|
|||
* @param buffer writable buffer of/for UTF-16 text
|
||||
* @param buffLength length of the current buffer contents
|
||||
* @param buffCapacity buffer capacity
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
UnicodeString(uint16_t *buffer, int32_t buffLength, int32_t buffCapacity) :
|
||||
UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Do not use #ifndef U_HIDE_DRAFT_API for the following constructor,
|
||||
* it should always be available regardless of U_HIDE_DRAFT_API status
|
||||
*/
|
||||
#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
|
||||
/**
|
||||
* Writable-aliasing wchar_t * constructor.
|
||||
|
@ -3181,23 +3149,19 @@ public:
|
|||
* @param buffer writable buffer of/for UTF-16 text
|
||||
* @param buffLength length of the current buffer contents
|
||||
* @param buffCapacity buffer capacity
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
UnicodeString(wchar_t *buffer, int32_t buffLength, int32_t buffCapacity) :
|
||||
UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Do not use #ifndef U_HIDE_DRAFT_API for the following constructor,
|
||||
* it should always be available regardless of U_HIDE_DRAFT_API status
|
||||
*/
|
||||
/**
|
||||
* Writable-aliasing nullptr_t constructor.
|
||||
* Effectively the same as the default constructor, makes an empty string object.
|
||||
* @param buffer nullptr
|
||||
* @param buffLength ignored
|
||||
* @param buffCapacity ignored
|
||||
* @draft ICU 59
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline UnicodeString(std::nullptr_t buffer, int32_t buffLength, int32_t buffCapacity);
|
||||
|
||||
|
|
|
@ -107,7 +107,6 @@
|
|||
#define _UTF7Data U_ICU_ENTRY_POINT_RENAME(_UTF7Data)
|
||||
#define _UTF8Data U_ICU_ENTRY_POINT_RENAME(_UTF8Data)
|
||||
#define allowedHourFormatsCleanup U_ICU_ENTRY_POINT_RENAME(allowedHourFormatsCleanup)
|
||||
#define checkImpl U_ICU_ENTRY_POINT_RENAME(checkImpl)
|
||||
#define cmemory_cleanup U_ICU_ENTRY_POINT_RENAME(cmemory_cleanup)
|
||||
#define dayPeriodRulesCleanup U_ICU_ENTRY_POINT_RENAME(dayPeriodRulesCleanup)
|
||||
#define deleteAllowedHourFormats U_ICU_ENTRY_POINT_RENAME(deleteAllowedHourFormats)
|
||||
|
@ -446,7 +445,6 @@
|
|||
#define ubidi_getReorderingOptions U_ICU_ENTRY_POINT_RENAME(ubidi_getReorderingOptions)
|
||||
#define ubidi_getResultLength U_ICU_ENTRY_POINT_RENAME(ubidi_getResultLength)
|
||||
#define ubidi_getRuns U_ICU_ENTRY_POINT_RENAME(ubidi_getRuns)
|
||||
#define ubidi_getSingleton U_ICU_ENTRY_POINT_RENAME(ubidi_getSingleton)
|
||||
#define ubidi_getText U_ICU_ENTRY_POINT_RENAME(ubidi_getText)
|
||||
#define ubidi_getVisualIndex U_ICU_ENTRY_POINT_RENAME(ubidi_getVisualIndex)
|
||||
#define ubidi_getVisualMap U_ICU_ENTRY_POINT_RENAME(ubidi_getVisualMap)
|
||||
|
@ -551,6 +549,7 @@
|
|||
#define ucase_addStringCaseClosure U_ICU_ENTRY_POINT_RENAME(ucase_addStringCaseClosure)
|
||||
#define ucase_fold U_ICU_ENTRY_POINT_RENAME(ucase_fold)
|
||||
#define ucase_getCaseLocale U_ICU_ENTRY_POINT_RENAME(ucase_getCaseLocale)
|
||||
#define ucase_getTrie U_ICU_ENTRY_POINT_RENAME(ucase_getTrie)
|
||||
#define ucase_getType U_ICU_ENTRY_POINT_RENAME(ucase_getType)
|
||||
#define ucase_getTypeOrIgnorable U_ICU_ENTRY_POINT_RENAME(ucase_getTypeOrIgnorable)
|
||||
#define ucase_hasBinaryProperty U_ICU_ENTRY_POINT_RENAME(ucase_hasBinaryProperty)
|
||||
|
@ -862,6 +861,7 @@
|
|||
#define udatpg_getBestPatternWithOptions U_ICU_ENTRY_POINT_RENAME(udatpg_getBestPatternWithOptions)
|
||||
#define udatpg_getDateTimeFormat U_ICU_ENTRY_POINT_RENAME(udatpg_getDateTimeFormat)
|
||||
#define udatpg_getDecimal U_ICU_ENTRY_POINT_RENAME(udatpg_getDecimal)
|
||||
#define udatpg_getFieldDisplayName U_ICU_ENTRY_POINT_RENAME(udatpg_getFieldDisplayName)
|
||||
#define udatpg_getPatternForSkeleton U_ICU_ENTRY_POINT_RENAME(udatpg_getPatternForSkeleton)
|
||||
#define udatpg_getSkeleton U_ICU_ENTRY_POINT_RENAME(udatpg_getSkeleton)
|
||||
#define udatpg_open U_ICU_ENTRY_POINT_RENAME(udatpg_open)
|
||||
|
@ -1326,7 +1326,6 @@
|
|||
#define uprv_getRawUTCtime U_ICU_ENTRY_POINT_RENAME(uprv_getRawUTCtime)
|
||||
#define uprv_getStaticCurrencyName U_ICU_ENTRY_POINT_RENAME(uprv_getStaticCurrencyName)
|
||||
#define uprv_getUTCtime U_ICU_ENTRY_POINT_RENAME(uprv_getUTCtime)
|
||||
#define uprv_haveProperties U_ICU_ENTRY_POINT_RENAME(uprv_haveProperties)
|
||||
#define uprv_int32Comparator U_ICU_ENTRY_POINT_RENAME(uprv_int32Comparator)
|
||||
#define uprv_isASCIILetter U_ICU_ENTRY_POINT_RENAME(uprv_isASCIILetter)
|
||||
#define uprv_isInfinite U_ICU_ENTRY_POINT_RENAME(uprv_isInfinite)
|
||||
|
|
16
deps/icu-small/source/common/unicode/ures.h
vendored
16
deps/icu-small/source/common/unicode/ures.h
vendored
|
@ -16,7 +16,7 @@
|
|||
* 04/04/99 helena Fixed internal header inclusion.
|
||||
* 04/15/99 Madhu Updated Javadoc
|
||||
* 06/14/99 stephen Removed functions taking a filename suffix.
|
||||
* 07/20/99 stephen Language-independent ypedef to void*
|
||||
* 07/20/99 stephen Language-independent typedef to void*
|
||||
* 11/09/99 weiv Added ures_getLocale()
|
||||
* 06/24/02 weiv Added support for resource sharing
|
||||
******************************************************************************
|
||||
|
@ -138,7 +138,7 @@ typedef enum {
|
|||
/**
|
||||
* Opens a UResourceBundle, from which users can extract strings by using
|
||||
* their corresponding keys.
|
||||
* Note that the caller is responsible of calling <TT>ures_close</TT> on each succesfully
|
||||
* Note that the caller is responsible of calling <TT>ures_close</TT> on each successfully
|
||||
* opened resource bundle.
|
||||
* @param packageName The packageName and locale together point to an ICU udata object,
|
||||
* as defined by <code> udata_open( packageName, "res", locale, err) </code>
|
||||
|
@ -301,7 +301,7 @@ ures_getVersion(const UResourceBundle* resB,
|
|||
* you to query for the real locale of the resource. For example, if you requested
|
||||
* "en_US_CALIFORNIA" and only "en_US" bundle exists, "en_US" will be returned.
|
||||
* For subresources, the locale where this resource comes from will be returned.
|
||||
* If fallback has occured, getLocale will reflect this.
|
||||
* If fallback has occurred, getLocale will reflect this.
|
||||
*
|
||||
* @param resourceBundle resource bundle in question
|
||||
* @param status just for catching illegal arguments
|
||||
|
@ -580,7 +580,7 @@ ures_hasNext(const UResourceBundle *resourceBundle);
|
|||
* @param fillIn if NULL a new UResourceBundle struct is allocated and must be closed by the caller.
|
||||
* Alternatively, you can supply a struct to be filled by this function.
|
||||
* @param status fills in the outgoing error code. You may still get a non NULL result even if an
|
||||
* error occured. Check status instead.
|
||||
* error occurred. Check status instead.
|
||||
* @return a pointer to a UResourceBundle struct. If fill in param was NULL, caller must close it
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
|
@ -596,7 +596,7 @@ ures_getNextResource(UResourceBundle *resourceBundle,
|
|||
* @param resourceBundle a resource
|
||||
* @param len fill in length of the string
|
||||
* @param key fill in for key associated with this string. NULL if no key
|
||||
* @param status fills in the outgoing error code. If an error occured, we may return NULL, but don't
|
||||
* @param status fills in the outgoing error code. If an error occurred, we may return NULL, but don't
|
||||
* count on it. Check status instead!
|
||||
* @return a pointer to a zero-terminated UChar array which lives in a memory mapped/DLL file.
|
||||
* @stable ICU 2.0
|
||||
|
@ -615,7 +615,7 @@ ures_getNextString(UResourceBundle *resourceBundle,
|
|||
* @param fillIn if NULL a new UResourceBundle struct is allocated and must be closed by the caller.
|
||||
* Alternatively, you can supply a struct to be filled by this function.
|
||||
* @param status fills in the outgoing error code. Don't count on NULL being returned if an error has
|
||||
* occured. Check status instead.
|
||||
* occurred. Check status instead.
|
||||
* @return a pointer to a UResourceBundle struct. If fill in param was NULL, caller must close it
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
|
@ -631,7 +631,7 @@ ures_getByIndex(const UResourceBundle *resourceBundle,
|
|||
* @param resourceBundle a resource
|
||||
* @param indexS an index to the wanted string.
|
||||
* @param len fill in length of the string
|
||||
* @param status fills in the outgoing error code. If an error occured, we may return NULL, but don't
|
||||
* @param status fills in the outgoing error code. If an error occurred, we may return NULL, but don't
|
||||
* count on it. Check status instead!
|
||||
* @return a pointer to a zero-terminated UChar array which lives in a memory mapped/DLL file.
|
||||
* @stable ICU 2.0
|
||||
|
@ -722,7 +722,7 @@ ures_getByKey(const UResourceBundle *resourceBundle,
|
|||
* @param resB a resource
|
||||
* @param key a key associated with the wanted string
|
||||
* @param len fill in length of the string
|
||||
* @param status fills in the outgoing error code. If an error occured, we may return NULL, but don't
|
||||
* @param status fills in the outgoing error code. If an error occurred, we may return NULL, but don't
|
||||
* count on it. Check status instead!
|
||||
* @return a pointer to a zero-terminated UChar array which lives in a memory mapped/DLL file.
|
||||
* @stable ICU 2.0
|
||||
|
|
|
@ -476,7 +476,7 @@ typedef enum UScriptCode {
|
|||
* @param nameOrAbbrOrLocale name of the script, as given in
|
||||
* PropertyValueAliases.txt, or ISO 15924 code or locale
|
||||
* @param fillIn the UScriptCode buffer to fill in the script code
|
||||
* @param capacity the capacity (size) fo UScriptCode buffer passed in.
|
||||
* @param capacity the capacity (size) of UScriptCode buffer passed in.
|
||||
* @param err the error status code.
|
||||
* @return The number of script codes filled in the buffer passed in
|
||||
* @stable ICU 2.4
|
||||
|
|
|
@ -93,7 +93,7 @@
|
|||
* which must not indicate a failure before the function call.
|
||||
*
|
||||
* @return The number of UChars written to the destination buffer.
|
||||
* If an error occured, then no output was written, or it may be
|
||||
* If an error occurred, then no output was written, or it may be
|
||||
* incomplete. If <code>U_BUFFER_OVERFLOW_ERROR</code> is set, then
|
||||
* the return value indicates the necessary destination buffer size.
|
||||
* @stable ICU 2.0
|
||||
|
|
10
deps/icu-small/source/common/unicode/usprep.h
vendored
10
deps/icu-small/source/common/unicode/usprep.h
vendored
|
@ -33,14 +33,14 @@
|
|||
* StringPrep prepares Unicode strings for use in network protocols.
|
||||
* Profiles of StringPrep are sets of rules and data according to which the
|
||||
* Unicode Strings are prepared. Each profile contains tables which describe
|
||||
* how a code point should be treated. The tables are broadly classied into
|
||||
* how a code point should be treated. The tables are broadly classified into
|
||||
* <ul>
|
||||
* <li> Unassinged Table: Contains code points that are unassigned
|
||||
* <li> Unassigned Table: Contains code points that are unassigned
|
||||
* in the Unicode Version supported by StringPrep. Currently
|
||||
* RFC 3454 supports Unicode 3.2. </li>
|
||||
* <li> Prohibited Table: Contains code points that are prohibted from
|
||||
* <li> Prohibited Table: Contains code points that are prohibited from
|
||||
* the output of the StringPrep processing function. </li>
|
||||
* <li> Mapping Table: Contains code ponts that are deleted from the output or case mapped. </li>
|
||||
* <li> Mapping Table: Contains code points that are deleted from the output or case mapped. </li>
|
||||
* </ul>
|
||||
*
|
||||
* The procedure for preparing Unicode strings:
|
||||
|
@ -230,7 +230,7 @@ U_NAMESPACE_END
|
|||
|
||||
/**
|
||||
* Prepare the input buffer for use in applications with the given profile. This operation maps, normalizes(NFKC),
|
||||
* checks for prohited and BiDi characters in the order defined by RFC 3454
|
||||
* checks for prohibited and BiDi characters in the order defined by RFC 3454
|
||||
* depending on the options specified in the profile.
|
||||
*
|
||||
* @param prep The profile to use
|
||||
|
|
|
@ -403,7 +403,7 @@ u_strspn(const UChar *string, const UChar *matchSet);
|
|||
* @param saveState The current pointer within the original string,
|
||||
* which is set by this function. The saveState
|
||||
* parameter should be the address of a local variable of type
|
||||
* UChar *. (i.e. defined "Uhar *myLocalSaveState" and use
|
||||
* UChar *. (i.e. defined "UChar *myLocalSaveState" and use
|
||||
* &myLocalSaveState for this parameter).
|
||||
* @return A pointer to the next token found in src, or NULL
|
||||
* when there are no more tokens.
|
||||
|
@ -884,7 +884,7 @@ u_memrchr32(const UChar *s, UChar32 c, int32_t count);
|
|||
* Unicode String literals in C.
|
||||
* We need one macro to declare a variable for the string
|
||||
* and to statically preinitialize it if possible,
|
||||
* and a second macro to dynamically intialize such a string variable if necessary.
|
||||
* and a second macro to dynamically initialize such a string variable if necessary.
|
||||
*
|
||||
* The macros are defined for maximum performance.
|
||||
* They work only for strings that contain "invariant characters", i.e.,
|
||||
|
|
8
deps/icu-small/source/common/unicode/utext.h
vendored
8
deps/icu-small/source/common/unicode/utext.h
vendored
|
@ -655,10 +655,10 @@ utext_getPreviousNativeIndex(UText *ut);
|
|||
* @param ut the UText from which to extract data.
|
||||
* @param nativeStart the native index of the first character to extract.
|
||||
* If the specified index is out of range,
|
||||
* it will be pinned to to be within 0 <= index <= textLength
|
||||
* it will be pinned to be within 0 <= index <= textLength
|
||||
* @param nativeLimit the native string index of the position following the last
|
||||
* character to extract. If the specified index is out of range,
|
||||
* it will be pinned to to be within 0 <= index <= textLength.
|
||||
* it will be pinned to be within 0 <= index <= textLength.
|
||||
* nativeLimit must be >= nativeStart.
|
||||
* @param dest the UChar (UTF-16) buffer into which the extracted text is placed
|
||||
* @param destCapacity The size, in UChars, of the destination buffer. May be zero
|
||||
|
@ -906,7 +906,7 @@ utext_copy(UText *ut,
|
|||
* Caution: freezing a UText will disable changes made via the specific
|
||||
* frozen UText wrapper only; it will not have any effect on the ability to
|
||||
* directly modify the text by bypassing the UText. Any such backdoor modifications
|
||||
* are always an error while UText access is occuring because the underlying
|
||||
* are always an error while UText access is occurring because the underlying
|
||||
* text can get out of sync with UText's buffering.
|
||||
* </p>
|
||||
*
|
||||
|
@ -1452,7 +1452,7 @@ struct UText {
|
|||
void *pExtra;
|
||||
|
||||
/**
|
||||
* (protected) Pointer to string or text-containin object or similar.
|
||||
* (protected) Pointer to string or text-containing object or similar.
|
||||
* This is the source of the text that this UText is wrapping, in a format
|
||||
* that is known to the text provider functions.
|
||||
* @stable ICU 3.4
|
||||
|
|
166
deps/icu-small/source/common/unicode/utf8.h
vendored
166
deps/icu-small/source/common/unicode/utf8.h
vendored
|
@ -348,29 +348,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
|
|||
* @see U8_NEXT_UNSAFE
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U8_NEXT(s, i, length, c) { \
|
||||
(c)=(uint8_t)(s)[(i)++]; \
|
||||
if(!U8_IS_SINGLE(c)) { \
|
||||
uint8_t __t1, __t2; \
|
||||
if( /* handle U+0800..U+FFFF inline */ \
|
||||
(0xe0<=(c) && (c)<0xf0) && \
|
||||
(((i)+1)<(length) || (length)<0) && \
|
||||
U8_IS_VALID_LEAD3_AND_T1((c), __t1=(s)[i]) && \
|
||||
(__t2=(s)[(i)+1]-0x80)<=0x3f) { \
|
||||
(c)=(((c)&0xf)<<12)|((__t1&0x3f)<<6)|__t2; \
|
||||
(i)+=2; \
|
||||
} else if( /* handle U+0080..U+07FF inline */ \
|
||||
((c)<0xe0 && (c)>=0xc2) && \
|
||||
((i)!=(length)) && \
|
||||
(__t1=(s)[i]-0x80)<=0x3f) { \
|
||||
(c)=(((c)&0x1f)<<6)|__t1; \
|
||||
++(i); \
|
||||
} else { \
|
||||
/* function call for "complicated" and error cases */ \
|
||||
(c)=utf8_nextCharSafeBody((const uint8_t *)s, &(i), (length), c, -1); \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
#define U8_NEXT(s, i, length, c) U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, U_SENTINEL)
|
||||
|
||||
/**
|
||||
* Get a code point from a string at a code point boundary offset,
|
||||
|
@ -396,26 +374,33 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
|
|||
* @see U8_NEXT
|
||||
* @stable ICU 51
|
||||
*/
|
||||
#define U8_NEXT_OR_FFFD(s, i, length, c) { \
|
||||
#define U8_NEXT_OR_FFFD(s, i, length, c) U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, 0xfffd)
|
||||
|
||||
/** @internal */
|
||||
#define U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, sub) { \
|
||||
(c)=(uint8_t)(s)[(i)++]; \
|
||||
if(!U8_IS_SINGLE(c)) { \
|
||||
uint8_t __t1, __t2; \
|
||||
if( /* handle U+0800..U+FFFF inline */ \
|
||||
(0xe0<=(c) && (c)<0xf0) && \
|
||||
(((i)+1)<(length) || (length)<0) && \
|
||||
U8_IS_VALID_LEAD3_AND_T1((c), __t1=(s)[i]) && \
|
||||
(__t2=(s)[(i)+1]-0x80)<=0x3f) { \
|
||||
(c)=(((c)&0xf)<<12)|((__t1&0x3f)<<6)|__t2; \
|
||||
(i)+=2; \
|
||||
} else if( /* handle U+0080..U+07FF inline */ \
|
||||
((c)<0xe0 && (c)>=0xc2) && \
|
||||
((i)!=(length)) && \
|
||||
(__t1=(s)[i]-0x80)<=0x3f) { \
|
||||
(c)=(((c)&0x1f)<<6)|__t1; \
|
||||
++(i); \
|
||||
uint8_t __t = 0; \
|
||||
if((i)!=(length) && \
|
||||
/* fetch/validate/assemble all but last trail byte */ \
|
||||
((c)>=0xe0 ? \
|
||||
((c)<0xf0 ? /* U+0800..U+FFFF except surrogates */ \
|
||||
U8_LEAD3_T1_BITS[(c)&=0xf]&(1<<((__t=(s)[i])>>5)) && \
|
||||
(__t&=0x3f, 1) \
|
||||
: /* U+10000..U+10FFFF */ \
|
||||
((c)-=0xf0)<=4 && \
|
||||
U8_LEAD4_T1_BITS[(__t=(s)[i])>>4]&(1<<(c)) && \
|
||||
((c)=((c)<<6)|(__t&0x3f), ++(i)!=(length)) && \
|
||||
(__t=(s)[i]-0x80)<=0x3f) && \
|
||||
/* valid second-to-last trail byte */ \
|
||||
((c)=((c)<<6)|__t, ++(i)!=(length)) \
|
||||
: /* U+0080..U+07FF */ \
|
||||
(c)>=0xc2 && ((c)&=0x1f, 1)) && \
|
||||
/* last trail byte */ \
|
||||
(__t=(s)[i]-0x80)<=0x3f && \
|
||||
((c)=((c)<<6)|__t, ++(i), 1)) { \
|
||||
} else { \
|
||||
/* function call for "complicated" and error cases */ \
|
||||
(c)=utf8_nextCharSafeBody((const uint8_t *)s, &(i), (length), c, -3); \
|
||||
(c)=(sub); /* ill-formed*/ \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
@ -434,21 +419,22 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
|
|||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U8_APPEND_UNSAFE(s, i, c) { \
|
||||
if((uint32_t)(c)<=0x7f) { \
|
||||
(s)[(i)++]=(uint8_t)(c); \
|
||||
uint32_t __uc=(c); \
|
||||
if(__uc<=0x7f) { \
|
||||
(s)[(i)++]=(uint8_t)__uc; \
|
||||
} else { \
|
||||
if((uint32_t)(c)<=0x7ff) { \
|
||||
(s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \
|
||||
if(__uc<=0x7ff) { \
|
||||
(s)[(i)++]=(uint8_t)((__uc>>6)|0xc0); \
|
||||
} else { \
|
||||
if((uint32_t)(c)<=0xffff) { \
|
||||
(s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \
|
||||
if(__uc<=0xffff) { \
|
||||
(s)[(i)++]=(uint8_t)((__uc>>12)|0xe0); \
|
||||
} else { \
|
||||
(s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); \
|
||||
(s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); \
|
||||
(s)[(i)++]=(uint8_t)((__uc>>18)|0xf0); \
|
||||
(s)[(i)++]=(uint8_t)(((__uc>>12)&0x3f)|0x80); \
|
||||
} \
|
||||
(s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \
|
||||
(s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \
|
||||
} \
|
||||
(s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
|
||||
(s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
|
||||
} \
|
||||
}
|
||||
|
||||
|
@ -470,17 +456,23 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
|
|||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U8_APPEND(s, i, capacity, c, isError) { \
|
||||
if((uint32_t)(c)<=0x7f) { \
|
||||
(s)[(i)++]=(uint8_t)(c); \
|
||||
} else if((uint32_t)(c)<=0x7ff && (i)+1<(capacity)) { \
|
||||
(s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \
|
||||
(s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
|
||||
} else if((uint32_t)(c)<=0xd7ff && (i)+2<(capacity)) { \
|
||||
(s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \
|
||||
(s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \
|
||||
(s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
|
||||
uint32_t __uc=(c); \
|
||||
if(__uc<=0x7f) { \
|
||||
(s)[(i)++]=(uint8_t)__uc; \
|
||||
} else if(__uc<=0x7ff && (i)+1<(capacity)) { \
|
||||
(s)[(i)++]=(uint8_t)((__uc>>6)|0xc0); \
|
||||
(s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
|
||||
} else if((__uc<=0xd7ff || (0xe000<=__uc && __uc<=0xffff)) && (i)+2<(capacity)) { \
|
||||
(s)[(i)++]=(uint8_t)((__uc>>12)|0xe0); \
|
||||
(s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \
|
||||
(s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
|
||||
} else if(0xffff<__uc && __uc<=0x10ffff && (i)+3<(capacity)) { \
|
||||
(s)[(i)++]=(uint8_t)((__uc>>18)|0xf0); \
|
||||
(s)[(i)++]=(uint8_t)(((__uc>>12)&0x3f)|0x80); \
|
||||
(s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \
|
||||
(s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
|
||||
} else { \
|
||||
(i)=utf8_appendCharSafeBody(s, (i), (capacity), c, &(isError)); \
|
||||
(isError)=TRUE; \
|
||||
} \
|
||||
}
|
||||
|
||||
|
@ -600,12 +592,15 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
|
|||
* If the offset points to a UTF-8 trail byte,
|
||||
* then the offset is moved backward to the corresponding lead byte.
|
||||
* Otherwise, it is not modified.
|
||||
*
|
||||
* "Safe" macro, checks for illegal sequences and for string boundaries.
|
||||
* Unlike U8_TRUNCATE_IF_INCOMPLETE(), this macro always reads s[i].
|
||||
*
|
||||
* @param s const uint8_t * string
|
||||
* @param start int32_t starting string offset (usually 0)
|
||||
* @param i int32_t string offset, must be start<=i
|
||||
* @see U8_SET_CP_START_UNSAFE
|
||||
* @see U8_TRUNCATE_IF_INCOMPLETE
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U8_SET_CP_START(s, start, i) { \
|
||||
|
@ -614,6 +609,57 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
|
|||
} \
|
||||
}
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* If the string ends with a UTF-8 byte sequence that is valid so far
|
||||
* but incomplete, then reduce the length of the string to end before
|
||||
* the lead byte of that incomplete sequence.
|
||||
* For example, if the string ends with E1 80, the length is reduced by 2.
|
||||
*
|
||||
* In all other cases (the string ends with a complete sequence, or it is not
|
||||
* possible for any further trail byte to extend the trailing sequence)
|
||||
* the length remains unchanged.
|
||||
*
|
||||
* Useful for processing text split across multiple buffers
|
||||
* (save the incomplete sequence for later)
|
||||
* and for optimizing iteration
|
||||
* (check for string length only once per character).
|
||||
*
|
||||
* "Safe" macro, checks for illegal sequences and for string boundaries.
|
||||
* Unlike U8_SET_CP_START(), this macro never reads s[length].
|
||||
*
|
||||
* (In UTF-16, simply check for U16_IS_LEAD(last code unit).)
|
||||
*
|
||||
* @param s const uint8_t * string
|
||||
* @param start int32_t starting string offset (usually 0)
|
||||
* @param length int32_t string length (usually start<=length)
|
||||
* @see U8_SET_CP_START
|
||||
* @draft ICU 61
|
||||
*/
|
||||
#define U8_TRUNCATE_IF_INCOMPLETE(s, start, length) \
|
||||
if((length)>(start)) { \
|
||||
uint8_t __b1=s[(length)-1]; \
|
||||
if(U8_IS_SINGLE(__b1)) { \
|
||||
/* common ASCII character */ \
|
||||
} else if(U8_IS_LEAD(__b1)) { \
|
||||
--(length); \
|
||||
} else if(U8_IS_TRAIL(__b1) && ((length)-2)>=(start)) { \
|
||||
uint8_t __b2=s[(length)-2]; \
|
||||
if(0xe0<=__b2 && __b2<=0xf4) { \
|
||||
if(__b2<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(__b2, __b1) : \
|
||||
U8_IS_VALID_LEAD4_AND_T1(__b2, __b1)) { \
|
||||
(length)-=2; \
|
||||
} \
|
||||
} else if(U8_IS_TRAIL(__b2) && ((length)-3)>=(start)) { \
|
||||
uint8_t __b3=s[(length)-3]; \
|
||||
if(0xf0<=__b3 && __b3<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(__b3, __b2)) { \
|
||||
(length)-=3; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
/* definitions with backward iteration -------------------------------------- */
|
||||
|
||||
/**
|
||||
|
|
|
@ -183,7 +183,7 @@ UTraceData(const void *context, int32_t fnNumber, int32_t level,
|
|||
* tracing functions must themselves filter by checking that the
|
||||
* current thread is the desired thread.
|
||||
*
|
||||
* @param context an uninterpretted pointer. Whatever is passed in
|
||||
* @param context an uninterpreted pointer. Whatever is passed in
|
||||
* here will in turn be passed to each of the tracing
|
||||
* functions UTraceEntry, UTraceExit and UTraceData.
|
||||
* ICU does not use or alter this pointer.
|
||||
|
@ -320,7 +320,7 @@ utrace_getFunctions(const void **context,
|
|||
* human readable form. Note that a UTraceData function may choose
|
||||
* to not format the data; it could, for example, save it in
|
||||
* the raw form it was received (more compact), leaving
|
||||
* formatting for a later trace analyis tool.
|
||||
* formatting for a later trace analysis tool.
|
||||
* @param outBuf pointer to a buffer to receive the formatted output. Output
|
||||
* will be nul terminated if there is space in the buffer -
|
||||
* if the length of the requested output < the output buffer size.
|
||||
|
|
17
deps/icu-small/source/common/unicode/utypes.h
vendored
17
deps/icu-small/source/common/unicode/utypes.h
vendored
|
@ -145,7 +145,7 @@
|
|||
/**
|
||||
* U_ICU_ENTRY_POINT is the name of the DLL entry point to the ICU data library.
|
||||
* Defined as a literal, not a string.
|
||||
* Tricky Preprocessor use - ## operator replaces macro paramters with the literal string
|
||||
* Tricky Preprocessor use - ## operator replaces macro parameters with the literal string
|
||||
* from the corresponding macro invocation, _before_ other macro substitutions.
|
||||
* Need a nested \#defines to get the actual version numbers rather than
|
||||
* the literal text U_ICU_VERSION_MAJOR_NUM into the name.
|
||||
|
@ -446,14 +446,14 @@ typedef enum UErrorCode {
|
|||
U_BUFFER_OVERFLOW_ERROR = 15, /**< A result would not fit in the supplied buffer */
|
||||
U_UNSUPPORTED_ERROR = 16, /**< Requested operation not supported in current context */
|
||||
U_RESOURCE_TYPE_MISMATCH = 17, /**< an operation is requested over a resource that does not support it */
|
||||
U_ILLEGAL_ESCAPE_SEQUENCE = 18, /**< ISO-2022 illlegal escape sequence */
|
||||
U_ILLEGAL_ESCAPE_SEQUENCE = 18, /**< ISO-2022 illegal escape sequence */
|
||||
U_UNSUPPORTED_ESCAPE_SEQUENCE = 19, /**< ISO-2022 unsupported escape sequence */
|
||||
U_NO_SPACE_AVAILABLE = 20, /**< No space available for in-buffer expansion for Arabic shaping */
|
||||
U_CE_NOT_FOUND_ERROR = 21, /**< Currently used only while setting variable top, but can be used generally */
|
||||
U_PRIMARY_TOO_LONG_ERROR = 22, /**< User tried to set variable top to a primary that is longer than two bytes */
|
||||
U_STATE_TOO_OLD_ERROR = 23, /**< ICU cannot construct a service from this state, as it is no longer supported */
|
||||
U_TOO_MANY_ALIASES_ERROR = 24, /**< There are too many aliases in the path to the requested resource.
|
||||
It is very possible that a circular alias definition has occured */
|
||||
It is very possible that a circular alias definition has occurred */
|
||||
U_ENUM_OUT_OF_SYNC_ERROR = 25, /**< UEnumeration out of sync with underlying collection */
|
||||
U_INVARIANT_CONVERSION_ERROR = 26, /**< Unable to convert a UChar* string to char* with the invariant converter. */
|
||||
U_INVALID_STATE_ERROR = 27, /**< Requested operation can not be completed with ICU in its current state */
|
||||
|
@ -499,7 +499,7 @@ typedef enum UErrorCode {
|
|||
U_MULTIPLE_COMPOUND_FILTERS, /**< More than one compound filter */
|
||||
U_INVALID_RBT_SYNTAX, /**< A "::id" rule was passed to the RuleBasedTransliterator parser */
|
||||
U_INVALID_PROPERTY_PATTERN, /**< UNUSED as of ICU 2.4 */
|
||||
U_MALFORMED_PRAGMA, /**< A 'use' pragma is invlalid */
|
||||
U_MALFORMED_PRAGMA, /**< A 'use' pragma is invalid */
|
||||
U_UNCLOSED_SEGMENT, /**< A closing ')' is missing */
|
||||
U_ILLEGAL_CHAR_IN_SEGMENT, /**< UNUSED as of ICU 2.4 */
|
||||
U_VARIABLE_RANGE_EXHAUSTED, /**< Too many stand-ins generated for the given variable range */
|
||||
|
@ -539,12 +539,15 @@ typedef enum UErrorCode {
|
|||
U_DEFAULT_KEYWORD_MISSING, /**< Missing DEFAULT rule in plural rules */
|
||||
U_DECIMAL_NUMBER_SYNTAX_ERROR, /**< Decimal number syntax error */
|
||||
U_FORMAT_INEXACT_ERROR, /**< Cannot format a number exactly and rounding mode is ROUND_UNNECESSARY @stable ICU 4.8 */
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
U_NUMBER_ARG_OUTOFBOUNDS_ERROR, /**< The argument to a NumberFormatter helper method was out of bounds; the bounds are usually 0 to 999. @draft ICU 61 */
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* One more than the highest normal formatting API error code.
|
||||
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
|
||||
*/
|
||||
U_FMT_PARSE_ERROR_LIMIT,
|
||||
U_FMT_PARSE_ERROR_LIMIT = 0x10113,
|
||||
#endif // U_HIDE_DEPRECATED_API
|
||||
|
||||
/*
|
||||
|
@ -555,7 +558,7 @@ typedef enum UErrorCode {
|
|||
U_BRK_HEX_DIGITS_EXPECTED, /**< Hex digits expected as part of a escaped char in a rule. */
|
||||
U_BRK_SEMICOLON_EXPECTED, /**< Missing ';' at the end of a RBBI rule. */
|
||||
U_BRK_RULE_SYNTAX, /**< Syntax error in RBBI rule. */
|
||||
U_BRK_UNCLOSED_SET, /**< UnicodeSet witing an RBBI rule missing a closing ']'. */
|
||||
U_BRK_UNCLOSED_SET, /**< UnicodeSet within an RBBI rule missing a closing ']'. */
|
||||
U_BRK_ASSIGN_ERROR, /**< Syntax error in RBBI rule assignment statement. */
|
||||
U_BRK_VARIABLE_REDFINITION, /**< RBBI rule $Variable redefined. */
|
||||
U_BRK_MISMATCHED_PAREN, /**< Mis-matched parentheses in an RBBI rule. */
|
||||
|
@ -564,7 +567,7 @@ typedef enum UErrorCode {
|
|||
U_BRK_INIT_ERROR, /**< Initialization failure. Probable missing ICU Data. */
|
||||
U_BRK_RULE_EMPTY_SET, /**< Rule contains an empty Unicode Set. */
|
||||
U_BRK_UNRECOGNIZED_OPTION, /**< !!option in RBBI rules not recognized. */
|
||||
U_BRK_MALFORMED_RULE_TAG, /**< The {nnn} tag on a rule is mal formed */
|
||||
U_BRK_MALFORMED_RULE_TAG, /**< The {nnn} tag on a rule is malformed */
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* One more than the highest normal BreakIterator error code.
|
||||
|
|
25
deps/icu-small/source/common/unicode/uvernum.h
vendored
25
deps/icu-small/source/common/unicode/uvernum.h
vendored
|
@ -58,13 +58,13 @@
|
|||
* This value will change in the subsequent releases of ICU
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U_ICU_VERSION_MAJOR_NUM 60
|
||||
#define U_ICU_VERSION_MAJOR_NUM 61
|
||||
|
||||
/** The current ICU minor version as an integer.
|
||||
* This value will change in the subsequent releases of ICU
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
#define U_ICU_VERSION_MINOR_NUM 2
|
||||
#define U_ICU_VERSION_MINOR_NUM 1
|
||||
|
||||
/** The current ICU patchlevel version as an integer.
|
||||
* This value will change in the subsequent releases of ICU
|
||||
|
@ -84,7 +84,7 @@
|
|||
* This value will change in the subsequent releases of ICU
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
#define U_ICU_VERSION_SUFFIX _60
|
||||
#define U_ICU_VERSION_SUFFIX _61
|
||||
|
||||
/**
|
||||
* \def U_DEF2_ICU_ENTRY_POINT_RENAME
|
||||
|
@ -119,19 +119,26 @@
|
|||
* This value will change in the subsequent releases of ICU
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U_ICU_VERSION "60.2"
|
||||
#define U_ICU_VERSION "61.1"
|
||||
|
||||
/** The current ICU library major/minor version as a string without dots, for library name suffixes.
|
||||
* This value will change in the subsequent releases of ICU
|
||||
* @stable ICU 2.6
|
||||
/**
|
||||
* The current ICU library major version number as a string, for library name suffixes.
|
||||
* This value will change in subsequent releases of ICU.
|
||||
*
|
||||
* Until ICU 4.8, this was the combination of the single-digit major and minor ICU version numbers
|
||||
* into one string without dots ("48").
|
||||
* Since ICU 49, it is the double-digit major ICU version number.
|
||||
* See http://userguide.icu-project.org/design#TOC-Version-Numbers-in-ICU
|
||||
*
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
#define U_ICU_VERSION_SHORT "60"
|
||||
#define U_ICU_VERSION_SHORT "61"
|
||||
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
/** Data version in ICU4C.
|
||||
* @internal ICU 4.4 Internal Use Only
|
||||
**/
|
||||
#define U_ICU_DATA_VERSION "60.2"
|
||||
#define U_ICU_DATA_VERSION "61.1"
|
||||
#endif /* U_HIDE_INTERNAL_API */
|
||||
|
||||
/*===========================================================================
|
||||
|
|
10
deps/icu-small/source/common/unicode/uversion.h
vendored
10
deps/icu-small/source/common/unicode/uversion.h
vendored
|
@ -105,7 +105,7 @@ typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH];
|
|||
* @stable ICU 2.4
|
||||
*/
|
||||
|
||||
/* Define namespace symbols if the compiler supports it. */
|
||||
/* Define C++ namespace symbols. */
|
||||
#ifdef __cplusplus
|
||||
# if U_DISABLE_RENAMING
|
||||
# define U_ICU_NAMESPACE icu
|
||||
|
@ -122,7 +122,13 @@ typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH];
|
|||
# define U_NAMESPACE_QUALIFIER U_ICU_NAMESPACE::
|
||||
|
||||
# ifndef U_USING_ICU_NAMESPACE
|
||||
# define U_USING_ICU_NAMESPACE 1
|
||||
# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || \
|
||||
defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) || \
|
||||
defined(U_LAYOUTEX_IMPLEMENTATION) || defined(U_TOOLUTIL_IMPLEMENTATION)
|
||||
# define U_USING_ICU_NAMESPACE 0
|
||||
# else
|
||||
# define U_USING_ICU_NAMESPACE 0
|
||||
# endif
|
||||
# endif
|
||||
# if U_USING_ICU_NAMESPACE
|
||||
U_NAMESPACE_USE
|
||||
|
|
302
deps/icu-small/source/common/unifiedcache.cpp
vendored
302
deps/icu-small/source/common/unifiedcache.cpp
vendored
|
@ -6,24 +6,26 @@
|
|||
* others. All Rights Reserved.
|
||||
******************************************************************************
|
||||
*
|
||||
* File UNIFIEDCACHE.CPP
|
||||
* File unifiedcache.cpp
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include "uhash.h"
|
||||
#include "unifiedcache.h"
|
||||
#include "umutex.h"
|
||||
|
||||
#include <algorithm> // For std::max()
|
||||
|
||||
#include "mutex.h"
|
||||
#include "uassert.h"
|
||||
#include "uhash.h"
|
||||
#include "ucln_cmn.h"
|
||||
#include "umutex.h"
|
||||
|
||||
static icu::UnifiedCache *gCache = NULL;
|
||||
static icu::SharedObject *gNoValue = NULL;
|
||||
static UMutex gCacheMutex = U_MUTEX_INITIALIZER;
|
||||
static UConditionVar gInProgressValueAddedCond = U_CONDITION_INITIALIZER;
|
||||
static icu::UInitOnce gCacheInitOnce = U_INITONCE_INITIALIZER;
|
||||
static const int32_t MAX_EVICT_ITERATIONS = 10;
|
||||
|
||||
static const int32_t MAX_EVICT_ITERATIONS = 10;
|
||||
static const int32_t DEFAULT_MAX_UNUSED = 1000;
|
||||
static const int32_t DEFAULT_PERCENTAGE_OF_IN_USE = 100;
|
||||
|
||||
|
@ -35,10 +37,6 @@ static UBool U_CALLCONV unifiedcache_cleanup() {
|
|||
delete gCache;
|
||||
gCache = NULL;
|
||||
}
|
||||
if (gNoValue) {
|
||||
delete gNoValue;
|
||||
gNoValue = NULL;
|
||||
}
|
||||
return TRUE;
|
||||
}
|
||||
U_CDECL_END
|
||||
|
@ -73,23 +71,15 @@ static void U_CALLCONV cacheInit(UErrorCode &status) {
|
|||
ucln_common_registerCleanup(
|
||||
UCLN_COMMON_UNIFIED_CACHE, unifiedcache_cleanup);
|
||||
|
||||
// gNoValue must be created first to avoid assertion error in
|
||||
// cache constructor.
|
||||
gNoValue = new SharedObject();
|
||||
gCache = new UnifiedCache(status);
|
||||
if (gCache == NULL) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
if (U_FAILURE(status)) {
|
||||
delete gCache;
|
||||
delete gNoValue;
|
||||
gCache = NULL;
|
||||
gNoValue = NULL;
|
||||
return;
|
||||
}
|
||||
// We add a softref because we want hash elements with gNoValue to be
|
||||
// elligible for purging but we don't ever want gNoValue to be deleted.
|
||||
gNoValue->addSoftRef();
|
||||
}
|
||||
|
||||
UnifiedCache *UnifiedCache::getInstance(UErrorCode &status) {
|
||||
|
@ -104,14 +94,24 @@ UnifiedCache *UnifiedCache::getInstance(UErrorCode &status) {
|
|||
UnifiedCache::UnifiedCache(UErrorCode &status) :
|
||||
fHashtable(NULL),
|
||||
fEvictPos(UHASH_FIRST),
|
||||
fItemsInUseCount(0),
|
||||
fNumValuesTotal(0),
|
||||
fNumValuesInUse(0),
|
||||
fMaxUnused(DEFAULT_MAX_UNUSED),
|
||||
fMaxPercentageOfInUse(DEFAULT_PERCENTAGE_OF_IN_USE),
|
||||
fAutoEvictedCount(0) {
|
||||
fAutoEvictedCount(0),
|
||||
fNoValue(nullptr) {
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
U_ASSERT(gNoValue != NULL);
|
||||
fNoValue = new SharedObject();
|
||||
if (fNoValue == nullptr) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
fNoValue->softRefCount = 1; // Add fake references to prevent fNoValue from being deleted
|
||||
fNoValue->hardRefCount = 1; // when other references to it are removed.
|
||||
fNoValue->cachePtr = this;
|
||||
|
||||
fHashtable = uhash_open(
|
||||
&ucache_hashKeys,
|
||||
&ucache_compareKeys,
|
||||
|
@ -139,7 +139,7 @@ void UnifiedCache::setEvictionPolicy(
|
|||
|
||||
int32_t UnifiedCache::unusedCount() const {
|
||||
Mutex lock(&gCacheMutex);
|
||||
return uhash_count(fHashtable) - fItemsInUseCount;
|
||||
return uhash_count(fHashtable) - fNumValuesInUse;
|
||||
}
|
||||
|
||||
int64_t UnifiedCache::autoEvictedCount() const {
|
||||
|
@ -161,6 +161,12 @@ void UnifiedCache::flush() const {
|
|||
while (_flush(FALSE));
|
||||
}
|
||||
|
||||
void UnifiedCache::handleUnreferencedObject() const {
|
||||
Mutex lock(&gCacheMutex);
|
||||
--fNumValuesInUse;
|
||||
_runEvictionSlice();
|
||||
}
|
||||
|
||||
#ifdef UNIFIED_CACHE_DEBUG
|
||||
#include <stdio.h>
|
||||
|
||||
|
@ -199,7 +205,7 @@ void UnifiedCache::_dumpContents() const {
|
|||
"Unified Cache: Key '%s', error %d, value %p, total refcount %d, soft refcount %d\n",
|
||||
key->writeDescription(buffer, 256),
|
||||
key->creationStatus,
|
||||
sharedObject == gNoValue ? NULL :sharedObject,
|
||||
sharedObject == fNoValue ? NULL :sharedObject,
|
||||
sharedObject->getRefCount(),
|
||||
sharedObject->getSoftRefCount());
|
||||
}
|
||||
|
@ -219,10 +225,11 @@ UnifiedCache::~UnifiedCache() {
|
|||
_flush(TRUE);
|
||||
}
|
||||
uhash_close(fHashtable);
|
||||
fHashtable = nullptr;
|
||||
delete fNoValue;
|
||||
fNoValue = nullptr;
|
||||
}
|
||||
|
||||
// Returns the next element in the cache round robin style.
|
||||
// On entry, gCacheMutex must be held.
|
||||
const UHashElement *
|
||||
UnifiedCache::_nextElement() const {
|
||||
const UHashElement *element = uhash_nextElement(fHashtable, &fEvictPos);
|
||||
|
@ -233,46 +240,36 @@ UnifiedCache::_nextElement() const {
|
|||
return element;
|
||||
}
|
||||
|
||||
// Flushes the contents of the cache. If cache values hold references to other
|
||||
// cache values then _flush should be called in a loop until it returns FALSE.
|
||||
// On entry, gCacheMutex must be held.
|
||||
// On exit, those values with are evictable are flushed. If all is true
|
||||
// then every value is flushed even if it is not evictable.
|
||||
// Returns TRUE if any value in cache was flushed or FALSE otherwise.
|
||||
UBool UnifiedCache::_flush(UBool all) const {
|
||||
UBool result = FALSE;
|
||||
int32_t origSize = uhash_count(fHashtable);
|
||||
for (int32_t i = 0; i < origSize; ++i) {
|
||||
const UHashElement *element = _nextElement();
|
||||
if (element == nullptr) {
|
||||
break;
|
||||
}
|
||||
if (all || _isEvictable(element)) {
|
||||
const SharedObject *sharedObject =
|
||||
(const SharedObject *) element->value.pointer;
|
||||
U_ASSERT(sharedObject->cachePtr = this);
|
||||
uhash_removeElement(fHashtable, element);
|
||||
sharedObject->removeSoftRef();
|
||||
removeSoftRef(sharedObject); // Deletes the sharedObject when softRefCount goes to zero.
|
||||
result = TRUE;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// Computes how many items should be evicted.
|
||||
// On entry, gCacheMutex must be held.
|
||||
// Returns number of items that should be evicted or a value <= 0 if no
|
||||
// items need to be evicted.
|
||||
int32_t UnifiedCache::_computeCountOfItemsToEvict() const {
|
||||
int32_t maxPercentageOfInUseCount =
|
||||
fItemsInUseCount * fMaxPercentageOfInUse / 100;
|
||||
int32_t maxUnusedCount = fMaxUnused;
|
||||
if (maxUnusedCount < maxPercentageOfInUseCount) {
|
||||
maxUnusedCount = maxPercentageOfInUseCount;
|
||||
}
|
||||
return uhash_count(fHashtable) - fItemsInUseCount - maxUnusedCount;
|
||||
int32_t totalItems = uhash_count(fHashtable);
|
||||
int32_t evictableItems = totalItems - fNumValuesInUse;
|
||||
|
||||
int32_t unusedLimitByPercentage = fNumValuesInUse * fMaxPercentageOfInUse / 100;
|
||||
int32_t unusedLimit = std::max(unusedLimitByPercentage, fMaxUnused);
|
||||
int32_t countOfItemsToEvict = std::max(0, evictableItems - unusedLimit);
|
||||
return countOfItemsToEvict;
|
||||
}
|
||||
|
||||
// Run an eviction slice.
|
||||
// On entry, gCacheMutex must be held.
|
||||
// _runEvictionSlice runs a slice of the evict pipeline by examining the next
|
||||
// 10 entries in the cache round robin style evicting them if they are eligible.
|
||||
void UnifiedCache::_runEvictionSlice() const {
|
||||
int32_t maxItemsToEvict = _computeCountOfItemsToEvict();
|
||||
if (maxItemsToEvict <= 0) {
|
||||
|
@ -280,11 +277,14 @@ void UnifiedCache::_runEvictionSlice() const {
|
|||
}
|
||||
for (int32_t i = 0; i < MAX_EVICT_ITERATIONS; ++i) {
|
||||
const UHashElement *element = _nextElement();
|
||||
if (element == nullptr) {
|
||||
break;
|
||||
}
|
||||
if (_isEvictable(element)) {
|
||||
const SharedObject *sharedObject =
|
||||
(const SharedObject *) element->value.pointer;
|
||||
uhash_removeElement(fHashtable, element);
|
||||
sharedObject->removeSoftRef();
|
||||
removeSoftRef(sharedObject); // Deletes sharedObject when SoftRefCount goes to zero.
|
||||
++fAutoEvictedCount;
|
||||
if (--maxItemsToEvict == 0) {
|
||||
break;
|
||||
|
@ -293,11 +293,6 @@ void UnifiedCache::_runEvictionSlice() const {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
// Places a new value and creationStatus in the cache for the given key.
|
||||
// On entry, gCacheMutex must be held. key must not exist in the cache.
|
||||
// On exit, value and creation status placed under key. Soft reference added
|
||||
// to value on successful add. On error sets status.
|
||||
void UnifiedCache::_putNew(
|
||||
const CacheKeyBase &key,
|
||||
const SharedObject *value,
|
||||
|
@ -312,24 +307,17 @@ void UnifiedCache::_putNew(
|
|||
return;
|
||||
}
|
||||
keyToAdopt->fCreationStatus = creationStatus;
|
||||
if (value->noSoftReferences()) {
|
||||
if (value->softRefCount == 0) {
|
||||
_registerMaster(keyToAdopt, value);
|
||||
}
|
||||
uhash_put(fHashtable, keyToAdopt, (void *) value, &status);
|
||||
void *oldValue = uhash_put(fHashtable, keyToAdopt, (void *) value, &status);
|
||||
U_ASSERT(oldValue == nullptr);
|
||||
(void)oldValue;
|
||||
if (U_SUCCESS(status)) {
|
||||
value->addSoftRef();
|
||||
value->softRefCount++;
|
||||
}
|
||||
}
|
||||
|
||||
// Places value and status at key if there is no value at key or if cache
|
||||
// entry for key is in progress. Otherwise, it leaves the current value and
|
||||
// status there.
|
||||
// On entry. gCacheMutex must not be held. value must be
|
||||
// included in the reference count of the object to which it points.
|
||||
// On exit, value and status are changed to what was already in the cache if
|
||||
// something was there and not in progress. Otherwise, value and status are left
|
||||
// unchanged in which case they are placed in the cache on a best-effort basis.
|
||||
// Caller must call removeRef() on value.
|
||||
void UnifiedCache::_putIfAbsentAndGet(
|
||||
const CacheKeyBase &key,
|
||||
const SharedObject *&value,
|
||||
|
@ -352,15 +340,7 @@ void UnifiedCache::_putIfAbsentAndGet(
|
|||
_runEvictionSlice();
|
||||
}
|
||||
|
||||
// Attempts to fetch value and status for key from cache.
|
||||
// On entry, gCacheMutex must not be held value must be NULL and status must
|
||||
// be U_ZERO_ERROR.
|
||||
// On exit, either returns FALSE (In this
|
||||
// case caller should try to create the object) or returns TRUE with value
|
||||
// pointing to the fetched value and status set to fetched status. When
|
||||
// FALSE is returned status may be set to failure if an in progress hash
|
||||
// entry could not be made but value will remain unchanged. When TRUE is
|
||||
// returned, caler must call removeRef() on value.
|
||||
|
||||
UBool UnifiedCache::_poll(
|
||||
const CacheKeyBase &key,
|
||||
const SharedObject *&value,
|
||||
|
@ -369,27 +349,29 @@ UBool UnifiedCache::_poll(
|
|||
U_ASSERT(status == U_ZERO_ERROR);
|
||||
Mutex lock(&gCacheMutex);
|
||||
const UHashElement *element = uhash_find(fHashtable, &key);
|
||||
while (element != NULL && _inProgress(element)) {
|
||||
|
||||
// If the hash table contains an inProgress placeholder entry for this key,
|
||||
// this means that another thread is currently constructing the value object.
|
||||
// Loop, waiting for that construction to complete.
|
||||
while (element != NULL && _inProgress(element)) {
|
||||
umtx_condWait(&gInProgressValueAddedCond, &gCacheMutex);
|
||||
element = uhash_find(fHashtable, &key);
|
||||
}
|
||||
|
||||
// If the hash table contains an entry for the key,
|
||||
// fetch out the contents and return them.
|
||||
if (element != NULL) {
|
||||
_fetch(element, value, status);
|
||||
_fetch(element, value, status);
|
||||
return TRUE;
|
||||
}
|
||||
_putNew(key, gNoValue, U_ZERO_ERROR, status);
|
||||
|
||||
// The hash table contained nothing for this key.
|
||||
// Insert an inProgress place holder value.
|
||||
// Our caller will create the final value and update the hash table.
|
||||
_putNew(key, fNoValue, U_ZERO_ERROR, status);
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
// Gets value out of cache.
|
||||
// On entry. gCacheMutex must not be held. value must be NULL. status
|
||||
// must be U_ZERO_ERROR.
|
||||
// On exit. value and status set to what is in cache at key or on cache
|
||||
// miss the key's createObject() is called and value and status are set to
|
||||
// the result of that. In this latter case, best effort is made to add the
|
||||
// value and status to the cache. If createObject() fails to create a value,
|
||||
// gNoValue is stored in cache, and value is set to NULL. Caller must call
|
||||
// removeRef on value if non NULL.
|
||||
void UnifiedCache::_get(
|
||||
const CacheKeyBase &key,
|
||||
const SharedObject *&value,
|
||||
|
@ -398,7 +380,7 @@ void UnifiedCache::_get(
|
|||
U_ASSERT(value == NULL);
|
||||
U_ASSERT(status == U_ZERO_ERROR);
|
||||
if (_poll(key, value, status)) {
|
||||
if (value == gNoValue) {
|
||||
if (value == fNoValue) {
|
||||
SharedObject::clearPtr(value);
|
||||
}
|
||||
return;
|
||||
|
@ -410,46 +392,22 @@ void UnifiedCache::_get(
|
|||
U_ASSERT(value == NULL || value->hasHardReferences());
|
||||
U_ASSERT(value != NULL || status != U_ZERO_ERROR);
|
||||
if (value == NULL) {
|
||||
SharedObject::copyPtr(gNoValue, value);
|
||||
SharedObject::copyPtr(fNoValue, value);
|
||||
}
|
||||
_putIfAbsentAndGet(key, value, status);
|
||||
if (value == gNoValue) {
|
||||
if (value == fNoValue) {
|
||||
SharedObject::clearPtr(value);
|
||||
}
|
||||
}
|
||||
|
||||
void UnifiedCache::decrementItemsInUseWithLockingAndEviction() const {
|
||||
Mutex mutex(&gCacheMutex);
|
||||
decrementItemsInUse();
|
||||
_runEvictionSlice();
|
||||
}
|
||||
|
||||
void UnifiedCache::incrementItemsInUse() const {
|
||||
++fItemsInUseCount;
|
||||
}
|
||||
|
||||
void UnifiedCache::decrementItemsInUse() const {
|
||||
--fItemsInUseCount;
|
||||
}
|
||||
|
||||
// Register a master cache entry.
|
||||
// On entry, gCacheMutex must be held.
|
||||
// On exit, items in use count incremented, entry is marked as a master
|
||||
// entry, and value registered with cache so that subsequent calls to
|
||||
// addRef() and removeRef() on it correctly updates items in use count
|
||||
void UnifiedCache::_registerMaster(
|
||||
const CacheKeyBase *theKey, const SharedObject *value) const {
|
||||
theKey->fIsMaster = TRUE;
|
||||
++fItemsInUseCount;
|
||||
value->registerWithCache(this);
|
||||
const CacheKeyBase *theKey, const SharedObject *value) const {
|
||||
theKey->fIsMaster = true;
|
||||
value->cachePtr = this;
|
||||
++fNumValuesTotal;
|
||||
++fNumValuesInUse;
|
||||
}
|
||||
|
||||
// Store a value and error in given hash entry.
|
||||
// On entry, gCacheMutex must be held. Hash entry element must be in progress.
|
||||
// value must be non NULL.
|
||||
// On Exit, soft reference added to value. value and status stored in hash
|
||||
// entry. Soft reference removed from previous stored value. Waiting
|
||||
// threads notified.
|
||||
void UnifiedCache::_put(
|
||||
const UHashElement *element,
|
||||
const SharedObject *value,
|
||||
|
@ -458,86 +416,52 @@ void UnifiedCache::_put(
|
|||
const CacheKeyBase *theKey = (const CacheKeyBase *) element->key.pointer;
|
||||
const SharedObject *oldValue = (const SharedObject *) element->value.pointer;
|
||||
theKey->fCreationStatus = status;
|
||||
if (value->noSoftReferences()) {
|
||||
if (value->softRefCount == 0) {
|
||||
_registerMaster(theKey, value);
|
||||
}
|
||||
value->addSoftRef();
|
||||
value->softRefCount++;
|
||||
UHashElement *ptr = const_cast<UHashElement *>(element);
|
||||
ptr->value.pointer = (void *) value;
|
||||
oldValue->removeSoftRef();
|
||||
U_ASSERT(oldValue == fNoValue);
|
||||
removeSoftRef(oldValue);
|
||||
|
||||
// Tell waiting threads that we replace in-progress status with
|
||||
// an error.
|
||||
umtx_condBroadcast(&gInProgressValueAddedCond);
|
||||
}
|
||||
|
||||
void
|
||||
UnifiedCache::copyPtr(const SharedObject *src, const SharedObject *&dest) {
|
||||
if(src != dest) {
|
||||
if(dest != NULL) {
|
||||
dest->removeRefWhileHoldingCacheLock();
|
||||
}
|
||||
dest = src;
|
||||
if(src != NULL) {
|
||||
src->addRefWhileHoldingCacheLock();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
UnifiedCache::clearPtr(const SharedObject *&ptr) {
|
||||
if (ptr != NULL) {
|
||||
ptr->removeRefWhileHoldingCacheLock();
|
||||
ptr = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Fetch value and error code from a particular hash entry.
|
||||
// On entry, gCacheMutex must be held. value must be either NULL or must be
|
||||
// included in the ref count of the object to which it points.
|
||||
// On exit, value and status set to what is in the hash entry. Caller must
|
||||
// eventually call removeRef on value.
|
||||
// If hash entry is in progress, value will be set to gNoValue and status will
|
||||
// be set to U_ZERO_ERROR.
|
||||
void UnifiedCache::_fetch(
|
||||
const UHashElement *element,
|
||||
const SharedObject *&value,
|
||||
UErrorCode &status) {
|
||||
UErrorCode &status) const {
|
||||
const CacheKeyBase *theKey = (const CacheKeyBase *) element->key.pointer;
|
||||
status = theKey->fCreationStatus;
|
||||
|
||||
// Since we have the cache lock, calling regular SharedObject methods
|
||||
// Since we have the cache lock, calling regular SharedObject add/removeRef
|
||||
// could cause us to deadlock on ourselves since they may need to lock
|
||||
// the cache mutex.
|
||||
UnifiedCache::copyPtr((const SharedObject *) element->value.pointer, value);
|
||||
removeHardRef(value);
|
||||
value = static_cast<const SharedObject *>(element->value.pointer);
|
||||
addHardRef(value);
|
||||
}
|
||||
|
||||
// Determine if given hash entry is in progress.
|
||||
// On entry, gCacheMutex must be held.
|
||||
UBool UnifiedCache::_inProgress(const UHashElement *element) {
|
||||
const SharedObject *value = NULL;
|
||||
|
||||
UBool UnifiedCache::_inProgress(const UHashElement* element) const {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
const SharedObject * value = NULL;
|
||||
_fetch(element, value, status);
|
||||
UBool result = _inProgress(value, status);
|
||||
|
||||
// Since we have the cache lock, calling regular SharedObject methods
|
||||
// could cause us to deadlock on ourselves since they may need to lock
|
||||
// the cache mutex.
|
||||
UnifiedCache::clearPtr(value);
|
||||
removeHardRef(value);
|
||||
return result;
|
||||
}
|
||||
|
||||
// Determine if given hash entry is in progress.
|
||||
// On entry, gCacheMutex must be held.
|
||||
UBool UnifiedCache::_inProgress(
|
||||
const SharedObject *theValue, UErrorCode creationStatus) {
|
||||
return (theValue == gNoValue && creationStatus == U_ZERO_ERROR);
|
||||
const SharedObject* theValue, UErrorCode creationStatus) const {
|
||||
return (theValue == fNoValue && creationStatus == U_ZERO_ERROR);
|
||||
}
|
||||
|
||||
// Determine if given hash entry is eligible for eviction.
|
||||
// On entry, gCacheMutex must be held.
|
||||
UBool UnifiedCache::_isEvictable(const UHashElement *element) {
|
||||
UBool UnifiedCache::_isEvictable(const UHashElement *element) const
|
||||
{
|
||||
const CacheKeyBase *theKey = (const CacheKeyBase *) element->key.pointer;
|
||||
const SharedObject *theValue =
|
||||
(const SharedObject *) element->value.pointer;
|
||||
|
@ -549,7 +473,47 @@ UBool UnifiedCache::_isEvictable(const UHashElement *element) {
|
|||
|
||||
// We can evict entries that are either not a master or have just
|
||||
// one reference (The one reference being from the cache itself).
|
||||
return (!theKey->fIsMaster || (theValue->getSoftRefCount() == 1 && theValue->noHardReferences()));
|
||||
return (!theKey->fIsMaster || (theValue->softRefCount == 1 && theValue->noHardReferences()));
|
||||
}
|
||||
|
||||
void UnifiedCache::removeSoftRef(const SharedObject *value) const {
|
||||
U_ASSERT(value->cachePtr == this);
|
||||
U_ASSERT(value->softRefCount > 0);
|
||||
if (--value->softRefCount == 0) {
|
||||
--fNumValuesTotal;
|
||||
if (value->noHardReferences()) {
|
||||
delete value;
|
||||
} else {
|
||||
// This path only happens from flush(all). Which only happens from the
|
||||
// UnifiedCache destructor. Nulling out value.cacheptr changes the behavior
|
||||
// of value.removeRef(), causing the deletion to be done there.
|
||||
value->cachePtr = nullptr;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Decrements the hard reference count of value and returns the new count.
// A NULL value is ignored and reported as a count of 0.
// When the count reaches zero, fNumValuesInUse is decremented.
int32_t UnifiedCache::removeHardRef(const SharedObject *value) const {
    if (!value) {
        return 0;
    }
    int remaining = umtx_atomic_dec(&value->hardRefCount);
    U_ASSERT(remaining >= 0);
    if (remaining == 0) {
        // Last hard reference gone: the value is no longer in use.
        --fNumValuesInUse;
    }
    return remaining;
}
|
||||
|
||||
// Increments the hard reference count of value and returns the new count.
// A NULL value is ignored and reported as a count of 0.
// When the count goes from zero to one, fNumValuesInUse is incremented.
int32_t UnifiedCache::addHardRef(const SharedObject *value) const {
    if (!value) {
        return 0;
    }
    int current = umtx_atomic_inc(&value->hardRefCount);
    U_ASSERT(current >= 1);
    if (current == 1) {
        // First hard reference: the value has just become in use.
        fNumValuesInUse++;
    }
    return current;
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
|
210
deps/icu-small/source/common/unifiedcache.h
vendored
210
deps/icu-small/source/common/unifiedcache.h
vendored
|
@ -190,7 +190,7 @@ class U_COMMON_API UnifiedCache : public UnifiedCacheBase {
|
|||
UnifiedCache(UErrorCode &status);
|
||||
|
||||
/**
|
||||
* Returns the cache instance.
|
||||
* Return a pointer to the global cache instance.
|
||||
*/
|
||||
static UnifiedCache *getInstance(UErrorCode &status);
|
||||
|
||||
|
@ -294,7 +294,7 @@ class U_COMMON_API UnifiedCache : public UnifiedCacheBase {
|
|||
|
||||
/**
|
||||
* Configures at what point eviction of unused entries will begin.
|
||||
* Eviction is triggered whenever the number of unused entries exeeds
|
||||
* Eviction is triggered whenever the number of evictable keys exceeds
|
||||
* BOTH count AND (number of in-use items) * (percentageOfInUseItems / 100).
|
||||
* Once the number of unused entries drops below one of these,
|
||||
* eviction ceases. Because eviction happens incrementally,
|
||||
|
@ -341,60 +341,214 @@ class U_COMMON_API UnifiedCache : public UnifiedCacheBase {
|
|||
*/
|
||||
int32_t unusedCount() const;
|
||||
|
||||
virtual void incrementItemsInUse() const;
|
||||
virtual void decrementItemsInUseWithLockingAndEviction() const;
|
||||
virtual void decrementItemsInUse() const;
|
||||
virtual void handleUnreferencedObject() const;
|
||||
virtual ~UnifiedCache();
|
||||
|
||||
private:
|
||||
UHashtable *fHashtable;
|
||||
mutable int32_t fEvictPos;
|
||||
mutable int32_t fItemsInUseCount;
|
||||
mutable int32_t fNumValuesTotal;
|
||||
mutable int32_t fNumValuesInUse;
|
||||
int32_t fMaxUnused;
|
||||
int32_t fMaxPercentageOfInUse;
|
||||
mutable int64_t fAutoEvictedCount;
|
||||
SharedObject *fNoValue;
|
||||
|
||||
UnifiedCache(const UnifiedCache &other);
|
||||
UnifiedCache &operator=(const UnifiedCache &other);
|
||||
|
||||
/**
|
||||
* Flushes the contents of the cache. If cache values hold references to other
|
||||
* cache values then _flush should be called in a loop until it returns FALSE.
|
||||
*
|
||||
* On entry, gCacheMutex must be held.
|
||||
* On exit, those values which are evictable are flushed.
|
||||
*
|
||||
* @param all if false flush evictable items only, which are those with no external
|
||||
* references, plus those that can be safely recreated.<br>
|
||||
* if true, flush all elements. Any values (sharedObjects) with remaining
|
||||
* hard (external) references are not deleted, but are detached from
|
||||
* the cache, so that a subsequent removeRefs can delete them.
|
||||
* _flush is not thread safe when all is true.
|
||||
* @return TRUE if any value in cache was flushed or FALSE otherwise.
|
||||
*/
|
||||
UBool _flush(UBool all) const;
|
||||
|
||||
/**
|
||||
* Gets value out of cache.
|
||||
* On entry. gCacheMutex must not be held. value must be NULL. status
|
||||
* must be U_ZERO_ERROR.
|
||||
* On exit. value and status set to what is in cache at key or on cache
|
||||
* miss the key's createObject() is called and value and status are set to
|
||||
* the result of that. In this latter case, best effort is made to add the
|
||||
* value and status to the cache. If createObject() fails to create a value,
|
||||
* fNoValue is stored in cache, and value is set to NULL. Caller must call
|
||||
* removeRef on value if non NULL.
|
||||
*/
|
||||
void _get(
|
||||
const CacheKeyBase &key,
|
||||
const SharedObject *&value,
|
||||
const void *creationContext,
|
||||
UErrorCode &status) const;
|
||||
UBool _poll(
|
||||
const CacheKeyBase &key,
|
||||
const SharedObject *&value,
|
||||
UErrorCode &status) const;
|
||||
void _putNew(
|
||||
const CacheKeyBase &key,
|
||||
const SharedObject *value,
|
||||
const UErrorCode creationStatus,
|
||||
UErrorCode &status) const;
|
||||
|
||||
/**
|
||||
* Attempts to fetch value and status for key from cache.
|
||||
* On entry, gCacheMutex must not be held value must be NULL and status must
|
||||
* be U_ZERO_ERROR.
|
||||
* On exit, either returns FALSE (In this
|
||||
* case caller should try to create the object) or returns TRUE with value
|
||||
* pointing to the fetched value and status set to fetched status. When
|
||||
* FALSE is returned status may be set to failure if an in progress hash
|
||||
* entry could not be made but value will remain unchanged. When TRUE is
|
||||
* returned, caller must call removeRef() on value.
|
||||
*/
|
||||
UBool _poll(
|
||||
const CacheKeyBase &key,
|
||||
const SharedObject *&value,
|
||||
UErrorCode &status) const;
|
||||
|
||||
/**
|
||||
* Places a new value and creationStatus in the cache for the given key.
|
||||
* On entry, gCacheMutex must be held. key must not exist in the cache.
|
||||
* On exit, value and creation status placed under key. Soft reference added
|
||||
* to value on successful add. On error sets status.
|
||||
*/
|
||||
void _putNew(
|
||||
const CacheKeyBase &key,
|
||||
const SharedObject *value,
|
||||
const UErrorCode creationStatus,
|
||||
UErrorCode &status) const;
|
||||
|
||||
/**
|
||||
* Places value and status at key if there is no value at key or if cache
|
||||
* entry for key is in progress. Otherwise, it leaves the current value and
|
||||
* status there.
|
||||
*
|
||||
* On entry. gCacheMutex must not be held. Value must be
|
||||
* included in the reference count of the object to which it points.
|
||||
*
|
||||
* On exit, value and status are changed to what was already in the cache if
|
||||
* something was there and not in progress. Otherwise, value and status are left
|
||||
* unchanged in which case they are placed in the cache on a best-effort basis.
|
||||
* Caller must call removeRef() on value.
|
||||
*/
|
||||
void _putIfAbsentAndGet(
|
||||
const CacheKeyBase &key,
|
||||
const SharedObject *&value,
|
||||
UErrorCode &status) const;
|
||||
const UHashElement *_nextElement() const;
|
||||
|
||||
/**
|
||||
* Returns the next element in the cache round robin style.
|
||||
* Returns nullptr if the cache is empty.
|
||||
* On entry, gCacheMutex must be held.
|
||||
*/
|
||||
const UHashElement *_nextElement() const;
|
||||
|
||||
/**
|
||||
* Return the number of cache items that would need to be evicted
|
||||
* to bring usage into conformance with eviction policy.
|
||||
*
|
||||
* An item corresponds to an entry in the hash table, a hash table element.
|
||||
*
|
||||
* On entry, gCacheMutex must be held.
|
||||
*/
|
||||
int32_t _computeCountOfItemsToEvict() const;
|
||||
|
||||
/**
|
||||
* Run an eviction slice.
|
||||
* On entry, gCacheMutex must be held.
|
||||
* _runEvictionSlice runs a slice of the evict pipeline by examining the next
|
||||
* 10 entries in the cache round robin style evicting them if they are eligible.
|
||||
*/
|
||||
void _runEvictionSlice() const;
|
||||
void _registerMaster(
|
||||
const CacheKeyBase *theKey, const SharedObject *value) const;
|
||||
|
||||
/**
|
||||
* Register a master cache entry. A master key is the first key to create
|
||||
* a given SharedObject value. Subsequent keys whose create function
|
||||
* produces references to an already existing SharedObject are not masters -
|
||||
* they can be evicted and subsequently recreated.
|
||||
*
|
||||
* On entry, gCacheMutex must be held.
|
||||
* On exit, items in use count incremented, entry is marked as a master
|
||||
* entry, and value registered with cache so that subsequent calls to
|
||||
* addRef() and removeRef() on it correctly interact with the cache.
|
||||
*/
|
||||
void _registerMaster(const CacheKeyBase *theKey, const SharedObject *value) const;
|
||||
|
||||
/**
|
||||
* Store a value and creation error status in given hash entry.
|
||||
* On entry, gCacheMutex must be held. Hash entry element must be in progress.
|
||||
* value must be non NULL.
|
||||
* On Exit, soft reference added to value. value and status stored in hash
|
||||
* entry. Soft reference removed from previous stored value. Waiting
|
||||
* threads notified.
|
||||
*/
|
||||
void _put(
|
||||
const UHashElement *element,
|
||||
const SharedObject *value,
|
||||
const UErrorCode status) const;
|
||||
/**
|
||||
* Remove a soft reference, and delete the SharedObject if no references remain.
|
||||
* To be used from within the UnifiedCache implementation only.
|
||||
* gCacheMutex must be held by caller.
|
||||
* @param value the SharedObject to be acted on.
|
||||
*/
|
||||
void removeSoftRef(const SharedObject *value) const;
|
||||
|
||||
/**
|
||||
* Increment the hard reference count of the given SharedObject.
|
||||
* gCacheMutex must be held by the caller.
|
||||
* Update fNumValuesInUse on transitions between zero and one reference.
|
||||
*
|
||||
* @param value The SharedObject to be referenced.
|
||||
* @return the hard reference count after the addition.
|
||||
*/
|
||||
int32_t addHardRef(const SharedObject *value) const;
|
||||
|
||||
/**
|
||||
* Decrement the hard reference count of the given SharedObject.
|
||||
* gCacheMutex must be held by the caller.
|
||||
* Update fNumValuesInUse on transitions between one and zero reference.
|
||||
*
|
||||
* @param value The SharedObject whose hard reference is being released.
|
||||
* @return the hard reference count after the removal.
|
||||
*/
|
||||
int32_t removeHardRef(const SharedObject *value) const;
|
||||
|
||||
|
||||
#ifdef UNIFIED_CACHE_DEBUG
|
||||
void _dumpContents() const;
|
||||
#endif
|
||||
static void copyPtr(const SharedObject *src, const SharedObject *&dest);
|
||||
static void clearPtr(const SharedObject *&ptr);
|
||||
static void _fetch(
|
||||
const UHashElement *element,
|
||||
const SharedObject *&value,
|
||||
UErrorCode &status);
|
||||
static UBool _inProgress(const UHashElement *element);
|
||||
static UBool _inProgress(
|
||||
const SharedObject *theValue, UErrorCode creationStatus);
|
||||
static UBool _isEvictable(const UHashElement *element);
|
||||
|
||||
/**
|
||||
* Fetch value and error code from a particular hash entry.
|
||||
* On entry, gCacheMutex must be held. value must be either NULL or must be
|
||||
* included in the ref count of the object to which it points.
|
||||
* On exit, value and status set to what is in the hash entry. Caller must
|
||||
* eventually call removeRef on value.
|
||||
* If hash entry is in progress, value will be set to fNoValue and status will
|
||||
* be set to U_ZERO_ERROR.
|
||||
*/
|
||||
void _fetch(const UHashElement *element, const SharedObject *&value,
|
||||
UErrorCode &status) const;
|
||||
|
||||
/**
|
||||
* Determine if given hash entry is in progress.
|
||||
* On entry, gCacheMutex must be held.
|
||||
*/
|
||||
UBool _inProgress(const UHashElement *element) const;
|
||||
|
||||
/**
|
||||
* Determine if given hash entry is in progress.
|
||||
* On entry, gCacheMutex must be held.
|
||||
*/
|
||||
UBool _inProgress(const SharedObject *theValue, UErrorCode creationStatus) const;
|
||||
|
||||
/**
|
||||
* Determine if given hash entry is eligible for eviction.
|
||||
* On entry, gCacheMutex must be held.
|
||||
*/
|
||||
UBool _isEvictable(const UHashElement *element) const;
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
|
|
@ -129,7 +129,7 @@ UnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern,
|
|||
// _applyPattern calls add() etc., which set pat to empty.
|
||||
UnicodeString rebuiltPat;
|
||||
RuleCharacterIterator chars(pattern, symbols, pos);
|
||||
applyPattern(chars, symbols, rebuiltPat, options, &UnicodeSet::closeOver, status);
|
||||
applyPattern(chars, symbols, rebuiltPat, options, &UnicodeSet::closeOver, 0, status);
|
||||
if (U_FAILURE(status)) return *this;
|
||||
if (chars.inVariable()) {
|
||||
// syntaxError(chars, "Extra chars in variable value");
|
||||
|
|
28
deps/icu-small/source/common/uniset_props.cpp
vendored
28
deps/icu-small/source/common/uniset_props.cpp
vendored
|
@ -231,7 +231,7 @@ void U_CALLCONV UnicodeSet_initInclusion(int32_t src, UErrorCode &status) {
|
|||
ucase_addPropertyStarts(&sa, &status);
|
||||
break;
|
||||
case UPROPS_SRC_BIDI:
|
||||
ubidi_addPropertyStarts(ubidi_getSingleton(), &sa, &status);
|
||||
ubidi_addPropertyStarts(&sa, &status);
|
||||
break;
|
||||
default:
|
||||
status = U_INTERNAL_PROGRAM_ERROR;
|
||||
|
@ -257,6 +257,7 @@ const UnicodeSet* UnicodeSet::getInclusions(int32_t src, UErrorCode &status) {
|
|||
return i.fSet;
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
// Cache some sets for other services -------------------------------------- ***
|
||||
void U_CALLCONV createUni32Set(UErrorCode &errorCode) {
|
||||
|
@ -315,6 +316,8 @@ isPOSIXClose(const UnicodeString &pattern, int32_t pos) {
|
|||
// memory leak checker tools
|
||||
#define _dbgct(me)
|
||||
|
||||
} // namespace
|
||||
|
||||
//----------------------------------------------------------------
|
||||
// Constructors &c
|
||||
//----------------------------------------------------------------
|
||||
|
@ -382,7 +385,7 @@ UnicodeSet::applyPatternIgnoreSpace(const UnicodeString& pattern,
|
|||
// _applyPattern calls add() etc., which set pat to empty.
|
||||
UnicodeString rebuiltPat;
|
||||
RuleCharacterIterator chars(pattern, symbols, pos);
|
||||
applyPattern(chars, symbols, rebuiltPat, USET_IGNORE_SPACE, NULL, status);
|
||||
applyPattern(chars, symbols, rebuiltPat, USET_IGNORE_SPACE, NULL, 0, status);
|
||||
if (U_FAILURE(status)) return;
|
||||
if (chars.inVariable()) {
|
||||
// syntaxError(chars, "Extra chars in variable value");
|
||||
|
@ -406,6 +409,8 @@ UBool UnicodeSet::resemblesPattern(const UnicodeString& pattern, int32_t pos) {
|
|||
// Implementation: Pattern parsing
|
||||
//----------------------------------------------------------------
|
||||
|
||||
namespace {
|
||||
|
||||
/**
|
||||
* A small all-inline class to manage a UnicodeSet pointer. Add
|
||||
* operator->() etc. as needed.
|
||||
|
@ -424,6 +429,10 @@ public:
|
|||
}
|
||||
};
|
||||
|
||||
constexpr int32_t MAX_DEPTH = 100;
|
||||
|
||||
} // namespace
|
||||
|
||||
/**
|
||||
* Parse the pattern from the given RuleCharacterIterator. The
|
||||
* iterator is advanced over the parsed pattern.
|
||||
|
@ -443,8 +452,13 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
|
|||
UnicodeString& rebuiltPat,
|
||||
uint32_t options,
|
||||
UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute),
|
||||
int32_t depth,
|
||||
UErrorCode& ec) {
|
||||
if (U_FAILURE(ec)) return;
|
||||
if (depth > MAX_DEPTH) {
|
||||
ec = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return;
|
||||
}
|
||||
|
||||
// Syntax characters: [ ] ^ - & { }
|
||||
|
||||
|
@ -579,7 +593,7 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
|
|||
}
|
||||
switch (setMode) {
|
||||
case 1:
|
||||
nested->applyPattern(chars, symbols, patLocal, options, caseClosure, ec);
|
||||
nested->applyPattern(chars, symbols, patLocal, options, caseClosure, depth + 1, ec);
|
||||
break;
|
||||
case 2:
|
||||
chars.skipIgnored(opts);
|
||||
|
@ -837,6 +851,8 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
|
|||
// Property set implementation
|
||||
//----------------------------------------------------------------
|
||||
|
||||
namespace {
|
||||
|
||||
static UBool numericValueFilter(UChar32 ch, void* context) {
|
||||
return u_getNumericValue(ch) == *(double*)context;
|
||||
}
|
||||
|
@ -868,6 +884,8 @@ static UBool scriptExtensionsFilter(UChar32 ch, void* context) {
|
|||
return uscript_hasScript(ch, *(UScriptCode*)context);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
/**
|
||||
* Generic filter-based scanning code for UCD property UnicodeSets.
|
||||
*/
|
||||
|
@ -924,6 +942,8 @@ void UnicodeSet::applyFilter(UnicodeSet::Filter filter,
|
|||
}
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
static UBool mungeCharName(char* dst, const char* src, int32_t dstCapacity) {
|
||||
/* Note: we use ' ' in compiler code page */
|
||||
int32_t j = 0;
|
||||
|
@ -941,6 +961,8 @@ static UBool mungeCharName(char* dst, const char* src, int32_t dstCapacity) {
|
|||
return TRUE;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
//----------------------------------------------------------------
|
||||
// Property set API
|
||||
//----------------------------------------------------------------
|
||||
|
|
16
deps/icu-small/source/common/uprops.cpp
vendored
16
deps/icu-small/source/common/uprops.cpp
vendored
|
@ -38,8 +38,6 @@
|
|||
|
||||
U_NAMESPACE_USE
|
||||
|
||||
#define GET_BIDI_PROPS() ubidi_getSingleton()
|
||||
|
||||
/* general properties API functions ----------------------------------------- */
|
||||
|
||||
struct BinaryProperty;
|
||||
|
@ -62,15 +60,15 @@ static UBool caseBinaryPropertyContains(const BinaryProperty &/*prop*/, UChar32
|
|||
}
|
||||
|
||||
static UBool isBidiControl(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
|
||||
return ubidi_isBidiControl(GET_BIDI_PROPS(), c);
|
||||
return ubidi_isBidiControl(c);
|
||||
}
|
||||
|
||||
static UBool isMirrored(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
|
||||
return ubidi_isMirrored(GET_BIDI_PROPS(), c);
|
||||
return ubidi_isMirrored(c);
|
||||
}
|
||||
|
||||
static UBool isJoinControl(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
|
||||
return ubidi_isJoinControl(GET_BIDI_PROPS(), c);
|
||||
return ubidi_isJoinControl(c);
|
||||
}
|
||||
|
||||
#if UCONFIG_NO_NORMALIZATION
|
||||
|
@ -329,11 +327,11 @@ static int32_t getBiDiClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*
|
|||
}
|
||||
|
||||
static int32_t getBiDiPairedBracketType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
|
||||
return (int32_t)ubidi_getPairedBracketType(GET_BIDI_PROPS(), c);
|
||||
return (int32_t)ubidi_getPairedBracketType(c);
|
||||
}
|
||||
|
||||
static int32_t biDiGetMaxValue(const IntProperty &/*prop*/, UProperty which) {
|
||||
return ubidi_getMaxValue(GET_BIDI_PROPS(), which);
|
||||
return ubidi_getMaxValue(which);
|
||||
}
|
||||
|
||||
#if UCONFIG_NO_NORMALIZATION
|
||||
|
@ -351,11 +349,11 @@ static int32_t getGeneralCategory(const IntProperty &/*prop*/, UChar32 c, UPrope
|
|||
}
|
||||
|
||||
static int32_t getJoiningGroup(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
|
||||
return ubidi_getJoiningGroup(GET_BIDI_PROPS(), c);
|
||||
return ubidi_getJoiningGroup(c);
|
||||
}
|
||||
|
||||
static int32_t getJoiningType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
|
||||
return ubidi_getJoiningType(GET_BIDI_PROPS(), c);
|
||||
return ubidi_getJoiningType(c);
|
||||
}
|
||||
|
||||
static int32_t getNumericType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
|
||||
|
|
6
deps/icu-small/source/common/ushape.cpp
vendored
6
deps/icu-small/source/common/ushape.cpp
vendored
|
@ -342,18 +342,16 @@ static void
|
|||
_shapeToArabicDigitsWithContext(UChar *s, int32_t length,
|
||||
UChar digitBase,
|
||||
UBool isLogical, UBool lastStrongWasAL) {
|
||||
const UBiDiProps *bdp;
|
||||
int32_t i;
|
||||
UChar c;
|
||||
|
||||
bdp=ubidi_getSingleton();
|
||||
digitBase-=0x30;
|
||||
|
||||
/* the iteration direction depends on the type of input */
|
||||
if(isLogical) {
|
||||
for(i=0; i<length; ++i) {
|
||||
c=s[i];
|
||||
switch(ubidi_getClass(bdp, c)) {
|
||||
switch(ubidi_getClass(c)) {
|
||||
case U_LEFT_TO_RIGHT: /* L */
|
||||
case U_RIGHT_TO_LEFT: /* R */
|
||||
lastStrongWasAL=FALSE;
|
||||
|
@ -373,7 +371,7 @@ _shapeToArabicDigitsWithContext(UChar *s, int32_t length,
|
|||
} else {
|
||||
for(i=length; i>0; /* pre-decrement in the body */) {
|
||||
c=s[--i];
|
||||
switch(ubidi_getClass(bdp, c)) {
|
||||
switch(ubidi_getClass(c)) {
|
||||
case U_LEFT_TO_RIGHT: /* L */
|
||||
case U_RIGHT_TO_LEFT: /* R */
|
||||
lastStrongWasAL=FALSE;
|
||||
|
|
6
deps/icu-small/source/common/usprep.cpp
vendored
6
deps/icu-small/source/common/usprep.cpp
vendored
|
@ -347,10 +347,6 @@ usprep_getProfile(const char* path,
|
|||
newProfile->doNFKC = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0);
|
||||
newProfile->checkBiDi = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0);
|
||||
|
||||
if(newProfile->checkBiDi) {
|
||||
newProfile->bdp = ubidi_getSingleton();
|
||||
}
|
||||
|
||||
LocalMemory<UStringPrepKey> key;
|
||||
LocalMemory<char> keyName;
|
||||
LocalMemory<char> keyPath;
|
||||
|
@ -735,7 +731,7 @@ usprep_prepare( const UStringPrepProfile* profile,
|
|||
}
|
||||
|
||||
if(profile->checkBiDi) {
|
||||
direction = ubidi_getClass(profile->bdp, ch);
|
||||
direction = ubidi_getClass(ch);
|
||||
if(firstCharDir == U_CHAR_DIRECTION_COUNT){
|
||||
firstCharDir = direction;
|
||||
}
|
||||
|
|
2
deps/icu-small/source/common/ustr_wcs.cpp
vendored
2
deps/icu-small/source/common/ustr_wcs.cpp
vendored
|
@ -342,7 +342,7 @@ _strFromWCS( UChar *dest,
|
|||
pSrcLimit = src + srcLength;
|
||||
|
||||
for(;;){
|
||||
register int32_t nulLen = 0;
|
||||
int32_t nulLen = 0;
|
||||
|
||||
/* find nulls in the string */
|
||||
while(nulLen<srcLength && pSrc[nulLen++]!=0){
|
||||
|
|
311
deps/icu-small/source/common/ustrcase.cpp
vendored
311
deps/icu-small/source/common/ustrcase.cpp
vendored
|
@ -52,16 +52,8 @@ int32_t checkOverflowAndEditsError(int32_t destIndex, int32_t destCapacity,
|
|||
return destIndex;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
U_NAMESPACE_USE
|
||||
|
||||
/* string casing ------------------------------------------------------------ */
|
||||
|
||||
/* Appends a full case mapping result, see UCASE_MAX_STRING_LENGTH. */
|
||||
static inline int32_t
|
||||
inline int32_t
|
||||
appendResult(UChar *dest, int32_t destIndex, int32_t destCapacity,
|
||||
int32_t result, const UChar *s,
|
||||
int32_t cpLength, uint32_t options, icu::Edits *edits) {
|
||||
|
@ -134,7 +126,7 @@ appendResult(UChar *dest, int32_t destIndex, int32_t destCapacity,
|
|||
return destIndex;
|
||||
}
|
||||
|
||||
static inline int32_t
|
||||
inline int32_t
|
||||
appendUChar(UChar *dest, int32_t destIndex, int32_t destCapacity, UChar c) {
|
||||
if(destIndex<destCapacity) {
|
||||
dest[destIndex]=c;
|
||||
|
@ -144,28 +136,34 @@ appendUChar(UChar *dest, int32_t destIndex, int32_t destCapacity, UChar c) {
|
|||
return destIndex+1;
|
||||
}
|
||||
|
||||
static inline int32_t
|
||||
appendUnchanged(UChar *dest, int32_t destIndex, int32_t destCapacity,
|
||||
const UChar *s, int32_t length, uint32_t options, icu::Edits *edits) {
|
||||
if(length>0) {
|
||||
if(edits!=NULL) {
|
||||
edits->addUnchanged(length);
|
||||
}
|
||||
if(options & U_OMIT_UNCHANGED_TEXT) {
|
||||
return destIndex;
|
||||
}
|
||||
if(length>(INT32_MAX-destIndex)) {
|
||||
return -1; // integer overflow
|
||||
}
|
||||
if((destIndex+length)<=destCapacity) {
|
||||
u_memcpy(dest+destIndex, s, length);
|
||||
}
|
||||
destIndex+=length;
|
||||
int32_t
|
||||
appendNonEmptyUnchanged(UChar *dest, int32_t destIndex, int32_t destCapacity,
|
||||
const UChar *s, int32_t length, uint32_t options, icu::Edits *edits) {
|
||||
if(edits!=NULL) {
|
||||
edits->addUnchanged(length);
|
||||
}
|
||||
return destIndex;
|
||||
if(options & U_OMIT_UNCHANGED_TEXT) {
|
||||
return destIndex;
|
||||
}
|
||||
if(length>(INT32_MAX-destIndex)) {
|
||||
return -1; // integer overflow
|
||||
}
|
||||
if((destIndex+length)<=destCapacity) {
|
||||
u_memcpy(dest+destIndex, s, length);
|
||||
}
|
||||
return destIndex + length;
|
||||
}
|
||||
|
||||
static UChar32 U_CALLCONV
|
||||
inline int32_t
|
||||
appendUnchanged(UChar *dest, int32_t destIndex, int32_t destCapacity,
|
||||
const UChar *s, int32_t length, uint32_t options, icu::Edits *edits) {
|
||||
if (length <= 0) {
|
||||
return destIndex;
|
||||
}
|
||||
return appendNonEmptyUnchanged(dest, destIndex, destCapacity, s, length, options, edits);
|
||||
}
|
||||
|
||||
UChar32 U_CALLCONV
|
||||
utf16_caseContextIterator(void *context, int8_t dir) {
|
||||
UCaseContext *csc=(UCaseContext *)context;
|
||||
UChar32 c;
|
||||
|
@ -197,39 +195,205 @@ utf16_caseContextIterator(void *context, int8_t dir) {
|
|||
return U_SENTINEL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Case-maps [srcStart..srcLimit[ but takes
|
||||
* context [0..srcLength[ into account.
|
||||
/**
|
||||
* caseLocale >= 0: Lowercases [srcStart..srcLimit[ but takes context [0..srcLength[ into account.
|
||||
* caseLocale < 0: Case-folds [srcStart..srcLimit[.
|
||||
*/
|
||||
static int32_t
|
||||
_caseMap(int32_t caseLocale, uint32_t options, UCaseMapFull *map,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, UCaseContext *csc,
|
||||
int32_t srcStart, int32_t srcLimit,
|
||||
icu::Edits *edits,
|
||||
UErrorCode &errorCode) {
|
||||
/* case mapping loop */
|
||||
int32_t srcIndex=srcStart;
|
||||
int32_t destIndex=0;
|
||||
while(srcIndex<srcLimit) {
|
||||
int32_t cpStart;
|
||||
csc->cpStart=cpStart=srcIndex;
|
||||
int32_t toLower(int32_t caseLocale, uint32_t options,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, UCaseContext *csc, int32_t srcStart, int32_t srcLimit,
|
||||
icu::Edits *edits, UErrorCode &errorCode) {
|
||||
const int8_t *latinToLower;
|
||||
if (caseLocale == UCASE_LOC_ROOT ||
|
||||
(caseLocale >= 0 ?
|
||||
!(caseLocale == UCASE_LOC_TURKISH || caseLocale == UCASE_LOC_LITHUANIAN) :
|
||||
(options & _FOLD_CASE_OPTIONS_MASK) == U_FOLD_CASE_DEFAULT)) {
|
||||
latinToLower = LatinCase::TO_LOWER_NORMAL;
|
||||
} else {
|
||||
latinToLower = LatinCase::TO_LOWER_TR_LT;
|
||||
}
|
||||
const UTrie2 *trie = ucase_getTrie();
|
||||
int32_t destIndex = 0;
|
||||
int32_t prev = srcStart;
|
||||
int32_t srcIndex = srcStart;
|
||||
for (;;) {
|
||||
// fast path for simple cases
|
||||
UChar lead;
|
||||
while (srcIndex < srcLimit) {
|
||||
lead = src[srcIndex];
|
||||
int32_t delta;
|
||||
if (lead < LatinCase::LONG_S) {
|
||||
int8_t d = latinToLower[lead];
|
||||
if (d == LatinCase::EXC) { break; }
|
||||
++srcIndex;
|
||||
if (d == 0) { continue; }
|
||||
delta = d;
|
||||
} else if (lead >= 0xd800) {
|
||||
break; // surrogate or higher
|
||||
} else {
|
||||
uint16_t props = UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, lead);
|
||||
if (UCASE_HAS_EXCEPTION(props)) { break; }
|
||||
++srcIndex;
|
||||
if (!UCASE_IS_UPPER_OR_TITLE(props) || (delta = UCASE_GET_DELTA(props)) == 0) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
lead += delta;
|
||||
destIndex = appendUnchanged(dest, destIndex, destCapacity,
|
||||
src + prev, srcIndex - 1 - prev, options, edits);
|
||||
if (destIndex >= 0) {
|
||||
destIndex = appendUChar(dest, destIndex, destCapacity, lead);
|
||||
if (edits != nullptr) {
|
||||
edits->addReplace(1, 1);
|
||||
}
|
||||
}
|
||||
if (destIndex < 0) {
|
||||
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return 0;
|
||||
}
|
||||
prev = srcIndex;
|
||||
}
|
||||
if (srcIndex >= srcLimit) {
|
||||
break;
|
||||
}
|
||||
// slow path
|
||||
int32_t cpStart = srcIndex++;
|
||||
UChar trail;
|
||||
UChar32 c;
|
||||
U16_NEXT(src, srcIndex, srcLimit, c);
|
||||
csc->cpLimit=srcIndex;
|
||||
if (U16_IS_LEAD(lead) && srcIndex < srcLimit && U16_IS_TRAIL(trail = src[srcIndex])) {
|
||||
c = U16_GET_SUPPLEMENTARY(lead, trail);
|
||||
++srcIndex;
|
||||
} else {
|
||||
c = lead;
|
||||
}
|
||||
const UChar *s;
|
||||
c=map(c, utf16_caseContextIterator, csc, &s, caseLocale);
|
||||
destIndex = appendResult(dest, destIndex, destCapacity, c, s,
|
||||
srcIndex - cpStart, options, edits);
|
||||
if (destIndex < 0) {
|
||||
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return 0;
|
||||
if (caseLocale >= 0) {
|
||||
csc->cpStart = cpStart;
|
||||
csc->cpLimit = srcIndex;
|
||||
c = ucase_toFullLower(c, utf16_caseContextIterator, csc, &s, caseLocale);
|
||||
} else {
|
||||
c = ucase_toFullFolding(c, &s, options);
|
||||
}
|
||||
if (c >= 0) {
|
||||
destIndex = appendUnchanged(dest, destIndex, destCapacity,
|
||||
src + prev, cpStart - prev, options, edits);
|
||||
if (destIndex >= 0) {
|
||||
destIndex = appendResult(dest, destIndex, destCapacity, c, s,
|
||||
srcIndex - cpStart, options, edits);
|
||||
}
|
||||
if (destIndex < 0) {
|
||||
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return 0;
|
||||
}
|
||||
prev = srcIndex;
|
||||
}
|
||||
}
|
||||
|
||||
destIndex = appendUnchanged(dest, destIndex, destCapacity,
|
||||
src + prev, srcIndex - prev, options, edits);
|
||||
if (destIndex < 0) {
|
||||
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return 0;
|
||||
}
|
||||
return destIndex;
|
||||
}
|
||||
|
||||
int32_t toUpper(int32_t caseLocale, uint32_t options,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, UCaseContext *csc, int32_t srcLength,
|
||||
icu::Edits *edits, UErrorCode &errorCode) {
|
||||
const int8_t *latinToUpper;
|
||||
if (caseLocale == UCASE_LOC_TURKISH) {
|
||||
latinToUpper = LatinCase::TO_UPPER_TR;
|
||||
} else {
|
||||
latinToUpper = LatinCase::TO_UPPER_NORMAL;
|
||||
}
|
||||
const UTrie2 *trie = ucase_getTrie();
|
||||
int32_t destIndex = 0;
|
||||
int32_t prev = 0;
|
||||
int32_t srcIndex = 0;
|
||||
for (;;) {
|
||||
// fast path for simple cases
|
||||
UChar lead;
|
||||
while (srcIndex < srcLength) {
|
||||
lead = src[srcIndex];
|
||||
int32_t delta;
|
||||
if (lead < LatinCase::LONG_S) {
|
||||
int8_t d = latinToUpper[lead];
|
||||
if (d == LatinCase::EXC) { break; }
|
||||
++srcIndex;
|
||||
if (d == 0) { continue; }
|
||||
delta = d;
|
||||
} else if (lead >= 0xd800) {
|
||||
break; // surrogate or higher
|
||||
} else {
|
||||
uint16_t props = UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, lead);
|
||||
if (UCASE_HAS_EXCEPTION(props)) { break; }
|
||||
++srcIndex;
|
||||
if (UCASE_GET_TYPE(props) != UCASE_LOWER || (delta = UCASE_GET_DELTA(props)) == 0) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
lead += delta;
|
||||
destIndex = appendUnchanged(dest, destIndex, destCapacity,
|
||||
src + prev, srcIndex - 1 - prev, options, edits);
|
||||
if (destIndex >= 0) {
|
||||
destIndex = appendUChar(dest, destIndex, destCapacity, lead);
|
||||
if (edits != nullptr) {
|
||||
edits->addReplace(1, 1);
|
||||
}
|
||||
}
|
||||
if (destIndex < 0) {
|
||||
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return 0;
|
||||
}
|
||||
prev = srcIndex;
|
||||
}
|
||||
if (srcIndex >= srcLength) {
|
||||
break;
|
||||
}
|
||||
// slow path
|
||||
int32_t cpStart;
|
||||
csc->cpStart = cpStart = srcIndex++;
|
||||
UChar trail;
|
||||
UChar32 c;
|
||||
if (U16_IS_LEAD(lead) && srcIndex < srcLength && U16_IS_TRAIL(trail = src[srcIndex])) {
|
||||
c = U16_GET_SUPPLEMENTARY(lead, trail);
|
||||
++srcIndex;
|
||||
} else {
|
||||
c = lead;
|
||||
}
|
||||
csc->cpLimit = srcIndex;
|
||||
const UChar *s;
|
||||
c = ucase_toFullUpper(c, utf16_caseContextIterator, csc, &s, caseLocale);
|
||||
if (c >= 0) {
|
||||
destIndex = appendUnchanged(dest, destIndex, destCapacity,
|
||||
src + prev, cpStart - prev, options, edits);
|
||||
if (destIndex >= 0) {
|
||||
destIndex = appendResult(dest, destIndex, destCapacity, c, s,
|
||||
srcIndex - cpStart, options, edits);
|
||||
}
|
||||
if (destIndex < 0) {
|
||||
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return 0;
|
||||
}
|
||||
prev = srcIndex;
|
||||
}
|
||||
}
|
||||
destIndex = appendUnchanged(dest, destIndex, destCapacity,
|
||||
src + prev, srcIndex - prev, options, edits);
|
||||
if (destIndex < 0) {
|
||||
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return 0;
|
||||
}
|
||||
return destIndex;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
U_NAMESPACE_USE
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
U_CFUNC int32_t U_CALLCONV
|
||||
|
@ -344,11 +508,10 @@ ustrcase_internalToTitle(int32_t caseLocale, uint32_t options, BreakIterator *it
|
|||
if((options&U_TITLECASE_NO_LOWERCASE)==0) {
|
||||
/* Normal operation: Lowercase the rest of the word. */
|
||||
destIndex+=
|
||||
_caseMap(
|
||||
caseLocale, options, ucase_toFullLower,
|
||||
toLower(
|
||||
caseLocale, options,
|
||||
dest+destIndex, destCapacity-destIndex,
|
||||
src, &csc,
|
||||
titleLimit, index,
|
||||
src, &csc, titleLimit, index,
|
||||
edits, errorCode);
|
||||
if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
|
||||
errorCode=U_ZERO_ERROR;
|
||||
|
@ -1013,8 +1176,8 @@ ustrcase_internalToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_IT
|
|||
UCaseContext csc=UCASECONTEXT_INITIALIZER;
|
||||
csc.p=(void *)src;
|
||||
csc.limit=srcLength;
|
||||
int32_t destIndex = _caseMap(
|
||||
caseLocale, options, ucase_toFullLower,
|
||||
int32_t destIndex = toLower(
|
||||
caseLocale, options,
|
||||
dest, destCapacity,
|
||||
src, &csc, 0, srcLength,
|
||||
edits, errorCode);
|
||||
|
@ -1035,10 +1198,10 @@ ustrcase_internalToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_IT
|
|||
UCaseContext csc=UCASECONTEXT_INITIALIZER;
|
||||
csc.p=(void *)src;
|
||||
csc.limit=srcLength;
|
||||
destIndex = _caseMap(
|
||||
caseLocale, options, ucase_toFullUpper,
|
||||
destIndex = toUpper(
|
||||
caseLocale, options,
|
||||
dest, destCapacity,
|
||||
src, &csc, 0, srcLength,
|
||||
src, &csc, srcLength,
|
||||
edits, errorCode);
|
||||
}
|
||||
return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
|
||||
|
@ -1050,23 +1213,11 @@ ustrcase_internalFold(int32_t /* caseLocale */, uint32_t options, UCASEMAP_BREAK
|
|||
const UChar *src, int32_t srcLength,
|
||||
icu::Edits *edits,
|
||||
UErrorCode &errorCode) {
|
||||
/* case mapping loop */
|
||||
int32_t srcIndex = 0;
|
||||
int32_t destIndex = 0;
|
||||
while (srcIndex < srcLength) {
|
||||
int32_t cpStart = srcIndex;
|
||||
UChar32 c;
|
||||
U16_NEXT(src, srcIndex, srcLength, c);
|
||||
const UChar *s;
|
||||
c = ucase_toFullFolding(c, &s, options);
|
||||
destIndex = appendResult(dest, destIndex, destCapacity, c, s,
|
||||
srcIndex - cpStart, options, edits);
|
||||
if (destIndex < 0) {
|
||||
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
int32_t destIndex = toLower(
|
||||
-1, options,
|
||||
dest, destCapacity,
|
||||
src, nullptr, 0, srcLength,
|
||||
edits, errorCode);
|
||||
return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
|
||||
}
|
||||
|
||||
|
|
87
deps/icu-small/source/common/utf_impl.cpp
vendored
87
deps/icu-small/source/common/utf_impl.cpp
vendored
|
@ -238,33 +238,45 @@ utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, U
|
|||
int32_t i=*pi;
|
||||
if(U8_IS_TRAIL(c) && i>start) {
|
||||
uint8_t b1=s[--i];
|
||||
if(0xc2<=b1 && b1<0xe0) {
|
||||
*pi=i;
|
||||
return ((b1-0xc0)<<6)|(c&0x3f);
|
||||
if(U8_IS_LEAD(b1)) {
|
||||
if(b1<0xe0) {
|
||||
*pi=i;
|
||||
return ((b1-0xc0)<<6)|(c&0x3f);
|
||||
} else if(b1<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(b1, c) : U8_IS_VALID_LEAD4_AND_T1(b1, c)) {
|
||||
// Truncated 3- or 4-byte sequence.
|
||||
*pi=i;
|
||||
return errorValue(1, strict);
|
||||
}
|
||||
} else if(U8_IS_TRAIL(b1) && i>start) {
|
||||
// Extract the value bits from the last trail byte.
|
||||
c&=0x3f;
|
||||
uint8_t b2=s[--i];
|
||||
if(0xe0<=b2 && b2<0xf0) {
|
||||
b2&=0xf;
|
||||
if(strict!=-2) {
|
||||
if(U8_IS_VALID_LEAD3_AND_T1(b2, b1)) {
|
||||
*pi=i;
|
||||
c=(b2<<12)|((b1&0x3f)<<6)|c;
|
||||
if(strict<=0 || !U_IS_UNICODE_NONCHAR(c)) {
|
||||
return c;
|
||||
} else {
|
||||
// strict: forbid non-characters like U+fffe
|
||||
return errorValue(2, strict);
|
||||
if(0xe0<=b2 && b2<=0xf4) {
|
||||
if(b2<0xf0) {
|
||||
b2&=0xf;
|
||||
if(strict!=-2) {
|
||||
if(U8_IS_VALID_LEAD3_AND_T1(b2, b1)) {
|
||||
*pi=i;
|
||||
c=(b2<<12)|((b1&0x3f)<<6)|c;
|
||||
if(strict<=0 || !U_IS_UNICODE_NONCHAR(c)) {
|
||||
return c;
|
||||
} else {
|
||||
// strict: forbid non-characters like U+fffe
|
||||
return errorValue(2, strict);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// strict=-2 -> lenient: allow surrogates
|
||||
b1-=0x80;
|
||||
if((b2>0 || b1>=0x20)) {
|
||||
*pi=i;
|
||||
return (b2<<12)|(b1<<6)|c;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// strict=-2 -> lenient: allow surrogates
|
||||
b1-=0x80;
|
||||
if((b2>0 || b1>=0x20)) {
|
||||
*pi=i;
|
||||
return (b2<<12)|(b1<<6)|c;
|
||||
}
|
||||
} else if(U8_IS_VALID_LEAD4_AND_T1(b2, b1)) {
|
||||
// Truncated 4-byte sequence.
|
||||
*pi=i;
|
||||
return errorValue(2, strict);
|
||||
}
|
||||
} else if(U8_IS_TRAIL(b2) && i>start) {
|
||||
uint8_t b3=s[--i];
|
||||
|
@ -281,16 +293,7 @@ utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, U
|
|||
}
|
||||
}
|
||||
}
|
||||
} else if(0xf0<=b2 && b2<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(b2, b1)) {
|
||||
// Truncated 4-byte sequence.
|
||||
*pi=i;
|
||||
return errorValue(2, strict);
|
||||
}
|
||||
} else if((0xe0<=b1 && b1<0xf0 && U8_IS_VALID_LEAD3_AND_T1(b1, c)) ||
|
||||
(0xf0<=b1 && b1<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(b1, c))) {
|
||||
// Truncated 3- or 4-byte sequence.
|
||||
*pi=i;
|
||||
return errorValue(1, strict);
|
||||
}
|
||||
}
|
||||
return errorValue(0, strict);
|
||||
|
@ -303,29 +306,23 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i) {
|
|||
uint8_t c=s[i];
|
||||
if(U8_IS_TRAIL(c) && i>start) {
|
||||
uint8_t b1=s[--i];
|
||||
if(0xc2<=b1 && b1<0xe0) {
|
||||
return i;
|
||||
if(U8_IS_LEAD(b1)) {
|
||||
if(b1<0xe0 ||
|
||||
(b1<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(b1, c) : U8_IS_VALID_LEAD4_AND_T1(b1, c))) {
|
||||
return i;
|
||||
}
|
||||
} else if(U8_IS_TRAIL(b1) && i>start) {
|
||||
uint8_t b2=s[--i];
|
||||
if(0xe0<=b2 && b2<0xf0) {
|
||||
if(U8_IS_VALID_LEAD3_AND_T1(b2, b1)) {
|
||||
if(0xe0<=b2 && b2<=0xf4) {
|
||||
if(b2<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(b2, b1) : U8_IS_VALID_LEAD4_AND_T1(b2, b1)) {
|
||||
return i;
|
||||
}
|
||||
} else if(U8_IS_TRAIL(b2) && i>start) {
|
||||
uint8_t b3=s[--i];
|
||||
if(0xf0<=b3 && b3<=0xf4) {
|
||||
if(U8_IS_VALID_LEAD4_AND_T1(b3, b2)) {
|
||||
return i;
|
||||
}
|
||||
if(0xf0<=b3 && b3<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(b3, b2)) {
|
||||
return i;
|
||||
}
|
||||
} else if(0xf0<=b2 && b2<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(b2, b1)) {
|
||||
// Truncated 4-byte sequence.
|
||||
return i;
|
||||
}
|
||||
} else if((0xe0<=b1 && b1<0xf0 && U8_IS_VALID_LEAD3_AND_T1(b1, c)) ||
|
||||
(0xf0<=b1 && b1<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(b1, c))) {
|
||||
// Truncated 3- or 4-byte sequence.
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return orig_i;
|
||||
|
|
2
deps/icu-small/source/common/utrie.h
vendored
2
deps/icu-small/source/common/utrie.h
vendored
|
@ -556,7 +556,7 @@ struct UNewTrie {
|
|||
* Index values at build-time are 32 bits wide for easier processing.
|
||||
* Bit 31 is set if the data block is used by multiple index values (from utrie_setRange()).
|
||||
*/
|
||||
int32_t index[UTRIE_MAX_INDEX_LENGTH];
|
||||
int32_t index[UTRIE_MAX_INDEX_LENGTH+UTRIE_SURROGATE_BLOCK_COUNT];
|
||||
uint32_t *data;
|
||||
|
||||
uint32_t leadUnitValue;
|
||||
|
|
5
deps/icu-small/source/common/uts46.cpp
vendored
5
deps/icu-small/source/common/uts46.cpp
vendored
|
@ -1126,7 +1126,6 @@ isASCIIOkBiDi(const char *s, int32_t length) {
|
|||
|
||||
UBool
|
||||
UTS46::isLabelOkContextJ(const UChar *label, int32_t labelLength) const {
|
||||
const UBiDiProps *bdp=ubidi_getSingleton();
|
||||
// [IDNA2008-Tables]
|
||||
// 200C..200D ; CONTEXTJ # ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER
|
||||
for(int32_t i=0; i<labelLength; ++i) {
|
||||
|
@ -1148,7 +1147,7 @@ UTS46::isLabelOkContextJ(const UChar *label, int32_t labelLength) const {
|
|||
}
|
||||
// check precontext (Joining_Type:{L,D})(Joining_Type:T)*
|
||||
for(;;) {
|
||||
UJoiningType type=ubidi_getJoiningType(bdp, c);
|
||||
UJoiningType type=ubidi_getJoiningType(c);
|
||||
if(type==U_JT_TRANSPARENT) {
|
||||
if(j==0) {
|
||||
return FALSE;
|
||||
|
@ -1166,7 +1165,7 @@ UTS46::isLabelOkContextJ(const UChar *label, int32_t labelLength) const {
|
|||
return FALSE;
|
||||
}
|
||||
U16_NEXT_UNSAFE(label, j, c);
|
||||
UJoiningType type=ubidi_getJoiningType(bdp, c);
|
||||
UJoiningType type=ubidi_getJoiningType(c);
|
||||
if(type==U_JT_TRANSPARENT) {
|
||||
// just skip this character
|
||||
} else if(type==U_JT_RIGHT_JOINING || type==U_JT_DUAL_JOINING) {
|
||||
|
|
3
deps/icu-small/source/common/utypes.cpp
vendored
3
deps/icu-small/source/common/utypes.cpp
vendored
|
@ -125,7 +125,8 @@ _uFmtErrorName[U_FMT_PARSE_ERROR_LIMIT - U_FMT_PARSE_ERROR_START] = {
|
|||
"U_UNDEFINED_KEYWORD",
|
||||
"U_DEFAULT_KEYWORD_MISSING",
|
||||
"U_DECIMAL_NUMBER_SYNTAX_ERROR",
|
||||
"U_FORMAT_INEXACT_ERROR"
|
||||
"U_FORMAT_INEXACT_ERROR",
|
||||
"U_NUMBER_ARG_OUTOFBOUNDS_ERROR"
|
||||
};
|
||||
|
||||
static const char * const
|
||||
|
|
Binary file not shown.
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue