27#include <unicode/ubrk.h>
28#include <unicode/uchar.h>
29#include <unicode/uloc.h>
30#include <unicode/umachine.h>
31#include <unicode/utext.h>
32#include <unicode/utypes.h>
42#if defined(SK_USING_THIRD_PARTY_ICU) && defined(SK_BUILD_FOR_WIN)
55#define SKICU_FUNC(funcname) \
56 template <typename... Args> \
57 auto sk_##funcname(Args&&... args) -> decltype(funcname(std::forward<Args>(args)...)) { \
58 return SkGetICULib()->f_##funcname(std::forward<Args>(args)...); \
64static inline UBreakIterator*
sk_ubrk_clone(
const UBreakIterator* bi, UErrorCode* status) {
66 SkASSERT(icu->f_ubrk_clone_ || icu->f_ubrk_safeClone_);
67 return icu->f_ubrk_clone_
68 ? icu->f_ubrk_clone_(bi, status)
69 : icu->f_ubrk_safeClone_(bi,
nullptr,
nullptr, status);
73 return sk_utext_close(ut);
// Owning handle for an ICU UText: a std::unique_ptr whose deleter type is
// SkFunctionObject<utext_close_wrapper>, so the wrapper runs when the handle
// is destroyed or reset.
79using ICUUText = std::unique_ptr<UText, SkFunctionObject<utext_close_wrapper>>;
// Owning handle for an ICU UBreakIterator: a std::unique_ptr whose deleter type
// is SkFunctionObject<ubrk_close_wrapper>, so the wrapper runs when the handle
// is destroyed or reset.
80using ICUBreakIterator = std::unique_ptr<UBreakIterator, SkFunctionObject<ubrk_close_wrapper>>;
84 return val < 0 ? 0xFFFD : val;
95 return UBRK_CHARACTER;
104 : fBreakIterator(
std::move(iter))
106 Position first()
override {
return fLastResult = sk_ubrk_first(fBreakIterator.get()); }
107 Position current()
override {
return fLastResult = sk_ubrk_current(fBreakIterator.get()); }
108 Position next()
override {
return fLastResult = sk_ubrk_next(fBreakIterator.get()); }
109 Status status()
override {
return sk_ubrk_getRuleStatus(fBreakIterator.get()); }
110 bool isDone()
override {
return fLastResult == UBRK_DONE; }
112 bool setText(
const char utftext8[],
int utf8Units)
override {
113 UErrorCode
status = U_ZERO_ERROR;
121 sk_ubrk_setUText(fBreakIterator.get(),
text.get(), &
status);
129 bool setText(
const char16_t utftext16[],
int utf16Units)
override {
130 UErrorCode
status = U_ZERO_ERROR;
131 ICUUText text(sk_utext_openUChars(
nullptr,
reinterpret_cast<const UChar*
>(&utftext16[0]),
139 sk_ubrk_setUText(fBreakIterator.get(),
text.get(), &
status);
150 struct Request final {
153 , fIcuLocale(icuLocale)
164 bool operator==(
const Request& that)
const {
165 return this->fType == that.fType && this->fIcuLocale == that.fIcuLocale;
171 class BreakIteratorRef final {
173 BreakIteratorRef(
ICUBreakIterator iter) : breakIterator(iter.release()), fRefCnt(1) {
179 BreakIteratorRef& operator=(
const SkRefCntBase&) =
delete;
180 ~BreakIteratorRef() {
192 if (1 == fRefCnt--) {
198 UBreakIterator* breakIterator;
199 static int32_t GetInstanceCount() {
return Instances; }
201 mutable int32_t fRefCnt;
202 static int32_t Instances;
207 void purgeIfNeeded() {
210 if (fRequestCache.
count() > 100) {
212 fRequestCache.
reset();
215 if (BreakIteratorRef::GetInstanceCount() > 4) {
217 for (
auto&& [
key, value] : fRequestCache) {
218 if (
value->breakIterator) {
219 sk_ubrk_close(
value->breakIterator);
220 value->breakIterator =
nullptr;
234 UErrorCode status = U_ZERO_ERROR;
237 char localeIDStorage[ULOC_FULLNAME_CAPACITY];
238 const char* localeID =
nullptr;
240 sk_uloc_forLanguageTag(bcp47, localeIDStorage, ULOC_FULLNAME_CAPACITY,
nullptr, &status);
241 if (U_FAILURE(status)) {
242 SkDEBUGF(
"Break error could not get language tag: %s", sk_u_errorName(status));
243 }
else if (localeIDStorage[0]) {
244 localeID = localeIDStorage;
248 localeID = sk_uloc_getDefault();
251 auto make = [](
const Request& request) -> UBreakIterator* {
252 UErrorCode status = U_ZERO_ERROR;
253 UBreakIterator* bi = sk_ubrk_open(
convertType(request.fType),
254 request.fIcuLocale.c_str(),
255 nullptr, 0, &status);
256 if (U_FAILURE(status)) {
257 SkDEBUGF(
"Break error: %s", sk_u_errorName(status));
267 UErrorCode status = U_ZERO_ERROR;
269 if (U_FAILURE(status)) {
270 SkDEBUGF(
"Break error: %s", sk_u_errorName(status));
275 Request request(
type, localeID);
281 if (!(*ref)->breakIterator) {
282 (*ref)->breakIterator =
make(request);
284 return clone((*ref)->breakIterator);
300 newIter.get(), ULOC_ACTUAL_LOCALE, &status);
302 if (!U_FAILURE(status)) {
307 if (strcmp(actualLocale, localeID) != 0) {
308 Request actualRequest(
type, actualLocale);
311 if (!(*actualRef)->breakIterator) {
312 (*actualRef)->breakIterator = newIter.release();
314 actualRef = fRequestCache.
set(request, *actualRef);
315 return clone((*actualRef)->breakIterator);
317 this->purgeIfNeeded();
318 newRef = sk_make_sp<BreakIteratorRef>(std::move(newIter));
319 fRequestCache.
set(actualRequest, newRef);
326 this->purgeIfNeeded();
327 newRef = sk_make_sp<BreakIteratorRef>(std::move(newIter));
329 fRequestCache.
set(request, newRef);
331 return clone(newRef->breakIterator);
// Out-of-class definition of the static BreakIteratorRef::Instances counter,
// initialized to zero; GetInstanceCount() returns its value.
334 int32_t SkIcuBreakIteratorCache::BreakIteratorRef::Instances{0};
338 static bool extractWords(uint16_t utf16[],
int utf16Units,
const char* locale,
339 std::vector<Position>* words) {
341 UErrorCode status = U_ZERO_ERROR;
346 SkDEBUGF(
"Break error: %s", sk_u_errorName(status));
351 ICUUText utf16UText(sk_utext_openUChars(
nullptr, (UChar*)utf16, utf16Units, &status));
352 if (U_FAILURE(status)) {
353 SkDEBUGF(
"Break error: %s", sk_u_errorName(status));
357 sk_ubrk_setUText(iterator.get(), utf16UText.get(), &status);
358 if (U_FAILURE(status)) {
359 SkDEBUGF(
"Break error: %s", sk_u_errorName(status));
364 int32_t
pos = sk_ubrk_first(iterator.get());
365 while (
pos != UBRK_DONE) {
366 words->emplace_back(
pos);
367 pos = sk_ubrk_next(iterator.get());
373 static bool extractPositions(
const char utf8[],
int utf8Units,
375 const std::function<
void(
int,
int)>& setBreak) {
377 UErrorCode status = U_ZERO_ERROR;
378 ICUUText text(sk_utext_openUTF8(
nullptr, &utf8[0], utf8Units, &status));
379 if (U_FAILURE(status)) {
380 SkDEBUGF(
"Break error: %s", sk_u_errorName(status));
390 sk_ubrk_setUText(iterator.get(),
text.get(), &status);
391 if (U_FAILURE(status)) {
392 SkDEBUGF(
"Break error: %s", sk_u_errorName(status));
396 auto iter = iterator.get();
397 int32_t
pos = sk_ubrk_first(iter);
398 while (
pos != UBRK_DONE) {
401 : sk_ubrk_getRuleStatus(iter);
403 pos = sk_ubrk_next(iter);
411 const char*
end = utf8 + utf8Units;
412 const char* ch = utf8;
415 if (SkUnicode_icu::isHardLineBreak(unichar)) {
416 setBreak(ch - utf8, UBRK_LINE_HARD);
424 return sk_u_iscntrl(utf8);
428 return sk_u_isWhitespace(utf8);
432 return sk_u_isspace(utf8);
436 return SkUnicode_icu::isHardLineBreak(utf8);
440 return sk_u_hasBinaryProperty(unichar, UCHAR_EMOJI);
444 return sk_u_hasBinaryProperty(unichar, UCHAR_EMOJI_COMPONENT);
448 return sk_u_hasBinaryProperty(unichar, UCHAR_EMOJI_MODIFIER_BASE);
452 return sk_u_hasBinaryProperty(unichar, UCHAR_EMOJI_MODIFIER);
456 return sk_u_hasBinaryProperty(unichar, UCHAR_REGIONAL_INDICATOR);
460 return sk_u_hasBinaryProperty(unichar, UCHAR_IDEOGRAPHIC);
467 static bool isHardLineBreak(
SkUnichar utf8) {
468 auto property = sk_u_getIntPropertyValue(utf8, UCHAR_LINE_BREAK);
469 return property == U_LB_LINE_FEED ||
property == U_LB_MANDATORY_BREAK;
496 return this->
toUpper(str,
nullptr);
503 UErrorCode icu_err = U_ZERO_ERROR;
504 const auto upper16len = sk_u_strToUpper(
nullptr, 0, (UChar*)(str16.c_str()), str16.size(),
506 if (icu_err != U_BUFFER_OVERFLOW_ERROR || upper16len <= 0) {
511 icu_err = U_ZERO_ERROR;
513 (UChar*)(str16.c_str()), str16.size(),
524 std::vector<BidiRegion>* results)
override {
525 return fBidiFact->
ExtractBidi(utf8, utf8Units, dir, results);
528 bool getWords(
const char utf8[],
int utf8Units,
const char* locale,
529 std::vector<Position>* results)
override {
533 return SkUnicode_icu::extractWords((uint16_t*)utf16.c_str(), utf16.size(), locale, results);
539 std::vector<Position>* results)
override {
542 std::vector<Position> utf16Results;
543 if (!SkUnicode_icu::extractWords(
544 (uint16_t*)utf16.c_str(), utf16.size(), locale, &utf16Results)) {
548 std::vector<Position> mapping;
551 text, [&](
size_t index) { mapping.emplace_back(index); }, [&](
size_t index) {});
553 for (
auto i16 : utf16Results) {
554 results->emplace_back(mapping[i16]);
562 std::vector<SkUnicode::Position>* results)
override {
563 SkUnicode_icu::extractPositions(
565 [&](
int pos,
int status) {
566 results->emplace_back(
pos);
577 [&](
int pos,
int status) {
578 (*results)[
pos] |= status == UBRK_LINE_HARD
584 [&](
int pos,
int status) {
588 const char* current = utf8;
589 const char*
end = utf8 + utf8Units;
590 while (current <
end) {
591 auto before = current - utf8;
593 if (unichar < 0) unichar = 0xFFFD;
594 auto after = current - utf8;
602 for (
auto i = before; i < after; ++i) {
628 [
this, results, replaceTabs, &utf16](
SkUnichar unichar, int32_t
start, int32_t
end) {
629 for (
auto i =
start; i <
end; ++i) {
657 (
char16_t*)&utf16[0],
676 int32_t logicalFromVisual[])
override {
688#if defined(SK_USING_THIRD_PARTY_ICU) && defined(SK_BUILD_FOR_WIN)
691 once([] {
SkDEBUGF(
"SkLoadICU() failed!\n"); });
696 return sk_make_sp<SkUnicode_icu>();
constexpr int32_t SkToS32(S x)
static SKICU_EMIT_FUNCS UBreakIterator * sk_ubrk_clone(const UBreakIterator *bi, UErrorCode *status)
static void ubrk_close_wrapper(UBreakIterator *bi)
static UText * utext_close_wrapper(UText *ut)
std::unique_ptr< UText, SkFunctionObject< utext_close_wrapper > > ICUUText
const SkICULib * SkGetICULib()
std::unique_ptr< UBreakIterator, SkFunctionObject< ubrk_close_wrapper > > ICUBreakIterator
static UBreakIteratorType convertType(SkUnicode::BreakType type)
static SkUnichar utf8_next(const char **ptr, const char *end)
std::unique_ptr< SkICULib > SkLoadICULib()
bool ExtractBidi(const char utf8[], int utf8Units, SkUnicode::TextDirection dir, std::vector< SkUnicode::BidiRegion > *bidiRegions) const
virtual void bidi_reorderVisual(const SkUnicode::BidiLevel runLevels[], int levelsCount, int32_t logicalFromVisual[]) const =0
std::unique_ptr< SkBidiIterator > MakeIterator(const uint16_t utf16[], int utf16Units, SkBidiIterator::Direction dir) const
SkBreakIterator_icu(ICUBreakIterator iter)
Position first() override
bool setText(const char utftext8[], int utf8Units) override
bool setText(const char16_t utftext16[], int utf16Units) override
Position current() override
static SkIcuBreakIteratorCache & get()
ICUBreakIterator makeBreakIterator(SkUnicode::BreakType type, const char *bcp47)
const char * c_str() const
bool isEmojiModifierBase(SkUnichar unichar) override
bool isEmojiComponent(SkUnichar unichar) override
std::unique_ptr< SkBidiIterator > makeBidiIterator(const char text[], int count, SkBidiIterator::Direction dir) override
std::unique_ptr< SkBreakIterator > makeBreakIterator(BreakType type) override
SkString toUpper(const SkString &str) override
void reorderVisual(const BidiLevel runLevels[], int levelsCount, int32_t logicalFromVisual[]) override
SkString toUpper(const SkString &str, const char *locale) override
bool isSpace(SkUnichar utf8) override
bool getUtf8Words(const char utf8[], int utf8Units, const char *locale, std::vector< Position > *results) override
bool isTabulation(SkUnichar utf8) override
bool isRegionalIndicator(SkUnichar unichar) override
bool computeCodeUnitFlags(char16_t utf16[], int utf16Units, bool replaceTabs, TArray< SkUnicode::CodeUnitFlags, true > *results) override
bool getBidiRegions(const char utf8[], int utf8Units, TextDirection dir, std::vector< BidiRegion > *results) override
std::unique_ptr< SkBidiIterator > makeBidiIterator(const uint16_t text[], int count, SkBidiIterator::Direction dir) override
bool isHardBreak(SkUnichar utf8) override
bool isIdeographic(SkUnichar unichar) override
bool isControl(SkUnichar utf8) override
bool getSentences(const char utf8[], int utf8Units, const char *locale, std::vector< SkUnicode::Position > *results) override
~SkUnicode_icu() override
bool isWhitespace(SkUnichar utf8) override
bool isEmoji(SkUnichar unichar) override
bool isEmojiModifier(SkUnichar unichar) override
bool computeCodeUnitFlags(char utf8[], int utf8Units, bool replaceTabs, TArray< SkUnicode::CodeUnitFlags, true > *results) override
std::unique_ptr< SkBreakIterator > makeBreakIterator(const char locale[], BreakType type) override
bool getWords(const char utf8[], int utf8Units, const char *locale, std::vector< Position > *results) override
static std::u16string convertUtf8ToUtf16(const char *utf8, int utf8Units)
void forEachCodepoint(const char *utf8, int32_t utf8Units, Callback &&callback)
static SkString convertUtf16ToUtf8(const char16_t *utf16, int utf16Units)
static bool extractUtfConversionMapping(SkSpan< const char > utf8, Appender8 &&appender8, Appender16 &&appender16)
void forEachBreak(const char16_t utf16[], int utf16Units, SkUnicode::BreakType type, Callback &&callback)
V * find(const K &key) const
static sk_sp< SkImage > make(sk_sp< SkColorSpace > cs)
SK_SPI SkUnichar NextUTF8(const char **ptr, const char *end)
SKUNICODE_API sk_sp< SkUnicode > Make()
const char *(* f_ubrk_getLocaleByType)(const UBreakIterator *, ULocDataLocaleType, UErrorCode *)
uint32_t operator()(const Request &key) const