19#include <ICU4XBidi.hpp>
20#include <ICU4XCaseMapper.hpp>
21#include <ICU4XCodePointMapData8.hpp>
22#include <ICU4XCodePointSetData.hpp>
23#include <ICU4XDataProvider.hpp>
24#include <ICU4XGraphemeClusterSegmenter.hpp>
25#include <ICU4XLineSegmenter.hpp>
26#include <ICU4XWordSegmenter.hpp>
38 fLocale = ICU4XLocale::create_from_string(
"tr").ok().value();
39 fDataProvider = ICU4XDataProvider::create_compiled();
40 fCaseMapper = ICU4XCaseMapper::create(fDataProvider).ok().value();
41 const auto general = ICU4XCodePointMapData8::load_general_category(fDataProvider).ok().value();
42 fControls = general.get_set_for_value(15);
43 fWhitespaces = general.get_set_for_value(12);
44 fSpaces = general.get_set_for_value(12);
46 fBlanks = ICU4XCodePointSetData::load_blank(fDataProvider).ok().value();
47 fEmoji = ICU4XCodePointSetData::load_emoji(fDataProvider).ok().value();
48 fEmojiComponent = ICU4XCodePointSetData::load_emoji_component(fDataProvider).ok().value();
49 fEmojiModifier = ICU4XCodePointSetData::load_emoji_modifier(fDataProvider).ok().value();
50 fEmojiModifierBase = ICU4XCodePointSetData::load_emoji_modifier_base(fDataProvider).ok().value();
51 fEmoji = ICU4XCodePointSetData::load_emoji(fDataProvider).ok().value();
52 fRegionalIndicator = ICU4XCodePointSetData::load_regional_indicator(fDataProvider).ok().value();
53 fIdeographic = ICU4XCodePointSetData::load_ideographic(fDataProvider).ok().value();
54 fLineBreaks = ICU4XCodePointMapData8::load_line_break(fDataProvider).ok().value();
66 auto value = fLineBreaks.get(utf8);
67 return (
value == 6) ||
97 std::vector<BidiRegion>* results)
override {
99 const auto bidi = ICU4XBidi::create(fDataProvider).ok().value();
100 std::string_view string_view(utf8, utf8Units);
102 auto currentLevel =
info.level_at(0);
105 for (
size_t i = 1; i <
info.size(); i++) {
106 const auto level =
info.level_at(i);
107 if (level != currentLevel) {
108 (*results).emplace_back(
start, i, currentLevel);
109 currentLevel = level;
113 (*results).emplace_back(
start,
info.size(), currentLevel);
120 std::vector<BidiRegion>* results) {
122 return this->
getBidiRegions(utf8.data(), utf8.size(), dir, results);
131 this->markLineBreaks(utf8, utf8Units,
false, results);
132 this->markHardLineBreaksHack(utf8, utf8Units, results);
133 this->markGraphemes(utf8, utf8Units, results);
134 this->markCharacters(utf8, utf8Units, replaceTabs, results);
147 std::vector<Position>* results)
override {
149 const diplomat::span<const uint16_t> span((uint16_t*)utf16.data(), utf16.size());
150 const auto segmenter = ICU4XWordSegmenter::create_dictionary(fDataProvider).ok().value();
151 auto iterator = segmenter.segment_utf16(span);
153 int32_t breakpoint = iterator.next();
154 if (breakpoint == -1) {
157 results->emplace_back(breakpoint);
167 auto locale = ICU4XLocale::create_from_string(localeStr).ok().value();
168 std::string std_string(str.
data(), str.
size());
170 auto result = fCaseMapper.uppercase(std_string, locale).ok().value();
176 int32_t logicalFromVisual[])
override {
178 const auto bidi = ICU4XBidi::create(fDataProvider).ok().value();
179 const diplomat::span<const uint8_t> levels(&runLevels[0], levelsCount);
180 auto map = bidi.reorder_visual(levels);
182 std::vector<int32_t> results;
183 for (
size_t i = 0; i < map.len(); i++) {
184 auto level = map.get(i);
185 logicalFromVisual[i] =
SkToS32(level);
193 bool markHardLineBreaksHack(
char utf8[],
196 const char*
end = utf8 + utf8Units;
197 const char* ch = utf8;
207 SkUnichar getChar32(
const char* pointer,
const char*
end) {
214 bool markLineBreaks(
char utf8[],
218 if (utf8Units == 0) {
223 const auto lineBreakingOptions = hardLineBreaks
224 ? ICU4XLineBreakOptionsV1{ICU4XLineBreakStrictness::Strict, ICU4XLineBreakWordOption::Normal}
225 : ICU4XLineBreakOptionsV1{ICU4XLineBreakStrictness::Loose, ICU4XLineBreakWordOption::Normal};
226 const auto segmenter = ICU4XLineSegmenter::create_auto_with_options_v1(fDataProvider, lineBreakingOptions).ok().value();
227 std::string_view string_view(utf8, utf8Units);
228 auto iterator = segmenter.segment_utf8(string_view);
231 int32_t lineBreak = iterator.next();
232 if (lineBreak == -1) {
235 if (hardLineBreaks) {
241 if (!hardLineBreaks) {
248 bool markGraphemes(
const char utf8[],
251 const auto segmenter = ICU4XGraphemeClusterSegmenter::create(fDataProvider).ok().value();
252 std::string_view string_view(utf8, utf8Units);
253 auto iterator = segmenter.segment_utf8(string_view);
255 int32_t graphemeStart = iterator.next();
256 if (graphemeStart == -1) {
264 bool markCharacters(
char utf8[],
268 const char* current =
utf8;
269 const char*
end =
utf8 + utf8Units;
270 while (current <
end) {
271 auto before = current -
utf8;
273 if (unichar < 0) unichar = 0xFFFD;
274 auto after = current -
utf8;
282 for (
auto i = before; i < after; ++i) {
303 std::vector<Position>* results)
override {
304 SkDEBUGF(
"Method 'getUtf8Words' is not implemented\n");
311 std::vector<SkUnicode::Position>* results)
override {
312 SkDEBUGF(
"Method 'getSentences' is not implemented\n");
316 std::shared_ptr<std::vector<SkUnicode::BidiRegion>> fRegions;
318 ICU4XDataProvider fDataProvider;
319 ICU4XCaseMapper fCaseMapper;
320 ICU4XCodePointSetData fWhitespaces;
321 ICU4XCodePointSetData fSpaces;
322 ICU4XCodePointSetData fBlanks;
323 ICU4XCodePointSetData fEmoji;
324 ICU4XCodePointSetData fEmojiComponent;
325 ICU4XCodePointSetData fEmojiModifier;
326 ICU4XCodePointSetData fEmojiModifierBase;
327 ICU4XCodePointSetData fRegionalIndicator;
328 ICU4XCodePointSetData fIdeographic;
329 ICU4XCodePointSetData fControls;
330 ICU4XCodePointMapData8 fLineBreaks;
344 bool setText(
const char utftext8[],
int utf8Units)
override {
SkASSERT(
false);
return false; }
345 bool setText(
const char16_t utftext16[],
int utf16Units)
override {
SkASSERT(
false);
return false; }
349 std::shared_ptr<std::vector<SkUnicode::BidiRegion>> fRegions;
352 : fRegions(regions) { }
355 auto found = std::lower_bound(
360 return a.start <= b.start && a.end <= b.end;
371 fRegions = std::make_shared<std::vector<SkUnicode::BidiRegion>>();
375 return std::make_unique<SkBidiIterator_icu4x>(fRegions);
387 fRegions = std::make_shared<std::vector<SkUnicode::BidiRegion>>();
390 return std::make_unique<SkBidiIterator_icu4x>(fRegions);
407 return sk_make_sp<SkUnicode_icu4x>();
static void info(const char *fmt,...) SK_PRINTF_LIKE(1
constexpr int32_t SkToS32(S x)
Position getLength() override
Level getLevelAt(Position pos) override
SkBidiIterator_icu4x(std::shared_ptr< std::vector< SkUnicode::BidiRegion > > regions)
bool setText(const char16_t utftext16[], int utf16Units) override
bool setText(const char utftext8[], int utf8Units) override
Position first() override
Position current() override
const char * data() const
bool getBidiRegions(const char utf8[], int utf8Units, TextDirection dir, std::vector< BidiRegion > *results) override
bool isWhitespace(SkUnichar utf8) override
std::unique_ptr< SkBidiIterator > makeBidiIterator(const uint16_t text[], int count, SkBidiIterator::Direction dir) override
bool isHardBreak(SkUnichar utf8) override
bool isEmojiModifierBase(SkUnichar utf8) override
std::unique_ptr< SkBreakIterator > makeBreakIterator(const char locale[], BreakType breakType) override
bool computeCodeUnitFlags(char utf8[], int utf8Units, bool replaceTabs, skia_private::TArray< SkUnicode::CodeUnitFlags, true > *results) override
SkString toUpper(const SkString &str) override
bool getUtf8Words(const char utf8[], int utf8Units, const char *locale, std::vector< Position > *results) override
bool isSpace(SkUnichar utf8) override
bool isEmojiComponent(SkUnichar utf8) override
bool isEmojiModifier(SkUnichar utf8) override
bool isRegionalIndicator(SkUnichar utf8) override
bool getBidiRegions(const uint16_t utf16[], int utf16Units, TextDirection dir, std::vector< BidiRegion > *results)
bool getWords(const char utf8[], int utf8Units, const char *locale, std::vector< Position > *results) override
void reorderVisual(const BidiLevel runLevels[], int levelsCount, int32_t logicalFromVisual[]) override
bool getSentences(const char utf8[], int utf8Units, const char *locale, std::vector< SkUnicode::Position > *results) override
SkString toUpper(const SkString &str, const char *localeStr) override
bool isControl(SkUnichar utf8) override
bool isIdeographic(SkUnichar utf8) override
~SkUnicode_icu4x() override=default
bool isTabulation(SkUnichar utf8) override
bool computeCodeUnitFlags(char16_t utf16[], int utf16Units, bool replaceTabs, skia_private::TArray< SkUnicode::CodeUnitFlags, true > *results) override
bool isEmoji(SkUnichar utf8) override
static std::u16string convertUtf8ToUtf16(const char *utf8, int utf8Units)
static SkString convertUtf16ToUtf8(const char16_t *utf16, int utf16Units)
SK_SPI SkUnichar NextUTF8(const char **ptr, const char *end)
SKUNICODE_API sk_sp< SkUnicode > Make()