Flutter Engine
The Flutter Engine
SkUnicode_icu4x.cpp
Go to the documentation of this file.
1/*
2* Copyright 2023 Google Inc.
3*
4* Use of this source code is governed by a BSD-style license that can be
5* found in the LICENSE file.
6*/
8
17#include "src/base/SkUTF.h"
18
19#include <ICU4XBidi.hpp>
20#include <ICU4XCaseMapper.hpp>
21#include <ICU4XCodePointMapData8.hpp>
22#include <ICU4XCodePointSetData.hpp>
23#include <ICU4XDataProvider.hpp>
24#include <ICU4XGraphemeClusterSegmenter.hpp>
25#include <ICU4XLineSegmenter.hpp>
26#include <ICU4XWordSegmenter.hpp>
27
28#include <algorithm>
29#include <cstdint>
30#include <memory>
31#include <string>
32#include <utility>
33#include <vector>
34
35class SkUnicode_icu4x : public SkUnicode {
36public:
38 fLocale = ICU4XLocale::create_from_string("tr").ok().value();
39 fDataProvider = ICU4XDataProvider::create_compiled();
40 fCaseMapper = ICU4XCaseMapper::create(fDataProvider).ok().value();
41 const auto general = ICU4XCodePointMapData8::load_general_category(fDataProvider).ok().value();
42 fControls = general.get_set_for_value(/*Control*/15);
43 fWhitespaces = general.get_set_for_value(/*SpaceSeparator*/12);
44 fSpaces = general.get_set_for_value(/*SpaceSeparator*/12);
45 // TODO: u_isSpace
46 fBlanks = ICU4XCodePointSetData::load_blank(fDataProvider).ok().value();
47 fEmoji = ICU4XCodePointSetData::load_emoji(fDataProvider).ok().value();
48 fEmojiComponent = ICU4XCodePointSetData::load_emoji_component(fDataProvider).ok().value();
49 fEmojiModifier = ICU4XCodePointSetData::load_emoji_modifier(fDataProvider).ok().value();
50 fEmojiModifierBase = ICU4XCodePointSetData::load_emoji_modifier_base(fDataProvider).ok().value();
51 fEmoji = ICU4XCodePointSetData::load_emoji(fDataProvider).ok().value();
52 fRegionalIndicator = ICU4XCodePointSetData::load_regional_indicator(fDataProvider).ok().value();
53 fIdeographic = ICU4XCodePointSetData::load_ideographic(fDataProvider).ok().value();
54 fLineBreaks = ICU4XCodePointMapData8::load_line_break(fDataProvider).ok().value();
55 }
56
// Defaulted: destruction is left entirely to the members' own destructors.
57 ~SkUnicode_icu4x() override = default;
58
// NOTE(review): only declared here; no definition of reset() appears in this
// listing, so confirm one exists before calling it.
59 void reset();
60
61 // SkUnicode properties
62 bool isControl(SkUnichar utf8) override { return fControls.contains(utf8); }
63 bool isWhitespace(SkUnichar utf8) override { return fWhitespaces.contains(utf8); }
64 bool isSpace(SkUnichar utf8) override { return fBlanks.contains(utf8); }
65 bool isHardBreak(SkUnichar utf8) override {
66 auto value = fLineBreaks.get(utf8);
67 return (value == /*MandatoryBreak*/6) ||
68 (value == /*CarriageReturn*/10) ||
69 (value == /*LineFeed*/17) ||
70 (value == /*NextLine*/29);
71 }
72 bool isEmoji(SkUnichar utf8) override { return fEmoji.contains(utf8); }
73 bool isEmojiComponent(SkUnichar utf8) override { return fEmojiComponent.contains(utf8); }
74 bool isEmojiModifierBase(SkUnichar utf8) override { return fEmojiModifierBase.contains(utf8); }
75 bool isEmojiModifier(SkUnichar utf8) override { return fEmojiModifier.contains(utf8); }
76 bool isRegionalIndicator(SkUnichar utf8) override { return fRegionalIndicator.contains(utf8); }
77 bool isIdeographic(SkUnichar utf8) override { return fIdeographic.contains(utf8); }
78
79 // TODO: is there a check for tabulation
80 bool isTabulation(SkUnichar utf8) override {
81 return utf8 == '\t';
82 }
83
84 // For SkShaper
85 std::unique_ptr<SkBidiIterator> makeBidiIterator(const uint16_t text[], int count,
87 std::unique_ptr<SkBidiIterator> makeBidiIterator(const char text[],
88 int count,
90 std::unique_ptr<SkBreakIterator> makeBreakIterator(const char locale[],
91 BreakType breakType) override;
92 std::unique_ptr<SkBreakIterator> makeBreakIterator(BreakType breakType) override;
93 // For SkParagraph
94 bool getBidiRegions(const char utf8[],
95 int utf8Units,
97 std::vector<BidiRegion>* results) override {
98
99 const auto bidi = ICU4XBidi::create(fDataProvider).ok().value();
100 std::string_view string_view(utf8, utf8Units);
101 auto info = bidi.for_text(string_view, dir == TextDirection::kLTR ? 0 : 1);
102 auto currentLevel = info.level_at(0);
103 size_t start = 0;
104
105 for (size_t i = 1; i < info.size(); i++) {
106 const auto level = info.level_at(i);
107 if (level != currentLevel) {
108 (*results).emplace_back(start, i, currentLevel);
109 currentLevel = level;
110 start = i;
111 }
112 }
113 (*results).emplace_back(start, info.size(), currentLevel);
114 return true;
115 }
116
117 bool getBidiRegions(const uint16_t utf16[],
118 int utf16Units,
120 std::vector<BidiRegion>* results) {
121 auto utf8 = SkUnicode::convertUtf16ToUtf8((char16_t*)utf16, utf16Units);
122 return this->getBidiRegions(utf8.data(), utf8.size(), dir, results);
123 }
124
126 int utf8Units,
127 bool replaceTabs,
129 results->clear();
130 results->push_back_n(utf8Units + 1, CodeUnitFlags::kNoCodeUnitFlag);
131 this->markLineBreaks(utf8, utf8Units, /*hardLineBreaks=*/false, results);
132 this->markHardLineBreaksHack(utf8, utf8Units, results);
133 this->markGraphemes(utf8, utf8Units, results);
134 this->markCharacters(utf8, utf8Units, replaceTabs, results);
135 return true;
136 }
137
138 bool computeCodeUnitFlags(char16_t utf16[], int utf16Units, bool replaceTabs,
140 SkASSERT(false);
141 return true;
142 }
143
144 bool getWords(const char utf8[],
145 int utf8Units,
146 const char* locale,
147 std::vector<Position>* results) override {
148 auto utf16 = SkUnicode::convertUtf8ToUtf16(utf8, utf8Units);
149 const diplomat::span<const uint16_t> span((uint16_t*)utf16.data(), utf16.size());
150 const auto segmenter = ICU4XWordSegmenter::create_dictionary(fDataProvider).ok().value();
151 auto iterator = segmenter.segment_utf16(span);
152 while (true) {
153 int32_t breakpoint = iterator.next();
154 if (breakpoint == -1) {
155 break;
156 }
157 results->emplace_back(breakpoint);
158 }
159 return true;
160 }
161
162 SkString toUpper(const SkString& str) override {
163 return toUpper(str, "und");
164 }
165
166 SkString toUpper(const SkString& str, const char* localeStr) override {
167 auto locale = ICU4XLocale::create_from_string(localeStr).ok().value();
168 std::string std_string(str.data(), str.size());
169 // TODO: upper case
170 auto result = fCaseMapper.uppercase(std_string, locale).ok().value();
171 return SkString(result.data(), result.size());
172 }
173
174 void reorderVisual(const BidiLevel runLevels[],
175 int levelsCount,
176 int32_t logicalFromVisual[]) override {
177
178 const auto bidi = ICU4XBidi::create(fDataProvider).ok().value();
179 const diplomat::span<const uint8_t> levels(&runLevels[0], levelsCount);
180 auto map = bidi.reorder_visual(levels);
181 SkASSERT(levelsCount == map.len());
182 std::vector<int32_t> results;
183 for (size_t i = 0; i < map.len(); i++) {
184 auto level = map.get(i);
185 logicalFromVisual[i] = SkToS32(level);
186 }
187 }
188
189private:
192
193 bool markHardLineBreaksHack(char utf8[],
194 int utf8Units,
196 const char* end = utf8 + utf8Units;
197 const char* ch = utf8;
198 while (ch < end) {
199 auto unichar = SkUTF::NextUTF8(&ch, end);
200 if (this->isHardBreak(unichar)) {
201 (*results)[ch - utf8] |= CodeUnitFlags::kHardLineBreakBefore;
202 }
203 }
204 return true;
205 }
206
207 SkUnichar getChar32(const char* pointer, const char* end) {
208 if (pointer < end) {
209 return SkUTF::NextUTF8(&pointer, end);
210 }
211 return -1;
212 }
213
214 bool markLineBreaks(char utf8[],
215 int utf8Units,
216 bool hardLineBreaks,
218 if (utf8Units == 0) {
219 return true;
220 }
221 // TODO: Remove hard line break hack and detect it here
222 SkASSERT(!hardLineBreaks);
223 const auto lineBreakingOptions = hardLineBreaks
224 ? ICU4XLineBreakOptionsV1{ICU4XLineBreakStrictness::Strict, ICU4XLineBreakWordOption::Normal}
225 : ICU4XLineBreakOptionsV1{ICU4XLineBreakStrictness::Loose, ICU4XLineBreakWordOption::Normal};
226 const auto segmenter = ICU4XLineSegmenter::create_auto_with_options_v1(fDataProvider, lineBreakingOptions).ok().value();
227 std::string_view string_view(utf8, utf8Units);
228 auto iterator = segmenter.segment_utf8(string_view);
229
230 while (true) {
231 int32_t lineBreak = iterator.next();
232 if (lineBreak == -1) {
233 break;
234 }
235 if (hardLineBreaks) {
236 (*results)[lineBreak] |= CodeUnitFlags::kHardLineBreakBefore;
237 } else {
238 (*results)[lineBreak] |= CodeUnitFlags::kSoftLineBreakBefore;
239 }
240 }
241 if (!hardLineBreaks) {
242 (*results)[0] |= CodeUnitFlags::kSoftLineBreakBefore;
243 (*results)[utf8Units] |= CodeUnitFlags::kSoftLineBreakBefore;
244 }
245 return true;
246 }
247
248 bool markGraphemes(const char utf8[],
249 int utf8Units,
251 const auto segmenter = ICU4XGraphemeClusterSegmenter::create(fDataProvider).ok().value();
252 std::string_view string_view(utf8, utf8Units);
253 auto iterator = segmenter.segment_utf8(string_view);
254 while (true) {
255 int32_t graphemeStart = iterator.next();
256 if (graphemeStart == -1) {
257 break;
258 }
259 (*results)[graphemeStart] |= CodeUnitFlags::kGraphemeStart;
260 }
261 return true;
262 }
263
264 bool markCharacters(char utf8[],
265 int utf8Units,
266 bool replaceTabs,
268 const char* current = utf8;
269 const char* end = utf8 + utf8Units;
270 while (current < end) {
271 auto before = current - utf8;
272 SkUnichar unichar = SkUTF::NextUTF8(&current, end);
273 if (unichar < 0) unichar = 0xFFFD;
274 auto after = current - utf8;
275 if (replaceTabs && SkUnicode_icu4x::isTabulation(unichar)) {
276 results->at(before) |= SkUnicode::kTabulation;
277 if (replaceTabs) {
278 unichar = ' ';
279 utf8[before] = ' ';
280 }
281 }
282 for (auto i = before; i < after; ++i) {
283 bool isHardBreak = this->isHardBreak(unichar);
284 bool isSpace = this->isSpace(unichar) || isHardBreak;
285 bool isWhitespace = this->isWhitespace(unichar) || isHardBreak;
286 if (isSpace) {
288 }
289 if (isWhitespace) {
291 }
292 if (this->isControl(unichar)) {
293 results->at(i) |= SkUnicode::kControl;
294 }
295 }
296 }
297 return true;
298 }
299
300 bool getUtf8Words(const char utf8[],
301 int utf8Units,
302 const char* locale,
303 std::vector<Position>* results) override {
304 SkDEBUGF("Method 'getUtf8Words' is not implemented\n");
305 return false;
306 }
307
308 bool getSentences(const char utf8[],
309 int utf8Units,
310 const char* locale,
311 std::vector<SkUnicode::Position>* results) override {
312 SkDEBUGF("Method 'getSentences' is not implemented\n");
313 return false;
314 }
315
// Region list filled by makeBidiIterator() and shared with the iterator it returns.
316 std::shared_ptr<std::vector<SkUnicode::BidiRegion>> fRegions;
// Created from the string "tr" in the constructor; not read anywhere in this listing.
317 ICU4XLocale fLocale;
// Source of every ICU4X object created by this class.
318 ICU4XDataProvider fDataProvider;
// Used by toUpper().
319 ICU4XCaseMapper fCaseMapper;
// General_Category value 12 (SpaceSeparator); backs isWhitespace().
320 ICU4XCodePointSetData fWhitespaces;
// Loaded with the same value as fWhitespaces; not read anywhere in this listing.
321 ICU4XCodePointSetData fSpaces;
// The "blank" set; backs isSpace().
322 ICU4XCodePointSetData fBlanks;
// Binary-property sets backing the predicates of the same name.
323 ICU4XCodePointSetData fEmoji;
324 ICU4XCodePointSetData fEmojiComponent;
325 ICU4XCodePointSetData fEmojiModifier;
326 ICU4XCodePointSetData fEmojiModifierBase;
327 ICU4XCodePointSetData fRegionalIndicator;
328 ICU4XCodePointSetData fIdeographic;
// General_Category value 15 (Control); backs isControl().
329 ICU4XCodePointSetData fControls;
// Line_Break class per code point; backs isHardBreak().
330 ICU4XCodePointMapData8 fLineBreaks;
331};
332
// Placeholder break iterator: every member function asserts and returns a
// dummy value (-1 or false).
// NOTE(review): the class header (original line 333) and original line 338 are
// missing from this listing; restore them from the upstream file.
// The three Position members are not read or written anywhere in this listing.
334 Position fLastResult;
335 Position fStart;
336 Position fEnd;
337public:
339 Position first() override { SkASSERT(false); return -1; }
340 Position current() override { SkASSERT(false); return -1; }
341 Position next() override { SkASSERT(false); return -1; }
342 Status status() override { SkASSERT(false); return -1; }
343 bool isDone() override { SkASSERT(false); return false; }
344 bool setText(const char utftext8[], int utf8Units) override { SkASSERT(false); return false; }
345 bool setText(const char16_t utftext16[], int utf16Units) override { SkASSERT(false); return false; }
346};
347
349 std::shared_ptr<std::vector<SkUnicode::BidiRegion>> fRegions;
350public:
351 explicit SkBidiIterator_icu4x(std::shared_ptr<std::vector<SkUnicode::BidiRegion>> regions)
352 : fRegions(regions) { }
353 Position getLength() override { return fRegions->size(); }
355 auto found = std::lower_bound(
356 fRegions->begin(),
357 fRegions->end(),
360 return a.start <= b.start && a.end <= b.end;
361 });
362 return found->level;
363 }
364};
365
366std::unique_ptr<SkBidiIterator> SkUnicode_icu4x::makeBidiIterator(const uint16_t text[], int count,
368 if (fRegions) {
369 fRegions->clear();
370 } else {
371 fRegions = std::make_shared<std::vector<SkUnicode::BidiRegion>>();
372 }
373
374 if (this->getBidiRegions(text, count, dir == SkBidiIterator::Direction::kLTR ? TextDirection::kLTR : TextDirection::kRTL, fRegions.get())) {
375 return std::make_unique<SkBidiIterator_icu4x>(fRegions);
376 } else {
377 return nullptr;
378 }
379}
380
381std::unique_ptr<SkBidiIterator> SkUnicode_icu4x::makeBidiIterator(const char text[],
382 int count,
384 if (fRegions) {
385 fRegions->clear();
386 } else {
387 fRegions = std::make_shared<std::vector<SkUnicode::BidiRegion>>();
388 }
389 if (this->getBidiRegions(text, count, dir == SkBidiIterator::Direction::kLTR ? TextDirection::kLTR : TextDirection::kRTL, fRegions.get())) {
390 return std::make_unique<SkBidiIterator_icu4x>(fRegions);
391 } else {
392 return nullptr;
393 }
394}
395
396std::unique_ptr<SkBreakIterator> SkUnicode_icu4x::makeBreakIterator(const char locale[],
397 BreakType breakType) {
398 SkASSERT(false); return nullptr;
399}
400
401std::unique_ptr<SkBreakIterator> SkUnicode_icu4x::makeBreakIterator(BreakType breakType) {
402 SkASSERT(false); return nullptr;
403}
404
405namespace SkUnicodes::ICU4X {
407 return sk_make_sp<SkUnicode_icu4x>();
408}
409}
static void info(const char *fmt,...) SK_PRINTF_LIKE(1
Definition: DM.cpp:213
int count
Definition: FontMgrTest.cpp:50
SkPoint pos
#define SkASSERT(cond)
Definition: SkAssert.h:116
#define SkDEBUGF(...)
Definition: SkDebug.h:24
constexpr int32_t SkToS32(S x)
Definition: SkTo.h:25
int32_t SkUnichar
Definition: SkTypes.h:175
void * getSentences(char *text, int *length)
Definition: bridge.cpp:79
Position getLength() override
Level getLevelAt(Position pos) override
SkBidiIterator_icu4x(std::shared_ptr< std::vector< SkUnicode::BidiRegion > > regions)
int32_t Position
Definition: SkUnicode.h:45
uint8_t Level
Definition: SkUnicode.h:46
Position next() override
Status status() override
bool setText(const char16_t utftext16[], int utf16Units) override
bool setText(const char utftext8[], int utf8Units) override
Position first() override
Position current() override
int32_t Position
Definition: SkUnicode.h:65
int32_t Status
Definition: SkUnicode.h:66
size_t size() const
Definition: SkString.h:131
const char * data() const
Definition: SkString.h:132
bool getBidiRegions(const char utf8[], int utf8Units, TextDirection dir, std::vector< BidiRegion > *results) override
bool isWhitespace(SkUnichar utf8) override
std::unique_ptr< SkBidiIterator > makeBidiIterator(const uint16_t text[], int count, SkBidiIterator::Direction dir) override
bool isHardBreak(SkUnichar utf8) override
bool isEmojiModifierBase(SkUnichar utf8) override
std::unique_ptr< SkBreakIterator > makeBreakIterator(const char locale[], BreakType breakType) override
bool computeCodeUnitFlags(char utf8[], int utf8Units, bool replaceTabs, skia_private::TArray< SkUnicode::CodeUnitFlags, true > *results) override
SkString toUpper(const SkString &str) override
bool isSpace(SkUnichar utf8) override
bool isEmojiComponent(SkUnichar utf8) override
bool isEmojiModifier(SkUnichar utf8) override
bool isRegionalIndicator(SkUnichar utf8) override
bool getBidiRegions(const uint16_t utf16[], int utf16Units, TextDirection dir, std::vector< BidiRegion > *results)
bool getWords(const char utf8[], int utf8Units, const char *locale, std::vector< Position > *results) override
void reorderVisual(const BidiLevel runLevels[], int levelsCount, int32_t logicalFromVisual[]) override
SkString toUpper(const SkString &str, const char *localeStr) override
bool isControl(SkUnichar utf8) override
bool isIdeographic(SkUnichar utf8) override
~SkUnicode_icu4x() override=default
bool isTabulation(SkUnichar utf8) override
bool computeCodeUnitFlags(char16_t utf16[], int utf16Units, bool replaceTabs, skia_private::TArray< SkUnicode::CodeUnitFlags, true > *results) override
bool isEmoji(SkUnichar utf8) override
static std::u16string convertUtf8ToUtf16(const char *utf8, int utf8Units)
Definition: SkUnicode.cpp:32
uint8_t BidiLevel
Definition: SkUnicode.h:99
@ kPartOfWhiteSpaceBreak
Definition: SkUnicode.h:81
@ kTabulation
Definition: SkUnicode.h:87
@ kControl
Definition: SkUnicode.h:86
@ kPartOfIntraWordBreak
Definition: SkUnicode.h:85
static SkString convertUtf16ToUtf8(const char16_t *utf16, int utf16Units)
Definition: SkUnicode.cpp:14
T * push_back_n(int n)
Definition: SkTArray.h:267
static bool b
struct MyStruct a[10]
glong glong end
uint8_t value
GAsyncResult * result
std::u16string text
SK_SPI SkUnichar NextUTF8(const char **ptr, const char *end)
Definition: SkUTF.cpp:118
SKUNICODE_API sk_sp< SkUnicode > Make()
DEF_SWITCHES_START aot vmservice shared library Name of the *so containing AOT compiled Dart assets for launching the service isolate vm snapshot The VM snapshot data that will be memory mapped as read only SnapshotAssetPath must be present isolate snapshot The isolate snapshot data that will be memory mapped as read only SnapshotAssetPath must be present cache dir Path to the cache directory This is different from the persistent_cache_path in embedder which is used for Skia shader cache icu native lib Path to the library file that exports the ICU data vm service The hostname IP address on which the Dart VM Service should be served If not defaults to or::depending on whether ipv6 is specified vm service A custom Dart VM Service port The default is to pick a randomly available open port disable vm Disable the Dart VM Service The Dart VM Service is never available in release mode disable vm service Disable mDNS Dart VM Service publication Bind to the IPv6 localhost address for the Dart VM Service Ignored if vm service host is set endless trace Enable an endless trace buffer The default is a ring buffer This is useful when very old events need to viewed For during application launch Memory usage will continue to grow indefinitely however Start app with an specific route defined on the framework flutter assets dir
Definition: switches.h:145
SI auto map(std::index_sequence< I... >, Fn &&fn, const Args &... args) -> skvx::Vec< sizeof...(I), decltype(fn(args[0]...))>
Definition: SkVx.h:680
int_closure create