Flutter Engine
The Flutter Engine
Loading...
Searching...
No Matches
SkUnicode_icu4x.cpp
Go to the documentation of this file.
1/*
2* Copyright 2023 Google Inc.
3*
4* Use of this source code is governed by a BSD-style license that can be
5* found in the LICENSE file.
6*/
8
17#include "src/base/SkUTF.h"
18
19#include <ICU4XBidi.hpp>
20#include <ICU4XCaseMapper.hpp>
21#include <ICU4XCodePointMapData8.hpp>
22#include <ICU4XCodePointSetData.hpp>
23#include <ICU4XDataProvider.hpp>
24#include <ICU4XGraphemeClusterSegmenter.hpp>
25#include <ICU4XLineSegmenter.hpp>
26#include <ICU4XWordSegmenter.hpp>
27
28#include <algorithm>
29#include <cstdint>
30#include <memory>
31#include <string>
32#include <utility>
33#include <vector>
34
35class SkUnicode_icu4x : public SkUnicode {
36public:
38 fLocale = ICU4XLocale::create_from_string("tr").ok().value();
39 fDataProvider = ICU4XDataProvider::create_compiled();
40 fCaseMapper = ICU4XCaseMapper::create(fDataProvider).ok().value();
41 const auto general = ICU4XCodePointMapData8::load_general_category(fDataProvider).ok().value();
42 fControls = general.get_set_for_value(/*Control*/15);
43 fWhitespaces = general.get_set_for_value(/*SpaceSeparator*/12);
44 fSpaces = general.get_set_for_value(/*SpaceSeparator*/12);
45 // TODO: u_isSpace
46 fBlanks = ICU4XCodePointSetData::load_blank(fDataProvider).ok().value();
47 fEmoji = ICU4XCodePointSetData::load_emoji(fDataProvider).ok().value();
48 fEmojiComponent = ICU4XCodePointSetData::load_emoji_component(fDataProvider).ok().value();
49 fEmojiModifier = ICU4XCodePointSetData::load_emoji_modifier(fDataProvider).ok().value();
50 fEmojiModifierBase = ICU4XCodePointSetData::load_emoji_modifier_base(fDataProvider).ok().value();
51 fEmoji = ICU4XCodePointSetData::load_emoji(fDataProvider).ok().value();
52 fRegionalIndicator = ICU4XCodePointSetData::load_regional_indicator(fDataProvider).ok().value();
53 fIdeographic = ICU4XCodePointSetData::load_ideographic(fDataProvider).ok().value();
54 fLineBreaks = ICU4XCodePointMapData8::load_line_break(fDataProvider).ok().value();
55 }
56
// Defaulted destructor; overrides the base class destructor.
57 ~SkUnicode_icu4x() override = default;
58
// NOTE(review): declared here, but no definition of reset() appears in the
// visible part of this file - confirm it is defined (or needed) elsewhere.
59 void reset();
60
61 // SkUnicode properties
62 bool isControl(SkUnichar utf8) override { return fControls.contains(utf8); }
63 bool isWhitespace(SkUnichar utf8) override { return fWhitespaces.contains(utf8); }
64 bool isSpace(SkUnichar utf8) override { return fBlanks.contains(utf8); }
65 bool isHardBreak(SkUnichar utf8) override {
66 auto value = fLineBreaks.get(utf8);
67 return (value == /*MandatoryBreak*/6) ||
68 (value == /*CarriageReturn*/10) ||
69 (value == /*LineFeed*/17) ||
70 (value == /*NextLine*/29);
71 }
72 bool isEmoji(SkUnichar utf8) override { return fEmoji.contains(utf8); }
73 bool isEmojiComponent(SkUnichar utf8) override { return fEmojiComponent.contains(utf8); }
74 bool isEmojiModifierBase(SkUnichar utf8) override { return fEmojiModifierBase.contains(utf8); }
75 bool isEmojiModifier(SkUnichar utf8) override { return fEmojiModifier.contains(utf8); }
76 bool isRegionalIndicator(SkUnichar utf8) override { return fRegionalIndicator.contains(utf8); }
77 bool isIdeographic(SkUnichar utf8) override { return fIdeographic.contains(utf8); }
78
79 // TODO: is there a check for tabulation
80 bool isTabulation(SkUnichar utf8) override {
81 return utf8 == '\t';
82 }
83
84 // For SkShaper
// Bidi iterator factory for UTF-16 text; defined out of line further down.
85 std::unique_ptr<SkBidiIterator> makeBidiIterator(const uint16_t text[], int count,
86 SkBidiIterator::Direction dir) override;
// Bidi iterator factory for UTF-8 text; defined out of line further down.
87 std::unique_ptr<SkBidiIterator> makeBidiIterator(const char text[],
88 int count,
89 SkBidiIterator::Direction dir) override;
// Break iterator factories. Both out-of-line definitions in this file only
// assert and return nullptr.
90 std::unique_ptr<SkBreakIterator> makeBreakIterator(const char locale[],
91 BreakType breakType) override;
92 std::unique_ptr<SkBreakIterator> makeBreakIterator(BreakType breakType) override;
93 // For SkParagraph
94 bool getBidiRegions(const char utf8[],
95 int utf8Units,
96 TextDirection dir,
97 std::vector<BidiRegion>* results) override {
98
99 const auto bidi = ICU4XBidi::create(fDataProvider).ok().value();
100 std::string_view string_view(utf8, utf8Units);
101 auto info = bidi.for_text(string_view, dir == TextDirection::kLTR ? 0 : 1);
102 auto currentLevel = info.level_at(0);
103 size_t start = 0;
104
105 for (size_t i = 1; i < info.size(); i++) {
106 const auto level = info.level_at(i);
107 if (level != currentLevel) {
108 (*results).emplace_back(start, i, currentLevel);
109 currentLevel = level;
110 start = i;
111 }
112 }
113 (*results).emplace_back(start, info.size(), currentLevel);
114 return true;
115 }
116
117 bool getBidiRegions(const uint16_t utf16[],
118 int utf16Units,
119 TextDirection dir,
120 std::vector<BidiRegion>* results) {
121 auto utf8 = SkUnicode::convertUtf16ToUtf8((char16_t*)utf16, utf16Units);
122 return this->getBidiRegions(utf8.data(), utf8.size(), dir, results);
123 }
124
125 bool computeCodeUnitFlags(char utf8[],
126 int utf8Units,
127 bool replaceTabs,
129 results->clear();
130 results->push_back_n(utf8Units + 1, CodeUnitFlags::kNoCodeUnitFlag);
131 this->markLineBreaks(utf8, utf8Units, /*hardLineBreaks=*/false, results);
132 this->markHardLineBreaksHack(utf8, utf8Units, results);
133 this->markGraphemes(utf8, utf8Units, results);
134 this->markCharacters(utf8, utf8Units, replaceTabs, results);
135 return true;
136 }
137
138 bool computeCodeUnitFlags(char16_t utf16[], int utf16Units, bool replaceTabs,
140 SkASSERT(false);
141 return true;
142 }
143
144 bool getWords(const char utf8[],
145 int utf8Units,
146 const char* locale,
147 std::vector<Position>* results) override {
148 auto utf16 = SkUnicode::convertUtf8ToUtf16(utf8, utf8Units);
149 const diplomat::span<const uint16_t> span((uint16_t*)utf16.data(), utf16.size());
150 const auto segmenter = ICU4XWordSegmenter::create_dictionary(fDataProvider).ok().value();
151 auto iterator = segmenter.segment_utf16(span);
152 while (true) {
153 int32_t breakpoint = iterator.next();
154 if (breakpoint == -1) {
155 break;
156 }
157 results->emplace_back(breakpoint);
158 }
159 return true;
160 }
161
162 SkString toUpper(const SkString& str) override {
163 return toUpper(str, "und");
164 }
165
166 SkString toUpper(const SkString& str, const char* localeStr) override {
167 auto locale = ICU4XLocale::create_from_string(localeStr).ok().value();
168 std::string std_string(str.data(), str.size());
169 // TODO: upper case
170 auto result = fCaseMapper.uppercase(std_string, locale).ok().value();
171 return SkString(result.data(), result.size());
172 }
173
174 void reorderVisual(const BidiLevel runLevels[],
175 int levelsCount,
176 int32_t logicalFromVisual[]) override {
177
178 const auto bidi = ICU4XBidi::create(fDataProvider).ok().value();
179 const diplomat::span<const uint8_t> levels(&runLevels[0], levelsCount);
180 auto map = bidi.reorder_visual(levels);
181 SkASSERT(levelsCount == map.len());
182 std::vector<int32_t> results;
183 for (size_t i = 0; i < map.len(); i++) {
184 auto level = map.get(i);
185 logicalFromVisual[i] = SkToS32(level);
186 }
187 }
188
189private:
192
193 bool markHardLineBreaksHack(char utf8[],
194 int utf8Units,
196 const char* end = utf8 + utf8Units;
197 const char* ch = utf8;
198 while (ch < end) {
199 auto unichar = SkUTF::NextUTF8(&ch, end);
200 if (this->isHardBreak(unichar)) {
201 (*results)[ch - utf8] |= CodeUnitFlags::kHardLineBreakBefore;
202 }
203 }
204 return true;
205 }
206
207 SkUnichar getChar32(const char* pointer, const char* end) {
208 if (pointer < end) {
209 return SkUTF::NextUTF8(&pointer, end);
210 }
211 return -1;
212 }
213
214 bool markLineBreaks(char utf8[],
215 int utf8Units,
216 bool hardLineBreaks,
218 if (utf8Units == 0) {
219 return true;
220 }
221 // TODO: Remove hard line break hack and detect it here
222 SkASSERT(!hardLineBreaks);
223 const auto lineBreakingOptions = hardLineBreaks
224 ? ICU4XLineBreakOptionsV1{ICU4XLineBreakStrictness::Strict, ICU4XLineBreakWordOption::Normal}
225 : ICU4XLineBreakOptionsV1{ICU4XLineBreakStrictness::Loose, ICU4XLineBreakWordOption::Normal};
226 const auto segmenter = ICU4XLineSegmenter::create_auto_with_options_v1(fDataProvider, lineBreakingOptions).ok().value();
227 std::string_view string_view(utf8, utf8Units);
228 auto iterator = segmenter.segment_utf8(string_view);
229
230 while (true) {
231 int32_t lineBreak = iterator.next();
232 if (lineBreak == -1) {
233 break;
234 }
235 if (hardLineBreaks) {
236 (*results)[lineBreak] |= CodeUnitFlags::kHardLineBreakBefore;
237 } else {
238 (*results)[lineBreak] |= CodeUnitFlags::kSoftLineBreakBefore;
239 }
240 }
241 if (!hardLineBreaks) {
243 (*results)[utf8Units] |= CodeUnitFlags::kSoftLineBreakBefore;
244 }
245 return true;
246 }
247
248 bool markGraphemes(const char utf8[],
249 int utf8Units,
251 const auto segmenter = ICU4XGraphemeClusterSegmenter::create(fDataProvider).ok().value();
252 std::string_view string_view(utf8, utf8Units);
253 auto iterator = segmenter.segment_utf8(string_view);
254 while (true) {
255 int32_t graphemeStart = iterator.next();
256 if (graphemeStart == -1) {
257 break;
258 }
259 (*results)[graphemeStart] |= CodeUnitFlags::kGraphemeStart;
260 }
261 return true;
262 }
263
264 bool markCharacters(char utf8[],
265 int utf8Units,
266 bool replaceTabs,
268 const char* current = utf8;
269 const char* end = utf8 + utf8Units;
270 while (current < end) {
271 auto before = current - utf8;
272 SkUnichar unichar = SkUTF::NextUTF8(&current, end);
273 if (unichar < 0) unichar = 0xFFFD;
274 auto after = current - utf8;
275 if (replaceTabs && SkUnicode_icu4x::isTabulation(unichar)) {
276 results->at(before) |= SkUnicode::kTabulation;
277 if (replaceTabs) {
278 unichar = ' ';
279 utf8[before] = ' ';
280 }
281 }
282 for (auto i = before; i < after; ++i) {
283 bool isHardBreak = this->isHardBreak(unichar);
284 bool isSpace = this->isSpace(unichar) || isHardBreak;
285 bool isWhitespace = this->isWhitespace(unichar) || isHardBreak;
286 if (isSpace) {
288 }
289 if (isWhitespace) {
291 }
292 if (this->isControl(unichar)) {
293 results->at(i) |= SkUnicode::kControl;
294 }
295 }
296 }
297 return true;
298 }
299
300 bool getUtf8Words(const char utf8[],
301 int utf8Units,
302 const char* locale,
303 std::vector<Position>* results) override {
304 SkDEBUGF("Method 'getUtf8Words' is not implemented\n");
305 return false;
306 }
307
308 bool getSentences(const char utf8[],
309 int utf8Units,
310 const char* locale,
311 std::vector<SkUnicode::Position>* results) override {
312 SkDEBUGF("Method 'getSentences' is not implemented\n");
313 return false;
314 }
315
// Region list that makeBidiIterator() clears or creates on each call and
// then hands to the iterator it returns.
316 std::shared_ptr<std::vector<SkUnicode::BidiRegion>> fRegions;
// Set in the constructor; not read anywhere else in the visible code.
317 ICU4XLocale fLocale;
// Data source passed to every ICU4X loader / factory call in this class.
318 ICU4XDataProvider fDataProvider;
// Used by toUpper().
319 ICU4XCaseMapper fCaseMapper;
// General_Category SpaceSeparator set; queried by isWhitespace().
320 ICU4XCodePointSetData fWhitespaces;
// Same General_Category set as fWhitespaces; not read in the visible code.
321 ICU4XCodePointSetData fSpaces;
// "Blank" set; queried by isSpace().
322 ICU4XCodePointSetData fBlanks;
// Property sets behind the correspondingly named is*() predicates.
323 ICU4XCodePointSetData fEmoji;
324 ICU4XCodePointSetData fEmojiComponent;
325 ICU4XCodePointSetData fEmojiModifier;
326 ICU4XCodePointSetData fEmojiModifierBase;
327 ICU4XCodePointSetData fRegionalIndicator;
328 ICU4XCodePointSetData fIdeographic;
// General_Category Control set; queried by isControl().
329 ICU4XCodePointSetData fControls;
// Line-break class of every code point; queried by isHardBreak().
330 ICU4XCodePointMapData8 fLineBreaks;
331};
332
// Placeholder break iterator: every method asserts in debug builds and then
// returns a fixed value (-1 or false).
// NOTE(review): the class header and one line after `public:` are missing
// from this listing (the listing numbers jump over 333 and 338) - restore
// them from the upstream file before building.
// None of the three fields is read or written by the methods shown here.
334 Position fLastResult;
335 Position fStart;
336 Position fEnd;
337public:
339 Position first() override { SkASSERT(false); return -1; }
340 Position current() override { SkASSERT(false); return -1; }
341 Position next() override { SkASSERT(false); return -1; }
342 Status status() override { SkASSERT(false); return -1; }
343 bool isDone() override { SkASSERT(false); return false; }
344 bool setText(const char utftext8[], int utf8Units) override { SkASSERT(false); return false; }
345 bool setText(const char16_t utftext16[], int utf16Units) override { SkASSERT(false); return false; }
346};
347
349 std::shared_ptr<std::vector<SkUnicode::BidiRegion>> fRegions;
350public:
351 explicit SkBidiIterator_icu4x(std::shared_ptr<std::vector<SkUnicode::BidiRegion>> regions)
352 : fRegions(regions) { }
353 Position getLength() override { return fRegions->size(); }
355 auto found = std::lower_bound(
356 fRegions->begin(),
357 fRegions->end(),
360 return a.start <= b.start && a.end <= b.end;
361 });
362 return found->level;
363 }
364};
365
// Creates a bidi iterator for UTF-16 text, backed by the shared fRegions list.
// NOTE(review): two lines are missing from this listing (the numbers jump
// over 367 and 374): the end of the parameter list, and the `if (...)` whose
// branches appear below - presumably the getBidiRegions() call that fills
// fRegions. Restore them from the upstream file before building.
366std::unique_ptr<SkBidiIterator> SkUnicode_icu4x::makeBidiIterator(const uint16_t text[], int count,
// Reuse the region list from an earlier call when there is one.
// NOTE(review): the list is shared with iterators returned earlier, so
// clearing it here also empties the data those iterators read.
368 if (fRegions) {
369 fRegions->clear();
370 } else {
371 fRegions = std::make_shared<std::vector<SkUnicode::BidiRegion>>();
372 }
373
// Hand the shared list to the new iterator, or return nullptr on the else
// branch of the missing condition.
375 return std::make_unique<SkBidiIterator_icu4x>(fRegions);
376 } else {
377 return nullptr;
378 }
379}
380
// Creates a bidi iterator for UTF-8 text, backed by the shared fRegions list.
// NOTE(review): two lines are missing from this listing (the numbers jump
// over 383 and 389): the end of the parameter list, and the `if (...)` whose
// branches appear below - presumably the getBidiRegions() call that fills
// fRegions. Restore them from the upstream file before building.
381std::unique_ptr<SkBidiIterator> SkUnicode_icu4x::makeBidiIterator(const char text[],
382 int count,
// Reuse the region list from an earlier call when there is one.
// NOTE(review): the list is shared with iterators returned earlier, so
// clearing it here also empties the data those iterators read.
384 if (fRegions) {
385 fRegions->clear();
386 } else {
387 fRegions = std::make_shared<std::vector<SkUnicode::BidiRegion>>();
388 }
// Hand the shared list to the new iterator, or return nullptr on the else
// branch of the missing condition.
390 return std::make_unique<SkBidiIterator_icu4x>(fRegions);
391 } else {
392 return nullptr;
393 }
394}
395
396std::unique_ptr<SkBreakIterator> SkUnicode_icu4x::makeBreakIterator(const char locale[],
397 BreakType breakType) {
398 SkASSERT(false); return nullptr;
399}
400
401std::unique_ptr<SkBreakIterator> SkUnicode_icu4x::makeBreakIterator(BreakType breakType) {
402 SkASSERT(false); return nullptr;
403}
404
405namespace SkUnicodes::ICU4X {
407 return sk_make_sp<SkUnicode_icu4x>();
408}
409}
static void info(const char *fmt,...) SK_PRINTF_LIKE(1
Definition DM.cpp:213
int count
SkPoint pos
#define SkASSERT(cond)
Definition SkAssert.h:116
#define SkDEBUGF(...)
Definition SkDebug.h:24
constexpr int32_t SkToS32(S x)
Definition SkTo.h:25
int32_t SkUnichar
Definition SkTypes.h:175
Position getLength() override
Level getLevelAt(Position pos) override
SkBidiIterator_icu4x(std::shared_ptr< std::vector< SkUnicode::BidiRegion > > regions)
int32_t Position
Definition SkUnicode.h:45
uint8_t Level
Definition SkUnicode.h:46
Position next() override
Status status() override
bool setText(const char16_t utftext16[], int utf16Units) override
bool setText(const char utftext8[], int utf8Units) override
Position first() override
Position current() override
int32_t Position
Definition SkUnicode.h:65
int32_t Status
Definition SkUnicode.h:66
size_t size() const
Definition SkString.h:131
const char * data() const
Definition SkString.h:132
bool getBidiRegions(const char utf8[], int utf8Units, TextDirection dir, std::vector< BidiRegion > *results) override
bool isWhitespace(SkUnichar utf8) override
std::unique_ptr< SkBidiIterator > makeBidiIterator(const uint16_t text[], int count, SkBidiIterator::Direction dir) override
bool isHardBreak(SkUnichar utf8) override
bool isEmojiModifierBase(SkUnichar utf8) override
std::unique_ptr< SkBreakIterator > makeBreakIterator(const char locale[], BreakType breakType) override
bool computeCodeUnitFlags(char utf8[], int utf8Units, bool replaceTabs, skia_private::TArray< SkUnicode::CodeUnitFlags, true > *results) override
SkString toUpper(const SkString &str) override
bool getUtf8Words(const char utf8[], int utf8Units, const char *locale, std::vector< Position > *results) override
bool isSpace(SkUnichar utf8) override
bool isEmojiComponent(SkUnichar utf8) override
bool isEmojiModifier(SkUnichar utf8) override
bool isRegionalIndicator(SkUnichar utf8) override
bool getBidiRegions(const uint16_t utf16[], int utf16Units, TextDirection dir, std::vector< BidiRegion > *results)
bool getWords(const char utf8[], int utf8Units, const char *locale, std::vector< Position > *results) override
void reorderVisual(const BidiLevel runLevels[], int levelsCount, int32_t logicalFromVisual[]) override
bool getSentences(const char utf8[], int utf8Units, const char *locale, std::vector< SkUnicode::Position > *results) override
SkString toUpper(const SkString &str, const char *localeStr) override
bool isControl(SkUnichar utf8) override
bool isIdeographic(SkUnichar utf8) override
~SkUnicode_icu4x() override=default
bool isTabulation(SkUnichar utf8) override
bool computeCodeUnitFlags(char16_t utf16[], int utf16Units, bool replaceTabs, skia_private::TArray< SkUnicode::CodeUnitFlags, true > *results) override
bool isEmoji(SkUnichar utf8) override
static std::u16string convertUtf8ToUtf16(const char *utf8, int utf8Units)
Definition SkUnicode.cpp:32
uint8_t BidiLevel
Definition SkUnicode.h:99
@ kNoCodeUnitFlag
Definition SkUnicode.h:80
@ kPartOfWhiteSpaceBreak
Definition SkUnicode.h:81
@ kHardLineBreakBefore
Definition SkUnicode.h:84
@ kTabulation
Definition SkUnicode.h:87
@ kGraphemeStart
Definition SkUnicode.h:82
@ kSoftLineBreakBefore
Definition SkUnicode.h:83
@ kPartOfIntraWordBreak
Definition SkUnicode.h:85
static SkString convertUtf16ToUtf8(const char16_t *utf16, int utf16Units)
Definition SkUnicode.cpp:14
T * push_back_n(int n)
Definition SkTArray.h:262
static bool b
struct MyStruct a[10]
glong glong end
uint8_t value
GAsyncResult * result
std::u16string text
SK_SPI SkUnichar NextUTF8(const char **ptr, const char *end)
Definition SkUTF.cpp:118
SKUNICODE_API sk_sp< SkUnicode > Make()