Flutter Engine
The Flutter Engine
SkUnicode_libgrapheme.cpp
Go to the documentation of this file.
1/*
2* Copyright 2022 Google Inc.
3*
4* Use of this source code is governed by a BSD-style license that can be
5* found in the LICENSE file.
6*/
7
9
10#include "include/core/SkSpan.h"
19
20extern "C" {
21#include <grapheme.h>
22}
23#include <array>
24#include <memory>
25#include <vector>
26#include <unordered_map>
27
28using namespace skia_private;
29
31public:
33
34 ~SkUnicode_libgrapheme() override = default;
35
36 // For SkShaper
37 std::unique_ptr<SkBidiIterator> makeBidiIterator(const uint16_t text[], int count,
39 std::unique_ptr<SkBidiIterator> makeBidiIterator(const char text[],
40 int count,
42 std::unique_ptr<SkBreakIterator> makeBreakIterator(const char locale[],
43 BreakType breakType) override;
44 std::unique_ptr<SkBreakIterator> makeBreakIterator(BreakType breakType) override;
45 bool getBidiRegions(const char utf8[],
46 int utf8Units,
48 std::vector<BidiRegion>* results) override {
49 return fBidiFact->ExtractBidi(utf8, utf8Units, dir, results);
50 }
51
52 bool getSentences(const char utf8[],
53 int utf8Units,
54 const char* locale,
55 std::vector<SkUnicode::Position>* results) override {
56 SkDEBUGF("Method 'getSentences' is not implemented\n");
57 return false;
58 }
59
61 int utf8Units,
62 bool replaceTabs,
64 results->clear();
65 results->push_back_n(utf8Units + 1, CodeUnitFlags::kNoCodeUnitFlag);
66
67 size_t lineBreak = 0;
68 (*results)[lineBreak] |= CodeUnitFlags::kSoftLineBreakBefore;
69 while (lineBreak < utf8Units) {
70 lineBreak += grapheme_next_line_break_utf8(utf8 + lineBreak, utf8Units - lineBreak);
71 // Check if the previous code unit is a hard break.
72 auto codePoint = utf8[lineBreak - 1];
73 (*results)[lineBreak] |= this->isHardBreak(codePoint)
74 ? CodeUnitFlags::kHardLineBreakBefore
75 : CodeUnitFlags::kSoftLineBreakBefore;
76 }
77 (*results)[utf8Units] |= CodeUnitFlags::kSoftLineBreakBefore;
78
79 size_t graphemeBreak = 0;
80 (*results)[graphemeBreak] |= CodeUnitFlags::kGraphemeStart;
81 while (graphemeBreak < utf8Units) {
82 graphemeBreak += grapheme_next_character_break_utf8(utf8 + graphemeBreak, utf8Units - graphemeBreak);
83 (*results)[graphemeBreak] |= CodeUnitFlags::kGraphemeStart;
84 }
85
86 const char* current = utf8;
87 const char* end = utf8 + utf8Units;
88 while (current < end) {
89 auto before = current - utf8;
90 SkUnichar unichar = SkUTF::NextUTF8(&current, end);
91 if (unichar < 0) unichar = 0xFFFD;
92 auto after = current - utf8;
93 if (replaceTabs && this->isTabulation(unichar)) {
94 results->at(before) |= SkUnicode::kTabulation;
95 if (replaceTabs) {
96 unichar = ' ';
97 utf8[before] = ' ';
98 }
99 }
100 for (auto i = before; i < after; ++i) {
101 if (this->isSpace(unichar)) {
103 }
104 if (this->isWhitespace(unichar)) {
106 }
107 if (this->isControl(unichar)) {
108 results->at(i) |= SkUnicode::kControl;
109 }
110 }
111 }
112 return true;
113 }
114
115 bool computeCodeUnitFlags(char16_t utf16[], int utf16Units, bool replaceTabs,
117 SkASSERT(false);
118 return false;
119 }
120
121 bool getUtf8To16Mapping(const char utf8[], int utf8Units, std::unordered_map<Position, Position>* results) {
122 int utf16Units = 0;
123 const char* ptr8 = utf8;
124 const char* end8 = utf8 + utf8Units;
125 while (ptr8 < end8) {
126 results->emplace(ptr8 - utf8, utf16Units);
127 SkUnichar uni = SkUTF::NextUTF8(&ptr8, end8);
128 if (uni < 0) {
129 return false;
130 }
131
132 uint16_t utf16[2];
133 size_t count = SkUTF::ToUTF16(uni, utf16);
134 if (count == 0) {
135 return false;
136 }
137 utf16Units += count;
138 }
139 results->emplace(utf8Units, utf16Units);
140 return true;
141 }
142
143 bool getWords(const char utf8[], int utf8Units, const char* locale, std::vector<Position>* results) override {
144 std::unordered_map<Position, Position> mapping;
145 if (!getUtf8To16Mapping(utf8, utf8Units, &mapping)) {
146 return false;
147 }
148 size_t wordBreak = 0;
149 while (wordBreak < utf8Units) {
150 wordBreak += grapheme_next_word_break_utf8(utf8 + wordBreak, utf8Units - wordBreak);
151 if (mapping.find(wordBreak) == mapping.end()) {
152 return false;
153 }
154 results->emplace_back(mapping[wordBreak]);
155 }
156 return true;
157 }
158
159 bool getUtf8Words(const char utf8[],
160 int utf8Units,
161 const char* locale,
162 std::vector<Position>* results) override {
163 // Let's consider sort line breaks, whitespaces and CJK codepoints instead
164 std::vector<CodeUnitFlags> breaks(utf8Units + 1, CodeUnitFlags::kNoCodeUnitFlag);
165
166 size_t lineBreak = 0;
167 breaks[lineBreak] = CodeUnitFlags::kSoftLineBreakBefore;
168 while (lineBreak < utf8Units) {
169 lineBreak += grapheme_next_line_break_utf8(utf8 + lineBreak, utf8Units - lineBreak);
170 breaks[lineBreak] = CodeUnitFlags::kSoftLineBreakBefore;
171 }
172 breaks[lineBreak] = CodeUnitFlags::kSoftLineBreakBefore;
173
174 const char* current = utf8;
175 const char* end = utf8 + utf8Units;
176 while (current < end) {
177 auto index = current - utf8;
178 SkUnichar unichar = SkUTF::NextUTF8(&current, end);
179 if (this->isWhitespace(unichar)) {
180 breaks[index] = CodeUnitFlags::kPartOfWhiteSpaceBreak;
181 } else if (this->isIdeographic(unichar)) {
182 breaks[index] = CodeUnitFlags::kIdeographic;
183 }
184 }
185
186 bool whitespaces = false;
187 for (size_t i = 0; i < breaks.size(); ++i) {
188 auto b = breaks[i];
189 if (b == CodeUnitFlags::kSoftLineBreakBefore) {
190 results->emplace_back(i);
191 whitespaces = false;
192 } else if (b == CodeUnitFlags::kIdeographic) {
193 results->emplace_back(i);
194 whitespaces = false;
195 } else if (b == CodeUnitFlags::kPartOfWhiteSpaceBreak) {
196 if (!whitespaces) {
197 results->emplace_back(i);
198 }
199 whitespaces = true;
200 } else {
201 whitespaces = false;
202 }
203 }
204
205 return true;
206
207 /*
208 size_t wordBreak = 0;
209 while (wordBreak < utf8Units) {
210 wordBreak += grapheme_next_word_break_utf8(utf8 + wordBreak, utf8Units - wordBreak);
211 results->emplace_back(wordBreak);
212 }
213 return true;
214 */
215 }
216
217 SkString toUpper(const SkString& str) override {
218 return this->toUpper(str, nullptr);
219 }
220
221 SkString toUpper(const SkString& str, const char* locale) override {
222 SkString res(" ", str.size());
223 grapheme_to_uppercase_utf8(str.data(), str.size(), res.data(), res.size());
224 return res;
225 }
226
227 void reorderVisual(const BidiLevel runLevels[],
228 int levelsCount,
229 int32_t logicalFromVisual[]) override {
230 fBidiFact->bidi_reorderVisual(runLevels, levelsCount, logicalFromVisual);
231 }
232private:
234
235 sk_sp<SkBidiFactory> fBidiFact = sk_make_sp<SkBidiSubsetFactory>();
236};
237
239 SkUnicode_libgrapheme* fUnicode;
240 std::vector<SkUnicode::LineBreakBefore> fLineBreaks;
241 Position fLineBreakIndex;
242 static constexpr const int kDone = -1;
243public:
245 Position first() override
246 { return fLineBreaks[(fLineBreakIndex = 0)].pos; }
247 Position current() override
248 { return fLineBreaks[fLineBreakIndex].pos; }
249 Position next() override
250 { return fLineBreaks[++fLineBreakIndex].pos; }
251 Status status() override {
252 return fLineBreaks[fLineBreakIndex].breakType ==
254 ? SkUnicode::CodeUnitFlags::kHardLineBreakBefore
255 : SkUnicode::CodeUnitFlags::kSoftLineBreakBefore;
256 }
257 bool isDone() override { return fLineBreaks[fLineBreakIndex].pos == kDone; }
258 bool setText(const char utftext8[], int utf8Units) override {
259 fLineBreaks.clear();
260 size_t lineBreak = 0;
261 // first() must always go to the beginning of the string.
262 fLineBreaks.emplace_back(0, SkUnicode::LineBreakType::kHardLineBreak);
263 for (size_t pos = 0; pos < utf8Units;) {
264 pos += grapheme_next_line_break_utf8(utftext8 + pos, utf8Units - pos);
265 auto codePoint = utftext8[pos];
266 fLineBreaks.emplace_back(pos,
267 fUnicode->isHardBreak(codePoint)
270 }
271 // There is always an "end" which signals "done".
272 fLineBreaks.emplace_back(kDone, SkUnicode::LineBreakType::kHardLineBreak);
273 fLineBreakIndex = 0;
274 return true;
275 }
276 bool setText(const char16_t utftext16[], int utf16Units) override {
277 SkASSERT(false);
278 return false;
279 }
280};
281
282std::unique_ptr<SkBidiIterator> SkUnicode_libgrapheme::makeBidiIterator(const uint16_t text[], int count,
284 return fBidiFact->MakeIterator(text, count, dir);
285}
286std::unique_ptr<SkBidiIterator> SkUnicode_libgrapheme::makeBidiIterator(const char text[],
287 int count,
289 return fBidiFact->MakeIterator(text, count, dir);
290}
291std::unique_ptr<SkBreakIterator> SkUnicode_libgrapheme::makeBreakIterator(const char locale[],
292 BreakType breakType) {
293 return std::make_unique<SkBreakIterator_libgrapheme>(this);
294}
295std::unique_ptr<SkBreakIterator> SkUnicode_libgrapheme::makeBreakIterator(BreakType breakType) {
296 return std::make_unique<SkBreakIterator_libgrapheme>(this);
297}
298
299namespace SkUnicodes::Libgrapheme {
301 return sk_make_sp<SkUnicode_libgrapheme>();
302}
303}
int count
Definition: FontMgrTest.cpp:50
SkPoint pos
#define SkASSERT(cond)
Definition: SkAssert.h:116
#define SkDEBUGF(...)
Definition: SkDebug.h:24
int32_t SkUnichar
Definition: SkTypes.h:175
bool ExtractBidi(const char utf8[], int utf8Units, SkUnicode::TextDirection dir, std::vector< SkUnicode::BidiRegion > *bidiRegions) const
virtual void bidi_reorderVisual(const SkUnicode::BidiLevel runLevels[], int levelsCount, int32_t logicalFromVisual[]) const =0
std::unique_ptr< SkBidiIterator > MakeIterator(const uint16_t utf16[], int utf16Units, SkBidiIterator::Direction dir) const
bool setText(const char16_t utftext16[], int utf16Units) override
SkBreakIterator_libgrapheme(SkUnicode_libgrapheme *unicode)
bool setText(const char utftext8[], int utf8Units) override
int32_t Position
Definition: SkUnicode.h:65
int32_t Status
Definition: SkUnicode.h:66
size_t size() const
Definition: SkString.h:131
const char * data() const
Definition: SkString.h:132
bool isTabulation(SkUnichar utf8) override
bool isIdeographic(SkUnichar utf8) override
bool isSpace(SkUnichar utf8) override
bool isHardBreak(SkUnichar utf8) override
bool isControl(SkUnichar utf8) override
bool isWhitespace(SkUnichar utf8) override
bool getUtf8Words(const char utf8[], int utf8Units, const char *locale, std::vector< Position > *results) override
bool computeCodeUnitFlags(char utf8[], int utf8Units, bool replaceTabs, skia_private::TArray< SkUnicode::CodeUnitFlags, true > *results) override
bool getUtf8To16Mapping(const char utf8[], int utf8Units, std::unordered_map< Position, Position > *results)
SkString toUpper(const SkString &str) override
SkString toUpper(const SkString &str, const char *locale) override
void reorderVisual(const BidiLevel runLevels[], int levelsCount, int32_t logicalFromVisual[]) override
bool computeCodeUnitFlags(char16_t utf16[], int utf16Units, bool replaceTabs, skia_private::TArray< SkUnicode::CodeUnitFlags, true > *results) override
~SkUnicode_libgrapheme() override=default
std::unique_ptr< SkBidiIterator > makeBidiIterator(const uint16_t text[], int count, SkBidiIterator::Direction dir) override
bool getWords(const char utf8[], int utf8Units, const char *locale, std::vector< Position > *results) override
bool getSentences(const char utf8[], int utf8Units, const char *locale, std::vector< SkUnicode::Position > *results) override
bool getBidiRegions(const char utf8[], int utf8Units, TextDirection dir, std::vector< BidiRegion > *results) override
std::unique_ptr< SkBreakIterator > makeBreakIterator(const char locale[], BreakType breakType) override
uint8_t BidiLevel
Definition: SkUnicode.h:99
@ kPartOfWhiteSpaceBreak
Definition: SkUnicode.h:81
@ kTabulation
Definition: SkUnicode.h:87
@ kControl
Definition: SkUnicode.h:86
@ kPartOfIntraWordBreak
Definition: SkUnicode.h:85
T * push_back_n(int n)
Definition: SkTArray.h:267
static bool b
glong glong end
std::u16string text
SK_SPI SkUnichar NextUTF8(const char **ptr, const char *end)
Definition: SkUTF.cpp:118
SK_SPI size_t ToUTF16(SkUnichar uni, uint16_t utf16[2]=nullptr)
Definition: SkUTF.cpp:243
SKUNICODE_API sk_sp< SkUnicode > Make()
DEF_SWITCHES_START aot vmservice shared library Name of the *so containing AOT compiled Dart assets for launching the service isolate vm snapshot The VM snapshot data that will be memory mapped as read only SnapshotAssetPath must be present isolate snapshot The isolate snapshot data that will be memory mapped as read only SnapshotAssetPath must be present cache dir Path to the cache directory This is different from the persistent_cache_path in embedder which is used for Skia shader cache icu native lib Path to the library file that exports the ICU data vm service The hostname IP address on which the Dart VM Service should be served If not defaults to or::depending on whether ipv6 is specified vm service A custom Dart VM Service port The default is to pick a randomly available open port disable vm Disable the Dart VM Service The Dart VM Service is never available in release mode disable vm service Disable mDNS Dart VM Service publication Bind to the IPv6 localhost address for the Dart VM Service Ignored if vm service host is set endless trace Enable an endless trace buffer The default is a ring buffer This is useful when very old events need to viewed For during application launch Memory usage will continue to grow indefinitely however Start app with an specific route defined on the framework flutter assets dir
Definition: switches.h:145
@ kIdeographic
Definition: text_baseline.h:24