Flutter Engine
The Flutter Engine
Loading...
Searching...
No Matches
SkUnicode_libgrapheme.cpp
Go to the documentation of this file.
1/*
2* Copyright 2022 Google Inc.
3*
4* Use of this source code is governed by a BSD-style license that can be
5* found in the LICENSE file.
6*/
7
9
10#include "include/core/SkSpan.h"
19
20extern "C" {
21#include <grapheme.h>
22}
23#include <array>
24#include <memory>
25#include <vector>
26#include <unordered_map>
27
28using namespace skia_private;
29
31public:
33
34 ~SkUnicode_libgrapheme() override = default;
35
36 // For SkShaper
37 std::unique_ptr<SkBidiIterator> makeBidiIterator(const uint16_t text[], int count,
38 SkBidiIterator::Direction dir) override;
39 std::unique_ptr<SkBidiIterator> makeBidiIterator(const char text[],
40 int count,
41 SkBidiIterator::Direction dir) override;
42 std::unique_ptr<SkBreakIterator> makeBreakIterator(const char locale[],
43 BreakType breakType) override;
44 std::unique_ptr<SkBreakIterator> makeBreakIterator(BreakType breakType) override;
45 bool getBidiRegions(const char utf8[],
46 int utf8Units,
47 TextDirection dir,
48 std::vector<BidiRegion>* results) override {
49 return fBidiFact->ExtractBidi(utf8, utf8Units, dir, results);
50 }
51
52 bool getSentences(const char utf8[],
53 int utf8Units,
54 const char* locale,
55 std::vector<SkUnicode::Position>* results) override {
56 SkDEBUGF("Method 'getSentences' is not implemented\n");
57 return false;
58 }
59
60 bool computeCodeUnitFlags(char utf8[],
61 int utf8Units,
62 bool replaceTabs,
64 results->clear();
65 results->push_back_n(utf8Units + 1, CodeUnitFlags::kNoCodeUnitFlag);
66
67 size_t lineBreak = 0;
68 (*results)[lineBreak] |= CodeUnitFlags::kSoftLineBreakBefore;
69 while (lineBreak < utf8Units) {
70 lineBreak += grapheme_next_line_break_utf8(utf8 + lineBreak, utf8Units - lineBreak);
71 // Check if the previous code unit is a hard break.
72 auto codePoint = utf8[lineBreak - 1];
73 (*results)[lineBreak] |= this->isHardBreak(codePoint)
76 }
77 (*results)[utf8Units] |= CodeUnitFlags::kSoftLineBreakBefore;
78
79 size_t graphemeBreak = 0;
80 (*results)[graphemeBreak] |= CodeUnitFlags::kGraphemeStart;
81 while (graphemeBreak < utf8Units) {
82 graphemeBreak += grapheme_next_character_break_utf8(utf8 + graphemeBreak, utf8Units - graphemeBreak);
83 (*results)[graphemeBreak] |= CodeUnitFlags::kGraphemeStart;
84 }
85
86 const char* current = utf8;
87 const char* end = utf8 + utf8Units;
88 while (current < end) {
89 auto before = current - utf8;
90 SkUnichar unichar = SkUTF::NextUTF8(&current, end);
91 if (unichar < 0) unichar = 0xFFFD;
92 auto after = current - utf8;
93 if (replaceTabs && this->isTabulation(unichar)) {
94 results->at(before) |= SkUnicode::kTabulation;
95 if (replaceTabs) {
96 unichar = ' ';
97 utf8[before] = ' ';
98 }
99 }
100 for (auto i = before; i < after; ++i) {
101 if (this->isSpace(unichar)) {
103 }
104 if (this->isWhitespace(unichar)) {
106 }
107 if (this->isControl(unichar)) {
108 results->at(i) |= SkUnicode::kControl;
109 }
110 }
111 }
112 return true;
113 }
114
115 bool computeCodeUnitFlags(char16_t utf16[], int utf16Units, bool replaceTabs,
117 SkASSERT(false);
118 return false;
119 }
120
121 bool getUtf8To16Mapping(const char utf8[], int utf8Units, std::unordered_map<Position, Position>* results) {
122 int utf16Units = 0;
123 const char* ptr8 = utf8;
124 const char* end8 = utf8 + utf8Units;
125 while (ptr8 < end8) {
126 results->emplace(ptr8 - utf8, utf16Units);
127 SkUnichar uni = SkUTF::NextUTF8(&ptr8, end8);
128 if (uni < 0) {
129 return false;
130 }
131
132 uint16_t utf16[2];
133 size_t count = SkUTF::ToUTF16(uni, utf16);
134 if (count == 0) {
135 return false;
136 }
137 utf16Units += count;
138 }
139 results->emplace(utf8Units, utf16Units);
140 return true;
141 }
142
143 bool getWords(const char utf8[], int utf8Units, const char* locale, std::vector<Position>* results) override {
144 std::unordered_map<Position, Position> mapping;
145 if (!getUtf8To16Mapping(utf8, utf8Units, &mapping)) {
146 return false;
147 }
148 size_t wordBreak = 0;
149 while (wordBreak < utf8Units) {
150 wordBreak += grapheme_next_word_break_utf8(utf8 + wordBreak, utf8Units - wordBreak);
151 if (mapping.find(wordBreak) == mapping.end()) {
152 return false;
153 }
154 results->emplace_back(mapping[wordBreak]);
155 }
156 return true;
157 }
158
159 bool getUtf8Words(const char utf8[],
160 int utf8Units,
161 const char* locale,
162 std::vector<Position>* results) override {
163 // Let's consider soft line breaks, whitespaces and CJK codepoints instead
164 std::vector<CodeUnitFlags> breaks(utf8Units + 1, CodeUnitFlags::kNoCodeUnitFlag);
165
166 size_t lineBreak = 0;
167 breaks[lineBreak] = CodeUnitFlags::kSoftLineBreakBefore;
168 while (lineBreak < utf8Units) {
169 lineBreak += grapheme_next_line_break_utf8(utf8 + lineBreak, utf8Units - lineBreak);
170 breaks[lineBreak] = CodeUnitFlags::kSoftLineBreakBefore;
171 }
172 breaks[lineBreak] = CodeUnitFlags::kSoftLineBreakBefore;
173
174 const char* current = utf8;
175 const char* end = utf8 + utf8Units;
176 while (current < end) {
177 auto index = current - utf8;
178 SkUnichar unichar = SkUTF::NextUTF8(&current, end);
179 if (this->isWhitespace(unichar)) {
181 } else if (this->isIdeographic(unichar)) {
182 breaks[index] = CodeUnitFlags::kIdeographic;
183 }
184 }
185
186 bool whitespaces = false;
187 for (size_t i = 0; i < breaks.size(); ++i) {
188 auto b = breaks[i];
190 results->emplace_back(i);
191 whitespaces = false;
192 } else if (b == CodeUnitFlags::kIdeographic) {
193 results->emplace_back(i);
194 whitespaces = false;
196 if (!whitespaces) {
197 results->emplace_back(i);
198 }
199 whitespaces = true;
200 } else {
201 whitespaces = false;
202 }
203 }
204
205 return true;
206
207 /*
208 size_t wordBreak = 0;
209 while (wordBreak < utf8Units) {
210 wordBreak += grapheme_next_word_break_utf8(utf8 + wordBreak, utf8Units - wordBreak);
211 results->emplace_back(wordBreak);
212 }
213 return true;
214 */
215 }
216
217 SkString toUpper(const SkString& str) override {
218 return this->toUpper(str, nullptr);
219 }
220
221 SkString toUpper(const SkString& str, const char* locale) override {
222 SkString res(" ", str.size());
223 grapheme_to_uppercase_utf8(str.data(), str.size(), res.data(), res.size());
224 return res;
225 }
226
227 void reorderVisual(const BidiLevel runLevels[],
228 int levelsCount,
229 int32_t logicalFromVisual[]) override {
230 fBidiFact->bidi_reorderVisual(runLevels, levelsCount, logicalFromVisual);
231 }
232private:
234
235 sk_sp<SkBidiFactory> fBidiFact = sk_make_sp<SkBidiSubsetFactory>();
236};
237
239 SkUnicode_libgrapheme* fUnicode;
240 std::vector<SkUnicode::LineBreakBefore> fLineBreaks;
241 Position fLineBreakIndex;
242 static constexpr const int kDone = -1;
243public:
244 explicit SkBreakIterator_libgrapheme(SkUnicode_libgrapheme* unicode) : fUnicode(unicode) { }
245 Position first() override
246 { return fLineBreaks[(fLineBreakIndex = 0)].pos; }
247 Position current() override
248 { return fLineBreaks[fLineBreakIndex].pos; }
249 Position next() override
250 { return fLineBreaks[++fLineBreakIndex].pos; }
257 bool isDone() override { return fLineBreaks[fLineBreakIndex].pos == kDone; }
258 bool setText(const char utftext8[], int utf8Units) override {
259 fLineBreaks.clear();
260 size_t lineBreak = 0;
261 // first() must always go to the beginning of the string.
262 fLineBreaks.emplace_back(0, SkUnicode::LineBreakType::kHardLineBreak);
263 for (size_t pos = 0; pos < utf8Units;) {
264 pos += grapheme_next_line_break_utf8(utftext8 + pos, utf8Units - pos);
265 auto codePoint = utftext8[pos];
266 fLineBreaks.emplace_back(pos,
267 fUnicode->isHardBreak(codePoint)
270 }
271 // There is always an "end" which signals "done".
272 fLineBreaks.emplace_back(kDone, SkUnicode::LineBreakType::kHardLineBreak);
273 fLineBreakIndex = 0;
274 return true;
275 }
276 bool setText(const char16_t utftext16[], int utf16Units) override {
277 SkASSERT(false);
278 return false;
279 }
280};
281
282std::unique_ptr<SkBidiIterator> SkUnicode_libgrapheme::makeBidiIterator(const uint16_t text[], int count,
284 return fBidiFact->MakeIterator(text, count, dir);
285}
286std::unique_ptr<SkBidiIterator> SkUnicode_libgrapheme::makeBidiIterator(const char text[],
287 int count,
289 return fBidiFact->MakeIterator(text, count, dir);
290}
291std::unique_ptr<SkBreakIterator> SkUnicode_libgrapheme::makeBreakIterator(const char locale[],
292 BreakType breakType) {
293 return std::make_unique<SkBreakIterator_libgrapheme>(this);
294}
295std::unique_ptr<SkBreakIterator> SkUnicode_libgrapheme::makeBreakIterator(BreakType breakType) {
296 return std::make_unique<SkBreakIterator_libgrapheme>(this);
297}
298
299namespace SkUnicodes::Libgrapheme {
301 return sk_make_sp<SkUnicode_libgrapheme>();
302}
303}
int count
SkPoint pos
#define SkASSERT(cond)
Definition SkAssert.h:116
#define SkDEBUGF(...)
Definition SkDebug.h:24
int32_t SkUnichar
Definition SkTypes.h:175
bool ExtractBidi(const char utf8[], int utf8Units, SkUnicode::TextDirection dir, std::vector< SkUnicode::BidiRegion > *bidiRegions) const
virtual void bidi_reorderVisual(const SkUnicode::BidiLevel runLevels[], int levelsCount, int32_t logicalFromVisual[]) const =0
std::unique_ptr< SkBidiIterator > MakeIterator(const uint16_t utf16[], int utf16Units, SkBidiIterator::Direction dir) const
bool setText(const char16_t utftext16[], int utf16Units) override
SkBreakIterator_libgrapheme(SkUnicode_libgrapheme *unicode)
bool setText(const char utftext8[], int utf8Units) override
int32_t Position
Definition SkUnicode.h:65
int32_t Status
Definition SkUnicode.h:66
size_t size() const
Definition SkString.h:131
const char * data() const
Definition SkString.h:132
bool isTabulation(SkUnichar utf8) override
bool isIdeographic(SkUnichar utf8) override
bool isSpace(SkUnichar utf8) override
bool isHardBreak(SkUnichar utf8) override
bool isControl(SkUnichar utf8) override
bool isWhitespace(SkUnichar utf8) override
bool getUtf8Words(const char utf8[], int utf8Units, const char *locale, std::vector< Position > *results) override
bool computeCodeUnitFlags(char utf8[], int utf8Units, bool replaceTabs, skia_private::TArray< SkUnicode::CodeUnitFlags, true > *results) override
bool getUtf8To16Mapping(const char utf8[], int utf8Units, std::unordered_map< Position, Position > *results)
SkString toUpper(const SkString &str) override
SkString toUpper(const SkString &str, const char *locale) override
void reorderVisual(const BidiLevel runLevels[], int levelsCount, int32_t logicalFromVisual[]) override
bool computeCodeUnitFlags(char16_t utf16[], int utf16Units, bool replaceTabs, skia_private::TArray< SkUnicode::CodeUnitFlags, true > *results) override
~SkUnicode_libgrapheme() override=default
std::unique_ptr< SkBidiIterator > makeBidiIterator(const uint16_t text[], int count, SkBidiIterator::Direction dir) override
bool getWords(const char utf8[], int utf8Units, const char *locale, std::vector< Position > *results) override
bool getSentences(const char utf8[], int utf8Units, const char *locale, std::vector< SkUnicode::Position > *results) override
bool getBidiRegions(const char utf8[], int utf8Units, TextDirection dir, std::vector< BidiRegion > *results) override
std::unique_ptr< SkBreakIterator > makeBreakIterator(const char locale[], BreakType breakType) override
uint8_t BidiLevel
Definition SkUnicode.h:99
@ kNoCodeUnitFlag
Definition SkUnicode.h:80
@ kPartOfWhiteSpaceBreak
Definition SkUnicode.h:81
@ kHardLineBreakBefore
Definition SkUnicode.h:84
@ kTabulation
Definition SkUnicode.h:87
@ kGraphemeStart
Definition SkUnicode.h:82
@ kIdeographic
Definition SkUnicode.h:89
@ kSoftLineBreakBefore
Definition SkUnicode.h:83
@ kPartOfIntraWordBreak
Definition SkUnicode.h:85
T * push_back_n(int n)
Definition SkTArray.h:262
static bool b
glong glong end
std::u16string text
SK_SPI SkUnichar NextUTF8(const char **ptr, const char *end)
Definition SkUTF.cpp:118
SK_SPI size_t ToUTF16(SkUnichar uni, uint16_t utf16[2]=nullptr)
Definition SkUTF.cpp:243
SKUNICODE_API sk_sp< SkUnicode > Make()