Flutter Engine
The Flutter Engine
Functions | Variables
SkUTF Namespace Reference

Functions

SK_SPI int CountUTF8 (const char *utf8, size_t byteLength)
 
SK_SPI int CountUTF16 (const uint16_t *utf16, size_t byteLength)
 
SK_SPI int CountUTF32 (const int32_t *utf32, size_t byteLength)
 
SK_SPI SkUnichar NextUTF8 (const char **ptr, const char *end)
 
SK_SPI SkUnichar NextUTF8WithReplacement (const char **ptr, const char *end)
 
SK_SPI SkUnichar NextUTF16 (const uint16_t **ptr, const uint16_t *end)
 
SK_SPI SkUnichar NextUTF32 (const int32_t **ptr, const int32_t *end)
 
SK_SPI size_t ToUTF8 (SkUnichar uni, char utf8[kMaxBytesInUTF8Sequence]=nullptr)
 
SK_SPI size_t ToUTF16 (SkUnichar uni, uint16_t utf16[2]=nullptr)
 
SK_SPI int UTF8ToUTF16 (uint16_t dst[], int dstCapacity, const char src[], size_t srcByteLength)
 
SK_SPI int UTF16ToUTF8 (char dst[], int dstCapacity, const uint16_t src[], size_t srcLength)
 
static bool IsLeadingSurrogateUTF16 (uint16_t c)
 
static bool IsTrailingSurrogateUTF16 (uint16_t c)
 

Variables

constexpr unsigned kMaxBytesInUTF8Sequence = 4
 

Function Documentation

◆ CountUTF16()

int SkUTF::CountUTF16 ( const uint16_t *  utf16,
size_t  byteLength 
)

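Given a sequence of aligned UTF-16 code units in machine-endian form, return the number of Unicode code points. byteLength is the size of the sequence in bytes. If the sequence is invalid UTF-16, or utf16 is null, or utf16 or byteLength is not 2-byte aligned, return -1.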

Definition at line 70 of file SkUTF.cpp.

70 {
71 if (!utf16 || !is_align2(intptr_t(utf16)) || !is_align2(byteLength)) {
72 return -1;
73 }
74 const uint16_t* src = (const uint16_t*)utf16;
75 const uint16_t* stop = src + (byteLength >> 1);
76 int count = 0;
77 while (src < stop) {
78 unsigned c = *src++;
80 return -1;
81 }
83 if (src >= stop) {
84 return -1;
85 }
86 c = *src++;
87 if (!utf16_is_low_surrogate(c)) {
88 return -1;
89 }
90 }
91 count += 1;
92 }
93 return count;
94}
int count
Definition: FontMgrTest.cpp:50
static constexpr bool is_align2(T x)
Definition: SkUTF.cpp:12
static constexpr bool utf16_is_high_surrogate(uint16_t c)
Definition: SkUTF.cpp:16
static constexpr bool utf16_is_low_surrogate(uint16_t c)
Definition: SkUTF.cpp:18

◆ CountUTF32()

int SkUTF::CountUTF32 ( const int32_t *  utf32,
size_t  byteLength 
)

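Given a sequence of aligned UTF-32 code units in machine-endian form, return the number of Unicode code points. byteLength is the size of the sequence in bytes. If the sequence is invalid UTF-32 (any value has bits set above the low 24 bits), or utf32 or byteLength is not 4-byte aligned, or the resulting count does not fit in an int, return -1.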

Definition at line 96 of file SkUTF.cpp.

96 {
97 if (!is_align4(intptr_t(utf32)) || !is_align4(byteLength) || !SkTFitsIn<int>(byteLength >> 2)) {
98 return -1;
99 }
100 const uint32_t kInvalidUnicharMask = 0xFF000000; // unichar fits in 24 bits
101 const uint32_t* ptr = (const uint32_t*)utf32;
102 const uint32_t* stop = ptr + (byteLength >> 2);
103 while (ptr < stop) {
104 if (*ptr & kInvalidUnicharMask) {
105 return -1;
106 }
107 ptr += 1;
108 }
109 return (int)(byteLength >> 2);
110}
static constexpr bool is_align4(T x)
Definition: SkUTF.cpp:14

◆ CountUTF8()

int SkUTF::CountUTF8 ( const char *  utf8,
size_t  byteLength 
)

Given a sequence of UTF-8 bytes, return the number of Unicode code points. If the sequence is invalid UTF-8, or utf8 is null while byteLength is nonzero, return -1.

Definition at line 47 of file SkUTF.cpp.

47 {
48 if (!utf8 && byteLength) {
49 return -1;
50 }
51 int count = 0;
52 const char* stop = utf8 + byteLength;
53 while (utf8 < stop) {
54 int type = utf8_byte_type(*(const uint8_t*)utf8);
56 return -1; // Sequence extends beyond end.
57 }
58 while(type-- > 1) {
59 ++utf8;
60 if (!utf8_byte_is_continuation(*(const uint8_t*)utf8)) {
61 return -1;
62 }
63 }
64 ++utf8;
65 ++count;
66 }
67 return count;
68}
static int utf8_byte_type(uint8_t c)
Definition: SkUTF.cpp:28
static bool utf8_byte_is_continuation(uint8_t c)
Definition: SkUTF.cpp:43
static bool utf8_type_is_valid_leading_byte(int type)
Definition: SkUTF.cpp:41
GLenum type

◆ IsLeadingSurrogateUTF16()

static bool SkUTF::IsLeadingSurrogateUTF16 ( uint16_t  c)
inline static

Given a UTF-16 code unit, returns true iff it is a leading (high) surrogate, i.e. in the range 0xD800–0xDBFF. See https://unicode.org/faq/utf_bom.html#utf16-2

Definition at line 91 of file SkUTF.h.

91{ return ((c) & 0xFC00) == 0xD800; }

◆ IsTrailingSurrogateUTF16()

static bool SkUTF::IsTrailingSurrogateUTF16 ( uint16_t  c)
inline static

Given a UTF-16 code unit, returns true iff it is a trailing (low) surrogate, i.e. in the range 0xDC00–0xDFFF. See https://unicode.org/faq/utf_bom.html#utf16-2

Definition at line 97 of file SkUTF.h.

97{ return ((c) & 0xFC00) == 0xDC00; }

◆ NextUTF16()

SkUnichar SkUTF::NextUTF16 ( const uint16_t **  ptr,
const uint16_t *  end 
)

Given a sequence of aligned UTF-16 code units in machine-endian form, return the first Unicode code point. The pointer will be advanced to point at the start of the next code point. If invalid UTF-16 is encountered, set *ptr to end and return -1. If ptr or end is null, return -1 without modifying anything.

Definition at line 159 of file SkUTF.cpp.

159 {
160 if (!ptr || !end ) {
161 return -1;
162 }
163 const uint16_t* src = *ptr;
164 if (!src || src + 1 > end || !is_align2(intptr_t(src))) {
165 return next_fail(ptr, end);
166 }
167 uint16_t c = *src++;
168 SkUnichar result = c;
169 if (utf16_is_low_surrogate(c)) {
170 return next_fail(ptr, end); // srcPtr should never point at low surrogate.
171 }
173 if (src + 1 > end) {
174 return next_fail(ptr, end); // Truncated string.
175 }
176 uint16_t low = *src++;
177 if (!utf16_is_low_surrogate(low)) {
178 return next_fail(ptr, end);
179 }
180 /*
181 [paraphrased from wikipedia]
182 Take the high surrogate and subtract 0xD800, then multiply by 0x400.
183 Take the low surrogate and subtract 0xDC00. Add these two results
184 together, and finally add 0x10000 to get the final decoded codepoint.
185
186 unicode = (high - 0xD800) * 0x400 + low - 0xDC00 + 0x10000
187 unicode = (high * 0x400) - (0xD800 * 0x400) + low - 0xDC00 + 0x10000
188 unicode = (high << 10) - (0xD800 << 10) + low - 0xDC00 + 0x10000
189 unicode = (high << 10) + low - ((0xD800 << 10) + 0xDC00 - 0x10000)
190 */
191 result = (result << 10) + (SkUnichar)low - ((0xD800 << 10) + 0xDC00 - 0x10000);
192 }
193 *ptr = src;
194 return result;
195}
int32_t SkUnichar
Definition: SkTypes.h:175
static SkUnichar next_fail(const T **ptr, const T *end)
Definition: SkUTF.cpp:113
glong glong end
GAsyncResult * result

◆ NextUTF32()

SkUnichar SkUTF::NextUTF32 ( const int32_t **  ptr,
const int32_t *  end 
)

Given a sequence of aligned UTF-32 code units in machine-endian form, return the first Unicode code point. The pointer will be advanced to point at the start of the next code point. If invalid UTF-32 is encountered, set *ptr to end and return -1. If ptr or end is null, return -1 without modifying anything.

Definition at line 197 of file SkUTF.cpp.

197 {
198 if (!ptr || !end ) {
199 return -1;
200 }
201 const int32_t* s = *ptr;
202 if (!s || s + 1 > end || !is_align4(intptr_t(s))) {
203 return next_fail(ptr, end);
204 }
205 int32_t value = *s;
206 const uint32_t kInvalidUnicharMask = 0xFF000000; // unichar fits in 24 bits
207 if (value & kInvalidUnicharMask) {
208 return next_fail(ptr, end);
209 }
210 *ptr = s + 1;
211 return value;
212}
struct MyStruct s
uint8_t value

◆ NextUTF8()

SkUnichar SkUTF::NextUTF8 ( const char **  ptr,
const char *  end 
)

Given a sequence of UTF-8 bytes, return the first Unicode code point. The pointer will be advanced to point at the start of the next code point. If invalid UTF-8 is encountered, set *ptr to end and return -1. If ptr or end is null, return -1 without modifying anything.

Definition at line 118 of file SkUTF.cpp.

118 {
119 if (!ptr || !end ) {
120 return -1;
121 }
122 const uint8_t* p = (const uint8_t*)*ptr;
123 if (!p || p >= (const uint8_t*)end) {
124 return next_fail(ptr, end);
125 }
126 int c = *p;
127 int hic = c << 24;
128
130 return next_fail(ptr, end);
131 }
132 if (hic < 0) {
133 uint32_t mask = (uint32_t)~0x3F;
134 hic = left_shift(hic, 1);
135 do {
136 ++p;
137 if (p >= (const uint8_t*)end) {
138 return next_fail(ptr, end);
139 }
140 // check before reading off end of array.
141 uint8_t nextByte = *p;
142 if (!utf8_byte_is_continuation(nextByte)) {
143 return next_fail(ptr, end);
144 }
145 c = (c << 6) | (nextByte & 0x3F);
146 mask <<= 5;
147 } while ((hic = left_shift(hic, 1)) < 0);
148 c &= ~mask;
149 }
150 *ptr = (const char*)p + 1;
151 return c;
152}
static constexpr int32_t left_shift(int32_t value, int32_t shift)
Definition: SkUTF.cpp:8

◆ NextUTF8WithReplacement()

SkUnichar SkUTF::NextUTF8WithReplacement ( const char **  ptr,
const char *  end 
)

Given a sequence of UTF-8 bytes, return the first Unicode code point. The pointer will be advanced to point at the start of the next code point. If invalid UTF-8 is encountered, set *ptr to end and return the replacement character (0xFFFD).

Definition at line 154 of file SkUTF.cpp.

154 {
155 SkUnichar val = SkUTF::NextUTF8(ptr, end);
156 return val < 0 ? 0xFFFD : val;
157}
SK_SPI SkUnichar NextUTF8(const char **ptr, const char *end)
Definition: SkUTF.cpp:118

◆ ToUTF16()

size_t SkUTF::ToUTF16 ( SkUnichar  uni,
uint16_t  utf16[2] = nullptr 
)

Convert the Unicode code point into UTF-16. If utf16 is non-null, place the result in that array. Return the number of UTF-16 code units in the result (1 or 2). If utf16 is null, simply return the number of code units that would be used. For invalid Unicode code points, return 0.

Definition at line 243 of file SkUTF.cpp.

243 {
244 if ((uint32_t)uni > 0x10FFFF) {
245 return 0;
246 }
247 int extra = (uni > 0xFFFF);
248 if (utf16) {
249 if (extra) {
250 utf16[0] = (uint16_t)((0xD800 - 64) + (uni >> 10));
251 utf16[1] = (uint16_t)(0xDC00 | (uni & 0x3FF));
252 } else {
253 utf16[0] = (uint16_t)uni;
254 }
255 }
256 return 1 + extra;
257}

◆ ToUTF8()

SK_SPI size_t SkUTF::ToUTF8 ( SkUnichar  uni,
char  utf8[kMaxBytesInUTF8Sequence] = nullptr 
)

Convert the Unicode code point into UTF-8. If utf8 is non-null, place the result in that array. Return the number of bytes in the result. If utf8 is null, simply return the number of bytes that would be used. For invalid Unicode code points, return 0.

◆ UTF16ToUTF8()

int SkUTF::UTF16ToUTF8 ( char  dst[],
int  dstCapacity,
const uint16_t  src[],
size_t  srcLength 
)

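Returns the number of UTF-8 bytes needed to convert the src UTF-16 sequence, where srcLength is the number of 16-bit code units in src. If dst is not null, it is filled with the converted bytes up to dstCapacity; the return value is the full required length even when it exceeds dstCapacity. If there is an error, -1 is returned and the contents of the dst[] buffer are undefined.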

Definition at line 291 of file SkUTF.cpp.

291 {
292 if (!dst) {
293 dstCapacity = 0;
294 }
295
296 int dstLength = 0;
297 const char* endDst = dst + dstCapacity;
298 const uint16_t* endSrc = src + srcLength;
299 while (src < endSrc) {
300 SkUnichar uni = NextUTF16(&src, endSrc);
301 if (uni < 0) {
302 return -1;
303 }
304
306 size_t count = ToUTF8(uni, utf8);
307 if (count == 0) {
308 return -1;
309 }
310 dstLength += count;
311
312 if (dst) {
313 const char* elems = utf8;
314 while (dst < endDst && count > 0) {
315 *dst++ = *elems++;
316 count -= 1;
317 }
318 }
319 }
320 return dstLength;
321}
SK_SPI size_t ToUTF8(SkUnichar uni, char utf8[kMaxBytesInUTF8Sequence]=nullptr)
constexpr unsigned kMaxBytesInUTF8Sequence
Definition: SkUTF.h:59
SK_SPI SkUnichar NextUTF16(const uint16_t **ptr, const uint16_t *end)
Definition: SkUTF.cpp:159
dst
Definition: cp.py:12

◆ UTF8ToUTF16()

int SkUTF::UTF8ToUTF16 ( uint16_t  dst[],
int  dstCapacity,
const char  src[],
size_t  srcByteLength 
)

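Returns the number of UTF-16 code units needed to convert the src UTF-8 sequence, where srcByteLength is the length of src in bytes. If dst is not null, it is filled with the converted code units up to dstCapacity; the return value is the full required length even when it exceeds dstCapacity. If there is an error, -1 is returned and the contents of the dst[] buffer are undefined.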

Definition at line 259 of file SkUTF.cpp.

259 {
260 if (!dst) {
261 dstCapacity = 0;
262 }
263
264 int dstLength = 0;
265 uint16_t* endDst = dst + dstCapacity;
266 const char* endSrc = src + srcByteLength;
267 while (src < endSrc) {
268 SkUnichar uni = NextUTF8(&src, endSrc);
269 if (uni < 0) {
270 return -1;
271 }
272
273 uint16_t utf16[2];
274 size_t count = ToUTF16(uni, utf16);
275 if (count == 0) {
276 return -1;
277 }
278 dstLength += count;
279
280 if (dst) {
281 uint16_t* elems = utf16;
282 while (dst < endDst && count > 0) {
283 *dst++ = *elems++;
284 count -= 1;
285 }
286 }
287 }
288 return dstLength;
289}
SK_SPI size_t ToUTF16(SkUnichar uni, uint16_t utf16[2]=nullptr)
Definition: SkUTF.cpp:243

Variable Documentation

◆ kMaxBytesInUTF8Sequence

constexpr unsigned SkUTF::kMaxBytesInUTF8Sequence = 4
constexpr

Definition at line 59 of file SkUTF.h.