Flutter Engine
The Flutter Engine
Loading...
Searching...
No Matches
SkUTF.cpp
Go to the documentation of this file.
1// Copyright 2018 Google LLC.
2// Use of this source code is governed by a BSD-style license that can be found in the LICENSE file.
3
4#include "src/base/SkUTF.h"
5
7
/** Shifts `value` left by `shift` bits. The shift is carried out on the
 *  unsigned representation and the result is converted back to int32_t, so
 *  bits may move into (or past) the sign bit without a signed left shift.
 */
static constexpr inline int32_t left_shift(int32_t value, int32_t shift) {
    return static_cast<int32_t>(static_cast<uint32_t>(value) << shift);
}
11
/** True when the lowest bit of `x` is clear, i.e. `x` is a multiple of 2. */
template <typename T> static constexpr bool is_align2(T x) {
    return (x & 1) == 0;
}
13
/** True when the two lowest bits of `x` are clear, i.e. `x` is a multiple of 4. */
template <typename T> static constexpr bool is_align4(T x) {
    return (x & 3) == 0;
}
15
/** True when `c` lies in the high (leading) surrogate range 0xD800..0xDBFF. */
static constexpr inline bool utf16_is_high_surrogate(uint16_t c) {
    return c >= 0xD800 && c <= 0xDBFF;
}
17
/** True when `c` lies in the low (trailing) surrogate range 0xDC00..0xDFFF. */
static constexpr inline bool utf16_is_low_surrogate(uint16_t c) {
    return c >= 0xDC00 && c <= 0xDFFF;
}
19
/** Classifies a single UTF-8 byte.
 *
 *  @returns -1 for a byte that can never appear in UTF-8 (0xC0, 0xC1, 0xF5..0xFF),
 *            0 for a continuation byte (0x80..0xBF),
 *            1 for an ASCII byte (0x00..0x7F),
 *            2 for the leading byte of a 2-byte sequence (0xC2..0xDF),
 *            3 for the leading byte of a 3-byte sequence (0xE0..0xEF), and
 *            4 for the leading byte of a 4-byte sequence (0xF0..0xF4).
 *  A positive return value is therefore the length of the sequence.
 */
static int utf8_byte_type(uint8_t c) {
    if (c < 0x80) {
        return 1;
    }
    if (c < 0xC0) {
        return 0;
    }
    if (c < 0xC2 || c > 0xF4) {
        return -1;  // "octet values c0, c1, f5 to ff never appear"
    }
    if (c < 0xE0) {
        return 2;
    }
    if (c < 0xF0) {
        return 3;
    }
    return 4;
}
/** True when `type` (a utf8_byte_type() result) is positive, i.e. it denotes
 *  the first byte of a sequence rather than a continuation or invalid byte. */
static bool utf8_type_is_valid_leading_byte(int type) {
    return 0 < type;
}
42
/** True when `c` is a UTF-8 continuation byte, i.e. has the bit pattern
 *  10xxxxxx (0x80..0xBF). */
static bool utf8_byte_is_continuation(uint8_t c) {
    return (c & 0xC0) == 0x80;
}
44
45////////////////////////////////////////////////////////////////////////////////
46
47int SkUTF::CountUTF8(const char* utf8, size_t byteLength) {
48 if (!utf8 && byteLength) {
49 return -1;
50 }
51 int count = 0;
52 const char* stop = utf8 + byteLength;
53 while (utf8 < stop) {
54 int type = utf8_byte_type(*(const uint8_t*)utf8);
55 if (!utf8_type_is_valid_leading_byte(type) || utf8 + type > stop) {
56 return -1; // Sequence extends beyond end.
57 }
58 while(type-- > 1) {
59 ++utf8;
60 if (!utf8_byte_is_continuation(*(const uint8_t*)utf8)) {
61 return -1;
62 }
63 }
64 ++utf8;
65 ++count;
66 }
67 return count;
68}
69
70int SkUTF::CountUTF16(const uint16_t* utf16, size_t byteLength) {
71 if (!utf16 || !is_align2(intptr_t(utf16)) || !is_align2(byteLength)) {
72 return -1;
73 }
74 const uint16_t* src = (const uint16_t*)utf16;
75 const uint16_t* stop = src + (byteLength >> 1);
76 int count = 0;
77 while (src < stop) {
78 unsigned c = *src++;
80 return -1;
81 }
83 if (src >= stop) {
84 return -1;
85 }
86 c = *src++;
87 if (!utf16_is_low_surrogate(c)) {
88 return -1;
89 }
90 }
91 count += 1;
92 }
93 return count;
94}
95
96int SkUTF::CountUTF32(const int32_t* utf32, size_t byteLength) {
97 if (!is_align4(intptr_t(utf32)) || !is_align4(byteLength) || !SkTFitsIn<int>(byteLength >> 2)) {
98 return -1;
99 }
100 const uint32_t kInvalidUnicharMask = 0xFF000000; // unichar fits in 24 bits
101 const uint32_t* ptr = (const uint32_t*)utf32;
102 const uint32_t* stop = ptr + (byteLength >> 2);
103 while (ptr < stop) {
104 if (*ptr & kInvalidUnicharMask) {
105 return -1;
106 }
107 ptr += 1;
108 }
109 return (int)(byteLength >> 2);
110}
111
112template <typename T>
113static SkUnichar next_fail(const T** ptr, const T* end) {
114 *ptr = end;
115 return -1;
116}
117
118SkUnichar SkUTF::NextUTF8(const char** ptr, const char* end) {
119 if (!ptr || !end ) {
120 return -1;
121 }
122 const uint8_t* p = (const uint8_t*)*ptr;
123 if (!p || p >= (const uint8_t*)end) {
124 return next_fail(ptr, end);
125 }
126 int c = *p;
127 int hic = c << 24;
128
130 return next_fail(ptr, end);
131 }
132 if (hic < 0) {
133 uint32_t mask = (uint32_t)~0x3F;
134 hic = left_shift(hic, 1);
135 do {
136 ++p;
137 if (p >= (const uint8_t*)end) {
138 return next_fail(ptr, end);
139 }
140 // check before reading off end of array.
141 uint8_t nextByte = *p;
142 if (!utf8_byte_is_continuation(nextByte)) {
143 return next_fail(ptr, end);
144 }
145 c = (c << 6) | (nextByte & 0x3F);
146 mask <<= 5;
147 } while ((hic = left_shift(hic, 1)) < 0);
148 c &= ~mask;
149 }
150 *ptr = (const char*)p + 1;
151 return c;
152}
153
154SkUnichar SkUTF::NextUTF8WithReplacement(const char** ptr, const char* end) {
155 SkUnichar val = SkUTF::NextUTF8(ptr, end);
156 return val < 0 ? 0xFFFD : val;
157}
158
159SkUnichar SkUTF::NextUTF16(const uint16_t** ptr, const uint16_t* end) {
160 if (!ptr || !end ) {
161 return -1;
162 }
163 const uint16_t* src = *ptr;
164 if (!src || src + 1 > end || !is_align2(intptr_t(src))) {
165 return next_fail(ptr, end);
166 }
167 uint16_t c = *src++;
168 SkUnichar result = c;
169 if (utf16_is_low_surrogate(c)) {
170 return next_fail(ptr, end); // srcPtr should never point at low surrogate.
171 }
173 if (src + 1 > end) {
174 return next_fail(ptr, end); // Truncated string.
175 }
176 uint16_t low = *src++;
177 if (!utf16_is_low_surrogate(low)) {
178 return next_fail(ptr, end);
179 }
180 /*
181 [paraphrased from wikipedia]
182 Take the high surrogate and subtract 0xD800, then multiply by 0x400.
183 Take the low surrogate and subtract 0xDC00. Add these two results
184 together, and finally add 0x10000 to get the final decoded codepoint.
185
186 unicode = (high - 0xD800) * 0x400 + low - 0xDC00 + 0x10000
187 unicode = (high * 0x400) - (0xD800 * 0x400) + low - 0xDC00 + 0x10000
188 unicode = (high << 10) - (0xD800 << 10) + low - 0xDC00 + 0x10000
189 unicode = (high << 10) + low - ((0xD800 << 10) + 0xDC00 - 0x10000)
190 */
191 result = (result << 10) + (SkUnichar)low - ((0xD800 << 10) + 0xDC00 - 0x10000);
192 }
193 *ptr = src;
194 return result;
195}
196
197SkUnichar SkUTF::NextUTF32(const int32_t** ptr, const int32_t* end) {
198 if (!ptr || !end ) {
199 return -1;
200 }
201 const int32_t* s = *ptr;
202 if (!s || s + 1 > end || !is_align4(intptr_t(s))) {
203 return next_fail(ptr, end);
204 }
205 int32_t value = *s;
206 const uint32_t kInvalidUnicharMask = 0xFF000000; // unichar fits in 24 bits
207 if (value & kInvalidUnicharMask) {
208 return next_fail(ptr, end);
209 }
210 *ptr = s + 1;
211 return value;
212}
213
215 if ((uint32_t)uni > 0x10FFFF) {
216 return 0;
217 }
218 if (uni <= 127) {
219 if (utf8) {
220 *utf8 = (char)uni;
221 }
222 return 1;
223 }
224 char tmp[4];
225 char* p = tmp;
226 size_t count = 1;
227 while (uni > 0x7F >> count) {
228 *p++ = (char)(0x80 | (uni & 0x3F));
229 uni >>= 6;
230 count += 1;
231 }
232 if (utf8) {
233 p = tmp;
234 utf8 += count;
235 while (p < tmp + count - 1) {
236 *--utf8 = *p++;
237 }
238 *--utf8 = (char)(~(0xFF >> count) | uni);
239 }
240 return count;
241}
242
243size_t SkUTF::ToUTF16(SkUnichar uni, uint16_t utf16[2]) {
244 if ((uint32_t)uni > 0x10FFFF) {
245 return 0;
246 }
247 int extra = (uni > 0xFFFF);
248 if (utf16) {
249 if (extra) {
250 utf16[0] = (uint16_t)((0xD800 - 64) + (uni >> 10));
251 utf16[1] = (uint16_t)(0xDC00 | (uni & 0x3FF));
252 } else {
253 utf16[0] = (uint16_t)uni;
254 }
255 }
256 return 1 + extra;
257}
258
259int SkUTF::UTF8ToUTF16(uint16_t dst[], int dstCapacity, const char src[], size_t srcByteLength) {
260 if (!dst) {
261 dstCapacity = 0;
262 }
263
264 int dstLength = 0;
265 uint16_t* endDst = dst + dstCapacity;
266 const char* endSrc = src + srcByteLength;
267 while (src < endSrc) {
268 SkUnichar uni = NextUTF8(&src, endSrc);
269 if (uni < 0) {
270 return -1;
271 }
272
273 uint16_t utf16[2];
274 size_t count = ToUTF16(uni, utf16);
275 if (count == 0) {
276 return -1;
277 }
278 dstLength += count;
279
280 if (dst) {
281 uint16_t* elems = utf16;
282 while (dst < endDst && count > 0) {
283 *dst++ = *elems++;
284 count -= 1;
285 }
286 }
287 }
288 return dstLength;
289}
290
291int SkUTF::UTF16ToUTF8(char dst[], int dstCapacity, const uint16_t src[], size_t srcLength) {
292 if (!dst) {
293 dstCapacity = 0;
294 }
295
296 int dstLength = 0;
297 const char* endDst = dst + dstCapacity;
298 const uint16_t* endSrc = src + srcLength;
299 while (src < endSrc) {
300 SkUnichar uni = NextUTF16(&src, endSrc);
301 if (uni < 0) {
302 return -1;
303 }
304
306 size_t count = ToUTF8(uni, utf8);
307 if (count == 0) {
308 return -1;
309 }
310 dstLength += count;
311
312 if (dst) {
313 const char* elems = utf8;
314 while (dst < endDst && count > 0) {
315 *dst++ = *elems++;
316 count -= 1;
317 }
318 }
319 }
320 return dstLength;
321}
int count
int32_t SkUnichar
Definition SkTypes.h:175
static constexpr bool is_align4(T x)
Definition SkUTF.cpp:14
static int utf8_byte_type(uint8_t c)
Definition SkUTF.cpp:28
static constexpr bool is_align2(T x)
Definition SkUTF.cpp:12
static constexpr int32_t left_shift(int32_t value, int32_t shift)
Definition SkUTF.cpp:8
static constexpr bool utf16_is_high_surrogate(uint16_t c)
Definition SkUTF.cpp:16
static bool utf8_byte_is_continuation(uint8_t c)
Definition SkUTF.cpp:43
static SkUnichar next_fail(const T **ptr, const T *end)
Definition SkUTF.cpp:113
static bool utf8_type_is_valid_leading_byte(int type)
Definition SkUTF.cpp:41
static constexpr bool utf16_is_low_surrogate(uint16_t c)
Definition SkUTF.cpp:18
struct MyStruct s
glong glong end
uint8_t value
GAsyncResult * result
double x
SK_SPI size_t ToUTF8(SkUnichar uni, char utf8[kMaxBytesInUTF8Sequence]=nullptr)
SK_SPI int UTF8ToUTF16(uint16_t dst[], int dstCapacity, const char src[], size_t srcByteLength)
Definition SkUTF.cpp:259
constexpr unsigned kMaxBytesInUTF8Sequence
Definition SkUTF.h:59
SK_SPI int CountUTF16(const uint16_t *utf16, size_t byteLength)
Definition SkUTF.cpp:70
SK_SPI SkUnichar NextUTF16(const uint16_t **ptr, const uint16_t *end)
Definition SkUTF.cpp:159
SK_SPI SkUnichar NextUTF8WithReplacement(const char **ptr, const char *end)
Definition SkUTF.cpp:154
SK_SPI SkUnichar NextUTF8(const char **ptr, const char *end)
Definition SkUTF.cpp:118
SK_SPI size_t ToUTF16(SkUnichar uni, uint16_t utf16[2]=nullptr)
Definition SkUTF.cpp:243
SK_SPI int CountUTF8(const char *utf8, size_t byteLength)
Definition SkUTF.cpp:47
SK_SPI SkUnichar NextUTF32(const int32_t **ptr, const int32_t *end)
Definition SkUTF.cpp:197
SK_SPI int CountUTF32(const int32_t *utf32, size_t byteLength)
Definition SkUTF.cpp:96
SK_SPI int UTF16ToUTF8(char dst[], int dstCapacity, const uint16_t src[], size_t srcLength)
Definition SkUTF.cpp:291
#define T