#include <unicode.h>
|
static intptr_t | CodeUnitCount (const uint8_t *utf8_array, intptr_t array_len, Type *type) |
|
static bool | IsValid (const uint8_t *utf8_array, intptr_t array_len) |
|
static intptr_t | Length (int32_t ch) |
|
static intptr_t | Length (const String &str) |
|
static intptr_t | Encode (int32_t ch, char *dst) |
|
static intptr_t | Encode (const String &src, char *dst, intptr_t len) |
|
static intptr_t | Decode (const uint8_t *utf8_array, intptr_t array_len, int32_t *ch) |
|
static bool | DecodeToLatin1 (const uint8_t *utf8_array, intptr_t array_len, uint8_t *dst, intptr_t len) |
|
static bool | DecodeToUTF16 (const uint8_t *utf8_array, intptr_t array_len, uint16_t *dst, intptr_t len) |
|
static bool | DecodeToUTF32 (const uint8_t *utf8_array, intptr_t array_len, int32_t *dst, intptr_t len) |
|
static intptr_t | ReportInvalidByte (const uint8_t *utf8_array, intptr_t array_len, intptr_t len) |
|
static bool | DecodeCStringToUTF32 (const char *str, int32_t *dst, intptr_t len) |
|
Definition at line 41 of file unicode.h.
◆ Type
Enumerator |
---|
kLatin1 | |
kBMP | |
kSupplementary | |
Definition at line 43 of file unicode.h.
◆ CodeUnitCount()
intptr_t dart::Utf8::CodeUnitCount |
( |
const uint8_t * |
utf8_array, |
|
|
intptr_t |
array_len, |
|
|
Type * |
type |
|
) |
| |
|
static |
Definition at line 46 of file unicode.cc.
48 {
51 for (intptr_t
i = 0;
i < array_len;
i++) {
52 uint8_t code_unit = utf8_array[
i];
53 if (!IsTrailByte(code_unit)) {
55 if (!IsLatin1SequenceStart(code_unit)) {
56 if (IsSupplementarySequenceStart(code_unit)) {
59 }
else if (char_type ==
kLatin1) {
61 }
62 }
63 }
64 }
67}
◆ Decode()
intptr_t dart::Utf8::Decode |
( |
const uint8_t * |
utf8_array, |
|
|
intptr_t |
array_len, |
|
|
int32_t * |
ch |
|
) |
| |
|
static |
Definition at line 135 of file unicode.cc.
137 {
138 uint32_t ch = utf8_array[0] & 0xFF;
140 if (ch >= 0x80) {
141 intptr_t num_trail_bytes = kTrailBytes[ch];
142 bool is_malformed = false;
143 for (;
i < num_trail_bytes; ++
i) {
145 uint8_t code_unit = utf8_array[
i];
146 is_malformed |= !IsTrailByte(code_unit);
147 ch = (ch << 6) + code_unit;
148 } else {
150 return 0;
151 }
152 }
153 ch -= kMagicBits[num_trail_bytes];
154 if (!((is_malformed ==
false) && (
i == num_trail_bytes) &&
157 return 0;
158 }
159 }
162}
static bool IsOutOfRange(int32_t code_point)
◆ DecodeCStringToUTF32()
bool dart::Utf8::DecodeCStringToUTF32 |
( |
const char * |
str, |
|
|
int32_t * |
dst, |
|
|
intptr_t |
len |
|
) |
| |
|
static |
Definition at line 266 of file unicode.cc.
266 {
268 intptr_t array_len = strlen(str);
269 const uint8_t* utf8_array = reinterpret_cast<const uint8_t*>(str);
271}
static bool DecodeToUTF32(const uint8_t *utf8_array, intptr_t array_len, int32_t *dst, intptr_t len)
◆ DecodeToLatin1()
bool dart::Utf8::DecodeToLatin1 |
( |
const uint8_t * |
utf8_array, |
|
|
intptr_t |
array_len, |
|
|
uint8_t * |
dst, |
|
|
intptr_t |
len |
|
) |
| |
|
static |
Definition at line 194 of file unicode.cc.
197 {
199 intptr_t j = 0;
200 intptr_t num_bytes;
201 for (; (
i < array_len) && (j <
len);
i += num_bytes, ++j) {
202 int32_t ch;
203 ASSERT(IsLatin1SequenceStart(utf8_array[
i]));
205 if (ch == -1) {
206 return false;
207 }
210 }
211 if ((
i < array_len) && (j ==
len)) {
212 return false;
213 }
214 return true;
215}
static intptr_t Decode(const uint8_t *utf8_array, intptr_t array_len, int32_t *ch)
static bool IsLatin1(int32_t code_point)
◆ DecodeToUTF16()
bool dart::Utf8::DecodeToUTF16 |
( |
const uint8_t * |
utf8_array, |
|
|
intptr_t |
array_len, |
|
|
uint16_t * |
dst, |
|
|
intptr_t |
len |
|
) |
| |
|
static |
Definition at line 217 of file unicode.cc.
220 {
222 intptr_t j = 0;
223 intptr_t num_bytes;
224 for (; (
i < array_len) && (j <
len);
i += num_bytes, ++j) {
225 int32_t ch;
226 bool is_supplementary = IsSupplementarySequenceStart(utf8_array[
i]);
228 if (ch == -1) {
229 return false;
230 }
231 if (is_supplementary) {
232 if (j == (
len - 1))
return false;
234 j = j + 1;
235 } else {
237 }
238 }
239 if ((
i < array_len) && (j ==
len)) {
240 return false;
241 }
242 return true;
243}
static void Encode(int32_t codepoint, uint16_t *dst)
◆ DecodeToUTF32()
bool dart::Utf8::DecodeToUTF32 |
( |
const uint8_t * |
utf8_array, |
|
|
intptr_t |
array_len, |
|
|
int32_t * |
dst, |
|
|
intptr_t |
len |
|
) |
| |
|
static |
Definition at line 245 of file unicode.cc.
248 {
250 intptr_t j = 0;
251 intptr_t num_bytes;
252 for (; (
i < array_len) && (j <
len);
i += num_bytes, ++j) {
253 int32_t ch;
255 if (ch == -1) {
256 return false;
257 }
259 }
260 if ((
i < array_len) && (j ==
len)) {
261 return false;
262 }
263 return true;
264}
◆ Encode() [1/2]
intptr_t dart::Utf8::Encode |
( |
const String & |
src, |
|
|
char * |
dst, |
|
|
intptr_t |
len |
|
) |
| |
|
static |
Definition at line 65 of file unicode.cc.
65 {
66 uintptr_t array_len =
len;
69 if (
src.IsOneByteString()) {
70
71
72 NoSafepointScope scope;
73 const uintptr_t*
data =
74 reinterpret_cast<const uintptr_t*
>(OneByteString::DataStart(
src));
75 uintptr_t char_length =
src.Length();
78 for (uintptr_t
i = 0;
i < char_length;
i +=
sizeof(uintptr_t)) {
79
80
81 if (
i +
sizeof(uintptr_t) <= char_length &&
83 pos +
sizeof(uintptr_t) <= array_len) {
85 pos +=
sizeof(uintptr_t);
86 } else {
87
88
89 const uint8_t*
p =
reinterpret_cast<const uint8_t*
>(
data);
90 const uint8_t* limit =
92 for (;
p < limit;
p++) {
94
95
96 intptr_t bytes =
Length(c);
97 if (
pos + bytes > array_len) {
99 }
102 }
103 }
105 }
106 } else {
107
108
109 String::CodePointIterator it(
src);
110 while (it.Next()) {
111 int32_t ch = it.Current();
114
115
116
118 }
120 if (
pos + num_bytes >
len) {
121 break;
122 }
125 }
126 }
128}
static bool IsSurrogate(uint32_t ch)
static intptr_t Length(int32_t ch)
static intptr_t Encode(int32_t ch, char *dst)
static constexpr int32_t kMaxOneByteChar
static constexpr int32_t kReplacementChar
static T Minimum(T x, T y)
static void StoreUnaligned(T *ptr, T value)
static int8_t data[kExtLength]
static constexpr uintptr_t kAsciiWordMask
◆ Encode() [2/2]
intptr_t dart::Utf8::Encode |
( |
int32_t |
ch, |
|
|
char * |
dst |
|
) |
| |
|
static |
Definition at line 110 of file unicode.cc.
110 {
111 constexpr int kMask = ~(1 << 6);
114 return 1;
115 }
117 dst[0] = 0xC0 | (ch >> 6);
119 return 2;
120 }
122 dst[0] = 0xE0 | (ch >> 12);
123 dst[1] = 0x80 | ((ch >> 6) & kMask);
125 return 3;
126 }
128 dst[0] = 0xF0 | (ch >> 18);
129 dst[1] = 0x80 | ((ch >> 12) & kMask);
130 dst[2] = 0x80 | ((ch >> 6) & kMask);
132 return 4;
133}
static constexpr int32_t kMaxTwoByteChar
static constexpr int32_t kMaxFourByteChar
static constexpr int32_t kMaxThreeByteChar
◆ IsValid()
bool dart::Utf8::IsValid |
( |
const uint8_t * |
utf8_array, |
|
|
intptr_t |
array_len |
|
) |
| |
|
static |
Definition at line 70 of file unicode.cc.
70 {
72 while (
i < array_len) {
73 uint32_t ch = utf8_array[
i] & 0xFF;
74 intptr_t j = 1;
75 if (ch >= 0x80) {
76 int8_t num_trail_bytes = kTrailBytes[ch];
77 bool is_malformed = false;
78 for (; j < num_trail_bytes; ++j) {
79 if ((
i + j) < array_len) {
80 uint8_t code_unit = utf8_array[
i + j];
81 is_malformed |= !IsTrailByte(code_unit);
82 ch = (ch << 6) + code_unit;
83 } else {
84 return false;
85 }
86 }
87 ch -= kMagicBits[num_trail_bytes];
88 if (!((is_malformed == false) && (j == num_trail_bytes) &&
90 return false;
91 }
92 }
94 }
95 return true;
96}
◆ Length() [1/2]
intptr_t dart::Utf8::Length |
( |
const String & |
str | ) |
|
|
static |
Definition at line 21 of file unicode.cc.
21 {
22 if (str.IsOneByteString()) {
23
24
25
26
27 uintptr_t char_length = str.Length();
28 uintptr_t
length = char_length;
29 NoSafepointScope no_safepoint;
30 const uintptr_t*
data =
31 reinterpret_cast<const uintptr_t*>(OneByteString::DataStart(str));
33 for (
i =
sizeof(uintptr_t);
i <= char_length;
i +=
sizeof(uintptr_t)) {
34 uintptr_t chunk = *
data++;
36 if (chunk != 0) {
37
38#if defined(ARCH_IS_64_BIT)
39 chunk += chunk >> 32;
40#endif
41 chunk += chunk >> 16;
42 chunk += chunk >> 8;
43 length += (chunk >> 7) & 0xf;
44 }
45 }
46
47 i -=
sizeof(uintptr_t);
48 for (;
i < char_length;
i++) {
50 }
52 }
53
54
55
57 String::CodePointIterator it(str);
58 while (it.Next()) {
59 int32_t ch = it.Current();
61 }
63}
◆ Length() [2/2]
intptr_t dart::Utf8::Length |
( |
int32_t |
ch | ) |
|
|
static |
Definition at line 98 of file unicode.cc.
98 {
100 return 1;
102 return 2;
104 return 3;
105 }
107 return 4;
108}
◆ ReportInvalidByte()
intptr_t dart::Utf8::ReportInvalidByte |
( |
const uint8_t * |
utf8_array, |
|
|
intptr_t |
array_len, |
|
|
intptr_t |
len |
|
) |
| |
|
static |
Definition at line 163 of file unicode.cc.
165 {
167 intptr_t j = 0;
168 intptr_t num_bytes;
169 for (; (
i < array_len) && (j <
len);
i += num_bytes, ++j) {
170 int32_t ch;
171 bool is_supplementary = IsSupplementarySequenceStart(utf8_array[
i]);
173 if (ch == -1) {
174 break;
175 }
176 if (is_supplementary) {
177 j = j + 1;
178 }
179 }
180#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
181
182
183#else
185 for (intptr_t idx = 0; idx < 10 && (
i + idx) < array_len; idx++) {
188 }
190#endif
192}
static void PrintErr(const char *format,...) PRINTF_ATTRIBUTE(1
◆ kMaxFourByteChar
◆ kMaxOneByteChar
constexpr int32_t dart::Utf8::kMaxOneByteChar = 0x7F |
|
static constexpr |
◆ kMaxThreeByteChar
constexpr int32_t dart::Utf8::kMaxThreeByteChar = 0xFFFF |
|
static constexpr |
◆ kMaxTwoByteChar
constexpr int32_t dart::Utf8::kMaxTwoByteChar = 0x7FF |
|
static constexpr |
The documentation for this class was generated from the following files:
- third_party/dart-lang/sdk/runtime/platform/unicode.h
- third_party/dart-lang/sdk/runtime/platform/unicode.cc
- third_party/dart-lang/sdk/runtime/vm/unicode.cc