Flutter Engine
The Flutter Engine
unicode_test.cc
Go to the documentation of this file.
1// Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file
2// for details. All rights reserved. Use of this source code is governed by a
3// BSD-style license that can be found in the LICENSE file.
4
5#include "platform/unicode.h"
6#include "vm/globals.h"
7#include "vm/unit_test.h"
8
9namespace dart {
10
12 const intptr_t kInputLen = 3;
13 const uint16_t kInput[kInputLen] = {0xe6, 0xe7, 0xe8}; // æçè
14 const String& input = String::Handle(String::FromUTF16(kInput, kInputLen));
15 const uintptr_t kBufferLength = 10;
16 unsigned char buffer[kBufferLength];
17 for (uintptr_t i = 0; i < kBufferLength; i++) {
18 buffer[i] = 42;
19 }
20 Utf8::Encode(input, reinterpret_cast<char*>(&buffer[0]), 10);
21 uintptr_t i;
22 for (i = 0; i < static_cast<uintptr_t>(Utf8::Length(input)); i++) {
23 EXPECT(buffer[i] > 127);
24 }
25 for (; i < kBufferLength; i++) {
26 EXPECT(buffer[i] == 42);
27 }
28}
29
30ISOLATE_UNIT_TEST_CASE(Utf8InvalidByte) {
31 {
32 uint8_t array[] = {0x41, 0xF0, 0x92};
33 intptr_t encode_len = 3;
34 intptr_t decode_len = 3;
35 intptr_t pos = Utf8::ReportInvalidByte(array, encode_len, decode_len);
36 EXPECT(pos == 1);
37 }
38
39 {
40 uint8_t array[] = {0x81, 0x40, 0x42};
41 intptr_t encode_len = 3;
42 intptr_t decode_len = 3;
43 intptr_t pos = Utf8::ReportInvalidByte(array, encode_len, decode_len);
44 EXPECT(pos == 0);
45 }
46
47 {
48 uint8_t array[] = {0x42, 0x40, 0x80};
49 intptr_t encode_len = 3;
50 intptr_t decode_len = 3;
51 intptr_t pos = Utf8::ReportInvalidByte(array, encode_len, decode_len);
52 EXPECT(pos == 2);
53 }
54
55 {
56 uint8_t array[] = {0x41, 0xF0, 0x92, 0x92, 0x91};
57 intptr_t encode_len = 5;
58 intptr_t decode_len = 2;
59 intptr_t pos = Utf8::ReportInvalidByte(array, encode_len, decode_len);
60 EXPECT(pos == encode_len);
61 }
62}
63
65 // Examples from the Unicode specification, chapter 3
66 {
67 const char* src = "\x41\xC3\xB1\x42";
68 int32_t expected[] = {0x41, 0xF1, 0x42};
69 int32_t dst[ARRAY_SIZE(expected)];
70 memset(dst, 0, sizeof(dst));
73 EXPECT(!memcmp(expected, dst, sizeof(expected)));
74 }
75
76 {
77 const char* src = "\x4D";
78 int32_t expected[] = {0x4D};
79 int32_t dst[ARRAY_SIZE(expected)];
80 memset(dst, 0, sizeof(dst));
83 EXPECT(!memcmp(expected, dst, sizeof(expected)));
84 }
85
86 {
87 const char* src = "\xD0\xB0";
88 int32_t expected[] = {0x430};
89 int32_t dst[ARRAY_SIZE(expected)];
90 memset(dst, 0, sizeof(dst));
93 EXPECT(!memcmp(expected, dst, sizeof(expected)));
94 }
95
96 {
97 const char* src = "\xE4\xBA\x8C";
98 int32_t expected[] = {0x4E8C};
99 int32_t dst[ARRAY_SIZE(expected)];
100 memset(dst, 0, sizeof(dst));
103 EXPECT(!memcmp(expected, dst, sizeof(expected)));
104 }
105
106 {
107 const char* src = "\xF0\x90\x8C\x82";
108 int32_t expected[] = {0x10302};
109 int32_t dst[ARRAY_SIZE(expected)];
110 memset(dst, 0, sizeof(dst));
113 EXPECT(!memcmp(expected, dst, sizeof(expected)));
114 }
115
116 {
117 const char* src = "\x4D\xD0\xB0\xE4\xBA\x8C\xF0\x90\x8C\x82";
118 int32_t expected[] = {0x4D, 0x430, 0x4E8C, 0x10302};
119 int32_t dst[ARRAY_SIZE(expected)];
120 memset(dst, 0, sizeof(dst));
123 EXPECT(!memcmp(expected, dst, sizeof(expected)));
124 }
125
126 // Mixture of non-ASCII and ASCII characters
127 {
128 const char* src =
129 "\xD7\x92\xD7\x9C\xD7\xA2\xD7\x93"
130 "\x20"
131 "\xD7\x91\xD7\xA8\xD7\x9B\xD7\x94";
132 int32_t expected[] = {0x5D2, 0x5DC, 0x5E2, 0x5D3, 0x20,
133 0x5D1, 0x5E8, 0x5DB, 0x5D4};
134 int32_t dst[ARRAY_SIZE(expected)];
135 memset(dst, 0, sizeof(dst));
138 EXPECT(!memcmp(expected, dst, sizeof(expected)));
139 }
140
141 // http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
142
143 // 1 - Some correct UTF-8 text
144 {
145 const char* src = "\xCE\xBA\xE1\xBD\xB9\xCF\x83\xCE\xBC\xCE\xB5";
146 int32_t expected[] = {0x3BA, 0x1F79, 0x3C3, 0x3BC, 0x3B5};
147 int32_t dst[ARRAY_SIZE(expected)];
148 memset(dst, 0, sizeof(dst));
151 EXPECT(!memcmp(expected, dst, sizeof(expected)));
152 }
153
154 // 2 - Boundary condition test cases
155
156 // 2.1 - First possible sequence of a certain length
157
158 // 2.1.1 - 1 byte (U-00000000): "\x00"
159 {
160 const char* src = "\x00";
161 int32_t expected[] = {0x0};
162 int32_t dst[ARRAY_SIZE(expected)];
163 memset(dst, 0xFF, sizeof(dst));
166 EXPECT(memcmp(expected, dst, sizeof(expected)));
167 }
168
169 // 2.1.2 - 2 bytes (U-00000080): "\xC2\x80"
170 {
171 const char* src = "\xC2\x80";
172 int32_t expected[] = {0x80};
173 int32_t dst[ARRAY_SIZE(expected)];
174 memset(dst, 0, sizeof(dst));
177 EXPECT(!memcmp(expected, dst, sizeof(expected)));
178 }
179
180 // 2.1.3 - 3 bytes (U-00000800): "\xE0\xA0\x80"
181 {
182 const char* src = "\xE0\xA0\x80";
183 int32_t expected[] = {0x800};
184 int32_t dst[ARRAY_SIZE(expected)];
185 memset(dst, 0, sizeof(dst));
188 EXPECT(!memcmp(expected, dst, sizeof(expected)));
189 }
190
191 // 2.1.4 - 4 bytes (U-00010000): "\xF0\x90\x80\x80"
192 {
193 const char* src = "\xF0\x90\x80\x80";
194 int32_t expected[] = {0x10000};
195 int32_t dst[ARRAY_SIZE(expected)];
196 memset(dst, 0, sizeof(dst));
199 EXPECT(!memcmp(expected, dst, sizeof(expected)));
200 }
201
202 // 2.1.5 - 5 bytes (U-00200000): "\xF8\x88\x80\x80\x80"
203 {
204 const char* src = "\xF8\x88\x80\x80\x80";
205 int32_t expected[] = {0x200000};
206 int32_t dst[ARRAY_SIZE(expected)];
207 memset(dst, 0, sizeof(dst));
210 EXPECT(memcmp(expected, dst, sizeof(expected)));
211 }
212
213 // 2.1.6 - 6 bytes (U-04000000): "\xFC\x84\x80\x80\x80\x80"
214 {
215 const char* src = "\xFC\x84\x80\x80\x80\x80";
216 int32_t expected[] = {0x400000};
217 int32_t dst[ARRAY_SIZE(expected)];
218 memset(dst, 0, sizeof(dst));
221 EXPECT(memcmp(expected, dst, sizeof(expected)));
222 }
223
224 // 2.2 - Last possible sequence of a certain length
225
226 // 2.2.1 - 1 byte (U-0000007F): "\x7F"
227 {
228 const char* src = "\x7F";
229 int32_t expected[] = {0x7F};
230 int32_t dst[ARRAY_SIZE(expected)];
231 memset(dst, 0, sizeof(dst));
234 EXPECT(!memcmp(expected, dst, sizeof(expected)));
235 }
236
237 // 2.2.2 - 2 bytes (U-000007FF): "\xDF\xBF"
238 {
239 const char* src = "\xDF\xBF";
240 int32_t expected[] = {0x7FF};
241 int32_t dst[ARRAY_SIZE(expected)];
242 memset(dst, 0, sizeof(dst));
245 EXPECT(!memcmp(expected, dst, sizeof(expected)));
246 }
247
248 // 2.2.3 - 3 bytes (U-0000FFFF): "\xEF\xBF\xBF"
249 {
250 const char* src = "\xEF\xBF\xBF";
251 int32_t expected[] = {0xFFFF};
252 int32_t dst[ARRAY_SIZE(expected)];
253 memset(dst, 0, sizeof(dst));
256 EXPECT(!memcmp(expected, dst, sizeof(expected)));
257 }
258
259 // 2.2.4 - 4 bytes (U-001FFFFF): "\xF7\xBF\xBF\xBF"
260 {
261 const char* src = "\xF7\xBF\xBF\xBF";
262 int32_t expected[] = {0x1FFFF};
263 int32_t dst[ARRAY_SIZE(expected)];
264 memset(dst, 0, sizeof(dst));
267 EXPECT(memcmp(expected, dst, sizeof(expected)));
268 }
269
270 // 2.2.5 - 5 bytes (U-03FFFFFF): "\xFB\xBF\xBF\xBF\xBF"
271 {
272 const char* src = "\xFB\xBF\xBF\xBF\xBF";
273 int32_t expected[] = {0x3FFFFFF};
274 int32_t dst[ARRAY_SIZE(expected)];
275 memset(dst, 0, sizeof(dst));
278 EXPECT(memcmp(expected, dst, sizeof(expected)));
279 }
280
281 // 2.2.6 - 6 bytes (U-7FFFFFFF): "\xFD\xBF\xBF\xBF\xBF\xBF"
282 {
283 const char* src = "\xFD\xBF\xBF\xBF\xBF\xBF";
284 int32_t expected[] = {0x7FFFFFF};
285 int32_t dst[ARRAY_SIZE(expected)];
286 memset(dst, 0, sizeof(dst));
289 EXPECT(memcmp(expected, dst, sizeof(expected)));
290 }
291
292 // 2.3 - Other boundary conditions
293
294 // 2.3.1 - U-0000D7FF = ed 9f bf = "\xED\x9F\xBF"
295 {
296 const char* src = "\xED\x9F\xBF";
297 int32_t expected[] = {0xD7FF};
298 int32_t dst[ARRAY_SIZE(expected)];
299 memset(dst, 0, sizeof(dst));
302 EXPECT(!memcmp(expected, dst, sizeof(expected)));
303 }
304
305 // 2.3.2 - U-0000E000 = ee 80 80 = "\xEE\x80\x80"
306 {
307 const char* src = "\xEE\x80\x80";
308 int32_t expected[] = {0xE000};
309 int32_t dst[ARRAY_SIZE(expected)];
310 memset(dst, 0, sizeof(dst));
313 EXPECT(!memcmp(expected, dst, sizeof(expected)));
314 }
315
316 // 2.3.3 - U-0000FFFD = ef bf bd = "\xEF\xBF\xBD"
317 {
318 const char* src = "\xEF\xBF\xBD";
319 int32_t expected[] = {0xFFFD};
320 int32_t dst[ARRAY_SIZE(expected)];
321 memset(dst, 0, sizeof(dst));
324 EXPECT(!memcmp(expected, dst, sizeof(expected)));
325 }
326
327 // 2.3.4 - U-0010FFFF = f4 8f bf bf = "\xF4\x8F\xBF\xBF"
328 {
329 const char* src = "\xF4\x8F\xBF\xBF";
330 int32_t expected[] = {0x10FFFF};
331 int32_t dst[ARRAY_SIZE(expected)];
332 memset(dst, 0, sizeof(dst));
335 EXPECT(!memcmp(expected, dst, sizeof(expected)));
336 }
337
338 // 2.3.5 - U-00110000 = f4 90 80 80 = "\xF4\x90\x80\x80"
339 {
340 const char* src = "\xF4\x90\x80\x80";
341 int32_t expected[] = {0x110000};
342 int32_t dst[ARRAY_SIZE(expected)];
343 memset(dst, 0, sizeof(dst));
346 EXPECT(memcmp(expected, dst, sizeof(expected)));
347 }
348
349 // 3 - Malformed sequences
350
351 // 3.1 - Unexpected continuation bytes
352
353 // 3.1.1 - First continuation byte 0x80: "\x80"
354 {
355 const char* src = "\x80";
356 int32_t expected[] = {0x80};
357 int32_t dst[ARRAY_SIZE(expected)];
358 memset(dst, 0, sizeof(dst));
361 EXPECT(memcmp(expected, dst, sizeof(expected)));
362 }
363
364 // 3.1.2 - Last continuation byte 0xbf: "\xBF"
365 {
366 const char* src = "\xBF";
367 int32_t expected[] = {0xBF};
368 int32_t dst[ARRAY_SIZE(expected)];
369 memset(dst, 0, sizeof(dst));
372 EXPECT(memcmp(expected, dst, sizeof(expected)));
373 }
374
375 // 3.1.3 - 2 continuation bytes: "\x80\xBF"
376 {
377 const char* src = "\x80\xBF";
378 int32_t expected[] = {0x80, 0xBF};
379 int32_t dst[ARRAY_SIZE(expected)];
380 memset(dst, 0, sizeof(dst));
383 EXPECT(memcmp(expected, dst, sizeof(expected)));
384 }
385
386 // 3.1.4 - 3 continuation bytes: "\x80\xBF\x80"
387 {
388 const char* src = "\x80\xBF\x80";
389 int32_t expected[] = {0x80, 0xBF, 0x80};
390 int32_t dst[ARRAY_SIZE(expected)];
391 memset(dst, 0, sizeof(dst));
394 EXPECT(memcmp(expected, dst, sizeof(expected)));
395 }
396
397 // 3.1.5 - 4 continuation bytes: "\x80\xBF\x80\xBF"
398 {
399 const char* src = "\x80\xBF\x80\xBF";
400 int32_t expected[] = {0x80, 0xBF, 0x80, 0xBF};
401 int32_t dst[ARRAY_SIZE(expected)];
402 memset(dst, 0, sizeof(dst));
405 EXPECT(memcmp(expected, dst, sizeof(expected)));
406 }
407
408 // 3.1.6 - 5 continuation bytes: "\x80\xBF\x80\xBF\x80"
409 {
410 const char* src = "\x80\xBF\x80\xBF\x80";
411 int32_t expected[] = {0x80, 0xBF, 0x80, 0xBF, 0x80};
412 int32_t dst[ARRAY_SIZE(expected)];
413 memset(dst, 0, sizeof(dst));
416 EXPECT(memcmp(expected, dst, sizeof(expected)));
417 }
418
419 // 3.1.7 - 6 continuation bytes: "\x80\xBF\x80\xBF\x80\xBF"
420 {
421 const char* src = "\x80\xBF\x80\xBF\x80\xBF";
422 int32_t expected[] = {0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF};
423 int32_t dst[ARRAY_SIZE(expected)];
424 memset(dst, 0, sizeof(dst));
427 EXPECT(memcmp(expected, dst, sizeof(expected)));
428 }
429
430 // 3.1.8 - 7 continuation bytes: "\x80\xBF\x80\xBF\x80\xBF\x80"
431 {
432 const char* src = "\x80\xBF\x80\xBF\x80\xBF\x80";
433 int32_t expected[] = {0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, 0x80};
434 int32_t dst[ARRAY_SIZE(expected)];
435 memset(dst, 0, sizeof(dst));
438 EXPECT(memcmp(expected, dst, sizeof(expected)));
439 }
440
441 // 3.1.9 - Sequence of all 64 possible continuation bytes (0x80-0xbf):
442 {
443 const char* src =
444 "\x80\x81\x82\x83\x84\x85\x86\x87"
445 "\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F"
446 "\x90\x91\x92\x93\x94\x95\x96\x97"
447 "\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F"
448 "\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7"
449 "\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF"
450 "\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7"
451 "\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF";
452 int32_t expected[] = {0x0};
453 int32_t dst[ARRAY_SIZE(expected)];
454 for (size_t i = 0; i < strlen(src); ++i) {
455 memset(dst, 0xFF, sizeof(dst));
458 EXPECT(memcmp(expected, dst, sizeof(expected)));
459 }
460 }
461
462 // 3.2 - Lonely start character
463
464 // 3.2.1 - All 32 first bytes of 2-byte sequences (0xc0-0xdf), each
465 // followed by a space character:
466 {
467 const char* src =
468 "\xC0\x20\xC1\x20\xC2\x20\xC3\x20"
469 "\xC4\x20\xC5\x20\xC6\x20\xC7\x20"
470 "\xC8\x20\xC9\x20\xCA\x20\xCB\x20"
471 "\xCC\x20\xCD\x20\xCE\x20\xCF\x20"
472 "\xD0\x20\xD1\x20\xD2\x20\xD3\x20"
473 "\xD4\x20\xD5\x20\xD6\x20\xD7\x20"
474 "\xD8\x20\xD9\x20\xDA\x20\xDB\x20"
475 "\xDC\x20\xDD\x20\xDE\x20\xDF\x20";
476 int32_t expected[] = {0x0};
477 int32_t dst[ARRAY_SIZE(expected)];
478 for (size_t i = 0; i < strlen(src); i += 2) {
479 memset(dst, 0xFF, sizeof(dst));
482 EXPECT(memcmp(expected, dst, sizeof(expected)));
483 }
484 }
485
486 // 3.2.2 - All 16 first bytes of 3-byte sequences (0xe0-0xef), each
487 // followed by a space character:
488 {
489 const char* src =
490 "\xE0\x20\xE1\x20\xE2\x20\xE3\x20"
491 "\xE4\x20\xE5\x20\xE6\x20\xE7\x20"
492 "\xE8\x20\xE9\x20\xEA\x20\xEB\x20"
493 "\xEC\x20\xED\x20\xEE\x20\xEF\x20";
494 int32_t expected[] = {0x0};
495 int32_t dst[ARRAY_SIZE(expected)];
496 for (size_t i = 0; i < strlen(src); i += 2) {
497 memset(dst, 0xFF, sizeof(dst));
500 EXPECT(memcmp(expected, dst, sizeof(expected)));
501 }
502 }
503
504 // 3.2.3 - All 8 first bytes of 4-byte sequences (0xf0-0xf7), each
505 // followed by a space character:
506 {
507 const char* src =
508 "\xF0\x20\xF1\x20\xF2\x20\xF3\x20"
509 "\xF4\x20\xF5\x20\xF6\x20\xF7\x20";
510 int32_t expected[] = {0x0};
511 int32_t dst[ARRAY_SIZE(expected)];
512 for (size_t i = 0; i < strlen(src); i += 2) {
513 memset(dst, 0xFF, sizeof(dst));
516 EXPECT(memcmp(expected, dst, sizeof(expected)));
517 }
518 }
519
520 // 3.2.4 - All 4 first bytes of 5-byte sequences (0xf8-0xfb), each
521 // followed by a space character:
522 {
523 const char* src = "\xF8\x20\xF9\x20\xFA\x20\xFB\x20";
524 int32_t expected[] = {0x0};
525 int32_t dst[ARRAY_SIZE(expected)];
526 for (size_t i = 0; i < strlen(src); i += 2) {
527 memset(dst, 0xFF, sizeof(dst));
530 EXPECT(memcmp(expected, dst, sizeof(expected)));
531 }
532 }
533
534 // 3.2.5 - All 2 first bytes of 6-byte sequences (0xfc-0xfd), each
535 // followed by a space character:
536 {
537 const char* src = "\xFC\x20\xFD\x20";
538 int32_t expected[] = {0x0};
539 int32_t dst[ARRAY_SIZE(expected)];
540 for (size_t i = 0; i < strlen(src); i += 2) {
541 memset(dst, 0xFF, sizeof(dst));
544 EXPECT(memcmp(expected, dst, sizeof(expected)));
545 }
546 }
547
548 // 3.3 - Sequences with last continuation byte missing
549
550 // 3.3.1 - 2-byte sequence with last byte missing (U+0000): "\xC0"
551 {
552 const char* src = "\xC0";
553 int32_t expected[] = {0x0};
554 int32_t dst[ARRAY_SIZE(expected)];
555 memset(dst, 0xFF, sizeof(dst));
558 EXPECT(memcmp(expected, dst, sizeof(expected)));
559 }
560
561 // 3.3.2 - 3-byte sequence with last byte missing (U+0000): "\xE0\x80"
562 {
563 const char* src = "\xE0\x80";
564 int32_t expected[] = {0x0};
565 int32_t dst[ARRAY_SIZE(expected)];
566 memset(dst, 0xFF, sizeof(dst));
569 EXPECT(memcmp(expected, dst, sizeof(expected)));
570 }
571
572 // 3.3.3 - 4-byte sequence with last byte missing (U+0000): "\xF0\x80\x80"
573 {
574 const char* src = "\xF0\x80\x80";
575 int32_t expected[] = {0x0};
576 int32_t dst[ARRAY_SIZE(expected)];
577 memset(dst, 0xFF, sizeof(dst));
580 EXPECT(memcmp(expected, dst, sizeof(expected)));
581 }
582
583 // 3.3.4 - 5-byte sequence with last byte missing (U+0000): "\xF8\x80\x80\x80"
584 {
585 const char* src = "\xF8\x80\x80\x80";
586 int32_t expected[] = {0x0};
587 int32_t dst[ARRAY_SIZE(expected)];
588 memset(dst, 0xFF, sizeof(dst));
591 EXPECT(memcmp(expected, dst, sizeof(expected)));
592 }
593
594 // 3.3.5 - 6-byte sequence with last byte missing (U+0000):
595 // "\xFC\x80\x80\x80\x80"
596 {
597 const char* src = "\xFC\x80\x80\x80\x80";
598 int32_t expected[] = {0x0};
599 int32_t dst[ARRAY_SIZE(expected)];
600 memset(dst, 0xFF, sizeof(dst));
603 EXPECT(memcmp(expected, dst, sizeof(expected)));
604 }
605
606 // 3.3.6 - 2-byte sequence with last byte missing (U-000007FF): "\xDF"
607 {
608 const char* src = "\xDF";
609 int32_t expected[] = {0x0};
610 int32_t dst[ARRAY_SIZE(expected)];
611 memset(dst, 0xFF, sizeof(dst));
614 EXPECT(memcmp(expected, dst, sizeof(expected)));
615 }
616
617 // 3.3.7 - 3-byte sequence with last byte missing (U-0000FFFF): "\xEF\xBF"
618 {
619 const char* src = "\xEF\xBF";
620 int32_t expected[] = {0x0};
621 int32_t dst[ARRAY_SIZE(expected)];
622 memset(dst, 0xFF, sizeof(dst));
625 EXPECT(memcmp(expected, dst, sizeof(expected)));
626 }
627
628 // 3.3.8 - 4-byte sequence with last byte missing (U-001FFFFF): "\xF7\xBF\xBF"
629 {
630 const char* src = "\xF7\xBF\xBF";
631 int32_t expected[] = {0x0};
632 int32_t dst[ARRAY_SIZE(expected)];
633 memset(dst, 0xFF, sizeof(dst));
636 EXPECT(memcmp(expected, dst, sizeof(expected)));
637 }
638
639 // 3.3.9 - 5-byte sequence with last byte missing (U-03FFFFFF):
640 // "\xFB\xBF\xBF\xBF"
641 {
642 const char* src = "\xFB\xBF\xBF\xBF";
643 int32_t expected[] = {0x0};
644 int32_t dst[ARRAY_SIZE(expected)];
645 memset(dst, 0xFF, sizeof(dst));
648 EXPECT(memcmp(expected, dst, sizeof(expected)));
649 }
650
651 // 3.3.10 - 6-byte sequence with last byte missing (U-7FFFFFFF):
652 // "\xFD\xBF\xBF\xBF\xBF"
653 {
654 const char* src = "\xFD\xBF\xBF\xBF\xBF";
655 int32_t expected[] = {0x0};
656 int32_t dst[ARRAY_SIZE(expected)];
657 memset(dst, 0xFF, sizeof(dst));
660 EXPECT(memcmp(expected, dst, sizeof(expected)));
661 }
662
663 // 3.4 - Concatenation of incomplete sequences
664 {
665 const char* src =
666 "\xC0\xE0\x80\xF0\x80\x80"
667 "\xF8\x80\x80\x80\xFC\x80"
668 "\x80\x80\x80\xDF\xEF\xBF"
669 "\xF7\xBF\xBF\xFB\xBF\xBF"
670 "\xBF\xFD\xBF\xBF\xBF\xBF";
671 int32_t expected[] = {0x0};
672 int32_t dst[ARRAY_SIZE(expected)];
673 for (size_t i = 0; i < strlen(src); ++i) {
674 for (size_t j = 1; j < (strlen(src) - i); ++j) {
675 memset(dst, 0xFF, sizeof(dst));
676 bool is_valid =
679 EXPECT(memcmp(expected, dst, sizeof(expected)));
680 }
681 }
682 }
683
684 // 3.5 - Impossible bytes
685
686 // 3.5.1 - fe = "\xFE"
687 {
688 const char* src = "\xFE";
689 int32_t expected[] = {0xFE};
690 int32_t dst[ARRAY_SIZE(expected)];
691 memset(dst, 0, sizeof(dst));
694 EXPECT(memcmp(expected, dst, sizeof(expected)));
695 }
696
697 // 3.5.2 - ff = "\xFF"
698 {
699 const char* src = "\xFF";
700 int32_t expected[] = {0xFF};
701 int32_t dst[ARRAY_SIZE(expected)];
702 memset(dst, 0, sizeof(dst));
705 EXPECT(memcmp(expected, dst, sizeof(expected)));
706 }
707
708 // 3.5.3 - fe fe ff ff = "\xFE\xFE\xFF\xFF"
709 {
710 const char* src = "\xFE\xFE\xFF\xFF";
711 int32_t expected[] = {0xFF};
712 int32_t dst[ARRAY_SIZE(expected)];
713 memset(dst, 0, sizeof(dst));
716 EXPECT(memcmp(expected, dst, sizeof(expected)));
717 }
718
719 // 4 - Overlong sequences
720
721 // 4.1 - Examples of an overlong ASCII character
722
723 // 4.1.1 - U+002F = c0 af = "\xC0\xAF"
724 {
725 const char* src = "\xC0\xAF";
726 int32_t expected[] = {0x2F};
727 int32_t dst[ARRAY_SIZE(expected)];
728 memset(dst, 0, sizeof(dst));
731 EXPECT(memcmp(expected, dst, sizeof(expected)));
732 }
733
734 // 4.1.2 - U+002F = e0 80 af = "\xE0\x80\xAF"
735 {
736 const char* src = "\xE0\x80\xAF";
737 int32_t expected[] = {0x2F};
738 int32_t dst[ARRAY_SIZE(expected)];
739 memset(dst, 0, sizeof(dst));
742 EXPECT(memcmp(expected, dst, sizeof(expected)));
743 }
744
745 // 4.1.3 - U+002F = f0 80 80 af = "\xF0\x80\x80\xAF"
746 {
747 const char* src = "\xF0\x80\x80\xAF";
748 int32_t expected[] = {0x2F};
749 int32_t dst[ARRAY_SIZE(expected)];
750 memset(dst, 0, sizeof(dst));
753 EXPECT(memcmp(expected, dst, sizeof(expected)));
754 }
755
756 // 4.1.4 - U+002F = f8 80 80 80 af = "\xF8\x80\x80\x80\xAF"
757 {
758 const char* src = "\xF8\x80\x80\x80\xAF";
759 int32_t expected[] = {0x2F};
760 int32_t dst[ARRAY_SIZE(expected)];
761 memset(dst, 0, sizeof(dst));
764 EXPECT(memcmp(expected, dst, sizeof(expected)));
765 }
766
767 // 4.1.5 - U+002F = fc 80 80 80 80 af = "\xFC\x80\x80\x80\x80\xAF"
768 {
769 const char* src = "\xFC\x80\x80\x80\x80\xAF";
770 int32_t expected[] = {0x2F};
771 int32_t dst[ARRAY_SIZE(expected)];
772 memset(dst, 0, sizeof(dst));
775 EXPECT(memcmp(expected, dst, sizeof(expected)));
776 }
777
778 // 4.2 Maximum overlong sequences
779
780 // 4.2.1 - U-0000007F = c1 bf = "\xC1\xBF"
781 {
782 const char* src = "\xC1\xBF";
783 int32_t expected[] = {0x7F};
784 int32_t dst[ARRAY_SIZE(expected)];
785 memset(dst, 0, sizeof(dst));
788 EXPECT(memcmp(expected, dst, sizeof(expected)));
789 }
790
791 // 4.2.2 U+000007FF = e0 9f bf = "\xE0\x9F\xBF"
792 {
793 const char* src = "\xE0\x9F\xBF";
794 int32_t expected[] = {0x7FF};
795 int32_t dst[ARRAY_SIZE(expected)];
796 memset(dst, 0, sizeof(dst));
799 EXPECT(memcmp(expected, dst, sizeof(expected)));
800 }
801
802 // 4.2.3 - U+0000FFFF = f0 8f bf bf = "\xF0\x8F\xBF\xBF"
803 {
804 const char* src = "\xF0\x8F\xBF\xBF";
805 int32_t expected[] = {0xFFFF};
806 int32_t dst[ARRAY_SIZE(expected)];
807 memset(dst, 0, sizeof(dst));
810 EXPECT(memcmp(expected, dst, sizeof(expected)));
811 }
812
813 // 4.2.4 U-001FFFFF = f8 87 bf bf bf = "\xF8\x87\xBF\xBF\xBF"
814 {
815 const char* src = "\xF8\x87\xBF\xBF\xBF";
816 int32_t expected[] = {0x1FFFFF};
817 int32_t dst[ARRAY_SIZE(expected)];
818 memset(dst, 0, sizeof(dst));
821 EXPECT(memcmp(expected, dst, sizeof(expected)));
822 }
823
824 // 4.2.5 U-03FFFFFF = fc 83 bf bf bf bf = "\xFC\x83\xBF\xBF\xBF\xBF"
825 {
826 const char* src = "\xFC\x83\xBF\xBF\xBF\xBF";
827 int32_t expected[] = {0x3FFFFFF};
828 int32_t dst[ARRAY_SIZE(expected)];
829 memset(dst, 0, sizeof(dst));
832 EXPECT(memcmp(expected, dst, sizeof(expected)));
833 }
834
835 // 4.3 - Overlong representation of the NUL character
836
837 // 4.3.1 - U+0000 = "\xC0\x80"
838 {
839 const char* src = "\xC0\x80";
840 int32_t expected[] = {0x0};
841 int32_t dst[ARRAY_SIZE(expected)];
842 memset(dst, 0xFF, sizeof(dst));
845 EXPECT(memcmp(expected, dst, sizeof(expected)));
846 }
847
848 // 4.3.2 U+0000 = e0 80 80 = "\xE0\x80\x80"
849 {
850 const char* src = "\xE0\x80\x80";
851 int32_t expected[] = {0x0};
852 int32_t dst[ARRAY_SIZE(expected)];
853 memset(dst, 0xFF, sizeof(dst));
856 EXPECT(memcmp(expected, dst, sizeof(expected)));
857 }
858
859 // 4.3.3 U+0000 = f0 80 80 80 = "\xF0\x80\x80\x80"
860 {
861 const char* src = "\xF0\x80\x80\x80";
862 int32_t expected[] = {0x0};
863 int32_t dst[ARRAY_SIZE(expected)];
864 memset(dst, 0xFF, sizeof(dst));
867 EXPECT(memcmp(expected, dst, sizeof(expected)));
868 }
869
870 // 4.3.4 U+0000 = f8 80 80 80 80 = "\xF8\x80\x80\x80\x80"
871 {
872 const char* src = "\xF8\x80\x80\x80\x80";
873 int32_t expected[] = {0x0};
874 int32_t dst[ARRAY_SIZE(expected)];
875 memset(dst, 0xFF, sizeof(dst));
878 EXPECT(memcmp(expected, dst, sizeof(expected)));
879 }
880
881 // 4.3.5 U+0000 = fc 80 80 80 80 80 = "\xFC\x80\x80\x80\x80\x80"
882 {
883 const char* src = "\xFC\x80\x80\x80\x80\x80";
884 int32_t expected[] = {0x0};
885 int32_t dst[ARRAY_SIZE(expected)];
886 memset(dst, 0xFF, sizeof(dst));
889 EXPECT(memcmp(expected, dst, sizeof(expected)));
890 }
891
892 // 5.1 - Single UTF-16 surrogates
893 // UTF-8 suggests single surrogates are invalid, but both JS and
894 // Dart allow them and make use of them.
895
896 // 5.1.1 - U+D800 = ed a0 80 = "\xED\xA0\x80"
897 {
898 const char* src = "\xED\xA0\x80";
899 int32_t expected[] = {0xD800};
900 int32_t dst[ARRAY_SIZE(expected)];
901 memset(dst, 0, sizeof(dst));
904 EXPECT(!memcmp(expected, dst, sizeof(expected)));
905 }
906
907 // 5.1.2 - U+DB7F = ed ad bf = "\xED\xAD\xBF"
908 {
909 const char* src = "\xED\xAD\xBF";
910 int32_t expected[] = {0xDB7F};
911 int32_t dst[ARRAY_SIZE(expected)];
912 memset(dst, 0, sizeof(dst));
915 EXPECT(!memcmp(expected, dst, sizeof(expected)));
916 }
917
918 // 5.1.3 - U+DB80 = ed ae 80 = "\xED\xAE\x80"
919 {
920 const char* src = "\xED\xAE\x80";
921 int32_t expected[] = {0xDB80};
922 int32_t dst[ARRAY_SIZE(expected)];
923 memset(dst, 0, sizeof(dst));
926 EXPECT(!memcmp(expected, dst, sizeof(expected)));
927 }
928
929 // 5.1.4 - U+DBFF = ed af bf = "\xED\xAF\xBF"
930 {
931 const char* src = "\xED\xAF\xBF";
932 int32_t expected[] = {0xDBFF};
933 int32_t dst[ARRAY_SIZE(expected)];
934 memset(dst, 0, sizeof(dst));
937 EXPECT(!memcmp(expected, dst, sizeof(expected)));
938 }
939
940 // 5.1.5 - U+DC00 = ed b0 80 = "\xED\xB0\x80"
941 {
942 const char* src = "\xED\xB0\x80";
943 int32_t expected[] = {0xDC00};
944 int32_t dst[ARRAY_SIZE(expected)];
945 memset(dst, 0, sizeof(dst));
948 EXPECT(!memcmp(expected, dst, sizeof(expected)));
949 }
950
951 // 5.1.6 - U+DF80 = ed be 80 = "\xED\xBE\x80"
952 {
953 const char* src = "\xED\xBE\x80";
954 int32_t expected[] = {0xDF80};
955 int32_t dst[ARRAY_SIZE(expected)];
956 memset(dst, 0, sizeof(dst));
959 EXPECT(!memcmp(expected, dst, sizeof(expected)));
960 }
961
962 // 5.1.7 - U+DFFF = ed bf bf = "\xED\xBF\xBF"
963 {
964 const char* src = "\xED\xBF\xBF";
965 int32_t expected[] = {0xDFFF};
966 int32_t dst[ARRAY_SIZE(expected)];
967 memset(dst, 0, sizeof(dst));
970 EXPECT(!memcmp(expected, dst, sizeof(expected)));
971 }
972
973 // 5.2 Paired UTF-16 surrogates
974 // Also not a valid string, but accepted in Dart, even if it doesn't make
975 // sense. e.g.
976 // var s = new String.fromCharCodes([0xd800, 0xDC00]);
977 // print(s.runes); // (65536) (0x10000)
978 // print(s.codeUnits); // [55296, 56320]
979
980 // 5.2.1 - U+D800 U+DC00 = ed a0 80 ed b0 80 = "\xED\xA0\x80\xED\xB0\x80"
981 {
982 const char* src = "\xED\xA0\x80\xED\xB0\x80";
983 int32_t expected[] = {0xD800, 0xDC00};
984 int32_t dst[ARRAY_SIZE(expected)];
985 memset(dst, 0, sizeof(dst));
988 EXPECT(!memcmp(expected, dst, sizeof(expected)));
989 }
990
991 // 5.2.2 - U+D800 U+DFFF = ed a0 80 ed bf bf = "\xED\xA0\x80\xED\xBF\xBF"
992 {
993 const char* src = "\xED\xA0\x80\xED\xBF\xBF";
994 int32_t expected[] = {0xD800, 0xDFFF};
995 int32_t dst[ARRAY_SIZE(expected)];
996 memset(dst, 0, sizeof(dst));
999 EXPECT(!memcmp(expected, dst, sizeof(expected)));
1000 }
1001
1002 // 5.2.3 - U+DB7F U+DC00 = ed ad bf ed b0 80 = "\xED\xAD\xBF\xED\xB0\x80"
1003 {
1004 const char* src = "\xED\xAD\xBF\xED\xB0\x80";
1005 int32_t expected[] = {0xDB7F, 0xDC00};
1006 int32_t dst[ARRAY_SIZE(expected)];
1007 memset(dst, 0, sizeof(dst));
1010 EXPECT(!memcmp(expected, dst, sizeof(expected)));
1011 }
1012
1013 // 5.2.4 - U+DB7F U+DFFF = ed ad bf ed bf bf = "\xED\xAD\xBF\xED\xBF\xBF"
1014 {
1015 const char* src = "\xED\xAD\xBF\xED\xBF\xBF";
1016 int32_t expected[] = {0xDB7F, 0xDFFF};
1017 int32_t dst[ARRAY_SIZE(expected)];
1018 memset(dst, 0, sizeof(dst));
1021 EXPECT(!memcmp(expected, dst, sizeof(expected)));
1022 }
1023
1024 // 5.2.5 - U+DB80 U+DC00 = ed ae 80 ed b0 80 = "\xED\xAE\x80\xED\xB0\x80"
1025 {
1026 const char* src = "\xED\xAE\x80\xED\xB0\x80";
1027 int32_t expected[] = {0xDB80, 0xDC00};
1028 int32_t dst[ARRAY_SIZE(expected)];
1029 memset(dst, 0, sizeof(dst));
1032 EXPECT(!memcmp(expected, dst, sizeof(expected)));
1033 }
1034
1035 // 5.2.6 - U+DB80 U+DFFF = ed ae 80 ed bf bf = "\xED\xAE\x80\xED\xBF\xBF"
1036 {
1037 const char* src = "\xED\xAE\x80\xED\xBF\xBF";
1038 int32_t expected[] = {0xDB80, 0xDFFF};
1039 int32_t dst[ARRAY_SIZE(expected)];
1040 memset(dst, 0, sizeof(dst));
1043 EXPECT(!memcmp(expected, dst, sizeof(expected)));
1044 }
1045
1046 // 5.2.7 - U+DBFF U+DC00 = ed af bf ed b0 80 = "\xED\xAF\xBF\xED\xB0\x80"
1047 {
1048 const char* src = "\xED\xAF\xBF\xED\xB0\x80";
1049 int32_t expected[] = {0xDBFF, 0xDC00};
1050 int32_t dst[ARRAY_SIZE(expected)];
1051 memset(dst, 0, sizeof(dst));
1054 EXPECT(!memcmp(expected, dst, sizeof(expected)));
1055 }
1056
1057 // 5.2.8 - U+DBFF U+DFFF = ed af bf ed bf bf = "\xED\xAF\xBF\xED\xBF\xBF"
1058 {
1059 const char* src = "\xED\xAF\xBF\xED\xBF\xBF";
1060 int32_t expected[] = {0xDBFF, 0xDFFF};
1061 int32_t dst[ARRAY_SIZE(expected)];
1062 memset(dst, 0, sizeof(dst));
1065 EXPECT(!memcmp(expected, dst, sizeof(expected)));
1066 }
1067
1068 // 5.3 - Other illegal code positions
1069
1070 // 5.3.1 - U+FFFE = ef bf be = "\xEF\xBF\xBE"
1071 {
1072 const char* src = "\xEF\xBF\xBE";
1073 int32_t expected[] = {0xFFFE};
1074 int32_t dst[ARRAY_SIZE(expected)];
1075 memset(dst, 0, sizeof(dst));
1078 EXPECT(!memcmp(expected, dst, sizeof(expected)));
1079 }
1080
1081 // 5.3.2 - U+FFFF = ef bf bf = "\xEF\xBF\xBF"
1082 {
1083 const char* src = "\xEF\xBF\xBF";
1084 int32_t expected[] = {0xFFFF};
1085 int32_t dst[ARRAY_SIZE(expected)];
1086 memset(dst, 0, sizeof(dst));
1089 EXPECT(!memcmp(expected, dst, sizeof(expected)));
1090 }
1091}
1092
1093} // namespace dart
SkPoint pos
static bool is_valid(SkISize dim)
#define EXPECT(type, expectedAlignment, expectedSize)
static Object & Handle()
Definition: object.h:407
static StringPtr FromUTF16(const uint16_t *utf16_array, intptr_t array_len, Heap::Space space=Heap::kNew)
Definition: object.cc:23739
static bool DecodeCStringToUTF32(const char *str, int32_t *dst, intptr_t len)
Definition: unicode.cc:266
static intptr_t Length(int32_t ch)
Definition: unicode.cc:98
static intptr_t ReportInvalidByte(const uint8_t *utf8_array, intptr_t array_len, intptr_t len)
Definition: unicode.cc:163
static intptr_t Encode(int32_t ch, char *dst)
Definition: unicode.cc:110
Definition: dart_vm.cc:33
ISOLATE_UNIT_TEST_CASE(StackAllocatedDestruction)
DEF_SWITCHES_START aot vmservice shared library Name of the *so containing AOT compiled Dart assets for launching the service isolate vm snapshot The VM snapshot data that will be memory mapped as read only SnapshotAssetPath must be present isolate snapshot The isolate snapshot data that will be memory mapped as read only SnapshotAssetPath must be present cache dir Path to the cache directory This is different from the persistent_cache_path in embedder which is used for Skia shader cache icu native lib Path to the library file that exports the ICU data vm service The hostname IP address on which the Dart VM Service should be served If not defaults to or::depending on whether ipv6 is specified vm service A custom Dart VM Service port The default is to pick a randomly available open port disable vm Disable the Dart VM Service The Dart VM Service is never available in release mode disable vm service Disable mDNS Dart VM Service publication Bind to the IPv6 localhost address for the Dart VM Service Ignored if vm service host is set endless trace buffer
Definition: switches.h:126
dst
Definition: cp.py:12
#define ARRAY_SIZE(array)
Definition: globals.h:72