Flutter Engine
The Flutter Engine
regexp_interpreter.cc
Go to the documentation of this file.
1// Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file
2// for details. All rights reserved. Use of this source code is governed by a
3// BSD-style license that can be found in the LICENSE file.
4
5// A simple interpreter for the Irregexp byte code.
6
7#include <memory>
8#include <utility>
9
10#include "heap/safepoint.h"
12
13#include "platform/unicode.h"
14#include "vm/object.h"
15#include "vm/regexp_assembler.h"
16#include "vm/regexp_bytecodes.h"
17#include "vm/unibrow-inl.h"
18#include "vm/unibrow.h"
19
20namespace dart {
21
22DEFINE_FLAG(bool, trace_regexp_bytecodes, false, "trace_regexp_bytecodes");
24 regexp_backtrack_stack_size_kb,
25 256,
26 "Size of backtracking stack");
27
29
30template <typename Char>
31static bool BackRefMatchesNoCase(Canonicalize* interp_canonicalize,
32 intptr_t from,
33 intptr_t current,
34 intptr_t len,
35 const String& subject,
36 bool unicode);
37
38template <>
40 intptr_t from,
41 intptr_t current,
42 intptr_t len,
43 const String& subject,
44 bool unicode) {
45 Bool& ret = Bool::Handle();
46 if (unicode) {
47 ret = static_cast<BoolPtr>(CaseInsensitiveCompareUTF16(
48 static_cast<uword>(subject.ptr()), static_cast<uword>(Smi::New(from)),
49 static_cast<uword>(Smi::New(current)),
50 static_cast<uword>(Smi::New(len))));
51 } else {
52 ret = static_cast<BoolPtr>(CaseInsensitiveCompareUCS2(
53 static_cast<uword>(subject.ptr()), static_cast<uword>(Smi::New(from)),
54 static_cast<uword>(Smi::New(current)),
55 static_cast<uword>(Smi::New(len))));
56 }
57 return ret.value();
58}
59
60template <>
62 intptr_t from,
63 intptr_t current,
64 intptr_t len,
65 const String& subject,
66 bool unicode) {
67 // For Latin1 characters the unicode flag makes no difference.
68 for (int i = 0; i < len; i++) {
69 unsigned int old_char = subject.CharAt(from++);
70 unsigned int new_char = subject.CharAt(current++);
71 if (old_char == new_char) continue;
72 // Convert both characters to lower case.
73 old_char |= 0x20;
74 new_char |= 0x20;
75 if (old_char != new_char) return false;
76 // Not letters in the ASCII range and Latin-1 range.
77 if (!(old_char - 'a' <= 'z' - 'a') &&
78 !(old_char - 224 <= 254 - 224 && old_char != 247)) {
79 return false;
80 }
81 }
82 return true;
83}
84
85#ifdef DEBUG
86static void TraceInterpreter(const uint8_t* code_base,
87 const uint8_t* pc,
88 int stack_depth,
89 int current_position,
90 uint32_t current_char,
91 int bytecode_length,
92 const char* bytecode_name) {
93 if (FLAG_trace_regexp_bytecodes) {
94 bool printable = (current_char < 127 && current_char >= 32);
95 const char* format =
96 printable
97 ? "pc = %02x, sp = %d, curpos = %d, curchar = %08x (%c), bc = %s"
98 : "pc = %02x, sp = %d, curpos = %d, curchar = %08x .%c., bc = %s";
99 OS::PrintErr(format, pc - code_base, stack_depth, current_position,
100 current_char, printable ? current_char : '.', bytecode_name);
101 for (int i = 0; i < bytecode_length; i++) {
102 OS::PrintErr(", %02x", pc[i]);
103 }
104 OS::PrintErr(" ");
105 for (int i = 1; i < bytecode_length; i++) {
106 unsigned char b = pc[i];
107 if (b < 127 && b >= 32) {
108 OS::PrintErr("%c", b);
109 } else {
110 OS::PrintErr(".");
111 }
112 }
113 OS::PrintErr("\n");
114 }
115}
116
117#define BYTECODE(name) \
118 case BC_##name: \
119 TraceInterpreter(code_base, pc, \
120 static_cast<int>(backtrack_sp - backtrack_stack_base), \
121 current, current_char, BC_##name##_LENGTH, #name);
122#else
123#define BYTECODE(name) case BC_##name:
124#endif
125
126static int32_t Load32Aligned(const uint8_t* pc) {
127 ASSERT((reinterpret_cast<intptr_t>(pc) & 3) == 0);
128 return *reinterpret_cast<const int32_t*>(pc);
129}
130
131static int32_t Load16Aligned(const uint8_t* pc) {
132 ASSERT((reinterpret_cast<intptr_t>(pc) & 1) == 0);
133 return *reinterpret_cast<const uint16_t*>(pc);
134}
135
136// A simple abstraction over the backtracking stack used by the interpreter.
137// This backtracking stack does not grow automatically, but it ensures that the
138// the memory held by the stack is released or remembered in a cache if the
139// matching terminates.
141 public:
144 // Note: using malloc here has a potential of triggering jemalloc/tcmalloc
145 // bugs which cause application to leak memory and eventually OOM.
146 // See https://github.com/dart-lang/sdk/issues/38820 and
147 // https://github.com/flutter/flutter/issues/29007 for examples.
148 // So instead we directly ask OS to provide us memory.
149 if (memory_ == nullptr) {
150 const bool executable = false;
151 const bool compressed = false;
152 const intptr_t size_in_bytes = Utils::RoundUp(
153 FLAG_regexp_backtrack_stack_size_kb * KB, VirtualMemory::PageSize());
154 memory_ = std::unique_ptr<VirtualMemory>(VirtualMemory::Allocate(
155 size_in_bytes, executable, compressed, "regexp-backtrack-stack"));
156 }
157 }
158
160 if (memory_ != nullptr) {
161 Isolate::Current()->CacheRegexpBacktrackStack(std::move(memory_));
162 }
163 }
164
165 bool out_of_memory() const { return memory_ == nullptr; }
166
167 int32_t* data() const {
168 return reinterpret_cast<int32_t*>(memory_->address());
169 }
170
171 intptr_t max_size() const { return memory_->size() / sizeof(int32_t); }
172
173 private:
174 std::unique_ptr<VirtualMemory> memory_;
175
176 DISALLOW_COPY_AND_ASSIGN(BacktrackStack);
177};
178
179// Returns True if success, False if failure, Null if internal exception,
180// Error if VM error needs to be propagated up the callchain.
181template <typename Char>
182static ObjectPtr RawMatch(const TypedData& bytecode,
183 const String& subject,
184 int32_t* registers,
185 int32_t current,
186 uint32_t current_char) {
187 // BacktrackStack ensures that the memory allocated for the backtracking stack
188 // is returned to the system or cached if there is no stack being cached at
189 // the moment.
190 BacktrackStack backtrack_stack;
191 if (backtrack_stack.out_of_memory()) {
193 UNREACHABLE();
194 }
195 int32_t* backtrack_stack_base = backtrack_stack.data();
196 int32_t* backtrack_sp = backtrack_stack_base;
197 intptr_t backtrack_stack_space = backtrack_stack.max_size();
198
199 // TODO(zerny): Optimize as single instance. V8 has this as an
200 // isolate member.
202
203 intptr_t subject_length = subject.Length();
204
205#ifdef DEBUG
206 if (FLAG_trace_regexp_bytecodes) {
207 OS::PrintErr("Start irregexp bytecode interpreter\n");
208 }
209#endif
210 const auto thread = Thread::Current();
211 const uint8_t* code_base;
212 const uint8_t* pc;
213 {
214 NoSafepointScope no_safepoint;
215 code_base = reinterpret_cast<uint8_t*>(bytecode.DataAddr(0));
216 pc = code_base;
217 }
218 while (true) {
219 if (UNLIKELY(thread->HasScheduledInterrupts())) {
220 intptr_t pc_offset = pc - code_base;
221 ErrorPtr error = thread->HandleInterrupts();
222 if (error != Object::null()) {
223 // Needs to be propagated to the Dart native invoking the
224 // regex matcher.
225 return error;
226 }
227 NoSafepointScope no_safepoint;
228 code_base = reinterpret_cast<uint8_t*>(bytecode.DataAddr(0));
229 pc = code_base + pc_offset;
230 }
231 NoSafepointScope no_safepoint;
232 bool check_for_safepoint_now = false;
233 while (!check_for_safepoint_now) {
234 int32_t insn = Load32Aligned(pc);
235 switch (insn & BYTECODE_MASK) {
236 BYTECODE(BREAK)
237 UNREACHABLE();
238 return Bool::False().ptr();
239 BYTECODE(PUSH_CP)
240 if (--backtrack_stack_space < 0) {
241 return Object::null();
242 }
243 *backtrack_sp++ = current;
244 pc += BC_PUSH_CP_LENGTH;
245 break;
246 BYTECODE(PUSH_BT)
247 if (--backtrack_stack_space < 0) {
248 return Object::null();
249 }
250 *backtrack_sp++ = Load32Aligned(pc + 4);
251 pc += BC_PUSH_BT_LENGTH;
252 break;
253 BYTECODE(PUSH_REGISTER)
254 if (--backtrack_stack_space < 0) {
255 return Object::null();
256 }
257 *backtrack_sp++ = registers[insn >> BYTECODE_SHIFT];
258 pc += BC_PUSH_REGISTER_LENGTH;
259 break;
260 BYTECODE(SET_REGISTER)
261 registers[insn >> BYTECODE_SHIFT] = Load32Aligned(pc + 4);
262 pc += BC_SET_REGISTER_LENGTH;
263 break;
264 BYTECODE(ADVANCE_REGISTER)
265 registers[insn >> BYTECODE_SHIFT] += Load32Aligned(pc + 4);
266 pc += BC_ADVANCE_REGISTER_LENGTH;
267 break;
268 BYTECODE(SET_REGISTER_TO_CP)
269 registers[insn >> BYTECODE_SHIFT] = current + Load32Aligned(pc + 4);
270 pc += BC_SET_REGISTER_TO_CP_LENGTH;
271 break;
272 BYTECODE(SET_CP_TO_REGISTER)
273 current = registers[insn >> BYTECODE_SHIFT];
274 pc += BC_SET_CP_TO_REGISTER_LENGTH;
275 break;
276 BYTECODE(SET_REGISTER_TO_SP)
277 registers[insn >> BYTECODE_SHIFT] =
278 static_cast<int>(backtrack_sp - backtrack_stack_base);
279 pc += BC_SET_REGISTER_TO_SP_LENGTH;
280 break;
281 BYTECODE(SET_SP_TO_REGISTER)
282 backtrack_sp = backtrack_stack_base + registers[insn >> BYTECODE_SHIFT];
283 backtrack_stack_space =
284 backtrack_stack.max_size() -
285 static_cast<int>(backtrack_sp - backtrack_stack_base);
286 pc += BC_SET_SP_TO_REGISTER_LENGTH;
287 break;
288 BYTECODE(POP_CP)
289 backtrack_stack_space++;
290 --backtrack_sp;
291 current = *backtrack_sp;
292 pc += BC_POP_CP_LENGTH;
293 break;
294 BYTECODE(POP_BT)
295 backtrack_stack_space++;
296 --backtrack_sp;
297 pc = code_base + *backtrack_sp;
298 // This should match check cadence in JIT irregexp implementation.
299 check_for_safepoint_now = true;
300 break;
301 BYTECODE(POP_REGISTER)
302 backtrack_stack_space++;
303 --backtrack_sp;
304 registers[insn >> BYTECODE_SHIFT] = *backtrack_sp;
305 pc += BC_POP_REGISTER_LENGTH;
306 break;
308 return Bool::False().ptr();
309 BYTECODE(SUCCEED)
310 return Bool::True().ptr();
311 BYTECODE(ADVANCE_CP)
312 current += insn >> BYTECODE_SHIFT;
313 pc += BC_ADVANCE_CP_LENGTH;
314 break;
315 BYTECODE(GOTO)
316 pc = code_base + Load32Aligned(pc + 4);
317 break;
318 BYTECODE(ADVANCE_CP_AND_GOTO)
319 current += insn >> BYTECODE_SHIFT;
320 pc = code_base + Load32Aligned(pc + 4);
321 break;
322 BYTECODE(CHECK_GREEDY)
323 if (current == backtrack_sp[-1]) {
324 backtrack_sp--;
325 backtrack_stack_space++;
326 pc = code_base + Load32Aligned(pc + 4);
327 } else {
328 pc += BC_CHECK_GREEDY_LENGTH;
329 }
330 break;
331 BYTECODE(LOAD_CURRENT_CHAR) {
332 int pos = current + (insn >> BYTECODE_SHIFT);
333 if (pos < 0 || pos >= subject_length) {
334 pc = code_base + Load32Aligned(pc + 4);
335 } else {
336 current_char = subject.CharAt(pos);
337 pc += BC_LOAD_CURRENT_CHAR_LENGTH;
338 }
339 break;
340 }
341 BYTECODE(LOAD_CURRENT_CHAR_UNCHECKED) {
342 int pos = current + (insn >> BYTECODE_SHIFT);
343 current_char = subject.CharAt(pos);
344 pc += BC_LOAD_CURRENT_CHAR_UNCHECKED_LENGTH;
345 break;
346 }
347 BYTECODE(LOAD_2_CURRENT_CHARS) {
348 int pos = current + (insn >> BYTECODE_SHIFT);
349 if (pos + 2 > subject_length) {
350 pc = code_base + Load32Aligned(pc + 4);
351 } else {
352 Char next = subject.CharAt(pos + 1);
353 current_char =
354 subject.CharAt(pos) | (next << (kBitsPerByte * sizeof(Char)));
355 pc += BC_LOAD_2_CURRENT_CHARS_LENGTH;
356 }
357 break;
358 }
359 BYTECODE(LOAD_2_CURRENT_CHARS_UNCHECKED) {
360 int pos = current + (insn >> BYTECODE_SHIFT);
361 Char next = subject.CharAt(pos + 1);
362 current_char =
363 subject.CharAt(pos) | (next << (kBitsPerByte * sizeof(Char)));
364 pc += BC_LOAD_2_CURRENT_CHARS_UNCHECKED_LENGTH;
365 break;
366 }
367 BYTECODE(LOAD_4_CURRENT_CHARS) {
368 ASSERT(sizeof(Char) == 1);
369 int pos = current + (insn >> BYTECODE_SHIFT);
370 if (pos + 4 > subject_length) {
371 pc = code_base + Load32Aligned(pc + 4);
372 } else {
373 Char next1 = subject.CharAt(pos + 1);
374 Char next2 = subject.CharAt(pos + 2);
375 Char next3 = subject.CharAt(pos + 3);
376 current_char = (subject.CharAt(pos) | (next1 << 8) | (next2 << 16) |
377 (next3 << 24));
378 pc += BC_LOAD_4_CURRENT_CHARS_LENGTH;
379 }
380 break;
381 }
382 BYTECODE(LOAD_4_CURRENT_CHARS_UNCHECKED) {
383 ASSERT(sizeof(Char) == 1);
384 int pos = current + (insn >> BYTECODE_SHIFT);
385 Char next1 = subject.CharAt(pos + 1);
386 Char next2 = subject.CharAt(pos + 2);
387 Char next3 = subject.CharAt(pos + 3);
388 current_char = (subject.CharAt(pos) | (next1 << 8) | (next2 << 16) |
389 (next3 << 24));
390 pc += BC_LOAD_4_CURRENT_CHARS_UNCHECKED_LENGTH;
391 break;
392 }
393 BYTECODE(CHECK_4_CHARS) {
394 uint32_t c = Load32Aligned(pc + 4);
395 if (c == current_char) {
396 pc = code_base + Load32Aligned(pc + 8);
397 } else {
398 pc += BC_CHECK_4_CHARS_LENGTH;
399 }
400 break;
401 }
402 BYTECODE(CHECK_CHAR) {
403 uint32_t c = (insn >> BYTECODE_SHIFT);
404 if (c == current_char) {
405 pc = code_base + Load32Aligned(pc + 4);
406 } else {
407 pc += BC_CHECK_CHAR_LENGTH;
408 }
409 break;
410 }
411 BYTECODE(CHECK_NOT_4_CHARS) {
412 uint32_t c = Load32Aligned(pc + 4);
413 if (c != current_char) {
414 pc = code_base + Load32Aligned(pc + 8);
415 } else {
416 pc += BC_CHECK_NOT_4_CHARS_LENGTH;
417 }
418 break;
419 }
420 BYTECODE(CHECK_NOT_CHAR) {
421 uint32_t c = (insn >> BYTECODE_SHIFT);
422 if (c != current_char) {
423 pc = code_base + Load32Aligned(pc + 4);
424 } else {
425 pc += BC_CHECK_NOT_CHAR_LENGTH;
426 }
427 break;
428 }
429 BYTECODE(AND_CHECK_4_CHARS) {
430 uint32_t c = Load32Aligned(pc + 4);
431 if (c == (current_char & Load32Aligned(pc + 8))) {
432 pc = code_base + Load32Aligned(pc + 12);
433 } else {
434 pc += BC_AND_CHECK_4_CHARS_LENGTH;
435 }
436 break;
437 }
438 BYTECODE(AND_CHECK_CHAR) {
439 uint32_t c = (insn >> BYTECODE_SHIFT);
440 if (c == (current_char & Load32Aligned(pc + 4))) {
441 pc = code_base + Load32Aligned(pc + 8);
442 } else {
443 pc += BC_AND_CHECK_CHAR_LENGTH;
444 }
445 break;
446 }
447 BYTECODE(AND_CHECK_NOT_4_CHARS) {
448 uint32_t c = Load32Aligned(pc + 4);
449 if (c != (current_char & Load32Aligned(pc + 8))) {
450 pc = code_base + Load32Aligned(pc + 12);
451 } else {
452 pc += BC_AND_CHECK_NOT_4_CHARS_LENGTH;
453 }
454 break;
455 }
456 BYTECODE(AND_CHECK_NOT_CHAR) {
457 uint32_t c = (insn >> BYTECODE_SHIFT);
458 if (c != (current_char & Load32Aligned(pc + 4))) {
459 pc = code_base + Load32Aligned(pc + 8);
460 } else {
461 pc += BC_AND_CHECK_NOT_CHAR_LENGTH;
462 }
463 break;
464 }
465 BYTECODE(MINUS_AND_CHECK_NOT_CHAR) {
466 uint32_t c = (insn >> BYTECODE_SHIFT);
467 uint32_t minus = Load16Aligned(pc + 4);
468 uint32_t mask = Load16Aligned(pc + 6);
469 if (c != ((current_char - minus) & mask)) {
470 pc = code_base + Load32Aligned(pc + 8);
471 } else {
472 pc += BC_MINUS_AND_CHECK_NOT_CHAR_LENGTH;
473 }
474 break;
475 }
476 BYTECODE(CHECK_CHAR_IN_RANGE) {
477 uint32_t from = Load16Aligned(pc + 4);
478 uint32_t to = Load16Aligned(pc + 6);
479 if (from <= current_char && current_char <= to) {
480 pc = code_base + Load32Aligned(pc + 8);
481 } else {
482 pc += BC_CHECK_CHAR_IN_RANGE_LENGTH;
483 }
484 break;
485 }
486 BYTECODE(CHECK_CHAR_NOT_IN_RANGE) {
487 uint32_t from = Load16Aligned(pc + 4);
488 uint32_t to = Load16Aligned(pc + 6);
489 if (from > current_char || current_char > to) {
490 pc = code_base + Load32Aligned(pc + 8);
491 } else {
492 pc += BC_CHECK_CHAR_NOT_IN_RANGE_LENGTH;
493 }
494 break;
495 }
496 BYTECODE(CHECK_BIT_IN_TABLE) {
498 uint8_t b = pc[8 + ((current_char & mask) >> kBitsPerByteLog2)];
499 int bit = (current_char & (kBitsPerByte - 1));
500 if ((b & (1 << bit)) != 0) {
501 pc = code_base + Load32Aligned(pc + 4);
502 } else {
503 pc += BC_CHECK_BIT_IN_TABLE_LENGTH;
504 }
505 break;
506 }
507 BYTECODE(CHECK_LT) {
508 uint32_t limit = (insn >> BYTECODE_SHIFT);
509 if (current_char < limit) {
510 pc = code_base + Load32Aligned(pc + 4);
511 } else {
512 pc += BC_CHECK_LT_LENGTH;
513 }
514 break;
515 }
516 BYTECODE(CHECK_GT) {
517 uint32_t limit = (insn >> BYTECODE_SHIFT);
518 if (current_char > limit) {
519 pc = code_base + Load32Aligned(pc + 4);
520 } else {
521 pc += BC_CHECK_GT_LENGTH;
522 }
523 break;
524 }
525 BYTECODE(CHECK_REGISTER_LT)
526 if (registers[insn >> BYTECODE_SHIFT] < Load32Aligned(pc + 4)) {
527 pc = code_base + Load32Aligned(pc + 8);
528 } else {
529 pc += BC_CHECK_REGISTER_LT_LENGTH;
530 }
531 break;
532 BYTECODE(CHECK_REGISTER_GE)
533 if (registers[insn >> BYTECODE_SHIFT] >= Load32Aligned(pc + 4)) {
534 pc = code_base + Load32Aligned(pc + 8);
535 } else {
536 pc += BC_CHECK_REGISTER_GE_LENGTH;
537 }
538 break;
539 BYTECODE(CHECK_REGISTER_EQ_POS)
540 if (registers[insn >> BYTECODE_SHIFT] == current) {
541 pc = code_base + Load32Aligned(pc + 4);
542 } else {
543 pc += BC_CHECK_REGISTER_EQ_POS_LENGTH;
544 }
545 break;
546 BYTECODE(CHECK_NOT_REGS_EQUAL)
547 if (registers[insn >> BYTECODE_SHIFT] ==
548 registers[Load32Aligned(pc + 4)]) {
549 pc += BC_CHECK_NOT_REGS_EQUAL_LENGTH;
550 } else {
551 pc = code_base + Load32Aligned(pc + 8);
552 }
553 break;
554 BYTECODE(CHECK_NOT_BACK_REF) {
555 int from = registers[insn >> BYTECODE_SHIFT];
556 int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
557 if (from < 0 || len <= 0) {
558 pc += BC_CHECK_NOT_BACK_REF_LENGTH;
559 break;
560 }
561 if (current + len > subject_length) {
562 pc = code_base + Load32Aligned(pc + 4);
563 break;
564 } else {
565 int i;
566 for (i = 0; i < len; i++) {
567 if (subject.CharAt(from + i) != subject.CharAt(current + i)) {
568 pc = code_base + Load32Aligned(pc + 4);
569 break;
570 }
571 }
572 if (i < len) break;
573 current += len;
574 }
575 pc += BC_CHECK_NOT_BACK_REF_LENGTH;
576 break;
577 }
578 BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE)
580 BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) {
581 const bool unicode =
582 (insn & BYTECODE_MASK) == BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE;
583 int from = registers[insn >> BYTECODE_SHIFT];
584 int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
585 if (from < 0 || len <= 0) {
586 pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH;
587 break;
588 }
589 if (current + len > subject_length) {
590 pc = code_base + Load32Aligned(pc + 4);
591 break;
592 } else {
593 if (BackRefMatchesNoCase<Char>(&canonicalize, from, current, len,
594 subject, unicode)) {
595 current += len;
596 pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH;
597 } else {
598 pc = code_base + Load32Aligned(pc + 4);
599 }
600 }
601 break;
602 }
603 BYTECODE(CHECK_NOT_BACK_REF_BACKWARD) {
604 const int from = registers[insn >> BYTECODE_SHIFT];
605 const int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
606 if (from < 0 || len <= 0) {
607 pc += BC_CHECK_NOT_BACK_REF_BACKWARD_LENGTH;
608 break;
609 }
610 if ((current - len) < 0) {
611 pc = code_base + Load32Aligned(pc + 4);
612 break;
613 } else {
614 // When looking behind, the string to match (if it is there) lies
615 // before the current position, so we will check the [len]
616 // characters before the current position, excluding the current
617 // position itself.
618 const int start = current - len;
619 int i;
620 for (i = 0; i < len; i++) {
621 if (subject.CharAt(from + i) != subject.CharAt(start + i)) {
622 pc = code_base + Load32Aligned(pc + 4);
623 break;
624 }
625 }
626 if (i < len) break;
627 current -= len;
628 }
629 pc += BC_CHECK_NOT_BACK_REF_BACKWARD_LENGTH;
630 break;
631 }
632 BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD)
634 BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD) {
635 bool unicode = (insn & BYTECODE_MASK) ==
636 BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD;
637 int from = registers[insn >> BYTECODE_SHIFT];
638 int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
639 if (from < 0 || len <= 0) {
640 pc += BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_LENGTH;
641 break;
642 }
643 if (current < len) {
644 pc = code_base + Load32Aligned(pc + 4);
645 break;
646 } else {
647 if (BackRefMatchesNoCase<Char>(&canonicalize, from, current - len,
648 len, subject, unicode)) {
649 current -= len;
650 pc += BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_LENGTH;
651 } else {
652 pc = code_base + Load32Aligned(pc + 4);
653 }
654 }
655 break;
656 }
657 BYTECODE(CHECK_AT_START)
658 if (current == 0) {
659 pc = code_base + Load32Aligned(pc + 4);
660 } else {
661 pc += BC_CHECK_AT_START_LENGTH;
662 }
663 break;
664 BYTECODE(CHECK_NOT_AT_START) {
665 const int32_t cp_offset = insn >> BYTECODE_SHIFT;
666 if (current + cp_offset == 0) {
667 pc += BC_CHECK_NOT_AT_START_LENGTH;
668 } else {
669 pc = code_base + Load32Aligned(pc + 4);
670 }
671 break;
672 }
673 BYTECODE(SET_CURRENT_POSITION_FROM_END) {
674 int by = static_cast<uint32_t>(insn) >> BYTECODE_SHIFT;
675 if (subject_length - current > by) {
676 current = subject_length - by;
677 current_char = subject.CharAt(current - 1);
678 }
679 pc += BC_SET_CURRENT_POSITION_FROM_END_LENGTH;
680 break;
681 }
682 default:
683 UNREACHABLE();
684 break;
685 }
686 }
687 }
688}
689
690// Returns True if success, False if failure, Null if internal exception,
691// Error if VM error needs to be propagated up the callchain.
693 const String& subject,
694 int32_t* registers,
695 int32_t start_position) {
696 uint16_t previous_char = '\n';
697 if (start_position != 0) {
698 previous_char = subject.CharAt(start_position - 1);
699 }
700
701 if (subject.IsOneByteString()) {
702 return RawMatch<uint8_t>(bytecode, subject, registers, start_position,
703 previous_char);
704 } else if (subject.IsTwoByteString()) {
705 return RawMatch<uint16_t>(bytecode, subject, registers, start_position,
706 previous_char);
707 } else {
708 UNREACHABLE();
709 return Bool::False().ptr();
710 }
711}
712
713} // namespace dart
SkPoint pos
static float next(float f)
#define UNREACHABLE()
Definition: assert.h:248
int32_t * data() const
intptr_t max_size() const
static const Bool & False()
Definition: object.h:10799
static const Bool & True()
Definition: object.h:10797
bool value() const
Definition: object.h:10791
static DART_NORETURN void ThrowOOM()
Definition: exceptions.cc:1066
static ObjectPtr Match(const TypedData &bytecode, const String &subject, int32_t *captures, int32_t start_position)
static Isolate * Current()
Definition: isolate.h:986
void CacheRegexpBacktrackStack(std::unique_ptr< VirtualMemory > stack)
Definition: isolate.h:1471
std::unique_ptr< VirtualMemory > TakeRegexpBacktrackStack()
Definition: isolate.h:1467
static void static void PrintErr(const char *format,...) PRINTF_ATTRIBUTE(1
static ObjectPtr null()
Definition: object.h:433
ObjectPtr ptr() const
Definition: object.h:332
static Object & Handle()
Definition: object.h:407
static constexpr intptr_t kTableMask
static SmiPtr New(intptr_t value)
Definition: object.h:10006
bool IsOneByteString() const
Definition: object.h:10311
intptr_t Length() const
Definition: object.h:10210
bool IsTwoByteString() const
Definition: object.h:10315
uint16_t CharAt(intptr_t index) const
Definition: object.h:10259
static Thread * Current()
Definition: thread.h:362
void * DataAddr(intptr_t byte_offset) const
Definition: object.h:11571
static constexpr T RoundUp(T x, uintptr_t alignment, uintptr_t offset=0)
Definition: utils.h:120
static intptr_t PageSize()
static VirtualMemory * Allocate(intptr_t size, bool is_executable, bool is_compressed, const char *name)
#define FAIL(name, result)
#define ASSERT(E)
static bool b
const uint8_t uint32_t uint32_t GError ** error
uint32_t uint32_t * format
Definition: dart_vm.cc:33
constexpr intptr_t kBitsPerByteLog2
Definition: globals.h:462
uword CaseInsensitiveCompareUTF16(uword str_raw, uword lhs_index_raw, uword rhs_index_raw, uword length_raw)
static ObjectPtr RawMatch(const TypedData &bytecode, const String &subject, int32_t *registers, int32_t current, uint32_t current_char)
static int32_t Load32Aligned(const uint8_t *pc)
bool BackRefMatchesNoCase< uint8_t >(Canonicalize *interp_canonicalize, intptr_t from, intptr_t current, intptr_t len, const String &subject, bool unicode)
const int BYTECODE_SHIFT
constexpr intptr_t kBitsPerByte
Definition: globals.h:463
unibrow::Mapping< unibrow::Ecma262Canonicalize > Canonicalize
constexpr intptr_t KB
Definition: globals.h:528
uintptr_t uword
Definition: globals.h:501
static bool BackRefMatchesNoCase(Canonicalize *interp_canonicalize, intptr_t from, intptr_t current, intptr_t len, const String &subject, bool unicode)
DEFINE_FLAG(bool, print_cluster_information, false, "Print information about clusters written to snapshot")
uword CaseInsensitiveCompareUCS2(uword str_raw, uword lhs_index_raw, uword rhs_index_raw, uword length_raw)
const int BYTECODE_MASK
static int32_t Load16Aligned(const uint8_t *pc)
bool BackRefMatchesNoCase< uint16_t >(Canonicalize *interp_canonicalize, intptr_t from, intptr_t current, intptr_t len, const String &subject, bool unicode)
#define FALL_THROUGH
Definition: globals.h:15
#define UNLIKELY(cond)
Definition: globals.h:261
#define BYTECODE(name)