22DEFINE_FLAG(
bool, trace_regexp_bytecodes,
false,
"trace_regexp_bytecodes");
24 regexp_backtrack_stack_size_kb,
26 "Size of backtracking stack");
30template <
typename Char>
68 for (
int i = 0;
i <
len;
i++) {
69 unsigned int old_char = subject.
CharAt(from++);
70 unsigned int new_char = subject.
CharAt(current++);
71 if (old_char == new_char)
continue;
75 if (old_char != new_char)
return false;
77 if (!(old_char -
'a' <=
'z' -
'a') &&
78 !(old_char - 224 <= 254 - 224 && old_char != 247)) {
86static void TraceInterpreter(
const uint8_t* code_base,
90 uint32_t current_char,
92 const char* bytecode_name) {
93 if (FLAG_trace_regexp_bytecodes) {
94 bool printable = (current_char < 127 && current_char >= 32);
97 ?
"pc = %02x, sp = %d, curpos = %d, curchar = %08x (%c), bc = %s"
98 :
"pc = %02x, sp = %d, curpos = %d, curchar = %08x .%c., bc = %s";
100 current_char, printable ? current_char :
'.', bytecode_name);
101 for (
int i = 0;
i < bytecode_length;
i++) {
105 for (
int i = 1;
i < bytecode_length;
i++) {
106 unsigned char b = pc[
i];
107 if (b < 127 && b >= 32) {
117#define BYTECODE(name) \
119 TraceInterpreter(code_base, pc, \
120 static_cast<int>(backtrack_sp - backtrack_stack_base), \
121 current, current_char, BC_##name##_LENGTH, #name);
123#define BYTECODE(name) case BC_##name:
127 ASSERT((
reinterpret_cast<intptr_t
>(pc) & 3) == 0);
128 return *
reinterpret_cast<const int32_t*
>(pc);
132 ASSERT((
reinterpret_cast<intptr_t
>(pc) & 1) == 0);
133 return *
reinterpret_cast<const uint16_t*
>(pc);
149 if (memory_ ==
nullptr) {
150 const bool executable =
false;
151 const bool compressed =
false;
155 size_in_bytes, executable, compressed,
"regexp-backtrack-stack"));
160 if (memory_ !=
nullptr) {
168 return reinterpret_cast<int32_t*
>(memory_->address());
171 intptr_t
max_size()
const {
return memory_->size() /
sizeof(int32_t); }
174 std::unique_ptr<VirtualMemory> memory_;
181template <
typename Char>
186 uint32_t current_char) {
195 int32_t* backtrack_stack_base = backtrack_stack.
data();
196 int32_t* backtrack_sp = backtrack_stack_base;
197 intptr_t backtrack_stack_space = backtrack_stack.
max_size();
203 intptr_t subject_length = subject.
Length();
206 if (FLAG_trace_regexp_bytecodes) {
211 const uint8_t* code_base;
215 code_base =
reinterpret_cast<uint8_t*
>(bytecode.
DataAddr(0));
219 if (
UNLIKELY(thread->HasScheduledInterrupts())) {
220 intptr_t pc_offset = pc - code_base;
221 ErrorPtr
error = thread->HandleInterrupts();
228 code_base =
reinterpret_cast<uint8_t*
>(bytecode.
DataAddr(0));
229 pc = code_base + pc_offset;
232 bool check_for_safepoint_now =
false;
233 while (!check_for_safepoint_now) {
240 if (--backtrack_stack_space < 0) {
243 *backtrack_sp++ = current;
244 pc += BC_PUSH_CP_LENGTH;
247 if (--backtrack_stack_space < 0) {
251 pc += BC_PUSH_BT_LENGTH;
254 if (--backtrack_stack_space < 0) {
258 pc += BC_PUSH_REGISTER_LENGTH;
262 pc += BC_SET_REGISTER_LENGTH;
266 pc += BC_ADVANCE_REGISTER_LENGTH;
270 pc += BC_SET_REGISTER_TO_CP_LENGTH;
274 pc += BC_SET_CP_TO_REGISTER_LENGTH;
278 static_cast<int>(backtrack_sp - backtrack_stack_base);
279 pc += BC_SET_REGISTER_TO_SP_LENGTH;
282 backtrack_sp = backtrack_stack_base + registers[insn >>
BYTECODE_SHIFT];
283 backtrack_stack_space =
285 static_cast<int>(backtrack_sp - backtrack_stack_base);
286 pc += BC_SET_SP_TO_REGISTER_LENGTH;
289 backtrack_stack_space++;
291 current = *backtrack_sp;
292 pc += BC_POP_CP_LENGTH;
295 backtrack_stack_space++;
297 pc = code_base + *backtrack_sp;
299 check_for_safepoint_now =
true;
302 backtrack_stack_space++;
305 pc += BC_POP_REGISTER_LENGTH;
313 pc += BC_ADVANCE_CP_LENGTH;
323 if (current == backtrack_sp[-1]) {
325 backtrack_stack_space++;
328 pc += BC_CHECK_GREEDY_LENGTH;
333 if (pos < 0 || pos >= subject_length) {
337 pc += BC_LOAD_CURRENT_CHAR_LENGTH;
341 BYTECODE(LOAD_CURRENT_CHAR_UNCHECKED) {
344 pc += BC_LOAD_CURRENT_CHAR_UNCHECKED_LENGTH;
349 if (
pos + 2 > subject_length) {
355 pc += BC_LOAD_2_CURRENT_CHARS_LENGTH;
359 BYTECODE(LOAD_2_CURRENT_CHARS_UNCHECKED) {
364 pc += BC_LOAD_2_CURRENT_CHARS_UNCHECKED_LENGTH;
368 ASSERT(
sizeof(Char) == 1);
370 if (
pos + 4 > subject_length) {
376 current_char = (subject.
CharAt(
pos) | (next1 << 8) | (next2 << 16) |
378 pc += BC_LOAD_4_CURRENT_CHARS_LENGTH;
382 BYTECODE(LOAD_4_CURRENT_CHARS_UNCHECKED) {
383 ASSERT(
sizeof(Char) == 1);
388 current_char = (subject.
CharAt(
pos) | (next1 << 8) | (next2 << 16) |
390 pc += BC_LOAD_4_CURRENT_CHARS_UNCHECKED_LENGTH;
395 if (c == current_char) {
398 pc += BC_CHECK_4_CHARS_LENGTH;
404 if (c == current_char) {
407 pc += BC_CHECK_CHAR_LENGTH;
413 if (c != current_char) {
416 pc += BC_CHECK_NOT_4_CHARS_LENGTH;
422 if (c != current_char) {
425 pc += BC_CHECK_NOT_CHAR_LENGTH;
434 pc += BC_AND_CHECK_4_CHARS_LENGTH;
443 pc += BC_AND_CHECK_CHAR_LENGTH;
452 pc += BC_AND_CHECK_NOT_4_CHARS_LENGTH;
461 pc += BC_AND_CHECK_NOT_CHAR_LENGTH;
465 BYTECODE(MINUS_AND_CHECK_NOT_CHAR) {
469 if (c != ((current_char - minus) & mask)) {
472 pc += BC_MINUS_AND_CHECK_NOT_CHAR_LENGTH;
479 if (from <= current_char && current_char <= to) {
482 pc += BC_CHECK_CHAR_IN_RANGE_LENGTH;
489 if (from > current_char || current_char > to) {
492 pc += BC_CHECK_CHAR_NOT_IN_RANGE_LENGTH;
500 if ((
b & (1 << bit)) != 0) {
503 pc += BC_CHECK_BIT_IN_TABLE_LENGTH;
509 if (current_char < limit) {
512 pc += BC_CHECK_LT_LENGTH;
518 if (current_char > limit) {
521 pc += BC_CHECK_GT_LENGTH;
529 pc += BC_CHECK_REGISTER_LT_LENGTH;
536 pc += BC_CHECK_REGISTER_GE_LENGTH;
543 pc += BC_CHECK_REGISTER_EQ_POS_LENGTH;
549 pc += BC_CHECK_NOT_REGS_EQUAL_LENGTH;
557 if (from < 0 ||
len <= 0) {
558 pc += BC_CHECK_NOT_BACK_REF_LENGTH;
561 if (current +
len > subject_length) {
566 for (
i = 0;
i <
len;
i++) {
575 pc += BC_CHECK_NOT_BACK_REF_LENGTH;
578 BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE)
580 BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) {
582 (insn &
BYTECODE_MASK) == BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE;
585 if (from < 0 ||
len <= 0) {
586 pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH;
589 if (current +
len > subject_length) {
593 if (BackRefMatchesNoCase<Char>(&canonicalize, from, current,
len,
596 pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH;
603 BYTECODE(CHECK_NOT_BACK_REF_BACKWARD) {
606 if (from < 0 ||
len <= 0) {
607 pc += BC_CHECK_NOT_BACK_REF_BACKWARD_LENGTH;
610 if ((current -
len) < 0) {
620 for (
i = 0;
i <
len;
i++) {
629 pc += BC_CHECK_NOT_BACK_REF_BACKWARD_LENGTH;
632 BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD)
634 BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD) {
636 BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD;
639 if (from < 0 ||
len <= 0) {
640 pc += BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_LENGTH;
647 if (BackRefMatchesNoCase<Char>(&canonicalize, from, current -
len,
650 pc += BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_LENGTH;
661 pc += BC_CHECK_AT_START_LENGTH;
666 if (current + cp_offset == 0) {
667 pc += BC_CHECK_NOT_AT_START_LENGTH;
673 BYTECODE(SET_CURRENT_POSITION_FROM_END) {
675 if (subject_length - current > by) {
676 current = subject_length - by;
677 current_char = subject.
CharAt(current - 1);
679 pc += BC_SET_CURRENT_POSITION_FROM_END_LENGTH;
695 int32_t start_position) {
696 uint16_t previous_char =
'\n';
697 if (start_position != 0) {
698 previous_char = subject.
CharAt(start_position - 1);
702 return RawMatch<uint8_t>(bytecode, subject, registers, start_position,
705 return RawMatch<uint16_t>(bytecode, subject, registers, start_position,
static float next(float f)
intptr_t max_size() const
bool out_of_memory() const
static const Bool & False()
static const Bool & True()
static DART_NORETURN void ThrowOOM()
static ObjectPtr Match(const TypedData &bytecode, const String &subject, int32_t *captures, int32_t start_position)
static Isolate * Current()
void CacheRegexpBacktrackStack(std::unique_ptr< VirtualMemory > stack)
std::unique_ptr< VirtualMemory > TakeRegexpBacktrackStack()
static void static void PrintErr(const char *format,...) PRINTF_ATTRIBUTE(1
static constexpr intptr_t kTableMask
static SmiPtr New(intptr_t value)
bool IsOneByteString() const
bool IsTwoByteString() const
uint16_t CharAt(intptr_t index) const
static Thread * Current()
void * DataAddr(intptr_t byte_offset) const
static constexpr T RoundUp(T x, uintptr_t alignment, uintptr_t offset=0)
static intptr_t PageSize()
static VirtualMemory * Allocate(intptr_t size, bool is_executable, bool is_compressed, const char *name)
#define FAIL(name, result)
const uint8_t uint32_t uint32_t GError ** error
uint32_t uint32_t * format
constexpr intptr_t kBitsPerByteLog2
uword CaseInsensitiveCompareUTF16(uword str_raw, uword lhs_index_raw, uword rhs_index_raw, uword length_raw)
static ObjectPtr RawMatch(const TypedData &bytecode, const String &subject, int32_t *registers, int32_t current, uint32_t current_char)
static int32_t Load32Aligned(const uint8_t *pc)
bool BackRefMatchesNoCase< uint8_t >(Canonicalize *interp_canonicalize, intptr_t from, intptr_t current, intptr_t len, const String &subject, bool unicode)
constexpr intptr_t kBitsPerByte
unibrow::Mapping< unibrow::Ecma262Canonicalize > Canonicalize
static bool BackRefMatchesNoCase(Canonicalize *interp_canonicalize, intptr_t from, intptr_t current, intptr_t len, const String &subject, bool unicode)
DEFINE_FLAG(bool, print_cluster_information, false, "Print information about clusters written to snapshot")
uword CaseInsensitiveCompareUCS2(uword str_raw, uword lhs_index_raw, uword rhs_index_raw, uword length_raw)
static int32_t Load16Aligned(const uint8_t *pc)
bool BackRefMatchesNoCase< uint16_t >(Canonicalize *interp_canonicalize, intptr_t from, intptr_t current, intptr_t len, const String &subject, bool unicode)