Flutter Engine
The Flutter Engine
Loading...
Searching...
No Matches
regexp_assembler_bytecode.cc
Go to the documentation of this file.
1// Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file
2// for details. All rights reserved. Use of this source code is governed by a
3// BSD-style license that can be found in the LICENSE file.
4
6
7#include "vm/exceptions.h"
8#include "vm/object_store.h"
9#include "vm/regexp.h"
10#include "vm/regexp_assembler.h"
12#include "vm/regexp_bytecodes.h"
14#include "vm/regexp_parser.h"
15#include "vm/timeline.h"
16
17namespace dart {
18
21 Zone* zone)
23 buffer_(buffer),
24 pc_(0),
25 advance_current_end_(kInvalidPC) {}
26
30
35
37 advance_current_end_ = kInvalidPC;
38 ASSERT(!l->is_bound());
39 if (l->is_linked()) {
40 intptr_t pos = l->pos();
41 while (pos != 0) {
42 intptr_t fixup = pos;
43 pos = *reinterpret_cast<int32_t*>(buffer_->data() + fixup);
44 *reinterpret_cast<uint32_t*>(buffer_->data() + fixup) = pc_;
45 }
46 }
47 l->BindTo(pc_);
48}
49
50void BytecodeRegExpMacroAssembler::EmitOrLink(BlockLabel* l) {
51 if (l == nullptr) l = &backtrack_;
52 if (l->is_bound()) {
53 Emit32(l->pos());
54 } else {
55 int pos = 0;
56 if (l->is_linked()) {
57 pos = l->pos();
58 }
59 l->LinkTo(pc_);
60 Emit32(pos);
61 }
62}
63
64void BytecodeRegExpMacroAssembler::PopRegister(intptr_t register_index) {
65 ASSERT(register_index >= 0);
66 ASSERT(register_index <= kMaxRegister);
67 Emit(BC_POP_REGISTER, register_index);
68}
69
70void BytecodeRegExpMacroAssembler::PushRegister(intptr_t register_index) {
71 ASSERT(register_index >= 0);
72 ASSERT(register_index <= kMaxRegister);
73 Emit(BC_PUSH_REGISTER, register_index);
74}
75
77 intptr_t register_index,
78 intptr_t cp_offset) {
79 ASSERT(register_index >= 0);
80 ASSERT(register_index <= kMaxRegister);
81 Emit(BC_SET_REGISTER_TO_CP, register_index);
82 Emit32(cp_offset); // Current position offset.
83}
84
86 intptr_t reg_to) {
87 ASSERT(reg_from <= reg_to);
88 for (int reg = reg_from; reg <= reg_to; reg++) {
89 SetRegister(reg, -1);
90 }
91}
92
94 intptr_t register_index) {
95 ASSERT(register_index >= 0);
96 ASSERT(register_index <= kMaxRegister);
97 Emit(BC_SET_CP_TO_REGISTER, register_index);
98}
99
101 intptr_t register_index) {
102 ASSERT(register_index >= 0);
103 ASSERT(register_index <= kMaxRegister);
104 Emit(BC_SET_REGISTER_TO_SP, register_index);
105}
106
108 intptr_t register_index) {
109 ASSERT(register_index >= 0);
110 ASSERT(register_index <= kMaxRegister);
111 Emit(BC_SET_SP_TO_REGISTER, register_index);
112}
113
115 ASSERT(Utils::IsUint(24, by));
116 Emit(BC_SET_CURRENT_POSITION_FROM_END, by);
117}
118
119void BytecodeRegExpMacroAssembler::SetRegister(intptr_t register_index,
120 intptr_t to) {
121 ASSERT(register_index >= 0);
122 ASSERT(register_index <= kMaxRegister);
123 Emit(BC_SET_REGISTER, register_index);
124 Emit32(to);
125}
126
128 intptr_t by) {
129 ASSERT(register_index >= 0);
130 ASSERT(register_index <= kMaxRegister);
131 Emit(BC_ADVANCE_REGISTER, register_index);
132 Emit32(by);
133}
134
136 Emit(BC_POP_CP, 0);
137}
138
140 Emit(BC_PUSH_CP, 0);
141}
142
144 Emit(BC_POP_BT, 0);
145}
146
148 if (advance_current_end_ == pc_) {
149 // Combine advance current and goto.
150 pc_ = advance_current_start_;
151 Emit(BC_ADVANCE_CP_AND_GOTO, advance_current_offset_);
152 EmitOrLink(l);
153 advance_current_end_ = kInvalidPC;
154 } else {
155 // Regular goto.
156 Emit(BC_GOTO, 0);
157 EmitOrLink(l);
158 }
159}
160
162 Emit(BC_PUSH_BT, 0);
163 EmitOrLink(l);
164}
165
167 Emit(BC_SUCCEED, 0);
168 return false; // Restart matching for global regexp not supported.
169}
170
172 Emit(BC_FAIL, 0);
173}
174
176 ASSERT(by >= kMinCPOffset);
177 ASSERT(by <= kMaxCPOffset);
178 advance_current_start_ = pc_;
179 advance_current_offset_ = by;
180 Emit(BC_ADVANCE_CP, by);
181 advance_current_end_ = pc_;
182}
183
185 BlockLabel* on_tos_equals_current_position) {
186 Emit(BC_CHECK_GREEDY, 0);
187 EmitOrLink(on_tos_equals_current_position);
188}
189
191 BlockLabel* on_failure,
192 bool check_bounds,
193 intptr_t characters) {
194 ASSERT(cp_offset >= kMinCPOffset);
195 ASSERT(cp_offset <= kMaxCPOffset);
196 int bytecode;
197 if (check_bounds) {
198 if (characters == 4) {
199 bytecode = BC_LOAD_4_CURRENT_CHARS;
200 } else if (characters == 2) {
201 bytecode = BC_LOAD_2_CURRENT_CHARS;
202 } else {
203 ASSERT(characters == 1);
204 bytecode = BC_LOAD_CURRENT_CHAR;
205 }
206 } else {
207 if (characters == 4) {
208 bytecode = BC_LOAD_4_CURRENT_CHARS_UNCHECKED;
209 } else if (characters == 2) {
210 bytecode = BC_LOAD_2_CURRENT_CHARS_UNCHECKED;
211 } else {
212 ASSERT(characters == 1);
213 bytecode = BC_LOAD_CURRENT_CHAR_UNCHECKED;
214 }
215 }
216 Emit(bytecode, cp_offset);
217 if (check_bounds) EmitOrLink(on_failure);
218}
219
221 BlockLabel* on_less) {
222 Emit(BC_CHECK_LT, limit);
223 EmitOrLink(on_less);
224}
225
227 BlockLabel* on_greater) {
228 Emit(BC_CHECK_GT, limit);
229 EmitOrLink(on_greater);
230}
231
233 BlockLabel* on_equal) {
234 if (c > MAX_FIRST_ARG) {
235 Emit(BC_CHECK_4_CHARS, 0);
236 Emit32(c);
237 } else {
238 Emit(BC_CHECK_CHAR, c);
239 }
240 EmitOrLink(on_equal);
241}
242
244 Emit(BC_CHECK_AT_START, 0);
245 EmitOrLink(on_at_start);
246}
247
249 intptr_t cp_offset,
250 BlockLabel* on_not_at_start) {
251 Emit(BC_CHECK_NOT_AT_START, cp_offset);
252 EmitOrLink(on_not_at_start);
253}
254
256 BlockLabel* on_not_equal) {
257 if (c > MAX_FIRST_ARG) {
258 Emit(BC_CHECK_NOT_4_CHARS, 0);
259 Emit32(c);
260 } else {
261 Emit(BC_CHECK_NOT_CHAR, c);
262 }
263 EmitOrLink(on_not_equal);
264}
265
267 uint32_t c,
268 uint32_t mask,
269 BlockLabel* on_equal) {
270 if (c > MAX_FIRST_ARG) {
271 Emit(BC_AND_CHECK_4_CHARS, 0);
272 Emit32(c);
273 } else {
274 Emit(BC_AND_CHECK_CHAR, c);
275 }
276 Emit32(mask);
277 EmitOrLink(on_equal);
278}
279
281 uint32_t c,
282 uint32_t mask,
283 BlockLabel* on_not_equal) {
284 if (c > MAX_FIRST_ARG) {
285 Emit(BC_AND_CHECK_NOT_4_CHARS, 0);
286 Emit32(c);
287 } else {
288 Emit(BC_AND_CHECK_NOT_CHAR, c);
289 }
290 Emit32(mask);
291 EmitOrLink(on_not_equal);
292}
293
295 uint16_t c,
296 uint16_t minus,
297 uint16_t mask,
298 BlockLabel* on_not_equal) {
299 Emit(BC_MINUS_AND_CHECK_NOT_CHAR, c);
300 Emit16(minus);
301 Emit16(mask);
302 EmitOrLink(on_not_equal);
303}
304
306 uint16_t from,
307 uint16_t to,
308 BlockLabel* on_in_range) {
309 Emit(BC_CHECK_CHAR_IN_RANGE, 0);
310 Emit16(from);
311 Emit16(to);
312 EmitOrLink(on_in_range);
313}
314
316 uint16_t from,
317 uint16_t to,
318 BlockLabel* on_not_in_range) {
319 Emit(BC_CHECK_CHAR_NOT_IN_RANGE, 0);
320 Emit16(from);
321 Emit16(to);
322 EmitOrLink(on_not_in_range);
323}
324
326 BlockLabel* on_bit_set) {
327 Emit(BC_CHECK_BIT_IN_TABLE, 0);
328 EmitOrLink(on_bit_set);
329 for (int i = 0; i < kTableSize; i += kBitsPerByte) {
330 int byte = 0;
331 for (int j = 0; j < kBitsPerByte; j++) {
332 if (table.GetUint8(i + j) != 0) byte |= 1 << j;
333 }
334 Emit8(byte);
335 }
336}
337
339 intptr_t start_reg,
340 bool read_backward,
341 BlockLabel* on_not_equal) {
342 ASSERT(start_reg >= 0);
343 ASSERT(start_reg <= kMaxRegister);
344 Emit(read_backward ? BC_CHECK_NOT_BACK_REF_BACKWARD : BC_CHECK_NOT_BACK_REF,
345 start_reg);
346 EmitOrLink(on_not_equal);
347}
348
350 intptr_t start_reg,
351 bool read_backward,
352 bool unicode,
353 BlockLabel* on_not_equal) {
354 ASSERT(start_reg >= 0);
355 ASSERT(start_reg <= kMaxRegister);
356 Emit(read_backward ? (unicode ? BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD
357 : BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD)
358 : (unicode ? BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE
359 : BC_CHECK_NOT_BACK_REF_NO_CASE),
360 start_reg);
361 EmitOrLink(on_not_equal);
362}
363
365 intptr_t comparand,
366 BlockLabel* on_less_than) {
367 ASSERT(register_index >= 0);
368 ASSERT(register_index <= kMaxRegister);
369 Emit(BC_CHECK_REGISTER_LT, register_index);
370 Emit32(comparand);
371 EmitOrLink(on_less_than);
372}
373
375 intptr_t register_index,
376 intptr_t comparand,
377 BlockLabel* on_greater_or_equal) {
378 ASSERT(register_index >= 0);
379 ASSERT(register_index <= kMaxRegister);
380 Emit(BC_CHECK_REGISTER_GE, register_index);
381 Emit32(comparand);
382 EmitOrLink(on_greater_or_equal);
383}
384
386 BlockLabel* on_eq) {
387 ASSERT(register_index >= 0);
388 ASSERT(register_index <= kMaxRegister);
389 Emit(BC_CHECK_REGISTER_EQ_POS, register_index);
390 EmitOrLink(on_eq);
391}
392
394 BindBlock(&backtrack_);
395 Emit(BC_POP_BT, 0);
396
397 intptr_t len = length();
398 const TypedData& bytecode =
399 TypedData::Handle(TypedData::New(kTypedDataUint8ArrayCid, len));
400
401 NoSafepointScope no_safepoint;
402 memmove(bytecode.DataAddr(0), buffer_->data(), len);
403
404 return bytecode.ptr();
405}
406
407intptr_t BytecodeRegExpMacroAssembler::length() {
408 return pc_;
409}
410
411void BytecodeRegExpMacroAssembler::Expand() {
412 // BOGUS
413 buffer_->Add(0);
414 buffer_->Add(0);
415 buffer_->Add(0);
416 buffer_->Add(0);
417 intptr_t x = buffer_->length();
418 for (intptr_t i = 0; i < x; i++)
419 buffer_->Add(0);
420}
421
422static intptr_t Prepare(const RegExp& regexp,
423 const String& subject,
424 bool sticky,
425 Zone* zone) {
426 bool is_one_byte = subject.IsOneByteString();
427
428 if (regexp.bytecode(is_one_byte, sticky) == TypedData::null()) {
429 const String& pattern = String::Handle(zone, regexp.pattern());
430#if defined(SUPPORT_TIMELINE)
431 TimelineBeginEndScope tbes(Thread::Current(), Timeline::GetCompilerStream(),
432 "CompileIrregexpBytecode");
433 if (tbes.enabled()) {
434 tbes.SetNumArguments(1);
435 tbes.CopyArgument(0, "pattern", pattern.ToCString());
436 }
437#endif // !defined(PRODUCT)
438
439 RegExpCompileData* compile_data = new (zone) RegExpCompileData();
440
441 // Parsing failures are handled in the RegExp factory constructor.
442 RegExpParser::ParseRegExp(pattern, regexp.flags(), compile_data);
443
444 regexp.set_num_bracket_expressions(compile_data->capture_count);
445 regexp.set_capture_name_map(compile_data->capture_name_map);
446 if (compile_data->simple) {
447 regexp.set_is_simple();
448 } else {
449 regexp.set_is_complex();
450 }
451
453 compile_data, regexp, is_one_byte, sticky, zone);
454 if (result.error_message != nullptr) {
456 }
457 ASSERT(result.bytecode != nullptr);
458 ASSERT(regexp.num_registers(is_one_byte) == -1 ||
459 regexp.num_registers(is_one_byte) == result.num_registers);
460 regexp.set_num_registers(is_one_byte, result.num_registers);
461 regexp.set_bytecode(is_one_byte, sticky, *(result.bytecode));
462 }
463
464 ASSERT(regexp.num_registers(is_one_byte) != -1);
465
466 return regexp.num_registers(is_one_byte) +
467 (regexp.num_bracket_expressions() + 1) * 2;
468}
469
470static ObjectPtr ExecRaw(const RegExp& regexp,
471 const String& subject,
472 int32_t index,
473 bool sticky,
474 int32_t* output,
475 intptr_t output_size,
476 Zone* zone) {
477 bool is_one_byte = subject.IsOneByteString();
478
479 // We must have done EnsureCompiledIrregexp, so we can get the number of
480 // registers.
481 int number_of_capture_registers = (regexp.num_bracket_expressions() + 1) * 2;
482 int32_t* raw_output = &output[number_of_capture_registers];
483
484 // We do not touch the actual capture result registers until we know there
485 // has been a match so that we can use those capture results to set the
486 // last match info.
487 for (int i = number_of_capture_registers - 1; i >= 0; i--) {
488 raw_output[i] = -1;
489 }
490
491 const TypedData& bytecode =
492 TypedData::Handle(zone, regexp.bytecode(is_one_byte, sticky));
493 ASSERT(!bytecode.IsNull());
495 zone, IrregexpInterpreter::Match(bytecode, subject, raw_output, index));
496
497 if (result.ptr() == Bool::True().ptr()) {
498 // Copy capture results to the start of the registers array.
499 memmove(output, raw_output, number_of_capture_registers * sizeof(int32_t));
500 }
501 if (result.ptr() == Object::null()) {
502 // Exception during regexp processing
504 UNREACHABLE();
505 }
506 return result.ptr();
507}
508
510 const String& subject,
511 const Smi& start_index,
512 bool sticky,
513 Zone* zone) {
514 intptr_t required_registers = Prepare(regexp, subject, sticky, zone);
515 if (required_registers < 0) {
516 // Compiling failed with an exception.
517 UNREACHABLE();
518 }
519
520 // V8 uses a shared copy on the isolate when smaller than some threshold.
521 int32_t* output_registers = zone->Alloc<int32_t>(required_registers);
522
523 const Object& result =
524 Object::Handle(zone, ExecRaw(regexp, subject, start_index.Value(), sticky,
525 output_registers, required_registers, zone));
526 if (result.ptr() == Bool::True().ptr()) {
527 intptr_t capture_count = regexp.num_bracket_expressions();
528 intptr_t capture_register_count = (capture_count + 1) * 2;
529 ASSERT(required_registers >= capture_register_count);
530
532 TypedData::New(kTypedDataInt32ArrayCid, capture_register_count));
533 {
534#ifdef DEBUG
535 // These indices will be used with substring operations that don't check
536 // bounds, so sanity check them here.
537 for (intptr_t i = 0; i < capture_register_count; i++) {
538 int32_t val = output_registers[i];
539 ASSERT(val == -1 || (val >= 0 && val <= subject.Length()));
540 }
541#endif
542
543 NoSafepointScope no_safepoint;
544 memmove(result.DataAddr(0), output_registers,
545 capture_register_count * sizeof(int32_t));
546 }
547
548 return result.ptr();
549 }
550 if (result.ptr() == Object::null()) {
551 // internal exception
552 UNREACHABLE();
553 }
554 if (result.IsError()) {
556 UNREACHABLE();
557 }
558 ASSERT(result.ptr() == Bool::False().ptr());
559 return Instance::null();
560}
561
562} // namespace dart
SkPoint pos
void check_bounds(skiatest::Reporter *reporter, const SkPath &path)
SI F table(const skcms_Curve *curve, F v)
#define UNREACHABLE()
Definition assert.h:248
void Add(const T &value)
intptr_t length() const
Convenience wrapper around a BlockEntryInstr pointer.
bool is_bound() const
bool is_linked() const
void BindTo(intptr_t pos)
void LinkTo(intptr_t pos)
intptr_t pos() const
static const Bool & False()
Definition object.h:10778
static const Bool & True()
Definition object.h:10776
virtual void SetCurrentPositionFromEnd(intptr_t by)
virtual void CheckAtStart(BlockLabel *on_at_start)
virtual void CheckCharacterInRange(uint16_t from, uint16_t to, BlockLabel *on_in_range)
virtual void CheckBitInTable(const TypedData &table, BlockLabel *on_bit_set)
virtual IrregexpImplementation Implementation()
virtual void CheckCharacterAfterAnd(unsigned c, unsigned mask, BlockLabel *on_equal)
BytecodeRegExpMacroAssembler(ZoneGrowableArray< uint8_t > *buffer, Zone *zone)
virtual void CheckCharacterLT(uint16_t limit, BlockLabel *on_less)
virtual void AdvanceRegister(intptr_t reg, intptr_t by)
virtual void CheckNotCharacterAfterAnd(unsigned c, unsigned mask, BlockLabel *on_not_equal)
virtual void ReadCurrentPositionFromRegister(intptr_t reg)
virtual void IfRegisterGE(intptr_t register_index, intptr_t comparand, BlockLabel *if_ge)
virtual void PopRegister(intptr_t register_index)
virtual void CheckNotCharacter(unsigned c, BlockLabel *on_not_equal)
virtual void WriteStackPointerToRegister(intptr_t reg)
virtual void ReadStackPointerFromRegister(intptr_t reg)
virtual void CheckCharacter(unsigned c, BlockLabel *on_equal)
virtual void IfRegisterEqPos(intptr_t register_index, BlockLabel *if_eq)
virtual void ClearRegisters(intptr_t reg_from, intptr_t reg_to)
virtual void CheckCharacterNotInRange(uint16_t from, uint16_t to, BlockLabel *on_not_in_range)
virtual void PushBacktrack(BlockLabel *label)
virtual void LoadCurrentCharacter(intptr_t cp_offset, BlockLabel *on_end_of_input, bool check_bounds=true, intptr_t characters=1)
virtual void WriteCurrentPositionToRegister(intptr_t reg, intptr_t cp_offset)
virtual void PushRegister(intptr_t register_index)
virtual void BindBlock(BlockLabel *label)
virtual void CheckNotCharacterAfterMinusAnd(uint16_t c, uint16_t minus, uint16_t mask, BlockLabel *on_not_equal)
virtual void IfRegisterLT(intptr_t register_index, intptr_t comparand, BlockLabel *if_lt)
virtual void SetRegister(intptr_t register_index, intptr_t to)
static ObjectPtr Interpret(const RegExp &regexp, const String &str, const Smi &start_index, bool is_sticky, Zone *zone)
virtual void CheckNotAtStart(intptr_t cp_offset, BlockLabel *on_not_at_start)
virtual void CheckGreedyLoop(BlockLabel *on_tos_equals_current_position)
virtual void CheckCharacterGT(uint16_t limit, BlockLabel *on_greater)
virtual void CheckNotBackReference(intptr_t start_reg, bool read_backward, BlockLabel *on_no_match)
virtual void CheckNotBackReferenceIgnoreCase(intptr_t start_reg, bool read_backward, bool unicode, BlockLabel *on_no_match)
static DART_NORETURN void ThrowStackOverflow()
static DART_NORETURN void ThrowUnsupportedError(const char *msg)
static DART_NORETURN void PropagateError(const Error &error)
static ObjectPtr Match(const TypedData &bytecode, const String &subject, int32_t *captures, int32_t start_position)
static ObjectPtr null()
Definition object.h:433
ObjectPtr ptr() const
Definition object.h:332
bool IsNull() const
Definition object.h:363
static Object & Handle()
Definition object.h:407
static CompilationResult CompileBytecode(RegExpCompileData *data, const RegExp &regexp, bool is_one_byte, bool sticky, Zone *zone)
Definition regexp.cc:5414
static constexpr intptr_t kMaxRegister
static constexpr intptr_t kMinCPOffset
static constexpr intptr_t kTableSize
static constexpr intptr_t kMaxCPOffset
static void ParseRegExp(const String &input, RegExpFlags regexp_flags, RegExpCompileData *result)
TypedDataPtr bytecode(bool is_one_byte, bool sticky) const
Definition object.h:12777
void set_is_simple() const
Definition object.h:12856
StringPtr pattern() const
Definition object.h:12771
void set_is_complex() const
Definition object.h:12857
void set_num_bracket_expressions(SmiPtr value) const
void set_capture_name_map(const Array &array) const
Definition object.cc:26737
intptr_t num_bracket_expressions() const
Definition object.h:12772
void set_bytecode(bool is_one_byte, bool sticky, const TypedData &bytecode) const
Definition object.cc:26715
RegExpFlags flags() const
Definition object.h:12865
intptr_t num_registers(bool is_one_byte) const
Definition object.h:12765
void set_num_registers(bool is_one_byte, intptr_t value) const
Definition object.h:12858
intptr_t Value() const
Definition object.h:9969
bool IsOneByteString() const
Definition object.h:10290
intptr_t Length() const
Definition object.h:10189
static const char * ToCString(Thread *thread, StringPtr ptr)
Definition object.cc:24205
static Thread * Current()
Definition thread.h:361
void * DataAddr(intptr_t byte_offset) const
Definition object.h:11545
static TypedDataPtr New(intptr_t class_id, intptr_t len, Heap::Space space=Heap::kNew)
Definition object.cc:25666
static bool IsUint(intptr_t N, T value)
Definition utils.h:313
ElementType * Alloc(intptr_t length)
#define ASSERT(E)
static const uint8_t buffer[]
GAsyncResult * result
double x
constexpr intptr_t kBitsPerByte
Definition globals.h:463
static ObjectPtr ExecRaw(const RegExp &regexp, const String &subject, int32_t index, bool sticky, int32_t *output, intptr_t output_size, Zone *zone)
static intptr_t Prepare(const RegExp &regexp, const String &subject, bool sticky, Zone *zone)
const unsigned int MAX_FIRST_ARG