Flutter Engine
The Flutter Engine
Classes | Static Public Member Functions | List of all members
dart::RegExpEngine Class Reference

#include <regexp.h>

Inheritance diagram for dart::RegExpEngine:
dart::AllStatic

Classes

struct  CompilationResult
 

Static Public Member Functions

static CompilationResult CompileIR (RegExpCompileData *input, const ParsedFunction *parsed_function, const ZoneGrowableArray< const ICData * > &ic_data_array, intptr_t osr_id)
 
static CompilationResult CompileBytecode (RegExpCompileData *data, const RegExp &regexp, bool is_one_byte, bool sticky, Zone *zone)
 
static RegExpPtr CreateRegExp (Thread *thread, const String &pattern, RegExpFlags flags)
 
static void DotPrint (const char *label, RegExpNode *node, bool ignore_case)
 

Detailed Description

Definition at line 1448 of file regexp.h.

Member Function Documentation

◆ CompileBytecode()

RegExpEngine::CompilationResult dart::RegExpEngine::CompileBytecode ( RegExpCompileData *  data,
const RegExp &  regexp,
bool  is_one_byte,
bool  sticky,
Zone *  zone 
)
static

Definition at line 5414 of file regexp.cc.

5419 {
5420 ASSERT(FLAG_interpret_irregexp);
5421 const String& pattern = String::Handle(zone, regexp.pattern());
5422
5423 ASSERT(!regexp.IsNull());
5424 ASSERT(!pattern.IsNull());
5425
5426 const bool is_global = regexp.flags().IsGlobal();
5427 const bool is_unicode = regexp.flags().IsUnicode();
5428
5429 RegExpCompiler compiler(data->capture_count, is_one_byte);
5430
5431 // TODO(zerny): Frequency sampling is currently disabled because of several
5432 // issues. We do not want to store subject strings in the regexp object since
5433 // they might be long and we should not prevent their garbage collection.
5434 // Passing them to this function explicitly does not help, since we must
5435 // generate exactly the same IR for both the unoptimizing and optimizing
5436 // pipelines (otherwise it gets confused when e.g. deopt ids differ).
5437 // An option would be to store sampling results in the regexp object, but
5438 // I'm not sure the performance gains are relevant enough.
5439
5440 // Wrap the body of the regexp in capture #0.
5441 RegExpNode* captured_body =
5442 RegExpCapture::ToNode(data->tree, 0, &compiler, compiler.accept());
5443
5444 RegExpNode* node = captured_body;
5445 bool is_end_anchored = data->tree->IsAnchoredAtEnd();
5446 bool is_start_anchored = data->tree->IsAnchoredAtStart();
5447 intptr_t max_length = data->tree->max_match();
5448 if (!is_start_anchored && !is_sticky) {
5449 // Add a .*? at the beginning, outside the body capture, unless
5450 // this expression is anchored at the beginning or is sticky.
5451 RegExpNode* loop_node = RegExpQuantifier::ToNode(
5452 0, RegExpTree::kInfinity, false,
5453 new (zone) RegExpCharacterClass('*', RegExpFlags()), &compiler,
5454 captured_body, data->contains_anchor);
5455
5456 if (data->contains_anchor) {
5457 // Unroll loop once, to take care of the case that might start
5458 // at the start of input.
5459 ChoiceNode* first_step_node = new (zone) ChoiceNode(2, zone);
5460 first_step_node->AddAlternative(GuardedAlternative(captured_body));
5461 first_step_node->AddAlternative(GuardedAlternative(new (zone) TextNode(
5462 new (zone) RegExpCharacterClass('*', RegExpFlags()),
5463 /*read_backwards=*/false, loop_node)));
5464 node = first_step_node;
5465 } else {
5466 node = loop_node;
5467 }
5468 }
5469 if (is_one_byte) {
5470 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion);
5471 // Do it again to propagate the new nodes to places where they were not
5472 // put because they had not been calculated yet.
5473 if (node != nullptr) {
5474 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion);
5475 }
5476 } else if (is_unicode && (is_global || is_sticky)) {
5477 node = OptionallyStepBackToLeadSurrogate(&compiler, node, regexp.flags());
5478 }
5479
5480 if (node == nullptr) node = new (zone) EndNode(EndNode::BACKTRACK, zone);
5481 data->node = node;
5482 Analysis analysis(is_one_byte);
5483 analysis.EnsureAnalyzed(node);
5484 if (analysis.has_failed()) {
5485 const char* error_message = analysis.error_message();
5486 return CompilationResult(error_message);
5487 }
5488
5489 // Bytecode regexp implementation.
5490
5491 ZoneGrowableArray<uint8_t> buffer(zone, 1024);
5492 BytecodeRegExpMacroAssembler* macro_assembler =
5493 new (zone) BytecodeRegExpMacroAssembler(&buffer, zone);
5494
5495 // Inserted here, instead of in Assembler, because it depends on information
5496 // in the AST that isn't replicated in the Node structure.
5497 const intptr_t kMaxBacksearchLimit = 1024;
5498 if (is_end_anchored && !is_start_anchored && !is_sticky &&
5499 max_length < kMaxBacksearchLimit) {
5500 macro_assembler->SetCurrentPositionFromEnd(max_length);
5501 }
5502
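5503 if (is_global) {
5504 RegExpMacroAssembler::GlobalMode mode = RegExpMacroAssembler::GLOBAL;
5505 if (data->tree->min_match() > 0) {
5506 mode = RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK;
5507 } else if (is_unicode) {
5508 mode = RegExpMacroAssembler::GLOBAL_UNICODE;
5509 }
5510 macro_assembler->set_global_mode(mode);
5511 }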
5512
5513 RegExpEngine::CompilationResult result =
5514 compiler.Assemble(macro_assembler, node, data->capture_count, pattern);
5515
5516 if (FLAG_trace_irregexp) {
5517 macro_assembler->PrintBlocks();
5518 }
5519
5520 return result;
5521}
static Object & Handle()
Definition: object.h:407
virtual RegExpNode * ToNode(RegExpCompiler *compiler, RegExpNode *on_success)
Definition: regexp.cc:4562
static constexpr intptr_t kMaxRecursion
Definition: regexp.cc:336
virtual RegExpNode * ToNode(RegExpCompiler *compiler, RegExpNode *on_success)
Definition: regexp.cc:4216
static constexpr intptr_t kInfinity
Definition: regexp_ast.h:39
#define ASSERT(E)
GAsyncResult * result
RegExpNode * OptionallyStepBackToLeadSurrogate(RegExpCompiler *compiler, RegExpNode *on_success, RegExpFlags flags)
Definition: regexp.cc:5263
static int8_t data[kExtLength]
DEF_SWITCHES_START aot vmservice shared library Name of the *so containing AOT compiled Dart assets for launching the service isolate vm snapshot The VM snapshot data that will be memory mapped as read only SnapshotAssetPath must be present isolate snapshot The isolate snapshot data that will be memory mapped as read only SnapshotAssetPath must be present cache dir Path to the cache directory This is different from the persistent_cache_path in embedder which is used for Skia shader cache icu native lib Path to the library file that exports the ICU data vm service The hostname IP address on which the Dart VM Service should be served If not defaults to or::depending on whether ipv6 is specified vm service A custom Dart VM Service port The default is to pick a randomly available open port disable vm Disable the Dart VM Service The Dart VM Service is never available in release mode disable vm service Disable mDNS Dart VM Service publication Bind to the IPv6 localhost address for the Dart VM Service Ignored if vm service host is set endless trace buffer
Definition: switches.h:126
it will be possible to load the file into Perfetto s trace viewer disable asset Prevents usage of any non test fonts unless they were explicitly Loaded via prefetched default font Indicates whether the embedding started a prefetch of the default font manager before creating the engine run In non interactive mode
Definition: switches.h:228
compiler
Definition: malisc.py:17

◆ CompileIR()

RegExpEngine::CompilationResult dart::RegExpEngine::CompileIR ( RegExpCompileData *  input,
const ParsedFunction *  parsed_function,
const ZoneGrowableArray< const ICData * > &  ic_data_array,
intptr_t  osr_id 
)
static

Definition at line 5298 of file regexp.cc.

5302 {
5303 ASSERT(!FLAG_interpret_irregexp);
5304 Zone* zone = Thread::Current()->zone();
5305
5306 const Function& function = parsed_function->function();
5307 const intptr_t specialization_cid = function.string_specialization_cid();
5308 const bool is_sticky = function.is_sticky_specialization();
5309 const bool is_one_byte = (specialization_cid == kOneByteStringCid);
5310 RegExp& regexp = RegExp::Handle(zone, function.regexp());
5311 const String& pattern = String::Handle(zone, regexp.pattern());
5312
5313 ASSERT(!regexp.IsNull());
5314 ASSERT(!pattern.IsNull());
5315
5316 const bool is_global = regexp.flags().IsGlobal();
5317 const bool is_unicode = regexp.flags().IsUnicode();
5318
5319 RegExpCompiler compiler(data->capture_count, is_one_byte);
5320
5321 // TODO(zerny): Frequency sampling is currently disabled because of several
5322 // issues. We do not want to store subject strings in the regexp object since
5323 // they might be long and we should not prevent their garbage collection.
5324 // Passing them to this function explicitly does not help, since we must
5325 // generate exactly the same IR for both the unoptimizing and optimizing
5326 // pipelines (otherwise it gets confused when e.g. deopt ids differ).
5327 // An option would be to store sampling results in the regexp object, but
5328 // I'm not sure the performance gains are relevant enough.
5329
5330 // Wrap the body of the regexp in capture #0.
5331 RegExpNode* captured_body =
5332 RegExpCapture::ToNode(data->tree, 0, &compiler, compiler.accept());
5333
5334 RegExpNode* node = captured_body;
5335 const bool is_end_anchored = data->tree->IsAnchoredAtEnd();
5336 const bool is_start_anchored = data->tree->IsAnchoredAtStart();
5337 intptr_t max_length = data->tree->max_match();
5338 if (!is_start_anchored && !is_sticky) {
5339 // Add a .*? at the beginning, outside the body capture, unless
5340 // this expression is anchored at the beginning or is sticky.
5341 RegExpNode* loop_node = RegExpQuantifier::ToNode(
5342 0, RegExpTree::kInfinity, false,
5343 new (zone) RegExpCharacterClass('*', RegExpFlags()), &compiler,
5344 captured_body, data->contains_anchor);
5345
5346 if (data->contains_anchor) {
5347 // Unroll loop once, to take care of the case that might start
5348 // at the start of input.
5349 ChoiceNode* first_step_node = new (zone) ChoiceNode(2, zone);
5350 first_step_node->AddAlternative(GuardedAlternative(captured_body));
5351 first_step_node->AddAlternative(GuardedAlternative(new (zone) TextNode(
5352 new (zone) RegExpCharacterClass('*', RegExpFlags()),
5353 /*read_backwards=*/false, loop_node)));
5354 node = first_step_node;
5355 } else {
5356 node = loop_node;
5357 }
5358 }
5359 if (is_one_byte) {
5360 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion);
5361 // Do it again to propagate the new nodes to places where they were not
5362 // put because they had not been calculated yet.
5363 if (node != nullptr) {
5364 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion);
5365 }
5366 } else if (is_unicode && (is_global || is_sticky)) {
5367 node = OptionallyStepBackToLeadSurrogate(&compiler, node, regexp.flags());
5368 }
5369
5370 if (node == nullptr) node = new (zone) EndNode(EndNode::BACKTRACK, zone);
5371 data->node = node;
5372 Analysis analysis(is_one_byte);
5373 analysis.EnsureAnalyzed(node);
5374 if (analysis.has_failed()) {
5375 const char* error_message = analysis.error_message();
5376 return CompilationResult(error_message);
5377 }
5378
5379 // Native regexp implementation.
5380
5381 IRRegExpMacroAssembler* macro_assembler = new (zone)
5382 IRRegExpMacroAssembler(specialization_cid, data->capture_count,
5383 parsed_function, ic_data_array, osr_id, zone);
5384
5385 // Inserted here, instead of in Assembler, because it depends on information
5386 // in the AST that isn't replicated in the Node structure.
5387 const intptr_t kMaxBacksearchLimit = 1024;
5388 if (is_end_anchored && !is_start_anchored && !is_sticky &&
5389 max_length < kMaxBacksearchLimit) {
5390 macro_assembler->SetCurrentPositionFromEnd(max_length);
5391 }
5392
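5393 if (is_global) {
5394 RegExpMacroAssembler::GlobalMode mode = RegExpMacroAssembler::GLOBAL;
5395 if (data->tree->min_match() > 0) {
5396 mode = RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK;
5397 } else if (is_unicode) {
5398 mode = RegExpMacroAssembler::GLOBAL_UNICODE;
5399 }
5400 macro_assembler->set_global_mode(mode);
5401 }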
5402
5403 RegExpEngine::CompilationResult result =
5404 compiler.Assemble(macro_assembler, node, data->capture_count, pattern);
5405
5406 if (FLAG_trace_irregexp) {
5407 macro_assembler->PrintBlocks();
5408 }
5409
5410 return result;
5411}
Zone * zone() const
Definition: thread_state.h:37
static Thread * Current()
Definition: thread.h:362
Dart_NativeFunction function
Definition: fuchsia.cc:51

◆ CreateRegExp()

RegExpPtr dart::RegExpEngine::CreateRegExp ( Thread *  thread,
const String &  pattern,
RegExpFlags  flags 
)
static

Definition at line 5573 of file regexp.cc.

5575 {
5576 Zone* zone = thread->zone();
5577 const RegExp& regexp = RegExp::Handle(RegExp::New(zone));
5578
5579 regexp.set_pattern(pattern);
5580 regexp.set_flags(flags);
5581
5582 // TODO(zerny): We might want to use normal string searching algorithms
5583 // for simple patterns.
5584 regexp.set_is_complex();
5585 regexp.set_is_global(); // All dart regexps are global.
5586
5587 if (!FLAG_interpret_irregexp) {
5588 const Library& lib = Library::Handle(zone, Library::CoreLibrary());
5589 const Class& owner =
5590 Class::Handle(zone, lib.LookupClass(Symbols::RegExp()));
5591
5592 for (intptr_t cid = kOneByteStringCid; cid <= kTwoByteStringCid; cid++) {
5593 CreateSpecializedFunction(thread, zone, regexp, cid, /*sticky=*/false,
5594 owner);
5595 CreateSpecializedFunction(thread, zone, regexp, cid, /*sticky=*/true,
5596 owner);
5597 }
5598 }
5599
5600 return regexp.ptr();
5601}
static LibraryPtr CoreLibrary()
Definition: object.cc:14787
static RegExpPtr New(Zone *zone, Heap::Space space=Heap::kNew)
Definition: object.cc:26662
FlutterSemanticsFlag flags
void CreateSpecializedFunction(Thread *thread, Zone *zone, const RegExp &regexp, intptr_t specialization_cid, bool sticky, const Object &owner)
Definition: regexp.cc:5523
const intptr_t cid

◆ DotPrint()

static void dart::RegExpEngine::DotPrint ( const char *  label,
RegExpNode *  node,
bool  ignore_case 
)
static

The documentation for this class was generated from the following files: