// Extracted from generated documentation page: d0/d97/regexp__parser_8cc_source.html

// Copyright (c) 2014, the Dart project authors.  Please see the AUTHORS file

// for details. All rights reserved. Use of this source code is governed by a

// BSD-style license that can be found in the LICENSE file.


#include "vm/regexp_parser.h"


#include "unicode/uchar.h"

#include "unicode/uniset.h"


#include "platform/unicode.h"


#include "vm/longjump.h"

#include "vm/object_store.h"

#include "vm/symbols.h"


namespace dart {


// Shorthand that expands to zone(); used throughout this file as the
// placement argument of zone allocations (`new (Z) ...`).
#define Z zone()

// Enables possessive quantifier syntax for testing.
// This is a compile-time constant: while it is false, ParseDisjunction never
// interprets a '+' that follows a quantifier as a possessive marker.
static constexpr bool FLAG_regexp_possessive_quantifier = false;


// Creates an empty builder that carries |flags| and allocates in the zone of
// the current thread. No characters, text elements, terms or alternatives are
// pending, and no surrogate is waiting to be paired.
RegExpBuilder::RegExpBuilder(RegExpFlags flags)
    : zone_(Thread::Current()->zone()),
      pending_empty_(false),
      flags_(flags),
      characters_(nullptr),
      pending_surrogate_(kNoPendingSurrogate),
      terms_(),
      text_(),
      alternatives_()
#ifdef DEBUG
      ,
      // Only tracked in DEBUG builds; consulted by the DEBUG_ASSERTs in
      // AddQuantifierToAtom.
      last_added_(ADD_NONE)
#endif
{
}


// Remembers |lead_surrogate| as the pending surrogate. Any surrogate that was
// already pending is flushed first (as a lone surrogate), since it can no
// longer be paired.
void RegExpBuilder::AddLeadSurrogate(uint16_t lead_surrogate) {
  ASSERT(Utf16::IsLeadSurrogate(lead_surrogate));
  FlushPendingSurrogate();
  // Hold onto the lead surrogate, waiting for a trail surrogate to follow.
  pending_surrogate_ = lead_surrogate;
}


// Adds |trail_surrogate|. When a lead surrogate is pending, the two are
// combined into a single code point, which is emitted either as a desugared
// character class (if it has case-insensitive equivalents under /ui) or as a
// two-code-unit atom. Without a pending lead, the trail surrogate is emitted
// on its own as a lone surrogate.
void RegExpBuilder::AddTrailSurrogate(uint16_t trail_surrogate) {
  ASSERT(Utf16::IsTrailSurrogate(trail_surrogate));

  if (pending_surrogate_ == kNoPendingSurrogate) {
    // Nothing to pair with: route the lone trail surrogate through the
    // regular pending-surrogate flush.
    pending_surrogate_ = trail_surrogate;
    FlushPendingSurrogate();
    return;
  }

  const uint16_t lead = pending_surrogate_;
  pending_surrogate_ = kNoPendingSurrogate;
  ASSERT(Utf16::IsLeadSurrogate(lead));

  const uint32_t code_point = Utf16::Decode(lead, trail_surrogate);
  if (NeedsDesugaringForIgnoreCase(code_point)) {
    AddCharacterClassForDesugaring(code_point);
    return;
  }

  // Keep the pair as a literal atom made of both code units.
  auto* pair = new (Z) ZoneGrowableArray<uint16_t>(2);
  pair->Add(lead);
  pair->Add(trail_surrogate);
  AddAtom(new (Z) RegExpAtom(pair, flags_));
}


void RegExpBuilder::FlushPendingSurrogate() {

  if (pending_surrogate_ != kNoPendingSurrogate) {

    ASSERT(is_unicode());

    uint32_t c = pending_surrogate_;

    pending_surrogate_ = kNoPendingSurrogate;

    AddCharacterClassForDesugaring(c);

  }

}


void RegExpBuilder::FlushCharacters() {

  FlushPendingSurrogate();

  pending_empty_ = false;

  if (characters_ != nullptr) {

    RegExpTree* atom = new (Z) RegExpAtom(characters_, flags_);

    characters_ = nullptr;

    text_.Add(atom);

    LAST(ADD_ATOM);

  }

}


void RegExpBuilder::FlushText() {

  FlushCharacters();

  intptr_t num_text = text_.length();

  if (num_text == 0) {

    return;

  } else if (num_text == 1) {

    terms_.Add(text_.Last());

  } else {

    RegExpText* text = new (Z) RegExpText();

    for (intptr_t i = 0; i < num_text; i++)

      text_[i]->AppendToText(text);

    terms_.Add(text);

  }

  text_.Clear();

}


// Adds the single code unit |c|. Characters that need case-insensitive
// desugaring become a standalone character class; all others are appended to
// the literal character buffer, which is created on demand.
void RegExpBuilder::AddCharacter(uint16_t c) {
  FlushPendingSurrogate();
  pending_empty_ = false;

  if (NeedsDesugaringForIgnoreCase(c)) {
    AddCharacterClassForDesugaring(c);
    return;
  }

  if (characters_ == nullptr) {
    characters_ = new (Z) ZoneGrowableArray<uint16_t>(4);
  }
  characters_->Add(c);
  LAST(ADD_CHAR);
}


// Adds the code point |c|. Values above the largest UTF-16 code unit are
// split into a surrogate pair; in unicode mode, lone surrogate code units are
// routed through the surrogate-pairing logic; everything else is added as a
// plain character.
void RegExpBuilder::AddUnicodeCharacter(uint32_t c) {
  if (c > static_cast<uint32_t>(Utf16::kMaxCodeUnit)) {
    ASSERT(is_unicode());
    uint16_t pair[2];
    Utf16::Encode(c, pair);
    AddLeadSurrogate(pair[0]);
    AddTrailSurrogate(pair[1]);
    return;
  }

  if (is_unicode()) {
    if (Utf16::IsLeadSurrogate(c)) {
      AddLeadSurrogate(c);
      return;
    }
    if (Utf16::IsTrailSurrogate(c)) {
      AddTrailSurrogate(c);
      return;
    }
  }

  AddCharacter(static_cast<uint16_t>(c));
}


// Adds a code point that was written as an escape sequence. The pending
// surrogate is flushed both before and after adding |character| so that the
// escaped value is isolated from its neighbours.
void RegExpBuilder::AddEscapedUnicodeCharacter(uint32_t character) {
  // A lead or trail surrogate parsed via escape sequence will not
  // pair up with any preceding lead or following trail surrogate.
  FlushPendingSurrogate();
  AddUnicodeCharacter(character);
  FlushPendingSurrogate();
}


// Notes that an empty term was added. Nothing is stored in terms_; the flag
// only makes AddQuantifierToAtom ignore a quantifier that follows directly.
void RegExpBuilder::AddEmpty() {
  pending_empty_ = true;
}


// Adds the character class |cc|, either as an ordinary atom or, when it has
// to be desugared for unicode mode, as a standalone term.
void RegExpBuilder::AddCharacterClass(RegExpCharacterClass* cc) {
  if (!NeedsDesugaringForUnicode(cc)) {
    AddAtom(cc);
    return;
  }
  // With /u, character class needs to be desugared, so it
  // must be a standalone term instead of being part of a RegExpText.
  AddTerm(cc);
}


// Adds |c| as a standalone term: a character class whose only range is the
// singleton {c}.
void RegExpBuilder::AddCharacterClassForDesugaring(uint32_t c) {
  auto single_range = CharacterRange::List(Z, CharacterRange::Singleton(c));
  auto* single_class = new (Z) RegExpCharacterClass(single_range, flags_);
  AddTerm(single_class);
}


// Adds |term| as an atom. Empty terms are only recorded as a pending empty;
// text elements are collected in text_ so they can later be merged; anything
// else goes directly into terms_ after the pending text has been flushed.
void RegExpBuilder::AddAtom(RegExpTree* term) {
  if (term->IsEmpty()) {
    AddEmpty();
    return;
  }

  if (!term->IsTextElement()) {
    FlushText();
    terms_.Add(term);
  } else {
    FlushCharacters();
    text_.Add(term);
  }
  LAST(ADD_ATOM);
}


// Adds |term| directly to terms_ after flushing pending text, so that it is
// never merged into a RegExpText.
void RegExpBuilder::AddTerm(RegExpTree* term) {
  FlushText();
  terms_.Add(term);
  LAST(ADD_ATOM);
}


// Adds the assertion node |assert| to terms_ after flushing pending text.
void RegExpBuilder::AddAssertion(RegExpTree* assert) {
  FlushText();
  terms_.Add(assert);
  LAST(ADD_ASSERT);
}


// Closes the current alternative (called when a '|' is parsed): the terms
// collected so far are flushed into alternatives_.
void RegExpBuilder::NewAlternative() {
  FlushTerms();
}


void RegExpBuilder::FlushTerms() {

  FlushText();

  intptr_t num_terms = terms_.length();

  RegExpTree* alternative;

  if (num_terms == 0) {

    alternative = RegExpEmpty::GetInstance();

  } else if (num_terms == 1) {

    alternative = terms_.Last();

  } else {

    ZoneGrowableArray<RegExpTree*>* terms =

        new (Z) ZoneGrowableArray<RegExpTree*>();

    for (intptr_t i = 0; i < terms_.length(); i++) {

      terms->Add(terms_[i]);

    }

    alternative = new (Z) RegExpAlternative(terms);

  }

  alternatives_.Add(alternative);

  terms_.Clear();

  LAST(ADD_NONE);

}


// Decides whether the character class |cc| has to be desugared in unicode
// mode. Always false without /u and always true with /ui. Otherwise the
// (canonicalized, and for negated classes complemented) ranges are inspected
// for a range reaching Utf16::kMaxCodeUnit or beyond, or one that overlaps
// the surrogate block.
// Note: canonicalizes the ranges of |cc| in place.
bool RegExpBuilder::NeedsDesugaringForUnicode(RegExpCharacterClass* cc) {
  if (!is_unicode()) return false;
  // TODO(yangguo): we could be smarter than this. Case-insensitivity does not
  // necessarily mean that we need to desugar. It's probably nicer to have a
  // separate pass to figure out unicode desugarings.
  if (ignore_case()) return true;

  ZoneGrowableArray<CharacterRange>* effective = cc->ranges();
  CharacterRange::Canonicalize(effective);

  if (cc->is_negated()) {
    // Inspect what the class actually matches, i.e. the complement.
    auto complement =
        new (Z) ZoneGrowableArray<CharacterRange>(effective->length());
    CharacterRange::Negate(effective, complement);
    effective = complement;
  }

  for (int idx = effective->length() - 1; idx >= 0; idx--) {
    const uint32_t from = effective->At(idx).from();
    const uint32_t to = effective->At(idx).to();
    // Check for non-BMP characters.
    const bool reaches_non_bmp = to >= Utf16::kMaxCodeUnit;
    if (reaches_non_bmp) return true;
    // Check for lone surrogates.
    const bool touches_surrogates =
        from <= Utf16::kTrailSurrogateEnd && to >= Utf16::kLeadSurrogateStart;
    if (touches_surrogates) return true;
  }
  return false;
}


// Returns true when, under /ui, the code point |c| has at least one other
// single code point as a case-insensitive equivalent. Multi-character string
// equivalents are not counted. Always false unless both the unicode and the
// ignore-case flags are set.
bool RegExpBuilder::NeedsDesugaringForIgnoreCase(uint32_t c) {
  if (!is_unicode() || !ignore_case()) return false;

  icu::UnicodeSet case_closure(c, c);
  case_closure.closeOver(USET_CASE_INSENSITIVE);
  case_closure.removeAllStrings();
  return case_closure.size() > 1;
}


// Finishes building and returns the resulting tree: the empty regexp when
// there are no alternatives, the sole alternative when there is exactly one,
// and a RegExpDisjunction over all of them otherwise.
RegExpTree* RegExpBuilder::ToRegExp() {
  // Close the alternative that is currently being built.
  FlushTerms();

  const intptr_t alternative_count = alternatives_.length();
  switch (alternative_count) {
    case 0:
      return RegExpEmpty::GetInstance();
    case 1:
      return alternatives_.Last();
    default:
      break;
  }

  auto* choices = new (Z) ZoneGrowableArray<RegExpTree*>();
  for (intptr_t idx = 0; idx < alternative_count; idx++) {
    choices->Add(alternatives_[idx]);
  }
  return new (Z) RegExpDisjunction(choices);
}


// Applies the quantifier {min, max} of kind |quantifier_type| to the most
// recently added atom and stores the resulting RegExpQuantifier in terms_.
//
// The atom is taken from, in order of preference: the last buffered literal
// character, the last element of text_, or the last element of terms_.
//
// Returns false when the last term is a lookaround that must not be
// quantified (any lookaround in unicode mode, lookbehinds always); returns
// true otherwise, including the cases where the quantifier is dropped because
// it applies to an empty term.
//
// Must only be called immediately after an atom or character was added.
bool RegExpBuilder::AddQuantifierToAtom(
    intptr_t min,
    intptr_t max,
    RegExpQuantifier::QuantifierType quantifier_type) {
  if (pending_empty_) {
    // Quantifying the empty term is a no-op; just consume the marker.
    pending_empty_ = false;
    return true;
  }
  RegExpTree* atom;
  if (characters_ != nullptr) {
    DEBUG_ASSERT(last_added_ == ADD_CHAR);
    // Last atom was character.

    ZoneGrowableArray<uint16_t>* char_vector =
        new (Z) ZoneGrowableArray<uint16_t>();
    char_vector->AddArray(*characters_);
    intptr_t num_chars = char_vector->length();
    if (num_chars > 1) {
      // Only the final character is quantified: everything before it becomes
      // a separate atom in text_, and the last character alone becomes the
      // quantified atom.
      ZoneGrowableArray<uint16_t>* prefix =
          new (Z) ZoneGrowableArray<uint16_t>();
      for (intptr_t i = 0; i < num_chars - 1; i++) {
        prefix->Add(char_vector->At(i));
      }
      text_.Add(new (Z) RegExpAtom(prefix, flags_));
      ZoneGrowableArray<uint16_t>* tail = new (Z) ZoneGrowableArray<uint16_t>();
      tail->Add(char_vector->At(num_chars - 1));
      char_vector = tail;
    }
    characters_ = nullptr;
    atom = new (Z) RegExpAtom(char_vector, flags_);
    FlushText();
  } else if (text_.length() > 0) {
    DEBUG_ASSERT(last_added_ == ADD_ATOM);
    // Quantify the last text element; the remaining text is flushed as is.
    atom = text_.RemoveLast();
    FlushText();
  } else if (terms_.length() > 0) {
    DEBUG_ASSERT(last_added_ == ADD_ATOM);
    atom = terms_.RemoveLast();
    if (auto lookaround = atom->AsLookaround()) {
      // With /u, lookarounds are not quantifiable.
      if (is_unicode()) return false;
      // Lookbehinds are not quantifiable.
      if (lookaround->type() == RegExpLookaround::LOOKBEHIND) {
        return false;
      }
    }
    if (atom->max_match() == 0) {
      // Guaranteed to only match an empty string.
      LAST(ADD_TERM);
      if (min == 0) {
        // Zero repetitions are allowed, so the term is dropped entirely.
        return true;
      }
      // Otherwise the term is put back unquantified.
      terms_.Add(atom);
      return true;
    }
  } else {
    // Only call immediately after adding an atom or character!
    UNREACHABLE();
  }
  terms_.Add(new (Z) RegExpQuantifier(min, max, quantifier_type, atom));
  LAST(ADD_TERM);
  return true;
}


// ----------------------------------------------------------------------------

// Implementation of Parser


// Creates a parser over the pattern |in| with the top-level |flags|,
// allocating in the zone of the current thread. All capture bookkeeping
// starts out empty, and the constructor finishes by calling Advance() once so
// that current() is primed before parsing starts.
// NOTE(review): the |error| parameter is not used by this constructor.
RegExpParser::RegExpParser(const String& in, String* error, RegExpFlags flags)
    : zone_(Thread::Current()->zone()),
      captures_(nullptr),
      named_captures_(nullptr),
      named_back_references_(nullptr),
      in_(in),
      current_(kEndMarker),
      next_pos_(0),
      captures_started_(0),
      capture_count_(0),
      has_more_(true),
      top_level_flags_(flags),
      simple_(false),
      contains_anchor_(false),
      is_scanned_for_captures_(false),
      has_named_captures_(false) {
  Advance();
}


// Reads the character at next_pos_. In unicode mode a lead surrogate that is
// directly followed by a trail surrogate is decoded into one code point.
// When |update_position| is true, next_pos_ is moved past what was read;
// otherwise this is a pure peek.
inline uint32_t RegExpParser::ReadNext(bool update_position) {
  intptr_t cursor = next_pos_;
  const uint16_t first_unit = in().CharAt(cursor);
  cursor++;

  uint32_t result = first_unit;
  if (is_unicode() && cursor < in().Length() &&
      Utf16::IsLeadSurrogate(first_unit)) {
    const uint16_t second_unit = in().CharAt(cursor);
    if (Utf16::IsTrailSurrogate(second_unit)) {
      result = Utf16::Decode(first_unit, second_unit);
      cursor++;
    }
  }

  if (update_position) {
    next_pos_ = cursor;
  }
  return result;
}


// Peeks at the character following current() without consuming it; yields
// kEndMarker when there is no next character.
uint32_t RegExpParser::Next() {
  if (!has_next()) {
    return kEndMarker;
  }
  return ReadNext(false);
}


// Makes the next character the current one. At the end of the input,
// current() becomes kEndMarker and has_more_ is cleared.
void RegExpParser::Advance() {
  if (has_next()) {
    current_ = ReadNext(true);
    return;
  }

  current_ = kEndMarker;
  // Advance so that position() points to 1 after the last character. This is
  // important so that Reset() to this position works correctly.
  next_pos_ = in().Length() + 1;
  has_more_ = false;
}


// Rewinds (or forwards) the parser so that the character at index |pos|
// becomes the current one. has_more_ is recomputed from |pos| before the
// character is loaded via Advance().
void RegExpParser::Reset(intptr_t pos) {
  next_pos_ = pos;
  has_more_ = (pos < in().Length());
  Advance();
}


// Advances by |dist| positions: the first dist - 1 positions are skipped by
// bumping next_pos_ directly, and the final step goes through Advance() so
// that current() is loaded (or the end of input is detected).
void RegExpParser::Advance(intptr_t dist) {
  next_pos_ += dist - 1;
  Advance();
}


// Returns the simple_ flag, which ParsePattern sets when the parsed pattern
// is a single literal atom with the same length as the input.
bool RegExpParser::simple() {
  return simple_;
}


// Returns true iff |c| is one of the regexp syntax characters
// ^ $ \ . * + ? ( ) [ ] { } | or the slash '/'.
bool RegExpParser::IsSyntaxCharacterOrSlash(uint32_t c) {
  switch (c) {
    case '^':
    case '$':
    case '\\':
    case '.':
    case '*':
    case '+':
    case '?':
    case '(':
    case ')':
    case '[':
    case ']':
    case '{':
    case '}':
    case '|':
    case '/':
      return true;
    default:
      return false;
  }
}


// Aborts parsing by throwing a FormatException. The exception text is the
// concatenation of |message|, Symbols::Blank() and the pattern being parsed.
// This function does not return.
void RegExpParser::ReportError(const char* message) {
  // Zip to the end to make sure that no more input is read.
  current_ = kEndMarker;
  next_pos_ = in().Length();

  // Throw a FormatException on parsing failures.
  Array& args = Array::Handle();
  String& str = String::Handle();
  // First build the three pieces of the message and join them ...
  args ^= Array::New(3);
  str ^= String::New(message);
  args.SetAt(0, str);
  args.SetAt(1, Symbols::Blank());
  args.SetAt(2, in());
  str ^= String::ConcatAll(args);
  // ... then pass the joined string as the single exception argument.
  args ^= Array::New(1);
  args.SetAt(0, str);
  Exceptions::ThrowByType(Exceptions::kFormat, args);
  UNREACHABLE();
}


// Pattern ::

//   Disjunction

// Parses the whole input as a pattern and returns its tree. Named back
// references are patched once the full disjunction has been parsed, and the
// simple_ flag is set when the pattern turned out to be a plain literal.
RegExpTree* RegExpParser::ParsePattern() {
  RegExpTree* tree = ParseDisjunction();
  PatchNamedBackReferences();
  ASSERT(!has_more());

  // If the result of parsing is a literal string atom, and it has the
  // same length as the input, then the atom is identical to the input.
  if (tree->IsAtom()) {
    if (tree->AsAtom()->length() == in().Length()) {
      simple_ = true;
    }
  }
  return tree;
}


// Used for error messages where we would have fallen back on treating an
// escape as the identity escape, but we are in Unicode mode.
// Declared constexpr so the pointer itself is immutable and cannot be
// reassigned by accident; it still converts to the `const char*` that
// ReportError expects.
static constexpr const char* kUnicodeIdentity =
    "Invalid identity escape in Unicode pattern";


// Disjunction ::

//   Alternative

//   Alternative | Disjunction

// Alternative ::

//   [empty]

//   Term Alternative

// Term ::

//   Assertion

//   Atom

//   Atom Quantifier

// Parses a complete disjunction, including all nested groups, and returns the
// resulting tree.
//
// Nested groups are handled iteratively rather than recursively: on '(' a new
// RegExpParserState (with its own builder) is pushed by
// ParseOpenParenthesis, and on ')' the finished group body is handed to the
// builder of the previous state.
//
// Each iteration of the main loop has two phases:
//   1. the first switch parses one term (assertion, atom, group boundary);
//      cases that cannot be followed by a quantifier `continue` directly,
//      quantifiable ones `break` out of the switch;
//   2. the second switch parses an optional quantifier and attaches it to the
//      term that was just added.
//
// Syntax errors are reported through ReportError, which does not return.
RegExpTree* RegExpParser::ParseDisjunction() {
  // Used to store current state while parsing subexpressions.
  RegExpParserState initial_state(nullptr, INITIAL, RegExpLookaround::LOOKAHEAD,
                                  0, nullptr, top_level_flags_, Z);
  RegExpParserState* stored_state = &initial_state;
  // Cache the builder in a local variable for quick access.
  RegExpBuilder* builder = initial_state.builder();
  while (true) {
    switch (current()) {
      case kEndMarker:
        if (stored_state->IsSubexpression()) {
          // Inside a parenthesized group when hitting end of input.
          ReportError("Unterminated group");
          UNREACHABLE();
        }
        ASSERT(INITIAL == stored_state->group_type());
        // Parsing completed successfully.
        return builder->ToRegExp();
      case ')': {
        if (!stored_state->IsSubexpression()) {
          ReportError("Unmatched ')'");
          UNREACHABLE();
        }
        ASSERT(INITIAL != stored_state->group_type());

        Advance();
        // End disjunction parsing and convert builder content to new single
        // regexp atom.
        RegExpTree* body = builder->ToRegExp();

        intptr_t end_capture_index = captures_started();

        intptr_t capture_index = stored_state->capture_index();
        SubexpressionType group_type = stored_state->group_type();

        // Build result of subexpression.
        if (group_type == CAPTURE) {
          if (stored_state->IsNamedCapture()) {
            CreateNamedCaptureAtIndex(stored_state->capture_name(),
                                      capture_index);
          }
          RegExpCapture* capture = GetCapture(capture_index);
          capture->set_body(body);
          body = capture;
        } else if (group_type != GROUPING) {
          ASSERT(group_type == POSITIVE_LOOKAROUND ||
                 group_type == NEGATIVE_LOOKAROUND);
          bool is_positive = (group_type == POSITIVE_LOOKAROUND);
          // The lookaround records how many captures were started inside it
          // (end_capture_index - capture_index) and where they begin.
          body = new (Z) RegExpLookaround(
              body, is_positive, end_capture_index - capture_index,
              capture_index, stored_state->lookaround_type());
        }

        // Restore previous state.
        stored_state = stored_state->previous_state();
        builder = stored_state->builder();

        builder->AddAtom(body);
        // For compatibility with JSC and ES3, we allow quantifiers after
        // lookaheads, and break in all cases.
        break;
      }
      case '|': {
        Advance();
        builder->NewAlternative();
        continue;
      }
      case '*':
      case '+':
      case '?':
        // A quantifier at the start of a term has nothing to apply to.
        ReportError("Nothing to repeat");
        UNREACHABLE();
      case '^': {
        Advance();
        if (builder->is_multi_line()) {
          builder->AddAssertion(new (Z) RegExpAssertion(
              RegExpAssertion::START_OF_LINE, builder->flags()));
        } else {
          builder->AddAssertion(new (Z) RegExpAssertion(
              RegExpAssertion::START_OF_INPUT, builder->flags()));
          set_contains_anchor();
        }
        continue;
      }
      case '$': {
        Advance();
        RegExpAssertion::AssertionType assertion_type =
            builder->is_multi_line() ? RegExpAssertion::END_OF_LINE
                                     : RegExpAssertion::END_OF_INPUT;
        builder->AddAssertion(
            new (Z) RegExpAssertion(assertion_type, builder->flags()));
        continue;
      }
      case '.': {
        Advance();
        auto ranges = new (Z) ZoneGrowableArray<CharacterRange>(2);
        if (builder->is_dot_all()) {
          // Everything.
          CharacterRange::AddClassEscape(
              '*', ranges,
              /*add_unicode_case_equivalents=*/false);
        } else {
          // everything except \x0a, \x0d, \u2028 and \u2029
          CharacterRange::AddClassEscape(
              '.', ranges,
              /*add_unicode_case_equivalents=*/false);
        }
        RegExpCharacterClass* cc =
            new (Z) RegExpCharacterClass(ranges, builder->flags());
        builder->AddCharacterClass(cc);
        break;
      }
      case '(': {
        // Push a new parser state; subsequent terms go into its builder until
        // the matching ')' is reached.
        stored_state = ParseOpenParenthesis(stored_state);
        builder = stored_state->builder();
        continue;
      }
      case '[': {
        RegExpTree* atom = ParseCharacterClass(builder);
        builder->AddCharacterClass(atom->AsCharacterClass());
        break;
      }
      // Atom ::
      //   \ AtomEscape
      case '\\':
        // current() is still the backslash here; Next() peeks at the escape
        // letter, and most cases consume both with Advance(2).
        switch (Next()) {
          case kEndMarker:
            ReportError("\\ at end of pattern");
            UNREACHABLE();
          case 'b':
            Advance(2);
            builder->AddAssertion(new (Z) RegExpAssertion(
                RegExpAssertion::BOUNDARY, builder->flags()));
            continue;
          case 'B':
            Advance(2);
            builder->AddAssertion(new (Z) RegExpAssertion(
                RegExpAssertion::NON_BOUNDARY, builder->flags()));
            continue;
          // AtomEscape ::
          //   CharacterClassEscape
          //
          // CharacterClassEscape :: one of
          //   d D s S w W
          case 'd':
          case 'D':
          case 's':
          case 'S':
          case 'w':
          case 'W': {
            uint32_t c = Next();
            Advance(2);
            auto ranges = new (Z) ZoneGrowableArray<CharacterRange>(2);
            CharacterRange::AddClassEscape(
                c, ranges, is_unicode() && builder->ignore_case());
            RegExpCharacterClass* cc =
                new (Z) RegExpCharacterClass(ranges, builder->flags());
            builder->AddCharacterClass(cc);
            break;
          }
          case 'p':
          case 'P': {
            uint32_t p = Next();
            Advance(2);

            if (is_unicode()) {
              // Unicode property escape; 'P' requests the negated property.
              auto name_1 = new (Z) ZoneGrowableArray<char>();
              auto name_2 = new (Z) ZoneGrowableArray<char>();
              auto ranges = new (Z) ZoneGrowableArray<CharacterRange>(2);
              if (ParsePropertyClassName(name_1, name_2)) {
                if (AddPropertyClassRange(ranges, p == 'P', name_1, name_2)) {
                  RegExpCharacterClass* cc =
                      new (Z) RegExpCharacterClass(ranges, builder->flags());
                  builder->AddCharacterClass(cc);
                  break;
                }
              }
              ReportError("Invalid property name");
              UNREACHABLE();
            } else {
              // Without /u, \p and \P are identity escapes.
              builder->AddCharacter(p);
            }
            break;
          }
          case '1':
          case '2':
          case '3':
          case '4':
          case '5':
          case '6':
          case '7':
          case '8':
          case '9': {
            intptr_t index = 0;
            if (ParseBackReferenceIndex(&index)) {
              if (stored_state->IsInsideCaptureGroup(index)) {
                // The back reference is inside the capture group it refers to.
                // Nothing can possibly have been captured yet, so we use empty
                // instead. This ensures that, when checking a back reference,
                // the capture registers of the referenced capture are either
                // both set or both cleared.
                builder->AddEmpty();
              } else {
                RegExpCapture* capture = GetCapture(index);
                RegExpTree* atom =
                    new (Z) RegExpBackReference(capture, builder->flags());
                builder->AddAtom(atom);
              }
              break;
            }
            // With /u, no identity escapes except for syntax characters are
            // allowed. Otherwise, all identity escapes are allowed.
            if (is_unicode()) {
              ReportError(kUnicodeIdentity);
              UNREACHABLE();
            }
            uint32_t first_digit = Next();
            if (first_digit == '8' || first_digit == '9') {
              // Not valid octal digits: treat \8 and \9 as identity escapes.
              builder->AddCharacter(first_digit);
              Advance(2);
              break;
            }
          }
            // Digits 1-7 that are not a back reference are parsed as an octal
            // escape below.
            FALL_THROUGH;
          case '0': {
            Advance();
            if (is_unicode() && Next() >= '0' && Next() <= '9') {
              // With /u, decimal escape with leading 0 are not parsed as octal.
              ReportError("Invalid decimal escape");
              UNREACHABLE();
            }
            uint32_t octal = ParseOctalLiteral();
            builder->AddCharacter(octal);
            break;
          }
          // ControlEscape :: one of
          //   f n r t v
          case 'f':
            Advance(2);
            builder->AddCharacter('\f');
            break;
          case 'n':
            Advance(2);
            builder->AddCharacter('\n');
            break;
          case 'r':
            Advance(2);
            builder->AddCharacter('\r');
            break;
          case 't':
            Advance(2);
            builder->AddCharacter('\t');
            break;
          case 'v':
            Advance(2);
            builder->AddCharacter('\v');
            break;
          case 'c': {
            Advance();
            uint32_t controlLetter = Next();
            // Special case if it is an ASCII letter.
            // Convert lower case letters to uppercase.
            uint32_t letter = controlLetter & ~('a' ^ 'A');
            if (letter < 'A' || 'Z' < letter) {
              // controlLetter is not in range 'A'-'Z' or 'a'-'z'.
              // This is outside the specification. We match JSC in
              // reading the backslash as a literal character instead
              // of as starting an escape.
              if (is_unicode()) {
                // With /u, invalid escapes are not treated as identity escapes.
                ReportError(kUnicodeIdentity);
                UNREACHABLE();
              }
              builder->AddCharacter('\\');
            } else {
              Advance(2);
              builder->AddCharacter(controlLetter & 0x1f);
            }
            break;
          }
          case 'x': {
            Advance(2);
            uint32_t value;
            if (ParseHexEscape(2, &value)) {
              builder->AddCharacter(value);
            } else if (!is_unicode()) {
              // Not a valid two-digit hex escape: identity escape for 'x'.
              builder->AddCharacter('x');
            } else {
              // With /u, invalid escapes are not treated as identity escapes.
              ReportError(kUnicodeIdentity);
              UNREACHABLE();
            }
            break;
          }
          case 'u': {
            Advance(2);
            uint32_t value;
            if (ParseUnicodeEscape(&value)) {
              builder->AddEscapedUnicodeCharacter(value);
            } else if (!is_unicode()) {
              // Not a valid unicode escape: identity escape for 'u'.
              builder->AddCharacter('u');
            } else {
              // With /u, invalid escapes are not treated as identity escapes.
              ReportError(kUnicodeIdentity);
              UNREACHABLE();
            }
            break;
          }
          case 'k':
            // Either an identity escape or a named back-reference.  The two
            // interpretations are mutually exclusive: '\k' is interpreted as
            // an identity escape for non-Unicode patterns without named
            // capture groups, and as the beginning of a named back-reference
            // in all other cases.
            if (is_unicode() || HasNamedCaptures()) {
              Advance(2);
              ParseNamedBackReference(builder, stored_state);
              break;
            }
            FALL_THROUGH;
          default:
            Advance();
            // With the unicode flag, no identity escapes except for syntax
            // characters are allowed. Otherwise, all identity escapes are
            // allowed.
            if (!is_unicode() || IsSyntaxCharacterOrSlash(current())) {
              builder->AddCharacter(current());
              Advance();
            } else {
              ReportError(kUnicodeIdentity);
              UNREACHABLE();
            }
            break;
        }
        break;
      case '{': {
        // A well-formed interval quantifier at the start of a term has
        // nothing to repeat; otherwise '{' is handled like '}' and ']' below.
        intptr_t dummy;
        if (ParseIntervalQuantifier(&dummy, &dummy)) {
          ReportError("Nothing to repeat");
          UNREACHABLE();
        }
      }
        FALL_THROUGH;
      case '}':
      case ']':
        if (is_unicode()) {
          ReportError("Lone quantifier brackets");
          UNREACHABLE();
        }
        // Without /u these brackets are ordinary literal characters.
        FALL_THROUGH;
      default:
        builder->AddUnicodeCharacter(current());
        Advance();
        break;
    }  // end switch(current())

    // Phase two: an optional quantifier for the term added above.
    intptr_t min;
    intptr_t max;
    switch (current()) {
      // QuantifierPrefix ::
      //   *
      //   +
      //   ?
      //   {
      case '*':
        min = 0;
        max = RegExpTree::kInfinity;
        Advance();
        break;
      case '+':
        min = 1;
        max = RegExpTree::kInfinity;
        Advance();
        break;
      case '?':
        min = 0;
        max = 1;
        Advance();
        break;
      case '{':
        if (ParseIntervalQuantifier(&min, &max)) {
          if (max < min) {
            ReportError("numbers out of order in {} quantifier.");
            UNREACHABLE();
          }
          break;
        } else {
          // Not a quantifier; the '{' is parsed as a term in the next
          // iteration.
          continue;
        }
      default:
        // No quantifier follows.
        continue;
    }
    RegExpQuantifier::QuantifierType quantifier_type = RegExpQuantifier::GREEDY;
    if (current() == '?') {
      quantifier_type = RegExpQuantifier::NON_GREEDY;
      Advance();
    } else if (FLAG_regexp_possessive_quantifier && current() == '+') {
      // FLAG_regexp_possessive_quantifier is a debug-only flag.
      quantifier_type = RegExpQuantifier::POSSESSIVE;
      Advance();
    }
    if (!builder->AddQuantifierToAtom(min, max, quantifier_type)) {
      ReportError("invalid quantifier.");
      UNREACHABLE();
    }
  }
}


#ifdef DEBUG

// Currently only used in an ASSERT.

// Returns true iff |c| is one of the character class escape letters
// d, D, s, S, w or W.
static bool IsSpecialClassEscape(uint32_t c) {
  return c == 'd' || c == 'D' || c == 's' || c == 'S' || c == 'w' ||
         c == 'W';
}

#endif


// Parses the opening of a parenthesized group, starting with current() at
// '(', and returns a new parser state (linked to |state|) in which the body
// of the group is to be parsed.
//
// Recognized openers: '(' (capture), '(?:' (grouping), '(?=' / '(?!'
// (lookahead), '(?<=' / '(?<!' (lookbehind) and '(?<name>' (named capture).
// Any other character after '(?' is reported as "Invalid group".
RegExpParser::RegExpParserState* RegExpParser::ParseOpenParenthesis(
    RegExpParserState* state) {
  // Unless the group is itself a lookaround, it keeps the lookaround
  // direction of the enclosing state.
  RegExpLookaround::Type lookaround_type = state->lookaround_type();
  SubexpressionType subexpr_type = CAPTURE;
  bool is_named_capture = false;
  const RegExpCaptureName* capture_name = nullptr;

  Advance();  // Consume '('.
  if (current() == '?') {
    const uint32_t selector = Next();
    if (selector == ':') {
      Advance(2);
      subexpr_type = GROUPING;
    } else if (selector == '=' || selector == '!') {
      Advance(2);
      lookaround_type = RegExpLookaround::LOOKAHEAD;
      subexpr_type =
          (selector == '=') ? POSITIVE_LOOKAROUND : NEGATIVE_LOOKAROUND;
    } else if (selector == '<') {
      Advance();  // current() is now '<'.
      const uint32_t after_angle = Next();
      if (after_angle == '=' || after_angle == '!') {
        Advance(2);
        lookaround_type = RegExpLookaround::LOOKBEHIND;
        subexpr_type =
            (after_angle == '=') ? POSITIVE_LOOKAROUND : NEGATIVE_LOOKAROUND;
      } else {
        // '(?<' not followed by '=' or '!' starts a named capture; step onto
        // the first character of the name.
        is_named_capture = true;
        has_named_captures_ = true;
        Advance();
      }
    } else {
      ReportError("Invalid group");
      UNREACHABLE();
    }
  }

  if (subexpr_type == CAPTURE) {
    if (captures_started_ >= kMaxCaptures) {
      ReportError("Too many captures");
      UNREACHABLE();
    }
    captures_started_++;

    if (is_named_capture) {
      capture_name = ParseCaptureGroupName();
    }
  }

  // Store current state and begin new disjunction parsing.
  return new (Z)
      RegExpParserState(state, subexpr_type, lookaround_type, captures_started_,
                        capture_name, state->builder()->flags(), Z);
}


// In order to know whether an escape is a backreference or not we have to scan

// the entire regexp and find the number of capturing parentheses.  However we

// don't want to scan the regexp twice unless it is necessary.  This mini-parser

// is called when needed.  It can see the difference between capturing and

// noncapturing parentheses and can skip character classes and backslash-escaped

// characters.

void RegExpParser::ScanForCaptures() {

  ASSERT(!is_scanned_for_captures_);

  const intptr_t saved_position = position();

  // Start with captures started previous to current position

  intptr_t capture_count = captures_started();

  // Add count of captures after this position.

  uintptr_t n;

  while ((n = current()) != kEndMarker) {

    Advance();

    switch (n) {

      case '\\':

        Advance();

        break;

      case '[': {

        uintptr_t c;

        while ((c = current()) != kEndMarker) {

          Advance();

          if (c == '\\') {

            Advance();

          } else {

            if (c == ']') break;

          }

        }

        break;

      }

      case '(':

        // At this point we could be in

        // * a non-capturing group '(:',

        // * a lookbehind assertion '(?<=' '(?<!'

        // * or a named capture '(?<'.

        //

        // Of these, only named captures are capturing groups.

        if (current() == '?') {

          Advance();

          if (current() != '<') break;


          Advance();

          if (current() == '=' || current() == '!') break;


          // Found a possible named capture. It could turn out to be a syntax

          // error (e.g. an unterminated or invalid name), but that distinction

          // does not matter for our purposes.

          has_named_captures_ = true;

        }

        capture_count++;

        break;

    }

  }

  capture_count_ = capture_count;

  is_scanned_for_captures_ = true;

  Reset(saved_position);

}


// Parses the decimal index of a numeric back reference ("\N"). On success
// stores the index in *index_out and returns true. Otherwise rewinds to the
// backslash and returns false, which happens when the number exceeds
// kMaxCaptures or the number of capture groups in the pattern.
bool RegExpParser::ParseBackReferenceIndex(intptr_t* index_out) {
  ASSERT('\\' == current());
  ASSERT('1' <= Next() && Next() <= '9');
  const intptr_t escape_start = position();
  intptr_t index = Next() - '0';
  // Step over the backslash and the first digit.
  Advance(2);
  for (uint32_t digit = current(); Utils::IsDecimalDigit(digit);
       digit = current()) {
    index = 10 * index + (digit - '0');
    if (index > kMaxCaptures) {
      Reset(escape_start);
      return false;
    }
    Advance();
  }
  // Only scan ahead for later groups when the ones seen so far do not
  // already cover the index.
  if (index > captures_started()) {
    if (!is_scanned_for_captures_) ScanForCaptures();
    if (index > capture_count_) {
      Reset(escape_start);
      return false;
    }
  }
  *index_out = index;
  return true;
}


namespace {


// True for '$', '_' and anything Utils::IsAlphaNumeric accepts.
static inline constexpr bool IsAsciiIdentifierPart(uint32_t ch) {
  return ch == '$' || ch == '_' || Utils::IsAlphaNumeric(ch);
}


// ES#sec-names-and-keywords Names and Keywords
// Accepts UnicodeIDStart, '$', '_' and '\'.
static bool IsIdentifierStartSlow(uint32_t c) {
  // u_isIDStart is not usable here because it does not cover the
  // Other_ID_Start characters, so query the binary property directly.
  if (u_hasBinaryProperty(c, UCHAR_ID_START)) return true;
  if (c >= 0x60) return false;
  return c == '$' || c == '\\' || c == '_';
}


// ES#sec-names-and-keywords Names and Keywords
// Accepts UnicodeIDContinue, '$', '_', '\', ZWJ, and ZWNJ.
static bool IsIdentifierPartSlow(uint32_t c) {
  const uint32_t kZeroWidthNonJoiner = 0x200C;
  const uint32_t kZeroWidthJoiner = 0x200D;
  if (c == kZeroWidthNonJoiner || c == kZeroWidthJoiner) return true;
  if (c < 0x60 && (c == '$' || c == '\\' || c == '_')) return true;
  // u_isIDPart is not usable here because it does not cover the
  // Other_ID_Continue characters, so query the binary property directly.
  return u_hasBinaryProperty(c, UCHAR_ID_CONTINUE);
}


// Identifier-start test with an ASCII fast path: an ASCII identifier part
// that is not a decimal digit. Code points above 127 use the slow check.
static inline bool IsIdentifierStart(uint32_t c) {
  if (c <= 127) {
    return !Utils::IsDecimalDigit(c) && IsAsciiIdentifierPart(c);
  }
  return IsIdentifierStartSlow(c);
}


// Identifier-part test with an ASCII fast path. Code points above 127 use
// the slow check.
static inline bool IsIdentifierPart(uint32_t c) {
  return (c <= 127) ? IsAsciiIdentifierPart(c) : IsIdentifierPartSlow(c);
}


// Returns true when both capture names have the same length and the same
// elements at every position.
static bool IsSameName(const RegExpCaptureName* name1,
                       const RegExpCaptureName* name2) {
  const intptr_t length = name1->length();
  if (length != name2->length()) return false;
  intptr_t i = 0;
  while (i < length && name1->At(i) == name2->At(i)) {
    i++;
  }
  // Reaching the end means no position differed.
  return i == length;
}


}  // end namespace


// Appends code_unit to the capture name. Values above Utf16::kMaxCodeUnit
// are encoded via Utf16::Encode and appended as two units.
static void PushCodeUnit(RegExpCaptureName* v, uint32_t code_unit) {
  if (code_unit > Utf16::kMaxCodeUnit) {
    uint16_t pair[2];
    Utf16::Encode(code_unit, pair);
    v->Add(pair[0]);
    v->Add(pair[1]);
    return;
  }
  v->Add(code_unit);
}


// Parses a capture group name up to and including the closing '>' and
// returns it as a zone-allocated list of code units. Reports an error for
// an invalid \u escape or an invalid identifier character.
const RegExpCaptureName* RegExpParser::ParseCaptureGroupName() {
  auto name = new (Z) RegExpCaptureName();

  for (bool is_first = true;; is_first = false) {
    uint32_t c = current();
    Advance();

    // A \u escape stands for the character it encodes.
    if (c == '\\' && current() == 'u') {
      Advance();
      if (!ParseUnicodeEscape(&c)) {
        ReportError("Invalid Unicode escape sequence");
        UNREACHABLE();
      }
    }

    // Reject a backslash explicitly: it is misclassified as both ID_Start
    // and ID_Continue by the identifier predicates.
    if (c == '\\') {
      ReportError("Invalid capture group name");
      UNREACHABLE();
    }

    // '>' terminates the name, but only after at least one character.
    if (!is_first && c == '>') break;

    const bool is_valid = is_first ? IsIdentifierStart(c) : IsIdentifierPart(c);
    if (!is_valid) {
      ReportError("Invalid capture group name");
      UNREACHABLE();
    }
    PushCodeUnit(name, c);
  }

  return name;
}


intptr_t RegExpParser::GetNamedCaptureIndex(const RegExpCaptureName* name) {

  for (const auto& capture : *named_captures_) {

    if (IsSameName(name, capture->name())) return capture->index();

  }

  return -1;

}


// Attaches `name` to the capture at the one-based `index` and records it in
// named_captures_. Reports an error if the name is already in use.
void RegExpParser::CreateNamedCaptureAtIndex(const RegExpCaptureName* name,
                                             intptr_t index) {
  ASSERT(0 < index && index <= captures_started_);
  ASSERT(name != nullptr);

  if (named_captures_ == nullptr) {
    // First named capture: nothing to compare against yet.
    named_captures_ = new (Z) ZoneGrowableArray<RegExpCapture*>(1);
  } else if (GetNamedCaptureIndex(name) >= 0) {
    // Duplicate detection is a linear scan per insertion (O(n^2) overall).
    ReportError("Duplicate capture group name");
    UNREACHABLE();
  }

  RegExpCapture* capture = GetCapture(index);
  ASSERT(capture->name() == nullptr);

  capture->set_name(name);
  named_captures_->Add(capture);
}


// Parses the "<name>" part of \k<name>; the parser must be positioned on
// the '<'. A reference to a group that encloses the current position
// becomes an empty term. Any other reference becomes a back-reference atom
// that is queued in named_back_references_ for later patching.
bool RegExpParser::ParseNamedBackReference(RegExpBuilder* builder,
                                           RegExpParserState* state) {
  if (current() != '<') {
    ReportError("Invalid named reference");
    UNREACHABLE();
  }

  Advance();
  const RegExpCaptureName* name = ParseCaptureGroupName();
  if (name == nullptr) return false;

  if (state->IsInsideCaptureGroup(name)) {
    builder->AddEmpty();
    return true;
  }

  RegExpBackReference* reference =
      new (Z) RegExpBackReference(builder->flags());
  reference->set_name(name);
  builder->AddAtom(reference);

  // Create the pending list lazily on first use.
  if (named_back_references_ == nullptr) {
    named_back_references_ =
        new (Z) ZoneGrowableArray<RegExpBackReference*>(1);
  }
  named_back_references_->Add(reference);
  return true;
}


void RegExpParser::PatchNamedBackReferences() {

  if (named_back_references_ == nullptr) return;


  if (named_captures_ == nullptr) {

    ReportError("Invalid named capture referenced");

    return;

  }


  // Look up and patch the actual capture for each named back reference.

  // Currently O(n^2), optimize if necessary.

  for (intptr_t i = 0; i < named_back_references_->length(); i++) {

    RegExpBackReference* ref = named_back_references_->At(i);

    intptr_t index = GetNamedCaptureIndex(ref->name());


    if (index < 0) {

      ReportError("Invalid named capture referenced");

      UNREACHABLE();

    }

    ref->set_capture(GetCapture(index));

  }

}


// Returns the capture for the one-based `index`, creating the captures_
// list and any missing entries up to the number of known captures.
RegExpCapture* RegExpParser::GetCapture(intptr_t index) {
  // After a full scan the total is known; before that only the groups
  // started so far are.
  const intptr_t known_captures =
      is_scanned_for_captures_ ? capture_count_ : captures_started_;
  ASSERT(index <= known_captures);
  if (captures_ == nullptr) {
    captures_ = new (Z) ZoneGrowableArray<RegExpCapture*>(known_captures);
  }
  // Entry i of the list (zero-based) holds the capture with index i + 1.
  for (intptr_t next_index = captures_->length() + 1;
       next_index <= known_captures; next_index++) {
    captures_->Add(new (Z) RegExpCapture(next_index));
  }
  return captures_->At(index - 1);
}


// Builds a flat array of (name, index) pairs, one pair per named capture.
// Returns Array::null() when the pattern has no named captures.
ArrayPtr RegExpParser::CreateCaptureNameMap() {
  if (named_captures_ == nullptr || named_captures_->is_empty()) {
    return Array::null();
  }

  const intptr_t capture_total = named_captures_->length();
  const Array& map = Array::Handle(Array::New(capture_total * 2));

  auto& name = String::Handle();
  auto& index = Smi::Handle();
  intptr_t slot = 0;
  for (intptr_t i = 0; i < capture_total; i++) {
    RegExpCapture* capture = named_captures_->At(i);
    name =
        String::FromUTF16(capture->name()->data(), capture->name()->length());
    index = Smi::New(capture->index());
    // Even slots hold the name, the following odd slot holds its index.
    map.SetAt(slot++, name);
    map.SetAt(slot++, index);
  }

  return map.ptr();
}


// Reports whether the pattern contains a named capture, scanning the
// pattern first if that is not yet known.
bool RegExpParser::HasNamedCaptures() {
  // A scan is only needed when no named capture has been seen and the
  // pattern has not been scanned yet.
  if (!has_named_captures_ && !is_scanned_for_captures_) {
    ScanForCaptures();
    ASSERT(is_scanned_for_captures_);
  }
  return has_named_captures_;
}


// Returns true if this state or one of its previous states is the capture
// group with the given index.
bool RegExpParser::RegExpParserState::IsInsideCaptureGroup(intptr_t index) {
  RegExpParserState* s = this;
  while (s != nullptr) {
    if (s->group_type() == CAPTURE) {
      const intptr_t enclosing = s->capture_index();
      // Equal means found. Larger means the index is beyond what had been
      // parsed up to this state, so stop searching.
      if (index >= enclosing) return index == enclosing;
    }
    s = s->previous_state();
  }
  return false;
}


bool RegExpParser::RegExpParserState::IsInsideCaptureGroup(

    const RegExpCaptureName* name) {

  ASSERT(name != nullptr);

  for (RegExpParserState* s = this; s != nullptr; s = s->previous_state()) {

    if (s->capture_name() == nullptr) continue;

    if (IsSameName(s->capture_name(), name)) return true;

  }

  return false;

}


// QuantifierPrefix ::

//   { DecimalDigits }

//   { DecimalDigits , }

//   { DecimalDigits , DecimalDigits }

//

// Returns true if parsing succeeds, and set the min_out and max_out

// values. Values are truncated to RegExpTree::kInfinity if they overflow.

bool RegExpParser::ParseIntervalQuantifier(intptr_t* min_out,

                                           intptr_t* max_out) {

  ASSERT(current() == '{');

  intptr_t start = position();

  Advance();

  intptr_t min = 0;

  if (!Utils::IsDecimalDigit(current())) {

    Reset(start);

    return false;

  }

  while (Utils::IsDecimalDigit(current())) {

    intptr_t next = current() - '0';

    if (min > (RegExpTree::kInfinity - next) / 10) {

      // Overflow. Skip past remaining decimal digits and return -1.

      do {

        Advance();

      } while (Utils::IsDecimalDigit(current()));

      min = RegExpTree::kInfinity;

      break;

    }

    min = 10 * min + next;

    Advance();

  }

  intptr_t max = 0;

  if (current() == '}') {

    max = min;

    Advance();

  } else if (current() == ',') {

    Advance();

    if (current() == '}') {

      max = RegExpTree::kInfinity;

      Advance();

    } else {

      while (Utils::IsDecimalDigit(current())) {

        intptr_t next = current() - '0';

        if (max > (RegExpTree::kInfinity - next) / 10) {

          do {

            Advance();

          } while (Utils::IsDecimalDigit(current()));

          max = RegExpTree::kInfinity;

          break;

        }

        max = 10 * max + next;

        Advance();

      }

      if (current() != '}') {

        Reset(start);

        return false;

      }

      Advance();

    }

  } else {

    Reset(start);

    return false;

  }

  *min_out = min;

  *max_out = max;

  return true;

}


// Parses up to three octal digits starting at the current position and
// returns their value. A third digit is only consumed while the value so
// far is below 32, which keeps the result below 256.
uint32_t RegExpParser::ParseOctalLiteral() {
  ASSERT(('0' <= current() && current() <= '7') || current() == kEndMarker);
  // For compatibility with some other browsers (not all), octal escapes of
  // this form are accepted.
  uint32_t value = current() - '0';
  Advance();

  // Optional second digit.
  if (current() < '0' || current() > '7') return value;
  value = value * 8 + current() - '0';
  Advance();

  // Optional third digit.
  if (value >= 32 || current() < '0' || current() > '7') return value;
  value = value * 8 + current() - '0';
  Advance();
  return value;
}


// Returns the value (0 .. 15) of the hexadecimal digit c.
// If c is not a hexadecimal digit ('0'-'9', 'a'-'f', 'A'-'F'), returns -1.
static inline intptr_t HexValue(uint32_t c) {
  if (c >= '0' && c <= '9') return c - '0';
  // Setting bit 0x20 maps 'A'..'F' onto 'a'..'f'.
  const uint32_t lowered = c | 0x20;
  if (lowered >= 'a' && lowered <= 'f') return lowered - 'a' + 10;
  return -1;
}


// Parses exactly `length` hexadecimal digits and stores their value in
// *value. If a non-hex character is met first, rewinds to the starting
// position and returns false.
bool RegExpParser::ParseHexEscape(intptr_t length, uint32_t* value) {
  const intptr_t start = position();
  uint32_t accumulated = 0;
  for (intptr_t consumed = 0;; consumed++) {
    const intptr_t digit = HexValue(current());
    if (digit < 0) {
      Reset(start);
      return false;
    }
    accumulated = accumulated * 16 + digit;
    Advance();
    // Stop once the requested number of digits has been read.
    if (consumed == length - 1) break;
  }
  *value = accumulated;
  return true;
}


// This parses RegExpUnicodeEscapeSequence as described in ECMA262.
//
// The leading "\u" has already been consumed. Accepts \uxxxx and, in
// unicode mode, \u{x...} with an arbitrary number of hex digits whose value
// does not exceed Utf::kMaxCodePoint. In unicode mode a \uxxxx lead
// surrogate directly followed by a \uxxxx trail surrogate is combined into
// a single code point.
//
// Returns true and stores the code point in *value on success. Returns
// false if no valid escape could be read; a failed \u{...} parse or a
// failed trail-surrogate attempt rewinds to where that attempt started.
bool RegExpParser::ParseUnicodeEscape(uint32_t* value) {
  if (current() == '{' && is_unicode()) {
    // Keep the position as intptr_t, like every other saved position in
    // this parser, so it is never narrowed before being passed to Reset().
    const intptr_t start = position();
    Advance();
    if (ParseUnlimitedLengthHexNumber(Utf::kMaxCodePoint, value) &&
        current() == '}') {
      Advance();
      return true;
    }
    Reset(start);
    return false;
  }
  // \u but no {, or \u{...} escapes not allowed.
  const bool result = ParseHexEscape(4, value);
  if (result && is_unicode() && Utf16::IsLeadSurrogate(*value) &&
      current() == '\\') {
    // Attempt to read a trail surrogate. If that fails, rewind so that the
    // following escape is parsed on its own.
    const intptr_t start = position();
    if (Next() == 'u') {
      Advance(2);
      uint32_t trail = 0;
      if (ParseHexEscape(4, &trail) && Utf16::IsTrailSurrogate(trail)) {
        *value = Utf16::Decode(static_cast<uint16_t>(*value),
                               static_cast<uint16_t>(trail));
        return true;
      }
    }
    Reset(start);
  }
  return result;
}


namespace {


// Returns true if property_name is byte-for-byte equal to the short name
// or one of the long names that ICU reports for `property`.
bool IsExactPropertyAlias(const char* property_name, UProperty property) {
  const char* short_name = u_getPropertyName(property, U_SHORT_PROPERTY_NAME);
  if (short_name != nullptr && strcmp(property_name, short_name) == 0) {
    return true;
  }
  // Walk the long names, starting at U_LONG_PROPERTY_NAME, until ICU
  // returns nullptr.
  int choice = U_LONG_PROPERTY_NAME;
  while (const char* long_name = u_getPropertyName(
             property, static_cast<UPropertyNameChoice>(choice))) {
    if (strcmp(property_name, long_name) == 0) return true;
    choice++;
  }
  return false;
}


// Returns true if property_value_name is byte-for-byte equal to the short
// name or one of the long names that ICU reports for the given value of
// `property`.
bool IsExactPropertyValueAlias(const char* property_value_name,
                               UProperty property,
                               int32_t property_value) {
  const char* short_name =
      u_getPropertyValueName(property, property_value, U_SHORT_PROPERTY_NAME);
  if (short_name != nullptr && strcmp(property_value_name, short_name) == 0) {
    return true;
  }
  // Walk the long names, starting at U_LONG_PROPERTY_NAME, until ICU
  // returns nullptr.
  int choice = U_LONG_PROPERTY_NAME;
  while (const char* long_name = u_getPropertyValueName(
             property, property_value,
             static_cast<UPropertyNameChoice>(choice))) {
    if (strcmp(property_value_name, long_name) == 0) return true;
    choice++;
  }
  return false;
}


// Looks up property_value_name as a value of `property` and, on success,
// appends the matching code point ranges (complemented if `negate`) to
// `result`. Returns false if the name is unknown, is not an exact alias,
// ICU reports an error, or the resulting set is empty.
bool LookupPropertyValueName(UProperty property,
                             const char* property_value_name,
                             bool negate,
                             ZoneGrowableArray<CharacterRange>* result) {
  // Script_Extensions value names are looked up as if the property were
  // Script.
  const UProperty property_for_lookup =
      (property == UCHAR_SCRIPT_EXTENSIONS) ? UCHAR_SCRIPT : property;
  const int32_t property_value =
      u_getPropertyValueEnum(property_for_lookup, property_value_name);
  if (property_value == UCHAR_INVALID_CODE) return false;

  // u_getPropertyValueEnum uses loose matching, but the name is required to
  // match one of the value aliases exactly.
  if (!IsExactPropertyValueAlias(property_value_name, property_for_lookup,
                                 property_value)) {
    return false;
  }

  UErrorCode ec = U_ZERO_ERROR;
  icu::UnicodeSet set;
  set.applyIntPropertyValue(property, property_value, ec);
  if (ec != U_ZERO_ERROR || set.isEmpty() != 0) return false;

  set.removeAllStrings();
  if (negate) set.complement();
  const int range_count = set.getRangeCount();
  for (int i = 0; i < range_count; i++) {
    result->Add(
        CharacterRange::Range(set.getRangeStart(i), set.getRangeEnd(i)));
  }
  return true;
}


// Returns true if the NUL-terminated string `name` equals the string
// literal `literal` exactly.
template <size_t N>
inline bool NameEquals(const char* name, const char (&literal)[N]) {
  // N counts the literal's terminating NUL, so comparing N characters also
  // checks that `name` ends where the literal does.
  return strncmp(name, literal, N) == 0;
}


// Handles the names "Any", "ASCII" and "Assigned" by appending the
// corresponding ranges to `result`. Returns false for any other name.
bool LookupSpecialPropertyValueName(const char* name,
                                    ZoneGrowableArray<CharacterRange>* result,
                                    bool negate) {
  if (NameEquals(name, "Any")) {
    // The negation of 'Any' is the empty set, so nothing is added then.
    if (!negate) result->Add(CharacterRange::Everything());
    return true;
  }
  if (NameEquals(name, "ASCII")) {
    result->Add(negate ? CharacterRange::Range(0x80, Utf::kMaxCodePoint)
                       : CharacterRange::Range(0x0, 0x7F));
    return true;
  }
  if (NameEquals(name, "Assigned")) {
    // 'Assigned' is the complement of General_Category=Unassigned.
    return LookupPropertyValueName(UCHAR_GENERAL_CATEGORY, "Unassigned",
                                   !negate, result);
  }
  return false;
}


// Explicitly list supported binary properties. The spec forbids supporting
// properties outside of this set to ensure interoperability.
//
// 'Any', 'ASCII' and 'Assigned' are not supported by ICU and therefore do
// not appear here; see LookupSpecialPropertyValueName.
bool IsSupportedBinaryProperty(UProperty property) {
  switch (property) {
    case UCHAR_ALPHABETIC:
    case UCHAR_ASCII_HEX_DIGIT:
    case UCHAR_BIDI_CONTROL:
    case UCHAR_BIDI_MIRRORED:
    case UCHAR_CASE_IGNORABLE:
    case UCHAR_CASED:
    case UCHAR_CHANGES_WHEN_CASEFOLDED:
    case UCHAR_CHANGES_WHEN_CASEMAPPED:
    case UCHAR_CHANGES_WHEN_LOWERCASED:
    case UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED:
    case UCHAR_CHANGES_WHEN_TITLECASED:
    case UCHAR_CHANGES_WHEN_UPPERCASED:
    case UCHAR_DASH:
    case UCHAR_DEFAULT_IGNORABLE_CODE_POINT:
    case UCHAR_DEPRECATED:
    case UCHAR_DIACRITIC:
    case UCHAR_EMOJI:
    case UCHAR_EMOJI_COMPONENT:
    case UCHAR_EMOJI_MODIFIER_BASE:
    case UCHAR_EMOJI_MODIFIER:
    case UCHAR_EMOJI_PRESENTATION:
    case UCHAR_EXTENDED_PICTOGRAPHIC:
    case UCHAR_EXTENDER:
    case UCHAR_GRAPHEME_BASE:
    case UCHAR_GRAPHEME_EXTEND:
    case UCHAR_HEX_DIGIT:
    case UCHAR_ID_CONTINUE:
    case UCHAR_ID_START:
    case UCHAR_IDEOGRAPHIC:
    case UCHAR_IDS_BINARY_OPERATOR:
    case UCHAR_IDS_TRINARY_OPERATOR:
    case UCHAR_JOIN_CONTROL:
    case UCHAR_LOGICAL_ORDER_EXCEPTION:
    case UCHAR_LOWERCASE:
    case UCHAR_MATH:
    case UCHAR_NONCHARACTER_CODE_POINT:
    case UCHAR_PATTERN_SYNTAX:
    case UCHAR_PATTERN_WHITE_SPACE:
    case UCHAR_QUOTATION_MARK:
    case UCHAR_RADICAL:
    case UCHAR_REGIONAL_INDICATOR:
    case UCHAR_S_TERM:
    case UCHAR_SOFT_DOTTED:
    case UCHAR_TERMINAL_PUNCTUATION:
    case UCHAR_UNIFIED_IDEOGRAPH:
    case UCHAR_UPPERCASE:
    case UCHAR_VARIATION_SELECTOR:
    case UCHAR_WHITE_SPACE:
    case UCHAR_XID_CONTINUE:
    case UCHAR_XID_START:
      return true;
    default:
      return false;
  }
}


// Returns true if code point `c` may appear in a Unicode property name or
// value inside \p{...}: a character accepted by Utils::IsAlphaNumeric or
// '_'.
//
// https://tc39.github.io/proposal-regexp-unicode-property-escapes/
//
// The parameter is a full code point rather than a `char`: callers pass the
// parser's current character, and narrowing it to `char` first would let a
// non-ASCII character whose low byte happens to be alphanumeric (for
// example U+014C, low byte 'L') pass this check.
//
// Note that using this to validate each parsed char is quite conservative.
// A possible alternative solution would be to only ensure the parsed
// property name/value candidate string does not contain '\0' characters and
// let ICU lookups trigger the final failure.
bool IsUnicodePropertyValueCharacter(uint32_t c) {
  return c == '_' || Utils::IsAlphaNumeric(c);
}


}  // anonymous namespace


// Parses the braced part of \p{...} / \P{...} into NUL-terminated strings.
//
// - In \p{name}, 'name' is stored in name_1 and name_2 stays empty. It is
//   later interpreted either as a general category property value name or
//   as a binary property name.
// - In \p{name=value}, 'name' goes to name_1 and 'value' to name_2. 'name'
//   is an enumerated property name and 'value' one of its value names.
// - Aliases in PropertyAlias.txt and PropertyValueAlias.txt can be used.
// - Loose matching is not applied.
//
// Returns false if the current character is not '{', a character is
// rejected by IsUnicodePropertyValueCharacter, or the pattern ends first.
bool RegExpParser::ParsePropertyClassName(ZoneGrowableArray<char>* name_1,
                                          ZoneGrowableArray<char>* name_2) {
  ASSERT(name_1->is_empty());
  ASSERT(name_2->is_empty());
  if (current() != '{') return false;

  // Parse \p{[PropertyName=]PropertyNameValue}
  Advance();
  while (current() != '}' && current() != '=') {
    if (!IsUnicodePropertyValueCharacter(current())) return false;
    if (!has_next()) return false;
    name_1->Add(static_cast<char>(current()));
    Advance();
  }
  if (current() == '=') {
    Advance();
    while (current() != '}') {
      if (!IsUnicodePropertyValueCharacter(current())) return false;
      if (!has_next()) return false;
      name_2->Add(static_cast<char>(current()));
      Advance();
    }
    name_2->Add(0);  // NUL-terminate the value.
  }

  // Step over the closing '}'.
  Advance();
  name_1->Add(0);  // NUL-terminate the name.

  ASSERT(static_cast<size_t>(name_1->length() - 1) == strlen(name_1->data()));
  ASSERT(name_2->is_empty() ||
         static_cast<size_t>(name_2->length() - 1) == strlen(name_2->data()));
  return true;
}


// Resolves the names parsed from \p{...} and appends the matching ranges
// to add_to. Both names must be NUL-terminated; an empty name_2 means the
// \p{name} form was used. Returns false if the names cannot be resolved.
bool RegExpParser::AddPropertyClassRange(
    ZoneGrowableArray<CharacterRange>* add_to,
    bool negate,
    ZoneGrowableArray<char>* name_1,
    ZoneGrowableArray<char>* name_2) {
  ASSERT(name_1->At(name_1->length() - 1) == '\0');
  ASSERT(name_2->is_empty() || name_2->At(name_2->length() - 1) == '\0');

  if (!name_2->is_empty()) {
    // \p{name=value}: the name must be an exact alias of General_Category,
    // Script or Script_Extensions.
    const char* property_name = name_1->data();
    const char* value_name = name_2->data();
    UProperty property = u_getPropertyEnum(property_name);
    if (!IsExactPropertyAlias(property_name, property)) return false;
    if (property == UCHAR_GENERAL_CATEGORY) {
      // Use the mask property to allow aggregate value names such as
      // "Letter".
      property = UCHAR_GENERAL_CATEGORY_MASK;
    } else {
      const bool is_script_property =
          property == UCHAR_SCRIPT || property == UCHAR_SCRIPT_EXTENSIONS;
      if (!is_script_property) return false;
    }
    return LookupPropertyValueName(property, value_name, negate, add_to);
  }

  // \p{name}: try, in order, a general category value name, one of the
  // special names, and finally a supported binary property.
  const char* name = name_1->data();
  if (LookupPropertyValueName(UCHAR_GENERAL_CATEGORY_MASK, name, negate,
                              add_to)) {
    return true;
  }
  // "Any", "ASCII", and "Assigned".
  if (LookupSpecialPropertyValueName(name, add_to, negate)) {
    return true;
  }
  const UProperty binary_property = u_getPropertyEnum(name);
  if (!IsSupportedBinaryProperty(binary_property)) return false;
  if (!IsExactPropertyAlias(name, binary_property)) return false;
  // Negation is expressed by asking for value 'N' instead of 'Y'.
  return LookupPropertyValueName(binary_property, negate ? "N" : "Y", false,
                                 add_to);
}


// Parses one or more hexadecimal digits and stores their value in *value.
// Returns false if there is no digit at the current position or the value
// exceeds max_value. The position is not rewound on failure.
bool RegExpParser::ParseUnlimitedLengthHexNumber(uint32_t max_value,
                                                 uint32_t* value) {
  int digit = HexValue(current());
  if (digit < 0) return false;

  uint32_t accumulated = 0;
  do {
    accumulated = accumulated * 16 + digit;
    if (accumulated > max_value) return false;
    Advance();
    digit = HexValue(current());
  } while (digit >= 0);

  *value = accumulated;
  return true;
}


// Parses a backslash escape inside a character class that denotes a single
// character and returns that character. The parser must be positioned on
// the backslash. Reports an error for escapes that are invalid in unicode
// (/u) mode.
uint32_t RegExpParser::ParseClassCharacterEscape() {
  ASSERT(current() == '\\');
  DEBUG_ASSERT(has_next() && !IsSpecialClassEscape(Next()));
  Advance();
  switch (current()) {
    // Inside a class, \b denotes backspace.
    case 'b':
      Advance();
      return '\b';
    // ControlEscape :: one of
    //   f n r t v
    case 'f':
      Advance();
      return '\f';
    case 'n':
      Advance();
      return '\n';
    case 'r':
      Advance();
      return '\r';
    case 't':
      Advance();
      return '\t';
    case 'v':
      Advance();
      return '\v';
    case 'c': {
      const uint32_t control = Next();
      // Clearing the case bit folds lower-case letters onto upper-case.
      const uint32_t folded = control & ~('A' ^ 'a');
      const bool is_letter = folded >= 'A' && folded <= 'Z';
      if (!is_letter && is_unicode()) {
        // With /u, \c# or \c_ are invalid.
        ReportError("Invalid class escape");
        UNREACHABLE();
      }
      // For compatibility with JSC, inside a character class digits and
      // underscore are also accepted as control characters.
      if (is_letter || Utils::IsDecimalDigit(control) || control == '_') {
        Advance(2);
        // Map onto the ASCII control range 0x00-0x1f.
        return control & 0x1f;
      }
      // Match JSC: read the backslash as a literal character instead of as
      // the start of an escape. The 'c' is left for the caller to parse.
      return '\\';
    }
    case '0':
      // With /u, \0 is interpreted as NUL if not followed by another digit.
      if (is_unicode() && (Next() < '0' || Next() > '9')) {
        Advance();
        return 0;
      }
      FALL_THROUGH;
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
      // For compatibility, a decimal escape that isn't a back reference
      // (and therefore either \0 or not valid according to the
      // specification) is read as a 1..3 digit octal character code.
      if (is_unicode()) {
        // With /u, a decimal escape is not interpreted as an octal code.
        ReportError("Invalid class escape");
        UNREACHABLE();
      }
      return ParseOctalLiteral();
    case 'x': {
      Advance();
      uint32_t code;
      if (ParseHexEscape(2, &code)) {
        return code;
      }
      if (is_unicode()) {
        // With /u, invalid escapes are not treated as identity escapes.
        ReportError("Invalid escape");
        UNREACHABLE();
      }
      // \x not followed by two hex digits is an identity escape.
      return 'x';
    }
    case 'u': {
      Advance();
      uint32_t code;
      if (ParseUnicodeEscape(&code)) {
        return code;
      }
      if (is_unicode()) {
        // With /u, invalid escapes are not treated as identity escapes.
        ReportError(kUnicodeIdentity);
        UNREACHABLE();
      }
      // \u not followed by a valid escape body is an identity escape.
      return 'u';
    }
    default: {
      // Extended identity escape. Without /u any character not matched by
      // a more specific case is accepted, not just the subset required by
      // the ECMAScript specification.
      const uint32_t literal = current();
      if (!is_unicode() || IsSyntaxCharacterOrSlash(literal) ||
          literal == '-') {
        Advance();
        return literal;
      }
      ReportError(kUnicodeIdentity);
      UNREACHABLE();
    }
  }
  return 0;
}


// Parses one element of a character class. Returns true if it was a class
// escape (\w \W \d \D \s \S, or \p{..} / \P{..} in unicode mode), in which
// case its ranges have been added to `ranges`. Otherwise returns false and
// stores the single character in *char_out.
bool RegExpParser::ParseClassEscape(ZoneGrowableArray<CharacterRange>* ranges,
                                    bool add_unicode_case_equivalents,
                                    uint32_t* char_out) {
  const uint32_t first = current();
  if (first != '\\') {
    // A plain character stands for itself.
    Advance();
    *char_out = first;
    return false;
  }

  switch (Next()) {
    case 'w':
    case 'W':
    case 'd':
    case 'D':
    case 's':
    case 'S': {
      CharacterRange::AddClassEscape(static_cast<uint16_t>(Next()), ranges,
                                     add_unicode_case_equivalents);
      Advance(2);
      return true;
    }
    case 'p':
    case 'P': {
      // Property escapes only exist in unicode mode; otherwise fall out of
      // the switch and treat this as a character escape.
      if (!is_unicode()) break;
      const bool negate = Next() == 'P';
      Advance(2);
      auto name_1 = new (Z) ZoneGrowableArray<char>();
      auto name_2 = new (Z) ZoneGrowableArray<char>();
      if (!ParsePropertyClassName(name_1, name_2) ||
          !AddPropertyClassRange(ranges, negate, name_1, name_2)) {
        ReportError("Invalid property name in character class");
        UNREACHABLE();
      }
      return true;
    }
    case kEndMarker:
      ReportError("\\ at end of pattern");
      UNREACHABLE();
    default:
      break;
  }

  *char_out = ParseClassCharacterEscape();
  return false;
}


// Parses a bracketed character class starting at '[' and returns the
// resulting RegExpCharacterClass node. Reports an error for an
// unterminated class, an out-of-order range, or (in unicode mode) a range
// with a class escape as an endpoint.
RegExpTree* RegExpParser::ParseCharacterClass(const RegExpBuilder* builder) {
  static const char* kUnterminated = "Unterminated character class";
  static const char* kRangeInvalid = "Invalid character class";
  static const char* kRangeOutOfOrder = "Range out of order in character class";

  ASSERT(current() == '[');
  Advance();
  const bool is_negated = (current() == '^');
  if (is_negated) Advance();

  ZoneGrowableArray<CharacterRange>* ranges =
      new (Z) ZoneGrowableArray<CharacterRange>(2);
  const bool add_unicode_case_equivalents =
      is_unicode() && builder->ignore_case();

  while (has_more() && current() != ']') {
    uint32_t range_from = 0;
    const bool from_is_class =
        ParseClassEscape(ranges, add_unicode_case_equivalents, &range_from);

    if (current() != '-') {
      // A single element, not the start of a range.
      if (!from_is_class) ranges->Add(CharacterRange::Singleton(range_from));
      continue;
    }

    Advance();
    if (current() == kEndMarker) {
      // End of pattern: leave the loop and let the code below report the
      // error.
      break;
    }
    if (current() == ']') {
      // A trailing '-' is a literal.
      if (!from_is_class) ranges->Add(CharacterRange::Singleton(range_from));
      ranges->Add(CharacterRange::Singleton('-'));
      break;
    }

    uint32_t range_to = 0;
    const bool to_is_class =
        ParseClassEscape(ranges, add_unicode_case_equivalents, &range_to);
    if (from_is_class || to_is_class) {
      // Either end is an escaped character class. Treat the '-' verbatim.
      if (is_unicode()) {
        // ES2015 21.2.2.15.1 step 1.
        ReportError(kRangeInvalid);
        UNREACHABLE();
      }
      if (!from_is_class) ranges->Add(CharacterRange::Singleton(range_from));
      ranges->Add(CharacterRange::Singleton('-'));
      if (!to_is_class) ranges->Add(CharacterRange::Singleton(range_to));
      continue;
    }

    if (range_from > range_to) {
      ReportError(kRangeOutOfOrder);
      UNREACHABLE();
    }
    ranges->Add(CharacterRange::Range(range_from, range_to));
  }

  if (!has_more()) {
    ReportError(kUnterminated);
    UNREACHABLE();
  }
  // Step over the closing ']'.
  Advance();

  RegExpCharacterClass::CharacterClassFlags character_class_flags =
      RegExpCharacterClass::DefaultFlags();
  if (is_negated) character_class_flags |= RegExpCharacterClass::NEGATED;
  return new (Z)
      RegExpCharacterClass(ranges, builder->flags(), character_class_flags);
}


// ----------------------------------------------------------------------------

// The Parser interface.


void RegExpParser::ParseRegExp(const String& input,

                               RegExpFlags flags,

                               RegExpCompileData* result) {

  ASSERT(result != nullptr);

  RegExpParser parser(input, &result->error, flags);

  // Throws an exception if 'input' is not valid.

  RegExpTree* tree = parser.ParsePattern();

  ASSERT(tree != nullptr);

  ASSERT(result->error.IsNull());

  result->tree = tree;

  intptr_t capture_count = parser.captures_started();

  result->simple = tree->IsAtom() && parser.simple() && capture_count == 0;

  result->contains_anchor = parser.contains_anchor();

  result->capture_name_map = parser.CreateCaptureNameMap();

  result->capture_count = capture_count;

}


}  // namespace dart

done
static void done(const char *config, const char *src, const char *srcOptions, const char *name)
Definition: DM.cpp:263

pos
SkPoint pos
Definition: ImageShaderTest.cpp:27

next
static float next(float f)
Definition: PathOpsAngleTest.cpp:32

UNREACHABLE
#define UNREACHABLE()
Definition: assert.h:248

DEBUG_ASSERT
#define DEBUG_ASSERT(cond)
Definition: assert.h:321

N
#define N
Definition: beziers.cpp:19

dart::Array
Definition: object.h:10815

dart::Array::New
static ArrayPtr New(intptr_t len, Heap::Space space=Heap::kNew)
Definition: object.h:10959

dart::BaseGrowableArray::AddArray
void AddArray(const BaseGrowableArray< T, B, Allocator > &src)
Definition: growable_array.h:131

dart::BaseGrowableArray::data
T * data() const
Definition: growable_array.h:61

dart::BaseGrowableArray::Add
void Add(const T &value)
Definition: growable_array.h:84

dart::BaseGrowableArray::At
const T & At(intptr_t index) const
Definition: growable_array.h:124

dart::BaseGrowableArray::is_empty
bool is_empty() const
Definition: growable_array.h:62

dart::BaseGrowableArray::length
intptr_t length() const
Definition: growable_array.h:60

dart::CharacterRange::AddClassEscape
static void AddClassEscape(uint16_t type, ZoneGrowableArray< CharacterRange > *ranges)
Definition: regexp.cc:4651

dart::CharacterRange::Range
static CharacterRange Range(int32_t from, int32_t to)
Definition: regexp.h:40

dart::CharacterRange::Everything
static CharacterRange Everything()
Definition: regexp.h:44

dart::CharacterRange::Singleton
static CharacterRange Singleton(int32_t value)
Definition: regexp.h:37

dart::CharacterRange::Canonicalize
static void Canonicalize(ZoneGrowableArray< CharacterRange > *ranges)
Definition: regexp.cc:4876

dart::CharacterRange::Negate
static void Negate(ZoneGrowableArray< CharacterRange > *src, ZoneGrowableArray< CharacterRange > *dst)
Definition: regexp.cc:4912

dart::CharacterRange::List
static ZoneGrowableArray< CharacterRange > * List(Zone *zone, CharacterRange range)
Definition: regexp.h:47

dart::Exceptions::ThrowByType
static DART_NORETURN void ThrowByType(ExceptionType type, const Array &arguments)
Definition: exceptions.cc:1052

dart::Exceptions::kFormat
@ kFormat
Definition: exceptions.h:62

dart::Object::null
static ObjectPtr null()
Definition: object.h:433

dart::Object::Handle
static Object & Handle()
Definition: object.h:407

dart::RegExpAssertion
Definition: regexp_ast.h:101

dart::RegExpAssertion::AssertionType
AssertionType
Definition: regexp_ast.h:103

dart::RegExpAssertion::END_OF_LINE
@ END_OF_LINE
Definition: regexp_ast.h:106

dart::RegExpAssertion::BOUNDARY
@ BOUNDARY
Definition: regexp_ast.h:108

dart::RegExpAssertion::START_OF_LINE
@ START_OF_LINE
Definition: regexp_ast.h:104

dart::RegExpAssertion::END_OF_INPUT
@ END_OF_INPUT
Definition: regexp_ast.h:107

dart::RegExpAssertion::START_OF_INPUT
@ START_OF_INPUT
Definition: regexp_ast.h:105

dart::RegExpAssertion::NON_BOUNDARY
@ NON_BOUNDARY
Definition: regexp_ast.h:109

dart::RegExpAtom
Definition: regexp_ast.h:219

dart::RegExpBackReference
Definition: regexp_ast.h:404

dart::RegExpBuilder
Definition: regexp_parser.h:15

dart::RegExpBuilder::AddAssertion
void AddAssertion(RegExpTree *tree)
Definition: regexp_parser.cc:182

dart::RegExpBuilder::AddCharacterClass
void AddCharacterClass(RegExpCharacterClass *cc)
Definition: regexp_parser.cc:146

dart::RegExpBuilder::AddQuantifierToAtom
bool AddQuantifierToAtom(intptr_t min, intptr_t max, RegExpQuantifier::QuantifierType type)
Definition: regexp_parser.cc:269

dart::RegExpBuilder::AddCharacterClassForDesugaring
void AddCharacterClassForDesugaring(uint32_t c)
Definition: regexp_parser.cc:156

dart::RegExpBuilder::NewAlternative
void NewAlternative()
Definition: regexp_parser.cc:188

dart::RegExpBuilder::AddTerm
void AddTerm(RegExpTree *tree)
Definition: regexp_parser.cc:176

dart::RegExpBuilder::RegExpBuilder
RegExpBuilder(RegExpFlags flags)
Definition: regexp_parser.cc:23

dart::RegExpBuilder::ToRegExp
RegExpTree * ToRegExp()
Definition: regexp_parser.cc:252

dart::RegExpBuilder::AddEscapedUnicodeCharacter
void AddEscapedUnicodeCharacter(uint32_t character)
Definition: regexp_parser.cc:134

dart::RegExpBuilder::AddEmpty
void AddEmpty()
Definition: regexp_parser.cc:142

dart::RegExpBuilder::ignore_case
bool ignore_case() const
Definition: regexp_parser.h:39

dart::RegExpBuilder::AddUnicodeCharacter
void AddUnicodeCharacter(uint32_t character)
Definition: regexp_parser.cc:118

dart::RegExpBuilder::AddAtom
void AddAtom(RegExpTree *tree)
Definition: regexp_parser.cc:161

dart::RegExpBuilder::AddCharacter
void AddCharacter(uint16_t character)
Definition: regexp_parser.cc:104

dart::RegExpCapture
Definition: regexp_ast.h:311

dart::RegExpCapture::set_body
void set_body(RegExpTree *body)
Definition: regexp_ast.h:333

dart::RegExpCharacterClass
Definition: regexp_ast.h:153

dart::RegExpCharacterClass::NEGATED
@ NEGATED
Definition: regexp_ast.h:158

dart::RegExpCharacterClass::CharacterClassFlags
intptr_t CharacterClassFlags
Definition: regexp_ast.h:163

dart::RegExpCharacterClass::DefaultFlags
static CharacterClassFlags DefaultFlags()
Definition: regexp_ast.h:164

dart::RegExpDisjunction
Definition: regexp_ast.h:61

dart::RegExpEmpty::GetInstance
static RegExpEmpty * GetInstance()
Definition: regexp_ast.h:440

dart::RegExpFlags
Definition: object.h:12702

dart::RegExpLookaround
Definition: regexp_ast.h:346

dart::RegExpLookaround::Type
Type
Definition: regexp_ast.h:348

dart::RegExpLookaround::LOOKBEHIND
@ LOOKBEHIND
Definition: regexp_ast.h:348

dart::RegExpLookaround::LOOKAHEAD
@ LOOKAHEAD
Definition: regexp_ast.h:348

dart::RegExpParser
Definition: regexp_parser.h:75

dart::RegExpParser::ParseOctalLiteral
uint32_t ParseOctalLiteral()
Definition: regexp_parser.cc:1381

dart::RegExpParser::ReportError
void ReportError(const char *message)
Definition: regexp_parser.cc:430

dart::RegExpParser::Reset
void Reset(intptr_t pos)
Definition: regexp_parser.cc:391

dart::RegExpParser::is_unicode
bool is_unicode() const
Definition: regexp_parser.h:141

dart::RegExpParser::position
intptr_t position()
Definition: regexp_parser.h:140

dart::RegExpParser::ParseDisjunction
RegExpTree * ParseDisjunction()
Definition: regexp_parser.cc:479

dart::RegExpParser::RegExpParser
RegExpParser(const String &in, String *error, RegExpFlags regexp_flags)
Definition: regexp_parser.cc:336

dart::RegExpParser::ParseClassEscape
bool ParseClassEscape(ZoneGrowableArray< CharacterRange > *ranges, bool add_unicode_case_equivalents, uint32_t *char_out)
Definition: regexp_parser.cc:1858

dart::RegExpParser::ParsePattern
RegExpTree * ParsePattern()
Definition: regexp_parser.cc:452

dart::RegExpParser::set_contains_anchor
void set_contains_anchor()
Definition: regexp_parser.h:138

dart::RegExpParser::ParseUnlimitedLengthHexNumber
bool ParseUnlimitedLengthHexNumber(uint32_t max_value, uint32_t *value)
Definition: regexp_parser.cc:1721

dart::RegExpParser::ParseHexEscape
bool ParseHexEscape(intptr_t length, uint32_t *value)
Definition: regexp_parser.cc:1408

dart::RegExpParser::ParseClassCharacterEscape
uint32_t ParseClassCharacterEscape()
Definition: regexp_parser.cc:1740

dart::RegExpParser::ParseIntervalQuantifier
bool ParseIntervalQuantifier(intptr_t *min_out, intptr_t *max_out)
Definition: regexp_parser.cc:1321

dart::RegExpParser::IsSyntaxCharacterOrSlash
static bool IsSyntaxCharacterOrSlash(uint32_t c)
Definition: regexp_parser.cc:406

dart::RegExpParser::AddPropertyClassRange
bool AddPropertyClassRange(ZoneGrowableArray< CharacterRange > *add_to, bool negate, ZoneGrowableArray< char > *name_1, ZoneGrowableArray< char > *name_2)
Definition: regexp_parser.cc:1680

dart::RegExpParser::kEndMarker
static constexpr uint32_t kEndMarker
Definition: regexp_parser.h:146

dart::RegExpParser::ParseUnicodeEscape
bool ParseUnicodeEscape(uint32_t *value)
Definition: regexp_parser.cc:1430

dart::RegExpParser::ParseBackReferenceIndex
bool ParseBackReferenceIndex(intptr_t *index_out)
Definition: regexp_parser.cc:1026

dart::RegExpParser::ParseCharacterClass
RegExpTree * ParseCharacterClass(const RegExpBuilder *builder)
Definition: regexp_parser.cc:1903

dart::RegExpParser::kMaxCaptures
static constexpr intptr_t kMaxCaptures
Definition: regexp_parser.h:145

dart::RegExpParser::simple
bool simple()
Definition: regexp_parser.cc:402

dart::RegExpParser::Advance
void Advance()
Definition: regexp_parser.cc:379

dart::RegExpParser::ParseRegExp
static void ParseRegExp(const String &input, RegExpFlags regexp_flags, RegExpCompileData *result)
Definition: regexp_parser.cc:1972

dart::RegExpParser::ParsePropertyClassName
bool ParsePropertyClassName(ZoneGrowableArray< char > *name_1, ZoneGrowableArray< char > *name_2)
Definition: regexp_parser.cc:1641

dart::RegExpParser::captures_started
intptr_t captures_started()
Definition: regexp_parser.h:139

dart::RegExpQuantifier
Definition: regexp_ast.h:263

dart::RegExpQuantifier::QuantifierType
QuantifierType
Definition: regexp_ast.h:265

dart::RegExpQuantifier::POSSESSIVE
@ POSSESSIVE
Definition: regexp_ast.h:265

dart::RegExpQuantifier::GREEDY
@ GREEDY
Definition: regexp_ast.h:265

dart::RegExpQuantifier::NON_GREEDY
@ NON_GREEDY
Definition: regexp_ast.h:265

dart::RegExpTree
Definition: regexp_ast.h:37

dart::RegExpTree::max_match
virtual intptr_t max_match() const =0

dart::RegExpTree::kInfinity
static constexpr intptr_t kInfinity
Definition: regexp_ast.h:39

dart::RegExpTree::IsTextElement
virtual bool IsTextElement() const
Definition: regexp_ast.h:44

dart::Smi::New
static SmiPtr New(intptr_t value)
Definition: object.h:10006

dart::String
Definition: object.h:10158

dart::String::Length
intptr_t Length() const
Definition: object.h:10210

dart::String::ConcatAll
static StringPtr ConcatAll(const Array &strings, Heap::Space space=Heap::kNew)
Definition: object.cc:24048

dart::String::New
static StringPtr New(const char *cstr, Heap::Space space=Heap::kNew)
Definition: object.cc:23698

dart::String::CharAt
uint16_t CharAt(intptr_t index) const
Definition: object.h:10259

dart::String::FromUTF16
static StringPtr FromUTF16(const uint16_t *utf16_array, intptr_t array_len, Heap::Space space=Heap::kNew)
Definition: object.cc:23739

dart::Symbols::Blank
static const String & Blank()
Definition: symbols.h:647

dart::Thread
Definition: thread.h:342

dart::Utf16::kLeadSurrogateStart
static constexpr int32_t kLeadSurrogateStart
Definition: unicode.h:159

dart::Utf16::kMaxCodeUnit
static constexpr int32_t kMaxCodeUnit
Definition: unicode.h:158

dart::Utf16::Decode
static int32_t Decode(uint16_t lead, uint16_t trail)
Definition: unicode.h:151

dart::Utf16::Encode
static void Encode(int32_t codepoint, uint16_t *dst)
Definition: unicode.cc:273

dart::Utf16::IsLeadSurrogate
static bool IsLeadSurrogate(uint32_t ch)
Definition: unicode.h:126

dart::Utf16::IsTrailSurrogate
static bool IsTrailSurrogate(uint32_t ch)
Definition: unicode.h:131

dart::Utf::kMaxCodePoint
static constexpr int32_t kMaxCodePoint
Definition: unicode.h:18

dart::Utils::IsDecimalDigit
static constexpr bool IsDecimalDigit(uint32_t c)
Definition: utils.h:386

dart::Utils::IsAlphaNumeric
static constexpr bool IsAlphaNumeric(uint32_t c)
Definition: utils.h:381

dart::ZoneGrowableArray< uint16_t >

ASSERT
#define ASSERT(E)
Definition: entrypoints_verification_test.cc:25

d
VULKAN_HPP_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE auto & d
Definition: main.cc:19

s
struct MyStruct s

state
AtkStateType state
Definition: fl_accessible_node.cc:10

flags
FlutterSemanticsFlag flags
Definition: fl_accessible_node.cc:106

start
glong start
Definition: fl_accessible_text_field.cc:39

args
G_BEGIN_DECLS G_MODULE_EXPORT FlValue * args
Definition: fl_event_channel.h:89

error
const uint8_t uint32_t uint32_t GError ** error
Definition: fl_pixel_buffer_texture_test.cc:40

i
int i
Definition: fl_socket_accessible.cc:18

value
uint8_t value
Definition: fl_standard_message_codec.cc:36

result
GAsyncResult * result
Definition: fl_text_input_plugin.cc:106

max
static float max(float r, float g, float b)
Definition: hsl.cpp:49

min
static float min(float r, float g, float b)
Definition: hsl.cpp:48

length
size_t length
Definition: key_event_handler.cc:41

character
char32_t character
Definition: keyboard_key_handler_unittests.cc:117

text
std::u16string text
Definition: keyboard_unittests.cc:332

message
Win32Message message
Definition: keyboard_unittests.cc:139

longjump.h

x
double x
Definition: mouse-input-test.cc:82

android_finder.DEBUG
bool DEBUG
Definition: android_finder.py:22

dart_profiler_symbols.p
p
Definition: dart_profiler_symbols.py:55

dart
Definition: dart_vm.cc:33

dart::name
const char *const name
Definition: method_recognizer.cc:309

dart::PushCodeUnit
static void PushCodeUnit(RegExpCaptureName *v, uint32_t code_unit)
Definition: regexp_parser.cc:1106

dart::kUnicodeIdentity
static const char * kUnicodeIdentity
Definition: regexp_parser.cc:466

dart::RegExpCaptureName
ZoneGrowableArray< uint16_t > RegExpCaptureName
Definition: regexp_parser.h:73

dart::HexValue
static int HexValue(char digit)
Definition: uri.cc:86

dart::false
false
Definition: isolate_reload.cc:58

dart::FLAG_regexp_possessive_quantifier
static constexpr bool FLAG_regexp_possessive_quantifier
Definition: regexp_parser.cc:21

dependency.builder
builder
Definition: dependency.py:1

flutter::set
DEF_SWITCHES_START aot vmservice shared library Name of the *so containing AOT compiled Dart assets for launching the service isolate vm snapshot The VM snapshot data that will be memory mapped as read only SnapshotAssetPath must be present isolate snapshot The isolate snapshot data that will be memory mapped as read only SnapshotAssetPath must be present cache dir Path to the cache directory This is different from the persistent_cache_path in embedder which is used for Skia shader cache icu native lib Path to the library file that exports the ICU data vm service The hostname IP address on which the Dart VM Service should be served If not set
Definition: switches.h:76

gn.compile_sksl_tests.tail
tail
Definition: compile_sksl_tests.py:64

gn.find_headers.len
len
Definition: find_headers.py:30

gn.is_clang.cc
cc
Definition: is_clang.py:12

pkg-config.prefix
def prefix
Definition: pkg-config.py:130

zip.parser
parser
Definition: zip.py:78

object_store.h

FALL_THROUGH
#define FALL_THROUGH
Definition: globals.h:15

Z
#define Z
Definition: regexp_parser.cc:18

regexp_parser.h

LAST
#define LAST(x)
Definition: regexp_parser.h:69

dart::RegExpCompileData
Definition: regexp.h:1430

symbols.h

unicode.h

true
true
Definition: verylargebitmap.cpp:163