Flutter Engine
The Flutter Engine
Classes | Public Member Functions | Static Public Member Functions | Static Public Attributes | List of all members
dart::RegExpParser Class Reference

#include <regexp_parser.h>

Inheritance diagram for dart::RegExpParser:
dart::ValueObject

Public Member Functions

 RegExpParser (const String &in, String *error, RegExpFlags regexp_flags)
 
RegExpTree * ParsePattern ()
 
RegExpTree * ParseDisjunction ()
 
RegExpTree * ParseGroup ()
 
bool ParseIntervalQuantifier (intptr_t *min_out, intptr_t *max_out)
 
uint32_t ParseClassCharacterEscape ()
 
bool ParseHexEscape (intptr_t length, uint32_t *value)
 
bool ParseUnicodeEscape (uint32_t *value)
 
bool ParseUnlimitedLengthHexNumber (uint32_t max_value, uint32_t *value)
 
bool ParsePropertyClassName (ZoneGrowableArray< char > *name_1, ZoneGrowableArray< char > *name_2)
 
bool AddPropertyClassRange (ZoneGrowableArray< CharacterRange > *add_to, bool negate, ZoneGrowableArray< char > *name_1, ZoneGrowableArray< char > *name_2)
 
RegExpTree * GetPropertySequence (ZoneGrowableArray< char > *name_1)
 
RegExpTree * ParseCharacterClass (const RegExpBuilder *builder)
 
uint32_t ParseOctalLiteral ()
 
bool ParseBackReferenceIndex (intptr_t *index_out)
 
bool ParseClassEscape (ZoneGrowableArray< CharacterRange > *ranges, bool add_unicode_case_equivalents, uint32_t *char_out)
 
void ReportError (const char *message)
 
void Advance ()
 
void Advance (intptr_t dist)
 
void Reset (intptr_t pos)
 
bool simple ()
 
bool contains_anchor ()
 
void set_contains_anchor ()
 
intptr_t captures_started ()
 
intptr_t position ()
 
bool is_unicode () const
 
- Public Member Functions inherited from dart::ValueObject
 ValueObject ()
 
 ~ValueObject ()
 

Static Public Member Functions

static void ParseRegExp (const String &input, RegExpFlags regexp_flags, RegExpCompileData *result)
 
static bool IsSyntaxCharacterOrSlash (uint32_t c)
 

Static Public Attributes

static constexpr intptr_t kMaxCaptures = 1 << 16
 
static constexpr uint32_t kEndMarker = (1 << 21)
 

Detailed Description

Definition at line 75 of file regexp_parser.h.

Constructor & Destructor Documentation

◆ RegExpParser()

dart::RegExpParser::RegExpParser ( const String &  in,
String *  error,
RegExpFlags  regexp_flags 
)

Definition at line 336 of file regexp_parser.cc.

337 : zone_(Thread::Current()->zone()),
338 captures_(nullptr),
339 named_captures_(nullptr),
340 named_back_references_(nullptr),
341 in_(in),
342 current_(kEndMarker),
343 next_pos_(0),
344 captures_started_(0),
345 capture_count_(0),
346 has_more_(true),
347 top_level_flags_(flags),
348 simple_(false),
349 contains_anchor_(false),
350 is_scanned_for_captures_(false),
351 has_named_captures_(false) {
352 Advance();
353}
static constexpr uint32_t kEndMarker
static Thread * Current()
Definition: thread.h:362
FlutterSemanticsFlag flags

Member Function Documentation

◆ AddPropertyClassRange()

bool dart::RegExpParser::AddPropertyClassRange ( ZoneGrowableArray< CharacterRange > *  add_to,
bool  negate,
ZoneGrowableArray< char > *  name_1,
ZoneGrowableArray< char > *  name_2 
)

Definition at line 1680 of file regexp_parser.cc.

1684 {
1685 ASSERT(name_1->At(name_1->length() - 1) == '\0');
1686 ASSERT(name_2->is_empty() || name_2->At(name_2->length() - 1) == '\0');
1687 if (name_2->is_empty()) {
1688 // First attempt to interpret as general category property value name.
1689 const char* name = name_1->data();
1690 if (LookupPropertyValueName(UCHAR_GENERAL_CATEGORY_MASK, name, negate,
1691 add_to)) {
1692 return true;
1693 }
1694 // Interpret "Any", "ASCII", and "Assigned".
1695 if (LookupSpecialPropertyValueName(name, add_to, negate)) {
1696 return true;
1697 }
1698 // Then attempt to interpret as binary property name with value name 'Y'.
1699 UProperty property = u_getPropertyEnum(name);
1700 if (!IsSupportedBinaryProperty(property)) return false;
1701 if (!IsExactPropertyAlias(name, property)) return false;
1702 return LookupPropertyValueName(property, negate ? "N" : "Y", false, add_to);
1703 } else {
1704 // Both property name and value name are specified. Attempt to interpret
1705 // the property name as enumerated property.
1706 const char* property_name = name_1->data();
1707 const char* value_name = name_2->data();
1708 UProperty property = u_getPropertyEnum(property_name);
1709 if (!IsExactPropertyAlias(property_name, property)) return false;
1710 if (property == UCHAR_GENERAL_CATEGORY) {
1711 // We want to allow aggregate value names such as "Letter".
1712 property = UCHAR_GENERAL_CATEGORY_MASK;
1713 } else if (property != UCHAR_SCRIPT &&
1714 property != UCHAR_SCRIPT_EXTENSIONS) {
1715 return false;
1716 }
1717 return LookupPropertyValueName(property, value_name, negate, add_to);
1718 }
1719}
#define ASSERT(E)
const char *const name

◆ Advance() [1/2]

void dart::RegExpParser::Advance ( )

Definition at line 379 of file regexp_parser.cc.

379 {
380 if (has_next()) {
381 current_ = ReadNext(true);
382 } else {
383 current_ = kEndMarker;
384 // Advance so that position() points to 1 after the last character. This is
385 // important so that Reset() to this position works correctly.
386 next_pos_ = in().Length() + 1;
387 has_more_ = false;
388 }
389}
intptr_t Length() const
Definition: object.h:10210

◆ Advance() [2/2]

void dart::RegExpParser::Advance ( intptr_t  dist)

Definition at line 397 of file regexp_parser.cc.

397 {
398 next_pos_ += dist - 1;
399 Advance();
400}

◆ captures_started()

intptr_t dart::RegExpParser::captures_started ( )
inline

Definition at line 139 of file regexp_parser.h.

139{ return captures_started_; }

◆ contains_anchor()

bool dart::RegExpParser::contains_anchor ( )
inline

Definition at line 137 of file regexp_parser.h.

137{ return contains_anchor_; }

◆ GetPropertySequence()

RegExpTree * dart::RegExpParser::GetPropertySequence ( ZoneGrowableArray< char > *  name_1)

◆ is_unicode()

bool dart::RegExpParser::is_unicode ( ) const
inline

Definition at line 141 of file regexp_parser.h.

141{ return top_level_flags_.IsUnicode(); }
bool IsUnicode() const
Definition: object.h:12725

◆ IsSyntaxCharacterOrSlash()

bool dart::RegExpParser::IsSyntaxCharacterOrSlash ( uint32_t  c)
static

Definition at line 406 of file regexp_parser.cc.

406 {
407 switch (c) {
408 case '^':
409 case '$':
410 case '\\':
411 case '.':
412 case '*':
413 case '+':
414 case '?':
415 case '(':
416 case ')':
417 case '[':
418 case ']':
419 case '{':
420 case '}':
421 case '|':
422 case '/':
423 return true;
424 default:
425 break;
426 }
427 return false;
428}

◆ ParseBackReferenceIndex()

bool dart::RegExpParser::ParseBackReferenceIndex ( intptr_t *  index_out)

Definition at line 1026 of file regexp_parser.cc.

1026 {
1027 ASSERT('\\' == current());
1028 ASSERT('1' <= Next() && Next() <= '9');
1029 // Try to parse a decimal literal that is no greater than the total number
1030 // of left capturing parentheses in the input.
1031 intptr_t start = position();
1032 intptr_t value = Next() - '0';
1033 Advance(2);
1034 while (true) {
1035 uint32_t c = current();
1036 if (Utils::IsDecimalDigit(c)) {
1037 value = 10 * value + (c - '0');
1038 if (value > kMaxCaptures) {
1039 Reset(start);
1040 return false;
1041 }
1042 Advance();
1043 } else {
1044 break;
1045 }
1046 }
1047 if (value > captures_started()) {
1048 if (!is_scanned_for_captures_) ScanForCaptures();
1049 if (value > capture_count_) {
1050 Reset(start);
1051 return false;
1052 }
1053 }
1054 *index_out = value;
1055 return true;
1056}
void Reset(intptr_t pos)
static constexpr intptr_t kMaxCaptures
intptr_t captures_started()
static constexpr bool IsDecimalDigit(uint32_t c)
Definition: utils.h:386
uint8_t value

◆ ParseCharacterClass()

RegExpTree * dart::RegExpParser::ParseCharacterClass ( const RegExpBuilder *  builder)

Definition at line 1903 of file regexp_parser.cc.

1903 {
1904 static const char* kUnterminated = "Unterminated character class";
1905 static const char* kRangeInvalid = "Invalid character class";
1906 static const char* kRangeOutOfOrder = "Range out of order in character class";
1907
1908 ASSERT(current() == '[');
1909 Advance();
1910 bool is_negated = false;
1911 if (current() == '^') {
1912 is_negated = true;
1913 Advance();
1914 }
1915 ZoneGrowableArray<CharacterRange>* ranges =
1916 new (Z) ZoneGrowableArray<CharacterRange>(2);
1917 bool add_unicode_case_equivalents = is_unicode() && builder->ignore_case();
1918 while (has_more() && current() != ']') {
1919 uint32_t char_1 = 0;
1920 bool is_class_1 =
1921 ParseClassEscape(ranges, add_unicode_case_equivalents, &char_1);
1922 if (current() == '-') {
1923 Advance();
1924 if (current() == kEndMarker) {
1925 // If we reach the end we break out of the loop and let the
1926 // following code report an error.
1927 break;
1928 } else if (current() == ']') {
1929 if (!is_class_1) ranges->Add(CharacterRange::Singleton(char_1));
1930 ranges->Add(CharacterRange::Singleton('-'));
1931 break;
1932 }
1933 uint32_t char_2 = 0;
1934 bool is_class_2 =
1935 ParseClassEscape(ranges, add_unicode_case_equivalents, &char_2);
1936 if (is_class_1 || is_class_2) {
1937 // Either end is an escaped character class. Treat the '-' verbatim.
1938 if (is_unicode()) {
1939 // ES2015 21.2.2.15.1 step 1.
1940 ReportError(kRangeInvalid);
1941 UNREACHABLE();
1942 }
1943 if (!is_class_1) ranges->Add(CharacterRange::Singleton(char_1));
1944 ranges->Add(CharacterRange::Singleton('-'));
1945 if (!is_class_2) ranges->Add(CharacterRange::Singleton(char_2));
1946 continue;
1947 }
1948 if (char_1 > char_2) {
1949 ReportError(kRangeOutOfOrder);
1950 UNREACHABLE();
1951 }
1952 ranges->Add(CharacterRange::Range(char_1, char_2));
1953 } else {
1954 if (!is_class_1) ranges->Add(CharacterRange::Singleton(char_1));
1955 }
1956 }
1957 if (!has_more()) {
1958 ReportError(kUnterminated);
1959 UNREACHABLE();
1960 }
1961 Advance();
1962 RegExpCharacterClass::CharacterClassFlags character_class_flags =
1963 RegExpCharacterClass::DefaultFlags();
1964 if (is_negated) character_class_flags |= RegExpCharacterClass::NEGATED;
1965 return new (Z)
1966 RegExpCharacterClass(ranges, builder->flags(), character_class_flags);
1967}
#define UNREACHABLE()
Definition: assert.h:248
static CharacterRange Range(int32_t from, int32_t to)
Definition: regexp.h:40
static CharacterRange Singleton(int32_t value)
Definition: regexp.h:37
static CharacterClassFlags DefaultFlags()
Definition: regexp_ast.h:164
void ReportError(const char *message)
bool is_unicode() const
bool ParseClassEscape(ZoneGrowableArray< CharacterRange > *ranges, bool add_unicode_case_equivalents, uint32_t *char_out)
#define Z

◆ ParseClassCharacterEscape()

uint32_t dart::RegExpParser::ParseClassCharacterEscape ( )

Definition at line 1740 of file regexp_parser.cc.

1740 {
1741 ASSERT(current() == '\\');
1742 DEBUG_ASSERT(has_next() && !IsSpecialClassEscape(Next()));
1743 Advance();
1744 switch (current()) {
1745 case 'b':
1746 Advance();
1747 return '\b';
1748 // ControlEscape :: one of
1749 // f n r t v
1750 case 'f':
1751 Advance();
1752 return '\f';
1753 case 'n':
1754 Advance();
1755 return '\n';
1756 case 'r':
1757 Advance();
1758 return '\r';
1759 case 't':
1760 Advance();
1761 return '\t';
1762 case 'v':
1763 Advance();
1764 return '\v';
1765 case 'c': {
1766 uint32_t controlLetter = Next();
1767 uint32_t letter = controlLetter & ~('A' ^ 'a');
1768 // For compatibility with JSC, inside a character class
1769 // we also accept digits and underscore as control characters.
1770 if (letter >= 'A' && letter <= 'Z') {
1771 Advance(2);
1772 // Control letters mapped to ASCII control characters in the range
1773 // 0x00-0x1f.
1774 return controlLetter & 0x1f;
1775 }
1776 if (is_unicode()) {
1777 // With /u, \c# or \c_ are invalid.
1778 ReportError("Invalid class escape");
1779 UNREACHABLE();
1780 }
1781 if (Utils::IsDecimalDigit(controlLetter) || controlLetter == '_') {
1782 Advance(2);
1783 return controlLetter & 0x1f;
1784 }
1785 // We match JSC in reading the backslash as a literal
1786 // character instead of as starting an escape.
1787 return '\\';
1788 }
1789 case '0':
1790 // With /u, \0 is interpreted as NUL if not followed by another digit.
1791 if (is_unicode() && !(Next() >= '0' && Next() <= '9')) {
1792 Advance();
1793 return 0;
1794 }
1795 FALL_THROUGH;
1796 case '1':
1797 case '2':
1798 case '3':
1799 case '4':
1800 case '5':
1801 case '6':
1802 case '7':
1803 // For compatibility, we interpret a decimal escape that isn't
1804 // a back reference (and therefore either \0 or not valid according
1805 // to the specification) as a 1..3 digit octal character code.
1806 if (is_unicode()) {
1807 // With \u, decimal escape is not interpreted as octal character code.
1808 ReportError("Invalid class escape");
1809 UNREACHABLE();
1810 }
1811 return ParseOctalLiteral();
1812 case 'x': {
1813 Advance();
1814 uint32_t value;
1815 if (ParseHexEscape(2, &value)) {
1816 return value;
1817 }
1818 if (is_unicode()) {
1819 // With \u, invalid escapes are not treated as identity escapes.
1820 ReportError("Invalid escape");
1821 UNREACHABLE();
1822 }
1823 // If \x is not followed by a two-digit hexadecimal, treat it
1824 // as an identity escape.
1825 return 'x';
1826 }
1827 case 'u': {
1828 Advance();
1829 uint32_t value;
1830 if (ParseUnicodeEscape(&value)) {
1831 return value;
1832 }
1833 if (is_unicode()) {
1834 // With \u, invalid escapes are not treated as identity escapes.
1835 ReportError(kUnicodeIdentity);
1836 UNREACHABLE();
1837 }
1838 // If \u is not followed by a four-digit hexadecimal, treat it
1839 // as an identity escape.
1840 return 'u';
1841 }
1842 default: {
1843 // Extended identity escape. We accept any character that hasn't
1844 // been matched by a more specific case, not just the subset required
1845 // by the ECMAScript specification.
1846 uint32_t result = current();
1847 if (!is_unicode() || IsSyntaxCharacterOrSlash(result) || result == '-') {
1848 Advance();
1849 return result;
1850 }
1851 ReportError(kUnicodeIdentity);
1852 UNREACHABLE();
1853 }
1854 }
1855 return 0;
1856}
#define DEBUG_ASSERT(cond)
Definition: assert.h:321
uint32_t ParseOctalLiteral()
bool ParseHexEscape(intptr_t length, uint32_t *value)
static bool IsSyntaxCharacterOrSlash(uint32_t c)
bool ParseUnicodeEscape(uint32_t *value)
GAsyncResult * result
static const char * kUnicodeIdentity
#define FALL_THROUGH
Definition: globals.h:15

◆ ParseClassEscape()

bool dart::RegExpParser::ParseClassEscape ( ZoneGrowableArray< CharacterRange > *  ranges,
bool  add_unicode_case_equivalents,
uint32_t *  char_out 
)

Definition at line 1858 of file regexp_parser.cc.

1860 {
1861 uint32_t first = current();
1862 if (first == '\\') {
1863 switch (Next()) {
1864 case 'w':
1865 case 'W':
1866 case 'd':
1867 case 'D':
1868 case 's':
1869 case 'S': {
1870 CharacterRange::AddClassEscape(static_cast<uint16_t>(Next()), ranges,
1871 add_unicode_case_equivalents);
1872 Advance(2);
1873 return true;
1874 }
1875 case 'p':
1876 case 'P': {
1877 if (!is_unicode()) break;
1878 bool negate = Next() == 'P';
1879 Advance(2);
1880 auto name_1 = new (Z) ZoneGrowableArray<char>();
1881 auto name_2 = new (Z) ZoneGrowableArray<char>();
1882 if (!ParsePropertyClassName(name_1, name_2) ||
1883 !AddPropertyClassRange(ranges, negate, name_1, name_2)) {
1884 ReportError("Invalid property name in character class");
1885 UNREACHABLE();
1886 }
1887 return true;
1888 }
1889 case kEndMarker:
1890 ReportError("\\ at end of pattern");
1891 UNREACHABLE();
1892 default:
1893 break;
1894 }
1895 *char_out = ParseClassCharacterEscape();
1896 return false;
1897 }
1898 Advance();
1899 *char_out = first;
1900 return false;
1901}
static void AddClassEscape(uint16_t type, ZoneGrowableArray< CharacterRange > *ranges)
Definition: regexp.cc:4651
uint32_t ParseClassCharacterEscape()
bool AddPropertyClassRange(ZoneGrowableArray< CharacterRange > *add_to, bool negate, ZoneGrowableArray< char > *name_1, ZoneGrowableArray< char > *name_2)
bool ParsePropertyClassName(ZoneGrowableArray< char > *name_1, ZoneGrowableArray< char > *name_2)

◆ ParseDisjunction()

RegExpTree * dart::RegExpParser::ParseDisjunction ( )

Definition at line 479 of file regexp_parser.cc.

479 {
480 // Used to store current state while parsing subexpressions.
481 RegExpParserState initial_state(nullptr, INITIAL, RegExpLookaround::LOOKAHEAD,
482 0, nullptr, top_level_flags_, Z);
483 RegExpParserState* stored_state = &initial_state;
484 // Cache the builder in a local variable for quick access.
485 RegExpBuilder* builder = initial_state.builder();
486 while (true) {
487 switch (current()) {
488 case kEndMarker:
489 if (stored_state->IsSubexpression()) {
490 // Inside a parenthesized group when hitting end of input.
491 ReportError("Unterminated group");
492 UNREACHABLE();
493 }
494 ASSERT(INITIAL == stored_state->group_type());
495 // Parsing completed successfully.
496 return builder->ToRegExp();
497 case ')': {
498 if (!stored_state->IsSubexpression()) {
499 ReportError("Unmatched ')'");
500 UNREACHABLE();
501 }
502 ASSERT(INITIAL != stored_state->group_type());
503
504 Advance();
505 // End disjunction parsing and convert builder content to new single
506 // regexp atom.
507 RegExpTree* body = builder->ToRegExp();
508
509 intptr_t end_capture_index = captures_started();
510
511 intptr_t capture_index = stored_state->capture_index();
512 SubexpressionType group_type = stored_state->group_type();
513
514 // Build result of subexpression.
515 if (group_type == CAPTURE) {
516 if (stored_state->IsNamedCapture()) {
517 CreateNamedCaptureAtIndex(stored_state->capture_name(),
518 capture_index);
519 }
520 RegExpCapture* capture = GetCapture(capture_index);
521 capture->set_body(body);
522 body = capture;
523 } else if (group_type != GROUPING) {
524 ASSERT(group_type == POSITIVE_LOOKAROUND ||
525 group_type == NEGATIVE_LOOKAROUND);
526 bool is_positive = (group_type == POSITIVE_LOOKAROUND);
527 body = new (Z) RegExpLookaround(
528 body, is_positive, end_capture_index - capture_index,
529 capture_index, stored_state->lookaround_type());
530 }
531
532 // Restore previous state.
533 stored_state = stored_state->previous_state();
534 builder = stored_state->builder();
535
536 builder->AddAtom(body);
537 // For compatibility with JSC and ES3, we allow quantifiers after
538 // lookaheads, and break in all cases.
539 break;
540 }
541 case '|': {
542 Advance();
543 builder->NewAlternative();
544 continue;
545 }
546 case '*':
547 case '+':
548 case '?':
549 ReportError("Nothing to repeat");
550 UNREACHABLE();
551 case '^': {
552 Advance();
553 if (builder->is_multi_line()) {
554 builder->AddAssertion(new (Z) RegExpAssertion(
556 } else {
557 builder->AddAssertion(new (Z) RegExpAssertion(
560 }
561 continue;
562 }
563 case '$': {
564 Advance();
565 RegExpAssertion::AssertionType assertion_type =
566 builder->is_multi_line() ? RegExpAssertion::END_OF_LINE
567 : RegExpAssertion::END_OF_INPUT;
568 builder->AddAssertion(
569 new (Z) RegExpAssertion(assertion_type, builder->flags()));
570 continue;
571 }
572 case '.': {
573 Advance();
574 auto ranges = new (Z) ZoneGrowableArray<CharacterRange>(2);
575 if (builder->is_dot_all()) {
576 // Everything.
578 '*', ranges,
579 /*add_unicode_case_equivalents=*/false);
580 } else {
581 // everything except \x0a, \x0d, \u2028 and \u2029
583 '.', ranges,
584 /*add_unicode_case_equivalents=*/false);
585 }
586 RegExpCharacterClass* cc =
587 new (Z) RegExpCharacterClass(ranges, builder->flags());
588 builder->AddCharacterClass(cc);
589 break;
590 }
591 case '(': {
592 stored_state = ParseOpenParenthesis(stored_state);
593 builder = stored_state->builder();
594 continue;
595 }
596 case '[': {
597 RegExpTree* atom = ParseCharacterClass(builder);
598 builder->AddCharacterClass(atom->AsCharacterClass());
599 break;
600 }
601 // Atom ::
602 // \ AtomEscape
603 case '\\':
604 switch (Next()) {
605 case kEndMarker:
606 ReportError("\\ at end of pattern");
607 UNREACHABLE();
608 case 'b':
609 Advance(2);
610 builder->AddAssertion(new (Z) RegExpAssertion(
612 continue;
613 case 'B':
614 Advance(2);
615 builder->AddAssertion(new (Z) RegExpAssertion(
617 continue;
618 // AtomEscape ::
619 // CharacterClassEscape
620 //
621 // CharacterClassEscape :: one of
622 // d D s S w W
623 case 'd':
624 case 'D':
625 case 's':
626 case 'S':
627 case 'w':
628 case 'W': {
629 uint32_t c = Next();
630 Advance(2);
631 auto ranges = new (Z) ZoneGrowableArray<CharacterRange>(2);
633 c, ranges, is_unicode() && builder->ignore_case());
634 RegExpCharacterClass* cc =
635 new (Z) RegExpCharacterClass(ranges, builder->flags());
636 builder->AddCharacterClass(cc);
637 break;
638 }
639 case 'p':
640 case 'P': {
641 uint32_t p = Next();
642 Advance(2);
643
644 if (is_unicode()) {
645 auto name_1 = new (Z) ZoneGrowableArray<char>();
646 auto name_2 = new (Z) ZoneGrowableArray<char>();
647 auto ranges = new (Z) ZoneGrowableArray<CharacterRange>(2);
648 if (ParsePropertyClassName(name_1, name_2)) {
649 if (AddPropertyClassRange(ranges, p == 'P', name_1, name_2)) {
650 RegExpCharacterClass* cc =
651 new (Z) RegExpCharacterClass(ranges, builder->flags());
652 builder->AddCharacterClass(cc);
653 break;
654 }
655 }
656 ReportError("Invalid property name");
657 UNREACHABLE();
658 } else {
659 builder->AddCharacter(p);
660 }
661 break;
662 }
663 case '1':
664 case '2':
665 case '3':
666 case '4':
667 case '5':
668 case '6':
669 case '7':
670 case '8':
671 case '9': {
672 intptr_t index = 0;
673 if (ParseBackReferenceIndex(&index)) {
674 if (stored_state->IsInsideCaptureGroup(index)) {
675 // The back reference is inside the capture group it refers to.
676 // Nothing can possibly have been captured yet, so we use empty
677 // instead. This ensures that, when checking a back reference,
678 // the capture registers of the referenced capture are either
679 // both set or both cleared.
680 builder->AddEmpty();
681 } else {
682 RegExpCapture* capture = GetCapture(index);
683 RegExpTree* atom =
684 new (Z) RegExpBackReference(capture, builder->flags());
685 builder->AddAtom(atom);
686 }
687 break;
688 }
689 // With /u, no identity escapes except for syntax characters are
690 // allowed. Otherwise, all identity escapes are allowed.
691 if (is_unicode()) {
693 UNREACHABLE();
694 }
695 uint32_t first_digit = Next();
696 if (first_digit == '8' || first_digit == '9') {
697 builder->AddCharacter(first_digit);
698 Advance(2);
699 break;
700 }
701 }
702 FALL_THROUGH;
703 case '0': {
704 Advance();
705 if (is_unicode() && Next() >= '0' && Next() <= '9') {
706 // With /u, decimal escape with leading 0 are not parsed as octal.
707 ReportError("Invalid decimal escape");
708 UNREACHABLE();
709 }
710 uint32_t octal = ParseOctalLiteral();
711 builder->AddCharacter(octal);
712 break;
713 }
714 // ControlEscape :: one of
715 // f n r t v
716 case 'f':
717 Advance(2);
718 builder->AddCharacter('\f');
719 break;
720 case 'n':
721 Advance(2);
722 builder->AddCharacter('\n');
723 break;
724 case 'r':
725 Advance(2);
726 builder->AddCharacter('\r');
727 break;
728 case 't':
729 Advance(2);
730 builder->AddCharacter('\t');
731 break;
732 case 'v':
733 Advance(2);
734 builder->AddCharacter('\v');
735 break;
736 case 'c': {
737 Advance();
738 uint32_t controlLetter = Next();
739 // Special case if it is an ASCII letter.
740 // Convert lower case letters to uppercase.
741 uint32_t letter = controlLetter & ~('a' ^ 'A');
742 if (letter < 'A' || 'Z' < letter) {
743 // controlLetter is not in range 'A'-'Z' or 'a'-'z'.
744 // This is outside the specification. We match JSC in
745 // reading the backslash as a literal character instead
746 // of as starting an escape.
747 if (is_unicode()) {
748 // With /u, invalid escapes are not treated as identity escapes.
750 UNREACHABLE();
751 }
752 builder->AddCharacter('\\');
753 } else {
754 Advance(2);
755 builder->AddCharacter(controlLetter & 0x1f);
756 }
757 break;
758 }
759 case 'x': {
760 Advance(2);
761 uint32_t value;
762 if (ParseHexEscape(2, &value)) {
763 builder->AddCharacter(value);
764 } else if (!is_unicode()) {
765 builder->AddCharacter('x');
766 } else {
767 // With /u, invalid escapes are not treated as identity escapes.
769 UNREACHABLE();
770 }
771 break;
772 }
773 case 'u': {
774 Advance(2);
775 uint32_t value;
776 if (ParseUnicodeEscape(&value)) {
777 builder->AddEscapedUnicodeCharacter(value);
778 } else if (!is_unicode()) {
779 builder->AddCharacter('u');
780 } else {
781 // With /u, invalid escapes are not treated as identity escapes.
783 UNREACHABLE();
784 }
785 break;
786 }
787 case 'k':
788 // Either an identity escape or a named back-reference. The two
789 // interpretations are mutually exclusive: '\k' is interpreted as
790 // an identity escape for non-Unicode patterns without named
791 // capture groups, and as the beginning of a named back-reference
792 // in all other cases.
793 if (is_unicode() || HasNamedCaptures()) {
794 Advance(2);
795 ParseNamedBackReference(builder, stored_state);
796 break;
797 }
798 FALL_THROUGH;
799 default:
800 Advance();
801 // With the unicode flag, no identity escapes except for syntax
802 // characters are allowed. Otherwise, all identity escapes are
803 // allowed.
804 if (!is_unicode() || IsSyntaxCharacterOrSlash(current())) {
805 builder->AddCharacter(current());
806 Advance();
807 } else {
809 UNREACHABLE();
810 }
811 break;
812 }
813 break;
814 case '{': {
815 intptr_t dummy;
816 if (ParseIntervalQuantifier(&dummy, &dummy)) {
817 ReportError("Nothing to repeat");
818 UNREACHABLE();
819 }
820 }
821 FALL_THROUGH;
822 case '}':
823 case ']':
824 if (is_unicode()) {
825 ReportError("Lone quantifier brackets");
826 UNREACHABLE();
827 }
828 FALL_THROUGH;
829 default:
830 builder->AddUnicodeCharacter(current());
831 Advance();
832 break;
833 } // end switch(current())
834
835 intptr_t min;
836 intptr_t max;
837 switch (current()) {
838 // QuantifierPrefix ::
839 // *
840 // +
841 // ?
842 // {
843 case '*':
844 min = 0;
845 max = RegExpTree::kInfinity;
846 Advance();
847 break;
848 case '+':
849 min = 1;
850 max = RegExpTree::kInfinity;
851 Advance();
852 break;
853 case '?':
854 min = 0;
855 max = 1;
856 Advance();
857 break;
858 case '{':
859 if (ParseIntervalQuantifier(&min, &max)) {
860 if (max < min) {
861 ReportError("numbers out of order in {} quantifier.");
862 UNREACHABLE();
863 }
864 break;
865 } else {
866 continue;
867 }
868 default:
869 continue;
870 }
871 RegExpQuantifier::QuantifierType quantifier_type = RegExpQuantifier::GREEDY;
872 if (current() == '?') {
873 quantifier_type = RegExpQuantifier::NON_GREEDY;
874 Advance();
875 } else if (FLAG_regexp_possessive_quantifier && current() == '+') {
876 // FLAG_regexp_possessive_quantifier is a debug-only flag.
877 quantifier_type = RegExpQuantifier::POSSESSIVE;
878 Advance();
879 }
880 if (!builder->AddQuantifierToAtom(min, max, quantifier_type)) {
881 ReportError("invalid quantifier.");
882 UNREACHABLE();
883 }
884 }
885}
bool ParseIntervalQuantifier(intptr_t *min_out, intptr_t *max_out)
bool ParseBackReferenceIndex(intptr_t *index_out)
RegExpTree * ParseCharacterClass(const RegExpBuilder *builder)
static constexpr intptr_t kInfinity
Definition: regexp_ast.h:39
static float max(float r, float g, float b)
Definition: hsl.cpp:49
static float min(float r, float g, float b)
Definition: hsl.cpp:48
static constexpr bool FLAG_regexp_possessive_quantifier

◆ ParseGroup()

RegExpTree * dart::RegExpParser::ParseGroup ( )

◆ ParseHexEscape()

bool dart::RegExpParser::ParseHexEscape ( intptr_t  length,
uint32_t *  value 
)

Definition at line 1408 of file regexp_parser.cc.

1408 {
1409 intptr_t start = position();
1410 uint32_t val = 0;
1411 bool done = false;
1412 for (intptr_t i = 0; !done; i++) {
1413 uint32_t c = current();
1414 intptr_t d = HexValue(c);
1415 if (d < 0) {
1416 Reset(start);
1417 return false;
1418 }
1419 val = val * 16 + d;
1420 Advance();
1421 if (i == length - 1) {
1422 done = true;
1423 }
1424 }
1425 *value = val;
1426 return true;
1427}
static void done(const char *config, const char *src, const char *srcOptions, const char *name)
Definition: DM.cpp:263
VULKAN_HPP_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE auto & d
Definition: main.cc:19
size_t length
static int HexValue(char digit)
Definition: uri.cc:86

◆ ParseIntervalQuantifier()

bool dart::RegExpParser::ParseIntervalQuantifier ( intptr_t *  min_out,
intptr_t *  max_out 
)

Definition at line 1321 of file regexp_parser.cc.

1322 {
1323 ASSERT(current() == '{');
1324 intptr_t start = position();
1325 Advance();
1326 intptr_t min = 0;
1327 if (!Utils::IsDecimalDigit(current())) {
1328 Reset(start);
1329 return false;
1330 }
1331 while (Utils::IsDecimalDigit(current())) {
1332 intptr_t next = current() - '0';
1333 if (min > (RegExpTree::kInfinity - next) / 10) {
1334 // Overflow. Skip past remaining decimal digits and return -1.
1335 do {
1336 Advance();
1337 } while (Utils::IsDecimalDigit(current()));
1338 min = RegExpTree::kInfinity;
1339 break;
1340 }
1341 min = 10 * min + next;
1342 Advance();
1343 }
1344 intptr_t max = 0;
1345 if (current() == '}') {
1346 max = min;
1347 Advance();
1348 } else if (current() == ',') {
1349 Advance();
1350 if (current() == '}') {
1351 max = RegExpTree::kInfinity;
1352 Advance();
1353 } else {
1354 while (Utils::IsDecimalDigit(current())) {
1355 intptr_t next = current() - '0';
1356 if (max > (RegExpTree::kInfinity - next) / 10) {
1357 do {
1358 Advance();
1359 } while (Utils::IsDecimalDigit(current()));
1360 max = RegExpTree::kInfinity;
1361 break;
1362 }
1363 max = 10 * max + next;
1364 Advance();
1365 }
1366 if (current() != '}') {
1367 Reset(start);
1368 return false;
1369 }
1370 Advance();
1371 }
1372 } else {
1373 Reset(start);
1374 return false;
1375 }
1376 *min_out = min;
1377 *max_out = max;
1378 return true;
1379}
static float next(float f)

◆ ParseOctalLiteral()

uint32_t dart::RegExpParser::ParseOctalLiteral ( )

Definition at line 1381 of file regexp_parser.cc.

1381 {
1382 ASSERT(('0' <= current() && current() <= '7') || current() == kEndMarker);
1383 // For compatibility with some other browsers (not all), we parse
1384 // up to three octal digits with a value below 256.
1385 uint32_t value = current() - '0';
1386 Advance();
1387 if ('0' <= current() && current() <= '7') {
1388 value = value * 8 + current() - '0';
1389 Advance();
1390 if (value < 32 && '0' <= current() && current() <= '7') {
1391 value = value * 8 + current() - '0';
1392 Advance();
1393 }
1394 }
1395 return value;
1396}

◆ ParsePattern()

RegExpTree * dart::RegExpParser::ParsePattern ( )

Definition at line 452 of file regexp_parser.cc.

452 {
453 RegExpTree* result = ParseDisjunction();
454 PatchNamedBackReferences();
455 ASSERT(!has_more());
456 // If the result of parsing is a literal string atom, and it has the
457 // same length as the input, then the atom is identical to the input.
458 if (result->IsAtom() && result->AsAtom()->length() == in().Length()) {
459 simple_ = true;
460 }
461 return result;
462}
RegExpTree * ParseDisjunction()

◆ ParsePropertyClassName()

bool dart::RegExpParser::ParsePropertyClassName ( ZoneGrowableArray< char > *  name_1,
ZoneGrowableArray< char > *  name_2 
)

Definition at line 1641 of file regexp_parser.cc.

1642 {
1643 ASSERT(name_1->is_empty());
1644 ASSERT(name_2->is_empty());
1645 // Parse the property class as follows:
1646 // - In \p{name}, 'name' is interpreted
1647 // - either as a general category property value name.
1648 // - or as a binary property name.
1649 // - In \p{name=value}, 'name' is interpreted as an enumerated property name,
1650 // and 'value' is interpreted as one of the available property value names.
1651 // - Aliases in PropertyAlias.txt and PropertyValueAlias.txt can be used.
1652 // - Loose matching is not applied.
1653 if (current() == '{') {
1654 // Parse \p{[PropertyName=]PropertyNameValue}
1655 for (Advance(); current() != '}' && current() != '='; Advance()) {
1656 if (!IsUnicodePropertyValueCharacter(current())) return false;
1657 if (!has_next()) return false;
1658 name_1->Add(static_cast<char>(current()));
1659 }
1660 if (current() == '=') {
1661 for (Advance(); current() != '}'; Advance()) {
1662 if (!IsUnicodePropertyValueCharacter(current())) return false;
1663 if (!has_next()) return false;
1664 name_2->Add(static_cast<char>(current()));
1665 }
1666 name_2->Add(0); // null-terminate string.
1667 }
1668 } else {
1669 return false;
1670 }
1671 Advance();
1672 name_1->Add(0); // null-terminate string.
1673
1674 ASSERT(static_cast<size_t>(name_1->length() - 1) == strlen(name_1->data()));
1675 ASSERT(name_2->is_empty() ||
1676 static_cast<size_t>(name_2->length() - 1) == strlen(name_2->data()));
1677 return true;
1678}

◆ ParseRegExp()

void dart::RegExpParser::ParseRegExp ( const String &  input,
RegExpFlags  regexp_flags,
RegExpCompileData *  result 
)
static

Definition at line 1972 of file regexp_parser.cc.

1974 {
1975 ASSERT(result != nullptr);
1976 RegExpParser parser(input, &result->error, flags);
1977 // Throws an exception if 'input' is not valid.
1978 RegExpTree* tree = parser.ParsePattern();
1979 ASSERT(tree != nullptr);
1980 ASSERT(result->error.IsNull());
1981 result->tree = tree;
1982 intptr_t capture_count = parser.captures_started();
1983 result->simple = tree->IsAtom() && parser.simple() && capture_count == 0;
1984 result->contains_anchor = parser.contains_anchor();
1985 result->capture_name_map = parser.CreateCaptureNameMap();
1986 result->capture_count = capture_count;
1987}
RegExpParser(const String &in, String *error, RegExpFlags regexp_flags)
parser
Definition: zip.py:78

◆ ParseUnicodeEscape()

bool dart::RegExpParser::ParseUnicodeEscape ( uint32_t *  value)

Definition at line 1430 of file regexp_parser.cc.

1430 {
1431 // Accept both \uxxxx and \u{xxxxxx} (if harmony unicode escapes are
1432 // allowed). In the latter case, the number of hex digits between { } is
1433 // arbitrary. \ and u have already been read.
1434 if (current() == '{' && is_unicode()) {
1435 int start = position();
1436 Advance();
1437 if (ParseUnlimitedLengthHexNumber(Utf16::kMaxCodePoint, value)) {
1438 if (current() == '}') {
1439 Advance();
1440 return true;
1441 }
1442 }
1443 Reset(start);
1444 return false;
1445 }
1446 // \u but no {, or \u{...} escapes not allowed.
1447 bool result = ParseHexEscape(4, value);
1448 if (result && is_unicode() && Utf16::IsLeadSurrogate(*value) &&
1449 current() == '\\') {
1450 // Attempt to read trail surrogate.
1451 int start = position();
1452 if (Next() == 'u') {
1453 Advance(2);
1454 uint32_t trail;
1455 if (ParseHexEscape(4, &trail) && Utf16::IsTrailSurrogate(trail)) {
1456 *value = Utf16::Decode(static_cast<uint16_t>(*value),
1457 static_cast<uint16_t>(trail));
1458 return true;
1459 }
1460 }
1461 Reset(start);
1462 }
1463 return result;
1464}
bool ParseUnlimitedLengthHexNumber(uint32_t max_value, uint32_t *value)
static int32_t Decode(uint16_t lead, uint16_t trail)
Definition: unicode.h:151
static bool IsLeadSurrogate(uint32_t ch)
Definition: unicode.h:126
static bool IsTrailSurrogate(uint32_t ch)
Definition: unicode.h:131
static constexpr int32_t kMaxCodePoint
Definition: unicode.h:18

◆ ParseUnlimitedLengthHexNumber()

bool dart::RegExpParser::ParseUnlimitedLengthHexNumber ( uint32_t  max_value,
uint32_t *  value 
)

Definition at line 1721 of file regexp_parser.cc.

1722 {
1723 uint32_t x = 0;
1724 int d = HexValue(current());
1725 if (d < 0) {
1726 return false;
1727 }
1728 while (d >= 0) {
1729 x = x * 16 + d;
1730 if (x > max_value) {
1731 return false;
1732 }
1733 Advance();
1734 d = HexValue(current());
1735 }
1736 *value = x;
1737 return true;
1738}
double x

◆ position()

intptr_t dart::RegExpParser::position ( )
inline

Definition at line 140 of file regexp_parser.h.

140{ return next_pos_ - 1; }

◆ ReportError()

void dart::RegExpParser::ReportError ( const char *  message)

Definition at line 430 of file regexp_parser.cc.

430 {
431 // Zip to the end to make sure that no more input is read.
432 current_ = kEndMarker;
433 next_pos_ = in().Length();
434
435 // Throw a FormatException on parsing failures.
436 Array& args = Array::Handle();
437 String& str = String::Handle();
438 args ^= Array::New(3);
439 str ^= String::New(message);
440 args.SetAt(0, str);
441 args.SetAt(1, Symbols::Blank());
442 args.SetAt(2, in());
443 str ^= String::ConcatAll(args);
444 args ^= Array::New(1);
445 args.SetAt(0, str);
446 Exceptions::ThrowByType(Exceptions::kFormat, args);
447 UNREACHABLE();
448}
static ArrayPtr New(intptr_t len, Heap::Space space=Heap::kNew)
Definition: object.h:10959
static DART_NORETURN void ThrowByType(ExceptionType type, const Array &arguments)
Definition: exceptions.cc:1052
static Object & Handle()
Definition: object.h:407
static StringPtr ConcatAll(const Array &strings, Heap::Space space=Heap::kNew)
Definition: object.cc:24048
static StringPtr New(const char *cstr, Heap::Space space=Heap::kNew)
Definition: object.cc:23698
static const String & Blank()
Definition: symbols.h:647
G_BEGIN_DECLS G_MODULE_EXPORT FlValue * args
Win32Message message

◆ Reset()

void dart::RegExpParser::Reset ( intptr_t  pos)

Definition at line 391 of file regexp_parser.cc.

391 {
392 next_pos_ = pos;
393 has_more_ = (pos < in().Length());
394 Advance();
395}
SkPoint pos

◆ set_contains_anchor()

void dart::RegExpParser::set_contains_anchor ( )
inline

Definition at line 138 of file regexp_parser.h.

138{ contains_anchor_ = true; }

◆ simple()

bool dart::RegExpParser::simple ( )

Definition at line 402 of file regexp_parser.cc.

402 {
403 return simple_;
404}

Member Data Documentation

◆ kEndMarker

constexpr uint32_t dart::RegExpParser::kEndMarker = (1 << 21)
static constexpr

Definition at line 146 of file regexp_parser.h.

◆ kMaxCaptures

constexpr intptr_t dart::RegExpParser::kMaxCaptures = 1 << 16
static constexpr

Definition at line 145 of file regexp_parser.h.


The documentation for this class was generated from the following files: