Flutter Engine
The Flutter Engine
Public Member Functions | Static Public Member Functions | Static Public Attributes | List of all members
dart::CharacterRange Class Reference

#include <regexp.h>

Public Member Functions

 CharacterRange ()
 
 CharacterRange (int32_t from, int32_t to)
 
bool Contains (int32_t i) const
 
int32_t from () const
 
void set_from (int32_t value)
 
int32_t to () const
 
void set_to (int32_t value)
 
bool is_valid () const
 
bool IsEverything (int32_t max) const
 
bool IsSingleton () const
 

Static Public Member Functions

static void AddClassEscape (uint16_t type, ZoneGrowableArray< CharacterRange > *ranges)
 
static void AddClassEscape (uint16_t type, ZoneGrowableArray< CharacterRange > *ranges, bool add_unicode_case_equivalents)
 
static GrowableArray< const intptr_t > GetWordBounds ()
 
static CharacterRange Singleton (int32_t value)
 
static CharacterRange Range (int32_t from, int32_t to)
 
static CharacterRange Everything ()
 
static ZoneGrowableArray< CharacterRange > * List (Zone *zone, CharacterRange range)
 
static void AddCaseEquivalents (ZoneGrowableArray< CharacterRange > *ranges, bool is_one_byte, Zone *zone)
 
static void Split (ZoneGrowableArray< CharacterRange > *base, GrowableArray< const intptr_t > overlay, ZoneGrowableArray< CharacterRange > **included, ZoneGrowableArray< CharacterRange > **excluded, Zone *zone)
 
static bool IsCanonical (ZoneGrowableArray< CharacterRange > *ranges)
 
static void Canonicalize (ZoneGrowableArray< CharacterRange > *ranges)
 
static void Negate (ZoneGrowableArray< CharacterRange > *src, ZoneGrowableArray< CharacterRange > *dst)
 

Static Public Attributes

static constexpr intptr_t kStartMarker = (1 << 24)
 
static constexpr intptr_t kPayloadMask = (1 << 24) - 1
 

Detailed Description

Definition at line 25 of file regexp.h.

Constructor & Destructor Documentation

◆ CharacterRange() [1/2]

dart::CharacterRange::CharacterRange ( )
inline

Definition at line 27 of file regexp.h.

27: from_(0), to_(0) {}

◆ CharacterRange() [2/2]

dart::CharacterRange::CharacterRange ( int32_t  from,
int32_t  to 
)
inline

Definition at line 28 of file regexp.h.

28: from_(from), to_(to) {}
int32_t to() const
Definition: regexp.h:56
int32_t from() const
Definition: regexp.h:54

Member Function Documentation

◆ AddCaseEquivalents()

void dart::CharacterRange::AddCaseEquivalents ( ZoneGrowableArray< CharacterRange > *  ranges,
bool  is_one_byte,
Zone *  zone 
)
static

Definition at line 4691 of file regexp.cc.

4694 {
4696 int range_count = ranges->length();
4697 for (intptr_t i = 0; i < range_count; i++) {
4698 CharacterRange range = ranges->At(i);
4699 int32_t bottom = range.from();
4700 if (bottom > Utf16::kMaxCodeUnit) continue;
4701 int32_t top = Utils::Minimum(range.to(), Utf16::kMaxCodeUnit);
4702 // Nothing to be done for surrogates
4703 if (bottom >= Utf16::kLeadSurrogateStart &&
4705 continue;
4706 }
4707 if (is_one_byte && !RangeContainsLatin1Equivalents(range)) {
4708 if (bottom > Symbols::kMaxOneCharCodeSymbol) continue;
4711 }
4712 }
4713
4717 if (top == bottom) {
4718 // If this is a singleton we just expand the one character.
4719 intptr_t length = jsregexp_uncanonicalize.get(bottom, '\0', chars);
4720 for (intptr_t i = 0; i < length; i++) {
4721 int32_t chr = chars[i];
4722 if (chr != bottom) {
4723 ranges->Add(CharacterRange::Singleton(chars[i]));
4724 }
4725 }
4726 } else {
4727 // If this is a range we expand the characters block by block,
4728 // expanding contiguous subranges (blocks) one at a time.
4729 // The approach is as follows. For a given start character we
4730 // look up the remainder of the block that contains it (represented
4731 // by the end point), for instance we find 'z' if the character
4732 // is 'c'. A block is characterized by the property
4733 // that all characters uncanonicalize in the same way, except that
4734 // each entry in the result is incremented by the distance from the first
4735 // element. So a-z is a block because 'a' uncanonicalizes to ['a', 'A']
4736 // and the k'th letter uncanonicalizes to ['a' + k, 'A' + k].
4737 // Once we've found the end point we look up its uncanonicalization
4738 // and produce a range for each element. For instance for [c-f]
4739 // we look up ['z', 'Z'] and produce [c-f] and [C-F]. We then only
4740 // add a range if it is not already contained in the input, so [c-f]
4741 // will be skipped but [C-F] will be added. If this range is not
4742 // completely contained in a block we do this for all the blocks
4743 // covered by the range (handling characters that are not in a block
4744 // as a "singleton block").
4746 intptr_t pos = bottom;
4747 while (pos <= top) {
4748 intptr_t length = jsregexp_canonrange.get(pos, '\0', range);
4749 int32_t block_end;
4750 if (length == 0) {
4751 block_end = pos;
4752 } else {
4753 ASSERT(length == 1);
4754 block_end = range[0];
4755 }
4756 intptr_t end = (block_end > top) ? top : block_end;
4757 length = jsregexp_uncanonicalize.get(block_end, '\0', range);
4758 for (intptr_t i = 0; i < length; i++) {
4759 int32_t c = range[i];
4760 int32_t range_from = c - (block_end - pos);
4761 int32_t range_to = c - (block_end - end);
4762 if (!(bottom <= range_from && range_to <= top)) {
4763 ranges->Add(CharacterRange(range_from, range_to));
4764 }
4765 }
4766 pos = end + 1;
4767 }
4768 }
4769 }
4770}
SkPoint pos
static CharacterRange Singleton(int32_t value)
Definition: regexp.h:37
static void Canonicalize(ZoneGrowableArray< CharacterRange > *ranges)
Definition: regexp.cc:4876
@ kMaxOneCharCodeSymbol
Definition: symbols.h:577
static constexpr int32_t kLeadSurrogateStart
Definition: unicode.h:159
static constexpr int32_t kMaxCodeUnit
Definition: unicode.h:158
static constexpr int32_t kTrailSurrogateEnd
Definition: unicode.h:162
static T Minimum(T x, T y)
Definition: utils.h:36
intptr_t get(int32_t c, int32_t n, int32_t *result)
Definition: unibrow-inl.h:15
#define ASSERT(E)
glong glong end
size_t length
static bool RangeContainsLatin1Equivalents(CharacterRange range)
Definition: regexp.cc:1947
static constexpr intptr_t kMaxWidth
Definition: unibrow.h:56

◆ AddClassEscape() [1/2]

void dart::CharacterRange::AddClassEscape ( uint16_t  type,
ZoneGrowableArray< CharacterRange > *  ranges 
)
static

Definition at line 4651 of file regexp.cc.

4652 {
4653 switch (type) {
4654 case 's':
4656 break;
4657 case 'S':
4659 break;
4660 case 'w':
4662 break;
4663 case 'W':
4665 break;
4666 case 'd':
4668 break;
4669 case 'D':
4671 break;
4672 case '.':
4674 break;
4675 // This is not a character range as defined by the spec but a
4676 // convenient shorthand for a character class that matches any
4677 // character.
4678 case '*':
4679 ranges->Add(CharacterRange::Everything());
4680 break;
4681 // This is the set of characters matched by the $ and ^ symbols
4682 // in multiline mode.
4683 case 'n':
4685 break;
4686 default:
4687 UNREACHABLE();
4688 }
4689}
#define UNREACHABLE()
Definition: assert.h:248
GLenum type
static CharacterRange Everything()
Definition: regexp.h:44
static constexpr intptr_t kWordRangeCount
Definition: regexp.cc:2778
static constexpr int32_t kLineTerminatorRanges[]
Definition: regexp.cc:2783
static void AddClassNegated(const int32_t *elmv, intptr_t elmc, ZoneGrowableArray< CharacterRange > *ranges)
Definition: regexp.cc:4611
static constexpr int32_t kSpaceRanges[]
Definition: regexp.cc:2771
static constexpr int32_t kWordRanges[]
Definition: regexp.cc:2776
static constexpr intptr_t kSpaceRangeCount
Definition: regexp.cc:2775
static constexpr intptr_t kDigitRangeCount
Definition: regexp.cc:2780
static void AddClass(const int32_t *elmv, intptr_t elmc, ZoneGrowableArray< CharacterRange > *ranges)
Definition: regexp.cc:4600
static constexpr intptr_t kLineTerminatorRangeCount
Definition: regexp.cc:2785
static constexpr int32_t kDigitRanges[]
Definition: regexp.cc:2779

◆ AddClassEscape() [2/2]

void dart::CharacterRange::AddClassEscape ( uint16_t  type,
ZoneGrowableArray< CharacterRange > *  ranges,
bool  add_unicode_case_equivalents 
)
static

Definition at line 4628 of file regexp.cc.

4630 {
4631 if (add_unicode_case_equivalents && (type == 'w' || type == 'W')) {
4632 // See #sec-runtime-semantics-wordcharacters-abstract-operation
4633 // In case of unicode and ignore_case, we need to create the closure over
4634 // case equivalent characters before negating.
4635 ZoneGrowableArray<CharacterRange>* new_ranges =
4636 new ZoneGrowableArray<CharacterRange>(2);
4637 AddClass(kWordRanges, kWordRangeCount, new_ranges);
4638 AddUnicodeCaseEquivalents(new_ranges);
4639 if (type == 'W') {
4640 ZoneGrowableArray<CharacterRange>* negated =
4641 new ZoneGrowableArray<CharacterRange>(2);
4642 CharacterRange::Negate(new_ranges, negated);
4643 new_ranges = negated;
4644 }
4645 ranges->AddArray(*new_ranges);
4646 return;
4647 }
4648 AddClassEscape(type, ranges);
4649}
static void AddClassEscape(uint16_t type, ZoneGrowableArray< CharacterRange > *ranges)
Definition: regexp.cc:4651
static void Negate(ZoneGrowableArray< CharacterRange > *src, ZoneGrowableArray< CharacterRange > *dst)
Definition: regexp.cc:4912
void AddUnicodeCaseEquivalents(ZoneGrowableArray< CharacterRange > *ranges)
Definition: regexp.cc:4135

◆ Canonicalize()

void dart::CharacterRange::Canonicalize ( ZoneGrowableArray< CharacterRange > *  ranges)
static

Definition at line 4876 of file regexp.cc.

4877 {
4878 if (character_ranges->length() <= 1) return;
4879 // Check whether ranges are already canonical (increasing, non-overlapping,
4880 // non-adjacent).
4881 intptr_t n = character_ranges->length();
4882 intptr_t max = character_ranges->At(0).to();
4883 intptr_t i = 1;
4884 while (i < n) {
4885 CharacterRange current = character_ranges->At(i);
4886 if (current.from() <= max + 1) {
4887 break;
4888 }
4889 max = current.to();
4890 i++;
4891 }
4892 // Canonical until the i'th range. If that's all of them, we are done.
4893 if (i == n) return;
4894
4895 // The ranges at index i and forward are not canonicalized. Make them so by
4896 // doing the equivalent of insertion sort (inserting each into the previous
4897 // list, in order).
4898 // Notice that inserting a range can reduce the number of ranges in the
4899 // result due to combining of adjacent and overlapping ranges.
4900 intptr_t read = i; // Range to insert.
4901 intptr_t num_canonical = i; // Length of canonicalized part of list.
4902 do {
4903 num_canonical = InsertRangeInCanonicalList(character_ranges, num_canonical,
4904 character_ranges->At(read));
4905 read++;
4906 } while (read < n);
4907 character_ranges->TruncateTo(num_canonical);
4908
4909 ASSERT(CharacterRange::IsCanonical(character_ranges));
4910}
static bool read(SkStream *stream, void *buffer, size_t amount)
static bool IsCanonical(ZoneGrowableArray< CharacterRange > *ranges)
Definition: regexp.cc:4772
static float max(float r, float g, float b)
Definition: hsl.cpp:49
static intptr_t InsertRangeInCanonicalList(ZoneGrowableArray< CharacterRange > *list, intptr_t count, CharacterRange insert)
Definition: regexp.cc:4811

◆ Contains()

bool dart::CharacterRange::Contains ( int32_t  i) const
inline

Definition at line 53 of file regexp.h.

53{ return from_ <= i && i <= to_; }

◆ Everything()

static CharacterRange dart::CharacterRange::Everything ( )
inline static

Definition at line 44 of file regexp.h.

44 {
45 return CharacterRange(0, Utf::kMaxCodePoint);
46 }
static constexpr int32_t kMaxCodePoint
Definition: unicode.h:18

◆ from()

int32_t dart::CharacterRange::from ( ) const
inline

Definition at line 54 of file regexp.h.

54{ return from_; }

◆ GetWordBounds()

static GrowableArray< const intptr_t > dart::CharacterRange::GetWordBounds ( )
static

◆ is_valid()

bool dart::CharacterRange::is_valid ( ) const
inline

Definition at line 58 of file regexp.h.

58{ return from_ <= to_; }

◆ IsCanonical()

bool dart::CharacterRange::IsCanonical ( ZoneGrowableArray< CharacterRange > *  ranges)
static

Definition at line 4772 of file regexp.cc.

4772 {
4773 ASSERT(ranges != nullptr);
4774 intptr_t n = ranges->length();
4775 if (n <= 1) return true;
4776 intptr_t max = ranges->At(0).to();
4777 for (intptr_t i = 1; i < n; i++) {
4778 CharacterRange next_range = ranges->At(i);
4779 if (next_range.from() <= max + 1) return false;
4780 max = next_range.to();
4781 }
4782 return true;
4783}

◆ IsEverything()

bool dart::CharacterRange::IsEverything ( int32_t  max) const
inline

Definition at line 59 of file regexp.h.

59{ return from_ == 0 && to_ >= max; }

◆ IsSingleton()

bool dart::CharacterRange::IsSingleton ( ) const
inline

Definition at line 60 of file regexp.h.

60{ return (from_ == to_); }

◆ List()

static ZoneGrowableArray< CharacterRange > * dart::CharacterRange::List ( Zone *  zone,
CharacterRange  range 
)
inline static

Definition at line 47 of file regexp.h.

48 {
49 auto list = new (zone) ZoneGrowableArray<CharacterRange>(1);
50 list->Add(range);
51 return list;
52 }

◆ Negate()

void dart::CharacterRange::Negate ( ZoneGrowableArray< CharacterRange > *  src,
ZoneGrowableArray< CharacterRange > *  dst 
)
static

Definition at line 4912 of file regexp.cc.

4913 {
4915 ASSERT(negated_ranges->length() == 0);
4916 intptr_t range_count = ranges->length();
4917 uint32_t from = 0;
4918 intptr_t i = 0;
4919 if (range_count > 0 && ranges->At(0).from() == 0) {
4920 from = ranges->At(0).to();
4921 i = 1;
4922 }
4923 while (i < range_count) {
4924 CharacterRange range = ranges->At(i);
4925 negated_ranges->Add(CharacterRange(from + 1, range.from() - 1));
4926 from = range.to();
4927 i++;
4928 }
4929 if (from < Utf::kMaxCodePoint) {
4930 negated_ranges->Add(CharacterRange(from + 1, Utf::kMaxCodePoint));
4931 }
4932}

◆ Range()

static CharacterRange dart::CharacterRange::Range ( int32_t  from,
int32_t  to 
)
inline static

Definition at line 40 of file regexp.h.

40 {
41 ASSERT(from <= to);
42 return CharacterRange(from, to);
43 }

◆ set_from()

void dart::CharacterRange::set_from ( int32_t  value)
inline

Definition at line 55 of file regexp.h.

55{ from_ = value; }
uint8_t value

◆ set_to()

void dart::CharacterRange::set_to ( int32_t  value)
inline

Definition at line 57 of file regexp.h.

57{ to_ = value; }

◆ Singleton()

static CharacterRange dart::CharacterRange::Singleton ( int32_t  value)
inlinestatic

Definition at line 37 of file regexp.h.

37 {
38 return CharacterRange(value, value);
39 }

◆ Split()

static void dart::CharacterRange::Split ( ZoneGrowableArray< CharacterRange > *  base,
GrowableArray< const intptr_t >  overlay,
ZoneGrowableArray< CharacterRange > **  included,
ZoneGrowableArray< CharacterRange > **  excluded,
Zone *  zone 
)
static

◆ to()

int32_t dart::CharacterRange::to ( ) const
inline

Definition at line 56 of file regexp.h.

56{ return to_; }

Member Data Documentation

◆ kPayloadMask

constexpr intptr_t dart::CharacterRange::kPayloadMask = (1 << 24) - 1
static constexpr

Definition at line 81 of file regexp.h.

◆ kStartMarker

constexpr intptr_t dart::CharacterRange::kStartMarker = (1 << 24)
static constexpr

Definition at line 80 of file regexp.h.


The documentation for this class was generated from the following files: