Commit a7e9b8f0 authored by Jakob Gruber, committed by V8 LUCI CQ

[regexp] Remove BufferedZoneList

.. as a custom data structure with questionable value.

Also: a few drive-by refactors.

Change-Id: I74957b70c4357795dc46ef5520d58b6a78be31b2
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3240823
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Reviewed-by: Leszek Swirski <leszeks@chromium.org>
Cr-Commit-Position: refs/heads/main@{#77674}
parent bfa681ff
...@@ -30,90 +30,15 @@ enum class InClassEscapeState { ...@@ -30,90 +30,15 @@ enum class InClassEscapeState {
kNotInClass, kNotInClass,
}; };
// A BufferedZoneList is an automatically growing list, just like (and backed
// by) a ZoneList, that is optimized for the case of adding and removing
// a single element. The last element added is stored outside the backing list,
// and if no more than one element is ever added, the ZoneList isn't even
// allocated.
// Elements must not be nullptr pointers.
template <typename T, int initial_size>
class BufferedZoneList {
public:
BufferedZoneList() : list_(nullptr), last_(nullptr) {}
// Adds element at end of list. This element is buffered and can
// be read using last() or removed using RemoveLast until a new Add or until
// RemoveLast or GetList has been called.
void Add(T* value, Zone* zone) {
if (last_ != nullptr) {
if (list_ == nullptr) {
list_ = zone->New<ZoneList<T*>>(initial_size, zone);
}
list_->Add(last_, zone);
}
last_ = value;
}
T* last() {
DCHECK(last_ != nullptr);
return last_;
}
T* RemoveLast() {
DCHECK(last_ != nullptr);
T* result = last_;
if ((list_ != nullptr) && (list_->length() > 0))
last_ = list_->RemoveLast();
else
last_ = nullptr;
return result;
}
T* Get(int i) {
DCHECK((0 <= i) && (i < length()));
if (list_ == nullptr) {
DCHECK_EQ(0, i);
return last_;
} else {
if (i == list_->length()) {
DCHECK(last_ != nullptr);
return last_;
} else {
return list_->at(i);
}
}
}
void Clear() {
list_ = nullptr;
last_ = nullptr;
}
int length() {
int length = (list_ == nullptr) ? 0 : list_->length();
return length + ((last_ == nullptr) ? 0 : 1);
}
ZoneList<T*>* GetList(Zone* zone) {
if (list_ == nullptr) {
list_ = zone->New<ZoneList<T*>>(initial_size, zone);
}
if (last_ != nullptr) {
list_->Add(last_, zone);
last_ = nullptr;
}
return list_;
}
private:
ZoneList<T*>* list_;
T* last_;
};
// Accumulates RegExp atoms and assertions into lists of terms and alternatives. // Accumulates RegExp atoms and assertions into lists of terms and alternatives.
class RegExpBuilder : public ZoneObject { class RegExpBuilder {
public: public:
RegExpBuilder(Zone* zone, RegExpFlags flags); RegExpBuilder(Zone* zone, RegExpFlags flags)
: zone_(zone),
flags_(flags),
terms_(2, zone),
text_(2, zone),
alternatives_(2, zone) {}
void AddCharacter(base::uc16 character); void AddCharacter(base::uc16 character);
void AddUnicodeCharacter(base::uc32 character); void AddUnicodeCharacter(base::uc32 character);
void AddEscapedUnicodeCharacter(base::uc32 character); void AddEscapedUnicodeCharacter(base::uc32 character);
...@@ -149,15 +74,21 @@ class RegExpBuilder : public ZoneObject { ...@@ -149,15 +74,21 @@ class RegExpBuilder : public ZoneObject {
bool unicode() const { return IsUnicode(flags_); } bool unicode() const { return IsUnicode(flags_); }
Zone* const zone_; Zone* const zone_;
bool pending_empty_; bool pending_empty_ = false;
const RegExpFlags flags_; const RegExpFlags flags_;
ZoneList<base::uc16>* characters_; ZoneList<base::uc16>* characters_ = nullptr;
base::uc16 pending_surrogate_; base::uc16 pending_surrogate_ = kNoPendingSurrogate;
BufferedZoneList<RegExpTree, 2> terms_; ZoneList<RegExpTree*> terms_;
BufferedZoneList<RegExpTree, 2> text_; ZoneList<RegExpTree*> text_;
BufferedZoneList<RegExpTree, 2> alternatives_; ZoneList<RegExpTree*> alternatives_;
#ifdef DEBUG #ifdef DEBUG
enum {ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM} last_added_; enum {
ADD_NONE,
ADD_CHAR,
ADD_TERM,
ADD_ASSERT,
ADD_ATOM
} last_added_ = ADD_NONE;
#define LAST(x) last_added_ = x; #define LAST(x) last_added_ = x;
#else #else
#define LAST(x) #define LAST(x)
...@@ -182,7 +113,7 @@ class RegExpParserState : public ZoneObject { ...@@ -182,7 +113,7 @@ class RegExpParserState : public ZoneObject {
const ZoneVector<base::uc16>* capture_name, const ZoneVector<base::uc16>* capture_name,
RegExpFlags flags, Zone* zone) RegExpFlags flags, Zone* zone)
: previous_state_(previous_state), : previous_state_(previous_state),
builder_(zone->New<RegExpBuilder>(zone, flags)), builder_(zone, flags),
group_type_(group_type), group_type_(group_type),
lookaround_type_(lookaround_type), lookaround_type_(lookaround_type),
disjunction_capture_index_(disjunction_capture_index), disjunction_capture_index_(disjunction_capture_index),
...@@ -191,7 +122,7 @@ class RegExpParserState : public ZoneObject { ...@@ -191,7 +122,7 @@ class RegExpParserState : public ZoneObject {
RegExpParserState* previous_state() const { return previous_state_; } RegExpParserState* previous_state() const { return previous_state_; }
bool IsSubexpression() { return previous_state_ != nullptr; } bool IsSubexpression() { return previous_state_ != nullptr; }
// RegExpBuilder building this regexp's AST. // RegExpBuilder building this regexp's AST.
RegExpBuilder* builder() const { return builder_; } RegExpBuilder* builder() { return &builder_; }
// Type of regexp being parsed (parenthesized group or entire regexp). // Type of regexp being parsed (parenthesized group or entire regexp).
SubexpressionType group_type() const { return group_type_; } SubexpressionType group_type() const { return group_type_; }
// Lookahead or Lookbehind. // Lookahead or Lookbehind.
...@@ -234,7 +165,7 @@ class RegExpParserState : public ZoneObject { ...@@ -234,7 +165,7 @@ class RegExpParserState : public ZoneObject {
// Linked list implementation of stack of states. // Linked list implementation of stack of states.
RegExpParserState* const previous_state_; RegExpParserState* const previous_state_;
// Builder for the stored disjunction. // Builder for the stored disjunction.
RegExpBuilder* const builder_; RegExpBuilder builder_;
// Stored disjunction type (capture, look-ahead or grouping), if any. // Stored disjunction type (capture, look-ahead or grouping), if any.
const SubexpressionType group_type_; const SubexpressionType group_type_;
// Stored read direction. // Stored read direction.
...@@ -2073,21 +2004,6 @@ bool RegExpParserImpl<CharT>::Parse(RegExpCompileData* result) { ...@@ -2073,21 +2004,6 @@ bool RegExpParserImpl<CharT>::Parse(RegExpCompileData* result) {
return true; return true;
} }
// Creates an empty builder that allocates in |zone| and builds with |flags|.
// It starts with no pending empty term, no character buffer, no pending
// surrogate, and empty term/text/alternative lists.
RegExpBuilder::RegExpBuilder(Zone* zone, RegExpFlags flags)
    : zone_(zone),
      pending_empty_(false),
      flags_(flags),
      characters_(nullptr),
      pending_surrogate_(kNoPendingSurrogate),
      terms_(),
      text_(),
      alternatives_() {
#ifdef DEBUG
  // Debug-only bookkeeping of the kind of element added last.
  last_added_ = ADD_NONE;
#endif
}
void RegExpBuilder::AddLeadSurrogate(base::uc16 lead_surrogate) { void RegExpBuilder::AddLeadSurrogate(base::uc16 lead_surrogate) {
DCHECK(unibrow::Utf16::IsLeadSurrogate(lead_surrogate)); DCHECK(unibrow::Utf16::IsLeadSurrogate(lead_surrogate));
FlushPendingSurrogate(); FlushPendingSurrogate();
...@@ -2150,10 +2066,12 @@ void RegExpBuilder::FlushText() { ...@@ -2150,10 +2066,12 @@ void RegExpBuilder::FlushText() {
terms_.Add(text_.last(), zone()); terms_.Add(text_.last(), zone());
} else { } else {
RegExpText* text = zone()->New<RegExpText>(zone()); RegExpText* text = zone()->New<RegExpText>(zone());
for (int i = 0; i < num_text; i++) text_.Get(i)->AppendToText(text, zone()); for (int i = 0; i < num_text; i++) {
text_[i]->AppendToText(text, zone());
}
terms_.Add(text, zone()); terms_.Add(text, zone());
} }
text_.Clear(); text_.Rewind(0);
} }
void RegExpBuilder::AddCharacter(base::uc16 c) { void RegExpBuilder::AddCharacter(base::uc16 c) {
...@@ -2252,10 +2170,11 @@ void RegExpBuilder::FlushTerms() { ...@@ -2252,10 +2170,11 @@ void RegExpBuilder::FlushTerms() {
} else if (num_terms == 1) { } else if (num_terms == 1) {
alternative = terms_.last(); alternative = terms_.last();
} else { } else {
alternative = zone()->New<RegExpAlternative>(terms_.GetList(zone())); alternative = zone()->New<RegExpAlternative>(
zone()->New<ZoneList<RegExpTree*>>(terms_, zone()));
} }
alternatives_.Add(alternative, zone()); alternatives_.Add(alternative, zone());
terms_.Clear(); terms_.Rewind(0);
LAST(ADD_NONE); LAST(ADD_NONE);
} }
...@@ -2298,7 +2217,8 @@ RegExpTree* RegExpBuilder::ToRegExp() { ...@@ -2298,7 +2217,8 @@ RegExpTree* RegExpBuilder::ToRegExp() {
int num_alternatives = alternatives_.length(); int num_alternatives = alternatives_.length();
if (num_alternatives == 0) return zone()->New<RegExpEmpty>(); if (num_alternatives == 0) return zone()->New<RegExpEmpty>();
if (num_alternatives == 1) return alternatives_.last(); if (num_alternatives == 1) return alternatives_.last();
return zone()->New<RegExpDisjunction>(alternatives_.GetList(zone())); return zone()->New<RegExpDisjunction>(
zone()->New<ZoneList<RegExpTree*>>(alternatives_, zone()));
} }
bool RegExpBuilder::AddQuantifierToAtom( bool RegExpBuilder::AddQuantifierToAtom(
...@@ -2410,5 +2330,7 @@ bool RegExpParser::VerifyRegExpSyntax(Isolate* isolate, Zone* zone, ...@@ -2410,5 +2330,7 @@ bool RegExpParser::VerifyRegExpSyntax(Isolate* isolate, Zone* zone,
return ParseRegExpFromHeapString(isolate, zone, input, flags, result); return ParseRegExpFromHeapString(isolate, zone, input, flags, result);
} }
#undef LAST
} // namespace internal } // namespace internal
} // namespace v8 } // namespace v8
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment