Commit 9f8463e0 authored by Ana Pesko's avatar Ana Pesko Committed by Commit Bot

[regexp] Faster duplicate checking for named captures/back references

This CL changes the use of a ZoneList for keeping track of named captures
in a regular expression to a ZoneMap, to optimize finding the named
capture in the structure.

Bug: v8:9423
Change-Id: Id952ac8f86c1dc5d69a3b0251ff724d1509879dc
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1687413
Commit-Queue: Ana Pesko <anapesko@google.com>
Reviewed-by: 's avatarJakob Gruber <jgruber@chromium.org>
Reviewed-by: 's avatarPeter Marshall <petermarshall@chromium.org>
Cr-Commit-Position: refs/heads/master@{#62601}
parent a0da3d54
...@@ -880,24 +880,25 @@ bool RegExpParser::CreateNamedCaptureAtIndex(const ZoneVector<uc16>* name, ...@@ -880,24 +880,25 @@ bool RegExpParser::CreateNamedCaptureAtIndex(const ZoneVector<uc16>* name,
DCHECK(0 < index && index <= captures_started_); DCHECK(0 < index && index <= captures_started_);
DCHECK_NOT_NULL(name); DCHECK_NOT_NULL(name);
RegExpCapture* capture = GetCapture(index);
DCHECK_NULL(capture->name());
capture->set_name(name);
if (named_captures_ == nullptr) { if (named_captures_ == nullptr) {
named_captures_ = new (zone()) ZoneList<RegExpCapture*>(1, zone()); named_captures_ = new (zone_->New(sizeof(*named_captures_)))
ZoneSet<RegExpCapture*, RegExpCaptureNameLess>(zone());
} else { } else {
// Check for duplicates and bail if we find any. // Check for duplicates and bail if we find any.
// TODO(jgruber): O(n^2).
for (const auto& named_capture : *named_captures_) { const auto& named_capture_it = named_captures_->find(capture);
if (*named_capture->name() == *name) { if (named_capture_it != named_captures_->end()) {
ReportError(CStrVector("Duplicate capture group name")); ReportError(CStrVector("Duplicate capture group name"));
return false; return false;
}
} }
} }
RegExpCapture* capture = GetCapture(index); named_captures_->emplace(capture);
DCHECK_NULL(capture->name());
capture->set_name(name);
named_captures_->Add(capture, zone());
return true; return true;
} }
...@@ -944,20 +945,22 @@ void RegExpParser::PatchNamedBackReferences() { ...@@ -944,20 +945,22 @@ void RegExpParser::PatchNamedBackReferences() {
} }
// Look up and patch the actual capture for each named back reference. // Look up and patch the actual capture for each named back reference.
// TODO(jgruber): O(n^2), optimize if necessary.
for (int i = 0; i < named_back_references_->length(); i++) { for (int i = 0; i < named_back_references_->length(); i++) {
RegExpBackReference* ref = named_back_references_->at(i); RegExpBackReference* ref = named_back_references_->at(i);
int index = -1; // Capture used to search the named_captures_ by name, index of the
for (const auto& capture : *named_captures_) { // capture is never used.
if (*capture->name() == *ref->name()) { static const int kInvalidIndex = 0;
index = capture->index(); RegExpCapture* search_capture = new (zone()) RegExpCapture(kInvalidIndex);
break; DCHECK_NULL(search_capture->name());
} search_capture->set_name(ref->name());
}
if (index == -1) { int index = -1;
const auto& capture_it = named_captures_->find(search_capture);
if (capture_it != named_captures_->end()) {
index = (*capture_it)->index();
} else {
ReportError(CStrVector("Invalid named capture referenced")); ReportError(CStrVector("Invalid named capture referenced"));
return; return;
} }
...@@ -982,16 +985,17 @@ RegExpCapture* RegExpParser::GetCapture(int index) { ...@@ -982,16 +985,17 @@ RegExpCapture* RegExpParser::GetCapture(int index) {
} }
Handle<FixedArray> RegExpParser::CreateCaptureNameMap() { Handle<FixedArray> RegExpParser::CreateCaptureNameMap() {
if (named_captures_ == nullptr || named_captures_->is_empty()) if (named_captures_ == nullptr || named_captures_->empty()) {
return Handle<FixedArray>(); return Handle<FixedArray>();
}
Factory* factory = isolate()->factory(); Factory* factory = isolate()->factory();
int len = named_captures_->length() * 2; int len = static_cast<int>(named_captures_->size()) * 2;
Handle<FixedArray> array = factory->NewFixedArray(len); Handle<FixedArray> array = factory->NewFixedArray(len);
for (int i = 0; i < named_captures_->length(); i++) { int i = 0;
RegExpCapture* capture = named_captures_->at(i); for (const auto& capture : *named_captures_) {
Vector<const uc16> capture_name(capture->name()->data(), Vector<const uc16> capture_name(capture->name()->data(),
capture->name()->size()); capture->name()->size());
// CSA code in ConstructNewResultFromMatchInfo requires these strings to be // CSA code in ConstructNewResultFromMatchInfo requires these strings to be
...@@ -999,7 +1003,10 @@ Handle<FixedArray> RegExpParser::CreateCaptureNameMap() { ...@@ -999,7 +1003,10 @@ Handle<FixedArray> RegExpParser::CreateCaptureNameMap() {
Handle<String> name = factory->InternalizeString(capture_name); Handle<String> name = factory->InternalizeString(capture_name);
array->set(i * 2, *name); array->set(i * 2, *name);
array->set(i * 2 + 1, Smi::FromInt(capture->index())); array->set(i * 2 + 1, Smi::FromInt(capture->index()));
i++;
} }
DCHECK_EQ(i * 2, len);
return array; return array;
} }
......
...@@ -326,11 +326,19 @@ class V8_EXPORT_PRIVATE RegExpParser { ...@@ -326,11 +326,19 @@ class V8_EXPORT_PRIVATE RegExpParser {
FlatStringReader* in() { return in_; } FlatStringReader* in() { return in_; }
void ScanForCaptures(); void ScanForCaptures();
struct RegExpCaptureNameLess {
bool operator()(const RegExpCapture* lhs, const RegExpCapture* rhs) const {
DCHECK_NOT_NULL(lhs);
DCHECK_NOT_NULL(rhs);
return *lhs->name() < *rhs->name();
}
};
Isolate* isolate_; Isolate* isolate_;
Zone* zone_; Zone* zone_;
Handle<String>* error_; Handle<String>* error_;
ZoneList<RegExpCapture*>* captures_; ZoneList<RegExpCapture*>* captures_;
ZoneList<RegExpCapture*>* named_captures_; ZoneSet<RegExpCapture*, RegExpCaptureNameLess>* named_captures_;
ZoneList<RegExpBackReference*>* named_back_references_; ZoneList<RegExpBackReference*>* named_back_references_;
FlatStringReader* in_; FlatStringReader* in_;
uc32 current_; uc32 current_;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment