Commit 9f8463e0 authored by Ana Pesko's avatar Ana Pesko Committed by Commit Bot

[regexp] Faster duplicate checking for named captures/back references

This CL changes the use of a ZoneList for keeping track of named captures
in a regular expression to a ZoneMap, to optimize finding the named
capture in the structure.

Bug: v8:9423
Change-Id: Id952ac8f86c1dc5d69a3b0251ff724d1509879dc
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1687413
Commit-Queue: Ana Pesko <anapesko@google.com>
Reviewed-by: 's avatarJakob Gruber <jgruber@chromium.org>
Reviewed-by: 's avatarPeter Marshall <petermarshall@chromium.org>
Cr-Commit-Position: refs/heads/master@{#62601}
parent a0da3d54
......@@ -880,24 +880,25 @@ bool RegExpParser::CreateNamedCaptureAtIndex(const ZoneVector<uc16>* name,
DCHECK(0 < index && index <= captures_started_);
DCHECK_NOT_NULL(name);
RegExpCapture* capture = GetCapture(index);
DCHECK_NULL(capture->name());
capture->set_name(name);
if (named_captures_ == nullptr) {
named_captures_ = new (zone()) ZoneList<RegExpCapture*>(1, zone());
named_captures_ = new (zone_->New(sizeof(*named_captures_)))
ZoneSet<RegExpCapture*, RegExpCaptureNameLess>(zone());
} else {
// Check for duplicates and bail if we find any.
// TODO(jgruber): O(n^2).
for (const auto& named_capture : *named_captures_) {
if (*named_capture->name() == *name) {
ReportError(CStrVector("Duplicate capture group name"));
return false;
}
const auto& named_capture_it = named_captures_->find(capture);
if (named_capture_it != named_captures_->end()) {
ReportError(CStrVector("Duplicate capture group name"));
return false;
}
}
RegExpCapture* capture = GetCapture(index);
DCHECK_NULL(capture->name());
capture->set_name(name);
named_captures_->Add(capture, zone());
named_captures_->emplace(capture);
return true;
}
......@@ -944,20 +945,22 @@ void RegExpParser::PatchNamedBackReferences() {
}
// Look up and patch the actual capture for each named back reference.
// TODO(jgruber): O(n^2), optimize if necessary.
for (int i = 0; i < named_back_references_->length(); i++) {
RegExpBackReference* ref = named_back_references_->at(i);
int index = -1;
for (const auto& capture : *named_captures_) {
if (*capture->name() == *ref->name()) {
index = capture->index();
break;
}
}
// Capture used to search the named_captures_ by name, index of the
// capture is never used.
static const int kInvalidIndex = 0;
RegExpCapture* search_capture = new (zone()) RegExpCapture(kInvalidIndex);
DCHECK_NULL(search_capture->name());
search_capture->set_name(ref->name());
if (index == -1) {
int index = -1;
const auto& capture_it = named_captures_->find(search_capture);
if (capture_it != named_captures_->end()) {
index = (*capture_it)->index();
} else {
ReportError(CStrVector("Invalid named capture referenced"));
return;
}
......@@ -982,16 +985,17 @@ RegExpCapture* RegExpParser::GetCapture(int index) {
}
Handle<FixedArray> RegExpParser::CreateCaptureNameMap() {
if (named_captures_ == nullptr || named_captures_->is_empty())
if (named_captures_ == nullptr || named_captures_->empty()) {
return Handle<FixedArray>();
}
Factory* factory = isolate()->factory();
int len = named_captures_->length() * 2;
int len = static_cast<int>(named_captures_->size()) * 2;
Handle<FixedArray> array = factory->NewFixedArray(len);
for (int i = 0; i < named_captures_->length(); i++) {
RegExpCapture* capture = named_captures_->at(i);
int i = 0;
for (const auto& capture : *named_captures_) {
Vector<const uc16> capture_name(capture->name()->data(),
capture->name()->size());
// CSA code in ConstructNewResultFromMatchInfo requires these strings to be
......@@ -999,7 +1003,10 @@ Handle<FixedArray> RegExpParser::CreateCaptureNameMap() {
Handle<String> name = factory->InternalizeString(capture_name);
array->set(i * 2, *name);
array->set(i * 2 + 1, Smi::FromInt(capture->index()));
i++;
}
DCHECK_EQ(i * 2, len);
return array;
}
......
......@@ -326,11 +326,19 @@ class V8_EXPORT_PRIVATE RegExpParser {
FlatStringReader* in() { return in_; }
void ScanForCaptures();
struct RegExpCaptureNameLess {
bool operator()(const RegExpCapture* lhs, const RegExpCapture* rhs) const {
DCHECK_NOT_NULL(lhs);
DCHECK_NOT_NULL(rhs);
return *lhs->name() < *rhs->name();
}
};
Isolate* isolate_;
Zone* zone_;
Handle<String>* error_;
ZoneList<RegExpCapture*>* captures_;
ZoneList<RegExpCapture*>* named_captures_;
ZoneSet<RegExpCapture*, RegExpCaptureNameLess>* named_captures_;
ZoneList<RegExpBackReference*>* named_back_references_;
FlatStringReader* in_;
uc32 current_;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment