Commit 1329d15e authored by jgruber's avatar jgruber Committed by Commit bot

[regexp] Throw on invalid capture group names in replacer string

References to invalid names (i.e. not specified as a named group in the
pattern) throw a SyntaxError. Unmatched groups are still replaced by the
empty string.

See https://github.com/tc39/proposal-regexp-named-groups/issues/14.

BUG=v8:5437

Review-Url: https://codereview.chromium.org/2791183002
Cr-Commit-Position: refs/heads/master@{#44471}
parent e97b29a4
......@@ -11529,6 +11529,8 @@ MaybeHandle<String> String::GetSubstitution(Isolate* isolate, Match* match,
break;
}
case '<': { // $<name> - named capture
typedef String::Match::CaptureState CaptureState;
if (!match->HasNamedCaptures()) {
builder.AppendCharacter('$');
continue_from_ix = peek_ix;
......@@ -11550,12 +11552,27 @@ MaybeHandle<String> String::GetSubstitution(Isolate* isolate, Match* match,
Handle<String> capture_name =
factory->NewSubString(replacement, peek_ix + 1, closing_bracket_ix);
bool capture_exists;
Handle<String> capture;
CaptureState capture_state;
ASSIGN_RETURN_ON_EXCEPTION(
isolate, capture,
match->GetNamedCapture(capture_name, &capture_exists), String);
if (capture_exists) builder.AppendString(capture);
match->GetNamedCapture(capture_name, &capture_state), String);
switch (capture_state) {
case CaptureState::INVALID:
THROW_NEW_ERROR(
isolate,
NewSyntaxError(MessageTemplate::kRegExpInvalidReplaceString,
replacement),
String);
break;
case CaptureState::UNMATCHED:
break;
case CaptureState::MATCHED:
builder.AppendString(capture);
break;
}
continue_from_ix = closing_bracket_ix + 1;
break;
}
......
......@@ -8183,11 +8183,15 @@ class String: public Name {
virtual Handle<String> GetPrefix() = 0;
virtual Handle<String> GetSuffix() = 0;
// A named capture can be invalid (if it is not specified in the pattern),
// unmatched (specified but not matched in the current string), and matched.
enum CaptureState { INVALID, UNMATCHED, MATCHED };
virtual int CaptureCount() = 0;
virtual bool HasNamedCaptures() = 0;
virtual MaybeHandle<String> GetCapture(int i, bool* capture_exists) = 0;
virtual MaybeHandle<String> GetNamedCapture(Handle<String> name,
bool* capture_exists) = 0;
CaptureState* state) = 0;
virtual ~Match() {}
};
......
......@@ -75,7 +75,6 @@ class CompiledReplacement {
SUBJECT_CAPTURE,
REPLACEMENT_SUBSTRING,
REPLACEMENT_STRING,
EMPTY,
NUMBER_OF_PART_TYPES
};
......@@ -100,7 +99,6 @@ class CompiledReplacement {
DCHECK(to > from);
return ReplacementPart(-from, to);
}
static inline ReplacementPart Empty() { return ReplacementPart(EMPTY, 0); }
// If tag <= 0 then it is the negation of a start index of a substring of
// the replacement pattern, otherwise it's a value from PartType.
......@@ -113,8 +111,7 @@ class CompiledReplacement {
int tag;
// The data value's interpretation depends on the value of tag:
// tag == SUBJECT_PREFIX ||
// tag == SUBJECT_SUFFIX ||
// tag == EMPTY: data is unused.
// tag == SUBJECT_SUFFIX: data is unused.
// tag == SUBJECT_CAPTURE: data is the number of the capture.
// tag == REPLACEMENT_SUBSTRING ||
// tag == REPLACEMENT_STRING: data is index into array of substrings
......@@ -251,30 +248,27 @@ class CompiledReplacement {
// Let capture be ? Get(namedCaptures, groupName).
int capture_index = LookupNamedCapture(
const int capture_index = LookupNamedCapture(
[=](String* capture_name) {
return capture_name->IsEqualTo(requested_name);
},
capture_name_map);
// If ? HasProperty(_namedCaptures_, _groupName_) is *false*, throw
// a *SyntaxError* exception.
if (capture_index == -1) return Nothing<bool>();
// If capture is undefined, replace the text through the following
// '>' with the empty string.
// Otherwise, replace the text through the following '>' with
// ? ToString(capture).
DCHECK_IMPLIES(
capture_index != -1,
1 <= capture_index && capture_index <= capture_count);
ReplacementPart replacement =
(capture_index == -1)
? ReplacementPart::Empty()
: ReplacementPart::SubjectCapture(capture_index);
DCHECK(1 <= capture_index && capture_index <= capture_count);
if (i > last) {
parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
}
parts->Add(replacement, zone);
parts->Add(ReplacementPart::SubjectCapture(capture_index), zone);
last = closing_bracket_index + 1;
i = closing_bracket_index;
break;
......@@ -386,8 +380,6 @@ void CompiledReplacement::Apply(ReplacementStringBuilder* builder,
case REPLACEMENT_STRING:
builder->AddString(replacement_substrings_[part.data]);
break;
case EMPTY:
break;
default:
UNREACHABLE();
}
......@@ -1018,19 +1010,32 @@ class MatchInfoBackedMatch : public String::Match {
}
MaybeHandle<String> GetNamedCapture(Handle<String> name,
bool* capture_exists) override {
CaptureState* state) override {
DCHECK(has_named_captures_);
const int capture_index = LookupNamedCapture(
[=](String* capture_name) { return capture_name->Equals(*name); },
*capture_name_map_);
if (capture_index == -1) {
*capture_exists = false;
*state = INVALID;
return name; // Arbitrary string handle.
}
DCHECK(1 <= capture_index && capture_index <= CaptureCount());
return GetCapture(capture_index, capture_exists);
bool capture_exists;
Handle<String> capture_value;
ASSIGN_RETURN_ON_EXCEPTION(isolate_, capture_value,
GetCapture(capture_index, &capture_exists),
String);
if (!capture_exists) {
*state = UNMATCHED;
return isolate_->factory()->empty_string();
} else {
*state = MATCHED;
return capture_value;
}
}
private:
......@@ -1086,16 +1091,26 @@ class VectorBackedMatch : public String::Match {
}
MaybeHandle<String> GetNamedCapture(Handle<String> name,
bool* capture_exists) override {
CaptureState* state) override {
DCHECK(has_named_captures_);
Maybe<bool> maybe_capture_exists =
JSReceiver::HasProperty(groups_obj_, name);
if (maybe_capture_exists.IsNothing()) return MaybeHandle<String>();
if (!maybe_capture_exists.FromJust()) {
*state = INVALID;
return name; // Arbitrary string handle.
}
Handle<Object> capture_obj;
ASSIGN_RETURN_ON_EXCEPTION(isolate_, capture_obj,
Object::GetProperty(groups_obj_, name), String);
if (capture_obj->IsUndefined(isolate_)) {
*capture_exists = false;
return name;
*state = UNMATCHED;
return isolate_->factory()->empty_string();
} else {
*capture_exists = true;
*state = MATCHED;
return Object::ToString(isolate_, capture_obj);
}
}
......@@ -1893,7 +1908,6 @@ RUNTIME_FUNCTION(Runtime_RegExpReplace) {
} else {
DCHECK(!functional_replace);
if (!groups_obj->IsUndefined(isolate)) {
// TODO(jgruber): Behavior in this case is not yet specced.
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
isolate, groups_obj, JSReceiver::ToObject(isolate, groups_obj));
}
......
......@@ -41,8 +41,9 @@ RUNTIME_FUNCTION(Runtime_GetSubstitution) {
return match_; // Return arbitrary string handle.
}
MaybeHandle<String> GetNamedCapture(Handle<String> name,
bool* capture_exists) override {
CaptureState* state) override {
UNREACHABLE();
*state = INVALID;
return MaybeHandle<String>();
}
......
......@@ -320,56 +320,61 @@ function toSlowMode(re) {
// @@replace with a string replacement argument (no named captures).
{
let re = /(.)(.)/u;
let re = /(.)(.)|(x)/u;
assertEquals("$<snd>$<fst>cd", "abcd".replace(re, "$<snd>$<fst>"));
assertEquals("bacd", "abcd".replace(re, "$2$1"));
assertEquals("cd", "abcd".replace(re, "$3"));
assertEquals("$<sndcd", "abcd".replace(re, "$<snd"));
assertEquals("$<42a>cd", "abcd".replace(re, "$<42$1>"));
assertEquals("$<thd>cd", "abcd".replace(re, "$<thd>"));
assertEquals("$<fth>cd", "abcd".replace(re, "$<fth>"));
assertEquals("$<a>cd", "abcd".replace(re, "$<$1>"));
}
// @@replace with a string replacement argument (global, named captures).
{
let re = /(?<fst>.)(?<snd>.)/gu;
let re = /(?<fst>.)(?<snd>.)|(?<thd>x)/gu;
assertEquals("badc", "abcd".replace(re, "$<snd>$<fst>"));
assertEquals("badc", "abcd".replace(re, "$2$1"));
assertThrows(() => "abcd".replace(re, "$<snd"), SyntaxError);
assertEquals("", "abcd".replace(re, "$<42$1>"));
assertEquals("", "abcd".replace(re, "$<thd>"));
assertEquals("", "abcd".replace(re, "$<$1>"));
assertThrows(() => "abcd".replace(re, "$<snd"), SyntaxError);
assertThrows(() => "abcd".replace(re, "$<42$1>"), SyntaxError);
assertThrows(() => "abcd".replace(re, "$<fth>"), SyntaxError);
assertThrows(() => "abcd".replace(re, "$<$1>"), SyntaxError);
}
// @@replace with a string replacement argument (non-global, named captures).
{
let re = /(?<fst>.)(?<snd>.)/u;
let re = /(?<fst>.)(?<snd>.)|(?<thd>x)/u;
assertEquals("bacd", "abcd".replace(re, "$<snd>$<fst>"));
assertEquals("bacd", "abcd".replace(re, "$2$1"));
assertThrows(() => "abcd".replace(re, "$<snd"), SyntaxError);
assertEquals("cd", "abcd".replace(re, "$<42$1>"));
assertEquals("cd", "abcd".replace(re, "$<thd>"));
assertEquals("cd", "abcd".replace(re, "$<$1>"));
assertThrows(() => "abcd".replace(re, "$<snd"), SyntaxError);
assertThrows(() => "abcd".replace(re, "$<42$1>"), SyntaxError);
assertThrows(() => "abcd".replace(re, "$<fth>"), SyntaxError);
assertThrows(() => "abcd".replace(re, "$<$1>"), SyntaxError);
}
// @@replace with a string replacement argument (slow, global, named captures).
{
let re = toSlowMode(/(?<fst>.)(?<snd>.)/gu);
let re = toSlowMode(/(?<fst>.)(?<snd>.)|(?<thd>x)/gu);
assertEquals("badc", "abcd".replace(re, "$<snd>$<fst>"));
assertEquals("badc", "abcd".replace(re, "$2$1"));
assertThrows(() => "abcd".replace(re, "$<snd"), SyntaxError);
assertEquals("", "abcd".replace(re, "$<42$1>"));
assertEquals("", "abcd".replace(re, "$<thd>"));
assertEquals("", "abcd".replace(re, "$<$1>"));
assertThrows(() => "abcd".replace(re, "$<snd"), SyntaxError);
assertThrows(() => "abcd".replace(re, "$<42$1>"), SyntaxError);
assertThrows(() => "abcd".replace(re, "$<fth>"), SyntaxError);
assertThrows(() => "abcd".replace(re, "$<$1>"), SyntaxError);
}
// @@replace with a string replacement argument (slow, non-global,
// named captures).
{
let re = toSlowMode(/(?<fst>.)(?<snd>.)/u);
let re = toSlowMode(/(?<fst>.)(?<snd>.)|(?<thd>x)/u);
assertEquals("bacd", "abcd".replace(re, "$<snd>$<fst>"));
assertEquals("bacd", "abcd".replace(re, "$2$1"));
assertThrows(() => "abcd".replace(re, "$<snd"), SyntaxError);
assertEquals("cd", "abcd".replace(re, "$<42$1>"));
assertEquals("cd", "abcd".replace(re, "$<thd>"));
assertEquals("cd", "abcd".replace(re, "$<$1>"));
assertThrows(() => "abcd".replace(re, "$<snd"), SyntaxError);
assertThrows(() => "abcd".replace(re, "$<42$1>"), SyntaxError);
assertThrows(() => "abcd".replace(re, "$<fth>"), SyntaxError);
assertThrows(() => "abcd".replace(re, "$<$1>"), SyntaxError);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment