Commit 1329d15e authored by jgruber's avatar jgruber Committed by Commit bot

[regexp] Throw on invalid capture group names in replacer string

References to invalid names (i.e. not specified as a named group in the
pattern) throw a SyntaxError. Unmatched groups are still replaced by the
empty string.

See https://github.com/tc39/proposal-regexp-named-groups/issues/14.

BUG=v8:5437

Review-Url: https://codereview.chromium.org/2791183002
Cr-Commit-Position: refs/heads/master@{#44471}
parent e97b29a4
...@@ -11529,6 +11529,8 @@ MaybeHandle<String> String::GetSubstitution(Isolate* isolate, Match* match, ...@@ -11529,6 +11529,8 @@ MaybeHandle<String> String::GetSubstitution(Isolate* isolate, Match* match,
break; break;
} }
case '<': { // $<name> - named capture case '<': { // $<name> - named capture
typedef String::Match::CaptureState CaptureState;
if (!match->HasNamedCaptures()) { if (!match->HasNamedCaptures()) {
builder.AppendCharacter('$'); builder.AppendCharacter('$');
continue_from_ix = peek_ix; continue_from_ix = peek_ix;
...@@ -11550,12 +11552,27 @@ MaybeHandle<String> String::GetSubstitution(Isolate* isolate, Match* match, ...@@ -11550,12 +11552,27 @@ MaybeHandle<String> String::GetSubstitution(Isolate* isolate, Match* match,
Handle<String> capture_name = Handle<String> capture_name =
factory->NewSubString(replacement, peek_ix + 1, closing_bracket_ix); factory->NewSubString(replacement, peek_ix + 1, closing_bracket_ix);
bool capture_exists;
Handle<String> capture; Handle<String> capture;
CaptureState capture_state;
ASSIGN_RETURN_ON_EXCEPTION( ASSIGN_RETURN_ON_EXCEPTION(
isolate, capture, isolate, capture,
match->GetNamedCapture(capture_name, &capture_exists), String); match->GetNamedCapture(capture_name, &capture_state), String);
if (capture_exists) builder.AppendString(capture);
switch (capture_state) {
case CaptureState::INVALID:
THROW_NEW_ERROR(
isolate,
NewSyntaxError(MessageTemplate::kRegExpInvalidReplaceString,
replacement),
String);
break;
case CaptureState::UNMATCHED:
break;
case CaptureState::MATCHED:
builder.AppendString(capture);
break;
}
continue_from_ix = closing_bracket_ix + 1; continue_from_ix = closing_bracket_ix + 1;
break; break;
} }
......
...@@ -8183,11 +8183,15 @@ class String: public Name { ...@@ -8183,11 +8183,15 @@ class String: public Name {
virtual Handle<String> GetPrefix() = 0; virtual Handle<String> GetPrefix() = 0;
virtual Handle<String> GetSuffix() = 0; virtual Handle<String> GetSuffix() = 0;
// A named capture can be invalid (if it is not specified in the pattern),
// unmatched (specified but not matched in the current string), and matched.
enum CaptureState { INVALID, UNMATCHED, MATCHED };
virtual int CaptureCount() = 0; virtual int CaptureCount() = 0;
virtual bool HasNamedCaptures() = 0; virtual bool HasNamedCaptures() = 0;
virtual MaybeHandle<String> GetCapture(int i, bool* capture_exists) = 0; virtual MaybeHandle<String> GetCapture(int i, bool* capture_exists) = 0;
virtual MaybeHandle<String> GetNamedCapture(Handle<String> name, virtual MaybeHandle<String> GetNamedCapture(Handle<String> name,
bool* capture_exists) = 0; CaptureState* state) = 0;
virtual ~Match() {} virtual ~Match() {}
}; };
......
...@@ -75,7 +75,6 @@ class CompiledReplacement { ...@@ -75,7 +75,6 @@ class CompiledReplacement {
SUBJECT_CAPTURE, SUBJECT_CAPTURE,
REPLACEMENT_SUBSTRING, REPLACEMENT_SUBSTRING,
REPLACEMENT_STRING, REPLACEMENT_STRING,
EMPTY,
NUMBER_OF_PART_TYPES NUMBER_OF_PART_TYPES
}; };
...@@ -100,7 +99,6 @@ class CompiledReplacement { ...@@ -100,7 +99,6 @@ class CompiledReplacement {
DCHECK(to > from); DCHECK(to > from);
return ReplacementPart(-from, to); return ReplacementPart(-from, to);
} }
static inline ReplacementPart Empty() { return ReplacementPart(EMPTY, 0); }
// If tag <= 0 then it is the negation of a start index of a substring of // If tag <= 0 then it is the negation of a start index of a substring of
// the replacement pattern, otherwise it's a value from PartType. // the replacement pattern, otherwise it's a value from PartType.
...@@ -113,8 +111,7 @@ class CompiledReplacement { ...@@ -113,8 +111,7 @@ class CompiledReplacement {
int tag; int tag;
// The data value's interpretation depends on the value of tag: // The data value's interpretation depends on the value of tag:
// tag == SUBJECT_PREFIX || // tag == SUBJECT_PREFIX ||
// tag == SUBJECT_SUFFIX || // tag == SUBJECT_SUFFIX: data is unused.
// tag == EMPTY: data is unused.
// tag == SUBJECT_CAPTURE: data is the number of the capture. // tag == SUBJECT_CAPTURE: data is the number of the capture.
// tag == REPLACEMENT_SUBSTRING || // tag == REPLACEMENT_SUBSTRING ||
// tag == REPLACEMENT_STRING: data is index into array of substrings // tag == REPLACEMENT_STRING: data is index into array of substrings
...@@ -251,30 +248,27 @@ class CompiledReplacement { ...@@ -251,30 +248,27 @@ class CompiledReplacement {
// Let capture be ? Get(namedCaptures, groupName). // Let capture be ? Get(namedCaptures, groupName).
int capture_index = LookupNamedCapture( const int capture_index = LookupNamedCapture(
[=](String* capture_name) { [=](String* capture_name) {
return capture_name->IsEqualTo(requested_name); return capture_name->IsEqualTo(requested_name);
}, },
capture_name_map); capture_name_map);
// If ? HasProperty(_namedCaptures_, _groupName_) is *false*, throw
// a *SyntaxError* exception.
if (capture_index == -1) return Nothing<bool>();
// If capture is undefined, replace the text through the following // If capture is undefined, replace the text through the following
// '>' with the empty string. // '>' with the empty string.
// Otherwise, replace the text through the following '>' with // Otherwise, replace the text through the following '>' with
// ? ToString(capture). // ? ToString(capture).
DCHECK_IMPLIES( DCHECK(1 <= capture_index && capture_index <= capture_count);
capture_index != -1,
1 <= capture_index && capture_index <= capture_count);
ReplacementPart replacement =
(capture_index == -1)
? ReplacementPart::Empty()
: ReplacementPart::SubjectCapture(capture_index);
if (i > last) { if (i > last) {
parts->Add(ReplacementPart::ReplacementSubString(last, i), zone); parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
} }
parts->Add(replacement, zone); parts->Add(ReplacementPart::SubjectCapture(capture_index), zone);
last = closing_bracket_index + 1; last = closing_bracket_index + 1;
i = closing_bracket_index; i = closing_bracket_index;
break; break;
...@@ -386,8 +380,6 @@ void CompiledReplacement::Apply(ReplacementStringBuilder* builder, ...@@ -386,8 +380,6 @@ void CompiledReplacement::Apply(ReplacementStringBuilder* builder,
case REPLACEMENT_STRING: case REPLACEMENT_STRING:
builder->AddString(replacement_substrings_[part.data]); builder->AddString(replacement_substrings_[part.data]);
break; break;
case EMPTY:
break;
default: default:
UNREACHABLE(); UNREACHABLE();
} }
...@@ -1018,19 +1010,32 @@ class MatchInfoBackedMatch : public String::Match { ...@@ -1018,19 +1010,32 @@ class MatchInfoBackedMatch : public String::Match {
} }
MaybeHandle<String> GetNamedCapture(Handle<String> name, MaybeHandle<String> GetNamedCapture(Handle<String> name,
bool* capture_exists) override { CaptureState* state) override {
DCHECK(has_named_captures_); DCHECK(has_named_captures_);
const int capture_index = LookupNamedCapture( const int capture_index = LookupNamedCapture(
[=](String* capture_name) { return capture_name->Equals(*name); }, [=](String* capture_name) { return capture_name->Equals(*name); },
*capture_name_map_); *capture_name_map_);
if (capture_index == -1) { if (capture_index == -1) {
*capture_exists = false; *state = INVALID;
return name; // Arbitrary string handle. return name; // Arbitrary string handle.
} }
DCHECK(1 <= capture_index && capture_index <= CaptureCount()); DCHECK(1 <= capture_index && capture_index <= CaptureCount());
return GetCapture(capture_index, capture_exists);
bool capture_exists;
Handle<String> capture_value;
ASSIGN_RETURN_ON_EXCEPTION(isolate_, capture_value,
GetCapture(capture_index, &capture_exists),
String);
if (!capture_exists) {
*state = UNMATCHED;
return isolate_->factory()->empty_string();
} else {
*state = MATCHED;
return capture_value;
}
} }
private: private:
...@@ -1086,16 +1091,26 @@ class VectorBackedMatch : public String::Match { ...@@ -1086,16 +1091,26 @@ class VectorBackedMatch : public String::Match {
} }
MaybeHandle<String> GetNamedCapture(Handle<String> name, MaybeHandle<String> GetNamedCapture(Handle<String> name,
bool* capture_exists) override { CaptureState* state) override {
DCHECK(has_named_captures_); DCHECK(has_named_captures_);
Maybe<bool> maybe_capture_exists =
JSReceiver::HasProperty(groups_obj_, name);
if (maybe_capture_exists.IsNothing()) return MaybeHandle<String>();
if (!maybe_capture_exists.FromJust()) {
*state = INVALID;
return name; // Arbitrary string handle.
}
Handle<Object> capture_obj; Handle<Object> capture_obj;
ASSIGN_RETURN_ON_EXCEPTION(isolate_, capture_obj, ASSIGN_RETURN_ON_EXCEPTION(isolate_, capture_obj,
Object::GetProperty(groups_obj_, name), String); Object::GetProperty(groups_obj_, name), String);
if (capture_obj->IsUndefined(isolate_)) { if (capture_obj->IsUndefined(isolate_)) {
*capture_exists = false; *state = UNMATCHED;
return name; return isolate_->factory()->empty_string();
} else { } else {
*capture_exists = true; *state = MATCHED;
return Object::ToString(isolate_, capture_obj); return Object::ToString(isolate_, capture_obj);
} }
} }
...@@ -1893,7 +1908,6 @@ RUNTIME_FUNCTION(Runtime_RegExpReplace) { ...@@ -1893,7 +1908,6 @@ RUNTIME_FUNCTION(Runtime_RegExpReplace) {
} else { } else {
DCHECK(!functional_replace); DCHECK(!functional_replace);
if (!groups_obj->IsUndefined(isolate)) { if (!groups_obj->IsUndefined(isolate)) {
// TODO(jgruber): Behavior in this case is not yet specced.
ASSIGN_RETURN_FAILURE_ON_EXCEPTION( ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
isolate, groups_obj, JSReceiver::ToObject(isolate, groups_obj)); isolate, groups_obj, JSReceiver::ToObject(isolate, groups_obj));
} }
......
...@@ -41,8 +41,9 @@ RUNTIME_FUNCTION(Runtime_GetSubstitution) { ...@@ -41,8 +41,9 @@ RUNTIME_FUNCTION(Runtime_GetSubstitution) {
return match_; // Return arbitrary string handle. return match_; // Return arbitrary string handle.
} }
MaybeHandle<String> GetNamedCapture(Handle<String> name, MaybeHandle<String> GetNamedCapture(Handle<String> name,
bool* capture_exists) override { CaptureState* state) override {
UNREACHABLE(); UNREACHABLE();
*state = INVALID;
return MaybeHandle<String>(); return MaybeHandle<String>();
} }
......
...@@ -320,56 +320,61 @@ function toSlowMode(re) { ...@@ -320,56 +320,61 @@ function toSlowMode(re) {
// @@replace with a string replacement argument (no named captures). // @@replace with a string replacement argument (no named captures).
{ {
let re = /(.)(.)/u; let re = /(.)(.)|(x)/u;
assertEquals("$<snd>$<fst>cd", "abcd".replace(re, "$<snd>$<fst>")); assertEquals("$<snd>$<fst>cd", "abcd".replace(re, "$<snd>$<fst>"));
assertEquals("bacd", "abcd".replace(re, "$2$1")); assertEquals("bacd", "abcd".replace(re, "$2$1"));
assertEquals("cd", "abcd".replace(re, "$3"));
assertEquals("$<sndcd", "abcd".replace(re, "$<snd")); assertEquals("$<sndcd", "abcd".replace(re, "$<snd"));
assertEquals("$<42a>cd", "abcd".replace(re, "$<42$1>")); assertEquals("$<42a>cd", "abcd".replace(re, "$<42$1>"));
assertEquals("$<thd>cd", "abcd".replace(re, "$<thd>")); assertEquals("$<fth>cd", "abcd".replace(re, "$<fth>"));
assertEquals("$<a>cd", "abcd".replace(re, "$<$1>")); assertEquals("$<a>cd", "abcd".replace(re, "$<$1>"));
} }
// @@replace with a string replacement argument (global, named captures). // @@replace with a string replacement argument (global, named captures).
{ {
let re = /(?<fst>.)(?<snd>.)/gu; let re = /(?<fst>.)(?<snd>.)|(?<thd>x)/gu;
assertEquals("badc", "abcd".replace(re, "$<snd>$<fst>")); assertEquals("badc", "abcd".replace(re, "$<snd>$<fst>"));
assertEquals("badc", "abcd".replace(re, "$2$1")); assertEquals("badc", "abcd".replace(re, "$2$1"));
assertThrows(() => "abcd".replace(re, "$<snd"), SyntaxError);
assertEquals("", "abcd".replace(re, "$<42$1>"));
assertEquals("", "abcd".replace(re, "$<thd>")); assertEquals("", "abcd".replace(re, "$<thd>"));
assertEquals("", "abcd".replace(re, "$<$1>")); assertThrows(() => "abcd".replace(re, "$<snd"), SyntaxError);
assertThrows(() => "abcd".replace(re, "$<42$1>"), SyntaxError);
assertThrows(() => "abcd".replace(re, "$<fth>"), SyntaxError);
assertThrows(() => "abcd".replace(re, "$<$1>"), SyntaxError);
} }
// @@replace with a string replacement argument (non-global, named captures). // @@replace with a string replacement argument (non-global, named captures).
{ {
let re = /(?<fst>.)(?<snd>.)/u; let re = /(?<fst>.)(?<snd>.)|(?<thd>x)/u;
assertEquals("bacd", "abcd".replace(re, "$<snd>$<fst>")); assertEquals("bacd", "abcd".replace(re, "$<snd>$<fst>"));
assertEquals("bacd", "abcd".replace(re, "$2$1")); assertEquals("bacd", "abcd".replace(re, "$2$1"));
assertThrows(() => "abcd".replace(re, "$<snd"), SyntaxError);
assertEquals("cd", "abcd".replace(re, "$<42$1>"));
assertEquals("cd", "abcd".replace(re, "$<thd>")); assertEquals("cd", "abcd".replace(re, "$<thd>"));
assertEquals("cd", "abcd".replace(re, "$<$1>")); assertThrows(() => "abcd".replace(re, "$<snd"), SyntaxError);
assertThrows(() => "abcd".replace(re, "$<42$1>"), SyntaxError);
assertThrows(() => "abcd".replace(re, "$<fth>"), SyntaxError);
assertThrows(() => "abcd".replace(re, "$<$1>"), SyntaxError);
} }
// @@replace with a string replacement argument (slow, global, named captures). // @@replace with a string replacement argument (slow, global, named captures).
{ {
let re = toSlowMode(/(?<fst>.)(?<snd>.)/gu); let re = toSlowMode(/(?<fst>.)(?<snd>.)|(?<thd>x)/gu);
assertEquals("badc", "abcd".replace(re, "$<snd>$<fst>")); assertEquals("badc", "abcd".replace(re, "$<snd>$<fst>"));
assertEquals("badc", "abcd".replace(re, "$2$1")); assertEquals("badc", "abcd".replace(re, "$2$1"));
assertThrows(() => "abcd".replace(re, "$<snd"), SyntaxError);
assertEquals("", "abcd".replace(re, "$<42$1>"));
assertEquals("", "abcd".replace(re, "$<thd>")); assertEquals("", "abcd".replace(re, "$<thd>"));
assertEquals("", "abcd".replace(re, "$<$1>")); assertThrows(() => "abcd".replace(re, "$<snd"), SyntaxError);
assertThrows(() => "abcd".replace(re, "$<42$1>"), SyntaxError);
assertThrows(() => "abcd".replace(re, "$<fth>"), SyntaxError);
assertThrows(() => "abcd".replace(re, "$<$1>"), SyntaxError);
} }
// @@replace with a string replacement argument (slow, non-global, // @@replace with a string replacement argument (slow, non-global,
// named captures). // named captures).
{ {
let re = toSlowMode(/(?<fst>.)(?<snd>.)/u); let re = toSlowMode(/(?<fst>.)(?<snd>.)|(?<thd>x)/u);
assertEquals("bacd", "abcd".replace(re, "$<snd>$<fst>")); assertEquals("bacd", "abcd".replace(re, "$<snd>$<fst>"));
assertEquals("bacd", "abcd".replace(re, "$2$1")); assertEquals("bacd", "abcd".replace(re, "$2$1"));
assertThrows(() => "abcd".replace(re, "$<snd"), SyntaxError);
assertEquals("cd", "abcd".replace(re, "$<42$1>"));
assertEquals("cd", "abcd".replace(re, "$<thd>")); assertEquals("cd", "abcd".replace(re, "$<thd>"));
assertEquals("cd", "abcd".replace(re, "$<$1>")); assertThrows(() => "abcd".replace(re, "$<snd"), SyntaxError);
assertThrows(() => "abcd".replace(re, "$<42$1>"), SyntaxError);
assertThrows(() => "abcd".replace(re, "$<fth>"), SyntaxError);
assertThrows(() => "abcd".replace(re, "$<$1>"), SyntaxError);
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment