Commit 159236ec authored by jgruber's avatar jgruber Committed by Commit Bot

[regexp] Update semantics of GetSubstitution with named captures

The specced semantics of GetSubstitution are expected to change in the
case of malformed named references, or named references to nonexistent
named groups. The former will evaluate to the identity replacement of
'$<', while the latter will result in replacement by the empty string.

See also:
https://github.com/tc39/proposal-regexp-named-groups/issues/29

Bug: v8:5437, v8:6912
Cq-Include-Trybots: master.tryserver.v8:v8_linux_noi18n_rel_ng
Change-Id: I879288f775774cb0ec563f9d9129a99710efb77c
Reviewed-on: https://chromium-review.googlesource.com/708654
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Reviewed-by: 's avatarYang Guo <yangguo@chromium.org>
Cr-Commit-Position: refs/heads/master@{#48426}
parent 7a024200
......@@ -470,7 +470,6 @@ class ErrorUtils : public AllStatic {
T(ReduceNoInitial, "Reduce of empty array with no initial value") \
T(RegExpFlags, \
"Cannot supply flags when constructing one RegExp from another") \
T(RegExpInvalidReplaceString, "Invalid replacement string: '%'") \
T(RegExpNonObject, "% getter called on non-object %") \
T(RegExpNonRegExp, "% getter called on non-RegExp object") \
T(ResolverNotAFunction, "Promise resolver % is not a function") \
......
......@@ -11676,11 +11676,10 @@ MaybeHandle<String> String::GetSubstitution(Isolate* isolate, Match* match,
String::IndexOf(isolate, replacement, bracket_string, peek_ix + 1);
if (closing_bracket_ix == -1) {
THROW_NEW_ERROR(
isolate,
NewSyntaxError(MessageTemplate::kRegExpInvalidReplaceString,
replacement),
String);
// No closing bracket was found, treat '$<' as a string literal.
builder.AppendCharacter('$');
continue_from_ix = peek_ix;
break;
}
Handle<String> capture_name =
......@@ -11693,12 +11692,6 @@ MaybeHandle<String> String::GetSubstitution(Isolate* isolate, Match* match,
switch (capture_state) {
case CaptureState::INVALID:
THROW_NEW_ERROR(
isolate,
NewSyntaxError(MessageTemplate::kRegExpInvalidReplaceString,
replacement),
String);
break;
case CaptureState::UNMATCHED:
break;
case CaptureState::MATCHED:
......
......@@ -53,11 +53,9 @@ class CompiledReplacement {
explicit CompiledReplacement(Zone* zone)
: parts_(1, zone), replacement_substrings_(0, zone), zone_(zone) {}
// Return whether the replacement is simple. Can also fail and return Nothing
// if the given replacement string is invalid (and requires throwing a
// SyntaxError).
Maybe<bool> Compile(Handle<JSRegExp> regexp, Handle<String> replacement,
int capture_count, int subject_length);
// Return whether the replacement is simple.
bool Compile(Handle<JSRegExp> regexp, Handle<String> replacement,
int capture_count, int subject_length);
// Use Apply only if Compile returned false.
void Apply(ReplacementStringBuilder* builder, int match_from, int match_to,
......@@ -75,6 +73,7 @@ class CompiledReplacement {
SUBJECT_CAPTURE,
REPLACEMENT_SUBSTRING,
REPLACEMENT_STRING,
EMPTY_REPLACEMENT,
NUMBER_OF_PART_TYPES
};
......@@ -94,6 +93,9 @@ class CompiledReplacement {
static inline ReplacementPart ReplacementString() {
return ReplacementPart(REPLACEMENT_STRING, 0);
}
static inline ReplacementPart EmptyReplacement() {
return ReplacementPart(EMPTY_REPLACEMENT, 0);
}
static inline ReplacementPart ReplacementSubString(int from, int to) {
DCHECK_LE(0, from);
DCHECK_GT(to, from);
......@@ -116,6 +118,7 @@ class CompiledReplacement {
// tag == REPLACEMENT_SUBSTRING ||
// tag == REPLACEMENT_STRING: data is index into array of substrings
// of the replacement string.
// tag == EMPTY_REPLACEMENT: data is unused.
// tag <= 0: Temporary representation of the substring of the replacement
// string ranging over -tag .. data.
// Is replaced by REPLACEMENT_{SUB,}STRING when we create the
......@@ -124,11 +127,10 @@ class CompiledReplacement {
};
template <typename Char>
Maybe<bool> ParseReplacementPattern(ZoneList<ReplacementPart>* parts,
Vector<Char> characters,
FixedArray* capture_name_map,
int capture_count, int subject_length,
Zone* zone) {
bool ParseReplacementPattern(ZoneList<ReplacementPart>* parts,
Vector<Char> characters,
FixedArray* capture_name_map, int capture_count,
int subject_length, Zone* zone) {
// Equivalent to String::GetSubstitution, except that this method converts
// the replacement string into an internal representation that avoids
// repeated parsing when used repeatedly.
......@@ -228,8 +230,8 @@ class CompiledReplacement {
break;
}
// Scan until the next '>', throwing a SyntaxError exception if one
// is not found, and let the enclosed substring be groupName.
// Scan until the next '>', and let the enclosed substring be the
// groupName.
const int name_start_index = next_index + 1;
int closing_bracket_index = -1;
......@@ -240,8 +242,12 @@ class CompiledReplacement {
}
}
// Throw a SyntaxError for invalid replacement strings.
if (closing_bracket_index == -1) return Nothing<bool>();
// If no closing bracket is found, '$<' is treated as a string
// literal.
if (closing_bracket_index == -1) {
i = next_index;
break;
}
Vector<Char> requested_name =
characters.SubVector(name_start_index, closing_bracket_index);
......@@ -254,21 +260,21 @@ class CompiledReplacement {
},
capture_name_map);
// If ? HasProperty(_namedCaptures_, _groupName_) is *false*, throw
// a *SyntaxError* exception.
if (capture_index == -1) return Nothing<bool>();
// If capture is undefined, replace the text through the following
// '>' with the empty string.
// If capture is undefined or does not exist, replace the text
// through the following '>' with the empty string.
// Otherwise, replace the text through the following '>' with
// ? ToString(capture).
DCHECK(1 <= capture_index && capture_index <= capture_count);
DCHECK(capture_index == -1 ||
(1 <= capture_index && capture_index <= capture_count));
if (i > last) {
parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
}
parts->Add(ReplacementPart::SubjectCapture(capture_index), zone);
parts->Add((capture_index == -1)
? ReplacementPart::EmptyReplacement()
: ReplacementPart::SubjectCapture(capture_index),
zone);
last = closing_bracket_index + 1;
i = closing_bracket_index;
break;
......@@ -282,12 +288,12 @@ class CompiledReplacement {
if (length > last) {
if (last == 0) {
// Replacement is simple. Do not use Apply to do the replacement.
return Just(true);
return true;
} else {
parts->Add(ReplacementPart::ReplacementSubString(last, length), zone);
}
}
return Just(false);
return false;
}
ZoneList<ReplacementPart> parts_;
......@@ -295,10 +301,9 @@ class CompiledReplacement {
Zone* zone_;
};
Maybe<bool> CompiledReplacement::Compile(Handle<JSRegExp> regexp,
Handle<String> replacement,
int capture_count,
int subject_length) {
bool CompiledReplacement::Compile(Handle<JSRegExp> regexp,
Handle<String> replacement, int capture_count,
int subject_length) {
{
DisallowHeapAllocation no_gc;
String::FlatContent content = replacement->GetFlatContent();
......@@ -314,7 +319,7 @@ Maybe<bool> CompiledReplacement::Compile(Handle<JSRegExp> regexp,
}
}
Maybe<bool> simple = Nothing<bool>();
bool simple;
if (content.IsOneByte()) {
simple = ParseReplacementPattern(&parts_, content.ToOneByteVector(),
capture_name_map, capture_count,
......@@ -325,7 +330,7 @@ Maybe<bool> CompiledReplacement::Compile(Handle<JSRegExp> regexp,
capture_name_map, capture_count,
subject_length, zone());
}
if (simple.IsNothing() || simple.FromJust()) return simple;
if (simple) return true;
}
Isolate* isolate = replacement->GetIsolate();
......@@ -347,7 +352,7 @@ Maybe<bool> CompiledReplacement::Compile(Handle<JSRegExp> regexp,
substring_index++;
}
}
return Just(false);
return false;
}
......@@ -380,6 +385,8 @@ void CompiledReplacement::Apply(ReplacementStringBuilder* builder,
case REPLACEMENT_STRING:
builder->AddString(replacement_substrings_[part.data]);
break;
case EMPTY_REPLACEMENT:
break;
default:
UNREACHABLE();
}
......@@ -608,15 +615,8 @@ MUST_USE_RESULT static Object* StringReplaceGlobalRegExpWithString(
// CompiledReplacement uses zone allocation.
Zone zone(isolate->allocator(), ZONE_NAME);
CompiledReplacement compiled_replacement(&zone);
Maybe<bool> maybe_simple_replace = compiled_replacement.Compile(
const bool simple_replace = compiled_replacement.Compile(
regexp, replacement, capture_count, subject_length);
if (maybe_simple_replace.IsNothing()) {
THROW_NEW_ERROR_RETURN_FAILURE(
isolate, NewSyntaxError(MessageTemplate::kRegExpInvalidReplaceString,
replacement));
}
const bool simple_replace = maybe_simple_replace.FromJust();
// Shortcut for simple non-regexp global replacements
if (typeTag == JSRegExp::ATOM && simple_replace) {
......
......@@ -428,7 +428,7 @@ bytecodes: [
B(TestTypeOf), U8(5),
B(JumpIfFalse), U8(4),
B(Jump), U8(18),
B(Wide), B(LdaSmi), I16(138),
B(Wide), B(LdaSmi), I16(137),
B(Star), R(18),
B(LdaConstant), U8(16),
B(Star), R(19),
......
......@@ -143,7 +143,7 @@ bytecodes: [
B(TestTypeOf), U8(5),
B(JumpIfFalse), U8(4),
B(Jump), U8(18),
B(Wide), B(LdaSmi), I16(138),
B(Wide), B(LdaSmi), I16(137),
B(Star), R(19),
B(LdaConstant), U8(13),
B(Star), R(20),
......@@ -432,7 +432,7 @@ bytecodes: [
B(TestTypeOf), U8(5),
B(JumpIfFalse), U8(4),
B(Jump), U8(18),
B(Wide), B(LdaSmi), I16(138),
B(Wide), B(LdaSmi), I16(137),
B(Star), R(19),
B(LdaConstant), U8(13),
B(Star), R(20),
......@@ -743,7 +743,7 @@ bytecodes: [
B(TestTypeOf), U8(5),
B(JumpIfFalse), U8(4),
B(Jump), U8(18),
B(Wide), B(LdaSmi), I16(138),
B(Wide), B(LdaSmi), I16(137),
B(Star), R(19),
B(LdaConstant), U8(13),
B(Star), R(20),
......@@ -991,7 +991,7 @@ bytecodes: [
B(TestTypeOf), U8(5),
B(JumpIfFalse), U8(4),
B(Jump), U8(18),
B(Wide), B(LdaSmi), I16(138),
B(Wide), B(LdaSmi), I16(137),
B(Star), R(16),
B(LdaConstant), U8(10),
B(Star), R(17),
......
......@@ -86,7 +86,7 @@ bytecodes: [
B(TestTypeOf), U8(5),
B(JumpIfFalse), U8(4),
B(Jump), U8(18),
B(Wide), B(LdaSmi), I16(138),
B(Wide), B(LdaSmi), I16(137),
B(Star), R(11),
B(LdaConstant), U8(8),
B(Star), R(12),
......@@ -227,7 +227,7 @@ bytecodes: [
B(TestTypeOf), U8(5),
B(JumpIfFalse), U8(4),
B(Jump), U8(18),
B(Wide), B(LdaSmi), I16(138),
B(Wide), B(LdaSmi), I16(137),
B(Star), R(12),
B(LdaConstant), U8(8),
B(Star), R(13),
......@@ -380,7 +380,7 @@ bytecodes: [
B(TestTypeOf), U8(5),
B(JumpIfFalse), U8(4),
B(Jump), U8(18),
B(Wide), B(LdaSmi), I16(138),
B(Wide), B(LdaSmi), I16(137),
B(Star), R(11),
B(LdaConstant), U8(8),
B(Star), R(12),
......@@ -523,7 +523,7 @@ bytecodes: [
B(TestTypeOf), U8(5),
B(JumpIfFalse), U8(4),
B(Jump), U8(18),
B(Wide), B(LdaSmi), I16(138),
B(Wide), B(LdaSmi), I16(137),
B(Star), R(10),
B(LdaConstant), U8(10),
B(Star), R(11),
......
......@@ -90,7 +90,7 @@ bytecodes: [
B(TestTypeOf), U8(5),
B(JumpIfFalse), U8(4),
B(Jump), U8(18),
B(Wide), B(LdaSmi), I16(138),
B(Wide), B(LdaSmi), I16(137),
B(Star), R(13),
B(LdaConstant), U8(7),
B(Star), R(14),
......@@ -268,7 +268,7 @@ bytecodes: [
B(TestTypeOf), U8(5),
B(JumpIfFalse), U8(4),
B(Jump), U8(18),
B(Wide), B(LdaSmi), I16(138),
B(Wide), B(LdaSmi), I16(137),
B(Star), R(13),
B(LdaConstant), U8(11),
B(Star), R(14),
......@@ -422,7 +422,7 @@ bytecodes: [
B(TestTypeOf), U8(5),
B(JumpIfFalse), U8(4),
B(Jump), U8(18),
B(Wide), B(LdaSmi), I16(138),
B(Wide), B(LdaSmi), I16(137),
B(Star), R(11),
B(LdaConstant), U8(9),
B(Star), R(12),
......@@ -580,7 +580,7 @@ bytecodes: [
B(TestTypeOf), U8(5),
B(JumpIfFalse), U8(4),
B(Jump), U8(18),
B(Wide), B(LdaSmi), I16(138),
B(Wide), B(LdaSmi), I16(137),
B(Star), R(16),
B(LdaConstant), U8(9),
B(Star), R(17),
......@@ -754,7 +754,7 @@ bytecodes: [
B(TestTypeOf), U8(5),
B(JumpIfFalse), U8(4),
B(Jump), U8(18),
B(Wide), B(LdaSmi), I16(138),
B(Wide), B(LdaSmi), I16(137),
B(Star), R(16),
B(LdaConstant), U8(10),
B(Star), R(17),
......@@ -953,7 +953,7 @@ bytecodes: [
B(TestTypeOf), U8(5),
B(JumpIfFalse), U8(4),
B(Jump), U8(18),
B(Wide), B(LdaSmi), I16(138),
B(Wide), B(LdaSmi), I16(137),
B(Star), R(15),
B(LdaConstant), U8(14),
B(Star), R(16),
......@@ -1116,7 +1116,7 @@ bytecodes: [
B(TestTypeOf), U8(5),
B(JumpIfFalse), U8(4),
B(Jump), U8(18),
B(Wide), B(LdaSmi), I16(138),
B(Wide), B(LdaSmi), I16(137),
B(Star), R(20),
B(LdaConstant), U8(7),
B(Star), R(21),
......@@ -1358,7 +1358,7 @@ bytecodes: [
B(TestTypeOf), U8(5),
B(JumpIfFalse), U8(4),
B(Jump), U8(18),
B(Wide), B(LdaSmi), I16(138),
B(Wide), B(LdaSmi), I16(137),
B(Star), R(20),
B(LdaConstant), U8(9),
B(Star), R(21),
......
......@@ -257,7 +257,7 @@ bytecodes: [
B(TestTypeOf), U8(5),
B(JumpIfFalse), U8(4),
B(Jump), U8(18),
B(Wide), B(LdaSmi), I16(138),
B(Wide), B(LdaSmi), I16(137),
B(Star), R(14),
B(LdaConstant), U8(15),
B(Star), R(15),
......
......@@ -360,6 +360,7 @@ function toSlowMode(re) {
assertEquals("bacd", "abcd".replace(re, "$2$1"));
assertEquals("cd", "abcd".replace(re, "$3"));
assertEquals("$<sndcd", "abcd".replace(re, "$<snd"));
assertEquals("$<sndacd", "abcd".replace(re, "$<snd$1"));
assertEquals("$<42a>cd", "abcd".replace(re, "$<42$1>"));
assertEquals("$<fth>cd", "abcd".replace(re, "$<fth>"));
assertEquals("$<a>cd", "abcd".replace(re, "$<$1>"));
......@@ -371,10 +372,11 @@ function toSlowMode(re) {
assertEquals("badc", "abcd".replace(re, "$<snd>$<fst>"));
assertEquals("badc", "abcd".replace(re, "$2$1"));
assertEquals("", "abcd".replace(re, "$<thd>"));
assertThrows(() => "abcd".replace(re, "$<snd"), SyntaxError);
assertThrows(() => "abcd".replace(re, "$<42$1>"), SyntaxError);
assertThrows(() => "abcd".replace(re, "$<fth>"), SyntaxError);
assertThrows(() => "abcd".replace(re, "$<$1>"), SyntaxError);
assertEquals("$<snd$<snd", "abcd".replace(re, "$<snd"));
assertEquals("$<snda$<sndc", "abcd".replace(re, "$<snd$1"));
assertEquals("", "abcd".replace(re, "$<42$1>"));
assertEquals("", "abcd".replace(re, "$<fth>"));
assertEquals("", "abcd".replace(re, "$<$1>"));
}
// @@replace with a string replacement argument (non-global, named captures).
......@@ -383,10 +385,11 @@ function toSlowMode(re) {
assertEquals("bacd", "abcd".replace(re, "$<snd>$<fst>"));
assertEquals("bacd", "abcd".replace(re, "$2$1"));
assertEquals("cd", "abcd".replace(re, "$<thd>"));
assertThrows(() => "abcd".replace(re, "$<snd"), SyntaxError);
assertThrows(() => "abcd".replace(re, "$<42$1>"), SyntaxError);
assertThrows(() => "abcd".replace(re, "$<fth>"), SyntaxError);
assertThrows(() => "abcd".replace(re, "$<$1>"), SyntaxError);
assertEquals("$<sndcd", "abcd".replace(re, "$<snd"));
assertEquals("$<sndacd", "abcd".replace(re, "$<snd$1"));
assertEquals("cd", "abcd".replace(re, "$<42$1>"));
assertEquals("cd", "abcd".replace(re, "$<fth>"));
assertEquals("cd", "abcd".replace(re, "$<$1>"));
}
// @@replace with a string replacement argument (slow, global, named captures).
......@@ -395,10 +398,11 @@ function toSlowMode(re) {
assertEquals("badc", "abcd".replace(re, "$<snd>$<fst>"));
assertEquals("badc", "abcd".replace(re, "$2$1"));
assertEquals("", "abcd".replace(re, "$<thd>"));
assertThrows(() => "abcd".replace(re, "$<snd"), SyntaxError);
assertThrows(() => "abcd".replace(re, "$<42$1>"), SyntaxError);
assertThrows(() => "abcd".replace(re, "$<fth>"), SyntaxError);
assertThrows(() => "abcd".replace(re, "$<$1>"), SyntaxError);
assertEquals("$<snd$<snd", "abcd".replace(re, "$<snd"));
assertEquals("$<snda$<sndc", "abcd".replace(re, "$<snd$1"));
assertEquals("", "abcd".replace(re, "$<42$1>"));
assertEquals("", "abcd".replace(re, "$<fth>"));
assertEquals("", "abcd".replace(re, "$<$1>"));
}
// @@replace with a string replacement argument (slow, non-global,
......@@ -408,8 +412,9 @@ function toSlowMode(re) {
assertEquals("bacd", "abcd".replace(re, "$<snd>$<fst>"));
assertEquals("bacd", "abcd".replace(re, "$2$1"));
assertEquals("cd", "abcd".replace(re, "$<thd>"));
assertThrows(() => "abcd".replace(re, "$<snd"), SyntaxError);
assertThrows(() => "abcd".replace(re, "$<42$1>"), SyntaxError);
assertThrows(() => "abcd".replace(re, "$<fth>"), SyntaxError);
assertThrows(() => "abcd".replace(re, "$<$1>"), SyntaxError);
assertEquals("$<sndcd", "abcd".replace(re, "$<snd"));
assertEquals("$<sndacd", "abcd".replace(re, "$<snd$1"));
assertEquals("cd", "abcd".replace(re, "$<42$1>"));
assertEquals("cd", "abcd".replace(re, "$<fth>"));
assertEquals("cd", "abcd".replace(re, "$<$1>"));
}
......@@ -517,6 +517,10 @@
'language/statements/labeled/value-await-non-module-escaped': [FAIL],
'language/statements/labeled/value-yield-non-strict-escaped': [FAIL],
# https://bugs.chromium.org/p/v8/issues/detail?id=6912
'built-ins/RegExp/named-groups/string-replace-missing': [FAIL],
'built-ins/RegExp/named-groups/string-replace-unclosed': [FAIL],
############################ INVALID TESTS #############################
# Test makes unjustified assumptions about the number of calls to SortCompare.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment