Commit c0fe56e6 authored by jgruber's avatar jgruber Committed by Commit bot

[regexp] Correct lastIndex behavior in RegExp.prototype[@@replace]

@@replace has a pretty complex implementation, taking different paths
for various situations (e.g.: global/nonglobal regexp, functional/string
replace argument, etc.). Each of these paths must implement similar
logic for calling into the RegExpBuiltinExec spec operation, and many
paths get this subtly wrong.

This CL fixes a couple of issues related to the way @@replace handles lastIndex:
* All paths now respect lastIndex when calling into exec (some used to assume 0).
* lastIndex is now advanced after a successful match for sticky regexps.
* lastIndex is now only reset to 0 on failure for sticky regexps.

BUG=v8:5361

Review-Url: https://codereview.chromium.org/2685183003
Cr-Commit-Position: refs/heads/master@{#43234}
parent 4e4a968e
...@@ -238,9 +238,6 @@ Node* RegExpBuiltinsAssembler::RegExpPrototypeExecBodyWithoutResult( ...@@ -238,9 +238,6 @@ Node* RegExpBuiltinsAssembler::RegExpPrototypeExecBodyWithoutResult(
Variable var_result(this, MachineRepresentation::kTagged); Variable var_result(this, MachineRepresentation::kTagged);
Label out(this); Label out(this);
Node* const native_context = LoadNativeContext(context);
Node* const string_length = LoadStringLength(string);
// Load lastIndex. // Load lastIndex.
Variable var_lastindex(this, MachineRepresentation::kTagged); Variable var_lastindex(this, MachineRepresentation::kTagged);
{ {
...@@ -282,6 +279,7 @@ Node* RegExpBuiltinsAssembler::RegExpPrototypeExecBodyWithoutResult( ...@@ -282,6 +279,7 @@ Node* RegExpBuiltinsAssembler::RegExpPrototypeExecBodyWithoutResult(
Label if_isoob(this, Label::kDeferred); Label if_isoob(this, Label::kDeferred);
GotoUnless(TaggedIsSmi(lastindex), &if_isoob); GotoUnless(TaggedIsSmi(lastindex), &if_isoob);
Node* const string_length = LoadStringLength(string);
GotoUnless(SmiLessThanOrEqual(lastindex, string_length), &if_isoob); GotoUnless(SmiLessThanOrEqual(lastindex, string_length), &if_isoob);
Goto(&run_exec); Goto(&run_exec);
...@@ -305,6 +303,7 @@ Node* RegExpBuiltinsAssembler::RegExpPrototypeExecBodyWithoutResult( ...@@ -305,6 +303,7 @@ Node* RegExpBuiltinsAssembler::RegExpPrototypeExecBodyWithoutResult(
Bind(&run_exec); Bind(&run_exec);
{ {
// Get last match info from the context. // Get last match info from the context.
Node* const native_context = LoadNativeContext(context);
Node* const last_match_info = LoadContextElement( Node* const last_match_info = LoadContextElement(
native_context, Context::REGEXP_LAST_MATCH_INFO_INDEX); native_context, Context::REGEXP_LAST_MATCH_INFO_INDEX);
...@@ -2337,9 +2336,6 @@ Node* RegExpBuiltinsAssembler::ReplaceSimpleStringFastPath( ...@@ -2337,9 +2336,6 @@ Node* RegExpBuiltinsAssembler::ReplaceSimpleStringFastPath(
// ToString({replace_value}) does not contain '$', i.e. we're doing a simple // ToString({replace_value}) does not contain '$', i.e. we're doing a simple
// string replacement. // string replacement.
Isolate* const isolate = this->isolate();
Node* const null = NullConstant();
Node* const int_zero = IntPtrConstant(0); Node* const int_zero = IntPtrConstant(0);
Node* const smi_zero = SmiConstant(Smi::kZero); Node* const smi_zero = SmiConstant(Smi::kZero);
...@@ -2372,21 +2368,11 @@ Node* RegExpBuiltinsAssembler::ReplaceSimpleStringFastPath( ...@@ -2372,21 +2368,11 @@ Node* RegExpBuiltinsAssembler::ReplaceSimpleStringFastPath(
Bind(&if_isnonglobal); Bind(&if_isnonglobal);
{ {
// Run exec, then manually construct the resulting string. // Run exec, then manually construct the resulting string.
Callable exec_callable = CodeFactory::RegExpExec(isolate); Label if_didnotmatch(this);
Node* const match_indices = CallStub(exec_callable, context, regexp, string, Node* const match_indices = RegExpPrototypeExecBodyWithoutResult(
smi_zero, last_match_info); context, regexp, string, &if_didnotmatch, true);
Label if_matched(this), if_didnotmatch(this);
Branch(WordEqual(match_indices, null), &if_didnotmatch, &if_matched);
Bind(&if_didnotmatch);
{
FastStoreLastIndex(regexp, smi_zero);
var_result.Bind(string);
Goto(&out);
}
Bind(&if_matched); // Successful match.
{ {
Node* const subject_start = smi_zero; Node* const subject_start = smi_zero;
Node* const match_start = LoadFixedArrayElement( Node* const match_start = LoadFixedArrayElement(
...@@ -2430,6 +2416,12 @@ Node* RegExpBuiltinsAssembler::ReplaceSimpleStringFastPath( ...@@ -2430,6 +2416,12 @@ Node* RegExpBuiltinsAssembler::ReplaceSimpleStringFastPath(
Goto(&out); Goto(&out);
} }
} }
Bind(&if_didnotmatch);
{
var_result.Bind(string);
Goto(&out);
}
} }
Bind(&out); Bind(&out);
...@@ -2506,6 +2498,24 @@ TF_BUILTIN(RegExpPrototypeReplace, RegExpBuiltinsAssembler) { ...@@ -2506,6 +2498,24 @@ TF_BUILTIN(RegExpPrototypeReplace, RegExpBuiltinsAssembler) {
Node* const replace_value = Parameter(2); Node* const replace_value = Parameter(2);
Node* const context = Parameter(5); Node* const context = Parameter(5);
// RegExpPrototypeReplace is a bit of a beast - a summary of dispatch logic:
//
// if (!IsFastRegExp(receiver)) CallRuntime(RegExpReplace)
// if (IsCallable(replace)) {
// if (IsGlobal(receiver)) {
// // Called 'fast-path' but contains several runtime calls.
// ReplaceGlobalCallableFastPath()
// } else {
// CallRuntime(StringReplaceNonGlobalRegExpWithFunction)
// }
// } else {
// if (replace.contains("$")) {
// CallRuntime(RegExpReplace)
// } else {
// ReplaceSimpleStringFastPath() // Bails to runtime for global regexps.
// }
// }
// Ensure {maybe_receiver} is a JSReceiver. // Ensure {maybe_receiver} is a JSReceiver.
Node* const map = ThrowIfNotJSReceiver( Node* const map = ThrowIfNotJSReceiver(
context, maybe_receiver, MessageTemplate::kIncompatibleMethodReceiver, context, maybe_receiver, MessageTemplate::kIncompatibleMethodReceiver,
......
...@@ -1084,15 +1084,32 @@ MUST_USE_RESULT MaybeHandle<String> StringReplaceNonGlobalRegExpWithFunction( ...@@ -1084,15 +1084,32 @@ MUST_USE_RESULT MaybeHandle<String> StringReplaceNonGlobalRegExpWithFunction(
Factory* factory = isolate->factory(); Factory* factory = isolate->factory();
Handle<RegExpMatchInfo> last_match_info = isolate->regexp_last_match_info(); Handle<RegExpMatchInfo> last_match_info = isolate->regexp_last_match_info();
// TODO(jgruber): This is a pattern we could refactor. const int flags = regexp->GetFlags();
DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
DCHECK_EQ(flags & JSRegExp::kGlobal, 0);
// TODO(jgruber): This should be an easy port to CSA with massive payback.
const bool sticky = (flags & JSRegExp::kSticky) != 0;
uint32_t last_index = 0;
if (sticky) {
Handle<Object> last_index_obj(regexp->LastIndex(), isolate);
ASSIGN_RETURN_ON_EXCEPTION(isolate, last_index_obj,
Object::ToLength(isolate, last_index_obj),
String);
last_index = PositiveNumberToUint32(*last_index_obj);
if (static_cast<int>(last_index) > subject->length()) last_index = 0;
}
Handle<Object> match_indices_obj; Handle<Object> match_indices_obj;
ASSIGN_RETURN_ON_EXCEPTION( ASSIGN_RETURN_ON_EXCEPTION(
isolate, match_indices_obj, isolate, match_indices_obj,
RegExpImpl::Exec(regexp, subject, 0, last_match_info), String); RegExpImpl::Exec(regexp, subject, last_index, last_match_info), String);
if (match_indices_obj->IsNull(isolate)) { if (match_indices_obj->IsNull(isolate)) {
RETURN_ON_EXCEPTION(isolate, RegExpUtils::SetLastIndex(isolate, regexp, 0), if (sticky) regexp->SetLastIndex(0);
String);
return subject; return subject;
} }
...@@ -1102,6 +1119,8 @@ MUST_USE_RESULT MaybeHandle<String> StringReplaceNonGlobalRegExpWithFunction( ...@@ -1102,6 +1119,8 @@ MUST_USE_RESULT MaybeHandle<String> StringReplaceNonGlobalRegExpWithFunction(
const int index = match_indices->Capture(0); const int index = match_indices->Capture(0);
const int end_of_match = match_indices->Capture(1); const int end_of_match = match_indices->Capture(1);
if (sticky) regexp->SetLastIndex(end_of_match);
IncrementalStringBuilder builder(isolate); IncrementalStringBuilder builder(isolate);
builder.AppendString(factory->NewSubString(subject, 0, index)); builder.AppendString(factory->NewSubString(subject, 0, index));
...@@ -1153,10 +1172,9 @@ MUST_USE_RESULT MaybeHandle<String> RegExpReplace(Isolate* isolate, ...@@ -1153,10 +1172,9 @@ MUST_USE_RESULT MaybeHandle<String> RegExpReplace(Isolate* isolate,
Handle<Object> replace_obj) { Handle<Object> replace_obj) {
Factory* factory = isolate->factory(); Factory* factory = isolate->factory();
// TODO(jgruber): We need the even stricter guarantee of an unmodified
// JSRegExp map here for access to GetFlags to be legal.
const int flags = regexp->GetFlags(); const int flags = regexp->GetFlags();
const bool global = (flags & JSRegExp::kGlobal) != 0; const bool global = (flags & JSRegExp::kGlobal) != 0;
const bool sticky = (flags & JSRegExp::kSticky) != 0;
// Functional fast-paths are dispatched directly by replace builtin. // Functional fast-paths are dispatched directly by replace builtin.
DCHECK(!replace_obj->IsCallable()); DCHECK(!replace_obj->IsCallable());
...@@ -1171,14 +1189,24 @@ MUST_USE_RESULT MaybeHandle<String> RegExpReplace(Isolate* isolate, ...@@ -1171,14 +1189,24 @@ MUST_USE_RESULT MaybeHandle<String> RegExpReplace(Isolate* isolate,
if (!global) { if (!global) {
// Non-global regexp search, string replace. // Non-global regexp search, string replace.
uint32_t last_index = 0;
if (sticky) {
Handle<Object> last_index_obj(regexp->LastIndex(), isolate);
ASSIGN_RETURN_ON_EXCEPTION(isolate, last_index_obj,
Object::ToLength(isolate, last_index_obj),
String);
last_index = PositiveNumberToUint32(*last_index_obj);
if (static_cast<int>(last_index) > string->length()) last_index = 0;
}
Handle<Object> match_indices_obj; Handle<Object> match_indices_obj;
ASSIGN_RETURN_ON_EXCEPTION( ASSIGN_RETURN_ON_EXCEPTION(
isolate, match_indices_obj, isolate, match_indices_obj,
RegExpImpl::Exec(regexp, string, 0, last_match_info), String); RegExpImpl::Exec(regexp, string, last_index, last_match_info), String);
if (match_indices_obj->IsNull(isolate)) { if (match_indices_obj->IsNull(isolate)) {
RETURN_ON_EXCEPTION( if (sticky) regexp->SetLastIndex(0);
isolate, RegExpUtils::SetLastIndex(isolate, regexp, 0), String);
return string; return string;
} }
...@@ -1187,6 +1215,8 @@ MUST_USE_RESULT MaybeHandle<String> RegExpReplace(Isolate* isolate, ...@@ -1187,6 +1215,8 @@ MUST_USE_RESULT MaybeHandle<String> RegExpReplace(Isolate* isolate,
const int start_index = match_indices->Capture(0); const int start_index = match_indices->Capture(0);
const int end_index = match_indices->Capture(1); const int end_index = match_indices->Capture(1);
if (sticky) regexp->SetLastIndex(end_index);
IncrementalStringBuilder builder(isolate); IncrementalStringBuilder builder(isolate);
builder.AppendString(factory->NewSubString(string, 0, start_index)); builder.AppendString(factory->NewSubString(string, 0, start_index));
...@@ -1268,6 +1298,8 @@ RUNTIME_FUNCTION(Runtime_StringReplaceNonGlobalRegExpWithFunction) { ...@@ -1268,6 +1298,8 @@ RUNTIME_FUNCTION(Runtime_StringReplaceNonGlobalRegExpWithFunction) {
CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1); CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1);
CONVERT_ARG_HANDLE_CHECKED(JSObject, replace, 2); CONVERT_ARG_HANDLE_CHECKED(JSObject, replace, 2);
DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
RETURN_RESULT_OR_FAILURE(isolate, StringReplaceNonGlobalRegExpWithFunction( RETURN_RESULT_OR_FAILURE(isolate, StringReplaceNonGlobalRegExpWithFunction(
isolate, subject, regexp, replace)); isolate, subject, regexp, replace));
} }
......
...@@ -55,31 +55,31 @@ assertEquals(1, r.lastIndex); ...@@ -55,31 +55,31 @@ assertEquals(1, r.lastIndex);
r = /a/; r = /a/;
r.lastIndex = 1; r.lastIndex = 1;
"zzzz".replace(r, ""); "zzzz".replace(r, "");
assertEquals(0, r.lastIndex); assertEquals(1, r.lastIndex);
// Test String.prototype.replace with non-atomic regexp and empty string. // Test String.prototype.replace with non-atomic regexp and empty string.
r = /\d/; r = /\d/;
r.lastIndex = 1; r.lastIndex = 1;
"zzzz".replace(r, ""); "zzzz".replace(r, "");
assertEquals(0, r.lastIndex); assertEquals(1, r.lastIndex);
// Test String.prototype.replace with atomic regexp and non-empty string. // Test String.prototype.replace with atomic regexp and non-empty string.
r = /a/; r = /a/;
r.lastIndex = 1; r.lastIndex = 1;
"zzzz".replace(r, "a"); "zzzz".replace(r, "a");
assertEquals(0, r.lastIndex); assertEquals(1, r.lastIndex);
// Test String.prototype.replace with non-atomic regexp and non-empty string. // Test String.prototype.replace with non-atomic regexp and non-empty string.
r = /\d/; r = /\d/;
r.lastIndex = 1; r.lastIndex = 1;
"zzzz".replace(r, "a"); "zzzz".replace(r, "a");
assertEquals(0, r.lastIndex); assertEquals(1, r.lastIndex);
// Test String.prototype.replace with replacement function // Test String.prototype.replace with replacement function
r = /a/; r = /a/;
r.lastIndex = 1; r.lastIndex = 1;
"zzzz".replace(r, function() { return ""; }); "zzzz".replace(r, function() { return ""; });
assertEquals(0, r.lastIndex); assertEquals(1, r.lastIndex);
// Regexp functions that returns multiple results: // Regexp functions that returns multiple results:
// A global regexp always resets lastIndex regardless of whether it matches. // A global regexp always resets lastIndex regardless of whether it matches.
...@@ -100,7 +100,7 @@ r.lastIndex = -1; ...@@ -100,7 +100,7 @@ r.lastIndex = -1;
"01234567".match(r); "01234567".match(r);
assertEquals(0, r.lastIndex); assertEquals(0, r.lastIndex);
// A non-global regexp resets lastIndex iff it does not match. // A non-global regexp resets lastIndex iff it is sticky.
r = /a/; r = /a/;
r.lastIndex = -1; r.lastIndex = -1;
"0123abcd".replace(r, "x"); "0123abcd".replace(r, "x");
...@@ -108,7 +108,7 @@ assertEquals(-1, r.lastIndex); ...@@ -108,7 +108,7 @@ assertEquals(-1, r.lastIndex);
r.lastIndex = -1; r.lastIndex = -1;
"01234567".replace(r, "x"); "01234567".replace(r, "x");
assertEquals(0, r.lastIndex); assertEquals(-1, r.lastIndex);
r.lastIndex = -1; r.lastIndex = -1;
"0123abcd".match(r); "0123abcd".match(r);
...@@ -118,6 +118,16 @@ r.lastIndex = -1; ...@@ -118,6 +118,16 @@ r.lastIndex = -1;
"01234567".match(r); "01234567".match(r);
assertEquals(-1, r.lastIndex); assertEquals(-1, r.lastIndex);
r = /a/y;
r.lastIndex = -1;
"0123abcd".replace(r, "x");
assertEquals(0, r.lastIndex);
r.lastIndex = -1;
"01234567".replace(r, "x");
assertEquals(0, r.lastIndex);
// Also test RegExp.prototype.exec and RegExp.prototype.test // Also test RegExp.prototype.exec and RegExp.prototype.test
r = /a/g; r = /a/g;
r.lastIndex = 1; r.lastIndex = 1;
......
...@@ -662,7 +662,6 @@ ...@@ -662,7 +662,6 @@
# than 4Mbytes of stack space. # than 4Mbytes of stack space.
'js1_5/Array/regress-350256-02': [FAIL], 'js1_5/Array/regress-350256-02': [FAIL],
# This test seems designed to fail (it produces a 700Mbyte string). # This test seems designed to fail (it produces a 700Mbyte string).
# We fail on out of memory. The important thing is not to crash. # We fail on out of memory. The important thing is not to crash.
'js1_5/Regress/regress-303213': [FAIL, ['mode == debug', TIMEOUT, NO_VARIANTS]], 'js1_5/Regress/regress-303213': [FAIL, ['mode == debug', TIMEOUT, NO_VARIANTS]],
...@@ -671,14 +670,10 @@ ...@@ -671,14 +670,10 @@
# is given null or undefined as this argument (and so does firefox nightly). # is given null or undefined as this argument (and so does firefox nightly).
'js1_5/Regress/regress-295052': [FAIL], 'js1_5/Regress/regress-295052': [FAIL],
# Bug 1202592: New ecma_3/String/15.5.4.11 is failing.
'ecma_3/String/15.5.4.11': [FAIL],
# Bug 1202597: New js1_5/Expressions/regress-394673 is failing. # Bug 1202597: New js1_5/Expressions/regress-394673 is failing.
# Marked as: Will not fix. V8 throws an acceptable RangeError. # Marked as: Will not fix. V8 throws an acceptable RangeError.
'js1_5/Expressions/regress-394673': [FAIL], 'js1_5/Expressions/regress-394673': [FAIL],
# Bug 762: http://code.google.com/p/v8/issues/detail?id=762 # Bug 762: http://code.google.com/p/v8/issues/detail?id=762
# We do not correctly handle assignments within "with" # We do not correctly handle assignments within "with"
'ecma_3/Statements/12.10-01': [FAIL], 'ecma_3/Statements/12.10-01': [FAIL],
......
...@@ -87,10 +87,6 @@ ...@@ -87,10 +87,6 @@
'language/asi/S7.9_A5.7_T1': [PASS, FAIL_OK], 'language/asi/S7.9_A5.7_T1': [PASS, FAIL_OK],
###### BEGIN REGEXP SUBCLASSING SECTION ###### ###### BEGIN REGEXP SUBCLASSING SECTION ######
# https://bugs.chromium.org/p/v8/issues/detail?id=5361
'built-ins/RegExp/prototype/Symbol.replace/y-init-lastindex': [FAIL],
'built-ins/RegExp/prototype/Symbol.replace/y-set-lastindex': [FAIL],
# https://bugs.chromium.org/p/v8/issues/detail?id=5949 # https://bugs.chromium.org/p/v8/issues/detail?id=5949
'built-ins/RegExp/prototype/exec/failure-lastindex-no-access': [FAIL], 'built-ins/RegExp/prototype/exec/failure-lastindex-no-access': [FAIL],
'built-ins/RegExp/prototype/exec/success-lastindex-no-access': [FAIL], 'built-ins/RegExp/prototype/exec/success-lastindex-no-access': [FAIL],
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment