Commit c0fe56e6 authored by jgruber's avatar jgruber Committed by Commit bot

[regexp] Correct lastIndex behavior in RegExp.prototype[@@replace]

@@replace has a pretty complex implementation, taking different paths
for various situations (e.g.: global/nonglobal regexp, functional/string
replace argument, etc.). Each of these paths must implement similar
logic for calling into the RegExpBuiltinExec spec operation, and many
paths get this subtly wrong.

This CL fixes a couple of issues related to the way @@replace handles lastIndex:
* All paths now respect lastIndex when calling into exec (some used to assume 0).
* lastIndex is now advanced after a successful match for sticky regexps.
* lastIndex is now only reset to 0 on failure for sticky regexps.

BUG=v8:5361

Review-Url: https://codereview.chromium.org/2685183003
Cr-Commit-Position: refs/heads/master@{#43234}
parent 4e4a968e
......@@ -238,9 +238,6 @@ Node* RegExpBuiltinsAssembler::RegExpPrototypeExecBodyWithoutResult(
Variable var_result(this, MachineRepresentation::kTagged);
Label out(this);
Node* const native_context = LoadNativeContext(context);
Node* const string_length = LoadStringLength(string);
// Load lastIndex.
Variable var_lastindex(this, MachineRepresentation::kTagged);
{
......@@ -282,6 +279,7 @@ Node* RegExpBuiltinsAssembler::RegExpPrototypeExecBodyWithoutResult(
Label if_isoob(this, Label::kDeferred);
GotoUnless(TaggedIsSmi(lastindex), &if_isoob);
Node* const string_length = LoadStringLength(string);
GotoUnless(SmiLessThanOrEqual(lastindex, string_length), &if_isoob);
Goto(&run_exec);
......@@ -305,6 +303,7 @@ Node* RegExpBuiltinsAssembler::RegExpPrototypeExecBodyWithoutResult(
Bind(&run_exec);
{
// Get last match info from the context.
Node* const native_context = LoadNativeContext(context);
Node* const last_match_info = LoadContextElement(
native_context, Context::REGEXP_LAST_MATCH_INFO_INDEX);
......@@ -2337,9 +2336,6 @@ Node* RegExpBuiltinsAssembler::ReplaceSimpleStringFastPath(
// ToString({replace_value}) does not contain '$', i.e. we're doing a simple
// string replacement.
Isolate* const isolate = this->isolate();
Node* const null = NullConstant();
Node* const int_zero = IntPtrConstant(0);
Node* const smi_zero = SmiConstant(Smi::kZero);
......@@ -2372,21 +2368,11 @@ Node* RegExpBuiltinsAssembler::ReplaceSimpleStringFastPath(
Bind(&if_isnonglobal);
{
// Run exec, then manually construct the resulting string.
Callable exec_callable = CodeFactory::RegExpExec(isolate);
Node* const match_indices = CallStub(exec_callable, context, regexp, string,
smi_zero, last_match_info);
Label if_matched(this), if_didnotmatch(this);
Branch(WordEqual(match_indices, null), &if_didnotmatch, &if_matched);
Bind(&if_didnotmatch);
{
FastStoreLastIndex(regexp, smi_zero);
var_result.Bind(string);
Goto(&out);
}
Label if_didnotmatch(this);
Node* const match_indices = RegExpPrototypeExecBodyWithoutResult(
context, regexp, string, &if_didnotmatch, true);
Bind(&if_matched);
// Successful match.
{
Node* const subject_start = smi_zero;
Node* const match_start = LoadFixedArrayElement(
......@@ -2430,6 +2416,12 @@ Node* RegExpBuiltinsAssembler::ReplaceSimpleStringFastPath(
Goto(&out);
}
}
Bind(&if_didnotmatch);
{
var_result.Bind(string);
Goto(&out);
}
}
Bind(&out);
......@@ -2506,6 +2498,24 @@ TF_BUILTIN(RegExpPrototypeReplace, RegExpBuiltinsAssembler) {
Node* const replace_value = Parameter(2);
Node* const context = Parameter(5);
// RegExpPrototypeReplace is a bit of a beast - a summary of dispatch logic:
//
// if (!IsFastRegExp(receiver)) CallRuntime(RegExpReplace)
// if (IsCallable(replace)) {
// if (IsGlobal(receiver)) {
// // Called 'fast-path' but contains several runtime calls.
// ReplaceGlobalCallableFastPath()
// } else {
// CallRuntime(StringReplaceNonGlobalRegExpWithFunction)
// }
// } else {
// if (replace.contains("$")) {
// CallRuntime(RegExpReplace)
// } else {
// ReplaceSimpleStringFastPath() // Bails to runtime for global regexps.
// }
// }
// Ensure {maybe_receiver} is a JSReceiver.
Node* const map = ThrowIfNotJSReceiver(
context, maybe_receiver, MessageTemplate::kIncompatibleMethodReceiver,
......
......@@ -1084,15 +1084,32 @@ MUST_USE_RESULT MaybeHandle<String> StringReplaceNonGlobalRegExpWithFunction(
Factory* factory = isolate->factory();
Handle<RegExpMatchInfo> last_match_info = isolate->regexp_last_match_info();
// TODO(jgruber): This is a pattern we could refactor.
const int flags = regexp->GetFlags();
DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
DCHECK_EQ(flags & JSRegExp::kGlobal, 0);
// TODO(jgruber): This should be an easy port to CSA with massive payback.
const bool sticky = (flags & JSRegExp::kSticky) != 0;
uint32_t last_index = 0;
if (sticky) {
Handle<Object> last_index_obj(regexp->LastIndex(), isolate);
ASSIGN_RETURN_ON_EXCEPTION(isolate, last_index_obj,
Object::ToLength(isolate, last_index_obj),
String);
last_index = PositiveNumberToUint32(*last_index_obj);
if (static_cast<int>(last_index) > subject->length()) last_index = 0;
}
Handle<Object> match_indices_obj;
ASSIGN_RETURN_ON_EXCEPTION(
isolate, match_indices_obj,
RegExpImpl::Exec(regexp, subject, 0, last_match_info), String);
RegExpImpl::Exec(regexp, subject, last_index, last_match_info), String);
if (match_indices_obj->IsNull(isolate)) {
RETURN_ON_EXCEPTION(isolate, RegExpUtils::SetLastIndex(isolate, regexp, 0),
String);
if (sticky) regexp->SetLastIndex(0);
return subject;
}
......@@ -1102,6 +1119,8 @@ MUST_USE_RESULT MaybeHandle<String> StringReplaceNonGlobalRegExpWithFunction(
const int index = match_indices->Capture(0);
const int end_of_match = match_indices->Capture(1);
if (sticky) regexp->SetLastIndex(end_of_match);
IncrementalStringBuilder builder(isolate);
builder.AppendString(factory->NewSubString(subject, 0, index));
......@@ -1153,10 +1172,9 @@ MUST_USE_RESULT MaybeHandle<String> RegExpReplace(Isolate* isolate,
Handle<Object> replace_obj) {
Factory* factory = isolate->factory();
// TODO(jgruber): We need the even stricter guarantee of an unmodified
// JSRegExp map here for access to GetFlags to be legal.
const int flags = regexp->GetFlags();
const bool global = (flags & JSRegExp::kGlobal) != 0;
const bool sticky = (flags & JSRegExp::kSticky) != 0;
// Functional fast-paths are dispatched directly by replace builtin.
DCHECK(!replace_obj->IsCallable());
......@@ -1171,14 +1189,24 @@ MUST_USE_RESULT MaybeHandle<String> RegExpReplace(Isolate* isolate,
if (!global) {
// Non-global regexp search, string replace.
uint32_t last_index = 0;
if (sticky) {
Handle<Object> last_index_obj(regexp->LastIndex(), isolate);
ASSIGN_RETURN_ON_EXCEPTION(isolate, last_index_obj,
Object::ToLength(isolate, last_index_obj),
String);
last_index = PositiveNumberToUint32(*last_index_obj);
if (static_cast<int>(last_index) > string->length()) last_index = 0;
}
Handle<Object> match_indices_obj;
ASSIGN_RETURN_ON_EXCEPTION(
isolate, match_indices_obj,
RegExpImpl::Exec(regexp, string, 0, last_match_info), String);
RegExpImpl::Exec(regexp, string, last_index, last_match_info), String);
if (match_indices_obj->IsNull(isolate)) {
RETURN_ON_EXCEPTION(
isolate, RegExpUtils::SetLastIndex(isolate, regexp, 0), String);
if (sticky) regexp->SetLastIndex(0);
return string;
}
......@@ -1187,6 +1215,8 @@ MUST_USE_RESULT MaybeHandle<String> RegExpReplace(Isolate* isolate,
const int start_index = match_indices->Capture(0);
const int end_index = match_indices->Capture(1);
if (sticky) regexp->SetLastIndex(end_index);
IncrementalStringBuilder builder(isolate);
builder.AppendString(factory->NewSubString(string, 0, start_index));
......@@ -1268,6 +1298,8 @@ RUNTIME_FUNCTION(Runtime_StringReplaceNonGlobalRegExpWithFunction) {
CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1);
CONVERT_ARG_HANDLE_CHECKED(JSObject, replace, 2);
DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
RETURN_RESULT_OR_FAILURE(isolate, StringReplaceNonGlobalRegExpWithFunction(
isolate, subject, regexp, replace));
}
......
......@@ -55,31 +55,31 @@ assertEquals(1, r.lastIndex);
r = /a/;
r.lastIndex = 1;
"zzzz".replace(r, "");
assertEquals(0, r.lastIndex);
assertEquals(1, r.lastIndex);
// Test String.prototype.replace with non-atomic regexp and empty string.
r = /\d/;
r.lastIndex = 1;
"zzzz".replace(r, "");
assertEquals(0, r.lastIndex);
assertEquals(1, r.lastIndex);
// Test String.prototype.replace with atomic regexp and non-empty string.
r = /a/;
r.lastIndex = 1;
"zzzz".replace(r, "a");
assertEquals(0, r.lastIndex);
assertEquals(1, r.lastIndex);
// Test String.prototype.replace with non-atomic regexp and non-empty string.
r = /\d/;
r.lastIndex = 1;
"zzzz".replace(r, "a");
assertEquals(0, r.lastIndex);
assertEquals(1, r.lastIndex);
// Test String.prototype.replace with replacement function
r = /a/;
r.lastIndex = 1;
"zzzz".replace(r, function() { return ""; });
assertEquals(0, r.lastIndex);
assertEquals(1, r.lastIndex);
// Regexp functions that returns multiple results:
// A global regexp always resets lastIndex regardless of whether it matches.
......@@ -100,7 +100,7 @@ r.lastIndex = -1;
"01234567".match(r);
assertEquals(0, r.lastIndex);
// A non-global regexp resets lastIndex iff it does not match.
// A non-global regexp resets lastIndex iff it is sticky.
r = /a/;
r.lastIndex = -1;
"0123abcd".replace(r, "x");
......@@ -108,7 +108,7 @@ assertEquals(-1, r.lastIndex);
r.lastIndex = -1;
"01234567".replace(r, "x");
assertEquals(0, r.lastIndex);
assertEquals(-1, r.lastIndex);
r.lastIndex = -1;
"0123abcd".match(r);
......@@ -118,6 +118,16 @@ r.lastIndex = -1;
"01234567".match(r);
assertEquals(-1, r.lastIndex);
r = /a/y;
r.lastIndex = -1;
"0123abcd".replace(r, "x");
assertEquals(0, r.lastIndex);
r.lastIndex = -1;
"01234567".replace(r, "x");
assertEquals(0, r.lastIndex);
// Also test RegExp.prototype.exec and RegExp.prototype.test
r = /a/g;
r.lastIndex = 1;
......
......@@ -662,7 +662,6 @@
# than 4Mbytes of stack space.
'js1_5/Array/regress-350256-02': [FAIL],
# This test seems designed to fail (it produces a 700Mbyte string).
# We fail on out of memory. The important thing is not to crash.
'js1_5/Regress/regress-303213': [FAIL, ['mode == debug', TIMEOUT, NO_VARIANTS]],
......@@ -671,14 +670,10 @@
# is given null or undefined as this argument (and so does firefox nightly).
'js1_5/Regress/regress-295052': [FAIL],
# Bug 1202592: New ecma_3/String/15.5.4.11 is failing.
'ecma_3/String/15.5.4.11': [FAIL],
# Bug 1202597: New js1_5/Expressions/regress-394673 is failing.
# Marked as: Will not fix. V8 throws an acceptable RangeError.
'js1_5/Expressions/regress-394673': [FAIL],
# Bug 762: http://code.google.com/p/v8/issues/detail?id=762
# We do not correctly handle assignments within "with"
'ecma_3/Statements/12.10-01': [FAIL],
......
......@@ -87,10 +87,6 @@
'language/asi/S7.9_A5.7_T1': [PASS, FAIL_OK],
###### BEGIN REGEXP SUBCLASSING SECTION ######
# https://bugs.chromium.org/p/v8/issues/detail?id=5361
'built-ins/RegExp/prototype/Symbol.replace/y-init-lastindex': [FAIL],
'built-ins/RegExp/prototype/Symbol.replace/y-set-lastindex': [FAIL],
# https://bugs.chromium.org/p/v8/issues/detail?id=5949
'built-ins/RegExp/prototype/exec/failure-lastindex-no-access': [FAIL],
'built-ins/RegExp/prototype/exec/success-lastindex-no-access': [FAIL],
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment