Commit 33a4faa4 authored by jgruber's avatar jgruber Committed by Commit bot

[regexp] Port RegExp.prototype[@@replace]

This moves the implementation of @@replace from regexp.js to builtins-regexp.cc
(the TurboFan fast path) and runtime-regexp.cc (slow path). The fast path
handles all cases in which the regexp itself is an unmodified JSRegExp
instance, the given 'replace' argument is not callable and does not contain any
'$' characters (i.e. we are doing a string replacement).

BUG=v8:5339

Review-Url: https://codereview.chromium.org/2398423002
Cr-Commit-Position: refs/heads/master@{#40253}
parent 897d89ee
......@@ -1700,7 +1700,6 @@ void Genesis::InitializeGlobal(Handle<JSGlobalObject> global_object,
shared->set_instance_class_name(isolate->heap()->RegExp_string());
shared->DontAdaptArguments();
shared->set_length(2);
{
// RegExp.prototype setup.
......@@ -1746,6 +1745,13 @@ void Genesis::InitializeGlobal(Handle<JSGlobalObject> global_object,
InstallFunction(prototype, fun, factory->match_symbol(), DONT_ENUM);
}
{
Handle<JSFunction> fun = SimpleCreateFunction(
isolate, factory->InternalizeUtf8String("[Symbol.replace]"),
Builtins::kRegExpPrototypeReplace, 2, true);
InstallFunction(prototype, fun, factory->replace_symbol(), DONT_ENUM);
}
{
Handle<JSFunction> fun = SimpleCreateFunction(
isolate, factory->InternalizeUtf8String("[Symbol.search]"),
......@@ -1759,6 +1765,10 @@ void Genesis::InitializeGlobal(Handle<JSGlobalObject> global_object,
Builtins::kRegExpPrototypeSplit, 2, false);
InstallFunction(prototype, fun, factory->split_symbol(), DONT_ENUM);
}
// Store the initial RegExp.prototype map. This is used in fast-path
// checks. Do not alter the prototype after this point.
isolate->native_context()->set_regexp_prototype_map(prototype->map());
}
{
......
......@@ -207,6 +207,16 @@ compiler::Node* LoadLastIndex(CodeStubAssembler* a, compiler::Node* context,
return var_value.value();
}
// Writes {value} directly into the in-object lastIndex field of {regexp}.
// This is the fast path of StoreLastIndex and is only valid when {regexp} is
// guaranteed to be an unmodified JSRegExp instance. {context} is accepted for
// signature symmetry with StoreLastIndex but is not used here.
void FastStoreLastIndex(CodeStubAssembler* a, compiler::Node* context,
                        compiler::Node* regexp, compiler::Node* value) {
  // The lastIndex field is addressed relative to the end of the fixed
  // JSRegExp layout (JSRegExp::kSize).
  static const int last_index_offset =
      JSRegExp::kSize + JSRegExp::kLastIndexFieldIndex * kPointerSize;

  a->StoreObjectField(regexp, last_index_offset, value);
}
void StoreLastIndex(CodeStubAssembler* a, compiler::Node* context,
compiler::Node* has_initialmap, compiler::Node* regexp,
compiler::Node* value) {
......@@ -218,10 +228,7 @@ void StoreLastIndex(CodeStubAssembler* a, compiler::Node* context,
a->Bind(&if_unmodified);
{
// Store the in-object field.
static const int field_offset =
JSRegExp::kSize + JSRegExp::kLastIndexFieldIndex * kPointerSize;
a->StoreObjectField(regexp, field_offset, value);
FastStoreLastIndex(a, context, regexp, value);
a->Goto(&out);
}
......@@ -454,6 +461,7 @@ namespace {
compiler::Node* ThrowIfNotJSReceiver(CodeStubAssembler* a, Isolate* isolate,
compiler::Node* context,
compiler::Node* value,
MessageTemplate::Template msg_template,
char const* method_name) {
typedef compiler::Node Node;
typedef CodeStubAssembler::Label Label;
......@@ -475,8 +483,7 @@ compiler::Node* ThrowIfNotJSReceiver(CodeStubAssembler* a, Isolate* isolate,
// The {value} is not a compatible receiver for this method.
a->Bind(&throw_exception);
{
Node* const message_id =
a->SmiConstant(Smi::FromInt(MessageTemplate::kRegExpNonObject));
Node* const message_id = a->SmiConstant(Smi::FromInt(msg_template));
Node* const method_name_str = a->HeapConstant(
isolate->factory()->NewStringFromAsciiChecked(method_name, TENURED));
......@@ -508,6 +515,37 @@ compiler::Node* IsInitialRegExpMap(CodeStubAssembler* a,
return has_initialmap;
}
// Guard used by the RegExp fast paths, which rely on unmodified JSRegExp
// instances. The check is intentionally coarse and has two parts:
//   1. {map} must be the initial map stored on the RegExp function, and
//   2. the map of {map}'s prototype must be the RegExp.prototype map that was
//      recorded in the native context.
// Control continues at {if_isunmodified} when both hold and at
// {if_ismodified} otherwise.
void BranchIfFastPath(CodeStubAssembler* a, compiler::Node* context,
                      compiler::Node* map,
                      CodeStubAssembler::Label* if_isunmodified,
                      CodeStubAssembler::Label* if_ismodified) {
  typedef compiler::Node Node;

  Node* const native_context = a->LoadNativeContext(context);

  // Part 1: compare {map} against the RegExp function's initial map.
  Node* const regexp_function =
      a->LoadContextElement(native_context, Context::REGEXP_FUNCTION_INDEX);
  Node* const expected_map = a->LoadObjectField(
      regexp_function, JSFunction::kPrototypeOrInitialMapOffset);
  Node* const map_is_initial = a->WordEqual(map, expected_map);
  a->GotoUnless(map_is_initial, if_ismodified);

  // Part 2: compare the prototype's map against the recorded one.
  Node* const expected_proto_map = a->LoadContextElement(
      native_context, Context::REGEXP_PROTOTYPE_MAP_INDEX);
  Node* const actual_proto_map = a->LoadMap(a->LoadMapPrototype(map));
  Node* const proto_is_initial =
      a->WordEqual(actual_proto_map, expected_proto_map);

  // TODO(ishell): Update this check once map changes for constant field
  // tracking are landing.
  a->Branch(proto_is_initial, if_isunmodified, if_ismodified);
}
} // namespace
void Builtins::Generate_RegExpPrototypeFlagsGetter(CodeStubAssembler* a) {
......@@ -523,6 +561,7 @@ void Builtins::Generate_RegExpPrototypeFlagsGetter(CodeStubAssembler* a) {
Node* const int_one = a->IntPtrConstant(1);
Node* const map = ThrowIfNotJSReceiver(a, isolate, context, receiver,
MessageTemplate::kRegExpNonObject,
"RegExp.prototype.flags");
Variable var_length(a, MachineType::PointerRepresentation());
......@@ -819,7 +858,6 @@ void Builtins::Generate_RegExpPrototypeUnicodeGetter(CodeStubAssembler* a) {
"RegExp.prototype.unicode");
}
// The properties $1..$9 are the first nine capturing substrings of the last
// successful match, or ''. The function RegExpMakeCaptureGetter will be
// called with indices from 1 to 9.
......@@ -1183,8 +1221,6 @@ MaybeHandle<JSArray> RegExpSplit(Isolate* isolate, Handle<JSRegExp> regexp,
if (num_elems == limit) break;
// TODO(jgruber): Refactor GetLastMatchInfo methods to take an input
// argument.
Handle<Object> num_captures_obj =
JSReceiver::GetElement(isolate, match_indices,
RegExpImpl::kLastCaptureCount)
......@@ -1428,5 +1464,196 @@ BUILTIN(RegExpPrototypeSplit) {
return *NewJSArrayWithElements(isolate, elems, num_elems);
}
namespace {
// Emits the simple string-replacement fast path and returns the node holding
// the resulting string.
//
// {regexp} is the receiver, {subject_string} the string being searched and
// {replace_string} the already-stringified replacement.
//
// Two cases are generated:
//  - global regexps: lastIndex is reset to 0 and the work is handed to
//    Runtime::kStringReplaceGlobalRegExpWithString;
//  - non-global regexps: the RegExpExec stub is run once from index 0. On no
//    match lastIndex is reset to 0 and {subject_string} is returned as is; on
//    a match the result is built as prefix + {replace_string} + suffix.
compiler::Node* ReplaceFastPath(CodeStubAssembler* a, compiler::Node* context,
compiler::Node* regexp,
compiler::Node* subject_string,
compiler::Node* replace_string) {
// The fast path is reached only if {regexp} is an unmodified JSRegExp
// instance, the original replace value is non-callable, and its string form
// ({replace_string}) does not contain '$', i.e. we're doing a simple
// string replacement.
typedef CodeStubAssembler::Variable Variable;
typedef CodeStubAssembler::Label Label;
typedef compiler::Node Node;
Isolate* const isolate = a->isolate();
Node* const null = a->NullConstant();
Node* const int_zero = a->IntPtrConstant(0);
Node* const smi_zero = a->SmiConstant(Smi::kZero);
// All paths bind {var_result} and then jump to {out}.
Label out(a);
Variable var_result(a, MachineRepresentation::kTagged);
// Load the last match info.
Node* const native_context = a->LoadNativeContext(context);
Node* const last_match_info = a->LoadContextElement(
native_context, Context::REGEXP_LAST_MATCH_INFO_INDEX);
// Is {regexp} global?
Label if_isglobal(a), if_isnonglobal(a);
Node* const flags = a->LoadObjectField(regexp, JSRegExp::kFlagsOffset);
// Non-zero iff the kGlobal bit is set in the (untagged) flags word.
Node* const is_global =
a->WordAnd(a->SmiUntag(flags), a->IntPtrConstant(JSRegExp::kGlobal));
a->Branch(a->WordEqual(is_global, int_zero), &if_isnonglobal, &if_isglobal);
a->Bind(&if_isglobal);
{
// Hand off global regexps to runtime. lastIndex is reset to 0 first.
FastStoreLastIndex(a, context, regexp, smi_zero);
Node* const result =
a->CallRuntime(Runtime::kStringReplaceGlobalRegExpWithString, context,
subject_string, regexp, replace_string, last_match_info);
var_result.Bind(result);
a->Goto(&out);
}
a->Bind(&if_isnonglobal);
{
// Run exec, then manually construct the resulting string.
// NOTE(review): exec is always started at index 0 and the regexp's current
// lastIndex is not read here -- confirm that no caller can reach this path
// with a regexp for which lastIndex matters (e.g. sticky).
Callable exec_callable = CodeFactory::RegExpExec(isolate);
Node* const match_indices =
a->CallStub(exec_callable, context, regexp, subject_string, smi_zero,
last_match_info);
// A null result from the stub means there was no match.
Label if_matched(a), if_didnotmatch(a);
a->Branch(a->WordEqual(match_indices, null), &if_didnotmatch, &if_matched);
a->Bind(&if_didnotmatch);
{
// No match: reset lastIndex and return the subject unchanged.
FastStoreLastIndex(a, context, regexp, smi_zero);
var_result.Bind(subject_string);
a->Goto(&out);
}
a->Bind(&if_matched);
{
Node* const match_elements = a->LoadElements(match_indices);
CodeStubAssembler::ParameterMode mode =
CodeStubAssembler::INTPTR_PARAMETERS;
Node* const subject_start = smi_zero;
// The first capture pair holds the start and end of the whole match.
Node* const match_start = a->LoadFixedArrayElement(
match_elements, a->IntPtrConstant(RegExpImpl::kFirstCapture), 0,
mode);
Node* const match_end = a->LoadFixedArrayElement(
match_elements, a->IntPtrConstant(RegExpImpl::kFirstCapture + 1), 0,
mode);
Node* const subject_end = a->LoadStringLength(subject_string);
// An empty replacement needs only one concatenation (prefix + suffix).
Label if_replaceisempty(a), if_replaceisnotempty(a);
Node* const replace_length = a->LoadStringLength(replace_string);
a->Branch(a->SmiEqual(replace_length, smi_zero), &if_replaceisempty,
&if_replaceisnotempty);
a->Bind(&if_replaceisempty);
{
// TODO(jgruber): We could skip many of the checks that using SubString
// here entails.
// result = subject[0, match_start) + subject[match_end, length)
Node* const first_part =
a->SubString(context, subject_string, subject_start, match_start);
Node* const second_part =
a->SubString(context, subject_string, match_end, subject_end);
Node* const result = a->StringConcat(context, first_part, second_part);
var_result.Bind(result);
a->Goto(&out);
}
a->Bind(&if_replaceisnotempty);
{
// result = subject[0, match_start) + replace_string
//          + subject[match_end, length)
Node* const first_part =
a->SubString(context, subject_string, subject_start, match_start);
Node* const second_part = replace_string;
Node* const third_part =
a->SubString(context, subject_string, match_end, subject_end);
Node* result = a->StringConcat(context, first_part, second_part);
result = a->StringConcat(context, result, third_part);
var_result.Bind(result);
a->Goto(&out);
}
}
}
a->Bind(&out);
return var_result.value();
}
} // namespace
// ES#sec-regexp.prototype-@@replace
// RegExp.prototype [ @@replace ] ( string, replaceValue )
//
// Generates the builtin for RegExp.prototype[@@replace]. The receiver must be
// a JSReceiver (otherwise a kIncompatibleMethodReceiver error is thrown via
// ThrowIfNotJSReceiver). The string argument is converted with ToString.
//
// The fast path (ReplaceFastPath) is taken only if all of these hold:
//   1. the receiver passes BranchIfFastPath (unmodified JSRegExp),
//   2. the replace value is not callable, and
//   3. ToString(replace value) contains no '$'.
// Everything else is delegated to Runtime::kRegExpReplace.
void Builtins::Generate_RegExpPrototypeReplace(CodeStubAssembler* a) {
  typedef CodeStubAssembler::Label Label;
  typedef CodeStubAssembler::Variable Variable;
  typedef compiler::Node Node;

  Isolate* const isolate = a->isolate();

  Node* const maybe_receiver = a->Parameter(0);
  Node* const maybe_string = a->Parameter(1);
  Node* const replace_value = a->Parameter(2);
  Node* const context = a->Parameter(5);

  // StringIndexOfChar expects a tagged {from}, so use a Smi zero rather than
  // an untagged intptr zero.
  Node* const smi_zero = a->SmiConstant(Smi::kZero);

  // Ensure {receiver} is a JSReceiver.
  Node* const map =
      ThrowIfNotJSReceiver(a, isolate, context, maybe_receiver,
                           MessageTemplate::kIncompatibleMethodReceiver,
                           "RegExp.prototype.@@replace");
  Node* const receiver = maybe_receiver;

  // Convert {maybe_string} to a String.
  Callable tostring_callable = CodeFactory::ToString(isolate);
  Node* const string = a->CallStub(tostring_callable, context, maybe_string);

  // The replace value that is handed to the runtime. It starts out as the
  // original argument; once ToString has been applied below it is switched to
  // the resulting string so that the conversion (which may run user code) is
  // not performed a second time by the slow path.
  Variable var_runtime_replace(a, MachineRepresentation::kTagged);
  var_runtime_replace.Bind(replace_value);

  // Fast-path checks: 1. Is the {receiver} an unmodified JSRegExp instance?
  Label checkreplacecallable(a), runtime(a, Label::kDeferred);
  BranchIfFastPath(a, context, map, &checkreplacecallable, &runtime);

  a->Bind(&checkreplacecallable);
  Node* const regexp = receiver;

  // 2. Is {replace_value} callable? Smis are never callable and have no map,
  // so they skip the map check.
  Label checkreplacestring(a);
  a->GotoIf(a->TaggedIsSmi(replace_value), &checkreplacestring);

  Node* const replace_value_map = a->LoadMap(replace_value);
  a->Branch(
      a->Word32Equal(a->Word32And(a->LoadMapBitField(replace_value_map),
                                  a->Int32Constant(1 << Map::kIsCallable)),
                     a->Int32Constant(0)),
      &checkreplacestring, &runtime);

  // 3. Does ToString({replace_value}) contain '$'?
  a->Bind(&checkreplacestring);
  {
    Node* const replace_string =
        a->CallStub(tostring_callable, context, replace_value);
    var_runtime_replace.Bind(replace_string);

    Node* const dollar_char = a->IntPtrConstant('$');
    Node* const smi_minusone = a->SmiConstant(Smi::FromInt(-1));
    a->GotoUnless(a->SmiEqual(a->StringIndexOfChar(context, replace_string,
                                                   dollar_char, smi_zero),
                              smi_minusone),
                  &runtime);

    a->Return(ReplaceFastPath(a, context, regexp, string, replace_string));
  }

  // Slow path: let the runtime implement the full algorithm.
  a->Bind(&runtime);
  {
    Node* const result =
        a->CallRuntime(Runtime::kRegExpReplace, context, receiver, string,
                       var_runtime_replace.value());
    a->Return(result);
  }
}
} // namespace internal
} // namespace v8
......@@ -587,6 +587,7 @@ namespace internal {
TFJ(RegExpPrototypeIgnoreCaseGetter, 1) \
CPP(RegExpPrototypeMatch) \
TFJ(RegExpPrototypeMultilineGetter, 1) \
TFJ(RegExpPrototypeReplace, 3) \
CPP(RegExpPrototypeSearch) \
CPP(RegExpPrototypeSourceGetter) \
CPP(RegExpPrototypeSpeciesGetter) \
......
......@@ -1350,6 +1350,40 @@ Node* CodeStubAssembler::AllocateSlicedTwoByteString(Node* length, Node* parent,
return result;
}
// Allocates a ConsString that uses the one-byte cons string map and
// initializes every field: map, |length| (tagged), an empty hash field, and
// the |first| and |second| parts. All stores skip the write barrier.
Node* CodeStubAssembler::AllocateOneByteConsString(Node* length, Node* first,
                                                   Node* second) {
  Node* const cons = Allocate(ConsString::kSize);

  // Header: map, length, hash.
  Node* const cons_map = LoadRoot(Heap::kConsOneByteStringMapRootIndex);
  StoreMapNoWriteBarrier(cons, cons_map);
  StoreObjectFieldNoWriteBarrier(cons, ConsString::kLengthOffset, length,
                                 MachineRepresentation::kTagged);
  StoreObjectFieldNoWriteBarrier(cons, ConsString::kHashFieldOffset,
                                 Int32Constant(String::kEmptyHashField),
                                 MachineRepresentation::kWord32);

  // Payload: the two halves of the concatenation.
  StoreObjectFieldNoWriteBarrier(cons, ConsString::kFirstOffset, first,
                                 MachineRepresentation::kTagged);
  StoreObjectFieldNoWriteBarrier(cons, ConsString::kSecondOffset, second,
                                 MachineRepresentation::kTagged);

  return cons;
}
// Allocates a ConsString that uses the (two-byte) cons string map and
// initializes every field: map, |length| (tagged), an empty hash field, and
// the |first| and |second| parts. All stores skip the write barrier.
Node* CodeStubAssembler::AllocateTwoByteConsString(Node* length, Node* first,
                                                   Node* second) {
  Node* const cons = Allocate(ConsString::kSize);

  // Header: map, length, hash.
  Node* const cons_map = LoadRoot(Heap::kConsStringMapRootIndex);
  StoreMapNoWriteBarrier(cons, cons_map);
  StoreObjectFieldNoWriteBarrier(cons, ConsString::kLengthOffset, length,
                                 MachineRepresentation::kTagged);
  StoreObjectFieldNoWriteBarrier(cons, ConsString::kHashFieldOffset,
                                 Int32Constant(String::kEmptyHashField),
                                 MachineRepresentation::kWord32);

  // Payload: the two halves of the concatenation.
  StoreObjectFieldNoWriteBarrier(cons, ConsString::kFirstOffset, first,
                                 MachineRepresentation::kTagged);
  StoreObjectFieldNoWriteBarrier(cons, ConsString::kSecondOffset, second,
                                 MachineRepresentation::kTagged);

  return cons;
}
Node* CodeStubAssembler::AllocateRegExpResult(Node* context, Node* length,
Node* index, Node* input) {
Node* const max_length =
......@@ -1687,6 +1721,7 @@ void CodeStubAssembler::CopyFixedArrayElements(
void CodeStubAssembler::CopyStringCharacters(compiler::Node* from_string,
compiler::Node* to_string,
compiler::Node* from_index,
compiler::Node* to_index,
compiler::Node* character_count,
String::Encoding encoding) {
Label out(this);
......@@ -1704,20 +1739,22 @@ void CodeStubAssembler::CopyStringCharacters(compiler::Node* from_string,
{
Node* byte_count = SmiUntag(character_count);
Node* from_byte_index = SmiUntag(from_index);
Node* to_byte_index = SmiUntag(to_index);
if (encoding == String::ONE_BYTE_ENCODING) {
const int offset = SeqOneByteString::kHeaderSize - kHeapObjectTag;
from_offset = IntPtrAdd(IntPtrConstant(offset), from_byte_index);
limit_offset = IntPtrAdd(from_offset, byte_count);
to_offset = IntPtrConstant(offset);
to_offset = IntPtrAdd(IntPtrConstant(offset), to_byte_index);
} else {
STATIC_ASSERT(2 == sizeof(uc16));
byte_count = WordShl(byte_count, 1);
from_byte_index = WordShl(from_byte_index, 1);
to_byte_index = WordShl(to_byte_index, 1);
const int offset = SeqTwoByteString::kHeaderSize - kHeapObjectTag;
from_offset = IntPtrAdd(IntPtrConstant(offset), from_byte_index);
limit_offset = IntPtrAdd(from_offset, byte_count);
to_offset = IntPtrConstant(offset);
to_offset = IntPtrAdd(IntPtrConstant(offset), to_byte_index);
}
}
......@@ -2515,6 +2552,8 @@ Node* AllocAndCopyStringCharacters(CodeStubAssembler* a, Node* context,
Label end(a), two_byte_sequential(a);
Variable var_result(a, MachineRepresentation::kTagged);
Node* const smi_zero = a->SmiConstant(Smi::kZero);
STATIC_ASSERT((kOneByteStringTag & kStringEncodingMask) != 0);
a->GotoIf(a->Word32Equal(a->Word32And(from_instance_type,
a->Int32Constant(kStringEncodingMask)),
......@@ -2525,7 +2564,7 @@ Node* AllocAndCopyStringCharacters(CodeStubAssembler* a, Node* context,
{
Node* result =
a->AllocateSeqOneByteString(context, a->SmiToWord(character_count));
a->CopyStringCharacters(from, result, from_index, character_count,
a->CopyStringCharacters(from, result, from_index, smi_zero, character_count,
String::ONE_BYTE_ENCODING);
var_result.Bind(result);
......@@ -2537,7 +2576,7 @@ Node* AllocAndCopyStringCharacters(CodeStubAssembler* a, Node* context,
{
Node* result =
a->AllocateSeqTwoByteString(context, a->SmiToWord(character_count));
a->CopyStringCharacters(from, result, from_index, character_count,
a->CopyStringCharacters(from, result, from_index, smi_zero, character_count,
String::TWO_BYTE_ENCODING);
var_result.Bind(result);
......@@ -2772,6 +2811,146 @@ Node* CodeStubAssembler::SubString(Node* context, Node* string, Node* from,
return var_result.value();
}
// Returns a string holding |first| followed by |second|.
//
// Only the case where both inputs are non-indirect, non-external one-byte
// strings is handled inline; every other combination is delegated to
// Runtime::kStringAdd. Inline behavior:
//  - if either input is empty, the other input is returned as is;
//  - if the combined length is below ConsString::kMinLength, a fresh
//    sequential one-byte string is allocated and both inputs are copied in;
//  - otherwise a one-byte ConsString referencing both inputs is allocated.
// Combined lengths above String::kMaxLength are never materialized inline.
Node* CodeStubAssembler::StringConcat(Node* context, Node* first,
                                      Node* second) {
  Variable var_result(this, MachineRepresentation::kTagged);

  Label out(this), runtime(this, Label::kDeferred);

  // TODO(jgruber): Handle indirect, external, and two-byte strings.

  Node* const one_byte_seq_mask = Int32Constant(
      kIsIndirectStringMask | kExternalStringTag | kStringEncodingMask);
  Node* const expected_masked = Int32Constant(kOneByteStringTag);

  Node* const first_instance_type = LoadInstanceType(first);
  GotoUnless(Word32Equal(Word32And(first_instance_type, one_byte_seq_mask),
                         expected_masked),
             &runtime);

  Node* const second_instance_type = LoadInstanceType(second);
  GotoUnless(Word32Equal(Word32And(second_instance_type, one_byte_seq_mask),
                         expected_masked),
             &runtime);

  Node* const smi_zero = SmiConstant(Smi::kZero);
  Node* const first_length = LoadStringLength(first);
  Node* const second_length = LoadStringLength(second);
  Node* const length = SmiAdd(first_length, second_length);

  // Never build a string longer than String::kMaxLength here. Over-long
  // results are left to the runtime, which is expected to report the invalid
  // length.
  Node* const length_limit =
      SmiConstant(Smi::FromInt(String::kMaxLength + 1));
  GotoUnless(SmiLessThan(length, length_limit), &runtime);

  // Concatenating with an empty string yields the other operand; this avoids
  // both a pointless copy and a ConsString with an empty half.
  var_result.Bind(second);
  GotoIf(SmiEqual(first_length, smi_zero), &out);
  var_result.Bind(first);
  GotoIf(SmiEqual(second_length, smi_zero), &out);

  Label if_makeseqstring(this), if_makeconsstring(this);
  Node* const min_cons_length =
      SmiConstant(Smi::FromInt(ConsString::kMinLength));
  Branch(SmiLessThan(length, min_cons_length), &if_makeseqstring,
         &if_makeconsstring);

  Bind(&if_makeseqstring);
  {
    // Short result: copy both inputs into a new sequential string, |second|
    // starting right after the characters of |first|.
    Node* result = AllocateSeqOneByteString(context, SmiToWord(length));

    CopyStringCharacters(first, result, smi_zero, smi_zero, first_length,
                         String::ONE_BYTE_ENCODING);
    CopyStringCharacters(second, result, smi_zero, first_length, second_length,
                         String::ONE_BYTE_ENCODING);

    var_result.Bind(result);
    Goto(&out);
  }

  Bind(&if_makeconsstring);
  {
    // Long result: reference the inputs instead of copying them.
    Node* result = AllocateOneByteConsString(length, first, second);
    var_result.Bind(result);
    Goto(&out);
  }

  Bind(&runtime);
  {
    Node* const result =
        CallRuntime(Runtime::kStringAdd, context, first, second);
    var_result.Bind(result);
    Goto(&out);
  }

  Bind(&out);
  return var_result.value();
}
// Returns, as a Smi, the first index >= {from} at which {needle_char} occurs
// in {string}, or -1 if there is no such index. {needle_char} is an untagged
// intptr and {from} is a Smi.
//
// Only one-byte needles in non-indirect, non-external one-byte strings are
// searched inline; all other cases call Runtime::kStringIndexOf.
Node* CodeStubAssembler::StringIndexOfChar(Node* context, Node* string,
                                           Node* needle_char, Node* from) {
  Variable var_result(this, MachineRepresentation::kTagged);

  Label out(this), runtime(this, Label::kDeferred);

  // Let runtime handle non-one-byte {needle_char}.

  Node* const one_byte_char_mask = IntPtrConstant(0xFF);
  GotoUnless(WordEqual(WordAnd(needle_char, one_byte_char_mask), needle_char),
             &runtime);

  // TODO(jgruber): Handle external and two-byte strings.

  Node* const one_byte_seq_mask = Int32Constant(
      kIsIndirectStringMask | kExternalStringTag | kStringEncodingMask);
  Node* const expected_masked = Int32Constant(kOneByteStringTag);

  Node* const string_instance_type = LoadInstanceType(string);
  GotoUnless(Word32Equal(Word32And(string_instance_type, one_byte_seq_mask),
                         expected_masked),
             &runtime);

  // If we reach this, {string} is a non-indirect, non-external one-byte string.

  Node* const length = LoadStringLength(string);
  Node* const search_range_length = SmiUntag(SmiSub(length, from));

  // {begin}, {cursor} and {end} are byte offsets relative to the (tagged)
  // {string} pointer; {begin} is the offset of character 0.
  const int offset = SeqOneByteString::kHeaderSize - kHeapObjectTag;
  Node* const begin = IntPtrConstant(offset);
  Node* const cursor = IntPtrAdd(begin, SmiUntag(from));
  Node* const end = IntPtrAdd(cursor, search_range_length);

  Variable var_cursor(this, MachineType::PointerRepresentation());
  Variable* vars[] = {&var_cursor};
  Label loop(this, 1, vars), loop_tail(this);

  var_cursor.Bind(cursor);
  var_result.Bind(SmiConstant(Smi::FromInt(-1)));

  // The loop below loads a character before it tests the bound, so an empty
  // search range ({from} >= length, including the empty string) must be
  // rejected up front to avoid reading past the range.
  GotoUnless(IntPtrLessThan(cursor, end), &out);

  Goto(&loop);
  Bind(&loop);
  {
    Node* const current = var_cursor.value();

    Node* value = Load(MachineType::Uint8(), string, current);
    GotoUnless(WordEqual(value, needle_char), &loop_tail);

    // Found a match.
    Node* index = SmiTag(IntPtrSub(current, begin));
    var_result.Bind(index);
    Goto(&out);

    Bind(&loop_tail);
    {
      Node* const new_cursor = IntPtrAdd(current, IntPtrConstant(1));
      var_cursor.Bind(new_cursor);
      Branch(IntPtrLessThan(new_cursor, end), &loop, &out);
    }
  }

  Bind(&runtime);
  {
    Node* const pattern = StringFromCharCode(needle_char);
    Node* const result =
        CallRuntime(Runtime::kStringIndexOf, context, string, pattern, from);
    var_result.Bind(result);
    // {var_cursor} is merged at {out}, so it needs a value on this path too.
    var_cursor.Bind(IntPtrConstant(0));
    Goto(&out);
  }

  Bind(&out);
  return var_result.value();
}
Node* CodeStubAssembler::StringFromCodePoint(compiler::Node* codepoint,
UnicodeEncoding encoding) {
Variable var_result(this, MachineRepresentation::kTagged);
......
......@@ -360,6 +360,19 @@ class CodeStubAssembler : public compiler::CodeAssembler {
compiler::Node* parent,
compiler::Node* offset);
// Allocate a one-byte ConsString with the given length, first and second
// parts. |length| is expected to be tagged, and |first| and |second| are
// expected to be one-byte strings.
compiler::Node* AllocateOneByteConsString(compiler::Node* length,
compiler::Node* first,
compiler::Node* second);
// Allocate a two-byte ConsString with the given length, first and second
// parts. |length| is expected to be tagged, and |first| and |second| are
// expected to be two-byte strings.
compiler::Node* AllocateTwoByteConsString(compiler::Node* length,
compiler::Node* first,
compiler::Node* second);
// Allocate a RegExpResult with the given length (the number of captures,
// including the match itself), index (the index where the match starts),
// and input string. |length| and |index| are expected to be tagged, and
......@@ -420,12 +433,16 @@ class CodeStubAssembler : public compiler::CodeAssembler {
ParameterMode mode = INTEGER_PARAMETERS);
// Copies |character_count| elements from |from_string| to |to_string|
// starting at the |from_index|'th character. |from_index| and
// |character_count| must be Smis s.t.
// 0 <= |from_index| <= |from_index| + |character_count| < from_string.length.
// starting at the |from_index|'th character. |from_string| and |to_string|
// must be either both one-byte strings or both two-byte strings.
// |from_index|, |to_index| and |character_count| must be Smis s.t.
// 0 <= |from_index| <= |from_index| + |character_count| <= from_string.length
// and
// 0 <= |to_index| <= |to_index| + |character_count| <= to_string.length.
void CopyStringCharacters(compiler::Node* from_string,
compiler::Node* to_string,
compiler::Node* from_index,
compiler::Node* to_index,
compiler::Node* character_count,
String::Encoding encoding);
......@@ -516,6 +533,19 @@ class CodeStubAssembler : public compiler::CodeAssembler {
compiler::Node* SubString(compiler::Node* context, compiler::Node* string,
compiler::Node* from, compiler::Node* to);
// Return a new string object produced by concatenating |first| with |second|.
compiler::Node* StringConcat(compiler::Node* context, compiler::Node* first,
compiler::Node* second);
// Return the first index >= {from} at which {needle_char} was found in
// {string}, or -1 if such an index does not exist. The returned value is
// a Smi, {string} is expected to be a String, {needle_char} is an intptr,
// and {from} is expected to be tagged.
compiler::Node* StringIndexOfChar(compiler::Node* context,
compiler::Node* string,
compiler::Node* needle_char,
compiler::Node* from);
compiler::Node* StringFromCodePoint(compiler::Node* codepoint,
UnicodeEncoding encoding);
......
......@@ -203,6 +203,7 @@ enum ContextLookupFlags {
V(PROXY_MAP_INDEX, Map, proxy_map) \
V(REGEXP_EXEC_FUNCTION_INDEX, JSFunction, regexp_exec_function) \
V(REGEXP_FUNCTION_INDEX, JSFunction, regexp_function) \
V(REGEXP_PROTOTYPE_MAP_INDEX, Map, regexp_prototype_map) \
V(REGEXP_RESULT_MAP_INDEX, Map, regexp_result_map) \
V(SCRIPT_CONTEXT_TABLE_INDEX, ScriptContextTable, script_context_table) \
V(SCRIPT_FUNCTION_INDEX, JSFunction, script_function) \
......
......@@ -208,7 +208,6 @@ function PostNatives(utils) {
"promise_state_symbol",
"reflect_apply",
"reflect_construct",
"regexp_flags_symbol",
"to_string_tag_symbol",
"object_to_string",
"species_symbol",
......
......@@ -11,26 +11,10 @@
// -------------------------------------------------------------------
// Imports
var GlobalArray = global.Array;
var GlobalObject = global.Object;
var GlobalRegExp = global.RegExp;
var GlobalRegExpPrototype = GlobalRegExp.prototype;
var InternalArray = utils.InternalArray;
var MaxSimple;
var MinSimple;
var RegExpExecJS = GlobalRegExp.prototype.exec;
var matchSymbol = utils.ImportNow("match_symbol");
var replaceSymbol = utils.ImportNow("replace_symbol");
var searchSymbol = utils.ImportNow("search_symbol");
var speciesSymbol = utils.ImportNow("species_symbol");
var splitSymbol = utils.ImportNow("split_symbol");
var SpeciesConstructor;
utils.Import(function(from) {
MaxSimple = from.MaxSimple;
MinSimple = from.MinSimple;
SpeciesConstructor = from.SpeciesConstructor;
});
// -------------------------------------------------------------------
......@@ -71,11 +55,6 @@ function RegExpInitialize(object, pattern, flags) {
}
// Thin wrapper around the %_RegExpExec intrinsic: runs |regexp| on |string|
// starting at |index|, passing the shared RegExpLastMatchInfo, and returns
// whatever the intrinsic returns.
function DoRegExpExec(regexp, string, index) {
return %_RegExpExec(regexp, string, index, RegExpLastMatchInfo);
}
// This is kind of performance sensitive, so we want to avoid unnecessary
// type checks on inputs. But we also don't want to inline it several times
// manually, so we use a macro :-)
......@@ -101,228 +80,6 @@ macro RETURN_NEW_RESULT_FROM_MATCH_INFO(MATCHINFO, STRING)
return result;
endmacro
// ES#sec-regexpexec Runtime Semantics: RegExpExec ( R, S )
// Also takes an optional exec method in case our caller
// has already fetched exec.
function RegExpSubclassExec(regexp, string, exec) {
if (IS_UNDEFINED(exec)) {
exec = regexp.exec;
}
if (IS_CALLABLE(exec)) {
var result = %_Call(exec, regexp, string);
if (!IS_RECEIVER(result) && !IS_NULL(result)) {
throw %make_type_error(kInvalidRegExpExecResult);
}
return result;
}
return %_Call(RegExpExecJS, regexp, string);
}
%SetForceInlineFlag(RegExpSubclassExec);
// Legacy implementation of RegExp.prototype[Symbol.replace] which
// doesn't properly call the underlying exec method.
// TODO(lrn): This array will survive indefinitely if replace is never
// called again. However, it will be empty, since the contents are cleared
// in the finally block.
var reusableReplaceArray = new InternalArray(4);
// Helper function for replacing regular expressions with the result of a
// function application in String.prototype.replace.
//
// Handles a global |regexp| with a callable |replace|: collects all matches
// of |regexp| in |subject| via %RegExpExecMultiple, calls |replace| for each
// match, and concatenates the pieces with %StringBuilderConcat. Returns
// |subject| itself when there is no match. regexp.lastIndex is reset to 0.
function StringReplaceGlobalRegExpWithFunction(subject, regexp, replace) {
// Borrow the shared scratch array; it is nulled out while in use so that a
// nested call does not reuse it.
var resultArray = reusableReplaceArray;
if (resultArray) {
reusableReplaceArray = null;
} else {
// Inside a nested replace (replace called from the replacement function
// of another replace) or we have failed to set the reusable array
// back due to an exception in a replacement function. Create a new
// array to use in the future, or until the original is written back.
resultArray = new InternalArray(16);
}
var res = %RegExpExecMultiple(regexp,
subject,
RegExpLastMatchInfo,
resultArray);
regexp.lastIndex = 0;
if (IS_NULL(res)) {
// No matches at all.
reusableReplaceArray = resultArray;
return subject;
}
var len = res.length;
if (NUMBER_OF_CAPTURES(RegExpLastMatchInfo) == 2) {
// If the number of captures is two then there are no explicit captures in
// the regexp, just the implicit capture that captures the whole match. In
// this case we can simplify quite a bit and end up with something faster.
// The builder will consist of some integers that indicate slices of the
// input string and some replacements that were returned from the replace
// function.
var match_start = 0;
for (var i = 0; i < len; i++) {
var elem = res[i];
if (%_IsSmi(elem)) {
// Integers represent slices of the original string.
// NOTE(review): a positive Smi appears to pack the slice as
// (position << 11) | length, so the sum below is the slice end, i.e. the
// start of the next match; a non-positive Smi appears to be a negated
// length followed by the position in the next element -- confirm against
// the producer of this array.
if (elem > 0) {
match_start = (elem >> 11) + (elem & 0x7ff);
} else {
match_start = res[++i] - elem;
}
} else {
// Non-Smi elements are the matched strings.
var func_result = replace(elem, match_start, subject);
// Overwrite the i'th element in the results with the string we got
// back from the callback function.
res[i] = TO_STRING(func_result);
match_start += elem.length;
}
}
} else {
for (var i = 0; i < len; i++) {
var elem = res[i];
if (!%_IsSmi(elem)) {
// elem must be an Array.
// Use the apply argument as backing for global RegExp properties.
var func_result = %reflect_apply(replace, UNDEFINED, elem);
// Overwrite the i'th element in the results with the string we got
// back from the callback function.
res[i] = TO_STRING(func_result);
}
}
}
var result = %StringBuilderConcat(res, len, subject);
// Empty the scratch array and hand it back for the next call.
resultArray.length = 0;
reusableReplaceArray = resultArray;
return result;
}
// Compute the string of a given regular expression capture.
function CaptureString(string, lastCaptureInfo, index) {
// Scale the index.
var scaled = index << 1;
// Compute start and end.
var start = lastCaptureInfo[CAPTURE(scaled)];
// If start isn't valid, return undefined.
if (start < 0) return;
var end = lastCaptureInfo[CAPTURE(scaled + 1)];
return %_SubString(string, start, end);
}
// Handles a non-global |regexp| with a callable |replace|: runs a single exec
// from index 0 and, on a match, returns
//   prefix + replace(match, captures..., index, subject) + suffix.
// On no match, regexp.lastIndex is reset to 0 and |subject| is returned.
function StringReplaceNonGlobalRegExpWithFunction(subject, regexp, replace) {
var matchInfo = DoRegExpExec(regexp, subject, 0);
if (IS_NULL(matchInfo)) {
regexp.lastIndex = 0;
return subject;
}
// Start of the match; everything before it is copied verbatim.
var index = matchInfo[CAPTURE0];
var result = %_SubString(subject, 0, index);
// Read the end offset now, before the callback runs (see below).
var endOfMatch = matchInfo[CAPTURE1];
// Compute the parameter list consisting of the match, captures, index,
// and subject for the replace function invocation.
// The number of captures plus one for the match.
var m = NUMBER_OF_CAPTURES(matchInfo) >> 1;
var replacement;
if (m == 1) {
// No captures, only the match, which is always valid.
var s = %_SubString(subject, index, endOfMatch);
// Don't call directly to avoid exposing the built-in global object.
replacement = replace(s, index, subject);
} else {
var parameters = new InternalArray(m + 2);
for (var j = 0; j < m; j++) {
parameters[j] = CaptureString(subject, matchInfo, j);
}
// j == m here (var is function-scoped): append index and subject after
// the captures.
parameters[j] = index;
parameters[j + 1] = subject;
replacement = %reflect_apply(replace, UNDEFINED, parameters);
}
result += replacement; // The add method converts to string if necessary.
// Can't use matchInfo any more from here, since the function could
// overwrite it.
return result + %_SubString(subject, endOfMatch, subject.length);
}
// Wraps access to matchInfo's captures into a format understood by
// GetSubstitution.
// |matches| is the match info array and |subject| the string it refers to.
// The wrapper exposes |length| (number of capture pairs, including the whole
// match) and an at(ix) accessor.
function MatchInfoCaptureWrapper(matches, subject) {
this.length = NUMBER_OF_CAPTURES(matches) >> 1;
this.match = matches;
this.subject = subject;
}
// Returns the text of capture |ix| as a substring of the subject, or
// UNDEFINED when the capture's start offset is negative.
MatchInfoCaptureWrapper.prototype.at = function(ix) {
const match = this.match;
// Captures are stored as (start, end) pairs, hence the index is doubled.
const start = match[CAPTURE(ix << 1)];
if (start < 0) return UNDEFINED;
return %_SubString(this.subject, start, match[CAPTURE((ix << 1) + 1)]);
};
%SetForceInlineFlag(MatchInfoCaptureWrapper.prototype.at);
// Wraps a plain array of captures behind the same length/at(ix) interface
// that MatchInfoCaptureWrapper offers.
function ArrayCaptureWrapper(array) {
this.length = array.length;
this.array = array;
}
// Returns element |ix| of the wrapped array unchanged.
ArrayCaptureWrapper.prototype.at = function(ix) {
return this.array[ix];
};
%SetForceInlineFlag(ArrayCaptureWrapper.prototype.at);
function RegExpReplace(string, replace) {
if (!IS_REGEXP(this)) {
throw %make_type_error(kIncompatibleMethodReceiver,
"RegExp.prototype.@@replace", this);
}
var subject = TO_STRING(string);
var search = this;
if (!IS_CALLABLE(replace)) {
replace = TO_STRING(replace);
if (!REGEXP_GLOBAL(search)) {
// Non-global regexp search, string replace.
var match = DoRegExpExec(search, subject, 0);
if (match == null) {
search.lastIndex = 0
return subject;
}
if (replace.length == 0) {
return %_SubString(subject, 0, match[CAPTURE0]) +
%_SubString(subject, match[CAPTURE1], subject.length)
}
const captures = new MatchInfoCaptureWrapper(match, subject);
const start = match[CAPTURE0];
const end = match[CAPTURE1];
const prefix = %_SubString(subject, 0, start);
const matched = %_SubString(subject, start, end);
const suffix = %_SubString(subject, end, subject.length);
return prefix +
GetSubstitution(matched, subject, start, captures, replace) +
suffix;
}
// Global regexp search, string replace.
search.lastIndex = 0;
return %StringReplaceGlobalRegExpWithString(
subject, search, replace, RegExpLastMatchInfo);
}
if (REGEXP_GLOBAL(search)) {
// Global regexp search, function replace.
return StringReplaceGlobalRegExpWithFunction(subject, search, replace);
}
// Non-global regexp search, function replace.
return StringReplaceNonGlobalRegExpWithFunction(subject, search, replace);
}
// ES#sec-getsubstitution
// GetSubstitution(matched, str, position, captures, replacement)
// Expand the $-expressions in the string and return a new string with
......@@ -408,120 +165,8 @@ function GetSubstitution(matched, string, position, captures, replacement) {
return result;
}
// ES#sec-advancestringindex
// AdvanceStringIndex ( S, index, unicode )
function AdvanceStringIndex(string, index, unicode) {
var increment = 1;
if (unicode) {
var first = %_StringCharCodeAt(string, index);
if (first >= 0xD800 && first <= 0xDBFF && string.length > index + 1) {
var second = %_StringCharCodeAt(string, index + 1);
if (second >= 0xDC00 && second <= 0xDFFF) {
increment = 2;
}
}
}
return increment;
}
// Moves regexp.lastIndex forward by the increment AdvanceStringIndex reports
// for the current lastIndex.
function SetAdvancedStringIndex(regexp, string, unicode) {
  var current = regexp.lastIndex;
  var step = AdvanceStringIndex(string, current, unicode);
  regexp.lastIndex = current + step;
}
// ES#sec-regexp.prototype-@@replace
// RegExp.prototype [ @@replace ] ( string, replaceValue )
//
// Generic replace that works on any receiver object. It first collects all
// match results through RegExpSubclassExec, then builds the output string in
// a second pass.
//
// string:  the value to search; converted with TO_STRING.
// replace: a callable, or a value converted with TO_STRING and expanded via
//          GetSubstitution.
// Throws a kIncompatibleMethodReceiver type error for non-receiver 'this'.
function RegExpSubclassReplace(string, replace) {
if (!IS_RECEIVER(this)) {
throw %make_type_error(kIncompatibleMethodReceiver,
"RegExp.prototype.@@replace", this);
}
string = TO_STRING(string);
var length = string.length;
var functionalReplace = IS_CALLABLE(replace);
if (!functionalReplace) replace = TO_STRING(replace);
var global = TO_BOOLEAN(this.global);
if (global) {
// 'unicode' is only read (and therefore only defined) for global regexps;
// it is only consumed on the global path below.
var unicode = TO_BOOLEAN(this.unicode);
this.lastIndex = 0;
}
// TODO(adamk): this fast path is wrong as we don't ensure that 'exec'
// is actually a data property on RegExp.prototype.
var exec;
if (IS_REGEXP(this)) {
exec = this.exec;
if (exec === RegExpExecJS) {
// Unmodified exec on a real RegExp: use the non-generic implementation.
return %_Call(RegExpReplace, this, string, replace);
}
}
// Phase 1: collect match results. Non-global receivers contribute at most
// one result.
var results = new InternalArray();
var result, replacement;
while (true) {
result = RegExpSubclassExec(this, string, exec);
// Ensure exec will be read again on the next loop through.
exec = UNDEFINED;
if (IS_NULL(result)) {
break;
} else {
results.push(result);
if (!global) break;
var matchStr = TO_STRING(result[0]);
// An empty match would not move lastIndex, so advance it explicitly.
if (matchStr === "") SetAdvancedStringIndex(this, string, unicode);
}
}
// Phase 2: assemble the output from the collected results.
var accumulatedResult = "";
var nextSourcePosition = 0;
for (var i = 0; i < results.length; i++) {
result = results[i];
var capturesLength = MaxSimple(TO_LENGTH(result.length), 0);
var matched = TO_STRING(result[0]);
var matchedLength = matched.length;
// Clamp the reported match position into [0, length].
var position = MaxSimple(MinSimple(TO_INTEGER(result.index), length), 0);
// Note that index 0 (the match itself) is copied into 'captures' as well.
var captures = new InternalArray();
for (var n = 0; n < capturesLength; n++) {
var capture = result[n];
if (!IS_UNDEFINED(capture)) capture = TO_STRING(capture);
captures[n] = capture;
}
if (functionalReplace) {
// Arguments for the callback: all captures, then position, then string.
var parameters = new InternalArray(capturesLength + 2);
for (var j = 0; j < capturesLength; j++) {
parameters[j] = captures[j];
}
parameters[j] = position;
parameters[j + 1] = string;
replacement = %reflect_apply(replace, UNDEFINED, parameters, 0,
parameters.length);
} else {
const capturesWrapper = new ArrayCaptureWrapper(captures);
replacement = GetSubstitution(matched, string, position, capturesWrapper,
replace);
}
// Results whose position lies before what has already been emitted are
// ignored.
if (position >= nextSourcePosition) {
accumulatedResult +=
%_SubString(string, nextSourcePosition, position) + replacement;
nextSourcePosition = position + matchedLength;
}
}
// Append whatever remains of the input after the last replaced match.
if (nextSourcePosition >= length) return accumulatedResult;
return accumulatedResult + %_SubString(string, nextSourcePosition, length);
}
%FunctionRemovePrototype(RegExpSubclassReplace);
// -------------------------------------------------------------------
// Install the generic replace implementation under replaceSymbol on
// GlobalRegExp.prototype, with the DONT_ENUM attribute.
utils.InstallFunctions(GlobalRegExp.prototype, DONT_ENUM, [
replaceSymbol, RegExpSubclassReplace,
]);
// Publish RegExpLastMatchInfo to the context under the name
// "regexp_last_match_info".
%InstallToContext(["regexp_last_match_info", RegExpLastMatchInfo]);
// -------------------------------------------------------------------
......@@ -556,7 +201,6 @@ utils.Export(function(to) {
to.InternalRegExpMatch = InternalRegExpMatch;
to.InternalRegExpReplace = InternalRegExpReplace;
to.IsRegExp = IsRegExp;
to.RegExpExec = DoRegExpExec;
to.RegExpInitialize = RegExpInitialize;
to.RegExpLastMatchInfo = RegExpLastMatchInfo;
});
......
......@@ -11652,6 +11652,101 @@ int String::IndexOf(Isolate* isolate, Handle<String> receiver,
start_index);
}
// Expands the $-expressions in |replacement| using the data provided by
// |match| and returns the resulting string.
//
// Recognized forms are $$ (a literal '$'), $& (the match), $` (the prefix),
// $' (the suffix) and $n / $nn (a capture). A '$' that does not start a
// recognized form, or that refers to capture 0 or to a capture index that is
// not below match->CaptureCount(), is emitted literally; the characters
// after it are then copied as ordinary text.
//
// Returns an empty MaybeHandle if match->GetCapture() fails.
MaybeHandle<String> String::GetSubstitution(Isolate* isolate, Match* match,
Handle<String> replacement) {
Factory* factory = isolate->factory();
const int replacement_length = replacement->length();
const int captures_length = match->CaptureCount();
replacement = String::Flatten(replacement);
Handle<String> dollar_string =
factory->LookupSingleCharacterStringFromCode('$');
int next = String::IndexOf(isolate, replacement, dollar_string, 0);
// Without any '$' there is nothing to expand.
if (next < 0) {
return replacement;
}
IncrementalStringBuilder builder(isolate);
// Copy the text preceding the first '$'.
if (next > 0) {
builder.AppendString(factory->NewSubString(replacement, 0, next));
}
// Invariant at the top of each iteration: |next| is the index of a '$'.
while (true) {
// |pos| is the first position not yet consumed; it starts just past '$'.
int pos = next + 1;
if (pos < replacement_length) {
const uint16_t peek = replacement->Get(pos);
if (peek == '$') { // $$
pos++;
builder.AppendCharacter('$');
} else if (peek == '&') { // $& - match
pos++;
builder.AppendString(match->GetMatch());
} else if (peek == '`') { // $` - prefix
pos++;
builder.AppendString(match->GetPrefix());
} else if (peek == '\'') { // $' - suffix
pos++;
builder.AppendString(match->GetSuffix());
} else if (peek >= '0' && peek <= '9') {
// Valid indices are $1 .. $9, $01 .. $09 and $10 .. $99
int scaled_index = (peek - '0');
int advance = 1;
// Prefer the two-digit reading, but only if it names an existing
// capture; otherwise fall back to the single-digit reading.
if (pos + 1 < replacement_length) {
const uint16_t next_peek = replacement->Get(pos + 1);
if (next_peek >= '0' && next_peek <= '9') {
const int new_scaled_index = scaled_index * 10 + (next_peek - '0');
if (new_scaled_index < captures_length) {
scaled_index = new_scaled_index;
advance = 2;
}
}
}
if (scaled_index != 0 && scaled_index < captures_length) {
bool capture_exists;
Handle<String> capture;
ASSIGN_RETURN_ON_EXCEPTION(
isolate, capture,
match->GetCapture(scaled_index, &capture_exists), String);
// A capture reported as non-existent contributes nothing.
if (capture_exists) builder.AppendString(capture);
pos += advance;
} else {
// Not a valid capture reference: keep the '$' and leave |pos| on the
// digits so they are copied as plain text below.
builder.AppendCharacter('$');
}
} else {
builder.AppendCharacter('$');
}
} else {
// A trailing '$' at the very end of the replacement.
builder.AppendCharacter('$');
}
// Go to the next $ in the replacement.
next = String::IndexOf(isolate, replacement, dollar_string, pos);
// Return if there are no more $ characters in the replacement. If we
// haven't reached the end, we need to append the suffix.
if (next < 0) {
if (pos < replacement_length) {
builder.AppendString(
factory->NewSubString(replacement, pos, replacement_length));
}
return builder.Finish();
}
// Append substring between the previous and the next $ character.
if (next > pos) {
builder.AppendString(factory->NewSubString(replacement, pos, next));
}
}
UNREACHABLE();
return MaybeHandle<String>();
}
namespace { // for String.Prototype.lastIndexOf
template <typename schar, typename pchar>
......
......@@ -9484,6 +9484,25 @@ class String: public Name {
static Object* LastIndexOf(Isolate* isolate, Handle<Object> receiver,
Handle<Object> search, Handle<Object> position);
// Encapsulates logic related to a match and its capture groups as required
// by GetSubstitution.
class Match {
public:
// The matched text; substituted for "$&".
virtual Handle<String> GetMatch() = 0;
// The text of capture |i|; substituted for "$n" / "$nn". GetSubstitution
// only asks for indices in [1, CaptureCount()). Implementations set
// |*capture_exists|; when it is false the returned string is not appended.
virtual MaybeHandle<String> GetCapture(int i, bool* capture_exists) = 0;
// The text before the match; substituted for "$`".
virtual Handle<String> GetPrefix() = 0;
// The text after the match; substituted for "$'".
virtual Handle<String> GetSuffix() = 0;
// Exclusive upper bound on the capture indices GetSubstitution accepts.
virtual int CaptureCount() = 0;
virtual ~Match() {}
};
// ES#sec-getsubstitution
// GetSubstitution(matched, str, position, captures, replacement)
// Expand the $-expressions in the string and return a new string with
// the result.
MUST_USE_RESULT static MaybeHandle<String> GetSubstitution(
Isolate* isolate, Match* match, Handle<String> replacement);
// String equality operations.
inline bool Equals(String* other);
inline static bool Equals(Handle<String> one, Handle<String> two);
......
......@@ -10,6 +10,7 @@
#include "src/messages.h"
#include "src/regexp/jsregexp-inl.h"
#include "src/regexp/jsregexp.h"
#include "src/regexp/regexp-utils.h"
#include "src/string-builder.h"
#include "src/string-search.h"
......@@ -793,7 +794,6 @@ RUNTIME_FUNCTION(Runtime_RegExpFlags) {
return regexp->flags();
}
RUNTIME_FUNCTION(Runtime_RegExpSource) {
SealHandleScope shs(isolate);
DCHECK(args.length() == 1);
......@@ -822,7 +822,6 @@ RUNTIME_FUNCTION(Runtime_RegExpConstructResult) {
return *array;
}
RUNTIME_FUNCTION(Runtime_RegExpInitializeAndCompile) {
HandleScope scope(isolate);
DCHECK(args.length() == 3);
......@@ -836,14 +835,110 @@ RUNTIME_FUNCTION(Runtime_RegExpInitializeAndCompile) {
return *regexp;
}
namespace {
// String::Match implementation that answers every query by reading a match
// info object through RegExpUtils. The subject is flattened on construction.
class MatchInfoBackedMatch : public String::Match {
 public:
  MatchInfoBackedMatch(Isolate* isolate, Handle<String> subject,
                       Handle<JSObject> match_info)
      : isolate_(isolate),
        subject_(String::Flatten(subject)),
        match_info_(match_info) {}

  // Capture 0 is the match itself.
  Handle<String> GetMatch() override {
    return RegExpUtils::GenericCaptureGetter(isolate_, match_info_, 0, nullptr);
  }

  // Reports an undefined capture as non-existent (with an empty string as the
  // returned value); anything else is converted with Object::ToString.
  MaybeHandle<String> GetCapture(int i, bool* capture_exists) override {
    Handle<Object> raw_capture =
        RegExpUtils::GenericCaptureGetter(isolate_, match_info_, i);
    const bool is_present = !raw_capture->IsUndefined(isolate_);
    *capture_exists = is_present;
    if (!is_present) {
      return isolate_->factory()->empty_string();
    }
    return Object::ToString(isolate_, raw_capture);
  }

  // Everything in the subject before last-match capture slot 0.
  Handle<String> GetPrefix() override {
    const int prefix_end =
        RegExpUtils::GetLastMatchCapture(isolate_, match_info_, 0);
    return isolate_->factory()->NewSubString(subject_, 0, prefix_end);
  }

  // Everything in the subject from last-match capture slot 1 onwards.
  Handle<String> GetSuffix() override {
    const int suffix_start =
        RegExpUtils::GetLastMatchCapture(isolate_, match_info_, 1);
    const int subject_length = subject_->length();
    return isolate_->factory()->NewSubString(subject_, suffix_start,
                                             subject_length);
  }

  // The match info stores two slots per capture.
  int CaptureCount() override {
    const int slot_count =
        RegExpUtils::GetLastMatchNumberOfCaptures(isolate_, match_info_);
    return slot_count / 2;
  }

  virtual ~MatchInfoBackedMatch() {}

 private:
  Isolate* isolate_;
  Handle<String> subject_;
  Handle<JSObject> match_info_;
};
class VectorBackedMatch : public String::Match {
public:
VectorBackedMatch(Isolate* isolate, Handle<String> subject,
Handle<String> match, int match_position,
ZoneVector<Handle<Object>>* captures)
: isolate_(isolate),
match_(match),
match_position_(match_position),
captures_(captures) {
subject_ = String::Flatten(subject);
}
Handle<String> GetMatch() override { return match_; }
MaybeHandle<String> GetCapture(int i, bool* capture_exists) override {
Handle<Object> capture_obj = captures_->at(i);
if (capture_obj->IsUndefined(isolate_)) {
*capture_exists = false;
return isolate_->factory()->empty_string();
}
*capture_exists = true;
return Object::ToString(isolate_, capture_obj);
}
Handle<String> GetPrefix() override {
return isolate_->factory()->NewSubString(subject_, 0, match_position_);
}
Handle<String> GetSuffix() override {
const int match_end_position = match_position_ + match_->length();
return isolate_->factory()->NewSubString(subject_, match_end_position,
subject_->length());
}
int CaptureCount() override { return static_cast<int>(captures_->size()); }
virtual ~VectorBackedMatch() {}
private:
Isolate* isolate_;
Handle<String> subject_;
Handle<String> match_;
const int match_position_;
ZoneVector<Handle<Object>>* captures_;
};
// Only called from RegExpExecMultiple so it doesn't need to maintain
// separate last match info. See comment on that function.
template <bool has_capture>
static Object* SearchRegExpMultiple(Isolate* isolate, Handle<String> subject,
Handle<JSRegExp> regexp,
Handle<JSObject> last_match_array,
Handle<JSArray> result_array) {
MaybeHandle<Object> SearchRegExpMultiple(Isolate* isolate,
Handle<String> subject,
Handle<JSRegExp> regexp,
Handle<JSObject> last_match_array,
Handle<FixedArray> result_elements) {
DCHECK(subject->IsFlat());
DCHECK_NE(has_capture, regexp->CaptureCount() == 0);
......@@ -863,24 +958,20 @@ static Object* SearchRegExpMultiple(Isolate* isolate, Handle<String> subject,
for (int i = 0; i < capture_registers; i++) {
last_match[i] = Smi::cast(last_match_cache->get(i))->value();
}
Handle<FixedArray> cached_fixed_array =
Handle<FixedArray>(FixedArray::cast(cached_answer));
// The cache FixedArray is a COW-array and can therefore be reused.
JSArray::SetContent(result_array, cached_fixed_array);
Handle<FixedArray> cached_fixed_array(FixedArray::cast(cached_answer));
RegExpImpl::SetLastMatchInfo(last_match_array, subject, capture_count,
last_match);
DeleteArray(last_match);
return *result_array;
// The cache FixedArray is a COW-array and we need to return a copy.
return isolate->factory()->CopyFixedArrayWithMap(
cached_fixed_array, isolate->factory()->fixed_array_map());
}
}
RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
if (global_cache.HasException()) return isolate->heap()->exception();
if (global_cache.HasException()) return MaybeHandle<Object>();
// Ensured in Runtime_RegExpExecMultiple.
DCHECK(result_array->HasFastObjectElements());
Handle<FixedArray> result_elements(
FixedArray::cast(result_array->elements()));
if (result_elements->length() < 16) {
result_elements = isolate->factory()->NewFixedArrayWithHoles(16);
}
......@@ -947,7 +1038,7 @@ static Object* SearchRegExpMultiple(Isolate* isolate, Handle<String> subject,
}
}
if (global_cache.HasException()) return isolate->heap()->exception();
if (global_cache.HasException()) return MaybeHandle<Object>();
if (match_start >= 0) {
// Finished matching, with at least one match.
......@@ -959,6 +1050,9 @@ static Object* SearchRegExpMultiple(Isolate* isolate, Handle<String> subject,
RegExpImpl::SetLastMatchInfo(last_match_array, subject, capture_count,
global_cache.LastSuccessfulMatch());
Handle<FixedArray> result_fixed_array = builder.array();
result_fixed_array->Shrink(builder.length());
if (subject_length > kMinLengthToCache) {
// Store the last successful match into the array for caching.
// TODO(yangguo): do not expose last match to JS and simplify caching.
......@@ -969,33 +1063,28 @@ static Object* SearchRegExpMultiple(Isolate* isolate, Handle<String> subject,
for (int i = 0; i < capture_registers; i++) {
last_match_cache->set(i, Smi::FromInt(last_match[i]));
}
Handle<FixedArray> result_fixed_array = builder.array();
result_fixed_array->Shrink(builder.length());
// Cache the result and turn the FixedArray into a COW array.
RegExpResultsCache::Enter(
isolate, subject, handle(regexp->data(), isolate), result_fixed_array,
last_match_cache, RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
}
return *builder.ToJSArray(result_array);
// The cache FixedArray is a COW-array and we need to return a copy.
return isolate->factory()->CopyFixedArrayWithMap(
result_fixed_array, isolate->factory()->fixed_array_map());
} else {
return isolate->heap()->null_value(); // No matches at all.
return isolate->factory()->null_value(); // No matches at all.
}
}
// This is only called for StringReplaceGlobalRegExpWithFunction. This sets
// lastMatchInfoOverride to maintain the last match info, so we don't need to
// set any other last match array info.
RUNTIME_FUNCTION(Runtime_RegExpExecMultiple) {
HandleScope handles(isolate);
DCHECK(args.length() == 4);
CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
CONVERT_ARG_HANDLE_CHECKED(JSObject, last_match_info, 2);
CONVERT_ARG_HANDLE_CHECKED(JSArray, result_array, 3);
MaybeHandle<Object> RegExpExecMultiple(Isolate* isolate,
Handle<JSRegExp> regexp,
Handle<String> subject,
Handle<JSObject> last_match_info,
Handle<FixedArray> result_array) {
CHECK(last_match_info->HasFastObjectElements());
CHECK(result_array->HasFastObjectElements());
subject = String::Flatten(subject);
CHECK(regexp->GetFlags() & JSRegExp::kGlobal);
......@@ -1009,6 +1098,520 @@ RUNTIME_FUNCTION(Runtime_RegExpExecMultiple) {
}
}
// Helper function for replacing regular expressions with the result of a
// function application in String.prototype.replace.
//
// Runs RegExpExecMultiple to obtain an array describing all matches, calls
// |replace_obj| once per match, overwrites each match entry with the
// stringified return value, and finally concatenates the array into the
// result string.
//
// Returns |subject| unchanged if there is no match, and an empty MaybeHandle
// if the regexp execution, the callback or a ToString conversion fails.
MaybeHandle<String> StringReplaceGlobalRegExpWithFunction(
Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
Handle<Object> replace_obj) {
Factory* factory = isolate->factory();
// TODO(jgruber): Convert result_array into a List<Handle<Object>> (or
// similar) and adapt / remove FixedArrayBuilder.
Handle<JSObject> last_match_info = isolate->regexp_last_match_info();
Handle<FixedArray> result_array = factory->NewFixedArrayWithHoles(16);
Handle<Object> res;
ASSIGN_RETURN_ON_EXCEPTION(isolate, res,
RegExpExecMultiple(isolate, regexp, subject,
last_match_info, result_array),
String);
// Reload the last match info since it might have changed in the meantime.
last_match_info = isolate->regexp_last_match_info();
if (res->IsNull(isolate)) return subject; // No matches at all.
// From here on, work on the array returned by RegExpExecMultiple.
result_array = Handle<FixedArray>::cast(res);
const int result_length = result_array->length();
const int num_captures =
RegExpUtils::GetLastMatchNumberOfCaptures(isolate, last_match_info) / 2;
if (num_captures == 1) {
// If the number of captures is one then there are no explicit captures in
// the regexp, just the implicit capture that captures the whole match. In
// this case we can simplify quite a bit and end up with something faster.
// The builder will consist of some integers that indicate slices of the
// input string and some replacements that were returned from the replace
// function.
// Running position in |subject| at which the next match starts.
int match_start = 0;
for (int i = 0; i < result_length; i++) {
Handle<Object> elem = FixedArray::get(*result_array, i, isolate);
if (elem->IsSmi()) {
// Integers represent slices of the original string.
// TODO(jgruber): Maybe we don't need this weird encoding anymore (in
// preparation to invoking StringBuilderConcat), but can just copy into
// the result string with the IncrementalStringBuilder as we go?
const int elem_value = Handle<Smi>::cast(elem)->value();
if (elem_value > 0) {
// NOTE(review): presumably a single Smi packing the slice position in
// the upper bits and its length in the low 11 bits; their sum is the
// end of the slice - confirm against the builder that writes it.
match_start = (elem_value >> 11) + (elem_value & 0x7ff);
} else {
// NOTE(review): presumably a two-element encoding (negated length
// followed by position); this also consumes the next element.
Handle<Object> next_elem =
FixedArray::get(*result_array, ++i, isolate);
const int next_elem_value = Handle<Smi>::cast(next_elem)->value();
match_start = next_elem_value - elem_value;
}
} else {
DCHECK(elem->IsString());
Handle<String> elem_string = Handle<String>::cast(elem);
// Overwrite the i'th element in the results with the string we got
// back from the callback function.
// Callback arguments: matched string, match position, subject.
const int argc = 3;
ScopedVector<Handle<Object>> argv(argc);
argv[0] = elem_string;
argv[1] = handle(Smi::FromInt(match_start), isolate);
argv[2] = subject;
Handle<Object> replacement_obj;
ASSIGN_RETURN_ON_EXCEPTION(
isolate, replacement_obj,
Execution::Call(isolate, replace_obj, factory->undefined_value(),
argc, argv.start()),
String);
Handle<String> replacement;
ASSIGN_RETURN_ON_EXCEPTION(isolate, replacement,
Object::ToString(isolate, replacement_obj),
String);
result_array->set(i, *replacement);
// The next slice or match begins right after this match.
match_start += elem_string->length();
}
}
} else {
DCHECK(num_captures > 1);
for (int i = 0; i < result_length; i++) {
Handle<Object> elem = FixedArray::get(*result_array, i, isolate);
// Smis describe slices of the subject; only match entries need a call.
if (elem->IsSmi()) continue;
// TODO(jgruber): We can skip this whole round-trip through a JS array
// for result_array.
// The elements of the match entry are passed to the callback verbatim as
// its argument list.
Handle<JSArray> elem_array = Handle<JSArray>::cast(elem);
Handle<FixedArray> elem_array_elems(
FixedArray::cast(elem_array->elements()), isolate);
const int argc = elem_array_elems->length();
ScopedVector<Handle<Object>> argv(argc);
for (int j = 0; j < argc; j++) {
argv[j] = FixedArray::get(*elem_array_elems, j, isolate);
}
// TODO(jgruber): This call is another pattern we could refactor.
Handle<Object> replacement_obj;
ASSIGN_RETURN_ON_EXCEPTION(
isolate, replacement_obj,
Execution::Call(isolate, replace_obj, factory->undefined_value(),
argc, argv.start()),
String);
Handle<String> replacement;
ASSIGN_RETURN_ON_EXCEPTION(isolate, replacement,
Object::ToString(isolate, replacement_obj),
String);
result_array->set(i, *replacement);
}
}
// Shortcuts for results that need no concatenation.
if (result_length == 0) {
return factory->empty_string();
} else if (result_length == 1) {
Handle<Object> first = FixedArray::get(*result_array, 0, isolate);
if (first->IsString()) return Handle<String>::cast(first);
}
// Compute the final length (and whether one-byte storage suffices), then
// copy all pieces into a freshly allocated sequential string.
bool one_byte = subject->HasOnlyOneByteChars();
const int length = StringBuilderConcatLength(subject->length(), *result_array,
result_length, &one_byte);
// A length of -1 is reported as an illegal-argument exception.
if (length == -1) {
isolate->Throw(isolate->heap()->illegal_argument_string());
return MaybeHandle<String>();
}
if (one_byte) {
Handle<SeqOneByteString> answer;
ASSIGN_RETURN_ON_EXCEPTION(isolate, answer,
isolate->factory()->NewRawOneByteString(length),
String);
StringBuilderConcatHelper(*subject, answer->GetChars(), *result_array,
result_length);
return answer;
} else {
DCHECK(!one_byte);
Handle<SeqTwoByteString> answer;
ASSIGN_RETURN_ON_EXCEPTION(isolate, answer,
isolate->factory()->NewRawTwoByteString(length),
String);
StringBuilderConcatHelper(*subject, answer->GetChars(), *result_array,
result_length);
return answer;
}
UNREACHABLE();
return MaybeHandle<String>();
}
// Replaces the first match of |regexp| in |subject| with the stringified
// result of calling |replace_obj|.
//
// The callback receives the match and all captures (undefined for captures
// the getter reports as missing), followed by the match position and the
// subject. If there is no match, lastIndex is reset to 0 and |subject| is
// returned unchanged. Returns an empty MaybeHandle on any failure.
MaybeHandle<String> StringReplaceNonGlobalRegExpWithFunction(
    Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
    Handle<Object> replace_obj) {
  Factory* factory = isolate->factory();
  Handle<JSObject> last_match_info = isolate->regexp_last_match_info();

  // TODO(jgruber): This is a pattern we could refactor.
  Handle<Object> exec_result;
  ASSIGN_RETURN_ON_EXCEPTION(
      isolate, exec_result,
      RegExpImpl::Exec(regexp, subject, 0, last_match_info), String);

  if (exec_result->IsNull(isolate)) {
    RETURN_ON_EXCEPTION(isolate, RegExpUtils::SetLastIndex(isolate, regexp, 0),
                        String);
    return subject;
  }

  Handle<JSObject> match_info = Handle<JSObject>::cast(exec_result);

  const int match_begin =
      RegExpUtils::GetLastMatchCapture(isolate, match_info, 0);
  const int match_end =
      RegExpUtils::GetLastMatchCapture(isolate, match_info, 1);

  // Start the result with everything preceding the match.
  IncrementalStringBuilder builder(isolate);
  builder.AppendString(factory->NewSubString(subject, 0, match_begin));

  // Compute the parameter list consisting of the match, captures, index,
  // and subject for the replace function invocation.
  // The number of captures plus one for the match.
  const int capture_count =
      RegExpUtils::GetLastMatchNumberOfCaptures(isolate, match_info) / 2;

  const int argc = capture_count + 2;
  ScopedVector<Handle<Object>> argv(argc);

  for (int k = 0; k < capture_count; k++) {
    bool found;
    Handle<String> text =
        RegExpUtils::GenericCaptureGetter(isolate, match_info, k, &found);
    if (!found) {
      argv[k] = factory->undefined_value();
      continue;
    }
    argv[k] = text;
  }

  argv[capture_count] = handle(Smi::FromInt(match_begin), isolate);
  argv[capture_count + 1] = subject;

  Handle<Object> call_result;
  ASSIGN_RETURN_ON_EXCEPTION(
      isolate, call_result,
      Execution::Call(isolate, replace_obj, factory->undefined_value(), argc,
                      argv.start()),
      String);

  Handle<String> replacement;
  ASSIGN_RETURN_ON_EXCEPTION(
      isolate, replacement, Object::ToString(isolate, call_result), String);

  // Replacement text, then everything following the match.
  builder.AppendString(replacement);
  const int subject_length = subject->length();
  builder.AppendString(
      factory->NewSubString(subject, match_end, subject_length));

  return builder.Finish();
}
// Legacy implementation of RegExp.prototype[Symbol.replace] which
// doesn't properly call the underlying exec method.
//
// Dispatches on whether |replace_obj| is callable and on whether |regexp|
// has the global flag:
//  - string replace, non-global: one RegExpImpl::Exec plus GetSubstitution;
//  - string replace, global: the StringReplaceGlobalRegExpWith* helpers;
//  - function replace: the StringReplace*RegExpWithFunction helpers.
//
// Returns an empty MaybeHandle on failure.
MaybeHandle<String> RegExpReplace(Isolate* isolate, Handle<JSRegExp> regexp,
                                  Handle<String> string,
                                  Handle<Object> replace_obj) {
  Factory* factory = isolate->factory();

  // TODO(jgruber): We need the even stricter guarantee of an unmodified
  // JSRegExp map here for access to GetFlags to be legal.
  const int flags = regexp->GetFlags();
  const bool global = (flags & JSRegExp::kGlobal) != 0;

  const bool functional_replace = replace_obj->IsCallable();
  if (!functional_replace) {
    Handle<String> replace;
    ASSIGN_RETURN_ON_EXCEPTION(isolate, replace,
                               Object::ToString(isolate, replace_obj), String);
    replace = String::Flatten(replace);

    Handle<JSObject> last_match_info = isolate->regexp_last_match_info();

    if (!global) {
      // Non-global regexp search, string replace.

      Handle<Object> match_indices_obj;
      ASSIGN_RETURN_ON_EXCEPTION(
          isolate, match_indices_obj,
          RegExpImpl::Exec(regexp, string, 0, last_match_info), String);

      if (match_indices_obj->IsNull(isolate)) {
        RETURN_ON_EXCEPTION(
            isolate, RegExpUtils::SetLastIndex(isolate, regexp, 0), String);
        return string;
      }

      auto match_indices = Handle<JSReceiver>::cast(match_indices_obj);

      Handle<Object> start_index_obj =
          JSReceiver::GetElement(isolate, match_indices,
                                 RegExpImpl::kFirstCapture)
              .ToHandleChecked();
      const int start_index = Handle<Smi>::cast(start_index_obj)->value();

      Handle<Object> end_index_obj =
          JSReceiver::GetElement(isolate, match_indices,
                                 RegExpImpl::kFirstCapture + 1)
              .ToHandleChecked();
      const int end_index = Handle<Smi>::cast(end_index_obj)->value();

      // Result is: prefix + expanded replacement + suffix.
      IncrementalStringBuilder builder(isolate);
      builder.AppendString(factory->NewSubString(string, 0, start_index));

      // An empty replacement needs no expansion and contributes nothing.
      if (replace->length() > 0) {
        MatchInfoBackedMatch m(isolate, string, last_match_info);
        Handle<String> replacement;
        ASSIGN_RETURN_ON_EXCEPTION(
            isolate, replacement, String::GetSubstitution(isolate, &m, replace),
            String);
        builder.AppendString(replacement);
      }

      builder.AppendString(
          factory->NewSubString(string, end_index, string->length()));
      return builder.Finish();
    } else {
      // Global regexp search, string replace.
      DCHECK(global);
      RETURN_ON_EXCEPTION(
          isolate, RegExpUtils::SetLastIndex(isolate, regexp, 0), String);

      // All three helpers return a raw Object*. Pick the right one first and
      // validate the result in one place, so that the empty-replacement
      // variants get the same IsString() guard as the general variant.
      Object* result;
      if (replace->length() == 0) {
        if (string->HasOnlyOneByteChars()) {
          result = StringReplaceGlobalRegExpWithEmptyString<SeqOneByteString>(
              isolate, string, regexp, last_match_info);
        } else {
          result = StringReplaceGlobalRegExpWithEmptyString<SeqTwoByteString>(
              isolate, string, regexp, last_match_info);
        }
      } else {
        result = StringReplaceGlobalRegExpWithString(
            isolate, string, regexp, replace, last_match_info);
      }

      // A non-String result is treated as failure (presumably the exception
      // sentinel) and must not be cast to String.
      if (!result->IsString()) return MaybeHandle<String>();
      return handle(String::cast(result), isolate);
    }
  } else {
    DCHECK(functional_replace);
    if (global) {
      // Global regexp search, function replace.
      return StringReplaceGlobalRegExpWithFunction(isolate, string, regexp,
                                                   replace_obj);
    } else {
      // Non-global regexp search, function replace.
      return StringReplaceNonGlobalRegExpWithFunction(isolate, string, regexp,
                                                      replace_obj);
    }
  }

  UNREACHABLE();
  return MaybeHandle<String>();
}
} // namespace
// Slow path for:
// ES#sec-regexp.prototype-@@replace
// RegExp.prototype [ @@replace ] ( string, replaceValue )
//
// Arguments: (0) the receiver, (1) the subject string, (2) the replace
// value (a callable or anything convertible to a string).
//
// If the receiver is a JSRegExp whose 'exec' is the builtin, the work is
// delegated to RegExpReplace. Otherwise all match results are collected via
// RegExpUtils::RegExpExec and the output is assembled in a second pass.
RUNTIME_FUNCTION(Runtime_RegExpReplace) {
HandleScope scope(isolate);
DCHECK(args.length() == 3);
CONVERT_ARG_HANDLE_CHECKED(JSReceiver, recv, 0);
CONVERT_ARG_HANDLE_CHECKED(String, string, 1);
Handle<Object> replace_obj = args.at<Object>(2);
Factory* factory = isolate->factory();
string = String::Flatten(string);
const int length = string->length();
const bool functional_replace = replace_obj->IsCallable();
// |replace| is only set (and only used) for non-functional replacement.
Handle<String> replace;
if (!functional_replace) {
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, replace,
Object::ToString(isolate, replace_obj));
}
Handle<Object> global_obj;
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
isolate, global_obj,
JSReceiver::GetProperty(recv, factory->global_string()));
const bool global = global_obj->BooleanValue();
bool unicode = false;
if (global) {
// 'unicode' is only read for global receivers, which also get their
// lastIndex reset before matching starts.
Handle<Object> unicode_obj;
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
isolate, unicode_obj,
JSReceiver::GetProperty(recv, factory->unicode_string()));
unicode = unicode_obj->BooleanValue();
RETURN_FAILURE_ON_EXCEPTION(isolate,
RegExpUtils::SetLastIndex(isolate, recv, 0));
}
// TODO(adamk): this fast path is wrong as we don't ensure that 'exec'
// is actually a data property on RegExp.prototype.
Handle<Object> exec = factory->undefined_value();
if (recv->IsJSRegExp()) {
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
isolate, exec, JSObject::GetProperty(
recv, factory->NewStringFromAsciiChecked("exec")));
if (RegExpUtils::IsBuiltinExec(exec)) {
RETURN_RESULT_OR_FAILURE(
isolate, RegExpReplace(isolate, Handle<JSRegExp>::cast(recv), string,
replace_obj));
}
}
// Phase 1: collect match results. Non-global receivers contribute at most
// one result.
Zone zone(isolate->allocator());
ZoneVector<Handle<Object>> results(&zone);
while (true) {
Handle<Object> result;
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
isolate, result, RegExpUtils::RegExpExec(isolate, recv, string, exec));
// Ensure exec will be read again on the next loop through.
exec = factory->undefined_value();
if (result->IsNull(isolate)) break;
results.push_back(result);
if (!global) break;
Handle<Object> match_obj;
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match_obj,
Object::GetElement(isolate, result, 0));
Handle<String> match;
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match,
Object::ToString(isolate, match_obj));
// An empty match would not move lastIndex, so advance it explicitly.
if (match->length() == 0) {
RETURN_FAILURE_ON_EXCEPTION(isolate, RegExpUtils::SetAdvancedStringIndex(
isolate, recv, string, unicode));
}
}
// Phase 2: assemble the output from the collected results.
// TODO(jgruber): Look into ReplacementStringBuilder instead.
IncrementalStringBuilder builder(isolate);
int next_source_position = 0;
for (const auto& result : results) {
Handle<Object> captures_length_obj;
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
isolate, captures_length_obj,
Object::GetProperty(result, factory->length_string()));
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
isolate, captures_length_obj,
Object::ToLength(isolate, captures_length_obj));
// NOTE(review): the cast below assumes the ToLength result is a Smi -
// confirm what happens for lengths outside the Smi range.
const int captures_length =
std::max(Handle<Smi>::cast(captures_length_obj)->value(), 0);
Handle<Object> match_obj;
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match_obj,
Object::GetElement(isolate, result, 0));
Handle<String> match;
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match,
Object::ToString(isolate, match_obj));
const int match_length = match->length();
Handle<Object> position_obj;
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
isolate, position_obj,
Object::GetProperty(result, factory->index_string()));
// TODO(jgruber): Extract and correct error handling. Since we can go up to
// 2^53 - 1 (at least for ToLength), we might actually need uint64_t here?
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
isolate, position_obj, Object::ToInteger(isolate, position_obj));
// Clamp the reported match position into [0, length].
// NOTE(review): as above, the cast assumes the ToInteger result is a Smi.
const int position =
std::max(std::min(Handle<Smi>::cast(position_obj)->value(), length), 0);
// Note that index 0 (the match itself) is copied into |captures| as well.
ZoneVector<Handle<Object>> captures(&zone);
for (int n = 0; n < captures_length; n++) {
Handle<Object> capture;
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
isolate, capture, Object::GetElement(isolate, result, n));
if (!capture->IsUndefined(isolate)) {
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, capture,
Object::ToString(isolate, capture));
}
captures.push_back(capture);
}
Handle<String> replacement;
if (functional_replace) {
// Arguments for the callback: all captures, then position, then string.
const int argc = captures_length + 2;
ScopedVector<Handle<Object>> argv(argc);
for (int j = 0; j < captures_length; j++) {
argv[j] = captures[j];
}
argv[captures_length] = handle(Smi::FromInt(position), isolate);
argv[captures_length + 1] = string;
Handle<Object> replacement_obj;
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
isolate, replacement_obj,
Execution::Call(isolate, replace_obj, factory->undefined_value(),
argc, argv.start()));
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
isolate, replacement, Object::ToString(isolate, replacement_obj));
} else {
VectorBackedMatch m(isolate, string, match, position, &captures);
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
isolate, replacement, String::GetSubstitution(isolate, &m, replace));
}
// Results whose position lies before what has already been emitted are
// ignored.
if (position >= next_source_position) {
builder.AppendString(
factory->NewSubString(string, next_source_position, position));
builder.AppendString(replacement);
next_source_position = position + match_length;
}
}
// Append whatever remains of the input after the last replaced match.
if (next_source_position < length) {
builder.AppendString(
factory->NewSubString(string, next_source_position, length));
}
RETURN_RESULT_OR_FAILURE(isolate, builder.Finish());
}
RUNTIME_FUNCTION(Runtime_RegExpExecReThrow) {
SealHandleScope shs(isolate);
......@@ -1025,5 +1628,6 @@ RUNTIME_FUNCTION(Runtime_IsRegExp) {
CONVERT_ARG_CHECKED(Object, obj, 0);
return isolate->heap()->ToBoolean(obj->IsJSRegExp());
}
} // namespace internal
} // namespace v8
......@@ -461,10 +461,10 @@ namespace internal {
F(StringSplit, 3, 1) \
F(RegExpExec, 4, 1) \
F(RegExpFlags, 1, 1) \
F(RegExpReplace, 3, 1) \
F(RegExpSource, 1, 1) \
F(RegExpConstructResult, 3, 1) \
F(RegExpInitializeAndCompile, 3, 1) \
F(RegExpExecMultiple, 4, 1) \
F(RegExpExecReThrow, 4, 1) \
F(IsRegExp, 1, 1)
......
......@@ -174,12 +174,6 @@ class FixedArrayBuilder {
int capacity() { return array_->length(); }
Handle<JSArray> ToJSArray(Handle<JSArray> target_array) {
JSArray::SetContent(target_array, array_);
target_array->set_length(Smi::FromInt(length_));
return target_array;
}
private:
Handle<FixedArray> array_;
......
......@@ -78,7 +78,7 @@ bytecodes: [
/* 15 S> */ B(LdrUndefined), R(0),
B(CreateArrayLiteral), U8(0), U8(0), U8(9),
B(Star), R(1),
B(CallJSRuntime), U8(145), R(0), U8(2),
B(CallJSRuntime), U8(146), R(0), U8(2),
/* 44 S> */ B(Return),
]
constant pool: [
......
......@@ -731,3 +731,11 @@ assertEquals(["acbc", "c", "c"], /a(.\2)b(\1)/.exec("aabcacbc"));
// \u{daff}\u{e000} is not a surrogate pair, while \u{daff}\u{dfff} is.
assertEquals(["\u{daff}", "\u{e000}"], "\u{daff}\u{e000}".split(/[a-z]{0,1}/u));
assertEquals(["\u{daff}\u{dfff}"], "\u{daff}\u{dfff}".split(/[a-z]{0,1}/u));
// Test that changing a property on RegExp.prototype results in us taking the
// slow path, which executes RegExp.prototype.exec instead of our
// RegExpExecStub.
// Keep the original exec so it can be put back once the check is done.
const RegExpPrototypeExec = RegExp.prototype.exec;
RegExp.prototype.exec = function() { throw new Error(); }
// The replace must reach the throwing exec installed above.
assertThrows(() => "abc".replace(/./, ""));
RegExp.prototype.exec = RegExpPrototypeExec;
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment