Commit f4ba786d, authored by jgruber, committed by Commit bot

[string] Add a fast path to String.p.replace

This adds a fast path to skip runtime calls to GetSubstitution when
the replacer string does not contain a '$' char.

Extended background:

String.prototype.replace is (roughly) structured as follows:

* Check if {searchValue} has a @@replace Symbol, and delegate to that if
  so. We currently implement efficient fast paths when {searchValue} is
  a String or a fast RegExp.
* A specialized fast path for a single-char {searchValue}, a "long" subject
  string, and a String {replaceValue} that does not contain '$' chars (yes,
  this fast path is very specialized).
* Check for the location of the first match using StringIndexOf, and
  exit early if no match is found.
* Finally build the return value, which is 'prefix + replacement +
  suffix', where replacement is either the result of calling {replaceValue}
  (if it is callable), or GetSubstitution(ToString({replaceValue}))
  otherwise.

There are several spots that could be improved.

StringIndexOf currently calls into the C++ runtime for all but the simple
1-byte, 1-char {searchValue} case. We still need to add support for the
remaining cases.

The runtime call to GetSubstitution can be skipped if the replacer
string does not contain any '$' syntax. This CL handles that case.

BUG=

Review-Url: https://codereview.chromium.org/2813843002
Cr-Commit-Position: refs/heads/master@{#44606}
parent a5f91b3a
......@@ -19,6 +19,11 @@ class StringBuiltinsAssembler : public CodeStubAssembler {
explicit StringBuiltinsAssembler(compiler::CodeAssemblerState* state)
: CodeStubAssembler(state) {}
// ES#sec-getsubstitution
// Computes the replacement text for a single match in {subject_string}
// delimited by {match_start_index} and {match_end_index}. Yields
// {replace_string} itself when it contains no '$' character; otherwise the
// result is obtained from Runtime::kGetSubstitution.
Node* GetSubstitution(Node* context, Node* subject_string,
Node* match_start_index, Node* match_end_index,
Node* replace_string);
protected:
Node* DirectStringData(Node* string, Node* string_instance_type) {
// Compute the effective offset of the first character.
......@@ -112,6 +117,8 @@ class StringBuiltinsAssembler : public CodeStubAssembler {
Node* search_string_instance_type, Node* position,
std::function<void(Node*)> f_return);
// Returns the result of the StringIndexOf builtin when searching {string} for
// the single-character string "$" starting at position 0. Callers treat a
// negative Smi result as "no '$' present".
Node* IndexOfDollarChar(Node* const context, Node* const string);
Node* IsNullOrUndefined(Node* const value);
void RequireObjectCoercible(Node* const context, Node* const value,
const char* method_name);
......@@ -991,6 +998,60 @@ void StringBuiltinsAssembler::MaybeCallFunctionAtSymbol(
BIND(&out);
}
// Looks for a '$' character in {string} by delegating to the StringIndexOf
// builtin with a start position of zero. {string} is asserted to be a String,
// and the builtin's result is asserted to be a Smi before being returned
// unchanged.
compiler::Node* StringBuiltinsAssembler::IndexOfDollarChar(Node* const context,
                                                           Node* const string) {
  CSA_ASSERT(this, IsString(string));

  // Single-character "$" string used as the search needle.
  Node* const needle = HeapConstant(
      isolate()->factory()->LookupSingleCharacterStringFromCode('$'));
  // Scan from the very beginning of {string}.
  Node* const from_start = SmiConstant(0);

  Node* const dollar_ix = CallBuiltin(Builtins::kStringIndexOf, context, string,
                                      needle, from_start);
  CSA_ASSERT(this, TaggedIsSmi(dollar_ix));

  return dollar_ix;
}
// CSA implementation of GetSubstitution for a plain (capture-less) match.
//
// Parameters:
//   context           - passed through to the stub and runtime calls.
//   subject_string    - the string being searched; asserted to be a String.
//   match_start_index - asserted via TaggedIsPositiveSmi.
//   match_end_index   - asserted via TaggedIsPositiveSmi.
//   replace_string    - the replacement template; asserted to be a String.
//
// Returns {replace_string} itself when it contains no '$' character, and the
// result of Runtime::kGetSubstitution otherwise.
compiler::Node* StringBuiltinsAssembler::GetSubstitution(
Node* context, Node* subject_string, Node* match_start_index,
Node* match_end_index, Node* replace_string) {
CSA_ASSERT(this, IsString(subject_string));
CSA_ASSERT(this, IsString(replace_string));
CSA_ASSERT(this, TaggedIsPositiveSmi(match_start_index));
CSA_ASSERT(this, TaggedIsPositiveSmi(match_end_index));
// The result defaults to {replace_string}; it is only overwritten on the
// runtime path below.
VARIABLE(var_result, MachineRepresentation::kTagged, replace_string);
Label runtime(this), out(this);
// In this primitive implementation we simply look for the next '$' char in
// {replace_string}. If it doesn't exist, we can simply return
// {replace_string} itself. If it does, then we delegate to
// String::GetSubstitution, passing in the index of the first '$' to avoid
// repeated scanning work.
// TODO(jgruber): Possibly extend this in the future to handle more complex
// cases without runtime calls.
Node* const dollar_index = IndexOfDollarChar(context, replace_string);
// A negative index takes the fast path straight to {out}, skipping both the
// SubString stub call and the runtime call.
Branch(SmiIsNegative(dollar_index), &out, &runtime);
BIND(&runtime);
{
CSA_ASSERT(this, TaggedIsPositiveSmi(dollar_index));
// The matched substring is only materialized here because only the runtime
// call consumes it.
Callable substring_callable = CodeFactory::SubString(isolate());
Node* const matched = CallStub(substring_callable, context, subject_string,
match_start_index, match_end_index);
// {dollar_index} is forwarded as the fifth argument so the runtime can begin
// its '$' scan there.
Node* const replacement_string =
CallRuntime(Runtime::kGetSubstitution, context, matched, subject_string,
match_start_index, replace_string, dollar_index);
var_result.Bind(replacement_string);
Goto(&out);
}
BIND(&out);
return var_result.value();
}
// ES6 #sec-string.prototype.replace
TF_BUILTIN(StringPrototypeReplace, StringBuiltinsAssembler) {
Label out(this);
......@@ -1033,7 +1094,7 @@ TF_BUILTIN(StringPrototypeReplace, StringBuiltinsAssembler) {
Node* const subject_length = LoadStringLength(subject_string);
Node* const search_length = LoadStringLength(search_string);
// Fast-path single-char {search}, long {receiver}, and simple string
// Fast-path single-char {search}, long cons {receiver}, and simple string
// {replace}.
{
Label next(this);
......@@ -1043,11 +1104,10 @@ TF_BUILTIN(StringPrototypeReplace, StringBuiltinsAssembler) {
GotoIf(TaggedIsSmi(replace), &next);
GotoIfNot(IsString(replace), &next);
Node* const dollar_string = HeapConstant(
isolate()->factory()->LookupSingleCharacterStringFromCode('$'));
Node* const dollar_ix =
CallStub(indexof_callable, context, replace, dollar_string, smi_zero);
GotoIfNot(SmiIsNegative(dollar_ix), &next);
Node* const subject_instance_type = LoadInstanceType(subject_string);
GotoIfNot(IsConsStringInstanceType(subject_instance_type), &next);
GotoIf(TaggedIsPositiveSmi(IndexOfDollarChar(context, replace)), &next);
// Searching by traversing a cons string tree and replace with cons of
// slices works only when the replaced string is a single character, being
......@@ -1136,15 +1196,11 @@ TF_BUILTIN(StringPrototypeReplace, StringBuiltinsAssembler) {
BIND(&if_notcallablereplace);
{
Node* const replace_string = CallStub(tostring_callable, context, replace);
// TODO(jgruber): Simplified GetSubstitution implementation in CSA.
Node* const matched = CallStub(substring_callable, context, subject_string,
match_start_index, match_end_index);
Node* const replacement_string =
CallRuntime(Runtime::kGetSubstitution, context, matched, subject_string,
match_start_index, replace_string);
var_result.Bind(CallStub(stringadd_callable, context, var_result.value(),
replacement_string));
Node* const replacement =
GetSubstitution(context, subject_string, match_start_index,
match_end_index, replace_string);
var_result.Bind(
CallStub(stringadd_callable, context, var_result.value(), replacement));
Goto(&out);
}
......
......@@ -3009,6 +3009,13 @@ Node* CodeStubAssembler::IsSequentialStringInstanceType(Node* instance_type) {
Int32Constant(kSeqStringTag));
}
// Tests whether {instance_type} denotes a cons string: the bits selected by
// kStringRepresentationMask must equal kConsStringTag. {instance_type} is
// asserted to be a string instance type.
Node* CodeStubAssembler::IsConsStringInstanceType(Node* instance_type) {
  CSA_ASSERT(this, IsStringInstanceType(instance_type));

  // Isolate the representation bits, then compare them against the cons tag.
  Node* const representation_bits =
      Word32And(instance_type, Int32Constant(kStringRepresentationMask));
  Node* const cons_tag = Int32Constant(kConsStringTag);

  return Word32Equal(representation_bits, cons_tag);
}
Node* CodeStubAssembler::IsExternalStringInstanceType(Node* instance_type) {
CSA_ASSERT(this, IsStringInstanceType(instance_type));
return Word32Equal(
......
......@@ -708,6 +708,7 @@ class V8_EXPORT_PRIVATE CodeStubAssembler : public compiler::CodeAssembler {
Node* IsExternalStringInstanceType(Node* instance_type);
Node* IsShortExternalStringInstanceType(Node* instance_type);
Node* IsSequentialStringInstanceType(Node* instance_type);
Node* IsConsStringInstanceType(Node* instance_type);
Node* IsString(Node* object);
Node* IsJSObject(Node* object);
Node* IsJSGlobalProxy(Node* object);
......
......@@ -11440,8 +11440,10 @@ int String::IndexOf(Isolate* isolate, Handle<String> receiver,
}
MaybeHandle<String> String::GetSubstitution(Isolate* isolate, Match* match,
Handle<String> replacement) {
Handle<String> replacement,
int start_index) {
DCHECK_IMPLIES(match->HasNamedCaptures(), FLAG_harmony_regexp_named_captures);
DCHECK_GE(start_index, 0);
Factory* factory = isolate->factory();
......@@ -11452,7 +11454,8 @@ MaybeHandle<String> String::GetSubstitution(Isolate* isolate, Match* match,
Handle<String> dollar_string =
factory->LookupSingleCharacterStringFromCode('$');
int next_dollar_ix = String::IndexOf(isolate, replacement, dollar_string, 0);
int next_dollar_ix =
String::IndexOf(isolate, replacement, dollar_string, start_index);
if (next_dollar_ix < 0) {
return replacement;
}
......
......@@ -8203,8 +8203,11 @@ class String: public Name {
// GetSubstitution(matched, str, position, captures, replacement)
// Expand the $-expressions in the string and return a new string with
// the result.
// A {start_index} can be passed to specify where to start scanning the
// replacement string.
MUST_USE_RESULT static MaybeHandle<String> GetSubstitution(
Isolate* isolate, Match* match, Handle<String> replacement);
Isolate* isolate, Match* match, Handle<String> replacement,
int start_index = 0);
// String equality operations.
inline bool Equals(String* other);
......
......@@ -17,11 +17,12 @@ namespace internal {
RUNTIME_FUNCTION(Runtime_GetSubstitution) {
HandleScope scope(isolate);
DCHECK_EQ(4, args.length());
DCHECK_EQ(5, args.length());
CONVERT_ARG_HANDLE_CHECKED(String, matched, 0);
CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
CONVERT_SMI_ARG_CHECKED(position, 2);
CONVERT_ARG_HANDLE_CHECKED(String, replacement, 3);
CONVERT_SMI_ARG_CHECKED(start_index, 4);
// A simple match without captures.
class SimpleMatch : public String::Match {
......@@ -58,7 +59,8 @@ RUNTIME_FUNCTION(Runtime_GetSubstitution) {
SimpleMatch match(matched, prefix, suffix);
RETURN_RESULT_OR_FAILURE(
isolate, String::GetSubstitution(isolate, &match, replacement));
isolate,
String::GetSubstitution(isolate, &match, replacement, start_index));
}
// This may return an empty MaybeHandle if an exception is thrown or
......
......@@ -519,7 +519,7 @@ namespace internal {
F(StoreLookupSlot_Strict, 2, 1)
#define FOR_EACH_INTRINSIC_STRINGS(F) \
F(GetSubstitution, 4, 1) \
F(GetSubstitution, 5, 1) \
F(StringReplaceOneCharWithString, 3, 1) \
F(StringIndexOf, 3, 1) \
F(StringIndexOfUnchecked, 3, 1) \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment