Commit 183eb36b authored by peterwmwong, committed by Commit Bot

[builtins] Port String.prototype.{search, match} to CSA

- Expose fast paths for RegExpPrototypeMatchBody/RegExpPrototypeSearchBody as TFS builtins
- Add StringPrototypeMatch and StringPrototypeSearch TFJ builtins
  - Add StringMatchSearchAssembler to ensure same search/match behavior
- Remove functionality from string.js

A quick benchmark shows gains of 20-30% for unoptimized code and 0-20% for optimized code.
https://github.com/peterwmwong/v8-perf/blob/master/string-search-match/README.md

Bug: v8:5049
Change-Id: I0fffee6e94e62ecae049c9e5798da52d67ae1823
Reviewed-on: https://chromium-review.googlesource.com/707824
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Reviewed-by: Jakob Gruber <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/master@{#48452}
parent 9d9048db
......@@ -1975,6 +1975,8 @@ void Genesis::InitializeGlobal(Handle<JSGlobalObject> global_object,
true);
SimpleInstallFunction(prototype, "localeCompare",
Builtins::kStringPrototypeLocaleCompare, 1, true);
SimpleInstallFunction(prototype, "match", Builtins::kStringPrototypeMatch,
1, true);
#ifdef V8_INTL_SUPPORT
SimpleInstallFunction(prototype, "normalize",
Builtins::kStringPrototypeNormalizeIntl, 0, false);
......@@ -1986,6 +1988,8 @@ void Genesis::InitializeGlobal(Handle<JSGlobalObject> global_object,
1, true);
SimpleInstallFunction(prototype, "replace",
Builtins::kStringPrototypeReplace, 2, true);
SimpleInstallFunction(prototype, "search", Builtins::kStringPrototypeSearch,
1, true);
SimpleInstallFunction(prototype, "slice", Builtins::kStringPrototypeSlice,
2, false);
SimpleInstallFunction(prototype, "small", Builtins::kStringPrototypeSmall,
......
......@@ -854,8 +854,10 @@ namespace internal {
TFJ(RegExpPrototypeSplit, SharedFunctionInfo::kDontAdaptArgumentsSentinel) \
/* RegExp helpers */ \
TFS(RegExpExecAtom, kRegExp, kString, kLastIndex, kMatchInfo) \
TFS(RegExpMatchFast, kReceiver, kPattern) \
TFS(RegExpPrototypeExecSlow, kReceiver, kString) \
TFS(RegExpReplace, kRegExp, kString, kReplaceValue) \
TFS(RegExpSearchFast, kReceiver, kPattern) \
TFS(RegExpSplit, kRegExp, kString, kLimit) \
\
/* Set */ \
......@@ -933,12 +935,16 @@ namespace internal {
CPP(StringPrototypeLastIndexOf) \
/* ES6 #sec-string.prototype.link */ \
TFJ(StringPrototypeLink, 1, kValue) \
/* ES6 #sec-string.prototype.match */ \
TFJ(StringPrototypeMatch, 1, kRegexp) \
/* ES6 #sec-string.prototype.localecompare */ \
CPP(StringPrototypeLocaleCompare) \
/* ES6 #sec-string.prototype.repeat */ \
TFJ(StringPrototypeRepeat, 1, kCount) \
/* ES6 #sec-string.prototype.replace */ \
TFJ(StringPrototypeReplace, 2, kSearch, kReplace) \
/* ES6 #sec-string.prototype.search */ \
TFJ(StringPrototypeSearch, 1, kRegexp) \
/* ES6 #sec-string.prototype.slice */ \
TFJ(StringPrototypeSlice, SharedFunctionInfo::kDontAdaptArgumentsSentinel) \
/* ES6 #sec-string.prototype.small */ \
......
......@@ -310,7 +310,7 @@ Node* RegExpBuiltinsAssembler::RegExpExecInternal(Node* const context,
#ifdef V8_INTERPRETED_REGEXP
return CallRuntime(Runtime::kRegExpExec, context, regexp, string, last_index,
match_info);
#else // V8_INTERPRETED_REGEXP
#else // V8_INTERPRETED_REGEXP
CSA_ASSERT(this, TaggedIsNotSmi(regexp));
CSA_ASSERT(this, IsJSRegExp(regexp));
......@@ -2153,12 +2153,25 @@ TF_BUILTIN(RegExpPrototypeMatch, RegExpBuiltinsAssembler) {
BranchIfFastRegExp(context, receiver, &fast_path, &slow_path);
BIND(&fast_path);
RegExpPrototypeMatchBody(context, receiver, string, true);
// TODO(pwong): Could be optimized to remove the overhead of calling the
// builtin (at the cost of a larger builtin).
Return(CallBuiltin(Builtins::kRegExpMatchFast, context, receiver, string));
BIND(&slow_path);
RegExpPrototypeMatchBody(context, receiver, string, false);
}
// Helper builtin that skips a few initial checks and assumes that:
//   1) the receiver is a "fast" RegExp, and
//   2) the pattern argument is a string.
TF_BUILTIN(RegExpMatchFast, RegExpBuiltinsAssembler) {
  Node* const context = Parameter(Descriptor::kContext);
  Node* const regexp = Parameter(Descriptor::kReceiver);
  Node* const subject = Parameter(Descriptor::kPattern);

  // The trailing `true` selects the fast-path flavour of the match body.
  RegExpPrototypeMatchBody(context, regexp, subject, true);
}
void RegExpBuiltinsAssembler::RegExpPrototypeSearchBodyFast(
Node* const context, Node* const regexp, Node* const string) {
CSA_ASSERT(this, IsFastRegExp(context, regexp));
......@@ -2281,12 +2294,25 @@ TF_BUILTIN(RegExpPrototypeSearch, RegExpBuiltinsAssembler) {
BranchIfFastRegExp(context, receiver, &fast_path, &slow_path);
BIND(&fast_path);
RegExpPrototypeSearchBodyFast(context, receiver, string);
// TODO(pwong): Could be optimized to remove the overhead of calling the
// builtin (at the cost of a larger builtin).
Return(CallBuiltin(Builtins::kRegExpSearchFast, context, receiver, string));
BIND(&slow_path);
RegExpPrototypeSearchBodySlow(context, receiver, string);
}
// Helper builtin that skips a few initial checks and assumes that:
//   1) the receiver is a "fast" RegExp, and
//   2) the pattern argument is a string.
TF_BUILTIN(RegExpSearchFast, RegExpBuiltinsAssembler) {
  Node* const context = Parameter(Descriptor::kContext);
  Node* const regexp = Parameter(Descriptor::kReceiver);
  Node* const subject = Parameter(Descriptor::kPattern);

  RegExpPrototypeSearchBodyFast(context, regexp, subject);
}
// Generates the fast path for @@split. {regexp} is an unmodified, non-sticky
// JSRegExp, {string} is a String, and {limit} is a Smi.
void RegExpBuiltinsAssembler::RegExpPrototypeSplitBody(Node* const context,
......
......@@ -1447,6 +1447,98 @@ TF_BUILTIN(StringPrototypeReplace, StringBuiltinsAssembler) {
}
}
// Shared code generator for the String.prototype.match and
// String.prototype.search builtins. The two methods run the same sequence of
// steps and differ only in the well-known symbol that is looked up (@@match vs.
// @@search) and in the fast-path RegExp builtin that is called.
class StringMatchSearchAssembler : public StringBuiltinsAssembler {
 public:
  explicit StringMatchSearchAssembler(compiler::CodeAssemblerState* state)
      : StringBuiltinsAssembler(state) {}

 protected:
  // Selects which of the two methods Generate() emits.
  enum Variant { kMatch, kSearch };

  // Emits the body of String.prototype.{match,search}.
  //
  //   variant      - kMatch or kSearch; picks the symbol and the fast builtin.
  //   method_name  - forwarded to RequireObjectCoercible for the receiver
  //                  check.
  //   receiver     - the `this` value of the call.
  //   maybe_regexp - the single argument of the call; any value.
  //   context      - the current context.
  //
  // Every path that falls through to the end of this function ends in a
  // Return().
  void Generate(Variant variant, const char* method_name, Node* const receiver,
                Node* maybe_regexp, Node* const context) {
    Builtins::Name builtin;
    Handle<Symbol> symbol;
    if (variant == kMatch) {
      builtin = Builtins::kRegExpMatchFast;
      symbol = isolate()->factory()->match_symbol();
    } else {
      builtin = Builtins::kRegExpSearchFast;
      symbol = isolate()->factory()->search_symbol();
    }

    RequireObjectCoercible(context, receiver, method_name);

    MaybeCallFunctionAtSymbol(
        context, maybe_regexp, symbol,
        // Direct call of the fast builtin on {maybe_regexp}; the receiver is
        // converted to a string first because the builtin expects one.
        [=] {
          Node* const receiver_string = ToString_Inline(context, receiver);
          return CallBuiltin(builtin, context, maybe_regexp, receiver_string);
        },
        // Generic call of the function {fn} found at {symbol}. The receiver is
        // deliberately passed through unconverted.
        [=](Node* fn) {
          Callable call_callable = CodeFactory::Call(isolate());
          return CallJS(call_callable, context, fn, maybe_regexp, receiver);
        });

    // Reached when maybe_regexp is neither a RegExp nor an object with a
    // [@@match / @@search] property: build a RegExp from it and use that.
    {
      RegExpBuiltinsAssembler regexp_asm(state());

      Node* const receiver_string = ToString_Inline(context, receiver);
      // An undefined argument yields the empty pattern; anything else is
      // converted to a string.
      Node* const pattern = Select(
          IsUndefined(maybe_regexp), [=] { return EmptyStringConstant(); },
          [=] { return ToString_Inline(context, maybe_regexp); },
          MachineRepresentation::kTagged);

      // Create RegExp
      // TODO(pwong): This could be factored out as a helper (RegExpCreate) that
      // also does the "is fast" checks.
      Node* const native_context = LoadNativeContext(context);
      Node* const regexp_function =
          LoadContextElement(native_context, Context::REGEXP_FUNCTION_INDEX);
      Node* const initial_map = LoadObjectField(
          regexp_function, JSFunction::kPrototypeOrInitialMapOffset);
      // The new RegExp is initialized with {pattern} and empty flags.
      Node* const regexp = CallRuntime(
          Runtime::kRegExpInitializeAndCompile, context,
          AllocateJSObjectFromMap(initial_map), pattern, EmptyStringConstant());

      Label fast_path(this), slow_path(this);
      regexp_asm.BranchIfFastRegExp(context, regexp, initial_map, &fast_path,
                                    &slow_path);

      BIND(&fast_path);
      Return(CallBuiltin(builtin, context, regexp, receiver_string));

      BIND(&slow_path);
      {
        // Not "fast": look the method up on the new RegExp and call it as
        // regular JS.
        Node* const maybe_func = GetProperty(context, regexp, symbol);
        Callable call_callable = CodeFactory::Call(isolate());
        Return(CallJS(call_callable, context, maybe_func, regexp,
                      receiver_string));
      }
    }
  }
};
// ES6 #sec-string.prototype.match
// Thin entry point: reads the JS call parameters and hands them to the shared
// match/search generator in its kMatch flavour.
TF_BUILTIN(StringPrototypeMatch, StringMatchSearchAssembler) {
  Node* const context = Parameter(Descriptor::kContext);
  Node* const this_value = Parameter(Descriptor::kReceiver);
  Node* const regexp_arg = Parameter(Descriptor::kRegexp);

  Generate(kMatch, "String.prototype.match", this_value, regexp_arg, context);
}
// ES6 #sec-string.prototype.search
// Thin entry point: reads the JS call parameters and hands them to the shared
// match/search generator in its kSearch flavour.
TF_BUILTIN(StringPrototypeSearch, StringMatchSearchAssembler) {
  Node* const context = Parameter(Descriptor::kContext);
  Node* const this_value = Parameter(Descriptor::kReceiver);
  Node* const regexp_arg = Parameter(Descriptor::kRegexp);

  Generate(kSearch, "String.prototype.search", this_value, regexp_arg, context);
}
// ES6 section 21.1.3.18 String.prototype.slice ( start, end )
TF_BUILTIN(StringPrototypeSlice, StringBuiltinsAssembler) {
Label out(this);
......
......@@ -12,53 +12,9 @@
// Imports
var GlobalString = global.String;
var matchSymbol = utils.ImportNow("match_symbol");
var searchSymbol = utils.ImportNow("search_symbol");
//-------------------------------------------------------------------
// Set up the non-enumerable functions on the String prototype object.
// Defines match() and search() on GlobalString.prototype. Both methods follow
// the same shape: check the receiver, delegate to a method found at the
// corresponding symbol on the pattern if one exists, otherwise create a RegExp
// from the pattern and call that RegExp's method with the stringified receiver.
DEFINE_METHODS(
GlobalString.prototype,
{
/* ES#sec-string.prototype.match */
match(pattern) {
CHECK_OBJECT_COERCIBLE(this, "String.prototype.match");
// A non-nullish pattern may supply its own matcher under matchSymbol; it is
// called with the pattern as receiver and the unconverted `this` as argument.
if (!IS_NULL_OR_UNDEFINED(pattern)) {
var matcher = pattern[matchSymbol];
if (!IS_UNDEFINED(matcher)) {
return %_Call(matcher, pattern, this);
}
}
// No matcher found: only now is the receiver converted to a string.
var subject = TO_STRING(this);
// Equivalent to RegExpCreate (ES#sec-regexpcreate)
var regexp = %RegExpCreate(pattern);
return regexp[matchSymbol](subject);
}
/* ES#sec-string.prototype.search */
search(pattern) {
CHECK_OBJECT_COERCIBLE(this, "String.prototype.search");
// A non-nullish pattern may supply its own searcher under searchSymbol; it is
// called with the pattern as receiver and the unconverted `this` as argument.
if (!IS_NULL_OR_UNDEFINED(pattern)) {
var searcher = pattern[searchSymbol];
if (!IS_UNDEFINED(searcher)) {
return %_Call(searcher, pattern, this);
}
}
// No searcher found: only now is the receiver converted to a string.
var subject = TO_STRING(this);
// Equivalent to RegExpCreate (ES#sec-regexpcreate)
var regexp = %RegExpCreate(pattern);
return %_Call(regexp[searchSymbol], regexp, subject);
}
}
);
function StringPad(thisString, maxLength, fillString) {
maxLength = TO_LENGTH(maxLength);
var stringLength = thisString.length;
......
......@@ -902,33 +902,6 @@ RUNTIME_FUNCTION(Runtime_StringSplit) {
return *result;
}
// ES#sec-regexpcreate
// RegExpCreate ( P, F )
// Runtime entry taking a single argument, the pattern source (P). The flags are
// not an argument here: the RegExp is always initialized with JSRegExp::kNone.
// Returns the newly created JSRegExp.
RUNTIME_FUNCTION(Runtime_RegExpCreate) {
HandleScope scope(isolate);
DCHECK_EQ(1, args.length());
CONVERT_ARG_HANDLE_CHECKED(Object, source_object, 0);
// An undefined source is treated as the empty string; any other value goes
// through Object::ToString.
Handle<String> source;
if (source_object->IsUndefined(isolate)) {
source = isolate->factory()->empty_string();
} else {
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
isolate, source, Object::ToString(isolate, source_object));
}
// Allocate the object from the initial map of the regexp function.
Handle<Map> map(isolate->regexp_function()->initial_map());
Handle<JSRegExp> regexp =
Handle<JSRegExp>::cast(isolate->factory()->NewJSObjectFromMap(map));
JSRegExp::Flags flags = JSRegExp::kNone;
RETURN_FAILURE_ON_EXCEPTION(isolate,
JSRegExp::Initialize(regexp, source, flags));
return *regexp;
}
RUNTIME_FUNCTION(Runtime_RegExpExec) {
HandleScope scope(isolate);
DCHECK_EQ(4, args.length());
......@@ -1946,6 +1919,9 @@ RUNTIME_FUNCTION(Runtime_RegExpExecReThrow) {
RUNTIME_FUNCTION(Runtime_RegExpInitializeAndCompile) {
HandleScope scope(isolate);
DCHECK_EQ(3, args.length());
// TODO(pwong): To follow the spec more closely and simplify calling code,
// this could handle the canonicalization of pattern and flags. See
// https://tc39.github.io/ecma262/#sec-regexpinitialize
CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
CONVERT_ARG_HANDLE_CHECKED(String, source, 1);
CONVERT_ARG_HANDLE_CHECKED(String, flags, 2);
......
......@@ -481,7 +481,6 @@ namespace internal {
#define FOR_EACH_INTRINSIC_REGEXP(F) \
F(IsRegExp, 1, 1) \
F(RegExpCreate, 1, 1) \
F(RegExpExec, 4, 1) \
F(RegExpExecMultiple, 4, 1) \
F(RegExpExecReThrow, 0, 1) \
......
......@@ -43,9 +43,11 @@ assertFalse(!!"..foo*bar".match(sticky));
var stickyplain = /foobar/y;
assertTrue(!!"foobar".match(stickyplain));
assertTrue(!!"foobarfoobar".match(stickyplain));
assertEquals(6, stickyplain.lastIndex);
assertFalse(!!"..foobar".match(stickyplain));
assertTrue(!!"foobarfoobar".match(stickyplain));
assertEquals(12, stickyplain.lastIndex);
assertFalse(!!"..foobarfoobar".match(stickyplain));
var global = /foo.bar/g;
......
......@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
var pattern = {};
const pattern = {};
pattern[Symbol.match] = function(string) {
return string.length;
};
......@@ -11,6 +11,9 @@ assertThrows(() => String.prototype.match.call(null, pattern),
TypeError);
// Override is called.
assertEquals(5, "abcde".match(pattern));
// Receiver is not converted to string if pattern has Symbol.match
const receiver = { toString(){ throw new Error(); }, length: 6 };
assertEquals(6, String.prototype.match.call(receiver, pattern));
// Non-callable override.
pattern[Symbol.match] = "dumdidum";
assertThrows(() => "abcde".match(pattern), TypeError);
......
......@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
var pattern = {};
const pattern = {};
pattern[Symbol.search] = function(string) {
return string.length;
};
......@@ -11,6 +11,9 @@ assertThrows(() => String.prototype.search.call(null, pattern),
TypeError);
// Override is called.
assertEquals(5, "abcde".search(pattern));
// Receiver is not converted to string if pattern has Symbol.search
const receiver = { toString(){ throw new Error(); }, length: 6 };
assertEquals(6, String.prototype.search.call(receiver, pattern));
// Non-callable override.
pattern[Symbol.search] = "dumdidum";
assertThrows(() => "abcde".search(pattern), TypeError);
......
......@@ -73,6 +73,14 @@ test(function() {
String.prototype.includes.call(null);
}, "String.prototype.includes called on null or undefined", TypeError);
test(function() {
String.prototype.match.call(null);
}, "String.prototype.match called on null or undefined", TypeError);
test(function() {
String.prototype.search.call(null);
}, "String.prototype.search called on null or undefined", TypeError);
test(function() {
Array.prototype.shift.call(null);
}, "Array.prototype.shift called on null or undefined", TypeError);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment