Commit 2c3fe6d9, authored by jgruber, committed by Commit bot

[regexp] Migrate @@split to TurboFan

This shows around a 2.2x speedup compared to the old JS implementation (and
3.5x compared to CPP) for the fast path.

Adds ToUint32 to CodeStubAssembler.

BUG=v8:5339

Review-Url: https://codereview.chromium.org/2532403002
Cr-Commit-Position: refs/heads/master@{#41408}
parent 6b9c49ca
......@@ -1878,7 +1878,7 @@ void Genesis::InitializeGlobal(Handle<JSGlobalObject> global_object,
{
Handle<JSFunction> fun = SimpleCreateFunction(
isolate, factory->InternalizeUtf8String("[Symbol.split]"),
Builtins::kRegExpPrototypeSplit, 2, false);
Builtins::kRegExpPrototypeSplit, 2, true);
InstallFunction(prototype, fun, factory->split_symbol(), DONT_ENUM);
}
......
This diff is collapsed.
......@@ -610,7 +610,7 @@ namespace internal {
TFJ(RegExpPrototypeSearch, 1) \
CPP(RegExpPrototypeSourceGetter) \
CPP(RegExpPrototypeSpeciesGetter) \
CPP(RegExpPrototypeSplit) \
TFJ(RegExpPrototypeSplit, 2) \
TFJ(RegExpPrototypeStickyGetter, 0) \
TFJ(RegExpPrototypeTest, 1) \
CPP(RegExpPrototypeToString) \
......
......@@ -3892,6 +3892,108 @@ Node* CodeStubAssembler::ToNumber(Node* context, Node* input) {
return var_result.value();
}
// ES#sec-touint32
//
// Converts |input| to a Number holding an integer in the range
// [0, 2^32 - 1]. |context| is forwarded to ToNumber. The result is a tagged
// value (the function binds either a Smi or the output of
// ChangeFloat64ToTagged).
//
// Non-negative Smis are returned unchanged, both before and after the
// ToNumber conversion. +-0, NaN and +-Infinity map to Smi zero. All other
// values are truncated towards zero and then reduced modulo 2^32.
Node* CodeStubAssembler::ToUint32(Node* context, Node* input) {
  Node* const float_zero = Float64Constant(0.0);
  Node* const float_two_32 = Float64Constant(static_cast<double>(1ULL << 32));

  Label out(this);

  Variable var_result(this, MachineRepresentation::kTagged);
  var_result.Bind(input);

  // Early exit for positive smis.
  {
    // TODO(jgruber): This branch and the recheck below can be removed once we
    // have a ToNumber with multiple exits.
    Label next(this, Label::kDeferred);
    Branch(WordIsPositiveSmi(input), &out, &next);
    Bind(&next);
  }

  Node* const number = ToNumber(context, input);
  var_result.Bind(number);

  // Perhaps we have a positive smi now.
  {
    Label next(this, Label::kDeferred);
    Branch(WordIsPositiveSmi(number), &out, &next);
    Bind(&next);
  }

  Label if_isnegativesmi(this), if_isheapnumber(this);
  Branch(TaggedIsSmi(number), &if_isnegativesmi, &if_isheapnumber);

  Bind(&if_isnegativesmi);
  {
    // A Smi is already integral, so no rounding is required:
    // {input} mod 2^32 === {input} + 2^32 for a negative Smi.
    Node* const float_number = SmiToFloat64(number);
    Node* const float_result = Float64Add(float_number, float_two_32);
    Node* const result = ChangeFloat64ToTagged(float_result);
    var_result.Bind(result);
    Goto(&out);
  }

  Bind(&if_isheapnumber);
  {
    Label return_zero(this);
    Node* const value = LoadHeapNumberValue(number);

    {
      // +-0.
      Label next(this);
      Branch(Float64Equal(value, float_zero), &return_zero, &next);
      Bind(&next);
    }

    {
      // NaN (the only value that does not compare equal to itself).
      Label next(this);
      Branch(Float64Equal(value, value), &next, &return_zero);
      Bind(&next);
    }

    {
      // +Infinity.
      Label next(this);
      Node* const positive_infinity =
          Float64Constant(std::numeric_limits<double>::infinity());
      Branch(Float64Equal(value, positive_infinity), &return_zero, &next);
      Bind(&next);
    }

    {
      // -Infinity.
      Label next(this);
      Node* const negative_infinity =
          Float64Constant(-1.0 * std::numeric_limits<double>::infinity());
      Branch(Float64Equal(value, negative_infinity), &return_zero, &next);
      Bind(&next);
    }

    // Per the spec:
    // * Let int be the mathematical value that is the same sign as number and
    //   whose magnitude is floor(abs(number)).
    // * Let int32bit be int modulo 2^32.
    // * Return int32bit.
    //
    // I.e. the value is rounded towards zero (truncated), not towards
    // -Infinity; flooring would be off by one for negative non-integers
    // (e.g. -4294967294.5 must yield 2, not 1).
    {
      Node* x = Float64Trunc(value);
      // Float64Mod may return a negative remainder for negative inputs; add
      // 2^32 and reduce again to obtain a result in [0, 2^32).
      x = Float64Mod(x, float_two_32);
      x = Float64Add(x, float_two_32);
      x = Float64Mod(x, float_two_32);

      Node* const result = ChangeFloat64ToTagged(x);
      var_result.Bind(result);
      Goto(&out);
    }

    Bind(&return_zero);
    {
      var_result.Bind(SmiConstant(Smi::kZero));
      Goto(&out);
    }
  }

  Bind(&out);
  return var_result.value();
}
Node* CodeStubAssembler::ToString(Node* context, Node* input) {
Label is_number(this);
Label runtime(this, Label::kDeferred);
......
......@@ -614,6 +614,11 @@ class V8_EXPORT_PRIVATE CodeStubAssembler : public compiler::CodeAssembler {
// Convert any object to a Number.
Node* ToNumber(Node* context, Node* input);
// Converts |input| to one of the 2^32 integer values in the range 0 through
// 2^32 - 1, inclusive. |input| is first converted with ToNumber (using
// |context|); NaN, +-0 and +-Infinity all produce 0. The returned node holds
// a tagged Number.
// ES#sec-touint32
compiler::Node* ToUint32(compiler::Node* context, compiler::Node* input);
// Convert any object to a String.
Node* ToString(Node* context, Node* input);
......
......@@ -1269,6 +1269,223 @@ RUNTIME_FUNCTION(Runtime_StringReplaceNonGlobalRegExpWithFunction) {
isolate, subject, regexp, replace));
}
namespace {
// ES#sec-speciesconstructor
// SpeciesConstructor ( O, defaultConstructor )
//
// Looks up recv.constructor[@@species] and returns it when it is a
// constructor. Returns |default_ctor| when recv.constructor is undefined or
// when the species is null/undefined. Throws a TypeError (and returns an empty
// MaybeHandle) when recv.constructor is not a receiver or when the species is
// neither null/undefined nor a constructor. Exceptions raised by either
// property load are propagated.
MUST_USE_RESULT MaybeHandle<Object> SpeciesConstructor(
    Isolate* isolate, Handle<JSReceiver> recv,
    Handle<JSFunction> default_ctor) {
  // Let C be ? Get(O, "constructor").
  Handle<Object> constructor;
  ASSIGN_RETURN_ON_EXCEPTION(
      isolate, constructor,
      JSObject::GetProperty(recv, isolate->factory()->constructor_string()),
      Object);

  if (constructor->IsUndefined(isolate)) return default_ctor;

  if (!constructor->IsJSReceiver()) {
    THROW_NEW_ERROR(isolate,
                    NewTypeError(MessageTemplate::kConstructorNotReceiver),
                    Object);
  }

  // Let S be ? Get(C, @@species).
  Handle<JSReceiver> constructor_receiver =
      Handle<JSReceiver>::cast(constructor);
  Handle<Object> species;
  ASSIGN_RETURN_ON_EXCEPTION(
      isolate, species,
      JSObject::GetProperty(constructor_receiver,
                            isolate->factory()->species_symbol()),
      Object);

  const bool use_default =
      species->IsNull(isolate) || species->IsUndefined(isolate);
  if (use_default) return default_ctor;

  if (!species->IsConstructor()) {
    THROW_NEW_ERROR(
        isolate, NewTypeError(MessageTemplate::kSpeciesNotConstructor), Object);
  }

  return species;
}
// Converts the split limit |object| to an unsigned 32-bit integer stored in
// |*out|. An undefined limit maps to kMaxUInt32; anything else goes through
// Object::ToNumber followed by NumberToUint32.
//
// Returns |object| itself on success (the value only signals success) and an
// empty MaybeHandle if the ToNumber conversion threw.
MUST_USE_RESULT MaybeHandle<Object> ToUint32(Isolate* isolate,
                                             Handle<Object> object,
                                             uint32_t* out) {
  if (!object->IsUndefined(isolate)) {
    Handle<Object> as_number;
    ASSIGN_RETURN_ON_EXCEPTION(isolate, as_number, Object::ToNumber(object),
                               Object);
    *out = NumberToUint32(*as_number);
  } else {
    *out = kMaxUInt32;
  }

  return object;
}
// Shrinks the backing store |elems| to |num_elems| entries and wraps it in a
// new JSArray. Note that |elems| itself is modified by the Shrink call.
Handle<JSArray> NewJSArrayWithElements(Isolate* isolate,
                                       Handle<FixedArray> elems,
                                       int num_elems) {
  Factory* const factory = isolate->factory();

  elems->Shrink(num_elems);

  Handle<JSArray> array = factory->NewJSArrayWithElements(elems);
  return array;
}
} // namespace
// Slow path for:
// ES#sec-regexp.prototype-@@split
// RegExp.prototype [ @@split ] ( string, limit )
//
// Arguments:
//   args[0]: the receiver (a JSReceiver),
//   args[1]: the subject string (already converted to a String),
//   args[2]: the limit (any object; undefined means "no limit").
//
// Returns a new JSArray containing the split pieces, or a failure sentinel if
// any of the observable operations (property loads, species construction,
// exec calls, conversions) throws.
RUNTIME_FUNCTION(Runtime_RegExpSplit) {
  HandleScope scope(isolate);
  DCHECK(args.length() == 3);

  DCHECK(args[1]->IsString());

  CONVERT_ARG_HANDLE_CHECKED(JSReceiver, recv, 0);
  CONVERT_ARG_HANDLE_CHECKED(String, string, 1);
  CONVERT_ARG_HANDLE_CHECKED(Object, limit_obj, 2);

  Factory* factory = isolate->factory();

  Handle<JSFunction> regexp_fun = isolate->regexp_function();
  Handle<Object> ctor;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
      isolate, ctor, SpeciesConstructor(isolate, recv, regexp_fun));

  Handle<Object> flags_obj;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
      isolate, flags_obj, JSObject::GetProperty(recv, factory->flags_string()));

  Handle<String> flags;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, flags,
                                     Object::ToString(isolate, flags_obj));

  Handle<String> u_str = factory->LookupSingleCharacterStringFromCode('u');
  const bool unicode = (String::IndexOf(isolate, flags, u_str, 0) >= 0);

  Handle<String> y_str = factory->LookupSingleCharacterStringFromCode('y');
  const bool sticky = (String::IndexOf(isolate, flags, y_str, 0) >= 0);

  // The splitter must always be sticky; append "y" unless already present.
  Handle<String> new_flags = flags;
  if (!sticky) {
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, new_flags,
                                       factory->NewConsString(flags, y_str));
  }

  Handle<JSReceiver> splitter;
  {
    const int argc = 2;

    ScopedVector<Handle<Object>> argv(argc);
    argv[0] = recv;
    argv[1] = new_flags;

    // SpeciesConstructor may return any constructor (e.g. a bound function or
    // a proxy), not just a JSFunction, so {ctor} must not be cast to
    // JSFunction. Construct through the generic entry point, using {ctor} as
    // both the constructor and new.target.
    Handle<Object> splitter_obj;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, splitter_obj,
        Execution::New(isolate, ctor, ctor, argc, argv.start()));

    splitter = Handle<JSReceiver>::cast(splitter_obj);
  }

  // Per spec, the limit is converted only after the splitter was constructed.
  uint32_t limit;
  RETURN_FAILURE_ON_EXCEPTION(isolate, ToUint32(isolate, limit_obj, &limit));

  const int length = string->length();

  if (limit == 0) return *factory->NewJSArray(0);

  if (length == 0) {
    // An empty subject yields [] if the splitter matches it, and [""]
    // otherwise.
    Handle<Object> result;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, result, RegExpUtils::RegExpExec(isolate, splitter, string,
                                                 factory->undefined_value()));

    if (!result->IsNull(isolate)) return *factory->NewJSArray(0);

    Handle<FixedArray> elems = factory->NewUninitializedFixedArray(1);
    elems->set(0, *string);
    return *factory->NewJSArrayWithElements(elems);
  }

  static const int kInitialArraySize = 8;
  Handle<FixedArray> elems = factory->NewFixedArrayWithHoles(kInitialArraySize);
  int num_elems = 0;

  // {prev_string_index} is the start of the piece currently being collected
  // (p in the spec); {string_index} is the position at which the next match
  // is attempted (q in the spec).
  int string_index = 0;
  int prev_string_index = 0;
  while (string_index < length) {
    RETURN_FAILURE_ON_EXCEPTION(
        isolate, RegExpUtils::SetLastIndex(isolate, splitter, string_index));

    Handle<Object> result;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, result, RegExpUtils::RegExpExec(isolate, splitter, string,
                                                 factory->undefined_value()));

    if (result->IsNull(isolate)) {
      string_index = RegExpUtils::AdvanceStringIndex(isolate, string,
                                                     string_index, unicode);
      continue;
    }

    Handle<Object> last_index_obj;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, last_index_obj, RegExpUtils::GetLastIndex(isolate, splitter));

    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, last_index_obj, Object::ToLength(isolate, last_index_obj));

    // ToLength produces an integer in [0, 2^53 - 1], which is not guaranteed
    // to be a Smi (lastIndex is user-controllable). A non-Smi result lies
    // beyond the Smi range and therefore (assuming string lengths always fit
    // in a Smi) beyond {length}, so it clamps to {length}.
    const int end =
        last_index_obj->IsSmi()
            ? std::min(Handle<Smi>::cast(last_index_obj)->value(), length)
            : length;
    if (end == prev_string_index) {
      // Empty match at the start of the current piece; skip ahead.
      string_index = RegExpUtils::AdvanceStringIndex(isolate, string,
                                                     string_index, unicode);
      continue;
    }

    {
      Handle<String> substr =
          factory->NewSubString(string, prev_string_index, string_index);
      elems = FixedArray::SetAndGrow(elems, num_elems++, substr);
      if (static_cast<uint32_t>(num_elems) == limit) {
        return *NewJSArrayWithElements(isolate, elems, num_elems);
      }
    }

    prev_string_index = end;

    Handle<Object> num_captures_obj;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, num_captures_obj,
        Object::GetProperty(result, isolate->factory()->length_string()));

    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, num_captures_obj, Object::ToLength(isolate, num_captures_obj));

    // As above, the ToLength result need not be a Smi ({result} may be an
    // arbitrary user-provided object). Saturate instead of misinterpreting a
    // HeapNumber as a Smi.
    const int num_captures =
        num_captures_obj->IsSmi()
            ? std::max(Handle<Smi>::cast(num_captures_obj)->value(), 0)
            : kMaxInt;

    // Index 0 holds the full match; only the capture groups are appended.
    for (int i = 1; i < num_captures; i++) {
      Handle<Object> capture;
      ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
          isolate, capture, Object::GetElement(isolate, result, i));
      elems = FixedArray::SetAndGrow(elems, num_elems++, capture);
      if (static_cast<uint32_t>(num_elems) == limit) {
        return *NewJSArrayWithElements(isolate, elems, num_elems);
      }
    }

    string_index = prev_string_index;
  }

  // Append the trailing piece after the last match.
  {
    Handle<String> substr =
        factory->NewSubString(string, prev_string_index, length);
    elems = FixedArray::SetAndGrow(elems, num_elems++, substr);
  }

  return *NewJSArrayWithElements(isolate, elems, num_elems);
}
// Slow path for:
// ES#sec-regexp.prototype-@@replace
// RegExp.prototype [ @@replace ] ( string, replaceValue )
......
......@@ -462,6 +462,7 @@ namespace internal {
F(RegExpExecReThrow, 4, 1) \
F(RegExpInternalReplace, 3, 1) \
F(RegExpReplace, 3, 1) \
F(RegExpSplit, 3, 1) \
F(StringReplaceGlobalRegExpWithString, 4, 1) \
F(StringReplaceNonGlobalRegExpWithFunction, 3, 1) \
F(StringSplit, 3, 1)
......
......@@ -164,3 +164,11 @@ separator = { toString: function() { assertEquals(1, counter);
assertEquals(["a", "c"], String.prototype.split.call(subject, separator));
assertEquals(2, counter);
// Check ToUint32 conversion of limit. ToUint32 truncates towards zero before
// reducing modulo 2^32, so e.g. -4294967294.5 -> -4294967294 -> 2.
assertArrayEquals(["a"], "a,b,c,d,e,f".split(/,/, -4294967295));
assertArrayEquals(["a", "b"], "a,b,c,d,e,f".split(/,/, -4294967294.001));
assertArrayEquals(["a", "b"], "a,b,c,d,e,f".split(/,/, -4294967294.5));
assertArrayEquals(["a", "b"], "a,b,c,d,e,f".split(/,/, -4294967294.999));
assertArrayEquals(["a", "b"], "a,b,c,d,e,f".split(/,/, -4294967294));
assertArrayEquals(["a", "b", "c"], "a,b,c,d,e,f".split(/,/, -4294967293));
assertArrayEquals(["a", "b", "c", "d"], "a,b,c,d,e,f".split(/,/, -4294967292));
assertArrayEquals(["a", "b", "c", "d", "e", "f"], "a,b,c,d,e,f".split(/,/, -1));
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment