Commit f597eec1 authored by Benedikt Meurer, committed by Commit Bot

[builtins] Support two byte strings in StringEqual builtin.

This CL adds support for two byte string comparisons to the StringEqual
builtin, which so far was bailing out to the generic %StringEqual
runtime function whenever any two-byte string was involved. This made
comparisons that involved two-byte strings, either comparing them to
one-byte strings or comparing two two-byte strings, up to 3x slower than
if only one-byte strings were involved.

With this change, all direct string (SeqString or ExternalString)
equality checks are roughly on par now, and the weird performance cliff
is gone. On the micro-benchmark from the bug we go from

  stringEqualBothOneByteSeqString: 162 ms.
  stringEqualTwoByteAndOneByteSeqString: 446 ms.
  stringEqualOneByteAndTwoByteSeqString: 438 ms.
  stringEqualBothTwoByteSeqString: 472 ms.

to

  stringEqualBothOneByteSeqString: 151 ms.
  stringEqualTwoByteAndOneByteSeqString: 158 ms.
  stringEqualOneByteAndTwoByteSeqString: 166 ms.
  stringEqualBothTwoByteSeqString: 160 ms.

which is the desired result. On the esprima test of the
web-tooling-benchmark we seem to improve by 1-2%, which corresponds to
the savings of going to the runtime for many StringEqual comparisons.

Drive-by-cleanup: Introduce LoadAndUntagStringLength helper into the CSA
with proper typing to avoid the unnecessary shifts on 64-bit platforms
when keeping the length tagged initially in StringEqual.

Bug: v8:4913, v8:6365, v8:6371, v8:6936, v8:7022
Change-Id: I566f4b80e217513775ffbd35e0480154abf59b27
Reviewed-on: https://chromium-review.googlesource.com/749223
Reviewed-by: Yang Guo <yangguo@chromium.org>
Commit-Queue: Benedikt Meurer <bmeurer@chromium.org>
Cr-Commit-Position: refs/heads/master@{#49067}
parent c9168fc5
......@@ -2338,7 +2338,7 @@ void ArrayIncludesIndexofAssembler::Generate(SearchVariant variant) {
TNode<String> search_element_string = CAST(search_element);
Label continue_loop(this), next_iteration(this, &index_var),
slow_compare(this), runtime(this, Label::kDeferred);
Node* search_length = LoadStringLength(search_element_string);
Node* search_length = LoadAndUntagStringLength(search_element_string);
Goto(&next_iteration);
BIND(&next_iteration);
GotoIfNot(UintPtrLessThan(index_var.value(), array_length),
......@@ -2348,13 +2348,13 @@ void ArrayIncludesIndexofAssembler::Generate(SearchVariant variant) {
GotoIf(WordEqual(search_element_string, element_k), &return_found);
Node* element_k_type = LoadInstanceType(element_k);
GotoIfNot(IsStringInstanceType(element_k_type), &continue_loop);
Branch(WordEqual(search_length, LoadStringLength(element_k)),
Branch(WordEqual(search_length, LoadAndUntagStringLength(element_k)),
&slow_compare, &continue_loop);
BIND(&slow_compare);
StringBuiltinsAssembler string_asm(state());
string_asm.StringEqual_Core(context, search_element_string, search_type,
search_length, element_k, element_k_type,
element_k, element_k_type, search_length,
&return_found, &continue_loop, &runtime);
BIND(&runtime);
TNode<Object> result = CallRuntime(Runtime::kStringEqual, context,
......
......@@ -162,33 +162,13 @@ void StringBuiltinsAssembler::ConvertAndBoundsCheckStartArgument(
void StringBuiltinsAssembler::GenerateStringEqual(Node* context, Node* left,
Node* right) {
// Here's pseudo-code for the algorithm below:
//
// if (lhs->length() != rhs->length()) return false;
// restart:
// if (lhs == rhs) return true;
// if (lhs->IsInternalizedString() && rhs->IsInternalizedString()) {
// return false;
// }
// if (lhs->IsSeqOneByteString() && rhs->IsSeqOneByteString()) {
// for (i = 0; i != lhs->length(); ++i) {
// if (lhs[i] != rhs[i]) return false;
// }
// return true;
// }
// if (lhs and/or rhs are indirect strings) {
// unwrap them and restart from the "restart:" label;
// }
// return %StringEqual(lhs, rhs);
VARIABLE(var_left, MachineRepresentation::kTagged, left);
VARIABLE(var_right, MachineRepresentation::kTagged, right);
Variable* input_vars[2] = {&var_left, &var_right};
Label if_equal(this), if_notequal(this), if_notbothdirectonebytestrings(this),
restart(this, 2, input_vars);
Label if_equal(this), if_notequal(this), if_indirect(this, Label::kDeferred),
restart(this, {&var_left, &var_right});
Node* lhs_length = LoadStringLength(left);
Node* rhs_length = LoadStringLength(right);
Node* lhs_length = LoadAndUntagStringLength(left);
Node* rhs_length = LoadAndUntagStringLength(right);
// Strings with different lengths cannot be equal.
GotoIf(WordNotEqual(lhs_length, rhs_length), &if_notequal);
......@@ -201,11 +181,10 @@ void StringBuiltinsAssembler::GenerateStringEqual(Node* context, Node* left,
Node* lhs_instance_type = LoadInstanceType(lhs);
Node* rhs_instance_type = LoadInstanceType(rhs);
StringEqual_Core(context, lhs, lhs_instance_type, lhs_length, rhs,
rhs_instance_type, &if_equal, &if_notequal,
&if_notbothdirectonebytestrings);
StringEqual_Core(context, lhs, lhs_instance_type, rhs, rhs_instance_type,
lhs_length, &if_equal, &if_notequal, &if_indirect);
BIND(&if_notbothdirectonebytestrings);
BIND(&if_indirect);
{
// Try to unwrap indirect strings, restart the above attempt on success.
MaybeDerefIndirectStrings(&var_left, lhs_instance_type, &var_right,
......@@ -223,13 +202,13 @@ void StringBuiltinsAssembler::GenerateStringEqual(Node* context, Node* left,
}
void StringBuiltinsAssembler::StringEqual_Core(
Node* context, Node* lhs, Node* lhs_instance_type, Node* lhs_length,
Node* rhs, Node* rhs_instance_type, Label* if_equal, Label* if_not_equal,
Label* if_notbothdirectonebyte) {
Node* context, Node* lhs, Node* lhs_instance_type, Node* rhs,
Node* rhs_instance_type, Node* length, Label* if_equal, Label* if_not_equal,
Label* if_indirect) {
CSA_ASSERT(this, IsString(lhs));
CSA_ASSERT(this, IsString(rhs));
CSA_ASSERT(this, WordEqual(LoadStringLength(lhs), lhs_length));
CSA_ASSERT(this, WordEqual(LoadStringLength(rhs), lhs_length));
CSA_ASSERT(this, WordEqual(LoadAndUntagStringLength(lhs), length));
CSA_ASSERT(this, WordEqual(LoadAndUntagStringLength(rhs), length));
// Fast check to see if {lhs} and {rhs} refer to the same String object.
GotoIf(WordEqual(lhs, rhs), if_equal);
......@@ -248,33 +227,75 @@ void StringBuiltinsAssembler::StringEqual_Core(
Int32Constant(kBothInternalizedTag)),
if_not_equal);
// Check that both {lhs} and {rhs} are flat one-byte strings, and that
// in case of ExternalStrings the data pointer is cached..
// Check if both {lhs} and {rhs} are direct strings, and that in case of
// ExternalStrings the data pointer is cached.
STATIC_ASSERT(kShortExternalStringTag != 0);
int const kBothDirectOneByteStringMask =
kStringEncodingMask | kIsIndirectStringMask | kShortExternalStringMask |
((kStringEncodingMask | kIsIndirectStringMask | kShortExternalStringMask)
<< 8);
int const kBothDirectOneByteStringTag =
kOneByteStringTag | (kOneByteStringTag << 8);
STATIC_ASSERT(kIsIndirectStringTag != 0);
int const kBothDirectStringMask =
kIsIndirectStringMask | kShortExternalStringMask |
((kIsIndirectStringMask | kShortExternalStringMask) << 8);
GotoIfNot(Word32Equal(Word32And(both_instance_types,
Int32Constant(kBothDirectOneByteStringMask)),
Int32Constant(kBothDirectOneByteStringTag)),
if_notbothdirectonebyte);
// At this point we know that we have two direct one-byte strings.
Int32Constant(kBothDirectStringMask)),
Int32Constant(0)),
if_indirect);
// Dispatch based on the {lhs} and {rhs} string encoding.
int const kBothStringEncodingMask =
kStringEncodingMask | (kStringEncodingMask << 8);
int const kOneOneByteStringTag = kOneByteStringTag | (kOneByteStringTag << 8);
int const kTwoTwoByteStringTag = kTwoByteStringTag | (kTwoByteStringTag << 8);
int const kOneTwoByteStringTag = kOneByteStringTag | (kTwoByteStringTag << 8);
Label if_oneonebytestring(this), if_twotwobytestring(this),
if_onetwobytestring(this), if_twoonebytestring(this);
Node* masked_instance_types =
Word32And(both_instance_types, Int32Constant(kBothStringEncodingMask));
GotoIf(
Word32Equal(masked_instance_types, Int32Constant(kOneOneByteStringTag)),
&if_oneonebytestring);
GotoIf(
Word32Equal(masked_instance_types, Int32Constant(kTwoTwoByteStringTag)),
&if_twotwobytestring);
Branch(
Word32Equal(masked_instance_types, Int32Constant(kOneTwoByteStringTag)),
&if_onetwobytestring, &if_twoonebytestring);
BIND(&if_oneonebytestring);
StringEqual_Loop(lhs, lhs_instance_type, MachineType::Uint8(), rhs,
rhs_instance_type, MachineType::Uint8(), length, if_equal,
if_not_equal);
BIND(&if_twotwobytestring);
StringEqual_Loop(lhs, lhs_instance_type, MachineType::Uint16(), rhs,
rhs_instance_type, MachineType::Uint16(), length, if_equal,
if_not_equal);
BIND(&if_onetwobytestring);
StringEqual_Loop(lhs, lhs_instance_type, MachineType::Uint8(), rhs,
rhs_instance_type, MachineType::Uint16(), length, if_equal,
if_not_equal);
BIND(&if_twoonebytestring);
StringEqual_Loop(lhs, lhs_instance_type, MachineType::Uint16(), rhs,
rhs_instance_type, MachineType::Uint8(), length, if_equal,
if_not_equal);
}
void StringBuiltinsAssembler::StringEqual_Loop(
Node* lhs, Node* lhs_instance_type, MachineType lhs_type, Node* rhs,
Node* rhs_instance_type, MachineType rhs_type, Node* length,
Label* if_equal, Label* if_not_equal) {
CSA_ASSERT(this, IsString(lhs));
CSA_ASSERT(this, IsString(rhs));
CSA_ASSERT(this, WordEqual(LoadAndUntagStringLength(lhs), length));
CSA_ASSERT(this, WordEqual(LoadAndUntagStringLength(rhs), length));
// Compute the effective offset of the first character.
Node* lhs_data = DirectStringData(lhs, lhs_instance_type);
Node* rhs_data = DirectStringData(rhs, rhs_instance_type);
// Compute the first offset after the string from the length.
Node* length = SmiUntag(lhs_length);
// Loop over the {lhs} and {rhs} strings to see if they are equal.
VARIABLE(var_offset, MachineType::PointerRepresentation());
VARIABLE(var_offset, MachineType::PointerRepresentation(), IntPtrConstant(0));
Label loop(this, &var_offset);
var_offset.Bind(IntPtrConstant(0));
Goto(&loop);
BIND(&loop);
{
......@@ -284,8 +305,12 @@ void StringBuiltinsAssembler::StringEqual_Core(
GotoIf(WordEqual(offset, length), if_equal);
// Load the next characters from {lhs} and {rhs}.
Node* lhs_value = Load(MachineType::Uint8(), lhs_data, offset);
Node* rhs_value = Load(MachineType::Uint8(), rhs_data, offset);
Node* lhs_value =
Load(lhs_type, lhs_data,
WordShl(offset, ElementSizeLog2Of(lhs_type.representation())));
Node* rhs_value =
Load(rhs_type, rhs_data,
WordShl(offset, ElementSizeLog2Of(rhs_type.representation())));
// Check if the characters match.
GotoIf(Word32NotEqual(lhs_value, rhs_value), if_not_equal);
......@@ -788,8 +813,8 @@ void StringBuiltinsAssembler::StringIndexOf(
VARIABLE(var_needle_byte, MachineType::PointerRepresentation(), int_zero);
VARIABLE(var_string_addr, MachineType::PointerRepresentation(), int_zero);
Node* const search_length = SmiUntag(LoadStringLength(search_string));
Node* const subject_length = SmiUntag(LoadStringLength(subject_string));
Node* const search_length = LoadAndUntagStringLength(search_string);
Node* const subject_length = LoadAndUntagStringLength(subject_string);
Node* const start_position = IntPtrMax(SmiUntag(position), int_zero);
Label zero_length_needle(this), return_minus_1(this);
......@@ -2093,7 +2118,7 @@ void StringTrimAssembler::Generate(String::TrimMode mode,
// Check that {receiver} is coercible to Object and convert it to a String.
Node* const string = ToThisString(context, receiver, method_name);
Node* const string_length = SmiUntag(LoadStringLength(string));
Node* const string_length = LoadAndUntagStringLength(string);
ToDirectStringAssembler to_direct(state(), string);
to_direct.TryToDirect(&if_runtime);
......
......@@ -20,11 +20,15 @@ class StringBuiltinsAssembler : public CodeStubAssembler {
Node* match_start_index, Node* match_end_index,
Node* replace_string);
void StringEqual_Core(Node* context, Node* lhs, Node* lhs_instance_type,
Node* lhs_length, Node* rhs, Node* rhs_instance_type,
Node* rhs, Node* rhs_instance_type, Node* length,
Label* if_equal, Label* if_not_equal,
Label* if_notbothdirectonebyte);
Label* if_indirect);
protected:
void StringEqual_Loop(Node* lhs, Node* lhs_instance_type,
MachineType lhs_type, Node* rhs,
Node* rhs_instance_type, MachineType rhs_type,
Node* length, Label* if_equal, Label* if_not_equal);
Node* DirectStringData(Node* string, Node* string_instance_type);
void DispatchOnStringEncodings(Node* const lhs_instance_type,
......
......@@ -1459,6 +1459,11 @@ TNode<Smi> CodeStubAssembler::LoadStringLength(SloppyTNode<String> object) {
return LoadObjectField<Smi>(object, String::kLengthOffset);
}
// Reads the length field of {object} through LoadStringLength(), which yields
// it as a Smi, and hands back the untagged word-sized value so that callers do
// not have to SmiUntag() the length themselves.
TNode<IntPtrT> CodeStubAssembler::LoadAndUntagStringLength(
    SloppyTNode<String> object) {
  TNode<Smi> const tagged_length = LoadStringLength(object);
  return SmiUntag(tagged_length);
}
Node* CodeStubAssembler::PointerToSeqStringData(Node* seq_string) {
CSA_ASSERT(this, IsString(seq_string));
CSA_ASSERT(this,
......@@ -4422,7 +4427,7 @@ TNode<Uint32T> CodeStubAssembler::StringCharCodeAt(
// Translate the {index} into a Word.
index = ParameterToWord(index, parameter_mode);
CSA_ASSERT(this, IntPtrGreaterThanOrEqual(index, IntPtrConstant(0)));
CSA_ASSERT(this, IntPtrLessThan(index, SmiUntag(LoadStringLength(string))));
CSA_ASSERT(this, IntPtrLessThan(index, LoadAndUntagStringLength(string)));
VARIABLE(var_result, MachineRepresentation::kWord32);
......
......@@ -534,6 +534,8 @@ class V8_EXPORT_PRIVATE CodeStubAssembler : public compiler::CodeAssembler {
// Load length field of a String object.
TNode<Smi> LoadStringLength(SloppyTNode<String> object);
// Load length field of a String object as intptr_t value.
TNode<IntPtrT> LoadAndUntagStringLength(SloppyTNode<String> object);
// Loads a pointer to the sequential String char array.
Node* PointerToSeqStringData(Node* seq_string);
// Load value field of a JSValue object.
......
// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Regression test for strict equality (`===`) on strings where at least one
// operand involves the character U+8765. That code point does not fit into a
// single byte, so these comparisons cover mixed and same-width operand pairs
// in both left/right orders.
(function() {
// Four characters: U+8765 followed by the ASCII characters 'a', 'b', 'c'.
const s = '\u8765abc';
// Comparing a string against itself.
assertTrue(s === s);
// Operands of different length (4 vs. 3 characters), in both operand orders.
assertFalse(s === 'abc');
assertFalse('abc' === s);
// The last three characters of {s} against an ASCII-only literal of equal
// content, in both operand orders.
// NOTE(review): how the engine represents the result of slice() internally is
// not visible from this test -- confirm before relying on a specific path.
assertTrue(s.slice(-3) === 'abc');
assertTrue('abc' === s.slice(-3));
// The first character of {s} against a literal holding only U+8765, in both
// operand orders.
assertTrue(s.slice(0, 1) === '\u8765');
assertTrue('\u8765' === s.slice(0, 1));
// {s} against the result of prepending the empty string to it, in both
// operand orders; the contents are identical.
assertTrue(s === '' + s);
assertTrue('' + s === s);
})();
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment