Commit f597eec1 authored by Benedikt Meurer, committed by Commit Bot

[builtins] Support two byte strings in StringEqual builtin.

This CL adds support for two byte string comparisons to the StringEqual
builtin, which so far was bailing out to the generic %StringEqual
runtime function whenever any two-byte string was involved. This made
comparisons that involved two-byte strings, either comparing them to
one-byte strings or comparing two two-byte strings, up to 3x slower than
if only one-byte strings were involved.

With this change, all direct string (SeqString or ExternalString)
equality checks are roughly on par now, and the weird performance cliff
is gone. On the micro-benchmark from the bug we go from

  stringEqualBothOneByteSeqString: 162 ms.
  stringEqualTwoByteAndOneByteSeqString: 446 ms.
  stringEqualOneByteAndTwoByteSeqString: 438 ms.
  stringEqualBothTwoByteSeqString: 472 ms.

to

  stringEqualBothOneByteSeqString: 151 ms.
  stringEqualTwoByteAndOneByteSeqString: 158 ms.
  stringEqualOneByteAndTwoByteSeqString: 166 ms.
  stringEqualBothTwoByteSeqString: 160 ms.

which is the desired result. On the esprima test of the
web-tooling-benchmark we seem to improve by 1-2%, which corresponds to
the savings of going to the runtime for many StringEqual comparisons.

Drive-by-cleanup: Introduce LoadAndUntagStringLength helper into the CSA
with proper typing to avoid the unnecessary shifts on 64-bit platforms
when keeping the length tagged initially in StringEqual.

Bug: v8:4913, v8:6365, v8:6371, v8:6936, v8:7022
Change-Id: I566f4b80e217513775ffbd35e0480154abf59b27
Reviewed-on: https://chromium-review.googlesource.com/749223
Reviewed-by: Yang Guo <yangguo@chromium.org>
Commit-Queue: Benedikt Meurer <bmeurer@chromium.org>
Cr-Commit-Position: refs/heads/master@{#49067}
parent c9168fc5
...@@ -2338,7 +2338,7 @@ void ArrayIncludesIndexofAssembler::Generate(SearchVariant variant) { ...@@ -2338,7 +2338,7 @@ void ArrayIncludesIndexofAssembler::Generate(SearchVariant variant) {
TNode<String> search_element_string = CAST(search_element); TNode<String> search_element_string = CAST(search_element);
Label continue_loop(this), next_iteration(this, &index_var), Label continue_loop(this), next_iteration(this, &index_var),
slow_compare(this), runtime(this, Label::kDeferred); slow_compare(this), runtime(this, Label::kDeferred);
Node* search_length = LoadStringLength(search_element_string); Node* search_length = LoadAndUntagStringLength(search_element_string);
Goto(&next_iteration); Goto(&next_iteration);
BIND(&next_iteration); BIND(&next_iteration);
GotoIfNot(UintPtrLessThan(index_var.value(), array_length), GotoIfNot(UintPtrLessThan(index_var.value(), array_length),
...@@ -2348,13 +2348,13 @@ void ArrayIncludesIndexofAssembler::Generate(SearchVariant variant) { ...@@ -2348,13 +2348,13 @@ void ArrayIncludesIndexofAssembler::Generate(SearchVariant variant) {
GotoIf(WordEqual(search_element_string, element_k), &return_found); GotoIf(WordEqual(search_element_string, element_k), &return_found);
Node* element_k_type = LoadInstanceType(element_k); Node* element_k_type = LoadInstanceType(element_k);
GotoIfNot(IsStringInstanceType(element_k_type), &continue_loop); GotoIfNot(IsStringInstanceType(element_k_type), &continue_loop);
Branch(WordEqual(search_length, LoadStringLength(element_k)), Branch(WordEqual(search_length, LoadAndUntagStringLength(element_k)),
&slow_compare, &continue_loop); &slow_compare, &continue_loop);
BIND(&slow_compare); BIND(&slow_compare);
StringBuiltinsAssembler string_asm(state()); StringBuiltinsAssembler string_asm(state());
string_asm.StringEqual_Core(context, search_element_string, search_type, string_asm.StringEqual_Core(context, search_element_string, search_type,
search_length, element_k, element_k_type, element_k, element_k_type, search_length,
&return_found, &continue_loop, &runtime); &return_found, &continue_loop, &runtime);
BIND(&runtime); BIND(&runtime);
TNode<Object> result = CallRuntime(Runtime::kStringEqual, context, TNode<Object> result = CallRuntime(Runtime::kStringEqual, context,
......
...@@ -162,33 +162,13 @@ void StringBuiltinsAssembler::ConvertAndBoundsCheckStartArgument( ...@@ -162,33 +162,13 @@ void StringBuiltinsAssembler::ConvertAndBoundsCheckStartArgument(
void StringBuiltinsAssembler::GenerateStringEqual(Node* context, Node* left, void StringBuiltinsAssembler::GenerateStringEqual(Node* context, Node* left,
Node* right) { Node* right) {
// Here's pseudo-code for the algorithm below:
//
// if (lhs->length() != rhs->length()) return false;
// restart:
// if (lhs == rhs) return true;
// if (lhs->IsInternalizedString() && rhs->IsInternalizedString()) {
// return false;
// }
// if (lhs->IsSeqOneByteString() && rhs->IsSeqOneByteString()) {
// for (i = 0; i != lhs->length(); ++i) {
// if (lhs[i] != rhs[i]) return false;
// }
// return true;
// }
// if (lhs and/or rhs are indirect strings) {
// unwrap them and restart from the "restart:" label;
// }
// return %StringEqual(lhs, rhs);
VARIABLE(var_left, MachineRepresentation::kTagged, left); VARIABLE(var_left, MachineRepresentation::kTagged, left);
VARIABLE(var_right, MachineRepresentation::kTagged, right); VARIABLE(var_right, MachineRepresentation::kTagged, right);
Variable* input_vars[2] = {&var_left, &var_right}; Label if_equal(this), if_notequal(this), if_indirect(this, Label::kDeferred),
Label if_equal(this), if_notequal(this), if_notbothdirectonebytestrings(this), restart(this, {&var_left, &var_right});
restart(this, 2, input_vars);
Node* lhs_length = LoadStringLength(left); Node* lhs_length = LoadAndUntagStringLength(left);
Node* rhs_length = LoadStringLength(right); Node* rhs_length = LoadAndUntagStringLength(right);
// Strings with different lengths cannot be equal. // Strings with different lengths cannot be equal.
GotoIf(WordNotEqual(lhs_length, rhs_length), &if_notequal); GotoIf(WordNotEqual(lhs_length, rhs_length), &if_notequal);
...@@ -201,11 +181,10 @@ void StringBuiltinsAssembler::GenerateStringEqual(Node* context, Node* left, ...@@ -201,11 +181,10 @@ void StringBuiltinsAssembler::GenerateStringEqual(Node* context, Node* left,
Node* lhs_instance_type = LoadInstanceType(lhs); Node* lhs_instance_type = LoadInstanceType(lhs);
Node* rhs_instance_type = LoadInstanceType(rhs); Node* rhs_instance_type = LoadInstanceType(rhs);
StringEqual_Core(context, lhs, lhs_instance_type, lhs_length, rhs, StringEqual_Core(context, lhs, lhs_instance_type, rhs, rhs_instance_type,
rhs_instance_type, &if_equal, &if_notequal, lhs_length, &if_equal, &if_notequal, &if_indirect);
&if_notbothdirectonebytestrings);
BIND(&if_notbothdirectonebytestrings); BIND(&if_indirect);
{ {
// Try to unwrap indirect strings, restart the above attempt on success. // Try to unwrap indirect strings, restart the above attempt on success.
MaybeDerefIndirectStrings(&var_left, lhs_instance_type, &var_right, MaybeDerefIndirectStrings(&var_left, lhs_instance_type, &var_right,
...@@ -223,13 +202,13 @@ void StringBuiltinsAssembler::GenerateStringEqual(Node* context, Node* left, ...@@ -223,13 +202,13 @@ void StringBuiltinsAssembler::GenerateStringEqual(Node* context, Node* left,
} }
void StringBuiltinsAssembler::StringEqual_Core( void StringBuiltinsAssembler::StringEqual_Core(
Node* context, Node* lhs, Node* lhs_instance_type, Node* lhs_length, Node* context, Node* lhs, Node* lhs_instance_type, Node* rhs,
Node* rhs, Node* rhs_instance_type, Label* if_equal, Label* if_not_equal, Node* rhs_instance_type, Node* length, Label* if_equal, Label* if_not_equal,
Label* if_notbothdirectonebyte) { Label* if_indirect) {
CSA_ASSERT(this, IsString(lhs)); CSA_ASSERT(this, IsString(lhs));
CSA_ASSERT(this, IsString(rhs)); CSA_ASSERT(this, IsString(rhs));
CSA_ASSERT(this, WordEqual(LoadStringLength(lhs), lhs_length)); CSA_ASSERT(this, WordEqual(LoadAndUntagStringLength(lhs), length));
CSA_ASSERT(this, WordEqual(LoadStringLength(rhs), lhs_length)); CSA_ASSERT(this, WordEqual(LoadAndUntagStringLength(rhs), length));
// Fast check to see if {lhs} and {rhs} refer to the same String object. // Fast check to see if {lhs} and {rhs} refer to the same String object.
GotoIf(WordEqual(lhs, rhs), if_equal); GotoIf(WordEqual(lhs, rhs), if_equal);
...@@ -248,33 +227,75 @@ void StringBuiltinsAssembler::StringEqual_Core( ...@@ -248,33 +227,75 @@ void StringBuiltinsAssembler::StringEqual_Core(
Int32Constant(kBothInternalizedTag)), Int32Constant(kBothInternalizedTag)),
if_not_equal); if_not_equal);
// Check that both {lhs} and {rhs} are flat one-byte strings, and that // Check if both {lhs} and {rhs} are direct strings, and that in case of
// in case of ExternalStrings the data pointer is cached.. // ExternalStrings the data pointer is cached.
STATIC_ASSERT(kShortExternalStringTag != 0); STATIC_ASSERT(kShortExternalStringTag != 0);
int const kBothDirectOneByteStringMask = STATIC_ASSERT(kIsIndirectStringTag != 0);
kStringEncodingMask | kIsIndirectStringMask | kShortExternalStringMask | int const kBothDirectStringMask =
((kStringEncodingMask | kIsIndirectStringMask | kShortExternalStringMask) kIsIndirectStringMask | kShortExternalStringMask |
<< 8); ((kIsIndirectStringMask | kShortExternalStringMask) << 8);
int const kBothDirectOneByteStringTag =
kOneByteStringTag | (kOneByteStringTag << 8);
GotoIfNot(Word32Equal(Word32And(both_instance_types, GotoIfNot(Word32Equal(Word32And(both_instance_types,
Int32Constant(kBothDirectOneByteStringMask)), Int32Constant(kBothDirectStringMask)),
Int32Constant(kBothDirectOneByteStringTag)), Int32Constant(0)),
if_notbothdirectonebyte); if_indirect);
// Dispatch based on the {lhs} and {rhs} string encoding.
int const kBothStringEncodingMask =
kStringEncodingMask | (kStringEncodingMask << 8);
int const kOneOneByteStringTag = kOneByteStringTag | (kOneByteStringTag << 8);
int const kTwoTwoByteStringTag = kTwoByteStringTag | (kTwoByteStringTag << 8);
int const kOneTwoByteStringTag = kOneByteStringTag | (kTwoByteStringTag << 8);
Label if_oneonebytestring(this), if_twotwobytestring(this),
if_onetwobytestring(this), if_twoonebytestring(this);
Node* masked_instance_types =
Word32And(both_instance_types, Int32Constant(kBothStringEncodingMask));
GotoIf(
Word32Equal(masked_instance_types, Int32Constant(kOneOneByteStringTag)),
&if_oneonebytestring);
GotoIf(
Word32Equal(masked_instance_types, Int32Constant(kTwoTwoByteStringTag)),
&if_twotwobytestring);
Branch(
Word32Equal(masked_instance_types, Int32Constant(kOneTwoByteStringTag)),
&if_onetwobytestring, &if_twoonebytestring);
BIND(&if_oneonebytestring);
StringEqual_Loop(lhs, lhs_instance_type, MachineType::Uint8(), rhs,
rhs_instance_type, MachineType::Uint8(), length, if_equal,
if_not_equal);
BIND(&if_twotwobytestring);
StringEqual_Loop(lhs, lhs_instance_type, MachineType::Uint16(), rhs,
rhs_instance_type, MachineType::Uint16(), length, if_equal,
if_not_equal);
BIND(&if_onetwobytestring);
StringEqual_Loop(lhs, lhs_instance_type, MachineType::Uint8(), rhs,
rhs_instance_type, MachineType::Uint16(), length, if_equal,
if_not_equal);
// At this point we know that we have two direct one-byte strings. BIND(&if_twoonebytestring);
StringEqual_Loop(lhs, lhs_instance_type, MachineType::Uint16(), rhs,
rhs_instance_type, MachineType::Uint8(), length, if_equal,
if_not_equal);
}
void StringBuiltinsAssembler::StringEqual_Loop(
Node* lhs, Node* lhs_instance_type, MachineType lhs_type, Node* rhs,
Node* rhs_instance_type, MachineType rhs_type, Node* length,
Label* if_equal, Label* if_not_equal) {
CSA_ASSERT(this, IsString(lhs));
CSA_ASSERT(this, IsString(rhs));
CSA_ASSERT(this, WordEqual(LoadAndUntagStringLength(lhs), length));
CSA_ASSERT(this, WordEqual(LoadAndUntagStringLength(rhs), length));
// Compute the effective offset of the first character. // Compute the effective offset of the first character.
Node* lhs_data = DirectStringData(lhs, lhs_instance_type); Node* lhs_data = DirectStringData(lhs, lhs_instance_type);
Node* rhs_data = DirectStringData(rhs, rhs_instance_type); Node* rhs_data = DirectStringData(rhs, rhs_instance_type);
// Compute the first offset after the string from the length.
Node* length = SmiUntag(lhs_length);
// Loop over the {lhs} and {rhs} strings to see if they are equal. // Loop over the {lhs} and {rhs} strings to see if they are equal.
VARIABLE(var_offset, MachineType::PointerRepresentation()); VARIABLE(var_offset, MachineType::PointerRepresentation(), IntPtrConstant(0));
Label loop(this, &var_offset); Label loop(this, &var_offset);
var_offset.Bind(IntPtrConstant(0));
Goto(&loop); Goto(&loop);
BIND(&loop); BIND(&loop);
{ {
...@@ -284,8 +305,12 @@ void StringBuiltinsAssembler::StringEqual_Core( ...@@ -284,8 +305,12 @@ void StringBuiltinsAssembler::StringEqual_Core(
GotoIf(WordEqual(offset, length), if_equal); GotoIf(WordEqual(offset, length), if_equal);
// Load the next characters from {lhs} and {rhs}. // Load the next characters from {lhs} and {rhs}.
Node* lhs_value = Load(MachineType::Uint8(), lhs_data, offset); Node* lhs_value =
Node* rhs_value = Load(MachineType::Uint8(), rhs_data, offset); Load(lhs_type, lhs_data,
WordShl(offset, ElementSizeLog2Of(lhs_type.representation())));
Node* rhs_value =
Load(rhs_type, rhs_data,
WordShl(offset, ElementSizeLog2Of(rhs_type.representation())));
// Check if the characters match. // Check if the characters match.
GotoIf(Word32NotEqual(lhs_value, rhs_value), if_not_equal); GotoIf(Word32NotEqual(lhs_value, rhs_value), if_not_equal);
...@@ -788,8 +813,8 @@ void StringBuiltinsAssembler::StringIndexOf( ...@@ -788,8 +813,8 @@ void StringBuiltinsAssembler::StringIndexOf(
VARIABLE(var_needle_byte, MachineType::PointerRepresentation(), int_zero); VARIABLE(var_needle_byte, MachineType::PointerRepresentation(), int_zero);
VARIABLE(var_string_addr, MachineType::PointerRepresentation(), int_zero); VARIABLE(var_string_addr, MachineType::PointerRepresentation(), int_zero);
Node* const search_length = SmiUntag(LoadStringLength(search_string)); Node* const search_length = LoadAndUntagStringLength(search_string);
Node* const subject_length = SmiUntag(LoadStringLength(subject_string)); Node* const subject_length = LoadAndUntagStringLength(subject_string);
Node* const start_position = IntPtrMax(SmiUntag(position), int_zero); Node* const start_position = IntPtrMax(SmiUntag(position), int_zero);
Label zero_length_needle(this), return_minus_1(this); Label zero_length_needle(this), return_minus_1(this);
...@@ -2093,7 +2118,7 @@ void StringTrimAssembler::Generate(String::TrimMode mode, ...@@ -2093,7 +2118,7 @@ void StringTrimAssembler::Generate(String::TrimMode mode,
// Check that {receiver} is coercible to Object and convert it to a String. // Check that {receiver} is coercible to Object and convert it to a String.
Node* const string = ToThisString(context, receiver, method_name); Node* const string = ToThisString(context, receiver, method_name);
Node* const string_length = SmiUntag(LoadStringLength(string)); Node* const string_length = LoadAndUntagStringLength(string);
ToDirectStringAssembler to_direct(state(), string); ToDirectStringAssembler to_direct(state(), string);
to_direct.TryToDirect(&if_runtime); to_direct.TryToDirect(&if_runtime);
......
...@@ -20,11 +20,15 @@ class StringBuiltinsAssembler : public CodeStubAssembler { ...@@ -20,11 +20,15 @@ class StringBuiltinsAssembler : public CodeStubAssembler {
Node* match_start_index, Node* match_end_index, Node* match_start_index, Node* match_end_index,
Node* replace_string); Node* replace_string);
void StringEqual_Core(Node* context, Node* lhs, Node* lhs_instance_type, void StringEqual_Core(Node* context, Node* lhs, Node* lhs_instance_type,
Node* lhs_length, Node* rhs, Node* rhs_instance_type, Node* rhs, Node* rhs_instance_type, Node* length,
Label* if_equal, Label* if_not_equal, Label* if_equal, Label* if_not_equal,
Label* if_notbothdirectonebyte); Label* if_indirect);
protected: protected:
// Emits the character-by-character comparison of {lhs} and {rhs}. Characters
// of {lhs} are loaded as {lhs_type} and characters of {rhs} as {rhs_type},
// starting from the data pointers computed by DirectStringData. Both strings
// are asserted (CSA_ASSERT) to have the untagged length {length}. Control
// jumps to {if_equal} once the running offset reaches {length}, and to
// {if_not_equal} as soon as a pair of characters differs.
void StringEqual_Loop(Node* lhs, Node* lhs_instance_type,
MachineType lhs_type, Node* rhs,
Node* rhs_instance_type, MachineType rhs_type,
Node* length, Label* if_equal, Label* if_not_equal);
Node* DirectStringData(Node* string, Node* string_instance_type); Node* DirectStringData(Node* string, Node* string_instance_type);
void DispatchOnStringEncodings(Node* const lhs_instance_type, void DispatchOnStringEncodings(Node* const lhs_instance_type,
......
...@@ -1459,6 +1459,11 @@ TNode<Smi> CodeStubAssembler::LoadStringLength(SloppyTNode<String> object) { ...@@ -1459,6 +1459,11 @@ TNode<Smi> CodeStubAssembler::LoadStringLength(SloppyTNode<String> object) {
return LoadObjectField<Smi>(object, String::kLengthOffset); return LoadObjectField<Smi>(object, String::kLengthOffset);
} }
TNode<IntPtrT> CodeStubAssembler::LoadAndUntagStringLength(
    SloppyTNode<String> object) {
  // Read the Smi-typed length field of {object} first, then strip the Smi tag
  // so that callers receive the length as an untagged intptr value.
  TNode<Smi> const smi_length = LoadStringLength(object);
  return SmiUntag(smi_length);
}
Node* CodeStubAssembler::PointerToSeqStringData(Node* seq_string) { Node* CodeStubAssembler::PointerToSeqStringData(Node* seq_string) {
CSA_ASSERT(this, IsString(seq_string)); CSA_ASSERT(this, IsString(seq_string));
CSA_ASSERT(this, CSA_ASSERT(this,
...@@ -4422,7 +4427,7 @@ TNode<Uint32T> CodeStubAssembler::StringCharCodeAt( ...@@ -4422,7 +4427,7 @@ TNode<Uint32T> CodeStubAssembler::StringCharCodeAt(
// Translate the {index} into a Word. // Translate the {index} into a Word.
index = ParameterToWord(index, parameter_mode); index = ParameterToWord(index, parameter_mode);
CSA_ASSERT(this, IntPtrGreaterThanOrEqual(index, IntPtrConstant(0))); CSA_ASSERT(this, IntPtrGreaterThanOrEqual(index, IntPtrConstant(0)));
CSA_ASSERT(this, IntPtrLessThan(index, SmiUntag(LoadStringLength(string)))); CSA_ASSERT(this, IntPtrLessThan(index, LoadAndUntagStringLength(string)));
VARIABLE(var_result, MachineRepresentation::kWord32); VARIABLE(var_result, MachineRepresentation::kWord32);
......
...@@ -534,6 +534,8 @@ class V8_EXPORT_PRIVATE CodeStubAssembler : public compiler::CodeAssembler { ...@@ -534,6 +534,8 @@ class V8_EXPORT_PRIVATE CodeStubAssembler : public compiler::CodeAssembler {
// Load length field of a String object. // Load length field of a String object.
TNode<Smi> LoadStringLength(SloppyTNode<String> object); TNode<Smi> LoadStringLength(SloppyTNode<String> object);
// Load length field of a String object as intptr_t value.
TNode<IntPtrT> LoadAndUntagStringLength(SloppyTNode<String> object);
// Loads a pointer to the sequential String char array. // Loads a pointer to the sequential String char array.
Node* PointerToSeqStringData(Node* seq_string); Node* PointerToSeqStringData(Node* seq_string);
// Load value field of a JSValue object. // Load value field of a JSValue object.
......
// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
(() => {
  // The leading \u8765 code unit does not fit into a single byte, so this
  // subject is intended to exercise strict equality on a two-byte string.
  const subject = '\u8765abc';

  // A string is always equal to itself.
  assertTrue(subject === subject);

  // Strings of different length are never equal, whichever side they are on.
  assertFalse(subject === 'abc');
  assertFalse('abc' === subject);

  // The last three characters match the literal 'abc' in both operand orders.
  assertTrue(subject.slice(-3) === 'abc');
  assertTrue('abc' === subject.slice(-3));

  // The first character matches the literal '\u8765' in both operand orders.
  assertTrue(subject.slice(0, 1) === '\u8765');
  assertTrue('\u8765' === subject.slice(0, 1));

  // Concatenating with the empty string yields an equal string.
  assertTrue(subject === '' + subject);
  assertTrue('' + subject === subject);
})();
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment