Commit 9ed9dff9 authored by Leszek Swirski's avatar Leszek Swirski Committed by V8 LUCI CQ

Revert "[builtins] use SIMD IndexOf/includes on large arrays"

This reverts commit ab76ffc8.

Reason for revert: Breaks on UBSan: https://ci.chromium.org/ui/p/v8/builders/ci/V8%20Linux64%20UBSan/21444/overview

Original change's description:
> [builtins] use SIMD IndexOf/includes on large arrays
>
> Change-Id: If751e813c7f45a4d18b84e8c0314a54c84894d61
> Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3639203
> Reviewed-by: Tobias Tebbi <tebbi@chromium.org>
> Commit-Queue: Darius Mercadier <dmercadier@chromium.org>
> Reviewed-by: Toon Verwaest <verwaest@chromium.org>
> Cr-Commit-Position: refs/heads/main@{#80771}

Change-Id: I62dd4249df122af567680cbaa18deb3c9ed44c90
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3672416
Owners-Override: Leszek Swirski <leszeks@chromium.org>
Commit-Queue: Rubber Stamper <rubber-stamper@appspot.gserviceaccount.com>
Bot-Commit: Rubber Stamper <rubber-stamper@appspot.gserviceaccount.com>
Auto-Submit: Leszek Swirski <leszeks@chromium.org>
Cr-Commit-Position: refs/heads/main@{#80773}
parent a7aba02a
......@@ -1819,8 +1819,6 @@ filegroup(
"src/objects/shared-function-info-inl.h",
"src/objects/shared-function-info.cc",
"src/objects/shared-function-info.h",
"src/objects/simd.cc",
"src/objects/simd.h",
"src/objects/slots-atomic-inl.h",
"src/objects/slots-inl.h",
"src/objects/slots.h",
......
......@@ -3315,7 +3315,6 @@ v8_header_set("v8_internal_headers") {
"src/objects/script.h",
"src/objects/shared-function-info-inl.h",
"src/objects/shared-function-info.h",
"src/objects/simd.h",
"src/objects/slots-atomic-inl.h",
"src/objects/slots-inl.h",
"src/objects/slots.h",
......@@ -4456,7 +4455,6 @@ v8_source_set("v8_base_without_compiler") {
"src/objects/property.cc",
"src/objects/scope-info.cc",
"src/objects/shared-function-info.cc",
"src/objects/simd.cc",
"src/objects/source-text-module.cc",
"src/objects/string-comparator.cc",
"src/objects/string-table.cc",
......
......@@ -11,12 +11,10 @@
#include "src/builtins/builtins.h"
#include "src/codegen/code-stub-assembler.h"
#include "src/codegen/interface-descriptors-inl.h"
#include "src/codegen/tnode.h"
#include "src/execution/frame-constants.h"
#include "src/heap/factory-inl.h"
#include "src/objects/allocation-site-inl.h"
#include "src/objects/arguments-inl.h"
#include "src/objects/elements-kind.h"
#include "src/objects/property-cell.h"
namespace v8 {
......@@ -590,15 +588,12 @@ class ArrayIncludesIndexofAssembler : public CodeStubAssembler {
enum SearchVariant { kIncludes, kIndexOf };
enum class SimpleElementKind { kSmiOrHole, kAny };
void Generate(SearchVariant variant, TNode<IntPtrT> argc,
TNode<Context> context);
void GenerateSmiOrObject(SearchVariant variant, TNode<Context> context,
TNode<FixedArray> elements,
TNode<Object> search_element,
TNode<Smi> array_length, TNode<Smi> from_index,
SimpleElementKind array_kind);
TNode<Smi> array_length, TNode<Smi> from_index);
void GeneratePackedDoubles(SearchVariant variant,
TNode<FixedDoubleArray> elements,
TNode<Object> search_element,
......@@ -614,22 +609,6 @@ class ArrayIncludesIndexofAssembler : public CodeStubAssembler {
Return(value);
BIND(&done);
}
private:
// Use SIMD code for arrays larger than kSIMDThreshold (in builtins that have
// SIMD implementations).
const int kSIMDThreshold = 48;
// For now, we can vectorize if:
// - SSE3/AVX are present (x86/x64). Note that if __AVX__ is defined, then
// __SSE3__ will be as well, so we just check __SSE3__.
// - Neon is present and the architecture is 64-bit (because Neon on 32-bit
// architecture lacks some instructions).
#if defined(__SSE3__) || defined(V8_HOST_ARCH_ARM64)
const bool kCanVectorize = true;
#else
const bool kCanVectorize = false;
#endif
};
void ArrayIncludesIndexofAssembler::Generate(SearchVariant variant,
......@@ -702,8 +681,7 @@ void ArrayIncludesIndexofAssembler::Generate(SearchVariant variant,
GotoIf(IntPtrGreaterThanOrEqual(index_var.value(), array_length_untagged),
&return_not_found);
Label if_smi(this), if_smiorobjects(this), if_packed_doubles(this),
if_holey_doubles(this);
Label if_smiorobjects(this), if_packed_doubles(this), if_holey_doubles(this);
TNode<Int32T> elements_kind = LoadElementsKind(array);
TNode<FixedArrayBase> elements = LoadElements(array);
......@@ -711,8 +689,6 @@ void ArrayIncludesIndexofAssembler::Generate(SearchVariant variant,
static_assert(HOLEY_SMI_ELEMENTS == 1);
static_assert(PACKED_ELEMENTS == 2);
static_assert(HOLEY_ELEMENTS == 3);
GotoIf(IsElementsKindLessThanOrEqual(elements_kind, HOLEY_SMI_ELEMENTS),
&if_smi);
GotoIf(IsElementsKindLessThanOrEqual(elements_kind, HOLEY_ELEMENTS),
&if_smiorobjects);
GotoIf(
......@@ -725,16 +701,6 @@ void ArrayIncludesIndexofAssembler::Generate(SearchVariant variant,
&if_smiorobjects);
Goto(&return_not_found);
BIND(&if_smi);
{
Callable callable = Builtins::CallableFor(
isolate(), (variant == kIncludes) ? Builtin::kArrayIncludesSmi
: Builtin::kArrayIndexOfSmi);
TNode<Object> result = CallStub(callable, context, elements, search_element,
array_length, SmiTag(index_var.value()));
args.PopAndReturn(result);
}
BIND(&if_smiorobjects);
{
Callable callable = (variant == kIncludes)
......@@ -794,7 +760,7 @@ void ArrayIncludesIndexofAssembler::Generate(SearchVariant variant,
void ArrayIncludesIndexofAssembler::GenerateSmiOrObject(
SearchVariant variant, TNode<Context> context, TNode<FixedArray> elements,
TNode<Object> search_element, TNode<Smi> array_length,
TNode<Smi> from_index, SimpleElementKind array_kind) {
TNode<Smi> from_index) {
TVARIABLE(IntPtrT, index_var, SmiUntag(from_index));
TVARIABLE(Float64T, search_num);
TNode<IntPtrT> array_length_untagged = SmiUntag(array_length);
......@@ -821,27 +787,7 @@ void ArrayIncludesIndexofAssembler::GenerateSmiOrObject(
TNode<Uint16T> search_type = LoadMapInstanceType(map);
GotoIf(IsStringInstanceType(search_type), &string_loop);
GotoIf(IsBigIntInstanceType(search_type), &bigint_loop);
if (kCanVectorize) {
Label simd_call(this);
Branch(
UintPtrLessThan(array_length_untagged, IntPtrConstant(kSIMDThreshold)),
&ident_loop, &simd_call);
BIND(&simd_call);
TNode<ExternalReference> simd_function = ExternalConstant(
ExternalReference::array_indexof_includes_smi_or_object());
TNode<IntPtrT> result = UncheckedCast<IntPtrT>(CallCFunction(
simd_function, MachineType::UintPtr(),
std::make_pair(MachineType::TaggedPointer(), elements),
std::make_pair(MachineType::UintPtr(), array_length_untagged),
std::make_pair(MachineType::UintPtr(), index_var.value()),
std::make_pair(MachineType::TaggedPointer(), search_element)));
index_var = ReinterpretCast<IntPtrT>(result);
Branch(IntPtrLessThan(index_var.value(), IntPtrConstant(0)),
&return_not_found, &return_found);
} else {
Goto(&ident_loop);
}
Goto(&ident_loop);
BIND(&ident_loop);
{
......@@ -873,31 +819,7 @@ void ArrayIncludesIndexofAssembler::GenerateSmiOrObject(
{
Label nan_loop(this, &index_var), not_nan_loop(this, &index_var);
Label* nan_handling = variant == kIncludes ? &nan_loop : &return_not_found;
GotoIfNot(Float64Equal(search_num.value(), search_num.value()),
nan_handling);
if (kCanVectorize && array_kind == SimpleElementKind::kSmiOrHole) {
Label smi_check(this), simd_call(this);
Branch(UintPtrLessThan(array_length_untagged,
IntPtrConstant(kSIMDThreshold)),
&not_nan_loop, &smi_check);
BIND(&smi_check);
Branch(TaggedIsSmi(search_element), &simd_call, &not_nan_loop);
BIND(&simd_call);
TNode<ExternalReference> simd_function = ExternalConstant(
ExternalReference::array_indexof_includes_smi_or_object());
TNode<IntPtrT> result = UncheckedCast<IntPtrT>(CallCFunction(
simd_function, MachineType::UintPtr(),
std::make_pair(MachineType::TaggedPointer(), elements),
std::make_pair(MachineType::UintPtr(), array_length_untagged),
std::make_pair(MachineType::UintPtr(), index_var.value()),
std::make_pair(MachineType::TaggedPointer(), search_element)));
index_var = ReinterpretCast<IntPtrT>(result);
Branch(IntPtrLessThan(index_var.value(), IntPtrConstant(0)),
&return_not_found, &return_found);
} else {
Goto(&not_nan_loop);
}
BranchIfFloat64IsNaN(search_num.value(), nan_handling, &not_nan_loop);
BIND(&not_nan_loop);
{
......@@ -1015,15 +937,15 @@ void ArrayIncludesIndexofAssembler::GeneratePackedDoubles(
TVARIABLE(IntPtrT, index_var, SmiUntag(from_index));
TNode<IntPtrT> array_length_untagged = SmiUntag(array_length);
Label nan_loop(this, &index_var), not_nan_case(this),
not_nan_loop(this, &index_var), hole_loop(this, &index_var),
search_notnan(this), return_found(this), return_not_found(this);
Label nan_loop(this, &index_var), not_nan_loop(this, &index_var),
hole_loop(this, &index_var), search_notnan(this), return_found(this),
return_not_found(this);
TVARIABLE(Float64T, search_num);
search_num = Float64Constant(0);
GotoIfNot(TaggedIsSmi(search_element), &search_notnan);
search_num = SmiToFloat64(CAST(search_element));
Goto(&not_nan_case);
Goto(&not_nan_loop);
BIND(&search_notnan);
GotoIfNot(IsHeapNumber(CAST(search_element)), &return_not_found);
......@@ -1031,29 +953,7 @@ void ArrayIncludesIndexofAssembler::GeneratePackedDoubles(
search_num = LoadHeapNumberValue(CAST(search_element));
Label* nan_handling = variant == kIncludes ? &nan_loop : &return_not_found;
BranchIfFloat64IsNaN(search_num.value(), nan_handling, &not_nan_case);
BIND(&not_nan_case);
if (kCanVectorize) {
Label simd_call(this);
Branch(
UintPtrLessThan(array_length_untagged, IntPtrConstant(kSIMDThreshold)),
&not_nan_loop, &simd_call);
BIND(&simd_call);
TNode<ExternalReference> simd_function =
ExternalConstant(ExternalReference::array_indexof_includes_double());
TNode<IntPtrT> result = UncheckedCast<IntPtrT>(CallCFunction(
simd_function, MachineType::UintPtr(),
std::make_pair(MachineType::TaggedPointer(), elements),
std::make_pair(MachineType::UintPtr(), array_length_untagged),
std::make_pair(MachineType::UintPtr(), index_var.value()),
std::make_pair(MachineType::TaggedPointer(), search_element)));
index_var = ReinterpretCast<IntPtrT>(result);
Branch(IntPtrLessThan(index_var.value(), IntPtrConstant(0)),
&return_not_found, &return_found);
} else {
Goto(&not_nan_loop);
}
BranchIfFloat64IsNaN(search_num.value(), nan_handling, &not_nan_loop);
BIND(&not_nan_loop);
{
......@@ -1105,15 +1005,15 @@ void ArrayIncludesIndexofAssembler::GenerateHoleyDoubles(
TVARIABLE(IntPtrT, index_var, SmiUntag(from_index));
TNode<IntPtrT> array_length_untagged = SmiUntag(array_length);
Label nan_loop(this, &index_var), not_nan_case(this),
not_nan_loop(this, &index_var), hole_loop(this, &index_var),
search_notnan(this), return_found(this), return_not_found(this);
Label nan_loop(this, &index_var), not_nan_loop(this, &index_var),
hole_loop(this, &index_var), search_notnan(this), return_found(this),
return_not_found(this);
TVARIABLE(Float64T, search_num);
search_num = Float64Constant(0);
GotoIfNot(TaggedIsSmi(search_element), &search_notnan);
search_num = SmiToFloat64(CAST(search_element));
Goto(&not_nan_case);
Goto(&not_nan_loop);
BIND(&search_notnan);
if (variant == kIncludes) {
......@@ -1124,29 +1024,7 @@ void ArrayIncludesIndexofAssembler::GenerateHoleyDoubles(
search_num = LoadHeapNumberValue(CAST(search_element));
Label* nan_handling = variant == kIncludes ? &nan_loop : &return_not_found;
BranchIfFloat64IsNaN(search_num.value(), nan_handling, &not_nan_case);
BIND(&not_nan_case);
if (kCanVectorize) {
Label simd_call(this);
Branch(
UintPtrLessThan(array_length_untagged, IntPtrConstant(kSIMDThreshold)),
&not_nan_loop, &simd_call);
BIND(&simd_call);
TNode<ExternalReference> simd_function =
ExternalConstant(ExternalReference::array_indexof_includes_double());
TNode<IntPtrT> result = UncheckedCast<IntPtrT>(CallCFunction(
simd_function, MachineType::UintPtr(),
std::make_pair(MachineType::TaggedPointer(), elements),
std::make_pair(MachineType::UintPtr(), array_length_untagged),
std::make_pair(MachineType::UintPtr(), index_var.value()),
std::make_pair(MachineType::TaggedPointer(), search_element)));
index_var = ReinterpretCast<IntPtrT>(result);
Branch(IntPtrLessThan(index_var.value(), IntPtrConstant(0)),
&return_not_found, &return_found);
} else {
Goto(&not_nan_loop);
}
BranchIfFloat64IsNaN(search_num.value(), nan_handling, &not_nan_loop);
BIND(&not_nan_loop);
{
......@@ -1220,17 +1098,6 @@ TF_BUILTIN(ArrayIncludes, ArrayIncludesIndexofAssembler) {
Generate(kIncludes, argc, context);
}
TF_BUILTIN(ArrayIncludesSmi, ArrayIncludesIndexofAssembler) {
auto context = Parameter<Context>(Descriptor::kContext);
auto elements = Parameter<FixedArray>(Descriptor::kElements);
auto search_element = Parameter<Object>(Descriptor::kSearchElement);
auto array_length = Parameter<Smi>(Descriptor::kLength);
auto from_index = Parameter<Smi>(Descriptor::kFromIndex);
GenerateSmiOrObject(kIncludes, context, elements, search_element,
array_length, from_index, SimpleElementKind::kSmiOrHole);
}
TF_BUILTIN(ArrayIncludesSmiOrObject, ArrayIncludesIndexofAssembler) {
auto context = Parameter<Context>(Descriptor::kContext);
auto elements = Parameter<FixedArray>(Descriptor::kElements);
......@@ -1239,7 +1106,7 @@ TF_BUILTIN(ArrayIncludesSmiOrObject, ArrayIncludesIndexofAssembler) {
auto from_index = Parameter<Smi>(Descriptor::kFromIndex);
GenerateSmiOrObject(kIncludes, context, elements, search_element,
array_length, from_index, SimpleElementKind::kAny);
array_length, from_index);
}
TF_BUILTIN(ArrayIncludesPackedDoubles, ArrayIncludesIndexofAssembler) {
......@@ -1272,17 +1139,6 @@ TF_BUILTIN(ArrayIndexOf, ArrayIncludesIndexofAssembler) {
Generate(kIndexOf, argc, context);
}
TF_BUILTIN(ArrayIndexOfSmi, ArrayIncludesIndexofAssembler) {
auto context = Parameter<Context>(Descriptor::kContext);
auto elements = Parameter<FixedArray>(Descriptor::kElements);
auto search_element = Parameter<Object>(Descriptor::kSearchElement);
auto array_length = Parameter<Smi>(Descriptor::kLength);
auto from_index = Parameter<Smi>(Descriptor::kFromIndex);
GenerateSmiOrObject(kIndexOf, context, elements, search_element, array_length,
from_index, SimpleElementKind::kSmiOrHole);
}
TF_BUILTIN(ArrayIndexOfSmiOrObject, ArrayIncludesIndexofAssembler) {
auto context = Parameter<Context>(Descriptor::kContext);
auto elements = Parameter<FixedArray>(Descriptor::kElements);
......@@ -1291,7 +1147,7 @@ TF_BUILTIN(ArrayIndexOfSmiOrObject, ArrayIncludesIndexofAssembler) {
auto from_index = Parameter<Smi>(Descriptor::kFromIndex);
GenerateSmiOrObject(kIndexOf, context, elements, search_element, array_length,
from_index, SimpleElementKind::kAny);
from_index);
}
TF_BUILTIN(ArrayIndexOfPackedDoubles, ArrayIncludesIndexofAssembler) {
......
......@@ -364,7 +364,6 @@ namespace internal {
/* ES6 #sec-array.prototype.fill */ \
CPP(ArrayPrototypeFill) \
/* ES7 #sec-array.prototype.includes */ \
TFS(ArrayIncludesSmi, kElements, kSearchElement, kLength, kFromIndex) \
TFS(ArrayIncludesSmiOrObject, kElements, kSearchElement, kLength, \
kFromIndex) \
TFS(ArrayIncludesPackedDoubles, kElements, kSearchElement, kLength, \
......@@ -373,7 +372,6 @@ namespace internal {
kFromIndex) \
TFJ(ArrayIncludes, kDontAdaptArgumentsSentinel) \
/* ES6 #sec-array.prototype.indexof */ \
TFS(ArrayIndexOfSmi, kElements, kSearchElement, kLength, kFromIndex) \
TFS(ArrayIndexOfSmiOrObject, kElements, kSearchElement, kLength, kFromIndex) \
TFS(ArrayIndexOfPackedDoubles, kElements, kSearchElement, kLength, \
kFromIndex) \
......
......@@ -29,7 +29,6 @@
#include "src/objects/object-type.h"
#include "src/objects/objects-inl.h"
#include "src/objects/ordered-hash-table.h"
#include "src/objects/simd.h"
#include "src/regexp/experimental/experimental.h"
#include "src/regexp/regexp-interpreter.h"
#include "src/regexp/regexp-macro-assembler-arch.h"
......@@ -1003,9 +1002,6 @@ FUNCTION_REFERENCE(try_string_to_index_or_lookup_existing,
FUNCTION_REFERENCE(string_from_forward_table,
StringForwardingTable::GetForwardStringAddress)
FUNCTION_REFERENCE(string_to_array_index_function, String::ToArrayIndex)
FUNCTION_REFERENCE(array_indexof_includes_smi_or_object,
ArrayIndexOfIncludesSmiOrObject)
FUNCTION_REFERENCE(array_indexof_includes_double, ArrayIndexOfIncludesDouble)
static Address LexicographicCompareWrapper(Isolate* isolate, Address smi_x,
Address smi_y) {
......
......@@ -186,9 +186,6 @@ class StatsCounter;
V(external_two_byte_string_get_chars, "external_two_byte_string_get_chars") \
V(smi_lexicographic_compare_function, "smi_lexicographic_compare_function") \
V(string_to_array_index_function, "String::ToArrayIndex") \
V(array_indexof_includes_smi_or_object, \
"array_indexof_includes_smi_or_object") \
V(array_indexof_includes_double, "array_indexof_includes_double") \
V(try_string_to_index_or_lookup_existing, \
"try_string_to_index_or_lookup_existing") \
V(string_from_forward_table, "string_from_forward_table") \
......
......@@ -7,7 +7,6 @@
#include <functional>
#include "src/api/api-inl.h"
#include "src/base/cpu.h"
#include "src/base/small-vector.h"
#include "src/builtins/builtins-promise.h"
#include "src/builtins/builtins-utils.h"
......@@ -2156,7 +2155,6 @@ Callable GetCallableForArrayIndexOfIncludes(ArrayIndexOfIncludesVariant variant,
switch (elements_kind) {
case PACKED_SMI_ELEMENTS:
case HOLEY_SMI_ELEMENTS:
return Builtins::CallableFor(isolate, Builtin::kArrayIndexOfSmi);
case PACKED_ELEMENTS:
case HOLEY_ELEMENTS:
return Builtins::CallableFor(isolate,
......@@ -2174,7 +2172,6 @@ Callable GetCallableForArrayIndexOfIncludes(ArrayIndexOfIncludesVariant variant,
switch (elements_kind) {
case PACKED_SMI_ELEMENTS:
case HOLEY_SMI_ELEMENTS:
return Builtins::CallableFor(isolate, Builtin::kArrayIncludesSmi);
case PACKED_ELEMENTS:
case HOLEY_ELEMENTS:
return Builtins::CallableFor(isolate,
......@@ -2192,6 +2189,7 @@ Callable GetCallableForArrayIndexOfIncludes(ArrayIndexOfIncludesVariant variant,
}
} // namespace
TNode<Object>
IteratingArrayBuiltinReducerAssembler::ReduceArrayPrototypeIndexOfIncludes(
ElementsKind kind, ArrayIndexOfIncludesVariant variant) {
......@@ -2229,6 +2227,7 @@ IteratingArrayBuiltinReducerAssembler::ReduceArrayPrototypeIndexOfIncludes(
return Call4(GetCallableForArrayIndexOfIncludes(variant, kind, isolate()),
context, elements, search_element, length, from_index);
}
namespace {
struct PromiseCtorFrameStateParams {
......
......@@ -1072,11 +1072,9 @@ static bool TransitivelyCalledBuiltinHasNoSideEffect(Builtin caller,
case Builtin::kArrayForEachLoopContinuation:
case Builtin::kArrayIncludesHoleyDoubles:
case Builtin::kArrayIncludesPackedDoubles:
case Builtin::kArrayIncludesSmi:
case Builtin::kArrayIncludesSmiOrObject:
case Builtin::kArrayIndexOfHoleyDoubles:
case Builtin::kArrayIndexOfPackedDoubles:
case Builtin::kArrayIndexOfSmi:
case Builtin::kArrayIndexOfSmiOrObject:
case Builtin::kArrayMapLoopContinuation:
case Builtin::kArrayReduceLoopContinuation:
......
// Copyright 2022 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "src/objects/simd.h"
#include "src/base/cpu.h"
#include "src/objects/compressed-slots.h"
#include "src/objects/fixed-array-inl.h"
#include "src/objects/heap-number-inl.h"
#include "src/objects/smi-inl.h"
#ifdef _MSC_VER
// MSVC doesn't define SSE3. However, it does define AVX, and AVX implies SSE3.
#ifdef __AVX__
#define __SSE3__
#endif
#endif
#ifdef __SSE3__
#include <immintrin.h>
#endif
#ifdef V8_HOST_ARCH_ARM64
// We use Neon only on 64-bit ARM (because on 32-bit, some instructions and some
// types are not available). Note that ARM64 is guaranteed to have Neon.
#define NEON64
#include <arm_neon.h>
#endif
namespace v8 {
namespace internal {
namespace {
enum class SimdKinds { kSSE, kNeon, kAVX2, kNone };
inline SimdKinds get_vectorization_kind() {
#ifdef __SSE3__
static base::CPU cpu;
if (cpu.has_avx2()) {
return SimdKinds::kAVX2;
} else {
// No need for a runtime check since we do not support x86/x64 CPUs without
// SSE3.
return SimdKinds::kSSE;
}
#elif defined(NEON64)
// No need for a runtime check since all Arm64 CPUs have Neon.
return SimdKinds::kNeon;
#else
return SimdKinds::kNone;
#endif
}
// Searches for |search_element| in |array| using a simple non-vectorized linear
// search. This is used as a fall-back when SIMD are not available, and to
// process the end of arrays than SIMD cannot process.
template <typename T>
inline uintptr_t slow_search(T* array, uintptr_t array_len, uintptr_t index,
T search_element) {
for (; index < array_len; index++) {
if (array[index] == search_element) {
return index;
}
}
return -1;
}
#ifdef NEON64
// extract_first_nonzero_index returns the first non-zero index in |v|. |v| is a
// Neon vector that can be either 32x4 (the return is then 0, 1, 2 or 3) or 64x2
// (the return is then 0 or 1). This is more or less equivalent to doing a
// movemask followed by a tzcnt on Intel.
//
// The input |v| should be a vector of -1 or 0 (for instance {0, 0},
// {0, -1, 0, -1}, {0, -1, 0, 0}), where -1 represents a match (and 0 a
// non-match), that was obtained by doing a vceqq. This function extract the
// index of the first non-zero item of the vector. To do so, we "and" the vector
// with {4, 3, 2, 1} (each number is "4 - the index of the item it's in"), which
// produces a vector of "indices or 0". Then, we extract the maximum of this
// vector, which is the index of the 1st match. An example:
//
// v = {-1, 0, 0, -1}
// mask = {4, 3, 2, 1}
// v & mask = {4, 0, 0, 1}
// max(v & mask) = 4
// index of the first match = 4-max = 4-4 = 0
//
template <typename T>
inline int extract_first_nonzero_index(T v) {
UNREACHABLE();
}
template <>
inline int extract_first_nonzero_index(int32x4_t v) {
int32x4_t mask = {4, 3, 2, 1};
mask = vandq_u32(mask, v);
return 4 - vmaxvq_u32(mask);
}
template <>
inline int extract_first_nonzero_index(int64x2_t v) {
int32x4_t mask = {2, 0, 1, 0}; // Could also be {2,2,1,1} or {0,2,0,1}
mask = vandq_u32(mask, vreinterpretq_s32_s64(v));
return 2 - vmaxvq_u32(mask);
}
template <>
inline int extract_first_nonzero_index(float64x2_t v) {
int32x4_t mask = {2, 0, 1, 0}; // Could also be {2,2,1,1} or {0,2,0,1}
mask = vandq_u32(mask, vreinterpretq_s32_f64(v));
return 2 - vmaxvq_u32(mask);
}
#endif
#define VECTORIZED_LOOP_Neon(type_load, type_eq, set1, cmp, movemask) \
{ \
constexpr int elems_in_vector = sizeof(type_load) / sizeof(T); \
type_load search_element_vec = set1(search_element); \
\
for (; index + elems_in_vector <= array_len; index += elems_in_vector) { \
type_load vector = *reinterpret_cast<type_load*>(&array[index]); \
type_eq eq = cmp(vector, search_element_vec); \
if (movemask(eq)) { \
return index + extract_first_nonzero_index(eq); \
} \
} \
}
#define VECTORIZED_LOOP_x86(type_load, type_eq, set1, cmp, movemask, extract) \
{ \
constexpr int elems_in_vector = sizeof(type_load) / sizeof(T); \
type_load search_element_vec = set1(search_element); \
\
for (; index + elems_in_vector <= array_len; index += elems_in_vector) { \
type_load vector = *reinterpret_cast<type_load*>(&array[index]); \
type_eq eq = cmp(vector, search_element_vec); \
int eq_mask = movemask(eq); \
if (eq_mask) { \
return index + extract(eq_mask); \
} \
} \
}
// Uses SIMD to vectorize the search loop. This function should only be called
// for large-ish arrays. Note that nothing will break if |array_len| is less
// than vectorization_threshold: things will just be slower than necessary.
template <typename T>
inline uintptr_t fast_search_noavx(T* array, uintptr_t array_len,
uintptr_t index, T search_element) {
static_assert(std::is_same<T, uint32_t>::value ||
std::is_same<T, uint64_t>::value ||
std::is_same<T, double>::value);
#if !(defined(__SSE3__) || defined(NEON64))
// No SIMD available.
return slow_search(array, array_len, index, search_element);
#endif
#ifdef __SSE3__
const int target_align = 16;
#elif defined(NEON64)
const int target_align = 16;
#else
const int target_align = 4;
UNREACHABLE();
#endif
// Scalar loop to reach desired alignment
for (;
index < array_len &&
(reinterpret_cast<std::uintptr_t>(&(array[index])) % target_align) != 0;
index++) {
if (array[index] == search_element) {
return index;
}
}
// Inserting one of the vectorized loop
#ifdef __SSE3__
if constexpr (std::is_same<T, uint32_t>::value) {
#define MOVEMASK(x) _mm_movemask_ps(_mm_castsi128_ps(x))
#define EXTRACT(x) base::bits::CountTrailingZeros32(x)
VECTORIZED_LOOP_x86(__m128i, __m128i, _mm_set1_epi32, _mm_cmpeq_epi32,
MOVEMASK, EXTRACT)
#undef MOVEMASK
#undef EXTRACT
} else if constexpr (std::is_same<T, uint64_t>::value) {
#define SET1(x) _mm_castsi128_ps(_mm_set1_epi64x(x))
#define CMP(a, b) _mm_cmpeq_pd(_mm_castps_pd(a), _mm_castps_pd(b))
#define EXTRACT(x) base::bits::CountTrailingZeros32(x)
VECTORIZED_LOOP_x86(__m128, __m128d, SET1, CMP, _mm_movemask_pd, EXTRACT)
#undef SET1
#undef CMP
#undef EXTRACT
} else if constexpr (std::is_same<T, double>::value) {
#define EXTRACT(x) base::bits::CountTrailingZeros32(x)
VECTORIZED_LOOP_x86(__m128d, __m128d, _mm_set1_pd, _mm_cmpeq_pd,
_mm_movemask_pd, EXTRACT)
#undef EXTRACT
}
#elif defined(NEON64)
if constexpr (std::is_same<T, uint32_t>::value) {
VECTORIZED_LOOP_Neon(int32x4_t, int32x4_t, vdupq_n_u32, vceqq_u32,
vmaxvq_u32)
} else if constexpr (std::is_same<T, uint64_t>::value) {
VECTORIZED_LOOP_Neon(int64x2_t, int64x2_t, vdupq_n_u64, vceqq_u64,
vmaxvq_u32)
} else if constexpr (std::is_same<T, double>::value) {
VECTORIZED_LOOP_Neon(float64x2_t, float64x2_t, vdupq_n_f64, vceqq_f64,
vmaxvq_f64)
}
#else
UNREACHABLE();
#endif
// The vectorized loop stops when there are not enough items left in the array
// to fill a vector register. The slow_search function will take care of
// iterating through the few remaining items.
return slow_search(array, array_len, index, search_element);
}
#if defined(_MSC_VER) && defined(__clang__)
// Generating AVX2 code with Clang on Windows without the /arch:AVX2 flag does
// not seem possible at the moment.
#define IS_CLANG_WIN 1
#endif
// Since we don't compile with -mavx or -mavx2 (or /arch:AVX2 on MSVC), Clang
// and MSVC do not define __AVX__ nor __AVX2__. Thus, if __SSE3__ is defined, we
// generate the AVX2 code, and, at runtime, we'll decide to call it or not,
// depending on whether the CPU supports AVX2.
#if defined(__SSE3__) && !defined(_M_IX86) && !defined(IS_CLANG_WIN)
#ifdef _MSC_VER
#define TARGET_AVX2
#else
#define TARGET_AVX2 __attribute__((target("avx2")))
#endif
template <typename T>
TARGET_AVX2 inline uintptr_t fast_search_avx(T* array, uintptr_t array_len,
uintptr_t index,
T search_element) {
static_assert(std::is_same<T, uint32_t>::value ||
std::is_same<T, uint64_t>::value ||
std::is_same<T, double>::value);
const int target_align = 32;
// Scalar loop to reach desired alignment
for (;
index < array_len &&
(reinterpret_cast<std::uintptr_t>(&(array[index])) % target_align) != 0;
index++) {
if (array[index] == search_element) {
return index;
}
}
// Generating vectorized loop
if constexpr (std::is_same<T, uint32_t>::value) {
#define MOVEMASK(x) _mm256_movemask_ps(_mm256_castsi256_ps(x))
#define EXTRACT(x) base::bits::CountTrailingZeros32(x)
VECTORIZED_LOOP_x86(__m256i, __m256i, _mm256_set1_epi32, _mm256_cmpeq_epi32,
MOVEMASK, EXTRACT)
#undef MOVEMASK
#undef EXTRACT
} else if constexpr (std::is_same<T, uint64_t>::value) {
#define MOVEMASK(x) _mm256_movemask_pd(_mm256_castsi256_pd(x))
#define EXTRACT(x) base::bits::CountTrailingZeros32(x)
VECTORIZED_LOOP_x86(__m256i, __m256i, _mm256_set1_epi64x,
_mm256_cmpeq_epi64, MOVEMASK, EXTRACT)
#undef MOVEMASK
#undef EXTRACT
} else if constexpr (std::is_same<T, double>::value) {
#define CMP(a, b) _mm256_cmp_pd(a, b, _CMP_EQ_OQ)
#define EXTRACT(x) base::bits::CountTrailingZeros32(x)
VECTORIZED_LOOP_x86(__m256d, __m256d, _mm256_set1_pd, CMP,
_mm256_movemask_pd, EXTRACT)
#undef CMP
#undef EXTRACT
}
// The vectorized loop stops when there are not enough items left in the array
// to fill a vector register. The slow_search function will take care of
// iterating through the few remaining items.
return slow_search(array, array_len, index, search_element);
}
#undef TARGET_AVX2
#elif defined(IS_CLANG_WIN)
template <typename T>
inline uintptr_t fast_search_avx(T* array, uintptr_t array_len, uintptr_t index,
T search_element) {
// Falling back to SSE version
return fast_search_noavx(array, array_len, index, search_element);
}
#else
template <typename T>
uintptr_t fast_search_avx(T* array, uintptr_t array_len, uintptr_t index,
T search_element) {
UNREACHABLE();
}
#endif // ifdef __SSE3__
#undef IS_CLANG_WIN
#undef VECTORIZED_LOOP_Neon
#undef VECTORIZED_LOOP_x86
template <typename T>
inline uintptr_t search(T* array, uintptr_t array_len, uintptr_t index,
T search_element) {
if (get_vectorization_kind() == SimdKinds::kAVX2) {
return fast_search_avx(array, array_len, index, search_element);
} else {
return fast_search_noavx(array, array_len, index, search_element);
}
}
enum class ArrayIndexOfIncludesKind { DOUBLE, OBJECTORSMI };
// ArrayIndexOfIncludes only handles cases that can be efficiently
// vectorized:
//
// * Searching for a Smi in a Smi array
//
// * Searching for a Smi or Double in a Double array
//
// * Searching for an object in an object array.
//
// Other cases should be dealt with either with the CSA builtin or with the
// inlined optimized code.
template <ArrayIndexOfIncludesKind kind>
Address ArrayIndexOfIncludes(Address array_start, uintptr_t array_len,
uintptr_t from_index, Address search_element) {
if (array_len == 0) {
return Smi::FromInt(-1).ptr();
}
if constexpr (kind == ArrayIndexOfIncludesKind::DOUBLE) {
FixedDoubleArray fixed_array = FixedDoubleArray::cast(Object(array_start));
double* array = static_cast<double*>(
fixed_array.RawField(FixedDoubleArray::OffsetOfElementAt(0))
.ToVoidPtr());
double search_num;
if (Object(search_element).IsSmi()) {
search_num = Object(search_element).ToSmi().value();
} else {
DCHECK(Object(search_element).IsHeapNumber());
search_num = HeapNumber::cast(Object(search_element)).value();
}
DCHECK(!std::isnan(search_num));
return search<double>(array, array_len, from_index, search_num);
}
if constexpr (kind == ArrayIndexOfIncludesKind::OBJECTORSMI) {
FixedArray fixed_array = FixedArray::cast(Object(array_start));
Tagged_t* array =
static_cast<Tagged_t*>(fixed_array.data_start().ToVoidPtr());
DCHECK(!Object(search_element).IsHeapNumber());
DCHECK(!Object(search_element).IsBigInt());
DCHECK(!Object(search_element).IsString());
return search<Tagged_t>(array, array_len, from_index,
static_cast<Tagged_t>(search_element));
}
}
} // namespace
uintptr_t ArrayIndexOfIncludesSmiOrObject(Address array_start,
uintptr_t array_len,
uintptr_t from_index,
Address search_element) {
return ArrayIndexOfIncludes<ArrayIndexOfIncludesKind::OBJECTORSMI>(
array_start, array_len, from_index, search_element);
}
uintptr_t ArrayIndexOfIncludesDouble(Address array_start, uintptr_t array_len,
uintptr_t from_index,
Address search_element) {
return ArrayIndexOfIncludes<ArrayIndexOfIncludesKind::DOUBLE>(
array_start, array_len, from_index, search_element);
}
#ifdef NEON64
#undef NEON64
#endif
} // namespace internal
} // namespace v8
// Copyright 2022 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_OBJECTS_SIMD_H_
#define V8_OBJECTS_SIMD_H_
#include <cstdint>
#include "include/v8-internal.h"
namespace v8 {
namespace internal {
uintptr_t ArrayIndexOfIncludesSmiOrObject(Address array_start,
uintptr_t array_len,
uintptr_t from_index,
Address search_element);
uintptr_t ArrayIndexOfIncludesDouble(Address array_start, uintptr_t array_len,
uintptr_t from_index,
Address search_element);
} // namespace internal
} // namespace v8
#endif // V8_OBJECTS_SIMD_H_
// Copyright 2022 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Large array of packed Smi, and Smi search_element
(() => {
let a = [];
for (let i = 0; i < 200; i++) {
a[i] = i;
}
function testArrayIncludes(idx) {
return a.includes(idx);
}
// Without fromIndex
for (let i = 0; i < 200; i++) {
assertEquals(true, testArrayIncludes(i));
}
// With fromIndex
for (let i = 0, from_index = 0; i+from_index < 200; i += 2, from_index++) {
assertEquals(true, testArrayIncludes(i, from_index));
}
})();
// Large array of holey Smi, and Smi search_element
(() => {
let a = [];
// Skipping every other item when initializing
for (let i = 0; i < 200; i+=2) {
a[i] = i;
}
function testArrayIncludes(idx) {
return a.includes(idx);
}
// Without fromIndex
for (let i = 0; i < 200; i++) {
if (i % 2 == 0) {
assertEquals(true, testArrayIncludes(i));
} else {
assertEquals(false, testArrayIncludes(i));
}
}
// With fromIndex
for (let i = 0, from_index = 0; i + from_index < 200; i += 2, from_index++) {
if (i % 2 == 0) {
assertEquals(true, testArrayIncludes(i, from_index));
} else {
assertEquals(false, testArrayIncludes(i, from_index));
}
}
})();
// Large array of packed Doubles, and Double search_element
(() => {
let a = [];
for (let i = 0; i < 200; i++) {
a[i] = i + 0.5;
}
function testArrayIncludes(idx) {
return a.includes(idx);
}
// Without fromIndex
for (let i = 0; i < 200; i++) {
assertEquals(true, testArrayIncludes(i + 0.5));
}
// With fromIndex
for (let i = 0, from_index = 0; i+from_index < 200; i += 2, from_index++) {
assertEquals(true, testArrayIncludes(i+0.5, from_index));
}
})();
// Large array of holey Doubles, and Double search_element
(() => {
let a = [];
// Skipping every other item when initializing
for (let i = 0; i < 200; i+=2) {
a[i] = i + 0.5;
}
function testArrayIncludes(idx) {
return a.includes(idx);
}
// Without fromIndex
for (let i = 0; i < 200; i++) {
if (i % 2 == 0) {
assertEquals(true, testArrayIncludes(i + 0.5));
} else {
assertEquals(false, testArrayIncludes(i + 0.5));
}
}
// With fromIndex
for (let i = 0, from_index = 0; i + from_index < 200; i += 2, from_index++) {
if (i % 2 == 0) {
assertEquals(true, testArrayIncludes(i+0.5, from_index));
} else {
assertEquals(false, testArrayIncludes(i+0.5, from_index));
}
}
})();
// Large array of packed objects, and object search_element
(() => {
let a = [];
let b = [];
for (let i = 0; i < 200; i++) {
a[i] = { v: i };
b[i] = a[i];
}
function testArrayIncludes(idx) {
return a.includes(idx);
}
// Without fromIndex
for (let i = 0; i < 200; i++) {
assertEquals(true, testArrayIncludes(b[i]));
}
// With fromIndex
for (let i = 0, from_index = 0; i+from_index < 200; i += 2, from_index++) {
assertEquals(true, testArrayIncludes(b[i], from_index));
}
})();
// Large array of holey objects, and object search_element
(() => {
let a = [];
let b = [];
// Skipping every other item when initializing
for (let i = 0; i < 200; i++) {
b[i] = { v: i };
if (i % 2 == 0) {
a[i] = b[i];
}
}
function testArrayIncludes(idx) {
return a.includes(idx);
}
// Without fromIndex
for (let i = 0; i < 200; i++) {
if (i % 2 == 0) {
assertEquals(true, testArrayIncludes(b[i]));
} else {
assertEquals(false, testArrayIncludes(b[i]));
}
}
// With fromIndex
for (let i = 0, from_index = 0; i + from_index < 200; i += 2, from_index++) {
if (i % 2 == 0) {
assertEquals(true, testArrayIncludes(b[i], from_index));
} else {
assertEquals(false, testArrayIncludes(b[i], from_index));
}
}
})();
// Copyright 2022 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Large array of packed Smi, and Smi search_element
(() => {
let a = [];
for (let i = 0; i < 200; i++) {
a[i] = i;
}
function testArrayIndexOf(idx) {
return a.indexOf(idx);
}
// Without fromIndex
for (let i = 0; i < 200; i++) {
assertEquals(i, testArrayIndexOf(i));
}
// With fromIndex
for (let i = 0, from_index = 0; i+from_index < 200; i += 2, from_index++) {
assertEquals(i, testArrayIndexOf(i, from_index));
}
})();
// Large array of holey Smi, and Smi search_element
(() => {
let a = [];
// Skipping every other item when initializing
for (let i = 0; i < 200; i+=2) {
a[i] = i;
}
function testArrayIndexOf(idx) {
return a.indexOf(idx);
}
// Without fromIndex
for (let i = 0; i < 200; i++) {
if (i % 2 == 0) {
assertEquals(i, testArrayIndexOf(i));
} else {
assertEquals(-1, testArrayIndexOf(i));
}
}
// With fromIndex
for (let i = 0, from_index = 0; i + from_index < 200; i += 2, from_index++) {
if (i % 2 == 0) {
assertEquals(i, testArrayIndexOf(i, from_index));
} else {
assertEquals(-1, testArrayIndexOf(i, from_index));
}
}
})();
// Large array of packed Doubles, and Double search_element
(() => {
let a = [];
for (let i = 0; i < 200; i++) {
a[i] = i + 0.5;
}
function testArrayIndexOf(idx) {
return a.indexOf(idx);
}
// Without fromIndex
for (let i = 0; i < 200; i++) {
assertEquals(i, testArrayIndexOf(i + 0.5));
}
// With fromIndex
for (let i = 0, from_index = 0; i+from_index < 200; i += 2, from_index++) {
assertEquals(i, testArrayIndexOf(i+0.5, from_index));
}
})();
// Large array of holey Doubles, and Double search_element
(() => {
let a = [];
// Skipping every other item when initializing
for (let i = 0; i < 200; i+=2) {
a[i] = i + 0.5;
}
function testArrayIndexOf(idx) {
return a.indexOf(idx);
}
// Without fromIndex
for (let i = 0; i < 200; i++) {
if (i % 2 == 0) {
assertEquals(i, testArrayIndexOf(i + 0.5));
} else {
assertEquals(-1, testArrayIndexOf(i + 0.5));
}
}
// With fromIndex
for (let i = 0, from_index = 0; i + from_index < 200; i += 2, from_index++) {
if (i % 2 == 0) {
assertEquals(i, testArrayIndexOf(i+0.5, from_index));
} else {
assertEquals(-1, testArrayIndexOf(i+0.5, from_index));
}
}
})();
// Large array of packed objects, and object search_element
(() => {
let a = [];
let b = [];
for (let i = 0; i < 200; i++) {
a[i] = { v: i };
b[i] = a[i];
}
function testArrayIndexOf(idx) {
return a.indexOf(idx);
}
// Without fromIndex
for (let i = 0; i < 200; i++) {
assertEquals(i, testArrayIndexOf(b[i]));
}
// With fromIndex
for (let i = 0, from_index = 0; i+from_index < 200; i += 2, from_index++) {
assertEquals(i, testArrayIndexOf(b[i], from_index));
}
})();
// Large array of holey objects, and object search_element
(() => {
let a = [];
let b = [];
// Skipping every other item when initializing
for (let i = 0; i < 200; i++) {
b[i] = { v: i };
if (i % 2 == 0) {
a[i] = b[i];
}
}
function testArrayIndexOf(idx) {
return a.indexOf(idx);
}
// Without fromIndex
for (let i = 0; i < 200; i++) {
if (i % 2 == 0) {
assertEquals(i, testArrayIndexOf(b[i]));
} else {
assertEquals(-1, testArrayIndexOf(b[i]));
}
}
// With fromIndex
for (let i = 0, from_index = 0; i + from_index < 200; i += 2, from_index++) {
if (i % 2 == 0) {
assertEquals(i, testArrayIndexOf(b[i], from_index));
} else {
assertEquals(-1, testArrayIndexOf(b[i], from_index));
}
}
})();
// This test checks that when the item that IndexOf searches is present multiple
// time, the correct index is returned (in particular, when a single SIMD vector
// had multiple matches). For instance, if we do:
//
// [1, 2, 1, 3].indexOf(1)
//
// Then it should return 0 rather than 2.
(() => {
// The patterns that this function will check, where for instance patternABAB
// means that we'd like to build a vector containing {A, B, A, B}.
let patterns = {
patternABAB : (a, b, c, d) => [a, b, a, b, c, d, c, d],
patternAABB : (a, b, c, d) => [a, a, b, b, c, c, d, d],
patternABBA : (a, b, c, d) => [a, b, b, a, c, d, d, c],
patternABAA : (a, b, c, d) => [a, b, a, a, c, d, c, c],
patternAABA : (a, b, c, d) => [a, a, b, a, c, c, d, c],
patternAAAB : (a, b, c, d) => [a, a, a, b, c, c, c, d],
patternBAAA : (a, b, c, d) => [b, a, a, a, d, c, c, c]
};
// Starting |a| with a bunch of 0s, which might be handled by the scalar loop
// that the SIMD code does to reach 16/32-byte alignment.
let a = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
let next_int = 1;
for (const [_, pattern] of Object.entries(patterns)) {
// It's a bit tricky to ensure that 2 items will be in the same SIMD batch
// because we can't control the alignment of the array from JS, and the
// SIMD code will start by skipping the first items to have the memory
// aligned on 16/32 bytes. So, we put each pattern 8 times in a row in |a|,
// but each time with an additional item, to make sure that each of those 8
// repeated pattern have a different alignment.
for (let i = 0; i < 8; i++) {
a = a.concat(pattern(next_int, next_int + 1, next_int + 2, next_int + 3));
a.push(next_int + 4); // By adding a 9th item, we make sure that the
// alignment of the next batch is not the same as
// the current one.
next_int += 5;
}
}
let b = a.slice();
b[10000] = 42; // Switch b to dictionary mode so that the SIMD code won't be
// used for it. We can then use `b.indexOf` as reference.
for (let x of b) {
if (x == undefined) break;
assertEquals(b.indexOf(x), a.indexOf(x));
}
})();
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment