Commit 8c9213a1 authored by Ng Zhi An, committed by Commit Bot

[wasm-simd][x64][ia32] Optimize swizzle with constant indices

When swizzle is called with a v128.const node as its indices operand, we
can check that every index is either in bounds or, if it is out of bounds,
has the top bit of its byte set. Such indices already match pshufb's
behavior exactly, so we can omit the paddusb (and the load of the external
reference it needs).

Bug: v8:10992
Change-Id: I5479a9eb92ebcfc12bedff5efd3e72bb4a43ff40
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2766222
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Reviewed-by: Georg Neis <neis@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#73583}
parent 656f35ab
......@@ -1272,7 +1272,12 @@ void TurboAssembler::I32x4ExtAddPairwiseI16x8U(XMMRegister dst, XMMRegister src,
void TurboAssembler::I8x16Swizzle(XMMRegister dst, XMMRegister src,
XMMRegister mask, XMMRegister scratch,
Register tmp) {
Register tmp, bool omit_add) {
if (omit_add) {
  // The caller has established that every index is a constant that is either
  // in range or already has its top bit set, so pshufb can consume the
  // indices unmodified. They live in |mask|; |scratch| has not been loaded
  // with them on this path, so it must not be used as the shuffle control.
  Pshufb(dst, src, mask);
  return;
}
// Out-of-range indices should return 0, add 112 so that any value > 15
// saturates to 128 (top bit set), so pshufb will zero that lane.
Operand op = ExternalReferenceAsOperand(
......
......@@ -758,7 +758,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
void I32x4ExtAddPairwiseI16x8U(XMMRegister dst, XMMRegister src,
XMMRegister tmp);
void I8x16Swizzle(XMMRegister dst, XMMRegister src, XMMRegister mask,
XMMRegister scratch, Register tmp);
XMMRegister scratch, Register tmp, bool omit_add = false);
void Push(Register src) { push(src); }
void Push(Operand src) { push(src); }
......
......@@ -2701,7 +2701,14 @@ void TurboAssembler::I32x4ExtAddPairwiseI16x8U(XMMRegister dst,
}
void TurboAssembler::I8x16Swizzle(XMMRegister dst, XMMRegister src,
XMMRegister mask) {
XMMRegister mask, bool omit_add) {
if (omit_add) {
  // We have determined that the indices are immediates, and they are either
  // within bounds, or the top bit is set, so we can omit the add. The
  // unmodified indices are in |mask|; kScratchDoubleReg has not been loaded
  // with them on this path, so it must not be used as the shuffle control.
  Pshufb(dst, src, mask);
  return;
}
// Out-of-range indices should return 0, add 112 so that any value > 15
// saturates to 128 (top bit set), so pshufb will zero that lane.
Operand op = ExternalReferenceAsOperand(
......
......@@ -645,7 +645,8 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
void I16x8ExtAddPairwiseI8x16S(XMMRegister dst, XMMRegister src);
void I32x4ExtAddPairwiseI16x8U(XMMRegister dst, XMMRegister src);
void I8x16Swizzle(XMMRegister dst, XMMRegister src, XMMRegister mask);
void I8x16Swizzle(XMMRegister dst, XMMRegister src, XMMRegister mask,
bool omit_add = false);
void Abspd(XMMRegister dst);
void Negpd(XMMRegister dst);
......
......@@ -3624,7 +3624,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
case kIA32I8x16Swizzle: {
__ I8x16Swizzle(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1), kScratchDoubleReg,
i.TempRegister(0));
i.TempRegister(0), MiscField::decode(instr->opcode()));
break;
}
case kIA32I8x16Shuffle: {
......
......@@ -3002,9 +3002,20 @@ void InstructionSelector::VisitI8x16Shuffle(Node* node) { UNREACHABLE(); }
#endif // V8_ENABLE_WEBASSEMBLY
void InstructionSelector::VisitI8x16Swizzle(Node* node) {
InstructionCode op = kIA32I8x16Swizzle;
auto m = V128ConstMatcher(node->InputAt(1));
if (m.HasResolvedValue()) {
// If the indices vector is a const, check if they are in range, or if the
// top bit is set, then we can avoid the paddusb in the codegen and simply
// emit a pshufb.
auto imms = m.ResolvedValue().immediate();
op |= MiscField::encode(wasm::SimdSwizzle::AllInRangeOrTopBitSet(imms));
}
IA32OperandGenerator g(this);
InstructionOperand temps[] = {g.TempRegister()};
Emit(kIA32I8x16Swizzle,
Emit(op,
IsSupported(AVX) ? g.DefineAsRegister(node) : g.DefineSameAsFirst(node),
g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)),
arraysize(temps), temps);
......
......@@ -860,6 +860,7 @@ class V8_EXPORT_PRIVATE Instruction final {
// Returns the FlagsConditionField bits decoded from this instruction's
// opcode.
FlagsCondition flags_condition() const {
return FlagsConditionField::decode(opcode());
}
// Returns the MiscField bits decoded from this instruction's opcode, as an
// int.
int misc() const { return MiscField::decode(opcode()); }
static Instruction* New(Zone* zone, InstructionCode opcode) {
return New(zone, opcode, 0, nullptr, 0, nullptr, 0, nullptr);
......
......@@ -15,6 +15,7 @@
#include "src/compiler/backend/code-generator-impl.h"
#include "src/compiler/backend/code-generator.h"
#include "src/compiler/backend/gap-resolver.h"
#include "src/compiler/backend/instruction-codes.h"
#include "src/compiler/node-matchers.h"
#include "src/compiler/osr.h"
#include "src/heap/memory-chunk.h"
......@@ -3691,8 +3692,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kX64I8x16Swizzle: {
bool omit_add = MiscField::decode(instr->opcode());
__ I8x16Swizzle(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
i.InputSimd128Register(1), omit_add);
break;
}
case kX64I8x16Shuffle: {
......
......@@ -3544,12 +3544,27 @@ void InstructionSelector::VisitI8x16Shuffle(Node* node) {
void InstructionSelector::VisitI8x16Shuffle(Node* node) { UNREACHABLE(); }
#endif // V8_ENABLE_WEBASSEMBLY
#if V8_ENABLE_WEBASSEMBLY
// Selects the x64 instruction for an I8x16Swizzle node. Input 0 is the vector
// being swizzled and input 1 holds the indices. When the indices are an
// S128Const, MiscField of the opcode records whether they all satisfy
// SimdSwizzle::AllInRangeOrTopBitSet.
void InstructionSelector::VisitI8x16Swizzle(Node* node) {
InstructionCode op = kX64I8x16Swizzle;
auto m = V128ConstMatcher(node->InputAt(1));
if (m.HasResolvedValue()) {
// If the indices vector is a constant, check whether every index is either
// in range or has its top bit set. If so, the codegen can skip the paddusb
// and simply emit a pshufb.
auto imms = m.ResolvedValue().immediate();
op |= MiscField::encode(wasm::SimdSwizzle::AllInRangeOrTopBitSet(imms));
}
X64OperandGenerator g(this);
// NOTE(review): the two Emit( lines below look like the removed and added
// sides of the diff hunk shown together; only `Emit(op,` carries the
// MiscField flag computed above.
Emit(kX64I8x16Swizzle,
Emit(op,
IsSupported(AVX) ? g.DefineAsRegister(node) : g.DefineSameAsFirst(node),
g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)));
}
#else
void InstructionSelector::VisitI8x16Swizzle(Node* node) { UNREACHABLE(); }
#endif // V8_ENABLE_WEBASSEMBLY
namespace {
void VisitPminOrPmax(InstructionSelector* selector, Node* node,
......
......@@ -13,6 +13,7 @@
#include "src/codegen/external-reference.h"
#include "src/common/globals.h"
#include "src/compiler/common-operator.h"
#include "src/compiler/machine-operator.h"
#include "src/compiler/node.h"
#include "src/compiler/operator.h"
#include "src/numbers/double.h"
......@@ -169,6 +170,8 @@ using Int32Matcher = IntMatcher<int32_t, IrOpcode::kInt32Constant>;
using Uint32Matcher = IntMatcher<uint32_t, IrOpcode::kInt32Constant>;
using Int64Matcher = IntMatcher<int64_t, IrOpcode::kInt64Constant>;
using Uint64Matcher = IntMatcher<uint64_t, IrOpcode::kInt64Constant>;
// Matches IrOpcode::kS128Const nodes; the matched value type is
// S128ImmediateParameter.
using V128ConstMatcher =
ValueMatcher<S128ImmediateParameter, IrOpcode::kS128Const>;
#if V8_HOST_ARCH_32_BIT
using IntPtrMatcher = Int32Matcher;
using UintPtrMatcher = Uint32Matcher;
......
......@@ -864,6 +864,9 @@ class V8_EXPORT_PRIVATE RawMachineAssembler {
}
// SIMD operations.
// Adds a node for the machine operator S128Const(value) and returns it.
// |value| is declared as a 16-element uint8_t array.
Node* S128Const(const uint8_t value[16]) {
return AddNode(machine()->S128Const(value));
}
Node* I64x2Splat(Node* a) { return AddNode(machine()->I64x2Splat(), a); }
Node* I64x2SplatI32Pair(Node* a, Node* b) {
return AddNode(machine()->I64x2SplatI32Pair(), a, b);
......
......@@ -161,6 +161,12 @@ void SimdShuffle::Pack16Lanes(uint32_t* dst, const uint8_t* shuffle) {
}
}
bool SimdSwizzle::AllInRangeOrTopBitSet(
std::array<uint8_t, kSimd128Size> shuffle) {
return std::all_of(shuffle.begin(), shuffle.end(),
[](auto i) { return (i < kSimd128Size) || (i & 0x80); });
}
} // namespace wasm
} // namespace internal
} // namespace v8
......@@ -95,6 +95,14 @@ class V8_EXPORT_PRIVATE SimdShuffle {
// Packs 16 bytes of shuffle into an array of 4 uint32_t.
static void Pack16Lanes(uint32_t* dst, const uint8_t* shuffle);
};
// Static helpers for analyzing swizzle index vectors.
class V8_EXPORT_PRIVATE SimdSwizzle {
public:
// Returns true if every byte of |shuffle| is either in range
// (< kSimd128Size) or, when it is out of range, has its top bit set.
static bool AllInRangeOrTopBitSet(std::array<uint8_t, kSimd128Size> shuffle);
};
} // namespace wasm
} // namespace internal
} // namespace v8
......
......@@ -886,6 +886,50 @@ TEST_F(InstructionSelectorTest, SIMDSplatZero) {
}
}
// One parameterized test case: a constant swizzle index vector and the
// omit_add flag the selected instruction's misc field is expected to carry.
struct SwizzleConstants {
// Constant indices passed to S128Const.
uint8_t shuffle[kSimd128Size];
// Expected value of the instruction's misc field.
bool omit_add;
};
// Test cases for swizzles with constant indices.
static constexpr SwizzleConstants kSwizzleConstants[] = {
{
// all lanes < kSimd128Size
{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
true,
},
{
// lanes that are >= kSimd128Size have top bit set
{12, 13, 14, 15, 0x90, 0x91, 0x92, 0x93, 0xA0, 0xA1, 0xA2, 0xA3, 0xFC,
0xFD, 0xFE, 0xFF},
true,
},
{
// some lanes (16..27) are >= kSimd128Size without the top bit set, so the
// add is not expected to be omitted
{12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27},
false,
},
};
using InstructionSelectorSIMDSwizzleConstantTest =
InstructionSelectorTestWithParam<SwizzleConstants>;
// Checks that a swizzle whose indices come from an S128Const is selected as
// kIA32I8x16Swizzle and that its misc field matches the expected omit_add.
TEST_P(InstructionSelectorSIMDSwizzleConstantTest, SimdSwizzleConstant) {
  const auto test_case = GetParam();
  StreamBuilder m(this, MachineType::Simd128(), MachineType::Simd128());
  Node* const indices = m.S128Const(test_case.shuffle);
  Node* const result =
      m.AddNode(m.machine()->I8x16Swizzle(), m.Parameter(0), indices);
  m.Return(result);
  Stream s = m.Build();

  // Two instructions are expected; s[1] is the swizzle (s[0] is presumably
  // the constant).
  ASSERT_EQ(2U, s.size());
  ASSERT_EQ(kIA32I8x16Swizzle, s[1]->arch_opcode());
  ASSERT_EQ(test_case.omit_add, s[1]->misc());
  ASSERT_EQ(1U, s[0]->OutputCount());
}
INSTANTIATE_TEST_SUITE_P(InstructionSelectorTest,
InstructionSelectorSIMDSwizzleConstantTest,
::testing::ValuesIn(kSwizzleConstants));
} // namespace compiler
} // namespace internal
} // namespace v8
......@@ -2204,6 +2204,50 @@ INSTANTIATE_TEST_SUITE_P(InstructionSelectorTest,
::testing::ValuesIn(kArchShuffles));
#endif // V8_ENABLE_WEBASSEMBLY
// One parameterized test case: a constant swizzle index vector and the
// omit_add flag the selected instruction's misc field is expected to carry.
struct SwizzleConstants {
// Constant indices passed to S128Const.
uint8_t shuffle[kSimd128Size];
// Expected value of the instruction's misc field.
bool omit_add;
};
// Test cases for swizzles with constant indices.
static constexpr SwizzleConstants kSwizzleConstants[] = {
{
// all lanes < kSimd128Size
{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
true,
},
{
// lanes that are >= kSimd128Size have top bit set
{12, 13, 14, 15, 0x90, 0x91, 0x92, 0x93, 0xA0, 0xA1, 0xA2, 0xA3, 0xFC,
0xFD, 0xFE, 0xFF},
true,
},
{
// some lanes (16..27) are >= kSimd128Size without the top bit set, so the
// add is not expected to be omitted
{12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27},
false,
},
};
using InstructionSelectorSIMDSwizzleConstantTest =
InstructionSelectorTestWithParam<SwizzleConstants>;
// Checks that a swizzle whose indices come from an S128Const is selected as
// kX64I8x16Swizzle and that its misc field matches the expected omit_add.
TEST_P(InstructionSelectorSIMDSwizzleConstantTest, SimdSwizzleConstant) {
  const auto test_case = GetParam();
  StreamBuilder m(this, MachineType::Simd128(), MachineType::Simd128());
  Node* const indices = m.S128Const(test_case.shuffle);
  Node* const result =
      m.AddNode(m.machine()->I8x16Swizzle(), m.Parameter(0), indices);
  m.Return(result);
  Stream s = m.Build();

  // Two instructions are expected; s[1] is the swizzle (s[0] is presumably
  // the constant).
  ASSERT_EQ(2U, s.size());
  ASSERT_EQ(kX64I8x16Swizzle, s[1]->arch_opcode());
  ASSERT_EQ(test_case.omit_add, s[1]->misc());
  ASSERT_EQ(1U, s[0]->OutputCount());
}
INSTANTIATE_TEST_SUITE_P(InstructionSelectorTest,
InstructionSelectorSIMDSwizzleConstantTest,
::testing::ValuesIn(kSwizzleConstants));
} // namespace compiler
} // namespace internal
} // namespace v8
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment