Commit cc68080f, authored by Zhi An Ng, committed by Commit Bot

[wasm-simd][ia32] Optimize integer splats of constant 0

Integer splats (especially for sizes < 32 bits) do not directly
translate to a single instruction on ia32. We can do better for special
values, like 0, which can be lowered to `pxor dst, dst`. We do this check
in the instruction selector, and emit a special opcode kIA32S128Zero.

Also add a unittest to verify this optimization, and necessary
raw-assembler methods for the test.

Bug: v8:11093
Change-Id: Icfebef06a5ecf49619ea54f31a5296094fb53ff2
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2516300
Reviewed-by: Tobias Tebbi <tebbi@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#71024}
parent f3a5941a
......@@ -2238,9 +2238,15 @@ void InstructionSelector::VisitF64x2ExtractLane(Node* node) {
// Selects the instruction for an I64x2SplatI32Pair node, whose two inputs
// (InputAt(0) and InputAt(1)) are 32-bit values.
//
// If both inputs match the Int32 constant 0, a single kIA32S128Zero is emitted
// with no inputs. Otherwise kIA32I64x2SplatI32Pair is emitted, taking the
// first input through UseRegister and the second through Use.
//
// Exactly one instruction is emitted per node: the operands for the generic
// splat are only created on the non-zero path, so the zero case does not also
// produce a kIA32I64x2SplatI32Pair.
void InstructionSelector::VisitI64x2SplatI32Pair(Node* node) {
  IA32OperandGenerator g(this);
  Int32Matcher match_left(node->InputAt(0));
  Int32Matcher match_right(node->InputAt(1));
  if (match_left.Is(0) && match_right.Is(0)) {
    Emit(kIA32S128Zero, g.DefineAsRegister(node));
  } else {
    InstructionOperand operand0 = g.UseRegister(node->InputAt(0));
    InstructionOperand operand1 = g.Use(node->InputAt(1));
    Emit(kIA32I64x2SplatI32Pair, g.DefineAsRegister(node), operand0, operand1);
  }
}
void InstructionSelector::VisitI64x2ReplaceLaneI32Pair(Node* node) {
......@@ -2356,7 +2362,13 @@ void InstructionSelector::VisitS128AndNot(Node* node) {
// Generates InstructionSelector::Visit<Type>Splat for every type listed by
// SIMD_INT_TYPES.
//
// A splat whose input matches the Int32 constant 0 is selected as
// kIA32S128Zero with no inputs; any other input is handled by VisitRO with
// kIA32<Type>Splat. The two paths are mutually exclusive, so each node is
// selected exactly once.
#define VISIT_SIMD_SPLAT(Type)                               \
  void InstructionSelector::Visit##Type##Splat(Node* node) { \
    Int32Matcher int32_matcher(node->InputAt(0));            \
    if (int32_matcher.Is(0)) {                               \
      IA32OperandGenerator g(this);                          \
      Emit(kIA32S128Zero, g.DefineAsRegister(node));         \
    } else {                                                 \
      VisitRO(this, node, kIA32##Type##Splat);               \
    }                                                        \
  }
SIMD_INT_TYPES(VISIT_SIMD_SPLAT)
#undef VISIT_SIMD_SPLAT
......
......@@ -840,6 +840,9 @@ class V8_EXPORT_PRIVATE RawMachineAssembler {
// SIMD operations.
// Adds a node built from the machine operator I64x2Splat with |a| as input.
Node* I64x2Splat(Node* a) {
  auto splat_op = machine()->I64x2Splat();
  return AddNode(splat_op, a);
}
// Adds a node built from the machine operator I64x2SplatI32Pair with |a| and
// |b| as its inputs, in that order.
Node* I64x2SplatI32Pair(Node* a, Node* b) {
  auto splat_op = machine()->I64x2SplatI32Pair();
  return AddNode(splat_op, a, b);
}
// Adds a node built from the machine operator I32x4Splat with |a| as input.
Node* I32x4Splat(Node* a) {
  auto splat_op = machine()->I32x4Splat();
  return AddNode(splat_op, a);
}
// Adds a node built from the machine operator I16x8Splat with |a| as input.
Node* I16x8Splat(Node* a) {
  auto splat_op = machine()->I16x8Splat();
  return AddNode(splat_op, a);
}
// Adds a node built from the machine operator I8x16Splat with |a| as input.
Node* I8x16Splat(Node* a) {
  auto splat_op = machine()->I8x16Splat();
  return AddNode(splat_op, a);
}
......
......@@ -836,6 +836,56 @@ TEST_F(InstructionSelectorTest, Word32Clz) {
EXPECT_EQ(s.ToVreg(n), s.ToVreg(s[0]->Output()));
}
// SIMD.
TEST_F(InstructionSelectorTest, SIMDSplatZero) {
  // A splat of the integer constant 0 must be selected as one zeroing
  // instruction that takes no inputs:
  //   {i8x16,i16x8,i32x4,i64x2}.splat(const(0)) -> v128.zero().
  // f32x4.splat and f64x2.splat are not covered here: that optimization is
  // not implemented since it doesn't improve the codegen as much (same number
  // of instructions).

  // i64x2, built from a pair of 32-bit zero constants.
  {
    StreamBuilder builder(this, MachineType::Simd128());
    builder.Return(builder.I64x2SplatI32Pair(builder.Int32Constant(0),
                                             builder.Int32Constant(0)));
    Stream stream = builder.Build();
    ASSERT_EQ(1U, stream.size());
    auto instr = stream[0];
    EXPECT_EQ(kIA32S128Zero, instr->arch_opcode());
    ASSERT_EQ(0U, instr->InputCount());
    EXPECT_EQ(1U, instr->OutputCount());
  }

  // i32x4.
  {
    StreamBuilder builder(this, MachineType::Simd128());
    builder.Return(builder.I32x4Splat(builder.Int32Constant(0)));
    Stream stream = builder.Build();
    ASSERT_EQ(1U, stream.size());
    auto instr = stream[0];
    EXPECT_EQ(kIA32S128Zero, instr->arch_opcode());
    ASSERT_EQ(0U, instr->InputCount());
    EXPECT_EQ(1U, instr->OutputCount());
  }

  // i16x8.
  {
    StreamBuilder builder(this, MachineType::Simd128());
    builder.Return(builder.I16x8Splat(builder.Int32Constant(0)));
    Stream stream = builder.Build();
    ASSERT_EQ(1U, stream.size());
    auto instr = stream[0];
    EXPECT_EQ(kIA32S128Zero, instr->arch_opcode());
    ASSERT_EQ(0U, instr->InputCount());
    EXPECT_EQ(1U, instr->OutputCount());
  }

  // i8x16.
  {
    StreamBuilder builder(this, MachineType::Simd128());
    builder.Return(builder.I8x16Splat(builder.Int32Constant(0)));
    Stream stream = builder.Build();
    ASSERT_EQ(1U, stream.size());
    auto instr = stream[0];
    EXPECT_EQ(kIA32S128Zero, instr->arch_opcode());
    ASSERT_EQ(0U, instr->InputCount());
    EXPECT_EQ(1U, instr->OutputCount());
  }
}
} // namespace compiler
} // namespace internal
} // namespace v8
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment