Commit 930fb646, authored by Daan de Graaf, committed by V8 LUCI CQ

[wasm-simd][arm64] Fuse add and extadd_pairwise_u.

The two instructions are fused into a single Uadalp instruction,
improving performance of quantized neural network operator
implementations such as XNNPACK.

Bug: v8:11546
Change-Id: Ic11b35d1e7758ee0b4ccfe8f592edc1aa798f6f2
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2939997
Reviewed-by: Zhi An Ng <zhin@chromium.org>
Commit-Queue: Daan de Graaf <daagra@google.com>
Cr-Commit-Position: refs/heads/master@{#75102}
parent 5d84b6cb
...@@ -1217,6 +1217,14 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -1217,6 +1217,14 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputSimd128Register(0).Format(src_f)); i.InputSimd128Register(0).Format(src_f));
break; break;
} }
// Emits Uadalp, the fused form of Add(x, ExtAddPairwiseU(y)) described in the
// commit message. Input 0 is the accumulator and input 1 is the vector whose
// lanes are pairwise-added.
case kArm64Uadalp: {
// The instruction accumulates in place, so the output register must be the
// same register as input 0 (the selector emits it with DefineSameAsFirst).
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
// Destination format comes from the lane size encoded in the opcode.
VectorFormat dst_f = VectorFormatFillQ(LaneSizeField::decode(opcode));
// Source format is derived from the destination format; per the helper's
// name, it has half-width lanes and twice as many of them.
VectorFormat src_f = VectorFormatHalfWidthDoubleLanes(dst_f);
__ Uadalp(i.OutputSimd128Register().Format(dst_f),
i.InputSimd128Register(1).Format(src_f));
break;
}
case kArm64Uaddlp: { case kArm64Uaddlp: {
VectorFormat dst_f = VectorFormatFillQ(LaneSizeField::decode(opcode)); VectorFormat dst_f = VectorFormatFillQ(LaneSizeField::decode(opcode));
VectorFormat src_f = VectorFormatHalfWidthDoubleLanes(dst_f); VectorFormat src_f = VectorFormatHalfWidthDoubleLanes(dst_f);
......
...@@ -43,6 +43,7 @@ namespace compiler { ...@@ -43,6 +43,7 @@ namespace compiler {
V(Arm64Mul32) \ V(Arm64Mul32) \
V(Arm64Smull) \ V(Arm64Smull) \
V(Arm64Smull2) \ V(Arm64Smull2) \
V(Arm64Uadalp) \
V(Arm64Uaddlp) \ V(Arm64Uaddlp) \
V(Arm64Umull) \ V(Arm64Umull) \
V(Arm64Umull2) \ V(Arm64Umull2) \
......
...@@ -44,6 +44,7 @@ int InstructionScheduler::GetTargetInstructionFlags( ...@@ -44,6 +44,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArm64Mul32: case kArm64Mul32:
case kArm64Smull: case kArm64Smull:
case kArm64Smull2: case kArm64Smull2:
case kArm64Uadalp:
case kArm64Uaddlp: case kArm64Uaddlp:
case kArm64Umull: case kArm64Umull:
case kArm64Umull2: case kArm64Umull2:
......
...@@ -3757,6 +3757,24 @@ void InstructionSelector::VisitI64x2Mul(Node* node) { ...@@ -3757,6 +3757,24 @@ void InstructionSelector::VisitI64x2Mul(Node* node) {
g.UseRegister(left->InputAt(0))); \ g.UseRegister(left->InputAt(0))); \
return; \ return; \
} \ } \
/* Select Uadalp(x, y) for Add(x, ExtAddPairwiseU(y)). */ \
if (right->opcode() == \
IrOpcode::k##Type##ExtAddPairwise##PairwiseType##U && \
CanCover(node, right)) { \
Emit(kArm64Uadalp | LaneSizeField::encode(LaneSize), \
g.DefineSameAsFirst(node), g.UseRegister(left), \
g.UseRegister(right->InputAt(0))); \
return; \
} \
/* Select Uadalp(y, x) for Add(ExtAddPairwiseU(x), y). */ \
if (left->opcode() == \
IrOpcode::k##Type##ExtAddPairwise##PairwiseType##U && \
CanCover(node, left)) { \
Emit(kArm64Uadalp | LaneSizeField::encode(LaneSize), \
g.DefineSameAsFirst(node), g.UseRegister(right), \
g.UseRegister(left->InputAt(0))); \
return; \
} \
VisitRRR(this, kArm64##Type##Add, node); \ VisitRRR(this, kArm64##Type##Add, node); \
} }
......
...@@ -3363,21 +3363,23 @@ void RunAddExtAddPairwiseTest( ...@@ -3363,21 +3363,23 @@ void RunAddExtAddPairwiseTest(
WASM_SIMD_TEST(AddExtAddPairwiseI32Right) { WASM_SIMD_TEST(AddExtAddPairwiseI32Right) {
RunAddExtAddPairwiseTest<int32_t, int16_t>( RunAddExtAddPairwiseTest<int32_t, int16_t>(
execution_tier, RIGHT, kExprI32x4Add, {1, 2, 3, 4}, execution_tier, RIGHT, kExprI32x4Add, {1, 2, 3, 4},
kExprI32x4ExtAddPairwiseI16x8S, {1, 2, 3, 4, 5, 6, 7, 8}, {4, 9, 14, 19}); kExprI32x4ExtAddPairwiseI16x8S, {-1, -2, -3, -4, -5, -6, -7, -8},
{-2, -5, -8, -11});
} }
WASM_SIMD_TEST(AddExtAddPairwiseI32Left) { WASM_SIMD_TEST(AddExtAddPairwiseI32Left) {
RunAddExtAddPairwiseTest<int32_t, int16_t>( RunAddExtAddPairwiseTest<int32_t, int16_t>(
execution_tier, LEFT, kExprI32x4Add, {1, 2, 3, 4}, execution_tier, LEFT, kExprI32x4Add, {1, 2, 3, 4},
kExprI32x4ExtAddPairwiseI16x8S, {1, 2, 3, 4, 5, 6, 7, 8}, {4, 9, 14, 19}); kExprI32x4ExtAddPairwiseI16x8S, {-1, -2, -3, -4, -5, -6, -7, -8},
{-2, -5, -8, -11});
} }
WASM_SIMD_TEST(AddExtAddPairwiseI16Right) { WASM_SIMD_TEST(AddExtAddPairwiseI16Right) {
RunAddExtAddPairwiseTest<int16_t, int8_t>( RunAddExtAddPairwiseTest<int16_t, int8_t>(
execution_tier, RIGHT, kExprI16x8Add, {1, 2, 3, 4, 5, 6, 7, 8}, execution_tier, RIGHT, kExprI16x8Add, {1, 2, 3, 4, 5, 6, 7, 8},
kExprI16x8ExtAddPairwiseI8x16S, kExprI16x8ExtAddPairwiseI8x16S,
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, {-1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16},
{4, 9, 14, 19, 24, 29, 34, 39}); {-2, -5, -8, -11, -14, -17, -20, -23});
} }
WASM_SIMD_TEST(AddExtAddPairwiseI16Left) { WASM_SIMD_TEST(AddExtAddPairwiseI16Left) {
...@@ -3388,6 +3390,18 @@ WASM_SIMD_TEST(AddExtAddPairwiseI16Left) { ...@@ -3388,6 +3390,18 @@ WASM_SIMD_TEST(AddExtAddPairwiseI16Left) {
{4, 9, 14, 19, 24, 29, 34, 39}); {4, 9, 14, 19, 24, 29, 34, 39});
} }
// Unsigned counterpart of AddExtAddPairwiseI32Right: runs I32x4Add together
// with I32x4ExtAddPairwiseI16x8U, passing RIGHT as the side argument
// (presumably the pairwise add is the right operand of the add; confirm
// against RunAddExtAddPairwiseTest).
// The expected lanes equal addend[i] + in[2i] + in[2i+1], e.g. 1 + (1 + 2) = 4.
WASM_SIMD_TEST(AddExtAddPairwiseI32RightUnsigned) {
RunAddExtAddPairwiseTest<uint32_t, uint16_t>(
execution_tier, RIGHT, kExprI32x4Add, {1, 2, 3, 4},
kExprI32x4ExtAddPairwiseI16x8U, {1, 2, 3, 4, 5, 6, 7, 8}, {4, 9, 14, 19});
}
// Unsigned counterpart of AddExtAddPairwiseI32Left: runs I32x4Add together
// with I32x4ExtAddPairwiseI16x8U, passing LEFT as the side argument
// (presumably the pairwise add is the left operand of the add; confirm
// against RunAddExtAddPairwiseTest).
// The expected lanes equal addend[i] + in[2i] + in[2i+1], e.g. 4 + (7 + 8) = 19.
WASM_SIMD_TEST(AddExtAddPairwiseI32LeftUnsigned) {
RunAddExtAddPairwiseTest<uint32_t, uint16_t>(
execution_tier, LEFT, kExprI32x4Add, {1, 2, 3, 4},
kExprI32x4ExtAddPairwiseI16x8U, {1, 2, 3, 4, 5, 6, 7, 8}, {4, 9, 14, 19});
}
#define WASM_EXTRACT_I16x8_TEST(Sign, Type) \ #define WASM_EXTRACT_I16x8_TEST(Sign, Type) \
WASM_SIMD_TEST(I16X8ExtractLane##Sign) { \ WASM_SIMD_TEST(I16X8ExtractLane##Sign) { \
WasmRunner<int32_t, int32_t> r(execution_tier); \ WasmRunner<int32_t, int32_t> r(execution_tier); \
......
...@@ -961,11 +961,13 @@ std::ostream& operator<<(std::ostream& os, const PairwiseAddSide& side) { ...@@ -961,11 +961,13 @@ std::ostream& operator<<(std::ostream& os, const PairwiseAddSide& side) {
struct AddWithPairwiseAddSideAndWidth { struct AddWithPairwiseAddSideAndWidth {
PairwiseAddSide side; PairwiseAddSide side;
int32_t width; int32_t width;
bool isSigned;
}; };
std::ostream& operator<<(std::ostream& os, std::ostream& operator<<(std::ostream& os,
const AddWithPairwiseAddSideAndWidth& sw) { const AddWithPairwiseAddSideAndWidth& sw) {
return os << "{ side: " << sw.side << ", width: " << sw.width << " }"; return os << "{ side: " << sw.side << ", width: " << sw.width
<< ", isSigned: " << sw.isSigned << " }";
} }
using InstructionSelectorAddWithPairwiseAddTest = using InstructionSelectorAddWithPairwiseAddTest =
...@@ -978,9 +980,16 @@ TEST_P(InstructionSelectorAddWithPairwiseAddTest, AddWithPairwiseAdd) { ...@@ -978,9 +980,16 @@ TEST_P(InstructionSelectorAddWithPairwiseAddTest, AddWithPairwiseAdd) {
Node* x = m.Parameter(0); Node* x = m.Parameter(0);
Node* y = m.Parameter(1); Node* y = m.Parameter(1);
const Operator* pairwiseAddOp = const Operator* pairwiseAddOp;
params.width == 32 ? m.machine()->I32x4ExtAddPairwiseI16x8S() if (params.width == 32 && params.isSigned) {
: m.machine()->I16x8ExtAddPairwiseI8x16S(); pairwiseAddOp = m.machine()->I32x4ExtAddPairwiseI16x8S();
} else if (params.width == 16 && params.isSigned) {
pairwiseAddOp = m.machine()->I16x8ExtAddPairwiseI8x16S();
} else if (params.width == 32 && !params.isSigned) {
pairwiseAddOp = m.machine()->I32x4ExtAddPairwiseI16x8U();
} else {
pairwiseAddOp = m.machine()->I16x8ExtAddPairwiseI8x16U();
}
Node* pairwiseAdd = m.AddNode(pairwiseAddOp, x); Node* pairwiseAdd = m.AddNode(pairwiseAddOp, x);
const Operator* addOp = const Operator* addOp =
params.width == 32 ? m.machine()->I32x4Add() : m.machine()->I16x8Add(); params.width == 32 ? m.machine()->I32x4Add() : m.machine()->I16x8Add();
...@@ -989,15 +998,17 @@ TEST_P(InstructionSelectorAddWithPairwiseAddTest, AddWithPairwiseAdd) { ...@@ -989,15 +998,17 @@ TEST_P(InstructionSelectorAddWithPairwiseAddTest, AddWithPairwiseAdd) {
m.Return(add); m.Return(add);
Stream s = m.Build(); Stream s = m.Build();
// Should be fused to Sadalp // Should be fused to Sadalp/Uadalp
ASSERT_EQ(1U, s.size()); ASSERT_EQ(1U, s.size());
EXPECT_EQ(kArm64Sadalp, s[0]->arch_opcode()); EXPECT_EQ(params.isSigned ? kArm64Sadalp : kArm64Uadalp, s[0]->arch_opcode());
EXPECT_EQ(2U, s[0]->InputCount()); EXPECT_EQ(2U, s[0]->InputCount());
EXPECT_EQ(1U, s[0]->OutputCount()); EXPECT_EQ(1U, s[0]->OutputCount());
} }
const AddWithPairwiseAddSideAndWidth kAddWithPairAddTestCases[] = { const AddWithPairwiseAddSideAndWidth kAddWithPairAddTestCases[] = {
{LEFT, 16}, {RIGHT, 16}, {LEFT, 32}, {RIGHT, 32}}; {LEFT, 16, true}, {RIGHT, 16, true}, {LEFT, 32, true},
{RIGHT, 32, true}, {LEFT, 16, false}, {RIGHT, 16, false},
{LEFT, 32, false}, {RIGHT, 32, false}};
INSTANTIATE_TEST_SUITE_P(InstructionSelectorTest, INSTANTIATE_TEST_SUITE_P(InstructionSelectorTest,
InstructionSelectorAddWithPairwiseAddTest, InstructionSelectorAddWithPairwiseAddTest,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment