Commit 930fb646, authored by Daan de Graaf, committed by V8 LUCI CQ

[wasm-simd][arm64] Fuse add and extadd_pairwise_u.

The two instructions are fused into a single Uadalp instruction,
improving performance of quantized neural network operator
implementations such as XNNPACK.

Bug: v8:11546
Change-Id: Ic11b35d1e7758ee0b4ccfe8f592edc1aa798f6f2
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2939997
Reviewed-by: Zhi An Ng <zhin@chromium.org>
Commit-Queue: Daan de Graaf <daagra@google.com>
Cr-Commit-Position: refs/heads/master@{#75102}
parent 5d84b6cb
......@@ -1217,6 +1217,14 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputSimd128Register(0).Format(src_f));
break;
}
// Emits a single Uadalp for the fused pattern Add(x, ExtAddPairwiseU(y))
// chosen by the instruction selector.
case kArm64Uadalp: {
// The output register doubles as the accumulator: the selector defines the
// result "same as first", so it must alias input 0 (the plain addend).
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
// Destination format is built from the lane size encoded in the opcode; the
// source format is derived from it (per the helper's name: lanes of half the
// width, twice as many of them).
VectorFormat dst_f = VectorFormatFillQ(LaneSizeField::decode(opcode));
VectorFormat src_f = VectorFormatHalfWidthDoubleLanes(dst_f);
// Input 1 is the operand of the original pairwise-add node.
__ Uadalp(i.OutputSimd128Register().Format(dst_f),
i.InputSimd128Register(1).Format(src_f));
break;
}
case kArm64Uaddlp: {
VectorFormat dst_f = VectorFormatFillQ(LaneSizeField::decode(opcode));
VectorFormat src_f = VectorFormatHalfWidthDoubleLanes(dst_f);
......
......@@ -43,6 +43,7 @@ namespace compiler {
V(Arm64Mul32) \
V(Arm64Smull) \
V(Arm64Smull2) \
V(Arm64Uadalp) \
V(Arm64Uaddlp) \
V(Arm64Umull) \
V(Arm64Umull2) \
......
......@@ -44,6 +44,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArm64Mul32:
case kArm64Smull:
case kArm64Smull2:
case kArm64Uadalp:
case kArm64Uaddlp:
case kArm64Umull:
case kArm64Umull2:
......
......@@ -3757,6 +3757,24 @@ void InstructionSelector::VisitI64x2Mul(Node* node) {
g.UseRegister(left->InputAt(0))); \
return; \
} \
/* Select Uadalp(x, y) for Add(x, ExtAddPairwiseU(y)). */ \
if (right->opcode() == \
IrOpcode::k##Type##ExtAddPairwise##PairwiseType##U && \
CanCover(node, right)) { \
Emit(kArm64Uadalp | LaneSizeField::encode(LaneSize), \
g.DefineSameAsFirst(node), g.UseRegister(left), \
g.UseRegister(right->InputAt(0))); \
return; \
} \
/* Select Uadalp(y, x) for Add(ExtAddPairwiseU(x), y). */ \
if (left->opcode() == \
IrOpcode::k##Type##ExtAddPairwise##PairwiseType##U && \
CanCover(node, left)) { \
Emit(kArm64Uadalp | LaneSizeField::encode(LaneSize), \
g.DefineSameAsFirst(node), g.UseRegister(right), \
g.UseRegister(left->InputAt(0))); \
return; \
} \
VisitRRR(this, kArm64##Type##Add, node); \
}
......
......@@ -3363,21 +3363,23 @@ void RunAddExtAddPairwiseTest(
WASM_SIMD_TEST(AddExtAddPairwiseI32Right) {
RunAddExtAddPairwiseTest<int32_t, int16_t>(
execution_tier, RIGHT, kExprI32x4Add, {1, 2, 3, 4},
kExprI32x4ExtAddPairwiseI16x8S, {1, 2, 3, 4, 5, 6, 7, 8}, {4, 9, 14, 19});
kExprI32x4ExtAddPairwiseI16x8S, {-1, -2, -3, -4, -5, -6, -7, -8},
{-2, -5, -8, -11});
}
WASM_SIMD_TEST(AddExtAddPairwiseI32Left) {
RunAddExtAddPairwiseTest<int32_t, int16_t>(
execution_tier, LEFT, kExprI32x4Add, {1, 2, 3, 4},
kExprI32x4ExtAddPairwiseI16x8S, {1, 2, 3, 4, 5, 6, 7, 8}, {4, 9, 14, 19});
kExprI32x4ExtAddPairwiseI16x8S, {-1, -2, -3, -4, -5, -6, -7, -8},
{-2, -5, -8, -11});
}
WASM_SIMD_TEST(AddExtAddPairwiseI16Right) {
RunAddExtAddPairwiseTest<int16_t, int8_t>(
execution_tier, RIGHT, kExprI16x8Add, {1, 2, 3, 4, 5, 6, 7, 8},
kExprI16x8ExtAddPairwiseI8x16S,
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
{4, 9, 14, 19, 24, 29, 34, 39});
{-1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16},
{-2, -5, -8, -11, -14, -17, -20, -23});
}
WASM_SIMD_TEST(AddExtAddPairwiseI16Left) {
......@@ -3388,6 +3390,18 @@ WASM_SIMD_TEST(AddExtAddPairwiseI16Left) {
{4, 9, 14, 19, 24, 29, 34, 39});
}
// Runs kExprI32x4Add combined with the unsigned pairwise extending add
// (kExprI32x4ExtAddPairwiseI16x8U) on the RIGHT side of the add.
// The expected lanes equal addend + sum of each adjacent input pair:
// 1+(1+2)=4, 2+(3+4)=9, 3+(5+6)=14, 4+(7+8)=19.
WASM_SIMD_TEST(AddExtAddPairwiseI32RightUnsigned) {
RunAddExtAddPairwiseTest<uint32_t, uint16_t>(
execution_tier, RIGHT, kExprI32x4Add, {1, 2, 3, 4},
kExprI32x4ExtAddPairwiseI16x8U, {1, 2, 3, 4, 5, 6, 7, 8}, {4, 9, 14, 19});
}
// Same inputs and expected lanes as the RIGHT-side unsigned test, but with
// the unsigned pairwise extending add (kExprI32x4ExtAddPairwiseI16x8U) on
// the LEFT side of kExprI32x4Add.
// Expected lanes: 1+(1+2)=4, 2+(3+4)=9, 3+(5+6)=14, 4+(7+8)=19.
WASM_SIMD_TEST(AddExtAddPairwiseI32LeftUnsigned) {
RunAddExtAddPairwiseTest<uint32_t, uint16_t>(
execution_tier, LEFT, kExprI32x4Add, {1, 2, 3, 4},
kExprI32x4ExtAddPairwiseI16x8U, {1, 2, 3, 4, 5, 6, 7, 8}, {4, 9, 14, 19});
}
#define WASM_EXTRACT_I16x8_TEST(Sign, Type) \
WASM_SIMD_TEST(I16X8ExtractLane##Sign) { \
WasmRunner<int32_t, int32_t> r(execution_tier); \
......
......@@ -961,11 +961,13 @@ std::ostream& operator<<(std::ostream& os, const PairwiseAddSide& side) {
// Parameters for one add-with-pairwise-add instruction selector test case.
struct AddWithPairwiseAddSideAndWidth {
// LEFT or RIGHT; presumably which operand of the add is the pairwise-add
// node (the code that consumes it is not visible here).
PairwiseAddSide side;
// Lane width of the add: 32 selects the I32x4 operators, any other value is
// treated as the 16-bit (I16x8) case.
int32_t width;
// true selects the signed (...S) pairwise-add operator and expects Sadalp;
// false selects the unsigned (...U) operator and expects Uadalp.
bool isSigned;
};
std::ostream& operator<<(std::ostream& os,
const AddWithPairwiseAddSideAndWidth& sw) {
return os << "{ side: " << sw.side << ", width: " << sw.width << " }";
return os << "{ side: " << sw.side << ", width: " << sw.width
<< ", isSigned: " << sw.isSigned << " }";
}
using InstructionSelectorAddWithPairwiseAddTest =
......@@ -978,9 +980,16 @@ TEST_P(InstructionSelectorAddWithPairwiseAddTest, AddWithPairwiseAdd) {
Node* x = m.Parameter(0);
Node* y = m.Parameter(1);
const Operator* pairwiseAddOp =
params.width == 32 ? m.machine()->I32x4ExtAddPairwiseI16x8S()
: m.machine()->I16x8ExtAddPairwiseI8x16S();
const Operator* pairwiseAddOp;
if (params.width == 32 && params.isSigned) {
pairwiseAddOp = m.machine()->I32x4ExtAddPairwiseI16x8S();
} else if (params.width == 16 && params.isSigned) {
pairwiseAddOp = m.machine()->I16x8ExtAddPairwiseI8x16S();
} else if (params.width == 32 && !params.isSigned) {
pairwiseAddOp = m.machine()->I32x4ExtAddPairwiseI16x8U();
} else {
pairwiseAddOp = m.machine()->I16x8ExtAddPairwiseI8x16U();
}
Node* pairwiseAdd = m.AddNode(pairwiseAddOp, x);
const Operator* addOp =
params.width == 32 ? m.machine()->I32x4Add() : m.machine()->I16x8Add();
......@@ -989,15 +998,17 @@ TEST_P(InstructionSelectorAddWithPairwiseAddTest, AddWithPairwiseAdd) {
m.Return(add);
Stream s = m.Build();
// Should be fused to Sadalp
// Should be fused to Sadalp/Uadalp
ASSERT_EQ(1U, s.size());
EXPECT_EQ(kArm64Sadalp, s[0]->arch_opcode());
EXPECT_EQ(params.isSigned ? kArm64Sadalp : kArm64Uadalp, s[0]->arch_opcode());
EXPECT_EQ(2U, s[0]->InputCount());
EXPECT_EQ(1U, s[0]->OutputCount());
}
const AddWithPairwiseAddSideAndWidth kAddWithPairAddTestCases[] = {
{LEFT, 16}, {RIGHT, 16}, {LEFT, 32}, {RIGHT, 32}};
{LEFT, 16, true}, {RIGHT, 16, true}, {LEFT, 32, true},
{RIGHT, 32, true}, {LEFT, 16, false}, {RIGHT, 16, false},
{LEFT, 32, false}, {RIGHT, 32, false}};
INSTANTIATE_TEST_SUITE_P(InstructionSelectorTest,
InstructionSelectorAddWithPairwiseAddTest,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment