Commit 584b3d0f authored by Ilja Iskovs, committed by V8 LUCI CQ

[arm64] Merge some WASM SIMD opcodes using LaneSizeField

We are running out of encoding space for opcodes on arm64. This patch
merges some wasm simd opcodes of different simd types, encoding the lane
size in the instruction code using LaneSizeField instead. This reduces
the total number of opcodes on arm64 by 71.

Bug: v8:12093
Change-Id: Ib4d96d1db1ff9b08fafd665974f3494a507da770
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3109676
Reviewed-by: Zhi An Ng <zhin@chromium.org>
Commit-Queue: Martyn Capewell <martyn.capewell@arm.com>
Cr-Commit-Position: refs/heads/main@{#76434}
parent 34687291
......@@ -189,23 +189,23 @@ namespace compiler {
V(Arm64Sxtl2) \
V(Arm64Uxtl) \
V(Arm64Uxtl2) \
V(Arm64F64x2Splat) \
V(Arm64F64x2ExtractLane) \
V(Arm64F64x2ReplaceLane) \
V(Arm64F64x2Abs) \
V(Arm64F64x2Neg) \
V(Arm64F64x2Sqrt) \
V(Arm64F64x2Add) \
V(Arm64F64x2Sub) \
V(Arm64F64x2Mul) \
V(Arm64F64x2MulElement) \
V(Arm64F64x2Div) \
V(Arm64F64x2Min) \
V(Arm64F64x2Max) \
V(Arm64F64x2Eq) \
V(Arm64F64x2Ne) \
V(Arm64F64x2Lt) \
V(Arm64F64x2Le) \
V(Arm64FSplat) \
V(Arm64FAbs) \
V(Arm64FSqrt) \
V(Arm64FNeg) \
V(Arm64FExtractLane) \
V(Arm64FReplaceLane) \
V(Arm64FAdd) \
V(Arm64FSub) \
V(Arm64FMul) \
V(Arm64FMulElement) \
V(Arm64FDiv) \
V(Arm64FMin) \
V(Arm64FMax) \
V(Arm64FEq) \
V(Arm64FNe) \
V(Arm64FLt) \
V(Arm64FLe) \
V(Arm64F64x2Qfma) \
V(Arm64F64x2Qfms) \
V(Arm64F64x2Pmin) \
......@@ -213,140 +213,69 @@ namespace compiler {
V(Arm64F64x2ConvertLowI32x4S) \
V(Arm64F64x2ConvertLowI32x4U) \
V(Arm64F64x2PromoteLowF32x4) \
V(Arm64F32x4Splat) \
V(Arm64F32x4ExtractLane) \
V(Arm64F32x4ReplaceLane) \
V(Arm64F32x4SConvertI32x4) \
V(Arm64F32x4UConvertI32x4) \
V(Arm64F32x4Abs) \
V(Arm64F32x4Neg) \
V(Arm64F32x4Sqrt) \
V(Arm64F32x4RecipApprox) \
V(Arm64F32x4RecipSqrtApprox) \
V(Arm64F32x4Add) \
V(Arm64F32x4Sub) \
V(Arm64F32x4Mul) \
V(Arm64F32x4MulElement) \
V(Arm64F32x4Div) \
V(Arm64F32x4Min) \
V(Arm64F32x4Max) \
V(Arm64F32x4Eq) \
V(Arm64F32x4Ne) \
V(Arm64F32x4Lt) \
V(Arm64F32x4Le) \
V(Arm64F32x4Qfma) \
V(Arm64F32x4Qfms) \
V(Arm64F32x4Pmin) \
V(Arm64F32x4Pmax) \
V(Arm64F32x4DemoteF64x2Zero) \
V(Arm64I64x2Splat) \
V(Arm64I64x2ExtractLane) \
V(Arm64I64x2ReplaceLane) \
V(Arm64I64x2Abs) \
V(Arm64I64x2Neg) \
V(Arm64ISplat) \
V(Arm64IAbs) \
V(Arm64INeg) \
V(Arm64IExtractLane) \
V(Arm64IReplaceLane) \
V(Arm64I64x2Shl) \
V(Arm64I64x2ShrS) \
V(Arm64I64x2Add) \
V(Arm64I64x2Sub) \
V(Arm64IAdd) \
V(Arm64ISub) \
V(Arm64I64x2Mul) \
V(Arm64I64x2Eq) \
V(Arm64I64x2Ne) \
V(Arm64I64x2GtS) \
V(Arm64I64x2GeS) \
V(Arm64IEq) \
V(Arm64INe) \
V(Arm64IGtS) \
V(Arm64IGeS) \
V(Arm64I64x2ShrU) \
V(Arm64I64x2BitMask) \
V(Arm64I32x4Splat) \
V(Arm64I32x4ExtractLane) \
V(Arm64I32x4ReplaceLane) \
V(Arm64I32x4SConvertF32x4) \
V(Arm64I32x4Neg) \
V(Arm64I32x4Shl) \
V(Arm64I32x4ShrS) \
V(Arm64I32x4Add) \
V(Arm64I32x4Sub) \
V(Arm64I32x4Mul) \
V(Arm64I32x4Mla) \
V(Arm64I32x4Mls) \
V(Arm64I32x4MinS) \
V(Arm64I32x4MaxS) \
V(Arm64I32x4Eq) \
V(Arm64I32x4Ne) \
V(Arm64I32x4GtS) \
V(Arm64I32x4GeS) \
V(Arm64Mla) \
V(Arm64Mls) \
V(Arm64IMinS) \
V(Arm64IMaxS) \
V(Arm64I32x4UConvertF32x4) \
V(Arm64I32x4ShrU) \
V(Arm64I32x4MinU) \
V(Arm64I32x4MaxU) \
V(Arm64I32x4GtU) \
V(Arm64I32x4GeU) \
V(Arm64I32x4Abs) \
V(Arm64IMinU) \
V(Arm64IMaxU) \
V(Arm64IGtU) \
V(Arm64IGeU) \
V(Arm64I32x4BitMask) \
V(Arm64I32x4DotI16x8S) \
V(Arm64I32x4TruncSatF64x2SZero) \
V(Arm64I32x4TruncSatF64x2UZero) \
V(Arm64I16x8Splat) \
V(Arm64I16x8ExtractLaneU) \
V(Arm64I16x8ExtractLaneS) \
V(Arm64I16x8ReplaceLane) \
V(Arm64I16x8Neg) \
V(Arm64IExtractLaneU) \
V(Arm64IExtractLaneS) \
V(Arm64I16x8Shl) \
V(Arm64I16x8ShrS) \
V(Arm64I16x8SConvertI32x4) \
V(Arm64I16x8Add) \
V(Arm64I16x8AddSatS) \
V(Arm64I16x8Sub) \
V(Arm64I16x8SubSatS) \
V(Arm64IAddSatS) \
V(Arm64ISubSatS) \
V(Arm64I16x8Mul) \
V(Arm64I16x8Mla) \
V(Arm64I16x8Mls) \
V(Arm64I16x8MinS) \
V(Arm64I16x8MaxS) \
V(Arm64I16x8Eq) \
V(Arm64I16x8Ne) \
V(Arm64I16x8GtS) \
V(Arm64I16x8GeS) \
V(Arm64I16x8ShrU) \
V(Arm64I16x8UConvertI32x4) \
V(Arm64I16x8AddSatU) \
V(Arm64I16x8SubSatU) \
V(Arm64I16x8MinU) \
V(Arm64I16x8MaxU) \
V(Arm64I16x8GtU) \
V(Arm64I16x8GeU) \
V(Arm64I16x8RoundingAverageU) \
V(Arm64IAddSatU) \
V(Arm64ISubSatU) \
V(Arm64RoundingAverageU) \
V(Arm64I16x8Q15MulRSatS) \
V(Arm64I16x8Abs) \
V(Arm64I16x8BitMask) \
V(Arm64I8x16Splat) \
V(Arm64I8x16ExtractLaneU) \
V(Arm64I8x16ExtractLaneS) \
V(Arm64I8x16ReplaceLane) \
V(Arm64I8x16Neg) \
V(Arm64I8x16Shl) \
V(Arm64I8x16ShrS) \
V(Arm64I8x16SConvertI16x8) \
V(Arm64I8x16Add) \
V(Arm64I8x16AddSatS) \
V(Arm64I8x16Sub) \
V(Arm64I8x16SubSatS) \
V(Arm64I8x16Mla) \
V(Arm64I8x16Mls) \
V(Arm64I8x16MinS) \
V(Arm64I8x16MaxS) \
V(Arm64I8x16Eq) \
V(Arm64I8x16Ne) \
V(Arm64I8x16GtS) \
V(Arm64I8x16GeS) \
V(Arm64I8x16ShrU) \
V(Arm64I8x16UConvertI16x8) \
V(Arm64I8x16AddSatU) \
V(Arm64I8x16SubSatU) \
V(Arm64I8x16MinU) \
V(Arm64I8x16MaxU) \
V(Arm64I8x16GtU) \
V(Arm64I8x16GeU) \
V(Arm64I8x16RoundingAverageU) \
V(Arm64I8x16Abs) \
V(Arm64I8x16BitMask) \
V(Arm64S128Const) \
V(Arm64S128Zero) \
......
......@@ -149,23 +149,23 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArm64Float64MoveU64:
case kArm64U64MoveFloat64:
case kArm64Float64SilenceNaN:
case kArm64F64x2Splat:
case kArm64F64x2ExtractLane:
case kArm64F64x2ReplaceLane:
case kArm64F64x2Abs:
case kArm64F64x2Neg:
case kArm64F64x2Sqrt:
case kArm64F64x2Add:
case kArm64F64x2Sub:
case kArm64F64x2Mul:
case kArm64F64x2MulElement:
case kArm64F64x2Div:
case kArm64F64x2Min:
case kArm64F64x2Max:
case kArm64F64x2Eq:
case kArm64F64x2Ne:
case kArm64F64x2Lt:
case kArm64F64x2Le:
case kArm64FExtractLane:
case kArm64FReplaceLane:
case kArm64FSplat:
case kArm64FAbs:
case kArm64FSqrt:
case kArm64FNeg:
case kArm64FAdd:
case kArm64FSub:
case kArm64FMul:
case kArm64FMulElement:
case kArm64FDiv:
case kArm64FMin:
case kArm64FMax:
case kArm64FEq:
case kArm64FNe:
case kArm64FLt:
case kArm64FLe:
case kArm64F64x2Qfma:
case kArm64F64x2Qfms:
case kArm64F64x2Pmin:
......@@ -173,144 +173,73 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArm64F64x2ConvertLowI32x4S:
case kArm64F64x2ConvertLowI32x4U:
case kArm64F64x2PromoteLowF32x4:
case kArm64F32x4Splat:
case kArm64F32x4ExtractLane:
case kArm64F32x4ReplaceLane:
case kArm64F32x4SConvertI32x4:
case kArm64F32x4UConvertI32x4:
case kArm64F32x4Abs:
case kArm64F32x4Neg:
case kArm64F32x4Sqrt:
case kArm64F32x4RecipApprox:
case kArm64F32x4RecipSqrtApprox:
case kArm64F32x4Add:
case kArm64F32x4Sub:
case kArm64F32x4Mul:
case kArm64F32x4MulElement:
case kArm64F32x4Div:
case kArm64F32x4Min:
case kArm64F32x4Max:
case kArm64F32x4Eq:
case kArm64F32x4Ne:
case kArm64F32x4Lt:
case kArm64F32x4Le:
case kArm64F32x4Qfma:
case kArm64F32x4Qfms:
case kArm64F32x4Pmin:
case kArm64F32x4Pmax:
case kArm64F32x4DemoteF64x2Zero:
case kArm64I64x2Splat:
case kArm64I64x2ExtractLane:
case kArm64I64x2ReplaceLane:
case kArm64I64x2Abs:
case kArm64I64x2Neg:
case kArm64IExtractLane:
case kArm64IReplaceLane:
case kArm64ISplat:
case kArm64IAbs:
case kArm64INeg:
case kArm64Mla:
case kArm64Mls:
case kArm64RoundingAverageU:
case kArm64I64x2Shl:
case kArm64I64x2ShrS:
case kArm64I64x2Add:
case kArm64I64x2Sub:
case kArm64IAdd:
case kArm64ISub:
case kArm64I64x2Mul:
case kArm64I64x2Eq:
case kArm64I64x2Ne:
case kArm64I64x2GtS:
case kArm64I64x2GeS:
case kArm64IEq:
case kArm64INe:
case kArm64IGtS:
case kArm64IGeS:
case kArm64I64x2ShrU:
case kArm64I64x2BitMask:
case kArm64I32x4Splat:
case kArm64I32x4ExtractLane:
case kArm64I32x4ReplaceLane:
case kArm64I32x4SConvertF32x4:
case kArm64Sxtl:
case kArm64Sxtl2:
case kArm64Uxtl:
case kArm64Uxtl2:
case kArm64I32x4Neg:
case kArm64I32x4Shl:
case kArm64I32x4ShrS:
case kArm64I32x4Add:
case kArm64I32x4Sub:
case kArm64I32x4Mul:
case kArm64I32x4Mla:
case kArm64I32x4Mls:
case kArm64I32x4MinS:
case kArm64I32x4MaxS:
case kArm64I32x4Eq:
case kArm64I32x4Ne:
case kArm64I32x4GtS:
case kArm64I32x4GeS:
case kArm64IMinS:
case kArm64IMaxS:
case kArm64I32x4UConvertF32x4:
case kArm64I32x4ShrU:
case kArm64I32x4MinU:
case kArm64I32x4MaxU:
case kArm64I32x4GtU:
case kArm64I32x4GeU:
case kArm64I32x4Abs:
case kArm64IMinU:
case kArm64IMaxU:
case kArm64IGtU:
case kArm64IGeU:
case kArm64I32x4BitMask:
case kArm64I32x4DotI16x8S:
case kArm64I32x4TruncSatF64x2SZero:
case kArm64I32x4TruncSatF64x2UZero:
case kArm64I16x8Splat:
case kArm64I16x8ExtractLaneU:
case kArm64I16x8ExtractLaneS:
case kArm64I16x8ReplaceLane:
case kArm64I16x8Neg:
case kArm64IExtractLaneU:
case kArm64IExtractLaneS:
case kArm64I16x8Shl:
case kArm64I16x8ShrS:
case kArm64I16x8SConvertI32x4:
case kArm64I16x8Add:
case kArm64I16x8AddSatS:
case kArm64I16x8Sub:
case kArm64I16x8SubSatS:
case kArm64IAddSatS:
case kArm64ISubSatS:
case kArm64I16x8Mul:
case kArm64I16x8Mla:
case kArm64I16x8Mls:
case kArm64I16x8MinS:
case kArm64I16x8MaxS:
case kArm64I16x8Eq:
case kArm64I16x8Ne:
case kArm64I16x8GtS:
case kArm64I16x8GeS:
case kArm64I16x8ShrU:
case kArm64I16x8UConvertI32x4:
case kArm64I16x8AddSatU:
case kArm64I16x8SubSatU:
case kArm64I16x8MinU:
case kArm64I16x8MaxU:
case kArm64I16x8GtU:
case kArm64I16x8GeU:
case kArm64I16x8RoundingAverageU:
case kArm64IAddSatU:
case kArm64ISubSatU:
case kArm64I16x8Q15MulRSatS:
case kArm64I16x8Abs:
case kArm64I16x8BitMask:
case kArm64I8x16Splat:
case kArm64I8x16ExtractLaneU:
case kArm64I8x16ExtractLaneS:
case kArm64I8x16ReplaceLane:
case kArm64I8x16Neg:
case kArm64I8x16Shl:
case kArm64I8x16ShrS:
case kArm64I8x16SConvertI16x8:
case kArm64I8x16Add:
case kArm64I8x16AddSatS:
case kArm64I8x16Sub:
case kArm64I8x16SubSatS:
case kArm64I8x16Mla:
case kArm64I8x16Mls:
case kArm64I8x16MinS:
case kArm64I8x16MaxS:
case kArm64I8x16Eq:
case kArm64I8x16Ne:
case kArm64I8x16GtS:
case kArm64I8x16GeS:
case kArm64I8x16UConvertI16x8:
case kArm64I8x16AddSatU:
case kArm64I8x16SubSatU:
case kArm64I8x16ShrU:
case kArm64I8x16MinU:
case kArm64I8x16MaxU:
case kArm64I8x16GtU:
case kArm64I8x16GeU:
case kArm64I8x16RoundingAverageU:
case kArm64I8x16Abs:
case kArm64I8x16BitMask:
case kArm64S128Const:
case kArm64S128Zero:
......
......@@ -2163,6 +2163,7 @@ struct SIMDMulDPInst {
ArchOpcode multiply_add_arch_opcode;
ArchOpcode multiply_sub_arch_opcode;
MachineType machine_type;
const int lane_size;
};
std::ostream& operator<<(std::ostream& os, const SIMDMulDPInst& inst) {
......@@ -2174,10 +2175,10 @@ std::ostream& operator<<(std::ostream& os, const SIMDMulDPInst& inst) {
static const SIMDMulDPInst kSIMDMulDPInstructions[] = {
{"I32x4Mul", &MachineOperatorBuilder::I32x4Mul,
&MachineOperatorBuilder::I32x4Add, &MachineOperatorBuilder::I32x4Sub,
kArm64I32x4Mla, kArm64I32x4Mls, MachineType::Simd128()},
kArm64Mla, kArm64Mls, MachineType::Simd128(), 32},
{"I16x8Mul", &MachineOperatorBuilder::I16x8Mul,
&MachineOperatorBuilder::I16x8Add, &MachineOperatorBuilder::I16x8Sub,
kArm64I16x8Mla, kArm64I16x8Mls, MachineType::Simd128()}};
kArm64Mla, kArm64Mls, MachineType::Simd128(), 16}};
using InstructionSelectorSIMDDPWithSIMDMulTest =
InstructionSelectorTestWithParam<SIMDMulDPInst>;
......@@ -2193,6 +2194,7 @@ TEST_P(InstructionSelectorSIMDDPWithSIMDMulTest, AddWithMul) {
Stream s = m.Build();
ASSERT_EQ(1U, s.size());
EXPECT_EQ(mdpi.multiply_add_arch_opcode, s[0]->arch_opcode());
EXPECT_EQ(mdpi.lane_size, LaneSizeField::decode(s[0]->opcode()));
EXPECT_EQ(3U, s[0]->InputCount());
EXPECT_EQ(1U, s[0]->OutputCount());
}
......@@ -2204,6 +2206,7 @@ TEST_P(InstructionSelectorSIMDDPWithSIMDMulTest, AddWithMul) {
Stream s = m.Build();
ASSERT_EQ(1U, s.size());
EXPECT_EQ(mdpi.multiply_add_arch_opcode, s[0]->arch_opcode());
EXPECT_EQ(mdpi.lane_size, LaneSizeField::decode(s[0]->opcode()));
EXPECT_EQ(3U, s[0]->InputCount());
EXPECT_EQ(1U, s[0]->OutputCount());
}
......@@ -2220,6 +2223,7 @@ TEST_P(InstructionSelectorSIMDDPWithSIMDMulTest, SubWithMul) {
Stream s = m.Build();
ASSERT_EQ(1U, s.size());
EXPECT_EQ(mdpi.multiply_sub_arch_opcode, s[0]->arch_opcode());
EXPECT_EQ(mdpi.lane_size, LaneSizeField::decode(s[0]->opcode()));
EXPECT_EQ(3U, s[0]->InputCount());
EXPECT_EQ(1U, s[0]->OutputCount());
}
......@@ -2386,7 +2390,8 @@ TEST_P(InstructionSelectorSimdF32x4MulWithDupTest, MulWithDup) {
m.Return(m.AddNode(m.machine()->F32x4Mul(), m.Parameter(2), shuffle));
Stream s = m.Build();
ASSERT_EQ(1U, s.size());
EXPECT_EQ(kArm64F32x4MulElement, s[0]->arch_opcode());
EXPECT_EQ(kArm64FMulElement, s[0]->arch_opcode());
EXPECT_EQ(32, LaneSizeField::decode(s[0]->opcode()));
EXPECT_EQ(3U, s[0]->InputCount());
EXPECT_EQ(param.lane, s.ToInt32(s[0]->InputAt(2)));
EXPECT_EQ(1U, s[0]->OutputCount());
......@@ -2402,7 +2407,8 @@ TEST_P(InstructionSelectorSimdF32x4MulWithDupTest, MulWithDup) {
m.Return(m.AddNode(m.machine()->F32x4Mul(), shuffle, m.Parameter(2)));
Stream s = m.Build();
ASSERT_EQ(1U, s.size());
EXPECT_EQ(kArm64F32x4MulElement, s[0]->arch_opcode());
EXPECT_EQ(kArm64FMulElement, s[0]->arch_opcode());
EXPECT_EQ(32, LaneSizeField::decode(s[0]->opcode()));
EXPECT_EQ(3U, s[0]->InputCount());
EXPECT_EQ(param.lane, s.ToInt32(s[0]->InputAt(2)));
EXPECT_EQ(1U, s[0]->OutputCount());
......@@ -2429,7 +2435,8 @@ TEST_F(InstructionSelectorTest, SimdF32x4MulWithDupNegativeTest) {
// The shuffle is a i8x16.dup of lane 0.
EXPECT_EQ(kArm64S128Dup, s[0]->arch_opcode());
EXPECT_EQ(3U, s[0]->InputCount());
EXPECT_EQ(kArm64F32x4Mul, s[1]->arch_opcode());
EXPECT_EQ(kArm64FMul, s[1]->arch_opcode());
EXPECT_EQ(32, LaneSizeField::decode(s[1]->opcode()));
EXPECT_EQ(1U, s[0]->OutputCount());
EXPECT_EQ(2U, s[1]->InputCount());
EXPECT_EQ(1U, s[1]->OutputCount());
......@@ -2472,7 +2479,8 @@ TEST_P(InstructionSelectorSimdF64x2MulWithDupTest, MulWithDup) {
m.Return(m.AddNode(m.machine()->F64x2Mul(), m.Parameter(2), shuffle));
Stream s = m.Build();
ASSERT_EQ(1U, s.size());
EXPECT_EQ(kArm64F64x2MulElement, s[0]->arch_opcode());
EXPECT_EQ(kArm64FMulElement, s[0]->arch_opcode());
EXPECT_EQ(64, LaneSizeField::decode(s[0]->opcode()));
EXPECT_EQ(3U, s[0]->InputCount());
EXPECT_EQ(param.lane, s.ToInt32(s[0]->InputAt(2)));
EXPECT_EQ(1U, s[0]->OutputCount());
......@@ -2488,7 +2496,8 @@ TEST_P(InstructionSelectorSimdF64x2MulWithDupTest, MulWithDup) {
m.Return(m.AddNode(m.machine()->F64x2Mul(), shuffle, m.Parameter(2)));
Stream s = m.Build();
ASSERT_EQ(1U, s.size());
EXPECT_EQ(kArm64F64x2MulElement, s[0]->arch_opcode());
EXPECT_EQ(kArm64FMulElement, s[0]->arch_opcode());
EXPECT_EQ(64, LaneSizeField::decode(s[0]->opcode()));
EXPECT_EQ(3U, s[0]->InputCount());
EXPECT_EQ(param.lane, s.ToInt32(s[0]->InputAt(2)));
EXPECT_EQ(1U, s[0]->OutputCount());
......@@ -2515,7 +2524,8 @@ TEST_F(InstructionSelectorTest, SimdF64x2MulWithDupNegativeTest) {
// The shuffle is a i8x16.dup of lane 0.
EXPECT_EQ(kArm64S128Dup, s[0]->arch_opcode());
EXPECT_EQ(3U, s[0]->InputCount());
EXPECT_EQ(kArm64F64x2Mul, s[1]->arch_opcode());
EXPECT_EQ(kArm64FMul, s[1]->arch_opcode());
EXPECT_EQ(64, LaneSizeField::decode(s[1]->opcode()));
EXPECT_EQ(1U, s[0]->OutputCount());
EXPECT_EQ(2U, s[1]->InputCount());
EXPECT_EQ(1U, s[1]->OutputCount());
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment