Commit e14de8b9, authored by Zhi An Ng, committed by Commit Bot

[wasm-simd][arm] Implement saturating rounding multiply high

Removing ifdef guards since our 4 supported architectures
implement this now.

Bug: v8:10971
Change-Id: Ic0295b1492a6316df61340a38f3e6d06d8fe64ed
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2620900
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Jakob Kummerow <jkummerow@chromium.org>
Reviewed-by: Bill Budge <bbudge@chromium.org>
Cr-Commit-Position: refs/heads/master@{#72023}
parent d6c4c884
...@@ -4308,7 +4308,8 @@ enum IntegerBinOp { ...@@ -4308,7 +4308,8 @@ enum IntegerBinOp {
VCEQ, VCEQ,
VCGE, VCGE,
VCGT, VCGT,
VRHADD VRHADD,
VQRDMULH
}; };
static Instr EncodeNeonBinOp(IntegerBinOp op, NeonDataType dt, static Instr EncodeNeonBinOp(IntegerBinOp op, NeonDataType dt,
...@@ -4352,6 +4353,9 @@ static Instr EncodeNeonBinOp(IntegerBinOp op, NeonDataType dt, ...@@ -4352,6 +4353,9 @@ static Instr EncodeNeonBinOp(IntegerBinOp op, NeonDataType dt,
case VRHADD: case VRHADD:
op_encoding = B8; op_encoding = B8;
break; break;
case VQRDMULH:
op_encoding = B24 | 0xB * B8;
break;
default: default:
UNREACHABLE(); UNREACHABLE();
} }
...@@ -4943,6 +4947,13 @@ void Assembler::vpaddl(NeonDataType dt, QwNeonRegister dst, ...@@ -4943,6 +4947,13 @@ void Assembler::vpaddl(NeonDataType dt, QwNeonRegister dst,
NeonDataTypeToSize(dt), dst.code(), src.code())); NeonDataTypeToSize(dt), dst.code(), src.code()));
} }
void Assembler::vqrdmulh(NeonDataType dt, QwNeonRegister dst,
                         QwNeonRegister src1, QwNeonRegister src2) {
  // Qd = vqrdmulh(Qn, Qm) SIMD saturating rounding multiply high.
  // dst is Qd, src1 is Qn and src2 is Qm.
  DCHECK(IsEnabled(NEON));
  // Only the NeonS16 and NeonS32 data types are accepted here.
  DCHECK(dt == NeonS16 || dt == NeonS32);
  // Build the instruction word through the shared NEON binary-op encoder,
  // then append it to the instruction stream.
  const Instr encoded = EncodeNeonBinOp(VQRDMULH, dt, dst, src1, src2);
  emit(encoded);
}
void Assembler::vcnt(QwNeonRegister dst, QwNeonRegister src) { void Assembler::vcnt(QwNeonRegister dst, QwNeonRegister src) {
// Qd = vcnt(Qm) SIMD Vector Count Set Bits. // Qd = vcnt(Qm) SIMD Vector Count Set Bits.
// Instruction details available at ARM DDI 0487F.b, F6-5094. // Instruction details available at ARM DDI 0487F.b, F6-5094.
......
...@@ -917,6 +917,8 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase { ...@@ -917,6 +917,8 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
DwVfpRegister src2); DwVfpRegister src2);
void vpaddl(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src); void vpaddl(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src);
void vqrdmulh(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
QwNeonRegister src2);
// ARMv8 rounding instructions (NEON). // ARMv8 rounding instructions (NEON).
void vrintm(NeonDataType dt, const QwNeonRegister dst, void vrintm(NeonDataType dt, const QwNeonRegister dst,
......
...@@ -2711,6 +2711,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2711,6 +2711,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ vmov(NeonU16, dst, tmp2.low(), 0); __ vmov(NeonU16, dst, tmp2.low(), 0);
break; break;
} }
case kArmI16x8Q15MulRSatS: {
__ vqrdmulh(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmI8x16Splat: { case kArmI8x16Splat: {
__ vdup(Neon8, i.OutputSimd128Register(), i.InputRegister(0)); __ vdup(Neon8, i.OutputSimd128Register(), i.InputRegister(0));
break; break;
......
...@@ -253,6 +253,7 @@ namespace compiler { ...@@ -253,6 +253,7 @@ namespace compiler {
V(ArmI16x8RoundingAverageU) \ V(ArmI16x8RoundingAverageU) \
V(ArmI16x8Abs) \ V(ArmI16x8Abs) \
V(ArmI16x8BitMask) \ V(ArmI16x8BitMask) \
V(ArmI16x8Q15MulRSatS) \
V(ArmI8x16Splat) \ V(ArmI8x16Splat) \
V(ArmI8x16ExtractLaneS) \ V(ArmI8x16ExtractLaneS) \
V(ArmI8x16ReplaceLane) \ V(ArmI8x16ReplaceLane) \
......
...@@ -233,6 +233,7 @@ int InstructionScheduler::GetTargetInstructionFlags( ...@@ -233,6 +233,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArmI16x8RoundingAverageU: case kArmI16x8RoundingAverageU:
case kArmI16x8Abs: case kArmI16x8Abs:
case kArmI16x8BitMask: case kArmI16x8BitMask:
case kArmI16x8Q15MulRSatS:
case kArmI8x16Splat: case kArmI8x16Splat:
case kArmI8x16ExtractLaneS: case kArmI8x16ExtractLaneS:
case kArmI8x16ReplaceLane: case kArmI8x16ReplaceLane:
......
...@@ -2703,6 +2703,7 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) { ...@@ -2703,6 +2703,7 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
V(I16x8GtU, kArmI16x8GtU) \ V(I16x8GtU, kArmI16x8GtU) \
V(I16x8GeU, kArmI16x8GeU) \ V(I16x8GeU, kArmI16x8GeU) \
V(I16x8RoundingAverageU, kArmI16x8RoundingAverageU) \ V(I16x8RoundingAverageU, kArmI16x8RoundingAverageU) \
V(I16x8Q15MulRSatS, kArmI16x8Q15MulRSatS) \
V(I8x16SConvertI16x8, kArmI8x16SConvertI16x8) \ V(I8x16SConvertI16x8, kArmI8x16SConvertI16x8) \
V(I8x16Add, kArmI8x16Add) \ V(I8x16Add, kArmI8x16Add) \
V(I8x16AddSatS, kArmI8x16AddSatS) \ V(I8x16AddSatS, kArmI8x16AddSatS) \
......
...@@ -2748,11 +2748,6 @@ void InstructionSelector::VisitI64x2Eq(Node* node) { UNIMPLEMENTED(); } ...@@ -2748,11 +2748,6 @@ void InstructionSelector::VisitI64x2Eq(Node* node) { UNIMPLEMENTED(); }
// && !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_ARM // && !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_ARM
// && !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_MIPS // && !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_MIPS
#if !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32
// TODO(v8:10971) Prototype i16x8.q15mulr_sat_s
void InstructionSelector::VisitI16x8Q15MulRSatS(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32
#if !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_X64 #if !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_X64
// TODO(v8:10972) Prototype i64x2 widen i32x4. // TODO(v8:10972) Prototype i64x2 widen i32x4.
void InstructionSelector::VisitI64x2SConvertI32x4Low(Node* node) { void InstructionSelector::VisitI64x2SConvertI32x4Low(Node* node) {
......
...@@ -2077,6 +2077,8 @@ void Decoder::DecodeAdvancedSIMDDataProcessing(Instruction* instr) { ...@@ -2077,6 +2077,8 @@ void Decoder::DecodeAdvancedSIMDDataProcessing(Instruction* instr) {
Format(instr, "vpmin.s'size3 'Dd, 'Dn, 'Dm"); Format(instr, "vpmin.s'size3 'Dd, 'Dn, 'Dm");
} else if (!u && opc == 0xA && !op1) { } else if (!u && opc == 0xA && !op1) {
Format(instr, "vpmax.s'size3 'Dd, 'Dn, 'Dm"); Format(instr, "vpmax.s'size3 'Dd, 'Dn, 'Dm");
} else if (u && opc == 0XB) {
Format(instr, "vqrdmulh.s'size3 'Qd, 'Qn, 'Qm");
} else if (!u && opc == 0xB) { } else if (!u && opc == 0xB) {
Format(instr, "vpadd.i'size3 'Dd, 'Dn, 'Dm"); Format(instr, "vpadd.i'size3 'Dd, 'Dn, 'Dm");
} else if (!u && !(sz >> 1) && opc == 0xD && !op1) { } else if (!u && !(sz >> 1) && opc == 0xD && !op1) {
......
...@@ -5485,6 +5485,15 @@ void Simulator::DecodeAdvancedSIMDDataProcessing(Instruction* instr) { ...@@ -5485,6 +5485,15 @@ void Simulator::DecodeAdvancedSIMDDataProcessing(Instruction* instr) {
} }
} }
set_neon_register(Vd, dst); set_neon_register(Vd, dst);
} else if (u && opc == 0xB) {
// vqrdmulh.<dt> Qd, Qm, Qn
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
if (size == Neon16) {
Binop<int16_t>(this, Vd, Vm, Vn, SaturateRoundingQMul<int16_t>);
} else {
DCHECK_EQ(Neon32, size);
Binop<int32_t>(this, Vd, Vm, Vn, SaturateRoundingQMul<int32_t>);
}
} else { } else {
UNIMPLEMENTED(); UNIMPLEMENTED();
} }
......
...@@ -1226,6 +1226,11 @@ TEST(Neon) { ...@@ -1226,6 +1226,11 @@ TEST(Neon) {
COMPARE(vmul(Neon32, q15, q0, q8), COMPARE(vmul(Neon32, q15, q0, q8),
"f260e970 vmul.i32 q15, q0, q8"); "f260e970 vmul.i32 q15, q0, q8");
COMPARE(vqrdmulh(NeonS16, q0, q1, q8),
"f3120b60 vqrdmulh.s16 q0, q1, q8");
COMPARE(vqrdmulh(NeonS32, q15, q0, q8),
"f360eb60 vqrdmulh.s32 q15, q0, q8");
COMPARE(vmull(NeonU8, q15, d0, d8), COMPARE(vmull(NeonU8, q15, d0, d8),
"f3c0ec08 vmull.u8 q15, d0, d8"); "f3c0ec08 vmull.u8 q15, d0, d8");
COMPARE(vmull(NeonS16, q15, d0, d8), COMPARE(vmull(NeonS16, q15, d0, d8),
......
...@@ -2328,14 +2328,11 @@ WASM_SIMD_TEST(I16x8RoundingAverageU) { ...@@ -2328,14 +2328,11 @@ WASM_SIMD_TEST(I16x8RoundingAverageU) {
base::RoundingAverageUnsigned); base::RoundingAverageUnsigned);
} }
#if V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32
// TODO(v8:10971) Prototype i16x8.q15mulr_sat_s
WASM_SIMD_TEST_NO_LOWERING(I16x8Q15MulRSatS) { WASM_SIMD_TEST_NO_LOWERING(I16x8Q15MulRSatS) {
FLAG_SCOPE(wasm_simd_post_mvp); FLAG_SCOPE(wasm_simd_post_mvp);
RunI16x8BinOpTest<int16_t>(execution_tier, lower_simd, kExprI16x8Q15MulRSatS, RunI16x8BinOpTest<int16_t>(execution_tier, lower_simd, kExprI16x8Q15MulRSatS,
SaturateRoundingQMul<int16_t>); SaturateRoundingQMul<int16_t>);
} }
#endif // V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32
namespace { namespace {
enum class MulHalf { kLow, kHigh }; enum class MulHalf { kLow, kHigh };
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment