Commit e14de8b9 authored by Zhi An Ng, committed by Commit Bot

[wasm-simd][arm] Implement saturating rounding multiply high

Removing ifdef guards since our 4 supported architectures
implement this now.

Bug: v8:10971
Change-Id: Ic0295b1492a6316df61340a38f3e6d06d8fe64ed
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2620900
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Jakob Kummerow <jkummerow@chromium.org>
Reviewed-by: Bill Budge <bbudge@chromium.org>
Cr-Commit-Position: refs/heads/master@{#72023}
parent d6c4c884
......@@ -4308,7 +4308,8 @@ enum IntegerBinOp {
VCEQ,
VCGE,
VCGT,
VRHADD
VRHADD,
VQRDMULH
};
static Instr EncodeNeonBinOp(IntegerBinOp op, NeonDataType dt,
......@@ -4352,6 +4353,9 @@ static Instr EncodeNeonBinOp(IntegerBinOp op, NeonDataType dt,
case VRHADD:
op_encoding = B8;
break;
case VQRDMULH:
op_encoding = B24 | 0xB * B8;
break;
default:
UNREACHABLE();
}
......@@ -4943,6 +4947,13 @@ void Assembler::vpaddl(NeonDataType dt, QwNeonRegister dst,
NeonDataTypeToSize(dt), dst.code(), src.code()));
}
// Qd = vqrdmulh(Qn, Qm) SIMD Vector Saturating Rounding Doubling Multiply
// Returning High Half.
// Requires NEON; only the signed 16-bit and 32-bit data types are accepted.
void Assembler::vqrdmulh(NeonDataType dt, QwNeonRegister dst,
                         QwNeonRegister src1, QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  DCHECK(dt == NeonS16 || dt == NeonS32);
  // Build the instruction word through the shared integer bin-op encoder,
  // then append it to the instruction stream.
  const auto encoded = EncodeNeonBinOp(VQRDMULH, dt, dst, src1, src2);
  emit(encoded);
}
void Assembler::vcnt(QwNeonRegister dst, QwNeonRegister src) {
// Qd = vcnt(Qm) SIMD Vector Count Set Bits.
// Instruction details available at ARM DDI 0487F.b, F6-5094.
......
......@@ -917,6 +917,8 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
DwVfpRegister src2);
void vpaddl(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src);
// Saturating rounding multiply high (NEON): dst = vqrdmulh(src1, src2).
// dt must be NeonS16 or NeonS32 (checked with a DCHECK in the definition).
void vqrdmulh(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
QwNeonRegister src2);
// ARMv8 rounding instructions (NEON).
void vrintm(NeonDataType dt, const QwNeonRegister dst,
......
......@@ -2711,6 +2711,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ vmov(NeonU16, dst, tmp2.low(), 0);
break;
}
case kArmI16x8Q15MulRSatS: {
__ vqrdmulh(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
break;
}
case kArmI8x16Splat: {
__ vdup(Neon8, i.OutputSimd128Register(), i.InputRegister(0));
break;
......
......@@ -253,6 +253,7 @@ namespace compiler {
V(ArmI16x8RoundingAverageU) \
V(ArmI16x8Abs) \
V(ArmI16x8BitMask) \
V(ArmI16x8Q15MulRSatS) \
V(ArmI8x16Splat) \
V(ArmI8x16ExtractLaneS) \
V(ArmI8x16ReplaceLane) \
......
......@@ -233,6 +233,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArmI16x8RoundingAverageU:
case kArmI16x8Abs:
case kArmI16x8BitMask:
case kArmI16x8Q15MulRSatS:
case kArmI8x16Splat:
case kArmI8x16ExtractLaneS:
case kArmI8x16ReplaceLane:
......
......@@ -2703,6 +2703,7 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
V(I16x8GtU, kArmI16x8GtU) \
V(I16x8GeU, kArmI16x8GeU) \
V(I16x8RoundingAverageU, kArmI16x8RoundingAverageU) \
V(I16x8Q15MulRSatS, kArmI16x8Q15MulRSatS) \
V(I8x16SConvertI16x8, kArmI8x16SConvertI16x8) \
V(I8x16Add, kArmI8x16Add) \
V(I8x16AddSatS, kArmI8x16AddSatS) \
......
......@@ -2748,11 +2748,6 @@ void InstructionSelector::VisitI64x2Eq(Node* node) { UNIMPLEMENTED(); }
// && !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_ARM
// && !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_MIPS
#if !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32
// TODO(v8:10971) Prototype i16x8.q15mulr_sat_s
void InstructionSelector::VisitI16x8Q15MulRSatS(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32
#if !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_X64
// TODO(v8:10972) Prototype i64x2 widen i32x4.
void InstructionSelector::VisitI64x2SConvertI32x4Low(Node* node) {
......
......@@ -2077,6 +2077,8 @@ void Decoder::DecodeAdvancedSIMDDataProcessing(Instruction* instr) {
Format(instr, "vpmin.s'size3 'Dd, 'Dn, 'Dm");
} else if (!u && opc == 0xA && !op1) {
Format(instr, "vpmax.s'size3 'Dd, 'Dn, 'Dm");
} else if (u && opc == 0XB) {
Format(instr, "vqrdmulh.s'size3 'Qd, 'Qn, 'Qm");
} else if (!u && opc == 0xB) {
Format(instr, "vpadd.i'size3 'Dd, 'Dn, 'Dm");
} else if (!u && !(sz >> 1) && opc == 0xD && !op1) {
......
......@@ -5485,6 +5485,15 @@ void Simulator::DecodeAdvancedSIMDDataProcessing(Instruction* instr) {
}
}
set_neon_register(Vd, dst);
} else if (u && opc == 0xB) {
// vqrdmulh.<dt> Qd, Qm, Qn
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
if (size == Neon16) {
Binop<int16_t>(this, Vd, Vm, Vn, SaturateRoundingQMul<int16_t>);
} else {
DCHECK_EQ(Neon32, size);
Binop<int32_t>(this, Vd, Vm, Vn, SaturateRoundingQMul<int32_t>);
}
} else {
UNIMPLEMENTED();
}
......
......@@ -1226,6 +1226,11 @@ TEST(Neon) {
COMPARE(vmul(Neon32, q15, q0, q8),
"f260e970 vmul.i32 q15, q0, q8");
COMPARE(vqrdmulh(NeonS16, q0, q1, q8),
"f3120b60 vqrdmulh.s16 q0, q1, q8");
COMPARE(vqrdmulh(NeonS32, q15, q0, q8),
"f360eb60 vqrdmulh.s32 q15, q0, q8");
COMPARE(vmull(NeonU8, q15, d0, d8),
"f3c0ec08 vmull.u8 q15, d0, d8");
COMPARE(vmull(NeonS16, q15, d0, d8),
......
......@@ -2328,14 +2328,11 @@ WASM_SIMD_TEST(I16x8RoundingAverageU) {
base::RoundingAverageUnsigned);
}
#if V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32
// TODO(v8:10971) Prototype i16x8.q15mulr_sat_s
// Exercises the kExprI16x8Q15MulRSatS opcode through the shared I16x8
// binary-op test runner, passing SaturateRoundingQMul<int16_t> as the scalar
// operation (presumably the expected-value reference — confirm against
// RunI16x8BinOpTest).
// NOTE(review): FLAG_SCOPE looks like it turns on wasm_simd_post_mvp for the
// duration of this test body — confirm against the macro definition.
WASM_SIMD_TEST_NO_LOWERING(I16x8Q15MulRSatS) {
FLAG_SCOPE(wasm_simd_post_mvp);
RunI16x8BinOpTest<int16_t>(execution_tier, lower_simd, kExprI16x8Q15MulRSatS,
SaturateRoundingQMul<int16_t>);
}
#endif // V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32
namespace {
enum class MulHalf { kLow, kHigh };
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment