Commit 5ce5f429, authored by Zhi An Ng, committed by Commit Bot

[wasm-simd][arm] Prototype extended multiply

Bug: v8:11008
Change-Id: Ic7be8370e3e820d225558995a9ad2295811e98a4
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2567531
Reviewed-by: Bill Budge <bbudge@chromium.org>
Reviewed-by: Jakob Kummerow <jkummerow@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#71629}
parent 996aadbd
......@@ -1862,6 +1862,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ and_(i.OutputRegister(0), i.InputRegister(0),
Operand(kSpeculationPoisonRegister));
break;
case kArmVmullLow: {
  // Emit vmull on the low halves of both 128-bit inputs. The NEON data type
  // is not a separate operand: the instruction selector packed it into the
  // opcode's MiscField, so it is recovered from there.
  const NeonDataType data_type =
      static_cast<NeonDataType>(MiscField::decode(instr->opcode()));
  Simd128Register dst = i.OutputSimd128Register();
  const auto lhs_low = i.InputSimd128Register(0).low();
  const auto rhs_low = i.InputSimd128Register(1).low();
  __ vmull(data_type, dst, lhs_low, rhs_low);
  break;
}
case kArmVmullHigh: {
  // Emit vmull on the high halves of both 128-bit inputs. The NEON data type
  // is not a separate operand: the instruction selector packed it into the
  // opcode's MiscField, so it is recovered from there.
  const NeonDataType data_type =
      static_cast<NeonDataType>(MiscField::decode(instr->opcode()));
  Simd128Register dst = i.OutputSimd128Register();
  const auto lhs_high = i.InputSimd128Register(0).high();
  const auto rhs_high = i.InputSimd128Register(1).high();
  __ vmull(data_type, dst, lhs_high, rhs_high);
  break;
}
case kArmF64x2Splat: {
Simd128Register dst = i.OutputSimd128Register();
DoubleRegister src = i.InputDoubleRegister(0);
......
......@@ -75,6 +75,8 @@ namespace compiler {
V(ArmVabsF64) \
V(ArmVnegF64) \
V(ArmVsqrtF64) \
V(ArmVmullLow) \
V(ArmVmullHigh) \
V(ArmVrintmF32) \
V(ArmVrintmF64) \
V(ArmVrintpF32) \
......
......@@ -76,6 +76,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArmVabsF64:
case kArmVnegF64:
case kArmVsqrtF64:
case kArmVmullLow:
case kArmVmullHigh:
case kArmVrintmF32:
case kArmVrintmF64:
case kArmVrintpF32:
......
......@@ -86,7 +86,8 @@ void VisitRR(InstructionSelector* selector, ArchOpcode opcode, Node* node) {
g.UseRegister(node->InputAt(0)));
}
void VisitRRR(InstructionSelector* selector, ArchOpcode opcode, Node* node) {
void VisitRRR(InstructionSelector* selector, InstructionCode opcode,
Node* node) {
ArmOperandGenerator g(selector);
selector->Emit(opcode, g.DefineAsRegister(node),
g.UseRegister(node->InputAt(0)),
......@@ -3096,6 +3097,30 @@ void InstructionSelector::VisitF64x2Pmax(Node* node) {
VisitF64x2PminOrPMax(this, kArmF64x2Pmax, node);
}
// Table of the extended-multiply operations, one row per operation:
//   V(visitor name suffix, arch opcode, NEON data type)
// The Low/High part of the name selects kArmVmullLow or kArmVmullHigh, and the
// trailing S/U selects the signed or unsigned NEON data type of the source
// lanes (8, 16 or 32 bits).
#define EXT_MUL_LIST(V) \
V(I16x8ExtMulLowI8x16S, kArmVmullLow, NeonS8) \
V(I16x8ExtMulHighI8x16S, kArmVmullHigh, NeonS8) \
V(I16x8ExtMulLowI8x16U, kArmVmullLow, NeonU8) \
V(I16x8ExtMulHighI8x16U, kArmVmullHigh, NeonU8) \
V(I32x4ExtMulLowI16x8S, kArmVmullLow, NeonS16) \
V(I32x4ExtMulHighI16x8S, kArmVmullHigh, NeonS16) \
V(I32x4ExtMulLowI16x8U, kArmVmullLow, NeonU16) \
V(I32x4ExtMulHighI16x8U, kArmVmullHigh, NeonU16) \
V(I64x2ExtMulLowI32x4S, kArmVmullLow, NeonS32) \
V(I64x2ExtMulHighI32x4S, kArmVmullHigh, NeonS32) \
V(I64x2ExtMulLowI32x4U, kArmVmullLow, NeonU32) \
V(I64x2ExtMulHighI32x4U, kArmVmullHigh, NeonU32)
// Defines InstructionSelector::Visit<OPCODE> for one table row. The data type
// is OR-ed into the instruction code through MiscField, so the two arch
// opcodes cover every lane width and signedness.
// NOTE: despite its name, NEONSIZE receives NeonDataType values (NeonS8,
// NeonU16, ...); the code generator decodes the field as NeonDataType.
#define VISIT_EXT_MUL(OPCODE, VMULL, NEONSIZE) \
void InstructionSelector::Visit##OPCODE(Node* node) { \
VisitRRR(this, VMULL | MiscField::encode(NEONSIZE), node); \
}
// Instantiate one visitor per table row, then drop the helper macros.
EXT_MUL_LIST(VISIT_EXT_MUL)
#undef VISIT_EXT_MUL
#undef EXT_MUL_LIST
void InstructionSelector::VisitTruncateFloat32ToInt32(Node* node) {
ArmOperandGenerator g(this);
......
......@@ -2737,30 +2737,7 @@ void InstructionSelector::VisitI64x2Eq(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_S390X && !V8_TARGET_ARCH_ARM64
// && !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_ARM
#if !V8_TARGET_ARCH_ARM64
// Placeholder visitors for prototype SIMD operations. Each one only calls
// UNIMPLEMENTED(). The group sits under the `#if !V8_TARGET_ARCH_ARM64` guard
// directly above, so it is compiled for every target except ARM64.
// TODO(v8:10971) Prototype i16x8.q15mulr_sat_s
void InstructionSelector::VisitI16x8Q15MulRSatS(Node* node) { UNIMPLEMENTED(); }
// TODO(v8:10972) Prototype i64x2 widen i32x4.
// Signed conversions, low and high variants.
void InstructionSelector::VisitI64x2SConvertI32x4Low(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitI64x2SConvertI32x4High(Node* node) {
UNIMPLEMENTED();
}
// Unsigned conversions, low and high variants.
void InstructionSelector::VisitI64x2UConvertI32x4Low(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitI64x2UConvertI32x4High(Node* node) {
UNIMPLEMENTED();
}
// TODO(v8:11002) Prototype i8x16.popcnt.
void InstructionSelector::VisitI8x16Popcnt(Node* node) { UNIMPLEMENTED(); }
#if !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_ARM
// TODO(v8:11008) Prototype extended multiplication.
void InstructionSelector::VisitI64x2ExtMulLowI32x4S(Node* node) {
UNIMPLEMENTED();
......@@ -2798,6 +2775,31 @@ void InstructionSelector::VisitI16x8ExtMulLowI8x16U(Node* node) {
// Placeholder visitor: only calls UNIMPLEMENTED(). The #endif that follows
// closes a `!V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_ARM` region, so this stub
// is compiled only for targets other than ARM64 and ARM.
void InstructionSelector::VisitI16x8ExtMulHighI8x16U(Node* node) {
UNIMPLEMENTED();
}
#endif // !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_ARM
#if !V8_TARGET_ARCH_ARM64
// Placeholder visitors for prototype SIMD operations. Each one only calls
// UNIMPLEMENTED(). The group sits under the `#if !V8_TARGET_ARCH_ARM64` guard
// directly above, so it is compiled for every target except ARM64.
// TODO(v8:10971) Prototype i16x8.q15mulr_sat_s
void InstructionSelector::VisitI16x8Q15MulRSatS(Node* node) { UNIMPLEMENTED(); }
// TODO(v8:10972) Prototype i64x2 widen i32x4.
// Signed conversions, low and high variants.
void InstructionSelector::VisitI64x2SConvertI32x4Low(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitI64x2SConvertI32x4High(Node* node) {
UNIMPLEMENTED();
}
// Unsigned conversions, low and high variants.
void InstructionSelector::VisitI64x2UConvertI32x4Low(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitI64x2UConvertI32x4High(Node* node) {
UNIMPLEMENTED();
}
// TODO(v8:11002) Prototype i8x16.popcnt.
void InstructionSelector::VisitI8x16Popcnt(Node* node) { UNIMPLEMENTED(); }
// TODO(v8:11086) Prototype extended pairwise add.
void InstructionSelector::VisitI32x4ExtAddPairwiseI16x8S(Node* node) {
......
......@@ -4842,9 +4842,9 @@ void Simulator::DecodeAdvancedSIMDTwoOrThreeRegisters(Instruction* instr) {
// Advanced SIMD three registers of different lengths.
int u = instr->Bit(24);
int opc = instr->Bits(11, 8);
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
if (opc == 0b1000) {
// vmlal.u<size> Qd, Dn, Dm
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
if (size != Neon32) UNIMPLEMENTED();
int Vd = instr->VFPDRegValue(kSimd128Precision);
......@@ -4859,33 +4859,46 @@ void Simulator::DecodeAdvancedSIMDTwoOrThreeRegisters(Instruction* instr) {
dst[1] += (src1 >> 32) * (src2 >> 32);
set_neon_register<uint64_t>(Vd, dst);
} else if (opc == 0b1100) {
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vn = instr->VFPNRegValue(kDoublePrecision);
int Vm = instr->VFPMRegValue(kDoublePrecision);
// vmull dispatch. `u` (bit 24 of the instruction) selects the unsigned forms
// when non-zero and the signed forms otherwise. Every supported element size
// calls MultiplyLong<source lane type, result lane type>, where the result
// lane type is twice as wide as the source lane type.
if (u) {
// vmull.u<size> Qd, Dn, Dm
// NOTE(review): size/Vd/Vn/Vm are also declared before this if/else with the
// same expressions; these inner declarations shadow them and look like
// pre-change lines left over in this diff view -- confirm against the file.
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vn = instr->VFPNRegValue(kDoublePrecision);
int Vm = instr->VFPMRegValue(kDoublePrecision);
switch (size) {
case Neon8: {
MultiplyLong<uint8_t, uint16_t>(this, Vd, Vn, Vm);
break;
}
case Neon16: {
MultiplyLong<uint16_t, uint32_t>(this, Vd, Vn, Vm);
break;
}
case Neon32: {
MultiplyLong<uint32_t, uint64_t>(this, Vd, Vn, Vm);
break;
}
// `default:` shares the Neon64 arm: every size not listed above is rejected.
default:
case Neon64: {
UNIMPLEMENTED();
}
}
} else {
// vmull.s<size> Qd, Dn, Dm
// NOTE(review): same duplicated declarations as in the unsigned branch.
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vn = instr->VFPNRegValue(kDoublePrecision);
int Vm = instr->VFPMRegValue(kDoublePrecision);
switch (size) {
case Neon8: {
MultiplyLong<int8_t, int16_t>(this, Vd, Vn, Vm);
break;
}
case Neon16: {
MultiplyLong<int16_t, int32_t>(this, Vd, Vn, Vm);
break;
}
// NOTE(review): as written here `default:` is attached to the Neon32 arm,
// unlike the unsigned branch where it joins Neon64. This is probably a removed
// line of the old code shown without its diff marker -- confirm.
default:
case Neon32: {
MultiplyLong<int32_t, int64_t>(this, Vd, Vn, Vm);
break;
}
case Neon64: {
UNIMPLEMENTED();
}
}
}
}
......
......@@ -1208,6 +1208,10 @@ TEST(Neon) {
COMPARE(vmul(Neon32, q15, q0, q8),
"f260e970 vmul.i32 q15, q0, q8");
COMPARE(vmull(NeonU8, q15, d0, d8),
"f3c0ec08 vmull.u8 q15, d0, d8");
COMPARE(vmull(NeonS16, q15, d0, d8),
"f2d0ec08 vmull.s16 q15, d0, d8");
COMPARE(vmull(NeonU32, q15, d0, d8),
"f3e0ec08 vmull.u32 q15, d0, d8");
COMPARE(vmlal(NeonU32, q15, d0, d8),
......
......@@ -2329,7 +2329,9 @@ WASM_SIMD_TEST_NO_LOWERING(I16x8Q15MulRSatS) {
RunI16x8BinOpTest<int16_t>(execution_tier, lower_simd, kExprI16x8Q15MulRSatS,
SaturateRoundingQMul<int16_t>);
}
#endif // V8_TARGET_ARCH_ARM64
#if V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_ARM
// TODO(v8:11008) Prototype extended multiplication.
namespace {
enum class MulHalf { kLow, kHigh };
......@@ -2441,7 +2443,7 @@ WASM_SIMD_TEST_NO_LOWERING(I64x2ExtMulHighI32x4U) {
kExprI64x2ExtMulHighI32x4U, MultiplyLong,
kExprI32x4Splat, MulHalf::kHigh);
}
#endif // V8_TARGET_ARCH_ARM64
#endif // V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_ARM
WASM_SIMD_TEST(I32x4DotI16x8S) {
WasmRunner<int32_t, int16_t, int16_t> r(execution_tier, lower_simd);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment