Commit a7b9e588 authored by Ng Zhi An, committed by Commit Bot

[wasm-simd] Implement i64x2 neg for arm

Bug: v8:9813
Change-Id: I75ca39612f0420548a56cc32edaa13a36a9713e9
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1900661
Reviewed-by: Bill Budge <bbudge@chromium.org>
Reviewed-by: Michael Starzinger <mstarzinger@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#65041}
parent e533b669
@@ -2621,6 +2621,38 @@ static void DoubleAsTwoUInt32(Double d, uint32_t* lo, uint32_t* hi) {
*hi = i >> 32;
}
// This checks if imm can be encoded into an immediate for vmov.
// See Table A7-15 in ARM DDI 0406C.d.
// Currently only supports the first row of the table.
static bool FitsVmovImm64(uint64_t imm, uint32_t* encoding) {
uint32_t lo = imm & 0xFFFFFFFF;
uint32_t hi = imm >> 32;
if (lo == hi && ((lo & 0xffffff00) == 0)) {
*encoding = ((lo & 0x80) << (24 - 7)); // a
*encoding |= ((lo & 0x70) << (16 - 4)); // bcd
*encoding |= (lo & 0x0f); // efgh
return true;
}
return false;
}
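Aside, not part of the diff: a minimal standalone sketch of the check above, with the bit split worked out for the immediate used in the disassembler test at the bottom of this change. The names here (FitsVmovImm64Sketch) are illustrative, not V8's.

#include <cstdint>
#include <cstdio>

// Mirror of FitsVmovImm64: only the first row of Table A7-15 (an 8-bit
// value replicated into each 32-bit lane) is accepted.
static bool FitsVmovImm64Sketch(uint64_t imm, uint32_t* encoding) {
  uint32_t lo = imm & 0xFFFFFFFF;
  uint32_t hi = imm >> 32;
  if (lo == hi && ((lo & 0xffffff00) == 0)) {
    *encoding = ((lo & 0x80) << 17) |  // a    -> instruction bit 24
                ((lo & 0x70) << 12) |  // bcd  -> instruction bits 18-16
                (lo & 0x0f);           // efgh -> instruction bits 3-0
    return true;
  }
  return false;
}

int main() {
  uint32_t enc;
  // 0x12 in both words: a = 0, bcd = 1, efgh = 2, so enc = 0x10002 --
  // exactly the delta between the expected encodings f2802050 and f2812052
  // in the disassembler test below.
  if (FitsVmovImm64Sketch(0x0000001200000012, &enc)) printf("0x%x\n", enc);
  return 0;
}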
void Assembler::vmov(const QwNeonRegister dst, uint64_t imm) {
uint32_t enc;
if (CpuFeatures::IsSupported(VFPv3) && FitsVmovImm64(imm, &enc)) {
CpuFeatureScope scope(this, VFPv3);
// Instruction details available in ARM DDI 0406C.b, A8-937.
// 001i1(27-23) | D(22) | 000(21-19) | imm3(18-16) | Vd(15-12) | cmode(11-8)
// | 0(7) | Q(6) | op(5) | 4(1) | imm4(3-0)
int vd, d;
dst.split_code(&vd, &d);
emit(kSpecialCondition | 0x05 * B23 | d * B22 | vd * B12 | 0x1 * B6 |
0x1 * B4 | enc);
} else {
UNIMPLEMENTED();
}
}
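For reference, taken from the disassembler test added below, the two encodings this overload produces in this CL:

// vmov(q1, 0)                  -> f2802050  "vmov.i32 q1, 0"
// vmov(q1, 0x0000001200000012) -> f2812052  "vmov.i32 q1, 18"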
// Only works for little endian floating point formats.
// We don't support VFP on the mixed endian floating point platform.
static bool FitsVmovFPImmediate(Double d, uint32_t* encoding) {
......
@@ -850,6 +850,7 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
void vmov(NeonDataType dt, DwVfpRegister dst, int index, Register src);
void vmov(NeonDataType dt, Register dst, DwVfpRegister src, int index);
void vmov(QwNeonRegister dst, uint64_t imm);
void vmov(QwNeonRegister dst, QwNeonRegister src);
void vdup(NeonSize size, QwNeonRegister dst, Register src);
void vdup(NeonSize size, QwNeonRegister dst, DwVfpRegister src, int index);
......
@@ -295,15 +295,16 @@ enum LFlag {
// Neon sizes.
enum NeonSize { Neon8 = 0x0, Neon16 = 0x1, Neon32 = 0x2, Neon64 = 0x3 };
// NEON data type, top bit set for unsigned data types.
enum NeonDataType {
NeonS8 = 0,
NeonS16 = 1,
NeonS32 = 2,
NeonS64 = 3,
NeonU8 = 4,
NeonU16 = 5,
NeonU32 = 6,
NeonU64 = 7
};
inline int NeonU(NeonDataType dt) { return static_cast<int>(dt) >> 2; }
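A compile-time sketch (the static_asserts are illustrative additions, not in the CL) of what filling the old gap buys: the unsigned flag and the size now extract uniformly across all eight values.

static_assert((NeonU64 >> 2) == 1 && (NeonS64 >> 2) == 0,
              "bit 2 is the unsigned flag, as NeonU() extracts");
static_assert((NeonU64 & 0x3) == Neon64 && (NeonS64 & 0x3) == Neon64,
              "the low two bits are the NeonSize");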
......
@@ -1923,6 +1923,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ ReplaceLane(dst, dst, i.InputRegister(3), NeonS32, lane * 2 + 1);
break;
}
case kArmI64x2Neg: {
Simd128Register dst = i.OutputSimd128Register();
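// ARMv7 NEON has no vneg.s64, so negate as 0 - src: zero the destination,
// then subtract the input with a saturating vqsub.s64.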
__ vmov(dst, static_cast<uint64_t>(0));
__ vqsub(NeonS64, dst, dst, i.InputSimd128Register(0));
break;
}
case kArmF32x4Splat: {
int src_code = i.InputFloatRegister(0).code();
__ vdup(Neon32, i.OutputSimd128Register(),
......
@@ -144,6 +144,7 @@ namespace compiler {
V(ArmF64x2Le) \
V(ArmI64x2SplatI32Pair) \
V(ArmI64x2ReplaceLaneI32Pair) \
V(ArmI64x2Neg) \
V(ArmF32x4Splat) \
V(ArmF32x4ExtractLane) \
V(ArmF32x4ReplaceLane) \
......
@@ -124,6 +124,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArmF64x2Le:
case kArmI64x2SplatI32Pair:
case kArmI64x2ReplaceLaneI32Pair:
case kArmI64x2Neg:
case kArmF32x4Splat:
case kArmF32x4ExtractLane:
case kArmF32x4ReplaceLane:
......
@@ -2588,6 +2588,12 @@ void InstructionSelector::VisitI64x2ReplaceLaneI32Pair(Node* node) {
low, high);
}
void InstructionSelector::VisitI64x2Neg(Node* node) {
ArmOperandGenerator g(this);
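// The code generator zeroes the output before reading the input, so the
// input must live in a different register; hence UseUniqueRegister rather
// than UseRegister.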
Emit(kArmI64x2Neg, g.DefineAsRegister(node),
g.UseUniqueRegister(node->InputAt(0)));
}
void InstructionSelector::VisitF32x4Sqrt(Node* node) {
ArmOperandGenerator g(this);
// Use fixed registers in the lower 8 Q-registers so we can directly access
......
@@ -2625,7 +2625,6 @@ void InstructionSelector::VisitF64x2UConvertI64x2(Node* node) {
#if !V8_TARGET_ARCH_IA32
void InstructionSelector::VisitF64x2Min(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Max(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2Neg(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2Shl(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2ShrS(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2Add(Node* node) { UNIMPLEMENTED(); }
......
@@ -1991,8 +1991,27 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
break;
}
case 5:
if (instr->Bit(23) == 1 && instr->Bits(21, 19) == 0 &&
instr->Bit(7) == 0 && instr->Bit(4) == 1) {
// One register and a modified immediate value, see ARM DDI 0406C.d
// A7.4.6.
byte cmode = instr->Bits(11, 8);
switch (cmode) {
case 0: {
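// vmov.i32 Qd, #<imm8>: reassemble abcdefgh from i:imm3:imm4, the
// inverse of FitsVmovImm64 in the assembler.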
int vd = instr->VFPDRegValue(kSimd128Precision);
int a = instr->Bit(24);
int bcd = instr->Bits(18, 16);
int efgh = instr->Bits(3, 0);
int imm64 = a << 7 | bcd << 4 | efgh;
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"vmov.i32 q%d, %d", vd, imm64);
break;
}
default:
Unknown(instr);
}
} else if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) &&
(instr->Bit(4) == 1)) {
// vmovl signed
if ((instr->VdValue() & 1) != 0) Unknown(instr);
int Vd = (instr->Bit(22) << 3) | (instr->VdValue() >> 1);
......
@@ -3931,7 +3931,7 @@ void SubSaturate(Simulator* simulator, int Vd, int Vm, int Vn) {
simulator->get_neon_register(Vn, src1);
simulator->get_neon_register(Vm, src2);
for (int i = 0; i < kLanes; i++) {
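// SaturateSub<T> works for every lane type including int64_t, for which
// there is no wider integer type to widen into before clamping.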
src1[i] = SaturateSub<T>(src1[i], src2[i]);
}
simulator->set_neon_register(Vd, src1);
}
@@ -4294,6 +4294,9 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
case Neon32:
SubSaturate<int32_t>(this, Vd, Vm, Vn);
break;
case Neon64:
SubSaturate<int64_t>(this, Vd, Vm, Vn);
break;
default:
UNREACHABLE();
break;
@@ -4535,8 +4538,28 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
break;
}
case 5:
if (instr->Bit(23) == 1 && instr->Bits(21, 19) == 0 &&
instr->Bit(7) == 0 && instr->Bit(4) == 1) {
// One register and a modified immediate value, see ARM DDI 0406C.d
// A7.4.6. Handles vmov, vorr, vmvn, vbic.
// Only handle vmov.i32 for now.
byte cmode = instr->Bits(11, 8);
switch (cmode) {
case 0: {
// vmov.i32 Qd, #<imm>
int vd = instr->VFPDRegValue(kSimd128Precision);
uint64_t imm = instr->Bit(24) << 7; // i
imm |= instr->Bits(18, 16) << 4; // imm3
imm |= instr->Bits(3, 0); // imm4
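// cmode 0 replicates the 8-bit immediate into each 32-bit lane: build
// the 64-bit lane pattern, then write it to both halves of the
// q-register.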
imm |= imm << 32;
set_neon_register(vd, {imm, imm});
break;
}
default:
UNIMPLEMENTED();
}
} else if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) &&
(instr->Bit(4) == 1)) {
// vmovl signed
if ((instr->VdValue() & 1) != 0) UNIMPLEMENTED();
int Vd = instr->VFPDRegValue(kSimd128Precision);
......
@@ -1038,6 +1038,10 @@ TEST(Neon) {
"f22e01fe vmov q0, q15");
COMPARE(vmov(q8, q9),
"f26201f2 vmov q8, q9");
COMPARE(vmov(q1, 0),
"f2802050 vmov.i32 q1, 0");
COMPARE(vmov(q1, 0x0000001200000012),
"f2812052 vmov.i32 q1, 18");
COMPARE(vmvn(q0, q15),
"f3b005ee vmvn q0, q15");
COMPARE(vmvn(q8, q9),
......
@@ -964,7 +964,6 @@ WASM_SIMD_TEST_NO_LOWERING(I64x2ReplaceLane) {
}
}
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_IA32
void RunI64x2UnOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
WasmOpcode opcode, Int64UnOp expected_op) {
WasmRunner<int32_t, int64_t> r(execution_tier, lower_simd);
@@ -991,6 +990,7 @@ WASM_SIMD_TEST_NO_LOWERING(I64x2Neg) {
base::NegateWithWraparound);
}
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_IA32
void RunI64x2ShiftOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
WasmOpcode opcode, Int64ShiftOp expected_op) {
// Intentionally shift by 64, should be no-op.
......