Commit a7b9e588 authored by Ng Zhi An, committed by Commit Bot

[wasm-simd] Implement i64x2 neg for arm

Bug: v8:9813
Change-Id: I75ca39612f0420548a56cc32edaa13a36a9713e9
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1900661
Reviewed-by: Bill Budge <bbudge@chromium.org>
Reviewed-by: Michael Starzinger <mstarzinger@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#65041}
parent e533b669
......@@ -2621,6 +2621,38 @@ static void DoubleAsTwoUInt32(Double d, uint32_t* lo, uint32_t* hi) {
*hi = i >> 32;
}
// This checks if imm can be encoded into an immediate for vmov.
// See Table A7-15 in ARM DDI 0406C.d.
// Currently only supports the first row of the table.
static bool FitsVmovImm64(uint64_t imm, uint32_t* encoding) {
uint32_t lo = imm & 0xFFFFFFFF;
uint32_t hi = imm >> 32;
if (lo == hi && ((lo & 0xffffff00) == 0)) {
*encoding = ((lo & 0x80) << (24 - 7)); // a
*encoding |= ((lo & 0x70) << (16 - 4)); // bcd
*encoding |= (lo & 0x0f); // efgh
return true;
}
return false;
}
void Assembler::vmov(const QwNeonRegister dst, uint64_t imm) {
uint32_t enc;
if (CpuFeatures::IsSupported(VFPv3) && FitsVmovImm64(imm, &enc)) {
CpuFeatureScope scope(this, VFPv3);
// Instruction details available in ARM DDI 0406C.b, A8-937.
// 001i1(27-23) | D(22) | 000(21-19) | imm3(18-16) | Vd(15-12) | cmode(11-8)
// | 0(7) | Q(6) | op(5) | 1(4) | imm4(3-0)
int vd, d;
dst.split_code(&vd, &d);
emit(kSpecialCondition | 0x05 * B23 | d * B22 | vd * B12 | 0x1 * B6 |
0x1 * B4 | enc);
} else {
UNIMPLEMENTED();
}
}
// Only works for little endian floating point formats.
// We don't support VFP on the mixed endian floating point platform.
static bool FitsVmovFPImmediate(Double d, uint32_t* encoding) {
......
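For reference, a standalone sketch (plain C++, not part of the CL) that mirrors FitsVmovImm64 above and walks through the value exercised by the disassembler test further down: both 32-bit halves of 0x0000001200000012 equal 0x12, so the 8-bit immediate abcdefgh = 0x12 splits into a = 0, bcd = 0b001, efgh = 0b0010, and the assembled word comes out as f2812052 (vmov.i32 q1, 18).

#include <cassert>
#include <cstdint>

// Mirror of FitsVmovImm64: only the first row of Table A7-15 is handled
// (cmode == 0000, an 8-bit immediate replicated into every 32-bit lane).
static bool FitsVmovImm64Sketch(uint64_t imm, uint32_t* encoding) {
  uint32_t lo = imm & 0xFFFFFFFF;
  uint32_t hi = imm >> 32;
  if (lo == hi && ((lo & 0xffffff00) == 0)) {
    *encoding = (lo & 0x80) << (24 - 7);   // a    -> bit 24
    *encoding |= (lo & 0x70) << (16 - 4);  // bcd  -> bits 18-16
    *encoding |= lo & 0x0f;                // efgh -> bits 3-0
    return true;
  }
  return false;
}

int main() {
  uint32_t enc;
  assert(FitsVmovImm64Sketch(0x0000001200000012, &enc));   // encodes
  assert(enc == ((1u << 16) | 0x2));                       // bcd = 001, efgh = 0010
  assert(!FitsVmovImm64Sketch(0x0000010000000100, &enc));  // 9-bit value: rejected
  assert(!FitsVmovImm64Sketch(0x0000001200000034, &enc));  // halves differ: rejected
  return 0;
}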
......@@ -850,6 +850,7 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
void vmov(NeonDataType dt, DwVfpRegister dst, int index, Register src);
void vmov(NeonDataType dt, Register dst, DwVfpRegister src, int index);
void vmov(QwNeonRegister dst, uint64_t imm);
void vmov(QwNeonRegister dst, QwNeonRegister src);
void vdup(NeonSize size, QwNeonRegister dst, Register src);
void vdup(NeonSize size, QwNeonRegister dst, DwVfpRegister src, int index);
......
......@@ -295,15 +295,16 @@ enum LFlag {
// Neon sizes.
enum NeonSize { Neon8 = 0x0, Neon16 = 0x1, Neon32 = 0x2, Neon64 = 0x3 };
// NEON data type
// NEON data type, top bit set for unsigned data types.
enum NeonDataType {
NeonS8 = 0,
NeonS16 = 1,
NeonS32 = 2,
// Gap to make it easier to extract U and size.
NeonS64 = 3,
NeonU8 = 4,
NeonU16 = 5,
NeonU32 = 6
NeonU32 = 6,
NeonU64 = 7
};
inline int NeonU(NeonDataType dt) { return static_cast<int>(dt) >> 2; }
......
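A small standalone check (not V8 code) of why this numbering works: the top bit is the U flag that NeonU() extracts, and the low two bits coincide with the NeonSize values above, so NeonS64 and NeonU64 both map onto Neon64. The "& 0x3" size helper is an assumption added for illustration; only NeonU appears in this hunk, and constexpr is used so the checks compile standalone.

// Enums copied from the diff above.
enum NeonSize { Neon8 = 0x0, Neon16 = 0x1, Neon32 = 0x2, Neon64 = 0x3 };
enum NeonDataType {
  NeonS8 = 0, NeonS16 = 1, NeonS32 = 2, NeonS64 = 3,
  NeonU8 = 4, NeonU16 = 5, NeonU32 = 6, NeonU64 = 7
};
constexpr int NeonU(NeonDataType dt) { return static_cast<int>(dt) >> 2; }
constexpr int NeonSz(NeonDataType dt) { return static_cast<int>(dt) & 0x3; }  // assumed helper

static_assert(NeonU(NeonU64) == 1 && NeonU(NeonS64) == 0,
              "top bit is the unsigned flag");
static_assert(NeonSz(NeonU64) == Neon64 && NeonSz(NeonS64) == Neon64,
              "low two bits give the element size");
int main() { return 0; }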
......@@ -1923,6 +1923,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ ReplaceLane(dst, dst, i.InputRegister(3), NeonS32, lane * 2 + 1);
break;
}
case kArmI64x2Neg: {
Simd128Register dst = i.OutputSimd128Register();
__ vmov(dst, static_cast<uint64_t>(0));
__ vqsub(NeonS64, dst, dst, i.InputSimd128Register(0));
break;
}
case kArmF32x4Splat: {
int src_code = i.InputFloatRegister(0).code();
__ vdup(Neon32, i.OutputSimd128Register(),
......
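The generated sequence materializes zero in dst and then subtracts the input, so each lane computes 0 - x. A minimal scalar model of one lane (illustrative only, not V8 code), following the saturating VQSUB semantics that the simulator change below implements:

#include <cassert>
#include <cstdint>
#include <limits>

// One i64 lane of "vmov dst, #0; vqsub.s64 dst, dst, src": 0 - x, with the
// subtraction saturating at the int64_t bounds the way VQSUB does.
int64_t I64x2NegLane(int64_t x) {
  if (x == std::numeric_limits<int64_t>::min()) {
    return std::numeric_limits<int64_t>::max();  // 0 - INT64_MIN saturates.
  }
  return -x;
}

int main() {
  assert(I64x2NegLane(5) == -5);
  assert(I64x2NegLane(-7) == 7);
  return 0;
}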
......@@ -144,6 +144,7 @@ namespace compiler {
V(ArmF64x2Le) \
V(ArmI64x2SplatI32Pair) \
V(ArmI64x2ReplaceLaneI32Pair) \
V(ArmI64x2Neg) \
V(ArmF32x4Splat) \
V(ArmF32x4ExtractLane) \
V(ArmF32x4ReplaceLane) \
......
......@@ -124,6 +124,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArmF64x2Le:
case kArmI64x2SplatI32Pair:
case kArmI64x2ReplaceLaneI32Pair:
case kArmI64x2Neg:
case kArmF32x4Splat:
case kArmF32x4ExtractLane:
case kArmF32x4ReplaceLane:
......
......@@ -2588,6 +2588,12 @@ void InstructionSelector::VisitI64x2ReplaceLaneI32Pair(Node* node) {
low, high);
}
void InstructionSelector::VisitI64x2Neg(Node* node) {
ArmOperandGenerator g(this);
Emit(kArmI64x2Neg, g.DefineAsRegister(node),
g.UseUniqueRegister(node->InputAt(0)));
}
void InstructionSelector::VisitF32x4Sqrt(Node* node) {
ArmOperandGenerator g(this);
// Use fixed registers in the lower 8 Q-registers so we can directly access
......
......@@ -2625,7 +2625,6 @@ void InstructionSelector::VisitF64x2UConvertI64x2(Node* node) {
#if !V8_TARGET_ARCH_IA32
void InstructionSelector::VisitF64x2Min(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Max(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2Neg(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2Shl(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2ShrS(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI64x2Add(Node* node) { UNIMPLEMENTED(); }
......
......@@ -1991,8 +1991,27 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
break;
}
case 5:
if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) &&
(instr->Bit(4) == 1)) {
if (instr->Bit(23) == 1 && instr->Bits(21, 19) == 0 &&
instr->Bit(7) == 0 && instr->Bit(4) == 1) {
// One register and a modified immediate value, see ARM DDI 0406C.d
// A7.4.6.
byte cmode = instr->Bits(11, 8);
switch (cmode) {
case 0: {
int vd = instr->VFPDRegValue(kSimd128Precision);
int a = instr->Bit(24);
int bcd = instr->Bits(18, 16);
int efgh = instr->Bits(3, 0);
int imm64 = a << 7 | bcd << 4 | efgh;
out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_,
"vmov.i32 q%d, %d", vd, imm64);
break;
}
default:
Unknown(instr);
}
} else if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) &&
(instr->Bit(4) == 1)) {
// vmovl signed
if ((instr->VdValue() & 1) != 0) Unknown(instr);
int Vd = (instr->Bit(22) << 3) | (instr->VdValue() >> 1);
......
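As a cross-check of the fields the new decoder branch tests, here is a small standalone program (not V8 code) that pulls them out of the f2812052 word from the disassembler test below, which prints as "vmov.i32 q1, 18":

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t instr = 0xF2812052;  // vmov.i32 q1, 18 (see test-disasm hunk below)
  auto bits = [&](int hi, int lo) {
    return (instr >> lo) & ((1u << (hi - lo + 1)) - 1);
  };
  // One register and a modified immediate value (A7.4.6).
  assert(bits(23, 23) == 1 && bits(21, 19) == 0);
  assert(bits(7, 7) == 0 && bits(4, 4) == 1);
  assert(bits(11, 8) == 0);   // cmode 0000 -> vmov.i32
  assert(bits(15, 12) == 2);  // Vd field = 2, i.e. d2 alias q1
  uint32_t imm = (bits(24, 24) << 7) | (bits(18, 16) << 4) | bits(3, 0);
  assert(imm == 18);          // printed as "q1, 18"
  return 0;
}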
......@@ -3931,7 +3931,7 @@ void SubSaturate(Simulator* simulator, int Vd, int Vm, int Vn) {
simulator->get_neon_register(Vn, src1);
simulator->get_neon_register(Vm, src2);
for (int i = 0; i < kLanes; i++) {
src1[i] = Clamp<T>(Widen<T, int64_t>(src1[i]) - Widen<T, int64_t>(src2[i]));
src1[i] = SaturateSub<T>(src1[i], src2[i]);
}
simulator->set_neon_register(Vd, src1);
}
......@@ -4294,6 +4294,9 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
case Neon32:
SubSaturate<int32_t>(this, Vd, Vm, Vn);
break;
case Neon64:
SubSaturate<int64_t>(this, Vd, Vm, Vn);
break;
default:
UNREACHABLE();
break;
......@@ -4535,8 +4538,28 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
break;
}
case 5:
if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) &&
(instr->Bit(4) == 1)) {
if (instr->Bit(23) == 1 && instr->Bits(21, 19) == 0 &&
instr->Bit(7) == 0 && instr->Bit(4) == 1) {
// One register and a modified immediate value, see ARM DDI 0406C.d
// A7.4.6. Handles vmov, vorr, vmvn, vbic.
// Only handle vmov.i32 for now.
byte cmode = instr->Bits(11, 8);
switch (cmode) {
case 0: {
// vmov.i32 Qd, #<imm>
int vd = instr->VFPDRegValue(kSimd128Precision);
uint64_t imm = instr->Bit(24) << 7; // i
imm |= instr->Bits(18, 16) << 4; // imm3
imm |= instr->Bits(3, 0); // imm4
imm |= imm << 32;
set_neon_register(vd, {imm, imm});
break;
}
default:
UNIMPLEMENTED();
}
} else if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) &&
(instr->Bit(4) == 1)) {
// vmovl signed
if ((instr->VdValue() & 1) != 0) UNIMPLEMENTED();
int Vd = instr->VFPDRegValue(kSimd128Precision);
......
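For the same cmode == 0 case, the simulator rebuilds the 8-bit immediate from i:imm3:imm4 and replicates it into every 32-bit lane of the destination. A standalone sketch of that expansion (illustrative helper name, not V8 code), using the field values of the f2812052 example:

#include <cassert>
#include <cstdint>

// cmode == 0 (vmov.i32): the 8-bit immediate lands in each 32-bit lane, so
// each 64-bit half of the Q register holds the value twice.
uint64_t ExpandVmovI32Immediate(uint32_t i, uint32_t imm3, uint32_t imm4) {
  uint64_t imm = (i << 7) | (imm3 << 4) | imm4;  // abcdefgh
  imm |= imm << 32;                              // replicate across both 32-bit halves
  return imm;
}

int main() {
  // f2812052: i = 0, imm3 = 001, imm4 = 0010 -> 0x12 in all four i32 lanes.
  assert(ExpandVmovI32Immediate(0, 1, 2) == 0x0000001200000012u);
  return 0;
}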
......@@ -1038,6 +1038,10 @@ TEST(Neon) {
"f22e01fe vmov q0, q15");
COMPARE(vmov(q8, q9),
"f26201f2 vmov q8, q9");
COMPARE(vmov(q1, 0),
"f2802050 vmov.i32 q1, 0");
COMPARE(vmov(q1, 0x0000001200000012),
"f2812052 vmov.i32 q1, 18");
COMPARE(vmvn(q0, q15),
"f3b005ee vmvn q0, q15");
COMPARE(vmvn(q8, q9),
......
......@@ -964,7 +964,6 @@ WASM_SIMD_TEST_NO_LOWERING(I64x2ReplaceLane) {
}
}
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_IA32
void RunI64x2UnOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
WasmOpcode opcode, Int64UnOp expected_op) {
WasmRunner<int32_t, int64_t> r(execution_tier, lower_simd);
......@@ -991,6 +990,7 @@ WASM_SIMD_TEST_NO_LOWERING(I64x2Neg) {
base::NegateWithWraparound);
}
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_IA32
void RunI64x2ShiftOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
WasmOpcode opcode, Int64ShiftOp expected_op) {
// Intentionally shift by 64, should be no-op.
......