Commit 257c303f authored by Ng Zhi An, committed by Commit Bot

[wasm-simd][arm] Implement i64x2.ne and i64x2 all_true

Bug: v8:11347,v8:11348,chromium:1174498
Change-Id: I9afaacefcab55a6d7eb48f6e9d1848b714f64eb6
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2666147
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Reviewed-by: Jakob Kummerow <jkummerow@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#72603}
parent 483e30d0
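For context, the wasm semantics that these two lowerings must reproduce can be written as scalar C++. This is an illustrative sketch only; the function names and the use of std::array are ours, not part of the CL:

#include <array>
#include <cstdint>

// i64x2.ne: lanewise compare; each 64-bit result lane is all ones
// (not equal) or all zeros (equal).
std::array<uint64_t, 2> I64x2Ne(std::array<uint64_t, 2> a,
                                std::array<uint64_t, 2> b) {
  std::array<uint64_t, 2> r;
  for (int i = 0; i < 2; ++i) r[i] = (a[i] != b[i]) ? ~uint64_t{0} : 0;
  return r;
}

// i64x2.all_true: returns 1 if every 64-bit lane is non-zero, 0 otherwise.
int32_t I64x2AllTrue(const std::array<uint64_t, 2>& v) {
  return (v[0] != 0 && v[1] != 0) ? 1 : 0;
}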
......@@ -4001,6 +4001,7 @@ enum UnaryOp {
VRSQRTE,
VPADDL_S,
VPADDL_U,
VCEQ0,
VCLT0,
VCNT
};
......@@ -4077,6 +4078,10 @@ static Instr EncodeNeonUnaryOp(UnaryOp op, NeonRegType reg_type, NeonSize size,
case VPADDL_U:
op_encoding = 0x5 * B7;
break;
case VCEQ0:
// Only support integers.
op_encoding = 0x1 * B16 | 0x2 * B7;
break;
case VCLT0:
// Only support signed integers.
op_encoding = 0x1 * B16 | 0x4 * B7;
......@@ -4810,6 +4815,15 @@ void Assembler::vceq(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
emit(EncodeNeonBinOp(VCEQ, size, dst, src1, src2));
}
void Assembler::vceq(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
int value) {
DCHECK(IsEnabled(NEON));
DCHECK_EQ(0, value);
// Qd = vceq(Qm, #0) Vector Compare Equal to Zero.
// Instruction details available in ARM DDI 0406C.d, A8-847.
emit(EncodeNeonUnaryOp(VCEQ0, NEON_Q, size, dst.code(), src1.code()));
}
void Assembler::vcge(QwNeonRegister dst, QwNeonRegister src1,
QwNeonRegister src2) {
DCHECK(IsEnabled(NEON));
......
......@@ -951,6 +951,7 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
void vceq(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2);
void vceq(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
QwNeonRegister src2);
void vceq(NeonSize size, QwNeonRegister dst, QwNeonRegister src, int value);
void vcge(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2);
void vcge(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
QwNeonRegister src2);
......
......@@ -2441,6 +2441,17 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputSimd128Register(1));
break;
}
case kArmI64x2Ne: {
Simd128Register dst = i.OutputSimd128Register();
UseScratchRegisterScope temps(tasm());
Simd128Register tmp = temps.AcquireQ();
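// There is no 64-bit vceq on ARMv7 NEON: compare the 32-bit halves,
// mirror each 64-bit lane with vrev64 and AND, so a lane is all ones
// only when both halves compared equal, then invert to get "not equal".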
__ vceq(Neon32, dst, i.InputSimd128Register(0),
i.InputSimd128Register(1));
__ vrev64(Neon32, tmp, dst);
__ vand(dst, dst, tmp);
__ vmvn(dst, dst);
break;
}
case kArmI32x4Eq: {
__ vceq(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1));
......@@ -3250,6 +3261,31 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ mov(i.OutputRegister(), Operand(1), LeaveCC, ne);
break;
}
case kArmV64x2AllTrue: {
const QwNeonRegister& src = i.InputSimd128Register(0);
UseScratchRegisterScope temps(tasm());
QwNeonRegister tmp = temps.AcquireQ();
Register dst = i.OutputRegister();
// src = | a | b | c | d |
// tmp = | max(a,b) | max(c,d) | ...
__ vpmax(NeonU32, tmp.low(), src.low(), src.high());
// tmp = | max(a,b) == 0 | max(c,d) == 0 | ...
__ vceq(Neon32, tmp, tmp, 0);
// tmp = | max(a,b) == 0 or max(c,d) == 0 | ...
__ vpmax(NeonU32, tmp.low(), tmp.low(), tmp.low());
// dst = (max(a,b) == 0 || max(c,d) == 0)
// dst will either be -1 or 0.
__ vmov(NeonS32, dst, tmp.low(), 0);
// dst = !dst (-1 -> 0, 0 -> 1)
__ add(dst, dst, Operand(1));
// This works because:
// !dst
// = !(max(a,b) == 0 || max(c,d) == 0)
// = max(a,b) != 0 && max(c,d) != 0
// = (a != 0 || b != 0) && (c != 0 || d != 0)
// = the definition of i64x2.all_true.
break;
}
case kArmV32x4AllTrue: {
const QwNeonRegister& src = i.InputSimd128Register(0);
UseScratchRegisterScope temps(tasm());
......
......@@ -192,6 +192,7 @@ namespace compiler {
V(ArmI64x2ShrU) \
V(ArmI64x2BitMask) \
V(ArmI64x2Eq) \
V(ArmI64x2Ne) \
V(ArmI64x2SConvertI32x4Low) \
V(ArmI64x2SConvertI32x4High) \
V(ArmI64x2UConvertI32x4Low) \
......@@ -333,6 +334,7 @@ namespace compiler {
V(ArmS8x8Reverse) \
V(ArmS8x4Reverse) \
V(ArmS8x2Reverse) \
V(ArmV64x2AllTrue) \
V(ArmV32x4AllTrue) \
V(ArmV16x8AllTrue) \
V(ArmV128AnyTrue) \
......
......@@ -172,6 +172,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArmI64x2ShrU:
case kArmI64x2BitMask:
case kArmI64x2Eq:
case kArmI64x2Ne:
case kArmI64x2SConvertI32x4Low:
case kArmI64x2SConvertI32x4High:
case kArmI64x2UConvertI32x4Low:
......@@ -313,6 +314,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArmS8x8Reverse:
case kArmS8x4Reverse:
case kArmS8x2Reverse:
case kArmV64x2AllTrue:
case kArmV32x4AllTrue:
case kArmV16x8AllTrue:
case kArmV128AnyTrue:
......
......@@ -2596,6 +2596,7 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
V(I8x16Abs, kArmI8x16Abs) \
V(I8x16Popcnt, kArmVcnt) \
V(S128Not, kArmS128Not) \
V(V64x2AllTrue, kArmV64x2AllTrue) \
V(V32x4AllTrue, kArmV32x4AllTrue) \
V(V16x8AllTrue, kArmV16x8AllTrue) \
V(V128AnyTrue, kArmV128AnyTrue) \
......@@ -2646,6 +2647,7 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
V(I32x4MaxS, kArmI32x4MaxS) \
V(I32x4Eq, kArmI32x4Eq) \
V(I64x2Eq, kArmI64x2Eq) \
V(I64x2Ne, kArmI64x2Ne) \
V(I32x4Ne, kArmI32x4Ne) \
V(I32x4GtS, kArmI32x4GtS) \
V(I32x4GeS, kArmI32x4GeS) \
......
......@@ -2795,11 +2795,6 @@ void InstructionSelector::VisitI32x4WidenI8x16S(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI32x4WidenI8x16U(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64
#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_IA32
void InstructionSelector::VisitI64x2Ne(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitV64x2AllTrue(Node* node) { UNIMPLEMENTED(); }
#endif //! V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_IA32
void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); }
void InstructionSelector::VisitParameter(Node* node) {
......
......@@ -2275,6 +2275,10 @@ void Decoder::DecodeAdvancedSIMDTwoOrThreeRegisters(Instruction* instr) {
Format(instr, q ? "vcnt.8 'Qd, 'Qm" : "vcnt.8 'Dd, 'Dm");
} else if (opc1 == 0 && opc2 == 0b1011) {
Format(instr, "vmvn 'Qd, 'Qm");
} else if (opc1 == 0b01 && opc2 == 0b0010) {
DCHECK_NE(0b11, size);
Format(instr,
q ? "vceq.s'size2 'Qd, 'Qm, #0" : "vceq.s.'size2 'Dd, 'Dm, #0");
} else if (opc1 == 0b01 && opc2 == 0b0100) {
DCHECK_NE(0b11, size);
Format(instr,
......
......@@ -4504,6 +4504,25 @@ void Simulator::DecodeAdvancedSIMDTwoOrThreeRegisters(Instruction* instr) {
get_neon_register(vm, q_data);
for (int i = 0; i < 4; i++) q_data[i] = ~q_data[i];
set_neon_register(vd, q_data);
} else if (opc1 == 0b01 && opc2 == 0b0010) {
// vceq.<dt> Qd, Qm, #0 (signed integers).
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
switch (size) {
case Neon8:
Unop<int8_t>(this, Vd, Vm, [](int8_t x) { return x == 0 ? -1 : 0; });
break;
case Neon16:
Unop<int16_t>(this, Vd, Vm,
[](int16_t x) { return x == 0 ? -1 : 0; });
break;
case Neon32:
Unop<int32_t>(this, Vd, Vm,
[](int32_t x) { return x == 0 ? -1 : 0; });
break;
case Neon64:
UNREACHABLE();
}
} else if (opc1 == 0b01 && opc2 == 0b0100) {
// vclt.<dt> Qd, Qm, #0 (signed integers).
int Vd = instr->VFPDRegValue(kSimd128Precision);
......@@ -4521,7 +4540,6 @@ void Simulator::DecodeAdvancedSIMDTwoOrThreeRegisters(Instruction* instr) {
case Neon64:
UNREACHABLE();
}
} else if (opc1 == 0b01 && (opc2 & 0b0111) == 0b110) {
// vabs<type>.<size> Qd, Qm
int Vd = instr->VFPDRegValue(kSimd128Precision);
......
......@@ -1038,11 +1038,9 @@ WASM_SIMD_TEST_NO_LOWERING(I64x2Eq) {
RunI64x2BinOpTest(execution_tier, lower_simd, kExprI64x2Eq, Equal);
}
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_IA32
WASM_SIMD_TEST_NO_LOWERING(I64x2Ne) {
RunI64x2BinOpTest(execution_tier, lower_simd, kExprI64x2Ne, NotEqual);
}
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_IA32
WASM_SIMD_TEST(F64x2Splat) {
WasmRunner<int32_t, double> r(execution_tier, lower_simd);
......@@ -3513,9 +3511,7 @@ WASM_SIMD_TEST(S8x16MultiShuffleFuzz) {
CHECK_EQ(1, r.Call()); \
}
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_IA32
WASM_SIMD_BOOL_REDUCTION_TEST(64x2, 2, WASM_I64V)
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_IA32
WASM_SIMD_BOOL_REDUCTION_TEST(32x4, 4, WASM_I32V)
WASM_SIMD_BOOL_REDUCTION_TEST(16x8, 8, WASM_I32V)
WASM_SIMD_BOOL_REDUCTION_TEST(8x16, 16, WASM_I32V)
......@@ -4413,9 +4409,7 @@ WASM_SIMD_TEST(V128AnytrueWithNegativeZero) {
CHECK_EQ(1, r.Call(0x1)); \
CHECK_EQ(0, r.Call(0)); \
}
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_IA32
WASM_SIMD_ALLTRUE_TEST(64x2, 2, 0xffffffffffffffff, int64_t)
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_IA32
WASM_SIMD_ALLTRUE_TEST(32x4, 4, 0xffffffff, int32_t)
WASM_SIMD_ALLTRUE_TEST(16x8, 8, 0xffff, int32_t)
WASM_SIMD_ALLTRUE_TEST(8x16, 16, 0xff, int32_t)
......
......@@ -57,11 +57,6 @@
'proposals/memory64/memory_trap64': [FAIL],
}], # ALWAYS
['arch == arm', {
# TODO(zhin): Fails on arm, hitting UNIMPLEMENTED in instruction selector.
'proposals/simd/simd_i64x2_cmp': [FAIL],
}], # arch == arm
['arch == arm and not simulator_run', {
# See https://crbug.com/v8/10938 denormals not handled correctly on ARM.
'proposals/simd/simd_f32x4': [PASS, FAIL],
......