Commit 257c303f authored by Ng Zhi An's avatar Ng Zhi An Committed by Commit Bot

[wasm-simd][arm] Implement i64x2.ne and i64x2 all_true

Bug: v8:11347,v8:11348,chromium:1174498
Change-Id: I9afaacefcab55a6d7eb48f6e9d1848b714f64eb6
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2666147
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Reviewed-by: Jakob Kummerow <jkummerow@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#72603}
parent 483e30d0
...@@ -4001,6 +4001,7 @@ enum UnaryOp { ...@@ -4001,6 +4001,7 @@ enum UnaryOp {
VRSQRTE, VRSQRTE,
VPADDL_S, VPADDL_S,
VPADDL_U, VPADDL_U,
VCEQ0,
VCLT0, VCLT0,
VCNT VCNT
}; };
...@@ -4077,6 +4078,10 @@ static Instr EncodeNeonUnaryOp(UnaryOp op, NeonRegType reg_type, NeonSize size, ...@@ -4077,6 +4078,10 @@ static Instr EncodeNeonUnaryOp(UnaryOp op, NeonRegType reg_type, NeonSize size,
case VPADDL_U: case VPADDL_U:
op_encoding = 0x5 * B7; op_encoding = 0x5 * B7;
break; break;
case VCEQ0:
// Only support integers.
op_encoding = 0x1 * B16 | 0x2 * B7;
break;
case VCLT0: case VCLT0:
// Only support signed integers. // Only support signed integers.
op_encoding = 0x1 * B16 | 0x4 * B7; op_encoding = 0x1 * B16 | 0x4 * B7;
...@@ -4810,6 +4815,15 @@ void Assembler::vceq(NeonSize size, QwNeonRegister dst, QwNeonRegister src1, ...@@ -4810,6 +4815,15 @@ void Assembler::vceq(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
emit(EncodeNeonBinOp(VCEQ, size, dst, src1, src2)); emit(EncodeNeonBinOp(VCEQ, size, dst, src1, src2));
} }
// Emits vceq.<size> Qd, Qm, #0 (Vector Compare Equal to Zero).
// Each lane of |dst| is set to all ones when the corresponding lane of
// |src1| equals zero, and to all zeros otherwise. The VCEQ0 encoding
// supports integer lane sizes only, and the only encodable immediate is 0.
void Assembler::vceq(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
int value) {
DCHECK(IsEnabled(NEON));
// Only comparison against an immediate zero is encodable.
DCHECK_EQ(0, value);
// Qd = vceq(Qm, #0) Vector Compare Equal to Zero.
// Instruction details available in ARM DDI 0406C.d, A8-847.
emit(EncodeNeonUnaryOp(VCEQ0, NEON_Q, size, dst.code(), src1.code()));
}
void Assembler::vcge(QwNeonRegister dst, QwNeonRegister src1, void Assembler::vcge(QwNeonRegister dst, QwNeonRegister src1,
QwNeonRegister src2) { QwNeonRegister src2) {
DCHECK(IsEnabled(NEON)); DCHECK(IsEnabled(NEON));
......
...@@ -951,6 +951,7 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase { ...@@ -951,6 +951,7 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
void vceq(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2); void vceq(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2);
void vceq(NeonSize size, QwNeonRegister dst, QwNeonRegister src1, void vceq(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
QwNeonRegister src2); QwNeonRegister src2);
void vceq(NeonSize size, QwNeonRegister dst, QwNeonRegister src, int value);
void vcge(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2); void vcge(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2);
void vcge(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1, void vcge(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
QwNeonRegister src2); QwNeonRegister src2);
......
...@@ -2441,6 +2441,17 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2441,6 +2441,17 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputSimd128Register(1)); i.InputSimd128Register(1));
break; break;
} }
case kArmI64x2Ne: {
Simd128Register dst = i.OutputSimd128Register();
UseScratchRegisterScope temps(tasm());
Simd128Register tmp = temps.AcquireQ();
__ vceq(Neon32, dst, i.InputSimd128Register(0),
i.InputSimd128Register(1));
__ vrev64(Neon32, tmp, dst);
__ vand(dst, dst, tmp);
__ vmvn(dst, dst);
break;
}
case kArmI32x4Eq: { case kArmI32x4Eq: {
__ vceq(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0), __ vceq(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1)); i.InputSimd128Register(1));
...@@ -3250,6 +3261,31 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -3250,6 +3261,31 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ mov(i.OutputRegister(), Operand(1), LeaveCC, ne); __ mov(i.OutputRegister(), Operand(1), LeaveCC, ne);
break; break;
} }
case kArmV64x2AllTrue: {
const QwNeonRegister& src = i.InputSimd128Register(0);
UseScratchRegisterScope temps(tasm());
QwNeonRegister tmp = temps.AcquireQ();
Register dst = i.OutputRegister();
// src = | a | b | c | d |
// tmp = | max(a,b) | max(c,d) | ...
__ vpmax(NeonU32, tmp.low(), src.low(), src.high());
// tmp = | max(a,b) == 0 | max(c,d) == 0 | ...
__ vceq(Neon32, tmp, tmp, 0);
// tmp = | max(a,b) == 0 or max(c,d) == 0 | ...
__ vpmax(NeonU32, tmp.low(), tmp.low(), tmp.low());
// dst = (max(a,b) == 0 || max(c,d) == 0)
// dst will either be -1 or 0.
__ vmov(NeonS32, dst, tmp.low(), 0);
// dst = !dst (-1 -> 0, 0 -> 1)
__ add(dst, dst, Operand(1));
// This works because:
// !dst
// = !(max(a,b) == 0 || max(c,d) == 0)
// = max(a,b) != 0 && max(c,d) != 0
// = (a != 0 || b != 0) && (c != 0 || d != 0)
// = definition of i64x2.all_true.
break;
}
case kArmV32x4AllTrue: { case kArmV32x4AllTrue: {
const QwNeonRegister& src = i.InputSimd128Register(0); const QwNeonRegister& src = i.InputSimd128Register(0);
UseScratchRegisterScope temps(tasm()); UseScratchRegisterScope temps(tasm());
......
...@@ -192,6 +192,7 @@ namespace compiler { ...@@ -192,6 +192,7 @@ namespace compiler {
V(ArmI64x2ShrU) \ V(ArmI64x2ShrU) \
V(ArmI64x2BitMask) \ V(ArmI64x2BitMask) \
V(ArmI64x2Eq) \ V(ArmI64x2Eq) \
V(ArmI64x2Ne) \
V(ArmI64x2SConvertI32x4Low) \ V(ArmI64x2SConvertI32x4Low) \
V(ArmI64x2SConvertI32x4High) \ V(ArmI64x2SConvertI32x4High) \
V(ArmI64x2UConvertI32x4Low) \ V(ArmI64x2UConvertI32x4Low) \
...@@ -333,6 +334,7 @@ namespace compiler { ...@@ -333,6 +334,7 @@ namespace compiler {
V(ArmS8x8Reverse) \ V(ArmS8x8Reverse) \
V(ArmS8x4Reverse) \ V(ArmS8x4Reverse) \
V(ArmS8x2Reverse) \ V(ArmS8x2Reverse) \
V(ArmV64x2AllTrue) \
V(ArmV32x4AllTrue) \ V(ArmV32x4AllTrue) \
V(ArmV16x8AllTrue) \ V(ArmV16x8AllTrue) \
V(ArmV128AnyTrue) \ V(ArmV128AnyTrue) \
......
...@@ -172,6 +172,7 @@ int InstructionScheduler::GetTargetInstructionFlags( ...@@ -172,6 +172,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArmI64x2ShrU: case kArmI64x2ShrU:
case kArmI64x2BitMask: case kArmI64x2BitMask:
case kArmI64x2Eq: case kArmI64x2Eq:
case kArmI64x2Ne:
case kArmI64x2SConvertI32x4Low: case kArmI64x2SConvertI32x4Low:
case kArmI64x2SConvertI32x4High: case kArmI64x2SConvertI32x4High:
case kArmI64x2UConvertI32x4Low: case kArmI64x2UConvertI32x4Low:
...@@ -313,6 +314,7 @@ int InstructionScheduler::GetTargetInstructionFlags( ...@@ -313,6 +314,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArmS8x8Reverse: case kArmS8x8Reverse:
case kArmS8x4Reverse: case kArmS8x4Reverse:
case kArmS8x2Reverse: case kArmS8x2Reverse:
case kArmV64x2AllTrue:
case kArmV32x4AllTrue: case kArmV32x4AllTrue:
case kArmV16x8AllTrue: case kArmV16x8AllTrue:
case kArmV128AnyTrue: case kArmV128AnyTrue:
......
...@@ -2596,6 +2596,7 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) { ...@@ -2596,6 +2596,7 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
V(I8x16Abs, kArmI8x16Abs) \ V(I8x16Abs, kArmI8x16Abs) \
V(I8x16Popcnt, kArmVcnt) \ V(I8x16Popcnt, kArmVcnt) \
V(S128Not, kArmS128Not) \ V(S128Not, kArmS128Not) \
V(V64x2AllTrue, kArmV64x2AllTrue) \
V(V32x4AllTrue, kArmV32x4AllTrue) \ V(V32x4AllTrue, kArmV32x4AllTrue) \
V(V16x8AllTrue, kArmV16x8AllTrue) \ V(V16x8AllTrue, kArmV16x8AllTrue) \
V(V128AnyTrue, kArmV128AnyTrue) \ V(V128AnyTrue, kArmV128AnyTrue) \
...@@ -2646,6 +2647,7 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) { ...@@ -2646,6 +2647,7 @@ void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
V(I32x4MaxS, kArmI32x4MaxS) \ V(I32x4MaxS, kArmI32x4MaxS) \
V(I32x4Eq, kArmI32x4Eq) \ V(I32x4Eq, kArmI32x4Eq) \
V(I64x2Eq, kArmI64x2Eq) \ V(I64x2Eq, kArmI64x2Eq) \
V(I64x2Ne, kArmI64x2Ne) \
V(I32x4Ne, kArmI32x4Ne) \ V(I32x4Ne, kArmI32x4Ne) \
V(I32x4GtS, kArmI32x4GtS) \ V(I32x4GtS, kArmI32x4GtS) \
V(I32x4GeS, kArmI32x4GeS) \ V(I32x4GeS, kArmI32x4GeS) \
......
...@@ -2795,11 +2795,6 @@ void InstructionSelector::VisitI32x4WidenI8x16S(Node* node) { UNIMPLEMENTED(); } ...@@ -2795,11 +2795,6 @@ void InstructionSelector::VisitI32x4WidenI8x16S(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI32x4WidenI8x16U(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitI32x4WidenI8x16U(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64 #endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64
#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_IA32
void InstructionSelector::VisitI64x2Ne(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitV64x2AllTrue(Node* node) { UNIMPLEMENTED(); }
#endif //! V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_IA32
void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); } void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); }
void InstructionSelector::VisitParameter(Node* node) { void InstructionSelector::VisitParameter(Node* node) {
......
...@@ -2275,6 +2275,10 @@ void Decoder::DecodeAdvancedSIMDTwoOrThreeRegisters(Instruction* instr) { ...@@ -2275,6 +2275,10 @@ void Decoder::DecodeAdvancedSIMDTwoOrThreeRegisters(Instruction* instr) {
Format(instr, q ? "vcnt.8 'Qd, 'Qm" : "vcnt.8 'Dd, 'Dm"); Format(instr, q ? "vcnt.8 'Qd, 'Qm" : "vcnt.8 'Dd, 'Dm");
} else if (opc1 == 0 && opc2 == 0b1011) { } else if (opc1 == 0 && opc2 == 0b1011) {
Format(instr, "vmvn 'Qd, 'Qm"); Format(instr, "vmvn 'Qd, 'Qm");
} else if (opc1 == 0b01 && opc2 == 0b0010) {
DCHECK_NE(0b11, size);
Format(instr,
q ? "vceq.s'size2 'Qd, 'Qm, #0" : "vceq.s.'size2 'Dd, 'Dm, #0");
} else if (opc1 == 0b01 && opc2 == 0b0100) { } else if (opc1 == 0b01 && opc2 == 0b0100) {
DCHECK_NE(0b11, size); DCHECK_NE(0b11, size);
Format(instr, Format(instr,
......
...@@ -4504,6 +4504,25 @@ void Simulator::DecodeAdvancedSIMDTwoOrThreeRegisters(Instruction* instr) { ...@@ -4504,6 +4504,25 @@ void Simulator::DecodeAdvancedSIMDTwoOrThreeRegisters(Instruction* instr) {
get_neon_register(vm, q_data); get_neon_register(vm, q_data);
for (int i = 0; i < 4; i++) q_data[i] = ~q_data[i]; for (int i = 0; i < 4; i++) q_data[i] = ~q_data[i];
set_neon_register(vd, q_data); set_neon_register(vd, q_data);
} else if (opc1 == 0b01 && opc2 == 0b0010) {
// vceq.<dt> Qd, Qm, #0 (signed integers).
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
switch (size) {
case Neon8:
Unop<int8_t>(this, Vd, Vm, [](int8_t x) { return x == 0 ? -1 : 0; });
break;
case Neon16:
Unop<int16_t>(this, Vd, Vm,
[](int16_t x) { return x == 0 ? -1 : 0; });
break;
case Neon32:
Unop<int32_t>(this, Vd, Vm,
[](int32_t x) { return x == 0 ? -1 : 0; });
break;
case Neon64:
UNREACHABLE();
}
} else if (opc1 == 0b01 && opc2 == 0b0100) { } else if (opc1 == 0b01 && opc2 == 0b0100) {
// vclt.<dt> Qd, Qm, #0 (signed integers). // vclt.<dt> Qd, Qm, #0 (signed integers).
int Vd = instr->VFPDRegValue(kSimd128Precision); int Vd = instr->VFPDRegValue(kSimd128Precision);
...@@ -4521,7 +4540,6 @@ void Simulator::DecodeAdvancedSIMDTwoOrThreeRegisters(Instruction* instr) { ...@@ -4521,7 +4540,6 @@ void Simulator::DecodeAdvancedSIMDTwoOrThreeRegisters(Instruction* instr) {
case Neon64: case Neon64:
UNREACHABLE(); UNREACHABLE();
} }
} else if (opc1 == 0b01 && (opc2 & 0b0111) == 0b110) { } else if (opc1 == 0b01 && (opc2 & 0b0111) == 0b110) {
// vabs<type>.<size> Qd, Qm // vabs<type>.<size> Qd, Qm
int Vd = instr->VFPDRegValue(kSimd128Precision); int Vd = instr->VFPDRegValue(kSimd128Precision);
......
...@@ -1038,11 +1038,9 @@ WASM_SIMD_TEST_NO_LOWERING(I64x2Eq) { ...@@ -1038,11 +1038,9 @@ WASM_SIMD_TEST_NO_LOWERING(I64x2Eq) {
RunI64x2BinOpTest(execution_tier, lower_simd, kExprI64x2Eq, Equal); RunI64x2BinOpTest(execution_tier, lower_simd, kExprI64x2Eq, Equal);
} }
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_IA32
WASM_SIMD_TEST_NO_LOWERING(I64x2Ne) { WASM_SIMD_TEST_NO_LOWERING(I64x2Ne) {
RunI64x2BinOpTest(execution_tier, lower_simd, kExprI64x2Ne, NotEqual); RunI64x2BinOpTest(execution_tier, lower_simd, kExprI64x2Ne, NotEqual);
} }
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_IA32
WASM_SIMD_TEST(F64x2Splat) { WASM_SIMD_TEST(F64x2Splat) {
WasmRunner<int32_t, double> r(execution_tier, lower_simd); WasmRunner<int32_t, double> r(execution_tier, lower_simd);
...@@ -3513,9 +3511,7 @@ WASM_SIMD_TEST(S8x16MultiShuffleFuzz) { ...@@ -3513,9 +3511,7 @@ WASM_SIMD_TEST(S8x16MultiShuffleFuzz) {
CHECK_EQ(1, r.Call()); \ CHECK_EQ(1, r.Call()); \
} }
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_IA32
WASM_SIMD_BOOL_REDUCTION_TEST(64x2, 2, WASM_I64V) WASM_SIMD_BOOL_REDUCTION_TEST(64x2, 2, WASM_I64V)
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_IA32
WASM_SIMD_BOOL_REDUCTION_TEST(32x4, 4, WASM_I32V) WASM_SIMD_BOOL_REDUCTION_TEST(32x4, 4, WASM_I32V)
WASM_SIMD_BOOL_REDUCTION_TEST(16x8, 8, WASM_I32V) WASM_SIMD_BOOL_REDUCTION_TEST(16x8, 8, WASM_I32V)
WASM_SIMD_BOOL_REDUCTION_TEST(8x16, 16, WASM_I32V) WASM_SIMD_BOOL_REDUCTION_TEST(8x16, 16, WASM_I32V)
...@@ -4413,9 +4409,7 @@ WASM_SIMD_TEST(V128AnytrueWithNegativeZero) { ...@@ -4413,9 +4409,7 @@ WASM_SIMD_TEST(V128AnytrueWithNegativeZero) {
CHECK_EQ(1, r.Call(0x1)); \ CHECK_EQ(1, r.Call(0x1)); \
CHECK_EQ(0, r.Call(0)); \ CHECK_EQ(0, r.Call(0)); \
} }
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_IA32
WASM_SIMD_ALLTRUE_TEST(64x2, 2, 0xffffffffffffffff, int64_t) WASM_SIMD_ALLTRUE_TEST(64x2, 2, 0xffffffffffffffff, int64_t)
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_IA32
WASM_SIMD_ALLTRUE_TEST(32x4, 4, 0xffffffff, int32_t) WASM_SIMD_ALLTRUE_TEST(32x4, 4, 0xffffffff, int32_t)
WASM_SIMD_ALLTRUE_TEST(16x8, 8, 0xffff, int32_t) WASM_SIMD_ALLTRUE_TEST(16x8, 8, 0xffff, int32_t)
WASM_SIMD_ALLTRUE_TEST(8x16, 16, 0xff, int32_t) WASM_SIMD_ALLTRUE_TEST(8x16, 16, 0xff, int32_t)
......
...@@ -57,11 +57,6 @@ ...@@ -57,11 +57,6 @@
'proposals/memory64/memory_trap64': [FAIL], 'proposals/memory64/memory_trap64': [FAIL],
}], # ALWAYS }], # ALWAYS
['arch == arm', {
# TODO(zhin): Fails on arm, hitting UNIMPLEMENTED in instruction selector.
'proposals/simd/simd_i64x2_cmp': [FAIL],
}], # arch == arm
['arch == arm and not simulator_run', { ['arch == arm and not simulator_run', {
# See https://crbug.com/v8/10938 denormals not handled correctly on ARM. # See https://crbug.com/v8/10938 denormals not handled correctly on ARM.
'proposals/simd/simd_f32x4': [PASS, FAIL], 'proposals/simd/simd_f32x4': [PASS, FAIL],
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment