Commit ec6df835 authored by Ng Zhi An, committed by Commit Bot

[wasm-simd][liftoff][arm] Implement v64x2.alltrue and i64x2.ne

Extract v64x2.alltrue code sequence into macro-assembler for sharing
between TurboFan and Liftoff.

Bug: v8:11347,v8:11348
Change-Id: Ica436178b4f92ea0ed574010cd74f1babf66680f
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2686013
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Cr-Commit-Position: refs/heads/master@{#72683}
parent 4454b8fe
...@@ -2649,6 +2649,29 @@ void TurboAssembler::I64x2Eq(QwNeonRegister dst, QwNeonRegister src1, ...@@ -2649,6 +2649,29 @@ void TurboAssembler::I64x2Eq(QwNeonRegister dst, QwNeonRegister src1,
vand(dst, dst, scratch); vand(dst, dst, scratch);
} }
// Emits the instruction sequence for i64x2.all_true: |dst| ends up as 1 when
// both 64-bit lanes of |src| are non-zero, and as 0 otherwise.
// This lives in the macro-assembler so that the TurboFan code generator and
// Liftoff can share it.
void TurboAssembler::V64x2AllTrue(Register dst, QwNeonRegister src) {
UseScratchRegisterScope temps(this);
// Scratch Q register; after the first vpmax only tmp.low() is read back.
QwNeonRegister tmp = temps.AcquireQ();
// View src as four 32-bit lanes; (a, b) and (c, d) are the two halves of
// each 64-bit lane.
// src = | a | b | c | d |
// tmp = | max(a,b) | max(c,d) | ...
vpmax(NeonU32, tmp.low(), src.low(), src.high());
// A 64-bit lane is zero exactly when the unsigned max of its halves is zero.
// tmp = | max(a,b) == 0 | max(c,d) == 0 | ...
vceq(Neon32, tmp, tmp, 0);
// Fold the two per-lane "is zero" masks into 32-bit lane 0.
// tmp = | max(a,b) == 0 or max(c,d) == 0 | ...
vpmax(NeonU32, tmp.low(), tmp.low(), tmp.low());
// dst = (max(a,b) == 0 || max(c,d) == 0)
// dst will either be -1 or 0.
vmov(NeonS32, dst, tmp.low(), 0);
// Adding 1 turns the -1/0 mask into the boolean result.
// dst = !dst (-1 -> 0, 0 -> 1)
add(dst, dst, Operand(1));
// This works because:
// !dst
// = !(max(a,b) == 0 || max(c,d) == 0)
// = max(a,b) != 0 && max(c,d) != 0
// = (a != 0 || b != 0) && (c != 0 || d != 0)
// = definition of i64x2.all_true.
}
} // namespace internal } // namespace internal
} // namespace v8 } // namespace v8
......
...@@ -570,6 +570,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase { ...@@ -570,6 +570,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
// and be used in both TurboFan and Liftoff. // and be used in both TurboFan and Liftoff.
void I64x2BitMask(Register dst, QwNeonRegister src); void I64x2BitMask(Register dst, QwNeonRegister src);
void I64x2Eq(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2); void I64x2Eq(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2);
void V64x2AllTrue(Register dst, QwNeonRegister src);
private: private:
// Compare single values and then load the fpscr flags to a register. // Compare single values and then load the fpscr flags to a register.
......
...@@ -3262,28 +3262,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -3262,28 +3262,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break; break;
} }
case kArmV64x2AllTrue: { case kArmV64x2AllTrue: {
const QwNeonRegister& src = i.InputSimd128Register(0); __ V64x2AllTrue(i.OutputRegister(), i.InputSimd128Register(0));
UseScratchRegisterScope temps(tasm());
QwNeonRegister tmp = temps.AcquireQ();
Register dst = i.OutputRegister();
// src = | a | b | c | d |
// tmp = | max(a,b) | max(c,d) | ...
__ vpmax(NeonU32, tmp.low(), src.low(), src.high());
// tmp = | max(a,b) == 0 | max(c,d) == 0 | ...
__ vceq(Neon32, tmp, tmp, 0);
// tmp = | max(a,b) == 0 or max(c,d) == 0 | ...
__ vpmax(NeonU32, tmp.low(), tmp.low(), tmp.low());
// dst = (max(a,b) == 0 || max(c,d) == 0)
// dst will either be -1 or 0.
__ vmov(NeonS32, dst, tmp.low(), 0);
// dst = !dst (-1 -> 0, 0 -> 1)
__ add(dst, dst, Operand(1));
// This works because:
// !dst
// = !(max(a,b) == 0 || max(c,d) == 0)
// = max(a,b) != 0 && max(c,d) != 0
// = (a != 0 || b != 0) && (c != 0 || d != 0)
// = defintion of i64x2.all_true.
break; break;
} }
case kArmV32x4AllTrue: { case kArmV32x4AllTrue: {
......
...@@ -2826,7 +2826,7 @@ void LiftoffAssembler::emit_i64x2_neg(LiftoffRegister dst, ...@@ -2826,7 +2826,7 @@ void LiftoffAssembler::emit_i64x2_neg(LiftoffRegister dst,
void LiftoffAssembler::emit_v64x2_alltrue(LiftoffRegister dst, void LiftoffAssembler::emit_v64x2_alltrue(LiftoffRegister dst,
LiftoffRegister src) { LiftoffRegister src) {
bailout(kSimd, "v64x2_alltrue"); V64x2AllTrue(dst.gp(), liftoff::GetSimd128Register(src));
} }
void LiftoffAssembler::emit_i64x2_shl(LiftoffRegister dst, LiftoffRegister lhs, void LiftoffAssembler::emit_i64x2_shl(LiftoffRegister dst, LiftoffRegister lhs,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment