Commit f672cefd authored by Ng Zhi An, committed by Commit Bot

[wasm-simd][liftoff][arm][arm64] Implement bitmask

Implement i8x16 i16x8 i32x4 bitmask for arm and arm64.

The instruction sequence is the same as in TurboFan. We add some special
handling for getting the second temporary Q register, reusing src if
possible.

Bug: v8:9909,v8:10308
Change-Id: I1c6fe0d076f0e14d05c4cc532e4d976f4ebcce30
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2222608
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Cr-Commit-Position: refs/heads/master@{#68107}
parent aa5bcc09
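For reference, here is a scalar sketch (not part of the commit; the function name is illustrative) of what each bitmask lowering below computes: lane i of the 128-bit source contributes bit i of the result exactly when its sign bit is set.

#include <cstdint>
#include <cstring>

// Illustrative reference model, assuming little-endian lane order as in Wasm
// SIMD. Lane = int8_t models i8x16.bitmask, int16_t models i16x8.bitmask,
// int32_t models i32x4.bitmask.
template <typename Lane>
uint32_t BitmaskRef(const uint8_t (&vec)[16]) {
  constexpr size_t kLanes = 16 / sizeof(Lane);
  uint32_t result = 0;
  for (size_t i = 0; i < kLanes; ++i) {
    Lane lane;
    std::memcpy(&lane, vec + i * sizeof(Lane), sizeof(Lane));
    if (lane < 0) result |= uint32_t{1} << i;  // sign bit of lane i -> bit i
  }
  return result;
}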
......@@ -2575,7 +2575,26 @@ void LiftoffAssembler::emit_v32x4_alltrue(LiftoffRegister dst,
 void LiftoffAssembler::emit_i32x4_bitmask(LiftoffRegister dst,
                                           LiftoffRegister src) {
-  bailout(kSimd, "i32x4_bitmask");
+  UseScratchRegisterScope temps(this);
+  Simd128Register tmp = liftoff::GetSimd128Register(src);
+  Simd128Register mask = temps.AcquireQ();
+  if (cache_state()->is_used(src)) {
+    // We only have 1 scratch Q register, so try and reuse src.
+    LiftoffRegList pinned = LiftoffRegList::ForRegs(src);
+    LiftoffRegister unused_pair = GetUnusedRegister(kFpRegPair, pinned);
+    mask = liftoff::GetSimd128Register(unused_pair);
+  }
+  vshr(NeonS32, tmp, liftoff::GetSimd128Register(src), 31);
+  // Set i-th bit of each lane i. When AND with tmp, the lanes that
+  // are signed will have i-th bit set, unsigned will be 0.
+  vmov(mask.low(), Double((uint64_t)0x0000'0002'0000'0001));
+  vmov(mask.high(), Double((uint64_t)0x0000'0008'0000'0004));
+  vand(tmp, mask, tmp);
+  vpadd(Neon32, tmp.low(), tmp.low(), tmp.high());
+  vpadd(Neon32, tmp.low(), tmp.low(), kDoubleRegZero);
+  VmovLow(dst.gp(), tmp.low());
 }
 void LiftoffAssembler::emit_i32x4_shl(LiftoffRegister dst, LiftoffRegister lhs,
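A hedged scalar walk-through of the arm i32x4 sequence above (function name is illustrative, not V8 code): the arithmetic shift by 31 turns each lane into 0 or all-ones, the AND with the per-lane constants {1, 2, 4, 8} keeps bit i only for negative lanes, and the two vpadds fold the four lanes into lane 0 of tmp.low().

#include <cstdint>

uint32_t I32x4BitmaskModelArm(const int32_t src[4]) {
  const uint32_t kLaneBit[4] = {0x1, 0x2, 0x4, 0x8};  // vmov mask.low()/mask.high()
  uint32_t lanes[4];
  for (int i = 0; i < 4; ++i) {
    // vshr NeonS32, #31: 0 or ~0 (assuming arithmetic shift, as on the targets here).
    uint32_t sign = static_cast<uint32_t>(src[i] >> 31);
    lanes[i] = sign & kLaneBit[i];  // vand
  }
  uint32_t d0 = lanes[0] + lanes[1];  // first vpadd Neon32: pair of low D lanes
  uint32_t d1 = lanes[2] + lanes[3];  // first vpadd Neon32: pair of high D lanes
  return d0 + d1;                     // second vpadd with zero, then VmovLow to dst.gp()
}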
......@@ -2689,7 +2708,27 @@ void LiftoffAssembler::emit_v16x8_alltrue(LiftoffRegister dst,
 void LiftoffAssembler::emit_i16x8_bitmask(LiftoffRegister dst,
                                           LiftoffRegister src) {
-  bailout(kSimd, "i16x8_bitmask");
+  UseScratchRegisterScope temps(this);
+  Simd128Register tmp = liftoff::GetSimd128Register(src);
+  Simd128Register mask = temps.AcquireQ();
+  if (cache_state()->is_used(src)) {
+    // We only have 1 scratch Q register, so try and reuse src.
+    LiftoffRegList pinned = LiftoffRegList::ForRegs(src);
+    LiftoffRegister unused_pair = GetUnusedRegister(kFpRegPair, pinned);
+    mask = liftoff::GetSimd128Register(unused_pair);
+  }
+  vshr(NeonS16, tmp, liftoff::GetSimd128Register(src), 15);
+  // Set i-th bit of each lane i. When AND with tmp, the lanes that
+  // are signed will have i-th bit set, unsigned will be 0.
+  vmov(mask.low(), Double((uint64_t)0x0008'0004'0002'0001));
+  vmov(mask.high(), Double((uint64_t)0x0080'0040'0020'0010));
+  vand(tmp, mask, tmp);
+  vpadd(Neon16, tmp.low(), tmp.low(), tmp.high());
+  vpadd(Neon16, tmp.low(), tmp.low(), tmp.low());
+  vpadd(Neon16, tmp.low(), tmp.low(), tmp.low());
+  vmov(NeonU16, dst.gp(), tmp.low(), 0);
 }
 void LiftoffAssembler::emit_i16x8_shl(LiftoffRegister dst, LiftoffRegister lhs,
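The i16x8 variant follows the same pattern with eight 16-bit lanes and constants 1 through 128; since each vpadd halves the number of live sums, three of them are needed to fold eight lanes into lane 0. A rough model of the fold (name is illustrative):

#include <cstdint>

uint16_t FoldI16x8Model(const uint16_t masked[8]) {  // masked[i] is 1 << i or 0
  uint16_t a[4], b[2];
  for (int i = 0; i < 4; ++i) a[i] = masked[2 * i] + masked[2 * i + 1];  // 1st vpadd Neon16
  for (int i = 0; i < 2; ++i) b[i] = a[2 * i] + a[2 * i + 1];            // 2nd vpadd Neon16
  return b[0] + b[1];  // 3rd vpadd Neon16; vmov NeonU16 lane 0 reads the result
}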
......@@ -2876,7 +2915,29 @@ void LiftoffAssembler::emit_v8x16_alltrue(LiftoffRegister dst,
 void LiftoffAssembler::emit_i8x16_bitmask(LiftoffRegister dst,
                                           LiftoffRegister src) {
-  bailout(kSimd, "i8x16_bitmask");
+  UseScratchRegisterScope temps(this);
+  Simd128Register tmp = liftoff::GetSimd128Register(src);
+  Simd128Register mask = temps.AcquireQ();
+  if (cache_state()->is_used(src)) {
+    // We only have 1 scratch Q register, so try and reuse src.
+    LiftoffRegList pinned = LiftoffRegList::ForRegs(src);
+    LiftoffRegister unused_pair = GetUnusedRegister(kFpRegPair, pinned);
+    mask = liftoff::GetSimd128Register(unused_pair);
+  }
+  vshr(NeonS8, tmp, liftoff::GetSimd128Register(src), 7);
+  // Set i-th bit of each lane i. When AND with tmp, the lanes that
+  // are signed will have i-th bit set, unsigned will be 0.
+  vmov(mask.low(), Double((uint64_t)0x8040'2010'0804'0201));
+  vmov(mask.high(), Double((uint64_t)0x8040'2010'0804'0201));
+  vand(tmp, mask, tmp);
+  vext(mask, tmp, tmp, 8);
+  vzip(Neon8, mask, tmp);
+  vpadd(Neon16, tmp.low(), tmp.low(), tmp.high());
+  vpadd(Neon16, tmp.low(), tmp.low(), tmp.low());
+  vpadd(Neon16, tmp.low(), tmp.low(), tmp.low());
+  vmov(NeonU16, dst.gp(), tmp.low(), 0);
 }
 void LiftoffAssembler::emit_i8x16_shl(LiftoffRegister dst, LiftoffRegister lhs,
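The i8x16 case needs an extra step: after the AND, byte i holds 1 << (i % 8) for negative lanes, so bytes 0-7 and 8-15 both use bit positions 0-7. vext rotates the upper eight bytes down and vzip interleaves them with the lower eight, so 16-bit lane j of tmp ends up with byte j in its low half and byte j+8 in its high half; the usual three 16-bit pairwise adds then produce the full 16-bit mask. A hedged scalar model (name is illustrative):

#include <cstdint>

uint16_t I8x16BitmaskModelArm(const int8_t src[16]) {
  uint8_t masked[16];
  for (int i = 0; i < 16; ++i) {
    // vshr NeonS8 #7, then vand with 0x8040'2010'0804'0201 in each D half.
    masked[i] = (src[i] < 0) ? static_cast<uint8_t>(1u << (i % 8)) : 0;
  }
  uint16_t sum = 0;
  for (int j = 0; j < 8; ++j) {
    // State of 16-bit lane j after vext + vzip: byte j low, byte j+8 high.
    sum += static_cast<uint16_t>(masked[j] | (masked[j + 8] << 8));
  }
  return sum;  // three vpadd Neon16 folds; vmov NeonU16 lane 0 -> dst.gp()
}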
......
......@@ -1515,7 +1515,17 @@ void LiftoffAssembler::emit_v32x4_alltrue(LiftoffRegister dst,
 void LiftoffAssembler::emit_i32x4_bitmask(LiftoffRegister dst,
                                           LiftoffRegister src) {
-  bailout(kSimd, "i32x4_bitmask");
+  UseScratchRegisterScope temps(this);
+  VRegister tmp = temps.AcquireQ();
+  VRegister mask = temps.AcquireQ();
+  Sshr(tmp.V4S(), src.fp().V4S(), 31);
+  // Set i-th bit of each lane i. When AND with tmp, the lanes that
+  // are signed will have i-th bit set, unsigned will be 0.
+  Movi(mask.V2D(), 0x0000'0008'0000'0004, 0x0000'0002'0000'0001);
+  And(tmp.V16B(), mask.V16B(), tmp.V16B());
+  Addv(tmp.S(), tmp.V4S());
+  Mov(dst.gp().W(), tmp.V4S(), 0);
 }
 void LiftoffAssembler::emit_i32x4_shl(LiftoffRegister dst, LiftoffRegister lhs,
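On arm64 the shape is the same, but Addv sums all lanes of the vector in a single horizontal reduction, so no pairwise-add chain is needed. A minimal scalar view (name is illustrative):

#include <cstdint>

uint32_t I32x4BitmaskModelArm64(const int32_t src[4]) {
  uint32_t sum = 0;
  for (int i = 0; i < 4; ++i) {
    // Sshr V4S, #31 (assuming arithmetic shift), then And with the Movi constant.
    uint32_t sign = static_cast<uint32_t>(src[i] >> 31);
    sum += sign & (uint32_t{1} << i);
  }
  return sum;  // Addv tmp.S(), tmp.V4S(); Mov dst.gp().W(), tmp.V4S()[0]
}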
......@@ -1641,7 +1651,17 @@ void LiftoffAssembler::emit_v16x8_alltrue(LiftoffRegister dst,
 void LiftoffAssembler::emit_i16x8_bitmask(LiftoffRegister dst,
                                           LiftoffRegister src) {
-  bailout(kSimd, "i16x8_bitmask");
+  UseScratchRegisterScope temps(this);
+  VRegister tmp = temps.AcquireQ();
+  VRegister mask = temps.AcquireQ();
+  Sshr(tmp.V8H(), src.fp().V8H(), 15);
+  // Set i-th bit of each lane i. When AND with tmp, the lanes that
+  // are signed will have i-th bit set, unsigned will be 0.
+  Movi(mask.V2D(), 0x0080'0040'0020'0010, 0x0008'0004'0002'0001);
+  And(tmp.V16B(), mask.V16B(), tmp.V16B());
+  Addv(tmp.H(), tmp.V8H());
+  Mov(dst.gp().W(), tmp.V8H(), 0);
 }
 void LiftoffAssembler::emit_i16x8_shl(LiftoffRegister dst, LiftoffRegister lhs,
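For the i16x8 case the two Movi immediates pack the eight per-lane constants, lowest lane in the least significant halfword of the low 64 bits. A small sketch (names are illustrative) decoding them, to show that halfword lane i carries 1 << i:

#include <cstdint>

constexpr uint64_t kLo = 0x0008'0004'0002'0001;  // lanes 0..3
constexpr uint64_t kHi = 0x0080'0040'0020'0010;  // lanes 4..7
constexpr uint16_t LaneBit(int i) {
  // Little-endian halfword lanes: lane i sits at bits 16*(i%4) of its half.
  return static_cast<uint16_t>(((i < 4) ? kLo : kHi) >> (16 * (i % 4)));
}
static_assert(LaneBit(0) == 0x01 && LaneBit(5) == 0x20 && LaneBit(7) == 0x80,
              "lane i keeps bit i after the And");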
......@@ -1791,7 +1811,19 @@ void LiftoffAssembler::emit_v8x16_alltrue(LiftoffRegister dst,
 void LiftoffAssembler::emit_i8x16_bitmask(LiftoffRegister dst,
                                           LiftoffRegister src) {
-  bailout(kSimd, "i8x16_bitmask");
+  UseScratchRegisterScope temps(this);
+  VRegister tmp = temps.AcquireQ();
+  VRegister mask = temps.AcquireQ();
+  // Set i-th bit of each lane i. When AND with tmp, the lanes that
+  // are signed will have i-th bit set, unsigned will be 0.
+  Sshr(tmp.V16B(), src.fp().V16B(), 7);
+  Movi(mask.V2D(), 0x8040'2010'0804'0201);
+  And(tmp.V16B(), mask.V16B(), tmp.V16B());
+  Ext(mask.V16B(), tmp.V16B(), tmp.V16B(), 8);
+  Zip1(tmp.V16B(), tmp.V16B(), mask.V16B());
+  Addv(tmp.H(), tmp.V8H());
+  Mov(dst.gp().W(), tmp.V8H(), 0);
 }
 void LiftoffAssembler::emit_i8x16_shl(LiftoffRegister dst, LiftoffRegister lhs,
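As on arm, the byte case pairs the two halves before reducing: Ext rotates the upper eight masked bytes down and Zip1 interleaves them with the lower eight, so each 16-bit lane holds bits j and j+8, and a single Addv over V8H yields the complete 16-bit mask. A sketch of just that packing step (name is illustrative):

#include <cstdint>

// Effect of Ext(mask, tmp, tmp, 8) followed by Zip1(tmp, tmp, mask) on the
// sixteen masked bytes b[0..15]: 16-bit lane j becomes b[j] | (b[j + 8] << 8).
void ExtZip1Model(const uint8_t b[16], uint16_t out[8]) {
  for (int j = 0; j < 8; ++j) {
    out[j] = static_cast<uint16_t>(b[j] | (b[j + 8] << 8));
  }
  // Addv(tmp.H(), tmp.V8H()) then sums out[0..7] into a single halfword.
}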
......