Commit 6bbcdc3d authored by Ng Zhi An, committed by Commit Bot

[wasm-simd][liftoff] Implement bitselect

Tweak the register allocation logic in liftoff-compiler.cc. Reusing
src1 or src2 for dst would complicate the codegen significantly: we
would have to check which operand dst equals, back it up, and then
make sure the mask ends up in dst (that is how vbsl and bsl work: the
first operand holds the mask and is overwritten). By allowing dst to
reuse only src3 (the mask), no extra codegen is required in the other
backends.

Bug: v8:9909
Change-Id: I17843322508b18247c91e5959cf1b996b7f6a61d
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2171468
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Cr-Commit-Position: refs/heads/master@{#67481}
parent 1cf47195
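
For reference, wasm's v128.bitselect picks each result bit from src1 where the corresponding mask bit is set, and from src2 where it is clear. A minimal scalar sketch of that semantics (illustrative standalone code, not part of the patch), including the xor/and/xor form the ia32/x64 hunks below rely on:

```cpp
#include <cassert>
#include <cstdint>

// Scalar model of one 64-bit half of v128.bitselect: each result bit comes
// from src1 where the mask bit is 1, and from src2 where it is 0.
uint64_t BitSelect(uint64_t src1, uint64_t src2, uint64_t mask) {
  return (src1 & mask) | (src2 & ~mask);
}

int main() {
  uint64_t a = 0x1122334455667788, b = 0x99AABBCCDDEEFF00;
  uint64_t m = 0xFF00FF00FF00FF00;
  // The xor/and/xor rewrite used by the x86 backends computes the same
  // value: ((a ^ b) & m) ^ b.
  assert(BitSelect(a, b, m) == (((a ^ b) & m) ^ b));
  return 0;
}
```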
@@ -2592,8 +2592,12 @@ void LiftoffAssembler::emit_s128_xor(LiftoffRegister dst, LiftoffRegister lhs,
 void LiftoffAssembler::emit_s128_select(LiftoffRegister dst,
                                         LiftoffRegister src1,
                                         LiftoffRegister src2,
-                                        LiftoffRegister src3) {
-  bailout(kSimd, "s128select");
+                                        LiftoffRegister mask) {
+  if (dst != mask) {
+    vmov(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(mask));
+  }
+  vbsl(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(src1),
+       liftoff::GetSimd128Register(src2));
 }
 
 void LiftoffAssembler::emit_i8x16_sconvert_i16x8(LiftoffRegister dst,
......
@@ -1686,8 +1686,11 @@ void LiftoffAssembler::emit_s128_xor(LiftoffRegister dst, LiftoffRegister lhs,
 void LiftoffAssembler::emit_s128_select(LiftoffRegister dst,
                                         LiftoffRegister src1,
                                         LiftoffRegister src2,
-                                        LiftoffRegister src3) {
-  bailout(kSimd, "s128select");
+                                        LiftoffRegister mask) {
+  if (dst != mask) {
+    Mov(dst.fp().V16B(), mask.fp().V16B());
+  }
+  Bsl(dst.fp().V16B(), src1.fp().V16B(), src2.fp().V16B());
 }
 
 void LiftoffAssembler::emit_i8x16_sconvert_i16x8(LiftoffRegister dst,
......
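
Both NEON hunks above hinge on the same quirk: VBSL/BSL treats its destination register as the mask input and overwrites it with the result, which is why the mask is moved into dst first whenever dst != mask. A scalar model of that in-place behavior (a sketch, not V8 code):

```cpp
#include <cassert>
#include <cstdint>

// Scalar model of NEON BSL: dst must hold the mask on entry and is
// overwritten with the selected result.
void BslModel(uint64_t* dst, uint64_t src1, uint64_t src2) {
  *dst = (src1 & *dst) | (src2 & ~*dst);
}

int main() {
  uint64_t src1 = 0xAAAAAAAAAAAAAAAA, src2 = 0x5555555555555555;
  uint64_t mask = 0x00000000FFFFFFFF;
  uint64_t dst = mask;  // mirrors the vmov/Mov emitted when dst != mask
  BslModel(&dst, src1, src2);
  assert(dst == ((src1 & mask) | (src2 & ~mask)));
  return 0;
}
```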
@@ -2279,16 +2279,16 @@ void LiftoffAssembler::emit_s128_xor(LiftoffRegister dst, LiftoffRegister lhs,
 void LiftoffAssembler::emit_s128_select(LiftoffRegister dst,
                                         LiftoffRegister src1,
                                         LiftoffRegister src2,
-                                        LiftoffRegister src3) {
+                                        LiftoffRegister mask) {
   if (CpuFeatures::IsSupported(AVX)) {
     CpuFeatureScope scope(this, AVX);
     vxorps(liftoff::kScratchDoubleReg, src1.fp(), src2.fp());
-    vandps(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg, src3.fp());
+    vandps(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg, mask.fp());
     vxorps(dst.fp(), liftoff::kScratchDoubleReg, src2.fp());
   } else {
     movaps(liftoff::kScratchDoubleReg, src1.fp());
     xorps(liftoff::kScratchDoubleReg, src2.fp());
-    andps(liftoff::kScratchDoubleReg, src3.fp());
+    andps(liftoff::kScratchDoubleReg, mask.fp());
     if (dst.fp() != src2.fp()) movaps(dst.fp(), src2.fp());
     xorps(dst.fp(), liftoff::kScratchDoubleReg);
   }
......
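
The ia32 sequence avoids BSL-style in-place masking entirely: it computes dst = ((src1 ^ src2) & mask) ^ src2 through a scratch register, so src1 and the mask are never clobbered and only the dst == src2 alias needs a skipped move. A scalar walk-through mirroring the non-AVX instruction order (a sketch, not V8 code):

```cpp
#include <cassert>
#include <cstdint>

// One 64-bit chunk of the non-AVX path; 'scratch' plays the role of
// kScratchDoubleReg.
uint64_t SelectViaXorAndXor(uint64_t src1, uint64_t src2, uint64_t mask) {
  uint64_t scratch = src1;  // movaps scratch, src1
  scratch ^= src2;          // xorps  scratch, src2
  scratch &= mask;          // andps  scratch, mask
  uint64_t dst = src2;      // movaps dst, src2 (skipped when dst aliases src2)
  dst ^= scratch;           // xorps  dst, scratch
  return dst;               // == (src1 & mask) | (src2 & ~mask)
}

int main() {
  uint64_t a = 0xDEADBEEFCAFEF00D, b = 0x0123456789ABCDEF;
  uint64_t m = 0xFFFF0000FFFF0000;
  assert(SelectViaXorAndXor(a, b, m) == ((a & m) | (b & ~m)));
  return 0;
}
```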
@@ -797,7 +797,7 @@ class LiftoffAssembler : public TurboAssembler {
   inline void emit_s128_xor(LiftoffRegister dst, LiftoffRegister lhs,
                             LiftoffRegister rhs);
   inline void emit_s128_select(LiftoffRegister dst, LiftoffRegister src1,
-                               LiftoffRegister src2, LiftoffRegister src3);
+                               LiftoffRegister src2, LiftoffRegister mask);
   inline void emit_i8x16_neg(LiftoffRegister dst, LiftoffRegister src);
   inline void emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs,
                              LiftoffRegister rhs);
......
@@ -2380,9 +2380,12 @@ class LiftoffCompiler {
     LiftoffRegister src2 = __ PopToRegister(LiftoffRegList::ForRegs(src3));
     LiftoffRegister src1 =
         __ PopToRegister(LiftoffRegList::ForRegs(src3, src2));
+    // Reusing src1 and src2 would complicate codegen for select for some
+    // backends, so we allow only reusing src3 (the mask), and pin src1 and src2.
     LiftoffRegister dst =
         src_rc == result_rc
-            ? __ GetUnusedRegister(result_rc, {src1, src2, src3})
+            ? __ GetUnusedRegister(result_rc, {src3},
+                                   LiftoffRegList::ForRegs(src1, src2))
             : __ GetUnusedRegister(result_rc);
     CallEmitFn(fn, dst, src1, src2, src3);
     __ PushRegister(ValueType(result_type), dst);
......
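
To see what the pinning in the hunk above buys: on ARM, BSL needs the mask in dst, so if dst were allowed to alias src1 or src2 the backend would have to back that operand up before clobbering dst. A hypothetical sketch of the extra branching this would force (Reg, kScratch, EmitVmov, and EmitVbsl are illustrative stand-ins, not V8 API):

```cpp
#include <cstdio>

using Reg = int;              // stand-in for LiftoffRegister
constexpr Reg kScratch = 15;  // stand-in for a scratch Q register

void EmitVmov(Reg to, Reg from) { std::printf("vmov q%d, q%d\n", to, from); }
void EmitVbsl(Reg dst, Reg a, Reg b) {
  std::printf("vbsl q%d, q%d, q%d\n", dst, a, b);
}

// Extra handling an ARM backend would need without the allocation tweak.
void EmitS128SelectWithoutPinning(Reg dst, Reg src1, Reg src2, Reg mask) {
  if (dst == src1) { EmitVmov(kScratch, src1); src1 = kScratch; }
  if (dst == src2) { EmitVmov(kScratch, src2); src2 = kScratch; }
  if (dst != mask) EmitVmov(dst, mask);  // BSL consumes the mask in dst
  EmitVbsl(dst, src1, src2);
}

int main() {
  // With src1 and src2 pinned (the patch's approach), the first two branches
  // can never fire, leaving only the mask copy seen in the committed ARM code.
  EmitS128SelectWithoutPinning(/*dst=*/0, /*src1=*/1, /*src2=*/2, /*mask=*/0);
  return 0;
}
```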
@@ -1721,7 +1721,7 @@ void LiftoffAssembler::emit_s128_and_not(LiftoffRegister dst,
 void LiftoffAssembler::emit_s128_select(LiftoffRegister dst,
                                         LiftoffRegister src1,
                                         LiftoffRegister src2,
-                                        LiftoffRegister src3) {
+                                        LiftoffRegister mask) {
   bailout(kSimd, "emit_s128_select");
 }
......
@@ -1542,12 +1542,12 @@ void LiftoffAssembler::emit_s128_and_not(LiftoffRegister dst,
 void LiftoffAssembler::emit_s128_select(LiftoffRegister dst,
                                         LiftoffRegister src1,
                                         LiftoffRegister src2,
-                                        LiftoffRegister src3) {
-  if (dst == src3) {
+                                        LiftoffRegister mask) {
+  if (dst == mask) {
     bsel_v(dst.fp().toW(), src2.fp().toW(), src1.fp().toW());
   } else {
     xor_v(kSimd128ScratchReg, src1.fp().toW(), src2.fp().toW());
-    and_v(kSimd128ScratchReg, kSimd128ScratchReg, src3.fp().toW());
+    and_v(kSimd128ScratchReg, kSimd128ScratchReg, mask.fp().toW());
     xor_v(dst.fp().toW(), kSimd128ScratchReg, src2.fp().toW());
   }
 }
......
@@ -1067,7 +1067,7 @@ void LiftoffAssembler::emit_s128_xor(LiftoffRegister dst, LiftoffRegister lhs,
 void LiftoffAssembler::emit_s128_select(LiftoffRegister dst,
                                         LiftoffRegister src1,
                                         LiftoffRegister src2,
-                                        LiftoffRegister src3) {
+                                        LiftoffRegister mask) {
   bailout(kUnsupportedArchitecture, "emit_s128select");
 }
......
@@ -1099,7 +1099,7 @@ void LiftoffAssembler::emit_s128_xor(LiftoffRegister dst, LiftoffRegister lhs,
 void LiftoffAssembler::emit_s128_select(LiftoffRegister dst,
                                         LiftoffRegister src1,
                                         LiftoffRegister src2,
-                                        LiftoffRegister src3) {
+                                        LiftoffRegister mask) {
   bailout(kUnsupportedArchitecture, "emit_s128select");
 }
......
@@ -2242,16 +2242,16 @@ void LiftoffAssembler::emit_s128_xor(LiftoffRegister dst, LiftoffRegister lhs,
 void LiftoffAssembler::emit_s128_select(LiftoffRegister dst,
                                         LiftoffRegister src1,
                                         LiftoffRegister src2,
-                                        LiftoffRegister src3) {
+                                        LiftoffRegister mask) {
   if (CpuFeatures::IsSupported(AVX)) {
     CpuFeatureScope scope(this, AVX);
     vxorps(kScratchDoubleReg, src1.fp(), src2.fp());
-    vandps(kScratchDoubleReg, kScratchDoubleReg, src3.fp());
+    vandps(kScratchDoubleReg, kScratchDoubleReg, mask.fp());
     vxorps(dst.fp(), kScratchDoubleReg, src2.fp());
   } else {
     movaps(kScratchDoubleReg, src1.fp());
     xorps(kScratchDoubleReg, src2.fp());
-    andps(kScratchDoubleReg, src3.fp());
+    andps(kScratchDoubleReg, mask.fp());
     if (dst.fp() != src2.fp()) movaps(dst.fp(), src2.fp());
     xorps(dst.fp(), kScratchDoubleReg);
   }
......