Commit f5336b3d authored by Ng Zhi An, committed by Commit Bot

Reland "[wasm-simd][liftoff][ia32][x64] Implement i8x16 shr"

This is a reland of edf90ee8

The fix here is to call GetUnusedRegister with {}, in
liftoff-assembler-ia32.h emit_i8x16_shri_u.

Original change's description:
> [wasm-simd][liftoff][ia32][x64] Implement i8x16 shr
>
> The code sequence is the same as TurboFan, only wrapped in a template to
> share the implementation.
>
> Bug: v8:9909
> Change-Id: I9c1b37bbfafe91d1bd8edd7f9dafd86ff1c07623
> Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2202723
> Commit-Queue: Zhi An Ng <zhin@chromium.org>
> Reviewed-by: Clemens Backes <clemensb@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#67842}

Bug: v8:9909
Change-Id: Id56a612cd6580c68a5129e71d7a0e7b29d64b368
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2204080
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#67873}
parent 59e2e119
......@@ -2103,6 +2103,40 @@ void EmitSimdShiftOpImm(LiftoffAssembler* assm, LiftoffRegister dst,
(assm->*sse_op)(dst.fp(), shift);
}
}
// Distinguishes arithmetic (sign-extending) shifts from logical ones.
// NOTE(review): not referenced in this excerpt — the shift helpers below use
// a `bool is_signed` template parameter instead; presumably used by other
// shift code elsewhere in the file — confirm before removing.
enum class ShiftSignedness { kSigned, kUnsigned };
// Emits an i8x16 shift-right of {lhs} by the amount in {rhs} into {dst}.
// {is_signed} selects arithmetic (Psraw/Packsswb) vs. logical
// (Psrlw/Packuswb) shifts. SSE has no 8-bit shift, so each byte is widened
// into the high byte of a 16-bit lane, the words are shifted by
// (amount mod 8) + 8, and the results are packed back to bytes.
template <bool is_signed>
void EmitI8x16Shr(LiftoffAssembler* assm, LiftoffRegister dst,
LiftoffRegister lhs, LiftoffRegister rhs) {
// Same algorithm is used for both signed and unsigned shifts, the only
// difference is the actual shift and pack in the end. This is the same
// algorithm as used in code-generator-ia32.cc
// Pin {rhs} so the gp temp cannot alias the shift-amount register; the fp
// temp must not alias {dst}/{lhs}, which are still live below.
Register tmp =
assm->GetUnusedRegister(kGpReg, LiftoffRegList::ForRegs(rhs)).gp();
XMMRegister tmp_simd =
assm->GetUnusedRegister(kFpReg, LiftoffRegList::ForRegs(dst, lhs)).fp();
// Unpack the bytes into words, do logical shifts, and repack.
// After unpacking, each lhs byte sits in the HIGH byte of a 16-bit lane
// (the low byte is whatever was in the destination register) — hence the
// extra shift by 8 below, which also discards that junk byte.
assm->Punpckhbw(liftoff::kScratchDoubleReg, lhs.fp());
assm->Punpcklbw(dst.fp(), lhs.fp());
// Work on a copy so {rhs} itself is left unchanged.
assm->mov(tmp, rhs.gp());
// Take shift value modulo 8.
assm->and_(tmp, 7);
assm->add(tmp, Immediate(8));
assm->Movd(tmp_simd, tmp);
if (is_signed) {
assm->Psraw(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg,
tmp_simd);
assm->Psraw(dst.fp(), dst.fp(), tmp_simd);
// Signed saturation is lossless here: every word value fits in int8.
assm->Packsswb(dst.fp(), liftoff::kScratchDoubleReg);
} else {
assm->Psrlw(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg,
tmp_simd);
assm->Psrlw(dst.fp(), dst.fp(), tmp_simd);
// Unsigned saturation is lossless here: every word value fits in uint8.
assm->Packuswb(dst.fp(), liftoff::kScratchDoubleReg);
}
}
} // namespace liftoff
void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
......@@ -2488,23 +2522,38 @@ void LiftoffAssembler::emit_i8x16_shli(LiftoffRegister dst, LiftoffRegister lhs,
// i8x16.shr_s with a register shift amount: arithmetic right shift of each
// byte lane of {lhs} by {rhs} mod 8, written to {dst}.
// Defect fixed: the old `bailout(kSimd, "i8x16_shr_s")` line (removed in the
// upstream diff) was still present before the call, so the emitted
// implementation was never reached.
void LiftoffAssembler::emit_i8x16_shr_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitI8x16Shr</*is_signed=*/true>(this, dst, lhs, rhs);
}
// i8x16.shr_s with an immediate shift amount.
// Widen bytes into the high byte of 16-bit lanes, arithmetic-shift the words
// by (rhs & 7) + 8 (the +8 drops the junk low byte and sign-extends), then
// re-pack with signed saturation, which is lossless for these values.
// Defect fixed: removed the leftover `bailout(kSimd, "i8x16_shri_s")` diff
// residue that preceded (and defeated) this implementation.
void LiftoffAssembler::emit_i8x16_shri_s(LiftoffRegister dst,
                                         LiftoffRegister lhs, int32_t rhs) {
  Punpckhbw(liftoff::kScratchDoubleReg, lhs.fp());
  Punpcklbw(dst.fp(), lhs.fp());
  uint8_t shift = (rhs & 7) + 8;
  Psraw(liftoff::kScratchDoubleReg, shift);
  Psraw(dst.fp(), shift);
  Packsswb(dst.fp(), liftoff::kScratchDoubleReg);
}
// i8x16.shr_u with a register shift amount: logical right shift of each byte
// lane of {lhs} by {rhs} mod 8, written to {dst}.
// Defect fixed: removed the leftover `bailout(kSimd, "i8x16_shr_u")` diff
// residue; with it present every call bailed out of Liftoff before the
// shared helper could emit the real code sequence.
void LiftoffAssembler::emit_i8x16_shr_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitI8x16Shr</*is_signed=*/false>(this, dst, lhs, rhs);
}
// i8x16.shr_u with an immediate shift amount: 16-bit logical shift, then
// mask away the bits shifted in from the neighboring byte.
// Defects fixed: (1) removed the leftover `bailout(kSimd, "i8x16_shri_u")`
// diff residue that preceded this implementation; (2) `bmask << 24` shifted
// a promoted signed int into its sign bit (undefined behavior in C++17 when
// shift == 0, bmask == 0xff) — force unsigned arithmetic with uint32_t{}.
void LiftoffAssembler::emit_i8x16_shri_u(LiftoffRegister dst,
                                         LiftoffRegister lhs, int32_t rhs) {
  // Empty pin set {} — any gp register will do (this was the reland fix
  // described in the commit message).
  Register tmp = GetUnusedRegister(kGpReg, {}).gp();
  // Perform 16-bit shift, then mask away high bits.
  uint8_t shift = rhs & 7;
  Psrlw(dst.fp(), lhs.fp(), byte{shift});
  // Splat the per-byte mask 0xff >> shift across all four 32-bit lanes.
  uint8_t bmask = 0xff >> shift;
  uint32_t mask = uint32_t{bmask} << 24 | uint32_t{bmask} << 16 |
                  uint32_t{bmask} << 8 | bmask;
  mov(tmp, mask);
  Movd(liftoff::kScratchDoubleReg, tmp);
  Pshufd(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg, 0);
  Pand(dst.fp(), liftoff::kScratchDoubleReg);
}
void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs,
......
......@@ -2118,6 +2118,29 @@ void EmitSimdShiftOpImm(LiftoffAssembler* assm, LiftoffRegister dst,
}
}
// Emits an i8x16 shift-right of {lhs} by the amount in {rhs} into {dst}.
// {is_signed} selects arithmetic (Psraw/Packsswb) vs. logical
// (Psrlw/Packuswb) word shifts. x64 has no 8-bit SIMD shift, so each byte is
// widened into the high byte of a 16-bit lane, the words are shifted by
// (amount mod 8) + 8, and the results are packed back to bytes.
// Clobbers kScratchRegister, kScratchDoubleReg and liftoff::kScratchDoubleReg2.
template <bool is_signed>
void EmitI8x16Shr(LiftoffAssembler* assm, LiftoffRegister dst,
LiftoffRegister lhs, LiftoffRegister rhs) {
// Same algorithm as the one in code-generator-x64.cc.
// After unpacking, each lhs byte sits in the HIGH byte of a 16-bit lane;
// the low byte is junk and is discarded by the extra shift of 8 below.
assm->Punpckhbw(kScratchDoubleReg, lhs.fp());
assm->Punpcklbw(dst.fp(), lhs.fp());
// Prepare shift value
assm->movq(kScratchRegister, rhs.gp());
// Take shift value modulo 8.
assm->andq(kScratchRegister, Immediate(7));
assm->addq(kScratchRegister, Immediate(8));
assm->Movq(liftoff::kScratchDoubleReg2, kScratchRegister);
if (is_signed) {
assm->Psraw(kScratchDoubleReg, liftoff::kScratchDoubleReg2);
assm->Psraw(dst.fp(), liftoff::kScratchDoubleReg2);
// Signed saturation is lossless here: every word value fits in int8.
assm->Packsswb(dst.fp(), kScratchDoubleReg);
} else {
assm->Psrlw(kScratchDoubleReg, liftoff::kScratchDoubleReg2);
assm->Psrlw(dst.fp(), liftoff::kScratchDoubleReg2);
// Unsigned saturation is lossless here: every word value fits in uint8.
assm->Packuswb(dst.fp(), kScratchDoubleReg);
}
}
// Can be used by both the immediate and register version of the shifts. psraq
// is only available in AVX512, so we can't use it yet.
template <typename ShiftOperand>
......@@ -2524,23 +2547,43 @@ void LiftoffAssembler::emit_i8x16_shli(LiftoffRegister dst, LiftoffRegister lhs,
// i8x16.shr_s with a register shift amount: arithmetic right shift of each
// byte lane of {lhs} by {rhs} mod 8, written to {dst}.
// Defect fixed: removed the leftover `bailout(kSimd, "i8x16_shr_s")` diff
// residue that made the function bail out before emitting the real code.
void LiftoffAssembler::emit_i8x16_shr_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitI8x16Shr</*is_signed=*/true>(this, dst, lhs, rhs);
}
// i8x16.shr_s with an immediate shift amount.
// Widen bytes into the high byte of 16-bit lanes, arithmetic-shift the words
// by (rhs & 7) + 8 (the +8 drops the junk low byte and sign-extends), then
// re-pack with signed saturation, which is lossless for these values.
// Defect fixed: removed the leftover `bailout(kSimd, "i8x16_shri_s")` diff
// residue that preceded (and defeated) this implementation.
void LiftoffAssembler::emit_i8x16_shri_s(LiftoffRegister dst,
                                         LiftoffRegister lhs, int32_t rhs) {
  Punpckhbw(kScratchDoubleReg, lhs.fp());
  Punpcklbw(dst.fp(), lhs.fp());
  uint8_t shift = (rhs & 7) + 8;
  Psraw(kScratchDoubleReg, shift);
  Psraw(dst.fp(), shift);
  Packsswb(dst.fp(), kScratchDoubleReg);
}
// i8x16.shr_u with a register shift amount: logical right shift of each byte
// lane of {lhs} by {rhs} mod 8, written to {dst}.
// Defect fixed: removed the leftover `bailout(kSimd, "i8x16_shr_u")` diff
// residue that made the function bail out before emitting the real code.
void LiftoffAssembler::emit_i8x16_shr_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitI8x16Shr</*is_signed=*/false>(this, dst, lhs, rhs);
}
// i8x16.shr_u with an immediate shift amount: 16-bit logical shift, then
// mask away the bits shifted in from the neighboring byte.
// Defects fixed: (1) removed the leftover `bailout(kSimd, "i8x16_shri_u")`
// diff residue preceding the implementation; (2) the non-AVX path only
// shifted when `dst != lhs` — when dst aliased lhs NO shift was emitted and
// only the mask was applied, producing wrong results; add the missing
// in-place branch; (3) `bmask << 24` shifted a promoted signed int into its
// sign bit (UB in C++17 when shift == 0) — force unsigned with uint32_t{}.
void LiftoffAssembler::emit_i8x16_shri_u(LiftoffRegister dst,
                                         LiftoffRegister lhs, int32_t rhs) {
  // Perform 16-bit shift, then mask away high bits.
  uint8_t shift = rhs & 7;  // i.InputInt3(1);
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpsrlw(dst.fp(), lhs.fp(), byte{shift});
  } else if (dst != lhs) {
    Movaps(dst.fp(), lhs.fp());
    psrlw(dst.fp(), byte{shift});
  } else {
    // dst aliases lhs: shift in place.
    psrlw(dst.fp(), byte{shift});
  }
  // Splat the per-byte mask 0xff >> shift across all four 32-bit lanes.
  uint8_t bmask = 0xff >> shift;
  uint32_t mask = uint32_t{bmask} << 24 | uint32_t{bmask} << 16 |
                  uint32_t{bmask} << 8 | bmask;
  movl(kScratchRegister, Immediate(mask));
  Movd(kScratchDoubleReg, kScratchRegister);
  Pshufd(kScratchDoubleReg, kScratchDoubleReg, byte{0});
  Pand(dst.fp(), kScratchDoubleReg);
}
void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment