Commit edf90ee8 authored by Ng Zhi An, committed by Commit Bot

[wasm-simd][liftoff][ia32][x64] Implement i8x16 shr

The code sequence is the same as the one TurboFan generates, only wrapped
in a template so that the signed and unsigned variants share the
implementation.

Bug: v8:9909
Change-Id: I9c1b37bbfafe91d1bd8edd7f9dafd86ff1c07623
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2202723
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Cr-Commit-Position: refs/heads/master@{#67842}
parent 1d267700
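
For reference, the semantics being implemented: i8x16.shr_s and i8x16.shr_u
shift each of the 16 byte lanes right, arithmetically or logically, with the
shift count taken modulo the lane width of 8. A minimal scalar sketch of those
lane semantics (illustrative helper names, not part of this CL):

  // Scalar reference for i8x16.shr_s / i8x16.shr_u (illustrative only).
  #include <cstdint>

  void i8x16_shr_s(int8_t lanes[16], int32_t shift) {
    shift &= 7;  // wasm takes the shift count modulo the lane width
    for (int i = 0; i < 16; ++i) {
      lanes[i] = static_cast<int8_t>(lanes[i] >> shift);  // sign-extending
    }
  }

  void i8x16_shr_u(uint8_t lanes[16], int32_t shift) {
    shift &= 7;
    for (int i = 0; i < 16; ++i) {
      lanes[i] = static_cast<uint8_t>(lanes[i] >> shift);  // zero-filling
    }
  }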
@@ -2020,6 +2020,40 @@ void EmitSimdShiftOpImm(LiftoffAssembler* assm, LiftoffRegister dst,
     (assm->*sse_op)(dst.fp(), shift);
   }
 }
 
+enum class ShiftSignedness { kSigned, kUnsigned };
+
+template <bool is_signed>
+void EmitI8x16Shr(LiftoffAssembler* assm, LiftoffRegister dst,
+                  LiftoffRegister lhs, LiftoffRegister rhs) {
+  // The same algorithm is used for both signed and unsigned shifts; the only
+  // difference is the shift instruction and the pack at the end. This is the
+  // same algorithm as used in code-generator-ia32.cc.
+  Register tmp =
+      assm->GetUnusedRegister(kGpReg, LiftoffRegList::ForRegs(rhs)).gp();
+  XMMRegister tmp_simd =
+      assm->GetUnusedRegister(kFpReg, LiftoffRegList::ForRegs(dst, lhs)).fp();
+  // Unpack the bytes into words, do the shifts, and repack.
+  assm->Punpckhbw(liftoff::kScratchDoubleReg, lhs.fp());
+  assm->Punpcklbw(dst.fp(), lhs.fp());
+  assm->mov(tmp, rhs.gp());
+  // Take shift value modulo 8.
+  assm->and_(tmp, 7);
+  assm->add(tmp, Immediate(8));
+  assm->Movd(tmp_simd, tmp);
+  if (is_signed) {
+    assm->Psraw(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg,
+                tmp_simd);
+    assm->Psraw(dst.fp(), dst.fp(), tmp_simd);
+    assm->Packsswb(dst.fp(), liftoff::kScratchDoubleReg);
+  } else {
+    assm->Psrlw(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg,
+                tmp_simd);
+    assm->Psrlw(dst.fp(), dst.fp(), tmp_simd);
+    assm->Packuswb(dst.fp(), liftoff::kScratchDoubleReg);
+  }
+}
+
 }  // namespace liftoff
 
 void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
@@ -2405,23 +2439,38 @@ void LiftoffAssembler::emit_i8x16_shli(LiftoffRegister dst, LiftoffRegister lhs,
 void LiftoffAssembler::emit_i8x16_shr_s(LiftoffRegister dst,
                                         LiftoffRegister lhs,
                                         LiftoffRegister rhs) {
-  bailout(kSimd, "i8x16_shr_s");
+  liftoff::EmitI8x16Shr</*is_signed=*/true>(this, dst, lhs, rhs);
 }
 
 void LiftoffAssembler::emit_i8x16_shri_s(LiftoffRegister dst,
                                          LiftoffRegister lhs, int32_t rhs) {
-  bailout(kSimd, "i8x16_shri_s");
+  Punpckhbw(liftoff::kScratchDoubleReg, lhs.fp());
+  Punpcklbw(dst.fp(), lhs.fp());
+  uint8_t shift = (rhs & 7) + 8;
+  Psraw(liftoff::kScratchDoubleReg, shift);
+  Psraw(dst.fp(), shift);
+  Packsswb(dst.fp(), liftoff::kScratchDoubleReg);
 }
 
 void LiftoffAssembler::emit_i8x16_shr_u(LiftoffRegister dst,
                                         LiftoffRegister lhs,
                                         LiftoffRegister rhs) {
-  bailout(kSimd, "i8x16_shr_u");
+  liftoff::EmitI8x16Shr</*is_signed=*/false>(this, dst, lhs, rhs);
 }
 
 void LiftoffAssembler::emit_i8x16_shri_u(LiftoffRegister dst,
                                          LiftoffRegister lhs, int32_t rhs) {
-  bailout(kSimd, "i8x16_shri_u");
+  Register tmp = GetUnusedRegister(kGpReg).gp();
+  // Perform 16-bit shift, then mask away high bits.
+  uint8_t shift = rhs & 7;
+  Psrlw(dst.fp(), lhs.fp(), byte{shift});
+
+  uint8_t bmask = 0xff >> shift;
+  uint32_t mask = bmask << 24 | bmask << 16 | bmask << 8 | bmask;
+  mov(tmp, mask);
+  Movd(liftoff::kScratchDoubleReg, tmp);
+  Pshufd(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg, 0);
+  Pand(dst.fp(), liftoff::kScratchDoubleReg);
 }
 
 void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs,
......
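
A note on the EmitI8x16Shr sequence above: Punpcklbw/Punpckhbw interleave the
source bytes so that each byte of lhs ends up in the high byte of a 16-bit
word. Shifting each word right by (shift & 7) + 8 then drops the junk low byte
and leaves the correctly shifted byte (sign-extended by psraw, zero-filled by
psrlw) in the low half of the word, and packsswb/packuswb repack the words into
bytes; the pack's saturation never triggers, since every value already fits in
a byte. The same idea in SSE2 intrinsics, self-interleaving instead of pairing
with a junk register — an illustrative sketch, not V8 code:

  // Illustrative SSE2 version of the unpack/shift/pack trick (signed case).
  #include <emmintrin.h>

  __m128i i8x16_shr_s_sse2(__m128i v, int shift) {
    shift = (shift & 7) + 8;               // modulo 8, plus 8 to skip the low byte
    __m128i hi = _mm_unpackhi_epi8(v, v);  // each byte lands in a word's high byte
    __m128i lo = _mm_unpacklo_epi8(v, v);
    __m128i count = _mm_cvtsi32_si128(shift);
    hi = _mm_sra_epi16(hi, count);         // arithmetic shift, sign-extends
    lo = _mm_sra_epi16(lo, count);
    return _mm_packs_epi16(lo, hi);        // saturation is a no-op for these values
  }

The unsigned variant is identical with _mm_srl_epi16 and _mm_packus_epi16,
which is exactly the single template parameter the CL introduces.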
@@ -2028,6 +2028,29 @@ void EmitSimdShiftOpImm(LiftoffAssembler* assm, LiftoffRegister dst,
   }
 }
 
+template <bool is_signed>
+void EmitI8x16Shr(LiftoffAssembler* assm, LiftoffRegister dst,
+                  LiftoffRegister lhs, LiftoffRegister rhs) {
+  // Same algorithm as the one in code-generator-x64.cc.
+  assm->Punpckhbw(kScratchDoubleReg, lhs.fp());
+  assm->Punpcklbw(dst.fp(), lhs.fp());
+  // Prepare shift value.
+  assm->movq(kScratchRegister, rhs.gp());
+  // Take shift value modulo 8.
+  assm->andq(kScratchRegister, Immediate(7));
+  assm->addq(kScratchRegister, Immediate(8));
+  assm->Movq(liftoff::kScratchDoubleReg2, kScratchRegister);
+  if (is_signed) {
+    assm->Psraw(kScratchDoubleReg, liftoff::kScratchDoubleReg2);
+    assm->Psraw(dst.fp(), liftoff::kScratchDoubleReg2);
+    assm->Packsswb(dst.fp(), kScratchDoubleReg);
+  } else {
+    assm->Psrlw(kScratchDoubleReg, liftoff::kScratchDoubleReg2);
+    assm->Psrlw(dst.fp(), liftoff::kScratchDoubleReg2);
+    assm->Packuswb(dst.fp(), kScratchDoubleReg);
+  }
+}
+
 // Can be used by both the immediate and register version of the shifts. psraq
 // is only available in AVX512, so we can't use it yet.
 template <typename ShiftOperand>
@@ -2434,23 +2457,43 @@ void LiftoffAssembler::emit_i8x16_shli(LiftoffRegister dst, LiftoffRegister lhs,
 void LiftoffAssembler::emit_i8x16_shr_s(LiftoffRegister dst,
                                         LiftoffRegister lhs,
                                         LiftoffRegister rhs) {
-  bailout(kSimd, "i8x16_shr_s");
+  liftoff::EmitI8x16Shr</*is_signed=*/true>(this, dst, lhs, rhs);
 }
 
 void LiftoffAssembler::emit_i8x16_shri_s(LiftoffRegister dst,
                                          LiftoffRegister lhs, int32_t rhs) {
-  bailout(kSimd, "i8x16_shri_s");
+  Punpckhbw(kScratchDoubleReg, lhs.fp());
+  Punpcklbw(dst.fp(), lhs.fp());
+  uint8_t shift = (rhs & 7) + 8;
+  Psraw(kScratchDoubleReg, shift);
+  Psraw(dst.fp(), shift);
+  Packsswb(dst.fp(), kScratchDoubleReg);
 }
 
 void LiftoffAssembler::emit_i8x16_shr_u(LiftoffRegister dst,
                                         LiftoffRegister lhs,
                                         LiftoffRegister rhs) {
-  bailout(kSimd, "i8x16_shr_u");
+  liftoff::EmitI8x16Shr</*is_signed=*/false>(this, dst, lhs, rhs);
 }
 
 void LiftoffAssembler::emit_i8x16_shri_u(LiftoffRegister dst,
                                          LiftoffRegister lhs, int32_t rhs) {
-  bailout(kSimd, "i8x16_shri_u");
+  // Perform 16-bit shift, then mask away high bits.
+  uint8_t shift = rhs & 7;
+  if (CpuFeatures::IsSupported(AVX)) {
+    CpuFeatureScope scope(this, AVX);
+    vpsrlw(dst.fp(), lhs.fp(), byte{shift});
+  } else {
+    if (dst != lhs) Movaps(dst.fp(), lhs.fp());
+    psrlw(dst.fp(), byte{shift});
+  }
+
+  uint8_t bmask = 0xff >> shift;
+  uint32_t mask = bmask << 24 | bmask << 16 | bmask << 8 | bmask;
+  movl(kScratchRegister, Immediate(mask));
+  Movd(kScratchDoubleReg, kScratchRegister);
+  Pshufd(kScratchDoubleReg, kScratchDoubleReg, byte{0});
+  Pand(dst.fp(), kScratchDoubleReg);
 }
 
 void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs,
......
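
The unsigned immediate variant in the hunks above skips the unpacking
entirely: psrlw shifts each 16-bit word, which lets `shift` stray bits from
the high byte bleed into the top of the low byte, and a splatted per-byte mask
of 0xff >> shift clears them. An illustrative SSE2 sketch of that trick, not
V8 code:

  // Illustrative SSE2 version of the masked 16-bit shift in emit_i8x16_shri_u.
  #include <emmintrin.h>
  #include <cstdint>

  __m128i i8x16_shri_u_sse2(__m128i v, int imm) {
    int shift = imm & 7;
    __m128i shifted = _mm_srli_epi16(v, shift);  // 16-bit logical shift
    // Each byte now holds 'shift' stray bits from its neighbor; mask them off.
    uint8_t bmask = 0xFFu >> shift;
    __m128i mask = _mm_set1_epi8(static_cast<char>(bmask));
    return _mm_and_si128(shifted, mask);
  }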