Commit db1ab4fa authored by Zhi An Ng, committed by Commit Bot

Revert "[wasm-simd][liftoff][ia32][x64] Implement i8x16 shr"

This reverts commit edf90ee8.

Reason for revert: https://ci.chromium.org/p/v8/builders/ci/V8%20Win32%20-%20debug%20builder/36129?

Original change's description:
> [wasm-simd][liftoff][ia32][x64] Implement i8x16 shr
> 
> The code sequence is the same as TurboFan, only wrapped in a template to
> share the implementation.
> 
> Bug: v8:9909
> Change-Id: I9c1b37bbfafe91d1bd8edd7f9dafd86ff1c07623
> Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2202723
> Commit-Queue: Zhi An Ng <zhin@chromium.org>
> Reviewed-by: Clemens Backes <clemensb@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#67842}

TBR=clemensb@chromium.org,zhin@chromium.org

Change-Id: I04b9993040fa8a1dd69a4fa892a35273682d3efa
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Bug: v8:9909
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2204550
Reviewed-by: Zhi An Ng <zhin@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#67843}
parent edf90ee8
......@@ -2020,40 +2020,6 @@ void EmitSimdShiftOpImm(LiftoffAssembler* assm, LiftoffRegister dst,
(assm->*sse_op)(dst.fp(), shift);
}
}
// Tag type with a signed and an unsigned enumerator.
// NOTE(review): nothing visible in this excerpt references this enum (the
// EmitI8x16Shr helper is parameterized on a bool instead) — confirm whether
// it is actually needed.
enum class ShiftSignedness { kSigned, kUnsigned };
// Emits (ia32) a right shift of the sixteen 8-bit lanes of `lhs` by the
// amount held in the general-purpose register `rhs`, writing the result to
// `dst`. `is_signed` selects the arithmetic variant (Psraw + Packsswb) or the
// logical variant (Psrlw + Packuswb).
// Uses liftoff::kScratchDoubleReg plus one temporary GP and one temporary FP
// register obtained from the assembler.
template <bool is_signed>
void EmitI8x16Shr(LiftoffAssembler* assm, LiftoffRegister dst,
LiftoffRegister lhs, LiftoffRegister rhs) {
// Same algorithm is used for both signed and unsigned shifts, the only
// difference is the actual shift and pack in the end. This is the same
// algorithm as used in code-generator-ia32.cc
// GP temporary for the adjusted shift count; `rhs` is passed to
// GetUnusedRegister, presumably so that it is not handed out as the
// temporary — TODO confirm.
Register tmp =
assm->GetUnusedRegister(kGpReg, LiftoffRegList::ForRegs(rhs)).gp();
// FP temporary that carries the shift count into the SIMD shift instructions;
// `dst` and `lhs` are passed the same way.
XMMRegister tmp_simd =
assm->GetUnusedRegister(kFpReg, LiftoffRegList::ForRegs(dst, lhs)).fp();
// Unpack the bytes into words, do the 16-bit shifts (arithmetic or logical,
// depending on is_signed), and repack.
// High eight bytes of lhs go to the scratch register, low eight bytes to dst.
assm->Punpckhbw(liftoff::kScratchDoubleReg, lhs.fp());
assm->Punpcklbw(dst.fp(), lhs.fp());
assm->mov(tmp, rhs.gp());
// Take shift value modulo 8.
assm->and_(tmp, 7);
// The unpack places each lhs byte in the upper half of its 16-bit word, so an
// extra shift by 8 brings the shifted byte down into the lower half.
assm->add(tmp, Immediate(8));
assm->Movd(tmp_simd, tmp);
if (is_signed) {
assm->Psraw(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg,
tmp_simd);
assm->Psraw(dst.fp(), dst.fp(), tmp_simd);
// Narrow both halves back to bytes (signed pack).
assm->Packsswb(dst.fp(), liftoff::kScratchDoubleReg);
} else {
assm->Psrlw(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg,
tmp_simd);
assm->Psrlw(dst.fp(), dst.fp(), tmp_simd);
// Narrow both halves back to bytes (unsigned pack).
assm->Packuswb(dst.fp(), liftoff::kScratchDoubleReg);
}
}
} // namespace liftoff
void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
......@@ -2439,38 +2405,23 @@ void LiftoffAssembler::emit_i8x16_shli(LiftoffRegister dst, LiftoffRegister lhs,
// ia32: signed i8x16 right shift with the shift amount in a register.
// NOTE(review): this listing is a diff view whose +/- markers were lost. Per
// the revert described in the commit message, the EmitI8x16Shr call is
// presumably the removed line and the bailout call the one that remains —
// confirm against the real file.
void LiftoffAssembler::emit_i8x16_shr_s(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
liftoff::EmitI8x16Shr</*is_signed=*/true>(this, dst, lhs, rhs);
bailout(kSimd, "i8x16_shr_s");
}
// ia32: signed i8x16 right shift by the immediate `rhs`.
// NOTE(review): this listing is a diff view whose +/- markers were lost. Per
// the revert described in the commit message, the instruction sequence is
// presumably the removed code and the bailout call the line that remains —
// confirm against the real file.
void LiftoffAssembler::emit_i8x16_shri_s(LiftoffRegister dst,
LiftoffRegister lhs, int32_t rhs) {
// Widen: high eight bytes of lhs into the scratch register, low eight into
// dst, each byte ending up in the upper half of a 16-bit word.
Punpckhbw(liftoff::kScratchDoubleReg, lhs.fp());
Punpcklbw(dst.fp(), lhs.fp());
// Shift amount modulo 8, plus 8 to move the byte back into the lower half of
// its word.
uint8_t shift = (rhs & 7) + 8;
Psraw(liftoff::kScratchDoubleReg, shift);
Psraw(dst.fp(), shift);
// Narrow both halves back to bytes (signed pack).
Packsswb(dst.fp(), liftoff::kScratchDoubleReg);
bailout(kSimd, "i8x16_shri_s");
}
// ia32: unsigned i8x16 right shift with the shift amount in a register.
// NOTE(review): this listing is a diff view whose +/- markers were lost. Per
// the revert described in the commit message, the EmitI8x16Shr call is
// presumably the removed line and the bailout call the one that remains —
// confirm against the real file.
void LiftoffAssembler::emit_i8x16_shr_u(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
liftoff::EmitI8x16Shr</*is_signed=*/false>(this, dst, lhs, rhs);
bailout(kSimd, "i8x16_shr_u");
}
// ia32: unsigned i8x16 right shift by the immediate `rhs`.
// NOTE(review): this listing is a diff view whose +/- markers were lost. Per
// the revert described in the commit message, the instruction sequence is
// presumably the removed code and the bailout call the line that remains —
// confirm against the real file.
void LiftoffAssembler::emit_i8x16_shri_u(LiftoffRegister dst,
LiftoffRegister lhs, int32_t rhs) {
// GP temporary used to materialize the byte mask.
Register tmp = GetUnusedRegister(kGpReg).gp();
// Perform 16-bit shift, then mask away high bits.
uint8_t shift = rhs & 7;
Psrlw(dst.fp(), lhs.fp(), byte{shift});
// bmask keeps the low (8 - shift) bits of a byte; the 16-bit shift moved bits
// from the neighbouring byte into the positions above them.
uint8_t bmask = 0xff >> shift;
// Replicate the byte mask into all four bytes of a 32-bit value.
uint32_t mask = bmask << 24 | bmask << 16 | bmask << 8 | bmask;
mov(tmp, mask);
Movd(liftoff::kScratchDoubleReg, tmp);
// Copy the 32-bit mask to every lane of the scratch register.
Pshufd(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg, 0);
Pand(dst.fp(), liftoff::kScratchDoubleReg);
bailout(kSimd, "i8x16_shri_u");
}
void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs,
......
......@@ -2028,29 +2028,6 @@ void EmitSimdShiftOpImm(LiftoffAssembler* assm, LiftoffRegister dst,
}
}
// Emits (x64) a right shift of the sixteen 8-bit lanes of `lhs` by the amount
// held in the general-purpose register `rhs`, writing the result to `dst`.
// `is_signed` selects the arithmetic variant (Psraw + Packsswb) or the logical
// variant (Psrlw + Packuswb).
// Clobbers kScratchRegister, kScratchDoubleReg and
// liftoff::kScratchDoubleReg2.
template <bool is_signed>
void EmitI8x16Shr(LiftoffAssembler* assm, LiftoffRegister dst,
LiftoffRegister lhs, LiftoffRegister rhs) {
// Same algorithm as the one in code-generator-x64.cc.
// Widen: high eight bytes of lhs into the scratch register, low eight into
// dst, each byte ending up in the upper half of a 16-bit word.
assm->Punpckhbw(kScratchDoubleReg, lhs.fp());
assm->Punpcklbw(dst.fp(), lhs.fp());
// Prepare shift value
assm->movq(kScratchRegister, rhs.gp());
// Take shift value modulo 8.
assm->andq(kScratchRegister, Immediate(7));
// Extra 8 so the shifted byte lands in the lower half of its 16-bit word.
assm->addq(kScratchRegister, Immediate(8));
assm->Movq(liftoff::kScratchDoubleReg2, kScratchRegister);
if (is_signed) {
assm->Psraw(kScratchDoubleReg, liftoff::kScratchDoubleReg2);
assm->Psraw(dst.fp(), liftoff::kScratchDoubleReg2);
// Narrow both halves back to bytes (signed pack).
assm->Packsswb(dst.fp(), kScratchDoubleReg);
} else {
assm->Psrlw(kScratchDoubleReg, liftoff::kScratchDoubleReg2);
assm->Psrlw(dst.fp(), liftoff::kScratchDoubleReg2);
// Narrow both halves back to bytes (unsigned pack).
assm->Packuswb(dst.fp(), kScratchDoubleReg);
}
}
// Can be used by both the immediate and register version of the shifts. psraq
// is only available in AVX512, so we can't use it yet.
template <typename ShiftOperand>
......@@ -2457,43 +2434,23 @@ void LiftoffAssembler::emit_i8x16_shli(LiftoffRegister dst, LiftoffRegister lhs,
// x64: signed i8x16 right shift with the shift amount in a register.
// NOTE(review): this listing is a diff view whose +/- markers were lost. Per
// the revert described in the commit message, the EmitI8x16Shr call is
// presumably the removed line and the bailout call the one that remains —
// confirm against the real file.
void LiftoffAssembler::emit_i8x16_shr_s(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
liftoff::EmitI8x16Shr</*is_signed=*/true>(this, dst, lhs, rhs);
bailout(kSimd, "i8x16_shr_s");
}
// x64: signed i8x16 right shift by the immediate `rhs`.
// NOTE(review): this listing is a diff view whose +/- markers were lost. Per
// the revert described in the commit message, the instruction sequence is
// presumably the removed code and the bailout call the line that remains —
// confirm against the real file.
void LiftoffAssembler::emit_i8x16_shri_s(LiftoffRegister dst,
LiftoffRegister lhs, int32_t rhs) {
// Widen: high eight bytes of lhs into the scratch register, low eight into
// dst, each byte ending up in the upper half of a 16-bit word.
Punpckhbw(kScratchDoubleReg, lhs.fp());
Punpcklbw(dst.fp(), lhs.fp());
// Shift amount modulo 8, plus 8 to move the byte back into the lower half of
// its word.
uint8_t shift = (rhs & 7) + 8;
Psraw(kScratchDoubleReg, shift);
Psraw(dst.fp(), shift);
// Narrow both halves back to bytes (signed pack).
Packsswb(dst.fp(), kScratchDoubleReg);
bailout(kSimd, "i8x16_shri_s");
}
// x64: unsigned i8x16 right shift with the shift amount in a register.
// NOTE(review): this listing is a diff view whose +/- markers were lost. Per
// the revert described in the commit message, the EmitI8x16Shr call is
// presumably the removed line and the bailout call the one that remains —
// confirm against the real file.
void LiftoffAssembler::emit_i8x16_shr_u(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
liftoff::EmitI8x16Shr</*is_signed=*/false>(this, dst, lhs, rhs);
bailout(kSimd, "i8x16_shr_u");
}
// x64: unsigned i8x16 right shift by the immediate `rhs`.
// Shifts the 16-bit lanes right by (rhs mod 8) and then masks each byte so
// that bits shifted in from the neighbouring byte are cleared.
// Clobbers kScratchRegister and kScratchDoubleReg.
// NOTE(review): this listing is a diff view whose +/- markers were lost. Per
// the revert described in the commit message, the instruction sequence is
// presumably the removed code and the bailout call the line that remains —
// confirm against the real file.
void LiftoffAssembler::emit_i8x16_shri_u(LiftoffRegister dst,
LiftoffRegister lhs, int32_t rhs) {
// Perform 16-bit shift, then mask away high bits.
uint8_t shift = rhs & 7; // i.InputInt3(1);
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
vpsrlw(dst.fp(), lhs.fp(), byte{shift});
} else {
// SSE psrlw shifts in place: copy lhs into dst only when they are different
// registers, but always emit the shift. Previously the shift was skipped
// when dst aliased lhs, leaving the lanes unshifted.
if (dst != lhs) {
Movaps(dst.fp(), lhs.fp());
}
psrlw(dst.fp(), byte{shift});
}
// bmask keeps the low (8 - shift) bits of a byte.
uint8_t bmask = 0xff >> shift;
// Replicate the byte mask into all four bytes of a 32-bit value.
uint32_t mask = bmask << 24 | bmask << 16 | bmask << 8 | bmask;
movl(kScratchRegister, Immediate(mask));
Movd(kScratchDoubleReg, kScratchRegister);
// Copy the 32-bit mask to every lane of the scratch register.
Pshufd(kScratchDoubleReg, kScratchDoubleReg, byte{0});
Pand(dst.fp(), kScratchDoubleReg);
bailout(kSimd, "i8x16_shri_u");
}
void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment