Commit df2ab0f0 authored by Zhou, Zhiguo; committed by Commit Bot

[wasm-simd][liftoff] Implement S8x16Shuffle on x64 and ia32

Bug: v8:9909
Change-Id: I99c599ac1d872a8b4e7c154a942026b52ecb0bd5
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2219688
Commit-Queue: Zhiguo Zhou <zhiguo.zhou@intel.com>
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Reviewed-by: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#68220}
parent de876331
......@@ -321,6 +321,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_OP3_XO(Pcmpeqb, pcmpeqb)
AVX_OP3_XO(Pcmpeqw, pcmpeqw)
AVX_OP3_XO(Pcmpeqd, pcmpeqd)
// Por is called by LiftoffAssembler::emit_s8x16_shuffle to OR together the
// two Pshufb results (one per shuffle input).
// NOTE(review): AVX_OP3_XO is defined outside this view; presumably it
// declares a wrapper named by the first argument around the instruction named
// by the second -- confirm against the macro definition.
AVX_OP3_XO(Por, por)
AVX_OP3_XO(Psubb, psubb)
AVX_OP3_XO(Psubw, psubw)
AVX_OP3_XO(Psubd, psubd)
......
......@@ -2866,6 +2866,13 @@ void LiftoffAssembler::emit_i16x8_replace_lane(LiftoffRegister dst,
imm_lane_idx);
}
// s8x16 shuffle has no code generation in this backend: dst, lhs, rhs and
// shuffle are ignored and bailout() is invoked with reason kSimd and the
// detail string "s8x16_shuffle".
// NOTE(review): the neighbouring splat uses NEON vdup, so this looks like the
// 32-bit ARM backend -- confirm.
void LiftoffAssembler::emit_s8x16_shuffle(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs,
const uint8_t shuffle[16]) {
bailout(kSimd, "s8x16_shuffle");
}
void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
LiftoffRegister src) {
vdup(Neon8, liftoff::GetSimd128Register(dst), src.gp());
......
......@@ -1882,6 +1882,13 @@ void LiftoffAssembler::emit_i16x8_max_u(LiftoffRegister dst,
Umax(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
}
// s8x16 shuffle has no code generation in this backend: dst, lhs, rhs and
// shuffle are ignored and bailout() is invoked with reason kSimd and the
// detail string "s8x16_shuffle".
// NOTE(review): the neighbouring code uses Dup/Umax on V16B/V8H views, so
// this looks like the arm64 backend -- confirm.
void LiftoffAssembler::emit_s8x16_shuffle(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs,
const uint8_t shuffle[16]) {
bailout(kSimd, "s8x16_shuffle");
}
void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
LiftoffRegister src) {
Dup(dst.fp().V16B(), src.gp().W());
......
......@@ -2195,6 +2195,44 @@ void LiftoffAssembler::LoadTransform(LiftoffRegister dst, Register src_addr,
}
}
// Emits the ia32 code for a 16-byte shuffle of |lhs| and |rhs| into |dst|.
//
// |shuffle| holds one source index per result byte. An index below
// kSimd128Size picks that byte of |lhs|; any other index picks byte
// (index & 0x0F) of |rhs|. Each input is run through pshufb with its own
// control mask, in which the bytes that belong to the other input are 0x80
// (pshufb writes zero for a control byte with the top bit set), and the two
// partial results are OR-ed together.
//
// The control masks are materialised on the machine stack, so esp is saved in
// a GP register, rounded down to a 16-byte boundary, and restored at the end.
void LiftoffAssembler::emit_s8x16_shuffle(LiftoffRegister dst,
                                          LiftoffRegister lhs,
                                          LiftoffRegister rhs,
                                          const uint8_t shuffle[16]) {
  // Pushes one 16-byte pshufb control mask as four 32-bit immediates. The
  // word holding lanes 12..15 is pushed first, so once all four are pushed
  // lane 0 sits at [esp] and the mask reads in lane order from there.
  const auto push_control_mask = [this, shuffle](bool select_rhs) {
    for (int word = 3; word >= 0; --word) {
      uint32_t packed = 0;
      for (int byte = 3; byte >= 0; --byte) {
        const uint8_t lane = shuffle[word * 4 + byte];
        const bool lane_in_rhs = lane >= kSimd128Size;
        // For a lane taken from lhs, (lane & 0x0F) is the lane itself.
        const uint32_t control =
            (lane_in_rhs == select_rhs) ? (lane & 0x0F) : 0x80;
        packed = (packed << 8) | control;
      }
      push(Immediate(packed));
    }
  };

  // Remember the incoming stack pointer and align esp to 16 bytes; every mask
  // is exactly 16 bytes, so [esp] stays aligned after each one is pushed.
  LiftoffRegister saved_esp = GetUnusedRegister(kGpReg, {});
  mov(saved_esp.gp(), esp);
  and_(esp, -16);

  // lhs half: shuffled in the scratch register (lhs is copied out before dst
  // is written).
  movups(liftoff::kScratchDoubleReg, lhs.fp());
  push_control_mask(false);
  Pshufb(liftoff::kScratchDoubleReg, Operand(esp, 0));

  // rhs half: shuffled in dst itself.
  push_control_mask(true);
  if (dst.fp() != rhs.fp()) movups(dst.fp(), rhs.fp());
  Pshufb(dst.fp(), Operand(esp, 0));

  // Merge both halves, then drop the two masks and undo the alignment.
  Por(dst.fp(), liftoff::kScratchDoubleReg);
  mov(esp, saved_esp.gp());
}
void LiftoffAssembler::emit_s8x16_swizzle(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
......
......@@ -748,6 +748,9 @@ class LiftoffAssembler : public TurboAssembler {
Register offset_reg, uint32_t offset_imm,
LoadType type, LoadTransformationKind transform,
uint32_t* protected_load_pc);
// Emits code that shuffles the bytes of |lhs| and |rhs| into |dst| according
// to the 16 immediate lane indices in |shuffle|. In the backends that emit
// code for it, an index below kSimd128Size selects that byte of |lhs| and any
// other index selects byte (index & 0x0F) of |rhs|. Backends without an
// implementation call bailout(kSimd, "s8x16_shuffle") instead.
inline void emit_s8x16_shuffle(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs,
const uint8_t shuffle[16]);
// NOTE(review): the definition of emit_s8x16_swizzle is not visible in this
// view; presumably |rhs| supplies run-time lane indices into |lhs| -- confirm
// against the per-backend implementations.
inline void emit_s8x16_swizzle(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs);
// The visible definitions of emit_i8x16_splat start with a dup of |src|'s GP
// register into |dst| using 8-bit lanes (vdup Neon8 / Dup V16B).
inline void emit_i8x16_splat(LiftoffRegister dst, LiftoffRegister src);
......
......@@ -2941,8 +2941,15 @@ class LiftoffCompiler {
const Simd8x16ShuffleImmediate<validate>& imm,
const Value& input0, const Value& input1,
Value* result) {
unsupported(decoder, kSimd, "simd");
static constexpr RegClass result_rc = reg_class_for(ValueType::kS128);
LiftoffRegister rhs = __ PopToRegister();
LiftoffRegister lhs = __ PopToRegister(LiftoffRegList::ForRegs(rhs));
LiftoffRegister dst = __ GetUnusedRegister(result_rc, {lhs, rhs}, {});
__ LiftoffAssembler::emit_s8x16_shuffle(dst, lhs, rhs, imm.shuffle);
__ PushRegister(kWasmS128, dst);
}
void Throw(FullDecoder* decoder, const ExceptionIndexImmediate<validate>&,
const Vector<Value>& args) {
unsupported(decoder, kExceptionHandling, "throw");
......
......@@ -2240,6 +2240,44 @@ void LiftoffAssembler::LoadTransform(LiftoffRegister dst, Register src_addr,
}
}
// Emits the x64 code for a 16-byte shuffle of |lhs| and |rhs| into |dst|.
//
// |shuffle| holds one source index per result byte. An index below
// kSimd128Size picks that byte of |lhs|; any other index picks byte
// (index & 0x0F) of |rhs|. Each input is run through pshufb with its own
// control mask, in which the bytes that belong to the other input are 0x80
// (pshufb writes zero for a control byte with the top bit set), and the two
// partial results are OR-ed together.
void LiftoffAssembler::emit_s8x16_shuffle(LiftoffRegister dst,
                                          LiftoffRegister lhs,
                                          LiftoffRegister rhs,
                                          const uint8_t shuffle[16]) {
  // Build both control masks up front (this runs at compile time and emits no
  // code). Each mask is kept as {low, high} 64-bit halves; walking the lanes
  // from 15 down to 0 while shifting left leaves lane 0 / lane 8 in the least
  // significant byte of its half.
  uint64_t lhs_control[2] = {0, 0};
  uint64_t rhs_control[2] = {0, 0};
  for (int idx = 15; idx >= 0; --idx) {
    const uint8_t lane = shuffle[idx];
    const int half = idx / 8;
    const bool from_lhs = lane < kSimd128Size;
    const uint64_t lhs_byte = from_lhs ? lane : 0x80;
    const uint64_t rhs_byte = from_lhs ? 0x80 : (lane & 0x0F);
    lhs_control[half] = (lhs_control[half] << 8) | lhs_byte;
    rhs_control[half] = (rhs_control[half] << 8) | rhs_byte;
  }

  // SIMD temporary holding the current control mask; it must not alias any
  // of the operands.
  LiftoffRegister control =
      GetUnusedRegister(kFpReg, LiftoffRegList::ForRegs(dst, lhs, rhs));

  // Loads a 128-bit control mask into |control|: the low half through Move,
  // the high half through kScratchRegister and Pinsrq into lane 1.
  const auto load_control = [&](const uint64_t* halves) {
    TurboAssembler::Move(control.fp(), halves[0]);
    movq(kScratchRegister, halves[1]);
    Pinsrq(control.fp(), kScratchRegister, int8_t{1});
  };

  // lhs half: shuffled in the scratch register (lhs is copied out before dst
  // is written).
  Movups(kScratchDoubleReg, lhs.fp());
  load_control(lhs_control);
  Pshufb(kScratchDoubleReg, control.fp());

  // rhs half: shuffled in dst itself.
  load_control(rhs_control);
  if (dst.fp() != rhs.fp()) Movups(dst.fp(), rhs.fp());
  Pshufb(dst.fp(), control.fp());

  // Merge both halves.
  Por(dst.fp(), kScratchDoubleReg);
}
void LiftoffAssembler::emit_s8x16_swizzle(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment