Commit 91cbf3e3 authored by Zhou, Zhiguo; committed by Commit Bot

[wasm-simd][liftoff] Implement shl on x64 and ia32
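
Adds Liftoff support for the SIMD shift-left operations (i8x16.shl,
i16x8.shl, i32x4.shl, i64x2.shl) on x64 and ia32, with both register and
constant shift counts. Counts are masked to the lane width, matching wasm
semantics; the i8x16 variants are emulated with 16-bit shifts plus byte
masks, since x86 has no packed byte-shift instruction. The arm and arm64
ports bail out on these operations for now.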

Bug: v8:9909
Change-Id: I1bd17d6a1a6f32e9076d80c91be2743868ecad2b
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2171475
Reviewed-by: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Commit-Queue: Zhiguo Zhou <zhiguo.zhou@intel.com>
Cr-Commit-Position: refs/heads/master@{#67763}
parent 364987a9
@@ -2266,6 +2266,16 @@ void LiftoffAssembler::emit_i64x2_neg(LiftoffRegister dst,
liftoff::GetSimd128Register(src));
}
void LiftoffAssembler::emit_i64x2_shl(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kSimd, "i64x2_shl");
}
void LiftoffAssembler::emit_i64x2_shli(LiftoffRegister dst, LiftoffRegister lhs,
int32_t rhs) {
bailout(kSimd, "i64x2_shli");
}
void LiftoffAssembler::emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
vadd(Neon64, liftoff::GetSimd128Register(dst),
@@ -2348,6 +2358,16 @@ void LiftoffAssembler::emit_i32x4_neg(LiftoffRegister dst,
liftoff::GetSimd128Register(src));
}
void LiftoffAssembler::emit_i32x4_shl(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kSimd, "i32x4_shl");
}
void LiftoffAssembler::emit_i32x4_shli(LiftoffRegister dst, LiftoffRegister lhs,
int32_t rhs) {
bailout(kSimd, "i32x4_shli");
}
void LiftoffAssembler::emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
vadd(Neon32, liftoff::GetSimd128Register(dst),
@@ -2405,6 +2425,16 @@ void LiftoffAssembler::emit_i16x8_neg(LiftoffRegister dst,
liftoff::GetSimd128Register(src));
}
void LiftoffAssembler::emit_i16x8_shl(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kSimd, "i16x8_shl");
}
void LiftoffAssembler::emit_i16x8_shli(LiftoffRegister dst, LiftoffRegister lhs,
int32_t rhs) {
bailout(kSimd, "i16x8_shli");
}
void LiftoffAssembler::emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
vadd(Neon16, liftoff::GetSimd128Register(dst),
@@ -2534,6 +2564,16 @@ void LiftoffAssembler::emit_i8x16_neg(LiftoffRegister dst,
liftoff::GetSimd128Register(src));
}
void LiftoffAssembler::emit_i8x16_shl(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kSimd, "i8x16_shl");
}
void LiftoffAssembler::emit_i8x16_shli(LiftoffRegister dst, LiftoffRegister lhs,
int32_t rhs) {
bailout(kSimd, "i8x16_shli");
}
void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
vadd(Neon8, liftoff::GetSimd128Register(dst),
@@ -1252,6 +1252,16 @@ void LiftoffAssembler::emit_i64x2_neg(LiftoffRegister dst,
Neg(dst.fp().V2D(), src.fp().V2D());
}
void LiftoffAssembler::emit_i64x2_shl(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kSimd, "i64x2_shl");
}
void LiftoffAssembler::emit_i64x2_shli(LiftoffRegister dst, LiftoffRegister lhs,
int32_t rhs) {
bailout(kSimd, "i64x2_shli");
}
void LiftoffAssembler::emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
Add(dst.fp().V2D(), lhs.fp().V2D(), rhs.fp().V2D());
@@ -1309,6 +1319,16 @@ void LiftoffAssembler::emit_i32x4_neg(LiftoffRegister dst,
Neg(dst.fp().V4S(), src.fp().V4S());
}
void LiftoffAssembler::emit_i32x4_shl(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kSimd, "i32x4_shl");
}
void LiftoffAssembler::emit_i32x4_shli(LiftoffRegister dst, LiftoffRegister lhs,
int32_t rhs) {
bailout(kSimd, "i32x4_shli");
}
void LiftoffAssembler::emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
Add(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
@@ -1380,6 +1400,16 @@ void LiftoffAssembler::emit_i16x8_neg(LiftoffRegister dst,
Neg(dst.fp().V8H(), src.fp().V8H());
}
void LiftoffAssembler::emit_i16x8_shl(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kSimd, "i16x8_shl");
}
void LiftoffAssembler::emit_i16x8_shli(LiftoffRegister dst, LiftoffRegister lhs,
int32_t rhs) {
bailout(kSimd, "i16x8_shli");
}
void LiftoffAssembler::emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
Add(dst.fp().V8H(), lhs.fp().V8H(), rhs.fp().V8H());
@@ -1475,6 +1505,16 @@ void LiftoffAssembler::emit_i8x16_neg(LiftoffRegister dst,
Neg(dst.fp().V16B(), src.fp().V16B());
}
void LiftoffAssembler::emit_i8x16_shl(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kSimd, "i8x16_shl");
}
void LiftoffAssembler::emit_i8x16_shli(LiftoffRegister dst, LiftoffRegister lhs,
int32_t rhs) {
bailout(kSimd, "i8x16_shli");
}
void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
Add(dst.fp().V16B(), lhs.fp().V16B(), rhs.fp().V16B());
@@ -1981,6 +1981,42 @@ void EmitSimdNonCommutativeBinOp(
(assm->*sse_op)(dst.fp(), rhs.fp());
}
}
template <void (Assembler::*avx_op)(XMMRegister, XMMRegister, XMMRegister),
void (Assembler::*sse_op)(XMMRegister, XMMRegister), uint8_t width>
void EmitSimdShiftOp(LiftoffAssembler* assm, LiftoffRegister dst,
LiftoffRegister operand, LiftoffRegister count) {
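// Wasm shift counts are taken modulo the lane width, whereas SSE/AVX
// variable shifts zero the result once the count reaches the lane width,
// so mask the count explicitly. |width| is log2 of the lane width in bits.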
static constexpr RegClass tmp_rc = reg_class_for(ValueType::kI32);
LiftoffRegister tmp =
assm->GetUnusedRegister(tmp_rc, LiftoffRegList::ForRegs(count));
constexpr int mask = (1 << width) - 1;
assm->mov(tmp.gp(), count.gp());
assm->and_(tmp.gp(), Immediate(mask));
assm->Movd(kScratchDoubleReg, tmp.gp());
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(assm, AVX);
(assm->*avx_op)(dst.fp(), operand.fp(), kScratchDoubleReg);
} else {
if (dst.fp() != operand.fp()) assm->movaps(dst.fp(), operand.fp());
(assm->*sse_op)(dst.fp(), kScratchDoubleReg);
}
}
template <void (Assembler::*avx_op)(XMMRegister, XMMRegister, byte),
void (Assembler::*sse_op)(XMMRegister, byte), uint8_t width>
void EmitSimdShiftOpImm(LiftoffAssembler* assm, LiftoffRegister dst,
LiftoffRegister operand, int32_t count) {
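// Mask the constant count to the lane width (wasm modulo semantics); the
// masked value always fits the instruction's 8-bit immediate.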
constexpr int mask = (1 << width) - 1;
byte shift = static_cast<byte>(count & mask);
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(assm, AVX);
(assm->*avx_op)(dst.fp(), operand.fp(), shift);
} else {
if (dst.fp() != operand.fp()) assm->movaps(dst.fp(), operand.fp());
(assm->*sse_op)(dst.fp(), shift);
}
}
} // namespace liftoff
void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
@@ -2314,6 +2350,55 @@ void LiftoffAssembler::emit_i8x16_neg(LiftoffRegister dst,
}
}
void LiftoffAssembler::emit_i8x16_shl(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
static constexpr RegClass tmp_rc = reg_class_for(ValueType::kI32);
static constexpr RegClass tmp_simd_rc = reg_class_for(ValueType::kS128);
LiftoffRegister tmp = GetUnusedRegister(tmp_rc, LiftoffRegList::ForRegs(rhs));
LiftoffRegister tmp_simd =
GetUnusedRegister(tmp_simd_rc, LiftoffRegList::ForRegs(dst, lhs));
// Mask off the unwanted bits before word-shifting.
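// There is no instruction for shifting packed bytes, so the bytes are
// shifted as 16-bit words. Clearing the top (rhs & 7) bits of every byte
// beforehand keeps them from spilling into the neighboring byte. The byte
// mask (0xff >> shift in every byte) is built by shifting all-ones words
// right by (shift + 8) and packing the words back down to bytes.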
Pcmpeqw(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
mov(tmp.gp(), rhs.gp());
and_(tmp.gp(), Immediate(7));
add(tmp.gp(), Immediate(8));
Movd(tmp_simd.fp(), tmp.gp());
Psrlw(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg, tmp_simd.fp());
Packuswb(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
vpand(dst.fp(), lhs.fp(), liftoff::kScratchDoubleReg);
} else {
if (dst.fp() != lhs.fp()) movaps(dst.fp(), lhs.fp());
pand(dst.fp(), liftoff::kScratchDoubleReg);
}
sub(tmp.gp(), Immediate(8));
Movd(tmp_simd.fp(), tmp.gp());
Psllw(dst.fp(), dst.fp(), tmp_simd.fp());
}
void LiftoffAssembler::emit_i8x16_shli(LiftoffRegister dst, LiftoffRegister lhs,
int32_t rhs) {
static constexpr RegClass tmp_rc = reg_class_for(ValueType::kI32);
LiftoffRegister tmp = GetUnusedRegister(tmp_rc);
byte shift = static_cast<byte>(rhs & 0x7);
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
vpsllw(dst.fp(), lhs.fp(), shift);
} else {
if (dst.fp() != lhs.fp()) movaps(dst.fp(), lhs.fp());
psllw(dst.fp(), shift);
}
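// The word-wise shift drags the top bits of each low byte into the low
// bits of the byte above it; a correct byte shift has zeros there, so
// clear them with a broadcast (0xff << shift) mask.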
uint8_t bmask = static_cast<uint8_t>(0xff << shift);
uint32_t mask = bmask << 24 | bmask << 16 | bmask << 8 | bmask;
mov(tmp.gp(), mask);
Movd(liftoff::kScratchDoubleReg, tmp.gp());
Pshufd(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg, uint8_t{0});
Pand(dst.fp(), liftoff::kScratchDoubleReg);
}
void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddb, &Assembler::paddb>(
@@ -2456,6 +2541,18 @@ void LiftoffAssembler::emit_i16x8_neg(LiftoffRegister dst,
}
}
void LiftoffAssembler::emit_i16x8_shl(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
liftoff::EmitSimdShiftOp<&Assembler::vpsllw, &Assembler::psllw, 4>(this, dst,
lhs, rhs);
}
void LiftoffAssembler::emit_i16x8_shli(LiftoffRegister dst, LiftoffRegister lhs,
int32_t rhs) {
liftoff::EmitSimdShiftOpImm<&Assembler::vpsllw, &Assembler::psllw, 4>(
this, dst, lhs, rhs);
}
void LiftoffAssembler::emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddw, &Assembler::paddw>(
@@ -2542,6 +2639,18 @@ void LiftoffAssembler::emit_i32x4_neg(LiftoffRegister dst,
}
}
void LiftoffAssembler::emit_i32x4_shl(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
liftoff::EmitSimdShiftOp<&Assembler::vpslld, &Assembler::pslld, 5>(this, dst,
lhs, rhs);
}
void LiftoffAssembler::emit_i32x4_shli(LiftoffRegister dst, LiftoffRegister lhs,
int32_t rhs) {
liftoff::EmitSimdShiftOpImm<&Assembler::vpslld, &Assembler::pslld, 5>(
this, dst, lhs, rhs);
}
void LiftoffAssembler::emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddd, &Assembler::paddd>(
@@ -2602,6 +2711,18 @@ void LiftoffAssembler::emit_i64x2_neg(LiftoffRegister dst,
}
}
void LiftoffAssembler::emit_i64x2_shl(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
liftoff::EmitSimdShiftOp<&Assembler::vpsllq, &Assembler::psllq, 6>(this, dst,
lhs, rhs);
}
void LiftoffAssembler::emit_i64x2_shli(LiftoffRegister dst, LiftoffRegister lhs,
int32_t rhs) {
liftoff::EmitSimdShiftOpImm<&Assembler::vpsllq, &Assembler::psllq, 6>(
this, dst, lhs, rhs);
}
void LiftoffAssembler::emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddq, &Assembler::paddq>(
@@ -810,6 +810,10 @@ class LiftoffAssembler : public TurboAssembler {
inline void emit_s128_select(LiftoffRegister dst, LiftoffRegister src1,
LiftoffRegister src2, LiftoffRegister mask);
inline void emit_i8x16_neg(LiftoffRegister dst, LiftoffRegister src);
inline void emit_i8x16_shl(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs);
inline void emit_i8x16_shli(LiftoffRegister dst, LiftoffRegister lhs,
int32_t rhs);
inline void emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs);
inline void emit_i8x16_add_saturate_s(LiftoffRegister dst,
@@ -837,6 +841,10 @@ class LiftoffAssembler : public TurboAssembler {
inline void emit_i8x16_max_u(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs);
inline void emit_i16x8_neg(LiftoffRegister dst, LiftoffRegister src);
inline void emit_i16x8_shl(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs);
inline void emit_i16x8_shli(LiftoffRegister dst, LiftoffRegister lhs,
int32_t rhs);
inline void emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs);
inline void emit_i16x8_add_saturate_s(LiftoffRegister dst,
@@ -864,6 +872,10 @@ class LiftoffAssembler : public TurboAssembler {
inline void emit_i16x8_max_u(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs);
inline void emit_i32x4_neg(LiftoffRegister dst, LiftoffRegister src);
inline void emit_i32x4_shl(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs);
inline void emit_i32x4_shli(LiftoffRegister dst, LiftoffRegister lhs,
int32_t rhs);
inline void emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs);
inline void emit_i32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
@@ -879,6 +891,10 @@ class LiftoffAssembler : public TurboAssembler {
inline void emit_i32x4_max_u(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs);
inline void emit_i64x2_neg(LiftoffRegister dst, LiftoffRegister src);
inline void emit_i64x2_shl(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs);
inline void emit_i64x2_shli(LiftoffRegister dst, LiftoffRegister lhs,
int32_t rhs);
inline void emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs);
inline void emit_i64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
@@ -2349,6 +2349,31 @@ class LiftoffCompiler {
__ PushRegister(ValueType(result_type), dst);
}
template <typename EmitFn, typename EmitFnImm>
void EmitSimdShiftOp(EmitFn fn, EmitFnImm fnImm) {
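// A constant shift count can be encoded directly into the shift
// instruction, so dispatch to the immediate emitter (fnImm) when the
// value stack's top slot is a constant, and to the register emitter (fn)
// otherwise.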
static constexpr RegClass result_rc = reg_class_for(ValueType::kS128);
LiftoffAssembler::VarState rhs_slot = __ cache_state()->stack_state.back();
// Check if the RHS is an immediate.
if (rhs_slot.is_const()) {
__ cache_state()->stack_state.pop_back();
int32_t imm = rhs_slot.i32_const();
LiftoffRegister operand = __ PopToRegister();
LiftoffRegister dst = __ GetUnusedRegister(result_rc, {operand});
CallEmitFn(fnImm, dst, operand, imm);
__ PushRegister(kWasmS128, dst);
} else {
LiftoffRegister count = __ PopToRegister();
LiftoffRegister operand = __ PopToRegister();
LiftoffRegister dst = __ GetUnusedRegister(result_rc, {operand});
CallEmitFn(fn, dst, operand, count);
__ PushRegister(kWasmS128, dst);
}
}
void SimdOp(FullDecoder* decoder, WasmOpcode opcode, Vector<Value> args,
Value* result) {
if (!CpuFeatures::SupportsWasmSimd128()) {
@@ -2475,6 +2500,9 @@ class LiftoffCompiler {
return EmitTerOp<kS128, kS128>(&LiftoffAssembler::emit_s128_select);
case wasm::kExprI8x16Neg:
return EmitUnOp<kS128, kS128>(&LiftoffAssembler::emit_i8x16_neg);
case wasm::kExprI8x16Shl:
return EmitSimdShiftOp(&LiftoffAssembler::emit_i8x16_shl,
&LiftoffAssembler::emit_i8x16_shli);
case wasm::kExprI8x16Add:
return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_i8x16_add);
case wasm::kExprI8x16AddSaturateS:
@@ -2503,6 +2531,9 @@ class LiftoffCompiler {
return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_i8x16_max_u);
case wasm::kExprI16x8Neg:
return EmitUnOp<kS128, kS128>(&LiftoffAssembler::emit_i16x8_neg);
case wasm::kExprI16x8Shl:
return EmitSimdShiftOp(&LiftoffAssembler::emit_i16x8_shl,
&LiftoffAssembler::emit_i16x8_shli);
case wasm::kExprI16x8Add:
return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_i16x8_add);
case wasm::kExprI16x8AddSaturateS:
@@ -2531,6 +2562,9 @@ class LiftoffCompiler {
return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_i16x8_max_u);
case wasm::kExprI32x4Neg:
return EmitUnOp<kS128, kS128>(&LiftoffAssembler::emit_i32x4_neg);
case wasm::kExprI32x4Shl:
return EmitSimdShiftOp(&LiftoffAssembler::emit_i32x4_shl,
&LiftoffAssembler::emit_i32x4_shli);
case wasm::kExprI32x4Add:
return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_i32x4_add);
case wasm::kExprI32x4Sub:
@@ -2547,6 +2581,9 @@ class LiftoffCompiler {
return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_i32x4_max_u);
case wasm::kExprI64x2Neg:
return EmitUnOp<kS128, kS128>(&LiftoffAssembler::emit_i64x2_neg);
case wasm::kExprI64x2Shl:
return EmitSimdShiftOp(&LiftoffAssembler::emit_i64x2_shl,
&LiftoffAssembler::emit_i64x2_shli);
case wasm::kExprI64x2Add:
return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_i64x2_add);
case wasm::kExprI64x2Sub:
@@ -1943,6 +1943,38 @@ void EmitSimdNonCommutativeBinOp(
(assm->*sse_op)(dst.fp(), rhs.fp());
}
}
template <void (Assembler::*avx_op)(XMMRegister, XMMRegister, XMMRegister),
void (Assembler::*sse_op)(XMMRegister, XMMRegister), uint8_t width>
void EmitSimdShiftOp(LiftoffAssembler* assm, LiftoffRegister dst,
LiftoffRegister operand, LiftoffRegister count) {
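// Wasm shift counts are taken modulo the lane width, whereas SSE/AVX
// variable shifts zero the result once the count reaches the lane width,
// so mask the count explicitly. |width| is log2 of the lane width in bits.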
constexpr int mask = (1 << width) - 1;
assm->movq(kScratchRegister, count.gp());
assm->andq(kScratchRegister, Immediate(mask));
assm->Movq(kScratchDoubleReg, kScratchRegister);
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(assm, AVX);
(assm->*avx_op)(dst.fp(), operand.fp(), kScratchDoubleReg);
} else {
if (dst.fp() != operand.fp()) assm->movaps(dst.fp(), operand.fp());
(assm->*sse_op)(dst.fp(), kScratchDoubleReg);
}
}
template <void (Assembler::*avx_op)(XMMRegister, XMMRegister, byte),
void (Assembler::*sse_op)(XMMRegister, byte), uint8_t width>
void EmitSimdShiftOpImm(LiftoffAssembler* assm, LiftoffRegister dst,
LiftoffRegister operand, int32_t count) {
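// Mask the constant count to the lane width (wasm modulo semantics); the
// masked value always fits the instruction's 8-bit immediate.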
constexpr int mask = (1 << width) - 1;
byte shift = static_cast<byte>(count & mask);
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(assm, AVX);
(assm->*avx_op)(dst.fp(), operand.fp(), shift);
} else {
if (dst.fp() != operand.fp()) assm->movaps(dst.fp(), operand.fp());
(assm->*sse_op)(dst.fp(), shift);
}
}
} // namespace liftoff
void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
@@ -2270,6 +2302,51 @@ void LiftoffAssembler::emit_i8x16_neg(LiftoffRegister dst,
}
}
void LiftoffAssembler::emit_i8x16_shl(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
static constexpr RegClass tmp_simd_rc = reg_class_for(ValueType::kS128);
LiftoffRegister tmp_simd =
GetUnusedRegister(tmp_simd_rc, LiftoffRegList::ForRegs(dst, lhs));
// Mask off the unwanted bits before word-shifting.
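// There is no instruction for shifting packed bytes, so the bytes are
// shifted as 16-bit words. Clearing the top (rhs & 7) bits of every byte
// beforehand keeps them from spilling into the neighboring byte. The byte
// mask (0xff >> shift in every byte) is built by shifting all-ones words
// right by (shift + 8) and packing the words back down to bytes.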
Pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
movq(kScratchRegister, rhs.gp());
andq(kScratchRegister, Immediate(7));
addq(kScratchRegister, Immediate(8));
Movq(tmp_simd.fp(), kScratchRegister);
Psrlw(kScratchDoubleReg, tmp_simd.fp());
Packuswb(kScratchDoubleReg, kScratchDoubleReg);
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
vpand(dst.fp(), lhs.fp(), kScratchDoubleReg);
} else {
if (dst.fp() != lhs.fp()) movaps(dst.fp(), lhs.fp());
pand(dst.fp(), kScratchDoubleReg);
}
subq(kScratchRegister, Immediate(8));
Movq(tmp_simd.fp(), kScratchRegister);
Psllw(dst.fp(), tmp_simd.fp());
}
void LiftoffAssembler::emit_i8x16_shli(LiftoffRegister dst, LiftoffRegister lhs,
int32_t rhs) {
byte shift = static_cast<byte>(rhs & 0x7);
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
vpsllw(dst.fp(), lhs.fp(), shift);
} else {
if (dst.fp() != lhs.fp()) movaps(dst.fp(), lhs.fp());
psllw(dst.fp(), shift);
}
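// The word-wise shift drags the top bits of each low byte into the low
// bits of the byte above it; a correct byte shift has zeros there, so
// clear them with a broadcast (0xff << shift) mask.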
uint8_t bmask = static_cast<uint8_t>(0xff << shift);
uint32_t mask = bmask << 24 | bmask << 16 | bmask << 8 | bmask;
movl(kScratchRegister, Immediate(mask));
Movd(kScratchDoubleReg, kScratchRegister);
Pshufd(kScratchDoubleReg, kScratchDoubleReg, uint8_t{0});
Pand(dst.fp(), kScratchDoubleReg);
}
void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddb, &Assembler::paddb>(
@@ -2412,6 +2489,18 @@ void LiftoffAssembler::emit_i16x8_neg(LiftoffRegister dst,
}
}
void LiftoffAssembler::emit_i16x8_shl(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
liftoff::EmitSimdShiftOp<&Assembler::vpsllw, &Assembler::psllw, 4>(this, dst,
lhs, rhs);
}
void LiftoffAssembler::emit_i16x8_shli(LiftoffRegister dst, LiftoffRegister lhs,
int32_t rhs) {
liftoff::EmitSimdShiftOpImm<&Assembler::vpsllw, &Assembler::psllw, 4>(
this, dst, lhs, rhs);
}
void LiftoffAssembler::emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddw, &Assembler::paddw>(
@@ -2498,6 +2587,18 @@ void LiftoffAssembler::emit_i32x4_neg(LiftoffRegister dst,
}
}
void LiftoffAssembler::emit_i32x4_shl(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
liftoff::EmitSimdShiftOp<&Assembler::vpslld, &Assembler::pslld, 5>(this, dst,
lhs, rhs);
}
void LiftoffAssembler::emit_i32x4_shli(LiftoffRegister dst, LiftoffRegister lhs,
int32_t rhs) {
liftoff::EmitSimdShiftOpImm<&Assembler::vpslld, &Assembler::pslld, 5>(
this, dst, lhs, rhs);
}
void LiftoffAssembler::emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddd, &Assembler::paddd>(
@@ -2557,6 +2658,18 @@ void LiftoffAssembler::emit_i64x2_neg(LiftoffRegister dst,
}
}
void LiftoffAssembler::emit_i64x2_shl(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
liftoff::EmitSimdShiftOp<&Assembler::vpsllq, &Assembler::psllq, 6>(this, dst,
lhs, rhs);
}
void LiftoffAssembler::emit_i64x2_shli(LiftoffRegister dst, LiftoffRegister lhs,
int32_t rhs) {
liftoff::EmitSimdShiftOpImm<&Assembler::vpsllq, &Assembler::psllq, 6>(
this, dst, lhs, rhs);
}
void LiftoffAssembler::emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddq, &Assembler::paddq>(