Commit 14570fe0 authored by Zhi An Ng, committed by Commit Bot

[wasm-simd] Enhance Shufps to copy src to dst

Extract Shufps into a macro-assembler helper that handles both the AVX
and SSE cases; in the SSE case it first copies src into dst when they
are not the same register. This allows us to use it in Liftoff as well,
without the extra copy when AVX is supported.

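For readers outside V8: in its SSE form, shufps is a two-operand
instruction whose destination register also supplies the two low result
lanes, so the non-AVX path must first copy src into dst before shuffling.
A minimal scalar model of the lane selection (plain C++ with a
hypothetical helper name, not V8 code) shows why, and why imm8 == 0
yields a splat:

  #include <cstdint>
  #include <cstdio>

  // Scalar model of SHUFPS dst, src, imm8: each 2-bit field of imm8
  // selects a source lane; the two low result lanes read from dst, the
  // two high ones from src.
  void shufps_model(float dst[4], const float src[4], uint8_t imm8) {
    float r0 = dst[imm8 & 3];
    float r1 = dst[(imm8 >> 2) & 3];
    float r2 = src[(imm8 >> 4) & 3];
    float r3 = src[(imm8 >> 6) & 3];
    dst[0] = r0; dst[1] = r1; dst[2] = r2; dst[3] = r3;
  }

  int main() {
    float v[4] = {1.5f, 2.0f, 3.0f, 4.0f};
    shufps_model(v, v, 0);  // dst == src, imm8 == 0: broadcast lane 0
    std::printf("%g %g %g %g\n", v[0], v[1], v[2], v[3]);  // 1.5 1.5 1.5 1.5
    return 0;
  }

If dst and src were different registers and dst were not pre-loaded with
src, the two low result lanes would read stale data from dst; the copy in
the SSE path of the new helper avoids exactly that.
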
In other places the Shufps macro is unnecessary: those call sites already
sit inside a clause that has established AVX is not supported, so we can
use plain shufps (the non-macro-assembler instruction) directly.

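To make that second point concrete, a small sketch of why the macro is
redundant at those call sites (hypothetical stand-ins for the feature
check and instruction emission, not V8 code):

  #include <cstdio>

  // Stand-ins: emitting an instruction is modeled as a print, and the
  // CPU feature bit as a flag that is fixed for the life of the process.
  static const bool avx_supported = false;

  void emit_shufps() { std::puts("emit SSE shufps"); }    // raw instruction
  void emit_vshufps() { std::puts("emit AVX vshufps"); }  // raw instruction

  // The macro-assembler version re-checks the feature bit on every call.
  void EmitShufpsMacro() {
    if (avx_supported) {
      emit_vshufps();
    } else {
      emit_shufps();
    }
  }

  int main() {
    if (avx_supported) {
      emit_vshufps();
    } else {
      // Inside this branch AVX is already known to be absent, so the
      // macro's internal check could only ever take the SSE path;
      // emitting the raw instruction directly avoids the dead check.
      emit_shufps();
    }
    return 0;
  }
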
Bug: v8:9561
Change-Id: Icb043d7a43397c1b0810ece2666be567f0f5986c
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2513866
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#70911}
parent d988237e
@@ -1727,6 +1727,18 @@ void TurboAssembler::RetpolineJump(Register reg) {
   ret(0);
 }
 
+void TurboAssembler::Shufps(XMMRegister dst, XMMRegister src, byte imm8) {
+  if (CpuFeatures::IsSupported(AVX)) {
+    CpuFeatureScope avx_scope(this, AVX);
+    vshufps(dst, src, src, imm8);
+  } else {
+    if (dst != src) {
+      movaps(dst, src);
+    }
+    shufps(dst, src, imm8);
+  }
+}
+
 void TurboAssembler::Pextrd(Register dst, XMMRegister src, uint8_t imm8) {
   if (imm8 == 0) {
     Movd(dst, src);
@@ -224,7 +224,6 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
   AVX_OP(Divpd, divpd)
   AVX_OP(Maxps, maxps)
   AVX_OP(Maxpd, maxpd)
-  AVX_OP(Shufps, shufps)
   AVX_OP(Cvtdq2ps, cvtdq2ps)
   AVX_OP(Rcpps, rcpps)
   AVX_OP(Rsqrtps, rsqrtps)
@@ -519,6 +518,9 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
   void Trap() override;
   void DebugBreak() override;
 
+  // Shufps that will mov src into dst if AVX is not supported.
+  void Shufps(XMMRegister dst, XMMRegister src, byte imm8);
+
   // Non-SSE2 instructions.
   void Pextrd(Register dst, XMMRegister src, uint8_t imm8);
@@ -2491,15 +2491,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       break;
     }
     case kX64F32x4Splat: {
-      XMMRegister dst = i.OutputSimd128Register();
-      XMMRegister src = i.InputDoubleRegister(0);
-      if (CpuFeatures::IsSupported(AVX)) {
-        CpuFeatureScope avx_scope(tasm(), AVX);
-        __ vshufps(dst, src, src, byte{0x0});
-      } else {
-        DCHECK_EQ(dst, src);
-        __ Shufps(dst, dst, byte{0x0});
-      }
+      __ Shufps(i.OutputSimd128Register(), i.InputDoubleRegister(0), 0);
       break;
     }
     case kX64F32x4ExtractLane: {
@@ -3663,8 +3655,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
         CpuFeatureScope avx_scope(tasm(), AVX);
         __ vbroadcastss(i.OutputSimd128Register(), i.MemoryOperand());
       } else {
-        __ Movss(i.OutputSimd128Register(), i.MemoryOperand());
-        __ Shufps(i.OutputSimd128Register(), i.OutputSimd128Register(),
+        __ movss(i.OutputSimd128Register(), i.MemoryOperand());
+        __ shufps(i.OutputSimd128Register(), i.OutputSimd128Register(),
                   byte{0});
       }
       break;
@@ -2309,8 +2309,8 @@ void LiftoffAssembler::LoadTransform(LiftoffRegister dst, Register src_addr,
       CpuFeatureScope avx_scope(this, AVX);
       vbroadcastss(dst.fp(), src_op);
     } else {
-      Movss(dst.fp(), src_op);
-      Shufps(dst.fp(), dst.fp(), byte{0});
+      movss(dst.fp(), src_op);
+      shufps(dst.fp(), dst.fp(), byte{0});
     }
   } else if (memtype == MachineType::Int64()) {
     Movddup(dst.fp(), src_op);
@@ -2419,10 +2419,7 @@ void LiftoffAssembler::emit_i64x2_splat(LiftoffRegister dst,
 
 void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
                                         LiftoffRegister src) {
-  if (dst.fp() != src.fp()) {
-    Movss(dst.fp(), src.fp());
-  }
-  Shufps(dst.fp(), src.fp(), static_cast<byte>(0));
+  Shufps(dst.fp(), src.fp(), 0);
 }
 
 void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst,