Commit 4a716fea authored by Ng Zhi An, committed by Commit Bot

[wasm-simd] Add AVX codegen for some x64 instructions

This adds AVX codegen for extractps, insertps, and cvtdq2ps. extractps and
insertps require SSE4_1, so AvxHelper now carries an optional CpuFeature for
such SSE4.1 operations and opens the corresponding CpuFeatureScope before
emitting the non-AVX instruction.

Bug: v8:9561
Change-Id: Iad2be7ebab41b96f7eb74f4e2bd9776002e6a76c
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1874378
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Cr-Commit-Position: refs/heads/master@{#64529}
parent 081114b5
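For context, here is a minimal, self-contained sketch of the dispatch pattern this change gives AvxHelper: prefer the AVX form, otherwise open the scope for an optional extra feature such as SSE4_1 before emitting the legacy instruction. It is illustrative only; CpuFeature, IsSupported, Assembler, and AvxHelperSketch below are simplified stand-ins, not the real V8 types, and the real helper additionally DCHECKs feature support and uses CpuFeatureScope.

#include <cstdio>
#include <optional>

enum class CpuFeature { AVX, SSE4_1 };

bool IsSupported(CpuFeature f) {
  // Pretend AVX is unavailable but SSE4.1 is, so the fallback path runs.
  return f == CpuFeature::SSE4_1;
}

struct Assembler {
  void vextractps(int dst, int src, int imm8) { std::printf("emit vextractps\n"); }
  void extractps(int dst, int src, int imm8) { std::printf("emit extractps\n"); }
};

template <typename... Args>
struct AvxHelperSketch {
  Assembler* assm;
  std::optional<CpuFeature> feature;  // extra feature needed by the non-AVX form

  template <void (Assembler::*avx)(Args...), void (Assembler::*no_avx)(Args...)>
  void emit(Args... args) {
    if (IsSupported(CpuFeature::AVX)) {
      (assm->*avx)(args...);     // real code: CpuFeatureScope scope(assm, AVX);
    } else if (feature.has_value()) {
      (assm->*no_avx)(args...);  // real code: open a scope for *feature (e.g. SSE4_1) first
    } else {
      (assm->*no_avx)(args...);
    }
  }
};

int main() {
  Assembler assm;
  // Roughly what a wrapper generated by AVX_OP_SSE4_1(Extractps, extractps) does:
  AvxHelperSketch<int, int, int>{&assm, CpuFeature::SSE4_1}
      .emit<&Assembler::vextractps, &Assembler::extractps>(0, 1, 1);
  return 0;
}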
@@ -2802,7 +2802,7 @@ void Assembler::movdqu(XMMRegister dst, Operand src) {
emit_sse_operand(dst, src);
}
void Assembler::extractps(Register dst, XMMRegister src, byte imm8) {
void Assembler::extractps(Register dst, XMMRegister src, int8_t imm8) {
DCHECK(IsEnabled(SSE4_1));
DCHECK(is_uint8(imm8));
EnsureSpace ensure_space(this);
......
@@ -1084,7 +1084,7 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
// SSE 4.1 instruction
void insertps(XMMRegister dst, XMMRegister src, byte imm8);
void insertps(XMMRegister dst, Operand src, byte imm8);
void extractps(Register dst, XMMRegister src, byte imm8);
void extractps(Register dst, XMMRegister src, int8_t imm8);
void pextrb(Register dst, XMMRegister src, int8_t imm8);
void pextrb(Operand dst, XMMRegister src, int8_t imm8);
void pextrw(Register dst, XMMRegister src, int8_t imm8);
@@ -1580,6 +1580,20 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
vinstr(0x72, xmm4, dst, src, k66, k0F, kWIG);
emit(imm8);
}
void vinsertps(XMMRegister dst, XMMRegister src1, XMMRegister src2,
byte imm8) {
vinstr(0x21, dst, src1, src2, k66, k0F3A, kWIG);
emit(imm8);
}
void vinsertps(XMMRegister dst, XMMRegister src1, Operand src2, byte imm8) {
vinstr(0x21, dst, src1, src2, k66, k0F3A, kWIG);
emit(imm8);
}
void vextractps(Register dst, XMMRegister src, int8_t imm8) {
XMMRegister idst = XMMRegister::from_code(dst.code());
vinstr(0x17, src, xmm0, idst, k66, k0F3A, kWIG);
emit(imm8);
}
void vpextrb(Register dst, XMMRegister src, uint8_t imm8) {
XMMRegister idst = XMMRegister::from_code(dst.code());
vinstr(0x14, src, xmm0, idst, k66, k0F3A, kW0);
@@ -1638,6 +1652,12 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
vinstr(0x70, dst, xmm0, src, k66, k0F, kWIG);
emit(imm8);
}
void vcvtdq2ps(XMMRegister dst, XMMRegister src) {
vinstr(0x5B, dst, xmm0, src, kNone, k0F, kWIG);
}
void vcvtdq2ps(XMMRegister dst, Operand src) {
vinstr(0x5B, dst, xmm0, src, kNone, k0F, kWIG);
}
void vps(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
void vps(byte op, XMMRegister dst, XMMRegister src1, Operand src2);
......
@@ -80,6 +80,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
template <typename Dst, typename... Args>
struct AvxHelper {
Assembler* assm;
base::Optional<CpuFeature> feature = base::nullopt;
// Call a method where the AVX version expects the dst argument to be
// duplicated.
template <void (Assembler::*avx)(Dst, Dst, Args...),
@@ -88,6 +89,10 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(assm, AVX);
(assm->*avx)(dst, dst, args...);
} else if (feature.has_value()) {
DCHECK(CpuFeatures::IsSupported(*feature));
CpuFeatureScope scope(assm, *feature);
(assm->*no_avx)(dst, args...);
} else {
(assm->*no_avx)(dst, args...);
}
@@ -100,6 +105,10 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(assm, AVX);
(assm->*avx)(dst, args...);
} else if (feature.has_value()) {
DCHECK(CpuFeatures::IsSupported(*feature));
CpuFeatureScope scope(assm, *feature);
(assm->*no_avx)(dst, args...);
} else {
(assm->*no_avx)(dst, args...);
}
@@ -113,6 +122,12 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
.template emit<&Assembler::v##name, &Assembler::name>(dst, args...); \
}
#define AVX_OP_SSE4_1(macro_name, name) \
template <typename Dst, typename... Args> \
void macro_name(Dst dst, Args... args) { \
AvxHelper<Dst, Args...>{this, base::Optional<CpuFeature>(SSE4_1)} \
.template emit<&Assembler::v##name, &Assembler::name>(dst, args...); \
}
AVX_OP(Subsd, subsd)
AVX_OP(Divss, divss)
AVX_OP(Divsd, divsd)
@@ -167,17 +182,20 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_OP(Psrad, psrad)
AVX_OP(Psrld, psrld)
AVX_OP(Paddd, paddd)
AVX_OP(Pmulld, pmulld)
AVX_OP(Pminsd, pminsd)
AVX_OP(Pminud, pminud)
AVX_OP(Pmaxsd, pmaxsd)
AVX_OP(Pmaxud, pmaxud)
AVX_OP(Pcmpgtd, pcmpgtd)
AVX_OP(Addpd, addpd)
AVX_OP(Subpd, subpd)
AVX_OP(Mulpd, mulpd)
AVX_OP(Divpd, divpd)
AVX_OP(Shufps, shufps)
AVX_OP(Cvtdq2ps, cvtdq2ps)
AVX_OP_SSE4_1(Pmulld, pmulld)
AVX_OP_SSE4_1(Pminsd, pminsd)
AVX_OP_SSE4_1(Pminud, pminud)
AVX_OP_SSE4_1(Pmaxsd, pmaxsd)
AVX_OP_SSE4_1(Pmaxud, pmaxud)
AVX_OP_SSE4_1(Extractps, extractps)
AVX_OP_SSE4_1(Insertps, insertps)
#undef AVX_OP
......
@@ -2443,26 +2443,24 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kX64F32x4ExtractLane: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
__ extractps(kScratchRegister, i.InputSimd128Register(0), i.InputInt8(1));
__ movd(i.OutputDoubleRegister(), kScratchRegister);
__ Extractps(kScratchRegister, i.InputSimd128Register(0), i.InputInt8(1));
__ Movd(i.OutputDoubleRegister(), kScratchRegister);
break;
}
case kX64F32x4ReplaceLane: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
// The insertps instruction uses imm8[5:4] to indicate the lane
// that needs to be replaced.
byte select = i.InputInt8(1) << 4 & 0x30;
if (instr->InputAt(2)->IsFPRegister()) {
__ insertps(i.OutputSimd128Register(), i.InputDoubleRegister(2),
__ Insertps(i.OutputSimd128Register(), i.InputDoubleRegister(2),
select);
} else {
__ insertps(i.OutputSimd128Register(), i.InputOperand(2), select);
__ Insertps(i.OutputSimd128Register(), i.InputOperand(2), select);
}
break;
}
case kX64F32x4SConvertI32x4: {
__ cvtdq2ps(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ Cvtdq2ps(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kX64F32x4UConvertI32x4: {
......
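As a worked example of the select computation in the kX64F32x4ReplaceLane case above (an illustrative sketch, not part of the commit): insertps reads the destination lane from imm8 bits [5:4], so the lane index is shifted left by four and masked with 0x30.

#include <cstdint>
#include <cstdio>

int main() {
  // Lane index lands in imm8[5:4]; lanes 0..3 map to 0x00, 0x10, 0x20, 0x30.
  for (int lane = 0; lane < 4; ++lane) {
    uint8_t select = static_cast<uint8_t>((lane << 4) & 0x30);
    std::printf("lane %d -> imm8 = 0x%02x\n", lane, select);
  }
  return 0;
}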
@@ -941,12 +941,23 @@ int DisassemblerX64::AVXInstruction(byte* data) {
current += PrintRightOperand(current);
AppendToBuffer(",%s,0x%x,", NameOfXMMRegister(regop), *current++);
break;
case 0x17:
AppendToBuffer("vextractps ");
current += PrintRightOperand(current);
AppendToBuffer(",%s,0x%x,", NameOfXMMRegister(regop), *current++);
break;
case 0x20:
AppendToBuffer("vpinsrb %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv));
current += PrintRightByteOperand(current);
AppendToBuffer(",0x%x", *current++);
break;
case 0x21:
AppendToBuffer("vinsertps %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv));
current += PrintRightByteOperand(current);
AppendToBuffer(",0x%x", *current++);
break;
case 0x22:
AppendToBuffer("vpinsrd %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv));
@@ -1276,6 +1287,10 @@ int DisassemblerX64::AVXInstruction(byte* data) {
NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
break;
case 0x5B:
AppendToBuffer("vcvtdq2ps %s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
break;
case 0xC2: {
AppendToBuffer("vcmpps %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv));
......
@@ -751,6 +751,10 @@ TEST(DisasmX64) {
#undef EMIT_SSE2_AVXINSTR
#undef EMIT_SSE34_AVXINSTR
__ vinsertps(xmm1, xmm2, xmm3, 1);
__ vinsertps(xmm1, xmm2, Operand(rbx, rcx, times_4, 10000), 1);
__ vextractps(rax, xmm1, 1);
__ vlddqu(xmm1, Operand(rbx, rcx, times_4, 10000));
__ vpsllw(xmm0, xmm15, 21);
__ vpsrlw(xmm0, xmm15, 21);
@@ -771,6 +775,9 @@ TEST(DisasmX64) {
__ vpinsrd(xmm1, xmm2, Operand(rbx, rcx, times_4, 10000), 2);
__ vpshufd(xmm1, xmm2, 85);
__ vshufps(xmm3, xmm2, xmm3, 3);
__ vcvtdq2ps(xmm5, xmm1);
__ vcvtdq2ps(xmm5, Operand(rdx, 4));
}
}
......