Commit 83fc8559 authored by Ng Zhi An, committed by Commit Bot

[wasm-simd] AVX codegen for load splat

Bug: v8:9886
Change-Id: I321e93d02971c6ba568d9d7c52d464ffc2754665
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1929837
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Bill Budge <bbudge@chromium.org>
Cr-Commit-Position: refs/heads/master@{#65277}
parent 2fb290d7
...@@ -3517,6 +3517,23 @@ void Assembler::movmskps(Register dst, XMMRegister src) { ...@@ -3517,6 +3517,23 @@ void Assembler::movmskps(Register dst, XMMRegister src) {
} }
// AVX instructions // AVX instructions
// Emits VMOVDDUP dst, src (VEX.128.F2.0F.WIG 12 /r): duplicates the low
// 64 bits loaded from src into both 64-bit lanes of dst. AVX-only encoder;
// callers must have AVX enabled.
void Assembler::vmovddup(XMMRegister dst, Operand src) {
DCHECK(IsEnabled(AVX));
EnsureSpace ensure_space(this);
// xmm0 fills the VEX vvvv field; MOVDDUP has no second source operand.
emit_vex_prefix(dst, xmm0, src, kL128, kF2, k0F, kWIG);
emit(0x12);  // MOVDDUP opcode byte.
emit_sse_operand(dst, src);  // ModRM + memory-operand bytes.
}
// Emits VBROADCASTSS dst, src (VEX.128.66.0F38.W0 18 /r): broadcasts the
// 32-bit value at src into all four lanes of dst. AVX-only encoder; the
// memory-source form requires only AVX (register-source needs AVX2).
void Assembler::vbroadcastss(XMMRegister dst, Operand src) {
DCHECK(IsEnabled(AVX));
EnsureSpace ensure_space(this);
// xmm0 fills the VEX vvvv field; VBROADCASTSS has no second source operand.
emit_vex_prefix(dst, xmm0, src, kL128, k66, k0F38, kW0);
emit(0x18);  // VBROADCASTSS opcode byte (0F38 map).
emit_sse_operand(dst, src);  // ModRM + memory-operand bytes.
}
void Assembler::vfmasd(byte op, XMMRegister dst, XMMRegister src1, void Assembler::vfmasd(byte op, XMMRegister dst, XMMRegister src1,
XMMRegister src2) { XMMRegister src2) {
DCHECK(IsEnabled(FMA3)); DCHECK(IsEnabled(FMA3));
......
...@@ -1116,6 +1116,8 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase { ...@@ -1116,6 +1116,8 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
void movlhps(XMMRegister dst, XMMRegister src); void movlhps(XMMRegister dst, XMMRegister src);
// AVX instruction // AVX instruction
void vmovddup(XMMRegister dst, Operand src);
void vbroadcastss(XMMRegister dst, Operand src);
void vfmadd132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) { void vfmadd132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vfmasd(0x99, dst, src1, src2); vfmasd(0x99, dst, src1, src2);
} }
...@@ -1628,6 +1630,14 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase { ...@@ -1628,6 +1630,14 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
vinstr(0x70, dst, xmm0, src, k66, k0F, kWIG); vinstr(0x70, dst, xmm0, src, k66, k0F, kWIG);
emit(imm8); emit(imm8);
} }
// Emits VPSHUFLW dst, src, imm8 (VEX.128.F2.0F.WIG 70 /r ib): shuffles the
// four low 16-bit words of src into dst as selected by imm8; the high
// quadword is copied through unchanged.
void vpshuflw(XMMRegister dst, XMMRegister src, uint8_t imm8) {
vinstr(0x70, dst, xmm0, src, kF2, k0F, kWIG);
emit(imm8);  // Word-selection control byte.
}
// Memory-source form of VPSHUFLW (VEX.128.F2.0F.WIG 70 /r ib): shuffles the
// four low 16-bit words read from src into dst per imm8; the high quadword
// of the source is copied through unchanged.
void vpshuflw(XMMRegister dst, Operand src, uint8_t imm8) {
vinstr(0x70, dst, xmm0, src, kF2, k0F, kWIG);
emit(imm8);  // Word-selection control byte.
}
void vps(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2); void vps(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
void vps(byte op, XMMRegister dst, XMMRegister src1, Operand src2); void vps(byte op, XMMRegister dst, XMMRegister src1, Operand src2);
......
...@@ -116,6 +116,13 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase { ...@@ -116,6 +116,13 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
.template emit<&Assembler::v##name, &Assembler::name>(dst, args...); \ .template emit<&Assembler::v##name, &Assembler::name>(dst, args...); \
} }
// Declares a macro-assembler instruction whose non-AVX fallback requires
// SSE3 (parallel to the AVX_OP_SSSE3 family nearby): AvxHelper emits the
// v##name AVX form when AVX is available, and otherwise the legacy name
// form, using SSE3 as the required CpuFeature for the fallback.
#define AVX_OP_SSE3(macro_name, name) \
template <typename Dst, typename... Args> \
void macro_name(Dst dst, Args... args) { \
AvxHelper<Dst, Args...>{this, base::Optional<CpuFeature>(SSE3)} \
.template emit<&Assembler::v##name, &Assembler::name>(dst, args...); \
}
#define AVX_OP_SSSE3(macro_name, name) \ #define AVX_OP_SSSE3(macro_name, name) \
template <typename Dst, typename... Args> \ template <typename Dst, typename... Args> \
void macro_name(Dst dst, Args... args) { \ void macro_name(Dst dst, Args... args) { \
...@@ -195,6 +202,9 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase { ...@@ -195,6 +202,9 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_OP(Subps, subps) AVX_OP(Subps, subps)
AVX_OP(Mulps, mulps) AVX_OP(Mulps, mulps)
AVX_OP(Divps, divps) AVX_OP(Divps, divps)
AVX_OP(Pshuflw, pshuflw)
AVX_OP(Punpcklqdq, punpcklqdq)
AVX_OP_SSE3(Movddup, movddup)
AVX_OP_SSSE3(Pshufb, pshufb) AVX_OP_SSSE3(Pshufb, pshufb)
AVX_OP_SSSE3(Psignd, psignd) AVX_OP_SSSE3(Psignd, psignd)
AVX_OP_SSE4_1(Pmulld, pmulld) AVX_OP_SSE4_1(Pmulld, pmulld)
......
...@@ -3660,31 +3660,34 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -3660,31 +3660,34 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
} }
case kX64S8x16LoadSplat: { case kX64S8x16LoadSplat: {
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset()); EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
__ pinsrb(i.OutputSimd128Register(), i.MemoryOperand(), 0); __ Pinsrb(i.OutputSimd128Register(), i.MemoryOperand(), 0);
__ pxor(kScratchDoubleReg, kScratchDoubleReg); __ Pxor(kScratchDoubleReg, kScratchDoubleReg);
__ pshufb(i.OutputSimd128Register(), kScratchDoubleReg); __ Pshufb(i.OutputSimd128Register(), kScratchDoubleReg);
break; break;
} }
case kX64S16x8LoadSplat: { case kX64S16x8LoadSplat: {
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset()); EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
__ pinsrw(i.OutputSimd128Register(), i.MemoryOperand(), 0); __ Pinsrw(i.OutputSimd128Register(), i.MemoryOperand(), 0);
__ pshuflw(i.OutputSimd128Register(), i.OutputSimd128Register(), 0); __ Pshuflw(i.OutputSimd128Register(), i.OutputSimd128Register(),
__ punpcklqdq(i.OutputSimd128Register(), i.OutputSimd128Register()); static_cast<uint8_t>(0));
__ Punpcklqdq(i.OutputSimd128Register(), i.OutputSimd128Register());
break; break;
} }
case kX64S32x4LoadSplat: { case kX64S32x4LoadSplat: {
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset()); EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
// TODO(v8:9886): AVX codegen if (CpuFeatures::IsSupported(AVX)) {
__ movss(i.OutputSimd128Register(), i.MemoryOperand()); CpuFeatureScope avx_scope(tasm(), AVX);
__ shufps(i.OutputSimd128Register(), i.OutputSimd128Register(), __ vbroadcastss(i.OutputSimd128Register(), i.MemoryOperand());
static_cast<byte>(0)); } else {
__ Movss(i.OutputSimd128Register(), i.MemoryOperand());
__ Shufps(i.OutputSimd128Register(), i.OutputSimd128Register(),
static_cast<byte>(0));
}
break; break;
} }
case kX64S64x2LoadSplat: { case kX64S64x2LoadSplat: {
EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset()); EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
// TODO(v8:9886): AVX codegen __ Movddup(i.OutputSimd128Register(), i.MemoryOperand());
__ movsd(i.OutputSimd128Register(), i.MemoryOperand());
__ punpcklqdq(i.OutputSimd128Register(), i.OutputSimd128Register());
break; break;
} }
case kX64I16x8Load8x8S: { case kX64I16x8Load8x8S: {
......
...@@ -819,6 +819,10 @@ int DisassemblerX64::AVXInstruction(byte* data) { ...@@ -819,6 +819,10 @@ int DisassemblerX64::AVXInstruction(byte* data) {
int mod, regop, rm, vvvv = vex_vreg(); int mod, regop, rm, vvvv = vex_vreg();
get_modrm(*current, &mod, &regop, &rm); get_modrm(*current, &mod, &regop, &rm);
switch (opcode) { switch (opcode) {
case 0x18:
AppendToBuffer("vbroadcastss %s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
break;
case 0x99: case 0x99:
AppendToBuffer("vfmadd132s%c %s,%s,", float_size_code(), AppendToBuffer("vfmadd132s%c %s,%s,", float_size_code(),
NameOfXMMRegister(regop), NameOfXMMRegister(vvvv)); NameOfXMMRegister(regop), NameOfXMMRegister(vvvv));
...@@ -1067,6 +1071,10 @@ int DisassemblerX64::AVXInstruction(byte* data) { ...@@ -1067,6 +1071,10 @@ int DisassemblerX64::AVXInstruction(byte* data) {
} }
AppendToBuffer(",%s", NameOfXMMRegister(regop)); AppendToBuffer(",%s", NameOfXMMRegister(regop));
break; break;
case 0x12:
AppendToBuffer("vmovddup %s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
break;
case 0x2A: case 0x2A:
AppendToBuffer("%s %s,%s,", vex_w() ? "vcvtqsi2sd" : "vcvtlsi2sd", AppendToBuffer("%s %s,%s,", vex_w() ? "vcvtqsi2sd" : "vcvtlsi2sd",
NameOfXMMRegister(regop), NameOfXMMRegister(vvvv)); NameOfXMMRegister(regop), NameOfXMMRegister(vvvv));
...@@ -1126,6 +1134,11 @@ int DisassemblerX64::AVXInstruction(byte* data) { ...@@ -1126,6 +1134,11 @@ int DisassemblerX64::AVXInstruction(byte* data) {
AppendToBuffer("vlddqu %s,", NameOfXMMRegister(regop)); AppendToBuffer("vlddqu %s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current); current += PrintRightXMMOperand(current);
break; break;
case 0x70:
AppendToBuffer("vpshuflw %s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
AppendToBuffer(",0x%x", *current++);
break;
case 0x7C: case 0x7C:
AppendToBuffer("vhaddps %s,%s,", NameOfXMMRegister(regop), AppendToBuffer("vhaddps %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv)); NameOfXMMRegister(vvvv));
......
...@@ -763,7 +763,12 @@ TEST(DisasmX64) { ...@@ -763,7 +763,12 @@ TEST(DisasmX64) {
__ vpinsrd(xmm1, xmm2, rax, 2); __ vpinsrd(xmm1, xmm2, rax, 2);
__ vpinsrd(xmm1, xmm2, Operand(rbx, rcx, times_4, 10000), 2); __ vpinsrd(xmm1, xmm2, Operand(rbx, rcx, times_4, 10000), 2);
__ vpshufd(xmm1, xmm2, 85); __ vpshufd(xmm1, xmm2, 85);
__ vpshuflw(xmm1, xmm2, 85);
__ vpshuflw(xmm1, Operand(rbx, rcx, times_4, 10000), 85);
__ vshufps(xmm3, xmm2, xmm3, 3); __ vshufps(xmm3, xmm2, xmm3, 3);
__ vmovddup(xmm1, Operand(rbx, rcx, times_4, 10000));
__ vbroadcastss(xmm1, Operand(rbx, rcx, times_4, 10000));
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment