Commit 1effe529 authored by Ng Zhi An, committed by Commit Bot

[wasm-simd] Add AVX codegen

Mostly for f32x4 instructions.

Bug: v8:9561
Change-Id: I3a3dc06305acb9e336c494fc399cf5d21518c0e8
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1950488
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#65382}
parent 2450b3bc
@@ -1634,6 +1634,16 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
emit(imm8);
}
void vpblendw(XMMRegister dst, XMMRegister src1, XMMRegister src2,
uint8_t mask) {
vinstr(0x0E, dst, src1, src2, k66, k0F3A, kWIG);
emit(mask);
}
void vpblendw(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t mask) {
vinstr(0x0E, dst, src1, src2, k66, k0F3A, kWIG);
emit(mask);
}
void vps(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
void vps(byte op, XMMRegister dst, XMMRegister src1, Operand src2);
void vps(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2,
......
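For context (not part of the change itself): VPBLENDW is encoded as 66 0F 3A 0E /r ib, which is what the k66, k0F3A and 0x0E arguments to vinstr() select. The instruction blends 16-bit words from the two sources under the immediate mask; a minimal scalar sketch of that selection (illustrative only, not V8 code), which is also how the 0x55 mask is used in the conversion sequence further down:

#include <cstdint>

// Scalar model of (v)pblendw: bit i of the 8-bit mask selects word i from
// src (bit set) or keeps dst's word i (bit clear).
void PblendwModel(uint16_t dst[8], const uint16_t src[8], uint8_t mask) {
  for (int i = 0; i < 8; i++) {
    if (mask & (1 << i)) dst[i] = src[i];
  }
}
// With dst zeroed and mask 0x55 (bits 0, 2, 4, 6 set), the result keeps only
// the low 16 bits of every 32-bit lane of src.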
@@ -139,6 +139,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_OP(Subsd, subsd)
AVX_OP(Divss, divss)
AVX_OP(Divsd, divsd)
AVX_OP(Orps, orps)
AVX_OP(Xorps, xorps)
AVX_OP(Xorpd, xorpd)
AVX_OP(Movd, movd)
@@ -193,8 +194,10 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_OP(Addpd, addpd)
AVX_OP(Subpd, subpd)
AVX_OP(Mulpd, mulpd)
AVX_OP(Minps, minps)
AVX_OP(Minpd, minpd)
AVX_OP(Divpd, divpd)
AVX_OP(Maxps, maxps)
AVX_OP(Maxpd, maxpd)
AVX_OP(Shufps, shufps)
AVX_OP(Cvtdq2ps, cvtdq2ps)
@@ -208,6 +211,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_OP(Pshuflw, pshuflw)
AVX_OP(Punpcklqdq, punpcklqdq)
AVX_OP(Pshufd, pshufd)
AVX_OP(Cmpps, cmpps)
AVX_OP(Cmppd, cmppd)
AVX_OP_SSE3(Movddup, movddup)
AVX_OP_SSSE3(Pshufb, pshufb)
@@ -220,6 +224,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_OP_SSE4_1(Extractps, extractps)
AVX_OP_SSE4_1(Insertps, insertps)
AVX_OP_SSE4_1(Pinsrq, pinsrq)
AVX_OP_SSE4_1(Pblendw, pblendw)
#undef AVX_OP
......
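Each AVX_OP(...) entry above declares a capitalized wrapper (Orps, Minps, Cmpps, ...) that emits the VEX-encoded AVX form when the CPU supports it and falls back to the plain SSE instruction otherwise; this is what lets the code generator below call Pxor, Pblendw, Cmpps, etc. unconditionally. A simplified, hypothetical sketch of the shape such a wrapper takes (the real macro in macro-assembler-x64.h is more general and handles several operand arities):

// Hypothetical expansion for AVX_OP(Orps, orps); illustrative only.
void TurboAssembler::Orps(XMMRegister dst, XMMRegister src) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope avx_scope(this, AVX);
    vorps(dst, dst, src);  // AVX: non-destructive three-operand form
  } else {
    orps(dst, src);  // SSE: two-operand, destructive form
  }
}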
@@ -2463,14 +2463,16 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
DCHECK_NE(i.OutputSimd128Register(), kScratchDoubleReg);
CpuFeatureScope sse_scope(tasm(), SSE4_1);
XMMRegister dst = i.OutputSimd128Register();
__ pxor(kScratchDoubleReg, kScratchDoubleReg); // zeros
__ pblendw(kScratchDoubleReg, dst, 0x55); // get lo 16 bits
__ psubd(dst, kScratchDoubleReg); // get hi 16 bits
__ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // convert lo exactly
__ psrld(dst, 1); // divide by 2 to get in unsigned range
__ cvtdq2ps(dst, dst); // convert hi exactly
__ addps(dst, dst); // double hi, exactly
__ addps(dst, kScratchDoubleReg); // add hi and lo, may round.
__ Pxor(kScratchDoubleReg, kScratchDoubleReg); // zeros
__ Pblendw(kScratchDoubleReg, dst,
static_cast<uint8_t>(0x55)); // get lo 16 bits
__ Psubd(dst, kScratchDoubleReg); // get hi 16 bits
__ Cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // convert lo exactly
__ Psrld(dst,
static_cast<byte>(1)); // divide by 2 to get in unsigned range
__ Cvtdq2ps(dst, dst); // convert hi exactly
__ Addps(dst, dst); // double hi, exactly
__ Addps(dst, kScratchDoubleReg); // add hi and lo, may round.
break;
}
case kX64F32x4Abs: {
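A scalar, single-lane model of the unsigned i32x4-to-f32x4 conversion sequence in the hunk above (an illustrative sketch, not V8 code): cvtdq2ps converts signed int32 exactly but cannot represent the full unsigned range, so the value is split into its low 16 bits (converted exactly) and the remaining high part (halved so it fits the signed range, converted, then doubled back); only the final addition can round.

#include <cstdint>

float UConvertLane(uint32_t x) {
  uint32_t lo = x & 0xFFFFu;  // pblendw with mask 0x55 against a zero register
  uint32_t hi = x - lo;       // psubd
  float flo = static_cast<float>(static_cast<int32_t>(lo));       // cvtdq2ps, exact
  float fhi = static_cast<float>(static_cast<int32_t>(hi >> 1));  // psrld 1 + cvtdq2ps, exact
  fhi += fhi;        // addps dst, dst: undo the halving, still exact
  return fhi + flo;  // addps: the only step that may round
}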
@@ -2545,16 +2547,16 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
DCHECK_EQ(dst, i.InputSimd128Register(0));
// The minps instruction doesn't propagate NaNs and +0's in its first
// operand. Perform minps in both orders, merge the results, and adjust.
__ movaps(kScratchDoubleReg, src1);
__ minps(kScratchDoubleReg, dst);
__ minps(dst, src1);
__ Movaps(kScratchDoubleReg, src1);
__ Minps(kScratchDoubleReg, dst);
__ Minps(dst, src1);
// propagate -0's and NaNs, which may be non-canonical.
__ orps(kScratchDoubleReg, dst);
__ Orps(kScratchDoubleReg, dst);
// Canonicalize NaNs by quieting and clearing the payload.
__ cmpps(dst, kScratchDoubleReg, 3);
__ orps(kScratchDoubleReg, dst);
__ psrld(dst, 10);
__ andnps(dst, kScratchDoubleReg);
__ Cmpps(dst, kScratchDoubleReg, static_cast<int8_t>(3));
__ Orps(kScratchDoubleReg, dst);
__ Psrld(dst, static_cast<byte>(10));
__ Andnps(dst, kScratchDoubleReg);
break;
}
case kX64F32x4Max: {
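A scalar, single-lane model of the f32x4 min lowering above (illustrative sketch, not V8 code). MinpsLane models SSE minps, which returns its second operand on a NaN or a tie such as +0 vs -0; running the min in both orders and merging the results is what recovers the wasm semantics.

#include <cmath>
#include <cstdint>
#include <cstring>

uint32_t Bits(float f) { uint32_t u; std::memcpy(&u, &f, 4); return u; }
float Flt(uint32_t u) { float f; std::memcpy(&f, &u, 4); return f; }
float MinpsLane(float dst, float src) { return dst < src ? dst : src; }

float WasmF32MinLane(float a, float b) {  // a = dst, b = src1
  float scratch = MinpsLane(b, a);        // movaps + minps scratch, dst
  float d = MinpsLane(a, b);              // minps dst, src1
  uint32_t s = Bits(scratch) | Bits(d);   // orps: propagate -0 and NaN bits
  uint32_t mask =                         // cmpps(dst, scratch, 3): unordered?
      (std::isnan(d) || std::isnan(Flt(s))) ? 0xFFFFFFFFu : 0u;
  s |= mask;                              // orps scratch, dst
  return Flt(~(mask >> 10) & s);          // psrld 10 + andnps: canonical NaN
}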
@@ -2563,39 +2565,41 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
DCHECK_EQ(dst, i.InputSimd128Register(0));
// The maxps instruction doesn't propagate NaNs and +0's in its first
// operand. Perform maxps in both orders, merge the results, and adjust.
__ movaps(kScratchDoubleReg, src1);
__ maxps(kScratchDoubleReg, dst);
__ maxps(dst, src1);
__ Movaps(kScratchDoubleReg, src1);
__ Maxps(kScratchDoubleReg, dst);
__ Maxps(dst, src1);
// Find discrepancies.
__ xorps(dst, kScratchDoubleReg);
__ Xorps(dst, kScratchDoubleReg);
// Propagate NaNs, which may be non-canonical.
__ orps(kScratchDoubleReg, dst);
__ Orps(kScratchDoubleReg, dst);
// Propagate sign discrepancy and (subtle) quiet NaNs.
__ subps(kScratchDoubleReg, dst);
__ Subps(kScratchDoubleReg, dst);
// Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
__ cmpps(dst, kScratchDoubleReg, 3);
__ psrld(dst, 10);
__ andnps(dst, kScratchDoubleReg);
__ Cmpps(dst, kScratchDoubleReg, static_cast<int8_t>(3));
__ Psrld(dst, static_cast<byte>(10));
__ Andnps(dst, kScratchDoubleReg);
break;
}
case kX64F32x4Eq: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ cmpps(i.OutputSimd128Register(), i.InputSimd128Register(1), 0x0);
__ Cmpps(i.OutputSimd128Register(), i.InputSimd128Register(1),
static_cast<int8_t>(0x0));
break;
}
case kX64F32x4Ne: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ cmpps(i.OutputSimd128Register(), i.InputSimd128Register(1), 0x4);
__ Cmpps(i.OutputSimd128Register(), i.InputSimd128Register(1),
static_cast<int8_t>(0x4));
break;
}
case kX64F32x4Lt: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ cmpltps(i.OutputSimd128Register(), i.InputSimd128Register(1));
__ Cmpltps(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64F32x4Le: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ cmpleps(i.OutputSimd128Register(), i.InputSimd128Register(1));
__ Cmpleps(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64F32x4Qfma: {
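Similarly, a scalar model of the kX64F32x4Max sequence (reusing Bits and Flt from the min sketch above; illustrative only). The xor/or/sub steps resolve the +0/-0 tie in favour of +0 and turn any NaN into a quiet NaN whose payload is then cleared by the shift-and-andn step; as the comment notes, the sign of a NaN result is non-deterministic.

float MaxpsLane(float dst, float src) { return dst > src ? dst : src; }

float WasmF32MaxLane(float a, float b) {    // a = dst, b = src1
  float scratch = MaxpsLane(b, a);          // movaps + maxps scratch, dst
  float d = MaxpsLane(a, b);                // maxps dst, src1
  uint32_t diff = Bits(d) ^ Bits(scratch);  // xorps: find discrepancies
  uint32_t s = Bits(scratch) | diff;        // orps: propagate NaNs
  float sub = Flt(s) - Flt(diff);           // subps: fix sign, quiet NaNs
  uint32_t mask =                           // cmpps(dst, scratch, 3): unordered?
      (std::isnan(Flt(diff)) || std::isnan(sub)) ? 0xFFFFFFFFu : 0u;
  return Flt(~(mask >> 10) & Bits(sub));    // psrld 10 + andnps
}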
@@ -2605,9 +2609,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputSimd128Register(2));
} else {
XMMRegister tmp = i.TempSimd128Register(0);
__ movaps(tmp, i.InputSimd128Register(2));
__ mulps(tmp, i.InputSimd128Register(1));
__ addps(i.OutputSimd128Register(), tmp);
__ Movaps(tmp, i.InputSimd128Register(2));
__ Mulps(tmp, i.InputSimd128Register(1));
__ Addps(i.OutputSimd128Register(), tmp);
}
break;
}
@@ -2618,9 +2622,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputSimd128Register(2));
} else {
XMMRegister tmp = i.TempSimd128Register(0);
__ movaps(tmp, i.InputSimd128Register(2));
__ mulps(tmp, i.InputSimd128Register(1));
__ subps(i.OutputSimd128Register(), tmp);
__ Movaps(tmp, i.InputSimd128Register(2));
__ Mulps(tmp, i.InputSimd128Register(1));
__ Subps(i.OutputSimd128Register(), tmp);
}
break;
}
......
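A per-lane model of the non-FMA fallback in the two hunks above (illustrative only): a is the value already in the output register (input 0), b is input 1 and c is input 2. Without FMA support the product and the sum are rounded separately, so the result may differ from a true fused multiply-add in the last bit.

float QfmaLane(float a, float b, float c) { return a + b * c; }  // movaps + mulps + addps
float QfmsLane(float a, float b, float c) { return a - b * c; }  // movaps + mulps + subps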
@@ -930,6 +930,12 @@ int DisassemblerX64::AVXInstruction(byte* data) {
current += PrintRightXMMOperand(current);
AppendToBuffer(",0x%x", *current++);
break;
case 0x0E:
AppendToBuffer("vpblendw %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
AppendToBuffer(",0x%x", *current++);
break;
case 0x14:
AppendToBuffer("vpextrb ");
current += PrintRightByteOperand(current);
......
@@ -770,6 +770,8 @@ TEST(DisasmX64) {
__ vpshuflw(xmm1, xmm2, 85);
__ vpshuflw(xmm1, Operand(rbx, rcx, times_4, 10000), 85);
__ vshufps(xmm3, xmm2, xmm3, 3);
__ vpblendw(xmm1, xmm2, xmm3, 23);
__ vpblendw(xmm1, xmm2, Operand(rbx, rcx, times_4, 10000), 23);
__ vmovddup(xmm1, xmm2);
__ vmovddup(xmm1, Operand(rbx, rcx, times_4, 10000));
......
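With the new 0x0E case in AVXInstruction, the two vpblendw test lines above should disassemble following the existing "vpblendw %s,%s,...,0x%x" pattern, roughly as below (the memory-operand syntax shown is an assumption about the disassembler's formatting):

  vpblendw xmm1,xmm2,xmm3,0x17
  vpblendw xmm1,xmm2,[rbx+rcx*4+0x2710],0x17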