Commit 3f746ecf authored by Ng Zhi An, committed by Commit Bot

[wasm-simd] Add some AVX codegen

Adds AVX codegen for f64x2 min, max, qfma, and qfms.

Bug: v8:9561
Change-Id: Id32ba8d4367e4f9b3cccea2bc7ce24dfb04ec188
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1950487
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#65362}
parent 277381d8
......@@ -158,6 +158,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_OP(Andps, andps)
AVX_OP(Andnps, andnps)
AVX_OP(Andpd, andpd)
AVX_OP(Andnpd, andnpd)
AVX_OP(Orpd, orpd)
AVX_OP(Cmpeqps, cmpeqps)
AVX_OP(Cmpltps, cmpltps)
......@@ -192,7 +193,9 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_OP(Addpd, addpd)
AVX_OP(Subpd, subpd)
AVX_OP(Mulpd, mulpd)
AVX_OP(Minpd, minpd)
AVX_OP(Divpd, divpd)
AVX_OP(Maxpd, maxpd)
AVX_OP(Shufps, shufps)
AVX_OP(Cvtdq2ps, cvtdq2ps)
AVX_OP(Rcpps, rcpps)
......@@ -205,6 +208,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_OP(Pshuflw, pshuflw)
AVX_OP(Punpcklqdq, punpcklqdq)
AVX_OP(Pshufd, pshufd)
AVX_OP(Cmppd, cmppd)
AVX_OP_SSE3(Movddup, movddup)
AVX_OP_SSSE3(Pshufb, pshufb)
AVX_OP_SSSE3(Psignd, psignd)
......
......@@ -2347,16 +2347,16 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
DCHECK_EQ(dst, i.InputSimd128Register(0));
// The minpd instruction doesn't propagate NaNs and +0's in its first
// operand. Perform minpd in both orders, merge the results, and adjust.
__ movapd(kScratchDoubleReg, src1);
__ minpd(kScratchDoubleReg, dst);
__ minpd(dst, src1);
__ Movapd(kScratchDoubleReg, src1);
__ Minpd(kScratchDoubleReg, dst);
__ Minpd(dst, src1);
// Propagate -0's and NaNs, which may be non-canonical.
__ orpd(kScratchDoubleReg, dst);
__ Orpd(kScratchDoubleReg, dst);
// Canonicalize NaNs by quieting and clearing the payload.
__ cmppd(dst, kScratchDoubleReg, 3);
__ orpd(kScratchDoubleReg, dst);
__ psrlq(dst, 13);
__ andnpd(dst, kScratchDoubleReg);
__ Cmppd(dst, kScratchDoubleReg, static_cast<int8_t>(3));
__ Orpd(kScratchDoubleReg, dst);
__ Psrlq(dst, 13);
__ Andnpd(dst, kScratchDoubleReg);
break;
}
case kX64F64x2Max: {
......@@ -2365,19 +2365,19 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
DCHECK_EQ(dst, i.InputSimd128Register(0));
// The maxpd instruction doesn't propagate NaNs and +0's in its first
// operand. Perform maxpd in both orders, merge the results, and adjust.
__ movapd(kScratchDoubleReg, src1);
__ maxpd(kScratchDoubleReg, dst);
__ maxpd(dst, src1);
__ Movapd(kScratchDoubleReg, src1);
__ Maxpd(kScratchDoubleReg, dst);
__ Maxpd(dst, src1);
// Find discrepancies.
__ xorpd(dst, kScratchDoubleReg);
__ Xorpd(dst, kScratchDoubleReg);
// Propagate NaNs, which may be non-canonical.
__ orpd(kScratchDoubleReg, dst);
__ Orpd(kScratchDoubleReg, dst);
// Propagate sign discrepancy and (subtle) quiet NaNs.
__ subpd(kScratchDoubleReg, dst);
__ Subpd(kScratchDoubleReg, dst);
// Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
__ cmppd(dst, kScratchDoubleReg, 3);
__ psrlq(dst, 13);
__ andnpd(dst, kScratchDoubleReg);
__ Cmppd(dst, kScratchDoubleReg, static_cast<int8_t>(3));
__ Psrlq(dst, 13);
__ Andnpd(dst, kScratchDoubleReg);
break;
}
case kX64F64x2Eq: {
......@@ -2407,9 +2407,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputSimd128Register(2));
} else {
XMMRegister tmp = i.TempSimd128Register(0);
__ movapd(tmp, i.InputSimd128Register(2));
__ mulpd(tmp, i.InputSimd128Register(1));
__ addpd(i.OutputSimd128Register(), tmp);
__ Movapd(tmp, i.InputSimd128Register(2));
__ Mulpd(tmp, i.InputSimd128Register(1));
__ Addpd(i.OutputSimd128Register(), tmp);
}
break;
}
......@@ -2420,9 +2420,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputSimd128Register(2));
} else {
XMMRegister tmp = i.TempSimd128Register(0);
__ movapd(tmp, i.InputSimd128Register(2));
__ mulpd(tmp, i.InputSimd128Register(1));
__ subpd(i.OutputSimd128Register(), tmp);
__ Movapd(tmp, i.InputSimd128Register(2));
__ Mulpd(tmp, i.InputSimd128Register(1));
__ Subpd(i.OutputSimd128Register(), tmp);
}
break;
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment