Commit 307490b0 authored by Ng Zhi An, committed by Commit Bot

[wasm-simd][x64] Add AVX codegen for i32x4 conversions and hadd

Bug: v8:9561
Change-Id: I4a2c6217dea540b81256dcc833412da573f54795
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2069403
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#66587}
parent 92a2f0c7
...@@ -1273,6 +1273,9 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase { ...@@ -1273,6 +1273,9 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
void vcvtss2sd(XMMRegister dst, XMMRegister src1, Operand src2) { void vcvtss2sd(XMMRegister dst, XMMRegister src1, Operand src2) {
vinstr(0x5a, dst, src1, src2, kF3, k0F, kWIG); vinstr(0x5a, dst, src1, src2, kF3, k0F, kWIG);
} }
// Emits VCVTTPS2DQ dst, src: convert four packed single-precision floats in
// src to four packed signed doublewords in dst, truncating toward zero.
// Encoding: VEX.128.F3.0F 0x5B, W ignored. The instruction has no second
// source, so xmm0 is passed to fill the (unused) vvvv slot of vinstr —
// presumably encoded as the required all-ones field; confirm against vinstr.
void vcvttps2dq(XMMRegister dst, XMMRegister src) {
vinstr(0x5b, dst, xmm0, src, kF3, k0F, kWIG);
}
void vcvtlsi2sd(XMMRegister dst, XMMRegister src1, Register src2) { void vcvtlsi2sd(XMMRegister dst, XMMRegister src1, Register src2) {
XMMRegister isrc2 = XMMRegister::from_code(src2.code()); XMMRegister isrc2 = XMMRegister::from_code(src2.code());
vinstr(0x2a, dst, src1, isrc2, kF2, k0F, kW0); vinstr(0x2a, dst, src1, isrc2, kF2, k0F, kW0);
......
...@@ -176,6 +176,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase { ...@@ -176,6 +176,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_OP(Sqrtsd, sqrtsd) AVX_OP(Sqrtsd, sqrtsd)
AVX_OP(Sqrtps, sqrtps) AVX_OP(Sqrtps, sqrtps)
AVX_OP(Sqrtpd, sqrtpd) AVX_OP(Sqrtpd, sqrtpd)
AVX_OP(Cvttps2dq, cvttps2dq)
AVX_OP(Ucomiss, ucomiss) AVX_OP(Ucomiss, ucomiss)
AVX_OP(Ucomisd, ucomisd) AVX_OP(Ucomisd, ucomisd)
AVX_OP(Paddusb, paddusb) AVX_OP(Paddusb, paddusb)
...@@ -219,6 +220,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase { ...@@ -219,6 +220,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_OP(Cmppd, cmppd) AVX_OP(Cmppd, cmppd)
AVX_OP(Movlhps, movlhps) AVX_OP(Movlhps, movlhps)
AVX_OP_SSE3(Movddup, movddup) AVX_OP_SSE3(Movddup, movddup)
AVX_OP_SSSE3(Phaddd, phaddd)
AVX_OP_SSSE3(Pshufb, pshufb) AVX_OP_SSSE3(Pshufb, pshufb)
AVX_OP_SSSE3(Psignd, psignd) AVX_OP_SSSE3(Psignd, psignd)
AVX_OP_SSSE3(Palignr, palignr) AVX_OP_SSSE3(Palignr, palignr)
......
...@@ -2912,18 +2912,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2912,18 +2912,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
XMMRegister dst = i.OutputSimd128Register(); XMMRegister dst = i.OutputSimd128Register();
XMMRegister tmp = i.TempSimd128Register(0); XMMRegister tmp = i.TempSimd128Register(0);
// NAN->0 // NAN->0
__ movaps(tmp, dst); __ Movaps(tmp, dst);
__ cmpeqps(tmp, tmp); __ Cmpeqps(tmp, tmp);
__ pand(dst, tmp); __ Pand(dst, tmp);
// Set top bit if >= 0 (but not -0.0!) // Set top bit if >= 0 (but not -0.0!)
__ pxor(tmp, dst); __ Pxor(tmp, dst);
// Convert // Convert
__ cvttps2dq(dst, dst); __ Cvttps2dq(dst, dst);
// Set top bit if >=0 is now < 0 // Set top bit if >=0 is now < 0
__ pand(tmp, dst); __ Pand(tmp, dst);
__ psrad(tmp, 31); __ Psrad(tmp, static_cast<byte>(31));
// Set positive overflow lanes to 0x7FFFFFFF // Set positive overflow lanes to 0x7FFFFFFF
__ pxor(dst, tmp); __ Pxor(dst, tmp);
break; break;
} }
case kX64I32x4SConvertI16x8Low: { case kX64I32x4SConvertI16x8Low: {
...@@ -2966,7 +2966,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2966,7 +2966,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
} }
case kX64I32x4AddHoriz: { case kX64I32x4AddHoriz: {
CpuFeatureScope sse_scope(tasm(), SSSE3); CpuFeatureScope sse_scope(tasm(), SSSE3);
__ phaddd(i.OutputSimd128Register(), i.InputSimd128Register(1)); __ Phaddd(i.OutputSimd128Register(), i.InputSimd128Register(1));
break; break;
} }
case kX64I32x4Sub: { case kX64I32x4Sub: {
...@@ -3018,26 +3018,26 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -3018,26 +3018,26 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
XMMRegister tmp = i.TempSimd128Register(0); XMMRegister tmp = i.TempSimd128Register(0);
XMMRegister tmp2 = i.TempSimd128Register(1); XMMRegister tmp2 = i.TempSimd128Register(1);
// NAN->0, negative->0 // NAN->0, negative->0
__ pxor(tmp2, tmp2); __ Pxor(tmp2, tmp2);
__ maxps(dst, tmp2); __ Maxps(dst, tmp2);
// scratch: float representation of max_signed // scratch: float representation of max_signed
__ pcmpeqd(tmp2, tmp2); __ Pcmpeqd(tmp2, tmp2);
__ psrld(tmp2, 1); // 0x7fffffff __ Psrld(tmp2, static_cast<uint8_t>(1)); // 0x7fffffff
__ cvtdq2ps(tmp2, tmp2); // 0x4f000000 __ Cvtdq2ps(tmp2, tmp2); // 0x4f000000
// tmp: convert (src-max_signed). // tmp: convert (src-max_signed).
// Positive overflow lanes -> 0x7FFFFFFF // Positive overflow lanes -> 0x7FFFFFFF
// Negative lanes -> 0 // Negative lanes -> 0
__ movaps(tmp, dst); __ Movaps(tmp, dst);
__ subps(tmp, tmp2); __ Subps(tmp, tmp2);
__ cmpleps(tmp2, tmp); __ Cmpleps(tmp2, tmp);
__ cvttps2dq(tmp, tmp); __ Cvttps2dq(tmp, tmp);
__ pxor(tmp, tmp2); __ Pxor(tmp, tmp2);
__ pxor(tmp2, tmp2); __ Pxor(tmp2, tmp2);
__ pmaxsd(tmp, tmp2); __ Pmaxsd(tmp, tmp2);
// convert. Overflow lanes above max_signed will be 0x80000000 // convert. Overflow lanes above max_signed will be 0x80000000
__ cvttps2dq(dst, dst); __ Cvttps2dq(dst, dst);
// Add (src-max_signed) for overflow lanes. // Add (src-max_signed) for overflow lanes.
__ paddd(dst, tmp); __ Paddd(dst, tmp);
break; break;
} }
case kX64I32x4UConvertI16x8Low: { case kX64I32x4UConvertI16x8Low: {
......
...@@ -1068,6 +1068,10 @@ int DisassemblerX64::AVXInstruction(byte* data) { ...@@ -1068,6 +1068,10 @@ int DisassemblerX64::AVXInstruction(byte* data) {
NameOfXMMRegister(vvvv)); NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current); current += PrintRightXMMOperand(current);
break; break;
case 0x5B:
AppendToBuffer("vcvttps2dq %s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
break;
case 0x5C: case 0x5C:
AppendToBuffer("vsubss %s,%s,", NameOfXMMRegister(regop), AppendToBuffer("vsubss %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv)); NameOfXMMRegister(vvvv));
......
...@@ -635,6 +635,7 @@ TEST(DisasmX64) { ...@@ -635,6 +635,7 @@ TEST(DisasmX64) {
__ vcvtss2sd(xmm4, xmm9, xmm11); __ vcvtss2sd(xmm4, xmm9, xmm11);
__ vcvtss2sd(xmm4, xmm9, Operand(rbx, rcx, times_1, 10000)); __ vcvtss2sd(xmm4, xmm9, Operand(rbx, rcx, times_1, 10000));
__ vcvttps2dq(xmm4, xmm11);
__ vcvtlsi2sd(xmm5, xmm9, rcx); __ vcvtlsi2sd(xmm5, xmm9, rcx);
__ vcvtlsi2sd(xmm9, xmm3, Operand(rbx, r9, times_4, 10000)); __ vcvtlsi2sd(xmm9, xmm3, Operand(rbx, r9, times_4, 10000));
__ vcvtqsi2sd(xmm5, xmm9, r11); __ vcvtqsi2sd(xmm5, xmm9, r11);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment