Commit beff7956, authored by Ng Zhi An, committed by Commit Bot

[wasm-simd][x64] Add AVX codegen for i8x16 comparisons

Bug: v8:9561
Change-Id: Ia9d1c263f4dc382f8b772e3cba57a4f14235b310
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2069402
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#66547}
parent 9dacf73a
...@@ -147,6 +147,9 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase { ...@@ -147,6 +147,9 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_OP(Pcmpeqb, pcmpeqb) AVX_OP(Pcmpeqb, pcmpeqb)
AVX_OP(Pcmpeqw, pcmpeqw) AVX_OP(Pcmpeqw, pcmpeqw)
AVX_OP(Pcmpeqd, pcmpeqd) AVX_OP(Pcmpeqd, pcmpeqd)
AVX_OP(Pcmpgtb, pcmpgtb)
AVX_OP(Pmaxub, pmaxub)
AVX_OP(Pminub, pminub)
AVX_OP(Addss, addss) AVX_OP(Addss, addss)
AVX_OP(Addsd, addsd) AVX_OP(Addsd, addsd)
AVX_OP(Mulsd, mulsd) AVX_OP(Mulsd, mulsd)
...@@ -223,11 +226,13 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase { ...@@ -223,11 +226,13 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_OP_SSSE3(Pabsw, pabsw) AVX_OP_SSSE3(Pabsw, pabsw)
AVX_OP_SSSE3(Pabsd, pabsd) AVX_OP_SSSE3(Pabsd, pabsd)
AVX_OP_SSE4_1(Pcmpeqq, pcmpeqq) AVX_OP_SSE4_1(Pcmpeqq, pcmpeqq)
AVX_OP_SSE4_1(Pmulld, pmulld) AVX_OP_SSE4_1(Pminsb, pminsb)
AVX_OP_SSE4_1(Pminsd, pminsd) AVX_OP_SSE4_1(Pminsd, pminsd)
AVX_OP_SSE4_1(Pminud, pminud) AVX_OP_SSE4_1(Pminud, pminud)
AVX_OP_SSE4_1(Pmaxsb, pmaxsb)
AVX_OP_SSE4_1(Pmaxsd, pmaxsd) AVX_OP_SSE4_1(Pmaxsd, pmaxsd)
AVX_OP_SSE4_1(Pmaxud, pmaxud) AVX_OP_SSE4_1(Pmaxud, pmaxud)
AVX_OP_SSE4_1(Pmulld, pmulld)
AVX_OP_SSE4_1(Extractps, extractps) AVX_OP_SSE4_1(Extractps, extractps)
AVX_OP_SSE4_1(Insertps, insertps) AVX_OP_SSE4_1(Insertps, insertps)
AVX_OP_SSE4_1(Pinsrq, pinsrq) AVX_OP_SSE4_1(Pinsrq, pinsrq)
......
...@@ -3436,35 +3436,35 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -3436,35 +3436,35 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
} }
case kX64I8x16MinS: { case kX64I8x16MinS: {
CpuFeatureScope sse_scope(tasm(), SSE4_1); CpuFeatureScope sse_scope(tasm(), SSE4_1);
__ pminsb(i.OutputSimd128Register(), i.InputSimd128Register(1)); __ Pminsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
break; break;
} }
case kX64I8x16MaxS: { case kX64I8x16MaxS: {
CpuFeatureScope sse_scope(tasm(), SSE4_1); CpuFeatureScope sse_scope(tasm(), SSE4_1);
__ pmaxsb(i.OutputSimd128Register(), i.InputSimd128Register(1)); __ Pmaxsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
break; break;
} }
case kX64I8x16Eq: { case kX64I8x16Eq: {
__ pcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(1)); __ Pcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(1));
break; break;
} }
case kX64I8x16Ne: { case kX64I8x16Ne: {
XMMRegister tmp = i.TempSimd128Register(0); XMMRegister tmp = i.TempSimd128Register(0);
__ pcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(1)); __ Pcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(1));
__ pcmpeqb(tmp, tmp); __ Pcmpeqb(tmp, tmp);
__ pxor(i.OutputSimd128Register(), tmp); __ Pxor(i.OutputSimd128Register(), tmp);
break; break;
} }
case kX64I8x16GtS: { case kX64I8x16GtS: {
__ pcmpgtb(i.OutputSimd128Register(), i.InputSimd128Register(1)); __ Pcmpgtb(i.OutputSimd128Register(), i.InputSimd128Register(1));
break; break;
} }
case kX64I8x16GeS: { case kX64I8x16GeS: {
CpuFeatureScope sse_scope(tasm(), SSE4_1); CpuFeatureScope sse_scope(tasm(), SSE4_1);
XMMRegister dst = i.OutputSimd128Register(); XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(1); XMMRegister src = i.InputSimd128Register(1);
__ pminsb(dst, src); __ Pminsb(dst, src);
__ pcmpeqb(dst, src); __ Pcmpeqb(dst, src);
break; break;
} }
case kX64I8x16UConvertI16x8: { case kX64I8x16UConvertI16x8: {
...@@ -3503,12 +3503,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -3503,12 +3503,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
} }
case kX64I8x16MinU: { case kX64I8x16MinU: {
CpuFeatureScope sse_scope(tasm(), SSE4_1); CpuFeatureScope sse_scope(tasm(), SSE4_1);
__ pminub(i.OutputSimd128Register(), i.InputSimd128Register(1)); __ Pminub(i.OutputSimd128Register(), i.InputSimd128Register(1));
break; break;
} }
case kX64I8x16MaxU: { case kX64I8x16MaxU: {
CpuFeatureScope sse_scope(tasm(), SSE4_1); CpuFeatureScope sse_scope(tasm(), SSE4_1);
__ pmaxub(i.OutputSimd128Register(), i.InputSimd128Register(1)); __ Pmaxub(i.OutputSimd128Register(), i.InputSimd128Register(1));
break; break;
} }
case kX64I8x16GtU: { case kX64I8x16GtU: {
...@@ -3516,18 +3516,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -3516,18 +3516,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
XMMRegister dst = i.OutputSimd128Register(); XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(1); XMMRegister src = i.InputSimd128Register(1);
XMMRegister tmp = i.TempSimd128Register(0); XMMRegister tmp = i.TempSimd128Register(0);
__ pmaxub(dst, src); __ Pmaxub(dst, src);
__ pcmpeqb(dst, src); __ Pcmpeqb(dst, src);
__ pcmpeqb(tmp, tmp); __ Pcmpeqb(tmp, tmp);
__ pxor(dst, tmp); __ Pxor(dst, tmp);
break; break;
} }
case kX64I8x16GeU: { case kX64I8x16GeU: {
CpuFeatureScope sse_scope(tasm(), SSE4_1); CpuFeatureScope sse_scope(tasm(), SSE4_1);
XMMRegister dst = i.OutputSimd128Register(); XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(1); XMMRegister src = i.InputSimd128Register(1);
__ pminub(dst, src); __ Pminub(dst, src);
__ pcmpeqb(dst, src); __ Pcmpeqb(dst, src);
break; break;
} }
case kX64I8x16RoundingAverageU: { case kX64I8x16RoundingAverageU: {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment