Commit 89d9eb73 authored by Ng Zhi An, committed by Commit Bot

[wasm-simd][x64] Optimize more ops for AVX

All these opcodes have a simple lowering into a single x64 instruction.
We can perform a similar optimization when AVX is supported to not force
dst == src1.

Bug: v8:10116
Change-Id: I4ad2975b6f241d8209025682202b476c08b3491b
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2486383
Reviewed-by: Bill Budge <bbudge@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#70636}
parent c77dd2ff
......@@ -2394,19 +2394,19 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kX64F64x2Add: {
ASSEMBLE_SSE_BINOP(Addpd);
ASSEMBLE_SIMD_BINOP(addpd);
break;
}
case kX64F64x2Sub: {
ASSEMBLE_SSE_BINOP(Subpd);
ASSEMBLE_SIMD_BINOP(subpd);
break;
}
case kX64F64x2Mul: {
ASSEMBLE_SSE_BINOP(Mulpd);
ASSEMBLE_SIMD_BINOP(mulpd);
break;
}
case kX64F64x2Div: {
ASSEMBLE_SSE_BINOP(Divpd);
ASSEMBLE_SIMD_BINOP(divpd);
break;
}
case kX64F64x2Min: {
......@@ -2449,23 +2449,19 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kX64F64x2Eq: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ Cmpeqpd(i.OutputSimd128Register(), i.InputSimd128Register(1));
ASSEMBLE_SIMD_BINOP(cmpeqpd);
break;
}
case kX64F64x2Ne: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ Cmpneqpd(i.OutputSimd128Register(), i.InputSimd128Register(1));
ASSEMBLE_SIMD_BINOP(cmpneqpd);
break;
}
case kX64F64x2Lt: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ Cmpltpd(i.OutputSimd128Register(), i.InputSimd128Register(1));
ASSEMBLE_SIMD_BINOP(cmpltpd);
break;
}
case kX64F64x2Le: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ Cmplepd(i.OutputSimd128Register(), i.InputSimd128Register(1));
ASSEMBLE_SIMD_BINOP(cmplepd);
break;
}
case kX64F64x2Qfma: {
......@@ -2650,25 +2646,19 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kX64F32x4Eq: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ Cmpps(i.OutputSimd128Register(), i.InputSimd128Register(1),
int8_t{0x0});
ASSEMBLE_SIMD_BINOP(cmpeqps);
break;
}
case kX64F32x4Ne: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ Cmpps(i.OutputSimd128Register(), i.InputSimd128Register(1),
int8_t{0x4});
ASSEMBLE_SIMD_BINOP(cmpneqps);
break;
}
case kX64F32x4Lt: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ Cmpltps(i.OutputSimd128Register(), i.InputSimd128Register(1));
ASSEMBLE_SIMD_BINOP(cmpltps);
break;
}
case kX64F32x4Le: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ Cmpleps(i.OutputSimd128Register(), i.InputSimd128Register(1));
ASSEMBLE_SIMD_BINOP(cmpleps);
break;
}
case kX64F32x4Qfma: {
......@@ -2787,13 +2777,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kX64I64x2Add: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ Paddq(i.OutputSimd128Register(), i.InputSimd128Register(1));
ASSEMBLE_SIMD_BINOP(paddq);
break;
}
case kX64I64x2Sub: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ Psubq(i.OutputSimd128Register(), i.InputSimd128Register(1));
ASSEMBLE_SIMD_BINOP(psubq);
break;
}
case kX64I64x2Mul: {
......@@ -2897,31 +2885,31 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kX64I32x4Add: {
__ Paddd(i.OutputSimd128Register(), i.InputSimd128Register(1));
ASSEMBLE_SIMD_BINOP(paddd);
break;
}
case kX64I32x4AddHoriz: {
__ Phaddd(i.OutputSimd128Register(), i.InputSimd128Register(1));
ASSEMBLE_SIMD_BINOP(phaddd);
break;
}
case kX64I32x4Sub: {
__ Psubd(i.OutputSimd128Register(), i.InputSimd128Register(1));
ASSEMBLE_SIMD_BINOP(psubd);
break;
}
case kX64I32x4Mul: {
__ Pmulld(i.OutputSimd128Register(), i.InputSimd128Register(1));
ASSEMBLE_SIMD_BINOP(pmulld);
break;
}
case kX64I32x4MinS: {
__ Pminsd(i.OutputSimd128Register(), i.InputSimd128Register(1));
ASSEMBLE_SIMD_BINOP(pminsd);
break;
}
case kX64I32x4MaxS: {
__ Pmaxsd(i.OutputSimd128Register(), i.InputSimd128Register(1));
ASSEMBLE_SIMD_BINOP(pmaxsd);
break;
}
case kX64I32x4Eq: {
__ Pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1));
ASSEMBLE_SIMD_BINOP(pcmpeqd);
break;
}
case kX64I32x4Ne: {
......@@ -2932,7 +2920,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kX64I32x4GtS: {
__ Pcmpgtd(i.OutputSimd128Register(), i.InputSimd128Register(1));
ASSEMBLE_SIMD_BINOP(pcmpgtd);
break;
}
case kX64I32x4GeS: {
......@@ -2986,11 +2974,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kX64I32x4MinU: {
__ Pminud(i.OutputSimd128Register(), i.InputSimd128Register(1));
ASSEMBLE_SIMD_BINOP(pminud);
break;
}
case kX64I32x4MaxU: {
__ Pmaxud(i.OutputSimd128Register(), i.InputSimd128Register(1));
ASSEMBLE_SIMD_BINOP(pmaxud);
break;
}
case kX64I32x4GtU: {
......
......@@ -2797,46 +2797,46 @@ VISIT_ATOMIC_BINOP(Xor)
#undef VISIT_ATOMIC_BINOP
// X-macro list of SIMD binary operations. Invokes V(Name) once for every
// operation listed below, in the order written, so a single definition of V
// can stamp out one piece of code per operation.
// NOTE(review): going by the list name, these are presumably the operations
// that have both an SSE and an AVX lowering (the AVX form not needing
// dst == src1) — confirm against the macro's users, which are not visible
// here.
#define SIMD_BINOP_SSE_AVX_LIST(V) \
V(F64x2Add) \
V(F64x2Sub) \
V(F64x2Mul) \
V(F64x2Div) \
V(F64x2Eq) \
V(F64x2Ne) \
V(F64x2Lt) \
V(F64x2Le) \
V(F32x4Add) \
V(F32x4Sub) \
V(F32x4Mul) \
V(F32x4Div) \
V(F32x4Eq) \
V(F32x4Ne) \
V(F32x4Lt) \
V(F32x4Le) \
V(I64x2Add) \
V(I64x2Sub) \
V(I32x4Add) \
V(I32x4AddHoriz) \
V(I32x4Sub) \
V(I32x4Mul) \
V(I32x4MinS) \
V(I32x4MaxS) \
V(I32x4Eq) \
V(I32x4GtS) \
V(I32x4MinU) \
V(I32x4MaxU) \
V(S128And) \
V(S128Or) \
V(S128Xor)
#define SIMD_BINOP_LIST(V) \
V(F64x2Add) \
V(F64x2Sub) \
V(F64x2Mul) \
V(F64x2Div) \
V(F64x2Min) \
V(F64x2Max) \
V(F64x2Eq) \
V(F64x2Ne) \
V(F64x2Lt) \
V(F64x2Le) \
V(F32x4AddHoriz) \
V(F32x4Min) \
V(F32x4Max) \
V(F32x4Eq) \
V(F32x4Ne) \
V(F32x4Lt) \
V(F32x4Le) \
V(I64x2Add) \
V(I64x2Sub) \
V(I64x2Eq) \
V(I32x4Add) \
V(I32x4AddHoriz) \
V(I32x4Sub) \
V(I32x4Mul) \
V(I32x4MinS) \
V(I32x4MaxS) \
V(I32x4Eq) \
V(I32x4GtS) \
V(I32x4GeS) \
V(I32x4MinU) \
V(I32x4MaxU) \
V(I32x4GeU) \
V(I32x4DotI16x8S) \
V(I16x8SConvertI32x4) \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment