Commit d4f7ea80 authored by Zhi An Ng, committed by Commit Bot

[wasm-simd][x64] Don't fix dst to src on AVX

On AVX, many instructions can have 3 operands, unlike SSE which only has
2. So on SSE we use DefineSameAsFirst on the dst. But on AVX, using that
will cause some unnecessary moves.

This change moves a bunch of instructions that have single-instruction
codegen into a macro list that supports this non-restricted AVX
codegen.

Bug: v8:9561
Change-Id: I348a8396e8a1129daf2e1ed08ae8526e1bc3a73b
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2505254
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#70888}
parent 8e3ae62d
......@@ -3007,7 +3007,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kX64I32x4DotI16x8S: {
__ Pmaddwd(i.OutputSimd128Register(), i.InputSimd128Register(1));
ASSEMBLE_SIMD_BINOP(pmaddwd);
break;
}
case kX64S128Const: {
......@@ -3081,44 +3081,43 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kX64I16x8SConvertI32x4: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ Packssdw(i.OutputSimd128Register(), i.InputSimd128Register(1));
ASSEMBLE_SIMD_BINOP(packssdw);
break;
}
case kX64I16x8Add: {
__ Paddw(i.OutputSimd128Register(), i.InputSimd128Register(1));
ASSEMBLE_SIMD_BINOP(paddw);
break;
}
case kX64I16x8AddSatS: {
__ Paddsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
ASSEMBLE_SIMD_BINOP(paddsw);
break;
}
case kX64I16x8AddHoriz: {
__ Phaddw(i.OutputSimd128Register(), i.InputSimd128Register(1));
ASSEMBLE_SIMD_BINOP(phaddw);
break;
}
case kX64I16x8Sub: {
__ Psubw(i.OutputSimd128Register(), i.InputSimd128Register(1));
ASSEMBLE_SIMD_BINOP(psubw);
break;
}
case kX64I16x8SubSatS: {
__ Psubsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
ASSEMBLE_SIMD_BINOP(psubsw);
break;
}
case kX64I16x8Mul: {
__ Pmullw(i.OutputSimd128Register(), i.InputSimd128Register(1));
ASSEMBLE_SIMD_BINOP(pmullw);
break;
}
case kX64I16x8MinS: {
__ Pminsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
ASSEMBLE_SIMD_BINOP(pminsw);
break;
}
case kX64I16x8MaxS: {
__ Pmaxsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
ASSEMBLE_SIMD_BINOP(pmaxsw);
break;
}
case kX64I16x8Eq: {
__ Pcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(1));
ASSEMBLE_SIMD_BINOP(pcmpeqw);
break;
}
case kX64I16x8Ne: {
......@@ -3129,7 +3128,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kX64I16x8GtS: {
__ Pcmpgtw(i.OutputSimd128Register(), i.InputSimd128Register(1));
ASSEMBLE_SIMD_BINOP(pcmpgtw);
break;
}
case kX64I16x8GeS: {
......@@ -3160,19 +3159,19 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kX64I16x8AddSatU: {
__ Paddusw(i.OutputSimd128Register(), i.InputSimd128Register(1));
ASSEMBLE_SIMD_BINOP(paddusw);
break;
}
case kX64I16x8SubSatU: {
__ Psubusw(i.OutputSimd128Register(), i.InputSimd128Register(1));
ASSEMBLE_SIMD_BINOP(psubusw);
break;
}
case kX64I16x8MinU: {
__ Pminuw(i.OutputSimd128Register(), i.InputSimd128Register(1));
ASSEMBLE_SIMD_BINOP(pminuw);
break;
}
case kX64I16x8MaxU: {
__ Pmaxuw(i.OutputSimd128Register(), i.InputSimd128Register(1));
ASSEMBLE_SIMD_BINOP(pmaxuw);
break;
}
case kX64I16x8GtU: {
......@@ -3193,7 +3192,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kX64I16x8RoundingAverageU: {
__ Pavgw(i.OutputSimd128Register(), i.InputSimd128Register(1));
ASSEMBLE_SIMD_BINOP(pavgw);
break;
}
case kX64I16x8Abs: {
......@@ -3268,8 +3267,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kX64I8x16SConvertI16x8: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ Packsswb(i.OutputSimd128Register(), i.InputSimd128Register(1));
ASSEMBLE_SIMD_BINOP(packsswb);
break;
}
case kX64I8x16Neg: {
......@@ -3350,19 +3348,19 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kX64I8x16Add: {
__ Paddb(i.OutputSimd128Register(), i.InputSimd128Register(1));
ASSEMBLE_SIMD_BINOP(paddb);
break;
}
case kX64I8x16AddSatS: {
__ Paddsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
ASSEMBLE_SIMD_BINOP(paddsb);
break;
}
case kX64I8x16Sub: {
__ Psubb(i.OutputSimd128Register(), i.InputSimd128Register(1));
ASSEMBLE_SIMD_BINOP(psubb);
break;
}
case kX64I8x16SubSatS: {
__ Psubsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
ASSEMBLE_SIMD_BINOP(psubsb);
break;
}
case kX64I8x16Mul: {
......@@ -3399,15 +3397,15 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kX64I8x16MinS: {
__ Pminsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
ASSEMBLE_SIMD_BINOP(pminsb);
break;
}
case kX64I8x16MaxS: {
__ Pmaxsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
ASSEMBLE_SIMD_BINOP(pmaxsb);
break;
}
case kX64I8x16Eq: {
__ Pcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(1));
ASSEMBLE_SIMD_BINOP(pcmpeqb);
break;
}
case kX64I8x16Ne: {
......@@ -3418,7 +3416,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kX64I8x16GtS: {
__ Pcmpgtb(i.OutputSimd128Register(), i.InputSimd128Register(1));
ASSEMBLE_SIMD_BINOP(pcmpgtb);
break;
}
case kX64I8x16GeS: {
......@@ -3467,19 +3465,19 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kX64I8x16AddSatU: {
__ Paddusb(i.OutputSimd128Register(), i.InputSimd128Register(1));
ASSEMBLE_SIMD_BINOP(paddusb);
break;
}
case kX64I8x16SubSatU: {
__ Psubusb(i.OutputSimd128Register(), i.InputSimd128Register(1));
ASSEMBLE_SIMD_BINOP(psubusb);
break;
}
case kX64I8x16MinU: {
__ Pminub(i.OutputSimd128Register(), i.InputSimd128Register(1));
ASSEMBLE_SIMD_BINOP(pminub);
break;
}
case kX64I8x16MaxU: {
__ Pmaxub(i.OutputSimd128Register(), i.InputSimd128Register(1));
ASSEMBLE_SIMD_BINOP(pmaxub);
break;
}
case kX64I8x16GtU: {
......@@ -3500,7 +3498,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kX64I8x16RoundingAverageU: {
__ Pavgb(i.OutputSimd128Register(), i.InputSimd128Register(1));
ASSEMBLE_SIMD_BINOP(pavgb);
break;
}
case kX64I8x16Abs: {
......
......@@ -2829,6 +2829,37 @@ VISIT_ATOMIC_BINOP(Xor)
V(I32x4GtS) \
V(I32x4MinU) \
V(I32x4MaxU) \
V(I32x4DotI16x8S) \
V(I16x8SConvertI32x4) \
V(I16x8Add) \
V(I16x8AddSatS) \
V(I16x8AddHoriz) \
V(I16x8Sub) \
V(I16x8SubSatS) \
V(I16x8Mul) \
V(I16x8MinS) \
V(I16x8MaxS) \
V(I16x8Eq) \
V(I16x8GtS) \
V(I16x8AddSatU) \
V(I16x8SubSatU) \
V(I16x8MinU) \
V(I16x8MaxU) \
V(I16x8RoundingAverageU) \
V(I8x16SConvertI16x8) \
V(I8x16Add) \
V(I8x16AddSatS) \
V(I8x16Sub) \
V(I8x16SubSatS) \
V(I8x16MinS) \
V(I8x16MaxS) \
V(I8x16Eq) \
V(I8x16GtS) \
V(I8x16AddSatU) \
V(I8x16SubSatU) \
V(I8x16MinU) \
V(I8x16MaxU) \
V(I8x16RoundingAverageU) \
V(S128And) \
V(S128Or) \
V(S128Xor)
......@@ -2842,41 +2873,10 @@ VISIT_ATOMIC_BINOP(Xor)
V(I64x2Eq) \
V(I32x4GeS) \
V(I32x4GeU) \
V(I32x4DotI16x8S) \
V(I16x8SConvertI32x4) \
V(I16x8Add) \
V(I16x8AddSatS) \
V(I16x8AddHoriz) \
V(I16x8Sub) \
V(I16x8SubSatS) \
V(I16x8Mul) \
V(I16x8MinS) \
V(I16x8MaxS) \
V(I16x8Eq) \
V(I16x8GtS) \
V(I16x8GeS) \
V(I16x8AddSatU) \
V(I16x8SubSatU) \
V(I16x8MinU) \
V(I16x8MaxU) \
V(I16x8GeU) \
V(I16x8RoundingAverageU) \
V(I8x16SConvertI16x8) \
V(I8x16Add) \
V(I8x16AddSatS) \
V(I8x16Sub) \
V(I8x16SubSatS) \
V(I8x16MinS) \
V(I8x16MaxS) \
V(I8x16Eq) \
V(I8x16GtS) \
V(I8x16GeS) \
V(I8x16AddSatU) \
V(I8x16SubSatU) \
V(I8x16MinU) \
V(I8x16MaxU) \
V(I8x16GeU) \
V(I8x16RoundingAverageU)
V(I8x16GeU)
#define SIMD_BINOP_ONE_TEMP_LIST(V) \
V(I32x4Ne) \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment