Commit a66bb000 authored by Zhi An Ng, committed by Commit Bot

[wasm-simd][x64] Optimize blend and palignr shuffles for AVX

For pblendw and palignr, if AVX is supported, we can use the 3-operand
AVX instruction, this can save us a move.

Bug: v8:11270
Change-Id: Ifd837e29c76886a3008bc63c17d4a68bc6aae364
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2596578
Reviewed-by: Bill Budge <bbudge@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#71857}
parent b145152d
......@@ -616,14 +616,22 @@ void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen,
} \
} while (false)
#define ASSEMBLE_SIMD_IMM_SHUFFLE(opcode, imm) \
do { \
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); \
if (instr->InputAt(1)->IsSimd128Register()) { \
__ opcode(i.OutputSimd128Register(), i.InputSimd128Register(1), imm); \
} else { \
__ opcode(i.OutputSimd128Register(), i.InputOperand(1), imm); \
} \
// Emits a SIMD shuffle instruction that takes an 8-bit immediate
// (e.g. pblendw, palignr). When AVX is supported, the non-destructive
// three-operand form (v<opcode> dst, src, src2, imm) is emitted, so dst
// need not alias input 0 and the register move otherwise required by
// same-as-first is saved. The AVX path only accepts a register for
// input 1 (hence the DCHECK) — the instruction selector marks input 1
// as needing a register for these opcodes. Without AVX, the destructive
// two-operand SSE form is used, which requires dst == input 0.
#define ASSEMBLE_SIMD_IMM_SHUFFLE(opcode, imm)                  \
  do {                                                          \
    XMMRegister dst = i.OutputSimd128Register();                \
    XMMRegister src = i.InputSimd128Register(0);                \
    if (CpuFeatures::IsSupported(AVX)) {                        \
      CpuFeatureScope avx_scope(tasm(), AVX);                   \
      DCHECK(instr->InputAt(1)->IsSimd128Register());           \
      __ v##opcode(dst, src, i.InputSimd128Register(1), imm);   \
    } else {                                                    \
      DCHECK_EQ(dst, src);                                      \
      if (instr->InputAt(1)->IsSimd128Register()) {             \
        __ opcode(dst, i.InputSimd128Register(1), imm);         \
      } else {                                                  \
        __ opcode(dst, i.InputOperand(1), imm);                 \
      }                                                         \
    }                                                           \
  } while (false)
#define ASSEMBLE_SIMD_ALL_TRUE(opcode) \
......@@ -3956,7 +3964,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kX64S16x8Blend: {
ASSEMBLE_SIMD_IMM_SHUFFLE(Pblendw, i.InputUint8(2));
ASSEMBLE_SIMD_IMM_SHUFFLE(pblendw, i.InputUint8(2));
break;
}
case kX64S16x8HalfShuffle1: {
......@@ -3975,7 +3983,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kX64S8x16Alignr: {
ASSEMBLE_SIMD_IMM_SHUFFLE(Palignr, i.InputUint8(2));
ASSEMBLE_SIMD_IMM_SHUFFLE(palignr, i.InputUint8(2));
break;
}
case kX64S16x8Dup: {
......
......@@ -3487,7 +3487,7 @@ void InstructionSelector::VisitI8x16Shuffle(Node* node) {
// Swap inputs from the normal order for (v)palignr.
SwapShuffleInputs(node);
is_swizzle = false; // It's simpler to just handle the general case.
no_same_as_first = false; // SSE requires same-as-first.
no_same_as_first = CpuFeatures::IsSupported(AVX);
// TODO(v8:9608): also see v8:9083
src1_needs_reg = true;
opcode = kX64S8x16Alignr;
......@@ -3528,6 +3528,7 @@ void InstructionSelector::VisitI8x16Shuffle(Node* node) {
opcode = kX64S16x8Blend;
uint8_t blend_mask = wasm::SimdShuffle::PackBlend4(shuffle32x4);
imms[imm_count++] = blend_mask;
no_same_as_first = CpuFeatures::IsSupported(AVX);
} else {
opcode = kX64S32x4Shuffle;
no_same_as_first = true;
......@@ -3547,6 +3548,7 @@ void InstructionSelector::VisitI8x16Shuffle(Node* node) {
opcode = kX64S16x8Blend;
blend_mask = wasm::SimdShuffle::PackBlend8(shuffle16x8);
imms[imm_count++] = blend_mask;
no_same_as_first = CpuFeatures::IsSupported(AVX);
} else if (wasm::SimdShuffle::TryMatchSplat<8>(shuffle, &index)) {
opcode = kX64S16x8Dup;
src0_needs_reg = false;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment