Commit a66bb000, authored by Zhi An Ng, committed by Commit Bot

[wasm-simd][x64] Optimize blend and palignr shuffles for AVX

For pblendw and palignr, if AVX is supported, we can use the 3-operand
AVX instruction, which can save us a move.

Bug: v8:11270
Change-Id: Ifd837e29c76886a3008bc63c17d4a68bc6aae364
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2596578
Reviewed-by: Bill Budge <bbudge@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#71857}
parent b145152d
...@@ -616,14 +616,22 @@ void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen, ...@@ -616,14 +616,22 @@ void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen,
} \ } \
} while (false) } while (false)
#define ASSEMBLE_SIMD_IMM_SHUFFLE(opcode, imm) \ #define ASSEMBLE_SIMD_IMM_SHUFFLE(opcode, imm) \
do { \ do { \
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); \ XMMRegister dst = i.OutputSimd128Register(); \
if (instr->InputAt(1)->IsSimd128Register()) { \ XMMRegister src = i.InputSimd128Register(0); \
__ opcode(i.OutputSimd128Register(), i.InputSimd128Register(1), imm); \ if (CpuFeatures::IsSupported(AVX)) { \
} else { \ CpuFeatureScope avx_scope(tasm(), AVX); \
__ opcode(i.OutputSimd128Register(), i.InputOperand(1), imm); \ DCHECK(instr->InputAt(1)->IsSimd128Register()); \
} \ __ v##opcode(dst, src, i.InputSimd128Register(1), imm); \
} else { \
DCHECK_EQ(dst, src); \
if (instr->InputAt(1)->IsSimd128Register()) { \
__ opcode(dst, i.InputSimd128Register(1), imm); \
} else { \
__ opcode(dst, i.InputOperand(1), imm); \
} \
} \
} while (false) } while (false)
#define ASSEMBLE_SIMD_ALL_TRUE(opcode) \ #define ASSEMBLE_SIMD_ALL_TRUE(opcode) \
...@@ -3956,7 +3964,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -3956,7 +3964,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break; break;
} }
case kX64S16x8Blend: { case kX64S16x8Blend: {
ASSEMBLE_SIMD_IMM_SHUFFLE(Pblendw, i.InputUint8(2)); ASSEMBLE_SIMD_IMM_SHUFFLE(pblendw, i.InputUint8(2));
break; break;
} }
case kX64S16x8HalfShuffle1: { case kX64S16x8HalfShuffle1: {
...@@ -3975,7 +3983,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -3975,7 +3983,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break; break;
} }
case kX64S8x16Alignr: { case kX64S8x16Alignr: {
ASSEMBLE_SIMD_IMM_SHUFFLE(Palignr, i.InputUint8(2)); ASSEMBLE_SIMD_IMM_SHUFFLE(palignr, i.InputUint8(2));
break; break;
} }
case kX64S16x8Dup: { case kX64S16x8Dup: {
......
...@@ -3487,7 +3487,7 @@ void InstructionSelector::VisitI8x16Shuffle(Node* node) { ...@@ -3487,7 +3487,7 @@ void InstructionSelector::VisitI8x16Shuffle(Node* node) {
// Swap inputs from the normal order for (v)palignr. // Swap inputs from the normal order for (v)palignr.
SwapShuffleInputs(node); SwapShuffleInputs(node);
is_swizzle = false; // It's simpler to just handle the general case. is_swizzle = false; // It's simpler to just handle the general case.
no_same_as_first = false; // SSE requires same-as-first. no_same_as_first = CpuFeatures::IsSupported(AVX);
// TODO(v8:9608): also see v8:9083 // TODO(v8:9608): also see v8:9083
src1_needs_reg = true; src1_needs_reg = true;
opcode = kX64S8x16Alignr; opcode = kX64S8x16Alignr;
...@@ -3528,6 +3528,7 @@ void InstructionSelector::VisitI8x16Shuffle(Node* node) { ...@@ -3528,6 +3528,7 @@ void InstructionSelector::VisitI8x16Shuffle(Node* node) {
opcode = kX64S16x8Blend; opcode = kX64S16x8Blend;
uint8_t blend_mask = wasm::SimdShuffle::PackBlend4(shuffle32x4); uint8_t blend_mask = wasm::SimdShuffle::PackBlend4(shuffle32x4);
imms[imm_count++] = blend_mask; imms[imm_count++] = blend_mask;
no_same_as_first = CpuFeatures::IsSupported(AVX);
} else { } else {
opcode = kX64S32x4Shuffle; opcode = kX64S32x4Shuffle;
no_same_as_first = true; no_same_as_first = true;
...@@ -3547,6 +3548,7 @@ void InstructionSelector::VisitI8x16Shuffle(Node* node) { ...@@ -3547,6 +3548,7 @@ void InstructionSelector::VisitI8x16Shuffle(Node* node) {
opcode = kX64S16x8Blend; opcode = kX64S16x8Blend;
blend_mask = wasm::SimdShuffle::PackBlend8(shuffle16x8); blend_mask = wasm::SimdShuffle::PackBlend8(shuffle16x8);
imms[imm_count++] = blend_mask; imms[imm_count++] = blend_mask;
no_same_as_first = CpuFeatures::IsSupported(AVX);
} else if (wasm::SimdShuffle::TryMatchSplat<8>(shuffle, &index)) { } else if (wasm::SimdShuffle::TryMatchSplat<8>(shuffle, &index)) {
opcode = kX64S16x8Dup; opcode = kX64S16x8Dup;
src0_needs_reg = false; src0_needs_reg = false;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment