Commit 46ce9b05 authored by Zhi An Ng, committed by Commit Bot

[wasm-simd][x64] Optimize arch shuffle if AVX supported

AVX has 3-operands shuffle/unpack operations. We currently always
require that dst == src0 in all cases, which is not required if we have
AVX. For the arch shuffles that map to a single native instruction, add
support to check for AVX in the instruction-selector, to not require
same as first, and in the code-gen to support generating AVX.

The other arch shuffles are slightly more complicated, and can be
optimized in a future change.

Bug: v8:11270
Change-Id: I25b271aeff71fbe860d5bcc8abb17c36bcdab32c
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2591858
Reviewed-by: Bill Budge <bbudge@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#71820}
parent 3d83638c
...@@ -6,6 +6,7 @@ ...@@ -6,6 +6,7 @@
#include "src/base/overflowing-math.h" #include "src/base/overflowing-math.h"
#include "src/codegen/assembler.h" #include "src/codegen/assembler.h"
#include "src/codegen/cpu-features.h"
#include "src/codegen/macro-assembler.h" #include "src/codegen/macro-assembler.h"
#include "src/codegen/optimized-compilation-info.h" #include "src/codegen/optimized-compilation-info.h"
#include "src/codegen/x64/assembler-x64.h" #include "src/codegen/x64/assembler-x64.h"
...@@ -600,12 +601,19 @@ void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen, ...@@ -600,12 +601,19 @@ void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen,
} \ } \
} while (false) } while (false)
#define ASSEMBLE_SIMD_PUNPCK_SHUFFLE(opcode) \ #define ASSEMBLE_SIMD_PUNPCK_SHUFFLE(opcode) \
do { \ do { \
XMMRegister dst = i.OutputSimd128Register(); \ XMMRegister dst = i.OutputSimd128Register(); \
DCHECK_EQ(dst, i.InputSimd128Register(0)); \ byte input_index = instr->InputCount() == 2 ? 1 : 0; \
byte input_index = instr->InputCount() == 2 ? 1 : 0; \ if (CpuFeatures::IsSupported(AVX)) { \
ASSEMBLE_SIMD_INSTR(opcode, dst, input_index); \ CpuFeatureScope avx_scope(tasm(), AVX); \
DCHECK(instr->InputAt(input_index)->IsSimd128Register()); \
__ v##opcode(dst, i.InputSimd128Register(0), \
i.InputSimd128Register(input_index)); \
} else { \
DCHECK_EQ(dst, i.InputSimd128Register(0)); \
ASSEMBLE_SIMD_INSTR(opcode, dst, input_index); \
} \
} while (false) } while (false)
#define ASSEMBLE_SIMD_IMM_SHUFFLE(opcode, imm) \ #define ASSEMBLE_SIMD_IMM_SHUFFLE(opcode, imm) \
...@@ -3997,28 +4005,28 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -3997,28 +4005,28 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break; break;
} }
case kX64S64x2UnpackHigh: case kX64S64x2UnpackHigh:
ASSEMBLE_SIMD_PUNPCK_SHUFFLE(Punpckhqdq); ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhqdq);
break; break;
case kX64S32x4UnpackHigh: case kX64S32x4UnpackHigh:
ASSEMBLE_SIMD_PUNPCK_SHUFFLE(Punpckhdq); ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhdq);
break; break;
case kX64S16x8UnpackHigh: case kX64S16x8UnpackHigh:
ASSEMBLE_SIMD_PUNPCK_SHUFFLE(Punpckhwd); ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhwd);
break; break;
case kX64S8x16UnpackHigh: case kX64S8x16UnpackHigh:
ASSEMBLE_SIMD_PUNPCK_SHUFFLE(Punpckhbw); ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhbw);
break; break;
case kX64S64x2UnpackLow: case kX64S64x2UnpackLow:
ASSEMBLE_SIMD_PUNPCK_SHUFFLE(Punpcklqdq); ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklqdq);
break; break;
case kX64S32x4UnpackLow: case kX64S32x4UnpackLow:
ASSEMBLE_SIMD_PUNPCK_SHUFFLE(Punpckldq); ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckldq);
break; break;
case kX64S16x8UnpackLow: case kX64S16x8UnpackLow:
ASSEMBLE_SIMD_PUNPCK_SHUFFLE(Punpcklwd); ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklwd);
break; break;
case kX64S8x16UnpackLow: case kX64S8x16UnpackLow:
ASSEMBLE_SIMD_PUNPCK_SHUFFLE(Punpcklbw); ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklbw);
break; break;
case kX64S16x8UnzipHigh: { case kX64S16x8UnzipHigh: {
XMMRegister dst = i.OutputSimd128Register(); XMMRegister dst = i.OutputSimd128Register();
......
...@@ -3336,6 +3336,10 @@ struct ShuffleEntry { ...@@ -3336,6 +3336,10 @@ struct ShuffleEntry {
ArchOpcode opcode; ArchOpcode opcode;
bool src0_needs_reg; bool src0_needs_reg;
bool src1_needs_reg; bool src1_needs_reg;
// If AVX is supported, this shuffle can use AVX's three-operand encoding, so
// does not require same as first. We conservatively set this to false
// (original behavior), and selectively enable for specific arch shuffles.
bool no_same_as_first_if_avx = false;
}; };
// Shuffles that map to architecture-specific instruction sequences. These are // Shuffles that map to architecture-specific instruction sequences. These are
...@@ -3347,34 +3351,42 @@ static const ShuffleEntry arch_shuffles[] = { ...@@ -3347,34 +3351,42 @@ static const ShuffleEntry arch_shuffles[] = {
{{0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23}, {{0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23},
kX64S64x2UnpackLow, kX64S64x2UnpackLow,
true, true,
true,
true}, true},
{{8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31}, {{8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31},
kX64S64x2UnpackHigh, kX64S64x2UnpackHigh,
true, true,
true,
true}, true},
{{0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23}, {{0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23},
kX64S32x4UnpackLow, kX64S32x4UnpackLow,
true, true,
true,
true}, true},
{{8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31}, {{8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31},
kX64S32x4UnpackHigh, kX64S32x4UnpackHigh,
true, true,
true,
true}, true},
{{0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23}, {{0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23},
kX64S16x8UnpackLow, kX64S16x8UnpackLow,
true, true,
true,
true}, true},
{{8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31}, {{8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31},
kX64S16x8UnpackHigh, kX64S16x8UnpackHigh,
true, true,
true,
true}, true},
{{0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}, {{0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23},
kX64S8x16UnpackLow, kX64S8x16UnpackLow,
true, true,
true,
true}, true},
{{8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}, {{8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31},
kX64S8x16UnpackHigh, kX64S8x16UnpackHigh,
true, true,
true,
true}, true},
{{0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29}, {{0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29},
...@@ -3489,7 +3501,8 @@ void InstructionSelector::VisitI8x16Shuffle(Node* node) { ...@@ -3489,7 +3501,8 @@ void InstructionSelector::VisitI8x16Shuffle(Node* node) {
// SSE can't take advantage of both operands in registers and needs // SSE can't take advantage of both operands in registers and needs
// same-as-first. // same-as-first.
src1_needs_reg = arch_shuffle->src1_needs_reg; src1_needs_reg = arch_shuffle->src1_needs_reg;
no_same_as_first = false; no_same_as_first =
IsSupported(AVX) && arch_shuffle->no_same_as_first_if_avx;
} else if (wasm::SimdShuffle::TryMatch32x4Shuffle(shuffle, shuffle32x4)) { } else if (wasm::SimdShuffle::TryMatch32x4Shuffle(shuffle, shuffle32x4)) {
uint8_t shuffle_mask = wasm::SimdShuffle::PackShuffle4(shuffle32x4); uint8_t shuffle_mask = wasm::SimdShuffle::PackShuffle4(shuffle32x4);
if (is_swizzle) { if (is_swizzle) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment