Commit 3bb0f51a authored by Zhi An Ng, committed by Commit Bot

[wasm-simd][x64] Pattern match on shufps-style shuffles

When an 8x16 shuffle matches a 32x4 shuffle (every group of 4 indices is
consecutive), and the first 2 indices are in the range [0-3], and the
other 2 indices are in the range [4-7], then we can match it to a
shufps. E.g. [0,2,4,6], [1,3,5,7]. These shuffles are commonly used to
extract odd/even floats.

Change-Id: I031fe44f71a13bbc72115c22b02a5eaaf29d3794
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2596579
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Bill Budge <bbudge@chromium.org>
Cr-Commit-Position: refs/heads/master@{#71860}
parent 1215f2a8
......@@ -3936,6 +3936,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
break;
}
case kX64Shufps: {
// Emit a single Shufps on the output register and the two SIMD register
// inputs. Input 2 is read as a uint8 and passed as the final argument
// (presumably the packed shuffle control built by the instruction selector
// -- confirm against VisitI8x16Shuffle).
__ Shufps(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1), i.InputUint8(2));
break;
}
case kX64S32x4Rotate: {
XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(0);
......
......@@ -350,6 +350,7 @@ namespace compiler {
V(X64S128Load32x2U) \
V(X64S128Store32Lane) \
V(X64S128Store64Lane) \
V(X64Shufps) \
V(X64S32x4Rotate) \
V(X64S32x4Swizzle) \
V(X64S32x4Shuffle) \
......
......@@ -312,6 +312,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64V16x8AllTrue:
case kX64I8x16Swizzle:
case kX64I8x16Shuffle:
case kX64Shufps:
case kX64S32x4Rotate:
case kX64S32x4Swizzle:
case kX64S32x4Shuffle:
......
......@@ -3447,6 +3447,15 @@ bool TryMatchArchShuffle(const uint8_t* shuffle, const ShuffleEntry* table,
return false;
}
// Returns true when the given 32x4 shuffle (four lane indices, each expected
// to be below 8) has the shape required by shufps: result lanes 0 and 1 must
// come from the first input (indices 0-3) and result lanes 2 and 3 must come
// from the second input (indices 4-7).
bool TryMatchShufps(const uint8_t* shuffle32x4) {
  DCHECK_GT(8, shuffle32x4[2]);
  DCHECK_GT(8, shuffle32x4[3]);
  // The low half of the result may only draw from the first input.
  const bool low_half_from_first = shuffle32x4[0] < 4 && shuffle32x4[1] < 4;
  if (!low_half_from_first) return false;
  // The high half of the result may only draw from the second input.
  const bool high_half_from_second =
      shuffle32x4[2] >= 4 && shuffle32x4[3] >= 4;
  return high_half_from_second;
}
} // namespace
void InstructionSelector::VisitI8x16Shuffle(Node* node) {
......@@ -3529,6 +3538,12 @@ void InstructionSelector::VisitI8x16Shuffle(Node* node) {
uint8_t blend_mask = wasm::SimdShuffle::PackBlend4(shuffle32x4);
imms[imm_count++] = blend_mask;
no_same_as_first = CpuFeatures::IsSupported(AVX);
} else if (TryMatchShufps(shuffle32x4)) {
opcode = kX64Shufps;
uint8_t mask = wasm::SimdShuffle::PackShuffle4(shuffle32x4);
imms[imm_count++] = mask;
src1_needs_reg = true;
no_same_as_first = IsSupported(AVX);
} else {
opcode = kX64S32x4Shuffle;
no_same_as_first = true;
......
......@@ -3082,6 +3082,28 @@ WASM_SIMD_TEST(S8x16Concat) {
}
}
WASM_SIMD_TEST(ShuffleShufps) {
  // Walk over every possible 8-bit shufps immediate and reconstruct the
  // equivalent 8x16 byte shuffle, then run that shuffle through the generic
  // shuffle test helper.
  std::array<int8_t, kSimd128Size> expected;
  for (int mask = 0; mask < 256; mask++) {
    for (int lane = 0; lane < 4; lane++) {
      // Each 32-bit result lane is described by its own 2-bit field of the
      // immediate, lowest lane in the lowest bits.
      const int selected_lane = (mask >> (2 * lane)) & 3;
      // Result lanes 0 and 1 read from the first input. Result lanes 2 and 3
      // read from the second input, so their byte indices are offset by 16.
      const int source_offset = lane < 2 ? 0 : 16;
      const int first_byte = selected_lane * 4 + source_offset;
      // Expand the selected 32-bit lane into its 4 consecutive byte indices.
      for (int byte = 0; byte < 4; byte++) {
        expected[lane * 4 + byte] = static_cast<int8_t>(first_byte + byte);
      }
    }
    RunShuffleOpTest(execution_tier, lower_simd, kExprI8x16Shuffle, expected);
  }
}
struct SwizzleTestArgs {
const Shuffle input;
const Shuffle indices;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment