Commit bcb4fbd4 authored by Bill Budge's avatar Bill Budge Committed by Commit Bot

[wasm simd] Handle more shuffles

- Shuffle canonicalization improved to reverse operands to match
  more architectural shuffles.
- Handles shuffles where the order of operands is reversed.
- Adds tests for non-canonical shuffles, and for swizzles.
- Improves TryMatchConcat method.
- Substantially rewrites shuffles on ia32 to better handle swizzles
  and fix bugs on reversed shuffles where source registers are
  overwritten.
- Adds Palignr macro-assembler instructions for ia32.

Bug: v8:6020
Change-Id: I8e43a1e7650057c66690af1504b67509a1437d75
Reviewed-on: https://chromium-review.googlesource.com/1070934
Commit-Queue: Bill Budge <bbudge@chromium.org>
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Reviewed-by: Martyn Capewell <martyn.capewell@arm.com>
Reviewed-by: Jaroslav Sevcik <jarin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#53648}
parent 43886bc3
...@@ -2445,7 +2445,9 @@ static const ShuffleEntry arch_shuffles[] = { ...@@ -2445,7 +2445,9 @@ static const ShuffleEntry arch_shuffles[] = {
{{1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14}, kArmS8x2Reverse}}; {{1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14}, kArmS8x2Reverse}};
bool TryMatchArchShuffle(const uint8_t* shuffle, const ShuffleEntry* table, bool TryMatchArchShuffle(const uint8_t* shuffle, const ShuffleEntry* table,
size_t num_entries, uint8_t mask, ArchOpcode* opcode) { size_t num_entries, bool is_swizzle,
ArchOpcode* opcode) {
uint8_t mask = is_swizzle ? kSimd128Size - 1 : 2 * kSimd128Size - 1;
for (size_t i = 0; i < num_entries; ++i) { for (size_t i = 0; i < num_entries; ++i) {
const ShuffleEntry& entry = table[i]; const ShuffleEntry& entry = table[i];
int j = 0; int j = 0;
...@@ -2477,48 +2479,48 @@ void ArrangeShuffleTable(ArmOperandGenerator* g, Node* input0, Node* input1, ...@@ -2477,48 +2479,48 @@ void ArrangeShuffleTable(ArmOperandGenerator* g, Node* input0, Node* input1,
} // namespace } // namespace
void InstructionSelector::VisitS8x16Shuffle(Node* node) { void InstructionSelector::VisitS8x16Shuffle(Node* node) {
const uint8_t* shuffle = OpParameter<uint8_t*>(node->op()); uint8_t shuffle[kSimd128Size];
uint8_t mask = CanonicalizeShuffle(node); bool is_swizzle;
CanonicalizeShuffle(node, shuffle, &is_swizzle);
Node* input0 = node->InputAt(0);
Node* input1 = node->InputAt(1);
uint8_t shuffle32x4[4]; uint8_t shuffle32x4[4];
ArmOperandGenerator g(this); ArmOperandGenerator g(this);
int index = 0; int index = 0;
if (TryMatch32x4Shuffle(shuffle, shuffle32x4)) { if (TryMatch32x4Shuffle(shuffle, shuffle32x4)) {
if (TryMatchDup<4>(shuffle, &index)) { if (TryMatchDup<4>(shuffle, &index)) {
InstructionOperand src = index < 4 ? g.UseRegister(node->InputAt(0)) InstructionOperand src =
: g.UseRegister(node->InputAt(1)); index < 4 ? g.UseRegister(input0) : g.UseRegister(input1);
Emit(kArmS128Dup, g.DefineAsRegister(node), src, g.UseImmediate(Neon32), Emit(kArmS128Dup, g.DefineAsRegister(node), src, g.UseImmediate(Neon32),
g.UseImmediate(index % 4)); g.UseImmediate(index % 4));
} else { } else {
Emit(kArmS32x4Shuffle, g.DefineAsRegister(node), Emit(kArmS32x4Shuffle, g.DefineAsRegister(node), g.UseRegister(input0),
g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)), g.UseRegister(input1), g.UseImmediate(Pack4Lanes(shuffle32x4)));
g.UseImmediate(Pack4Lanes(shuffle32x4, mask)));
} }
return; return;
} }
if (TryMatchDup<8>(shuffle, &index)) { if (TryMatchDup<8>(shuffle, &index)) {
InstructionOperand src = index < 8 ? g.UseRegister(node->InputAt(0)) InstructionOperand src =
: g.UseRegister(node->InputAt(1)); index < 8 ? g.UseRegister(input0) : g.UseRegister(input1);
Emit(kArmS128Dup, g.DefineAsRegister(node), src, g.UseImmediate(Neon16), Emit(kArmS128Dup, g.DefineAsRegister(node), src, g.UseImmediate(Neon16),
g.UseImmediate(index % 8)); g.UseImmediate(index % 8));
return; return;
} }
if (TryMatchDup<16>(shuffle, &index)) { if (TryMatchDup<16>(shuffle, &index)) {
InstructionOperand src = index < 16 ? g.UseRegister(node->InputAt(0)) InstructionOperand src =
: g.UseRegister(node->InputAt(1)); index < 16 ? g.UseRegister(input0) : g.UseRegister(input1);
Emit(kArmS128Dup, g.DefineAsRegister(node), src, g.UseImmediate(Neon8), Emit(kArmS128Dup, g.DefineAsRegister(node), src, g.UseImmediate(Neon8),
g.UseImmediate(index % 16)); g.UseImmediate(index % 16));
return; return;
} }
ArchOpcode opcode; ArchOpcode opcode;
if (TryMatchArchShuffle(shuffle, arch_shuffles, arraysize(arch_shuffles), if (TryMatchArchShuffle(shuffle, arch_shuffles, arraysize(arch_shuffles),
mask, &opcode)) { is_swizzle, &opcode)) {
VisitRRRShuffle(this, opcode, node); VisitRRRShuffle(this, opcode, node);
return; return;
} }
Node* input0 = node->InputAt(0);
Node* input1 = node->InputAt(1);
uint8_t offset; uint8_t offset;
if (TryMatchConcat(shuffle, mask, &offset)) { if (TryMatchConcat(shuffle, &offset)) {
Emit(kArmS8x16Concat, g.DefineAsRegister(node), g.UseRegister(input0), Emit(kArmS8x16Concat, g.DefineAsRegister(node), g.UseRegister(input0),
g.UseRegister(input1), g.UseImmediate(offset)); g.UseRegister(input1), g.UseImmediate(offset));
return; return;
...@@ -2527,10 +2529,10 @@ void InstructionSelector::VisitS8x16Shuffle(Node* node) { ...@@ -2527,10 +2529,10 @@ void InstructionSelector::VisitS8x16Shuffle(Node* node) {
InstructionOperand src0, src1; InstructionOperand src0, src1;
ArrangeShuffleTable(&g, input0, input1, &src0, &src1); ArrangeShuffleTable(&g, input0, input1, &src0, &src1);
Emit(kArmS8x16Shuffle, g.DefineAsRegister(node), src0, src1, Emit(kArmS8x16Shuffle, g.DefineAsRegister(node), src0, src1,
g.UseImmediate(Pack4Lanes(shuffle, mask)), g.UseImmediate(Pack4Lanes(shuffle)),
g.UseImmediate(Pack4Lanes(shuffle + 4, mask)), g.UseImmediate(Pack4Lanes(shuffle + 4)),
g.UseImmediate(Pack4Lanes(shuffle + 8, mask)), g.UseImmediate(Pack4Lanes(shuffle + 8)),
g.UseImmediate(Pack4Lanes(shuffle + 12, mask))); g.UseImmediate(Pack4Lanes(shuffle + 12)));
} }
void InstructionSelector::VisitSignExtendWord8ToInt32(Node* node) { void InstructionSelector::VisitSignExtendWord8ToInt32(Node* node) {
......
...@@ -3088,7 +3088,9 @@ static const ShuffleEntry arch_shuffles[] = { ...@@ -3088,7 +3088,9 @@ static const ShuffleEntry arch_shuffles[] = {
kArm64S8x2Reverse}}; kArm64S8x2Reverse}};
bool TryMatchArchShuffle(const uint8_t* shuffle, const ShuffleEntry* table, bool TryMatchArchShuffle(const uint8_t* shuffle, const ShuffleEntry* table,
size_t num_entries, uint8_t mask, ArchOpcode* opcode) { size_t num_entries, bool is_swizzle,
ArchOpcode* opcode) {
uint8_t mask = is_swizzle ? kSimd128Size - 1 : 2 * kSimd128Size - 1;
for (size_t i = 0; i < num_entries; i++) { for (size_t i = 0; i < num_entries; i++) {
const ShuffleEntry& entry = table[i]; const ShuffleEntry& entry = table[i];
int j = 0; int j = 0;
...@@ -3120,48 +3122,48 @@ void ArrangeShuffleTable(Arm64OperandGenerator* g, Node* input0, Node* input1, ...@@ -3120,48 +3122,48 @@ void ArrangeShuffleTable(Arm64OperandGenerator* g, Node* input0, Node* input1,
} // namespace } // namespace
void InstructionSelector::VisitS8x16Shuffle(Node* node) { void InstructionSelector::VisitS8x16Shuffle(Node* node) {
const uint8_t* shuffle = OpParameter<uint8_t*>(node->op()); uint8_t shuffle[kSimd128Size];
uint8_t mask = CanonicalizeShuffle(node); bool is_swizzle;
CanonicalizeShuffle(node, shuffle, &is_swizzle);
uint8_t shuffle32x4[4]; uint8_t shuffle32x4[4];
Arm64OperandGenerator g(this); Arm64OperandGenerator g(this);
ArchOpcode opcode; ArchOpcode opcode;
if (TryMatchArchShuffle(shuffle, arch_shuffles, arraysize(arch_shuffles), if (TryMatchArchShuffle(shuffle, arch_shuffles, arraysize(arch_shuffles),
mask, &opcode)) { is_swizzle, &opcode)) {
VisitRRR(this, opcode, node); VisitRRR(this, opcode, node);
return; return;
} }
Node* input0 = node->InputAt(0); Node* input0 = node->InputAt(0);
Node* input1 = node->InputAt(1); Node* input1 = node->InputAt(1);
uint8_t bias; uint8_t offset;
if (TryMatchConcat(shuffle, mask, &bias)) { if (TryMatchConcat(shuffle, &offset)) {
Emit(kArm64S8x16Concat, g.DefineAsRegister(node), g.UseRegister(input0), Emit(kArm64S8x16Concat, g.DefineAsRegister(node), g.UseRegister(input0),
g.UseRegister(input1), g.UseImmediate(bias)); g.UseRegister(input1), g.UseImmediate(offset));
return; return;
} }
int index = 0; int index = 0;
if (TryMatch32x4Shuffle(shuffle, shuffle32x4)) { if (TryMatch32x4Shuffle(shuffle, shuffle32x4)) {
if (TryMatchDup<4>(shuffle, &index)) { if (TryMatchDup<4>(shuffle, &index)) {
InstructionOperand src = index < 4 ? g.UseRegister(node->InputAt(0)) InstructionOperand src =
: g.UseRegister(node->InputAt(1)); index < 4 ? g.UseRegister(input0) : g.UseRegister(input1);
Emit(kArm64S128Dup, g.DefineAsRegister(node), src, g.UseImmediate(4), Emit(kArm64S128Dup, g.DefineAsRegister(node), src, g.UseImmediate(4),
g.UseImmediate(index % 4)); g.UseImmediate(index % 4));
} else { } else {
Emit(kArm64S32x4Shuffle, g.DefineAsRegister(node), Emit(kArm64S32x4Shuffle, g.DefineAsRegister(node), g.UseRegister(input0),
g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)), g.UseRegister(input1), g.UseImmediate(Pack4Lanes(shuffle32x4)));
g.UseImmediate(Pack4Lanes(shuffle32x4, mask)));
} }
return; return;
} }
if (TryMatchDup<8>(shuffle, &index)) { if (TryMatchDup<8>(shuffle, &index)) {
InstructionOperand src = index < 8 ? g.UseRegister(node->InputAt(0)) InstructionOperand src =
: g.UseRegister(node->InputAt(1)); index < 8 ? g.UseRegister(input0) : g.UseRegister(input1);
Emit(kArm64S128Dup, g.DefineAsRegister(node), src, g.UseImmediate(8), Emit(kArm64S128Dup, g.DefineAsRegister(node), src, g.UseImmediate(8),
g.UseImmediate(index % 8)); g.UseImmediate(index % 8));
return; return;
} }
if (TryMatchDup<16>(shuffle, &index)) { if (TryMatchDup<16>(shuffle, &index)) {
InstructionOperand src = index < 16 ? g.UseRegister(node->InputAt(0)) InstructionOperand src =
: g.UseRegister(node->InputAt(1)); index < 16 ? g.UseRegister(input0) : g.UseRegister(input1);
Emit(kArm64S128Dup, g.DefineAsRegister(node), src, g.UseImmediate(16), Emit(kArm64S128Dup, g.DefineAsRegister(node), src, g.UseImmediate(16),
g.UseImmediate(index % 16)); g.UseImmediate(index % 16));
return; return;
...@@ -3170,10 +3172,10 @@ void InstructionSelector::VisitS8x16Shuffle(Node* node) { ...@@ -3170,10 +3172,10 @@ void InstructionSelector::VisitS8x16Shuffle(Node* node) {
InstructionOperand src0, src1; InstructionOperand src0, src1;
ArrangeShuffleTable(&g, input0, input1, &src0, &src1); ArrangeShuffleTable(&g, input0, input1, &src0, &src1);
Emit(kArm64S8x16Shuffle, g.DefineAsRegister(node), src0, src1, Emit(kArm64S8x16Shuffle, g.DefineAsRegister(node), src0, src1,
g.UseImmediate(Pack4Lanes(shuffle, mask)), g.UseImmediate(Pack4Lanes(shuffle)),
g.UseImmediate(Pack4Lanes(shuffle + 4, mask)), g.UseImmediate(Pack4Lanes(shuffle + 4)),
g.UseImmediate(Pack4Lanes(shuffle + 8, mask)), g.UseImmediate(Pack4Lanes(shuffle + 8)),
g.UseImmediate(Pack4Lanes(shuffle + 12, mask))); g.UseImmediate(Pack4Lanes(shuffle + 12)));
} }
void InstructionSelector::VisitSignExtendWord8ToInt32(Node* node) { void InstructionSelector::VisitSignExtendWord8ToInt32(Node* node) {
......
...@@ -3139,13 +3139,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -3139,13 +3139,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
XMMRegister dst = i.OutputSimd128Register(); XMMRegister dst = i.OutputSimd128Register();
Operand src0 = i.InputOperand(0); Operand src0 = i.InputOperand(0);
Register tmp = i.TempRegister(0); Register tmp = i.TempRegister(0);
if (!src0.is_reg(dst)) { // Prepare 16 byte aligned buffer for shuffle control mask
__ movups(dst, src0);
}
// Prepare 16-byte boundary buffer for shuffle control mask
__ mov(tmp, esp); __ mov(tmp, esp);
__ and_(esp, -16); __ and_(esp, -16);
if (instr->InputCount() == 5) { // only one input operand if (instr->InputCount() == 5) { // only one input operand
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
for (int j = 4; j > 0; j--) { for (int j = 4; j > 0; j--) {
uint32_t mask = i.InputUint32(j); uint32_t mask = i.InputUint32(j);
__ push(Immediate(mask)); __ push(Immediate(mask));
...@@ -3153,6 +3151,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -3153,6 +3151,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Pshufb(dst, Operand(esp, 0)); __ Pshufb(dst, Operand(esp, 0));
} else { // two input operands } else { // two input operands
DCHECK_EQ(6, instr->InputCount()); DCHECK_EQ(6, instr->InputCount());
__ movups(kScratchDoubleReg, src0);
for (int j = 5; j > 1; j--) { for (int j = 5; j > 1; j--) {
uint32_t lanes = i.InputUint32(j); uint32_t lanes = i.InputUint32(j);
uint32_t mask = 0; uint32_t mask = 0;
...@@ -3162,8 +3161,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -3162,8 +3161,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
} }
__ push(Immediate(mask)); __ push(Immediate(mask));
} }
__ Pshufb(dst, Operand(esp, 0)); __ Pshufb(kScratchDoubleReg, Operand(esp, 0));
__ movups(kScratchDoubleReg, i.InputOperand(1)); Operand src1 = i.InputOperand(1);
if (!src1.is_reg(dst)) __ movups(dst, src1);
for (int j = 5; j > 1; j--) { for (int j = 5; j > 1; j--) {
uint32_t lanes = i.InputUint32(j); uint32_t lanes = i.InputUint32(j);
uint32_t mask = 0; uint32_t mask = 0;
...@@ -3173,74 +3173,55 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -3173,74 +3173,55 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
} }
__ push(Immediate(mask)); __ push(Immediate(mask));
} }
__ Pshufb(kScratchDoubleReg, Operand(esp, 0)); __ Pshufb(dst, Operand(esp, 0));
__ por(dst, kScratchDoubleReg); __ por(dst, kScratchDoubleReg);
} }
__ mov(esp, tmp); __ mov(esp, tmp);
break; break;
} }
case kIA32S32x4Swizzle: { case kIA32S32x4Swizzle: {
__ Pshufd(i.OutputSimd128Register(), i.InputOperand(0), i.InputInt8(2)); DCHECK_EQ(2, instr->InputCount());
__ Pshufd(i.OutputSimd128Register(), i.InputOperand(0), i.InputInt8(1));
break; break;
} }
case kIA32S32x4Shuffle: { case kIA32S32x4Shuffle: {
DCHECK_EQ(4, instr->InputCount()); // Swizzles should be handled above. DCHECK_EQ(4, instr->InputCount()); // Swizzles should be handled above.
__ Pshufd(i.OutputSimd128Register(), i.InputOperand(0), i.InputInt8(2));
__ Pshufd(kScratchDoubleReg, i.InputOperand(1), i.InputInt8(2)); __ Pshufd(kScratchDoubleReg, i.InputOperand(1), i.InputInt8(2));
__ Pshufd(i.OutputSimd128Register(), i.InputOperand(0), i.InputInt8(2));
__ Pblendw(i.OutputSimd128Register(), kScratchDoubleReg, i.InputInt8(3)); __ Pblendw(i.OutputSimd128Register(), kScratchDoubleReg, i.InputInt8(3));
break; break;
} }
case kSSES16x8Blend: { case kSSES16x8Blend: {
CpuFeatureScope sse_scope(tasm(), SSSE3); CpuFeatureScope sse_scope(tasm(), SSE4_1);
if (instr->InputCount() == 2) {
// swizzle
__ pblendw(i.OutputSimd128Register(), i.InputOperand(0),
i.InputInt8(1));
} else {
// shuffle
DCHECK_EQ(3, instr->InputCount());
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ pblendw(i.OutputSimd128Register(), i.InputOperand(1), __ pblendw(i.OutputSimd128Register(), i.InputOperand(1), i.InputInt8(2));
i.InputInt8(2));
}
break; break;
} }
case kAVXS16x8Blend: { case kAVXS16x8Blend: {
CpuFeatureScope avx_scope(tasm(), AVX); CpuFeatureScope sse_scope(tasm(), AVX);
__ vpblendw(i.OutputSimd128Register(), i.InputSimd128Register(0), __ vpblendw(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(1), i.InputInt8(2)); i.InputOperand(1), i.InputInt8(2));
break; break;
} }
case kIA32S16x8ShuffleBlend: { case kIA32S16x8HalfShuffle1: {
XMMRegister dst = i.OutputSimd128Register(); XMMRegister dst = i.OutputSimd128Register();
if (instr->InputCount() == 3) {
// swizzle
__ Pshuflw(dst, i.InputOperand(0), i.InputInt8(1)); __ Pshuflw(dst, i.InputOperand(0), i.InputInt8(1));
__ Pshufhw(dst, dst, i.InputInt8(2)); __ Pshufhw(dst, dst, i.InputInt8(2));
} else { break;
// shuffle }
DCHECK_EQ(5, instr->InputCount()); case kIA32S16x8HalfShuffle2: {
__ Pshuflw(dst, i.InputOperand(0), i.InputInt8(2)); XMMRegister dst = i.OutputSimd128Register();
__ Pshufhw(dst, dst, i.InputInt8(3));
__ Pshuflw(kScratchDoubleReg, i.InputOperand(1), i.InputInt8(2)); __ Pshuflw(kScratchDoubleReg, i.InputOperand(1), i.InputInt8(2));
__ Pshufhw(kScratchDoubleReg, kScratchDoubleReg, i.InputInt8(3)); __ Pshufhw(kScratchDoubleReg, kScratchDoubleReg, i.InputInt8(3));
__ Pshuflw(dst, i.InputOperand(0), i.InputInt8(2));
__ Pshufhw(dst, dst, i.InputInt8(3));
__ Pblendw(dst, kScratchDoubleReg, i.InputInt8(4)); __ Pblendw(dst, kScratchDoubleReg, i.InputInt8(4));
}
break; break;
} }
case kSSES8x16Alignr: { case kSSES8x16Alignr: {
CpuFeatureScope sse_scope(tasm(), SSSE3); CpuFeatureScope sse_scope(tasm(), SSSE3);
if (instr->InputCount() == 2) {
// swizzle
__ palignr(i.OutputSimd128Register(), i.InputOperand(0),
i.InputInt8(1));
} else {
// shuffle
DCHECK_EQ(3, instr->InputCount());
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ palignr(i.OutputSimd128Register(), i.InputOperand(1), __ palignr(i.OutputSimd128Register(), i.InputOperand(1), i.InputInt8(2));
i.InputInt8(2));
}
break; break;
} }
case kAVXS8x16Alignr: { case kAVXS8x16Alignr: {
......
...@@ -305,7 +305,8 @@ namespace compiler { ...@@ -305,7 +305,8 @@ namespace compiler {
V(IA32S32x4Shuffle) \ V(IA32S32x4Shuffle) \
V(SSES16x8Blend) \ V(SSES16x8Blend) \
V(AVXS16x8Blend) \ V(AVXS16x8Blend) \
V(IA32S16x8ShuffleBlend) \ V(IA32S16x8HalfShuffle1) \
V(IA32S16x8HalfShuffle2) \
V(SSES8x16Alignr) \ V(SSES8x16Alignr) \
V(AVXS8x16Alignr) \ V(AVXS8x16Alignr) \
V(IA32S1x4AnyTrue) \ V(IA32S1x4AnyTrue) \
......
...@@ -287,7 +287,8 @@ int InstructionScheduler::GetTargetInstructionFlags( ...@@ -287,7 +287,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kIA32S32x4Shuffle: case kIA32S32x4Shuffle:
case kSSES16x8Blend: case kSSES16x8Blend:
case kAVXS16x8Blend: case kAVXS16x8Blend:
case kIA32S16x8ShuffleBlend: case kIA32S16x8HalfShuffle1:
case kIA32S16x8HalfShuffle2:
case kSSES8x16Alignr: case kSSES8x16Alignr:
case kAVXS8x16Alignr: case kAVXS8x16Alignr:
case kIA32S1x4AnyTrue: case kIA32S1x4AnyTrue:
......
...@@ -2058,23 +2058,13 @@ bool Is16x8BlendedShuffle(uint8_t* shuffle16x8, uint8_t* blend_mask) { ...@@ -2058,23 +2058,13 @@ bool Is16x8BlendedShuffle(uint8_t* shuffle16x8, uint8_t* blend_mask) {
return true; return true;
} }
void SwapShuffleInputs(Node* node) {
Node* input0 = node->InputAt(0);
Node* input1 = node->InputAt(1);
node->ReplaceInput(0, input1);
node->ReplaceInput(1, input0);
}
} // namespace } // namespace
// TODO(bbudge) Make sure identity shuffle emits no instructions.
void InstructionSelector::VisitS8x16Shuffle(Node* node) { void InstructionSelector::VisitS8x16Shuffle(Node* node) {
static const int kMaxSwizzleIndex = 15; uint8_t shuffle[kSimd128Size];
static const int kMaxShuffleIndex = 31; bool is_swizzle;
const uint8_t* shuffle = OpParameter<uint8_t*>(node->op()); CanonicalizeShuffle(node, shuffle, &is_swizzle);
uint8_t mask = CanonicalizeShuffle(node);
bool is_swizzle = (mask == kMaxSwizzleIndex);
DCHECK_IMPLIES(!is_swizzle, mask == kMaxShuffleIndex);
USE(kMaxShuffleIndex);
int imm_count = 0; int imm_count = 0;
static const int kMaxImms = 6; static const int kMaxImms = 6;
...@@ -2085,23 +2075,30 @@ void InstructionSelector::VisitS8x16Shuffle(Node* node) { ...@@ -2085,23 +2075,30 @@ void InstructionSelector::VisitS8x16Shuffle(Node* node) {
IA32OperandGenerator g(this); IA32OperandGenerator g(this);
bool use_avx = CpuFeatures::IsSupported(AVX); bool use_avx = CpuFeatures::IsSupported(AVX);
// AVX and swizzles don't generally need DefineSameAsFirst to avoid a move.
bool no_same_as_first = use_avx || is_swizzle;
// We generally need UseRegister for the first source.
bool no_use_register = false;
ArchOpcode opcode = kIA32S8x16Shuffle; // general shuffle is the default ArchOpcode opcode = kIA32S8x16Shuffle; // general shuffle is the default
uint8_t offset; uint8_t offset;
uint8_t shuffle32x4[4]; uint8_t shuffle32x4[4];
uint8_t shuffle16x8[8]; uint8_t shuffle16x8[8];
if (TryMatchConcat(shuffle, mask, &offset)) { if (TryMatchConcat(shuffle, &offset)) {
// Swap inputs for (v)palignr. // Swap inputs from the normal order for (v)palignr.
// TODO(bbudge) Handle concatenations where the sources are reversed.
SwapShuffleInputs(node); SwapShuffleInputs(node);
// palignr takes a single imm8 offset. is_swizzle = false; // It's simpler to just handle the general case.
no_same_as_first = use_avx; // SSE requires same-as-first.
opcode = use_avx ? kAVXS8x16Alignr : kSSES8x16Alignr; opcode = use_avx ? kAVXS8x16Alignr : kSSES8x16Alignr;
// palignr takes a single imm8 offset.
imms[imm_count++] = offset; imms[imm_count++] = offset;
} else if (TryMatch32x4Shuffle(shuffle, shuffle32x4)) { } else if (TryMatch32x4Shuffle(shuffle, shuffle32x4)) {
uint8_t shuffle_mask = PackShuffle4(shuffle32x4); uint8_t shuffle_mask = PackShuffle4(shuffle32x4);
if (is_swizzle) { if (is_swizzle) {
// pshufd takes a single imm8 shuffle mask. // pshufd takes a single imm8 shuffle mask.
opcode = kIA32S32x4Swizzle; opcode = kIA32S32x4Swizzle;
no_same_as_first = true;
no_use_register = true;
imms[imm_count++] = shuffle_mask; imms[imm_count++] = shuffle_mask;
} else { } else {
// 2 operand shuffle // 2 operand shuffle
...@@ -2112,6 +2109,8 @@ void InstructionSelector::VisitS8x16Shuffle(Node* node) { ...@@ -2112,6 +2109,8 @@ void InstructionSelector::VisitS8x16Shuffle(Node* node) {
imms[imm_count++] = blend_mask; imms[imm_count++] = blend_mask;
} else { } else {
opcode = kIA32S32x4Shuffle; opcode = kIA32S32x4Shuffle;
no_same_as_first = true;
no_use_register = true;
imms[imm_count++] = shuffle_mask; imms[imm_count++] = shuffle_mask;
int8_t blend_mask = PackBlend4(shuffle32x4); int8_t blend_mask = PackBlend4(shuffle32x4);
imms[imm_count++] = blend_mask; imms[imm_count++] = blend_mask;
...@@ -2124,39 +2123,46 @@ void InstructionSelector::VisitS8x16Shuffle(Node* node) { ...@@ -2124,39 +2123,46 @@ void InstructionSelector::VisitS8x16Shuffle(Node* node) {
blend_mask = PackBlend8(shuffle16x8); blend_mask = PackBlend8(shuffle16x8);
imms[imm_count++] = blend_mask; imms[imm_count++] = blend_mask;
} else if (Is16x8BlendedShuffle(shuffle16x8, &blend_mask)) { } else if (Is16x8BlendedShuffle(shuffle16x8, &blend_mask)) {
opcode = kIA32S16x8ShuffleBlend; opcode = is_swizzle ? kIA32S16x8HalfShuffle1 : kIA32S16x8HalfShuffle2;
// Half-shuffles don't need DefineSameAsFirst or UseRegister(src0).
no_same_as_first = true;
no_use_register = true;
uint8_t mask_lo = PackShuffle4(shuffle16x8); uint8_t mask_lo = PackShuffle4(shuffle16x8);
uint8_t mask_hi = PackShuffle4(shuffle16x8 + 4); uint8_t mask_hi = PackShuffle4(shuffle16x8 + 4);
imms[imm_count++] = mask_lo; imms[imm_count++] = mask_lo;
imms[imm_count++] = mask_hi; imms[imm_count++] = mask_hi;
// TODO(bbudge) eliminate the blend for swizzles. if (!is_swizzle) imms[imm_count++] = blend_mask;
imms[imm_count++] = blend_mask;
} }
} }
if (opcode == kIA32S8x16Shuffle) { if (opcode == kIA32S8x16Shuffle) {
// General shuffle. // Use same-as-first for general swizzle, but not shuffle.
imms[imm_count++] = Pack4Lanes(shuffle, mask); no_same_as_first = !is_swizzle;
imms[imm_count++] = Pack4Lanes(shuffle + 4, mask); no_use_register = no_same_as_first;
imms[imm_count++] = Pack4Lanes(shuffle + 8, mask); imms[imm_count++] = Pack4Lanes(shuffle);
imms[imm_count++] = Pack4Lanes(shuffle + 12, mask); imms[imm_count++] = Pack4Lanes(shuffle + 4);
imms[imm_count++] = Pack4Lanes(shuffle + 8);
imms[imm_count++] = Pack4Lanes(shuffle + 12);
temps[temp_count++] = g.TempRegister(); temps[temp_count++] = g.TempRegister();
} }
// Swizzles and AVX don't require input[0] == output. // Use DefineAsRegister(node) and Use(src0) if we can without forcing an extra
InstructionOperand output = use_avx || is_swizzle ? g.DefineAsRegister(node) // move instruction in the CodeGenerator.
: g.DefineSameAsFirst(node); Node* input0 = node->InputAt(0);
InstructionOperand dst =
no_same_as_first ? g.DefineAsRegister(node) : g.DefineSameAsFirst(node);
InstructionOperand src0 =
no_use_register ? g.Use(input0) : g.UseRegister(input0);
int input_count = 0; int input_count = 0;
InstructionOperand inputs[2 + kMaxImms + kMaxTemps]; InstructionOperand inputs[2 + kMaxImms + kMaxTemps];
InstructionOperand src0 = g.UseRegister(node->InputAt(0));
inputs[input_count++] = src0; inputs[input_count++] = src0;
if (!is_swizzle || (use_avx && opcode != kIA32S8x16Shuffle)) { if (!is_swizzle) {
inputs[input_count++] = g.Use(node->InputAt(1)); inputs[input_count++] = g.Use(node->InputAt(1));
} }
for (int i = 0; i < imm_count; ++i) { for (int i = 0; i < imm_count; ++i) {
inputs[input_count++] = g.UseImmediate(imms[i]); inputs[input_count++] = g.UseImmediate(imms[i]);
} }
Emit(opcode, 1, &output, input_count, inputs, temp_count, temps); Emit(opcode, 1, &dst, input_count, inputs, temp_count, temps);
} }
// static // static
......
...@@ -2884,16 +2884,19 @@ bool InstructionSelector::TryMatch16x8Shuffle(const uint8_t* shuffle, ...@@ -2884,16 +2884,19 @@ bool InstructionSelector::TryMatch16x8Shuffle(const uint8_t* shuffle,
} }
// static // static
bool InstructionSelector::TryMatchConcat(const uint8_t* shuffle, uint8_t mask, bool InstructionSelector::TryMatchConcat(const uint8_t* shuffle,
uint8_t* offset) { uint8_t* offset) {
// Don't match the identity shuffle (e.g. [0 1 2 ... 15]).
uint8_t start = shuffle[0]; uint8_t start = shuffle[0];
int i = 1; if (start == 0) return false;
for (; i < 16 - start; ++i) { DCHECK_GT(kSimd128Size, start); // The shuffle should be canonicalized.
if ((shuffle[i] & mask) != ((shuffle[i - 1] + 1) & mask)) return false; // A concatenation is a series of consecutive indices, with at most one jump
// in the middle from the last lane to the first.
for (int i = 1; i < kSimd128Size; ++i) {
if ((shuffle[i]) != ((shuffle[i - 1] + 1))) {
if (shuffle[i - 1] != 15) return false;
if (shuffle[i] % kSimd128Size != 0) return false;
} }
uint8_t wrap = 16;
for (; i < 16; ++i, ++wrap) {
if ((shuffle[i] & mask) != (wrap & mask)) return false;
} }
*offset = start; *offset = start;
return true; return true;
...@@ -2907,23 +2910,21 @@ bool InstructionSelector::TryMatchBlend(const uint8_t* shuffle) { ...@@ -2907,23 +2910,21 @@ bool InstructionSelector::TryMatchBlend(const uint8_t* shuffle) {
return true; return true;
} }
uint8_t InstructionSelector::CanonicalizeShuffle(Node* node) { void InstructionSelector::CanonicalizeShuffle(Node* node, uint8_t* shuffle,
static const int kMaxLaneIndex = 15; bool* is_swizzle) {
static const int kMaxShuffleIndex = 31; // Get raw shuffle indices.
memcpy(shuffle, OpParameter<uint8_t*>(node->op()), kSimd128Size);
const uint8_t* shuffle = OpParameter<uint8_t*>(node->op()); // Detect shuffles that only operate on one input.
uint8_t mask = kMaxShuffleIndex;
// If shuffle is unary, set 'mask' to ignore the high bit of the indices.
// Replace any unused source with the other.
if (GetVirtualRegister(node->InputAt(0)) == if (GetVirtualRegister(node->InputAt(0)) ==
GetVirtualRegister(node->InputAt(1))) { GetVirtualRegister(node->InputAt(1))) {
// unary, src0 == src1. *is_swizzle = true;
mask = kMaxLaneIndex;
} else { } else {
// Inputs are distinct; check that both are required.
bool src0_is_used = false; bool src0_is_used = false;
bool src1_is_used = false; bool src1_is_used = false;
for (int i = 0; i < 16; ++i) { for (int i = 0; i < kSimd128Size; ++i) {
if (shuffle[i] <= kMaxLaneIndex) { if (shuffle[i] < kSimd128Size) {
src0_is_used = true; src0_is_used = true;
} else { } else {
src1_is_used = true; src1_is_used = true;
...@@ -2931,25 +2932,47 @@ uint8_t InstructionSelector::CanonicalizeShuffle(Node* node) { ...@@ -2931,25 +2932,47 @@ uint8_t InstructionSelector::CanonicalizeShuffle(Node* node) {
} }
if (src0_is_used && !src1_is_used) { if (src0_is_used && !src1_is_used) {
node->ReplaceInput(1, node->InputAt(0)); node->ReplaceInput(1, node->InputAt(0));
mask = kMaxLaneIndex; *is_swizzle = true;
} else if (src1_is_used && !src0_is_used) { } else if (src1_is_used && !src0_is_used) {
node->ReplaceInput(0, node->InputAt(1)); node->ReplaceInput(0, node->InputAt(1));
mask = kMaxLaneIndex; *is_swizzle = true;
} else {
*is_swizzle = false;
// Canonicalize general 2 input shuffles so that the first input lanes are
// encountered first. This makes architectural shuffle pattern matching
// easier, since we only need to consider 1 input ordering instead of 2.
if (shuffle[0] >= kSimd128Size) {
// The second operand is used first. Swap inputs and adjust the shuffle.
SwapShuffleInputs(node);
for (int i = 0; i < kSimd128Size; ++i) {
shuffle[i] ^= kSimd128Size;
}
}
} }
} }
return mask; if (*is_swizzle) {
for (int i = 0; i < kSimd128Size; ++i) shuffle[i] &= kSimd128Size - 1;
}
} }
// static // static
int32_t InstructionSelector::Pack4Lanes(const uint8_t* shuffle, uint8_t mask) { int32_t InstructionSelector::Pack4Lanes(const uint8_t* shuffle) {
int32_t result = 0; int32_t result = 0;
for (int i = 3; i >= 0; --i) { for (int i = 3; i >= 0; --i) {
result <<= 8; result <<= 8;
result |= shuffle[i] & mask; result |= shuffle[i];
} }
return result; return result;
} }
// static
void InstructionSelector::SwapShuffleInputs(Node* node) {
Node* input0 = node->InputAt(0);
Node* input1 = node->InputAt(1);
node->ReplaceInput(0, input1);
node->ReplaceInput(1, input0);
}
bool InstructionSelector::NeedsPoisoning(IsSafetyCheck safety_check) const { bool InstructionSelector::NeedsPoisoning(IsSafetyCheck safety_check) const {
switch (poisoning_level_) { switch (poisoning_level_) {
case PoisoningMitigationLevel::kDontPoison: case PoisoningMitigationLevel::kDontPoison:
......
...@@ -633,22 +633,25 @@ class V8_EXPORT_PRIVATE InstructionSelector final { ...@@ -633,22 +633,25 @@ class V8_EXPORT_PRIVATE InstructionSelector final {
// Tries to match a byte shuffle to a concatenate operation, formed by taking // Tries to match a byte shuffle to a concatenate operation, formed by taking
// 16 bytes from the 32 byte concatenation of the inputs. If successful, it // 16 bytes from the 32 byte concatenation of the inputs. If successful, it
// writes the byte offset. E.g. [4 5 6 7 .. 16 17 18 19] concatenates both // writes the byte offset. E.g. [4 5 6 7 .. 16 17 18 19] concatenates both
// source vectors with offset 4. // source vectors with offset 4. The shuffle should be canonicalized.
static bool TryMatchConcat(const uint8_t* shuffle, uint8_t mask, static bool TryMatchConcat(const uint8_t* shuffle, uint8_t* offset);
uint8_t* offset);
// Tries to match a byte shuffle to a blend operation, which is a shuffle // Tries to match a byte shuffle to a blend operation, which is a shuffle
// where no lanes change position. E.g. [0 9 2 11 .. 14 31] interleaves the // where no lanes change position. E.g. [0 9 2 11 .. 14 31] interleaves the
// even lanes of the first source with the odd lanes of the second. // even lanes of the first source with the odd lanes of the second. The
// shuffle should be canonicalized.
static bool TryMatchBlend(const uint8_t* shuffle); static bool TryMatchBlend(const uint8_t* shuffle);
// Packs 4 bytes of shuffle into a 32 bit immediate, using a mask from // Packs 4 bytes of shuffle into a 32 bit immediate.
// CanonicalizeShuffle to convert unary shuffles. static int32_t Pack4Lanes(const uint8_t* shuffle);
static int32_t Pack4Lanes(const uint8_t* shuffle, uint8_t mask);
// Canonicalize shuffles to make pattern matching simpler. Returns a mask that // Canonicalize shuffles to make pattern matching simpler. Returns the shuffle
// will clear the high bit of indices if shuffle is unary (a swizzle). // indices, and a boolean indicating if the shuffle is a swizzle (one input).
uint8_t CanonicalizeShuffle(Node* node); void CanonicalizeShuffle(Node* node, uint8_t* shuffle, bool* is_swizzle);
// Swaps the two first input operands of the node, to help match shuffles
// to specific architectural instructions.
void SwapShuffleInputs(Node* node);
// =========================================================================== // ===========================================================================
......
...@@ -2133,7 +2133,9 @@ static const ShuffleEntry arch_shuffles[] = { ...@@ -2133,7 +2133,9 @@ static const ShuffleEntry arch_shuffles[] = {
{{1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14}, kMipsS8x2Reverse}}; {{1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14}, kMipsS8x2Reverse}};
bool TryMatchArchShuffle(const uint8_t* shuffle, const ShuffleEntry* table, bool TryMatchArchShuffle(const uint8_t* shuffle, const ShuffleEntry* table,
size_t num_entries, uint8_t mask, ArchOpcode* opcode) { size_t num_entries, bool is_swizzle,
ArchOpcode* opcode) {
uint8_t mask = is_swizzle ? kSimd128Size - 1 : 2 * kSimd128Size - 1;
for (size_t i = 0; i < num_entries; ++i) { for (size_t i = 0; i < num_entries; ++i) {
const ShuffleEntry& entry = table[i]; const ShuffleEntry& entry = table[i];
int j = 0; int j = 0;
...@@ -2153,35 +2155,35 @@ bool TryMatchArchShuffle(const uint8_t* shuffle, const ShuffleEntry* table, ...@@ -2153,35 +2155,35 @@ bool TryMatchArchShuffle(const uint8_t* shuffle, const ShuffleEntry* table,
} // namespace } // namespace
void InstructionSelector::VisitS8x16Shuffle(Node* node) { void InstructionSelector::VisitS8x16Shuffle(Node* node) {
const uint8_t* shuffle = OpParameter<uint8_t*>(node->op()); uint8_t shuffle[kSimd128Size];
uint8_t mask = CanonicalizeShuffle(node); bool is_swizzle;
CanonicalizeShuffle(node, shuffle, &is_swizzle);
uint8_t shuffle32x4[4]; uint8_t shuffle32x4[4];
ArchOpcode opcode; ArchOpcode opcode;
if (TryMatchArchShuffle(shuffle, arch_shuffles, arraysize(arch_shuffles), if (TryMatchArchShuffle(shuffle, arch_shuffles, arraysize(arch_shuffles),
mask, &opcode)) { is_swizzle, &opcode)) {
VisitRRR(this, opcode, node); VisitRRR(this, opcode, node);
return; return;
} }
Node* input0 = node->InputAt(0);
Node* input1 = node->InputAt(1);
uint8_t offset; uint8_t offset;
MipsOperandGenerator g(this); MipsOperandGenerator g(this);
if (TryMatchConcat(shuffle, mask, &offset)) { if (TryMatchConcat(shuffle, &offset)) {
Emit(kMipsS8x16Concat, g.DefineSameAsFirst(node), Emit(kMipsS8x16Concat, g.DefineSameAsFirst(node), g.UseRegister(input0),
g.UseRegister(node->InputAt(1)), g.UseRegister(node->InputAt(0)), g.UseRegister(input1), g.UseImmediate(offset));
g.UseImmediate(offset));
return; return;
} }
if (TryMatch32x4Shuffle(shuffle, shuffle32x4)) { if (TryMatch32x4Shuffle(shuffle, shuffle32x4)) {
Emit(kMipsS32x4Shuffle, g.DefineAsRegister(node), Emit(kMipsS32x4Shuffle, g.DefineAsRegister(node), g.UseRegister(input0),
g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)), g.UseRegister(input1), g.UseImmediate(Pack4Lanes(shuffle32x4)));
g.UseImmediate(Pack4Lanes(shuffle32x4, mask)));
return; return;
} }
Emit(kMipsS8x16Shuffle, g.DefineAsRegister(node), Emit(kMipsS8x16Shuffle, g.DefineAsRegister(node), g.UseRegister(input0),
g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)), g.UseRegister(input1), g.UseImmediate(Pack4Lanes(shuffle)),
g.UseImmediate(Pack4Lanes(shuffle, mask)), g.UseImmediate(Pack4Lanes(shuffle + 4)),
g.UseImmediate(Pack4Lanes(shuffle + 4, mask)), g.UseImmediate(Pack4Lanes(shuffle + 8)),
g.UseImmediate(Pack4Lanes(shuffle + 8, mask)), g.UseImmediate(Pack4Lanes(shuffle + 12)));
g.UseImmediate(Pack4Lanes(shuffle + 12, mask)));
} }
void InstructionSelector::VisitSignExtendWord8ToInt32(Node* node) { void InstructionSelector::VisitSignExtendWord8ToInt32(Node* node) {
......
...@@ -2802,7 +2802,9 @@ static const ShuffleEntry arch_shuffles[] = { ...@@ -2802,7 +2802,9 @@ static const ShuffleEntry arch_shuffles[] = {
kMips64S8x2Reverse}}; kMips64S8x2Reverse}};
bool TryMatchArchShuffle(const uint8_t* shuffle, const ShuffleEntry* table, bool TryMatchArchShuffle(const uint8_t* shuffle, const ShuffleEntry* table,
size_t num_entries, uint8_t mask, ArchOpcode* opcode) { size_t num_entries, bool is_swizzle,
ArchOpcode* opcode) {
uint8_t mask = is_swizzle ? kSimd128Size - 1 : 2 * kSimd128Size - 1;
for (size_t i = 0; i < num_entries; ++i) { for (size_t i = 0; i < num_entries; ++i) {
const ShuffleEntry& entry = table[i]; const ShuffleEntry& entry = table[i];
int j = 0; int j = 0;
...@@ -2822,35 +2824,35 @@ bool TryMatchArchShuffle(const uint8_t* shuffle, const ShuffleEntry* table, ...@@ -2822,35 +2824,35 @@ bool TryMatchArchShuffle(const uint8_t* shuffle, const ShuffleEntry* table,
} // namespace } // namespace
void InstructionSelector::VisitS8x16Shuffle(Node* node) { void InstructionSelector::VisitS8x16Shuffle(Node* node) {
const uint8_t* shuffle = OpParameter<uint8_t*>(node->op()); uint8_t shuffle[kSimd128Size];
uint8_t mask = CanonicalizeShuffle(node); bool is_swizzle;
CanonicalizeShuffle(node, shuffle, &is_swizzle);
uint8_t shuffle32x4[4]; uint8_t shuffle32x4[4];
ArchOpcode opcode; ArchOpcode opcode;
if (TryMatchArchShuffle(shuffle, arch_shuffles, arraysize(arch_shuffles), if (TryMatchArchShuffle(shuffle, arch_shuffles, arraysize(arch_shuffles),
mask, &opcode)) { is_swizzle, &opcode)) {
VisitRRR(this, opcode, node); VisitRRR(this, opcode, node);
return; return;
} }
Node* input0 = node->InputAt(0);
Node* input1 = node->InputAt(1);
uint8_t offset; uint8_t offset;
Mips64OperandGenerator g(this); Mips64OperandGenerator g(this);
if (TryMatchConcat(shuffle, mask, &offset)) { if (TryMatchConcat(shuffle, &offset)) {
Emit(kMips64S8x16Concat, g.DefineSameAsFirst(node), Emit(kMips64S8x16Concat, g.DefineSameAsFirst(node), g.UseRegister(input0),
g.UseRegister(node->InputAt(1)), g.UseRegister(node->InputAt(0)), g.UseRegister(input1), g.UseImmediate(offset));
g.UseImmediate(offset));
return; return;
} }
if (TryMatch32x4Shuffle(shuffle, shuffle32x4)) { if (TryMatch32x4Shuffle(shuffle, shuffle32x4)) {
Emit(kMips64S32x4Shuffle, g.DefineAsRegister(node), Emit(kMips64S32x4Shuffle, g.DefineAsRegister(node), g.UseRegister(input0),
g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)), g.UseRegister(input1), g.UseImmediate(Pack4Lanes(shuffle32x4)));
g.UseImmediate(Pack4Lanes(shuffle32x4, mask)));
return; return;
} }
Emit(kMips64S8x16Shuffle, g.DefineAsRegister(node), Emit(kMips64S8x16Shuffle, g.DefineAsRegister(node), g.UseRegister(input0),
g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)), g.UseRegister(input1), g.UseImmediate(Pack4Lanes(shuffle)),
g.UseImmediate(Pack4Lanes(shuffle, mask)), g.UseImmediate(Pack4Lanes(shuffle + 4)),
g.UseImmediate(Pack4Lanes(shuffle + 4, mask)), g.UseImmediate(Pack4Lanes(shuffle + 8)),
g.UseImmediate(Pack4Lanes(shuffle + 8, mask)), g.UseImmediate(Pack4Lanes(shuffle + 12)));
g.UseImmediate(Pack4Lanes(shuffle + 12, mask)));
} }
void InstructionSelector::VisitSignExtendWord8ToInt32(Node* node) { void InstructionSelector::VisitSignExtendWord8ToInt32(Node* node) {
......
...@@ -1678,262 +1678,213 @@ WASM_SIMD_COMPILED_AND_LOWERED_TEST(F32x4AddHoriz) { ...@@ -1678,262 +1678,213 @@ WASM_SIMD_COMPILED_AND_LOWERED_TEST(F32x4AddHoriz) {
{{1.0f, 5.0f, 9.0f, 13.0f}}); {{1.0f, 5.0f, 9.0f, 13.0f}});
} }
// Test shuffle ops.
template <typename T>
void RunShuffleOpTest(WasmExecutionMode execution_mode, LowerSimd lower_simd,
WasmOpcode simd_op,
const std::array<T, kSimd128Size / sizeof(T)>& shuffle) {
// Test the original shuffle.
RunBinaryLaneOpTest<T>(execution_mode, lower_simd, simd_op, shuffle);
// Test a non-canonical (inputs reversed) version of the shuffle.
std::array<T, kSimd128Size / sizeof(T)> other_shuffle(shuffle);
for (size_t i = 0; i < shuffle.size(); ++i) other_shuffle[i] ^= kSimd128Size;
RunBinaryLaneOpTest<T>(execution_mode, lower_simd, simd_op, other_shuffle);
// Test the swizzle (one-operand) version of the shuffle.
std::array<T, kSimd128Size / sizeof(T)> swizzle(shuffle);
for (size_t i = 0; i < shuffle.size(); ++i) swizzle[i] &= (kSimd128Size - 1);
RunBinaryLaneOpTest<T>(execution_mode, lower_simd, simd_op, swizzle);
// Test the non-canonical swizzle (one-operand) version of the shuffle.
std::array<T, kSimd128Size / sizeof(T)> other_swizzle(shuffle);
for (size_t i = 0; i < shuffle.size(); ++i) other_swizzle[i] |= kSimd128Size;
RunBinaryLaneOpTest<T>(execution_mode, lower_simd, simd_op, other_swizzle);
}
#if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS || \ #if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS || \
V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32 V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
// Test some regular shuffles that may have special handling on some targets. // Test some regular shuffles that may have special handling on some targets.
// Test a normal and unary versions (where second operand isn't used).
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S32x4Dup) { WASM_SIMD_COMPILED_AND_LOWERED_TEST(S32x4Dup) {
RunBinaryLaneOpTest<int8_t>( RunShuffleOpTest<int8_t>(
execution_mode, lower_simd, kExprS8x16Shuffle, execution_mode, lower_simd, kExprS8x16Shuffle,
{{16, 17, 18, 19, 16, 17, 18, 19, 16, 17, 18, 19, 16, 17, 18, 19}}); {{16, 17, 18, 19, 16, 17, 18, 19, 16, 17, 18, 19, 16, 17, 18, 19}});
RunBinaryLaneOpTest<int8_t>(
execution_mode, lower_simd, kExprS8x16Shuffle,
{{4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7}});
} }
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S32x4ZipLeft) { WASM_SIMD_COMPILED_AND_LOWERED_TEST(S32x4ZipLeft) {
RunBinaryLaneOpTest<int8_t>( RunShuffleOpTest<int8_t>(
execution_mode, lower_simd, kExprS8x16Shuffle, execution_mode, lower_simd, kExprS8x16Shuffle,
{{0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23}}); {{0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23}});
RunBinaryLaneOpTest<int8_t>(
execution_mode, lower_simd, kExprS8x16Shuffle,
{{0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7}});
} }
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S32x4ZipRight) { WASM_SIMD_COMPILED_AND_LOWERED_TEST(S32x4ZipRight) {
RunBinaryLaneOpTest<int8_t>( RunShuffleOpTest<int8_t>(
execution_mode, lower_simd, kExprS8x16Shuffle, execution_mode, lower_simd, kExprS8x16Shuffle,
{{8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31}}); {{8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31}});
RunBinaryLaneOpTest<int8_t>(
execution_mode, lower_simd, kExprS8x16Shuffle,
{{8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15, 12, 13, 14, 15}});
} }
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S32x4UnzipLeft) { WASM_SIMD_COMPILED_AND_LOWERED_TEST(S32x4UnzipLeft) {
RunBinaryLaneOpTest<int8_t>( RunShuffleOpTest<int8_t>(
execution_mode, lower_simd, kExprS8x16Shuffle, execution_mode, lower_simd, kExprS8x16Shuffle,
{{0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27}}); {{0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27}});
RunBinaryLaneOpTest<int8_t>(
execution_mode, lower_simd, kExprS8x16Shuffle,
{{0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 8, 9, 10, 11}});
} }
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S32x4UnzipRight) { WASM_SIMD_COMPILED_AND_LOWERED_TEST(S32x4UnzipRight) {
RunBinaryLaneOpTest<int8_t>( RunShuffleOpTest<int8_t>(
execution_mode, lower_simd, kExprS8x16Shuffle, execution_mode, lower_simd, kExprS8x16Shuffle,
{{4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31}}); {{4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31}});
RunBinaryLaneOpTest<int8_t>(
execution_mode, lower_simd, kExprS8x16Shuffle,
{{4, 5, 6, 7, 12, 13, 14, 15, 4, 5, 6, 7, 12, 13, 14, 15}});
} }
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S32x4TransposeLeft) { WASM_SIMD_COMPILED_AND_LOWERED_TEST(S32x4TransposeLeft) {
RunBinaryLaneOpTest<int8_t>( RunShuffleOpTest<int8_t>(
execution_mode, lower_simd, kExprS8x16Shuffle, execution_mode, lower_simd, kExprS8x16Shuffle,
{{0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27}}); {{0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27}});
RunBinaryLaneOpTest<int8_t>(
execution_mode, lower_simd, kExprS8x16Shuffle,
{{0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 8, 9, 10, 11}});
} }
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S32x4TransposeRight) { WASM_SIMD_COMPILED_AND_LOWERED_TEST(S32x4TransposeRight) {
RunBinaryLaneOpTest<int8_t>( RunShuffleOpTest<int8_t>(
execution_mode, lower_simd, kExprS8x16Shuffle, execution_mode, lower_simd, kExprS8x16Shuffle,
{{4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31}}); {{4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31}});
RunBinaryLaneOpTest<int8_t>(
execution_mode, lower_simd, kExprS8x16Shuffle,
{{4, 5, 6, 7, 4, 5, 6, 7, 12, 13, 14, 15, 12, 13, 14, 15}});
} }
// Reverses are only unary. // Reverses are only unary.
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S32x2Reverse) { WASM_SIMD_COMPILED_AND_LOWERED_TEST(S32x2Reverse) {
RunBinaryLaneOpTest<int8_t>( RunShuffleOpTest<int8_t>(
execution_mode, lower_simd, kExprS8x16Shuffle, execution_mode, lower_simd, kExprS8x16Shuffle,
{{4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11}}); {{4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11}});
} }
// Test irregular shuffle. // Test irregular shuffle.
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S32x4Irregular) { WASM_SIMD_COMPILED_AND_LOWERED_TEST(S32x4Irregular) {
RunBinaryLaneOpTest<int8_t>( RunShuffleOpTest<int8_t>(
execution_mode, lower_simd, kExprS8x16Shuffle, execution_mode, lower_simd, kExprS8x16Shuffle,
{{0, 1, 2, 3, 16, 17, 18, 19, 16, 17, 18, 19, 20, 21, 22, 23}}); {{0, 1, 2, 3, 16, 17, 18, 19, 16, 17, 18, 19, 20, 21, 22, 23}});
RunBinaryLaneOpTest<int8_t>(
execution_mode, lower_simd, kExprS8x16Shuffle,
{{0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7}});
} }
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S16x8Dup) { WASM_SIMD_COMPILED_AND_LOWERED_TEST(S16x8Dup) {
RunBinaryLaneOpTest<int8_t>( RunShuffleOpTest<int8_t>(
execution_mode, lower_simd, kExprS8x16Shuffle, execution_mode, lower_simd, kExprS8x16Shuffle,
{{18, 19, 18, 19, 18, 19, 18, 19, 18, 19, 18, 19, 18, 19, 18, 19}}); {{18, 19, 18, 19, 18, 19, 18, 19, 18, 19, 18, 19, 18, 19, 18, 19}});
RunBinaryLaneOpTest<int8_t>(
execution_mode, lower_simd, kExprS8x16Shuffle,
{{6, 7, 6, 7, 6, 7, 6, 7, 6, 7, 6, 7, 6, 7, 6, 7}});
} }
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S16x8ZipLeft) { WASM_SIMD_COMPILED_AND_LOWERED_TEST(S16x8ZipLeft) {
RunBinaryLaneOpTest<int8_t>( RunShuffleOpTest<int8_t>(
execution_mode, lower_simd, kExprS8x16Shuffle, execution_mode, lower_simd, kExprS8x16Shuffle,
{{0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23}}); {{0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23}});
RunBinaryLaneOpTest<int8_t>(
execution_mode, lower_simd, kExprS8x16Shuffle,
{{0, 1, 0, 1, 2, 3, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7}});
} }
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S16x8ZipRight) { WASM_SIMD_COMPILED_AND_LOWERED_TEST(S16x8ZipRight) {
RunBinaryLaneOpTest<int8_t>( RunShuffleOpTest<int8_t>(
execution_mode, lower_simd, kExprS8x16Shuffle, execution_mode, lower_simd, kExprS8x16Shuffle,
{{8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31}}); {{8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31}});
RunBinaryLaneOpTest<int8_t>(
execution_mode, lower_simd, kExprS8x16Shuffle,
{{8, 9, 8, 9, 10, 11, 10, 11, 12, 13, 12, 13, 14, 15, 14, 15}});
} }
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S16x8UnzipLeft) { WASM_SIMD_COMPILED_AND_LOWERED_TEST(S16x8UnzipLeft) {
RunBinaryLaneOpTest<int8_t>( RunShuffleOpTest<int8_t>(
execution_mode, lower_simd, kExprS8x16Shuffle, execution_mode, lower_simd, kExprS8x16Shuffle,
{{0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29}}); {{0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29}});
RunBinaryLaneOpTest<int8_t>(
execution_mode, lower_simd, kExprS8x16Shuffle,
{{0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13}});
} }
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S16x8UnzipRight) { WASM_SIMD_COMPILED_AND_LOWERED_TEST(S16x8UnzipRight) {
RunBinaryLaneOpTest<int8_t>( RunShuffleOpTest<int8_t>(
execution_mode, lower_simd, kExprS8x16Shuffle, execution_mode, lower_simd, kExprS8x16Shuffle,
{{2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31}}); {{2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31}});
RunBinaryLaneOpTest<int8_t>(
execution_mode, lower_simd, kExprS8x16Shuffle,
{{2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15}});
} }
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S16x8TransposeLeft) { WASM_SIMD_COMPILED_AND_LOWERED_TEST(S16x8TransposeLeft) {
RunBinaryLaneOpTest<int8_t>( RunShuffleOpTest<int8_t>(
execution_mode, lower_simd, kExprS8x16Shuffle, execution_mode, lower_simd, kExprS8x16Shuffle,
{{0, 1, 16, 17, 4, 5, 20, 21, 8, 9, 24, 25, 12, 13, 28, 29}}); {{0, 1, 16, 17, 4, 5, 20, 21, 8, 9, 24, 25, 12, 13, 28, 29}});
RunBinaryLaneOpTest<int8_t>(
execution_mode, lower_simd, kExprS8x16Shuffle,
{{0, 1, 0, 1, 4, 5, 4, 5, 8, 9, 8, 9, 12, 13, 12, 13}});
} }
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S16x8TransposeRight) { WASM_SIMD_COMPILED_AND_LOWERED_TEST(S16x8TransposeRight) {
RunBinaryLaneOpTest<int8_t>( RunShuffleOpTest<int8_t>(
execution_mode, lower_simd, kExprS8x16Shuffle, execution_mode, lower_simd, kExprS8x16Shuffle,
{{2, 3, 18, 19, 6, 7, 22, 23, 10, 11, 26, 27, 14, 15, 30, 31}}); {{2, 3, 18, 19, 6, 7, 22, 23, 10, 11, 26, 27, 14, 15, 30, 31}});
RunBinaryLaneOpTest<int8_t>(
execution_mode, lower_simd, kExprS8x16Shuffle,
{{2, 3, 2, 3, 6, 7, 6, 7, 10, 11, 10, 11, 14, 15, 14, 15}});
} }
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S16x4Reverse) { WASM_SIMD_COMPILED_AND_LOWERED_TEST(S16x4Reverse) {
RunBinaryLaneOpTest<int8_t>( RunShuffleOpTest<int8_t>(
execution_mode, lower_simd, kExprS8x16Shuffle, execution_mode, lower_simd, kExprS8x16Shuffle,
{{6, 7, 4, 5, 2, 3, 0, 1, 14, 15, 12, 13, 10, 11, 8, 9}}); {{6, 7, 4, 5, 2, 3, 0, 1, 14, 15, 12, 13, 10, 11, 8, 9}});
} }
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S16x2Reverse) { WASM_SIMD_COMPILED_AND_LOWERED_TEST(S16x2Reverse) {
RunBinaryLaneOpTest<int8_t>( RunShuffleOpTest<int8_t>(
execution_mode, lower_simd, kExprS8x16Shuffle, execution_mode, lower_simd, kExprS8x16Shuffle,
{{2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13}}); {{2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13}});
} }
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S16x8Irregular) { WASM_SIMD_COMPILED_AND_LOWERED_TEST(S16x8Irregular) {
RunBinaryLaneOpTest<int8_t>( RunShuffleOpTest<int8_t>(
execution_mode, lower_simd, kExprS8x16Shuffle, execution_mode, lower_simd, kExprS8x16Shuffle,
{{0, 1, 16, 17, 16, 17, 0, 1, 4, 5, 20, 21, 6, 7, 22, 23}}); {{0, 1, 16, 17, 16, 17, 0, 1, 4, 5, 20, 21, 6, 7, 22, 23}});
RunBinaryLaneOpTest<int8_t>(
execution_mode, lower_simd, kExprS8x16Shuffle,
{{0, 1, 0, 1, 0, 1, 0, 1, 4, 5, 4, 5, 6, 7, 6, 7}});
} }
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S8x16Dup) { WASM_SIMD_COMPILED_AND_LOWERED_TEST(S8x16Dup) {
RunBinaryLaneOpTest<int8_t>( RunShuffleOpTest<int8_t>(
execution_mode, lower_simd, kExprS8x16Shuffle, execution_mode, lower_simd, kExprS8x16Shuffle,
{{19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19}}); {{19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19}});
RunBinaryLaneOpTest<int8_t>(
execution_mode, lower_simd, kExprS8x16Shuffle,
{{7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7}});
} }
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S8x16ZipLeft) { WASM_SIMD_COMPILED_AND_LOWERED_TEST(S8x16ZipLeft) {
RunBinaryLaneOpTest<int8_t>( RunShuffleOpTest<int8_t>(
execution_mode, lower_simd, kExprS8x16Shuffle, execution_mode, lower_simd, kExprS8x16Shuffle,
{{0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}}); {{0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}});
RunBinaryLaneOpTest<int8_t>(
execution_mode, lower_simd, kExprS8x16Shuffle,
{{0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7}});
} }
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S8x16ZipRight) { WASM_SIMD_COMPILED_AND_LOWERED_TEST(S8x16ZipRight) {
RunBinaryLaneOpTest<int8_t>( RunShuffleOpTest<int8_t>(
execution_mode, lower_simd, kExprS8x16Shuffle, execution_mode, lower_simd, kExprS8x16Shuffle,
{{8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}}); {{8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}});
RunBinaryLaneOpTest<int8_t>(
execution_mode, lower_simd, kExprS8x16Shuffle,
{{8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15}});
} }
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S8x16UnzipLeft) { WASM_SIMD_COMPILED_AND_LOWERED_TEST(S8x16UnzipLeft) {
RunBinaryLaneOpTest<int8_t>( RunShuffleOpTest<int8_t>(
execution_mode, lower_simd, kExprS8x16Shuffle, execution_mode, lower_simd, kExprS8x16Shuffle,
{{0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30}}); {{0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30}});
RunBinaryLaneOpTest<int8_t>(
execution_mode, lower_simd, kExprS8x16Shuffle,
{{0, 2, 4, 6, 8, 10, 12, 14, 0, 2, 4, 6, 8, 10, 12, 14}});
} }
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S8x16UnzipRight) { WASM_SIMD_COMPILED_AND_LOWERED_TEST(S8x16UnzipRight) {
RunBinaryLaneOpTest<int8_t>( RunShuffleOpTest<int8_t>(
execution_mode, lower_simd, kExprS8x16Shuffle, execution_mode, lower_simd, kExprS8x16Shuffle,
{{1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}}); {{1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}});
RunBinaryLaneOpTest<int8_t>(
execution_mode, lower_simd, kExprS8x16Shuffle,
{{1, 3, 5, 7, 9, 11, 13, 15, 1, 3, 5, 7, 9, 11, 13, 15}});
} }
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S8x16TransposeLeft) { WASM_SIMD_COMPILED_AND_LOWERED_TEST(S8x16TransposeLeft) {
RunBinaryLaneOpTest<int8_t>( RunShuffleOpTest<int8_t>(
execution_mode, lower_simd, kExprS8x16Shuffle, execution_mode, lower_simd, kExprS8x16Shuffle,
{{0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30}}); {{0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30}});
RunBinaryLaneOpTest<int8_t>(
execution_mode, lower_simd, kExprS8x16Shuffle,
{{0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14}});
} }
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S8x16TransposeRight) { WASM_SIMD_COMPILED_AND_LOWERED_TEST(S8x16TransposeRight) {
RunBinaryLaneOpTest<int8_t>( RunShuffleOpTest<int8_t>(
execution_mode, lower_simd, kExprS8x16Shuffle, execution_mode, lower_simd, kExprS8x16Shuffle,
{{1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31}}); {{1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31}});
RunBinaryLaneOpTest<int8_t>(
execution_mode, lower_simd, kExprS8x16Shuffle,
{{1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15}});
} }
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S8x8Reverse) { WASM_SIMD_COMPILED_AND_LOWERED_TEST(S8x8Reverse) {
RunBinaryLaneOpTest<int8_t>( RunShuffleOpTest<int8_t>(
execution_mode, lower_simd, kExprS8x16Shuffle, execution_mode, lower_simd, kExprS8x16Shuffle,
{{7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8}}); {{7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8}});
} }
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S8x4Reverse) { WASM_SIMD_COMPILED_AND_LOWERED_TEST(S8x4Reverse) {
RunBinaryLaneOpTest<int8_t>( RunShuffleOpTest<int8_t>(
execution_mode, lower_simd, kExprS8x16Shuffle, execution_mode, lower_simd, kExprS8x16Shuffle,
{{3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12}}); {{3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12}});
} }
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S8x2Reverse) { WASM_SIMD_COMPILED_AND_LOWERED_TEST(S8x2Reverse) {
RunBinaryLaneOpTest<int8_t>( RunShuffleOpTest<int8_t>(
execution_mode, lower_simd, kExprS8x16Shuffle, execution_mode, lower_simd, kExprS8x16Shuffle,
{{1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14}}); {{1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14}});
} }
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S8x16Irregular) { WASM_SIMD_COMPILED_AND_LOWERED_TEST(S8x16Irregular) {
RunBinaryLaneOpTest<int8_t>( RunShuffleOpTest<int8_t>(
execution_mode, lower_simd, kExprS8x16Shuffle, execution_mode, lower_simd, kExprS8x16Shuffle,
{{0, 16, 0, 16, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}}); {{0, 16, 0, 16, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}});
RunBinaryLaneOpTest<int8_t>(
execution_mode, lower_simd, kExprS8x16Shuffle,
{{0, 0, 0, 0, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7}});
} }
// Test shuffles that blend the two vectors (elements remain in their lanes.) // Test shuffles that blend the two vectors (elements remain in their lanes.)
...@@ -1943,8 +1894,7 @@ WASM_SIMD_COMPILED_AND_LOWERED_TEST(S8x16Blend) { ...@@ -1943,8 +1894,7 @@ WASM_SIMD_COMPILED_AND_LOWERED_TEST(S8x16Blend) {
for (int bias = 1; bias < kLanes; bias++) { for (int bias = 1; bias < kLanes; bias++) {
for (int i = 0; i < bias; i++) expected[i] = i; for (int i = 0; i < bias; i++) expected[i] = i;
for (int i = bias; i < kLanes; i++) expected[i] = i + kLanes; for (int i = bias; i < kLanes; i++) expected[i] = i + kLanes;
RunBinaryLaneOpTest(execution_mode, lower_simd, kExprS8x16Shuffle, RunShuffleOpTest(execution_mode, lower_simd, kExprS8x16Shuffle, expected);
expected);
} }
} }
...@@ -1952,18 +1902,18 @@ WASM_SIMD_COMPILED_AND_LOWERED_TEST(S8x16Blend) { ...@@ -1952,18 +1902,18 @@ WASM_SIMD_COMPILED_AND_LOWERED_TEST(S8x16Blend) {
WASM_SIMD_COMPILED_AND_LOWERED_TEST(S8x16Concat) { WASM_SIMD_COMPILED_AND_LOWERED_TEST(S8x16Concat) {
static const int kLanes = 16; static const int kLanes = 16;
std::array<uint8_t, kLanes> expected; std::array<uint8_t, kLanes> expected;
for (int bias = 1; bias < kLanes; bias++) { // n is offset or bias of concatenation.
for (int n = 1; n < kLanes; ++n) {
int i = 0; int i = 0;
// last kLanes - bias bytes of first vector. // last kLanes - n bytes of first vector.
for (int j = bias; j < kLanes; j++) { for (int j = n; j < kLanes; ++j) {
expected[i++] = j; expected[i++] = j;
} }
// first bias lanes of second vector // first n bytes of second vector
for (int j = 0; j < bias; j++) { for (int j = 0; j < n; ++j) {
expected[i++] = j + kLanes; expected[i++] = j + kLanes;
} }
RunBinaryLaneOpTest(execution_mode, lower_simd, kExprS8x16Shuffle, RunShuffleOpTest(execution_mode, lower_simd, kExprS8x16Shuffle, expected);
expected);
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment