Commit 09e7d3e0 authored by Bill Budge, committed by Commit Bot

[wasm simd] Add code generation for byte reversing swizzles

- Adds opcodes for 2x8, 4x4, and 8x2 byte reversing swizzles on ia32.

Bug: v8:6020
Change-Id: I2c5d4ba88b42a17fc75ac0307e80d8c11d1838ba
Reviewed-on: https://chromium-review.googlesource.com/1121956
Commit-Queue: Bill Budge <bbudge@chromium.org>
Reviewed-by: Jing Bao <jing.bao@intel.com>
Cr-Commit-Position: refs/heads/master@{#54302}
parent 80e6cc92
...@@ -3382,7 +3382,45 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -3382,7 +3382,45 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ vpor(dst, dst, kScratchDoubleReg); __ vpor(dst, dst, kScratchDoubleReg);
break; break;
} }
// SSE byte-reversing swizzles. Reverses the byte order inside every 8-byte
// (S8x8), 4-byte (S8x4) or 2-byte (S8x2) group of the 128-bit value. The
// operation is performed in place: the output register must be the same as
// input 0 (checked below). kScratchDoubleReg is clobbered.
case kSSES8x8Reverse:
case kSSES8x4Reverse:
case kSSES8x2Reverse: {
  DCHECK_EQ(1, instr->InputCount());
  XMMRegister dst = i.OutputSimd128Register();
  DCHECK_EQ(dst, i.InputSimd128Register(0));
  if (arch_opcode != kSSES8x2Reverse) {
    // First shuffle words into position. Each 2-bit field of the immediate
    // picks the source word for one destination word:
    //   0xB1 -> words {1, 0, 3, 2}: swap the two words of every 4-byte group.
    //   0x1B -> words {3, 2, 1, 0}: reverse the four words of every 8-byte
    //           group.
    // The mask is kept unsigned because 0xB1 does not fit in int8_t.
    const uint8_t shuffle_mask =
        arch_opcode == kSSES8x4Reverse ? 0xB1 : 0x1B;
    __ pshuflw(dst, dst, shuffle_mask);
    __ pshufhw(dst, dst, shuffle_mask);
  }
  // Swap the two bytes of every 16-bit lane: (x >> 8) | (x << 8).
  __ movaps(kScratchDoubleReg, dst);
  __ psrlw(kScratchDoubleReg, 8);
  __ psllw(dst, 8);
  __ por(dst, kScratchDoubleReg);
  break;
}
// AVX byte-reversing swizzles. Reverses the byte order inside every 8-byte
// (S8x8), 4-byte (S8x4) or 2-byte (S8x2) group of the 128-bit value. Unlike
// the SSE variant, the output does not have to alias the input.
// kScratchDoubleReg is clobbered.
case kAVXS8x2Reverse:
case kAVXS8x4Reverse:
case kAVXS8x8Reverse: {
  DCHECK_EQ(1, instr->InputCount());
  CpuFeatureScope avx_scope(tasm(), AVX);
  XMMRegister dst = i.OutputSimd128Register();
  // Register that feeds the per-lane byte swap below: dst once the word
  // shuffle has been written into it, otherwise the input register itself.
  XMMRegister src = dst;
  if (arch_opcode != kAVXS8x2Reverse) {
    // First shuffle words into position. Each 2-bit field of the immediate
    // picks the source word for one destination word:
    //   0xB1 -> words {1, 0, 3, 2}: swap the two words of every 4-byte group.
    //   0x1B -> words {3, 2, 1, 0}: reverse the four words of every 8-byte
    //           group.
    // The mask is kept unsigned because 0xB1 does not fit in int8_t.
    const uint8_t shuffle_mask =
        arch_opcode == kAVXS8x4Reverse ? 0xB1 : 0x1B;
    // The first shuffle reads input 0 as a generic operand and writes dst.
    __ vpshuflw(dst, i.InputOperand(0), shuffle_mask);
    __ vpshufhw(dst, dst, shuffle_mask);
  } else {
    // No word shuffle for the 2-byte case: the byte swap reads input 0
    // directly, so it is accessed as a register here.
    src = i.InputSimd128Register(0);
  }
  // Reverse each 16 bit lane: (x >> 8) | (x << 8).
  __ vpsrlw(kScratchDoubleReg, src, 8);
  __ vpsllw(dst, src, 8);
  __ vpor(dst, dst, kScratchDoubleReg);
  break;
}
case kIA32S1x4AnyTrue: case kIA32S1x4AnyTrue:
case kIA32S1x8AnyTrue: case kIA32S1x8AnyTrue:
case kIA32S1x16AnyTrue: { case kIA32S1x16AnyTrue: {
......
...@@ -333,6 +333,12 @@ namespace compiler { ...@@ -333,6 +333,12 @@ namespace compiler {
V(AVXS8x16TransposeLow) \ V(AVXS8x16TransposeLow) \
V(SSES8x16TransposeHigh) \ V(SSES8x16TransposeHigh) \
V(AVXS8x16TransposeHigh) \ V(AVXS8x16TransposeHigh) \
V(SSES8x8Reverse) \
V(AVXS8x8Reverse) \
V(SSES8x4Reverse) \
V(AVXS8x4Reverse) \
V(SSES8x2Reverse) \
V(AVXS8x2Reverse) \
V(IA32S1x4AnyTrue) \ V(IA32S1x4AnyTrue) \
V(IA32S1x4AllTrue) \ V(IA32S1x4AllTrue) \
V(IA32S1x8AnyTrue) \ V(IA32S1x8AnyTrue) \
......
...@@ -315,6 +315,12 @@ int InstructionScheduler::GetTargetInstructionFlags( ...@@ -315,6 +315,12 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kAVXS8x16TransposeLow: case kAVXS8x16TransposeLow:
case kSSES8x16TransposeHigh: case kSSES8x16TransposeHigh:
case kAVXS8x16TransposeHigh: case kAVXS8x16TransposeHigh:
case kSSES8x8Reverse:
case kAVXS8x8Reverse:
case kSSES8x4Reverse:
case kAVXS8x4Reverse:
case kSSES8x2Reverse:
case kAVXS8x2Reverse:
case kIA32S1x4AnyTrue: case kIA32S1x4AnyTrue:
case kIA32S1x4AllTrue: case kIA32S1x4AllTrue:
case kIA32S1x8AnyTrue: case kIA32S1x8AnyTrue:
......
...@@ -2162,6 +2162,21 @@ static const ShuffleEntry arch_shuffles[] = { ...@@ -2162,6 +2162,21 @@ static const ShuffleEntry arch_shuffles[] = {
kSSES8x16TransposeHigh, kSSES8x16TransposeHigh,
kAVXS8x16TransposeHigh, kAVXS8x16TransposeHigh,
true, true,
true},
{{7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8},
kSSES8x8Reverse,
kAVXS8x8Reverse,
false,
false},
{{3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12},
kSSES8x4Reverse,
kAVXS8x4Reverse,
false,
false},
{{1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14},
kSSES8x2Reverse,
kAVXS8x2Reverse,
true,
true}}; true}};
bool TryMatchArchShuffle(const uint8_t* shuffle, const ShuffleEntry* table, bool TryMatchArchShuffle(const uint8_t* shuffle, const ShuffleEntry* table,
...@@ -2224,7 +2239,7 @@ void InstructionSelector::VisitS8x16Shuffle(Node* node) { ...@@ -2224,7 +2239,7 @@ void InstructionSelector::VisitS8x16Shuffle(Node* node) {
arraysize(arch_shuffles), is_swizzle, arraysize(arch_shuffles), is_swizzle,
&arch_shuffle)) { &arch_shuffle)) {
opcode = use_avx ? arch_shuffle->avx_opcode : arch_shuffle->opcode; opcode = use_avx ? arch_shuffle->avx_opcode : arch_shuffle->opcode;
src0_needs_reg = arch_shuffle->src0_needs_reg; src0_needs_reg = !use_avx || arch_shuffle->src0_needs_reg;
// SSE can't take advantage of both operands in registers and needs // SSE can't take advantage of both operands in registers and needs
// same-as-first. // same-as-first.
src1_needs_reg = use_avx && arch_shuffle->src1_needs_reg; src1_needs_reg = use_avx && arch_shuffle->src1_needs_reg;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment