Commit 09e7d3e0, authored by Bill Budge, committed by Commit Bot

[wasm simd] Add code generation for byte reversing swizzles

- Adds opcodes for 2x8, 4x4, and 8x2 byte reversing swizzles on ia32.

Bug: v8:6020
Change-Id: I2c5d4ba88b42a17fc75ac0307e80d8c11d1838ba
Reviewed-on: https://chromium-review.googlesource.com/1121956
Commit-Queue: Bill Budge <bbudge@chromium.org>
Reviewed-by: Jing Bao <jing.bao@intel.com>
Cr-Commit-Position: refs/heads/master@{#54302}
parent 80e6cc92
......@@ -3382,7 +3382,45 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ vpor(dst, dst, kScratchDoubleReg);
break;
}
case kSSES8x8Reverse:
case kSSES8x4Reverse:
case kSSES8x2Reverse: {
  // SSE byte-reversing swizzles: reverse the bytes inside every 8-, 4- or
  // 2-byte group of the single input. The sequence works in place on dst.
  DCHECK_EQ(1, instr->InputCount());
  XMMRegister dst = i.OutputSimd128Register();
  // Every instruction below reads and writes dst, so the input has to be
  // allocated to the same register as the output.
  DCHECK_EQ(dst, i.InputSimd128Register(0));
  if (arch_opcode != kSSES8x2Reverse) {
    // First shuffle words into position. The immediate packs four 2-bit word
    // selectors: 0xB1 swaps adjacent 16-bit words (4-byte groups), 0x1B
    // reverses the four words of each 64-bit half (8-byte groups).
    // The mask is unsigned because 0xB1 (177) is not representable in
    // int8_t; a signed type would rely on a narrowing conversion.
    uint8_t shuffle_mask = arch_opcode == kSSES8x4Reverse ? 0xB1 : 0x1B;
    __ pshuflw(dst, dst, shuffle_mask);
    __ pshufhw(dst, dst, shuffle_mask);
  }
  // Swap the two bytes of every 16-bit lane: (lane >> 8) | (lane << 8).
  // The scratch register holds the right-shifted copy.
  __ movaps(kScratchDoubleReg, dst);
  __ psrlw(kScratchDoubleReg, 8);
  __ psllw(dst, 8);
  __ por(dst, kScratchDoubleReg);
  break;
}
case kAVXS8x2Reverse:
case kAVXS8x4Reverse:
case kAVXS8x8Reverse: {
  // AVX byte-reversing swizzles: reverse the bytes inside every 8-, 4- or
  // 2-byte group of the single input. Unlike the SSE variant, the
  // three-operand forms do not need the input to alias the output.
  DCHECK_EQ(1, instr->InputCount());
  CpuFeatureScope avx_scope(tasm(), AVX);
  XMMRegister dst = i.OutputSimd128Register();
  // Source of the final per-lane byte swap. Defaults to dst, which holds the
  // word-shuffled intermediate for the 8x4 and 8x8 variants.
  XMMRegister src = dst;
  if (arch_opcode != kAVXS8x2Reverse) {
    // First shuffle words into position. The immediate packs four 2-bit word
    // selectors: 0xB1 swaps adjacent 16-bit words (4-byte groups), 0x1B
    // reverses the four words of each 64-bit half (8-byte groups).
    // The mask is unsigned because 0xB1 (177) is not representable in
    // int8_t; a signed type would rely on a narrowing conversion.
    uint8_t shuffle_mask = arch_opcode == kAVXS8x4Reverse ? 0xB1 : 0x1B;
    // The input is read as a generic operand here; only the first shuffle
    // touches it, after which the work continues in dst.
    __ vpshuflw(dst, i.InputOperand(0), shuffle_mask);
    __ vpshufhw(dst, dst, shuffle_mask);
  } else {
    // No word shuffle is needed, so the byte swap reads the input register
    // directly.
    src = i.InputSimd128Register(0);
  }
  // Reverse each 16 bit lane: (lane >> 8) | (lane << 8). The scratch
  // register is written before dst, so src may safely alias dst.
  __ vpsrlw(kScratchDoubleReg, src, 8);
  __ vpsllw(dst, src, 8);
  __ vpor(dst, dst, kScratchDoubleReg);
  break;
}
case kIA32S1x4AnyTrue:
case kIA32S1x8AnyTrue:
case kIA32S1x16AnyTrue: {
......
......@@ -333,6 +333,12 @@ namespace compiler {
V(AVXS8x16TransposeLow) \
V(SSES8x16TransposeHigh) \
V(AVXS8x16TransposeHigh) \
V(SSES8x8Reverse) \
V(AVXS8x8Reverse) \
V(SSES8x4Reverse) \
V(AVXS8x4Reverse) \
V(SSES8x2Reverse) \
V(AVXS8x2Reverse) \
V(IA32S1x4AnyTrue) \
V(IA32S1x4AllTrue) \
V(IA32S1x8AnyTrue) \
......
......@@ -315,6 +315,12 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kAVXS8x16TransposeLow:
case kSSES8x16TransposeHigh:
case kAVXS8x16TransposeHigh:
case kSSES8x8Reverse:
case kAVXS8x8Reverse:
case kSSES8x4Reverse:
case kAVXS8x4Reverse:
case kSSES8x2Reverse:
case kAVXS8x2Reverse:
case kIA32S1x4AnyTrue:
case kIA32S1x4AllTrue:
case kIA32S1x8AnyTrue:
......
......@@ -2162,6 +2162,21 @@ static const ShuffleEntry arch_shuffles[] = {
kSSES8x16TransposeHigh,
kAVXS8x16TransposeHigh,
true,
true},
{{7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8},
kSSES8x8Reverse,
kAVXS8x8Reverse,
false,
false},
{{3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12},
kSSES8x4Reverse,
kAVXS8x4Reverse,
false,
false},
{{1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14},
kSSES8x2Reverse,
kAVXS8x2Reverse,
true,
true}};
bool TryMatchArchShuffle(const uint8_t* shuffle, const ShuffleEntry* table,
......@@ -2224,7 +2239,7 @@ void InstructionSelector::VisitS8x16Shuffle(Node* node) {
arraysize(arch_shuffles), is_swizzle,
&arch_shuffle)) {
opcode = use_avx ? arch_shuffle->avx_opcode : arch_shuffle->opcode;
src0_needs_reg = arch_shuffle->src0_needs_reg;
src0_needs_reg = !use_avx || arch_shuffle->src0_needs_reg;
// SSE can't take advantage of both operands in registers and needs
// same-as-first.
src1_needs_reg = use_avx && arch_shuffle->src1_needs_reg;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment