Commit 342b5b7e, authored by Kanghua Yu and committed by Commit Bot

[wasm][ia32] Add S8x16Shuffle

Change-Id: I9a78e0a8f673f311414f72055958c52d3c2cb0cd
Reviewed-on: https://chromium-review.googlesource.com/908256
Commit-Queue: Kanghua Yu <kanghua.yu@intel.com>
Reviewed-by: Bill Budge <bbudge@chromium.org>
Reviewed-by: Aseem Garg <aseemgarg@chromium.org>
Cr-Commit-Position: refs/heads/master@{#52166}
parent a29de090
......@@ -2995,6 +2995,51 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ vxorps(dst, dst, i.InputSimd128Register(2));
break;
}
case kIA32S8x16Shuffle: {
  // Byte shuffle built on pshufb. The instruction carries either
  //   5 inputs: one source operand followed by four packed-lane immediates, or
  //   6 inputs: two source operands followed by four packed-lane immediates.
  // Each immediate packs four byte-lane indices. The control mask is
  // materialized on the stack by pushing the immediates onto a 16-byte
  // aligned esp, so the mask can be used directly as a pshufb memory operand.
  XMMRegister dst = i.OutputSimd128Register();
  Register tmp = i.TempRegister(0);
  const bool single_input = instr->InputCount() == 5;
  // Remember the original stack pointer; it is restored at the end.
  __ mov(tmp, esp);
  // Read every source operand before dst is written and before esp is
  // modified. The second source may have been allocated to the same register
  // as the output, so it must be copied to the scratch register before dst is
  // overwritten with the first source. Loading here also keeps the operand
  // reads independent of the esp adjustments made for the mask buffer.
  if (!single_input) {
    DCHECK_EQ(6, instr->InputCount());
    __ movups(kScratchDoubleReg, i.InputOperand(1));
  }
  __ movups(dst, i.InputOperand(0));
  // Prepare a 16-byte aligned buffer for the shuffle control mask.
  __ and_(esp, -16);
  if (single_input) {
    // Push the immediates last-to-first so that the first one ends up at the
    // lowest address, i.e. at Operand(esp, 0).
    for (int j = 4; j > 0; j--) {
      uint32_t mask = i.InputUint32(j);
      __ push(Immediate(mask));
    }
    __ Pshufb(dst, Operand(esp, 0));
  } else {
    // First pass: select the lanes that come from input 0. Lanes that refer
    // to input 1 get selector 0x80, which makes pshufb write a zero byte.
    for (int j = 5; j > 1; j--) {
      uint32_t lanes = i.InputUint32(j);
      uint32_t mask = 0;
      for (int k = 0; k < 32; k += 8) {
        uint8_t lane = lanes >> k;
        // Widen to uint32_t before shifting so the shift by 24 is unsigned.
        uint32_t selector = lane < kSimd128Size ? lane : 0x80;
        mask |= selector << k;
      }
      __ push(Immediate(mask));
    }
    __ Pshufb(dst, Operand(esp, 0));
    // Second pass: select the lanes that come from input 1 (indices rebased
    // with & 0xF) and zero the lanes already produced by the first pass. The
    // second mask is pushed on top of the first, so esp stays 16-byte aligned.
    for (int j = 5; j > 1; j--) {
      uint32_t lanes = i.InputUint32(j);
      uint32_t mask = 0;
      for (int k = 0; k < 32; k += 8) {
        uint8_t lane = lanes >> k;
        uint32_t selector = lane >= kSimd128Size ? (lane & 0xF) : 0x80;
        mask |= selector << k;
      }
      __ push(Immediate(mask));
    }
    __ Pshufb(kScratchDoubleReg, Operand(esp, 0));
    // The two partial results have zeros in complementary lanes; merge them.
    __ por(dst, kScratchDoubleReg);
  }
  // Release the mask buffer(s) by restoring the saved stack pointer.
  __ mov(esp, tmp);
  break;
}
case kIA32S32x4Swizzle: {
  // Single-source 32-bit lane permutation: input 0 is the source operand and
  // input 1 is the 8-bit selector handed to Pshufd.
  XMMRegister dst = i.OutputSimd128Register();
  __ Pshufd(dst, i.InputOperand(0), i.InputInt8(1));
  break;
}
case kIA32StackCheck: {
ExternalReference const stack_limit =
ExternalReference::address_of_stack_limit(__ isolate());
......
......@@ -280,7 +280,9 @@ namespace compiler {
V(SSES128Xor) \
V(AVXS128Xor) \
V(SSES128Select) \
V(AVXS128Select)
V(AVXS128Select) \
V(IA32S8x16Shuffle) \
V(IA32S32x4Swizzle)
// Addressing modes represent the "shape" of inputs to an instruction.
// Many instructions support multiple addressing modes. Addressing modes
......
......@@ -263,6 +263,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kAVXS128Xor:
case kSSES128Select:
case kAVXS128Select:
case kIA32S8x16Shuffle:
case kIA32S32x4Swizzle:
return (instr->addressing_mode() == kMode_None)
? kNoOpcodeFlags
: kIsLoadOperation | kHasSideEffect;
......
......@@ -1967,6 +1967,43 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
UNREACHABLE();
}
// Selects IA32 instructions for an S8x16Shuffle node.
//
// When the canonicalized lane mask equals kMaxSwizzleIndex, only the first
// input is used: a shuffle that matches a 32x4 pattern is emitted as
// kIA32S32x4Swizzle with a 2-bits-per-lane immediate; anything else falls
// back to kIA32S8x16Shuffle with one source operand. Otherwise the mask is
// expected to be kMaxShuffleIndex and kIA32S8x16Shuffle is emitted with both
// source operands. In every kIA32S8x16Shuffle case the sources are followed
// by four immediates, each packing four shuffle lanes, and one temp register.
void InstructionSelector::VisitS8x16Shuffle(Node* node) {
  static const int kMaxSwizzleIndex = 15;
  static const int kMaxShuffleIndex = 31;
  const uint8_t* lanes = OpParameter<uint8_t*>(node->op());
  const uint8_t lane_mask = CanonicalizeShuffle(node);
  IA32OperandGenerator g(this);
  InstructionOperand dst = g.DefineAsRegister(node);
  Node* lhs = node->InputAt(0);
  Node* rhs = node->InputAt(1);
  const bool is_swizzle = (lane_mask == kMaxSwizzleIndex);

  if (is_swizzle) {
    uint8_t dwords[4];
    if (TryMatch32x4Shuffle(lanes, dwords)) {
      // Build the selector from the highest lane down, two bits per lane, so
      // lane 0 lands in bits 0-1 and lane 3 in bits 6-7.
      int imm8 = 0;
      for (int d = 3; d >= 0; --d) {
        imm8 = (imm8 << 2) | (dwords[d] & 3);
      }
      Emit(kIA32S32x4Swizzle, dst, g.Use(lhs), g.UseImmediate(imm8));
      return;
    }
    // TODO(ia32): handle non 32x4 swizzles here
  } else {
    DCHECK_EQ(kMaxShuffleIndex, lane_mask);
    USE(kMaxShuffleIndex);
  }

  // Generic path: one or two sources, then the sixteen lanes packed four at a
  // time into immediates.
  InstructionOperand operands[6];
  size_t operand_count = 0;
  operands[operand_count++] = g.Use(lhs);
  if (!is_swizzle) {
    operands[operand_count++] = g.Use(rhs);
  }
  for (int first_lane = 0; first_lane < 16; first_lane += 4) {
    operands[operand_count++] =
        g.UseImmediate(Pack4Lanes(lanes + first_lane, lane_mask));
  }
  InstructionOperand temps[] = {g.TempRegister()};
  Emit(kIA32S8x16Shuffle, 1, &dst, operand_count, operands, 1, temps);
}
// static
MachineOperatorBuilder::Flags
InstructionSelector::SupportedMachineOperatorFlags() {
......
......@@ -2421,13 +2421,13 @@ void InstructionSelector::VisitI8x16ShrS(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI8x16ShrU(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI8x16Mul(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitS8x16Shuffle(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS
// && !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_IA32
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && \
!V8_TARGET_ARCH_MIPS64
void InstructionSelector::VisitS8x16Shuffle(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitS1x4AnyTrue(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitS1x4AllTrue(Node* node) { UNIMPLEMENTED(); }
......
......@@ -184,7 +184,8 @@ void SimdScalarLowering::LowerGraph() {
V(I8x16LtS) \
V(I8x16LeS) \
V(I8x16LtU) \
V(I8x16LeU)
V(I8x16LeU) \
V(S8x16Shuffle)
MachineType SimdScalarLowering::MachineTypeFrom(SimdType simdType) {
switch (simdType) {
......@@ -1172,6 +1173,19 @@ void SimdScalarLowering::LowerNode(Node* node) {
ReplaceNode(node, rep_node, num_lanes);
break;
}
case IrOpcode::kS8x16Shuffle: {
  // Lower a byte shuffle by picking, for each of the 16 output lanes, the
  // replacement node of the selected source lane: indices below 16 read from
  // the first input, the remaining indices read from the second input.
  DCHECK_EQ(2, node->InputCount());
  const uint8_t* lane_indices = OpParameter<uint8_t*>(node->op());
  Node** lhs_lanes = GetReplacementsWithType(node->InputAt(0), rep_type);
  Node** rhs_lanes = GetReplacementsWithType(node->InputAt(1), rep_type);
  Node** result_lanes = zone()->NewArray<Node*>(16);
  for (int out = 0; out < 16; ++out) {
    const int src = lane_indices[out];
    if (src < 16) {
      result_lanes[out] = lhs_lanes[src];
    } else {
      result_lanes[out] = rhs_lanes[src - 16];
    }
  }
  ReplaceNode(node, result_lanes, 16);
  break;
}
default: { DefaultLowering(node); }
}
}
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment