Commit 5d7039ea authored by bbudge, committed by Commit Bot

[WASM] Simplify SIMD shuffle opcodes.

- Eliminates S32x4Shuffle, S16x8Shuffle opcodes. All shuffles are subsumed
  by S8x16Shuffle. This aligns us with the latest WASM SIMD spec.

LOG=N
BUG=v8:6020

Review-Url: https://codereview.chromium.org/2923103003
Cr-Commit-Position: refs/heads/master@{#45929}
parent a4cf434f
...@@ -340,14 +340,6 @@ Condition FlagsConditionToCondition(FlagsCondition condition) { ...@@ -340,14 +340,6 @@ Condition FlagsConditionToCondition(FlagsCondition condition) {
UNREACHABLE(); UNREACHABLE();
} }
// Returns the number of d-registers that make up the vtbl lookup table for a
// shuffle of src0 and src1: 2 when both operands are the same register,
// 4 otherwise.
int GetVtblTableSize(const Simd128Register& src0, const Simd128Register& src1) {
  const bool is_unary = src0.is(src1);
  if (!is_unary) {
    // Binary shuffle: the table is src0 followed by src1, so the two
    // registers must have consecutive codes.
    DCHECK_EQ(src0.code() + 1, src1.code());
  }
  // Unary shuffle: table is src0 alone (2 d-registers); binary: 4 d-registers.
  return is_unary ? 2 : 4;
}
} // namespace } // namespace
#define ASSEMBLE_CHECKED_LOAD_FP(Type) \ #define ASSEMBLE_CHECKED_LOAD_FP(Type) \
...@@ -2291,39 +2283,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2291,39 +2283,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ vtrn(Neon16, kScratchQuadReg, dst); // dst = [1, 9, 3, 11, ... 15] __ vtrn(Neon16, kScratchQuadReg, dst); // dst = [1, 9, 3, 11, ... 15]
break; break;
} }
case kArmS16x8Shuffle: {
Simd128Register dst = i.OutputSimd128Register(),
src0 = i.InputSimd128Register(0),
src1 = i.InputSimd128Register(1);
DwVfpRegister table_base = src0.low();
int table_size = GetVtblTableSize(src0, src1);
// Convert the shuffle lane masks to byte masks in kScratchQuadReg.
int scratch_s_base = kScratchQuadReg.code() * 4;
for (int j = 0; j < 2; j++) {
int32_t four_lanes = i.InputInt32(2 + j);
for (int k = 0; k < 2; k++) {
uint8_t w0 = (four_lanes & 0xF) * kShortSize;
four_lanes >>= 8;
uint8_t w1 = (four_lanes & 0xF) * kShortSize;
four_lanes >>= 8;
int32_t mask = w0 | ((w0 + 1) << 8) | (w1 << 16) | ((w1 + 1) << 24);
// Ensure byte indices are in [0, 31] so masks are never NaNs.
four_lanes &= 0x1F1F1F1F;
__ vmov(SwVfpRegister::from_code(scratch_s_base + 2 * j + k),
bit_cast<float>(mask));
}
}
NeonListOperand table(table_base, table_size);
if (!dst.is(src0) && !dst.is(src1)) {
__ vtbl(dst.low(), table, kScratchQuadReg.low());
__ vtbl(dst.high(), table, kScratchQuadReg.high());
} else {
__ vtbl(kScratchQuadReg.low(), table, kScratchQuadReg.low());
__ vtbl(kScratchQuadReg.high(), table, kScratchQuadReg.high());
__ vmov(dst, kScratchQuadReg);
}
break;
}
case kArmS8x16ZipLeft: { case kArmS8x16ZipLeft: {
Simd128Register dst = i.OutputSimd128Register(), Simd128Register dst = i.OutputSimd128Register(),
src1 = i.InputSimd128Register(1); src1 = i.InputSimd128Register(1);
...@@ -2388,7 +2347,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2388,7 +2347,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
src0 = i.InputSimd128Register(0), src0 = i.InputSimd128Register(0),
src1 = i.InputSimd128Register(1); src1 = i.InputSimd128Register(1);
DwVfpRegister table_base = src0.low(); DwVfpRegister table_base = src0.low();
int table_size = GetVtblTableSize(src0, src1); // If unary shuffle, table is src0 (2 d-registers), otherwise src0 and
// src1. They must be consecutive.
int table_size = src0.is(src1) ? 2 : 4;
DCHECK_IMPLIES(!src0.is(src1), src0.code() + 1 == src1.code());
// The shuffle lane mask is a byte mask, materialize in kScratchQuadReg. // The shuffle lane mask is a byte mask, materialize in kScratchQuadReg.
int scratch_s_base = kScratchQuadReg.code() * 4; int scratch_s_base = kScratchQuadReg.code() * 4;
for (int j = 0; j < 4; j++) { for (int j = 0; j < 4; j++) {
......
...@@ -246,7 +246,6 @@ namespace compiler { ...@@ -246,7 +246,6 @@ namespace compiler {
V(ArmS16x8UnzipRight) \ V(ArmS16x8UnzipRight) \
V(ArmS16x8TransposeLeft) \ V(ArmS16x8TransposeLeft) \
V(ArmS16x8TransposeRight) \ V(ArmS16x8TransposeRight) \
V(ArmS16x8Shuffle) \
V(ArmS8x16ZipLeft) \ V(ArmS8x16ZipLeft) \
V(ArmS8x16ZipRight) \ V(ArmS8x16ZipRight) \
V(ArmS8x16UnzipLeft) \ V(ArmS8x16UnzipLeft) \
......
...@@ -230,7 +230,6 @@ int InstructionScheduler::GetTargetInstructionFlags( ...@@ -230,7 +230,6 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArmS16x8UnzipRight: case kArmS16x8UnzipRight:
case kArmS16x8TransposeLeft: case kArmS16x8TransposeLeft:
case kArmS16x8TransposeRight: case kArmS16x8TransposeRight:
case kArmS16x8Shuffle:
case kArmS8x16ZipLeft: case kArmS8x16ZipLeft:
case kArmS8x16ZipRight: case kArmS8x16ZipRight:
case kArmS8x16UnzipLeft: case kArmS8x16UnzipLeft:
......
...@@ -1711,10 +1711,6 @@ void InstructionSelector::VisitNode(Node* node) { ...@@ -1711,10 +1711,6 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsSimd128(node), VisitS128Not(node); return MarkAsSimd128(node), VisitS128Not(node);
case IrOpcode::kS128Select: case IrOpcode::kS128Select:
return MarkAsSimd128(node), VisitS128Select(node); return MarkAsSimd128(node), VisitS128Select(node);
case IrOpcode::kS32x4Shuffle:
return MarkAsSimd128(node), VisitS32x4Shuffle(node);
case IrOpcode::kS16x8Shuffle:
return MarkAsSimd128(node), VisitS16x8Shuffle(node);
case IrOpcode::kS8x16Shuffle: case IrOpcode::kS8x16Shuffle:
return MarkAsSimd128(node), VisitS8x16Shuffle(node); return MarkAsSimd128(node), VisitS8x16Shuffle(node);
case IrOpcode::kS1x4AnyTrue: case IrOpcode::kS1x4AnyTrue:
...@@ -2378,22 +2374,11 @@ void InstructionSelector::VisitS128Not(Node* node) { UNIMPLEMENTED(); } ...@@ -2378,22 +2374,11 @@ void InstructionSelector::VisitS128Not(Node* node) { UNIMPLEMENTED(); }
#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_MIPS && \ #if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_MIPS && \
!V8_TARGET_ARCH_MIPS64 !V8_TARGET_ARCH_MIPS64
void InstructionSelector::VisitS128Zero(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitS128Zero(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_MIPS &&
// !V8_TARGET_ARCH_MIPS64
#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_MIPS && \
!V8_TARGET_ARCH_MIPS64
void InstructionSelector::VisitS128Select(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitS128Select(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_MIPS && #endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_MIPS &&
// !V8_TARGET_ARCH_MIPS64 // !V8_TARGET_ARCH_MIPS64
#if !V8_TARGET_ARCH_ARM
void InstructionSelector::VisitS32x4Shuffle(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitS16x8Shuffle(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_ARM
#if !V8_TARGET_ARCH_ARM #if !V8_TARGET_ARCH_ARM
void InstructionSelector::VisitS8x16Shuffle(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitS8x16Shuffle(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_ARM #endif // !V8_TARGET_ARCH_ARM
......
...@@ -981,22 +981,6 @@ SIMD_LANE_OP_LIST(SIMD_LANE_OPS) ...@@ -981,22 +981,6 @@ SIMD_LANE_OP_LIST(SIMD_LANE_OPS)
SIMD_FORMAT_LIST(SIMD_SHIFT_OPS) SIMD_FORMAT_LIST(SIMD_SHIFT_OPS)
#undef SIMD_SHIFT_OPS #undef SIMD_SHIFT_OPS
// Builds the S32x4Shuffle operator. Only the first 4 bytes of |shuffle| are
// used; they are copied into storage obtained from zone_ and attached to the
// operator as its parameter.
const Operator* MachineOperatorBuilder::S32x4Shuffle(uint8_t shuffle[16]) {
  const int kMaskBytes = 4;
  uint8_t* lane_indices = zone_->NewArray<uint8_t>(kMaskBytes);
  memcpy(lane_indices, shuffle, kMaskBytes);
  return new (zone_) Operator1<uint8_t*>(
      IrOpcode::kS32x4Shuffle, Operator::kPure, "Shuffle",
      2, 0, 0, 1, 0, 0, lane_indices);
}
// Builds the S16x8Shuffle operator. Only the first 8 bytes of |shuffle| are
// used; they are copied into storage obtained from zone_ and attached to the
// operator as its parameter.
const Operator* MachineOperatorBuilder::S16x8Shuffle(uint8_t shuffle[16]) {
  const int kMaskBytes = 8;
  uint8_t* lane_indices = zone_->NewArray<uint8_t>(kMaskBytes);
  memcpy(lane_indices, shuffle, kMaskBytes);
  return new (zone_) Operator1<uint8_t*>(
      IrOpcode::kS16x8Shuffle, Operator::kPure, "Shuffle",
      2, 0, 0, 1, 0, 0, lane_indices);
}
const Operator* MachineOperatorBuilder::S8x16Shuffle(uint8_t shuffle[16]) { const Operator* MachineOperatorBuilder::S8x16Shuffle(uint8_t shuffle[16]) {
uint8_t* array = zone_->NewArray<uint8_t>(16); uint8_t* array = zone_->NewArray<uint8_t>(16);
memcpy(array, shuffle, 16); memcpy(array, shuffle, 16);
......
...@@ -577,8 +577,6 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final ...@@ -577,8 +577,6 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* S128Not(); const Operator* S128Not();
const Operator* S128Select(); const Operator* S128Select();
const Operator* S32x4Shuffle(uint8_t shuffle[16]);
const Operator* S16x8Shuffle(uint8_t shuffle[16]);
const Operator* S8x16Shuffle(uint8_t shuffle[16]); const Operator* S8x16Shuffle(uint8_t shuffle[16]);
const Operator* S1x4AnyTrue(); const Operator* S1x4AnyTrue();
......
...@@ -701,8 +701,6 @@ ...@@ -701,8 +701,6 @@
V(S128Or) \ V(S128Or) \
V(S128Xor) \ V(S128Xor) \
V(S128Select) \ V(S128Select) \
V(S32x4Shuffle) \
V(S16x8Shuffle) \
V(S8x16Shuffle) \ V(S8x16Shuffle) \
V(S1x4AnyTrue) \ V(S1x4AnyTrue) \
V(S1x4AllTrue) \ V(S1x4AllTrue) \
......
...@@ -3584,22 +3584,11 @@ Node* WasmGraphBuilder::SimdShiftOp(wasm::WasmOpcode opcode, uint8_t shift, ...@@ -3584,22 +3584,11 @@ Node* WasmGraphBuilder::SimdShiftOp(wasm::WasmOpcode opcode, uint8_t shift,
} }
} }
Node* WasmGraphBuilder::SimdShuffleOp(uint8_t shuffle[16], unsigned lanes, Node* WasmGraphBuilder::Simd8x16ShuffleOp(uint8_t shuffle[16],
const NodeVector& inputs) { const NodeVector& inputs) {
has_simd_ = true; has_simd_ = true;
switch (lanes) { return graph()->NewNode(jsgraph()->machine()->S8x16Shuffle(shuffle),
case 4: inputs[0], inputs[1]);
return graph()->NewNode(jsgraph()->machine()->S32x4Shuffle(shuffle),
inputs[0], inputs[1]);
case 8:
return graph()->NewNode(jsgraph()->machine()->S16x8Shuffle(shuffle),
inputs[0], inputs[1]);
case 16:
return graph()->NewNode(jsgraph()->machine()->S8x16Shuffle(shuffle),
inputs[0], inputs[1]);
default:
UNREACHABLE();
}
} }
static void RecordFunctionCompilation(CodeEventListener::LogEventsAndTags tag, static void RecordFunctionCompilation(CodeEventListener::LogEventsAndTags tag,
......
...@@ -254,8 +254,7 @@ class WasmGraphBuilder { ...@@ -254,8 +254,7 @@ class WasmGraphBuilder {
Node* SimdShiftOp(wasm::WasmOpcode opcode, uint8_t shift, Node* SimdShiftOp(wasm::WasmOpcode opcode, uint8_t shift,
const NodeVector& inputs); const NodeVector& inputs);
Node* SimdShuffleOp(uint8_t shuffle[16], unsigned lanes, Node* Simd8x16ShuffleOp(uint8_t shuffle[16], const NodeVector& inputs);
const NodeVector& inputs);
bool has_simd() const { return has_simd_; } bool has_simd() const { return has_simd_; }
......
...@@ -313,15 +313,13 @@ struct SimdShiftOperand { ...@@ -313,15 +313,13 @@ struct SimdShiftOperand {
} }
}; };
// Operand for SIMD shuffle operations. // Operand for SIMD S8x16 shuffle operations.
template <bool checked> template <bool checked>
struct SimdShuffleOperand { struct Simd8x16ShuffleOperand {
uint8_t shuffle[16]; uint8_t shuffle[kSimd128Size];
unsigned lanes;
inline SimdShuffleOperand(Decoder* decoder, const byte* pc, unsigned lanes_) { inline Simd8x16ShuffleOperand(Decoder* decoder, const byte* pc) {
lanes = lanes_; for (uint32_t i = 0; i < kSimd128Size; ++i) {
for (unsigned i = 0; i < lanes; i++) {
shuffle[i] = decoder->read_u8<checked>(pc + 2 + i, "shuffle"); shuffle[i] = decoder->read_u8<checked>(pc + 2 + i, "shuffle");
} }
} }
......
...@@ -146,21 +146,6 @@ struct Control { ...@@ -146,21 +146,6 @@ struct Control {
} }
}; };
namespace {
// Maps a SIMD shuffle opcode to the size of its shuffle mask:
// 4 for S32x4Shuffle, 8 for S16x8Shuffle and 16 for S8x16Shuffle.
// Any other opcode hits UNREACHABLE().
inline unsigned GetShuffleMaskSize(WasmOpcode opcode) {
  if (opcode == kExprS32x4Shuffle) return 4;
  if (opcode == kExprS16x8Shuffle) return 8;
  if (opcode == kExprS8x16Shuffle) return 16;
  UNREACHABLE();
}
}  // namespace
// Macros that build nodes only if there is a graph and the current SSA // Macros that build nodes only if there is a graph and the current SSA
// environment is reachable from start. This avoids problems with malformed // environment is reachable from start. This avoids problems with malformed
// TF graphs when decoding inputs that have unreachable code. // TF graphs when decoding inputs that have unreachable code.
...@@ -421,13 +406,12 @@ class WasmDecoder : public Decoder { ...@@ -421,13 +406,12 @@ class WasmDecoder : public Decoder {
} }
} }
inline bool Validate(const byte* pc, WasmOpcode opcode, inline bool Validate(const byte* pc, Simd8x16ShuffleOperand<true>& operand) {
SimdShuffleOperand<true>& operand) {
unsigned lanes = GetShuffleMaskSize(opcode);
uint8_t max_lane = 0; uint8_t max_lane = 0;
for (unsigned i = 0; i < lanes; i++) for (uint32_t i = 0; i < kSimd128Size; ++i)
max_lane = std::max(max_lane, operand.shuffle[i]); max_lane = std::max(max_lane, operand.shuffle[i]);
if (operand.lanes != lanes || max_lane > 2 * lanes) { // Shuffle indices must be in [0..31] for a 16 lane shuffle.
if (max_lane > 2 * kSimd128Size) {
error(pc_ + 2, "invalid shuffle mask"); error(pc_ + 2, "invalid shuffle mask");
return false; return false;
} else { } else {
...@@ -520,11 +504,9 @@ class WasmDecoder : public Decoder { ...@@ -520,11 +504,9 @@ class WasmDecoder : public Decoder {
{ {
return 3; return 3;
} }
// Shuffles contain a byte array to determine the shuffle. // Shuffles require a byte per lane, or 16 immediate bytes.
case kExprS32x4Shuffle:
case kExprS16x8Shuffle:
case kExprS8x16Shuffle: case kExprS8x16Shuffle:
return 2 + GetShuffleMaskSize(opcode); return 2 + kSimd128Size;
default: default:
decoder->error(pc, "invalid SIMD opcode"); decoder->error(pc, "invalid SIMD opcode");
return 2; return 2;
...@@ -1558,17 +1540,16 @@ class WasmFullDecoder : public WasmDecoder { ...@@ -1558,17 +1540,16 @@ class WasmFullDecoder : public WasmDecoder {
return operand.length; return operand.length;
} }
unsigned SimdShuffleOp(WasmOpcode opcode) { unsigned Simd8x16ShuffleOp() {
SimdShuffleOperand<true> operand(this, pc_, GetShuffleMaskSize(opcode)); Simd8x16ShuffleOperand<true> operand(this, pc_);
if (Validate(pc_, opcode, operand)) { if (Validate(pc_, operand)) {
compiler::NodeVector inputs(2, zone_); compiler::NodeVector inputs(2, zone_);
inputs[1] = Pop(1, ValueType::kSimd128).node; inputs[1] = Pop(1, ValueType::kSimd128).node;
inputs[0] = Pop(0, ValueType::kSimd128).node; inputs[0] = Pop(0, ValueType::kSimd128).node;
TFNode* node = TFNode* node = BUILD(Simd8x16ShuffleOp, operand.shuffle, inputs);
BUILD(SimdShuffleOp, operand.shuffle, operand.lanes, inputs);
Push(ValueType::kSimd128, node); Push(ValueType::kSimd128, node);
} }
return operand.lanes; return 16;
} }
unsigned DecodeSimdOpcode(WasmOpcode opcode) { unsigned DecodeSimdOpcode(WasmOpcode opcode) {
...@@ -1606,10 +1587,8 @@ class WasmFullDecoder : public WasmDecoder { ...@@ -1606,10 +1587,8 @@ class WasmFullDecoder : public WasmDecoder {
len = SimdShiftOp(opcode); len = SimdShiftOp(opcode);
break; break;
} }
case kExprS32x4Shuffle:
case kExprS16x8Shuffle:
case kExprS8x16Shuffle: { case kExprS8x16Shuffle: {
len = SimdShuffleOp(opcode); len = Simd8x16ShuffleOp();
break; break;
} }
default: { default: {
......
...@@ -219,8 +219,6 @@ const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) { ...@@ -219,8 +219,6 @@ const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
CASE_S128_OP(Xor, "xor") CASE_S128_OP(Xor, "xor")
CASE_S128_OP(Not, "not") CASE_S128_OP(Not, "not")
CASE_S128_OP(Select, "select") CASE_S128_OP(Select, "select")
CASE_S32x4_OP(Shuffle, "shuffle")
CASE_S16x8_OP(Shuffle, "shuffle")
CASE_S8x16_OP(Shuffle, "shuffle") CASE_S8x16_OP(Shuffle, "shuffle")
CASE_S1x4_OP(AnyTrue, "any_true") CASE_S1x4_OP(AnyTrue, "any_true")
CASE_S1x4_OP(AllTrue, "all_true") CASE_S1x4_OP(AllTrue, "all_true")
......
...@@ -409,8 +409,6 @@ constexpr WasmCodePosition kNoCodePosition = -1; ...@@ -409,8 +409,6 @@ constexpr WasmCodePosition kNoCodePosition = -1;
V(I8x16ShrU, 0xe571, _) V(I8x16ShrU, 0xe571, _)
#define FOREACH_SIMD_MASK_OPERAND_OPCODE(V) \ #define FOREACH_SIMD_MASK_OPERAND_OPCODE(V) \
V(S32x4Shuffle, 0xe52d, s_ss) \
V(S16x8Shuffle, 0xe54c, s_ss) \
V(S8x16Shuffle, 0xe56b, s_ss) V(S8x16Shuffle, 0xe56b, s_ss)
#define FOREACH_ATOMIC_OPCODE(V) \ #define FOREACH_ATOMIC_OPCODE(V) \
......
This diff is collapsed.
...@@ -2625,8 +2625,6 @@ TEST_F(WasmOpcodeLengthTest, SimdExpressions) { ...@@ -2625,8 +2625,6 @@ TEST_F(WasmOpcodeLengthTest, SimdExpressions) {
EXPECT_LENGTH_N(3, kSimdPrefix, static_cast<byte>(kExpr##name & 0xff)); EXPECT_LENGTH_N(3, kSimdPrefix, static_cast<byte>(kExpr##name & 0xff));
FOREACH_SIMD_1_OPERAND_OPCODE(TEST_SIMD) FOREACH_SIMD_1_OPERAND_OPCODE(TEST_SIMD)
#undef TEST_SIMD #undef TEST_SIMD
EXPECT_LENGTH_N(6, kSimdPrefix, static_cast<byte>(kExprS32x4Shuffle & 0xff));
EXPECT_LENGTH_N(10, kSimdPrefix, static_cast<byte>(kExprS16x8Shuffle & 0xff));
EXPECT_LENGTH_N(18, kSimdPrefix, static_cast<byte>(kExprS8x16Shuffle & 0xff)); EXPECT_LENGTH_N(18, kSimdPrefix, static_cast<byte>(kExprS8x16Shuffle & 0xff));
#undef TEST_SIMD #undef TEST_SIMD
// test for bad simd opcode // test for bad simd opcode
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment