Commit 5d7039ea authored by bbudge's avatar bbudge Committed by Commit Bot

[WASM] Simplify SIMD shuffle opcodes.

- Eliminates S32x4Shuffle, S16x8Shuffle opcodes. All shuffles are subsumed
  by S8x16Shuffle. This aligns us with the latest WASM SIMD spec.

LOG=N
BUG=v8:6020

Review-Url: https://codereview.chromium.org/2923103003
Cr-Commit-Position: refs/heads/master@{#45929}
parent a4cf434f
......@@ -340,14 +340,6 @@ Condition FlagsConditionToCondition(FlagsCondition condition) {
UNREACHABLE();
}
// Returns the size, in d-registers, of the table consumed by a vtbl-based
// shuffle: 2 d-registers for a unary shuffle (both sources are the same
// register), 4 for a binary shuffle.
int GetVtblTableSize(const Simd128Register& src0, const Simd128Register& src1) {
  const bool unary = src0.is(src1);
  if (!unary) {
    // Binary shuffle: the table is src0 followed by src1, so the two source
    // registers must be consecutive.
    DCHECK_EQ(src0.code() + 1, src1.code());
  }
  return unary ? 2 : 4;
}
} // namespace
#define ASSEMBLE_CHECKED_LOAD_FP(Type) \
......@@ -2291,39 +2283,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ vtrn(Neon16, kScratchQuadReg, dst); // dst = [1, 9, 3, 11, ... 15]
break;
}
case kArmS16x8Shuffle: {
Simd128Register dst = i.OutputSimd128Register(),
src0 = i.InputSimd128Register(0),
src1 = i.InputSimd128Register(1);
DwVfpRegister table_base = src0.low();
int table_size = GetVtblTableSize(src0, src1);
// Convert the shuffle lane masks to byte masks in kScratchQuadReg.
int scratch_s_base = kScratchQuadReg.code() * 4;
for (int j = 0; j < 2; j++) {
int32_t four_lanes = i.InputInt32(2 + j);
for (int k = 0; k < 2; k++) {
uint8_t w0 = (four_lanes & 0xF) * kShortSize;
four_lanes >>= 8;
uint8_t w1 = (four_lanes & 0xF) * kShortSize;
four_lanes >>= 8;
int32_t mask = w0 | ((w0 + 1) << 8) | (w1 << 16) | ((w1 + 1) << 24);
// Ensure byte indices are in [0, 31] so masks are never NaNs.
four_lanes &= 0x1F1F1F1F;
__ vmov(SwVfpRegister::from_code(scratch_s_base + 2 * j + k),
bit_cast<float>(mask));
}
}
NeonListOperand table(table_base, table_size);
if (!dst.is(src0) && !dst.is(src1)) {
__ vtbl(dst.low(), table, kScratchQuadReg.low());
__ vtbl(dst.high(), table, kScratchQuadReg.high());
} else {
__ vtbl(kScratchQuadReg.low(), table, kScratchQuadReg.low());
__ vtbl(kScratchQuadReg.high(), table, kScratchQuadReg.high());
__ vmov(dst, kScratchQuadReg);
}
break;
}
case kArmS8x16ZipLeft: {
Simd128Register dst = i.OutputSimd128Register(),
src1 = i.InputSimd128Register(1);
......@@ -2388,7 +2347,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
src0 = i.InputSimd128Register(0),
src1 = i.InputSimd128Register(1);
DwVfpRegister table_base = src0.low();
int table_size = GetVtblTableSize(src0, src1);
// If unary shuffle, table is src0 (2 d-registers), otherwise src0 and
// src1. They must be consecutive.
int table_size = src0.is(src1) ? 2 : 4;
DCHECK_IMPLIES(!src0.is(src1), src0.code() + 1 == src1.code());
// The shuffle lane mask is a byte mask, materialize in kScratchQuadReg.
int scratch_s_base = kScratchQuadReg.code() * 4;
for (int j = 0; j < 4; j++) {
......
......@@ -246,7 +246,6 @@ namespace compiler {
V(ArmS16x8UnzipRight) \
V(ArmS16x8TransposeLeft) \
V(ArmS16x8TransposeRight) \
V(ArmS16x8Shuffle) \
V(ArmS8x16ZipLeft) \
V(ArmS8x16ZipRight) \
V(ArmS8x16UnzipLeft) \
......
......@@ -230,7 +230,6 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArmS16x8UnzipRight:
case kArmS16x8TransposeLeft:
case kArmS16x8TransposeRight:
case kArmS16x8Shuffle:
case kArmS8x16ZipLeft:
case kArmS8x16ZipRight:
case kArmS8x16UnzipLeft:
......
......@@ -1711,10 +1711,6 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsSimd128(node), VisitS128Not(node);
case IrOpcode::kS128Select:
return MarkAsSimd128(node), VisitS128Select(node);
case IrOpcode::kS32x4Shuffle:
return MarkAsSimd128(node), VisitS32x4Shuffle(node);
case IrOpcode::kS16x8Shuffle:
return MarkAsSimd128(node), VisitS16x8Shuffle(node);
case IrOpcode::kS8x16Shuffle:
return MarkAsSimd128(node), VisitS8x16Shuffle(node);
case IrOpcode::kS1x4AnyTrue:
......@@ -2378,22 +2374,11 @@ void InstructionSelector::VisitS128Not(Node* node) { UNIMPLEMENTED(); }
#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_MIPS && \
!V8_TARGET_ARCH_MIPS64
void InstructionSelector::VisitS128Zero(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_MIPS &&
// !V8_TARGET_ARCH_MIPS64
#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_MIPS && \
!V8_TARGET_ARCH_MIPS64
void InstructionSelector::VisitS128Select(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_MIPS &&
// !V8_TARGET_ARCH_MIPS64
#if !V8_TARGET_ARCH_ARM
void InstructionSelector::VisitS32x4Shuffle(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitS16x8Shuffle(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_ARM
#if !V8_TARGET_ARCH_ARM
void InstructionSelector::VisitS8x16Shuffle(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_ARM
......
......@@ -981,22 +981,6 @@ SIMD_LANE_OP_LIST(SIMD_LANE_OPS)
SIMD_FORMAT_LIST(SIMD_SHIFT_OPS)
#undef SIMD_SHIFT_OPS
// Builds a 4x32 shuffle operator over two SIMD inputs. Only the first 4
// entries of |shuffle| are retained; they are copied into zone-allocated
// storage owned by the operator.
const Operator* MachineOperatorBuilder::S32x4Shuffle(uint8_t shuffle[16]) {
  const int kLaneCount = 4;
  uint8_t* lanes = zone_->NewArray<uint8_t>(kLaneCount);
  memcpy(lanes, shuffle, kLaneCount);
  return new (zone_) Operator1<uint8_t*>(IrOpcode::kS32x4Shuffle,
                                         Operator::kPure, "Shuffle", 2, 0, 0, 1,
                                         0, 0, lanes);
}
// Builds an 8x16 shuffle operator over two SIMD inputs. Only the first 8
// entries of |shuffle| are retained; they are copied into zone-allocated
// storage owned by the operator.
const Operator* MachineOperatorBuilder::S16x8Shuffle(uint8_t shuffle[16]) {
  const int kLaneCount = 8;
  uint8_t* lanes = zone_->NewArray<uint8_t>(kLaneCount);
  memcpy(lanes, shuffle, kLaneCount);
  return new (zone_) Operator1<uint8_t*>(IrOpcode::kS16x8Shuffle,
                                         Operator::kPure, "Shuffle", 2, 0, 0, 1,
                                         0, 0, lanes);
}
const Operator* MachineOperatorBuilder::S8x16Shuffle(uint8_t shuffle[16]) {
uint8_t* array = zone_->NewArray<uint8_t>(16);
memcpy(array, shuffle, 16);
......
......@@ -577,8 +577,6 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* S128Not();
const Operator* S128Select();
const Operator* S32x4Shuffle(uint8_t shuffle[16]);
const Operator* S16x8Shuffle(uint8_t shuffle[16]);
const Operator* S8x16Shuffle(uint8_t shuffle[16]);
const Operator* S1x4AnyTrue();
......
......@@ -701,8 +701,6 @@
V(S128Or) \
V(S128Xor) \
V(S128Select) \
V(S32x4Shuffle) \
V(S16x8Shuffle) \
V(S8x16Shuffle) \
V(S1x4AnyTrue) \
V(S1x4AllTrue) \
......
......@@ -3584,22 +3584,11 @@ Node* WasmGraphBuilder::SimdShiftOp(wasm::WasmOpcode opcode, uint8_t shift,
}
}
Node* WasmGraphBuilder::SimdShuffleOp(uint8_t shuffle[16], unsigned lanes,
const NodeVector& inputs) {
Node* WasmGraphBuilder::Simd8x16ShuffleOp(uint8_t shuffle[16],
const NodeVector& inputs) {
has_simd_ = true;
switch (lanes) {
case 4:
return graph()->NewNode(jsgraph()->machine()->S32x4Shuffle(shuffle),
inputs[0], inputs[1]);
case 8:
return graph()->NewNode(jsgraph()->machine()->S16x8Shuffle(shuffle),
inputs[0], inputs[1]);
case 16:
return graph()->NewNode(jsgraph()->machine()->S8x16Shuffle(shuffle),
inputs[0], inputs[1]);
default:
UNREACHABLE();
}
return graph()->NewNode(jsgraph()->machine()->S8x16Shuffle(shuffle),
inputs[0], inputs[1]);
}
static void RecordFunctionCompilation(CodeEventListener::LogEventsAndTags tag,
......
......@@ -254,8 +254,7 @@ class WasmGraphBuilder {
Node* SimdShiftOp(wasm::WasmOpcode opcode, uint8_t shift,
const NodeVector& inputs);
Node* SimdShuffleOp(uint8_t shuffle[16], unsigned lanes,
const NodeVector& inputs);
Node* Simd8x16ShuffleOp(uint8_t shuffle[16], const NodeVector& inputs);
bool has_simd() const { return has_simd_; }
......
......@@ -313,15 +313,13 @@ struct SimdShiftOperand {
}
};
// Operand for SIMD shuffle operations.
// Operand for SIMD S8x16 shuffle operations.
template <bool checked>
struct SimdShuffleOperand {
uint8_t shuffle[16];
unsigned lanes;
struct Simd8x16ShuffleOperand {
uint8_t shuffle[kSimd128Size];
inline SimdShuffleOperand(Decoder* decoder, const byte* pc, unsigned lanes_) {
lanes = lanes_;
for (unsigned i = 0; i < lanes; i++) {
inline Simd8x16ShuffleOperand(Decoder* decoder, const byte* pc) {
for (uint32_t i = 0; i < kSimd128Size; ++i) {
shuffle[i] = decoder->read_u8<checked>(pc + 2 + i, "shuffle");
}
}
......
......@@ -146,21 +146,6 @@ struct Control {
}
};
namespace {
// Maps a SIMD shuffle opcode to the number of lane indices carried in its
// immediate shuffle mask (4, 8, or 16).
inline unsigned GetShuffleMaskSize(WasmOpcode opcode) {
  if (opcode == kExprS32x4Shuffle) return 4;
  if (opcode == kExprS16x8Shuffle) return 8;
  if (opcode == kExprS8x16Shuffle) return 16;
  // No other opcode has a shuffle mask.
  UNREACHABLE();
}
} // namespace
// Macros that build nodes only if there is a graph and the current SSA
// environment is reachable from start. This avoids problems with malformed
// TF graphs when decoding inputs that have unreachable code.
......@@ -421,13 +406,12 @@ class WasmDecoder : public Decoder {
}
}
inline bool Validate(const byte* pc, WasmOpcode opcode,
SimdShuffleOperand<true>& operand) {
unsigned lanes = GetShuffleMaskSize(opcode);
inline bool Validate(const byte* pc, Simd8x16ShuffleOperand<true>& operand) {
uint8_t max_lane = 0;
for (unsigned i = 0; i < lanes; i++)
for (uint32_t i = 0; i < kSimd128Size; ++i)
max_lane = std::max(max_lane, operand.shuffle[i]);
if (operand.lanes != lanes || max_lane > 2 * lanes) {
// Shuffle indices must be in [0..31] for a 16 lane shuffle.
if (max_lane > 2 * kSimd128Size) {
error(pc_ + 2, "invalid shuffle mask");
return false;
} else {
......@@ -520,11 +504,9 @@ class WasmDecoder : public Decoder {
{
return 3;
}
// Shuffles contain a byte array to determine the shuffle.
case kExprS32x4Shuffle:
case kExprS16x8Shuffle:
// Shuffles require a byte per lane, or 16 immediate bytes.
case kExprS8x16Shuffle:
return 2 + GetShuffleMaskSize(opcode);
return 2 + kSimd128Size;
default:
decoder->error(pc, "invalid SIMD opcode");
return 2;
......@@ -1558,17 +1540,16 @@ class WasmFullDecoder : public WasmDecoder {
return operand.length;
}
unsigned SimdShuffleOp(WasmOpcode opcode) {
SimdShuffleOperand<true> operand(this, pc_, GetShuffleMaskSize(opcode));
if (Validate(pc_, opcode, operand)) {
unsigned Simd8x16ShuffleOp() {
Simd8x16ShuffleOperand<true> operand(this, pc_);
if (Validate(pc_, operand)) {
compiler::NodeVector inputs(2, zone_);
inputs[1] = Pop(1, ValueType::kSimd128).node;
inputs[0] = Pop(0, ValueType::kSimd128).node;
TFNode* node =
BUILD(SimdShuffleOp, operand.shuffle, operand.lanes, inputs);
TFNode* node = BUILD(Simd8x16ShuffleOp, operand.shuffle, inputs);
Push(ValueType::kSimd128, node);
}
return operand.lanes;
return 16;
}
unsigned DecodeSimdOpcode(WasmOpcode opcode) {
......@@ -1606,10 +1587,8 @@ class WasmFullDecoder : public WasmDecoder {
len = SimdShiftOp(opcode);
break;
}
case kExprS32x4Shuffle:
case kExprS16x8Shuffle:
case kExprS8x16Shuffle: {
len = SimdShuffleOp(opcode);
len = Simd8x16ShuffleOp();
break;
}
default: {
......
......@@ -219,8 +219,6 @@ const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
CASE_S128_OP(Xor, "xor")
CASE_S128_OP(Not, "not")
CASE_S128_OP(Select, "select")
CASE_S32x4_OP(Shuffle, "shuffle")
CASE_S16x8_OP(Shuffle, "shuffle")
CASE_S8x16_OP(Shuffle, "shuffle")
CASE_S1x4_OP(AnyTrue, "any_true")
CASE_S1x4_OP(AllTrue, "all_true")
......
......@@ -409,8 +409,6 @@ constexpr WasmCodePosition kNoCodePosition = -1;
V(I8x16ShrU, 0xe571, _)
#define FOREACH_SIMD_MASK_OPERAND_OPCODE(V) \
V(S32x4Shuffle, 0xe52d, s_ss) \
V(S16x8Shuffle, 0xe54c, s_ss) \
V(S8x16Shuffle, 0xe56b, s_ss)
#define FOREACH_ATOMIC_OPCODE(V) \
......
This diff is collapsed.
......@@ -2625,8 +2625,6 @@ TEST_F(WasmOpcodeLengthTest, SimdExpressions) {
EXPECT_LENGTH_N(3, kSimdPrefix, static_cast<byte>(kExpr##name & 0xff));
FOREACH_SIMD_1_OPERAND_OPCODE(TEST_SIMD)
#undef TEST_SIMD
EXPECT_LENGTH_N(6, kSimdPrefix, static_cast<byte>(kExprS32x4Shuffle & 0xff));
EXPECT_LENGTH_N(10, kSimdPrefix, static_cast<byte>(kExprS16x8Shuffle & 0xff));
EXPECT_LENGTH_N(18, kSimdPrefix, static_cast<byte>(kExprS8x16Shuffle & 0xff));
#undef TEST_SIMD
// test for bad simd opcode
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment