Commit 5d7039ea authored by bbudge's avatar bbudge Committed by Commit Bot

[WASM] Simplify SIMD shuffle opcodes.

- Eliminates S32x4Shuffle, S16x8Shuffle opcodes. All shuffles are subsumed
  by S8x16Shuffle. This aligns us with the latest WASM SIMD spec.

LOG=N
BUG=v8:6020

Review-Url: https://codereview.chromium.org/2923103003
Cr-Commit-Position: refs/heads/master@{#45929}
parent a4cf434f
......@@ -340,14 +340,6 @@ Condition FlagsConditionToCondition(FlagsCondition condition) {
UNREACHABLE();
}
// Returns the number of d-registers forming the vtbl/vtbx lookup table for a
// shuffle with the given source operands.
int GetVtblTableSize(const Simd128Register& src0, const Simd128Register& src1) {
  const bool is_unary = src0.is(src1);
  if (!is_unary) {
    // Binary shuffle: the table spans src0 and src1, which must be
    // consecutive q-registers to form a valid NEON register list.
    DCHECK_EQ(src0.code() + 1, src1.code());
    return 4;  // Two q-registers = 4 d-registers.
  }
  // Unary shuffle: the table is src0 alone (one q-register = 2 d-registers).
  return 2;
}
} // namespace
#define ASSEMBLE_CHECKED_LOAD_FP(Type) \
......@@ -2291,39 +2283,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ vtrn(Neon16, kScratchQuadReg, dst); // dst = [1, 9, 3, 11, ... 15]
break;
}
case kArmS16x8Shuffle: {
Simd128Register dst = i.OutputSimd128Register(),
src0 = i.InputSimd128Register(0),
src1 = i.InputSimd128Register(1);
DwVfpRegister table_base = src0.low();
int table_size = GetVtblTableSize(src0, src1);
// Convert the shuffle lane masks to byte masks in kScratchQuadReg.
int scratch_s_base = kScratchQuadReg.code() * 4;
for (int j = 0; j < 2; j++) {
int32_t four_lanes = i.InputInt32(2 + j);
for (int k = 0; k < 2; k++) {
uint8_t w0 = (four_lanes & 0xF) * kShortSize;
four_lanes >>= 8;
uint8_t w1 = (four_lanes & 0xF) * kShortSize;
four_lanes >>= 8;
int32_t mask = w0 | ((w0 + 1) << 8) | (w1 << 16) | ((w1 + 1) << 24);
// Ensure byte indices are in [0, 31] so masks are never NaNs.
four_lanes &= 0x1F1F1F1F;
__ vmov(SwVfpRegister::from_code(scratch_s_base + 2 * j + k),
bit_cast<float>(mask));
}
}
NeonListOperand table(table_base, table_size);
if (!dst.is(src0) && !dst.is(src1)) {
__ vtbl(dst.low(), table, kScratchQuadReg.low());
__ vtbl(dst.high(), table, kScratchQuadReg.high());
} else {
__ vtbl(kScratchQuadReg.low(), table, kScratchQuadReg.low());
__ vtbl(kScratchQuadReg.high(), table, kScratchQuadReg.high());
__ vmov(dst, kScratchQuadReg);
}
break;
}
case kArmS8x16ZipLeft: {
Simd128Register dst = i.OutputSimd128Register(),
src1 = i.InputSimd128Register(1);
......@@ -2388,7 +2347,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
src0 = i.InputSimd128Register(0),
src1 = i.InputSimd128Register(1);
DwVfpRegister table_base = src0.low();
int table_size = GetVtblTableSize(src0, src1);
// If unary shuffle, table is src0 (2 d-registers), otherwise src0 and
// src1. They must be consecutive.
int table_size = src0.is(src1) ? 2 : 4;
DCHECK_IMPLIES(!src0.is(src1), src0.code() + 1 == src1.code());
// The shuffle lane mask is a byte mask, materialize in kScratchQuadReg.
int scratch_s_base = kScratchQuadReg.code() * 4;
for (int j = 0; j < 4; j++) {
......
......@@ -246,7 +246,6 @@ namespace compiler {
V(ArmS16x8UnzipRight) \
V(ArmS16x8TransposeLeft) \
V(ArmS16x8TransposeRight) \
V(ArmS16x8Shuffle) \
V(ArmS8x16ZipLeft) \
V(ArmS8x16ZipRight) \
V(ArmS8x16UnzipLeft) \
......
......@@ -230,7 +230,6 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArmS16x8UnzipRight:
case kArmS16x8TransposeLeft:
case kArmS16x8TransposeRight:
case kArmS16x8Shuffle:
case kArmS8x16ZipLeft:
case kArmS8x16ZipRight:
case kArmS8x16UnzipLeft:
......
......@@ -2535,32 +2535,71 @@ void InstructionSelector::VisitS128Select(Node* node) {
}
namespace {
template <int LANES>
// Tries to interpret an 8x16 byte shuffle as an equivalent 32x4 word shuffle.
// A byte shuffle is word-equivalent iff every group of 4 byte indices starts
// on a 4-byte boundary and is strictly consecutive. On success, writes the
// four word-lane indices to |shuffle32x4| and returns true; otherwise returns
// false (|shuffle32x4| may be partially written).
bool TryMatch32x4Shuffle(const uint8_t* shuffle, uint8_t* shuffle32x4) {
  static const int kLanes = 4;
  static const int kLaneSize = 4;
  for (int lane = 0; lane < kLanes; ++lane) {
    const int base = lane * kLaneSize;
    // The group's first byte must align to a word boundary.
    if (shuffle[base] % kLaneSize != 0) return false;
    // The remaining bytes of the group must follow consecutively.
    for (int k = 1; k < kLaneSize; ++k) {
      if (shuffle[base + k] != shuffle[base + k - 1] + 1) return false;
    }
    shuffle32x4[lane] = shuffle[base] / kLaneSize;
  }
  return true;
}
// Tries to match a byte shuffle to a concatenation (ARM vext): the result is
// the last (kSimd128Size - offset) bytes of the first operand followed by the
// first |offset| bytes of the second. |mask| is 0xff for binary shuffles, or
// kSimd128Size - 1 for unary shuffles so the operand-select bit of each lane
// index is ignored. On success, stores the byte offset in |offset| and
// returns true.
bool TryMatchConcat(const uint8_t* shuffle, uint8_t mask, uint8_t* offset) {
  // Mask the starting index: for unary shuffles, indices >= kSimd128Size
  // alias the corresponding low lanes. Without this, shuffle[0] >=
  // kSimd128Size would make the loop bounds below negative and read
  // shuffle[] out of bounds (and store an invalid vext immediate).
  uint8_t start = shuffle[0] & mask;
  if (start >= kSimd128Size) return false;  // Not a vext of (src0, src1).
  // Indices must increase consecutively (under the mask) until they wrap...
  for (int i = 1; i < kSimd128Size - start; ++i) {
    if ((shuffle[i] & mask) != ((shuffle[i - 1] + 1) & mask)) return false;
  }
  // ...then continue from kSimd128Size, the first byte of the second operand.
  uint8_t wrap = kSimd128Size;
  for (int i = kSimd128Size - start; i < kSimd128Size; ++i, ++wrap) {
    if ((shuffle[i] & mask) != (wrap & mask)) return false;
  }
  *offset = start;
  return true;
}
struct ShuffleEntry {
uint8_t shuffle[LANES];
uint8_t shuffle[kSimd128Size];
ArchOpcode opcode;
};
static const ShuffleEntry<4> arch_s32x4_shuffles[] = {
{{0, 4, 1, 5}, kArmS32x4ZipLeft},
{{2, 6, 3, 7}, kArmS32x4ZipRight},
{{0, 2, 4, 6}, kArmS32x4UnzipLeft},
{{1, 3, 5, 7}, kArmS32x4UnzipRight},
{{0, 4, 2, 6}, kArmS32x4TransposeLeft},
{{1, 5, 3, 7}, kArmS32x4TransposeRight},
{{1, 0, 3, 2}, kArmS32x2Reverse}};
static const ShuffleEntry<8> arch_s16x8_shuffles[] = {
{{0, 8, 1, 9, 2, 10, 3, 11}, kArmS16x8ZipLeft},
{{4, 12, 5, 13, 6, 14, 7, 15}, kArmS16x8ZipRight},
{{0, 2, 4, 6, 8, 10, 12, 14}, kArmS16x8UnzipLeft},
{{1, 3, 5, 7, 9, 11, 13, 15}, kArmS16x8UnzipRight},
{{0, 8, 2, 10, 4, 12, 6, 14}, kArmS16x8TransposeLeft},
{{1, 9, 3, 11, 5, 13, 7, 15}, kArmS16x8TransposeRight},
{{3, 2, 1, 0, 7, 6, 5, 4}, kArmS16x4Reverse},
{{1, 0, 3, 2, 5, 4, 7, 6}, kArmS16x2Reverse}};
static const ShuffleEntry<16> arch_s8x16_shuffles[] = {
static const ShuffleEntry arch_shuffles[] = {
{{0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23},
kArmS32x4ZipLeft},
{{8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31},
kArmS32x4ZipRight},
{{0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27},
kArmS32x4UnzipLeft},
{{4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31},
kArmS32x4UnzipRight},
{{0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27},
kArmS32x4TransposeLeft},
{{4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31},
kArmS32x4TransposeRight},
{{4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11}, kArmS32x2Reverse},
{{0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23},
kArmS16x8ZipLeft},
{{8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31},
kArmS16x8ZipRight},
{{0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29},
kArmS16x8UnzipLeft},
{{2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31},
kArmS16x8UnzipRight},
{{0, 1, 16, 17, 4, 5, 20, 21, 8, 9, 24, 25, 12, 13, 28, 29},
kArmS16x8TransposeLeft},
{{2, 3, 18, 19, 6, 7, 22, 23, 10, 11, 26, 27, 14, 15, 30, 31},
kArmS16x8TransposeRight},
{{6, 7, 4, 5, 2, 3, 0, 1, 14, 15, 12, 13, 10, 11, 8, 9}, kArmS16x4Reverse},
{{2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13}, kArmS16x2Reverse},
{{0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23},
kArmS8x16ZipLeft},
{{8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31},
......@@ -2577,45 +2616,28 @@ static const ShuffleEntry<16> arch_s8x16_shuffles[] = {
{{3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12}, kArmS8x4Reverse},
{{1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14}, kArmS8x2Reverse}};
// Use a non-shuffle opcode to signal no match.
static const ArchOpcode kNoShuffle = kArmS128Not;
template <int LANES>
ArchOpcode TryMatchArchShuffle(const uint8_t* shuffle,
const ShuffleEntry<LANES>* table,
size_t num_entries, uint8_t mask) {
for (size_t i = 0; i < num_entries; i++) {
const ShuffleEntry<LANES>& entry = table[i];
bool TryMatchArchShuffle(const uint8_t* shuffle, const ShuffleEntry* table,
size_t num_entries, uint8_t mask, ArchOpcode* opcode) {
for (size_t i = 0; i < num_entries; ++i) {
const ShuffleEntry& entry = table[i];
int j = 0;
for (; j < LANES; j++) {
for (; j < kSimd128Size; ++j) {
if ((entry.shuffle[j] & mask) != (shuffle[j] & mask)) {
break;
}
}
if (j == LANES) return entry.opcode;
}
return kNoShuffle;
}
// Returns the bias if shuffle is a concatenation, 0 otherwise.
template <int LANES>
uint8_t TryMatchConcat(const uint8_t* shuffle, uint8_t mask) {
uint8_t start = shuffle[0];
int i = 1;
for (; i < LANES - start; i++) {
if ((shuffle[i] & mask) != ((shuffle[i - 1] + 1) & mask)) return 0;
}
uint8_t wrap = LANES;
for (; i < LANES; i++, wrap++) {
if ((shuffle[i] & mask) != (wrap & mask)) return 0;
if (j == kSimd128Size) {
*opcode = entry.opcode;
return true;
}
}
return start;
return false;
}
// Canonicalize shuffles to make pattern matching simpler. Returns a mask that
// will ignore the high bit of indices in some cases.
uint8_t CanonicalizeShuffle(InstructionSelector* selector, Node* node,
int num_lanes) {
uint8_t CanonicalizeShuffle(InstructionSelector* selector, Node* node) {
static const int kUnaryShuffleMask = kSimd128Size - 1;
const uint8_t* shuffle = OpParameter<uint8_t*>(node);
uint8_t mask = 0xff;
// If shuffle is unary, set 'mask' to ignore the high bit of the indices.
......@@ -2623,12 +2645,12 @@ uint8_t CanonicalizeShuffle(InstructionSelector* selector, Node* node,
if (selector->GetVirtualRegister(node->InputAt(0)) ==
selector->GetVirtualRegister(node->InputAt(1))) {
// unary, src0 == src1.
mask = num_lanes - 1;
mask = kUnaryShuffleMask;
} else {
bool src0_is_used = false;
bool src1_is_used = false;
for (int i = 0; i < num_lanes; i++) {
if (shuffle[i] < num_lanes) {
for (int i = 0; i < kSimd128Size; i++) {
if (shuffle[i] < kSimd128Size) {
src0_is_used = true;
} else {
src1_is_used = true;
......@@ -2636,10 +2658,10 @@ uint8_t CanonicalizeShuffle(InstructionSelector* selector, Node* node,
}
if (src0_is_used && !src1_is_used) {
node->ReplaceInput(1, node->InputAt(0));
mask = num_lanes - 1;
mask = kUnaryShuffleMask;
} else if (src1_is_used && !src0_is_used) {
node->ReplaceInput(0, node->InputAt(1));
mask = num_lanes - 1;
mask = kUnaryShuffleMask;
}
}
return mask;
......@@ -2647,7 +2669,7 @@ uint8_t CanonicalizeShuffle(InstructionSelector* selector, Node* node,
int32_t Pack4Lanes(const uint8_t* shuffle, uint8_t mask) {
int32_t result = 0;
for (int i = 3; i >= 0; i--) {
for (int i = 3; i >= 0; --i) {
result <<= 8;
result |= shuffle[i] & mask;
}
......@@ -2668,70 +2690,29 @@ void ArrangeShuffleTable(ArmOperandGenerator* g, Node* input0, Node* input1,
} // namespace
void InstructionSelector::VisitS32x4Shuffle(Node* node) {
void InstructionSelector::VisitS8x16Shuffle(Node* node) {
const uint8_t* shuffle = OpParameter<uint8_t*>(node);
uint8_t mask = CanonicalizeShuffle(this, node, 4);
ArchOpcode opcode = TryMatchArchShuffle<4>(
shuffle, arch_s32x4_shuffles, arraysize(arch_s32x4_shuffles), mask);
if (opcode != kNoShuffle) {
VisitRRRShuffle(this, opcode, node);
return;
}
uint8_t mask = CanonicalizeShuffle(this, node);
uint8_t shuffle32x4[4];
ArmOperandGenerator g(this);
uint8_t lanes = TryMatchConcat<4>(shuffle, mask);
if (lanes != 0) {
Emit(kArmS8x16Concat, g.DefineAsRegister(node),
if (TryMatch32x4Shuffle(shuffle, shuffle32x4)) {
Emit(kArmS32x4Shuffle, g.DefineAsRegister(node),
g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)),
g.UseImmediate(lanes * 4));
g.UseImmediate(Pack4Lanes(shuffle32x4, mask)));
return;
}
Emit(kArmS32x4Shuffle, g.DefineAsRegister(node),
g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)),
g.UseImmediate(Pack4Lanes(shuffle, mask)));
}
// Selects an ARM instruction sequence for a 16x8 lane shuffle, preferring
// specialized single-instruction forms over the generic table lookup.
void InstructionSelector::VisitS16x8Shuffle(Node* node) {
  const uint8_t* shuffle = OpParameter<uint8_t*>(node);
  // Canonicalization may rewrite the node's inputs for unary shuffles and
  // returns a mask used to ignore the operand-select bit where appropriate.
  uint8_t lane_mask = CanonicalizeShuffle(this, node, 8);
  // First preference: a dedicated zip/unzip/transpose/reverse instruction.
  ArchOpcode arch_opcode = TryMatchArchShuffle<8>(
      shuffle, arch_s16x8_shuffles, arraysize(arch_s16x8_shuffles), lane_mask);
  if (arch_opcode != kNoShuffle) {
    VisitRRRShuffle(this, arch_opcode, node);
    return;
  }
  ArmOperandGenerator g(this);
  Node* first = node->InputAt(0);
  Node* second = node->InputAt(1);
  // Second preference: a vector concatenation. The matched bias counts 16-bit
  // lanes; scale by 2 to get the byte offset the concat instruction expects.
  uint8_t bias = TryMatchConcat<8>(shuffle, lane_mask);
  if (bias != 0) {
    Emit(kArmS8x16Concat, g.DefineAsRegister(node), g.UseRegister(first),
         g.UseRegister(second), g.UseImmediate(bias * 2));
    return;
  }
  // Fall back to vtbl; arrange sources to form a valid lookup table.
  InstructionOperand table_src0, table_src1;
  ArrangeShuffleTable(&g, first, second, &table_src0, &table_src1);
  Emit(kArmS16x8Shuffle, g.DefineAsRegister(node), table_src0, table_src1,
       g.UseImmediate(Pack4Lanes(shuffle, lane_mask)),
       g.UseImmediate(Pack4Lanes(shuffle + 4, lane_mask)));
}
void InstructionSelector::VisitS8x16Shuffle(Node* node) {
const uint8_t* shuffle = OpParameter<uint8_t*>(node);
uint8_t mask = CanonicalizeShuffle(this, node, 16);
ArchOpcode opcode = TryMatchArchShuffle<16>(
shuffle, arch_s8x16_shuffles, arraysize(arch_s8x16_shuffles), mask);
if (opcode != kNoShuffle) {
ArchOpcode opcode;
if (TryMatchArchShuffle(shuffle, arch_shuffles, arraysize(arch_shuffles),
mask, &opcode)) {
VisitRRRShuffle(this, opcode, node);
return;
}
ArmOperandGenerator g(this);
Node* input0 = node->InputAt(0);
Node* input1 = node->InputAt(1);
uint8_t lanes = TryMatchConcat<16>(shuffle, mask);
if (lanes != 0) {
uint8_t offset;
if (TryMatchConcat(shuffle, mask, &offset)) {
Emit(kArmS8x16Concat, g.DefineAsRegister(node), g.UseRegister(input0),
g.UseRegister(input1), g.UseImmediate(lanes));
g.UseRegister(input1), g.UseImmediate(offset));
return;
}
// Code generator uses vtbl, arrange sources to form a valid lookup table.
......
......@@ -1711,10 +1711,6 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsSimd128(node), VisitS128Not(node);
case IrOpcode::kS128Select:
return MarkAsSimd128(node), VisitS128Select(node);
case IrOpcode::kS32x4Shuffle:
return MarkAsSimd128(node), VisitS32x4Shuffle(node);
case IrOpcode::kS16x8Shuffle:
return MarkAsSimd128(node), VisitS16x8Shuffle(node);
case IrOpcode::kS8x16Shuffle:
return MarkAsSimd128(node), VisitS8x16Shuffle(node);
case IrOpcode::kS1x4AnyTrue:
......@@ -2378,22 +2374,11 @@ void InstructionSelector::VisitS128Not(Node* node) { UNIMPLEMENTED(); }
#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_MIPS && \
!V8_TARGET_ARCH_MIPS64
void InstructionSelector::VisitS128Zero(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_MIPS &&
// !V8_TARGET_ARCH_MIPS64
#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_MIPS && \
!V8_TARGET_ARCH_MIPS64
void InstructionSelector::VisitS128Select(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_MIPS &&
// !V8_TARGET_ARCH_MIPS64
#if !V8_TARGET_ARCH_ARM
void InstructionSelector::VisitS32x4Shuffle(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitS16x8Shuffle(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_ARM
#if !V8_TARGET_ARCH_ARM
void InstructionSelector::VisitS8x16Shuffle(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_ARM
......
......@@ -981,22 +981,6 @@ SIMD_LANE_OP_LIST(SIMD_LANE_OPS)
SIMD_FORMAT_LIST(SIMD_SHIFT_OPS)
#undef SIMD_SHIFT_OPS
// Builds an S32x4Shuffle operator carrying a zone-allocated copy of the four
// 32-bit lane indices. The operator is pure, takes 2 value inputs and
// produces 1 value output.
const Operator* MachineOperatorBuilder::S32x4Shuffle(uint8_t shuffle[16]) {
  static const int kLaneCount = 4;
  uint8_t* lane_indices = zone_->NewArray<uint8_t>(kLaneCount);
  memcpy(lane_indices, shuffle, kLaneCount);
  return new (zone_) Operator1<uint8_t*>(IrOpcode::kS32x4Shuffle,
                                         Operator::kPure, "Shuffle", 2, 0, 0,
                                         1, 0, 0, lane_indices);
}
// Builds an S16x8Shuffle operator carrying a zone-allocated copy of the eight
// 16-bit lane indices. The operator is pure, takes 2 value inputs and
// produces 1 value output.
const Operator* MachineOperatorBuilder::S16x8Shuffle(uint8_t shuffle[16]) {
  static const int kLaneCount = 8;
  uint8_t* lane_indices = zone_->NewArray<uint8_t>(kLaneCount);
  memcpy(lane_indices, shuffle, kLaneCount);
  return new (zone_) Operator1<uint8_t*>(IrOpcode::kS16x8Shuffle,
                                         Operator::kPure, "Shuffle", 2, 0, 0,
                                         1, 0, 0, lane_indices);
}
const Operator* MachineOperatorBuilder::S8x16Shuffle(uint8_t shuffle[16]) {
uint8_t* array = zone_->NewArray<uint8_t>(16);
memcpy(array, shuffle, 16);
......
......@@ -577,8 +577,6 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* S128Not();
const Operator* S128Select();
const Operator* S32x4Shuffle(uint8_t shuffle[16]);
const Operator* S16x8Shuffle(uint8_t shuffle[16]);
const Operator* S8x16Shuffle(uint8_t shuffle[16]);
const Operator* S1x4AnyTrue();
......
......@@ -701,8 +701,6 @@
V(S128Or) \
V(S128Xor) \
V(S128Select) \
V(S32x4Shuffle) \
V(S16x8Shuffle) \
V(S8x16Shuffle) \
V(S1x4AnyTrue) \
V(S1x4AllTrue) \
......
......@@ -3584,22 +3584,11 @@ Node* WasmGraphBuilder::SimdShiftOp(wasm::WasmOpcode opcode, uint8_t shift,
}
}
Node* WasmGraphBuilder::SimdShuffleOp(uint8_t shuffle[16], unsigned lanes,
const NodeVector& inputs) {
Node* WasmGraphBuilder::Simd8x16ShuffleOp(uint8_t shuffle[16],
const NodeVector& inputs) {
has_simd_ = true;
switch (lanes) {
case 4:
return graph()->NewNode(jsgraph()->machine()->S32x4Shuffle(shuffle),
inputs[0], inputs[1]);
case 8:
return graph()->NewNode(jsgraph()->machine()->S16x8Shuffle(shuffle),
inputs[0], inputs[1]);
case 16:
return graph()->NewNode(jsgraph()->machine()->S8x16Shuffle(shuffle),
inputs[0], inputs[1]);
default:
UNREACHABLE();
}
return graph()->NewNode(jsgraph()->machine()->S8x16Shuffle(shuffle),
inputs[0], inputs[1]);
}
static void RecordFunctionCompilation(CodeEventListener::LogEventsAndTags tag,
......
......@@ -254,8 +254,7 @@ class WasmGraphBuilder {
Node* SimdShiftOp(wasm::WasmOpcode opcode, uint8_t shift,
const NodeVector& inputs);
Node* SimdShuffleOp(uint8_t shuffle[16], unsigned lanes,
const NodeVector& inputs);
Node* Simd8x16ShuffleOp(uint8_t shuffle[16], const NodeVector& inputs);
bool has_simd() const { return has_simd_; }
......
......@@ -313,15 +313,13 @@ struct SimdShiftOperand {
}
};
// Operand for SIMD shuffle operations.
// Operand for SIMD S8x16 shuffle operations.
template <bool checked>
struct SimdShuffleOperand {
uint8_t shuffle[16];
unsigned lanes;
struct Simd8x16ShuffleOperand {
uint8_t shuffle[kSimd128Size];
inline SimdShuffleOperand(Decoder* decoder, const byte* pc, unsigned lanes_) {
lanes = lanes_;
for (unsigned i = 0; i < lanes; i++) {
inline Simd8x16ShuffleOperand(Decoder* decoder, const byte* pc) {
for (uint32_t i = 0; i < kSimd128Size; ++i) {
shuffle[i] = decoder->read_u8<checked>(pc + 2 + i, "shuffle");
}
}
......
......@@ -146,21 +146,6 @@ struct Control {
}
};
namespace {
// Returns the number of lane indices encoded in the immediate bytes of the
// given shuffle opcode; aborts on any non-shuffle opcode.
inline unsigned GetShuffleMaskSize(WasmOpcode opcode) {
  if (opcode == kExprS32x4Shuffle) return 4;
  if (opcode == kExprS16x8Shuffle) return 8;
  if (opcode == kExprS8x16Shuffle) return 16;
  UNREACHABLE();
}
} // namespace
// Macros that build nodes only if there is a graph and the current SSA
// environment is reachable from start. This avoids problems with malformed
// TF graphs when decoding inputs that have unreachable code.
......@@ -421,13 +406,12 @@ class WasmDecoder : public Decoder {
}
}
inline bool Validate(const byte* pc, WasmOpcode opcode,
SimdShuffleOperand<true>& operand) {
unsigned lanes = GetShuffleMaskSize(opcode);
inline bool Validate(const byte* pc, Simd8x16ShuffleOperand<true>& operand) {
uint8_t max_lane = 0;
for (unsigned i = 0; i < lanes; i++)
for (uint32_t i = 0; i < kSimd128Size; ++i)
max_lane = std::max(max_lane, operand.shuffle[i]);
if (operand.lanes != lanes || max_lane > 2 * lanes) {
// Shuffle indices must be in [0..31] for a 16 lane shuffle.
if (max_lane > 2 * kSimd128Size) {
error(pc_ + 2, "invalid shuffle mask");
return false;
} else {
......@@ -520,11 +504,9 @@ class WasmDecoder : public Decoder {
{
return 3;
}
// Shuffles contain a byte array to determine the shuffle.
case kExprS32x4Shuffle:
case kExprS16x8Shuffle:
// Shuffles require a byte per lane, or 16 immediate bytes.
case kExprS8x16Shuffle:
return 2 + GetShuffleMaskSize(opcode);
return 2 + kSimd128Size;
default:
decoder->error(pc, "invalid SIMD opcode");
return 2;
......@@ -1558,17 +1540,16 @@ class WasmFullDecoder : public WasmDecoder {
return operand.length;
}
unsigned SimdShuffleOp(WasmOpcode opcode) {
SimdShuffleOperand<true> operand(this, pc_, GetShuffleMaskSize(opcode));
if (Validate(pc_, opcode, operand)) {
unsigned Simd8x16ShuffleOp() {
Simd8x16ShuffleOperand<true> operand(this, pc_);
if (Validate(pc_, operand)) {
compiler::NodeVector inputs(2, zone_);
inputs[1] = Pop(1, ValueType::kSimd128).node;
inputs[0] = Pop(0, ValueType::kSimd128).node;
TFNode* node =
BUILD(SimdShuffleOp, operand.shuffle, operand.lanes, inputs);
TFNode* node = BUILD(Simd8x16ShuffleOp, operand.shuffle, inputs);
Push(ValueType::kSimd128, node);
}
return operand.lanes;
return 16;
}
unsigned DecodeSimdOpcode(WasmOpcode opcode) {
......@@ -1606,10 +1587,8 @@ class WasmFullDecoder : public WasmDecoder {
len = SimdShiftOp(opcode);
break;
}
case kExprS32x4Shuffle:
case kExprS16x8Shuffle:
case kExprS8x16Shuffle: {
len = SimdShuffleOp(opcode);
len = Simd8x16ShuffleOp();
break;
}
default: {
......
......@@ -219,8 +219,6 @@ const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
CASE_S128_OP(Xor, "xor")
CASE_S128_OP(Not, "not")
CASE_S128_OP(Select, "select")
CASE_S32x4_OP(Shuffle, "shuffle")
CASE_S16x8_OP(Shuffle, "shuffle")
CASE_S8x16_OP(Shuffle, "shuffle")
CASE_S1x4_OP(AnyTrue, "any_true")
CASE_S1x4_OP(AllTrue, "all_true")
......
......@@ -409,8 +409,6 @@ constexpr WasmCodePosition kNoCodePosition = -1;
V(I8x16ShrU, 0xe571, _)
#define FOREACH_SIMD_MASK_OPERAND_OPCODE(V) \
V(S32x4Shuffle, 0xe52d, s_ss) \
V(S16x8Shuffle, 0xe54c, s_ss) \
V(S8x16Shuffle, 0xe56b, s_ss)
#define FOREACH_ATOMIC_OPCODE(V) \
......
......@@ -399,13 +399,6 @@ T RecipSqrt(T a) {
#define WASM_SIMD_I8x16_REPLACE_LANE(lane, x, y) \
x, y, WASM_SIMD_OP(kExprI8x16ReplaceLane), TO_BYTE(lane)
#define WASM_SIMD_S32x4_SHUFFLE_OP(opcode, m, x, y) \
x, y, WASM_SIMD_OP(opcode), TO_BYTE(m[0]), TO_BYTE(m[1]), TO_BYTE(m[2]), \
TO_BYTE(m[3])
#define WASM_SIMD_S16x8_SHUFFLE_OP(opcode, m, x, y) \
x, y, WASM_SIMD_OP(opcode), TO_BYTE(m[0]), TO_BYTE(m[1]), TO_BYTE(m[2]), \
TO_BYTE(m[3]), TO_BYTE(m[4]), TO_BYTE(m[5]), TO_BYTE(m[6]), \
TO_BYTE(m[7])
#define WASM_SIMD_S8x16_SHUFFLE_OP(opcode, m, x, y) \
x, y, WASM_SIMD_OP(opcode), TO_BYTE(m[0]), TO_BYTE(m[1]), TO_BYTE(m[2]), \
TO_BYTE(m[3]), TO_BYTE(m[4]), TO_BYTE(m[5]), TO_BYTE(m[6]), \
......@@ -1588,38 +1581,17 @@ void RunBinaryLaneOpTest(
src0[i] = i;
src1[i] = kElems + i;
}
switch (simd_op) {
case kExprS32x4Shuffle: {
BUILD(r,
WASM_SET_GLOBAL(0, WASM_SIMD_S32x4_SHUFFLE_OP(simd_op, expected,
WASM_GET_GLOBAL(0),
WASM_GET_GLOBAL(1))),
WASM_ONE);
break;
}
case kExprS16x8Shuffle: {
BUILD(r,
WASM_SET_GLOBAL(0, WASM_SIMD_S16x8_SHUFFLE_OP(simd_op, expected,
WASM_GET_GLOBAL(0),
WASM_GET_GLOBAL(1))),
WASM_ONE);
break;
}
case kExprS8x16Shuffle: {
BUILD(r,
WASM_SET_GLOBAL(0, WASM_SIMD_S8x16_SHUFFLE_OP(simd_op, expected,
WASM_GET_GLOBAL(0),
WASM_GET_GLOBAL(1))),
WASM_ONE);
break;
}
default: {
BUILD(r,
WASM_SET_GLOBAL(0, WASM_SIMD_BINOP(simd_op, WASM_GET_GLOBAL(0),
WASM_GET_GLOBAL(1))),
WASM_ONE);
break;
}
if (simd_op == kExprS8x16Shuffle) {
BUILD(r,
WASM_SET_GLOBAL(0, WASM_SIMD_S8x16_SHUFFLE_OP(simd_op, expected,
WASM_GET_GLOBAL(0),
WASM_GET_GLOBAL(1))),
WASM_ONE);
} else {
BUILD(r,
WASM_SET_GLOBAL(0, WASM_SIMD_BINOP(simd_op, WASM_GET_GLOBAL(0),
WASM_GET_GLOBAL(1))),
WASM_ONE);
}
CHECK_EQ(1, r.Call());
......@@ -1646,92 +1618,138 @@ WASM_SIMD_TEST(F32x4AddHoriz) {
// Test some regular shuffles that may have special handling on some targets.
// Test a normal and unary versions (where second operand isn't used).
WASM_SIMD_TEST(S32x4ZipLeft) {
RunBinaryLaneOpTest<int32_t>(kExprS32x4Shuffle, {{0, 4, 1, 5}});
RunBinaryLaneOpTest<int32_t>(kExprS32x4Shuffle, {{0, 0, 1, 1}});
RunBinaryLaneOpTest<int8_t>(
kExprS8x16Shuffle,
{{0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23}});
RunBinaryLaneOpTest<int8_t>(
kExprS8x16Shuffle, {{0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7}});
}
WASM_SIMD_TEST(S32x4ZipRight) {
RunBinaryLaneOpTest<int32_t>(kExprS32x4Shuffle, {{2, 6, 3, 7}});
RunBinaryLaneOpTest<int32_t>(kExprS32x4Shuffle, {{2, 2, 3, 3}});
RunBinaryLaneOpTest<int8_t>(
kExprS8x16Shuffle,
{{8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31}});
RunBinaryLaneOpTest<int8_t>(
kExprS8x16Shuffle,
{{8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15, 12, 13, 14, 15}});
}
WASM_SIMD_TEST(S32x4UnzipLeft) {
RunBinaryLaneOpTest<int32_t>(kExprS32x4Shuffle, {{0, 2, 4, 6}});
RunBinaryLaneOpTest<int32_t>(kExprS32x4Shuffle, {{0, 2, 0, 2}});
RunBinaryLaneOpTest<int8_t>(
kExprS8x16Shuffle,
{{0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27}});
RunBinaryLaneOpTest<int8_t>(kExprS8x16Shuffle, {{0, 1, 2, 3, 8, 9, 10, 11, 0,
1, 2, 3, 8, 9, 10, 11}});
}
WASM_SIMD_TEST(S32x4UnzipRight) {
RunBinaryLaneOpTest<int32_t>(kExprS32x4Shuffle, {{1, 3, 5, 7}});
RunBinaryLaneOpTest<int32_t>(kExprS32x4Shuffle, {{1, 3, 1, 3}});
RunBinaryLaneOpTest<int8_t>(
kExprS8x16Shuffle,
{{4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31}});
RunBinaryLaneOpTest<int8_t>(
kExprS8x16Shuffle,
{{4, 5, 6, 7, 12, 13, 14, 15, 4, 5, 6, 7, 12, 13, 14, 15}});
}
WASM_SIMD_TEST(S32x4TransposeLeft) {
RunBinaryLaneOpTest<int32_t>(kExprS32x4Shuffle, {{0, 4, 2, 6}});
RunBinaryLaneOpTest<int32_t>(kExprS32x4Shuffle, {{0, 0, 2, 2}});
RunBinaryLaneOpTest<int8_t>(
kExprS8x16Shuffle,
{{0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27}});
RunBinaryLaneOpTest<int8_t>(kExprS8x16Shuffle, {{0, 1, 2, 3, 0, 1, 2, 3, 8, 9,
10, 11, 8, 9, 10, 11}});
}
WASM_SIMD_TEST(S32x4TransposeRight) {
RunBinaryLaneOpTest<int32_t>(kExprS32x4Shuffle, {{1, 5, 3, 7}});
RunBinaryLaneOpTest<int32_t>(kExprS32x4Shuffle, {{1, 1, 3, 3}});
RunBinaryLaneOpTest<int8_t>(
kExprS8x16Shuffle,
{{4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31}});
RunBinaryLaneOpTest<int8_t>(
kExprS8x16Shuffle,
{{4, 5, 6, 7, 4, 5, 6, 7, 12, 13, 14, 15, 12, 13, 14, 15}});
}
// Reverses are only unary.
WASM_SIMD_TEST(S32x2Reverse) {
RunBinaryLaneOpTest<int32_t>(kExprS32x4Shuffle, {{1, 0, 3, 2}});
RunBinaryLaneOpTest<int8_t>(kExprS8x16Shuffle, {{4, 5, 6, 7, 0, 1, 2, 3, 12,
13, 14, 15, 8, 9, 10, 11}});
}
// Test irregular shuffle.
WASM_SIMD_TEST(S32x4Irregular) {
RunBinaryLaneOpTest<int32_t>(kExprS32x4Shuffle, {{0, 4, 4, 5}});
RunBinaryLaneOpTest<int32_t>(kExprS32x4Shuffle, {{0, 0, 0, 1}});
RunBinaryLaneOpTest<int8_t>(
kExprS8x16Shuffle,
{{0, 1, 2, 3, 16, 17, 18, 19, 16, 17, 18, 19, 20, 21, 22, 23}});
RunBinaryLaneOpTest<int8_t>(
kExprS8x16Shuffle, {{0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7}});
}
WASM_SIMD_TEST(S16x8ZipLeft) {
RunBinaryLaneOpTest<int16_t>(kExprS16x8Shuffle, {{0, 8, 1, 9, 2, 10, 3, 11}});
RunBinaryLaneOpTest<int16_t>(kExprS16x8Shuffle, {{0, 0, 1, 1, 2, 2, 3, 3}});
RunBinaryLaneOpTest<int8_t>(
kExprS8x16Shuffle,
{{0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23}});
RunBinaryLaneOpTest<int8_t>(
kExprS8x16Shuffle, {{0, 1, 0, 1, 2, 3, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7}});
}
WASM_SIMD_TEST(S16x8ZipRight) {
RunBinaryLaneOpTest<int16_t>(kExprS16x8Shuffle,
{{4, 12, 5, 13, 6, 14, 7, 15}});
RunBinaryLaneOpTest<int16_t>(kExprS16x8Shuffle, {{4, 4, 5, 5, 6, 6, 7, 7}});
RunBinaryLaneOpTest<int8_t>(
kExprS8x16Shuffle,
{{8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31}});
RunBinaryLaneOpTest<int8_t>(
kExprS8x16Shuffle,
{{8, 9, 8, 9, 10, 11, 10, 11, 12, 13, 12, 13, 14, 15, 14, 15}});
}
WASM_SIMD_TEST(S16x8UnzipLeft) {
RunBinaryLaneOpTest<int16_t>(kExprS16x8Shuffle,
{{0, 2, 4, 6, 8, 10, 12, 14}});
RunBinaryLaneOpTest<int16_t>(kExprS16x8Shuffle, {{0, 2, 4, 6, 0, 2, 4, 6}});
RunBinaryLaneOpTest<int8_t>(
kExprS8x16Shuffle,
{{0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29}});
RunBinaryLaneOpTest<int8_t>(kExprS8x16Shuffle, {{0, 1, 4, 5, 8, 9, 12, 13, 0,
1, 4, 5, 8, 9, 12, 13}});
}
WASM_SIMD_TEST(S16x8UnzipRight) {
RunBinaryLaneOpTest<int16_t>(kExprS16x8Shuffle,
{{1, 3, 5, 7, 9, 11, 13, 15}});
RunBinaryLaneOpTest<int16_t>(kExprS16x8Shuffle, {{1, 3, 5, 7, 1, 3, 5, 7}});
RunBinaryLaneOpTest<int8_t>(
kExprS8x16Shuffle,
{{2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31}});
RunBinaryLaneOpTest<int8_t>(
kExprS8x16Shuffle,
{{2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15}});
}
WASM_SIMD_TEST(S16x8TransposeLeft) {
RunBinaryLaneOpTest<int16_t>(kExprS16x8Shuffle,
{{0, 8, 2, 10, 4, 12, 6, 14}});
RunBinaryLaneOpTest<int16_t>(kExprS16x8Shuffle, {{0, 0, 2, 2, 4, 4, 6, 6}});
RunBinaryLaneOpTest<int8_t>(
kExprS8x16Shuffle,
{{0, 1, 16, 17, 4, 5, 20, 21, 8, 9, 24, 25, 12, 13, 28, 29}});
RunBinaryLaneOpTest<int8_t>(kExprS8x16Shuffle, {{0, 1, 0, 1, 4, 5, 4, 5, 8, 9,
8, 9, 12, 13, 12, 13}});
}
WASM_SIMD_TEST(S16x8TransposeRight) {
RunBinaryLaneOpTest<int16_t>(kExprS16x8Shuffle,
{{1, 9, 3, 11, 5, 13, 7, 15}});
RunBinaryLaneOpTest<int16_t>(kExprS16x8Shuffle, {{1, 1, 3, 3, 5, 5, 7, 7}});
RunBinaryLaneOpTest<int8_t>(
kExprS8x16Shuffle,
{{2, 3, 18, 19, 6, 7, 22, 23, 10, 11, 26, 27, 14, 15, 30, 31}});
RunBinaryLaneOpTest<int8_t>(
kExprS8x16Shuffle,
{{2, 3, 2, 3, 6, 7, 6, 7, 10, 11, 10, 11, 14, 15, 14, 15}});
}
WASM_SIMD_TEST(S16x4Reverse) {
RunBinaryLaneOpTest<int16_t>(kExprS16x8Shuffle, {{3, 2, 1, 0, 7, 6, 5, 4}});
RunBinaryLaneOpTest<int8_t>(kExprS8x16Shuffle, {{6, 7, 4, 5, 2, 3, 0, 1, 14,
15, 12, 13, 10, 11, 8, 9}});
}
WASM_SIMD_TEST(S16x2Reverse) {
RunBinaryLaneOpTest<int16_t>(kExprS16x8Shuffle, {{1, 0, 3, 2, 5, 4, 7, 6}});
RunBinaryLaneOpTest<int8_t>(kExprS8x16Shuffle, {{2, 3, 0, 1, 6, 7, 4, 5, 10,
11, 8, 9, 14, 15, 12, 13}});
}
WASM_SIMD_TEST(S16x8Irregular) {
RunBinaryLaneOpTest<int16_t>(kExprS16x8Shuffle, {{0, 8, 8, 0, 2, 10, 3, 11}});
RunBinaryLaneOpTest<int16_t>(kExprS16x8Shuffle, {{0, 0, 0, 0, 2, 2, 3, 3}});
RunBinaryLaneOpTest<int8_t>(
kExprS8x16Shuffle,
{{0, 1, 16, 17, 16, 17, 0, 1, 4, 5, 20, 21, 6, 7, 22, 23}});
RunBinaryLaneOpTest<int8_t>(
kExprS8x16Shuffle, {{0, 1, 0, 1, 0, 1, 0, 1, 4, 5, 4, 5, 6, 7, 6, 7}});
}
WASM_SIMD_TEST(S8x16ZipLeft) {
......@@ -1807,10 +1825,11 @@ WASM_SIMD_TEST(S8x16Irregular) {
}
// Test shuffles that concatenate the two vectors.
template <typename T>
void RunConcatOpTest(WasmOpcode simd_op) {
static const int kLanes = kSimd128Size / sizeof(T);
std::array<T, kLanes> expected;
void RunConcatOpTest() {}
WASM_SIMD_TEST(S8x16Concat) {
static const int kLanes = 16;
std::array<uint8_t, kLanes> expected;
for (int bias = 1; bias < kLanes; bias++) {
int i = 0;
// last kLanes - bias bytes of first vector.
......@@ -1821,15 +1840,9 @@ void RunConcatOpTest(WasmOpcode simd_op) {
for (int j = 0; j < bias; j++) {
expected[i++] = j + kLanes;
}
RunBinaryLaneOpTest<T>(simd_op, expected);
RunBinaryLaneOpTest(kExprS8x16Shuffle, expected);
}
}
WASM_SIMD_TEST(S32x4Concat) { RunConcatOpTest<int32_t>(kExprS32x4Shuffle); }
WASM_SIMD_TEST(S16x8Concat) { RunConcatOpTest<int16_t>(kExprS16x8Shuffle); }
WASM_SIMD_TEST(S8x16Concat) { RunConcatOpTest<int8_t>(kExprS8x16Shuffle); }
#endif // V8_TARGET_ARCH_ARM
#if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64
......
......@@ -2625,8 +2625,6 @@ TEST_F(WasmOpcodeLengthTest, SimdExpressions) {
EXPECT_LENGTH_N(3, kSimdPrefix, static_cast<byte>(kExpr##name & 0xff));
FOREACH_SIMD_1_OPERAND_OPCODE(TEST_SIMD)
#undef TEST_SIMD
EXPECT_LENGTH_N(6, kSimdPrefix, static_cast<byte>(kExprS32x4Shuffle & 0xff));
EXPECT_LENGTH_N(10, kSimdPrefix, static_cast<byte>(kExprS16x8Shuffle & 0xff));
EXPECT_LENGTH_N(18, kSimdPrefix, static_cast<byte>(kExprS8x16Shuffle & 0xff));
#undef TEST_SIMD
// test for bad simd opcode
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment